Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-07-21

The following pull-request contains BPF updates for your *net-next* tree.

We've added 46 non-merge commits during the last 6 day(s) which contain
a total of 68 files changed, 4929 insertions(+), 526 deletions(-).

The main changes are:

1) Run BPF program on socket lookup, from Jakub.

2) Introduce cpumap, from Lorenzo.

3) s390 JIT fixes, from Ilya.

4) Teach riscv JIT to emit compressed insns, from Luke.

5) Use build-time computed BTF ids in bpf iter, from Yonghong.
====================

Purely independent overlapping changes in both filter.h and xdp.h

Signed-off-by: David S. Miller <davem@davemloft.net>

+4930 -526
+480 -3
arch/riscv/net/bpf_jit.h
··· 13 13 #include <linux/filter.h> 14 14 #include <asm/cacheflush.h> 15 15 16 + static inline bool rvc_enabled(void) 17 + { 18 + return IS_ENABLED(CONFIG_RISCV_ISA_C); 19 + } 20 + 16 21 enum { 17 22 RV_REG_ZERO = 0, /* The constant value 0 */ 18 23 RV_REG_RA = 1, /* Return address */ ··· 53 48 RV_REG_T6 = 31, 54 49 }; 55 50 51 + static inline bool is_creg(u8 reg) 52 + { 53 + return (1 << reg) & (BIT(RV_REG_FP) | 54 + BIT(RV_REG_S1) | 55 + BIT(RV_REG_A0) | 56 + BIT(RV_REG_A1) | 57 + BIT(RV_REG_A2) | 58 + BIT(RV_REG_A3) | 59 + BIT(RV_REG_A4) | 60 + BIT(RV_REG_A5)); 61 + } 62 + 56 63 struct rv_jit_context { 57 64 struct bpf_prog *prog; 58 - u32 *insns; /* RV insns */ 65 + u16 *insns; /* RV insns */ 59 66 int ninsns; 60 67 int epilogue_offset; 61 68 int *offset; /* BPF to RV */ 62 69 unsigned long flags; 63 70 int stack_size; 64 71 }; 72 + 73 + /* Convert from ninsns to bytes. */ 74 + static inline int ninsns_rvoff(int ninsns) 75 + { 76 + return ninsns << 1; 77 + } 65 78 66 79 struct rv_jit_data { 67 80 struct bpf_binary_header *header; ··· 97 74 flush_icache_range((unsigned long)start, (unsigned long)end); 98 75 } 99 76 77 + /* Emit a 4-byte riscv instruction. */ 100 78 static inline void emit(const u32 insn, struct rv_jit_context *ctx) 101 79 { 80 + if (ctx->insns) { 81 + ctx->insns[ctx->ninsns] = insn; 82 + ctx->insns[ctx->ninsns + 1] = (insn >> 16); 83 + } 84 + 85 + ctx->ninsns += 2; 86 + } 87 + 88 + /* Emit a 2-byte riscv compressed instruction. */ 89 + static inline void emitc(const u16 insn, struct rv_jit_context *ctx) 90 + { 91 + BUILD_BUG_ON(!rvc_enabled()); 92 + 102 93 if (ctx->insns) 103 94 ctx->insns[ctx->ninsns] = insn; 104 95 ··· 123 86 { 124 87 int to = ctx->epilogue_offset, from = ctx->ninsns; 125 88 126 - return (to - from) << 2; 89 + return ninsns_rvoff(to - from); 127 90 } 128 91 129 92 /* Return -1 or inverted cond. 
*/ ··· 152 115 return BPF_JSGT; 153 116 } 154 117 return -1; 118 + } 119 + 120 + static inline bool is_6b_int(long val) 121 + { 122 + return -(1L << 5) <= val && val < (1L << 5); 123 + } 124 + 125 + static inline bool is_7b_uint(unsigned long val) 126 + { 127 + return val < (1UL << 7); 128 + } 129 + 130 + static inline bool is_8b_uint(unsigned long val) 131 + { 132 + return val < (1UL << 8); 133 + } 134 + 135 + static inline bool is_9b_uint(unsigned long val) 136 + { 137 + return val < (1UL << 9); 138 + } 139 + 140 + static inline bool is_10b_int(long val) 141 + { 142 + return -(1L << 9) <= val && val < (1L << 9); 143 + } 144 + 145 + static inline bool is_10b_uint(unsigned long val) 146 + { 147 + return val < (1UL << 10); 155 148 } 156 149 157 150 static inline bool is_12b_int(long val) ··· 216 149 off++; /* BPF branch is from PC+1, RV is from PC */ 217 150 from = (insn > 0) ? ctx->offset[insn - 1] : 0; 218 151 to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; 219 - return (to - from) << 2; 152 + return ninsns_rvoff(to - from); 220 153 } 221 154 222 155 /* Instruction formats. */ ··· 272 205 u8 funct7 = (funct5 << 2) | (aq << 1) | rl; 273 206 274 207 return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode); 208 + } 209 + 210 + /* RISC-V compressed instruction formats. 
*/ 211 + 212 + static inline u16 rv_cr_insn(u8 funct4, u8 rd, u8 rs2, u8 op) 213 + { 214 + return (funct4 << 12) | (rd << 7) | (rs2 << 2) | op; 215 + } 216 + 217 + static inline u16 rv_ci_insn(u8 funct3, u32 imm6, u8 rd, u8 op) 218 + { 219 + u32 imm; 220 + 221 + imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2); 222 + return (funct3 << 13) | (rd << 7) | op | imm; 223 + } 224 + 225 + static inline u16 rv_css_insn(u8 funct3, u32 uimm, u8 rs2, u8 op) 226 + { 227 + return (funct3 << 13) | (uimm << 7) | (rs2 << 2) | op; 228 + } 229 + 230 + static inline u16 rv_ciw_insn(u8 funct3, u32 uimm, u8 rd, u8 op) 231 + { 232 + return (funct3 << 13) | (uimm << 5) | ((rd & 0x7) << 2) | op; 233 + } 234 + 235 + static inline u16 rv_cl_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rd, 236 + u8 op) 237 + { 238 + return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) | 239 + (imm_lo << 5) | ((rd & 0x7) << 2) | op; 240 + } 241 + 242 + static inline u16 rv_cs_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rs2, 243 + u8 op) 244 + { 245 + return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) | 246 + (imm_lo << 5) | ((rs2 & 0x7) << 2) | op; 247 + } 248 + 249 + static inline u16 rv_ca_insn(u8 funct6, u8 rd, u8 funct2, u8 rs2, u8 op) 250 + { 251 + return (funct6 << 10) | ((rd & 0x7) << 7) | (funct2 << 5) | 252 + ((rs2 & 0x7) << 2) | op; 253 + } 254 + 255 + static inline u16 rv_cb_insn(u8 funct3, u32 imm6, u8 funct2, u8 rd, u8 op) 256 + { 257 + u32 imm; 258 + 259 + imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2); 260 + return (funct3 << 13) | (funct2 << 10) | ((rd & 0x7) << 7) | op | imm; 275 261 } 276 262 277 263 /* Instructions shared by both RV32 and RV64. */ ··· 534 414 return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); 535 415 } 536 416 417 + /* RVC instrutions. 
*/ 418 + 419 + static inline u16 rvc_addi4spn(u8 rd, u32 imm10) 420 + { 421 + u32 imm; 422 + 423 + imm = ((imm10 & 0x30) << 2) | ((imm10 & 0x3c0) >> 4) | 424 + ((imm10 & 0x4) >> 1) | ((imm10 & 0x8) >> 3); 425 + return rv_ciw_insn(0x0, imm, rd, 0x0); 426 + } 427 + 428 + static inline u16 rvc_lw(u8 rd, u32 imm7, u8 rs1) 429 + { 430 + u32 imm_hi, imm_lo; 431 + 432 + imm_hi = (imm7 & 0x38) >> 3; 433 + imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6); 434 + return rv_cl_insn(0x2, imm_hi, rs1, imm_lo, rd, 0x0); 435 + } 436 + 437 + static inline u16 rvc_sw(u8 rs1, u32 imm7, u8 rs2) 438 + { 439 + u32 imm_hi, imm_lo; 440 + 441 + imm_hi = (imm7 & 0x38) >> 3; 442 + imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6); 443 + return rv_cs_insn(0x6, imm_hi, rs1, imm_lo, rs2, 0x0); 444 + } 445 + 446 + static inline u16 rvc_addi(u8 rd, u32 imm6) 447 + { 448 + return rv_ci_insn(0, imm6, rd, 0x1); 449 + } 450 + 451 + static inline u16 rvc_li(u8 rd, u32 imm6) 452 + { 453 + return rv_ci_insn(0x2, imm6, rd, 0x1); 454 + } 455 + 456 + static inline u16 rvc_addi16sp(u32 imm10) 457 + { 458 + u32 imm; 459 + 460 + imm = ((imm10 & 0x200) >> 4) | (imm10 & 0x10) | ((imm10 & 0x40) >> 3) | 461 + ((imm10 & 0x180) >> 6) | ((imm10 & 0x20) >> 5); 462 + return rv_ci_insn(0x3, imm, RV_REG_SP, 0x1); 463 + } 464 + 465 + static inline u16 rvc_lui(u8 rd, u32 imm6) 466 + { 467 + return rv_ci_insn(0x3, imm6, rd, 0x1); 468 + } 469 + 470 + static inline u16 rvc_srli(u8 rd, u32 imm6) 471 + { 472 + return rv_cb_insn(0x4, imm6, 0, rd, 0x1); 473 + } 474 + 475 + static inline u16 rvc_srai(u8 rd, u32 imm6) 476 + { 477 + return rv_cb_insn(0x4, imm6, 0x1, rd, 0x1); 478 + } 479 + 480 + static inline u16 rvc_andi(u8 rd, u32 imm6) 481 + { 482 + return rv_cb_insn(0x4, imm6, 0x2, rd, 0x1); 483 + } 484 + 485 + static inline u16 rvc_sub(u8 rd, u8 rs) 486 + { 487 + return rv_ca_insn(0x23, rd, 0, rs, 0x1); 488 + } 489 + 490 + static inline u16 rvc_xor(u8 rd, u8 rs) 491 + { 492 + return rv_ca_insn(0x23, rd, 0x1, rs, 0x1); 493 
+ } 494 + 495 + static inline u16 rvc_or(u8 rd, u8 rs) 496 + { 497 + return rv_ca_insn(0x23, rd, 0x2, rs, 0x1); 498 + } 499 + 500 + static inline u16 rvc_and(u8 rd, u8 rs) 501 + { 502 + return rv_ca_insn(0x23, rd, 0x3, rs, 0x1); 503 + } 504 + 505 + static inline u16 rvc_slli(u8 rd, u32 imm6) 506 + { 507 + return rv_ci_insn(0, imm6, rd, 0x2); 508 + } 509 + 510 + static inline u16 rvc_lwsp(u8 rd, u32 imm8) 511 + { 512 + u32 imm; 513 + 514 + imm = ((imm8 & 0xc0) >> 6) | (imm8 & 0x3c); 515 + return rv_ci_insn(0x2, imm, rd, 0x2); 516 + } 517 + 518 + static inline u16 rvc_jr(u8 rs1) 519 + { 520 + return rv_cr_insn(0x8, rs1, RV_REG_ZERO, 0x2); 521 + } 522 + 523 + static inline u16 rvc_mv(u8 rd, u8 rs) 524 + { 525 + return rv_cr_insn(0x8, rd, rs, 0x2); 526 + } 527 + 528 + static inline u16 rvc_jalr(u8 rs1) 529 + { 530 + return rv_cr_insn(0x9, rs1, RV_REG_ZERO, 0x2); 531 + } 532 + 533 + static inline u16 rvc_add(u8 rd, u8 rs) 534 + { 535 + return rv_cr_insn(0x9, rd, rs, 0x2); 536 + } 537 + 538 + static inline u16 rvc_swsp(u32 imm8, u8 rs2) 539 + { 540 + u32 imm; 541 + 542 + imm = (imm8 & 0x3c) | ((imm8 & 0xc0) >> 6); 543 + return rv_css_insn(0x6, imm, rs2, 0x2); 544 + } 545 + 537 546 /* 538 547 * RV64-only instructions. 539 548 * ··· 750 501 static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 751 502 { 752 503 return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); 504 + } 505 + 506 + /* RV64-only RVC instructions. 
*/ 507 + 508 + static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1) 509 + { 510 + u32 imm_hi, imm_lo; 511 + 512 + imm_hi = (imm8 & 0x38) >> 3; 513 + imm_lo = (imm8 & 0xc0) >> 6; 514 + return rv_cl_insn(0x3, imm_hi, rs1, imm_lo, rd, 0x0); 515 + } 516 + 517 + static inline u16 rvc_sd(u8 rs1, u32 imm8, u8 rs2) 518 + { 519 + u32 imm_hi, imm_lo; 520 + 521 + imm_hi = (imm8 & 0x38) >> 3; 522 + imm_lo = (imm8 & 0xc0) >> 6; 523 + return rv_cs_insn(0x7, imm_hi, rs1, imm_lo, rs2, 0x0); 524 + } 525 + 526 + static inline u16 rvc_subw(u8 rd, u8 rs) 527 + { 528 + return rv_ca_insn(0x27, rd, 0, rs, 0x1); 529 + } 530 + 531 + static inline u16 rvc_addiw(u8 rd, u32 imm6) 532 + { 533 + return rv_ci_insn(0x1, imm6, rd, 0x1); 534 + } 535 + 536 + static inline u16 rvc_ldsp(u8 rd, u32 imm9) 537 + { 538 + u32 imm; 539 + 540 + imm = ((imm9 & 0x1c0) >> 6) | (imm9 & 0x38); 541 + return rv_ci_insn(0x3, imm, rd, 0x2); 542 + } 543 + 544 + static inline u16 rvc_sdsp(u32 imm9, u8 rs2) 545 + { 546 + u32 imm; 547 + 548 + imm = (imm9 & 0x38) | ((imm9 & 0x1c0) >> 6); 549 + return rv_css_insn(0x7, imm, rs2, 0x2); 550 + } 551 + 552 + #endif /* __riscv_xlen == 64 */ 553 + 554 + /* Helper functions that emit RVC instructions when possible. 
*/ 555 + 556 + static inline void emit_jalr(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 557 + { 558 + if (rvc_enabled() && rd == RV_REG_RA && rs && !imm) 559 + emitc(rvc_jalr(rs), ctx); 560 + else if (rvc_enabled() && !rd && rs && !imm) 561 + emitc(rvc_jr(rs), ctx); 562 + else 563 + emit(rv_jalr(rd, rs, imm), ctx); 564 + } 565 + 566 + static inline void emit_mv(u8 rd, u8 rs, struct rv_jit_context *ctx) 567 + { 568 + if (rvc_enabled() && rd && rs) 569 + emitc(rvc_mv(rd, rs), ctx); 570 + else 571 + emit(rv_addi(rd, rs, 0), ctx); 572 + } 573 + 574 + static inline void emit_add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 575 + { 576 + if (rvc_enabled() && rd && rd == rs1 && rs2) 577 + emitc(rvc_add(rd, rs2), ctx); 578 + else 579 + emit(rv_add(rd, rs1, rs2), ctx); 580 + } 581 + 582 + static inline void emit_addi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 583 + { 584 + if (rvc_enabled() && rd == RV_REG_SP && rd == rs && is_10b_int(imm) && imm && !(imm & 0xf)) 585 + emitc(rvc_addi16sp(imm), ctx); 586 + else if (rvc_enabled() && is_creg(rd) && rs == RV_REG_SP && is_10b_uint(imm) && 587 + !(imm & 0x3) && imm) 588 + emitc(rvc_addi4spn(rd, imm), ctx); 589 + else if (rvc_enabled() && rd && rd == rs && imm && is_6b_int(imm)) 590 + emitc(rvc_addi(rd, imm), ctx); 591 + else 592 + emit(rv_addi(rd, rs, imm), ctx); 593 + } 594 + 595 + static inline void emit_li(u8 rd, s32 imm, struct rv_jit_context *ctx) 596 + { 597 + if (rvc_enabled() && rd && is_6b_int(imm)) 598 + emitc(rvc_li(rd, imm), ctx); 599 + else 600 + emit(rv_addi(rd, RV_REG_ZERO, imm), ctx); 601 + } 602 + 603 + static inline void emit_lui(u8 rd, s32 imm, struct rv_jit_context *ctx) 604 + { 605 + if (rvc_enabled() && rd && rd != RV_REG_SP && is_6b_int(imm) && imm) 606 + emitc(rvc_lui(rd, imm), ctx); 607 + else 608 + emit(rv_lui(rd, imm), ctx); 609 + } 610 + 611 + static inline void emit_slli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 612 + { 613 + if (rvc_enabled() && rd && rd == rs && imm && 
(u32)imm < __riscv_xlen) 614 + emitc(rvc_slli(rd, imm), ctx); 615 + else 616 + emit(rv_slli(rd, rs, imm), ctx); 617 + } 618 + 619 + static inline void emit_andi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 620 + { 621 + if (rvc_enabled() && is_creg(rd) && rd == rs && is_6b_int(imm)) 622 + emitc(rvc_andi(rd, imm), ctx); 623 + else 624 + emit(rv_andi(rd, rs, imm), ctx); 625 + } 626 + 627 + static inline void emit_srli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 628 + { 629 + if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen) 630 + emitc(rvc_srli(rd, imm), ctx); 631 + else 632 + emit(rv_srli(rd, rs, imm), ctx); 633 + } 634 + 635 + static inline void emit_srai(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 636 + { 637 + if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen) 638 + emitc(rvc_srai(rd, imm), ctx); 639 + else 640 + emit(rv_srai(rd, rs, imm), ctx); 641 + } 642 + 643 + static inline void emit_sub(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 644 + { 645 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 646 + emitc(rvc_sub(rd, rs2), ctx); 647 + else 648 + emit(rv_sub(rd, rs1, rs2), ctx); 649 + } 650 + 651 + static inline void emit_or(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 652 + { 653 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 654 + emitc(rvc_or(rd, rs2), ctx); 655 + else 656 + emit(rv_or(rd, rs1, rs2), ctx); 657 + } 658 + 659 + static inline void emit_and(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 660 + { 661 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 662 + emitc(rvc_and(rd, rs2), ctx); 663 + else 664 + emit(rv_and(rd, rs1, rs2), ctx); 665 + } 666 + 667 + static inline void emit_xor(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 668 + { 669 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 670 + emitc(rvc_xor(rd, rs2), ctx); 671 + else 672 + emit(rv_xor(rd, rs1, rs2), ctx); 673 + } 674 + 
675 + static inline void emit_lw(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx) 676 + { 677 + if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_8b_uint(off) && !(off & 0x3)) 678 + emitc(rvc_lwsp(rd, off), ctx); 679 + else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_7b_uint(off) && !(off & 0x3)) 680 + emitc(rvc_lw(rd, off, rs1), ctx); 681 + else 682 + emit(rv_lw(rd, off, rs1), ctx); 683 + } 684 + 685 + static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx) 686 + { 687 + if (rvc_enabled() && rs1 == RV_REG_SP && is_8b_uint(off) && !(off & 0x3)) 688 + emitc(rvc_swsp(off, rs2), ctx); 689 + else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_7b_uint(off) && !(off & 0x3)) 690 + emitc(rvc_sw(rs1, off, rs2), ctx); 691 + else 692 + emit(rv_sw(rs1, off, rs2), ctx); 693 + } 694 + 695 + /* RV64-only helper functions. */ 696 + #if __riscv_xlen == 64 697 + 698 + static inline void emit_addiw(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 699 + { 700 + if (rvc_enabled() && rd && rd == rs && is_6b_int(imm)) 701 + emitc(rvc_addiw(rd, imm), ctx); 702 + else 703 + emit(rv_addiw(rd, rs, imm), ctx); 704 + } 705 + 706 + static inline void emit_ld(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx) 707 + { 708 + if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_9b_uint(off) && !(off & 0x7)) 709 + emitc(rvc_ldsp(rd, off), ctx); 710 + else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_8b_uint(off) && !(off & 0x7)) 711 + emitc(rvc_ld(rd, off, rs1), ctx); 712 + else 713 + emit(rv_ld(rd, off, rs1), ctx); 714 + } 715 + 716 + static inline void emit_sd(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx) 717 + { 718 + if (rvc_enabled() && rs1 == RV_REG_SP && is_9b_uint(off) && !(off & 0x7)) 719 + emitc(rvc_sdsp(off, rs2), ctx); 720 + else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_8b_uint(off) && !(off & 0x7)) 721 + emitc(rvc_sd(rs1, off, rs2), ctx); 722 + else 723 + emit(rv_sd(rs1, off, rs2), ctx); 724 + } 725 + 726 + 
static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 727 + { 728 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 729 + emitc(rvc_subw(rd, rs2), ctx); 730 + else 731 + emit(rv_subw(rd, rs1, rs2), ctx); 753 732 } 754 733 755 734 #endif /* __riscv_xlen == 64 */
+7 -7
arch/riscv/net/bpf_jit_comp32.c
··· 644 644 645 645 e = ctx->ninsns; 646 646 /* Adjust for extra insns. */ 647 - rvoff -= (e - s) << 2; 647 + rvoff -= ninsns_rvoff(e - s); 648 648 emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); 649 649 return 0; 650 650 } ··· 713 713 if (far) { 714 714 e = ctx->ninsns; 715 715 /* Adjust for extra insns. */ 716 - rvoff -= (e - s) << 2; 716 + rvoff -= ninsns_rvoff(e - s); 717 717 emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); 718 718 } 719 719 return 0; ··· 731 731 732 732 e = ctx->ninsns; 733 733 /* Adjust for extra insns. */ 734 - rvoff -= (e - s) << 2; 734 + rvoff -= ninsns_rvoff(e - s); 735 735 736 736 if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx)) 737 737 return -1; ··· 795 795 * if (index >= max_entries) 796 796 * goto out; 797 797 */ 798 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 798 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 799 799 emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx); 800 800 801 801 /* ··· 804 804 * goto out; 805 805 */ 806 806 emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx); 807 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 807 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 808 808 emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx); 809 809 810 810 /* ··· 818 818 if (is_12b_check(off, insn)) 819 819 return -1; 820 820 emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx); 821 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 821 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 822 822 emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx); 823 823 824 824 /* ··· 1214 1214 emit_imm32(tmp2, imm, ctx); 1215 1215 src = tmp2; 1216 1216 e = ctx->ninsns; 1217 - rvoff -= (e - s) << 2; 1217 + rvoff -= ninsns_rvoff(e - s); 1218 1218 } 1219 1219 1220 1220 if (is64)
+150 -137
arch/riscv/net/bpf_jit_comp64.c
··· 132 132 * 133 133 * This also means that we need to process LSB to MSB. 134 134 */ 135 - s64 upper = (val + (1 << 11)) >> 12, lower = val & 0xfff; 135 + s64 upper = (val + (1 << 11)) >> 12; 136 + /* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw, 137 + * and addi are signed and RVC checks will perform signed comparisons. 138 + */ 139 + s64 lower = ((val & 0xfff) << 52) >> 52; 136 140 int shift; 137 141 138 142 if (is_32b_int(val)) { 139 143 if (upper) 140 - emit(rv_lui(rd, upper), ctx); 144 + emit_lui(rd, upper, ctx); 141 145 142 146 if (!upper) { 143 - emit(rv_addi(rd, RV_REG_ZERO, lower), ctx); 147 + emit_li(rd, lower, ctx); 144 148 return; 145 149 } 146 150 147 - emit(rv_addiw(rd, rd, lower), ctx); 151 + emit_addiw(rd, rd, lower, ctx); 148 152 return; 149 153 } 150 154 ··· 158 154 159 155 emit_imm(rd, upper, ctx); 160 156 161 - emit(rv_slli(rd, rd, shift), ctx); 157 + emit_slli(rd, rd, shift, ctx); 162 158 if (lower) 163 - emit(rv_addi(rd, rd, lower), ctx); 159 + emit_addi(rd, rd, lower, ctx); 164 160 } 165 161 166 162 static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) ··· 168 164 int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8; 169 165 170 166 if (seen_reg(RV_REG_RA, ctx)) { 171 - emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx); 167 + emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx); 172 168 store_offset -= 8; 173 169 } 174 - emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx); 170 + emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx); 175 171 store_offset -= 8; 176 172 if (seen_reg(RV_REG_S1, ctx)) { 177 - emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx); 173 + emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx); 178 174 store_offset -= 8; 179 175 } 180 176 if (seen_reg(RV_REG_S2, ctx)) { 181 - emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx); 177 + emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx); 182 178 store_offset -= 8; 183 179 } 184 180 if (seen_reg(RV_REG_S3, ctx)) { 185 - 
emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx); 181 + emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx); 186 182 store_offset -= 8; 187 183 } 188 184 if (seen_reg(RV_REG_S4, ctx)) { 189 - emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx); 185 + emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx); 190 186 store_offset -= 8; 191 187 } 192 188 if (seen_reg(RV_REG_S5, ctx)) { 193 - emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx); 189 + emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx); 194 190 store_offset -= 8; 195 191 } 196 192 if (seen_reg(RV_REG_S6, ctx)) { 197 - emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx); 193 + emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx); 198 194 store_offset -= 8; 199 195 } 200 196 201 - emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); 197 + emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); 202 198 /* Set return value. */ 203 199 if (!is_tail_call) 204 - emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); 205 - emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, 206 - is_tail_call ? 4 : 0), /* skip TCC init */ 207 - ctx); 200 + emit_mv(RV_REG_A0, RV_REG_A5, ctx); 201 + emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, 202 + is_tail_call ? 
4 : 0, /* skip TCC init */ 203 + ctx); 208 204 } 209 205 210 206 static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff, ··· 284 280 285 281 static void emit_zext_32(u8 reg, struct rv_jit_context *ctx) 286 282 { 287 - emit(rv_slli(reg, reg, 32), ctx); 288 - emit(rv_srli(reg, reg, 32), ctx); 283 + emit_slli(reg, reg, 32, ctx); 284 + emit_srli(reg, reg, 32, ctx); 289 285 } 290 286 291 287 static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) ··· 308 304 if (is_12b_check(off, insn)) 309 305 return -1; 310 306 emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx); 311 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 307 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 312 308 emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx); 313 309 314 310 /* if (TCC-- < 0) 315 311 * goto out; 316 312 */ 317 - emit(rv_addi(RV_REG_T1, tcc, -1), ctx); 318 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 313 + emit_addi(RV_REG_T1, tcc, -1, ctx); 314 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 319 315 emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx); 320 316 321 317 /* prog = array->ptrs[index]; 322 318 * if (!prog) 323 319 * goto out; 324 320 */ 325 - emit(rv_slli(RV_REG_T2, RV_REG_A2, 3), ctx); 326 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_A1), ctx); 321 + emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx); 322 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx); 327 323 off = offsetof(struct bpf_array, ptrs); 328 324 if (is_12b_check(off, insn)) 329 325 return -1; 330 - emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx); 331 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 326 + emit_ld(RV_REG_T2, off, RV_REG_T2, ctx); 327 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 332 328 emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx); 333 329 334 330 /* goto *(prog->bpf_func + 4); */ 335 331 off = offsetof(struct bpf_prog, bpf_func); 336 332 if (is_12b_check(off, insn)) 337 333 return -1; 338 - emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx); 339 
- emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx); 334 + emit_ld(RV_REG_T3, off, RV_REG_T2, ctx); 335 + emit_mv(RV_REG_TCC, RV_REG_T1, ctx); 340 336 __build_epilogue(true, ctx); 341 337 return 0; 342 338 } ··· 364 360 365 361 static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) 366 362 { 367 - emit(rv_addi(RV_REG_T2, *rd, 0), ctx); 363 + emit_mv(RV_REG_T2, *rd, ctx); 368 364 emit_zext_32(RV_REG_T2, ctx); 369 - emit(rv_addi(RV_REG_T1, *rs, 0), ctx); 365 + emit_mv(RV_REG_T1, *rs, ctx); 370 366 emit_zext_32(RV_REG_T1, ctx); 371 367 *rd = RV_REG_T2; 372 368 *rs = RV_REG_T1; ··· 374 370 375 371 static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) 376 372 { 377 - emit(rv_addiw(RV_REG_T2, *rd, 0), ctx); 378 - emit(rv_addiw(RV_REG_T1, *rs, 0), ctx); 373 + emit_addiw(RV_REG_T2, *rd, 0, ctx); 374 + emit_addiw(RV_REG_T1, *rs, 0, ctx); 379 375 *rd = RV_REG_T2; 380 376 *rs = RV_REG_T1; 381 377 } 382 378 383 379 static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx) 384 380 { 385 - emit(rv_addi(RV_REG_T2, *rd, 0), ctx); 381 + emit_mv(RV_REG_T2, *rd, ctx); 386 382 emit_zext_32(RV_REG_T2, ctx); 387 383 emit_zext_32(RV_REG_T1, ctx); 388 384 *rd = RV_REG_T2; ··· 390 386 391 387 static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx) 392 388 { 393 - emit(rv_addiw(RV_REG_T2, *rd, 0), ctx); 389 + emit_addiw(RV_REG_T2, *rd, 0, ctx); 394 390 *rd = RV_REG_T2; 395 391 } 396 392 ··· 436 432 if (ret) 437 433 return ret; 438 434 rd = bpf_to_rv_reg(BPF_REG_0, ctx); 439 - emit(rv_addi(rd, RV_REG_A0, 0), ctx); 435 + emit_mv(rd, RV_REG_A0, ctx); 440 436 return 0; 441 437 } 442 438 ··· 462 458 emit_zext_32(rd, ctx); 463 459 break; 464 460 } 465 - emit(is64 ? 
rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx); 461 + emit_mv(rd, rs, ctx); 466 462 if (!is64 && !aux->verifier_zext) 467 463 emit_zext_32(rd, ctx); 468 464 break; ··· 470 466 /* dst = dst OP src */ 471 467 case BPF_ALU | BPF_ADD | BPF_X: 472 468 case BPF_ALU64 | BPF_ADD | BPF_X: 473 - emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx); 469 + emit_add(rd, rd, rs, ctx); 474 470 if (!is64 && !aux->verifier_zext) 475 471 emit_zext_32(rd, ctx); 476 472 break; 477 473 case BPF_ALU | BPF_SUB | BPF_X: 478 474 case BPF_ALU64 | BPF_SUB | BPF_X: 479 - emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx); 475 + if (is64) 476 + emit_sub(rd, rd, rs, ctx); 477 + else 478 + emit_subw(rd, rd, rs, ctx); 479 + 480 480 if (!is64 && !aux->verifier_zext) 481 481 emit_zext_32(rd, ctx); 482 482 break; 483 483 case BPF_ALU | BPF_AND | BPF_X: 484 484 case BPF_ALU64 | BPF_AND | BPF_X: 485 - emit(rv_and(rd, rd, rs), ctx); 485 + emit_and(rd, rd, rs, ctx); 486 486 if (!is64 && !aux->verifier_zext) 487 487 emit_zext_32(rd, ctx); 488 488 break; 489 489 case BPF_ALU | BPF_OR | BPF_X: 490 490 case BPF_ALU64 | BPF_OR | BPF_X: 491 - emit(rv_or(rd, rd, rs), ctx); 491 + emit_or(rd, rd, rs, ctx); 492 492 if (!is64 && !aux->verifier_zext) 493 493 emit_zext_32(rd, ctx); 494 494 break; 495 495 case BPF_ALU | BPF_XOR | BPF_X: 496 496 case BPF_ALU64 | BPF_XOR | BPF_X: 497 - emit(rv_xor(rd, rd, rs), ctx); 497 + emit_xor(rd, rd, rs, ctx); 498 498 if (!is64 && !aux->verifier_zext) 499 499 emit_zext_32(rd, ctx); 500 500 break; ··· 542 534 /* dst = -dst */ 543 535 case BPF_ALU | BPF_NEG: 544 536 case BPF_ALU64 | BPF_NEG: 545 - emit(is64 ? 
rv_sub(rd, RV_REG_ZERO, rd) : 546 - rv_subw(rd, RV_REG_ZERO, rd), ctx); 537 + emit_sub(rd, RV_REG_ZERO, rd, ctx); 547 538 if (!is64 && !aux->verifier_zext) 548 539 emit_zext_32(rd, ctx); 549 540 break; ··· 551 544 case BPF_ALU | BPF_END | BPF_FROM_LE: 552 545 switch (imm) { 553 546 case 16: 554 - emit(rv_slli(rd, rd, 48), ctx); 555 - emit(rv_srli(rd, rd, 48), ctx); 547 + emit_slli(rd, rd, 48, ctx); 548 + emit_srli(rd, rd, 48, ctx); 556 549 break; 557 550 case 32: 558 551 if (!aux->verifier_zext) ··· 565 558 break; 566 559 567 560 case BPF_ALU | BPF_END | BPF_FROM_BE: 568 - emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx); 561 + emit_li(RV_REG_T2, 0, ctx); 569 562 570 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 571 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 572 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 573 - emit(rv_srli(rd, rd, 8), ctx); 563 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 564 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 565 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 566 + emit_srli(rd, rd, 8, ctx); 574 567 if (imm == 16) 575 568 goto out_be; 576 569 577 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 578 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 579 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 580 - emit(rv_srli(rd, rd, 8), ctx); 570 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 571 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 572 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 573 + emit_srli(rd, rd, 8, ctx); 581 574 582 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 583 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 584 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 585 - emit(rv_srli(rd, rd, 8), ctx); 575 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 576 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 577 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 578 + emit_srli(rd, rd, 8, ctx); 586 579 if (imm == 32) 587 580 goto out_be; 588 581 589 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 590 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 591 
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 592 - emit(rv_srli(rd, rd, 8), ctx); 582 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 583 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 584 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 585 + emit_srli(rd, rd, 8, ctx); 593 586 594 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 595 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 596 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 597 - emit(rv_srli(rd, rd, 8), ctx); 587 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 588 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 589 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 590 + emit_srli(rd, rd, 8, ctx); 598 591 599 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 600 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 601 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 602 - emit(rv_srli(rd, rd, 8), ctx); 592 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 593 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 594 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 595 + emit_srli(rd, rd, 8, ctx); 603 596 604 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 605 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 606 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 607 - emit(rv_srli(rd, rd, 8), ctx); 597 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 598 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 599 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 600 + emit_srli(rd, rd, 8, ctx); 608 601 out_be: 609 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 610 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 602 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 603 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 611 604 612 - emit(rv_addi(rd, RV_REG_T2, 0), ctx); 605 + emit_mv(rd, RV_REG_T2, ctx); 613 606 break; 614 607 615 608 /* dst = imm */ ··· 624 617 case BPF_ALU | BPF_ADD | BPF_K: 625 618 case BPF_ALU64 | BPF_ADD | BPF_K: 626 619 if (is_12b_int(imm)) { 627 - emit(is64 ? 
rv_addi(rd, rd, imm) : 628 - rv_addiw(rd, rd, imm), ctx); 620 + emit_addi(rd, rd, imm, ctx); 629 621 } else { 630 622 emit_imm(RV_REG_T1, imm, ctx); 631 - emit(is64 ? rv_add(rd, rd, RV_REG_T1) : 632 - rv_addw(rd, rd, RV_REG_T1), ctx); 623 + emit_add(rd, rd, RV_REG_T1, ctx); 633 624 } 634 625 if (!is64 && !aux->verifier_zext) 635 626 emit_zext_32(rd, ctx); ··· 635 630 case BPF_ALU | BPF_SUB | BPF_K: 636 631 case BPF_ALU64 | BPF_SUB | BPF_K: 637 632 if (is_12b_int(-imm)) { 638 - emit(is64 ? rv_addi(rd, rd, -imm) : 639 - rv_addiw(rd, rd, -imm), ctx); 633 + emit_addi(rd, rd, -imm, ctx); 640 634 } else { 641 635 emit_imm(RV_REG_T1, imm, ctx); 642 - emit(is64 ? rv_sub(rd, rd, RV_REG_T1) : 643 - rv_subw(rd, rd, RV_REG_T1), ctx); 636 + emit_sub(rd, rd, RV_REG_T1, ctx); 644 637 } 645 638 if (!is64 && !aux->verifier_zext) 646 639 emit_zext_32(rd, ctx); ··· 646 643 case BPF_ALU | BPF_AND | BPF_K: 647 644 case BPF_ALU64 | BPF_AND | BPF_K: 648 645 if (is_12b_int(imm)) { 649 - emit(rv_andi(rd, rd, imm), ctx); 646 + emit_andi(rd, rd, imm, ctx); 650 647 } else { 651 648 emit_imm(RV_REG_T1, imm, ctx); 652 - emit(rv_and(rd, rd, RV_REG_T1), ctx); 649 + emit_and(rd, rd, RV_REG_T1, ctx); 653 650 } 654 651 if (!is64 && !aux->verifier_zext) 655 652 emit_zext_32(rd, ctx); ··· 660 657 emit(rv_ori(rd, rd, imm), ctx); 661 658 } else { 662 659 emit_imm(RV_REG_T1, imm, ctx); 663 - emit(rv_or(rd, rd, RV_REG_T1), ctx); 660 + emit_or(rd, rd, RV_REG_T1, ctx); 664 661 } 665 662 if (!is64 && !aux->verifier_zext) 666 663 emit_zext_32(rd, ctx); ··· 671 668 emit(rv_xori(rd, rd, imm), ctx); 672 669 } else { 673 670 emit_imm(RV_REG_T1, imm, ctx); 674 - emit(rv_xor(rd, rd, RV_REG_T1), ctx); 671 + emit_xor(rd, rd, RV_REG_T1, ctx); 675 672 } 676 673 if (!is64 && !aux->verifier_zext) 677 674 emit_zext_32(rd, ctx); ··· 702 699 break; 703 700 case BPF_ALU | BPF_LSH | BPF_K: 704 701 case BPF_ALU64 | BPF_LSH | BPF_K: 705 - emit(is64 ? 
rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx); 702 + emit_slli(rd, rd, imm, ctx); 703 + 706 704 if (!is64 && !aux->verifier_zext) 707 705 emit_zext_32(rd, ctx); 708 706 break; 709 707 case BPF_ALU | BPF_RSH | BPF_K: 710 708 case BPF_ALU64 | BPF_RSH | BPF_K: 711 - emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx); 709 + if (is64) 710 + emit_srli(rd, rd, imm, ctx); 711 + else 712 + emit(rv_srliw(rd, rd, imm), ctx); 713 + 712 714 if (!is64 && !aux->verifier_zext) 713 715 emit_zext_32(rd, ctx); 714 716 break; 715 717 case BPF_ALU | BPF_ARSH | BPF_K: 716 718 case BPF_ALU64 | BPF_ARSH | BPF_K: 717 - emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx); 719 + if (is64) 720 + emit_srai(rd, rd, imm, ctx); 721 + else 722 + emit(rv_sraiw(rd, rd, imm), ctx); 723 + 718 724 if (!is64 && !aux->verifier_zext) 719 725 emit_zext_32(rd, ctx); 720 726 break; ··· 769 757 e = ctx->ninsns; 770 758 771 759 /* Adjust for extra insns */ 772 - rvoff -= (e - s) << 2; 760 + rvoff -= ninsns_rvoff(e - s); 773 761 } 774 762 775 763 if (BPF_OP(code) == BPF_JSET) { 776 764 /* Adjust for and */ 777 765 rvoff -= 4; 778 - emit(rv_and(RV_REG_T1, rd, rs), ctx); 766 + emit_and(RV_REG_T1, rd, rs, ctx); 779 767 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, 780 768 ctx); 781 769 } else { ··· 822 810 e = ctx->ninsns; 823 811 824 812 /* Adjust for extra insns */ 825 - rvoff -= (e - s) << 2; 813 + rvoff -= ninsns_rvoff(e - s); 826 814 emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); 827 815 break; 828 816 ··· 831 819 rvoff = rv_offset(i, off, ctx); 832 820 s = ctx->ninsns; 833 821 if (is_12b_int(imm)) { 834 - emit(rv_andi(RV_REG_T1, rd, imm), ctx); 822 + emit_andi(RV_REG_T1, rd, imm, ctx); 835 823 } else { 836 824 emit_imm(RV_REG_T1, imm, ctx); 837 - emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx); 825 + emit_and(RV_REG_T1, rd, RV_REG_T1, ctx); 838 826 } 839 827 /* For jset32, we should clear the upper 32 bits of t1, but 840 828 * sign-extension is sufficient here and saves one 
instruction, 841 829 * as t1 is used only in comparison against zero. 842 830 */ 843 831 if (!is64 && imm < 0) 844 - emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx); 832 + emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx); 845 833 e = ctx->ninsns; 846 - rvoff -= (e - s) << 2; 834 + rvoff -= ninsns_rvoff(e - s); 847 835 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); 848 836 break; 849 837 ··· 899 887 } 900 888 901 889 emit_imm(RV_REG_T1, off, ctx); 902 - emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 890 + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 903 891 emit(rv_lbu(rd, 0, RV_REG_T1), ctx); 904 892 if (insn_is_zext(&insn[1])) 905 893 return 1; ··· 911 899 } 912 900 913 901 emit_imm(RV_REG_T1, off, ctx); 914 - emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 902 + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 915 903 emit(rv_lhu(rd, 0, RV_REG_T1), ctx); 916 904 if (insn_is_zext(&insn[1])) 917 905 return 1; ··· 923 911 } 924 912 925 913 emit_imm(RV_REG_T1, off, ctx); 926 - emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 914 + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 927 915 emit(rv_lwu(rd, 0, RV_REG_T1), ctx); 928 916 if (insn_is_zext(&insn[1])) 929 917 return 1; 930 918 break; 931 919 case BPF_LDX | BPF_MEM | BPF_DW: 932 920 if (is_12b_int(off)) { 933 - emit(rv_ld(rd, off, rs), ctx); 921 + emit_ld(rd, off, rs, ctx); 934 922 break; 935 923 } 936 924 937 925 emit_imm(RV_REG_T1, off, ctx); 938 - emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 939 - emit(rv_ld(rd, 0, RV_REG_T1), ctx); 926 + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 927 + emit_ld(rd, 0, RV_REG_T1, ctx); 940 928 break; 941 929 942 930 /* ST: *(size *)(dst + off) = imm */ ··· 948 936 } 949 937 950 938 emit_imm(RV_REG_T2, off, ctx); 951 - emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); 939 + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 952 940 emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx); 953 941 break; 954 942 ··· 960 948 } 961 949 962 950 emit_imm(RV_REG_T2, off, ctx); 963 - emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); 951 + 
emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 964 952 emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx); 965 953 break; 966 954 case BPF_ST | BPF_MEM | BPF_W: 967 955 emit_imm(RV_REG_T1, imm, ctx); 968 956 if (is_12b_int(off)) { 969 - emit(rv_sw(rd, off, RV_REG_T1), ctx); 957 + emit_sw(rd, off, RV_REG_T1, ctx); 970 958 break; 971 959 } 972 960 973 961 emit_imm(RV_REG_T2, off, ctx); 974 - emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); 975 - emit(rv_sw(RV_REG_T2, 0, RV_REG_T1), ctx); 962 + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 963 + emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx); 976 964 break; 977 965 case BPF_ST | BPF_MEM | BPF_DW: 978 966 emit_imm(RV_REG_T1, imm, ctx); 979 967 if (is_12b_int(off)) { 980 - emit(rv_sd(rd, off, RV_REG_T1), ctx); 968 + emit_sd(rd, off, RV_REG_T1, ctx); 981 969 break; 982 970 } 983 971 984 972 emit_imm(RV_REG_T2, off, ctx); 985 - emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); 986 - emit(rv_sd(RV_REG_T2, 0, RV_REG_T1), ctx); 973 + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 974 + emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); 987 975 break; 988 976 989 977 /* STX: *(size *)(dst + off) = src */ ··· 994 982 } 995 983 996 984 emit_imm(RV_REG_T1, off, ctx); 997 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 985 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 998 986 emit(rv_sb(RV_REG_T1, 0, rs), ctx); 999 987 break; 1000 988 case BPF_STX | BPF_MEM | BPF_H: ··· 1004 992 } 1005 993 1006 994 emit_imm(RV_REG_T1, off, ctx); 1007 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 995 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1008 996 emit(rv_sh(RV_REG_T1, 0, rs), ctx); 1009 997 break; 1010 998 case BPF_STX | BPF_MEM | BPF_W: 1011 999 if (is_12b_int(off)) { 1012 - emit(rv_sw(rd, off, rs), ctx); 1000 + emit_sw(rd, off, rs, ctx); 1013 1001 break; 1014 1002 } 1015 1003 1016 1004 emit_imm(RV_REG_T1, off, ctx); 1017 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 1018 - emit(rv_sw(RV_REG_T1, 0, rs), ctx); 1005 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1006 + emit_sw(RV_REG_T1, 0, rs, ctx); 
1019 1007 break; 1020 1008 case BPF_STX | BPF_MEM | BPF_DW: 1021 1009 if (is_12b_int(off)) { 1022 - emit(rv_sd(rd, off, rs), ctx); 1010 + emit_sd(rd, off, rs, ctx); 1023 1011 break; 1024 1012 } 1025 1013 1026 1014 emit_imm(RV_REG_T1, off, ctx); 1027 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 1028 - emit(rv_sd(RV_REG_T1, 0, rs), ctx); 1015 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1016 + emit_sd(RV_REG_T1, 0, rs, ctx); 1029 1017 break; 1030 1018 /* STX XADD: lock *(u32 *)(dst + off) += src */ 1031 1019 case BPF_STX | BPF_XADD | BPF_W: ··· 1033 1021 case BPF_STX | BPF_XADD | BPF_DW: 1034 1022 if (off) { 1035 1023 if (is_12b_int(off)) { 1036 - emit(rv_addi(RV_REG_T1, rd, off), ctx); 1024 + emit_addi(RV_REG_T1, rd, off, ctx); 1037 1025 } else { 1038 1026 emit_imm(RV_REG_T1, off, ctx); 1039 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 1027 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1040 1028 } 1041 1029 1042 1030 rd = RV_REG_T1; ··· 1085 1073 1086 1074 /* First instruction is always setting the tail-call-counter 1087 1075 * (TCC) register. This instruction is skipped for tail calls. 1076 + * Force using a 4-byte (non-compressed) instruction. 
1088 1077 */ 1089 1078 emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx); 1090 1079 1091 - emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx); 1080 + emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx); 1092 1081 1093 1082 if (seen_reg(RV_REG_RA, ctx)) { 1094 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_RA), ctx); 1083 + emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx); 1095 1084 store_offset -= 8; 1096 1085 } 1097 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_FP), ctx); 1086 + emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx); 1098 1087 store_offset -= 8; 1099 1088 if (seen_reg(RV_REG_S1, ctx)) { 1100 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S1), ctx); 1089 + emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx); 1101 1090 store_offset -= 8; 1102 1091 } 1103 1092 if (seen_reg(RV_REG_S2, ctx)) { 1104 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S2), ctx); 1093 + emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx); 1105 1094 store_offset -= 8; 1106 1095 } 1107 1096 if (seen_reg(RV_REG_S3, ctx)) { 1108 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S3), ctx); 1097 + emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx); 1109 1098 store_offset -= 8; 1110 1099 } 1111 1100 if (seen_reg(RV_REG_S4, ctx)) { 1112 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S4), ctx); 1101 + emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx); 1113 1102 store_offset -= 8; 1114 1103 } 1115 1104 if (seen_reg(RV_REG_S5, ctx)) { 1116 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S5), ctx); 1105 + emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx); 1117 1106 store_offset -= 8; 1118 1107 } 1119 1108 if (seen_reg(RV_REG_S6, ctx)) { 1120 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S6), ctx); 1109 + emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx); 1121 1110 store_offset -= 8; 1122 1111 } 1123 1112 1124 - emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx); 1113 + emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx); 1125 1114 1126 1115 if (bpf_stack_adjust) 1127 - emit(rv_addi(RV_REG_S5, 
RV_REG_SP, bpf_stack_adjust), ctx); 1116 + emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx); 1128 1117 1129 1118 /* Program contains calls and tail calls, so RV_REG_TCC need 1130 1119 * to be saved across calls. 1131 1120 */ 1132 1121 if (seen_tail_call(ctx) && seen_call(ctx)) 1133 - emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx); 1122 + emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx); 1134 1123 1135 1124 ctx->stack_size = stack_adjust; 1136 1125 }
+3 -3
arch/riscv/net/bpf_jit_core.c
··· 73 73 74 74 if (ctx->offset) { 75 75 extra_pass = true; 76 - image_size = sizeof(u32) * ctx->ninsns; 76 + image_size = sizeof(*ctx->insns) * ctx->ninsns; 77 77 goto skip_init_ctx; 78 78 } 79 79 ··· 103 103 if (jit_data->header) 104 104 break; 105 105 106 - image_size = sizeof(u32) * ctx->ninsns; 106 + image_size = sizeof(*ctx->insns) * ctx->ninsns; 107 107 jit_data->header = 108 108 bpf_jit_binary_alloc(image_size, 109 109 &jit_data->image, ··· 114 114 goto out_offset; 115 115 } 116 116 117 - ctx->insns = (u32 *)jit_data->image; 117 + ctx->insns = (u16 *)jit_data->image; 118 118 /* 119 119 * Now, when the image is allocated, the image can 120 120 * potentially shrink more (auipc/jalr -> jal).
+41 -22
arch/s390/net/bpf_jit_comp.c
··· 489 489 } while (re <= last); 490 490 } 491 491 492 + static void bpf_skip(struct bpf_jit *jit, int size) 493 + { 494 + if (size >= 6 && !is_valid_rel(size)) { 495 + /* brcl 0xf,size */ 496 + EMIT6_PCREL_RIL(0xc0f4000000, size); 497 + size -= 6; 498 + } else if (size >= 4 && is_valid_rel(size)) { 499 + /* brc 0xf,size */ 500 + EMIT4_PCREL(0xa7f40000, size); 501 + size -= 4; 502 + } 503 + while (size >= 2) { 504 + /* bcr 0,%0 */ 505 + _EMIT2(0x0700); 506 + size -= 2; 507 + } 508 + } 509 + 492 510 /* 493 511 * Emit function prologue 494 512 * ··· 519 501 /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ 520 502 _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT); 521 503 } else { 522 - /* j tail_call_start: NOP if no tail calls are used */ 523 - EMIT4_PCREL(0xa7f40000, 6); 524 - /* bcr 0,%0 */ 525 - EMIT2(0x0700, 0, REG_0); 504 + /* 505 + * There are no tail calls. Insert nops in order to have 506 + * tail_call_start at a predictable offset. 507 + */ 508 + bpf_skip(jit, 6); 526 509 } 527 510 /* Tail calls have to skip above initialization */ 528 511 jit->tail_call_start = jit->prg; ··· 1287 1268 last = (i == fp->len - 1) ? 1 : 0; 1288 1269 if (last) 1289 1270 break; 1290 - /* j <exit> */ 1291 - EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); 1271 + if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip)) 1272 + /* brc 0xf, <exit> */ 1273 + EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip); 1274 + else 1275 + /* brcl 0xf, <exit> */ 1276 + EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip); 1292 1277 break; 1293 1278 /* 1294 1279 * Branch relative (number of skipped instructions) to offset on ··· 1440 1417 } 1441 1418 break; 1442 1419 branch_ku: 1443 - is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 1444 - /* clfi or clgfi %dst,imm */ 1445 - EMIT6_IMM(is_jmp32 ? 
0xc20f0000 : 0xc20e0000, 1446 - dst_reg, imm); 1447 - if (!is_first_pass(jit) && 1448 - can_use_rel(jit, addrs[i + off + 1])) { 1449 - /* brc mask,off */ 1450 - EMIT4_PCREL_RIC(0xa7040000, 1451 - mask >> 12, addrs[i + off + 1]); 1452 - } else { 1453 - /* brcl mask,off */ 1454 - EMIT6_PCREL_RILC(0xc0040000, 1455 - mask >> 12, addrs[i + off + 1]); 1456 - } 1457 - break; 1420 + /* lgfi %w1,imm (load sign extend imm) */ 1421 + src_reg = REG_1; 1422 + EMIT6_IMM(0xc0010000, src_reg, imm); 1423 + goto branch_xu; 1458 1424 branch_xs: 1459 1425 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 1460 1426 if (!is_first_pass(jit) && ··· 1522 1510 */ 1523 1511 static int bpf_set_addr(struct bpf_jit *jit, int i) 1524 1512 { 1525 - if (!bpf_is_new_addr_sane(jit, i)) 1513 + int delta; 1514 + 1515 + if (is_codegen_pass(jit)) { 1516 + delta = jit->prg - jit->addrs[i]; 1517 + if (delta < 0) 1518 + bpf_skip(jit, -delta); 1519 + } 1520 + if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i))) 1526 1521 return -1; 1527 1522 jit->addrs[i] = jit->prg; 1528 1523 return 0;
+3
include/linux/bpf-netns.h
··· 8 8 enum netns_bpf_attach_type { 9 9 NETNS_BPF_INVALID = -1, 10 10 NETNS_BPF_FLOW_DISSECTOR = 0, 11 + NETNS_BPF_SK_LOOKUP, 11 12 MAX_NETNS_BPF_ATTACH_TYPE 12 13 }; 13 14 ··· 18 17 switch (attach_type) { 19 18 case BPF_FLOW_DISSECTOR: 20 19 return NETNS_BPF_FLOW_DISSECTOR; 20 + case BPF_SK_LOOKUP: 21 + return NETNS_BPF_SK_LOOKUP; 21 22 default: 22 23 return NETNS_BPF_INVALID; 23 24 }
+11 -4
include/linux/bpf.h
··· 249 249 ARG_PTR_TO_INT, /* pointer to int */ 250 250 ARG_PTR_TO_LONG, /* pointer to long */ 251 251 ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ 252 + ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */ 252 253 ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ 253 254 ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ 254 255 ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ··· 668 667 struct bpf_ctx_arg_aux { 669 668 u32 offset; 670 669 enum bpf_reg_type reg_type; 670 + u32 btf_id; 671 671 }; 672 672 673 673 struct bpf_prog_aux { ··· 930 928 931 929 void bpf_prog_array_delete_safe(struct bpf_prog_array *progs, 932 930 struct bpf_prog *old_prog); 931 + int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index); 932 + int bpf_prog_array_update_at(struct bpf_prog_array *array, int index, 933 + struct bpf_prog *prog); 933 934 int bpf_prog_array_copy_info(struct bpf_prog_array *array, 934 935 u32 *prog_ids, u32 request_cnt, 935 936 u32 *prog_cnt); ··· 1277 1272 void __cpu_map_flush(void); 1278 1273 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, 1279 1274 struct net_device *dev_rx); 1275 + bool cpu_map_prog_allowed(struct bpf_map *map); 1280 1276 1281 1277 /* Return map's numa specified by userspace */ 1282 1278 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) ··· 1438 1432 return 0; 1439 1433 } 1440 1434 1435 + static inline bool cpu_map_prog_allowed(struct bpf_map *map) 1436 + { 1437 + return false; 1438 + } 1439 + 1441 1440 static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, 1442 1441 enum bpf_prog_type type) 1443 1442 { ··· 1542 1531 1543 1532 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr); 1544 1533 void bpf_map_offload_map_free(struct bpf_map *map); 1545 - void init_btf_sock_ids(struct btf *btf); 1546 1534 #else 1547 1535 static inline int bpf_prog_offload_init(struct bpf_prog 
*prog, 1548 1536 union bpf_attr *attr) ··· 1565 1555 } 1566 1556 1567 1557 static inline void bpf_map_offload_map_free(struct bpf_map *map) 1568 - { 1569 - } 1570 - static inline void init_btf_sock_ids(struct btf *btf) 1571 1558 { 1572 1559 } 1573 1560 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+2
include/linux/bpf_types.h
··· 64 64 #ifdef CONFIG_INET 65 65 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport, 66 66 struct sk_reuseport_md, struct sk_reuseport_kern) 67 + BPF_PROG_TYPE(BPF_PROG_TYPE_SK_LOOKUP, sk_lookup, 68 + struct bpf_sk_lookup, struct bpf_sk_lookup_kern) 67 69 #endif 68 70 #if defined(CONFIG_BPF_JIT) 69 71 BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops,
+37 -3
include/linux/btf_ids.h
··· 57 57 * .zero 4 58 58 * 59 59 */ 60 - #define __BTF_ID_LIST(name) \ 60 + #define __BTF_ID_LIST(name, scope) \ 61 61 asm( \ 62 62 ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ 63 - ".local " #name "; \n" \ 63 + "." #scope " " #name "; \n" \ 64 64 #name ":; \n" \ 65 65 ".popsection; \n"); \ 66 66 67 67 #define BTF_ID_LIST(name) \ 68 - __BTF_ID_LIST(name) \ 68 + __BTF_ID_LIST(name, local) \ 69 69 extern u32 name[]; 70 + 71 + #define BTF_ID_LIST_GLOBAL(name) \ 72 + __BTF_ID_LIST(name, globl) 70 73 71 74 /* 72 75 * The BTF_ID_UNUSED macro defines 4 zero bytes. ··· 93 90 #define BTF_ID_LIST(name) static u32 name[5]; 94 91 #define BTF_ID(prefix, name) 95 92 #define BTF_ID_UNUSED 93 + #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; 96 94 97 95 #endif /* CONFIG_DEBUG_INFO_BTF */ 96 + 97 + #ifdef CONFIG_NET 98 + /* Define a list of socket types which can be the argument for 99 + * skc_to_*_sock() helpers. All these sockets should have 100 + * sock_common as the first argument in its memory layout. 
101 + */ 102 + #define BTF_SOCK_TYPE_xxx \ 103 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \ 104 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \ 105 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \ 106 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \ 107 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \ 108 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \ 109 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \ 110 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \ 111 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \ 112 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ 113 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ 114 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ 115 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) 116 + 117 + enum { 118 + #define BTF_SOCK_TYPE(name, str) name, 119 + BTF_SOCK_TYPE_xxx 120 + #undef BTF_SOCK_TYPE 121 + MAX_BTF_SOCK_TYPE, 122 + }; 123 + 124 + extern u32 btf_sock_ids[]; 125 + #endif 98 126 99 127 #endif
+147
include/linux/filter.h
··· 1278 1278 1279 1279 int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len); 1280 1280 1281 + struct bpf_sk_lookup_kern { 1282 + u16 family; 1283 + u16 protocol; 1284 + struct { 1285 + __be32 saddr; 1286 + __be32 daddr; 1287 + } v4; 1288 + struct { 1289 + const struct in6_addr *saddr; 1290 + const struct in6_addr *daddr; 1291 + } v6; 1292 + __be16 sport; 1293 + u16 dport; 1294 + struct sock *selected_sk; 1295 + bool no_reuseport; 1296 + }; 1297 + 1298 + extern struct static_key_false bpf_sk_lookup_enabled; 1299 + 1300 + /* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup. 1301 + * 1302 + * Allowed return values for a BPF SK_LOOKUP program are SK_PASS and 1303 + * SK_DROP. Their meaning is as follows: 1304 + * 1305 + * SK_PASS && ctx.selected_sk != NULL: use selected_sk as lookup result 1306 + * SK_PASS && ctx.selected_sk == NULL: continue to htable-based socket lookup 1307 + * SK_DROP : terminate lookup with -ECONNREFUSED 1308 + * 1309 + * This macro aggregates return values and selected sockets from 1310 + * multiple BPF programs according to following rules in order: 1311 + * 1312 + * 1. If any program returned SK_PASS and a non-NULL ctx.selected_sk, 1313 + * macro result is SK_PASS and last ctx.selected_sk is used. 1314 + * 2. If any program returned SK_DROP return value, 1315 + * macro result is SK_DROP. 1316 + * 3. Otherwise result is SK_PASS and ctx.selected_sk is NULL. 1317 + * 1318 + * Caller must ensure that the prog array is non-NULL, and that the 1319 + * array as well as the programs it contains remain valid. 
1320 + */ 1321 + #define BPF_PROG_SK_LOOKUP_RUN_ARRAY(array, ctx, func) \ 1322 + ({ \ 1323 + struct bpf_sk_lookup_kern *_ctx = &(ctx); \ 1324 + struct bpf_prog_array_item *_item; \ 1325 + struct sock *_selected_sk = NULL; \ 1326 + bool _no_reuseport = false; \ 1327 + struct bpf_prog *_prog; \ 1328 + bool _all_pass = true; \ 1329 + u32 _ret; \ 1330 + \ 1331 + migrate_disable(); \ 1332 + _item = &(array)->items[0]; \ 1333 + while ((_prog = READ_ONCE(_item->prog))) { \ 1334 + /* restore most recent selection */ \ 1335 + _ctx->selected_sk = _selected_sk; \ 1336 + _ctx->no_reuseport = _no_reuseport; \ 1337 + \ 1338 + _ret = func(_prog, _ctx); \ 1339 + if (_ret == SK_PASS && _ctx->selected_sk) { \ 1340 + /* remember last non-NULL socket */ \ 1341 + _selected_sk = _ctx->selected_sk; \ 1342 + _no_reuseport = _ctx->no_reuseport; \ 1343 + } else if (_ret == SK_DROP && _all_pass) { \ 1344 + _all_pass = false; \ 1345 + } \ 1346 + _item++; \ 1347 + } \ 1348 + _ctx->selected_sk = _selected_sk; \ 1349 + _ctx->no_reuseport = _no_reuseport; \ 1350 + migrate_enable(); \ 1351 + _all_pass || _selected_sk ? 
SK_PASS : SK_DROP; \ 1352 + }) 1353 + 1354 + static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, 1355 + const __be32 saddr, const __be16 sport, 1356 + const __be32 daddr, const u16 dport, 1357 + struct sock **psk) 1358 + { 1359 + struct bpf_prog_array *run_array; 1360 + struct sock *selected_sk = NULL; 1361 + bool no_reuseport = false; 1362 + 1363 + rcu_read_lock(); 1364 + run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); 1365 + if (run_array) { 1366 + struct bpf_sk_lookup_kern ctx = { 1367 + .family = AF_INET, 1368 + .protocol = protocol, 1369 + .v4.saddr = saddr, 1370 + .v4.daddr = daddr, 1371 + .sport = sport, 1372 + .dport = dport, 1373 + }; 1374 + u32 act; 1375 + 1376 + act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN); 1377 + if (act == SK_PASS) { 1378 + selected_sk = ctx.selected_sk; 1379 + no_reuseport = ctx.no_reuseport; 1380 + } else { 1381 + selected_sk = ERR_PTR(-ECONNREFUSED); 1382 + } 1383 + } 1384 + rcu_read_unlock(); 1385 + *psk = selected_sk; 1386 + return no_reuseport; 1387 + } 1388 + 1389 + #if IS_ENABLED(CONFIG_IPV6) 1390 + static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, 1391 + const struct in6_addr *saddr, 1392 + const __be16 sport, 1393 + const struct in6_addr *daddr, 1394 + const u16 dport, 1395 + struct sock **psk) 1396 + { 1397 + struct bpf_prog_array *run_array; 1398 + struct sock *selected_sk = NULL; 1399 + bool no_reuseport = false; 1400 + 1401 + rcu_read_lock(); 1402 + run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); 1403 + if (run_array) { 1404 + struct bpf_sk_lookup_kern ctx = { 1405 + .family = AF_INET6, 1406 + .protocol = protocol, 1407 + .v6.saddr = saddr, 1408 + .v6.daddr = daddr, 1409 + .sport = sport, 1410 + .dport = dport, 1411 + }; 1412 + u32 act; 1413 + 1414 + act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN); 1415 + if (act == SK_PASS) { 1416 + selected_sk = ctx.selected_sk; 1417 + no_reuseport = ctx.no_reuseport; 
1418 + } else { 1419 + selected_sk = ERR_PTR(-ECONNREFUSED); 1420 + } 1421 + } 1422 + rcu_read_unlock(); 1423 + *psk = selected_sk; 1424 + return no_reuseport; 1425 + } 1426 + #endif /* IS_ENABLED(CONFIG_IPV6) */ 1427 + 1281 1428 #endif /* __LINUX_FILTER_H__ */
+29 -13
include/net/xdp.h
··· 104 104 struct net_device *dev_rx; /* used by cpumap */ 105 105 }; 106 106 107 + 107 108 static inline struct skb_shared_info * 108 109 xdp_get_shared_info_from_frame(struct xdp_frame *frame) 109 110 { ··· 113 112 return (struct skb_shared_info *)(data_hard_start + frame->frame_sz - 114 113 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); 115 114 } 115 + 116 + struct xdp_cpumap_stats { 117 + unsigned int redirect; 118 + unsigned int pass; 119 + unsigned int drop; 120 + }; 116 121 117 122 /* Clear kernel pointers in xdp_frame */ 118 123 static inline void xdp_scrub_frame(struct xdp_frame *frame) ··· 143 136 xdp->frame_sz = frame->frame_sz; 144 137 } 145 138 146 - /* Convert xdp_buff to xdp_frame */ 147 139 static inline 148 - struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) 140 + int xdp_update_frame_from_buff(struct xdp_buff *xdp, 141 + struct xdp_frame *xdp_frame) 149 142 { 150 - struct xdp_frame *xdp_frame; 151 - int metasize; 152 - int headroom; 153 - 154 - if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) 155 - return xdp_convert_zc_to_xdp_frame(xdp); 143 + int metasize, headroom; 156 144 157 145 /* Assure headroom is available for storing info */ 158 146 headroom = xdp->data - xdp->data_hard_start; 159 147 metasize = xdp->data - xdp->data_meta; 160 148 metasize = metasize > 0 ? 
metasize : 0; 161 149 if (unlikely((headroom - metasize) < sizeof(*xdp_frame))) 162 - return NULL; 150 + return -ENOSPC; 163 151 164 152 /* Catch if driver didn't reserve tailroom for skb_shared_info */ 165 153 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 166 154 XDP_WARN("Driver BUG: missing reserved tailroom"); 167 - return NULL; 155 + return -ENOSPC; 168 156 } 169 - 170 - /* Store info in top of packet */ 171 - xdp_frame = xdp->data_hard_start; 172 157 173 158 xdp_frame->data = xdp->data; 174 159 xdp_frame->len = xdp->data_end - xdp->data; 175 160 xdp_frame->headroom = headroom - sizeof(*xdp_frame); 176 161 xdp_frame->metasize = metasize; 177 162 xdp_frame->frame_sz = xdp->frame_sz; 163 + 164 + return 0; 165 + } 166 + 167 + /* Convert xdp_buff to xdp_frame */ 168 + static inline 169 + struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) 170 + { 171 + struct xdp_frame *xdp_frame; 172 + 173 + if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) 174 + return xdp_convert_zc_to_xdp_frame(xdp); 175 + 176 + /* Store info in top of packet */ 177 + xdp_frame = xdp->data_hard_start; 178 + if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0)) 179 + return NULL; 178 180 179 181 /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ 180 182 xdp_frame->mem = xdp->rxq->mem;
+12 -4
include/trace/events/xdp.h
··· 177 177 TRACE_EVENT(xdp_cpumap_kthread, 178 178 179 179 TP_PROTO(int map_id, unsigned int processed, unsigned int drops, 180 - int sched), 180 + int sched, struct xdp_cpumap_stats *xdp_stats), 181 181 182 - TP_ARGS(map_id, processed, drops, sched), 182 + TP_ARGS(map_id, processed, drops, sched, xdp_stats), 183 183 184 184 TP_STRUCT__entry( 185 185 __field(int, map_id) ··· 188 188 __field(unsigned int, drops) 189 189 __field(unsigned int, processed) 190 190 __field(int, sched) 191 + __field(unsigned int, xdp_pass) 192 + __field(unsigned int, xdp_drop) 193 + __field(unsigned int, xdp_redirect) 191 194 ), 192 195 193 196 TP_fast_assign( ··· 200 197 __entry->drops = drops; 201 198 __entry->processed = processed; 202 199 __entry->sched = sched; 200 + __entry->xdp_pass = xdp_stats->pass; 201 + __entry->xdp_drop = xdp_stats->drop; 202 + __entry->xdp_redirect = xdp_stats->redirect; 203 203 ), 204 204 205 205 TP_printk("kthread" 206 206 " cpu=%d map_id=%d action=%s" 207 207 " processed=%u drops=%u" 208 - " sched=%d", 208 + " sched=%d" 209 + " xdp_pass=%u xdp_drop=%u xdp_redirect=%u", 209 210 __entry->cpu, __entry->map_id, 210 211 __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), 211 212 __entry->processed, __entry->drops, 212 - __entry->sched) 213 + __entry->sched, 214 + __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect) 213 215 ); 214 216 215 217 TRACE_EVENT(xdp_cpumap_enqueue,
+94 -3
include/uapi/linux/bpf.h
··· 189 189 BPF_PROG_TYPE_STRUCT_OPS, 190 190 BPF_PROG_TYPE_EXT, 191 191 BPF_PROG_TYPE_LSM, 192 + BPF_PROG_TYPE_SK_LOOKUP, 192 193 }; 193 194 194 195 enum bpf_attach_type { ··· 228 227 BPF_CGROUP_INET6_GETSOCKNAME, 229 228 BPF_XDP_DEVMAP, 230 229 BPF_CGROUP_INET_SOCK_RELEASE, 230 + BPF_XDP_CPUMAP, 231 + BPF_SK_LOOKUP, 231 232 __MAX_BPF_ATTACH_TYPE 232 233 }; 233 234 ··· 2422 2419 * Look for an IPv6 socket. 2423 2420 * 2424 2421 * If the *netns* is a negative signed 32-bit integer, then the 2425 - * socket lookup table in the netns associated with the *ctx* will 2422 + * socket lookup table in the netns associated with the *ctx* 2426 2423 * will be used. For the TC hooks, this is the netns of the device 2427 2424 * in the skb. For socket hooks, this is the netns of the socket. 2428 2425 * If *netns* is any other signed 32-bit value greater than or ··· 2459 2456 * Look for an IPv6 socket. 2460 2457 * 2461 2458 * If the *netns* is a negative signed 32-bit integer, then the 2462 - * socket lookup table in the netns associated with the *ctx* will 2459 + * socket lookup table in the netns associated with the *ctx* 2463 2460 * will be used. For the TC hooks, this is the netns of the device 2464 2461 * in the skb. For socket hooks, this is the netns of the socket. 2465 2462 * If *netns* is any other signed 32-bit value greater than or ··· 3071 3068 * 3072 3069 * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) 3073 3070 * Description 3071 + * Helper is overloaded depending on BPF program type. This 3072 + * description applies to **BPF_PROG_TYPE_SCHED_CLS** and 3073 + * **BPF_PROG_TYPE_SCHED_ACT** programs. 3074 + * 3074 3075 * Assign the *sk* to the *skb*. When combined with appropriate 3075 3076 * routing configuration to receive the packet towards the socket, 3076 3077 * will cause *skb* to be delivered to the specified socket. ··· 3099 3092 * 3100 3093 * **-ESOCKTNOSUPPORT** if the socket type is not supported 3101 3094 * (reuseport). 
3095 + * 3096 + * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) 3097 + * Description 3098 + * Helper is overloaded depending on BPF program type. This 3099 + * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs. 3100 + * 3101 + * Select the *sk* as a result of a socket lookup. 3102 + * 3103 + * For the operation to succeed passed socket must be compatible 3104 + * with the packet description provided by the *ctx* object. 3105 + * 3106 + * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must 3107 + * be an exact match. While IP family (**AF_INET** or 3108 + * **AF_INET6**) must be compatible, that is IPv6 sockets 3109 + * that are not v6-only can be selected for IPv4 packets. 3110 + * 3111 + * Only TCP listeners and UDP unconnected sockets can be 3112 + * selected. *sk* can also be NULL to reset any previous 3113 + * selection. 3114 + * 3115 + * *flags* argument can combination of following values: 3116 + * 3117 + * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous 3118 + * socket selection, potentially done by a BPF program 3119 + * that ran before us. 3120 + * 3121 + * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip 3122 + * load-balancing within reuseport group for the socket 3123 + * being selected. 3124 + * 3125 + * On success *ctx->sk* will point to the selected socket. 3126 + * 3127 + * Return 3128 + * 0 on success, or a negative errno in case of failure. 3129 + * 3130 + * * **-EAFNOSUPPORT** if socket family (*sk->family*) is 3131 + * not compatible with packet family (*ctx->family*). 3132 + * 3133 + * * **-EEXIST** if socket has been already selected, 3134 + * potentially by another program, and 3135 + * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified. 3136 + * 3137 + * * **-EINVAL** if unsupported flags were specified. 3138 + * 3139 + * * **-EPROTOTYPE** if socket L4 protocol 3140 + * (*sk->protocol*) doesn't match packet protocol 3141 + * (*ctx->protocol*). 
3142 + * 3143 + * * **-ESOCKTNOSUPPORT** if socket is not in allowed 3144 + * state (TCP listening or UDP unconnected). 3102 3145 * 3103 3146 * u64 bpf_ktime_get_boot_ns(void) 3104 3147 * Description ··· 3663 3606 BPF_RINGBUF_HDR_SZ = 8, 3664 3607 }; 3665 3608 3609 + /* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */ 3610 + enum { 3611 + BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0), 3612 + BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1), 3613 + }; 3614 + 3666 3615 /* Mode for BPF_FUNC_skb_adjust_room helper. */ 3667 3616 enum bpf_adj_room_mode { 3668 3617 BPF_ADJ_ROOM_NET, ··· 3912 3849 } bpf_prog; 3913 3850 }; 3914 3851 3852 + /* CPUMAP map-value layout 3853 + * 3854 + * The struct data-layout of map-value is a configuration interface. 3855 + * New members can only be added to the end of this structure. 3856 + */ 3857 + struct bpf_cpumap_val { 3858 + __u32 qsize; /* queue size to remote target CPU */ 3859 + union { 3860 + int fd; /* prog fd on map write */ 3861 + __u32 id; /* prog id on map read */ 3862 + } bpf_prog; 3863 + }; 3864 + 3915 3865 enum sk_action { 3916 3866 SK_DROP = 0, 3917 3867 SK_PASS, ··· 4062 3986 4063 3987 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed 4064 3988 * by user and intended to be used by socket (e.g. to bind to, depends on 4065 - * attach attach type). 3989 + * attach type). 4066 3990 */ 4067 3991 struct bpf_sock_addr { 4068 3992 __u32 user_family; /* Allows 4-byte read, but no write. */ ··· 4411 4335 __u32 pid; 4412 4336 __u32 tgid; 4413 4337 }; 4338 + 4339 + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. 
*/ 4340 + struct bpf_sk_lookup { 4341 + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ 4342 + 4343 + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ 4344 + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ 4345 + __u32 remote_ip4; /* Network byte order */ 4346 + __u32 remote_ip6[4]; /* Network byte order */ 4347 + __u32 remote_port; /* Network byte order */ 4348 + __u32 local_ip4; /* Network byte order */ 4349 + __u32 local_ip6[4]; /* Network byte order */ 4350 + __u32 local_port; /* Host byte order */ 4351 + }; 4352 + 4414 4353 #endif /* _UAPI__LINUX_BPF_H__ */
+3 -3
kernel/bpf/btf.c
··· 3672 3672 goto errout; 3673 3673 3674 3674 bpf_struct_ops_init(btf, log); 3675 - init_btf_sock_ids(btf); 3676 3675 3677 3676 btf_verifier_env_free(env); 3678 3677 refcount_set(&btf->refcnt, 1); ··· 3817 3818 return true; 3818 3819 3819 3820 /* this is a pointer to another type */ 3820 - info->reg_type = PTR_TO_BTF_ID; 3821 3821 for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { 3822 3822 const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; 3823 3823 3824 3824 if (ctx_arg_info->offset == off) { 3825 3825 info->reg_type = ctx_arg_info->reg_type; 3826 - break; 3826 + info->btf_id = ctx_arg_info->btf_id; 3827 + return true; 3827 3828 } 3828 3829 } 3829 3830 3831 + info->reg_type = PTR_TO_BTF_ID; 3830 3832 if (tgt_prog) { 3831 3833 ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg); 3832 3834 if (ret > 0) {
+55
kernel/bpf/core.c
··· 1958 1958 } 1959 1959 } 1960 1960 1961 + /** 1962 + * bpf_prog_array_delete_safe_at() - Replaces the program at the given 1963 + * index into the program array with 1964 + * a dummy no-op program. 1965 + * @array: a bpf_prog_array 1966 + * @index: the index of the program to replace 1967 + * 1968 + * Skips over dummy programs, by not counting them, when calculating 1969 + * the the position of the program to replace. 1970 + * 1971 + * Return: 1972 + * * 0 - Success 1973 + * * -EINVAL - Invalid index value. Must be a non-negative integer. 1974 + * * -ENOENT - Index out of range 1975 + */ 1976 + int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index) 1977 + { 1978 + return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog); 1979 + } 1980 + 1981 + /** 1982 + * bpf_prog_array_update_at() - Updates the program at the given index 1983 + * into the program array. 1984 + * @array: a bpf_prog_array 1985 + * @index: the index of the program to update 1986 + * @prog: the program to insert into the array 1987 + * 1988 + * Skips over dummy programs, by not counting them, when calculating 1989 + * the position of the program to update. 1990 + * 1991 + * Return: 1992 + * * 0 - Success 1993 + * * -EINVAL - Invalid index value. Must be a non-negative integer. 1994 + * * -ENOENT - Index out of range 1995 + */ 1996 + int bpf_prog_array_update_at(struct bpf_prog_array *array, int index, 1997 + struct bpf_prog *prog) 1998 + { 1999 + struct bpf_prog_array_item *item; 2000 + 2001 + if (unlikely(index < 0)) 2002 + return -EINVAL; 2003 + 2004 + for (item = array->items; item->prog; item++) { 2005 + if (item->prog == &dummy_bpf_prog.prog) 2006 + continue; 2007 + if (!index) { 2008 + WRITE_ONCE(item->prog, prog); 2009 + return 0; 2010 + } 2011 + index--; 2012 + } 2013 + return -ENOENT; 2014 + } 2015 + 1961 2016 int bpf_prog_array_copy(struct bpf_prog_array *old_array, 1962 2017 struct bpf_prog *exclude_prog, 1963 2018 struct bpf_prog *include_prog,
+140 -27
kernel/bpf/cpumap.c
··· 52 52 struct bpf_cpu_map_entry { 53 53 u32 cpu; /* kthread CPU and map index */ 54 54 int map_id; /* Back reference to map */ 55 - u32 qsize; /* Queue size placeholder for map lookup */ 56 55 57 56 /* XDP can run multiple RX-ring queues, need __percpu enqueue store */ 58 57 struct xdp_bulk_queue __percpu *bulkq; ··· 61 62 /* Queue with potential multi-producers, and single-consumer kthread */ 62 63 struct ptr_ring *queue; 63 64 struct task_struct *kthread; 64 - struct work_struct kthread_stop_wq; 65 + 66 + struct bpf_cpumap_val value; 67 + struct bpf_prog *prog; 65 68 66 69 atomic_t refcnt; /* Control when this struct can be free'ed */ 67 70 struct rcu_head rcu; 71 + 72 + struct work_struct kthread_stop_wq; 68 73 }; 69 74 70 75 struct bpf_cpu_map { ··· 83 80 84 81 static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) 85 82 { 83 + u32 value_size = attr->value_size; 86 84 struct bpf_cpu_map *cmap; 87 85 int err = -ENOMEM; 88 86 u64 cost; ··· 94 90 95 91 /* check sanity of attributes */ 96 92 if (attr->max_entries == 0 || attr->key_size != 4 || 97 - attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) 93 + (value_size != offsetofend(struct bpf_cpumap_val, qsize) && 94 + value_size != offsetofend(struct bpf_cpumap_val, bpf_prog.fd)) || 95 + attr->map_flags & ~BPF_F_NUMA_NODE) 98 96 return ERR_PTR(-EINVAL); 99 97 100 98 cmap = kzalloc(sizeof(*cmap), GFP_USER); ··· 218 212 static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) 219 213 { 220 214 if (atomic_dec_and_test(&rcpu->refcnt)) { 215 + if (rcpu->prog) 216 + bpf_prog_put(rcpu->prog); 221 217 /* The queue should be empty at this point */ 222 218 __cpu_map_ring_cleanup(rcpu->queue); 223 219 ptr_ring_cleanup(rcpu->queue, NULL); 224 220 kfree(rcpu->queue); 225 221 kfree(rcpu); 226 222 } 223 + } 224 + 225 + static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, 226 + void **frames, int n, 227 + struct xdp_cpumap_stats *stats) 228 + { 229 + struct xdp_rxq_info rxq; 230 + struct 
xdp_buff xdp; 231 + int i, nframes = 0; 232 + 233 + if (!rcpu->prog) 234 + return n; 235 + 236 + rcu_read_lock_bh(); 237 + 238 + xdp_set_return_frame_no_direct(); 239 + xdp.rxq = &rxq; 240 + 241 + for (i = 0; i < n; i++) { 242 + struct xdp_frame *xdpf = frames[i]; 243 + u32 act; 244 + int err; 245 + 246 + rxq.dev = xdpf->dev_rx; 247 + rxq.mem = xdpf->mem; 248 + /* TODO: report queue_index to xdp_rxq_info */ 249 + 250 + xdp_convert_frame_to_buff(xdpf, &xdp); 251 + 252 + act = bpf_prog_run_xdp(rcpu->prog, &xdp); 253 + switch (act) { 254 + case XDP_PASS: 255 + err = xdp_update_frame_from_buff(&xdp, xdpf); 256 + if (err < 0) { 257 + xdp_return_frame(xdpf); 258 + stats->drop++; 259 + } else { 260 + frames[nframes++] = xdpf; 261 + stats->pass++; 262 + } 263 + break; 264 + case XDP_REDIRECT: 265 + err = xdp_do_redirect(xdpf->dev_rx, &xdp, 266 + rcpu->prog); 267 + if (unlikely(err)) { 268 + xdp_return_frame(xdpf); 269 + stats->drop++; 270 + } else { 271 + stats->redirect++; 272 + } 273 + break; 274 + default: 275 + bpf_warn_invalid_xdp_action(act); 276 + /* fallthrough */ 277 + case XDP_DROP: 278 + xdp_return_frame(xdpf); 279 + stats->drop++; 280 + break; 281 + } 282 + } 283 + 284 + if (stats->redirect) 285 + xdp_do_flush_map(); 286 + 287 + xdp_clear_return_frame_no_direct(); 288 + 289 + rcu_read_unlock_bh(); /* resched point, may call do_softirq() */ 290 + 291 + return nframes; 227 292 } 228 293 229 294 #define CPUMAP_BATCH 8 ··· 311 234 * kthread_stop signal until queue is empty. 
312 235 */ 313 236 while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { 237 + struct xdp_cpumap_stats stats = {}; /* zero stats */ 238 + gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; 314 239 unsigned int drops = 0, sched = 0; 315 240 void *frames[CPUMAP_BATCH]; 316 241 void *skbs[CPUMAP_BATCH]; 317 - gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; 318 - int i, n, m; 242 + int i, n, m, nframes; 319 243 320 244 /* Release CPU reschedule checks */ 321 245 if (__ptr_ring_empty(rcpu->queue)) { ··· 337 259 * kthread CPU pinned. Lockless access to ptr_ring 338 260 * consume side valid as no-resize allowed of queue. 339 261 */ 340 - n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH); 341 - 262 + n = __ptr_ring_consume_batched(rcpu->queue, frames, 263 + CPUMAP_BATCH); 342 264 for (i = 0; i < n; i++) { 343 265 void *f = frames[i]; 344 266 struct page *page = virt_to_page(f); ··· 350 272 prefetchw(page); 351 273 } 352 274 353 - m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs); 354 - if (unlikely(m == 0)) { 355 - for (i = 0; i < n; i++) 356 - skbs[i] = NULL; /* effect: xdp_return_frame */ 357 - drops = n; 275 + /* Support running another XDP prog on this CPU */ 276 + nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats); 277 + if (nframes) { 278 + m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs); 279 + if (unlikely(m == 0)) { 280 + for (i = 0; i < nframes; i++) 281 + skbs[i] = NULL; /* effect: xdp_return_frame */ 282 + drops += nframes; 283 + } 358 284 } 359 285 360 286 local_bh_disable(); 361 - for (i = 0; i < n; i++) { 287 + for (i = 0; i < nframes; i++) { 362 288 struct xdp_frame *xdpf = frames[i]; 363 289 struct sk_buff *skb = skbs[i]; 364 290 int ret; ··· 379 297 drops++; 380 298 } 381 299 /* Feedback loop via tracepoint */ 382 - trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched); 300 + trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats); 383 301 384 302 local_bh_enable(); /* resched point, may call do_softirq() */ 
385 303 } ··· 389 307 return 0; 390 308 } 391 309 392 - static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, 393 - int map_id) 310 + bool cpu_map_prog_allowed(struct bpf_map *map) 394 311 { 312 + return map->map_type == BPF_MAP_TYPE_CPUMAP && 313 + map->value_size != offsetofend(struct bpf_cpumap_val, qsize); 314 + } 315 + 316 + static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) 317 + { 318 + struct bpf_prog *prog; 319 + 320 + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); 321 + if (IS_ERR(prog)) 322 + return PTR_ERR(prog); 323 + 324 + if (prog->expected_attach_type != BPF_XDP_CPUMAP) { 325 + bpf_prog_put(prog); 326 + return -EINVAL; 327 + } 328 + 329 + rcpu->value.bpf_prog.id = prog->aux->id; 330 + rcpu->prog = prog; 331 + 332 + return 0; 333 + } 334 + 335 + static struct bpf_cpu_map_entry * 336 + __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id) 337 + { 338 + int numa, err, i, fd = value->bpf_prog.fd; 395 339 gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; 396 340 struct bpf_cpu_map_entry *rcpu; 397 341 struct xdp_bulk_queue *bq; 398 - int numa, err, i; 399 342 400 343 /* Have map->numa_node, but choose node of redirect target CPU */ 401 344 numa = cpu_to_node(cpu); ··· 445 338 if (!rcpu->queue) 446 339 goto free_bulkq; 447 340 448 - err = ptr_ring_init(rcpu->queue, qsize, gfp); 341 + err = ptr_ring_init(rcpu->queue, value->qsize, gfp); 449 342 if (err) 450 343 goto free_queue; 451 344 452 345 rcpu->cpu = cpu; 453 346 rcpu->map_id = map_id; 454 - rcpu->qsize = qsize; 347 + rcpu->value.qsize = value->qsize; 348 + 349 + if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd)) 350 + goto free_ptr_ring; 455 351 456 352 /* Setup kthread */ 457 353 rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa, 458 354 "cpumap/%d/map:%d", cpu, map_id); 459 355 if (IS_ERR(rcpu->kthread)) 460 - goto free_ptr_ring; 356 + goto free_prog; 461 357 462 358 get_cpu_map_entry(rcpu); /* 1-refcnt for being in 
cmap->cpu_map[] */ 463 359 get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */ ··· 471 361 472 362 return rcpu; 473 363 364 + free_prog: 365 + if (rcpu->prog) 366 + bpf_prog_put(rcpu->prog); 474 367 free_ptr_ring: 475 368 ptr_ring_cleanup(rcpu->queue, NULL); 476 369 free_queue: ··· 550 437 u64 map_flags) 551 438 { 552 439 struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); 440 + struct bpf_cpumap_val cpumap_value = {}; 553 441 struct bpf_cpu_map_entry *rcpu; 554 - 555 442 /* Array index key correspond to CPU number */ 556 443 u32 key_cpu = *(u32 *)key; 557 - /* Value is the queue size */ 558 - u32 qsize = *(u32 *)value; 444 + 445 + memcpy(&cpumap_value, value, map->value_size); 559 446 560 447 if (unlikely(map_flags > BPF_EXIST)) 561 448 return -EINVAL; ··· 563 450 return -E2BIG; 564 451 if (unlikely(map_flags == BPF_NOEXIST)) 565 452 return -EEXIST; 566 - if (unlikely(qsize > 16384)) /* sanity limit on qsize */ 453 + if (unlikely(cpumap_value.qsize > 16384)) /* sanity limit on qsize */ 567 454 return -EOVERFLOW; 568 455 569 456 /* Make sure CPU is a valid possible cpu */ 570 457 if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu)) 571 458 return -ENODEV; 572 459 573 - if (qsize == 0) { 460 + if (cpumap_value.qsize == 0) { 574 461 rcpu = NULL; /* Same as deleting */ 575 462 } else { 576 463 /* Updating qsize cause re-allocation of bpf_cpu_map_entry */ 577 - rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id); 464 + rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id); 578 465 if (!rcpu) 579 466 return -ENOMEM; 580 467 rcpu->cmap = cmap; ··· 636 523 struct bpf_cpu_map_entry *rcpu = 637 524 __cpu_map_lookup_elem(map, *(u32 *)key); 638 525 639 - return rcpu ? &rcpu->qsize : NULL; 526 + return rcpu ? &rcpu->value : NULL; 640 527 } 641 528 642 529 static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+6 -1
kernel/bpf/map_iter.c
··· 4 4 #include <linux/fs.h> 5 5 #include <linux/filter.h> 6 6 #include <linux/kernel.h> 7 + #include <linux/btf_ids.h> 7 8 8 9 struct bpf_iter_seq_map_info { 9 10 u32 mid; ··· 82 81 .show = bpf_map_seq_show, 83 82 }; 84 83 85 - static const struct bpf_iter_reg bpf_map_reg_info = { 84 + BTF_ID_LIST(btf_bpf_map_id) 85 + BTF_ID(struct, bpf_map) 86 + 87 + static struct bpf_iter_reg bpf_map_reg_info = { 86 88 .target = "bpf_map", 87 89 .seq_ops = &bpf_map_seq_ops, 88 90 .init_seq_private = NULL, ··· 100 96 101 97 static int __init bpf_map_iter_init(void) 102 98 { 99 + bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id; 103 100 return bpf_iter_reg_target(&bpf_map_reg_info); 104 101 } 105 102
+121 -10
kernel/bpf/net_namespace.c
··· 25 25 /* Protects updates to netns_bpf */ 26 26 DEFINE_MUTEX(netns_bpf_mutex); 27 27 28 + static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type) 29 + { 30 + switch (type) { 31 + #ifdef CONFIG_INET 32 + case NETNS_BPF_SK_LOOKUP: 33 + static_branch_dec(&bpf_sk_lookup_enabled); 34 + break; 35 + #endif 36 + default: 37 + break; 38 + } 39 + } 40 + 41 + static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type) 42 + { 43 + switch (type) { 44 + #ifdef CONFIG_INET 45 + case NETNS_BPF_SK_LOOKUP: 46 + static_branch_inc(&bpf_sk_lookup_enabled); 47 + break; 48 + #endif 49 + default: 50 + break; 51 + } 52 + } 53 + 28 54 /* Must be called with netns_bpf_mutex held. */ 29 55 static void netns_bpf_run_array_detach(struct net *net, 30 56 enum netns_bpf_attach_type type) ··· 62 36 bpf_prog_array_free(run_array); 63 37 } 64 38 39 + static int link_index(struct net *net, enum netns_bpf_attach_type type, 40 + struct bpf_netns_link *link) 41 + { 42 + struct bpf_netns_link *pos; 43 + int i = 0; 44 + 45 + list_for_each_entry(pos, &net->bpf.links[type], node) { 46 + if (pos == link) 47 + return i; 48 + i++; 49 + } 50 + return -ENOENT; 51 + } 52 + 53 + static int link_count(struct net *net, enum netns_bpf_attach_type type) 54 + { 55 + struct list_head *pos; 56 + int i = 0; 57 + 58 + list_for_each(pos, &net->bpf.links[type]) 59 + i++; 60 + return i; 61 + } 62 + 63 + static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type, 64 + struct bpf_prog_array *prog_array) 65 + { 66 + struct bpf_netns_link *pos; 67 + unsigned int i = 0; 68 + 69 + list_for_each_entry(pos, &net->bpf.links[type], node) { 70 + prog_array->items[i].prog = pos->link.prog; 71 + i++; 72 + } 73 + } 74 + 65 75 static void bpf_netns_link_release(struct bpf_link *link) 66 76 { 67 77 struct bpf_netns_link *net_link = 68 78 container_of(link, struct bpf_netns_link, link); 69 79 enum netns_bpf_attach_type type = net_link->netns_type; 80 + struct bpf_prog_array *old_array, 
*new_array; 70 81 struct net *net; 82 + int cnt, idx; 71 83 72 84 mutex_lock(&netns_bpf_mutex); 73 85 ··· 117 53 if (!net) 118 54 goto out_unlock; 119 55 120 - netns_bpf_run_array_detach(net, type); 56 + /* Mark attach point as unused */ 57 + netns_bpf_attach_type_unneed(type); 58 + 59 + /* Remember link position in case of safe delete */ 60 + idx = link_index(net, type, net_link); 121 61 list_del(&net_link->node); 62 + 63 + cnt = link_count(net, type); 64 + if (!cnt) { 65 + netns_bpf_run_array_detach(net, type); 66 + goto out_unlock; 67 + } 68 + 69 + old_array = rcu_dereference_protected(net->bpf.run_array[type], 70 + lockdep_is_held(&netns_bpf_mutex)); 71 + new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL); 72 + if (!new_array) { 73 + WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx)); 74 + goto out_unlock; 75 + } 76 + fill_prog_array(net, type, new_array); 77 + rcu_assign_pointer(net->bpf.run_array[type], new_array); 78 + bpf_prog_array_free(old_array); 122 79 123 80 out_unlock: 124 81 mutex_unlock(&netns_bpf_mutex); ··· 162 77 enum netns_bpf_attach_type type = net_link->netns_type; 163 78 struct bpf_prog_array *run_array; 164 79 struct net *net; 165 - int ret = 0; 80 + int idx, ret; 166 81 167 82 if (old_prog && old_prog != link->prog) 168 83 return -EPERM; ··· 180 95 181 96 run_array = rcu_dereference_protected(net->bpf.run_array[type], 182 97 lockdep_is_held(&netns_bpf_mutex)); 183 - WRITE_ONCE(run_array->items[0].prog, new_prog); 98 + idx = link_index(net, type, net_link); 99 + ret = bpf_prog_array_update_at(run_array, idx, new_prog); 100 + if (ret) 101 + goto out_unlock; 184 102 185 103 old_prog = xchg(&link->prog, new_prog); 186 104 bpf_prog_put(old_prog); ··· 397 309 return ret; 398 310 } 399 311 312 + static int netns_bpf_max_progs(enum netns_bpf_attach_type type) 313 + { 314 + switch (type) { 315 + case NETNS_BPF_FLOW_DISSECTOR: 316 + return 1; 317 + case NETNS_BPF_SK_LOOKUP: 318 + return 64; 319 + default: 320 + return 0; 321 + } 322 + } 323 + 400 
324 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, 401 325 enum netns_bpf_attach_type type) 402 326 { 403 327 struct bpf_netns_link *net_link = 404 328 container_of(link, struct bpf_netns_link, link); 405 329 struct bpf_prog_array *run_array; 406 - int err; 330 + int cnt, err; 407 331 408 332 mutex_lock(&netns_bpf_mutex); 409 333 410 - /* Allow attaching only one prog or link for now */ 411 - if (!list_empty(&net->bpf.links[type])) { 334 + cnt = link_count(net, type); 335 + if (cnt >= netns_bpf_max_progs(type)) { 412 336 err = -E2BIG; 413 337 goto out_unlock; 414 338 } ··· 434 334 case NETNS_BPF_FLOW_DISSECTOR: 435 335 err = flow_dissector_bpf_prog_attach_check(net, link->prog); 436 336 break; 337 + case NETNS_BPF_SK_LOOKUP: 338 + err = 0; /* nothing to check */ 339 + break; 437 340 default: 438 341 err = -EINVAL; 439 342 break; ··· 444 341 if (err) 445 342 goto out_unlock; 446 343 447 - run_array = bpf_prog_array_alloc(1, GFP_KERNEL); 344 + run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL); 448 345 if (!run_array) { 449 346 err = -ENOMEM; 450 347 goto out_unlock; 451 348 } 452 - run_array->items[0].prog = link->prog; 453 - rcu_assign_pointer(net->bpf.run_array[type], run_array); 454 349 455 350 list_add_tail(&net_link->node, &net->bpf.links[type]); 351 + 352 + fill_prog_array(net, type, run_array); 353 + run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array, 354 + lockdep_is_held(&netns_bpf_mutex)); 355 + bpf_prog_array_free(run_array); 356 + 357 + /* Mark attach point as used */ 358 + netns_bpf_attach_type_need(type); 456 359 457 360 out_unlock: 458 361 mutex_unlock(&netns_bpf_mutex); ··· 535 426 mutex_lock(&netns_bpf_mutex); 536 427 for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { 537 428 netns_bpf_run_array_detach(net, type); 538 - list_for_each_entry(net_link, &net->bpf.links[type], node) 429 + list_for_each_entry(net_link, &net->bpf.links[type], node) { 539 430 net_link->net = NULL; /* auto-detach link */ 
431 + netns_bpf_attach_type_unneed(type); 432 + } 540 433 if (net->bpf.progs[type]) 541 434 bpf_prog_put(net->bpf.progs[type]); 542 435 }
+9
kernel/bpf/syscall.c
··· 2022 2022 default: 2023 2023 return -EINVAL; 2024 2024 } 2025 + case BPF_PROG_TYPE_SK_LOOKUP: 2026 + if (expected_attach_type == BPF_SK_LOOKUP) 2027 + return 0; 2028 + return -EINVAL; 2025 2029 case BPF_PROG_TYPE_EXT: 2026 2030 if (expected_attach_type) 2027 2031 return -EINVAL; ··· 2760 2756 case BPF_PROG_TYPE_CGROUP_SOCK: 2761 2757 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2762 2758 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2759 + case BPF_PROG_TYPE_SK_LOOKUP: 2763 2760 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 2764 2761 case BPF_PROG_TYPE_CGROUP_SKB: 2765 2762 if (!capable(CAP_NET_ADMIN)) ··· 2822 2817 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 2823 2818 case BPF_TRACE_ITER: 2824 2819 return BPF_PROG_TYPE_TRACING; 2820 + case BPF_SK_LOOKUP: 2821 + return BPF_PROG_TYPE_SK_LOOKUP; 2825 2822 default: 2826 2823 return BPF_PROG_TYPE_UNSPEC; 2827 2824 } ··· 2960 2953 case BPF_LIRC_MODE2: 2961 2954 return lirc_prog_query(attr, uattr); 2962 2955 case BPF_FLOW_DISSECTOR: 2956 + case BPF_SK_LOOKUP: 2963 2957 return netns_bpf_prog_query(attr, uattr); 2964 2958 default: 2965 2959 return -EINVAL; ··· 3899 3891 ret = tracing_bpf_link_attach(attr, prog); 3900 3892 break; 3901 3893 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3894 + case BPF_PROG_TYPE_SK_LOOKUP: 3902 3895 ret = netns_bpf_link_create(attr, prog); 3903 3896 break; 3904 3897 default:
+10 -2
kernel/bpf/task_iter.c
··· 7 7 #include <linux/fs.h> 8 8 #include <linux/fdtable.h> 9 9 #include <linux/filter.h> 10 + #include <linux/btf_ids.h> 10 11 11 12 struct bpf_iter_seq_task_common { 12 13 struct pid_namespace *ns; ··· 313 312 .show = task_file_seq_show, 314 313 }; 315 314 316 - static const struct bpf_iter_reg task_reg_info = { 315 + BTF_ID_LIST(btf_task_file_ids) 316 + BTF_ID(struct, task_struct) 317 + BTF_ID(struct, file) 318 + 319 + static struct bpf_iter_reg task_reg_info = { 317 320 .target = "task", 318 321 .seq_ops = &task_seq_ops, 319 322 .init_seq_private = init_seq_pidns, ··· 330 325 }, 331 326 }; 332 327 333 - static const struct bpf_iter_reg task_file_reg_info = { 328 + static struct bpf_iter_reg task_file_reg_info = { 334 329 .target = "task_file", 335 330 .seq_ops = &task_file_seq_ops, 336 331 .init_seq_private = init_seq_pidns, ··· 349 344 { 350 345 int ret; 351 346 347 + task_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0]; 352 348 ret = bpf_iter_reg_target(&task_reg_info); 353 349 if (ret) 354 350 return ret; 355 351 352 + task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0]; 353 + task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1]; 356 354 return bpf_iter_reg_target(&task_file_reg_info); 357 355 } 358 356 late_initcall(task_iter_init);
+10 -3
kernel/bpf/verifier.c
··· 3878 3878 } 3879 3879 meta->ref_obj_id = reg->ref_obj_id; 3880 3880 } 3881 - } else if (arg_type == ARG_PTR_TO_SOCKET) { 3881 + } else if (arg_type == ARG_PTR_TO_SOCKET || 3882 + arg_type == ARG_PTR_TO_SOCKET_OR_NULL) { 3882 3883 expected_type = PTR_TO_SOCKET; 3883 - if (type != expected_type) 3884 - goto err_type; 3884 + if (!(register_is_null(reg) && 3885 + arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) { 3886 + if (type != expected_type) 3887 + goto err_type; 3888 + } 3885 3889 } else if (arg_type == ARG_PTR_TO_BTF_ID) { 3886 3890 expected_type = PTR_TO_BTF_ID; 3887 3891 if (type != expected_type) ··· 7357 7353 default: 7358 7354 return -ENOTSUPP; 7359 7355 } 7356 + break; 7357 + case BPF_PROG_TYPE_SK_LOOKUP: 7358 + range = tnum_range(SK_DROP, SK_PASS); 7360 7359 break; 7361 7360 case BPF_PROG_TYPE_EXT: 7362 7361 /* freplace program can return anything as its return value
-20
lib/test_bpf.c
··· 5275 5275 { /* Mainly checking JIT here. */ 5276 5276 "BPF_MAXINSNS: Ctx heavy transformations", 5277 5277 { }, 5278 - #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5279 - CLASSIC | FLAG_EXPECTED_FAIL, 5280 - #else 5281 5278 CLASSIC, 5282 - #endif 5283 5279 { }, 5284 5280 { 5285 5281 { 1, SKB_VLAN_PRESENT }, 5286 5282 { 10, SKB_VLAN_PRESENT } 5287 5283 }, 5288 5284 .fill_helper = bpf_fill_maxinsns6, 5289 - .expected_errcode = -ENOTSUPP, 5290 5285 }, 5291 5286 { /* Mainly checking JIT here. */ 5292 5287 "BPF_MAXINSNS: Call heavy transformations", 5293 5288 { }, 5294 - #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5295 - CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, 5296 - #else 5297 5289 CLASSIC | FLAG_NO_DATA, 5298 - #endif 5299 5290 { }, 5300 5291 { { 1, 0 }, { 10, 0 } }, 5301 5292 .fill_helper = bpf_fill_maxinsns7, 5302 - .expected_errcode = -ENOTSUPP, 5303 5293 }, 5304 5294 { /* Mainly checking JIT here. */ 5305 5295 "BPF_MAXINSNS: Jump heavy test", ··· 5340 5350 { 5341 5351 "BPF_MAXINSNS: exec all MSH", 5342 5352 { }, 5343 - #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5344 - CLASSIC | FLAG_EXPECTED_FAIL, 5345 - #else 5346 5353 CLASSIC, 5347 - #endif 5348 5354 { 0xfa, 0xfb, 0xfc, 0xfd, }, 5349 5355 { { 4, 0xababab83 } }, 5350 5356 .fill_helper = bpf_fill_maxinsns13, 5351 - .expected_errcode = -ENOTSUPP, 5352 5357 }, 5353 5358 { 5354 5359 "BPF_MAXINSNS: ld_abs+get_processor_id", 5355 5360 { }, 5356 - #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5357 - CLASSIC | FLAG_EXPECTED_FAIL, 5358 - #else 5359 5361 CLASSIC, 5360 - #endif 5361 5362 { }, 5362 5363 { { 1, 0xbee } }, 5363 5364 .fill_helper = bpf_fill_ld_abs_get_processor_id, 5364 - .expected_errcode = -ENOTSUPP, 5365 5365 }, 5366 5366 /* 5367 5367 * LD_IND / LD_ABS on fragmented SKBs
+9
net/core/dev.c
··· 5449 5449 for (i = 0; i < new->aux->used_map_cnt; i++) { 5450 5450 if (dev_map_can_have_prog(new->aux->used_maps[i])) 5451 5451 return -EINVAL; 5452 + if (cpu_map_prog_allowed(new->aux->used_maps[i])) 5453 + return -EINVAL; 5452 5454 } 5453 5455 } 5454 5456 ··· 8878 8876 8879 8877 if (prog->expected_attach_type == BPF_XDP_DEVMAP) { 8880 8878 NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); 8879 + bpf_prog_put(prog); 8880 + return -EINVAL; 8881 + } 8882 + 8883 + if (prog->expected_attach_type == BPF_XDP_CPUMAP) { 8884 + NL_SET_ERR_MSG(extack, 8885 + "BPF_XDP_CPUMAP programs can not be attached to a device"); 8881 8886 bpf_prog_put(prog); 8882 8887 return -EINVAL; 8883 8888 }
+188 -44
net/core/filter.c
··· 9252 9252 9253 9253 const struct bpf_prog_ops sk_reuseport_prog_ops = { 9254 9254 }; 9255 + 9256 + DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled); 9257 + EXPORT_SYMBOL(bpf_sk_lookup_enabled); 9258 + 9259 + BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, 9260 + struct sock *, sk, u64, flags) 9261 + { 9262 + if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE | 9263 + BPF_SK_LOOKUP_F_NO_REUSEPORT))) 9264 + return -EINVAL; 9265 + if (unlikely(sk && sk_is_refcounted(sk))) 9266 + return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */ 9267 + if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED)) 9268 + return -ESOCKTNOSUPPORT; /* reject connected sockets */ 9269 + 9270 + /* Check if socket is suitable for packet L3/L4 protocol */ 9271 + if (sk && sk->sk_protocol != ctx->protocol) 9272 + return -EPROTOTYPE; 9273 + if (sk && sk->sk_family != ctx->family && 9274 + (sk->sk_family == AF_INET || ipv6_only_sock(sk))) 9275 + return -EAFNOSUPPORT; 9276 + 9277 + if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE)) 9278 + return -EEXIST; 9279 + 9280 + /* Select socket as lookup result */ 9281 + ctx->selected_sk = sk; 9282 + ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT; 9283 + return 0; 9284 + } 9285 + 9286 + static const struct bpf_func_proto bpf_sk_lookup_assign_proto = { 9287 + .func = bpf_sk_lookup_assign, 9288 + .gpl_only = false, 9289 + .ret_type = RET_INTEGER, 9290 + .arg1_type = ARG_PTR_TO_CTX, 9291 + .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL, 9292 + .arg3_type = ARG_ANYTHING, 9293 + }; 9294 + 9295 + static const struct bpf_func_proto * 9296 + sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 9297 + { 9298 + switch (func_id) { 9299 + case BPF_FUNC_perf_event_output: 9300 + return &bpf_event_output_data_proto; 9301 + case BPF_FUNC_sk_assign: 9302 + return &bpf_sk_lookup_assign_proto; 9303 + case BPF_FUNC_sk_release: 9304 + return &bpf_sk_release_proto; 9305 + default: 9306 + return bpf_base_func_proto(func_id); 
9307 + } 9308 + } 9309 + 9310 + static bool sk_lookup_is_valid_access(int off, int size, 9311 + enum bpf_access_type type, 9312 + const struct bpf_prog *prog, 9313 + struct bpf_insn_access_aux *info) 9314 + { 9315 + if (off < 0 || off >= sizeof(struct bpf_sk_lookup)) 9316 + return false; 9317 + if (off % size != 0) 9318 + return false; 9319 + if (type != BPF_READ) 9320 + return false; 9321 + 9322 + switch (off) { 9323 + case offsetof(struct bpf_sk_lookup, sk): 9324 + info->reg_type = PTR_TO_SOCKET_OR_NULL; 9325 + return size == sizeof(__u64); 9326 + 9327 + case bpf_ctx_range(struct bpf_sk_lookup, family): 9328 + case bpf_ctx_range(struct bpf_sk_lookup, protocol): 9329 + case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4): 9330 + case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): 9331 + case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): 9332 + case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): 9333 + case bpf_ctx_range(struct bpf_sk_lookup, remote_port): 9334 + case bpf_ctx_range(struct bpf_sk_lookup, local_port): 9335 + bpf_ctx_record_field_size(info, sizeof(__u32)); 9336 + return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); 9337 + 9338 + default: 9339 + return false; 9340 + } 9341 + } 9342 + 9343 + static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, 9344 + const struct bpf_insn *si, 9345 + struct bpf_insn *insn_buf, 9346 + struct bpf_prog *prog, 9347 + u32 *target_size) 9348 + { 9349 + struct bpf_insn *insn = insn_buf; 9350 + 9351 + switch (si->off) { 9352 + case offsetof(struct bpf_sk_lookup, sk): 9353 + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, 9354 + offsetof(struct bpf_sk_lookup_kern, selected_sk)); 9355 + break; 9356 + 9357 + case offsetof(struct bpf_sk_lookup, family): 9358 + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9359 + bpf_target_off(struct bpf_sk_lookup_kern, 9360 + family, 2, target_size)); 9361 + break; 9362 + 9363 + case offsetof(struct 
bpf_sk_lookup, protocol): 9364 + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9365 + bpf_target_off(struct bpf_sk_lookup_kern, 9366 + protocol, 2, target_size)); 9367 + break; 9368 + 9369 + case offsetof(struct bpf_sk_lookup, remote_ip4): 9370 + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9371 + bpf_target_off(struct bpf_sk_lookup_kern, 9372 + v4.saddr, 4, target_size)); 9373 + break; 9374 + 9375 + case offsetof(struct bpf_sk_lookup, local_ip4): 9376 + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9377 + bpf_target_off(struct bpf_sk_lookup_kern, 9378 + v4.daddr, 4, target_size)); 9379 + break; 9380 + 9381 + case bpf_ctx_range_till(struct bpf_sk_lookup, 9382 + remote_ip6[0], remote_ip6[3]): { 9383 + #if IS_ENABLED(CONFIG_IPV6) 9384 + int off = si->off; 9385 + 9386 + off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]); 9387 + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); 9388 + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, 9389 + offsetof(struct bpf_sk_lookup_kern, v6.saddr)); 9390 + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 9391 + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); 9392 + #else 9393 + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 9394 + #endif 9395 + break; 9396 + } 9397 + case bpf_ctx_range_till(struct bpf_sk_lookup, 9398 + local_ip6[0], local_ip6[3]): { 9399 + #if IS_ENABLED(CONFIG_IPV6) 9400 + int off = si->off; 9401 + 9402 + off -= offsetof(struct bpf_sk_lookup, local_ip6[0]); 9403 + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); 9404 + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, 9405 + offsetof(struct bpf_sk_lookup_kern, v6.daddr)); 9406 + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 9407 + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); 9408 + #else 9409 + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 9410 + #endif 9411 + break; 9412 + } 9413 + case offsetof(struct bpf_sk_lookup, remote_port): 
9414 + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9415 + bpf_target_off(struct bpf_sk_lookup_kern, 9416 + sport, 2, target_size)); 9417 + break; 9418 + 9419 + case offsetof(struct bpf_sk_lookup, local_port): 9420 + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9421 + bpf_target_off(struct bpf_sk_lookup_kern, 9422 + dport, 2, target_size)); 9423 + break; 9424 + } 9425 + 9426 + return insn - insn_buf; 9427 + } 9428 + 9429 + const struct bpf_prog_ops sk_lookup_prog_ops = { 9430 + }; 9431 + 9432 + const struct bpf_verifier_ops sk_lookup_verifier_ops = { 9433 + .get_func_proto = sk_lookup_func_proto, 9434 + .is_valid_access = sk_lookup_is_valid_access, 9435 + .convert_ctx_access = sk_lookup_convert_ctx_access, 9436 + }; 9437 + 9255 9438 #endif /* CONFIG_INET */ 9256 9439 9257 9440 DEFINE_BPF_DISPATCHER(xdp) ··· 9444 9261 bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); 9445 9262 } 9446 9263 9447 - /* Define a list of socket types which can be the argument for 9448 - * skc_to_*_sock() helpers. All these sockets should have 9449 - * sock_common as the first argument in its memory layout. 
9450 - */ 9451 - #define BTF_SOCK_TYPE_xxx \ 9452 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \ 9453 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \ 9454 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \ 9455 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \ 9456 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \ 9457 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \ 9458 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \ 9459 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \ 9460 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \ 9461 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \ 9462 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \ 9463 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \ 9464 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock") 9465 - 9466 - enum { 9467 - #define BTF_SOCK_TYPE(name, str) name, 9264 + #ifdef CONFIG_DEBUG_INFO_BTF 9265 + BTF_ID_LIST_GLOBAL(btf_sock_ids) 9266 + #define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) 9468 9267 BTF_SOCK_TYPE_xxx 9469 9268 #undef BTF_SOCK_TYPE 9470 - MAX_BTF_SOCK_TYPE, 9471 - }; 9472 - 9473 - static int btf_sock_ids[MAX_BTF_SOCK_TYPE]; 9474 - 9475 - #ifdef CONFIG_BPF_SYSCALL 9476 - static const char *bpf_sock_types[] = { 9477 - #define BTF_SOCK_TYPE(name, str) str, 9478 - BTF_SOCK_TYPE_xxx 9479 - #undef BTF_SOCK_TYPE 9480 - }; 9481 - 9482 - void init_btf_sock_ids(struct btf *btf) 9483 - { 9484 - int i, btf_id; 9485 - 9486 - for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) { 9487 - btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i], 9488 - BTF_KIND_STRUCT); 9489 - if (btf_id > 0) 9490 - btf_sock_ids[i] = btf_id; 9491 - } 9492 - } 9269 + #else 9270 + u32 btf_sock_ids[MAX_BTF_SOCK_TYPE]; 9493 9271 #endif 9494 9272 9495 9273 static bool check_arg_btf_id(u32 btf_id, u32 arg)
+51 -9
net/ipv4/inet_hashtables.c
··· 246 246 return score; 247 247 } 248 248 249 + static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, 250 + struct sk_buff *skb, int doff, 251 + __be32 saddr, __be16 sport, 252 + __be32 daddr, unsigned short hnum) 253 + { 254 + struct sock *reuse_sk = NULL; 255 + u32 phash; 256 + 257 + if (sk->sk_reuseport) { 258 + phash = inet_ehashfn(net, daddr, hnum, saddr, sport); 259 + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); 260 + } 261 + return reuse_sk; 262 + } 263 + 249 264 /* 250 265 * Here are some nice properties to exploit here. The BSD API 251 266 * does not allow a listening sock to specify the remote port nor the ··· 280 265 struct inet_connection_sock *icsk; 281 266 struct sock *sk, *result = NULL; 282 267 int score, hiscore = 0; 283 - u32 phash = 0; 284 268 285 269 inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { 286 270 sk = (struct sock *)icsk; 287 271 score = compute_score(sk, net, hnum, daddr, 288 272 dif, sdif, exact_dif); 289 273 if (score > hiscore) { 290 - if (sk->sk_reuseport) { 291 - phash = inet_ehashfn(net, daddr, hnum, 292 - saddr, sport); 293 - result = reuseport_select_sock(sk, phash, 294 - skb, doff); 295 - if (result) 296 - return result; 297 - } 274 + result = lookup_reuseport(net, sk, skb, doff, 275 + saddr, sport, daddr, hnum); 276 + if (result) 277 + return result; 278 + 298 279 result = sk; 299 280 hiscore = score; 300 281 } 301 282 } 302 283 303 284 return result; 285 + } 286 + 287 + static inline struct sock *inet_lookup_run_bpf(struct net *net, 288 + struct inet_hashinfo *hashinfo, 289 + struct sk_buff *skb, int doff, 290 + __be32 saddr, __be16 sport, 291 + __be32 daddr, u16 hnum) 292 + { 293 + struct sock *sk, *reuse_sk; 294 + bool no_reuseport; 295 + 296 + if (hashinfo != &tcp_hashinfo) 297 + return NULL; /* only TCP is supported */ 298 + 299 + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, 300 + saddr, sport, daddr, hnum, &sk); 301 + if (no_reuseport || IS_ERR_OR_NULL(sk)) 302 + return 
sk; 303 + 304 + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); 305 + if (reuse_sk) 306 + sk = reuse_sk; 307 + return sk; 304 308 } 305 309 306 310 struct sock *__inet_lookup_listener(struct net *net, ··· 332 298 struct inet_listen_hashbucket *ilb2; 333 299 struct sock *result = NULL; 334 300 unsigned int hash2; 301 + 302 + /* Lookup redirect from BPF */ 303 + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { 304 + result = inet_lookup_run_bpf(net, hashinfo, skb, doff, 305 + saddr, sport, daddr, hnum); 306 + if (result) 307 + goto done; 308 + } 335 309 336 310 hash2 = ipv4_portaddr_hash(net, daddr, hnum); 337 311 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+3 -1
net/ipv4/tcp_ipv4.c
··· 76 76 #include <linux/proc_fs.h> 77 77 #include <linux/seq_file.h> 78 78 #include <linux/inetdevice.h> 79 + #include <linux/btf_ids.h> 79 80 80 81 #include <crypto/hash.h> 81 82 #include <linux/scatterlist.h> ··· 2947 2946 bpf_iter_fini_seq_net(priv_data); 2948 2947 } 2949 2948 2950 - static const struct bpf_iter_reg tcp_reg_info = { 2949 + static struct bpf_iter_reg tcp_reg_info = { 2951 2950 .target = "tcp", 2952 2951 .seq_ops = &bpf_iter_tcp_seq_ops, 2953 2952 .init_seq_private = bpf_iter_init_tcp, ··· 2962 2961 2963 2962 static void __init bpf_iter_register(void) 2964 2963 { 2964 + tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON]; 2965 2965 if (bpf_iter_reg_target(&tcp_reg_info)) 2966 2966 pr_warn("Warning: could not register bpf iterator tcp\n"); 2967 2967 }
+76 -19
net/ipv4/udp.c
··· 106 106 #include <net/xfrm.h> 107 107 #include <trace/events/udp.h> 108 108 #include <linux/static_key.h> 109 + #include <linux/btf_ids.h> 109 110 #include <trace/events/skb.h> 110 111 #include <net/busy_poll.h> 111 112 #include "udp_impl.h" ··· 409 408 udp_ehash_secret + net_hash_mix(net)); 410 409 } 411 410 411 + static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, 412 + struct sk_buff *skb, 413 + __be32 saddr, __be16 sport, 414 + __be32 daddr, unsigned short hnum) 415 + { 416 + struct sock *reuse_sk = NULL; 417 + u32 hash; 418 + 419 + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { 420 + hash = udp_ehashfn(net, daddr, hnum, saddr, sport); 421 + reuse_sk = reuseport_select_sock(sk, hash, skb, 422 + sizeof(struct udphdr)); 423 + /* Fall back to scoring if group has connections */ 424 + if (reuseport_has_conns(sk, false)) 425 + return NULL; 426 + } 427 + return reuse_sk; 428 + } 429 + 412 430 /* called with rcu_read_lock() */ 413 431 static struct sock *udp4_lib_lookup2(struct net *net, 414 432 __be32 saddr, __be16 sport, ··· 438 418 { 439 419 struct sock *sk, *result; 440 420 int score, badness; 441 - u32 hash = 0; 442 421 443 422 result = NULL; 444 423 badness = 0; ··· 445 426 score = compute_score(sk, net, saddr, sport, 446 427 daddr, hnum, dif, sdif); 447 428 if (score > badness) { 448 - if (sk->sk_reuseport && 449 - sk->sk_state != TCP_ESTABLISHED) { 450 - hash = udp_ehashfn(net, daddr, hnum, 451 - saddr, sport); 452 - result = reuseport_select_sock(sk, hash, skb, 453 - sizeof(struct udphdr)); 454 - if (result && !reuseport_has_conns(sk, false)) 455 - return result; 456 - } 429 + result = lookup_reuseport(net, sk, skb, 430 + saddr, sport, daddr, hnum); 431 + if (result) 432 + return result; 433 + 457 434 badness = score; 458 435 result = sk; 459 436 } 460 437 } 461 438 return result; 439 + } 440 + 441 + static inline struct sock *udp4_lookup_run_bpf(struct net *net, 442 + struct udp_table *udptable, 443 + struct sk_buff 
*skb, 444 + __be32 saddr, __be16 sport, 445 + __be32 daddr, u16 hnum) 446 + { 447 + struct sock *sk, *reuse_sk; 448 + bool no_reuseport; 449 + 450 + if (udptable != &udp_table) 451 + return NULL; /* only UDP is supported */ 452 + 453 + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, 454 + saddr, sport, daddr, hnum, &sk); 455 + if (no_reuseport || IS_ERR_OR_NULL(sk)) 456 + return sk; 457 + 458 + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); 459 + if (reuse_sk) 460 + sk = reuse_sk; 461 + return sk; 462 462 } 463 463 464 464 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try ··· 487 449 __be16 sport, __be32 daddr, __be16 dport, int dif, 488 450 int sdif, struct udp_table *udptable, struct sk_buff *skb) 489 451 { 490 - struct sock *result; 491 452 unsigned short hnum = ntohs(dport); 492 453 unsigned int hash2, slot2; 493 454 struct udp_hslot *hslot2; 455 + struct sock *result, *sk; 494 456 495 457 hash2 = ipv4_portaddr_hash(net, daddr, hnum); 496 458 slot2 = hash2 & udptable->mask; 497 459 hslot2 = &udptable->hash2[slot2]; 498 460 461 + /* Lookup connected or non-wildcard socket */ 499 462 result = udp4_lib_lookup2(net, saddr, sport, 500 463 daddr, hnum, dif, sdif, 501 464 hslot2, skb); 502 - if (!result) { 503 - hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 504 - slot2 = hash2 & udptable->mask; 505 - hslot2 = &udptable->hash2[slot2]; 465 + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) 466 + goto done; 506 467 507 - result = udp4_lib_lookup2(net, saddr, sport, 508 - htonl(INADDR_ANY), hnum, dif, sdif, 509 - hslot2, skb); 468 + /* Lookup redirect from BPF */ 469 + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { 470 + sk = udp4_lookup_run_bpf(net, udptable, skb, 471 + saddr, sport, daddr, hnum); 472 + if (sk) { 473 + result = sk; 474 + goto done; 475 + } 510 476 } 477 + 478 + /* Got non-wildcard socket or error on first lookup */ 479 + if (result) 480 + goto done; 481 + 482 + /* 
Lookup wildcard sockets */ 483 + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 484 + slot2 = hash2 & udptable->mask; 485 + hslot2 = &udptable->hash2[slot2]; 486 + 487 + result = udp4_lib_lookup2(net, saddr, sport, 488 + htonl(INADDR_ANY), hnum, dif, sdif, 489 + hslot2, skb); 490 + done: 511 491 if (IS_ERR(result)) 512 492 return NULL; 513 493 return result; ··· 3209 3153 bpf_iter_fini_seq_net(priv_data); 3210 3154 } 3211 3155 3212 - static const struct bpf_iter_reg udp_reg_info = { 3156 + static struct bpf_iter_reg udp_reg_info = { 3213 3157 .target = "udp", 3214 3158 .seq_ops = &bpf_iter_udp_seq_ops, 3215 3159 .init_seq_private = bpf_iter_init_udp, ··· 3224 3168 3225 3169 static void __init bpf_iter_register(void) 3226 3170 { 3171 + udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP]; 3227 3172 if (bpf_iter_reg_target(&udp_reg_info)) 3228 3173 pr_warn("Warning: could not register bpf iterator udp\n"); 3229 3174 }
+57 -9
net/ipv6/inet6_hashtables.c
··· 21 21 #include <net/ip.h> 22 22 #include <net/sock_reuseport.h> 23 23 24 + extern struct inet_hashinfo tcp_hashinfo; 25 + 24 26 u32 inet6_ehashfn(const struct net *net, 25 27 const struct in6_addr *laddr, const u16 lport, 26 28 const struct in6_addr *faddr, const __be16 fport) ··· 113 111 return score; 114 112 } 115 113 114 + static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, 115 + struct sk_buff *skb, int doff, 116 + const struct in6_addr *saddr, 117 + __be16 sport, 118 + const struct in6_addr *daddr, 119 + unsigned short hnum) 120 + { 121 + struct sock *reuse_sk = NULL; 122 + u32 phash; 123 + 124 + if (sk->sk_reuseport) { 125 + phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); 126 + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); 127 + } 128 + return reuse_sk; 129 + } 130 + 116 131 /* called with rcu_read_lock() */ 117 132 static struct sock *inet6_lhash2_lookup(struct net *net, 118 133 struct inet_listen_hashbucket *ilb2, ··· 142 123 struct inet_connection_sock *icsk; 143 124 struct sock *sk, *result = NULL; 144 125 int score, hiscore = 0; 145 - u32 phash = 0; 146 126 147 127 inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { 148 128 sk = (struct sock *)icsk; 149 129 score = compute_score(sk, net, hnum, daddr, dif, sdif, 150 130 exact_dif); 151 131 if (score > hiscore) { 152 - if (sk->sk_reuseport) { 153 - phash = inet6_ehashfn(net, daddr, hnum, 154 - saddr, sport); 155 - result = reuseport_select_sock(sk, phash, 156 - skb, doff); 157 - if (result) 158 - return result; 159 - } 132 + result = lookup_reuseport(net, sk, skb, doff, 133 + saddr, sport, daddr, hnum); 134 + if (result) 135 + return result; 136 + 160 137 result = sk; 161 138 hiscore = score; 162 139 } 163 140 } 164 141 165 142 return result; 143 + } 144 + 145 + static inline struct sock *inet6_lookup_run_bpf(struct net *net, 146 + struct inet_hashinfo *hashinfo, 147 + struct sk_buff *skb, int doff, 148 + const struct in6_addr *saddr, 149 + const __be16 sport, 
150 + const struct in6_addr *daddr, 151 + const u16 hnum) 152 + { 153 + struct sock *sk, *reuse_sk; 154 + bool no_reuseport; 155 + 156 + if (hashinfo != &tcp_hashinfo) 157 + return NULL; /* only TCP is supported */ 158 + 159 + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, 160 + saddr, sport, daddr, hnum, &sk); 161 + if (no_reuseport || IS_ERR_OR_NULL(sk)) 162 + return sk; 163 + 164 + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); 165 + if (reuse_sk) 166 + sk = reuse_sk; 167 + return sk; 166 168 } 167 169 168 170 struct sock *inet6_lookup_listener(struct net *net, ··· 196 156 struct inet_listen_hashbucket *ilb2; 197 157 struct sock *result = NULL; 198 158 unsigned int hash2; 159 + 160 + /* Lookup redirect from BPF */ 161 + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { 162 + result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, 163 + saddr, sport, daddr, hnum); 164 + if (result) 165 + goto done; 166 + } 199 167 200 168 hash2 = ipv6_portaddr_hash(net, daddr, hnum); 201 169 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+6 -1
net/ipv6/route.c
··· 61 61 #include <net/l3mdev.h> 62 62 #include <net/ip.h> 63 63 #include <linux/uaccess.h> 64 + #include <linux/btf_ids.h> 64 65 65 66 #ifdef CONFIG_SYSCTL 66 67 #include <linux/sysctl.h> ··· 6424 6423 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 6425 6424 DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) 6426 6425 6427 - static const struct bpf_iter_reg ipv6_route_reg_info = { 6426 + BTF_ID_LIST(btf_fib6_info_id) 6427 + BTF_ID(struct, fib6_info) 6428 + 6429 + static struct bpf_iter_reg ipv6_route_reg_info = { 6428 6430 .target = "ipv6_route", 6429 6431 .seq_ops = &ipv6_route_seq_ops, 6430 6432 .init_seq_private = bpf_iter_init_seq_net, ··· 6442 6438 6443 6439 static int __init bpf_iter_register(void) 6444 6440 { 6441 + ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id; 6445 6442 return bpf_iter_reg_target(&ipv6_route_reg_info); 6446 6443 } 6447 6444
+76 -19
net/ipv6/udp.c
··· 141 141 return score; 142 142 } 143 143 144 + static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, 145 + struct sk_buff *skb, 146 + const struct in6_addr *saddr, 147 + __be16 sport, 148 + const struct in6_addr *daddr, 149 + unsigned int hnum) 150 + { 151 + struct sock *reuse_sk = NULL; 152 + u32 hash; 153 + 154 + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { 155 + hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); 156 + reuse_sk = reuseport_select_sock(sk, hash, skb, 157 + sizeof(struct udphdr)); 158 + /* Fall back to scoring if group has connections */ 159 + if (reuseport_has_conns(sk, false)) 160 + return NULL; 161 + } 162 + return reuse_sk; 163 + } 164 + 144 165 /* called with rcu_read_lock() */ 145 166 static struct sock *udp6_lib_lookup2(struct net *net, 146 167 const struct in6_addr *saddr, __be16 sport, ··· 171 150 { 172 151 struct sock *sk, *result; 173 152 int score, badness; 174 - u32 hash = 0; 175 153 176 154 result = NULL; 177 155 badness = -1; ··· 178 158 score = compute_score(sk, net, saddr, sport, 179 159 daddr, hnum, dif, sdif); 180 160 if (score > badness) { 181 - if (sk->sk_reuseport && 182 - sk->sk_state != TCP_ESTABLISHED) { 183 - hash = udp6_ehashfn(net, daddr, hnum, 184 - saddr, sport); 161 + result = lookup_reuseport(net, sk, skb, 162 + saddr, sport, daddr, hnum); 163 + if (result) 164 + return result; 185 165 186 - result = reuseport_select_sock(sk, hash, skb, 187 - sizeof(struct udphdr)); 188 - if (result && !reuseport_has_conns(sk, false)) 189 - return result; 190 - } 191 166 result = sk; 192 167 badness = score; 193 168 } 194 169 } 195 170 return result; 171 + } 172 + 173 + static inline struct sock *udp6_lookup_run_bpf(struct net *net, 174 + struct udp_table *udptable, 175 + struct sk_buff *skb, 176 + const struct in6_addr *saddr, 177 + __be16 sport, 178 + const struct in6_addr *daddr, 179 + u16 hnum) 180 + { 181 + struct sock *sk, *reuse_sk; 182 + bool no_reuseport; 183 + 184 + if (udptable != 
&udp_table) 185 + return NULL; /* only UDP is supported */ 186 + 187 + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, 188 + saddr, sport, daddr, hnum, &sk); 189 + if (no_reuseport || IS_ERR_OR_NULL(sk)) 190 + return sk; 191 + 192 + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); 193 + if (reuse_sk) 194 + sk = reuse_sk; 195 + return sk; 196 196 } 197 197 198 198 /* rcu_read_lock() must be held */ ··· 225 185 unsigned short hnum = ntohs(dport); 226 186 unsigned int hash2, slot2; 227 187 struct udp_hslot *hslot2; 228 - struct sock *result; 188 + struct sock *result, *sk; 229 189 230 190 hash2 = ipv6_portaddr_hash(net, daddr, hnum); 231 191 slot2 = hash2 & udptable->mask; 232 192 hslot2 = &udptable->hash2[slot2]; 233 193 194 + /* Lookup connected or non-wildcard sockets */ 234 195 result = udp6_lib_lookup2(net, saddr, sport, 235 196 daddr, hnum, dif, sdif, 236 197 hslot2, skb); 237 - if (!result) { 238 - hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); 239 - slot2 = hash2 & udptable->mask; 198 + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) 199 + goto done; 240 200 241 - hslot2 = &udptable->hash2[slot2]; 242 - 243 - result = udp6_lib_lookup2(net, saddr, sport, 244 - &in6addr_any, hnum, dif, sdif, 245 - hslot2, skb); 201 + /* Lookup redirect from BPF */ 202 + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { 203 + sk = udp6_lookup_run_bpf(net, udptable, skb, 204 + saddr, sport, daddr, hnum); 205 + if (sk) { 206 + result = sk; 207 + goto done; 208 + } 246 209 } 210 + 211 + /* Got non-wildcard socket or error on first lookup */ 212 + if (result) 213 + goto done; 214 + 215 + /* Lookup wildcard sockets */ 216 + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); 217 + slot2 = hash2 & udptable->mask; 218 + hslot2 = &udptable->hash2[slot2]; 219 + 220 + result = udp6_lib_lookup2(net, saddr, sport, 221 + &in6addr_any, hnum, dif, sdif, 222 + hslot2, skb); 223 + done: 247 224 if (IS_ERR(result)) 248 225 return NULL; 249 
226 return result;
+6 -1
net/netlink/af_netlink.c
··· 60 60 #include <linux/genetlink.h> 61 61 #include <linux/net_namespace.h> 62 62 #include <linux/nospec.h> 63 + #include <linux/btf_ids.h> 63 64 64 65 #include <net/net_namespace.h> 65 66 #include <net/netns/generic.h> ··· 2804 2803 }; 2805 2804 2806 2805 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 2807 - static const struct bpf_iter_reg netlink_reg_info = { 2806 + BTF_ID_LIST(btf_netlink_sock_id) 2807 + BTF_ID(struct, netlink_sock) 2808 + 2809 + static struct bpf_iter_reg netlink_reg_info = { 2808 2810 .target = "netlink", 2809 2811 .seq_ops = &netlink_seq_ops, 2810 2812 .init_seq_private = bpf_iter_init_seq_net, ··· 2822 2818 2823 2819 static int __init bpf_iter_register(void) 2824 2820 { 2821 + netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id; 2825 2822 return bpf_iter_reg_target(&netlink_reg_info); 2826 2823 } 2827 2824 #endif
+6 -1
samples/bpf/offwaketime_kern.c
··· 12 12 #include <bpf/bpf_helpers.h> 13 13 #include <bpf/bpf_tracing.h> 14 14 15 - #define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;}) 15 + #define _(P) \ 16 + ({ \ 17 + typeof(P) val; \ 18 + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 19 + val; \ 20 + }) 16 21 17 22 #define MINBLOCK_US 1 18 23
+9 -3
samples/bpf/test_overhead_kprobe_kern.c
··· 10 10 #include <bpf/bpf_helpers.h> 11 11 #include <bpf/bpf_tracing.h> 12 12 13 - #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) 13 + #define _(P) \ 14 + ({ \ 15 + typeof(P) val = 0; \ 16 + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 17 + val; \ 18 + }) 14 19 15 20 SEC("kprobe/__set_task_comm") 16 21 int prog(struct pt_regs *ctx) ··· 30 25 tsk = (void *)PT_REGS_PARM1(ctx); 31 26 32 27 pid = _(tsk->pid); 33 - bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm); 34 - bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx)); 28 + bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm); 29 + bpf_probe_read_kernel(newcomm, sizeof(newcomm), 30 + (void *)PT_REGS_PARM2(ctx)); 35 31 signal = _(tsk->signal); 36 32 oom_score_adj = _(signal->oom_score_adj); 37 33 return 0;
+7 -2
samples/bpf/tracex1_kern.c
··· 11 11 #include <bpf/bpf_helpers.h> 12 12 #include <bpf/bpf_tracing.h> 13 13 14 - #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) 14 + #define _(P) \ 15 + ({ \ 16 + typeof(P) val = 0; \ 17 + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 18 + val; \ 19 + }) 15 20 16 21 /* kprobe is NOT a stable ABI 17 22 * kernel functions can be removed, renamed or completely change semantics. ··· 39 34 dev = _(skb->dev); 40 35 len = _(skb->len); 41 36 42 - bpf_probe_read(devname, sizeof(devname), dev->name); 37 + bpf_probe_read_kernel(devname, sizeof(devname), dev->name); 43 38 44 39 if (devname[0] == 'l' && devname[1] == 'o') { 45 40 char fmt[] = "skb %p len %d\n";
+2 -2
samples/bpf/tracex5_kern.c
··· 47 47 { 48 48 struct seccomp_data sd; 49 49 50 - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 50 + bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 51 51 if (sd.args[2] == 512) { 52 52 char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; 53 53 bpf_trace_printk(fmt, sizeof(fmt), ··· 60 60 { 61 61 struct seccomp_data sd; 62 62 63 - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 63 + bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 64 64 if (sd.args[2] > 128 && sd.args[2] <= 1024) { 65 65 char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; 66 66 bpf_trace_printk(fmt, sizeof(fmt),
+17 -8
samples/bpf/xdp_redirect_cpu_kern.c
··· 21 21 struct { 22 22 __uint(type, BPF_MAP_TYPE_CPUMAP); 23 23 __uint(key_size, sizeof(u32)); 24 - __uint(value_size, sizeof(u32)); 24 + __uint(value_size, sizeof(struct bpf_cpumap_val)); 25 25 __uint(max_entries, MAX_CPUS); 26 26 } cpu_map SEC(".maps"); 27 27 ··· 30 30 __u64 processed; 31 31 __u64 dropped; 32 32 __u64 issue; 33 + __u64 xdp_pass; 34 + __u64 xdp_drop; 35 + __u64 xdp_redirect; 33 36 }; 34 37 35 38 /* Count RX packets, as XDP bpf_prog doesn't get direct TX-success ··· 695 692 * Code in: kernel/include/trace/events/xdp.h 696 693 */ 697 694 struct cpumap_kthread_ctx { 698 - u64 __pad; // First 8 bytes are not accessible by bpf code 699 - int map_id; // offset:8; size:4; signed:1; 700 - u32 act; // offset:12; size:4; signed:0; 701 - int cpu; // offset:16; size:4; signed:1; 702 - unsigned int drops; // offset:20; size:4; signed:0; 703 - unsigned int processed; // offset:24; size:4; signed:0; 704 - int sched; // offset:28; size:4; signed:1; 695 + u64 __pad; // First 8 bytes are not accessible 696 + int map_id; // offset:8; size:4; signed:1; 697 + u32 act; // offset:12; size:4; signed:0; 698 + int cpu; // offset:16; size:4; signed:1; 699 + unsigned int drops; // offset:20; size:4; signed:0; 700 + unsigned int processed; // offset:24; size:4; signed:0; 701 + int sched; // offset:28; size:4; signed:1; 702 + unsigned int xdp_pass; // offset:32; size:4; signed:0; 703 + unsigned int xdp_drop; // offset:36; size:4; signed:0; 704 + unsigned int xdp_redirect; // offset:40; size:4; signed:0; 705 705 }; 706 706 707 707 SEC("tracepoint/xdp/xdp_cpumap_kthread") ··· 718 712 return 0; 719 713 rec->processed += ctx->processed; 720 714 rec->dropped += ctx->drops; 715 + rec->xdp_pass += ctx->xdp_pass; 716 + rec->xdp_drop += ctx->xdp_drop; 717 + rec->xdp_redirect += ctx->xdp_redirect; 721 718 722 719 /* Count times kthread yielded CPU via schedule call */ 723 720 if (ctx->sched)
+187 -22
samples/bpf/xdp_redirect_cpu_user.c
··· 70 70 {"stress-mode", no_argument, NULL, 'x' }, 71 71 {"no-separators", no_argument, NULL, 'z' }, 72 72 {"force", no_argument, NULL, 'F' }, 73 + {"mprog-disable", no_argument, NULL, 'n' }, 74 + {"mprog-name", required_argument, NULL, 'e' }, 75 + {"mprog-filename", required_argument, NULL, 'f' }, 76 + {"redirect-device", required_argument, NULL, 'r' }, 77 + {"redirect-map", required_argument, NULL, 'm' }, 73 78 {0, 0, NULL, 0 } 74 79 }; 75 80 ··· 161 156 __u64 processed; 162 157 __u64 dropped; 163 158 __u64 issue; 159 + __u64 xdp_pass; 160 + __u64 xdp_drop; 161 + __u64 xdp_redirect; 164 162 }; 165 163 struct record { 166 164 __u64 timestamp; ··· 183 175 /* For percpu maps, userspace gets a value per possible CPU */ 184 176 unsigned int nr_cpus = bpf_num_possible_cpus(); 185 177 struct datarec values[nr_cpus]; 178 + __u64 sum_xdp_redirect = 0; 179 + __u64 sum_xdp_pass = 0; 180 + __u64 sum_xdp_drop = 0; 186 181 __u64 sum_processed = 0; 187 182 __u64 sum_dropped = 0; 188 183 __u64 sum_issue = 0; ··· 207 196 sum_dropped += values[i].dropped; 208 197 rec->cpu[i].issue = values[i].issue; 209 198 sum_issue += values[i].issue; 199 + rec->cpu[i].xdp_pass = values[i].xdp_pass; 200 + sum_xdp_pass += values[i].xdp_pass; 201 + rec->cpu[i].xdp_drop = values[i].xdp_drop; 202 + sum_xdp_drop += values[i].xdp_drop; 203 + rec->cpu[i].xdp_redirect = values[i].xdp_redirect; 204 + sum_xdp_redirect += values[i].xdp_redirect; 210 205 } 211 206 rec->total.processed = sum_processed; 212 207 rec->total.dropped = sum_dropped; 213 208 rec->total.issue = sum_issue; 209 + rec->total.xdp_pass = sum_xdp_pass; 210 + rec->total.xdp_drop = sum_xdp_drop; 211 + rec->total.xdp_redirect = sum_xdp_redirect; 214 212 return true; 215 213 } 216 214 ··· 320 300 return pps; 321 301 } 322 302 303 + static void calc_xdp_pps(struct datarec *r, struct datarec *p, 304 + double *xdp_pass, double *xdp_drop, 305 + double *xdp_redirect, double period_) 306 + { 307 + *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0; 
308 + if (period_ > 0) { 309 + *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_; 310 + *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_; 311 + *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_; 312 + } 313 + } 314 + 323 315 static void stats_print(struct stats_record *stats_rec, 324 316 struct stats_record *stats_prev, 325 - char *prog_name) 317 + char *prog_name, char *mprog_name, int mprog_fd) 326 318 { 327 319 unsigned int nr_cpus = bpf_num_possible_cpus(); 328 320 double pps = 0, drop = 0, err = 0; 321 + bool mprog_enabled = false; 329 322 struct record *rec, *prev; 330 323 int to_cpu; 331 324 double t; 332 325 int i; 326 + 327 + if (mprog_fd > 0) 328 + mprog_enabled = true; 333 329 334 330 /* Header */ 335 331 printf("Running XDP/eBPF prog_name:%s\n", prog_name); ··· 491 455 printf(fm2_err, "xdp_exception", "total", pps, drop); 492 456 } 493 457 458 + /* CPUMAP attached XDP program that runs on remote/destination CPU */ 459 + if (mprog_enabled) { 460 + char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n"; 461 + char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n"; 462 + double xdp_pass, xdp_drop, xdp_redirect; 463 + 464 + printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name); 465 + printf("%-15s %-7s %-14s %-11s %-9s\n", 466 + "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir"); 467 + 468 + rec = &stats_rec->kthread; 469 + prev = &stats_prev->kthread; 470 + t = calc_period(rec, prev); 471 + for (i = 0; i < nr_cpus; i++) { 472 + struct datarec *r = &rec->cpu[i]; 473 + struct datarec *p = &prev->cpu[i]; 474 + 475 + calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, 476 + &xdp_redirect, t); 477 + if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0) 478 + printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop, 479 + xdp_redirect); 480 + } 481 + calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop, 482 + &xdp_redirect, t); 483 + printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect); 484 + } 485 + 494 486 
printf("\n"); 495 487 fflush(stdout); 496 488 } ··· 555 491 *b = tmp; 556 492 } 557 493 558 - static int create_cpu_entry(__u32 cpu, __u32 queue_size, 494 + static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, 559 495 __u32 avail_idx, bool new) 560 496 { 561 497 __u32 curr_cpus_count = 0; ··· 565 501 /* Add a CPU entry to cpumap, as this allocate a cpu entry in 566 502 * the kernel for the cpu. 567 503 */ 568 - ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0); 504 + ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0); 569 505 if (ret) { 570 506 fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); 571 507 exit(EXIT_FAIL_BPF); ··· 596 532 } 597 533 } 598 534 /* map_fd[7] = cpus_iterator */ 599 - printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n", 535 + printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n", 600 536 new ? "Add-new":"Replace", cpu, avail_idx, 601 - queue_size, curr_cpus_count); 537 + value->qsize, value->bpf_prog.fd, curr_cpus_count); 602 538 603 539 return 0; 604 540 } ··· 622 558 } 623 559 624 560 /* Stress cpumap management code by concurrently changing underlying cpumap */ 625 - static void stress_cpumap(void) 561 + static void stress_cpumap(struct bpf_cpumap_val *value) 626 562 { 627 563 /* Changing qsize will cause kernel to free and alloc a new 628 564 * bpf_cpu_map_entry, with an associated/complicated tear-down 629 565 * procedure. 
630 566 */ 631 - create_cpu_entry(1, 1024, 0, false); 632 - create_cpu_entry(1, 8, 0, false); 633 - create_cpu_entry(1, 16000, 0, false); 567 + value->qsize = 1024; 568 + create_cpu_entry(1, value, 0, false); 569 + value->qsize = 8; 570 + create_cpu_entry(1, value, 0, false); 571 + value->qsize = 16000; 572 + create_cpu_entry(1, value, 0, false); 634 573 } 635 574 636 575 static void stats_poll(int interval, bool use_separators, char *prog_name, 576 + char *mprog_name, struct bpf_cpumap_val *value, 637 577 bool stress_mode) 638 578 { 639 579 struct stats_record *record, *prev; 580 + int mprog_fd; 640 581 641 582 record = alloc_stats_record(); 642 583 prev = alloc_stats_record(); ··· 653 584 654 585 while (1) { 655 586 swap(&prev, &record); 587 + mprog_fd = value->bpf_prog.fd; 656 588 stats_collect(record); 657 - stats_print(record, prev, prog_name); 589 + stats_print(record, prev, prog_name, mprog_name, mprog_fd); 658 590 sleep(interval); 659 591 if (stress_mode) 660 - stress_cpumap(); 592 + stress_cpumap(value); 661 593 } 662 594 663 595 free_stats_record(record); ··· 731 661 return 0; 732 662 } 733 663 664 + static int load_cpumap_prog(char *file_name, char *prog_name, 665 + char *redir_interface, char *redir_map) 666 + { 667 + struct bpf_prog_load_attr prog_load_attr = { 668 + .prog_type = BPF_PROG_TYPE_XDP, 669 + .expected_attach_type = BPF_XDP_CPUMAP, 670 + .file = file_name, 671 + }; 672 + struct bpf_program *prog; 673 + struct bpf_object *obj; 674 + int fd; 675 + 676 + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd)) 677 + return -1; 678 + 679 + if (fd < 0) { 680 + fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", 681 + strerror(errno)); 682 + return fd; 683 + } 684 + 685 + if (redir_interface && redir_map) { 686 + int err, map_fd, ifindex_out, key = 0; 687 + 688 + map_fd = bpf_object__find_map_fd_by_name(obj, redir_map); 689 + if (map_fd < 0) 690 + return map_fd; 691 + 692 + ifindex_out = if_nametoindex(redir_interface); 693 + if (!ifindex_out) 694 + 
return -1; 695 + 696 + err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0); 697 + if (err < 0) 698 + return err; 699 + } 700 + 701 + prog = bpf_object__find_program_by_title(obj, prog_name); 702 + if (!prog) { 703 + fprintf(stderr, "bpf_object__find_program_by_title failed\n"); 704 + return EXIT_FAIL; 705 + } 706 + 707 + return bpf_program__fd(prog); 708 + } 709 + 734 710 int main(int argc, char **argv) 735 711 { 736 712 struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; 737 713 char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; 714 + char *mprog_filename = "xdp_redirect_kern.o"; 715 + char *redir_interface = NULL, *redir_map = NULL; 716 + char *mprog_name = "xdp_redirect_dummy"; 717 + bool mprog_disable = false; 738 718 struct bpf_prog_load_attr prog_load_attr = { 739 719 .prog_type = BPF_PROG_TYPE_UNSPEC, 740 720 }; 741 721 struct bpf_prog_info info = {}; 742 722 __u32 info_len = sizeof(info); 723 + struct bpf_cpumap_val value; 743 724 bool use_separators = true; 744 725 bool stress_mode = false; 745 726 struct bpf_program *prog; ··· 802 681 int add_cpu = -1; 803 682 int opt, err; 804 683 int prog_fd; 684 + int *cpu, i; 805 685 __u32 qsize; 806 686 807 687 n_cpus = get_nprocs_conf(); ··· 838 716 } 839 717 mark_cpus_unavailable(); 840 718 719 + cpu = malloc(n_cpus * sizeof(int)); 720 + if (!cpu) { 721 + fprintf(stderr, "failed to allocate cpu array\n"); 722 + return EXIT_FAIL; 723 + } 724 + memset(cpu, 0, n_cpus * sizeof(int)); 725 + 841 726 /* Parse commands line args */ 842 - while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF", 727 + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:", 843 728 long_options, &longindex)) != -1) { 844 729 switch (opt) { 845 730 case 'd': ··· 880 751 /* Selecting eBPF prog to load */ 881 752 prog_name = optarg; 882 753 break; 754 + case 'n': 755 + mprog_disable = true; 756 + break; 757 + case 'f': 758 + mprog_filename = optarg; 759 + break; 760 + case 'e': 761 + mprog_name = optarg; 762 + break; 763 + case 
'r': 764 + redir_interface = optarg; 765 + break; 766 + case 'm': 767 + redir_map = optarg; 768 + break; 883 769 case 'c': 884 770 /* Add multiple CPUs */ 885 771 add_cpu = strtoul(optarg, NULL, 0); ··· 904 760 errno, strerror(errno)); 905 761 goto error; 906 762 } 907 - create_cpu_entry(add_cpu, qsize, added_cpus, true); 908 - added_cpus++; 763 + cpu[added_cpus++] = add_cpu; 909 764 break; 910 765 case 'q': 911 766 qsize = atoi(optarg); ··· 915 772 case 'h': 916 773 error: 917 774 default: 775 + free(cpu); 918 776 usage(argv, obj); 919 777 return EXIT_FAIL_OPTION; 920 778 } ··· 928 784 if (ifindex == -1) { 929 785 fprintf(stderr, "ERR: required option --dev missing\n"); 930 786 usage(argv, obj); 931 - return EXIT_FAIL_OPTION; 787 + err = EXIT_FAIL_OPTION; 788 + goto out; 932 789 } 933 790 /* Required option */ 934 791 if (add_cpu == -1) { 935 792 fprintf(stderr, "ERR: required option --cpu missing\n"); 936 793 fprintf(stderr, " Specify multiple --cpu option to add more\n"); 937 794 usage(argv, obj); 938 - return EXIT_FAIL_OPTION; 795 + err = EXIT_FAIL_OPTION; 796 + goto out; 939 797 } 798 + 799 + value.bpf_prog.fd = 0; 800 + if (!mprog_disable) 801 + value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name, 802 + redir_interface, redir_map); 803 + if (value.bpf_prog.fd < 0) { 804 + err = value.bpf_prog.fd; 805 + goto out; 806 + } 807 + value.qsize = qsize; 808 + 809 + for (i = 0; i < added_cpus; i++) 810 + create_cpu_entry(cpu[i], &value, i, true); 940 811 941 812 /* Remove XDP program when program is interrupted or killed */ 942 813 signal(SIGINT, int_exit); ··· 960 801 prog = bpf_object__find_program_by_title(obj, prog_name); 961 802 if (!prog) { 962 803 fprintf(stderr, "bpf_object__find_program_by_title failed\n"); 963 - return EXIT_FAIL; 804 + err = EXIT_FAIL; 805 + goto out; 964 806 } 965 807 966 808 prog_fd = bpf_program__fd(prog); 967 809 if (prog_fd < 0) { 968 810 fprintf(stderr, "bpf_program__fd failed\n"); 969 - return EXIT_FAIL; 811 + err = 
EXIT_FAIL; 812 + goto out; 970 813 } 971 814 972 815 if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { 973 816 fprintf(stderr, "link set xdp fd failed\n"); 974 - return EXIT_FAIL_XDP; 817 + err = EXIT_FAIL_XDP; 818 + goto out; 975 819 } 976 820 977 821 err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); 978 822 if (err) { 979 823 printf("can't get prog info - %s\n", strerror(errno)); 980 - return err; 824 + goto out; 981 825 } 982 826 prog_id = info.id; 983 827 984 - stats_poll(interval, use_separators, prog_name, stress_mode); 985 - return EXIT_OK; 828 + stats_poll(interval, use_separators, prog_name, mprog_name, 829 + &value, stress_mode); 830 + out: 831 + free(cpu); 832 + return err; 986 833 }
+8 -1
scripts/bpf_helpers_doc.py
··· 404 404 405 405 type_fwds = [ 406 406 'struct bpf_fib_lookup', 407 + 'struct bpf_sk_lookup', 407 408 'struct bpf_perf_event_data', 408 409 'struct bpf_perf_event_value', 409 410 'struct bpf_pidns_info', ··· 451 450 'struct bpf_perf_event_data', 452 451 'struct bpf_perf_event_value', 453 452 'struct bpf_pidns_info', 453 + 'struct bpf_sk_lookup', 454 454 'struct bpf_sock', 455 455 'struct bpf_sock_addr', 456 456 'struct bpf_sock_ops', ··· 489 487 'struct sk_msg_buff': 'struct sk_msg_md', 490 488 'struct xdp_buff': 'struct xdp_md', 491 489 } 490 + # Helpers overloaded for different context types. 491 + overloaded_helpers = [ 492 + 'bpf_get_socket_cookie', 493 + 'bpf_sk_assign', 494 + ] 492 495 493 496 def print_header(self): 494 497 header = '''\ ··· 550 543 for i, a in enumerate(proto['args']): 551 544 t = a['type'] 552 545 n = a['name'] 553 - if proto['name'] == 'bpf_get_socket_cookie' and i == 0: 546 + if proto['name'] in self.overloaded_helpers and i == 0: 554 547 t = 'void' 555 548 n = 'ctx' 556 549 one_arg = '{}{}'.format(comma, self.map_type(t))
+1 -1
tools/bpf/bpftool/Documentation/bpftool-prog.rst
··· 45 45 | **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | 46 46 | **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | 47 47 | **cgroup/getsockopt** | **cgroup/setsockopt** | 48 - | **struct_ops** | **fentry** | **fexit** | **freplace** 48 + | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** 49 49 | } 50 50 | *ATTACH_TYPE* := { 51 51 | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+1 -1
tools/bpf/bpftool/bash-completion/bpftool
··· 479 479 cgroup/post_bind4 cgroup/post_bind6 \ 480 480 cgroup/sysctl cgroup/getsockopt \ 481 481 cgroup/setsockopt struct_ops \ 482 - fentry fexit freplace" -- \ 482 + fentry fexit freplace sk_lookup" -- \ 483 483 "$cur" ) ) 484 484 return 0 485 485 ;;
+84 -60
tools/bpf/bpftool/common.c
··· 1 1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 2 /* Copyright (C) 2017-2018 Netronome Systems, Inc. */ 3 3 4 + #define _GNU_SOURCE 4 5 #include <ctype.h> 5 6 #include <errno.h> 6 7 #include <fcntl.h> 7 - #include <fts.h> 8 + #include <ftw.h> 8 9 #include <libgen.h> 9 10 #include <mntent.h> 10 11 #include <stdbool.h> ··· 65 64 [BPF_TRACE_FEXIT] = "fexit", 66 65 [BPF_MODIFY_RETURN] = "mod_ret", 67 66 [BPF_LSM_MAC] = "lsm_mac", 67 + [BPF_SK_LOOKUP] = "sk_lookup", 68 68 }; 69 69 70 70 void p_err(const char *fmt, ...) ··· 162 160 return err; 163 161 } 164 162 165 - int open_obj_pinned(char *path, bool quiet) 163 + int open_obj_pinned(const char *path, bool quiet) 166 164 { 167 - int fd; 165 + char *pname; 166 + int fd = -1; 168 167 169 - fd = bpf_obj_get(path); 170 - if (fd < 0) { 168 + pname = strdup(path); 169 + if (!pname) { 171 170 if (!quiet) 172 - p_err("bpf obj get (%s): %s", path, 173 - errno == EACCES && !is_bpffs(dirname(path)) ? 174 - "directory not in bpf file system (bpffs)" : 175 - strerror(errno)); 176 - return -1; 171 + p_err("mem alloc failed"); 172 + goto out_ret; 177 173 } 178 174 175 + fd = bpf_obj_get(pname); 176 + if (fd < 0) { 177 + if (!quiet) 178 + p_err("bpf obj get (%s): %s", pname, 179 + errno == EACCES && !is_bpffs(dirname(pname)) ? 
180 + "directory not in bpf file system (bpffs)" : 181 + strerror(errno)); 182 + goto out_free; 183 + } 184 + 185 + out_free: 186 + free(pname); 187 + out_ret: 179 188 return fd; 180 189 } 181 190 182 - int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) 191 + int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type) 183 192 { 184 193 enum bpf_obj_type type; 185 194 int fd; ··· 380 367 jsonw_end_array(json_wtr); 381 368 } 382 369 370 + /* extra params for nftw cb */ 371 + static struct pinned_obj_table *build_fn_table; 372 + static enum bpf_obj_type build_fn_type; 373 + 374 + static int do_build_table_cb(const char *fpath, const struct stat *sb, 375 + int typeflag, struct FTW *ftwbuf) 376 + { 377 + struct bpf_prog_info pinned_info; 378 + __u32 len = sizeof(pinned_info); 379 + struct pinned_obj *obj_node; 380 + enum bpf_obj_type objtype; 381 + int fd, err = 0; 382 + 383 + if (typeflag != FTW_F) 384 + goto out_ret; 385 + 386 + fd = open_obj_pinned(fpath, true); 387 + if (fd < 0) 388 + goto out_ret; 389 + 390 + objtype = get_fd_type(fd); 391 + if (objtype != build_fn_type) 392 + goto out_close; 393 + 394 + memset(&pinned_info, 0, sizeof(pinned_info)); 395 + if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len)) 396 + goto out_close; 397 + 398 + obj_node = calloc(1, sizeof(*obj_node)); 399 + if (!obj_node) { 400 + err = -1; 401 + goto out_close; 402 + } 403 + 404 + obj_node->id = pinned_info.id; 405 + obj_node->path = strdup(fpath); 406 + if (!obj_node->path) { 407 + err = -1; 408 + free(obj_node); 409 + goto out_close; 410 + } 411 + 412 + hash_add(build_fn_table->table, &obj_node->hash, obj_node->id); 413 + out_close: 414 + close(fd); 415 + out_ret: 416 + return err; 417 + } 418 + 383 419 int build_pinned_obj_table(struct pinned_obj_table *tab, 384 420 enum bpf_obj_type type) 385 421 { 386 - struct bpf_prog_info pinned_info = {}; 387 - struct pinned_obj *obj_node = NULL; 388 - __u32 len = sizeof(pinned_info); 389 422 struct mntent *mntent = NULL; 
390 - enum bpf_obj_type objtype; 391 423 FILE *mntfile = NULL; 392 - FTSENT *ftse = NULL; 393 - FTS *fts = NULL; 394 - int fd, err; 424 + int flags = FTW_PHYS; 425 + int nopenfd = 16; 426 + int err = 0; 395 427 396 428 mntfile = setmntent("/proc/mounts", "r"); 397 429 if (!mntfile) 398 430 return -1; 399 431 432 + build_fn_table = tab; 433 + build_fn_type = type; 434 + 400 435 while ((mntent = getmntent(mntfile))) { 401 - char *path[] = { mntent->mnt_dir, NULL }; 436 + char *path = mntent->mnt_dir; 402 437 403 438 if (strncmp(mntent->mnt_type, "bpf", 3) != 0) 404 439 continue; 405 - 406 - fts = fts_open(path, 0, NULL); 407 - if (!fts) 408 - continue; 409 - 410 - while ((ftse = fts_read(fts))) { 411 - if (!(ftse->fts_info & FTS_F)) 412 - continue; 413 - fd = open_obj_pinned(ftse->fts_path, true); 414 - if (fd < 0) 415 - continue; 416 - 417 - objtype = get_fd_type(fd); 418 - if (objtype != type) { 419 - close(fd); 420 - continue; 421 - } 422 - memset(&pinned_info, 0, sizeof(pinned_info)); 423 - err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len); 424 - if (err) { 425 - close(fd); 426 - continue; 427 - } 428 - 429 - obj_node = malloc(sizeof(*obj_node)); 430 - if (!obj_node) { 431 - close(fd); 432 - fts_close(fts); 433 - fclose(mntfile); 434 - return -1; 435 - } 436 - 437 - memset(obj_node, 0, sizeof(*obj_node)); 438 - obj_node->id = pinned_info.id; 439 - obj_node->path = strdup(ftse->fts_path); 440 - hash_add(tab->table, &obj_node->hash, obj_node->id); 441 - 442 - close(fd); 443 - } 444 - fts_close(fts); 440 + err = nftw(path, do_build_table_cb, nopenfd, flags); 441 + if (err) 442 + break; 445 443 } 446 444 fclose(mntfile); 447 - return 0; 445 + return err; 448 446 } 449 447 450 448 void delete_pinned_obj_table(struct pinned_obj_table *tab)
+4 -1
tools/bpf/bpftool/gen.c
··· 302 302 opts.object_name = obj_name; 303 303 obj = bpf_object__open_mem(obj_data, file_sz, &opts); 304 304 if (IS_ERR(obj)) { 305 + char err_buf[256]; 306 + 307 + libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf)); 308 + p_err("failed to open BPF object file: %s", err_buf); 305 309 obj = NULL; 306 - p_err("failed to open BPF object file: %ld", PTR_ERR(obj)); 307 310 goto out; 308 311 } 309 312
+2 -2
tools/bpf/bpftool/main.h
··· 152 152 int get_fd_type(int fd); 153 153 const char *get_fd_type_name(enum bpf_obj_type type); 154 154 char *get_fdinfo(int fd, const char *key); 155 - int open_obj_pinned(char *path, bool quiet); 156 - int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); 155 + int open_obj_pinned(const char *path, bool quiet); 156 + int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type); 157 157 int mount_bpffs_for_pin(const char *name); 158 158 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); 159 159 int do_pin_fd(int fd, const char *name);
+2 -1
tools/bpf/bpftool/prog.c
··· 59 59 [BPF_PROG_TYPE_TRACING] = "tracing", 60 60 [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", 61 61 [BPF_PROG_TYPE_EXT] = "ext", 62 + [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", 62 63 }; 63 64 64 65 const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); ··· 1906 1905 " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" 1907 1906 " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" 1908 1907 " cgroup/getsockopt | cgroup/setsockopt |\n" 1909 - " struct_ops | fentry | fexit | freplace }\n" 1908 + " struct_ops | fentry | fexit | freplace | sk_lookup }\n" 1910 1909 " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" 1911 1910 " flow_dissector }\n" 1912 1911 " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
+2 -1
tools/bpf/bpftool/skeleton/pid_iter.bpf.c
··· 71 71 72 72 e.pid = task->tgid; 73 73 e.id = get_obj_id(file->private_data, obj_type); 74 - bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm); 74 + bpf_probe_read_kernel(&e.comm, sizeof(e.comm), 75 + task->group_leader->comm); 75 76 bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); 76 77 77 78 return 0;
+47 -4
tools/include/linux/btf_ids.h
··· 3 3 #ifndef _LINUX_BTF_IDS_H 4 4 #define _LINUX_BTF_IDS_H 5 5 6 + #ifdef CONFIG_DEBUG_INFO_BTF 7 + 6 8 #include <linux/compiler.h> /* for __PASTE */ 7 9 8 10 /* ··· 23 21 asm( \ 24 22 ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ 25 23 ".local " #symbol " ; \n" \ 26 - ".type " #symbol ", @object; \n" \ 24 + ".type " #symbol ", STT_OBJECT; \n" \ 27 25 ".size " #symbol ", 4; \n" \ 28 26 #symbol ": \n" \ 29 27 ".zero 4 \n" \ ··· 57 55 * .zero 4 58 56 * 59 57 */ 60 - #define __BTF_ID_LIST(name) \ 58 + #define __BTF_ID_LIST(name, scope) \ 61 59 asm( \ 62 60 ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ 63 - ".local " #name "; \n" \ 61 + "." #scope " " #name "; \n" \ 64 62 #name ":; \n" \ 65 63 ".popsection; \n"); \ 66 64 67 65 #define BTF_ID_LIST(name) \ 68 - __BTF_ID_LIST(name) \ 66 + __BTF_ID_LIST(name, local) \ 69 67 extern u32 name[]; 68 + 69 + #define BTF_ID_LIST_GLOBAL(name) \ 70 + __BTF_ID_LIST(name, globl) 70 71 71 72 /* 72 73 * The BTF_ID_UNUSED macro defines 4 zero bytes. ··· 88 83 ".zero 4 \n" \ 89 84 ".popsection; \n"); 90 85 86 + #else 87 + 88 + #define BTF_ID_LIST(name) static u32 name[5]; 89 + #define BTF_ID(prefix, name) 90 + #define BTF_ID_UNUSED 91 + #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; 92 + 93 + #endif /* CONFIG_DEBUG_INFO_BTF */ 94 + 95 + #ifdef CONFIG_NET 96 + /* Define a list of socket types which can be the argument for 97 + * skc_to_*_sock() helpers. All these sockets should have 98 + * sock_common as the first argument in its memory layout. 
99 + */ 100 + #define BTF_SOCK_TYPE_xxx \ 101 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \ 102 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \ 103 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \ 104 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \ 105 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \ 106 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \ 107 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \ 108 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \ 109 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \ 110 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ 111 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ 112 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ 113 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) 114 + 115 + enum { 116 + #define BTF_SOCK_TYPE(name, str) name, 117 + BTF_SOCK_TYPE_xxx 118 + #undef BTF_SOCK_TYPE 119 + MAX_BTF_SOCK_TYPE, 120 + }; 121 + 122 + extern u32 btf_sock_ids[]; 123 + #endif 91 124 92 125 #endif
+94 -3
tools/include/uapi/linux/bpf.h
··· 189 189 BPF_PROG_TYPE_STRUCT_OPS, 190 190 BPF_PROG_TYPE_EXT, 191 191 BPF_PROG_TYPE_LSM, 192 + BPF_PROG_TYPE_SK_LOOKUP, 192 193 }; 193 194 194 195 enum bpf_attach_type { ··· 228 227 BPF_CGROUP_INET6_GETSOCKNAME, 229 228 BPF_XDP_DEVMAP, 230 229 BPF_CGROUP_INET_SOCK_RELEASE, 230 + BPF_XDP_CPUMAP, 231 + BPF_SK_LOOKUP, 231 232 __MAX_BPF_ATTACH_TYPE 232 233 }; 233 234 ··· 2422 2419 * Look for an IPv6 socket. 2423 2420 * 2424 2421 * If the *netns* is a negative signed 32-bit integer, then the 2425 - * socket lookup table in the netns associated with the *ctx* will 2422 + * socket lookup table in the netns associated with the *ctx* 2426 2423 * will be used. For the TC hooks, this is the netns of the device 2427 2424 * in the skb. For socket hooks, this is the netns of the socket. 2428 2425 * If *netns* is any other signed 32-bit value greater than or ··· 2459 2456 * Look for an IPv6 socket. 2460 2457 * 2461 2458 * If the *netns* is a negative signed 32-bit integer, then the 2462 - * socket lookup table in the netns associated with the *ctx* will 2459 + * socket lookup table in the netns associated with the *ctx* 2463 2460 * will be used. For the TC hooks, this is the netns of the device 2464 2461 * in the skb. For socket hooks, this is the netns of the socket. 2465 2462 * If *netns* is any other signed 32-bit value greater than or ··· 3071 3068 * 3072 3069 * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) 3073 3070 * Description 3071 + * Helper is overloaded depending on BPF program type. This 3072 + * description applies to **BPF_PROG_TYPE_SCHED_CLS** and 3073 + * **BPF_PROG_TYPE_SCHED_ACT** programs. 3074 + * 3074 3075 * Assign the *sk* to the *skb*. When combined with appropriate 3075 3076 * routing configuration to receive the packet towards the socket, 3076 3077 * will cause *skb* to be delivered to the specified socket. ··· 3099 3092 * 3100 3093 * **-ESOCKTNOSUPPORT** if the socket type is not supported 3101 3094 * (reuseport). 
3095 + * 3096 + * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) 3097 + * Description 3098 + * Helper is overloaded depending on BPF program type. This 3099 + * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs. 3100 + * 3101 + * Select the *sk* as a result of a socket lookup. 3102 + * 3103 + * For the operation to succeed passed socket must be compatible 3104 + * with the packet description provided by the *ctx* object. 3105 + * 3106 + * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must 3107 + * be an exact match. While IP family (**AF_INET** or 3108 + * **AF_INET6**) must be compatible, that is IPv6 sockets 3109 + * that are not v6-only can be selected for IPv4 packets. 3110 + * 3111 + * Only TCP listeners and UDP unconnected sockets can be 3112 + * selected. *sk* can also be NULL to reset any previous 3113 + * selection. 3114 + * 3115 + * *flags* argument can combination of following values: 3116 + * 3117 + * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous 3118 + * socket selection, potentially done by a BPF program 3119 + * that ran before us. 3120 + * 3121 + * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip 3122 + * load-balancing within reuseport group for the socket 3123 + * being selected. 3124 + * 3125 + * On success *ctx->sk* will point to the selected socket. 3126 + * 3127 + * Return 3128 + * 0 on success, or a negative errno in case of failure. 3129 + * 3130 + * * **-EAFNOSUPPORT** if socket family (*sk->family*) is 3131 + * not compatible with packet family (*ctx->family*). 3132 + * 3133 + * * **-EEXIST** if socket has been already selected, 3134 + * potentially by another program, and 3135 + * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified. 3136 + * 3137 + * * **-EINVAL** if unsupported flags were specified. 3138 + * 3139 + * * **-EPROTOTYPE** if socket L4 protocol 3140 + * (*sk->protocol*) doesn't match packet protocol 3141 + * (*ctx->protocol*). 
3142 + * 3143 + * * **-ESOCKTNOSUPPORT** if socket is not in allowed 3144 + * state (TCP listening or UDP unconnected). 3102 3145 * 3103 3146 * u64 bpf_ktime_get_boot_ns(void) 3104 3147 * Description ··· 3663 3606 BPF_RINGBUF_HDR_SZ = 8, 3664 3607 }; 3665 3608 3609 + /* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */ 3610 + enum { 3611 + BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0), 3612 + BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1), 3613 + }; 3614 + 3666 3615 /* Mode for BPF_FUNC_skb_adjust_room helper. */ 3667 3616 enum bpf_adj_room_mode { 3668 3617 BPF_ADJ_ROOM_NET, ··· 3912 3849 } bpf_prog; 3913 3850 }; 3914 3851 3852 + /* CPUMAP map-value layout 3853 + * 3854 + * The struct data-layout of map-value is a configuration interface. 3855 + * New members can only be added to the end of this structure. 3856 + */ 3857 + struct bpf_cpumap_val { 3858 + __u32 qsize; /* queue size to remote target CPU */ 3859 + union { 3860 + int fd; /* prog fd on map write */ 3861 + __u32 id; /* prog id on map read */ 3862 + } bpf_prog; 3863 + }; 3864 + 3915 3865 enum sk_action { 3916 3866 SK_DROP = 0, 3917 3867 SK_PASS, ··· 4062 3986 4063 3987 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed 4064 3988 * by user and intended to be used by socket (e.g. to bind to, depends on 4065 - * attach attach type). 3989 + * attach type). 4066 3990 */ 4067 3991 struct bpf_sock_addr { 4068 3992 __u32 user_family; /* Allows 4-byte read, but no write. */ ··· 4411 4335 __u32 pid; 4412 4336 __u32 tgid; 4413 4337 }; 4338 + 4339 + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. 
*/ 4340 + struct bpf_sk_lookup { 4341 + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ 4342 + 4343 + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ 4344 + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ 4345 + __u32 remote_ip4; /* Network byte order */ 4346 + __u32 remote_ip6[4]; /* Network byte order */ 4347 + __u32 remote_port; /* Network byte order */ 4348 + __u32 local_ip4; /* Network byte order */ 4349 + __u32 local_ip6[4]; /* Network byte order */ 4350 + __u32 local_port; /* Host byte order */ 4351 + }; 4352 + 4414 4353 #endif /* _UAPI__LINUX_BPF_H__ */
+1 -1
tools/lib/bpf/bpf_helpers.h
··· 40 40 * Helper macro to manipulate data structures 41 41 */ 42 42 #ifndef offsetof 43 - #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) 43 + #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) 44 44 #endif 45 45 #ifndef container_of 46 46 #define container_of(ptr, type, member) \
+5
tools/lib/bpf/libbpf.c
··· 6799 6799 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); 6800 6800 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); 6801 6801 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); 6802 + BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); 6802 6803 6803 6804 enum bpf_attach_type 6804 6805 bpf_program__get_expected_attach_type(struct bpf_program *prog) ··· 6913 6912 .attach_fn = attach_iter), 6914 6913 BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, 6915 6914 BPF_XDP_DEVMAP), 6915 + BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, 6916 + BPF_XDP_CPUMAP), 6916 6917 BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), 6917 6918 BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), 6918 6919 BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), ··· 6982 6979 BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, 6983 6980 BPF_CGROUP_SETSOCKOPT), 6984 6981 BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), 6982 + BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP, 6983 + BPF_SK_LOOKUP), 6985 6984 }; 6986 6985 6987 6986 #undef BPF_PROG_SEC_IMPL
+2
tools/lib/bpf/libbpf.h
··· 350 350 LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); 351 351 LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); 352 352 LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); 353 + LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); 353 354 354 355 LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); 355 356 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, ··· 378 377 LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); 379 378 LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); 380 379 LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); 380 + LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog); 381 381 382 382 /* 383 383 * No need for __attribute__((packed)), all members of 'bpf_map_def'
+2
tools/lib/bpf/libbpf.map
··· 287 287 bpf_map__type; 288 288 bpf_map__value_size; 289 289 bpf_program__autoload; 290 + bpf_program__is_sk_lookup; 290 291 bpf_program__set_autoload; 292 + bpf_program__set_sk_lookup; 291 293 btf__set_fd; 292 294 } LIBBPF_0.0.9;
+3
tools/lib/bpf/libbpf_probes.c
··· 78 78 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 79 79 xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; 80 80 break; 81 + case BPF_PROG_TYPE_SK_LOOKUP: 82 + xattr.expected_attach_type = BPF_SK_LOOKUP; 83 + break; 81 84 case BPF_PROG_TYPE_KPROBE: 82 85 xattr.kern_version = get_kernel_version(); 83 86 break;
+35 -23
tools/testing/selftests/bpf/network_helpers.c
··· 73 73 socklen_t len; 74 74 int fd; 75 75 76 - if (family == AF_INET) { 77 - struct sockaddr_in *sin = (void *)&addr; 78 - 79 - sin->sin_family = AF_INET; 80 - sin->sin_port = htons(port); 81 - if (addr_str && 82 - inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { 83 - log_err("inet_pton(AF_INET, %s)", addr_str); 84 - return -1; 85 - } 86 - len = sizeof(*sin); 87 - } else { 88 - struct sockaddr_in6 *sin6 = (void *)&addr; 89 - 90 - sin6->sin6_family = AF_INET6; 91 - sin6->sin6_port = htons(port); 92 - if (addr_str && 93 - inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { 94 - log_err("inet_pton(AF_INET6, %s)", addr_str); 95 - return -1; 96 - } 97 - len = sizeof(*sin6); 98 - } 76 + if (make_sockaddr(family, addr_str, port, &addr, &len)) 77 + return -1; 99 78 100 79 fd = socket(family, type, 0); 101 80 if (fd < 0) { ··· 172 193 return -1; 173 194 174 195 return 0; 196 + } 197 + 198 + int make_sockaddr(int family, const char *addr_str, __u16 port, 199 + struct sockaddr_storage *addr, socklen_t *len) 200 + { 201 + if (family == AF_INET) { 202 + struct sockaddr_in *sin = (void *)addr; 203 + 204 + sin->sin_family = AF_INET; 205 + sin->sin_port = htons(port); 206 + if (addr_str && 207 + inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { 208 + log_err("inet_pton(AF_INET, %s)", addr_str); 209 + return -1; 210 + } 211 + if (len) 212 + *len = sizeof(*sin); 213 + return 0; 214 + } else if (family == AF_INET6) { 215 + struct sockaddr_in6 *sin6 = (void *)addr; 216 + 217 + sin6->sin6_family = AF_INET6; 218 + sin6->sin6_port = htons(port); 219 + if (addr_str && 220 + inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { 221 + log_err("inet_pton(AF_INET6, %s)", addr_str); 222 + return -1; 223 + } 224 + if (len) 225 + *len = sizeof(*sin6); 226 + return 0; 227 + } 228 + return -1; 175 229 }
+2
tools/testing/selftests/bpf/network_helpers.h
··· 37 37 int timeout_ms); 38 38 int connect_to_fd(int server_fd, int timeout_ms); 39 39 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); 40 + int make_sockaddr(int family, const char *addr_str, __u16 port, 41 + struct sockaddr_storage *addr, socklen_t *len); 40 42 41 43 #endif
+26 -8
tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
··· 6 6 #include <bpf/libbpf.h> 7 7 #include <linux/btf.h> 8 8 #include <linux/kernel.h> 9 + #define CONFIG_DEBUG_INFO_BTF 9 10 #include <linux/btf_ids.h> 10 11 #include "test_progs.h" 11 12 ··· 28 27 { "func", BTF_KIND_FUNC, -1 }, 29 28 }; 30 29 31 - BTF_ID_LIST(test_list) 30 + BTF_ID_LIST(test_list_local) 31 + BTF_ID_UNUSED 32 + BTF_ID(typedef, S) 33 + BTF_ID(typedef, T) 34 + BTF_ID(typedef, U) 35 + BTF_ID(struct, S) 36 + BTF_ID(union, U) 37 + BTF_ID(func, func) 38 + 39 + extern __u32 test_list_global[]; 40 + BTF_ID_LIST_GLOBAL(test_list_global) 32 41 BTF_ID_UNUSED 33 42 BTF_ID(typedef, S) 34 43 BTF_ID(typedef, T) ··· 104 93 105 94 int test_resolve_btfids(void) 106 95 { 107 - unsigned int i; 96 + __u32 *test_list, *test_lists[] = { test_list_local, test_list_global }; 97 + unsigned int i, j; 108 98 int ret = 0; 109 99 110 100 if (resolve_symbols()) 111 101 return -1; 112 102 113 - /* Check BTF_ID_LIST(test_list) IDs */ 114 - for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) { 115 - ret = CHECK(test_list[i] != test_symbols[i].id, 116 - "id_check", 117 - "wrong ID for %s (%d != %d)\n", test_symbols[i].name, 118 - test_list[i], test_symbols[i].id); 103 + /* Check BTF_ID_LIST(test_list_local) and 104 + * BTF_ID_LIST_GLOBAL(test_list_global) IDs 105 + */ 106 + for (j = 0; j < ARRAY_SIZE(test_lists); j++) { 107 + test_list = test_lists[j]; 108 + for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) { 109 + ret = CHECK(test_list[i] != test_symbols[i].id, 110 + "id_check", 111 + "wrong ID for %s (%d != %d)\n", 112 + test_symbols[i].name, 113 + test_list[i], test_symbols[i].id); 114 + } 119 115 } 120 116 121 117 return ret;
+1282
tools/testing/selftests/bpf/prog_tests/sk_lookup.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 + // Copyright (c) 2020 Cloudflare 3 + /* 4 + * Test BPF attach point for INET socket lookup (BPF_SK_LOOKUP). 5 + * 6 + * Tests exercise: 7 + * - attaching/detaching/querying programs to BPF_SK_LOOKUP hook, 8 + * - redirecting socket lookup to a socket selected by BPF program, 9 + * - failing a socket lookup on BPF program's request, 10 + * - error scenarios for selecting a socket from BPF program, 11 + * - accessing BPF program context, 12 + * - attaching and running multiple BPF programs. 13 + * 14 + * Tests run in a dedicated network namespace. 15 + */ 16 + 17 + #define _GNU_SOURCE 18 + #include <arpa/inet.h> 19 + #include <assert.h> 20 + #include <errno.h> 21 + #include <error.h> 22 + #include <fcntl.h> 23 + #include <sched.h> 24 + #include <stdio.h> 25 + #include <sys/types.h> 26 + #include <sys/stat.h> 27 + #include <unistd.h> 28 + 29 + #include <bpf/libbpf.h> 30 + #include <bpf/bpf.h> 31 + 32 + #include "test_progs.h" 33 + #include "bpf_rlimit.h" 34 + #include "bpf_util.h" 35 + #include "cgroup_helpers.h" 36 + #include "network_helpers.h" 37 + #include "test_sk_lookup.skel.h" 38 + 39 + /* External (address, port) pairs the client sends packets to. */ 40 + #define EXT_IP4 "127.0.0.1" 41 + #define EXT_IP6 "fd00::1" 42 + #define EXT_PORT 7007 43 + 44 + /* Internal (address, port) pairs the server listens/receives at. 
 */
#define INT_IP4		"127.0.0.2"
#define INT_IP4_V6	"::ffff:127.0.0.2"
#define INT_IP6		"fd00::2"
#define INT_PORT	8008

#define IO_TIMEOUT_SEC	3

/* Slots in the redirect sock map; MAX_SERVERS doubles as the array size. */
enum server {
	SERVER_A = 0,
	SERVER_B = 1,
	MAX_SERVERS,
};

/* Indices into the run map used by the multi-program tests. */
enum {
	PROG1 = 0,
	PROG2,
};

struct inet_addr {
	const char *ip;
	unsigned short port;
};

/* Parameters for one lookup redirect/drop test case. */
struct test {
	const char *desc;
	struct bpf_program *lookup_prog;
	struct bpf_program *reuseport_prog;	/* optional, attached with SO_ATTACH_REUSEPORT_EBPF */
	struct bpf_map *sock_map;
	int sotype;				/* SOCK_STREAM or SOCK_DGRAM */
	struct inet_addr connect_to;		/* client destination */
	struct inet_addr listen_at;		/* server bind address */
	enum server accept_on;			/* server expected to receive the traffic */
};

static __u32 duration;	/* for CHECK macro */

/* True if 'ip' is an IPv6 literal (contains a colon). */
static bool is_ipv6(const char *ip)
{
	return !!strchr(ip, ':');
}

/* Attach a reuseport BPF program to an already created socket.
 * Returns 0 on success, -1 with errno set on failure.
 */
static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog)
{
	int err, prog_fd;

	prog_fd = bpf_program__fd(reuseport_prog);
	if (prog_fd < 0) {
		errno = -prog_fd;
		return -1;
	}

	err = setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
			 &prog_fd, sizeof(prog_fd));
	if (err)
		return -1;

	return 0;
}

/* Sockaddr length for the stored family; 0 for unknown families. */
static socklen_t inetaddr_len(const struct sockaddr_storage *addr)
{
	return (addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) :
		addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : 0);
}

/* Create a socket of 'sotype' with send/recv timeouts set, and fill
 * *addr with the (ip, port) sockaddr. Returns the fd, or -1 on error.
 */
static int make_socket(int sotype, const char *ip, int port,
		       struct sockaddr_storage *addr)
{
	struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC };
	int err, family, fd;

	family = is_ipv6(ip) ? AF_INET6 : AF_INET;
	err = make_sockaddr(family, ip, port, addr, NULL);
	if (CHECK(err, "make_address", "failed\n"))
		return -1;

	fd = socket(addr->ss_family, sotype, 0);
	if (CHECK(fd < 0, "socket", "failed\n")) {
		log_err("failed to make socket");
		return -1;
	}

	/* Bound all I/O so a broken test fails instead of hanging. */
	err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (CHECK(err, "setsockopt(SO_SNDTIMEO)", "failed\n")) {
		log_err("failed to set SNDTIMEO");
		close(fd);
		return -1;
	}

	err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (CHECK(err, "setsockopt(SO_RCVTIMEO)", "failed\n")) {
		log_err("failed to set RCVTIMEO");
		close(fd);
		return -1;
	}

	return fd;
}

/* Create a listening (TCP) or bound (UDP) server socket at (ip, port),
 * optionally with a reuseport BPF program attached.
 * Returns the fd, or -1 on error.
 */
static int make_server(int sotype, const char *ip, int port,
		       struct bpf_program *reuseport_prog)
{
	struct sockaddr_storage addr = {0};
	const int one = 1;
	int err, fd = -1;

	fd = make_socket(sotype, ip, port, &addr);
	if (fd < 0)
		return -1;

	/* Ask for the original destination address of received datagrams.
	 * The SOL_IP option is enabled for UDPv6 sockets as well, so that
	 * IPv4-mapped IPv6 traffic works.
	 */
	if (sotype == SOCK_DGRAM) {
		err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
				 sizeof(one));
		if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
			log_err("failed to enable IP_RECVORIGDSTADDR");
			goto fail;
		}
	}

	if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
		err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
				 sizeof(one));
		if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
			log_err("failed to enable IPV6_RECVORIGDSTADDR");
			goto fail;
		}
	}

	if (sotype == SOCK_STREAM) {
		err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
				 sizeof(one));
		if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
			log_err("failed to enable SO_REUSEADDR");
			goto fail;
		}
	}

	if (reuseport_prog) {
		err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
				 sizeof(one));
		if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
			log_err("failed to enable SO_REUSEPORT");
			goto fail;
		}
	}

	err = bind(fd, (void *)&addr, inetaddr_len(&addr));
	if (CHECK(err, "bind", "failed\n")) {
		log_err("failed to bind listen socket");
		goto fail;
	}

	if (sotype == SOCK_STREAM) {
		err = listen(fd, SOMAXCONN);
		if (CHECK(err, "make_server", "listen")) {
			log_err("failed to listen on port %d", port);
			goto fail;
		}
	}

	/* Late attach reuseport prog so we can have one init path */
	if (reuseport_prog) {
		err = attach_reuseport(fd, reuseport_prog);
		if (CHECK(err, "attach_reuseport", "failed\n")) {
			log_err("failed to attach reuseport prog");
			goto fail;
		}
	}

	return fd;
fail:
	close(fd);
	return -1;
}

/* Create a client socket connected to (ip, port). Returns the fd, or -1. */
static int make_client(int sotype, const char *ip, int port)
{
	struct sockaddr_storage addr = {0};
	int err, fd;

	fd = make_socket(sotype, ip, port, &addr);
	if (fd < 0)
		return -1;

	err = connect(fd, (void *)&addr, inetaddr_len(&addr));
	if (CHECK(err, "make_client", "connect")) {
		log_err("failed to connect client socket");
		goto fail;
	}

	return fd;
fail:
	close(fd);
	return -1;
}

/* Send a single byte; 0 on success, -1 on failed/partial send. */
static int send_byte(int fd)
{
	ssize_t n;

	errno = 0;
	n = send(fd, "a", 1, 0);
	if (CHECK(n <= 0, "send_byte", "send")) {
		log_err("failed/partial send");
		return -1;
	}
	return 0;
}

/* Receive a single byte; 0 on success, -1 on failed/partial recv. */
static int recv_byte(int fd)
{
	char buf[1];
	ssize_t n;

	n = recv(fd, buf, sizeof(buf), 0);
	if (CHECK(n <= 0, "recv_byte", "recv")) {
		log_err("failed/partial recv");
		return -1;
	}
	return 0;
}

/* Accept one connection on server_fd and echo one byte back to the peer. */
static int tcp_recv_send(int server_fd)
{
	char buf[1];
	int ret, fd;
	ssize_t n;

	fd = accept(server_fd, NULL, NULL);
	if (CHECK(fd < 0, "accept", "failed\n")) {
		log_err("failed to accept");
		return -1;
	}

	n = recv(fd, buf, sizeof(buf), 0);
	if (CHECK(n <= 0, "recv", "failed\n")) {
		log_err("failed/partial recv");
		ret = -1;
		goto close;
	}

	n = send(fd, buf, n, 0);
	if (CHECK(n <= 0, "send", "failed\n")) {
		log_err("failed/partial send");
		ret = -1;
		goto close;
	}

	ret = 0;
close:
	close(fd);
	return ret;
}

/* Rewrite *ss in place from an AF_INET sockaddr to the corresponding
 * IPv4-mapped IPv6 sockaddr (::ffff:a.b.c.d), keeping the port.
 * NOTE(review): assumes s6_addr[0..9] are already zero in the buffer
 * the caller hands in — confirm at call sites.
 */
static void v4_to_v6(struct sockaddr_storage *ss)
{
	struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ss;
	struct sockaddr_in v4 = *(struct sockaddr_in *)ss;

	v6->sin6_family = AF_INET6;
	v6->sin6_port = v4.sin_port;
	v6->sin6_addr.s6_addr[10] = 0xff;
	v6->sin6_addr.s6_addr[11] = 0xff;
	memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
}
/* UDP echo server step: receive one datagram together with its original
 * destination address (IP(V6)_ORIGDSTADDR ancillary data), then send the
 * payload back from a new socket bound to that original destination.
 * Replying from the original destination is needed because the client is
 * connected to it; a reply from any other source would be rejected.
 * Returns 0 on success, -1 on failure.
 */
static int udp_recv_send(int server_fd)
{
	char cmsg_buf[CMSG_SPACE(sizeof(struct sockaddr_storage))];
	struct sockaddr_storage _src_addr = { 0 };
	struct sockaddr_storage *src_addr = &_src_addr;
	struct sockaddr_storage *dst_addr = NULL;
	struct msghdr msg = { 0 };
	struct iovec iov = { 0 };
	struct cmsghdr *cm;
	char buf[1];
	int ret, fd;
	ssize_t n;

	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);

	msg.msg_name = src_addr;
	msg.msg_namelen = sizeof(*src_addr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cmsg_buf;
	msg.msg_controllen = sizeof(cmsg_buf);

	errno = 0;
	n = recvmsg(server_fd, &msg, 0);
	if (CHECK(n <= 0, "recvmsg", "failed\n")) {
		log_err("failed to receive");
		return -1;
	}
	if (CHECK(msg.msg_flags & MSG_CTRUNC, "recvmsg", "truncated cmsg\n"))
		return -1;

	/* Walk the control messages looking for the original destination. */
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if ((cm->cmsg_level == SOL_IP &&
		     cm->cmsg_type == IP_ORIGDSTADDR) ||
		    (cm->cmsg_level == SOL_IPV6 &&
		     cm->cmsg_type == IPV6_ORIGDSTADDR)) {
			dst_addr = (struct sockaddr_storage *)CMSG_DATA(cm);
			break;
		}
		log_err("warning: ignored cmsg at level %d type %d",
			cm->cmsg_level, cm->cmsg_type);
	}
	if (CHECK(!dst_addr, "recvmsg", "missing ORIGDSTADDR\n"))
		return -1;

	/* Server socket bound to IPv4-mapped IPv6 address */
	if (src_addr->ss_family == AF_INET6 &&
	    dst_addr->ss_family == AF_INET) {
		v4_to_v6(dst_addr);
	}

	/* Reply from original destination address. */
	fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
	if (CHECK(fd < 0, "socket", "failed\n")) {
		log_err("failed to create tx socket");
		return -1;
	}

	ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
	if (CHECK(ret, "bind", "failed\n")) {
		log_err("failed to bind tx socket");
		goto out;
	}

	/* Reuse msg (payload + peer address), minus the control data. */
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	n = sendmsg(fd, &msg, 0);
	if (CHECK(n <= 0, "sendmsg", "failed\n")) {
		log_err("failed to send echo reply");
		ret = -1;
		goto out;
	}

	ret = 0;
out:
	close(fd);
	return ret;
}

/* One TCP round trip: client sends a byte, server echoes it, client reads
 * it back. Returns 0 on success, -1 on the first failed step.
 */
static int tcp_echo_test(int client_fd, int server_fd)
{
	int err;

	err = send_byte(client_fd);
	if (err)
		return -1;
	err = tcp_recv_send(server_fd);
	if (err)
		return -1;
	err = recv_byte(client_fd);
	if (err)
		return -1;

	return 0;
}

/* One UDP round trip, mirroring tcp_echo_test(). */
static int udp_echo_test(int client_fd, int server_fd)
{
	int err;

	err = send_byte(client_fd);
	if (err)
		return -1;
	err = udp_recv_send(server_fd);
	if (err)
		return -1;
	err = recv_byte(client_fd);
	if (err)
		return -1;

	return 0;
}

/* Attach 'prog' to the current network namespace (BPF_SK_LOOKUP hook).
 * Returns the new link, or NULL on failure.
 */
static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
{
	struct bpf_link *link;
	int net_fd;

	net_fd = open("/proc/self/ns/net", O_RDONLY);
	if (CHECK(net_fd < 0, "open", "failed\n")) {
		log_err("failed to open /proc/self/ns/net");
		return NULL;
	}

	link = bpf_program__attach_netns(prog, net_fd);
	if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
		errno = -PTR_ERR(link);
		log_err("failed to attach program '%s' to netns",
			bpf_program__name(prog));
		link = NULL;
	}

	close(net_fd);
	return link;
}

static int update_lookup_map(struct bpf_map *map, int index, int sock_fd) 450 + { 451 + int err, map_fd; 452 + uint64_t value; 453 + 454 + map_fd = bpf_map__fd(map); 455 + if (CHECK(map_fd < 0, "bpf_map__fd", "failed\n")) { 456 + errno = -map_fd; 457 + log_err("failed to get map FD"); 458 + return -1; 459 + } 460 + 461 + value = (uint64_t)sock_fd; 462 + err = bpf_map_update_elem(map_fd, &index, &value, BPF_NOEXIST); 463 + if (CHECK(err, "bpf_map_update_elem", "failed\n")) { 464 + log_err("failed to update redir_map @ %d", index); 465 + return -1; 466 + } 467 + 468 + return 0; 469 + } 470 + 471 + static __u32 link_info_prog_id(struct bpf_link *link) 472 + { 473 + struct bpf_link_info info = {}; 474 + __u32 info_len = sizeof(info); 475 + int link_fd, err; 476 + 477 + link_fd = bpf_link__fd(link); 478 + if (CHECK(link_fd < 0, "bpf_link__fd", "failed\n")) { 479 + errno = -link_fd; 480 + log_err("bpf_link__fd failed"); 481 + return 0; 482 + } 483 + 484 + err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len); 485 + if (CHECK(err, "bpf_obj_get_info_by_fd", "failed\n")) { 486 + log_err("bpf_obj_get_info_by_fd"); 487 + return 0; 488 + } 489 + if (CHECK(info_len != sizeof(info), "bpf_obj_get_info_by_fd", 490 + "unexpected info len %u\n", info_len)) 491 + return 0; 492 + 493 + return info.prog_id; 494 + } 495 + 496 + static void query_lookup_prog(struct test_sk_lookup *skel) 497 + { 498 + struct bpf_link *link[3] = {}; 499 + __u32 attach_flags = 0; 500 + __u32 prog_ids[3] = {}; 501 + __u32 prog_cnt = 3; 502 + __u32 prog_id; 503 + int net_fd; 504 + int err; 505 + 506 + net_fd = open("/proc/self/ns/net", O_RDONLY); 507 + if (CHECK(net_fd < 0, "open", "failed\n")) { 508 + log_err("failed to open /proc/self/ns/net"); 509 + return; 510 + } 511 + 512 + link[0] = attach_lookup_prog(skel->progs.lookup_pass); 513 + if (!link[0]) 514 + goto close; 515 + link[1] = attach_lookup_prog(skel->progs.lookup_pass); 516 + if (!link[1]) 517 + goto detach; 518 + link[2] = 
attach_lookup_prog(skel->progs.lookup_drop); 519 + if (!link[2]) 520 + goto detach; 521 + 522 + err = bpf_prog_query(net_fd, BPF_SK_LOOKUP, 0 /* query flags */, 523 + &attach_flags, prog_ids, &prog_cnt); 524 + if (CHECK(err, "bpf_prog_query", "failed\n")) { 525 + log_err("failed to query lookup prog"); 526 + goto detach; 527 + } 528 + 529 + errno = 0; 530 + if (CHECK(attach_flags != 0, "bpf_prog_query", 531 + "wrong attach_flags on query: %u", attach_flags)) 532 + goto detach; 533 + if (CHECK(prog_cnt != 3, "bpf_prog_query", 534 + "wrong program count on query: %u", prog_cnt)) 535 + goto detach; 536 + prog_id = link_info_prog_id(link[0]); 537 + CHECK(prog_ids[0] != prog_id, "bpf_prog_query", 538 + "invalid program #0 id on query: %u != %u\n", 539 + prog_ids[0], prog_id); 540 + prog_id = link_info_prog_id(link[1]); 541 + CHECK(prog_ids[1] != prog_id, "bpf_prog_query", 542 + "invalid program #1 id on query: %u != %u\n", 543 + prog_ids[1], prog_id); 544 + prog_id = link_info_prog_id(link[2]); 545 + CHECK(prog_ids[2] != prog_id, "bpf_prog_query", 546 + "invalid program #2 id on query: %u != %u\n", 547 + prog_ids[2], prog_id); 548 + 549 + detach: 550 + if (link[2]) 551 + bpf_link__destroy(link[2]); 552 + if (link[1]) 553 + bpf_link__destroy(link[1]); 554 + if (link[0]) 555 + bpf_link__destroy(link[0]); 556 + close: 557 + close(net_fd); 558 + } 559 + 560 + static void run_lookup_prog(const struct test *t) 561 + { 562 + int client_fd, server_fds[MAX_SERVERS] = { -1 }; 563 + struct bpf_link *lookup_link; 564 + int i, err; 565 + 566 + lookup_link = attach_lookup_prog(t->lookup_prog); 567 + if (!lookup_link) 568 + return; 569 + 570 + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { 571 + server_fds[i] = make_server(t->sotype, t->listen_at.ip, 572 + t->listen_at.port, 573 + t->reuseport_prog); 574 + if (server_fds[i] < 0) 575 + goto close; 576 + 577 + err = update_lookup_map(t->sock_map, i, server_fds[i]); 578 + if (err) 579 + goto close; 580 + 581 + /* want just one server for 
non-reuseport test */ 582 + if (!t->reuseport_prog) 583 + break; 584 + } 585 + 586 + client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port); 587 + if (client_fd < 0) 588 + goto close; 589 + 590 + if (t->sotype == SOCK_STREAM) 591 + tcp_echo_test(client_fd, server_fds[t->accept_on]); 592 + else 593 + udp_echo_test(client_fd, server_fds[t->accept_on]); 594 + 595 + close(client_fd); 596 + close: 597 + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { 598 + if (server_fds[i] != -1) 599 + close(server_fds[i]); 600 + } 601 + bpf_link__destroy(lookup_link); 602 + } 603 + 604 + static void test_redirect_lookup(struct test_sk_lookup *skel) 605 + { 606 + const struct test tests[] = { 607 + { 608 + .desc = "TCP IPv4 redir port", 609 + .lookup_prog = skel->progs.redir_port, 610 + .sock_map = skel->maps.redir_map, 611 + .sotype = SOCK_STREAM, 612 + .connect_to = { EXT_IP4, EXT_PORT }, 613 + .listen_at = { EXT_IP4, INT_PORT }, 614 + }, 615 + { 616 + .desc = "TCP IPv4 redir addr", 617 + .lookup_prog = skel->progs.redir_ip4, 618 + .sock_map = skel->maps.redir_map, 619 + .sotype = SOCK_STREAM, 620 + .connect_to = { EXT_IP4, EXT_PORT }, 621 + .listen_at = { INT_IP4, EXT_PORT }, 622 + }, 623 + { 624 + .desc = "TCP IPv4 redir with reuseport", 625 + .lookup_prog = skel->progs.select_sock_a, 626 + .reuseport_prog = skel->progs.select_sock_b, 627 + .sock_map = skel->maps.redir_map, 628 + .sotype = SOCK_STREAM, 629 + .connect_to = { EXT_IP4, EXT_PORT }, 630 + .listen_at = { INT_IP4, INT_PORT }, 631 + .accept_on = SERVER_B, 632 + }, 633 + { 634 + .desc = "TCP IPv4 redir skip reuseport", 635 + .lookup_prog = skel->progs.select_sock_a_no_reuseport, 636 + .reuseport_prog = skel->progs.select_sock_b, 637 + .sock_map = skel->maps.redir_map, 638 + .sotype = SOCK_STREAM, 639 + .connect_to = { EXT_IP4, EXT_PORT }, 640 + .listen_at = { INT_IP4, INT_PORT }, 641 + .accept_on = SERVER_A, 642 + }, 643 + { 644 + .desc = "TCP IPv6 redir port", 645 + .lookup_prog = skel->progs.redir_port, 
646 + .sock_map = skel->maps.redir_map, 647 + .sotype = SOCK_STREAM, 648 + .connect_to = { EXT_IP6, EXT_PORT }, 649 + .listen_at = { EXT_IP6, INT_PORT }, 650 + }, 651 + { 652 + .desc = "TCP IPv6 redir addr", 653 + .lookup_prog = skel->progs.redir_ip6, 654 + .sock_map = skel->maps.redir_map, 655 + .sotype = SOCK_STREAM, 656 + .connect_to = { EXT_IP6, EXT_PORT }, 657 + .listen_at = { INT_IP6, EXT_PORT }, 658 + }, 659 + { 660 + .desc = "TCP IPv4->IPv6 redir port", 661 + .lookup_prog = skel->progs.redir_port, 662 + .sock_map = skel->maps.redir_map, 663 + .sotype = SOCK_STREAM, 664 + .connect_to = { EXT_IP4, EXT_PORT }, 665 + .listen_at = { INT_IP4_V6, INT_PORT }, 666 + }, 667 + { 668 + .desc = "TCP IPv6 redir with reuseport", 669 + .lookup_prog = skel->progs.select_sock_a, 670 + .reuseport_prog = skel->progs.select_sock_b, 671 + .sock_map = skel->maps.redir_map, 672 + .sotype = SOCK_STREAM, 673 + .connect_to = { EXT_IP6, EXT_PORT }, 674 + .listen_at = { INT_IP6, INT_PORT }, 675 + .accept_on = SERVER_B, 676 + }, 677 + { 678 + .desc = "TCP IPv6 redir skip reuseport", 679 + .lookup_prog = skel->progs.select_sock_a_no_reuseport, 680 + .reuseport_prog = skel->progs.select_sock_b, 681 + .sock_map = skel->maps.redir_map, 682 + .sotype = SOCK_STREAM, 683 + .connect_to = { EXT_IP6, EXT_PORT }, 684 + .listen_at = { INT_IP6, INT_PORT }, 685 + .accept_on = SERVER_A, 686 + }, 687 + { 688 + .desc = "UDP IPv4 redir port", 689 + .lookup_prog = skel->progs.redir_port, 690 + .sock_map = skel->maps.redir_map, 691 + .sotype = SOCK_DGRAM, 692 + .connect_to = { EXT_IP4, EXT_PORT }, 693 + .listen_at = { EXT_IP4, INT_PORT }, 694 + }, 695 + { 696 + .desc = "UDP IPv4 redir addr", 697 + .lookup_prog = skel->progs.redir_ip4, 698 + .sock_map = skel->maps.redir_map, 699 + .sotype = SOCK_DGRAM, 700 + .connect_to = { EXT_IP4, EXT_PORT }, 701 + .listen_at = { INT_IP4, EXT_PORT }, 702 + }, 703 + { 704 + .desc = "UDP IPv4 redir with reuseport", 705 + .lookup_prog = skel->progs.select_sock_a, 706 + 
.reuseport_prog = skel->progs.select_sock_b, 707 + .sock_map = skel->maps.redir_map, 708 + .sotype = SOCK_DGRAM, 709 + .connect_to = { EXT_IP4, EXT_PORT }, 710 + .listen_at = { INT_IP4, INT_PORT }, 711 + .accept_on = SERVER_B, 712 + }, 713 + { 714 + .desc = "UDP IPv4 redir skip reuseport", 715 + .lookup_prog = skel->progs.select_sock_a_no_reuseport, 716 + .reuseport_prog = skel->progs.select_sock_b, 717 + .sock_map = skel->maps.redir_map, 718 + .sotype = SOCK_DGRAM, 719 + .connect_to = { EXT_IP4, EXT_PORT }, 720 + .listen_at = { INT_IP4, INT_PORT }, 721 + .accept_on = SERVER_A, 722 + }, 723 + { 724 + .desc = "UDP IPv6 redir port", 725 + .lookup_prog = skel->progs.redir_port, 726 + .sock_map = skel->maps.redir_map, 727 + .sotype = SOCK_DGRAM, 728 + .connect_to = { EXT_IP6, EXT_PORT }, 729 + .listen_at = { EXT_IP6, INT_PORT }, 730 + }, 731 + { 732 + .desc = "UDP IPv6 redir addr", 733 + .lookup_prog = skel->progs.redir_ip6, 734 + .sock_map = skel->maps.redir_map, 735 + .sotype = SOCK_DGRAM, 736 + .connect_to = { EXT_IP6, EXT_PORT }, 737 + .listen_at = { INT_IP6, EXT_PORT }, 738 + }, 739 + { 740 + .desc = "UDP IPv4->IPv6 redir port", 741 + .lookup_prog = skel->progs.redir_port, 742 + .sock_map = skel->maps.redir_map, 743 + .sotype = SOCK_DGRAM, 744 + .listen_at = { INT_IP4_V6, INT_PORT }, 745 + .connect_to = { EXT_IP4, EXT_PORT }, 746 + }, 747 + { 748 + .desc = "UDP IPv6 redir and reuseport", 749 + .lookup_prog = skel->progs.select_sock_a, 750 + .reuseport_prog = skel->progs.select_sock_b, 751 + .sock_map = skel->maps.redir_map, 752 + .sotype = SOCK_DGRAM, 753 + .connect_to = { EXT_IP6, EXT_PORT }, 754 + .listen_at = { INT_IP6, INT_PORT }, 755 + .accept_on = SERVER_B, 756 + }, 757 + { 758 + .desc = "UDP IPv6 redir skip reuseport", 759 + .lookup_prog = skel->progs.select_sock_a_no_reuseport, 760 + .reuseport_prog = skel->progs.select_sock_b, 761 + .sock_map = skel->maps.redir_map, 762 + .sotype = SOCK_DGRAM, 763 + .connect_to = { EXT_IP6, EXT_PORT }, 764 + .listen_at = 
{ INT_IP6, INT_PORT }, 765 + .accept_on = SERVER_A, 766 + }, 767 + }; 768 + const struct test *t; 769 + 770 + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 771 + if (test__start_subtest(t->desc)) 772 + run_lookup_prog(t); 773 + } 774 + } 775 + 776 + static void drop_on_lookup(const struct test *t) 777 + { 778 + struct sockaddr_storage dst = {}; 779 + int client_fd, server_fd, err; 780 + struct bpf_link *lookup_link; 781 + ssize_t n; 782 + 783 + lookup_link = attach_lookup_prog(t->lookup_prog); 784 + if (!lookup_link) 785 + return; 786 + 787 + server_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port, 788 + t->reuseport_prog); 789 + if (server_fd < 0) 790 + goto detach; 791 + 792 + client_fd = make_socket(t->sotype, t->connect_to.ip, 793 + t->connect_to.port, &dst); 794 + if (client_fd < 0) 795 + goto close_srv; 796 + 797 + err = connect(client_fd, (void *)&dst, inetaddr_len(&dst)); 798 + if (t->sotype == SOCK_DGRAM) { 799 + err = send_byte(client_fd); 800 + if (err) 801 + goto close_all; 802 + 803 + /* Read out asynchronous error */ 804 + n = recv(client_fd, NULL, 0, 0); 805 + err = n == -1; 806 + } 807 + if (CHECK(!err || errno != ECONNREFUSED, "connect", 808 + "unexpected success or error\n")) 809 + log_err("expected ECONNREFUSED on connect"); 810 + 811 + close_all: 812 + close(client_fd); 813 + close_srv: 814 + close(server_fd); 815 + detach: 816 + bpf_link__destroy(lookup_link); 817 + } 818 + 819 + static void test_drop_on_lookup(struct test_sk_lookup *skel) 820 + { 821 + const struct test tests[] = { 822 + { 823 + .desc = "TCP IPv4 drop on lookup", 824 + .lookup_prog = skel->progs.lookup_drop, 825 + .sotype = SOCK_STREAM, 826 + .connect_to = { EXT_IP4, EXT_PORT }, 827 + .listen_at = { EXT_IP4, EXT_PORT }, 828 + }, 829 + { 830 + .desc = "TCP IPv6 drop on lookup", 831 + .lookup_prog = skel->progs.lookup_drop, 832 + .sotype = SOCK_STREAM, 833 + .connect_to = { EXT_IP6, EXT_PORT }, 834 + .listen_at = { EXT_IP6, EXT_PORT }, 835 + }, 836 + { 837 
+ .desc = "UDP IPv4 drop on lookup", 838 + .lookup_prog = skel->progs.lookup_drop, 839 + .sotype = SOCK_DGRAM, 840 + .connect_to = { EXT_IP4, EXT_PORT }, 841 + .listen_at = { EXT_IP4, EXT_PORT }, 842 + }, 843 + { 844 + .desc = "UDP IPv6 drop on lookup", 845 + .lookup_prog = skel->progs.lookup_drop, 846 + .sotype = SOCK_DGRAM, 847 + .connect_to = { EXT_IP6, EXT_PORT }, 848 + .listen_at = { EXT_IP6, INT_PORT }, 849 + }, 850 + }; 851 + const struct test *t; 852 + 853 + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 854 + if (test__start_subtest(t->desc)) 855 + drop_on_lookup(t); 856 + } 857 + } 858 + 859 + static void drop_on_reuseport(const struct test *t) 860 + { 861 + struct sockaddr_storage dst = { 0 }; 862 + int client, server1, server2, err; 863 + struct bpf_link *lookup_link; 864 + ssize_t n; 865 + 866 + lookup_link = attach_lookup_prog(t->lookup_prog); 867 + if (!lookup_link) 868 + return; 869 + 870 + server1 = make_server(t->sotype, t->listen_at.ip, t->listen_at.port, 871 + t->reuseport_prog); 872 + if (server1 < 0) 873 + goto detach; 874 + 875 + err = update_lookup_map(t->sock_map, SERVER_A, server1); 876 + if (err) 877 + goto detach; 878 + 879 + /* second server on destination address we should never reach */ 880 + server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port, 881 + NULL /* reuseport prog */); 882 + if (server2 < 0) 883 + goto close_srv1; 884 + 885 + client = make_socket(t->sotype, t->connect_to.ip, 886 + t->connect_to.port, &dst); 887 + if (client < 0) 888 + goto close_srv2; 889 + 890 + err = connect(client, (void *)&dst, inetaddr_len(&dst)); 891 + if (t->sotype == SOCK_DGRAM) { 892 + err = send_byte(client); 893 + if (err) 894 + goto close_all; 895 + 896 + /* Read out asynchronous error */ 897 + n = recv(client, NULL, 0, 0); 898 + err = n == -1; 899 + } 900 + if (CHECK(!err || errno != ECONNREFUSED, "connect", 901 + "unexpected success or error\n")) 902 + log_err("expected ECONNREFUSED on connect"); 903 + 904 + close_all: 
905 + close(client); 906 + close_srv2: 907 + close(server2); 908 + close_srv1: 909 + close(server1); 910 + detach: 911 + bpf_link__destroy(lookup_link); 912 + } 913 + 914 + static void test_drop_on_reuseport(struct test_sk_lookup *skel) 915 + { 916 + const struct test tests[] = { 917 + { 918 + .desc = "TCP IPv4 drop on reuseport", 919 + .lookup_prog = skel->progs.select_sock_a, 920 + .reuseport_prog = skel->progs.reuseport_drop, 921 + .sock_map = skel->maps.redir_map, 922 + .sotype = SOCK_STREAM, 923 + .connect_to = { EXT_IP4, EXT_PORT }, 924 + .listen_at = { INT_IP4, INT_PORT }, 925 + }, 926 + { 927 + .desc = "TCP IPv6 drop on reuseport", 928 + .lookup_prog = skel->progs.select_sock_a, 929 + .reuseport_prog = skel->progs.reuseport_drop, 930 + .sock_map = skel->maps.redir_map, 931 + .sotype = SOCK_STREAM, 932 + .connect_to = { EXT_IP6, EXT_PORT }, 933 + .listen_at = { INT_IP6, INT_PORT }, 934 + }, 935 + { 936 + .desc = "UDP IPv4 drop on reuseport", 937 + .lookup_prog = skel->progs.select_sock_a, 938 + .reuseport_prog = skel->progs.reuseport_drop, 939 + .sock_map = skel->maps.redir_map, 940 + .sotype = SOCK_DGRAM, 941 + .connect_to = { EXT_IP4, EXT_PORT }, 942 + .listen_at = { INT_IP4, INT_PORT }, 943 + }, 944 + { 945 + .desc = "TCP IPv6 drop on reuseport", 946 + .lookup_prog = skel->progs.select_sock_a, 947 + .reuseport_prog = skel->progs.reuseport_drop, 948 + .sock_map = skel->maps.redir_map, 949 + .sotype = SOCK_STREAM, 950 + .connect_to = { EXT_IP6, EXT_PORT }, 951 + .listen_at = { INT_IP6, INT_PORT }, 952 + }, 953 + }; 954 + const struct test *t; 955 + 956 + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 957 + if (test__start_subtest(t->desc)) 958 + drop_on_reuseport(t); 959 + } 960 + } 961 + 962 + static void run_sk_assign(struct test_sk_lookup *skel, 963 + struct bpf_program *lookup_prog, 964 + const char *listen_ip, const char *connect_ip) 965 + { 966 + int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 }; 967 + struct bpf_link *lookup_link; 968 + 
int i, err; 969 + 970 + lookup_link = attach_lookup_prog(lookup_prog); 971 + if (!lookup_link) 972 + return; 973 + 974 + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { 975 + server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL); 976 + if (server_fds[i] < 0) 977 + goto close_servers; 978 + 979 + err = update_lookup_map(skel->maps.redir_map, i, 980 + server_fds[i]); 981 + if (err) 982 + goto close_servers; 983 + } 984 + 985 + client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT); 986 + if (client_fd < 0) 987 + goto close_servers; 988 + 989 + peer_fd = accept(server_fds[SERVER_B], NULL, NULL); 990 + if (CHECK(peer_fd < 0, "accept", "failed\n")) 991 + goto close_client; 992 + 993 + close(peer_fd); 994 + close_client: 995 + close(client_fd); 996 + close_servers: 997 + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { 998 + if (server_fds[i] != -1) 999 + close(server_fds[i]); 1000 + } 1001 + bpf_link__destroy(lookup_link); 1002 + } 1003 + 1004 + static void run_sk_assign_v4(struct test_sk_lookup *skel, 1005 + struct bpf_program *lookup_prog) 1006 + { 1007 + run_sk_assign(skel, lookup_prog, INT_IP4, EXT_IP4); 1008 + } 1009 + 1010 + static void run_sk_assign_v6(struct test_sk_lookup *skel, 1011 + struct bpf_program *lookup_prog) 1012 + { 1013 + run_sk_assign(skel, lookup_prog, INT_IP6, EXT_IP6); 1014 + } 1015 + 1016 + static void run_sk_assign_connected(struct test_sk_lookup *skel, 1017 + int sotype) 1018 + { 1019 + int err, client_fd, connected_fd, server_fd; 1020 + struct bpf_link *lookup_link; 1021 + 1022 + server_fd = make_server(sotype, EXT_IP4, EXT_PORT, NULL); 1023 + if (server_fd < 0) 1024 + return; 1025 + 1026 + connected_fd = make_client(sotype, EXT_IP4, EXT_PORT); 1027 + if (connected_fd < 0) 1028 + goto out_close_server; 1029 + 1030 + /* Put a connected socket in redirect map */ 1031 + err = update_lookup_map(skel->maps.redir_map, SERVER_A, connected_fd); 1032 + if (err) 1033 + goto out_close_connected; 1034 + 1035 + lookup_link = 
attach_lookup_prog(skel->progs.sk_assign_esocknosupport); 1036 + if (!lookup_link) 1037 + goto out_close_connected; 1038 + 1039 + /* Try to redirect TCP SYN / UDP packet to a connected socket */ 1040 + client_fd = make_client(sotype, EXT_IP4, EXT_PORT); 1041 + if (client_fd < 0) 1042 + goto out_unlink_prog; 1043 + if (sotype == SOCK_DGRAM) { 1044 + send_byte(client_fd); 1045 + recv_byte(server_fd); 1046 + } 1047 + 1048 + close(client_fd); 1049 + out_unlink_prog: 1050 + bpf_link__destroy(lookup_link); 1051 + out_close_connected: 1052 + close(connected_fd); 1053 + out_close_server: 1054 + close(server_fd); 1055 + } 1056 + 1057 + static void test_sk_assign_helper(struct test_sk_lookup *skel) 1058 + { 1059 + if (test__start_subtest("sk_assign returns EEXIST")) 1060 + run_sk_assign_v4(skel, skel->progs.sk_assign_eexist); 1061 + if (test__start_subtest("sk_assign honors F_REPLACE")) 1062 + run_sk_assign_v4(skel, skel->progs.sk_assign_replace_flag); 1063 + if (test__start_subtest("sk_assign accepts NULL socket")) 1064 + run_sk_assign_v4(skel, skel->progs.sk_assign_null); 1065 + if (test__start_subtest("access ctx->sk")) 1066 + run_sk_assign_v4(skel, skel->progs.access_ctx_sk); 1067 + if (test__start_subtest("narrow access to ctx v4")) 1068 + run_sk_assign_v4(skel, skel->progs.ctx_narrow_access); 1069 + if (test__start_subtest("narrow access to ctx v6")) 1070 + run_sk_assign_v6(skel, skel->progs.ctx_narrow_access); 1071 + if (test__start_subtest("sk_assign rejects TCP established")) 1072 + run_sk_assign_connected(skel, SOCK_STREAM); 1073 + if (test__start_subtest("sk_assign rejects UDP connected")) 1074 + run_sk_assign_connected(skel, SOCK_DGRAM); 1075 + } 1076 + 1077 + struct test_multi_prog { 1078 + const char *desc; 1079 + struct bpf_program *prog1; 1080 + struct bpf_program *prog2; 1081 + struct bpf_map *redir_map; 1082 + struct bpf_map *run_map; 1083 + int expect_errno; 1084 + struct inet_addr listen_at; 1085 + }; 1086 + 1087 + static void run_multi_prog_lookup(const 
struct test_multi_prog *t) 1088 + { 1089 + struct sockaddr_storage dst = {}; 1090 + int map_fd, server_fd, client_fd; 1091 + struct bpf_link *link1, *link2; 1092 + int prog_idx, done, err; 1093 + 1094 + map_fd = bpf_map__fd(t->run_map); 1095 + 1096 + done = 0; 1097 + prog_idx = PROG1; 1098 + err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY); 1099 + if (CHECK(err, "bpf_map_update_elem", "failed\n")) 1100 + return; 1101 + prog_idx = PROG2; 1102 + err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY); 1103 + if (CHECK(err, "bpf_map_update_elem", "failed\n")) 1104 + return; 1105 + 1106 + link1 = attach_lookup_prog(t->prog1); 1107 + if (!link1) 1108 + return; 1109 + link2 = attach_lookup_prog(t->prog2); 1110 + if (!link2) 1111 + goto out_unlink1; 1112 + 1113 + server_fd = make_server(SOCK_STREAM, t->listen_at.ip, 1114 + t->listen_at.port, NULL); 1115 + if (server_fd < 0) 1116 + goto out_unlink2; 1117 + 1118 + err = update_lookup_map(t->redir_map, SERVER_A, server_fd); 1119 + if (err) 1120 + goto out_close_server; 1121 + 1122 + client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst); 1123 + if (client_fd < 0) 1124 + goto out_close_server; 1125 + 1126 + err = connect(client_fd, (void *)&dst, inetaddr_len(&dst)); 1127 + if (CHECK(err && !t->expect_errno, "connect", 1128 + "unexpected error %d\n", errno)) 1129 + goto out_close_client; 1130 + if (CHECK(err && t->expect_errno && errno != t->expect_errno, 1131 + "connect", "unexpected error %d\n", errno)) 1132 + goto out_close_client; 1133 + 1134 + done = 0; 1135 + prog_idx = PROG1; 1136 + err = bpf_map_lookup_elem(map_fd, &prog_idx, &done); 1137 + CHECK(err, "bpf_map_lookup_elem", "failed\n"); 1138 + CHECK(!done, "bpf_map_lookup_elem", "PROG1 !done\n"); 1139 + 1140 + done = 0; 1141 + prog_idx = PROG2; 1142 + err = bpf_map_lookup_elem(map_fd, &prog_idx, &done); 1143 + CHECK(err, "bpf_map_lookup_elem", "failed\n"); 1144 + CHECK(!done, "bpf_map_lookup_elem", "PROG2 !done\n"); 1145 + 1146 + 
out_close_client: 1147 + close(client_fd); 1148 + out_close_server: 1149 + close(server_fd); 1150 + out_unlink2: 1151 + bpf_link__destroy(link2); 1152 + out_unlink1: 1153 + bpf_link__destroy(link1); 1154 + } 1155 + 1156 + static void test_multi_prog_lookup(struct test_sk_lookup *skel) 1157 + { 1158 + struct test_multi_prog tests[] = { 1159 + { 1160 + .desc = "multi prog - pass, pass", 1161 + .prog1 = skel->progs.multi_prog_pass1, 1162 + .prog2 = skel->progs.multi_prog_pass2, 1163 + .listen_at = { EXT_IP4, EXT_PORT }, 1164 + }, 1165 + { 1166 + .desc = "multi prog - drop, drop", 1167 + .prog1 = skel->progs.multi_prog_drop1, 1168 + .prog2 = skel->progs.multi_prog_drop2, 1169 + .listen_at = { EXT_IP4, EXT_PORT }, 1170 + .expect_errno = ECONNREFUSED, 1171 + }, 1172 + { 1173 + .desc = "multi prog - pass, drop", 1174 + .prog1 = skel->progs.multi_prog_pass1, 1175 + .prog2 = skel->progs.multi_prog_drop2, 1176 + .listen_at = { EXT_IP4, EXT_PORT }, 1177 + .expect_errno = ECONNREFUSED, 1178 + }, 1179 + { 1180 + .desc = "multi prog - drop, pass", 1181 + .prog1 = skel->progs.multi_prog_drop1, 1182 + .prog2 = skel->progs.multi_prog_pass2, 1183 + .listen_at = { EXT_IP4, EXT_PORT }, 1184 + .expect_errno = ECONNREFUSED, 1185 + }, 1186 + { 1187 + .desc = "multi prog - pass, redir", 1188 + .prog1 = skel->progs.multi_prog_pass1, 1189 + .prog2 = skel->progs.multi_prog_redir2, 1190 + .listen_at = { INT_IP4, INT_PORT }, 1191 + }, 1192 + { 1193 + .desc = "multi prog - redir, pass", 1194 + .prog1 = skel->progs.multi_prog_redir1, 1195 + .prog2 = skel->progs.multi_prog_pass2, 1196 + .listen_at = { INT_IP4, INT_PORT }, 1197 + }, 1198 + { 1199 + .desc = "multi prog - drop, redir", 1200 + .prog1 = skel->progs.multi_prog_drop1, 1201 + .prog2 = skel->progs.multi_prog_redir2, 1202 + .listen_at = { INT_IP4, INT_PORT }, 1203 + }, 1204 + { 1205 + .desc = "multi prog - redir, drop", 1206 + .prog1 = skel->progs.multi_prog_redir1, 1207 + .prog2 = skel->progs.multi_prog_drop2, 1208 + .listen_at = { 
INT_IP4, INT_PORT }, 1209 + }, 1210 + { 1211 + .desc = "multi prog - redir, redir", 1212 + .prog1 = skel->progs.multi_prog_redir1, 1213 + .prog2 = skel->progs.multi_prog_redir2, 1214 + .listen_at = { INT_IP4, INT_PORT }, 1215 + }, 1216 + }; 1217 + struct test_multi_prog *t; 1218 + 1219 + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1220 + t->redir_map = skel->maps.redir_map; 1221 + t->run_map = skel->maps.run_map; 1222 + if (test__start_subtest(t->desc)) 1223 + run_multi_prog_lookup(t); 1224 + } 1225 + } 1226 + 1227 + static void run_tests(struct test_sk_lookup *skel) 1228 + { 1229 + if (test__start_subtest("query lookup prog")) 1230 + query_lookup_prog(skel); 1231 + test_redirect_lookup(skel); 1232 + test_drop_on_lookup(skel); 1233 + test_drop_on_reuseport(skel); 1234 + test_sk_assign_helper(skel); 1235 + test_multi_prog_lookup(skel); 1236 + } 1237 + 1238 + static int switch_netns(void) 1239 + { 1240 + static const char * const setup_script[] = { 1241 + "ip -6 addr add dev lo " EXT_IP6 "/128 nodad", 1242 + "ip -6 addr add dev lo " INT_IP6 "/128 nodad", 1243 + "ip link set dev lo up", 1244 + NULL, 1245 + }; 1246 + const char * const *cmd; 1247 + int err; 1248 + 1249 + err = unshare(CLONE_NEWNET); 1250 + if (CHECK(err, "unshare", "failed\n")) { 1251 + log_err("unshare(CLONE_NEWNET)"); 1252 + return -1; 1253 + } 1254 + 1255 + for (cmd = setup_script; *cmd; cmd++) { 1256 + err = system(*cmd); 1257 + if (CHECK(err, "system", "failed\n")) { 1258 + log_err("system(%s)", *cmd); 1259 + return -1; 1260 + } 1261 + } 1262 + 1263 + return 0; 1264 + } 1265 + 1266 + void test_sk_lookup(void) 1267 + { 1268 + struct test_sk_lookup *skel; 1269 + int err; 1270 + 1271 + err = switch_netns(); 1272 + if (err) 1273 + return; 1274 + 1275 + skel = test_sk_lookup__open_and_load(); 1276 + if (CHECK(!skel, "skel open_and_load", "failed\n")) 1277 + return; 1278 + 1279 + run_tests(skel); 1280 + 1281 + test_sk_lookup__destroy(skel); 1282 + }
+1 -2
tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
··· 193 193 if (CHECK_FAIL(server_fd < 0)) 194 194 goto close_bpf_object; 195 195 196 + pthread_mutex_lock(&server_started_mtx); 196 197 if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, 197 198 (void *)&server_fd))) 198 199 goto close_server_fd; 199 - 200 - pthread_mutex_lock(&server_started_mtx); 201 200 pthread_cond_wait(&server_started, &server_started_mtx); 202 201 pthread_mutex_unlock(&server_started_mtx); 203 202
+70
tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <uapi/linux/bpf.h> 3 + #include <linux/if_link.h> 4 + #include <test_progs.h> 5 + 6 + #include "test_xdp_with_cpumap_helpers.skel.h" 7 + 8 + #define IFINDEX_LO 1 9 + 10 + void test_xdp_with_cpumap_helpers(void) 11 + { 12 + struct test_xdp_with_cpumap_helpers *skel; 13 + struct bpf_prog_info info = {}; 14 + struct bpf_cpumap_val val = { 15 + .qsize = 192, 16 + }; 17 + __u32 duration = 0, idx = 0; 18 + __u32 len = sizeof(info); 19 + int err, prog_fd, map_fd; 20 + 21 + skel = test_xdp_with_cpumap_helpers__open_and_load(); 22 + if (CHECK_FAIL(!skel)) { 23 + perror("test_xdp_with_cpumap_helpers__open_and_load"); 24 + return; 25 + } 26 + 27 + /* can not attach program with cpumaps that allow programs 28 + * as xdp generic 29 + */ 30 + prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); 31 + err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); 32 + CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP", 33 + "should have failed\n"); 34 + 35 + prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); 36 + map_fd = bpf_map__fd(skel->maps.cpu_map); 37 + err = bpf_obj_get_info_by_fd(prog_fd, &info, &len); 38 + if (CHECK_FAIL(err)) 39 + goto out_close; 40 + 41 + val.bpf_prog.fd = prog_fd; 42 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 43 + CHECK(err, "Add program to cpumap entry", "err %d errno %d\n", 44 + err, errno); 45 + 46 + err = bpf_map_lookup_elem(map_fd, &idx, &val); 47 + CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno); 48 + CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry", 49 + "expected %u read %u\n", info.id, val.bpf_prog.id); 50 + 51 + /* can not attach BPF_XDP_CPUMAP program to a device */ 52 + err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); 53 + CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program", 54 + "should have failed\n"); 55 + 56 + val.qsize = 192; 57 + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); 
58 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 59 + CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry", 60 + "should have failed\n"); 61 + 62 + out_close: 63 + test_xdp_with_cpumap_helpers__destroy(skel); 64 + } 65 + 66 + void test_xdp_cpumap_attach(void) 67 + { 68 + if (test__start_subtest("cpumap_with_progs")) 69 + test_xdp_with_cpumap_helpers(); 70 + }
+3 -3
tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
··· 36 36 if (!nlk->groups) { 37 37 group = 0; 38 38 } else { 39 - /* FIXME: temporary use bpf_probe_read here, needs 39 + /* FIXME: temporary use bpf_probe_read_kernel here, needs 40 40 * verifier support to do direct access. 41 41 */ 42 - bpf_probe_read(&group, sizeof(group), &nlk->groups[0]); 42 + bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]); 43 43 } 44 44 BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ", 45 45 nlk->portid, (u32)group, ··· 56 56 * with current verifier. 57 57 */ 58 58 inode = SOCK_INODE(sk); 59 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 59 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 60 60 } 61 61 BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino); 62 62
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
··· 57 57 return 0; 58 58 59 59 inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; 60 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 60 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 61 61 return ino; 62 62 } 63 63
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
··· 57 57 return 0; 58 58 59 59 inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; 60 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 60 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 61 61 return ino; 62 62 } 63 63
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
··· 18 18 return 0; 19 19 20 20 inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; 21 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 21 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 22 22 return ino; 23 23 } 24 24
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
··· 25 25 return 0; 26 26 27 27 inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; 28 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 28 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 29 29 return ino; 30 30 } 31 31
+641
tools/testing/selftests/bpf/progs/test_sk_lookup.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 + // Copyright (c) 2020 Cloudflare 3 + 4 + #include <errno.h> 5 + #include <stdbool.h> 6 + #include <stddef.h> 7 + #include <linux/bpf.h> 8 + #include <linux/in.h> 9 + #include <sys/socket.h> 10 + 11 + #include <bpf/bpf_endian.h> 12 + #include <bpf/bpf_helpers.h> 13 + 14 + #define IP4(a, b, c, d) \ 15 + bpf_htonl((((__u32)(a) & 0xffU) << 24) | \ 16 + (((__u32)(b) & 0xffU) << 16) | \ 17 + (((__u32)(c) & 0xffU) << 8) | \ 18 + (((__u32)(d) & 0xffU) << 0)) 19 + #define IP6(aaaa, bbbb, cccc, dddd) \ 20 + { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) } 21 + 22 + #define MAX_SOCKS 32 23 + 24 + struct { 25 + __uint(type, BPF_MAP_TYPE_SOCKMAP); 26 + __uint(max_entries, MAX_SOCKS); 27 + __type(key, __u32); 28 + __type(value, __u64); 29 + } redir_map SEC(".maps"); 30 + 31 + struct { 32 + __uint(type, BPF_MAP_TYPE_ARRAY); 33 + __uint(max_entries, 2); 34 + __type(key, int); 35 + __type(value, int); 36 + } run_map SEC(".maps"); 37 + 38 + enum { 39 + PROG1 = 0, 40 + PROG2, 41 + }; 42 + 43 + enum { 44 + SERVER_A = 0, 45 + SERVER_B, 46 + }; 47 + 48 + /* Addressable key/value constants for convenience */ 49 + static const int KEY_PROG1 = PROG1; 50 + static const int KEY_PROG2 = PROG2; 51 + static const int PROG_DONE = 1; 52 + 53 + static const __u32 KEY_SERVER_A = SERVER_A; 54 + static const __u32 KEY_SERVER_B = SERVER_B; 55 + 56 + static const __u16 DST_PORT = 7007; /* Host byte order */ 57 + static const __u32 DST_IP4 = IP4(127, 0, 0, 1); 58 + static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001); 59 + 60 + SEC("sk_lookup/lookup_pass") 61 + int lookup_pass(struct bpf_sk_lookup *ctx) 62 + { 63 + return SK_PASS; 64 + } 65 + 66 + SEC("sk_lookup/lookup_drop") 67 + int lookup_drop(struct bpf_sk_lookup *ctx) 68 + { 69 + return SK_DROP; 70 + } 71 + 72 + SEC("sk_reuseport/reuse_pass") 73 + int reuseport_pass(struct sk_reuseport_md *ctx) 74 + { 75 + return SK_PASS; 76 + } 77 + 78 + 
SEC("sk_reuseport/reuse_drop") 79 + int reuseport_drop(struct sk_reuseport_md *ctx) 80 + { 81 + return SK_DROP; 82 + } 83 + 84 + /* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */ 85 + SEC("sk_lookup/redir_port") 86 + int redir_port(struct bpf_sk_lookup *ctx) 87 + { 88 + struct bpf_sock *sk; 89 + int err; 90 + 91 + if (ctx->local_port != DST_PORT) 92 + return SK_PASS; 93 + 94 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 95 + if (!sk) 96 + return SK_PASS; 97 + 98 + err = bpf_sk_assign(ctx, sk, 0); 99 + bpf_sk_release(sk); 100 + return err ? SK_DROP : SK_PASS; 101 + } 102 + 103 + /* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */ 104 + SEC("sk_lookup/redir_ip4") 105 + int redir_ip4(struct bpf_sk_lookup *ctx) 106 + { 107 + struct bpf_sock *sk; 108 + int err; 109 + 110 + if (ctx->family != AF_INET) 111 + return SK_PASS; 112 + if (ctx->local_port != DST_PORT) 113 + return SK_PASS; 114 + if (ctx->local_ip4 != DST_IP4) 115 + return SK_PASS; 116 + 117 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 118 + if (!sk) 119 + return SK_PASS; 120 + 121 + err = bpf_sk_assign(ctx, sk, 0); 122 + bpf_sk_release(sk); 123 + return err ? SK_DROP : SK_PASS; 124 + } 125 + 126 + /* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */ 127 + SEC("sk_lookup/redir_ip6") 128 + int redir_ip6(struct bpf_sk_lookup *ctx) 129 + { 130 + struct bpf_sock *sk; 131 + int err; 132 + 133 + if (ctx->family != AF_INET6) 134 + return SK_PASS; 135 + if (ctx->local_port != DST_PORT) 136 + return SK_PASS; 137 + if (ctx->local_ip6[0] != DST_IP6[0] || 138 + ctx->local_ip6[1] != DST_IP6[1] || 139 + ctx->local_ip6[2] != DST_IP6[2] || 140 + ctx->local_ip6[3] != DST_IP6[3]) 141 + return SK_PASS; 142 + 143 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 144 + if (!sk) 145 + return SK_PASS; 146 + 147 + err = bpf_sk_assign(ctx, sk, 0); 148 + bpf_sk_release(sk); 149 + return err ? 
SK_DROP : SK_PASS; 150 + } 151 + 152 + SEC("sk_lookup/select_sock_a") 153 + int select_sock_a(struct bpf_sk_lookup *ctx) 154 + { 155 + struct bpf_sock *sk; 156 + int err; 157 + 158 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 159 + if (!sk) 160 + return SK_PASS; 161 + 162 + err = bpf_sk_assign(ctx, sk, 0); 163 + bpf_sk_release(sk); 164 + return err ? SK_DROP : SK_PASS; 165 + } 166 + 167 + SEC("sk_lookup/select_sock_a_no_reuseport") 168 + int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx) 169 + { 170 + struct bpf_sock *sk; 171 + int err; 172 + 173 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 174 + if (!sk) 175 + return SK_DROP; 176 + 177 + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT); 178 + bpf_sk_release(sk); 179 + return err ? SK_DROP : SK_PASS; 180 + } 181 + 182 + SEC("sk_reuseport/select_sock_b") 183 + int select_sock_b(struct sk_reuseport_md *ctx) 184 + { 185 + __u32 key = KEY_SERVER_B; 186 + int err; 187 + 188 + err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0); 189 + return err ? SK_DROP : SK_PASS; 190 + } 191 + 192 + /* Check that bpf_sk_assign() returns -EEXIST if socket already selected. 
*/ 193 + SEC("sk_lookup/sk_assign_eexist") 194 + int sk_assign_eexist(struct bpf_sk_lookup *ctx) 195 + { 196 + struct bpf_sock *sk; 197 + int err, ret; 198 + 199 + ret = SK_DROP; 200 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 201 + if (!sk) 202 + goto out; 203 + err = bpf_sk_assign(ctx, sk, 0); 204 + if (err) 205 + goto out; 206 + bpf_sk_release(sk); 207 + 208 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 209 + if (!sk) 210 + goto out; 211 + err = bpf_sk_assign(ctx, sk, 0); 212 + if (err != -EEXIST) { 213 + bpf_printk("sk_assign returned %d, expected %d\n", 214 + err, -EEXIST); 215 + goto out; 216 + } 217 + 218 + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ 219 + out: 220 + if (sk) 221 + bpf_sk_release(sk); 222 + return ret; 223 + } 224 + 225 + /* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */ 226 + SEC("sk_lookup/sk_assign_replace_flag") 227 + int sk_assign_replace_flag(struct bpf_sk_lookup *ctx) 228 + { 229 + struct bpf_sock *sk; 230 + int err, ret; 231 + 232 + ret = SK_DROP; 233 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 234 + if (!sk) 235 + goto out; 236 + err = bpf_sk_assign(ctx, sk, 0); 237 + if (err) 238 + goto out; 239 + bpf_sk_release(sk); 240 + 241 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 242 + if (!sk) 243 + goto out; 244 + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); 245 + if (err) { 246 + bpf_printk("sk_assign returned %d, expected 0\n", err); 247 + goto out; 248 + } 249 + 250 + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ 251 + out: 252 + if (sk) 253 + bpf_sk_release(sk); 254 + return ret; 255 + } 256 + 257 + /* Check that bpf_sk_assign(sk=NULL) is accepted. 
*/ 258 + SEC("sk_lookup/sk_assign_null") 259 + int sk_assign_null(struct bpf_sk_lookup *ctx) 260 + { 261 + struct bpf_sock *sk = NULL; 262 + int err, ret; 263 + 264 + ret = SK_DROP; 265 + 266 + err = bpf_sk_assign(ctx, NULL, 0); 267 + if (err) { 268 + bpf_printk("sk_assign returned %d, expected 0\n", err); 269 + goto out; 270 + } 271 + 272 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 273 + if (!sk) 274 + goto out; 275 + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); 276 + if (err) { 277 + bpf_printk("sk_assign returned %d, expected 0\n", err); 278 + goto out; 279 + } 280 + 281 + if (ctx->sk != sk) 282 + goto out; 283 + err = bpf_sk_assign(ctx, NULL, 0); 284 + if (err != -EEXIST) 285 + goto out; 286 + err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE); 287 + if (err) 288 + goto out; 289 + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); 290 + if (err) 291 + goto out; 292 + 293 + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ 294 + out: 295 + if (sk) 296 + bpf_sk_release(sk); 297 + return ret; 298 + } 299 + 300 + /* Check that selected sk is accessible through context. 
*/ 301 + SEC("sk_lookup/access_ctx_sk") 302 + int access_ctx_sk(struct bpf_sk_lookup *ctx) 303 + { 304 + struct bpf_sock *sk1 = NULL, *sk2 = NULL; 305 + int err, ret; 306 + 307 + ret = SK_DROP; 308 + 309 + /* Try accessing unassigned (NULL) ctx->sk field */ 310 + if (ctx->sk && ctx->sk->family != AF_INET) 311 + goto out; 312 + 313 + /* Assign a value to ctx->sk */ 314 + sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 315 + if (!sk1) 316 + goto out; 317 + err = bpf_sk_assign(ctx, sk1, 0); 318 + if (err) 319 + goto out; 320 + if (ctx->sk != sk1) 321 + goto out; 322 + 323 + /* Access ctx->sk fields */ 324 + if (ctx->sk->family != AF_INET || 325 + ctx->sk->type != SOCK_STREAM || 326 + ctx->sk->state != BPF_TCP_LISTEN) 327 + goto out; 328 + 329 + /* Reset selection */ 330 + err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE); 331 + if (err) 332 + goto out; 333 + if (ctx->sk) 334 + goto out; 335 + 336 + /* Assign another socket */ 337 + sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 338 + if (!sk2) 339 + goto out; 340 + err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE); 341 + if (err) 342 + goto out; 343 + if (ctx->sk != sk2) 344 + goto out; 345 + 346 + /* Access reassigned ctx->sk fields */ 347 + if (ctx->sk->family != AF_INET || 348 + ctx->sk->type != SOCK_STREAM || 349 + ctx->sk->state != BPF_TCP_LISTEN) 350 + goto out; 351 + 352 + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ 353 + out: 354 + if (sk1) 355 + bpf_sk_release(sk1); 356 + if (sk2) 357 + bpf_sk_release(sk2); 358 + return ret; 359 + } 360 + 361 + /* Check narrow loads from ctx fields that support them. 362 + * 363 + * Narrow loads of size >= target field size from a non-zero offset 364 + * are not covered because they give bogus results, that is the 365 + * verifier ignores the offset. 
366 + */ 367 + SEC("sk_lookup/ctx_narrow_access") 368 + int ctx_narrow_access(struct bpf_sk_lookup *ctx) 369 + { 370 + struct bpf_sock *sk; 371 + int err, family; 372 + __u16 *half; 373 + __u8 *byte; 374 + bool v4; 375 + 376 + v4 = (ctx->family == AF_INET); 377 + 378 + /* Narrow loads from family field */ 379 + byte = (__u8 *)&ctx->family; 380 + half = (__u16 *)&ctx->family; 381 + if (byte[0] != (v4 ? AF_INET : AF_INET6) || 382 + byte[1] != 0 || byte[2] != 0 || byte[3] != 0) 383 + return SK_DROP; 384 + if (half[0] != (v4 ? AF_INET : AF_INET6)) 385 + return SK_DROP; 386 + 387 + byte = (__u8 *)&ctx->protocol; 388 + if (byte[0] != IPPROTO_TCP || 389 + byte[1] != 0 || byte[2] != 0 || byte[3] != 0) 390 + return SK_DROP; 391 + half = (__u16 *)&ctx->protocol; 392 + if (half[0] != IPPROTO_TCP) 393 + return SK_DROP; 394 + 395 + /* Narrow loads from remote_port field. Expect non-0 value. */ 396 + byte = (__u8 *)&ctx->remote_port; 397 + if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0) 398 + return SK_DROP; 399 + half = (__u16 *)&ctx->remote_port; 400 + if (half[0] == 0) 401 + return SK_DROP; 402 + 403 + /* Narrow loads from local_port field. Expect DST_PORT. 
*/ 404 + byte = (__u8 *)&ctx->local_port; 405 + if (byte[0] != ((DST_PORT >> 0) & 0xff) || 406 + byte[1] != ((DST_PORT >> 8) & 0xff) || 407 + byte[2] != 0 || byte[3] != 0) 408 + return SK_DROP; 409 + half = (__u16 *)&ctx->local_port; 410 + if (half[0] != DST_PORT) 411 + return SK_DROP; 412 + 413 + /* Narrow loads from IPv4 fields */ 414 + if (v4) { 415 + /* Expect non-0.0.0.0 in remote_ip4 */ 416 + byte = (__u8 *)&ctx->remote_ip4; 417 + if (byte[0] == 0 && byte[1] == 0 && 418 + byte[2] == 0 && byte[3] == 0) 419 + return SK_DROP; 420 + half = (__u16 *)&ctx->remote_ip4; 421 + if (half[0] == 0 && half[1] == 0) 422 + return SK_DROP; 423 + 424 + /* Expect DST_IP4 in local_ip4 */ 425 + byte = (__u8 *)&ctx->local_ip4; 426 + if (byte[0] != ((DST_IP4 >> 0) & 0xff) || 427 + byte[1] != ((DST_IP4 >> 8) & 0xff) || 428 + byte[2] != ((DST_IP4 >> 16) & 0xff) || 429 + byte[3] != ((DST_IP4 >> 24) & 0xff)) 430 + return SK_DROP; 431 + half = (__u16 *)&ctx->local_ip4; 432 + if (half[0] != ((DST_IP4 >> 0) & 0xffff) || 433 + half[1] != ((DST_IP4 >> 16) & 0xffff)) 434 + return SK_DROP; 435 + } else { 436 + /* Expect 0.0.0.0 IPs when family != AF_INET */ 437 + byte = (__u8 *)&ctx->remote_ip4; 438 + if (byte[0] != 0 || byte[1] != 0 && 439 + byte[2] != 0 || byte[3] != 0) 440 + return SK_DROP; 441 + half = (__u16 *)&ctx->remote_ip4; 442 + if (half[0] != 0 || half[1] != 0) 443 + return SK_DROP; 444 + 445 + byte = (__u8 *)&ctx->local_ip4; 446 + if (byte[0] != 0 || byte[1] != 0 && 447 + byte[2] != 0 || byte[3] != 0) 448 + return SK_DROP; 449 + half = (__u16 *)&ctx->local_ip4; 450 + if (half[0] != 0 || half[1] != 0) 451 + return SK_DROP; 452 + } 453 + 454 + /* Narrow loads from IPv6 fields */ 455 + if (!v4) { 456 + /* Expenct non-:: IP in remote_ip6 */ 457 + byte = (__u8 *)&ctx->remote_ip6; 458 + if (byte[0] == 0 && byte[1] == 0 && 459 + byte[2] == 0 && byte[3] == 0 && 460 + byte[4] == 0 && byte[5] == 0 && 461 + byte[6] == 0 && byte[7] == 0 && 462 + byte[8] == 0 && byte[9] == 0 && 463 + byte[10] 
== 0 && byte[11] == 0 && 464 + byte[12] == 0 && byte[13] == 0 && 465 + byte[14] == 0 && byte[15] == 0) 466 + return SK_DROP; 467 + half = (__u16 *)&ctx->remote_ip6; 468 + if (half[0] == 0 && half[1] == 0 && 469 + half[2] == 0 && half[3] == 0 && 470 + half[4] == 0 && half[5] == 0 && 471 + half[6] == 0 && half[7] == 0) 472 + return SK_DROP; 473 + 474 + /* Expect DST_IP6 in local_ip6 */ 475 + byte = (__u8 *)&ctx->local_ip6; 476 + if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) || 477 + byte[1] != ((DST_IP6[0] >> 8) & 0xff) || 478 + byte[2] != ((DST_IP6[0] >> 16) & 0xff) || 479 + byte[3] != ((DST_IP6[0] >> 24) & 0xff) || 480 + byte[4] != ((DST_IP6[1] >> 0) & 0xff) || 481 + byte[5] != ((DST_IP6[1] >> 8) & 0xff) || 482 + byte[6] != ((DST_IP6[1] >> 16) & 0xff) || 483 + byte[7] != ((DST_IP6[1] >> 24) & 0xff) || 484 + byte[8] != ((DST_IP6[2] >> 0) & 0xff) || 485 + byte[9] != ((DST_IP6[2] >> 8) & 0xff) || 486 + byte[10] != ((DST_IP6[2] >> 16) & 0xff) || 487 + byte[11] != ((DST_IP6[2] >> 24) & 0xff) || 488 + byte[12] != ((DST_IP6[3] >> 0) & 0xff) || 489 + byte[13] != ((DST_IP6[3] >> 8) & 0xff) || 490 + byte[14] != ((DST_IP6[3] >> 16) & 0xff) || 491 + byte[15] != ((DST_IP6[3] >> 24) & 0xff)) 492 + return SK_DROP; 493 + half = (__u16 *)&ctx->local_ip6; 494 + if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) || 495 + half[1] != ((DST_IP6[0] >> 16) & 0xffff) || 496 + half[2] != ((DST_IP6[1] >> 0) & 0xffff) || 497 + half[3] != ((DST_IP6[1] >> 16) & 0xffff) || 498 + half[4] != ((DST_IP6[2] >> 0) & 0xffff) || 499 + half[5] != ((DST_IP6[2] >> 16) & 0xffff) || 500 + half[6] != ((DST_IP6[3] >> 0) & 0xffff) || 501 + half[7] != ((DST_IP6[3] >> 16) & 0xffff)) 502 + return SK_DROP; 503 + } else { 504 + /* Expect :: IPs when family != AF_INET6 */ 505 + byte = (__u8 *)&ctx->remote_ip6; 506 + if (byte[0] != 0 || byte[1] != 0 || 507 + byte[2] != 0 || byte[3] != 0 || 508 + byte[4] != 0 || byte[5] != 0 || 509 + byte[6] != 0 || byte[7] != 0 || 510 + byte[8] != 0 || byte[9] != 0 || 511 + byte[10] != 0 || 
byte[11] != 0 || 512 + byte[12] != 0 || byte[13] != 0 || 513 + byte[14] != 0 || byte[15] != 0) 514 + return SK_DROP; 515 + half = (__u16 *)&ctx->remote_ip6; 516 + if (half[0] != 0 || half[1] != 0 || 517 + half[2] != 0 || half[3] != 0 || 518 + half[4] != 0 || half[5] != 0 || 519 + half[6] != 0 || half[7] != 0) 520 + return SK_DROP; 521 + 522 + byte = (__u8 *)&ctx->local_ip6; 523 + if (byte[0] != 0 || byte[1] != 0 || 524 + byte[2] != 0 || byte[3] != 0 || 525 + byte[4] != 0 || byte[5] != 0 || 526 + byte[6] != 0 || byte[7] != 0 || 527 + byte[8] != 0 || byte[9] != 0 || 528 + byte[10] != 0 || byte[11] != 0 || 529 + byte[12] != 0 || byte[13] != 0 || 530 + byte[14] != 0 || byte[15] != 0) 531 + return SK_DROP; 532 + half = (__u16 *)&ctx->local_ip6; 533 + if (half[0] != 0 || half[1] != 0 || 534 + half[2] != 0 || half[3] != 0 || 535 + half[4] != 0 || half[5] != 0 || 536 + half[6] != 0 || half[7] != 0) 537 + return SK_DROP; 538 + } 539 + 540 + /* Success, redirect to KEY_SERVER_B */ 541 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 542 + if (sk) { 543 + bpf_sk_assign(ctx, sk, 0); 544 + bpf_sk_release(sk); 545 + } 546 + return SK_PASS; 547 + } 548 + 549 + /* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */ 550 + SEC("sk_lookup/sk_assign_esocknosupport") 551 + int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx) 552 + { 553 + struct bpf_sock *sk; 554 + int err, ret; 555 + 556 + ret = SK_DROP; 557 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 558 + if (!sk) 559 + goto out; 560 + 561 + err = bpf_sk_assign(ctx, sk, 0); 562 + if (err != -ESOCKTNOSUPPORT) { 563 + bpf_printk("sk_assign returned %d, expected %d\n", 564 + err, -ESOCKTNOSUPPORT); 565 + goto out; 566 + } 567 + 568 + ret = SK_PASS; /* Success, pass to regular lookup */ 569 + out: 570 + if (sk) 571 + bpf_sk_release(sk); 572 + return ret; 573 + } 574 + 575 + SEC("sk_lookup/multi_prog_pass1") 576 + int multi_prog_pass1(struct bpf_sk_lookup *ctx) 577 + { 578 + 
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); 579 + return SK_PASS; 580 + } 581 + 582 + SEC("sk_lookup/multi_prog_pass2") 583 + int multi_prog_pass2(struct bpf_sk_lookup *ctx) 584 + { 585 + bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); 586 + return SK_PASS; 587 + } 588 + 589 + SEC("sk_lookup/multi_prog_drop1") 590 + int multi_prog_drop1(struct bpf_sk_lookup *ctx) 591 + { 592 + bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); 593 + return SK_DROP; 594 + } 595 + 596 + SEC("sk_lookup/multi_prog_drop2") 597 + int multi_prog_drop2(struct bpf_sk_lookup *ctx) 598 + { 599 + bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); 600 + return SK_DROP; 601 + } 602 + 603 + static __always_inline int select_server_a(struct bpf_sk_lookup *ctx) 604 + { 605 + struct bpf_sock *sk; 606 + int err; 607 + 608 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 609 + if (!sk) 610 + return SK_DROP; 611 + 612 + err = bpf_sk_assign(ctx, sk, 0); 613 + bpf_sk_release(sk); 614 + if (err) 615 + return SK_DROP; 616 + 617 + return SK_PASS; 618 + } 619 + 620 + SEC("sk_lookup/multi_prog_redir1") 621 + int multi_prog_redir1(struct bpf_sk_lookup *ctx) 622 + { 623 + int ret; 624 + 625 + ret = select_server_a(ctx); 626 + bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); 627 + return SK_PASS; 628 + } 629 + 630 + SEC("sk_lookup/multi_prog_redir2") 631 + int multi_prog_redir2(struct bpf_sk_lookup *ctx) 632 + { 633 + int ret; 634 + 635 + ret = select_server_a(ctx); 636 + bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); 637 + return SK_PASS; 638 + } 639 + 640 + char _license[] SEC("license") = "Dual BSD/GPL"; 641 + __u32 _version SEC("version") = 1;
+36
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + 6 + #define IFINDEX_LO 1 7 + 8 + struct { 9 + __uint(type, BPF_MAP_TYPE_CPUMAP); 10 + __uint(key_size, sizeof(__u32)); 11 + __uint(value_size, sizeof(struct bpf_cpumap_val)); 12 + __uint(max_entries, 4); 13 + } cpu_map SEC(".maps"); 14 + 15 + SEC("xdp_redir") 16 + int xdp_redir_prog(struct xdp_md *ctx) 17 + { 18 + return bpf_redirect_map(&cpu_map, 1, 0); 19 + } 20 + 21 + SEC("xdp_dummy") 22 + int xdp_dummy_prog(struct xdp_md *ctx) 23 + { 24 + return XDP_PASS; 25 + } 26 + 27 + SEC("xdp_cpumap/dummy_cm") 28 + int xdp_dummy_cm(struct xdp_md *ctx) 29 + { 30 + if (ctx->ingress_ifindex == IFINDEX_LO) 31 + return XDP_DROP; 32 + 33 + return XDP_PASS; 34 + } 35 + 36 + char _license[] SEC("license") = "GPL";
+9 -3
tools/testing/selftests/bpf/test_kmod.sh
··· 10 10 exit $ksft_skip 11 11 fi 12 12 13 - SRC_TREE=../../../../ 13 + if [ "$building_out_of_srctree" ]; then 14 + # We are in linux-build/kselftest/bpf 15 + OUTPUT=../../ 16 + else 17 + # We are in linux/tools/testing/selftests/bpf 18 + OUTPUT=../../../../ 19 + fi 14 20 15 21 test_run() 16 22 { ··· 25 19 26 20 echo "[ JIT enabled:$1 hardened:$2 ]" 27 21 dmesg -C 28 - if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then 29 - insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null 22 + if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then 23 + insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null 30 24 if [ $? -ne 0 ]; then 31 25 rc=1 32 26 fi
+1 -1
tools/testing/selftests/bpf/test_lwt_seg6local.sh
··· 140 140 ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE & 141 141 ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" 142 142 sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment 143 - kill -INT $! 143 + kill -TERM $! 144 144 145 145 if [[ $(< $TMP_FILE) != "foobar" ]]; then 146 146 exit 1
+492
tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
··· 1 + { 2 + "valid 1,2,4,8-byte reads from bpf_sk_lookup", 3 + .insns = { 4 + /* 1-byte read from family field */ 5 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 6 + offsetof(struct bpf_sk_lookup, family)), 7 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 8 + offsetof(struct bpf_sk_lookup, family) + 1), 9 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 10 + offsetof(struct bpf_sk_lookup, family) + 2), 11 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 12 + offsetof(struct bpf_sk_lookup, family) + 3), 13 + /* 2-byte read from family field */ 14 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 15 + offsetof(struct bpf_sk_lookup, family)), 16 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 17 + offsetof(struct bpf_sk_lookup, family) + 2), 18 + /* 4-byte read from family field */ 19 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 20 + offsetof(struct bpf_sk_lookup, family)), 21 + 22 + /* 1-byte read from protocol field */ 23 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 24 + offsetof(struct bpf_sk_lookup, protocol)), 25 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 26 + offsetof(struct bpf_sk_lookup, protocol) + 1), 27 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 28 + offsetof(struct bpf_sk_lookup, protocol) + 2), 29 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 30 + offsetof(struct bpf_sk_lookup, protocol) + 3), 31 + /* 2-byte read from protocol field */ 32 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 33 + offsetof(struct bpf_sk_lookup, protocol)), 34 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 35 + offsetof(struct bpf_sk_lookup, protocol) + 2), 36 + /* 4-byte read from protocol field */ 37 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 38 + offsetof(struct bpf_sk_lookup, protocol)), 39 + 40 + /* 1-byte read from remote_ip4 field */ 41 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 42 + offsetof(struct bpf_sk_lookup, remote_ip4)), 43 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 44 + offsetof(struct bpf_sk_lookup, remote_ip4) + 1), 45 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 46 + offsetof(struct bpf_sk_lookup, 
remote_ip4) + 2), 47 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 48 + offsetof(struct bpf_sk_lookup, remote_ip4) + 3), 49 + /* 2-byte read from remote_ip4 field */ 50 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 51 + offsetof(struct bpf_sk_lookup, remote_ip4)), 52 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 53 + offsetof(struct bpf_sk_lookup, remote_ip4) + 2), 54 + /* 4-byte read from remote_ip4 field */ 55 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 56 + offsetof(struct bpf_sk_lookup, remote_ip4)), 57 + 58 + /* 1-byte read from remote_ip6 field */ 59 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 60 + offsetof(struct bpf_sk_lookup, remote_ip6)), 61 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 62 + offsetof(struct bpf_sk_lookup, remote_ip6) + 1), 63 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 64 + offsetof(struct bpf_sk_lookup, remote_ip6) + 2), 65 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 66 + offsetof(struct bpf_sk_lookup, remote_ip6) + 3), 67 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 68 + offsetof(struct bpf_sk_lookup, remote_ip6) + 4), 69 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 70 + offsetof(struct bpf_sk_lookup, remote_ip6) + 5), 71 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 72 + offsetof(struct bpf_sk_lookup, remote_ip6) + 6), 73 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 74 + offsetof(struct bpf_sk_lookup, remote_ip6) + 7), 75 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 76 + offsetof(struct bpf_sk_lookup, remote_ip6) + 8), 77 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 78 + offsetof(struct bpf_sk_lookup, remote_ip6) + 9), 79 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 80 + offsetof(struct bpf_sk_lookup, remote_ip6) + 10), 81 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 82 + offsetof(struct bpf_sk_lookup, remote_ip6) + 11), 83 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 84 + offsetof(struct bpf_sk_lookup, remote_ip6) + 12), 85 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 86 + offsetof(struct bpf_sk_lookup, remote_ip6) + 13), 87 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 88 
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14), 89 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 90 + offsetof(struct bpf_sk_lookup, remote_ip6) + 15), 91 + /* 2-byte read from remote_ip6 field */ 92 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 93 + offsetof(struct bpf_sk_lookup, remote_ip6)), 94 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 95 + offsetof(struct bpf_sk_lookup, remote_ip6) + 2), 96 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 97 + offsetof(struct bpf_sk_lookup, remote_ip6) + 4), 98 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 99 + offsetof(struct bpf_sk_lookup, remote_ip6) + 6), 100 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 101 + offsetof(struct bpf_sk_lookup, remote_ip6) + 8), 102 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 103 + offsetof(struct bpf_sk_lookup, remote_ip6) + 10), 104 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 105 + offsetof(struct bpf_sk_lookup, remote_ip6) + 12), 106 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 107 + offsetof(struct bpf_sk_lookup, remote_ip6) + 14), 108 + /* 4-byte read from remote_ip6 field */ 109 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 110 + offsetof(struct bpf_sk_lookup, remote_ip6)), 111 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 112 + offsetof(struct bpf_sk_lookup, remote_ip6) + 4), 113 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 114 + offsetof(struct bpf_sk_lookup, remote_ip6) + 8), 115 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 116 + offsetof(struct bpf_sk_lookup, remote_ip6) + 12), 117 + 118 + /* 1-byte read from remote_port field */ 119 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 120 + offsetof(struct bpf_sk_lookup, remote_port)), 121 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 122 + offsetof(struct bpf_sk_lookup, remote_port) + 1), 123 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 124 + offsetof(struct bpf_sk_lookup, remote_port) + 2), 125 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 126 + offsetof(struct bpf_sk_lookup, remote_port) + 3), 127 + /* 2-byte read from remote_port field */ 128 + BPF_LDX_MEM(BPF_H, BPF_REG_0, 
BPF_REG_1, 129 + offsetof(struct bpf_sk_lookup, remote_port)), 130 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 131 + offsetof(struct bpf_sk_lookup, remote_port) + 2), 132 + /* 4-byte read from remote_port field */ 133 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 134 + offsetof(struct bpf_sk_lookup, remote_port)), 135 + 136 + /* 1-byte read from local_ip4 field */ 137 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 138 + offsetof(struct bpf_sk_lookup, local_ip4)), 139 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 140 + offsetof(struct bpf_sk_lookup, local_ip4) + 1), 141 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 142 + offsetof(struct bpf_sk_lookup, local_ip4) + 2), 143 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 144 + offsetof(struct bpf_sk_lookup, local_ip4) + 3), 145 + /* 2-byte read from local_ip4 field */ 146 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 147 + offsetof(struct bpf_sk_lookup, local_ip4)), 148 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 149 + offsetof(struct bpf_sk_lookup, local_ip4) + 2), 150 + /* 4-byte read from local_ip4 field */ 151 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 152 + offsetof(struct bpf_sk_lookup, local_ip4)), 153 + 154 + /* 1-byte read from local_ip6 field */ 155 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 156 + offsetof(struct bpf_sk_lookup, local_ip6)), 157 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 158 + offsetof(struct bpf_sk_lookup, local_ip6) + 1), 159 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 160 + offsetof(struct bpf_sk_lookup, local_ip6) + 2), 161 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 162 + offsetof(struct bpf_sk_lookup, local_ip6) + 3), 163 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 164 + offsetof(struct bpf_sk_lookup, local_ip6) + 4), 165 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 166 + offsetof(struct bpf_sk_lookup, local_ip6) + 5), 167 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 168 + offsetof(struct bpf_sk_lookup, local_ip6) + 6), 169 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 170 + offsetof(struct bpf_sk_lookup, local_ip6) + 
7), 171 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 172 + offsetof(struct bpf_sk_lookup, local_ip6) + 8), 173 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 174 + offsetof(struct bpf_sk_lookup, local_ip6) + 9), 175 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 176 + offsetof(struct bpf_sk_lookup, local_ip6) + 10), 177 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 178 + offsetof(struct bpf_sk_lookup, local_ip6) + 11), 179 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 180 + offsetof(struct bpf_sk_lookup, local_ip6) + 12), 181 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 182 + offsetof(struct bpf_sk_lookup, local_ip6) + 13), 183 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 184 + offsetof(struct bpf_sk_lookup, local_ip6) + 14), 185 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 186 + offsetof(struct bpf_sk_lookup, local_ip6) + 15), 187 + /* 2-byte read from local_ip6 field */ 188 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 189 + offsetof(struct bpf_sk_lookup, local_ip6)), 190 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 191 + offsetof(struct bpf_sk_lookup, local_ip6) + 2), 192 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 193 + offsetof(struct bpf_sk_lookup, local_ip6) + 4), 194 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 195 + offsetof(struct bpf_sk_lookup, local_ip6) + 6), 196 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 197 + offsetof(struct bpf_sk_lookup, local_ip6) + 8), 198 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 199 + offsetof(struct bpf_sk_lookup, local_ip6) + 10), 200 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 201 + offsetof(struct bpf_sk_lookup, local_ip6) + 12), 202 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 203 + offsetof(struct bpf_sk_lookup, local_ip6) + 14), 204 + /* 4-byte read from local_ip6 field */ 205 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 206 + offsetof(struct bpf_sk_lookup, local_ip6)), 207 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 208 + offsetof(struct bpf_sk_lookup, local_ip6) + 4), 209 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 210 + offsetof(struct bpf_sk_lookup, 
local_ip6) + 8), 211 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 212 + offsetof(struct bpf_sk_lookup, local_ip6) + 12), 213 + 214 + /* 1-byte read from local_port field */ 215 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 216 + offsetof(struct bpf_sk_lookup, local_port)), 217 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 218 + offsetof(struct bpf_sk_lookup, local_port) + 1), 219 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 220 + offsetof(struct bpf_sk_lookup, local_port) + 2), 221 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 222 + offsetof(struct bpf_sk_lookup, local_port) + 3), 223 + /* 2-byte read from local_port field */ 224 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 225 + offsetof(struct bpf_sk_lookup, local_port)), 226 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 227 + offsetof(struct bpf_sk_lookup, local_port) + 2), 228 + /* 4-byte read from local_port field */ 229 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 230 + offsetof(struct bpf_sk_lookup, local_port)), 231 + 232 + /* 8-byte read from sk field */ 233 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 234 + offsetof(struct bpf_sk_lookup, sk)), 235 + 236 + BPF_MOV32_IMM(BPF_REG_0, 0), 237 + BPF_EXIT_INSN(), 238 + }, 239 + .result = ACCEPT, 240 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 241 + .expected_attach_type = BPF_SK_LOOKUP, 242 + }, 243 + /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */ 244 + { 245 + "invalid 8-byte read from bpf_sk_lookup family field", 246 + .insns = { 247 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 248 + offsetof(struct bpf_sk_lookup, family)), 249 + BPF_MOV32_IMM(BPF_REG_0, 0), 250 + BPF_EXIT_INSN(), 251 + }, 252 + .errstr = "invalid bpf_context access", 253 + .result = REJECT, 254 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 255 + .expected_attach_type = BPF_SK_LOOKUP, 256 + }, 257 + { 258 + "invalid 8-byte read from bpf_sk_lookup protocol field", 259 + .insns = { 260 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 261 + offsetof(struct bpf_sk_lookup, protocol)), 262 + BPF_MOV32_IMM(BPF_REG_0, 
0), 263 + BPF_EXIT_INSN(), 264 + }, 265 + .errstr = "invalid bpf_context access", 266 + .result = REJECT, 267 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 268 + .expected_attach_type = BPF_SK_LOOKUP, 269 + }, 270 + { 271 + "invalid 8-byte read from bpf_sk_lookup remote_ip4 field", 272 + .insns = { 273 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 274 + offsetof(struct bpf_sk_lookup, remote_ip4)), 275 + BPF_MOV32_IMM(BPF_REG_0, 0), 276 + BPF_EXIT_INSN(), 277 + }, 278 + .errstr = "invalid bpf_context access", 279 + .result = REJECT, 280 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 281 + .expected_attach_type = BPF_SK_LOOKUP, 282 + }, 283 + { 284 + "invalid 8-byte read from bpf_sk_lookup remote_ip6 field", 285 + .insns = { 286 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 287 + offsetof(struct bpf_sk_lookup, remote_ip6)), 288 + BPF_MOV32_IMM(BPF_REG_0, 0), 289 + BPF_EXIT_INSN(), 290 + }, 291 + .errstr = "invalid bpf_context access", 292 + .result = REJECT, 293 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 294 + .expected_attach_type = BPF_SK_LOOKUP, 295 + }, 296 + { 297 + "invalid 8-byte read from bpf_sk_lookup remote_port field", 298 + .insns = { 299 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 300 + offsetof(struct bpf_sk_lookup, remote_port)), 301 + BPF_MOV32_IMM(BPF_REG_0, 0), 302 + BPF_EXIT_INSN(), 303 + }, 304 + .errstr = "invalid bpf_context access", 305 + .result = REJECT, 306 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 307 + .expected_attach_type = BPF_SK_LOOKUP, 308 + }, 309 + { 310 + "invalid 8-byte read from bpf_sk_lookup local_ip4 field", 311 + .insns = { 312 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 313 + offsetof(struct bpf_sk_lookup, local_ip4)), 314 + BPF_MOV32_IMM(BPF_REG_0, 0), 315 + BPF_EXIT_INSN(), 316 + }, 317 + .errstr = "invalid bpf_context access", 318 + .result = REJECT, 319 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 320 + .expected_attach_type = BPF_SK_LOOKUP, 321 + }, 322 + { 323 + "invalid 8-byte read from bpf_sk_lookup local_ip6 field", 324 + .insns = { 325 + 
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 326 + offsetof(struct bpf_sk_lookup, local_ip6)), 327 + BPF_MOV32_IMM(BPF_REG_0, 0), 328 + BPF_EXIT_INSN(), 329 + }, 330 + .errstr = "invalid bpf_context access", 331 + .result = REJECT, 332 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 333 + .expected_attach_type = BPF_SK_LOOKUP, 334 + }, 335 + { 336 + "invalid 8-byte read from bpf_sk_lookup local_port field", 337 + .insns = { 338 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 339 + offsetof(struct bpf_sk_lookup, local_port)), 340 + BPF_MOV32_IMM(BPF_REG_0, 0), 341 + BPF_EXIT_INSN(), 342 + }, 343 + .errstr = "invalid bpf_context access", 344 + .result = REJECT, 345 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 346 + .expected_attach_type = BPF_SK_LOOKUP, 347 + }, 348 + /* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */ 349 + { 350 + "invalid 4-byte read from bpf_sk_lookup sk field", 351 + .insns = { 352 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 353 + offsetof(struct bpf_sk_lookup, sk)), 354 + BPF_MOV32_IMM(BPF_REG_0, 0), 355 + BPF_EXIT_INSN(), 356 + }, 357 + .errstr = "invalid bpf_context access", 358 + .result = REJECT, 359 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 360 + .expected_attach_type = BPF_SK_LOOKUP, 361 + }, 362 + { 363 + "invalid 2-byte read from bpf_sk_lookup sk field", 364 + .insns = { 365 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 366 + offsetof(struct bpf_sk_lookup, sk)), 367 + BPF_MOV32_IMM(BPF_REG_0, 0), 368 + BPF_EXIT_INSN(), 369 + }, 370 + .errstr = "invalid bpf_context access", 371 + .result = REJECT, 372 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 373 + .expected_attach_type = BPF_SK_LOOKUP, 374 + }, 375 + { 376 + "invalid 1-byte read from bpf_sk_lookup sk field", 377 + .insns = { 378 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 379 + offsetof(struct bpf_sk_lookup, sk)), 380 + BPF_MOV32_IMM(BPF_REG_0, 0), 381 + BPF_EXIT_INSN(), 382 + }, 383 + .errstr = "invalid bpf_context access", 384 + .result = REJECT, 385 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 386 + 
.expected_attach_type = BPF_SK_LOOKUP, 387 + }, 388 + /* out of bounds and unaligned reads from bpf_sk_lookup */ 389 + { 390 + "invalid 4-byte read past end of bpf_sk_lookup", 391 + .insns = { 392 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 393 + sizeof(struct bpf_sk_lookup)), 394 + BPF_MOV32_IMM(BPF_REG_0, 0), 395 + BPF_EXIT_INSN(), 396 + }, 397 + .errstr = "invalid bpf_context access", 398 + .result = REJECT, 399 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 400 + .expected_attach_type = BPF_SK_LOOKUP, 401 + }, 402 + { 403 + "invalid 4-byte unaligned read from bpf_sk_lookup at odd offset", 404 + .insns = { 405 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1), 406 + BPF_MOV32_IMM(BPF_REG_0, 0), 407 + BPF_EXIT_INSN(), 408 + }, 409 + .errstr = "invalid bpf_context access", 410 + .result = REJECT, 411 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 412 + .expected_attach_type = BPF_SK_LOOKUP, 413 + }, 414 + { 415 + "invalid 4-byte unaligned read from bpf_sk_lookup at even offset", 416 + .insns = { 417 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2), 418 + BPF_MOV32_IMM(BPF_REG_0, 0), 419 + BPF_EXIT_INSN(), 420 + }, 421 + .errstr = "invalid bpf_context access", 422 + .result = REJECT, 423 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 424 + .expected_attach_type = BPF_SK_LOOKUP, 425 + }, 426 + /* in-bound and out-of-bound writes to bpf_sk_lookup */ 427 + { 428 + "invalid 8-byte write to bpf_sk_lookup", 429 + .insns = { 430 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 431 + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), 432 + BPF_MOV32_IMM(BPF_REG_0, 0), 433 + BPF_EXIT_INSN(), 434 + }, 435 + .errstr = "invalid bpf_context access", 436 + .result = REJECT, 437 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 438 + .expected_attach_type = BPF_SK_LOOKUP, 439 + }, 440 + { 441 + "invalid 4-byte write to bpf_sk_lookup", 442 + .insns = { 443 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 444 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), 445 + BPF_MOV32_IMM(BPF_REG_0, 0), 446 + BPF_EXIT_INSN(), 447 + }, 448 + .errstr = 
"invalid bpf_context access", 449 + .result = REJECT, 450 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 451 + .expected_attach_type = BPF_SK_LOOKUP, 452 + }, 453 + { 454 + "invalid 2-byte write to bpf_sk_lookup", 455 + .insns = { 456 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 457 + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0), 458 + BPF_MOV32_IMM(BPF_REG_0, 0), 459 + BPF_EXIT_INSN(), 460 + }, 461 + .errstr = "invalid bpf_context access", 462 + .result = REJECT, 463 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 464 + .expected_attach_type = BPF_SK_LOOKUP, 465 + }, 466 + { 467 + "invalid 1-byte write to bpf_sk_lookup", 468 + .insns = { 469 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 470 + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), 471 + BPF_MOV32_IMM(BPF_REG_0, 0), 472 + BPF_EXIT_INSN(), 473 + }, 474 + .errstr = "invalid bpf_context access", 475 + .result = REJECT, 476 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 477 + .expected_attach_type = BPF_SK_LOOKUP, 478 + }, 479 + { 480 + "invalid 4-byte write past end of bpf_sk_lookup", 481 + .insns = { 482 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 483 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 484 + sizeof(struct bpf_sk_lookup)), 485 + BPF_MOV32_IMM(BPF_REG_0, 0), 486 + BPF_EXIT_INSN(), 487 + }, 488 + .errstr = "invalid bpf_context access", 489 + .result = REJECT, 490 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 491 + .expected_attach_type = BPF_SK_LOOKUP, 492 + },