Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-07-21

The following pull-request contains BPF updates for your *net-next* tree.

We've added 46 non-merge commits during the last 6 day(s) which contain
a total of 68 files changed, 4929 insertions(+), 526 deletions(-).

The main changes are:

1) Run BPF program on socket lookup, from Jakub.

2) Introduce cpumap, from Lorenzo.

3) s390 JIT fixes, from Ilya.

4) Teach riscv JIT to emit compressed insns, from Luke.

5) Use build-time computed BTF ids in bpf iter, from Yonghong.
====================

Purely independent overlapping changes in both filter.h and xdp.h

Signed-off-by: David S. Miller <davem@davemloft.net>

+4930 -526
+480 -3
arch/riscv/net/bpf_jit.h
··· 13 13 #include <linux/filter.h> 14 14 #include <asm/cacheflush.h> 15 15 16 + static inline bool rvc_enabled(void) 17 + { 18 + return IS_ENABLED(CONFIG_RISCV_ISA_C); 19 + } 20 + 16 21 enum { 17 22 RV_REG_ZERO = 0, /* The constant value 0 */ 18 23 RV_REG_RA = 1, /* Return address */ ··· 53 48 RV_REG_T6 = 31, 54 49 }; 55 50 51 + static inline bool is_creg(u8 reg) 52 + { 53 + return (1 << reg) & (BIT(RV_REG_FP) | 54 + BIT(RV_REG_S1) | 55 + BIT(RV_REG_A0) | 56 + BIT(RV_REG_A1) | 57 + BIT(RV_REG_A2) | 58 + BIT(RV_REG_A3) | 59 + BIT(RV_REG_A4) | 60 + BIT(RV_REG_A5)); 61 + } 62 + 56 63 struct rv_jit_context { 57 64 struct bpf_prog *prog; 58 - u32 *insns; /* RV insns */ 65 + u16 *insns; /* RV insns */ 59 66 int ninsns; 60 67 int epilogue_offset; 61 68 int *offset; /* BPF to RV */ 62 69 unsigned long flags; 63 70 int stack_size; 64 71 }; 72 + 73 + /* Convert from ninsns to bytes. */ 74 + static inline int ninsns_rvoff(int ninsns) 75 + { 76 + return ninsns << 1; 77 + } 65 78 66 79 struct rv_jit_data { 67 80 struct bpf_binary_header *header; ··· 97 74 flush_icache_range((unsigned long)start, (unsigned long)end); 98 75 } 99 76 77 + /* Emit a 4-byte riscv instruction. */ 100 78 static inline void emit(const u32 insn, struct rv_jit_context *ctx) 101 79 { 80 + if (ctx->insns) { 81 + ctx->insns[ctx->ninsns] = insn; 82 + ctx->insns[ctx->ninsns + 1] = (insn >> 16); 83 + } 84 + 85 + ctx->ninsns += 2; 86 + } 87 + 88 + /* Emit a 2-byte riscv compressed instruction. */ 89 + static inline void emitc(const u16 insn, struct rv_jit_context *ctx) 90 + { 91 + BUILD_BUG_ON(!rvc_enabled()); 92 + 102 93 if (ctx->insns) 103 94 ctx->insns[ctx->ninsns] = insn; 104 95 ··· 123 86 { 124 87 int to = ctx->epilogue_offset, from = ctx->ninsns; 125 88 126 - return (to - from) << 2; 89 + return ninsns_rvoff(to - from); 127 90 } 128 91 129 92 /* Return -1 or inverted cond. 
*/ ··· 152 115 return BPF_JSGT; 153 116 } 154 117 return -1; 118 + } 119 + 120 + static inline bool is_6b_int(long val) 121 + { 122 + return -(1L << 5) <= val && val < (1L << 5); 123 + } 124 + 125 + static inline bool is_7b_uint(unsigned long val) 126 + { 127 + return val < (1UL << 7); 128 + } 129 + 130 + static inline bool is_8b_uint(unsigned long val) 131 + { 132 + return val < (1UL << 8); 133 + } 134 + 135 + static inline bool is_9b_uint(unsigned long val) 136 + { 137 + return val < (1UL << 9); 138 + } 139 + 140 + static inline bool is_10b_int(long val) 141 + { 142 + return -(1L << 9) <= val && val < (1L << 9); 143 + } 144 + 145 + static inline bool is_10b_uint(unsigned long val) 146 + { 147 + return val < (1UL << 10); 155 148 } 156 149 157 150 static inline bool is_12b_int(long val) ··· 216 149 off++; /* BPF branch is from PC+1, RV is from PC */ 217 150 from = (insn > 0) ? ctx->offset[insn - 1] : 0; 218 151 to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; 219 - return (to - from) << 2; 152 + return ninsns_rvoff(to - from); 220 153 } 221 154 222 155 /* Instruction formats. */ ··· 272 205 u8 funct7 = (funct5 << 2) | (aq << 1) | rl; 273 206 274 207 return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode); 208 + } 209 + 210 + /* RISC-V compressed instruction formats. 
*/ 211 + 212 + static inline u16 rv_cr_insn(u8 funct4, u8 rd, u8 rs2, u8 op) 213 + { 214 + return (funct4 << 12) | (rd << 7) | (rs2 << 2) | op; 215 + } 216 + 217 + static inline u16 rv_ci_insn(u8 funct3, u32 imm6, u8 rd, u8 op) 218 + { 219 + u32 imm; 220 + 221 + imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2); 222 + return (funct3 << 13) | (rd << 7) | op | imm; 223 + } 224 + 225 + static inline u16 rv_css_insn(u8 funct3, u32 uimm, u8 rs2, u8 op) 226 + { 227 + return (funct3 << 13) | (uimm << 7) | (rs2 << 2) | op; 228 + } 229 + 230 + static inline u16 rv_ciw_insn(u8 funct3, u32 uimm, u8 rd, u8 op) 231 + { 232 + return (funct3 << 13) | (uimm << 5) | ((rd & 0x7) << 2) | op; 233 + } 234 + 235 + static inline u16 rv_cl_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rd, 236 + u8 op) 237 + { 238 + return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) | 239 + (imm_lo << 5) | ((rd & 0x7) << 2) | op; 240 + } 241 + 242 + static inline u16 rv_cs_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rs2, 243 + u8 op) 244 + { 245 + return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) | 246 + (imm_lo << 5) | ((rs2 & 0x7) << 2) | op; 247 + } 248 + 249 + static inline u16 rv_ca_insn(u8 funct6, u8 rd, u8 funct2, u8 rs2, u8 op) 250 + { 251 + return (funct6 << 10) | ((rd & 0x7) << 7) | (funct2 << 5) | 252 + ((rs2 & 0x7) << 2) | op; 253 + } 254 + 255 + static inline u16 rv_cb_insn(u8 funct3, u32 imm6, u8 funct2, u8 rd, u8 op) 256 + { 257 + u32 imm; 258 + 259 + imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2); 260 + return (funct3 << 13) | (funct2 << 10) | ((rd & 0x7) << 7) | op | imm; 275 261 } 276 262 277 263 /* Instructions shared by both RV32 and RV64. */ ··· 534 414 return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); 535 415 } 536 416 417 + /* RVC instrutions. 
*/ 418 + 419 + static inline u16 rvc_addi4spn(u8 rd, u32 imm10) 420 + { 421 + u32 imm; 422 + 423 + imm = ((imm10 & 0x30) << 2) | ((imm10 & 0x3c0) >> 4) | 424 + ((imm10 & 0x4) >> 1) | ((imm10 & 0x8) >> 3); 425 + return rv_ciw_insn(0x0, imm, rd, 0x0); 426 + } 427 + 428 + static inline u16 rvc_lw(u8 rd, u32 imm7, u8 rs1) 429 + { 430 + u32 imm_hi, imm_lo; 431 + 432 + imm_hi = (imm7 & 0x38) >> 3; 433 + imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6); 434 + return rv_cl_insn(0x2, imm_hi, rs1, imm_lo, rd, 0x0); 435 + } 436 + 437 + static inline u16 rvc_sw(u8 rs1, u32 imm7, u8 rs2) 438 + { 439 + u32 imm_hi, imm_lo; 440 + 441 + imm_hi = (imm7 & 0x38) >> 3; 442 + imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6); 443 + return rv_cs_insn(0x6, imm_hi, rs1, imm_lo, rs2, 0x0); 444 + } 445 + 446 + static inline u16 rvc_addi(u8 rd, u32 imm6) 447 + { 448 + return rv_ci_insn(0, imm6, rd, 0x1); 449 + } 450 + 451 + static inline u16 rvc_li(u8 rd, u32 imm6) 452 + { 453 + return rv_ci_insn(0x2, imm6, rd, 0x1); 454 + } 455 + 456 + static inline u16 rvc_addi16sp(u32 imm10) 457 + { 458 + u32 imm; 459 + 460 + imm = ((imm10 & 0x200) >> 4) | (imm10 & 0x10) | ((imm10 & 0x40) >> 3) | 461 + ((imm10 & 0x180) >> 6) | ((imm10 & 0x20) >> 5); 462 + return rv_ci_insn(0x3, imm, RV_REG_SP, 0x1); 463 + } 464 + 465 + static inline u16 rvc_lui(u8 rd, u32 imm6) 466 + { 467 + return rv_ci_insn(0x3, imm6, rd, 0x1); 468 + } 469 + 470 + static inline u16 rvc_srli(u8 rd, u32 imm6) 471 + { 472 + return rv_cb_insn(0x4, imm6, 0, rd, 0x1); 473 + } 474 + 475 + static inline u16 rvc_srai(u8 rd, u32 imm6) 476 + { 477 + return rv_cb_insn(0x4, imm6, 0x1, rd, 0x1); 478 + } 479 + 480 + static inline u16 rvc_andi(u8 rd, u32 imm6) 481 + { 482 + return rv_cb_insn(0x4, imm6, 0x2, rd, 0x1); 483 + } 484 + 485 + static inline u16 rvc_sub(u8 rd, u8 rs) 486 + { 487 + return rv_ca_insn(0x23, rd, 0, rs, 0x1); 488 + } 489 + 490 + static inline u16 rvc_xor(u8 rd, u8 rs) 491 + { 492 + return rv_ca_insn(0x23, rd, 0x1, rs, 0x1); 493 
+ } 494 + 495 + static inline u16 rvc_or(u8 rd, u8 rs) 496 + { 497 + return rv_ca_insn(0x23, rd, 0x2, rs, 0x1); 498 + } 499 + 500 + static inline u16 rvc_and(u8 rd, u8 rs) 501 + { 502 + return rv_ca_insn(0x23, rd, 0x3, rs, 0x1); 503 + } 504 + 505 + static inline u16 rvc_slli(u8 rd, u32 imm6) 506 + { 507 + return rv_ci_insn(0, imm6, rd, 0x2); 508 + } 509 + 510 + static inline u16 rvc_lwsp(u8 rd, u32 imm8) 511 + { 512 + u32 imm; 513 + 514 + imm = ((imm8 & 0xc0) >> 6) | (imm8 & 0x3c); 515 + return rv_ci_insn(0x2, imm, rd, 0x2); 516 + } 517 + 518 + static inline u16 rvc_jr(u8 rs1) 519 + { 520 + return rv_cr_insn(0x8, rs1, RV_REG_ZERO, 0x2); 521 + } 522 + 523 + static inline u16 rvc_mv(u8 rd, u8 rs) 524 + { 525 + return rv_cr_insn(0x8, rd, rs, 0x2); 526 + } 527 + 528 + static inline u16 rvc_jalr(u8 rs1) 529 + { 530 + return rv_cr_insn(0x9, rs1, RV_REG_ZERO, 0x2); 531 + } 532 + 533 + static inline u16 rvc_add(u8 rd, u8 rs) 534 + { 535 + return rv_cr_insn(0x9, rd, rs, 0x2); 536 + } 537 + 538 + static inline u16 rvc_swsp(u32 imm8, u8 rs2) 539 + { 540 + u32 imm; 541 + 542 + imm = (imm8 & 0x3c) | ((imm8 & 0xc0) >> 6); 543 + return rv_css_insn(0x6, imm, rs2, 0x2); 544 + } 545 + 537 546 /* 538 547 * RV64-only instructions. 539 548 * ··· 750 501 static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 751 502 { 752 503 return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); 504 + } 505 + 506 + /* RV64-only RVC instructions. 
*/ 507 + 508 + static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1) 509 + { 510 + u32 imm_hi, imm_lo; 511 + 512 + imm_hi = (imm8 & 0x38) >> 3; 513 + imm_lo = (imm8 & 0xc0) >> 6; 514 + return rv_cl_insn(0x3, imm_hi, rs1, imm_lo, rd, 0x0); 515 + } 516 + 517 + static inline u16 rvc_sd(u8 rs1, u32 imm8, u8 rs2) 518 + { 519 + u32 imm_hi, imm_lo; 520 + 521 + imm_hi = (imm8 & 0x38) >> 3; 522 + imm_lo = (imm8 & 0xc0) >> 6; 523 + return rv_cs_insn(0x7, imm_hi, rs1, imm_lo, rs2, 0x0); 524 + } 525 + 526 + static inline u16 rvc_subw(u8 rd, u8 rs) 527 + { 528 + return rv_ca_insn(0x27, rd, 0, rs, 0x1); 529 + } 530 + 531 + static inline u16 rvc_addiw(u8 rd, u32 imm6) 532 + { 533 + return rv_ci_insn(0x1, imm6, rd, 0x1); 534 + } 535 + 536 + static inline u16 rvc_ldsp(u8 rd, u32 imm9) 537 + { 538 + u32 imm; 539 + 540 + imm = ((imm9 & 0x1c0) >> 6) | (imm9 & 0x38); 541 + return rv_ci_insn(0x3, imm, rd, 0x2); 542 + } 543 + 544 + static inline u16 rvc_sdsp(u32 imm9, u8 rs2) 545 + { 546 + u32 imm; 547 + 548 + imm = (imm9 & 0x38) | ((imm9 & 0x1c0) >> 6); 549 + return rv_css_insn(0x7, imm, rs2, 0x2); 550 + } 551 + 552 + #endif /* __riscv_xlen == 64 */ 553 + 554 + /* Helper functions that emit RVC instructions when possible. 
*/ 555 + 556 + static inline void emit_jalr(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 557 + { 558 + if (rvc_enabled() && rd == RV_REG_RA && rs && !imm) 559 + emitc(rvc_jalr(rs), ctx); 560 + else if (rvc_enabled() && !rd && rs && !imm) 561 + emitc(rvc_jr(rs), ctx); 562 + else 563 + emit(rv_jalr(rd, rs, imm), ctx); 564 + } 565 + 566 + static inline void emit_mv(u8 rd, u8 rs, struct rv_jit_context *ctx) 567 + { 568 + if (rvc_enabled() && rd && rs) 569 + emitc(rvc_mv(rd, rs), ctx); 570 + else 571 + emit(rv_addi(rd, rs, 0), ctx); 572 + } 573 + 574 + static inline void emit_add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 575 + { 576 + if (rvc_enabled() && rd && rd == rs1 && rs2) 577 + emitc(rvc_add(rd, rs2), ctx); 578 + else 579 + emit(rv_add(rd, rs1, rs2), ctx); 580 + } 581 + 582 + static inline void emit_addi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 583 + { 584 + if (rvc_enabled() && rd == RV_REG_SP && rd == rs && is_10b_int(imm) && imm && !(imm & 0xf)) 585 + emitc(rvc_addi16sp(imm), ctx); 586 + else if (rvc_enabled() && is_creg(rd) && rs == RV_REG_SP && is_10b_uint(imm) && 587 + !(imm & 0x3) && imm) 588 + emitc(rvc_addi4spn(rd, imm), ctx); 589 + else if (rvc_enabled() && rd && rd == rs && imm && is_6b_int(imm)) 590 + emitc(rvc_addi(rd, imm), ctx); 591 + else 592 + emit(rv_addi(rd, rs, imm), ctx); 593 + } 594 + 595 + static inline void emit_li(u8 rd, s32 imm, struct rv_jit_context *ctx) 596 + { 597 + if (rvc_enabled() && rd && is_6b_int(imm)) 598 + emitc(rvc_li(rd, imm), ctx); 599 + else 600 + emit(rv_addi(rd, RV_REG_ZERO, imm), ctx); 601 + } 602 + 603 + static inline void emit_lui(u8 rd, s32 imm, struct rv_jit_context *ctx) 604 + { 605 + if (rvc_enabled() && rd && rd != RV_REG_SP && is_6b_int(imm) && imm) 606 + emitc(rvc_lui(rd, imm), ctx); 607 + else 608 + emit(rv_lui(rd, imm), ctx); 609 + } 610 + 611 + static inline void emit_slli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 612 + { 613 + if (rvc_enabled() && rd && rd == rs && imm && 
(u32)imm < __riscv_xlen) 614 + emitc(rvc_slli(rd, imm), ctx); 615 + else 616 + emit(rv_slli(rd, rs, imm), ctx); 617 + } 618 + 619 + static inline void emit_andi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 620 + { 621 + if (rvc_enabled() && is_creg(rd) && rd == rs && is_6b_int(imm)) 622 + emitc(rvc_andi(rd, imm), ctx); 623 + else 624 + emit(rv_andi(rd, rs, imm), ctx); 625 + } 626 + 627 + static inline void emit_srli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 628 + { 629 + if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen) 630 + emitc(rvc_srli(rd, imm), ctx); 631 + else 632 + emit(rv_srli(rd, rs, imm), ctx); 633 + } 634 + 635 + static inline void emit_srai(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 636 + { 637 + if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen) 638 + emitc(rvc_srai(rd, imm), ctx); 639 + else 640 + emit(rv_srai(rd, rs, imm), ctx); 641 + } 642 + 643 + static inline void emit_sub(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 644 + { 645 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 646 + emitc(rvc_sub(rd, rs2), ctx); 647 + else 648 + emit(rv_sub(rd, rs1, rs2), ctx); 649 + } 650 + 651 + static inline void emit_or(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 652 + { 653 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 654 + emitc(rvc_or(rd, rs2), ctx); 655 + else 656 + emit(rv_or(rd, rs1, rs2), ctx); 657 + } 658 + 659 + static inline void emit_and(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 660 + { 661 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 662 + emitc(rvc_and(rd, rs2), ctx); 663 + else 664 + emit(rv_and(rd, rs1, rs2), ctx); 665 + } 666 + 667 + static inline void emit_xor(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 668 + { 669 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 670 + emitc(rvc_xor(rd, rs2), ctx); 671 + else 672 + emit(rv_xor(rd, rs1, rs2), ctx); 673 + } 674 + 
675 + static inline void emit_lw(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx) 676 + { 677 + if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_8b_uint(off) && !(off & 0x3)) 678 + emitc(rvc_lwsp(rd, off), ctx); 679 + else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_7b_uint(off) && !(off & 0x3)) 680 + emitc(rvc_lw(rd, off, rs1), ctx); 681 + else 682 + emit(rv_lw(rd, off, rs1), ctx); 683 + } 684 + 685 + static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx) 686 + { 687 + if (rvc_enabled() && rs1 == RV_REG_SP && is_8b_uint(off) && !(off & 0x3)) 688 + emitc(rvc_swsp(off, rs2), ctx); 689 + else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_7b_uint(off) && !(off & 0x3)) 690 + emitc(rvc_sw(rs1, off, rs2), ctx); 691 + else 692 + emit(rv_sw(rs1, off, rs2), ctx); 693 + } 694 + 695 + /* RV64-only helper functions. */ 696 + #if __riscv_xlen == 64 697 + 698 + static inline void emit_addiw(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) 699 + { 700 + if (rvc_enabled() && rd && rd == rs && is_6b_int(imm)) 701 + emitc(rvc_addiw(rd, imm), ctx); 702 + else 703 + emit(rv_addiw(rd, rs, imm), ctx); 704 + } 705 + 706 + static inline void emit_ld(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx) 707 + { 708 + if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_9b_uint(off) && !(off & 0x7)) 709 + emitc(rvc_ldsp(rd, off), ctx); 710 + else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_8b_uint(off) && !(off & 0x7)) 711 + emitc(rvc_ld(rd, off, rs1), ctx); 712 + else 713 + emit(rv_ld(rd, off, rs1), ctx); 714 + } 715 + 716 + static inline void emit_sd(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx) 717 + { 718 + if (rvc_enabled() && rs1 == RV_REG_SP && is_9b_uint(off) && !(off & 0x7)) 719 + emitc(rvc_sdsp(off, rs2), ctx); 720 + else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_8b_uint(off) && !(off & 0x7)) 721 + emitc(rvc_sd(rs1, off, rs2), ctx); 722 + else 723 + emit(rv_sd(rs1, off, rs2), ctx); 724 + } 725 + 726 + 
static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 727 + { 728 + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) 729 + emitc(rvc_subw(rd, rs2), ctx); 730 + else 731 + emit(rv_subw(rd, rs1, rs2), ctx); 753 732 } 754 733 755 734 #endif /* __riscv_xlen == 64 */
+7 -7
arch/riscv/net/bpf_jit_comp32.c
··· 644 644 645 645 e = ctx->ninsns; 646 646 /* Adjust for extra insns. */ 647 - rvoff -= (e - s) << 2; 647 + rvoff -= ninsns_rvoff(e - s); 648 648 emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); 649 649 return 0; 650 650 } ··· 713 713 if (far) { 714 714 e = ctx->ninsns; 715 715 /* Adjust for extra insns. */ 716 - rvoff -= (e - s) << 2; 716 + rvoff -= ninsns_rvoff(e - s); 717 717 emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); 718 718 } 719 719 return 0; ··· 731 731 732 732 e = ctx->ninsns; 733 733 /* Adjust for extra insns. */ 734 - rvoff -= (e - s) << 2; 734 + rvoff -= ninsns_rvoff(e - s); 735 735 736 736 if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx)) 737 737 return -1; ··· 795 795 * if (index >= max_entries) 796 796 * goto out; 797 797 */ 798 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 798 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 799 799 emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx); 800 800 801 801 /* ··· 804 804 * goto out; 805 805 */ 806 806 emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx); 807 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 807 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 808 808 emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx); 809 809 810 810 /* ··· 818 818 if (is_12b_check(off, insn)) 819 819 return -1; 820 820 emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx); 821 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 821 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 822 822 emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx); 823 823 824 824 /* ··· 1214 1214 emit_imm32(tmp2, imm, ctx); 1215 1215 src = tmp2; 1216 1216 e = ctx->ninsns; 1217 - rvoff -= (e - s) << 2; 1217 + rvoff -= ninsns_rvoff(e - s); 1218 1218 } 1219 1219 1220 1220 if (is64)
+150 -137
arch/riscv/net/bpf_jit_comp64.c
··· 132 132 * 133 133 * This also means that we need to process LSB to MSB. 134 134 */ 135 - s64 upper = (val + (1 << 11)) >> 12, lower = val & 0xfff; 135 + s64 upper = (val + (1 << 11)) >> 12; 136 + /* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw, 137 + * and addi are signed and RVC checks will perform signed comparisons. 138 + */ 139 + s64 lower = ((val & 0xfff) << 52) >> 52; 136 140 int shift; 137 141 138 142 if (is_32b_int(val)) { 139 143 if (upper) 140 - emit(rv_lui(rd, upper), ctx); 144 + emit_lui(rd, upper, ctx); 141 145 142 146 if (!upper) { 143 - emit(rv_addi(rd, RV_REG_ZERO, lower), ctx); 147 + emit_li(rd, lower, ctx); 144 148 return; 145 149 } 146 150 147 - emit(rv_addiw(rd, rd, lower), ctx); 151 + emit_addiw(rd, rd, lower, ctx); 148 152 return; 149 153 } 150 154 ··· 158 154 159 155 emit_imm(rd, upper, ctx); 160 156 161 - emit(rv_slli(rd, rd, shift), ctx); 157 + emit_slli(rd, rd, shift, ctx); 162 158 if (lower) 163 - emit(rv_addi(rd, rd, lower), ctx); 159 + emit_addi(rd, rd, lower, ctx); 164 160 } 165 161 166 162 static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) ··· 168 164 int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8; 169 165 170 166 if (seen_reg(RV_REG_RA, ctx)) { 171 - emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx); 167 + emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx); 172 168 store_offset -= 8; 173 169 } 174 - emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx); 170 + emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx); 175 171 store_offset -= 8; 176 172 if (seen_reg(RV_REG_S1, ctx)) { 177 - emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx); 173 + emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx); 178 174 store_offset -= 8; 179 175 } 180 176 if (seen_reg(RV_REG_S2, ctx)) { 181 - emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx); 177 + emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx); 182 178 store_offset -= 8; 183 179 } 184 180 if (seen_reg(RV_REG_S3, ctx)) { 185 - 
emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx); 181 + emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx); 186 182 store_offset -= 8; 187 183 } 188 184 if (seen_reg(RV_REG_S4, ctx)) { 189 - emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx); 185 + emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx); 190 186 store_offset -= 8; 191 187 } 192 188 if (seen_reg(RV_REG_S5, ctx)) { 193 - emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx); 189 + emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx); 194 190 store_offset -= 8; 195 191 } 196 192 if (seen_reg(RV_REG_S6, ctx)) { 197 - emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx); 193 + emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx); 198 194 store_offset -= 8; 199 195 } 200 196 201 - emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); 197 + emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); 202 198 /* Set return value. */ 203 199 if (!is_tail_call) 204 - emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); 205 - emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, 206 - is_tail_call ? 4 : 0), /* skip TCC init */ 207 - ctx); 200 + emit_mv(RV_REG_A0, RV_REG_A5, ctx); 201 + emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, 202 + is_tail_call ? 
4 : 0, /* skip TCC init */ 203 + ctx); 208 204 } 209 205 210 206 static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff, ··· 284 280 285 281 static void emit_zext_32(u8 reg, struct rv_jit_context *ctx) 286 282 { 287 - emit(rv_slli(reg, reg, 32), ctx); 288 - emit(rv_srli(reg, reg, 32), ctx); 283 + emit_slli(reg, reg, 32, ctx); 284 + emit_srli(reg, reg, 32, ctx); 289 285 } 290 286 291 287 static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) ··· 308 304 if (is_12b_check(off, insn)) 309 305 return -1; 310 306 emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx); 311 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 307 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 312 308 emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx); 313 309 314 310 /* if (TCC-- < 0) 315 311 * goto out; 316 312 */ 317 - emit(rv_addi(RV_REG_T1, tcc, -1), ctx); 318 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 313 + emit_addi(RV_REG_T1, tcc, -1, ctx); 314 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 319 315 emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx); 320 316 321 317 /* prog = array->ptrs[index]; 322 318 * if (!prog) 323 319 * goto out; 324 320 */ 325 - emit(rv_slli(RV_REG_T2, RV_REG_A2, 3), ctx); 326 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_A1), ctx); 321 + emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx); 322 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx); 327 323 off = offsetof(struct bpf_array, ptrs); 328 324 if (is_12b_check(off, insn)) 329 325 return -1; 330 - emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx); 331 - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; 326 + emit_ld(RV_REG_T2, off, RV_REG_T2, ctx); 327 + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); 332 328 emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx); 333 329 334 330 /* goto *(prog->bpf_func + 4); */ 335 331 off = offsetof(struct bpf_prog, bpf_func); 336 332 if (is_12b_check(off, insn)) 337 333 return -1; 338 - emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx); 339 
- emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx); 334 + emit_ld(RV_REG_T3, off, RV_REG_T2, ctx); 335 + emit_mv(RV_REG_TCC, RV_REG_T1, ctx); 340 336 __build_epilogue(true, ctx); 341 337 return 0; 342 338 } ··· 364 360 365 361 static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) 366 362 { 367 - emit(rv_addi(RV_REG_T2, *rd, 0), ctx); 363 + emit_mv(RV_REG_T2, *rd, ctx); 368 364 emit_zext_32(RV_REG_T2, ctx); 369 - emit(rv_addi(RV_REG_T1, *rs, 0), ctx); 365 + emit_mv(RV_REG_T1, *rs, ctx); 370 366 emit_zext_32(RV_REG_T1, ctx); 371 367 *rd = RV_REG_T2; 372 368 *rs = RV_REG_T1; ··· 374 370 375 371 static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) 376 372 { 377 - emit(rv_addiw(RV_REG_T2, *rd, 0), ctx); 378 - emit(rv_addiw(RV_REG_T1, *rs, 0), ctx); 373 + emit_addiw(RV_REG_T2, *rd, 0, ctx); 374 + emit_addiw(RV_REG_T1, *rs, 0, ctx); 379 375 *rd = RV_REG_T2; 380 376 *rs = RV_REG_T1; 381 377 } 382 378 383 379 static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx) 384 380 { 385 - emit(rv_addi(RV_REG_T2, *rd, 0), ctx); 381 + emit_mv(RV_REG_T2, *rd, ctx); 386 382 emit_zext_32(RV_REG_T2, ctx); 387 383 emit_zext_32(RV_REG_T1, ctx); 388 384 *rd = RV_REG_T2; ··· 390 386 391 387 static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx) 392 388 { 393 - emit(rv_addiw(RV_REG_T2, *rd, 0), ctx); 389 + emit_addiw(RV_REG_T2, *rd, 0, ctx); 394 390 *rd = RV_REG_T2; 395 391 } 396 392 ··· 436 432 if (ret) 437 433 return ret; 438 434 rd = bpf_to_rv_reg(BPF_REG_0, ctx); 439 - emit(rv_addi(rd, RV_REG_A0, 0), ctx); 435 + emit_mv(rd, RV_REG_A0, ctx); 440 436 return 0; 441 437 } 442 438 ··· 462 458 emit_zext_32(rd, ctx); 463 459 break; 464 460 } 465 - emit(is64 ? 
rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx); 461 + emit_mv(rd, rs, ctx); 466 462 if (!is64 && !aux->verifier_zext) 467 463 emit_zext_32(rd, ctx); 468 464 break; ··· 470 466 /* dst = dst OP src */ 471 467 case BPF_ALU | BPF_ADD | BPF_X: 472 468 case BPF_ALU64 | BPF_ADD | BPF_X: 473 - emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx); 469 + emit_add(rd, rd, rs, ctx); 474 470 if (!is64 && !aux->verifier_zext) 475 471 emit_zext_32(rd, ctx); 476 472 break; 477 473 case BPF_ALU | BPF_SUB | BPF_X: 478 474 case BPF_ALU64 | BPF_SUB | BPF_X: 479 - emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx); 475 + if (is64) 476 + emit_sub(rd, rd, rs, ctx); 477 + else 478 + emit_subw(rd, rd, rs, ctx); 479 + 480 480 if (!is64 && !aux->verifier_zext) 481 481 emit_zext_32(rd, ctx); 482 482 break; 483 483 case BPF_ALU | BPF_AND | BPF_X: 484 484 case BPF_ALU64 | BPF_AND | BPF_X: 485 - emit(rv_and(rd, rd, rs), ctx); 485 + emit_and(rd, rd, rs, ctx); 486 486 if (!is64 && !aux->verifier_zext) 487 487 emit_zext_32(rd, ctx); 488 488 break; 489 489 case BPF_ALU | BPF_OR | BPF_X: 490 490 case BPF_ALU64 | BPF_OR | BPF_X: 491 - emit(rv_or(rd, rd, rs), ctx); 491 + emit_or(rd, rd, rs, ctx); 492 492 if (!is64 && !aux->verifier_zext) 493 493 emit_zext_32(rd, ctx); 494 494 break; 495 495 case BPF_ALU | BPF_XOR | BPF_X: 496 496 case BPF_ALU64 | BPF_XOR | BPF_X: 497 - emit(rv_xor(rd, rd, rs), ctx); 497 + emit_xor(rd, rd, rs, ctx); 498 498 if (!is64 && !aux->verifier_zext) 499 499 emit_zext_32(rd, ctx); 500 500 break; ··· 542 534 /* dst = -dst */ 543 535 case BPF_ALU | BPF_NEG: 544 536 case BPF_ALU64 | BPF_NEG: 545 - emit(is64 ? 
rv_sub(rd, RV_REG_ZERO, rd) : 546 - rv_subw(rd, RV_REG_ZERO, rd), ctx); 537 + emit_sub(rd, RV_REG_ZERO, rd, ctx); 547 538 if (!is64 && !aux->verifier_zext) 548 539 emit_zext_32(rd, ctx); 549 540 break; ··· 551 544 case BPF_ALU | BPF_END | BPF_FROM_LE: 552 545 switch (imm) { 553 546 case 16: 554 - emit(rv_slli(rd, rd, 48), ctx); 555 - emit(rv_srli(rd, rd, 48), ctx); 547 + emit_slli(rd, rd, 48, ctx); 548 + emit_srli(rd, rd, 48, ctx); 556 549 break; 557 550 case 32: 558 551 if (!aux->verifier_zext) ··· 565 558 break; 566 559 567 560 case BPF_ALU | BPF_END | BPF_FROM_BE: 568 - emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx); 561 + emit_li(RV_REG_T2, 0, ctx); 569 562 570 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 571 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 572 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 573 - emit(rv_srli(rd, rd, 8), ctx); 563 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 564 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 565 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 566 + emit_srli(rd, rd, 8, ctx); 574 567 if (imm == 16) 575 568 goto out_be; 576 569 577 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 578 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 579 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 580 - emit(rv_srli(rd, rd, 8), ctx); 570 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 571 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 572 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 573 + emit_srli(rd, rd, 8, ctx); 581 574 582 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 583 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 584 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 585 - emit(rv_srli(rd, rd, 8), ctx); 575 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 576 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 577 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 578 + emit_srli(rd, rd, 8, ctx); 586 579 if (imm == 32) 587 580 goto out_be; 588 581 589 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 590 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 591 
- emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 592 - emit(rv_srli(rd, rd, 8), ctx); 582 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 583 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 584 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 585 + emit_srli(rd, rd, 8, ctx); 593 586 594 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 595 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 596 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 597 - emit(rv_srli(rd, rd, 8), ctx); 587 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 588 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 589 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 590 + emit_srli(rd, rd, 8, ctx); 598 591 599 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 600 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 601 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 602 - emit(rv_srli(rd, rd, 8), ctx); 592 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 593 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 594 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 595 + emit_srli(rd, rd, 8, ctx); 603 596 604 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 605 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 606 - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); 607 - emit(rv_srli(rd, rd, 8), ctx); 597 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 598 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 599 + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); 600 + emit_srli(rd, rd, 8, ctx); 608 601 out_be: 609 - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); 610 - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); 602 + emit_andi(RV_REG_T1, rd, 0xff, ctx); 603 + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); 611 604 612 - emit(rv_addi(rd, RV_REG_T2, 0), ctx); 605 + emit_mv(rd, RV_REG_T2, ctx); 613 606 break; 614 607 615 608 /* dst = imm */ ··· 624 617 case BPF_ALU | BPF_ADD | BPF_K: 625 618 case BPF_ALU64 | BPF_ADD | BPF_K: 626 619 if (is_12b_int(imm)) { 627 - emit(is64 ? 
rv_addi(rd, rd, imm) : 628 - rv_addiw(rd, rd, imm), ctx); 620 + emit_addi(rd, rd, imm, ctx); 629 621 } else { 630 622 emit_imm(RV_REG_T1, imm, ctx); 631 - emit(is64 ? rv_add(rd, rd, RV_REG_T1) : 632 - rv_addw(rd, rd, RV_REG_T1), ctx); 623 + emit_add(rd, rd, RV_REG_T1, ctx); 633 624 } 634 625 if (!is64 && !aux->verifier_zext) 635 626 emit_zext_32(rd, ctx); ··· 635 630 case BPF_ALU | BPF_SUB | BPF_K: 636 631 case BPF_ALU64 | BPF_SUB | BPF_K: 637 632 if (is_12b_int(-imm)) { 638 - emit(is64 ? rv_addi(rd, rd, -imm) : 639 - rv_addiw(rd, rd, -imm), ctx); 633 + emit_addi(rd, rd, -imm, ctx); 640 634 } else { 641 635 emit_imm(RV_REG_T1, imm, ctx); 642 - emit(is64 ? rv_sub(rd, rd, RV_REG_T1) : 643 - rv_subw(rd, rd, RV_REG_T1), ctx); 636 + emit_sub(rd, rd, RV_REG_T1, ctx); 644 637 } 645 638 if (!is64 && !aux->verifier_zext) 646 639 emit_zext_32(rd, ctx); ··· 646 643 case BPF_ALU | BPF_AND | BPF_K: 647 644 case BPF_ALU64 | BPF_AND | BPF_K: 648 645 if (is_12b_int(imm)) { 649 - emit(rv_andi(rd, rd, imm), ctx); 646 + emit_andi(rd, rd, imm, ctx); 650 647 } else { 651 648 emit_imm(RV_REG_T1, imm, ctx); 652 - emit(rv_and(rd, rd, RV_REG_T1), ctx); 649 + emit_and(rd, rd, RV_REG_T1, ctx); 653 650 } 654 651 if (!is64 && !aux->verifier_zext) 655 652 emit_zext_32(rd, ctx); ··· 660 657 emit(rv_ori(rd, rd, imm), ctx); 661 658 } else { 662 659 emit_imm(RV_REG_T1, imm, ctx); 663 - emit(rv_or(rd, rd, RV_REG_T1), ctx); 660 + emit_or(rd, rd, RV_REG_T1, ctx); 664 661 } 665 662 if (!is64 && !aux->verifier_zext) 666 663 emit_zext_32(rd, ctx); ··· 671 668 emit(rv_xori(rd, rd, imm), ctx); 672 669 } else { 673 670 emit_imm(RV_REG_T1, imm, ctx); 674 - emit(rv_xor(rd, rd, RV_REG_T1), ctx); 671 + emit_xor(rd, rd, RV_REG_T1, ctx); 675 672 } 676 673 if (!is64 && !aux->verifier_zext) 677 674 emit_zext_32(rd, ctx); ··· 702 699 break; 703 700 case BPF_ALU | BPF_LSH | BPF_K: 704 701 case BPF_ALU64 | BPF_LSH | BPF_K: 705 - emit(is64 ? 
rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx); 702 + emit_slli(rd, rd, imm, ctx); 703 + 706 704 if (!is64 && !aux->verifier_zext) 707 705 emit_zext_32(rd, ctx); 708 706 break; 709 707 case BPF_ALU | BPF_RSH | BPF_K: 710 708 case BPF_ALU64 | BPF_RSH | BPF_K: 711 - emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx); 709 + if (is64) 710 + emit_srli(rd, rd, imm, ctx); 711 + else 712 + emit(rv_srliw(rd, rd, imm), ctx); 713 + 712 714 if (!is64 && !aux->verifier_zext) 713 715 emit_zext_32(rd, ctx); 714 716 break; 715 717 case BPF_ALU | BPF_ARSH | BPF_K: 716 718 case BPF_ALU64 | BPF_ARSH | BPF_K: 717 - emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx); 719 + if (is64) 720 + emit_srai(rd, rd, imm, ctx); 721 + else 722 + emit(rv_sraiw(rd, rd, imm), ctx); 723 + 718 724 if (!is64 && !aux->verifier_zext) 719 725 emit_zext_32(rd, ctx); 720 726 break; ··· 769 757 e = ctx->ninsns; 770 758 771 759 /* Adjust for extra insns */ 772 - rvoff -= (e - s) << 2; 760 + rvoff -= ninsns_rvoff(e - s); 773 761 } 774 762 775 763 if (BPF_OP(code) == BPF_JSET) { 776 764 /* Adjust for and */ 777 765 rvoff -= 4; 778 - emit(rv_and(RV_REG_T1, rd, rs), ctx); 766 + emit_and(RV_REG_T1, rd, rs, ctx); 779 767 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, 780 768 ctx); 781 769 } else { ··· 822 810 e = ctx->ninsns; 823 811 824 812 /* Adjust for extra insns */ 825 - rvoff -= (e - s) << 2; 813 + rvoff -= ninsns_rvoff(e - s); 826 814 emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); 827 815 break; 828 816 ··· 831 819 rvoff = rv_offset(i, off, ctx); 832 820 s = ctx->ninsns; 833 821 if (is_12b_int(imm)) { 834 - emit(rv_andi(RV_REG_T1, rd, imm), ctx); 822 + emit_andi(RV_REG_T1, rd, imm, ctx); 835 823 } else { 836 824 emit_imm(RV_REG_T1, imm, ctx); 837 - emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx); 825 + emit_and(RV_REG_T1, rd, RV_REG_T1, ctx); 838 826 } 839 827 /* For jset32, we should clear the upper 32 bits of t1, but 840 828 * sign-extension is sufficient here and saves one 
instruction, 841 829 * as t1 is used only in comparison against zero. 842 830 */ 843 831 if (!is64 && imm < 0) 844 - emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx); 832 + emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx); 845 833 e = ctx->ninsns; 846 - rvoff -= (e - s) << 2; 834 + rvoff -= ninsns_rvoff(e - s); 847 835 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); 848 836 break; 849 837 ··· 899 887 } 900 888 901 889 emit_imm(RV_REG_T1, off, ctx); 902 - emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 890 + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 903 891 emit(rv_lbu(rd, 0, RV_REG_T1), ctx); 904 892 if (insn_is_zext(&insn[1])) 905 893 return 1; ··· 911 899 } 912 900 913 901 emit_imm(RV_REG_T1, off, ctx); 914 - emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 902 + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 915 903 emit(rv_lhu(rd, 0, RV_REG_T1), ctx); 916 904 if (insn_is_zext(&insn[1])) 917 905 return 1; ··· 923 911 } 924 912 925 913 emit_imm(RV_REG_T1, off, ctx); 926 - emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 914 + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 927 915 emit(rv_lwu(rd, 0, RV_REG_T1), ctx); 928 916 if (insn_is_zext(&insn[1])) 929 917 return 1; 930 918 break; 931 919 case BPF_LDX | BPF_MEM | BPF_DW: 932 920 if (is_12b_int(off)) { 933 - emit(rv_ld(rd, off, rs), ctx); 921 + emit_ld(rd, off, rs, ctx); 934 922 break; 935 923 } 936 924 937 925 emit_imm(RV_REG_T1, off, ctx); 938 - emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); 939 - emit(rv_ld(rd, 0, RV_REG_T1), ctx); 926 + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 927 + emit_ld(rd, 0, RV_REG_T1, ctx); 940 928 break; 941 929 942 930 /* ST: *(size *)(dst + off) = imm */ ··· 948 936 } 949 937 950 938 emit_imm(RV_REG_T2, off, ctx); 951 - emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); 939 + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 952 940 emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx); 953 941 break; 954 942 ··· 960 948 } 961 949 962 950 emit_imm(RV_REG_T2, off, ctx); 963 - emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); 951 + 
emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 964 952 emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx); 965 953 break; 966 954 case BPF_ST | BPF_MEM | BPF_W: 967 955 emit_imm(RV_REG_T1, imm, ctx); 968 956 if (is_12b_int(off)) { 969 - emit(rv_sw(rd, off, RV_REG_T1), ctx); 957 + emit_sw(rd, off, RV_REG_T1, ctx); 970 958 break; 971 959 } 972 960 973 961 emit_imm(RV_REG_T2, off, ctx); 974 - emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); 975 - emit(rv_sw(RV_REG_T2, 0, RV_REG_T1), ctx); 962 + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 963 + emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx); 976 964 break; 977 965 case BPF_ST | BPF_MEM | BPF_DW: 978 966 emit_imm(RV_REG_T1, imm, ctx); 979 967 if (is_12b_int(off)) { 980 - emit(rv_sd(rd, off, RV_REG_T1), ctx); 968 + emit_sd(rd, off, RV_REG_T1, ctx); 981 969 break; 982 970 } 983 971 984 972 emit_imm(RV_REG_T2, off, ctx); 985 - emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx); 986 - emit(rv_sd(RV_REG_T2, 0, RV_REG_T1), ctx); 973 + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); 974 + emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); 987 975 break; 988 976 989 977 /* STX: *(size *)(dst + off) = src */ ··· 994 982 } 995 983 996 984 emit_imm(RV_REG_T1, off, ctx); 997 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 985 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 998 986 emit(rv_sb(RV_REG_T1, 0, rs), ctx); 999 987 break; 1000 988 case BPF_STX | BPF_MEM | BPF_H: ··· 1004 992 } 1005 993 1006 994 emit_imm(RV_REG_T1, off, ctx); 1007 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 995 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1008 996 emit(rv_sh(RV_REG_T1, 0, rs), ctx); 1009 997 break; 1010 998 case BPF_STX | BPF_MEM | BPF_W: 1011 999 if (is_12b_int(off)) { 1012 - emit(rv_sw(rd, off, rs), ctx); 1000 + emit_sw(rd, off, rs, ctx); 1013 1001 break; 1014 1002 } 1015 1003 1016 1004 emit_imm(RV_REG_T1, off, ctx); 1017 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 1018 - emit(rv_sw(RV_REG_T1, 0, rs), ctx); 1005 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1006 + emit_sw(RV_REG_T1, 0, rs, ctx); 
1019 1007 break; 1020 1008 case BPF_STX | BPF_MEM | BPF_DW: 1021 1009 if (is_12b_int(off)) { 1022 - emit(rv_sd(rd, off, rs), ctx); 1010 + emit_sd(rd, off, rs, ctx); 1023 1011 break; 1024 1012 } 1025 1013 1026 1014 emit_imm(RV_REG_T1, off, ctx); 1027 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 1028 - emit(rv_sd(RV_REG_T1, 0, rs), ctx); 1015 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1016 + emit_sd(RV_REG_T1, 0, rs, ctx); 1029 1017 break; 1030 1018 /* STX XADD: lock *(u32 *)(dst + off) += src */ 1031 1019 case BPF_STX | BPF_XADD | BPF_W: ··· 1033 1021 case BPF_STX | BPF_XADD | BPF_DW: 1034 1022 if (off) { 1035 1023 if (is_12b_int(off)) { 1036 - emit(rv_addi(RV_REG_T1, rd, off), ctx); 1024 + emit_addi(RV_REG_T1, rd, off, ctx); 1037 1025 } else { 1038 1026 emit_imm(RV_REG_T1, off, ctx); 1039 - emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx); 1027 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1040 1028 } 1041 1029 1042 1030 rd = RV_REG_T1; ··· 1085 1073 1086 1074 /* First instruction is always setting the tail-call-counter 1087 1075 * (TCC) register. This instruction is skipped for tail calls. 1076 + * Force using a 4-byte (non-compressed) instruction. 
1088 1077 */ 1089 1078 emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx); 1090 1079 1091 - emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx); 1080 + emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx); 1092 1081 1093 1082 if (seen_reg(RV_REG_RA, ctx)) { 1094 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_RA), ctx); 1083 + emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx); 1095 1084 store_offset -= 8; 1096 1085 } 1097 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_FP), ctx); 1086 + emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx); 1098 1087 store_offset -= 8; 1099 1088 if (seen_reg(RV_REG_S1, ctx)) { 1100 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S1), ctx); 1089 + emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx); 1101 1090 store_offset -= 8; 1102 1091 } 1103 1092 if (seen_reg(RV_REG_S2, ctx)) { 1104 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S2), ctx); 1093 + emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx); 1105 1094 store_offset -= 8; 1106 1095 } 1107 1096 if (seen_reg(RV_REG_S3, ctx)) { 1108 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S3), ctx); 1097 + emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx); 1109 1098 store_offset -= 8; 1110 1099 } 1111 1100 if (seen_reg(RV_REG_S4, ctx)) { 1112 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S4), ctx); 1101 + emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx); 1113 1102 store_offset -= 8; 1114 1103 } 1115 1104 if (seen_reg(RV_REG_S5, ctx)) { 1116 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S5), ctx); 1105 + emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx); 1117 1106 store_offset -= 8; 1118 1107 } 1119 1108 if (seen_reg(RV_REG_S6, ctx)) { 1120 - emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S6), ctx); 1109 + emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx); 1121 1110 store_offset -= 8; 1122 1111 } 1123 1112 1124 - emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx); 1113 + emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx); 1125 1114 1126 1115 if (bpf_stack_adjust) 1127 - emit(rv_addi(RV_REG_S5, 
RV_REG_SP, bpf_stack_adjust), ctx); 1116 + emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx); 1128 1117 1129 1118 /* Program contains calls and tail calls, so RV_REG_TCC need 1130 1119 * to be saved across calls. 1131 1120 */ 1132 1121 if (seen_tail_call(ctx) && seen_call(ctx)) 1133 - emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx); 1122 + emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx); 1134 1123 1135 1124 ctx->stack_size = stack_adjust; 1136 1125 }
+3 -3
arch/riscv/net/bpf_jit_core.c
··· 73 73 74 74 if (ctx->offset) { 75 75 extra_pass = true; 76 - image_size = sizeof(u32) * ctx->ninsns; 76 + image_size = sizeof(*ctx->insns) * ctx->ninsns; 77 77 goto skip_init_ctx; 78 78 } 79 79 ··· 103 103 if (jit_data->header) 104 104 break; 105 105 106 - image_size = sizeof(u32) * ctx->ninsns; 106 + image_size = sizeof(*ctx->insns) * ctx->ninsns; 107 107 jit_data->header = 108 108 bpf_jit_binary_alloc(image_size, 109 109 &jit_data->image, ··· 114 114 goto out_offset; 115 115 } 116 116 117 - ctx->insns = (u32 *)jit_data->image; 117 + ctx->insns = (u16 *)jit_data->image; 118 118 /* 119 119 * Now, when the image is allocated, the image can 120 120 * potentially shrink more (auipc/jalr -> jal).
+41 -22
arch/s390/net/bpf_jit_comp.c
··· 489 489 } while (re <= last); 490 490 } 491 491 492 + static void bpf_skip(struct bpf_jit *jit, int size) 493 + { 494 + if (size >= 6 && !is_valid_rel(size)) { 495 + /* brcl 0xf,size */ 496 + EMIT6_PCREL_RIL(0xc0f4000000, size); 497 + size -= 6; 498 + } else if (size >= 4 && is_valid_rel(size)) { 499 + /* brc 0xf,size */ 500 + EMIT4_PCREL(0xa7f40000, size); 501 + size -= 4; 502 + } 503 + while (size >= 2) { 504 + /* bcr 0,%0 */ 505 + _EMIT2(0x0700); 506 + size -= 2; 507 + } 508 + } 509 + 492 510 /* 493 511 * Emit function prologue 494 512 * ··· 519 501 /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ 520 502 _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT); 521 503 } else { 522 - /* j tail_call_start: NOP if no tail calls are used */ 523 - EMIT4_PCREL(0xa7f40000, 6); 524 - /* bcr 0,%0 */ 525 - EMIT2(0x0700, 0, REG_0); 504 + /* 505 + * There are no tail calls. Insert nops in order to have 506 + * tail_call_start at a predictable offset. 507 + */ 508 + bpf_skip(jit, 6); 526 509 } 527 510 /* Tail calls have to skip above initialization */ 528 511 jit->tail_call_start = jit->prg; ··· 1287 1268 last = (i == fp->len - 1) ? 1 : 0; 1288 1269 if (last) 1289 1270 break; 1290 - /* j <exit> */ 1291 - EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); 1271 + if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip)) 1272 + /* brc 0xf, <exit> */ 1273 + EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip); 1274 + else 1275 + /* brcl 0xf, <exit> */ 1276 + EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip); 1292 1277 break; 1293 1278 /* 1294 1279 * Branch relative (number of skipped instructions) to offset on ··· 1440 1417 } 1441 1418 break; 1442 1419 branch_ku: 1443 - is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 1444 - /* clfi or clgfi %dst,imm */ 1445 - EMIT6_IMM(is_jmp32 ? 
0xc20f0000 : 0xc20e0000, 1446 - dst_reg, imm); 1447 - if (!is_first_pass(jit) && 1448 - can_use_rel(jit, addrs[i + off + 1])) { 1449 - /* brc mask,off */ 1450 - EMIT4_PCREL_RIC(0xa7040000, 1451 - mask >> 12, addrs[i + off + 1]); 1452 - } else { 1453 - /* brcl mask,off */ 1454 - EMIT6_PCREL_RILC(0xc0040000, 1455 - mask >> 12, addrs[i + off + 1]); 1456 - } 1457 - break; 1420 + /* lgfi %w1,imm (load sign extend imm) */ 1421 + src_reg = REG_1; 1422 + EMIT6_IMM(0xc0010000, src_reg, imm); 1423 + goto branch_xu; 1458 1424 branch_xs: 1459 1425 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 1460 1426 if (!is_first_pass(jit) && ··· 1522 1510 */ 1523 1511 static int bpf_set_addr(struct bpf_jit *jit, int i) 1524 1512 { 1525 - if (!bpf_is_new_addr_sane(jit, i)) 1513 + int delta; 1514 + 1515 + if (is_codegen_pass(jit)) { 1516 + delta = jit->prg - jit->addrs[i]; 1517 + if (delta < 0) 1518 + bpf_skip(jit, -delta); 1519 + } 1520 + if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i))) 1526 1521 return -1; 1527 1522 jit->addrs[i] = jit->prg; 1528 1523 return 0;
+3
include/linux/bpf-netns.h
··· 8 8 enum netns_bpf_attach_type { 9 9 NETNS_BPF_INVALID = -1, 10 10 NETNS_BPF_FLOW_DISSECTOR = 0, 11 + NETNS_BPF_SK_LOOKUP, 11 12 MAX_NETNS_BPF_ATTACH_TYPE 12 13 }; 13 14 ··· 18 17 switch (attach_type) { 19 18 case BPF_FLOW_DISSECTOR: 20 19 return NETNS_BPF_FLOW_DISSECTOR; 20 + case BPF_SK_LOOKUP: 21 + return NETNS_BPF_SK_LOOKUP; 21 22 default: 22 23 return NETNS_BPF_INVALID; 23 24 }
+11 -4
include/linux/bpf.h
··· 249 249 ARG_PTR_TO_INT, /* pointer to int */ 250 250 ARG_PTR_TO_LONG, /* pointer to long */ 251 251 ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ 252 + ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */ 252 253 ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ 253 254 ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ 254 255 ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ ··· 668 667 struct bpf_ctx_arg_aux { 669 668 u32 offset; 670 669 enum bpf_reg_type reg_type; 670 + u32 btf_id; 671 671 }; 672 672 673 673 struct bpf_prog_aux { ··· 930 928 931 929 void bpf_prog_array_delete_safe(struct bpf_prog_array *progs, 932 930 struct bpf_prog *old_prog); 931 + int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index); 932 + int bpf_prog_array_update_at(struct bpf_prog_array *array, int index, 933 + struct bpf_prog *prog); 933 934 int bpf_prog_array_copy_info(struct bpf_prog_array *array, 934 935 u32 *prog_ids, u32 request_cnt, 935 936 u32 *prog_cnt); ··· 1277 1272 void __cpu_map_flush(void); 1278 1273 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, 1279 1274 struct net_device *dev_rx); 1275 + bool cpu_map_prog_allowed(struct bpf_map *map); 1280 1276 1281 1277 /* Return map's numa specified by userspace */ 1282 1278 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) ··· 1438 1432 return 0; 1439 1433 } 1440 1434 1435 + static inline bool cpu_map_prog_allowed(struct bpf_map *map) 1436 + { 1437 + return false; 1438 + } 1439 + 1441 1440 static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, 1442 1441 enum bpf_prog_type type) 1443 1442 { ··· 1542 1531 1543 1532 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr); 1544 1533 void bpf_map_offload_map_free(struct bpf_map *map); 1545 - void init_btf_sock_ids(struct btf *btf); 1546 1534 #else 1547 1535 static inline int bpf_prog_offload_init(struct bpf_prog 
*prog, 1548 1536 union bpf_attr *attr) ··· 1565 1555 } 1566 1556 1567 1557 static inline void bpf_map_offload_map_free(struct bpf_map *map) 1568 - { 1569 - } 1570 - static inline void init_btf_sock_ids(struct btf *btf) 1571 1558 { 1572 1559 } 1573 1560 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+2
include/linux/bpf_types.h
··· 64 64 #ifdef CONFIG_INET 65 65 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport, 66 66 struct sk_reuseport_md, struct sk_reuseport_kern) 67 + BPF_PROG_TYPE(BPF_PROG_TYPE_SK_LOOKUP, sk_lookup, 68 + struct bpf_sk_lookup, struct bpf_sk_lookup_kern) 67 69 #endif 68 70 #if defined(CONFIG_BPF_JIT) 69 71 BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops,
+37 -3
include/linux/btf_ids.h
··· 57 57 * .zero 4 58 58 * 59 59 */ 60 - #define __BTF_ID_LIST(name) \ 60 + #define __BTF_ID_LIST(name, scope) \ 61 61 asm( \ 62 62 ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ 63 - ".local " #name "; \n" \ 63 + "." #scope " " #name "; \n" \ 64 64 #name ":; \n" \ 65 65 ".popsection; \n"); \ 66 66 67 67 #define BTF_ID_LIST(name) \ 68 - __BTF_ID_LIST(name) \ 68 + __BTF_ID_LIST(name, local) \ 69 69 extern u32 name[]; 70 + 71 + #define BTF_ID_LIST_GLOBAL(name) \ 72 + __BTF_ID_LIST(name, globl) 70 73 71 74 /* 72 75 * The BTF_ID_UNUSED macro defines 4 zero bytes. ··· 93 90 #define BTF_ID_LIST(name) static u32 name[5]; 94 91 #define BTF_ID(prefix, name) 95 92 #define BTF_ID_UNUSED 93 + #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; 96 94 97 95 #endif /* CONFIG_DEBUG_INFO_BTF */ 96 + 97 + #ifdef CONFIG_NET 98 + /* Define a list of socket types which can be the argument for 99 + * skc_to_*_sock() helpers. All these sockets should have 100 + * sock_common as the first argument in its memory layout. 
101 + */ 102 + #define BTF_SOCK_TYPE_xxx \ 103 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \ 104 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \ 105 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \ 106 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \ 107 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \ 108 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \ 109 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \ 110 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \ 111 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \ 112 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ 113 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ 114 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ 115 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) 116 + 117 + enum { 118 + #define BTF_SOCK_TYPE(name, str) name, 119 + BTF_SOCK_TYPE_xxx 120 + #undef BTF_SOCK_TYPE 121 + MAX_BTF_SOCK_TYPE, 122 + }; 123 + 124 + extern u32 btf_sock_ids[]; 125 + #endif 98 126 99 127 #endif
+147
include/linux/filter.h
··· 1278 1278 1279 1279 int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len); 1280 1280 1281 + struct bpf_sk_lookup_kern { 1282 + u16 family; 1283 + u16 protocol; 1284 + struct { 1285 + __be32 saddr; 1286 + __be32 daddr; 1287 + } v4; 1288 + struct { 1289 + const struct in6_addr *saddr; 1290 + const struct in6_addr *daddr; 1291 + } v6; 1292 + __be16 sport; 1293 + u16 dport; 1294 + struct sock *selected_sk; 1295 + bool no_reuseport; 1296 + }; 1297 + 1298 + extern struct static_key_false bpf_sk_lookup_enabled; 1299 + 1300 + /* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup. 1301 + * 1302 + * Allowed return values for a BPF SK_LOOKUP program are SK_PASS and 1303 + * SK_DROP. Their meaning is as follows: 1304 + * 1305 + * SK_PASS && ctx.selected_sk != NULL: use selected_sk as lookup result 1306 + * SK_PASS && ctx.selected_sk == NULL: continue to htable-based socket lookup 1307 + * SK_DROP : terminate lookup with -ECONNREFUSED 1308 + * 1309 + * This macro aggregates return values and selected sockets from 1310 + * multiple BPF programs according to following rules in order: 1311 + * 1312 + * 1. If any program returned SK_PASS and a non-NULL ctx.selected_sk, 1313 + * macro result is SK_PASS and last ctx.selected_sk is used. 1314 + * 2. If any program returned SK_DROP return value, 1315 + * macro result is SK_DROP. 1316 + * 3. Otherwise result is SK_PASS and ctx.selected_sk is NULL. 1317 + * 1318 + * Caller must ensure that the prog array is non-NULL, and that the 1319 + * array as well as the programs it contains remain valid. 
1320 + */ 1321 + #define BPF_PROG_SK_LOOKUP_RUN_ARRAY(array, ctx, func) \ 1322 + ({ \ 1323 + struct bpf_sk_lookup_kern *_ctx = &(ctx); \ 1324 + struct bpf_prog_array_item *_item; \ 1325 + struct sock *_selected_sk = NULL; \ 1326 + bool _no_reuseport = false; \ 1327 + struct bpf_prog *_prog; \ 1328 + bool _all_pass = true; \ 1329 + u32 _ret; \ 1330 + \ 1331 + migrate_disable(); \ 1332 + _item = &(array)->items[0]; \ 1333 + while ((_prog = READ_ONCE(_item->prog))) { \ 1334 + /* restore most recent selection */ \ 1335 + _ctx->selected_sk = _selected_sk; \ 1336 + _ctx->no_reuseport = _no_reuseport; \ 1337 + \ 1338 + _ret = func(_prog, _ctx); \ 1339 + if (_ret == SK_PASS && _ctx->selected_sk) { \ 1340 + /* remember last non-NULL socket */ \ 1341 + _selected_sk = _ctx->selected_sk; \ 1342 + _no_reuseport = _ctx->no_reuseport; \ 1343 + } else if (_ret == SK_DROP && _all_pass) { \ 1344 + _all_pass = false; \ 1345 + } \ 1346 + _item++; \ 1347 + } \ 1348 + _ctx->selected_sk = _selected_sk; \ 1349 + _ctx->no_reuseport = _no_reuseport; \ 1350 + migrate_enable(); \ 1351 + _all_pass || _selected_sk ? 
SK_PASS : SK_DROP; \ 1352 + }) 1353 + 1354 + static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, 1355 + const __be32 saddr, const __be16 sport, 1356 + const __be32 daddr, const u16 dport, 1357 + struct sock **psk) 1358 + { 1359 + struct bpf_prog_array *run_array; 1360 + struct sock *selected_sk = NULL; 1361 + bool no_reuseport = false; 1362 + 1363 + rcu_read_lock(); 1364 + run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); 1365 + if (run_array) { 1366 + struct bpf_sk_lookup_kern ctx = { 1367 + .family = AF_INET, 1368 + .protocol = protocol, 1369 + .v4.saddr = saddr, 1370 + .v4.daddr = daddr, 1371 + .sport = sport, 1372 + .dport = dport, 1373 + }; 1374 + u32 act; 1375 + 1376 + act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN); 1377 + if (act == SK_PASS) { 1378 + selected_sk = ctx.selected_sk; 1379 + no_reuseport = ctx.no_reuseport; 1380 + } else { 1381 + selected_sk = ERR_PTR(-ECONNREFUSED); 1382 + } 1383 + } 1384 + rcu_read_unlock(); 1385 + *psk = selected_sk; 1386 + return no_reuseport; 1387 + } 1388 + 1389 + #if IS_ENABLED(CONFIG_IPV6) 1390 + static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, 1391 + const struct in6_addr *saddr, 1392 + const __be16 sport, 1393 + const struct in6_addr *daddr, 1394 + const u16 dport, 1395 + struct sock **psk) 1396 + { 1397 + struct bpf_prog_array *run_array; 1398 + struct sock *selected_sk = NULL; 1399 + bool no_reuseport = false; 1400 + 1401 + rcu_read_lock(); 1402 + run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); 1403 + if (run_array) { 1404 + struct bpf_sk_lookup_kern ctx = { 1405 + .family = AF_INET6, 1406 + .protocol = protocol, 1407 + .v6.saddr = saddr, 1408 + .v6.daddr = daddr, 1409 + .sport = sport, 1410 + .dport = dport, 1411 + }; 1412 + u32 act; 1413 + 1414 + act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN); 1415 + if (act == SK_PASS) { 1416 + selected_sk = ctx.selected_sk; 1417 + no_reuseport = ctx.no_reuseport; 
1418 + } else { 1419 + selected_sk = ERR_PTR(-ECONNREFUSED); 1420 + } 1421 + } 1422 + rcu_read_unlock(); 1423 + *psk = selected_sk; 1424 + return no_reuseport; 1425 + } 1426 + #endif /* IS_ENABLED(CONFIG_IPV6) */ 1427 + 1281 1428 #endif /* __LINUX_FILTER_H__ */
+29 -13
include/net/xdp.h
··· 104 104 struct net_device *dev_rx; /* used by cpumap */ 105 105 }; 106 106 107 + 107 108 static inline struct skb_shared_info * 108 109 xdp_get_shared_info_from_frame(struct xdp_frame *frame) 109 110 { ··· 113 112 return (struct skb_shared_info *)(data_hard_start + frame->frame_sz - 114 113 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); 115 114 } 115 + 116 + struct xdp_cpumap_stats { 117 + unsigned int redirect; 118 + unsigned int pass; 119 + unsigned int drop; 120 + }; 116 121 117 122 /* Clear kernel pointers in xdp_frame */ 118 123 static inline void xdp_scrub_frame(struct xdp_frame *frame) ··· 143 136 xdp->frame_sz = frame->frame_sz; 144 137 } 145 138 146 - /* Convert xdp_buff to xdp_frame */ 147 139 static inline 148 - struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) 140 + int xdp_update_frame_from_buff(struct xdp_buff *xdp, 141 + struct xdp_frame *xdp_frame) 149 142 { 150 - struct xdp_frame *xdp_frame; 151 - int metasize; 152 - int headroom; 153 - 154 - if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) 155 - return xdp_convert_zc_to_xdp_frame(xdp); 143 + int metasize, headroom; 156 144 157 145 /* Assure headroom is available for storing info */ 158 146 headroom = xdp->data - xdp->data_hard_start; 159 147 metasize = xdp->data - xdp->data_meta; 160 148 metasize = metasize > 0 ? 
metasize : 0; 161 149 if (unlikely((headroom - metasize) < sizeof(*xdp_frame))) 162 - return NULL; 150 + return -ENOSPC; 163 151 164 152 /* Catch if driver didn't reserve tailroom for skb_shared_info */ 165 153 if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { 166 154 XDP_WARN("Driver BUG: missing reserved tailroom"); 167 - return NULL; 155 + return -ENOSPC; 168 156 } 169 - 170 - /* Store info in top of packet */ 171 - xdp_frame = xdp->data_hard_start; 172 157 173 158 xdp_frame->data = xdp->data; 174 159 xdp_frame->len = xdp->data_end - xdp->data; 175 160 xdp_frame->headroom = headroom - sizeof(*xdp_frame); 176 161 xdp_frame->metasize = metasize; 177 162 xdp_frame->frame_sz = xdp->frame_sz; 163 + 164 + return 0; 165 + } 166 + 167 + /* Convert xdp_buff to xdp_frame */ 168 + static inline 169 + struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) 170 + { 171 + struct xdp_frame *xdp_frame; 172 + 173 + if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) 174 + return xdp_convert_zc_to_xdp_frame(xdp); 175 + 176 + /* Store info in top of packet */ 177 + xdp_frame = xdp->data_hard_start; 178 + if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0)) 179 + return NULL; 178 180 179 181 /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ 180 182 xdp_frame->mem = xdp->rxq->mem;
+12 -4
include/trace/events/xdp.h
··· 177 177 TRACE_EVENT(xdp_cpumap_kthread, 178 178 179 179 TP_PROTO(int map_id, unsigned int processed, unsigned int drops, 180 - int sched), 180 + int sched, struct xdp_cpumap_stats *xdp_stats), 181 181 182 - TP_ARGS(map_id, processed, drops, sched), 182 + TP_ARGS(map_id, processed, drops, sched, xdp_stats), 183 183 184 184 TP_STRUCT__entry( 185 185 __field(int, map_id) ··· 188 188 __field(unsigned int, drops) 189 189 __field(unsigned int, processed) 190 190 __field(int, sched) 191 + __field(unsigned int, xdp_pass) 192 + __field(unsigned int, xdp_drop) 193 + __field(unsigned int, xdp_redirect) 191 194 ), 192 195 193 196 TP_fast_assign( ··· 200 197 __entry->drops = drops; 201 198 __entry->processed = processed; 202 199 __entry->sched = sched; 200 + __entry->xdp_pass = xdp_stats->pass; 201 + __entry->xdp_drop = xdp_stats->drop; 202 + __entry->xdp_redirect = xdp_stats->redirect; 203 203 ), 204 204 205 205 TP_printk("kthread" 206 206 " cpu=%d map_id=%d action=%s" 207 207 " processed=%u drops=%u" 208 - " sched=%d", 208 + " sched=%d" 209 + " xdp_pass=%u xdp_drop=%u xdp_redirect=%u", 209 210 __entry->cpu, __entry->map_id, 210 211 __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), 211 212 __entry->processed, __entry->drops, 212 - __entry->sched) 213 + __entry->sched, 214 + __entry->xdp_pass, __entry->xdp_drop, __entry->xdp_redirect) 213 215 ); 214 216 215 217 TRACE_EVENT(xdp_cpumap_enqueue,
+94 -3
include/uapi/linux/bpf.h
··· 189 189 BPF_PROG_TYPE_STRUCT_OPS, 190 190 BPF_PROG_TYPE_EXT, 191 191 BPF_PROG_TYPE_LSM, 192 + BPF_PROG_TYPE_SK_LOOKUP, 192 193 }; 193 194 194 195 enum bpf_attach_type { ··· 228 227 BPF_CGROUP_INET6_GETSOCKNAME, 229 228 BPF_XDP_DEVMAP, 230 229 BPF_CGROUP_INET_SOCK_RELEASE, 230 + BPF_XDP_CPUMAP, 231 + BPF_SK_LOOKUP, 231 232 __MAX_BPF_ATTACH_TYPE 232 233 }; 233 234 ··· 2422 2419 * Look for an IPv6 socket. 2423 2420 * 2424 2421 * If the *netns* is a negative signed 32-bit integer, then the 2425 - * socket lookup table in the netns associated with the *ctx* will 2422 + * socket lookup table in the netns associated with the *ctx* 2426 2423 * will be used. For the TC hooks, this is the netns of the device 2427 2424 * in the skb. For socket hooks, this is the netns of the socket. 2428 2425 * If *netns* is any other signed 32-bit value greater than or ··· 2459 2456 * Look for an IPv6 socket. 2460 2457 * 2461 2458 * If the *netns* is a negative signed 32-bit integer, then the 2462 - * socket lookup table in the netns associated with the *ctx* will 2459 + * socket lookup table in the netns associated with the *ctx* 2463 2460 * will be used. For the TC hooks, this is the netns of the device 2464 2461 * in the skb. For socket hooks, this is the netns of the socket. 2465 2462 * If *netns* is any other signed 32-bit value greater than or ··· 3071 3068 * 3072 3069 * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) 3073 3070 * Description 3071 + * Helper is overloaded depending on BPF program type. This 3072 + * description applies to **BPF_PROG_TYPE_SCHED_CLS** and 3073 + * **BPF_PROG_TYPE_SCHED_ACT** programs. 3074 + * 3074 3075 * Assign the *sk* to the *skb*. When combined with appropriate 3075 3076 * routing configuration to receive the packet towards the socket, 3076 3077 * will cause *skb* to be delivered to the specified socket. ··· 3099 3092 * 3100 3093 * **-ESOCKTNOSUPPORT** if the socket type is not supported 3101 3094 * (reuseport). 
3095 + * 3096 + * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) 3097 + * Description 3098 + * Helper is overloaded depending on BPF program type. This 3099 + * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs. 3100 + * 3101 + * Select the *sk* as a result of a socket lookup. 3102 + * 3103 + * For the operation to succeed passed socket must be compatible 3104 + * with the packet description provided by the *ctx* object. 3105 + * 3106 + * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must 3107 + * be an exact match. While IP family (**AF_INET** or 3108 + * **AF_INET6**) must be compatible, that is IPv6 sockets 3109 + * that are not v6-only can be selected for IPv4 packets. 3110 + * 3111 + * Only TCP listeners and UDP unconnected sockets can be 3112 + * selected. *sk* can also be NULL to reset any previous 3113 + * selection. 3114 + * 3115 + * *flags* argument can combination of following values: 3116 + * 3117 + * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous 3118 + * socket selection, potentially done by a BPF program 3119 + * that ran before us. 3120 + * 3121 + * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip 3122 + * load-balancing within reuseport group for the socket 3123 + * being selected. 3124 + * 3125 + * On success *ctx->sk* will point to the selected socket. 3126 + * 3127 + * Return 3128 + * 0 on success, or a negative errno in case of failure. 3129 + * 3130 + * * **-EAFNOSUPPORT** if socket family (*sk->family*) is 3131 + * not compatible with packet family (*ctx->family*). 3132 + * 3133 + * * **-EEXIST** if socket has been already selected, 3134 + * potentially by another program, and 3135 + * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified. 3136 + * 3137 + * * **-EINVAL** if unsupported flags were specified. 3138 + * 3139 + * * **-EPROTOTYPE** if socket L4 protocol 3140 + * (*sk->protocol*) doesn't match packet protocol 3141 + * (*ctx->protocol*). 
3142 + * 3143 + * * **-ESOCKTNOSUPPORT** if socket is not in allowed 3144 + * state (TCP listening or UDP unconnected). 3102 3145 * 3103 3146 * u64 bpf_ktime_get_boot_ns(void) 3104 3147 * Description ··· 3663 3606 BPF_RINGBUF_HDR_SZ = 8, 3664 3607 }; 3665 3608 3609 + /* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */ 3610 + enum { 3611 + BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0), 3612 + BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1), 3613 + }; 3614 + 3666 3615 /* Mode for BPF_FUNC_skb_adjust_room helper. */ 3667 3616 enum bpf_adj_room_mode { 3668 3617 BPF_ADJ_ROOM_NET, ··· 3912 3849 } bpf_prog; 3913 3850 }; 3914 3851 3852 + /* CPUMAP map-value layout 3853 + * 3854 + * The struct data-layout of map-value is a configuration interface. 3855 + * New members can only be added to the end of this structure. 3856 + */ 3857 + struct bpf_cpumap_val { 3858 + __u32 qsize; /* queue size to remote target CPU */ 3859 + union { 3860 + int fd; /* prog fd on map write */ 3861 + __u32 id; /* prog id on map read */ 3862 + } bpf_prog; 3863 + }; 3864 + 3915 3865 enum sk_action { 3916 3866 SK_DROP = 0, 3917 3867 SK_PASS, ··· 4062 3986 4063 3987 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed 4064 3988 * by user and intended to be used by socket (e.g. to bind to, depends on 4065 - * attach attach type). 3989 + * attach type). 4066 3990 */ 4067 3991 struct bpf_sock_addr { 4068 3992 __u32 user_family; /* Allows 4-byte read, but no write. */ ··· 4411 4335 __u32 pid; 4412 4336 __u32 tgid; 4413 4337 }; 4338 + 4339 + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. 
*/ 4340 + struct bpf_sk_lookup { 4341 + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ 4342 + 4343 + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ 4344 + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ 4345 + __u32 remote_ip4; /* Network byte order */ 4346 + __u32 remote_ip6[4]; /* Network byte order */ 4347 + __u32 remote_port; /* Network byte order */ 4348 + __u32 local_ip4; /* Network byte order */ 4349 + __u32 local_ip6[4]; /* Network byte order */ 4350 + __u32 local_port; /* Host byte order */ 4351 + }; 4352 + 4414 4353 #endif /* _UAPI__LINUX_BPF_H__ */
+3 -3
kernel/bpf/btf.c
··· 3672 3672 goto errout; 3673 3673 3674 3674 bpf_struct_ops_init(btf, log); 3675 - init_btf_sock_ids(btf); 3676 3675 3677 3676 btf_verifier_env_free(env); 3678 3677 refcount_set(&btf->refcnt, 1); ··· 3817 3818 return true; 3818 3819 3819 3820 /* this is a pointer to another type */ 3820 - info->reg_type = PTR_TO_BTF_ID; 3821 3821 for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { 3822 3822 const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; 3823 3823 3824 3824 if (ctx_arg_info->offset == off) { 3825 3825 info->reg_type = ctx_arg_info->reg_type; 3826 - break; 3826 + info->btf_id = ctx_arg_info->btf_id; 3827 + return true; 3827 3828 } 3828 3829 } 3829 3830 3831 + info->reg_type = PTR_TO_BTF_ID; 3830 3832 if (tgt_prog) { 3831 3833 ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg); 3832 3834 if (ret > 0) {
+55
kernel/bpf/core.c
··· 1958 1958 } 1959 1959 } 1960 1960 1961 + /** 1962 + * bpf_prog_array_delete_safe_at() - Replaces the program at the given 1963 + * index into the program array with 1964 + * a dummy no-op program. 1965 + * @array: a bpf_prog_array 1966 + * @index: the index of the program to replace 1967 + * 1968 + * Skips over dummy programs, by not counting them, when calculating 1969 + * the the position of the program to replace. 1970 + * 1971 + * Return: 1972 + * * 0 - Success 1973 + * * -EINVAL - Invalid index value. Must be a non-negative integer. 1974 + * * -ENOENT - Index out of range 1975 + */ 1976 + int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index) 1977 + { 1978 + return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog); 1979 + } 1980 + 1981 + /** 1982 + * bpf_prog_array_update_at() - Updates the program at the given index 1983 + * into the program array. 1984 + * @array: a bpf_prog_array 1985 + * @index: the index of the program to update 1986 + * @prog: the program to insert into the array 1987 + * 1988 + * Skips over dummy programs, by not counting them, when calculating 1989 + * the position of the program to update. 1990 + * 1991 + * Return: 1992 + * * 0 - Success 1993 + * * -EINVAL - Invalid index value. Must be a non-negative integer. 1994 + * * -ENOENT - Index out of range 1995 + */ 1996 + int bpf_prog_array_update_at(struct bpf_prog_array *array, int index, 1997 + struct bpf_prog *prog) 1998 + { 1999 + struct bpf_prog_array_item *item; 2000 + 2001 + if (unlikely(index < 0)) 2002 + return -EINVAL; 2003 + 2004 + for (item = array->items; item->prog; item++) { 2005 + if (item->prog == &dummy_bpf_prog.prog) 2006 + continue; 2007 + if (!index) { 2008 + WRITE_ONCE(item->prog, prog); 2009 + return 0; 2010 + } 2011 + index--; 2012 + } 2013 + return -ENOENT; 2014 + } 2015 + 1961 2016 int bpf_prog_array_copy(struct bpf_prog_array *old_array, 1962 2017 struct bpf_prog *exclude_prog, 1963 2018 struct bpf_prog *include_prog,
+140 -27
kernel/bpf/cpumap.c
··· 52 52 struct bpf_cpu_map_entry { 53 53 u32 cpu; /* kthread CPU and map index */ 54 54 int map_id; /* Back reference to map */ 55 - u32 qsize; /* Queue size placeholder for map lookup */ 56 55 57 56 /* XDP can run multiple RX-ring queues, need __percpu enqueue store */ 58 57 struct xdp_bulk_queue __percpu *bulkq; ··· 61 62 /* Queue with potential multi-producers, and single-consumer kthread */ 62 63 struct ptr_ring *queue; 63 64 struct task_struct *kthread; 64 - struct work_struct kthread_stop_wq; 65 + 66 + struct bpf_cpumap_val value; 67 + struct bpf_prog *prog; 65 68 66 69 atomic_t refcnt; /* Control when this struct can be free'ed */ 67 70 struct rcu_head rcu; 71 + 72 + struct work_struct kthread_stop_wq; 68 73 }; 69 74 70 75 struct bpf_cpu_map { ··· 83 80 84 81 static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) 85 82 { 83 + u32 value_size = attr->value_size; 86 84 struct bpf_cpu_map *cmap; 87 85 int err = -ENOMEM; 88 86 u64 cost; ··· 94 90 95 91 /* check sanity of attributes */ 96 92 if (attr->max_entries == 0 || attr->key_size != 4 || 97 - attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) 93 + (value_size != offsetofend(struct bpf_cpumap_val, qsize) && 94 + value_size != offsetofend(struct bpf_cpumap_val, bpf_prog.fd)) || 95 + attr->map_flags & ~BPF_F_NUMA_NODE) 98 96 return ERR_PTR(-EINVAL); 99 97 100 98 cmap = kzalloc(sizeof(*cmap), GFP_USER); ··· 218 212 static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) 219 213 { 220 214 if (atomic_dec_and_test(&rcpu->refcnt)) { 215 + if (rcpu->prog) 216 + bpf_prog_put(rcpu->prog); 221 217 /* The queue should be empty at this point */ 222 218 __cpu_map_ring_cleanup(rcpu->queue); 223 219 ptr_ring_cleanup(rcpu->queue, NULL); 224 220 kfree(rcpu->queue); 225 221 kfree(rcpu); 226 222 } 223 + } 224 + 225 + static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, 226 + void **frames, int n, 227 + struct xdp_cpumap_stats *stats) 228 + { 229 + struct xdp_rxq_info rxq; 230 + struct 
xdp_buff xdp; 231 + int i, nframes = 0; 232 + 233 + if (!rcpu->prog) 234 + return n; 235 + 236 + rcu_read_lock_bh(); 237 + 238 + xdp_set_return_frame_no_direct(); 239 + xdp.rxq = &rxq; 240 + 241 + for (i = 0; i < n; i++) { 242 + struct xdp_frame *xdpf = frames[i]; 243 + u32 act; 244 + int err; 245 + 246 + rxq.dev = xdpf->dev_rx; 247 + rxq.mem = xdpf->mem; 248 + /* TODO: report queue_index to xdp_rxq_info */ 249 + 250 + xdp_convert_frame_to_buff(xdpf, &xdp); 251 + 252 + act = bpf_prog_run_xdp(rcpu->prog, &xdp); 253 + switch (act) { 254 + case XDP_PASS: 255 + err = xdp_update_frame_from_buff(&xdp, xdpf); 256 + if (err < 0) { 257 + xdp_return_frame(xdpf); 258 + stats->drop++; 259 + } else { 260 + frames[nframes++] = xdpf; 261 + stats->pass++; 262 + } 263 + break; 264 + case XDP_REDIRECT: 265 + err = xdp_do_redirect(xdpf->dev_rx, &xdp, 266 + rcpu->prog); 267 + if (unlikely(err)) { 268 + xdp_return_frame(xdpf); 269 + stats->drop++; 270 + } else { 271 + stats->redirect++; 272 + } 273 + break; 274 + default: 275 + bpf_warn_invalid_xdp_action(act); 276 + /* fallthrough */ 277 + case XDP_DROP: 278 + xdp_return_frame(xdpf); 279 + stats->drop++; 280 + break; 281 + } 282 + } 283 + 284 + if (stats->redirect) 285 + xdp_do_flush_map(); 286 + 287 + xdp_clear_return_frame_no_direct(); 288 + 289 + rcu_read_unlock_bh(); /* resched point, may call do_softirq() */ 290 + 291 + return nframes; 227 292 } 228 293 229 294 #define CPUMAP_BATCH 8 ··· 311 234 * kthread_stop signal until queue is empty. 
312 235 */ 313 236 while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { 237 + struct xdp_cpumap_stats stats = {}; /* zero stats */ 238 + gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; 314 239 unsigned int drops = 0, sched = 0; 315 240 void *frames[CPUMAP_BATCH]; 316 241 void *skbs[CPUMAP_BATCH]; 317 - gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; 318 - int i, n, m; 242 + int i, n, m, nframes; 319 243 320 244 /* Release CPU reschedule checks */ 321 245 if (__ptr_ring_empty(rcpu->queue)) { ··· 337 259 * kthread CPU pinned. Lockless access to ptr_ring 338 260 * consume side valid as no-resize allowed of queue. 339 261 */ 340 - n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH); 341 - 262 + n = __ptr_ring_consume_batched(rcpu->queue, frames, 263 + CPUMAP_BATCH); 342 264 for (i = 0; i < n; i++) { 343 265 void *f = frames[i]; 344 266 struct page *page = virt_to_page(f); ··· 350 272 prefetchw(page); 351 273 } 352 274 353 - m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs); 354 - if (unlikely(m == 0)) { 355 - for (i = 0; i < n; i++) 356 - skbs[i] = NULL; /* effect: xdp_return_frame */ 357 - drops = n; 275 + /* Support running another XDP prog on this CPU */ 276 + nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats); 277 + if (nframes) { 278 + m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs); 279 + if (unlikely(m == 0)) { 280 + for (i = 0; i < nframes; i++) 281 + skbs[i] = NULL; /* effect: xdp_return_frame */ 282 + drops += nframes; 283 + } 358 284 } 359 285 360 286 local_bh_disable(); 361 - for (i = 0; i < n; i++) { 287 + for (i = 0; i < nframes; i++) { 362 288 struct xdp_frame *xdpf = frames[i]; 363 289 struct sk_buff *skb = skbs[i]; 364 290 int ret; ··· 379 297 drops++; 380 298 } 381 299 /* Feedback loop via tracepoint */ 382 - trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched); 300 + trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats); 383 301 384 302 local_bh_enable(); /* resched point, may call do_softirq() */ 
385 303 } ··· 389 307 return 0; 390 308 } 391 309 392 - static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, 393 - int map_id) 310 + bool cpu_map_prog_allowed(struct bpf_map *map) 394 311 { 312 + return map->map_type == BPF_MAP_TYPE_CPUMAP && 313 + map->value_size != offsetofend(struct bpf_cpumap_val, qsize); 314 + } 315 + 316 + static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) 317 + { 318 + struct bpf_prog *prog; 319 + 320 + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); 321 + if (IS_ERR(prog)) 322 + return PTR_ERR(prog); 323 + 324 + if (prog->expected_attach_type != BPF_XDP_CPUMAP) { 325 + bpf_prog_put(prog); 326 + return -EINVAL; 327 + } 328 + 329 + rcpu->value.bpf_prog.id = prog->aux->id; 330 + rcpu->prog = prog; 331 + 332 + return 0; 333 + } 334 + 335 + static struct bpf_cpu_map_entry * 336 + __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id) 337 + { 338 + int numa, err, i, fd = value->bpf_prog.fd; 395 339 gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; 396 340 struct bpf_cpu_map_entry *rcpu; 397 341 struct xdp_bulk_queue *bq; 398 - int numa, err, i; 399 342 400 343 /* Have map->numa_node, but choose node of redirect target CPU */ 401 344 numa = cpu_to_node(cpu); ··· 445 338 if (!rcpu->queue) 446 339 goto free_bulkq; 447 340 448 - err = ptr_ring_init(rcpu->queue, qsize, gfp); 341 + err = ptr_ring_init(rcpu->queue, value->qsize, gfp); 449 342 if (err) 450 343 goto free_queue; 451 344 452 345 rcpu->cpu = cpu; 453 346 rcpu->map_id = map_id; 454 - rcpu->qsize = qsize; 347 + rcpu->value.qsize = value->qsize; 348 + 349 + if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd)) 350 + goto free_ptr_ring; 455 351 456 352 /* Setup kthread */ 457 353 rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa, 458 354 "cpumap/%d/map:%d", cpu, map_id); 459 355 if (IS_ERR(rcpu->kthread)) 460 - goto free_ptr_ring; 356 + goto free_prog; 461 357 462 358 get_cpu_map_entry(rcpu); /* 1-refcnt for being in 
cmap->cpu_map[] */ 463 359 get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */ ··· 471 361 472 362 return rcpu; 473 363 364 + free_prog: 365 + if (rcpu->prog) 366 + bpf_prog_put(rcpu->prog); 474 367 free_ptr_ring: 475 368 ptr_ring_cleanup(rcpu->queue, NULL); 476 369 free_queue: ··· 550 437 u64 map_flags) 551 438 { 552 439 struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); 440 + struct bpf_cpumap_val cpumap_value = {}; 553 441 struct bpf_cpu_map_entry *rcpu; 554 - 555 442 /* Array index key correspond to CPU number */ 556 443 u32 key_cpu = *(u32 *)key; 557 - /* Value is the queue size */ 558 - u32 qsize = *(u32 *)value; 444 + 445 + memcpy(&cpumap_value, value, map->value_size); 559 446 560 447 if (unlikely(map_flags > BPF_EXIST)) 561 448 return -EINVAL; ··· 563 450 return -E2BIG; 564 451 if (unlikely(map_flags == BPF_NOEXIST)) 565 452 return -EEXIST; 566 - if (unlikely(qsize > 16384)) /* sanity limit on qsize */ 453 + if (unlikely(cpumap_value.qsize > 16384)) /* sanity limit on qsize */ 567 454 return -EOVERFLOW; 568 455 569 456 /* Make sure CPU is a valid possible cpu */ 570 457 if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu)) 571 458 return -ENODEV; 572 459 573 - if (qsize == 0) { 460 + if (cpumap_value.qsize == 0) { 574 461 rcpu = NULL; /* Same as deleting */ 575 462 } else { 576 463 /* Updating qsize cause re-allocation of bpf_cpu_map_entry */ 577 - rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id); 464 + rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id); 578 465 if (!rcpu) 579 466 return -ENOMEM; 580 467 rcpu->cmap = cmap; ··· 636 523 struct bpf_cpu_map_entry *rcpu = 637 524 __cpu_map_lookup_elem(map, *(u32 *)key); 638 525 639 - return rcpu ? &rcpu->qsize : NULL; 526 + return rcpu ? &rcpu->value : NULL; 640 527 } 641 528 642 529 static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+6 -1
kernel/bpf/map_iter.c
··· 4 4 #include <linux/fs.h> 5 5 #include <linux/filter.h> 6 6 #include <linux/kernel.h> 7 + #include <linux/btf_ids.h> 7 8 8 9 struct bpf_iter_seq_map_info { 9 10 u32 mid; ··· 82 81 .show = bpf_map_seq_show, 83 82 }; 84 83 85 - static const struct bpf_iter_reg bpf_map_reg_info = { 84 + BTF_ID_LIST(btf_bpf_map_id) 85 + BTF_ID(struct, bpf_map) 86 + 87 + static struct bpf_iter_reg bpf_map_reg_info = { 86 88 .target = "bpf_map", 87 89 .seq_ops = &bpf_map_seq_ops, 88 90 .init_seq_private = NULL, ··· 100 96 101 97 static int __init bpf_map_iter_init(void) 102 98 { 99 + bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id; 103 100 return bpf_iter_reg_target(&bpf_map_reg_info); 104 101 } 105 102
+121 -10
kernel/bpf/net_namespace.c
··· 25 25 /* Protects updates to netns_bpf */ 26 26 DEFINE_MUTEX(netns_bpf_mutex); 27 27 28 + static void netns_bpf_attach_type_unneed(enum netns_bpf_attach_type type) 29 + { 30 + switch (type) { 31 + #ifdef CONFIG_INET 32 + case NETNS_BPF_SK_LOOKUP: 33 + static_branch_dec(&bpf_sk_lookup_enabled); 34 + break; 35 + #endif 36 + default: 37 + break; 38 + } 39 + } 40 + 41 + static void netns_bpf_attach_type_need(enum netns_bpf_attach_type type) 42 + { 43 + switch (type) { 44 + #ifdef CONFIG_INET 45 + case NETNS_BPF_SK_LOOKUP: 46 + static_branch_inc(&bpf_sk_lookup_enabled); 47 + break; 48 + #endif 49 + default: 50 + break; 51 + } 52 + } 53 + 28 54 /* Must be called with netns_bpf_mutex held. */ 29 55 static void netns_bpf_run_array_detach(struct net *net, 30 56 enum netns_bpf_attach_type type) ··· 62 36 bpf_prog_array_free(run_array); 63 37 } 64 38 39 + static int link_index(struct net *net, enum netns_bpf_attach_type type, 40 + struct bpf_netns_link *link) 41 + { 42 + struct bpf_netns_link *pos; 43 + int i = 0; 44 + 45 + list_for_each_entry(pos, &net->bpf.links[type], node) { 46 + if (pos == link) 47 + return i; 48 + i++; 49 + } 50 + return -ENOENT; 51 + } 52 + 53 + static int link_count(struct net *net, enum netns_bpf_attach_type type) 54 + { 55 + struct list_head *pos; 56 + int i = 0; 57 + 58 + list_for_each(pos, &net->bpf.links[type]) 59 + i++; 60 + return i; 61 + } 62 + 63 + static void fill_prog_array(struct net *net, enum netns_bpf_attach_type type, 64 + struct bpf_prog_array *prog_array) 65 + { 66 + struct bpf_netns_link *pos; 67 + unsigned int i = 0; 68 + 69 + list_for_each_entry(pos, &net->bpf.links[type], node) { 70 + prog_array->items[i].prog = pos->link.prog; 71 + i++; 72 + } 73 + } 74 + 65 75 static void bpf_netns_link_release(struct bpf_link *link) 66 76 { 67 77 struct bpf_netns_link *net_link = 68 78 container_of(link, struct bpf_netns_link, link); 69 79 enum netns_bpf_attach_type type = net_link->netns_type; 80 + struct bpf_prog_array *old_array, 
*new_array; 70 81 struct net *net; 82 + int cnt, idx; 71 83 72 84 mutex_lock(&netns_bpf_mutex); 73 85 ··· 117 53 if (!net) 118 54 goto out_unlock; 119 55 120 - netns_bpf_run_array_detach(net, type); 56 + /* Mark attach point as unused */ 57 + netns_bpf_attach_type_unneed(type); 58 + 59 + /* Remember link position in case of safe delete */ 60 + idx = link_index(net, type, net_link); 121 61 list_del(&net_link->node); 62 + 63 + cnt = link_count(net, type); 64 + if (!cnt) { 65 + netns_bpf_run_array_detach(net, type); 66 + goto out_unlock; 67 + } 68 + 69 + old_array = rcu_dereference_protected(net->bpf.run_array[type], 70 + lockdep_is_held(&netns_bpf_mutex)); 71 + new_array = bpf_prog_array_alloc(cnt, GFP_KERNEL); 72 + if (!new_array) { 73 + WARN_ON(bpf_prog_array_delete_safe_at(old_array, idx)); 74 + goto out_unlock; 75 + } 76 + fill_prog_array(net, type, new_array); 77 + rcu_assign_pointer(net->bpf.run_array[type], new_array); 78 + bpf_prog_array_free(old_array); 122 79 123 80 out_unlock: 124 81 mutex_unlock(&netns_bpf_mutex); ··· 162 77 enum netns_bpf_attach_type type = net_link->netns_type; 163 78 struct bpf_prog_array *run_array; 164 79 struct net *net; 165 - int ret = 0; 80 + int idx, ret; 166 81 167 82 if (old_prog && old_prog != link->prog) 168 83 return -EPERM; ··· 180 95 181 96 run_array = rcu_dereference_protected(net->bpf.run_array[type], 182 97 lockdep_is_held(&netns_bpf_mutex)); 183 - WRITE_ONCE(run_array->items[0].prog, new_prog); 98 + idx = link_index(net, type, net_link); 99 + ret = bpf_prog_array_update_at(run_array, idx, new_prog); 100 + if (ret) 101 + goto out_unlock; 184 102 185 103 old_prog = xchg(&link->prog, new_prog); 186 104 bpf_prog_put(old_prog); ··· 397 309 return ret; 398 310 } 399 311 312 + static int netns_bpf_max_progs(enum netns_bpf_attach_type type) 313 + { 314 + switch (type) { 315 + case NETNS_BPF_FLOW_DISSECTOR: 316 + return 1; 317 + case NETNS_BPF_SK_LOOKUP: 318 + return 64; 319 + default: 320 + return 0; 321 + } 322 + } 323 + 400 
324 static int netns_bpf_link_attach(struct net *net, struct bpf_link *link, 401 325 enum netns_bpf_attach_type type) 402 326 { 403 327 struct bpf_netns_link *net_link = 404 328 container_of(link, struct bpf_netns_link, link); 405 329 struct bpf_prog_array *run_array; 406 - int err; 330 + int cnt, err; 407 331 408 332 mutex_lock(&netns_bpf_mutex); 409 333 410 - /* Allow attaching only one prog or link for now */ 411 - if (!list_empty(&net->bpf.links[type])) { 334 + cnt = link_count(net, type); 335 + if (cnt >= netns_bpf_max_progs(type)) { 412 336 err = -E2BIG; 413 337 goto out_unlock; 414 338 } ··· 434 334 case NETNS_BPF_FLOW_DISSECTOR: 435 335 err = flow_dissector_bpf_prog_attach_check(net, link->prog); 436 336 break; 337 + case NETNS_BPF_SK_LOOKUP: 338 + err = 0; /* nothing to check */ 339 + break; 437 340 default: 438 341 err = -EINVAL; 439 342 break; ··· 444 341 if (err) 445 342 goto out_unlock; 446 343 447 - run_array = bpf_prog_array_alloc(1, GFP_KERNEL); 344 + run_array = bpf_prog_array_alloc(cnt + 1, GFP_KERNEL); 448 345 if (!run_array) { 449 346 err = -ENOMEM; 450 347 goto out_unlock; 451 348 } 452 - run_array->items[0].prog = link->prog; 453 - rcu_assign_pointer(net->bpf.run_array[type], run_array); 454 349 455 350 list_add_tail(&net_link->node, &net->bpf.links[type]); 351 + 352 + fill_prog_array(net, type, run_array); 353 + run_array = rcu_replace_pointer(net->bpf.run_array[type], run_array, 354 + lockdep_is_held(&netns_bpf_mutex)); 355 + bpf_prog_array_free(run_array); 356 + 357 + /* Mark attach point as used */ 358 + netns_bpf_attach_type_need(type); 456 359 457 360 out_unlock: 458 361 mutex_unlock(&netns_bpf_mutex); ··· 535 426 mutex_lock(&netns_bpf_mutex); 536 427 for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) { 537 428 netns_bpf_run_array_detach(net, type); 538 - list_for_each_entry(net_link, &net->bpf.links[type], node) 429 + list_for_each_entry(net_link, &net->bpf.links[type], node) { 539 430 net_link->net = NULL; /* auto-detach link */ 
431 + netns_bpf_attach_type_unneed(type); 432 + } 540 433 if (net->bpf.progs[type]) 541 434 bpf_prog_put(net->bpf.progs[type]); 542 435 }
+9
kernel/bpf/syscall.c
··· 2022 2022 default: 2023 2023 return -EINVAL; 2024 2024 } 2025 + case BPF_PROG_TYPE_SK_LOOKUP: 2026 + if (expected_attach_type == BPF_SK_LOOKUP) 2027 + return 0; 2028 + return -EINVAL; 2025 2029 case BPF_PROG_TYPE_EXT: 2026 2030 if (expected_attach_type) 2027 2031 return -EINVAL; ··· 2760 2756 case BPF_PROG_TYPE_CGROUP_SOCK: 2761 2757 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2762 2758 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2759 + case BPF_PROG_TYPE_SK_LOOKUP: 2763 2760 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 2764 2761 case BPF_PROG_TYPE_CGROUP_SKB: 2765 2762 if (!capable(CAP_NET_ADMIN)) ··· 2822 2817 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 2823 2818 case BPF_TRACE_ITER: 2824 2819 return BPF_PROG_TYPE_TRACING; 2820 + case BPF_SK_LOOKUP: 2821 + return BPF_PROG_TYPE_SK_LOOKUP; 2825 2822 default: 2826 2823 return BPF_PROG_TYPE_UNSPEC; 2827 2824 } ··· 2960 2953 case BPF_LIRC_MODE2: 2961 2954 return lirc_prog_query(attr, uattr); 2962 2955 case BPF_FLOW_DISSECTOR: 2956 + case BPF_SK_LOOKUP: 2963 2957 return netns_bpf_prog_query(attr, uattr); 2964 2958 default: 2965 2959 return -EINVAL; ··· 3899 3891 ret = tracing_bpf_link_attach(attr, prog); 3900 3892 break; 3901 3893 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3894 + case BPF_PROG_TYPE_SK_LOOKUP: 3902 3895 ret = netns_bpf_link_create(attr, prog); 3903 3896 break; 3904 3897 default:
+10 -2
kernel/bpf/task_iter.c
··· 7 7 #include <linux/fs.h> 8 8 #include <linux/fdtable.h> 9 9 #include <linux/filter.h> 10 + #include <linux/btf_ids.h> 10 11 11 12 struct bpf_iter_seq_task_common { 12 13 struct pid_namespace *ns; ··· 313 312 .show = task_file_seq_show, 314 313 }; 315 314 316 - static const struct bpf_iter_reg task_reg_info = { 315 + BTF_ID_LIST(btf_task_file_ids) 316 + BTF_ID(struct, task_struct) 317 + BTF_ID(struct, file) 318 + 319 + static struct bpf_iter_reg task_reg_info = { 317 320 .target = "task", 318 321 .seq_ops = &task_seq_ops, 319 322 .init_seq_private = init_seq_pidns, ··· 330 325 }, 331 326 }; 332 327 333 - static const struct bpf_iter_reg task_file_reg_info = { 328 + static struct bpf_iter_reg task_file_reg_info = { 334 329 .target = "task_file", 335 330 .seq_ops = &task_file_seq_ops, 336 331 .init_seq_private = init_seq_pidns, ··· 349 344 { 350 345 int ret; 351 346 347 + task_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0]; 352 348 ret = bpf_iter_reg_target(&task_reg_info); 353 349 if (ret) 354 350 return ret; 355 351 352 + task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0]; 353 + task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1]; 356 354 return bpf_iter_reg_target(&task_file_reg_info); 357 355 } 358 356 late_initcall(task_iter_init);
+10 -3
kernel/bpf/verifier.c
··· 3878 3878 } 3879 3879 meta->ref_obj_id = reg->ref_obj_id; 3880 3880 } 3881 - } else if (arg_type == ARG_PTR_TO_SOCKET) { 3881 + } else if (arg_type == ARG_PTR_TO_SOCKET || 3882 + arg_type == ARG_PTR_TO_SOCKET_OR_NULL) { 3882 3883 expected_type = PTR_TO_SOCKET; 3883 - if (type != expected_type) 3884 - goto err_type; 3884 + if (!(register_is_null(reg) && 3885 + arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) { 3886 + if (type != expected_type) 3887 + goto err_type; 3888 + } 3885 3889 } else if (arg_type == ARG_PTR_TO_BTF_ID) { 3886 3890 expected_type = PTR_TO_BTF_ID; 3887 3891 if (type != expected_type) ··· 7357 7353 default: 7358 7354 return -ENOTSUPP; 7359 7355 } 7356 + break; 7357 + case BPF_PROG_TYPE_SK_LOOKUP: 7358 + range = tnum_range(SK_DROP, SK_PASS); 7360 7359 break; 7361 7360 case BPF_PROG_TYPE_EXT: 7362 7361 /* freplace program can return anything as its return value
-20
lib/test_bpf.c
··· 5275 5275 { /* Mainly checking JIT here. */ 5276 5276 "BPF_MAXINSNS: Ctx heavy transformations", 5277 5277 { }, 5278 - #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5279 - CLASSIC | FLAG_EXPECTED_FAIL, 5280 - #else 5281 5278 CLASSIC, 5282 - #endif 5283 5279 { }, 5284 5280 { 5285 5281 { 1, SKB_VLAN_PRESENT }, 5286 5282 { 10, SKB_VLAN_PRESENT } 5287 5283 }, 5288 5284 .fill_helper = bpf_fill_maxinsns6, 5289 - .expected_errcode = -ENOTSUPP, 5290 5285 }, 5291 5286 { /* Mainly checking JIT here. */ 5292 5287 "BPF_MAXINSNS: Call heavy transformations", 5293 5288 { }, 5294 - #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5295 - CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, 5296 - #else 5297 5289 CLASSIC | FLAG_NO_DATA, 5298 - #endif 5299 5290 { }, 5300 5291 { { 1, 0 }, { 10, 0 } }, 5301 5292 .fill_helper = bpf_fill_maxinsns7, 5302 - .expected_errcode = -ENOTSUPP, 5303 5293 }, 5304 5294 { /* Mainly checking JIT here. */ 5305 5295 "BPF_MAXINSNS: Jump heavy test", ··· 5340 5350 { 5341 5351 "BPF_MAXINSNS: exec all MSH", 5342 5352 { }, 5343 - #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5344 - CLASSIC | FLAG_EXPECTED_FAIL, 5345 - #else 5346 5353 CLASSIC, 5347 - #endif 5348 5354 { 0xfa, 0xfb, 0xfc, 0xfd, }, 5349 5355 { { 4, 0xababab83 } }, 5350 5356 .fill_helper = bpf_fill_maxinsns13, 5351 - .expected_errcode = -ENOTSUPP, 5352 5357 }, 5353 5358 { 5354 5359 "BPF_MAXINSNS: ld_abs+get_processor_id", 5355 5360 { }, 5356 - #if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) 5357 - CLASSIC | FLAG_EXPECTED_FAIL, 5358 - #else 5359 5361 CLASSIC, 5360 - #endif 5361 5362 { }, 5362 5363 { { 1, 0xbee } }, 5363 5364 .fill_helper = bpf_fill_ld_abs_get_processor_id, 5364 - .expected_errcode = -ENOTSUPP, 5365 5365 }, 5366 5366 /* 5367 5367 * LD_IND / LD_ABS on fragmented SKBs
+9
net/core/dev.c
··· 5449 5449 for (i = 0; i < new->aux->used_map_cnt; i++) { 5450 5450 if (dev_map_can_have_prog(new->aux->used_maps[i])) 5451 5451 return -EINVAL; 5452 + if (cpu_map_prog_allowed(new->aux->used_maps[i])) 5453 + return -EINVAL; 5452 5454 } 5453 5455 } 5454 5456 ··· 8878 8876 8879 8877 if (prog->expected_attach_type == BPF_XDP_DEVMAP) { 8880 8878 NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); 8879 + bpf_prog_put(prog); 8880 + return -EINVAL; 8881 + } 8882 + 8883 + if (prog->expected_attach_type == BPF_XDP_CPUMAP) { 8884 + NL_SET_ERR_MSG(extack, 8885 + "BPF_XDP_CPUMAP programs can not be attached to a device"); 8881 8886 bpf_prog_put(prog); 8882 8887 return -EINVAL; 8883 8888 }
+188 -44
net/core/filter.c
··· 9252 9252 9253 9253 const struct bpf_prog_ops sk_reuseport_prog_ops = { 9254 9254 }; 9255 + 9256 + DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled); 9257 + EXPORT_SYMBOL(bpf_sk_lookup_enabled); 9258 + 9259 + BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, 9260 + struct sock *, sk, u64, flags) 9261 + { 9262 + if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE | 9263 + BPF_SK_LOOKUP_F_NO_REUSEPORT))) 9264 + return -EINVAL; 9265 + if (unlikely(sk && sk_is_refcounted(sk))) 9266 + return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */ 9267 + if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED)) 9268 + return -ESOCKTNOSUPPORT; /* reject connected sockets */ 9269 + 9270 + /* Check if socket is suitable for packet L3/L4 protocol */ 9271 + if (sk && sk->sk_protocol != ctx->protocol) 9272 + return -EPROTOTYPE; 9273 + if (sk && sk->sk_family != ctx->family && 9274 + (sk->sk_family == AF_INET || ipv6_only_sock(sk))) 9275 + return -EAFNOSUPPORT; 9276 + 9277 + if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE)) 9278 + return -EEXIST; 9279 + 9280 + /* Select socket as lookup result */ 9281 + ctx->selected_sk = sk; 9282 + ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT; 9283 + return 0; 9284 + } 9285 + 9286 + static const struct bpf_func_proto bpf_sk_lookup_assign_proto = { 9287 + .func = bpf_sk_lookup_assign, 9288 + .gpl_only = false, 9289 + .ret_type = RET_INTEGER, 9290 + .arg1_type = ARG_PTR_TO_CTX, 9291 + .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL, 9292 + .arg3_type = ARG_ANYTHING, 9293 + }; 9294 + 9295 + static const struct bpf_func_proto * 9296 + sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 9297 + { 9298 + switch (func_id) { 9299 + case BPF_FUNC_perf_event_output: 9300 + return &bpf_event_output_data_proto; 9301 + case BPF_FUNC_sk_assign: 9302 + return &bpf_sk_lookup_assign_proto; 9303 + case BPF_FUNC_sk_release: 9304 + return &bpf_sk_release_proto; 9305 + default: 9306 + return bpf_base_func_proto(func_id); 
9307 + } 9308 + } 9309 + 9310 + static bool sk_lookup_is_valid_access(int off, int size, 9311 + enum bpf_access_type type, 9312 + const struct bpf_prog *prog, 9313 + struct bpf_insn_access_aux *info) 9314 + { 9315 + if (off < 0 || off >= sizeof(struct bpf_sk_lookup)) 9316 + return false; 9317 + if (off % size != 0) 9318 + return false; 9319 + if (type != BPF_READ) 9320 + return false; 9321 + 9322 + switch (off) { 9323 + case offsetof(struct bpf_sk_lookup, sk): 9324 + info->reg_type = PTR_TO_SOCKET_OR_NULL; 9325 + return size == sizeof(__u64); 9326 + 9327 + case bpf_ctx_range(struct bpf_sk_lookup, family): 9328 + case bpf_ctx_range(struct bpf_sk_lookup, protocol): 9329 + case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4): 9330 + case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): 9331 + case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): 9332 + case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): 9333 + case bpf_ctx_range(struct bpf_sk_lookup, remote_port): 9334 + case bpf_ctx_range(struct bpf_sk_lookup, local_port): 9335 + bpf_ctx_record_field_size(info, sizeof(__u32)); 9336 + return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); 9337 + 9338 + default: 9339 + return false; 9340 + } 9341 + } 9342 + 9343 + static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, 9344 + const struct bpf_insn *si, 9345 + struct bpf_insn *insn_buf, 9346 + struct bpf_prog *prog, 9347 + u32 *target_size) 9348 + { 9349 + struct bpf_insn *insn = insn_buf; 9350 + 9351 + switch (si->off) { 9352 + case offsetof(struct bpf_sk_lookup, sk): 9353 + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, 9354 + offsetof(struct bpf_sk_lookup_kern, selected_sk)); 9355 + break; 9356 + 9357 + case offsetof(struct bpf_sk_lookup, family): 9358 + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9359 + bpf_target_off(struct bpf_sk_lookup_kern, 9360 + family, 2, target_size)); 9361 + break; 9362 + 9363 + case offsetof(struct 
bpf_sk_lookup, protocol): 9364 + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9365 + bpf_target_off(struct bpf_sk_lookup_kern, 9366 + protocol, 2, target_size)); 9367 + break; 9368 + 9369 + case offsetof(struct bpf_sk_lookup, remote_ip4): 9370 + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9371 + bpf_target_off(struct bpf_sk_lookup_kern, 9372 + v4.saddr, 4, target_size)); 9373 + break; 9374 + 9375 + case offsetof(struct bpf_sk_lookup, local_ip4): 9376 + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, 9377 + bpf_target_off(struct bpf_sk_lookup_kern, 9378 + v4.daddr, 4, target_size)); 9379 + break; 9380 + 9381 + case bpf_ctx_range_till(struct bpf_sk_lookup, 9382 + remote_ip6[0], remote_ip6[3]): { 9383 + #if IS_ENABLED(CONFIG_IPV6) 9384 + int off = si->off; 9385 + 9386 + off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]); 9387 + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); 9388 + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, 9389 + offsetof(struct bpf_sk_lookup_kern, v6.saddr)); 9390 + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 9391 + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); 9392 + #else 9393 + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 9394 + #endif 9395 + break; 9396 + } 9397 + case bpf_ctx_range_till(struct bpf_sk_lookup, 9398 + local_ip6[0], local_ip6[3]): { 9399 + #if IS_ENABLED(CONFIG_IPV6) 9400 + int off = si->off; 9401 + 9402 + off -= offsetof(struct bpf_sk_lookup, local_ip6[0]); 9403 + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); 9404 + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, 9405 + offsetof(struct bpf_sk_lookup_kern, v6.daddr)); 9406 + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); 9407 + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); 9408 + #else 9409 + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); 9410 + #endif 9411 + break; 9412 + } 9413 + case offsetof(struct bpf_sk_lookup, remote_port): 
9414 + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9415 + bpf_target_off(struct bpf_sk_lookup_kern, 9416 + sport, 2, target_size)); 9417 + break; 9418 + 9419 + case offsetof(struct bpf_sk_lookup, local_port): 9420 + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9421 + bpf_target_off(struct bpf_sk_lookup_kern, 9422 + dport, 2, target_size)); 9423 + break; 9424 + } 9425 + 9426 + return insn - insn_buf; 9427 + } 9428 + 9429 + const struct bpf_prog_ops sk_lookup_prog_ops = { 9430 + }; 9431 + 9432 + const struct bpf_verifier_ops sk_lookup_verifier_ops = { 9433 + .get_func_proto = sk_lookup_func_proto, 9434 + .is_valid_access = sk_lookup_is_valid_access, 9435 + .convert_ctx_access = sk_lookup_convert_ctx_access, 9436 + }; 9437 + 9255 9438 #endif /* CONFIG_INET */ 9256 9439 9257 9440 DEFINE_BPF_DISPATCHER(xdp) ··· 9444 9261 bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); 9445 9262 } 9446 9263 9447 - /* Define a list of socket types which can be the argument for 9448 - * skc_to_*_sock() helpers. All these sockets should have 9449 - * sock_common as the first argument in its memory layout. 
9450 - */ 9451 - #define BTF_SOCK_TYPE_xxx \ 9452 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \ 9453 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \ 9454 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \ 9455 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \ 9456 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \ 9457 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \ 9458 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \ 9459 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \ 9460 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \ 9461 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \ 9462 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \ 9463 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \ 9464 - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock") 9465 - 9466 - enum { 9467 - #define BTF_SOCK_TYPE(name, str) name, 9264 + #ifdef CONFIG_DEBUG_INFO_BTF 9265 + BTF_ID_LIST_GLOBAL(btf_sock_ids) 9266 + #define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) 9468 9267 BTF_SOCK_TYPE_xxx 9469 9268 #undef BTF_SOCK_TYPE 9470 - MAX_BTF_SOCK_TYPE, 9471 - }; 9472 - 9473 - static int btf_sock_ids[MAX_BTF_SOCK_TYPE]; 9474 - 9475 - #ifdef CONFIG_BPF_SYSCALL 9476 - static const char *bpf_sock_types[] = { 9477 - #define BTF_SOCK_TYPE(name, str) str, 9478 - BTF_SOCK_TYPE_xxx 9479 - #undef BTF_SOCK_TYPE 9480 - }; 9481 - 9482 - void init_btf_sock_ids(struct btf *btf) 9483 - { 9484 - int i, btf_id; 9485 - 9486 - for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) { 9487 - btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i], 9488 - BTF_KIND_STRUCT); 9489 - if (btf_id > 0) 9490 - btf_sock_ids[i] = btf_id; 9491 - } 9492 - } 9269 + #else 9270 + u32 btf_sock_ids[MAX_BTF_SOCK_TYPE]; 9493 9271 #endif 9494 9272 9495 9273 static bool check_arg_btf_id(u32 btf_id, u32 arg)
+51 -9
net/ipv4/inet_hashtables.c
··· 246 246 return score; 247 247 } 248 248 249 + static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, 250 + struct sk_buff *skb, int doff, 251 + __be32 saddr, __be16 sport, 252 + __be32 daddr, unsigned short hnum) 253 + { 254 + struct sock *reuse_sk = NULL; 255 + u32 phash; 256 + 257 + if (sk->sk_reuseport) { 258 + phash = inet_ehashfn(net, daddr, hnum, saddr, sport); 259 + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); 260 + } 261 + return reuse_sk; 262 + } 263 + 249 264 /* 250 265 * Here are some nice properties to exploit here. The BSD API 251 266 * does not allow a listening sock to specify the remote port nor the ··· 280 265 struct inet_connection_sock *icsk; 281 266 struct sock *sk, *result = NULL; 282 267 int score, hiscore = 0; 283 - u32 phash = 0; 284 268 285 269 inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { 286 270 sk = (struct sock *)icsk; 287 271 score = compute_score(sk, net, hnum, daddr, 288 272 dif, sdif, exact_dif); 289 273 if (score > hiscore) { 290 - if (sk->sk_reuseport) { 291 - phash = inet_ehashfn(net, daddr, hnum, 292 - saddr, sport); 293 - result = reuseport_select_sock(sk, phash, 294 - skb, doff); 295 - if (result) 296 - return result; 297 - } 274 + result = lookup_reuseport(net, sk, skb, doff, 275 + saddr, sport, daddr, hnum); 276 + if (result) 277 + return result; 278 + 298 279 result = sk; 299 280 hiscore = score; 300 281 } 301 282 } 302 283 303 284 return result; 285 + } 286 + 287 + static inline struct sock *inet_lookup_run_bpf(struct net *net, 288 + struct inet_hashinfo *hashinfo, 289 + struct sk_buff *skb, int doff, 290 + __be32 saddr, __be16 sport, 291 + __be32 daddr, u16 hnum) 292 + { 293 + struct sock *sk, *reuse_sk; 294 + bool no_reuseport; 295 + 296 + if (hashinfo != &tcp_hashinfo) 297 + return NULL; /* only TCP is supported */ 298 + 299 + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, 300 + saddr, sport, daddr, hnum, &sk); 301 + if (no_reuseport || IS_ERR_OR_NULL(sk)) 302 + return 
sk; 303 + 304 + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); 305 + if (reuse_sk) 306 + sk = reuse_sk; 307 + return sk; 304 308 } 305 309 306 310 struct sock *__inet_lookup_listener(struct net *net, ··· 332 298 struct inet_listen_hashbucket *ilb2; 333 299 struct sock *result = NULL; 334 300 unsigned int hash2; 301 + 302 + /* Lookup redirect from BPF */ 303 + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { 304 + result = inet_lookup_run_bpf(net, hashinfo, skb, doff, 305 + saddr, sport, daddr, hnum); 306 + if (result) 307 + goto done; 308 + } 335 309 336 310 hash2 = ipv4_portaddr_hash(net, daddr, hnum); 337 311 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+3 -1
net/ipv4/tcp_ipv4.c
··· 76 76 #include <linux/proc_fs.h> 77 77 #include <linux/seq_file.h> 78 78 #include <linux/inetdevice.h> 79 + #include <linux/btf_ids.h> 79 80 80 81 #include <crypto/hash.h> 81 82 #include <linux/scatterlist.h> ··· 2947 2946 bpf_iter_fini_seq_net(priv_data); 2948 2947 } 2949 2948 2950 - static const struct bpf_iter_reg tcp_reg_info = { 2949 + static struct bpf_iter_reg tcp_reg_info = { 2951 2950 .target = "tcp", 2952 2951 .seq_ops = &bpf_iter_tcp_seq_ops, 2953 2952 .init_seq_private = bpf_iter_init_tcp, ··· 2962 2961 2963 2962 static void __init bpf_iter_register(void) 2964 2963 { 2964 + tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON]; 2965 2965 if (bpf_iter_reg_target(&tcp_reg_info)) 2966 2966 pr_warn("Warning: could not register bpf iterator tcp\n"); 2967 2967 }
+76 -19
net/ipv4/udp.c
··· 106 106 #include <net/xfrm.h> 107 107 #include <trace/events/udp.h> 108 108 #include <linux/static_key.h> 109 + #include <linux/btf_ids.h> 109 110 #include <trace/events/skb.h> 110 111 #include <net/busy_poll.h> 111 112 #include "udp_impl.h" ··· 409 408 udp_ehash_secret + net_hash_mix(net)); 410 409 } 411 410 411 + static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, 412 + struct sk_buff *skb, 413 + __be32 saddr, __be16 sport, 414 + __be32 daddr, unsigned short hnum) 415 + { 416 + struct sock *reuse_sk = NULL; 417 + u32 hash; 418 + 419 + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { 420 + hash = udp_ehashfn(net, daddr, hnum, saddr, sport); 421 + reuse_sk = reuseport_select_sock(sk, hash, skb, 422 + sizeof(struct udphdr)); 423 + /* Fall back to scoring if group has connections */ 424 + if (reuseport_has_conns(sk, false)) 425 + return NULL; 426 + } 427 + return reuse_sk; 428 + } 429 + 412 430 /* called with rcu_read_lock() */ 413 431 static struct sock *udp4_lib_lookup2(struct net *net, 414 432 __be32 saddr, __be16 sport, ··· 438 418 { 439 419 struct sock *sk, *result; 440 420 int score, badness; 441 - u32 hash = 0; 442 421 443 422 result = NULL; 444 423 badness = 0; ··· 445 426 score = compute_score(sk, net, saddr, sport, 446 427 daddr, hnum, dif, sdif); 447 428 if (score > badness) { 448 - if (sk->sk_reuseport && 449 - sk->sk_state != TCP_ESTABLISHED) { 450 - hash = udp_ehashfn(net, daddr, hnum, 451 - saddr, sport); 452 - result = reuseport_select_sock(sk, hash, skb, 453 - sizeof(struct udphdr)); 454 - if (result && !reuseport_has_conns(sk, false)) 455 - return result; 456 - } 429 + result = lookup_reuseport(net, sk, skb, 430 + saddr, sport, daddr, hnum); 431 + if (result) 432 + return result; 433 + 457 434 badness = score; 458 435 result = sk; 459 436 } 460 437 } 461 438 return result; 439 + } 440 + 441 + static inline struct sock *udp4_lookup_run_bpf(struct net *net, 442 + struct udp_table *udptable, 443 + struct sk_buff 
*skb, 444 + __be32 saddr, __be16 sport, 445 + __be32 daddr, u16 hnum) 446 + { 447 + struct sock *sk, *reuse_sk; 448 + bool no_reuseport; 449 + 450 + if (udptable != &udp_table) 451 + return NULL; /* only UDP is supported */ 452 + 453 + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, 454 + saddr, sport, daddr, hnum, &sk); 455 + if (no_reuseport || IS_ERR_OR_NULL(sk)) 456 + return sk; 457 + 458 + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); 459 + if (reuse_sk) 460 + sk = reuse_sk; 461 + return sk; 462 462 } 463 463 464 464 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try ··· 487 449 __be16 sport, __be32 daddr, __be16 dport, int dif, 488 450 int sdif, struct udp_table *udptable, struct sk_buff *skb) 489 451 { 490 - struct sock *result; 491 452 unsigned short hnum = ntohs(dport); 492 453 unsigned int hash2, slot2; 493 454 struct udp_hslot *hslot2; 455 + struct sock *result, *sk; 494 456 495 457 hash2 = ipv4_portaddr_hash(net, daddr, hnum); 496 458 slot2 = hash2 & udptable->mask; 497 459 hslot2 = &udptable->hash2[slot2]; 498 460 461 + /* Lookup connected or non-wildcard socket */ 499 462 result = udp4_lib_lookup2(net, saddr, sport, 500 463 daddr, hnum, dif, sdif, 501 464 hslot2, skb); 502 - if (!result) { 503 - hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 504 - slot2 = hash2 & udptable->mask; 505 - hslot2 = &udptable->hash2[slot2]; 465 + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) 466 + goto done; 506 467 507 - result = udp4_lib_lookup2(net, saddr, sport, 508 - htonl(INADDR_ANY), hnum, dif, sdif, 509 - hslot2, skb); 468 + /* Lookup redirect from BPF */ 469 + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { 470 + sk = udp4_lookup_run_bpf(net, udptable, skb, 471 + saddr, sport, daddr, hnum); 472 + if (sk) { 473 + result = sk; 474 + goto done; 475 + } 510 476 } 477 + 478 + /* Got non-wildcard socket or error on first lookup */ 479 + if (result) 480 + goto done; 481 + 482 + /* 
Lookup wildcard sockets */ 483 + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 484 + slot2 = hash2 & udptable->mask; 485 + hslot2 = &udptable->hash2[slot2]; 486 + 487 + result = udp4_lib_lookup2(net, saddr, sport, 488 + htonl(INADDR_ANY), hnum, dif, sdif, 489 + hslot2, skb); 490 + done: 511 491 if (IS_ERR(result)) 512 492 return NULL; 513 493 return result; ··· 3209 3153 bpf_iter_fini_seq_net(priv_data); 3210 3154 } 3211 3155 3212 - static const struct bpf_iter_reg udp_reg_info = { 3156 + static struct bpf_iter_reg udp_reg_info = { 3213 3157 .target = "udp", 3214 3158 .seq_ops = &bpf_iter_udp_seq_ops, 3215 3159 .init_seq_private = bpf_iter_init_udp, ··· 3224 3168 3225 3169 static void __init bpf_iter_register(void) 3226 3170 { 3171 + udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP]; 3227 3172 if (bpf_iter_reg_target(&udp_reg_info)) 3228 3173 pr_warn("Warning: could not register bpf iterator udp\n"); 3229 3174 }
+57 -9
net/ipv6/inet6_hashtables.c
··· 21 21 #include <net/ip.h> 22 22 #include <net/sock_reuseport.h> 23 23 24 + extern struct inet_hashinfo tcp_hashinfo; 25 + 24 26 u32 inet6_ehashfn(const struct net *net, 25 27 const struct in6_addr *laddr, const u16 lport, 26 28 const struct in6_addr *faddr, const __be16 fport) ··· 113 111 return score; 114 112 } 115 113 114 + static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, 115 + struct sk_buff *skb, int doff, 116 + const struct in6_addr *saddr, 117 + __be16 sport, 118 + const struct in6_addr *daddr, 119 + unsigned short hnum) 120 + { 121 + struct sock *reuse_sk = NULL; 122 + u32 phash; 123 + 124 + if (sk->sk_reuseport) { 125 + phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); 126 + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); 127 + } 128 + return reuse_sk; 129 + } 130 + 116 131 /* called with rcu_read_lock() */ 117 132 static struct sock *inet6_lhash2_lookup(struct net *net, 118 133 struct inet_listen_hashbucket *ilb2, ··· 142 123 struct inet_connection_sock *icsk; 143 124 struct sock *sk, *result = NULL; 144 125 int score, hiscore = 0; 145 - u32 phash = 0; 146 126 147 127 inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { 148 128 sk = (struct sock *)icsk; 149 129 score = compute_score(sk, net, hnum, daddr, dif, sdif, 150 130 exact_dif); 151 131 if (score > hiscore) { 152 - if (sk->sk_reuseport) { 153 - phash = inet6_ehashfn(net, daddr, hnum, 154 - saddr, sport); 155 - result = reuseport_select_sock(sk, phash, 156 - skb, doff); 157 - if (result) 158 - return result; 159 - } 132 + result = lookup_reuseport(net, sk, skb, doff, 133 + saddr, sport, daddr, hnum); 134 + if (result) 135 + return result; 136 + 160 137 result = sk; 161 138 hiscore = score; 162 139 } 163 140 } 164 141 165 142 return result; 143 + } 144 + 145 + static inline struct sock *inet6_lookup_run_bpf(struct net *net, 146 + struct inet_hashinfo *hashinfo, 147 + struct sk_buff *skb, int doff, 148 + const struct in6_addr *saddr, 149 + const __be16 sport, 
150 + const struct in6_addr *daddr, 151 + const u16 hnum) 152 + { 153 + struct sock *sk, *reuse_sk; 154 + bool no_reuseport; 155 + 156 + if (hashinfo != &tcp_hashinfo) 157 + return NULL; /* only TCP is supported */ 158 + 159 + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, 160 + saddr, sport, daddr, hnum, &sk); 161 + if (no_reuseport || IS_ERR_OR_NULL(sk)) 162 + return sk; 163 + 164 + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); 165 + if (reuse_sk) 166 + sk = reuse_sk; 167 + return sk; 166 168 } 167 169 168 170 struct sock *inet6_lookup_listener(struct net *net, ··· 196 156 struct inet_listen_hashbucket *ilb2; 197 157 struct sock *result = NULL; 198 158 unsigned int hash2; 159 + 160 + /* Lookup redirect from BPF */ 161 + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { 162 + result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, 163 + saddr, sport, daddr, hnum); 164 + if (result) 165 + goto done; 166 + } 199 167 200 168 hash2 = ipv6_portaddr_hash(net, daddr, hnum); 201 169 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+6 -1
net/ipv6/route.c
··· 61 61 #include <net/l3mdev.h> 62 62 #include <net/ip.h> 63 63 #include <linux/uaccess.h> 64 + #include <linux/btf_ids.h> 64 65 65 66 #ifdef CONFIG_SYSCTL 66 67 #include <linux/sysctl.h> ··· 6424 6423 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 6425 6424 DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) 6426 6425 6427 - static const struct bpf_iter_reg ipv6_route_reg_info = { 6426 + BTF_ID_LIST(btf_fib6_info_id) 6427 + BTF_ID(struct, fib6_info) 6428 + 6429 + static struct bpf_iter_reg ipv6_route_reg_info = { 6428 6430 .target = "ipv6_route", 6429 6431 .seq_ops = &ipv6_route_seq_ops, 6430 6432 .init_seq_private = bpf_iter_init_seq_net, ··· 6442 6438 6443 6439 static int __init bpf_iter_register(void) 6444 6440 { 6441 + ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id; 6445 6442 return bpf_iter_reg_target(&ipv6_route_reg_info); 6446 6443 } 6447 6444
+76 -19
net/ipv6/udp.c
··· 141 141 return score; 142 142 } 143 143 144 + static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, 145 + struct sk_buff *skb, 146 + const struct in6_addr *saddr, 147 + __be16 sport, 148 + const struct in6_addr *daddr, 149 + unsigned int hnum) 150 + { 151 + struct sock *reuse_sk = NULL; 152 + u32 hash; 153 + 154 + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { 155 + hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); 156 + reuse_sk = reuseport_select_sock(sk, hash, skb, 157 + sizeof(struct udphdr)); 158 + /* Fall back to scoring if group has connections */ 159 + if (reuseport_has_conns(sk, false)) 160 + return NULL; 161 + } 162 + return reuse_sk; 163 + } 164 + 144 165 /* called with rcu_read_lock() */ 145 166 static struct sock *udp6_lib_lookup2(struct net *net, 146 167 const struct in6_addr *saddr, __be16 sport, ··· 171 150 { 172 151 struct sock *sk, *result; 173 152 int score, badness; 174 - u32 hash = 0; 175 153 176 154 result = NULL; 177 155 badness = -1; ··· 178 158 score = compute_score(sk, net, saddr, sport, 179 159 daddr, hnum, dif, sdif); 180 160 if (score > badness) { 181 - if (sk->sk_reuseport && 182 - sk->sk_state != TCP_ESTABLISHED) { 183 - hash = udp6_ehashfn(net, daddr, hnum, 184 - saddr, sport); 161 + result = lookup_reuseport(net, sk, skb, 162 + saddr, sport, daddr, hnum); 163 + if (result) 164 + return result; 185 165 186 - result = reuseport_select_sock(sk, hash, skb, 187 - sizeof(struct udphdr)); 188 - if (result && !reuseport_has_conns(sk, false)) 189 - return result; 190 - } 191 166 result = sk; 192 167 badness = score; 193 168 } 194 169 } 195 170 return result; 171 + } 172 + 173 + static inline struct sock *udp6_lookup_run_bpf(struct net *net, 174 + struct udp_table *udptable, 175 + struct sk_buff *skb, 176 + const struct in6_addr *saddr, 177 + __be16 sport, 178 + const struct in6_addr *daddr, 179 + u16 hnum) 180 + { 181 + struct sock *sk, *reuse_sk; 182 + bool no_reuseport; 183 + 184 + if (udptable != 
&udp_table) 185 + return NULL; /* only UDP is supported */ 186 + 187 + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, 188 + saddr, sport, daddr, hnum, &sk); 189 + if (no_reuseport || IS_ERR_OR_NULL(sk)) 190 + return sk; 191 + 192 + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); 193 + if (reuse_sk) 194 + sk = reuse_sk; 195 + return sk; 196 196 } 197 197 198 198 /* rcu_read_lock() must be held */ ··· 225 185 unsigned short hnum = ntohs(dport); 226 186 unsigned int hash2, slot2; 227 187 struct udp_hslot *hslot2; 228 - struct sock *result; 188 + struct sock *result, *sk; 229 189 230 190 hash2 = ipv6_portaddr_hash(net, daddr, hnum); 231 191 slot2 = hash2 & udptable->mask; 232 192 hslot2 = &udptable->hash2[slot2]; 233 193 194 + /* Lookup connected or non-wildcard sockets */ 234 195 result = udp6_lib_lookup2(net, saddr, sport, 235 196 daddr, hnum, dif, sdif, 236 197 hslot2, skb); 237 - if (!result) { 238 - hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); 239 - slot2 = hash2 & udptable->mask; 198 + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) 199 + goto done; 240 200 241 - hslot2 = &udptable->hash2[slot2]; 242 - 243 - result = udp6_lib_lookup2(net, saddr, sport, 244 - &in6addr_any, hnum, dif, sdif, 245 - hslot2, skb); 201 + /* Lookup redirect from BPF */ 202 + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { 203 + sk = udp6_lookup_run_bpf(net, udptable, skb, 204 + saddr, sport, daddr, hnum); 205 + if (sk) { 206 + result = sk; 207 + goto done; 208 + } 246 209 } 210 + 211 + /* Got non-wildcard socket or error on first lookup */ 212 + if (result) 213 + goto done; 214 + 215 + /* Lookup wildcard sockets */ 216 + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); 217 + slot2 = hash2 & udptable->mask; 218 + hslot2 = &udptable->hash2[slot2]; 219 + 220 + result = udp6_lib_lookup2(net, saddr, sport, 221 + &in6addr_any, hnum, dif, sdif, 222 + hslot2, skb); 223 + done: 247 224 if (IS_ERR(result)) 248 225 return NULL; 249 
226 return result;
+6 -1
net/netlink/af_netlink.c
··· 60 60 #include <linux/genetlink.h> 61 61 #include <linux/net_namespace.h> 62 62 #include <linux/nospec.h> 63 + #include <linux/btf_ids.h> 63 64 64 65 #include <net/net_namespace.h> 65 66 #include <net/netns/generic.h> ··· 2804 2803 }; 2805 2804 2806 2805 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 2807 - static const struct bpf_iter_reg netlink_reg_info = { 2806 + BTF_ID_LIST(btf_netlink_sock_id) 2807 + BTF_ID(struct, netlink_sock) 2808 + 2809 + static struct bpf_iter_reg netlink_reg_info = { 2808 2810 .target = "netlink", 2809 2811 .seq_ops = &netlink_seq_ops, 2810 2812 .init_seq_private = bpf_iter_init_seq_net, ··· 2822 2818 2823 2819 static int __init bpf_iter_register(void) 2824 2820 { 2821 + netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id; 2825 2822 return bpf_iter_reg_target(&netlink_reg_info); 2826 2823 } 2827 2824 #endif
+6 -1
samples/bpf/offwaketime_kern.c
··· 12 12 #include <bpf/bpf_helpers.h> 13 13 #include <bpf/bpf_tracing.h> 14 14 15 - #define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;}) 15 + #define _(P) \ 16 + ({ \ 17 + typeof(P) val; \ 18 + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 19 + val; \ 20 + }) 16 21 17 22 #define MINBLOCK_US 1 18 23
+9 -3
samples/bpf/test_overhead_kprobe_kern.c
··· 10 10 #include <bpf/bpf_helpers.h> 11 11 #include <bpf/bpf_tracing.h> 12 12 13 - #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) 13 + #define _(P) \ 14 + ({ \ 15 + typeof(P) val = 0; \ 16 + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 17 + val; \ 18 + }) 14 19 15 20 SEC("kprobe/__set_task_comm") 16 21 int prog(struct pt_regs *ctx) ··· 30 25 tsk = (void *)PT_REGS_PARM1(ctx); 31 26 32 27 pid = _(tsk->pid); 33 - bpf_probe_read(oldcomm, sizeof(oldcomm), &tsk->comm); 34 - bpf_probe_read(newcomm, sizeof(newcomm), (void *)PT_REGS_PARM2(ctx)); 28 + bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm); 29 + bpf_probe_read_kernel(newcomm, sizeof(newcomm), 30 + (void *)PT_REGS_PARM2(ctx)); 35 31 signal = _(tsk->signal); 36 32 oom_score_adj = _(signal->oom_score_adj); 37 33 return 0;
+7 -2
samples/bpf/tracex1_kern.c
··· 11 11 #include <bpf/bpf_helpers.h> 12 12 #include <bpf/bpf_tracing.h> 13 13 14 - #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) 14 + #define _(P) \ 15 + ({ \ 16 + typeof(P) val = 0; \ 17 + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 18 + val; \ 19 + }) 15 20 16 21 /* kprobe is NOT a stable ABI 17 22 * kernel functions can be removed, renamed or completely change semantics. ··· 39 34 dev = _(skb->dev); 40 35 len = _(skb->len); 41 36 42 - bpf_probe_read(devname, sizeof(devname), dev->name); 37 + bpf_probe_read_kernel(devname, sizeof(devname), dev->name); 43 38 44 39 if (devname[0] == 'l' && devname[1] == 'o') { 45 40 char fmt[] = "skb %p len %d\n";
+2 -2
samples/bpf/tracex5_kern.c
··· 47 47 { 48 48 struct seccomp_data sd; 49 49 50 - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 50 + bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 51 51 if (sd.args[2] == 512) { 52 52 char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; 53 53 bpf_trace_printk(fmt, sizeof(fmt), ··· 60 60 { 61 61 struct seccomp_data sd; 62 62 63 - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 63 + bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 64 64 if (sd.args[2] > 128 && sd.args[2] <= 1024) { 65 65 char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; 66 66 bpf_trace_printk(fmt, sizeof(fmt),
+17 -8
samples/bpf/xdp_redirect_cpu_kern.c
··· 21 21 struct { 22 22 __uint(type, BPF_MAP_TYPE_CPUMAP); 23 23 __uint(key_size, sizeof(u32)); 24 - __uint(value_size, sizeof(u32)); 24 + __uint(value_size, sizeof(struct bpf_cpumap_val)); 25 25 __uint(max_entries, MAX_CPUS); 26 26 } cpu_map SEC(".maps"); 27 27 ··· 30 30 __u64 processed; 31 31 __u64 dropped; 32 32 __u64 issue; 33 + __u64 xdp_pass; 34 + __u64 xdp_drop; 35 + __u64 xdp_redirect; 33 36 }; 34 37 35 38 /* Count RX packets, as XDP bpf_prog doesn't get direct TX-success ··· 695 692 * Code in: kernel/include/trace/events/xdp.h 696 693 */ 697 694 struct cpumap_kthread_ctx { 698 - u64 __pad; // First 8 bytes are not accessible by bpf code 699 - int map_id; // offset:8; size:4; signed:1; 700 - u32 act; // offset:12; size:4; signed:0; 701 - int cpu; // offset:16; size:4; signed:1; 702 - unsigned int drops; // offset:20; size:4; signed:0; 703 - unsigned int processed; // offset:24; size:4; signed:0; 704 - int sched; // offset:28; size:4; signed:1; 695 + u64 __pad; // First 8 bytes are not accessible 696 + int map_id; // offset:8; size:4; signed:1; 697 + u32 act; // offset:12; size:4; signed:0; 698 + int cpu; // offset:16; size:4; signed:1; 699 + unsigned int drops; // offset:20; size:4; signed:0; 700 + unsigned int processed; // offset:24; size:4; signed:0; 701 + int sched; // offset:28; size:4; signed:1; 702 + unsigned int xdp_pass; // offset:32; size:4; signed:0; 703 + unsigned int xdp_drop; // offset:36; size:4; signed:0; 704 + unsigned int xdp_redirect; // offset:40; size:4; signed:0; 705 705 }; 706 706 707 707 SEC("tracepoint/xdp/xdp_cpumap_kthread") ··· 718 712 return 0; 719 713 rec->processed += ctx->processed; 720 714 rec->dropped += ctx->drops; 715 + rec->xdp_pass += ctx->xdp_pass; 716 + rec->xdp_drop += ctx->xdp_drop; 717 + rec->xdp_redirect += ctx->xdp_redirect; 721 718 722 719 /* Count times kthread yielded CPU via schedule call */ 723 720 if (ctx->sched)
+187 -22
samples/bpf/xdp_redirect_cpu_user.c
··· 70 70 {"stress-mode", no_argument, NULL, 'x' }, 71 71 {"no-separators", no_argument, NULL, 'z' }, 72 72 {"force", no_argument, NULL, 'F' }, 73 + {"mprog-disable", no_argument, NULL, 'n' }, 74 + {"mprog-name", required_argument, NULL, 'e' }, 75 + {"mprog-filename", required_argument, NULL, 'f' }, 76 + {"redirect-device", required_argument, NULL, 'r' }, 77 + {"redirect-map", required_argument, NULL, 'm' }, 73 78 {0, 0, NULL, 0 } 74 79 }; 75 80 ··· 161 156 __u64 processed; 162 157 __u64 dropped; 163 158 __u64 issue; 159 + __u64 xdp_pass; 160 + __u64 xdp_drop; 161 + __u64 xdp_redirect; 164 162 }; 165 163 struct record { 166 164 __u64 timestamp; ··· 183 175 /* For percpu maps, userspace gets a value per possible CPU */ 184 176 unsigned int nr_cpus = bpf_num_possible_cpus(); 185 177 struct datarec values[nr_cpus]; 178 + __u64 sum_xdp_redirect = 0; 179 + __u64 sum_xdp_pass = 0; 180 + __u64 sum_xdp_drop = 0; 186 181 __u64 sum_processed = 0; 187 182 __u64 sum_dropped = 0; 188 183 __u64 sum_issue = 0; ··· 207 196 sum_dropped += values[i].dropped; 208 197 rec->cpu[i].issue = values[i].issue; 209 198 sum_issue += values[i].issue; 199 + rec->cpu[i].xdp_pass = values[i].xdp_pass; 200 + sum_xdp_pass += values[i].xdp_pass; 201 + rec->cpu[i].xdp_drop = values[i].xdp_drop; 202 + sum_xdp_drop += values[i].xdp_drop; 203 + rec->cpu[i].xdp_redirect = values[i].xdp_redirect; 204 + sum_xdp_redirect += values[i].xdp_redirect; 210 205 } 211 206 rec->total.processed = sum_processed; 212 207 rec->total.dropped = sum_dropped; 213 208 rec->total.issue = sum_issue; 209 + rec->total.xdp_pass = sum_xdp_pass; 210 + rec->total.xdp_drop = sum_xdp_drop; 211 + rec->total.xdp_redirect = sum_xdp_redirect; 214 212 return true; 215 213 } 216 214 ··· 320 300 return pps; 321 301 } 322 302 303 + static void calc_xdp_pps(struct datarec *r, struct datarec *p, 304 + double *xdp_pass, double *xdp_drop, 305 + double *xdp_redirect, double period_) 306 + { 307 + *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0; 
308 + if (period_ > 0) { 309 + *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_; 310 + *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_; 311 + *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_; 312 + } 313 + } 314 + 323 315 static void stats_print(struct stats_record *stats_rec, 324 316 struct stats_record *stats_prev, 325 - char *prog_name) 317 + char *prog_name, char *mprog_name, int mprog_fd) 326 318 { 327 319 unsigned int nr_cpus = bpf_num_possible_cpus(); 328 320 double pps = 0, drop = 0, err = 0; 321 + bool mprog_enabled = false; 329 322 struct record *rec, *prev; 330 323 int to_cpu; 331 324 double t; 332 325 int i; 326 + 327 + if (mprog_fd > 0) 328 + mprog_enabled = true; 333 329 334 330 /* Header */ 335 331 printf("Running XDP/eBPF prog_name:%s\n", prog_name); ··· 491 455 printf(fm2_err, "xdp_exception", "total", pps, drop); 492 456 } 493 457 458 + /* CPUMAP attached XDP program that runs on remote/destination CPU */ 459 + if (mprog_enabled) { 460 + char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n"; 461 + char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n"; 462 + double xdp_pass, xdp_drop, xdp_redirect; 463 + 464 + printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name); 465 + printf("%-15s %-7s %-14s %-11s %-9s\n", 466 + "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir"); 467 + 468 + rec = &stats_rec->kthread; 469 + prev = &stats_prev->kthread; 470 + t = calc_period(rec, prev); 471 + for (i = 0; i < nr_cpus; i++) { 472 + struct datarec *r = &rec->cpu[i]; 473 + struct datarec *p = &prev->cpu[i]; 474 + 475 + calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, 476 + &xdp_redirect, t); 477 + if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0) 478 + printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop, 479 + xdp_redirect); 480 + } 481 + calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop, 482 + &xdp_redirect, t); 483 + printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect); 484 + } 485 + 494 486 
printf("\n"); 495 487 fflush(stdout); 496 488 } ··· 555 491 *b = tmp; 556 492 } 557 493 558 - static int create_cpu_entry(__u32 cpu, __u32 queue_size, 494 + static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, 559 495 __u32 avail_idx, bool new) 560 496 { 561 497 __u32 curr_cpus_count = 0; ··· 565 501 /* Add a CPU entry to cpumap, as this allocate a cpu entry in 566 502 * the kernel for the cpu. 567 503 */ 568 - ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0); 504 + ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0); 569 505 if (ret) { 570 506 fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); 571 507 exit(EXIT_FAIL_BPF); ··· 596 532 } 597 533 } 598 534 /* map_fd[7] = cpus_iterator */ 599 - printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n", 535 + printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n", 600 536 new ? "Add-new":"Replace", cpu, avail_idx, 601 - queue_size, curr_cpus_count); 537 + value->qsize, value->bpf_prog.fd, curr_cpus_count); 602 538 603 539 return 0; 604 540 } ··· 622 558 } 623 559 624 560 /* Stress cpumap management code by concurrently changing underlying cpumap */ 625 - static void stress_cpumap(void) 561 + static void stress_cpumap(struct bpf_cpumap_val *value) 626 562 { 627 563 /* Changing qsize will cause kernel to free and alloc a new 628 564 * bpf_cpu_map_entry, with an associated/complicated tear-down 629 565 * procedure. 
630 566 */ 631 - create_cpu_entry(1, 1024, 0, false); 632 - create_cpu_entry(1, 8, 0, false); 633 - create_cpu_entry(1, 16000, 0, false); 567 + value->qsize = 1024; 568 + create_cpu_entry(1, value, 0, false); 569 + value->qsize = 8; 570 + create_cpu_entry(1, value, 0, false); 571 + value->qsize = 16000; 572 + create_cpu_entry(1, value, 0, false); 634 573 } 635 574 636 575 static void stats_poll(int interval, bool use_separators, char *prog_name, 576 + char *mprog_name, struct bpf_cpumap_val *value, 637 577 bool stress_mode) 638 578 { 639 579 struct stats_record *record, *prev; 580 + int mprog_fd; 640 581 641 582 record = alloc_stats_record(); 642 583 prev = alloc_stats_record(); ··· 653 584 654 585 while (1) { 655 586 swap(&prev, &record); 587 + mprog_fd = value->bpf_prog.fd; 656 588 stats_collect(record); 657 - stats_print(record, prev, prog_name); 589 + stats_print(record, prev, prog_name, mprog_name, mprog_fd); 658 590 sleep(interval); 659 591 if (stress_mode) 660 - stress_cpumap(); 592 + stress_cpumap(value); 661 593 } 662 594 663 595 free_stats_record(record); ··· 731 661 return 0; 732 662 } 733 663 664 + static int load_cpumap_prog(char *file_name, char *prog_name, 665 + char *redir_interface, char *redir_map) 666 + { 667 + struct bpf_prog_load_attr prog_load_attr = { 668 + .prog_type = BPF_PROG_TYPE_XDP, 669 + .expected_attach_type = BPF_XDP_CPUMAP, 670 + .file = file_name, 671 + }; 672 + struct bpf_program *prog; 673 + struct bpf_object *obj; 674 + int fd; 675 + 676 + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd)) 677 + return -1; 678 + 679 + if (fd < 0) { 680 + fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", 681 + strerror(errno)); 682 + return fd; 683 + } 684 + 685 + if (redir_interface && redir_map) { 686 + int err, map_fd, ifindex_out, key = 0; 687 + 688 + map_fd = bpf_object__find_map_fd_by_name(obj, redir_map); 689 + if (map_fd < 0) 690 + return map_fd; 691 + 692 + ifindex_out = if_nametoindex(redir_interface); 693 + if (!ifindex_out) 694 + 
return -1; 695 + 696 + err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0); 697 + if (err < 0) 698 + return err; 699 + } 700 + 701 + prog = bpf_object__find_program_by_title(obj, prog_name); 702 + if (!prog) { 703 + fprintf(stderr, "bpf_object__find_program_by_title failed\n"); 704 + return EXIT_FAIL; 705 + } 706 + 707 + return bpf_program__fd(prog); 708 + } 709 + 734 710 int main(int argc, char **argv) 735 711 { 736 712 struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; 737 713 char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; 714 + char *mprog_filename = "xdp_redirect_kern.o"; 715 + char *redir_interface = NULL, *redir_map = NULL; 716 + char *mprog_name = "xdp_redirect_dummy"; 717 + bool mprog_disable = false; 738 718 struct bpf_prog_load_attr prog_load_attr = { 739 719 .prog_type = BPF_PROG_TYPE_UNSPEC, 740 720 }; 741 721 struct bpf_prog_info info = {}; 742 722 __u32 info_len = sizeof(info); 723 + struct bpf_cpumap_val value; 743 724 bool use_separators = true; 744 725 bool stress_mode = false; 745 726 struct bpf_program *prog; ··· 802 681 int add_cpu = -1; 803 682 int opt, err; 804 683 int prog_fd; 684 + int *cpu, i; 805 685 __u32 qsize; 806 686 807 687 n_cpus = get_nprocs_conf(); ··· 838 716 } 839 717 mark_cpus_unavailable(); 840 718 719 + cpu = malloc(n_cpus * sizeof(int)); 720 + if (!cpu) { 721 + fprintf(stderr, "failed to allocate cpu array\n"); 722 + return EXIT_FAIL; 723 + } 724 + memset(cpu, 0, n_cpus * sizeof(int)); 725 + 841 726 /* Parse commands line args */ 842 - while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF", 727 + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:", 843 728 long_options, &longindex)) != -1) { 844 729 switch (opt) { 845 730 case 'd': ··· 880 751 /* Selecting eBPF prog to load */ 881 752 prog_name = optarg; 882 753 break; 754 + case 'n': 755 + mprog_disable = true; 756 + break; 757 + case 'f': 758 + mprog_filename = optarg; 759 + break; 760 + case 'e': 761 + mprog_name = optarg; 762 + break; 763 + case 
'r': 764 + redir_interface = optarg; 765 + break; 766 + case 'm': 767 + redir_map = optarg; 768 + break; 883 769 case 'c': 884 770 /* Add multiple CPUs */ 885 771 add_cpu = strtoul(optarg, NULL, 0); ··· 904 760 errno, strerror(errno)); 905 761 goto error; 906 762 } 907 - create_cpu_entry(add_cpu, qsize, added_cpus, true); 908 - added_cpus++; 763 + cpu[added_cpus++] = add_cpu; 909 764 break; 910 765 case 'q': 911 766 qsize = atoi(optarg); ··· 915 772 case 'h': 916 773 error: 917 774 default: 775 + free(cpu); 918 776 usage(argv, obj); 919 777 return EXIT_FAIL_OPTION; 920 778 } ··· 928 784 if (ifindex == -1) { 929 785 fprintf(stderr, "ERR: required option --dev missing\n"); 930 786 usage(argv, obj); 931 - return EXIT_FAIL_OPTION; 787 + err = EXIT_FAIL_OPTION; 788 + goto out; 932 789 } 933 790 /* Required option */ 934 791 if (add_cpu == -1) { 935 792 fprintf(stderr, "ERR: required option --cpu missing\n"); 936 793 fprintf(stderr, " Specify multiple --cpu option to add more\n"); 937 794 usage(argv, obj); 938 - return EXIT_FAIL_OPTION; 795 + err = EXIT_FAIL_OPTION; 796 + goto out; 939 797 } 798 + 799 + value.bpf_prog.fd = 0; 800 + if (!mprog_disable) 801 + value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name, 802 + redir_interface, redir_map); 803 + if (value.bpf_prog.fd < 0) { 804 + err = value.bpf_prog.fd; 805 + goto out; 806 + } 807 + value.qsize = qsize; 808 + 809 + for (i = 0; i < added_cpus; i++) 810 + create_cpu_entry(cpu[i], &value, i, true); 940 811 941 812 /* Remove XDP program when program is interrupted or killed */ 942 813 signal(SIGINT, int_exit); ··· 960 801 prog = bpf_object__find_program_by_title(obj, prog_name); 961 802 if (!prog) { 962 803 fprintf(stderr, "bpf_object__find_program_by_title failed\n"); 963 - return EXIT_FAIL; 804 + err = EXIT_FAIL; 805 + goto out; 964 806 } 965 807 966 808 prog_fd = bpf_program__fd(prog); 967 809 if (prog_fd < 0) { 968 810 fprintf(stderr, "bpf_program__fd failed\n"); 969 - return EXIT_FAIL; 811 + err = 
EXIT_FAIL; 812 + goto out; 970 813 } 971 814 972 815 if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { 973 816 fprintf(stderr, "link set xdp fd failed\n"); 974 - return EXIT_FAIL_XDP; 817 + err = EXIT_FAIL_XDP; 818 + goto out; 975 819 } 976 820 977 821 err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); 978 822 if (err) { 979 823 printf("can't get prog info - %s\n", strerror(errno)); 980 - return err; 824 + goto out; 981 825 } 982 826 prog_id = info.id; 983 827 984 - stats_poll(interval, use_separators, prog_name, stress_mode); 985 - return EXIT_OK; 828 + stats_poll(interval, use_separators, prog_name, mprog_name, 829 + &value, stress_mode); 830 + out: 831 + free(cpu); 832 + return err; 986 833 }
+8 -1
scripts/bpf_helpers_doc.py
··· 404 404 405 405 type_fwds = [ 406 406 'struct bpf_fib_lookup', 407 + 'struct bpf_sk_lookup', 407 408 'struct bpf_perf_event_data', 408 409 'struct bpf_perf_event_value', 409 410 'struct bpf_pidns_info', ··· 451 450 'struct bpf_perf_event_data', 452 451 'struct bpf_perf_event_value', 453 452 'struct bpf_pidns_info', 453 + 'struct bpf_sk_lookup', 454 454 'struct bpf_sock', 455 455 'struct bpf_sock_addr', 456 456 'struct bpf_sock_ops', ··· 489 487 'struct sk_msg_buff': 'struct sk_msg_md', 490 488 'struct xdp_buff': 'struct xdp_md', 491 489 } 490 + # Helpers overloaded for different context types. 491 + overloaded_helpers = [ 492 + 'bpf_get_socket_cookie', 493 + 'bpf_sk_assign', 494 + ] 492 495 493 496 def print_header(self): 494 497 header = '''\ ··· 550 543 for i, a in enumerate(proto['args']): 551 544 t = a['type'] 552 545 n = a['name'] 553 - if proto['name'] == 'bpf_get_socket_cookie' and i == 0: 546 + if proto['name'] in self.overloaded_helpers and i == 0: 554 547 t = 'void' 555 548 n = 'ctx' 556 549 one_arg = '{}{}'.format(comma, self.map_type(t))
+1 -1
tools/bpf/bpftool/Documentation/bpftool-prog.rst
··· 45 45 | **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | 46 46 | **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | 47 47 | **cgroup/getsockopt** | **cgroup/setsockopt** | 48 - | **struct_ops** | **fentry** | **fexit** | **freplace** 48 + | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** 49 49 | } 50 50 | *ATTACH_TYPE* := { 51 51 | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+1 -1
tools/bpf/bpftool/bash-completion/bpftool
··· 479 479 cgroup/post_bind4 cgroup/post_bind6 \ 480 480 cgroup/sysctl cgroup/getsockopt \ 481 481 cgroup/setsockopt struct_ops \ 482 - fentry fexit freplace" -- \ 482 + fentry fexit freplace sk_lookup" -- \ 483 483 "$cur" ) ) 484 484 return 0 485 485 ;;
+84 -60
tools/bpf/bpftool/common.c
··· 1 1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 2 /* Copyright (C) 2017-2018 Netronome Systems, Inc. */ 3 3 4 + #define _GNU_SOURCE 4 5 #include <ctype.h> 5 6 #include <errno.h> 6 7 #include <fcntl.h> 7 - #include <fts.h> 8 + #include <ftw.h> 8 9 #include <libgen.h> 9 10 #include <mntent.h> 10 11 #include <stdbool.h> ··· 65 64 [BPF_TRACE_FEXIT] = "fexit", 66 65 [BPF_MODIFY_RETURN] = "mod_ret", 67 66 [BPF_LSM_MAC] = "lsm_mac", 67 + [BPF_SK_LOOKUP] = "sk_lookup", 68 68 }; 69 69 70 70 void p_err(const char *fmt, ...) ··· 162 160 return err; 163 161 } 164 162 165 - int open_obj_pinned(char *path, bool quiet) 163 + int open_obj_pinned(const char *path, bool quiet) 166 164 { 167 - int fd; 165 + char *pname; 166 + int fd = -1; 168 167 169 - fd = bpf_obj_get(path); 170 - if (fd < 0) { 168 + pname = strdup(path); 169 + if (!pname) { 171 170 if (!quiet) 172 - p_err("bpf obj get (%s): %s", path, 173 - errno == EACCES && !is_bpffs(dirname(path)) ? 174 - "directory not in bpf file system (bpffs)" : 175 - strerror(errno)); 176 - return -1; 171 + p_err("mem alloc failed"); 172 + goto out_ret; 177 173 } 178 174 175 + fd = bpf_obj_get(pname); 176 + if (fd < 0) { 177 + if (!quiet) 178 + p_err("bpf obj get (%s): %s", pname, 179 + errno == EACCES && !is_bpffs(dirname(pname)) ? 
180 + "directory not in bpf file system (bpffs)" : 181 + strerror(errno)); 182 + goto out_free; 183 + } 184 + 185 + out_free: 186 + free(pname); 187 + out_ret: 179 188 return fd; 180 189 } 181 190 182 - int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) 191 + int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type) 183 192 { 184 193 enum bpf_obj_type type; 185 194 int fd; ··· 380 367 jsonw_end_array(json_wtr); 381 368 } 382 369 370 + /* extra params for nftw cb */ 371 + static struct pinned_obj_table *build_fn_table; 372 + static enum bpf_obj_type build_fn_type; 373 + 374 + static int do_build_table_cb(const char *fpath, const struct stat *sb, 375 + int typeflag, struct FTW *ftwbuf) 376 + { 377 + struct bpf_prog_info pinned_info; 378 + __u32 len = sizeof(pinned_info); 379 + struct pinned_obj *obj_node; 380 + enum bpf_obj_type objtype; 381 + int fd, err = 0; 382 + 383 + if (typeflag != FTW_F) 384 + goto out_ret; 385 + 386 + fd = open_obj_pinned(fpath, true); 387 + if (fd < 0) 388 + goto out_ret; 389 + 390 + objtype = get_fd_type(fd); 391 + if (objtype != build_fn_type) 392 + goto out_close; 393 + 394 + memset(&pinned_info, 0, sizeof(pinned_info)); 395 + if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len)) 396 + goto out_close; 397 + 398 + obj_node = calloc(1, sizeof(*obj_node)); 399 + if (!obj_node) { 400 + err = -1; 401 + goto out_close; 402 + } 403 + 404 + obj_node->id = pinned_info.id; 405 + obj_node->path = strdup(fpath); 406 + if (!obj_node->path) { 407 + err = -1; 408 + free(obj_node); 409 + goto out_close; 410 + } 411 + 412 + hash_add(build_fn_table->table, &obj_node->hash, obj_node->id); 413 + out_close: 414 + close(fd); 415 + out_ret: 416 + return err; 417 + } 418 + 383 419 int build_pinned_obj_table(struct pinned_obj_table *tab, 384 420 enum bpf_obj_type type) 385 421 { 386 - struct bpf_prog_info pinned_info = {}; 387 - struct pinned_obj *obj_node = NULL; 388 - __u32 len = sizeof(pinned_info); 389 422 struct mntent *mntent = NULL; 
390 - enum bpf_obj_type objtype; 391 423 FILE *mntfile = NULL; 392 - FTSENT *ftse = NULL; 393 - FTS *fts = NULL; 394 - int fd, err; 424 + int flags = FTW_PHYS; 425 + int nopenfd = 16; 426 + int err = 0; 395 427 396 428 mntfile = setmntent("/proc/mounts", "r"); 397 429 if (!mntfile) 398 430 return -1; 399 431 432 + build_fn_table = tab; 433 + build_fn_type = type; 434 + 400 435 while ((mntent = getmntent(mntfile))) { 401 - char *path[] = { mntent->mnt_dir, NULL }; 436 + char *path = mntent->mnt_dir; 402 437 403 438 if (strncmp(mntent->mnt_type, "bpf", 3) != 0) 404 439 continue; 405 - 406 - fts = fts_open(path, 0, NULL); 407 - if (!fts) 408 - continue; 409 - 410 - while ((ftse = fts_read(fts))) { 411 - if (!(ftse->fts_info & FTS_F)) 412 - continue; 413 - fd = open_obj_pinned(ftse->fts_path, true); 414 - if (fd < 0) 415 - continue; 416 - 417 - objtype = get_fd_type(fd); 418 - if (objtype != type) { 419 - close(fd); 420 - continue; 421 - } 422 - memset(&pinned_info, 0, sizeof(pinned_info)); 423 - err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len); 424 - if (err) { 425 - close(fd); 426 - continue; 427 - } 428 - 429 - obj_node = malloc(sizeof(*obj_node)); 430 - if (!obj_node) { 431 - close(fd); 432 - fts_close(fts); 433 - fclose(mntfile); 434 - return -1; 435 - } 436 - 437 - memset(obj_node, 0, sizeof(*obj_node)); 438 - obj_node->id = pinned_info.id; 439 - obj_node->path = strdup(ftse->fts_path); 440 - hash_add(tab->table, &obj_node->hash, obj_node->id); 441 - 442 - close(fd); 443 - } 444 - fts_close(fts); 440 + err = nftw(path, do_build_table_cb, nopenfd, flags); 441 + if (err) 442 + break; 445 443 } 446 444 fclose(mntfile); 447 - return 0; 445 + return err; 448 446 } 449 447 450 448 void delete_pinned_obj_table(struct pinned_obj_table *tab)
+4 -1
tools/bpf/bpftool/gen.c
··· 302 302 opts.object_name = obj_name; 303 303 obj = bpf_object__open_mem(obj_data, file_sz, &opts); 304 304 if (IS_ERR(obj)) { 305 + char err_buf[256]; 306 + 307 + libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf)); 308 + p_err("failed to open BPF object file: %s", err_buf); 305 309 obj = NULL; 306 - p_err("failed to open BPF object file: %ld", PTR_ERR(obj)); 307 310 goto out; 308 311 } 309 312
+2 -2
tools/bpf/bpftool/main.h
··· 152 152 int get_fd_type(int fd); 153 153 const char *get_fd_type_name(enum bpf_obj_type type); 154 154 char *get_fdinfo(int fd, const char *key); 155 - int open_obj_pinned(char *path, bool quiet); 156 - int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); 155 + int open_obj_pinned(const char *path, bool quiet); 156 + int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type); 157 157 int mount_bpffs_for_pin(const char *name); 158 158 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); 159 159 int do_pin_fd(int fd, const char *name);
+2 -1
tools/bpf/bpftool/prog.c
··· 59 59 [BPF_PROG_TYPE_TRACING] = "tracing", 60 60 [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", 61 61 [BPF_PROG_TYPE_EXT] = "ext", 62 + [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", 62 63 }; 63 64 64 65 const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); ··· 1906 1905 " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" 1907 1906 " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" 1908 1907 " cgroup/getsockopt | cgroup/setsockopt |\n" 1909 - " struct_ops | fentry | fexit | freplace }\n" 1908 + " struct_ops | fentry | fexit | freplace | sk_lookup }\n" 1910 1909 " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" 1911 1910 " flow_dissector }\n" 1912 1911 " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
+2 -1
tools/bpf/bpftool/skeleton/pid_iter.bpf.c
··· 71 71 72 72 e.pid = task->tgid; 73 73 e.id = get_obj_id(file->private_data, obj_type); 74 - bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm); 74 + bpf_probe_read_kernel(&e.comm, sizeof(e.comm), 75 + task->group_leader->comm); 75 76 bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); 76 77 77 78 return 0;
+47 -4
tools/include/linux/btf_ids.h
··· 3 3 #ifndef _LINUX_BTF_IDS_H 4 4 #define _LINUX_BTF_IDS_H 5 5 6 + #ifdef CONFIG_DEBUG_INFO_BTF 7 + 6 8 #include <linux/compiler.h> /* for __PASTE */ 7 9 8 10 /* ··· 23 21 asm( \ 24 22 ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ 25 23 ".local " #symbol " ; \n" \ 26 - ".type " #symbol ", @object; \n" \ 24 + ".type " #symbol ", STT_OBJECT; \n" \ 27 25 ".size " #symbol ", 4; \n" \ 28 26 #symbol ": \n" \ 29 27 ".zero 4 \n" \ ··· 57 55 * .zero 4 58 56 * 59 57 */ 60 - #define __BTF_ID_LIST(name) \ 58 + #define __BTF_ID_LIST(name, scope) \ 61 59 asm( \ 62 60 ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ 63 - ".local " #name "; \n" \ 61 + "." #scope " " #name "; \n" \ 64 62 #name ":; \n" \ 65 63 ".popsection; \n"); \ 66 64 67 65 #define BTF_ID_LIST(name) \ 68 - __BTF_ID_LIST(name) \ 66 + __BTF_ID_LIST(name, local) \ 69 67 extern u32 name[]; 68 + 69 + #define BTF_ID_LIST_GLOBAL(name) \ 70 + __BTF_ID_LIST(name, globl) 70 71 71 72 /* 72 73 * The BTF_ID_UNUSED macro defines 4 zero bytes. ··· 88 83 ".zero 4 \n" \ 89 84 ".popsection; \n"); 90 85 86 + #else 87 + 88 + #define BTF_ID_LIST(name) static u32 name[5]; 89 + #define BTF_ID(prefix, name) 90 + #define BTF_ID_UNUSED 91 + #define BTF_ID_LIST_GLOBAL(name) u32 name[1]; 92 + 93 + #endif /* CONFIG_DEBUG_INFO_BTF */ 94 + 95 + #ifdef CONFIG_NET 96 + /* Define a list of socket types which can be the argument for 97 + * skc_to_*_sock() helpers. All these sockets should have 98 + * sock_common as the first argument in its memory layout. 
99 + */ 100 + #define BTF_SOCK_TYPE_xxx \ 101 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \ 102 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \ 103 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \ 104 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \ 105 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \ 106 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \ 107 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \ 108 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \ 109 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \ 110 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ 111 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ 112 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ 113 + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) 114 + 115 + enum { 116 + #define BTF_SOCK_TYPE(name, str) name, 117 + BTF_SOCK_TYPE_xxx 118 + #undef BTF_SOCK_TYPE 119 + MAX_BTF_SOCK_TYPE, 120 + }; 121 + 122 + extern u32 btf_sock_ids[]; 123 + #endif 91 124 92 125 #endif
+94 -3
tools/include/uapi/linux/bpf.h
··· 189 189 BPF_PROG_TYPE_STRUCT_OPS, 190 190 BPF_PROG_TYPE_EXT, 191 191 BPF_PROG_TYPE_LSM, 192 + BPF_PROG_TYPE_SK_LOOKUP, 192 193 }; 193 194 194 195 enum bpf_attach_type { ··· 228 227 BPF_CGROUP_INET6_GETSOCKNAME, 229 228 BPF_XDP_DEVMAP, 230 229 BPF_CGROUP_INET_SOCK_RELEASE, 230 + BPF_XDP_CPUMAP, 231 + BPF_SK_LOOKUP, 231 232 __MAX_BPF_ATTACH_TYPE 232 233 }; 233 234 ··· 2422 2419 * Look for an IPv6 socket. 2423 2420 * 2424 2421 * If the *netns* is a negative signed 32-bit integer, then the 2425 - * socket lookup table in the netns associated with the *ctx* will 2422 + * socket lookup table in the netns associated with the *ctx* 2426 2423 * will be used. For the TC hooks, this is the netns of the device 2427 2424 * in the skb. For socket hooks, this is the netns of the socket. 2428 2425 * If *netns* is any other signed 32-bit value greater than or ··· 2459 2456 * Look for an IPv6 socket. 2460 2457 * 2461 2458 * If the *netns* is a negative signed 32-bit integer, then the 2462 - * socket lookup table in the netns associated with the *ctx* will 2459 + * socket lookup table in the netns associated with the *ctx* 2463 2460 * will be used. For the TC hooks, this is the netns of the device 2464 2461 * in the skb. For socket hooks, this is the netns of the socket. 2465 2462 * If *netns* is any other signed 32-bit value greater than or ··· 3071 3068 * 3072 3069 * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) 3073 3070 * Description 3071 + * Helper is overloaded depending on BPF program type. This 3072 + * description applies to **BPF_PROG_TYPE_SCHED_CLS** and 3073 + * **BPF_PROG_TYPE_SCHED_ACT** programs. 3074 + * 3074 3075 * Assign the *sk* to the *skb*. When combined with appropriate 3075 3076 * routing configuration to receive the packet towards the socket, 3076 3077 * will cause *skb* to be delivered to the specified socket. ··· 3099 3092 * 3100 3093 * **-ESOCKTNOSUPPORT** if the socket type is not supported 3101 3094 * (reuseport). 
3095 + * 3096 + * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) 3097 + * Description 3098 + * Helper is overloaded depending on BPF program type. This 3099 + * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs. 3100 + * 3101 + * Select the *sk* as a result of a socket lookup. 3102 + * 3103 + * For the operation to succeed passed socket must be compatible 3104 + * with the packet description provided by the *ctx* object. 3105 + * 3106 + * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must 3107 + * be an exact match. While IP family (**AF_INET** or 3108 + * **AF_INET6**) must be compatible, that is IPv6 sockets 3109 + * that are not v6-only can be selected for IPv4 packets. 3110 + * 3111 + * Only TCP listeners and UDP unconnected sockets can be 3112 + * selected. *sk* can also be NULL to reset any previous 3113 + * selection. 3114 + * 3115 + * *flags* argument can combination of following values: 3116 + * 3117 + * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous 3118 + * socket selection, potentially done by a BPF program 3119 + * that ran before us. 3120 + * 3121 + * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip 3122 + * load-balancing within reuseport group for the socket 3123 + * being selected. 3124 + * 3125 + * On success *ctx->sk* will point to the selected socket. 3126 + * 3127 + * Return 3128 + * 0 on success, or a negative errno in case of failure. 3129 + * 3130 + * * **-EAFNOSUPPORT** if socket family (*sk->family*) is 3131 + * not compatible with packet family (*ctx->family*). 3132 + * 3133 + * * **-EEXIST** if socket has been already selected, 3134 + * potentially by another program, and 3135 + * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified. 3136 + * 3137 + * * **-EINVAL** if unsupported flags were specified. 3138 + * 3139 + * * **-EPROTOTYPE** if socket L4 protocol 3140 + * (*sk->protocol*) doesn't match packet protocol 3141 + * (*ctx->protocol*). 
3142 + * 3143 + * * **-ESOCKTNOSUPPORT** if socket is not in allowed 3144 + * state (TCP listening or UDP unconnected). 3102 3145 * 3103 3146 * u64 bpf_ktime_get_boot_ns(void) 3104 3147 * Description ··· 3663 3606 BPF_RINGBUF_HDR_SZ = 8, 3664 3607 }; 3665 3608 3609 + /* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */ 3610 + enum { 3611 + BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0), 3612 + BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1), 3613 + }; 3614 + 3666 3615 /* Mode for BPF_FUNC_skb_adjust_room helper. */ 3667 3616 enum bpf_adj_room_mode { 3668 3617 BPF_ADJ_ROOM_NET, ··· 3912 3849 } bpf_prog; 3913 3850 }; 3914 3851 3852 + /* CPUMAP map-value layout 3853 + * 3854 + * The struct data-layout of map-value is a configuration interface. 3855 + * New members can only be added to the end of this structure. 3856 + */ 3857 + struct bpf_cpumap_val { 3858 + __u32 qsize; /* queue size to remote target CPU */ 3859 + union { 3860 + int fd; /* prog fd on map write */ 3861 + __u32 id; /* prog id on map read */ 3862 + } bpf_prog; 3863 + }; 3864 + 3915 3865 enum sk_action { 3916 3866 SK_DROP = 0, 3917 3867 SK_PASS, ··· 4062 3986 4063 3987 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed 4064 3988 * by user and intended to be used by socket (e.g. to bind to, depends on 4065 - * attach attach type). 3989 + * attach type). 4066 3990 */ 4067 3991 struct bpf_sock_addr { 4068 3992 __u32 user_family; /* Allows 4-byte read, but no write. */ ··· 4411 4335 __u32 pid; 4412 4336 __u32 tgid; 4413 4337 }; 4338 + 4339 + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. 
*/ 4340 + struct bpf_sk_lookup { 4341 + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ 4342 + 4343 + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ 4344 + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ 4345 + __u32 remote_ip4; /* Network byte order */ 4346 + __u32 remote_ip6[4]; /* Network byte order */ 4347 + __u32 remote_port; /* Network byte order */ 4348 + __u32 local_ip4; /* Network byte order */ 4349 + __u32 local_ip6[4]; /* Network byte order */ 4350 + __u32 local_port; /* Host byte order */ 4351 + }; 4352 + 4414 4353 #endif /* _UAPI__LINUX_BPF_H__ */
+1 -1
tools/lib/bpf/bpf_helpers.h
··· 40 40 * Helper macro to manipulate data structures 41 41 */ 42 42 #ifndef offsetof 43 - #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) 43 + #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) 44 44 #endif 45 45 #ifndef container_of 46 46 #define container_of(ptr, type, member) \
+5
tools/lib/bpf/libbpf.c
··· 6799 6799 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); 6800 6800 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); 6801 6801 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); 6802 + BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); 6802 6803 6803 6804 enum bpf_attach_type 6804 6805 bpf_program__get_expected_attach_type(struct bpf_program *prog) ··· 6913 6912 .attach_fn = attach_iter), 6914 6913 BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, 6915 6914 BPF_XDP_DEVMAP), 6915 + BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, 6916 + BPF_XDP_CPUMAP), 6916 6917 BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), 6917 6918 BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), 6918 6919 BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), ··· 6982 6979 BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, 6983 6980 BPF_CGROUP_SETSOCKOPT), 6984 6981 BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), 6982 + BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP, 6983 + BPF_SK_LOOKUP), 6985 6984 }; 6986 6985 6987 6986 #undef BPF_PROG_SEC_IMPL
+2
tools/lib/bpf/libbpf.h
··· 350 350 LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); 351 351 LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); 352 352 LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); 353 + LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); 353 354 354 355 LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); 355 356 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, ··· 378 377 LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); 379 378 LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); 380 379 LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); 380 + LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog); 381 381 382 382 /* 383 383 * No need for __attribute__((packed)), all members of 'bpf_map_def'
+2
tools/lib/bpf/libbpf.map
··· 287 287 bpf_map__type; 288 288 bpf_map__value_size; 289 289 bpf_program__autoload; 290 + bpf_program__is_sk_lookup; 290 291 bpf_program__set_autoload; 292 + bpf_program__set_sk_lookup; 291 293 btf__set_fd; 292 294 } LIBBPF_0.0.9;
+3
tools/lib/bpf/libbpf_probes.c
··· 78 78 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 79 79 xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; 80 80 break; 81 + case BPF_PROG_TYPE_SK_LOOKUP: 82 + xattr.expected_attach_type = BPF_SK_LOOKUP; 83 + break; 81 84 case BPF_PROG_TYPE_KPROBE: 82 85 xattr.kern_version = get_kernel_version(); 83 86 break;
+35 -23
tools/testing/selftests/bpf/network_helpers.c
··· 73 73 socklen_t len; 74 74 int fd; 75 75 76 - if (family == AF_INET) { 77 - struct sockaddr_in *sin = (void *)&addr; 78 - 79 - sin->sin_family = AF_INET; 80 - sin->sin_port = htons(port); 81 - if (addr_str && 82 - inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { 83 - log_err("inet_pton(AF_INET, %s)", addr_str); 84 - return -1; 85 - } 86 - len = sizeof(*sin); 87 - } else { 88 - struct sockaddr_in6 *sin6 = (void *)&addr; 89 - 90 - sin6->sin6_family = AF_INET6; 91 - sin6->sin6_port = htons(port); 92 - if (addr_str && 93 - inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { 94 - log_err("inet_pton(AF_INET6, %s)", addr_str); 95 - return -1; 96 - } 97 - len = sizeof(*sin6); 98 - } 76 + if (make_sockaddr(family, addr_str, port, &addr, &len)) 77 + return -1; 99 78 100 79 fd = socket(family, type, 0); 101 80 if (fd < 0) { ··· 172 193 return -1; 173 194 174 195 return 0; 196 + } 197 + 198 + int make_sockaddr(int family, const char *addr_str, __u16 port, 199 + struct sockaddr_storage *addr, socklen_t *len) 200 + { 201 + if (family == AF_INET) { 202 + struct sockaddr_in *sin = (void *)addr; 203 + 204 + sin->sin_family = AF_INET; 205 + sin->sin_port = htons(port); 206 + if (addr_str && 207 + inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { 208 + log_err("inet_pton(AF_INET, %s)", addr_str); 209 + return -1; 210 + } 211 + if (len) 212 + *len = sizeof(*sin); 213 + return 0; 214 + } else if (family == AF_INET6) { 215 + struct sockaddr_in6 *sin6 = (void *)addr; 216 + 217 + sin6->sin6_family = AF_INET6; 218 + sin6->sin6_port = htons(port); 219 + if (addr_str && 220 + inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { 221 + log_err("inet_pton(AF_INET6, %s)", addr_str); 222 + return -1; 223 + } 224 + if (len) 225 + *len = sizeof(*sin6); 226 + return 0; 227 + } 228 + return -1; 175 229 }
+2
tools/testing/selftests/bpf/network_helpers.h
··· 37 37 int timeout_ms); 38 38 int connect_to_fd(int server_fd, int timeout_ms); 39 39 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); 40 + int make_sockaddr(int family, const char *addr_str, __u16 port, 41 + struct sockaddr_storage *addr, socklen_t *len); 40 42 41 43 #endif
+26 -8
tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
··· 6 6 #include <bpf/libbpf.h> 7 7 #include <linux/btf.h> 8 8 #include <linux/kernel.h> 9 + #define CONFIG_DEBUG_INFO_BTF 9 10 #include <linux/btf_ids.h> 10 11 #include "test_progs.h" 11 12 ··· 28 27 { "func", BTF_KIND_FUNC, -1 }, 29 28 }; 30 29 31 - BTF_ID_LIST(test_list) 30 + BTF_ID_LIST(test_list_local) 31 + BTF_ID_UNUSED 32 + BTF_ID(typedef, S) 33 + BTF_ID(typedef, T) 34 + BTF_ID(typedef, U) 35 + BTF_ID(struct, S) 36 + BTF_ID(union, U) 37 + BTF_ID(func, func) 38 + 39 + extern __u32 test_list_global[]; 40 + BTF_ID_LIST_GLOBAL(test_list_global) 32 41 BTF_ID_UNUSED 33 42 BTF_ID(typedef, S) 34 43 BTF_ID(typedef, T) ··· 104 93 105 94 int test_resolve_btfids(void) 106 95 { 107 - unsigned int i; 96 + __u32 *test_list, *test_lists[] = { test_list_local, test_list_global }; 97 + unsigned int i, j; 108 98 int ret = 0; 109 99 110 100 if (resolve_symbols()) 111 101 return -1; 112 102 113 - /* Check BTF_ID_LIST(test_list) IDs */ 114 - for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) { 115 - ret = CHECK(test_list[i] != test_symbols[i].id, 116 - "id_check", 117 - "wrong ID for %s (%d != %d)\n", test_symbols[i].name, 118 - test_list[i], test_symbols[i].id); 103 + /* Check BTF_ID_LIST(test_list_local) and 104 + * BTF_ID_LIST_GLOBAL(test_list_global) IDs 105 + */ 106 + for (j = 0; j < ARRAY_SIZE(test_lists); j++) { 107 + test_list = test_lists[j]; 108 + for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) { 109 + ret = CHECK(test_list[i] != test_symbols[i].id, 110 + "id_check", 111 + "wrong ID for %s (%d != %d)\n", 112 + test_symbols[i].name, 113 + test_list[i], test_symbols[i].id); 114 + } 119 115 } 120 116 121 117 return ret;
+1282
tools/testing/selftests/bpf/prog_tests/sk_lookup.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 + // Copyright (c) 2020 Cloudflare 3 + /* 4 + * Test BPF attach point for INET socket lookup (BPF_SK_LOOKUP). 5 + * 6 + * Tests exercise: 7 + * - attaching/detaching/querying programs to BPF_SK_LOOKUP hook, 8 + * - redirecting socket lookup to a socket selected by BPF program, 9 + * - failing a socket lookup on BPF program's request, 10 + * - error scenarios for selecting a socket from BPF program, 11 + * - accessing BPF program context, 12 + * - attaching and running multiple BPF programs. 13 + * 14 + * Tests run in a dedicated network namespace. 15 + */ 16 + 17 + #define _GNU_SOURCE 18 + #include <arpa/inet.h> 19 + #include <assert.h> 20 + #include <errno.h> 21 + #include <error.h> 22 + #include <fcntl.h> 23 + #include <sched.h> 24 + #include <stdio.h> 25 + #include <sys/types.h> 26 + #include <sys/stat.h> 27 + #include <unistd.h> 28 + 29 + #include <bpf/libbpf.h> 30 + #include <bpf/bpf.h> 31 + 32 + #include "test_progs.h" 33 + #include "bpf_rlimit.h" 34 + #include "bpf_util.h" 35 + #include "cgroup_helpers.h" 36 + #include "network_helpers.h" 37 + #include "test_sk_lookup.skel.h" 38 + 39 + /* External (address, port) pairs the client sends packets to. */ 40 + #define EXT_IP4 "127.0.0.1" 41 + #define EXT_IP6 "fd00::1" 42 + #define EXT_PORT 7007 43 + 44 + /* Internal (address, port) pairs the server listens/receives at. 
 */
#define INT_IP4		"127.0.0.2"
#define INT_IP4_V6	"::ffff:127.0.0.2"
#define INT_IP6		"fd00::2"
#define INT_PORT	8008

#define IO_TIMEOUT_SEC	3

/* Slots in the redirect sock map; MAX_SERVERS doubles as the array size. */
enum server {
	SERVER_A = 0,
	SERVER_B = 1,
	MAX_SERVERS,
};

/* Indices into the run map used by the multi-program tests. */
enum {
	PROG1 = 0,
	PROG2,
};

struct inet_addr {
	const char *ip;
	unsigned short port;
};

/* Parameters for one lookup redirect/drop test case. */
struct test {
	const char *desc;
	struct bpf_program *lookup_prog;
	struct bpf_program *reuseport_prog;	/* optional, attached with SO_ATTACH_REUSEPORT_EBPF */
	struct bpf_map *sock_map;
	int sotype;				/* SOCK_STREAM or SOCK_DGRAM */
	struct inet_addr connect_to;		/* client destination */
	struct inet_addr listen_at;		/* server bind address */
	enum server accept_on;			/* server expected to receive the traffic */
};

static __u32 duration;	/* for CHECK macro */

/* True if 'ip' is an IPv6 literal (contains a colon). */
static bool is_ipv6(const char *ip)
{
	return !!strchr(ip, ':');
}

/* Attach a reuseport BPF program to an already created socket.
 * Returns 0 on success, -1 with errno set on failure.
 */
static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog)
{
	int err, prog_fd;

	prog_fd = bpf_program__fd(reuseport_prog);
	if (prog_fd < 0) {
		errno = -prog_fd;
		return -1;
	}

	err = setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
			 &prog_fd, sizeof(prog_fd));
	if (err)
		return -1;

	return 0;
}

/* Sockaddr length for the stored family; 0 for unknown families. */
static socklen_t inetaddr_len(const struct sockaddr_storage *addr)
{
	return (addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) :
		addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : 0);
}

/* Create a socket of 'sotype' with send/recv timeouts set, and fill
 * *addr with the (ip, port) sockaddr. Returns the fd, or -1 on error.
 */
static int make_socket(int sotype, const char *ip, int port,
		       struct sockaddr_storage *addr)
{
	struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC };
	int err, family, fd;

	family = is_ipv6(ip) ? AF_INET6 : AF_INET;
	err = make_sockaddr(family, ip, port, addr, NULL);
	if (CHECK(err, "make_address", "failed\n"))
		return -1;

	fd = socket(addr->ss_family, sotype, 0);
	if (CHECK(fd < 0, "socket", "failed\n")) {
		log_err("failed to make socket");
		return -1;
	}

	/* Bound all I/O so a broken test fails instead of hanging. */
	err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (CHECK(err, "setsockopt(SO_SNDTIMEO)", "failed\n")) {
		log_err("failed to set SNDTIMEO");
		close(fd);
		return -1;
	}

	err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	if (CHECK(err, "setsockopt(SO_RCVTIMEO)", "failed\n")) {
		log_err("failed to set RCVTIMEO");
		close(fd);
		return -1;
	}

	return fd;
}

/* Create a listening (TCP) or bound (UDP) server socket at (ip, port),
 * optionally with a reuseport BPF program attached.
 * Returns the fd, or -1 on error.
 */
static int make_server(int sotype, const char *ip, int port,
		       struct bpf_program *reuseport_prog)
{
	struct sockaddr_storage addr = {0};
	const int one = 1;
	int err, fd = -1;

	fd = make_socket(sotype, ip, port, &addr);
	if (fd < 0)
		return -1;

	/* Ask for the original destination address of received datagrams.
	 * The SOL_IP option is enabled for UDPv6 sockets as well, so that
	 * IPv4-mapped IPv6 traffic works.
	 */
	if (sotype == SOCK_DGRAM) {
		err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
				 sizeof(one));
		if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
			log_err("failed to enable IP_RECVORIGDSTADDR");
			goto fail;
		}
	}

	if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
		err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
				 sizeof(one));
		if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
			log_err("failed to enable IPV6_RECVORIGDSTADDR");
			goto fail;
		}
	}

	if (sotype == SOCK_STREAM) {
		err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
				 sizeof(one));
		if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
			log_err("failed to enable SO_REUSEADDR");
			goto fail;
		}
	}

	if (reuseport_prog) {
		err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
				 sizeof(one));
		if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
			log_err("failed to enable SO_REUSEPORT");
			goto fail;
		}
	}

	err = bind(fd, (void *)&addr, inetaddr_len(&addr));
	if (CHECK(err, "bind", "failed\n")) {
		log_err("failed to bind listen socket");
		goto fail;
	}

	if (sotype == SOCK_STREAM) {
		err = listen(fd, SOMAXCONN);
		if (CHECK(err, "make_server", "listen")) {
			log_err("failed to listen on port %d", port);
			goto fail;
		}
	}

	/* Late attach reuseport prog so we can have one init path */
	if (reuseport_prog) {
		err = attach_reuseport(fd, reuseport_prog);
		if (CHECK(err, "attach_reuseport", "failed\n")) {
			log_err("failed to attach reuseport prog");
			goto fail;
		}
	}

	return fd;
fail:
	close(fd);
	return -1;
}

/* Create a client socket connected to (ip, port). Returns the fd, or -1. */
static int make_client(int sotype, const char *ip, int port)
{
	struct sockaddr_storage addr = {0};
	int err, fd;

	fd = make_socket(sotype, ip, port, &addr);
	if (fd < 0)
		return -1;

	err = connect(fd, (void *)&addr, inetaddr_len(&addr));
	if (CHECK(err, "make_client", "connect")) {
		log_err("failed to connect client socket");
		goto fail;
	}

	return fd;
fail:
	close(fd);
	return -1;
}

/* Send a single byte; 0 on success, -1 on failed/partial send. */
static int send_byte(int fd)
{
	ssize_t n;

	errno = 0;
	n = send(fd, "a", 1, 0);
	if (CHECK(n <= 0, "send_byte", "send")) {
		log_err("failed/partial send");
		return -1;
	}
	return 0;
}

/* Receive a single byte; 0 on success, -1 on failed/partial recv. */
static int recv_byte(int fd)
{
	char buf[1];
	ssize_t n;

	n = recv(fd, buf, sizeof(buf), 0);
	if (CHECK(n <= 0, "recv_byte", "recv")) {
		log_err("failed/partial recv");
		return -1;
	}
	return 0;
}

/* Accept one connection on server_fd and echo one byte back to the peer. */
static int tcp_recv_send(int server_fd)
{
	char buf[1];
	int ret, fd;
	ssize_t n;

	fd = accept(server_fd, NULL, NULL);
	if (CHECK(fd < 0, "accept", "failed\n")) {
		log_err("failed to accept");
		return -1;
	}

	n = recv(fd, buf, sizeof(buf), 0);
	if (CHECK(n <= 0, "recv", "failed\n")) {
		log_err("failed/partial recv");
		ret = -1;
		goto close;
	}

	n = send(fd, buf, n, 0);
	if (CHECK(n <= 0, "send", "failed\n")) {
		log_err("failed/partial send");
		ret = -1;
		goto close;
	}

	ret = 0;
close:
	close(fd);
	return ret;
}

/* Rewrite *ss in place from an AF_INET sockaddr to the corresponding
 * IPv4-mapped IPv6 sockaddr (::ffff:a.b.c.d), keeping the port.
 * NOTE(review): assumes s6_addr[0..9] are already zero in the buffer
 * the caller hands in — confirm at call sites.
 */
static void v4_to_v6(struct sockaddr_storage *ss)
{
	struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ss;
	struct sockaddr_in v4 = *(struct sockaddr_in *)ss;

	v6->sin6_family = AF_INET6;
	v6->sin6_port = v4.sin_port;
	v6->sin6_addr.s6_addr[10] = 0xff;
	v6->sin6_addr.s6_addr[11] = 0xff;
	memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
}
/* UDP echo server step: receive one datagram together with its original
 * destination address (IP(V6)_ORIGDSTADDR ancillary data), then send the
 * payload back from a new socket bound to that original destination.
 * Replying from the original destination is needed because the client is
 * connected to it; a reply from any other source would be rejected.
 * Returns 0 on success, -1 on failure.
 */
static int udp_recv_send(int server_fd)
{
	char cmsg_buf[CMSG_SPACE(sizeof(struct sockaddr_storage))];
	struct sockaddr_storage _src_addr = { 0 };
	struct sockaddr_storage *src_addr = &_src_addr;
	struct sockaddr_storage *dst_addr = NULL;
	struct msghdr msg = { 0 };
	struct iovec iov = { 0 };
	struct cmsghdr *cm;
	char buf[1];
	int ret, fd;
	ssize_t n;

	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);

	msg.msg_name = src_addr;
	msg.msg_namelen = sizeof(*src_addr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cmsg_buf;
	msg.msg_controllen = sizeof(cmsg_buf);

	errno = 0;
	n = recvmsg(server_fd, &msg, 0);
	if (CHECK(n <= 0, "recvmsg", "failed\n")) {
		log_err("failed to receive");
		return -1;
	}
	if (CHECK(msg.msg_flags & MSG_CTRUNC, "recvmsg", "truncated cmsg\n"))
		return -1;

	/* Walk the control messages looking for the original destination. */
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if ((cm->cmsg_level == SOL_IP &&
		     cm->cmsg_type == IP_ORIGDSTADDR) ||
		    (cm->cmsg_level == SOL_IPV6 &&
		     cm->cmsg_type == IPV6_ORIGDSTADDR)) {
			dst_addr = (struct sockaddr_storage *)CMSG_DATA(cm);
			break;
		}
		log_err("warning: ignored cmsg at level %d type %d",
			cm->cmsg_level, cm->cmsg_type);
	}
	if (CHECK(!dst_addr, "recvmsg", "missing ORIGDSTADDR\n"))
		return -1;

	/* Server socket bound to IPv4-mapped IPv6 address */
	if (src_addr->ss_family == AF_INET6 &&
	    dst_addr->ss_family == AF_INET) {
		v4_to_v6(dst_addr);
	}

	/* Reply from original destination address. */
	fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
	if (CHECK(fd < 0, "socket", "failed\n")) {
		log_err("failed to create tx socket");
		return -1;
	}

	ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
	if (CHECK(ret, "bind", "failed\n")) {
		log_err("failed to bind tx socket");
		goto out;
	}

	/* Reuse msg (payload + peer address), minus the control data. */
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	n = sendmsg(fd, &msg, 0);
	if (CHECK(n <= 0, "sendmsg", "failed\n")) {
		log_err("failed to send echo reply");
		ret = -1;
		goto out;
	}

	ret = 0;
out:
	close(fd);
	return ret;
}

/* One TCP round trip: client sends a byte, server echoes it, client reads
 * it back. Returns 0 on success, -1 on the first failed step.
 */
static int tcp_echo_test(int client_fd, int server_fd)
{
	int err;

	err = send_byte(client_fd);
	if (err)
		return -1;
	err = tcp_recv_send(server_fd);
	if (err)
		return -1;
	err = recv_byte(client_fd);
	if (err)
		return -1;

	return 0;
}

/* One UDP round trip, mirroring tcp_echo_test(). */
static int udp_echo_test(int client_fd, int server_fd)
{
	int err;

	err = send_byte(client_fd);
	if (err)
		return -1;
	err = udp_recv_send(server_fd);
	if (err)
		return -1;
	err = recv_byte(client_fd);
	if (err)
		return -1;

	return 0;
}

/* Attach 'prog' to the current network namespace (BPF_SK_LOOKUP hook).
 * Returns the new link, or NULL on failure.
 */
static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
{
	struct bpf_link *link;
	int net_fd;

	net_fd = open("/proc/self/ns/net", O_RDONLY);
	if (CHECK(net_fd < 0, "open", "failed\n")) {
		log_err("failed to open /proc/self/ns/net");
		return NULL;
	}

	link = bpf_program__attach_netns(prog, net_fd);
	if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
		errno = -PTR_ERR(link);
		log_err("failed to attach program '%s' to netns",
			bpf_program__name(prog));
		link = NULL;
	}

	close(net_fd);
	return link;
}

static int update_lookup_map(struct bpf_map *map, int index, int sock_fd) 450 + { 451 + int err, map_fd; 452 + uint64_t value; 453 + 454 + map_fd = bpf_map__fd(map); 455 + if (CHECK(map_fd < 0, "bpf_map__fd", "failed\n")) { 456 + errno = -map_fd; 457 + log_err("failed to get map FD"); 458 + return -1; 459 + } 460 + 461 + value = (uint64_t)sock_fd; 462 + err = bpf_map_update_elem(map_fd, &index, &value, BPF_NOEXIST); 463 + if (CHECK(err, "bpf_map_update_elem", "failed\n")) { 464 + log_err("failed to update redir_map @ %d", index); 465 + return -1; 466 + } 467 + 468 + return 0; 469 + } 470 + 471 + static __u32 link_info_prog_id(struct bpf_link *link) 472 + { 473 + struct bpf_link_info info = {}; 474 + __u32 info_len = sizeof(info); 475 + int link_fd, err; 476 + 477 + link_fd = bpf_link__fd(link); 478 + if (CHECK(link_fd < 0, "bpf_link__fd", "failed\n")) { 479 + errno = -link_fd; 480 + log_err("bpf_link__fd failed"); 481 + return 0; 482 + } 483 + 484 + err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len); 485 + if (CHECK(err, "bpf_obj_get_info_by_fd", "failed\n")) { 486 + log_err("bpf_obj_get_info_by_fd"); 487 + return 0; 488 + } 489 + if (CHECK(info_len != sizeof(info), "bpf_obj_get_info_by_fd", 490 + "unexpected info len %u\n", info_len)) 491 + return 0; 492 + 493 + return info.prog_id; 494 + } 495 + 496 + static void query_lookup_prog(struct test_sk_lookup *skel) 497 + { 498 + struct bpf_link *link[3] = {}; 499 + __u32 attach_flags = 0; 500 + __u32 prog_ids[3] = {}; 501 + __u32 prog_cnt = 3; 502 + __u32 prog_id; 503 + int net_fd; 504 + int err; 505 + 506 + net_fd = open("/proc/self/ns/net", O_RDONLY); 507 + if (CHECK(net_fd < 0, "open", "failed\n")) { 508 + log_err("failed to open /proc/self/ns/net"); 509 + return; 510 + } 511 + 512 + link[0] = attach_lookup_prog(skel->progs.lookup_pass); 513 + if (!link[0]) 514 + goto close; 515 + link[1] = attach_lookup_prog(skel->progs.lookup_pass); 516 + if (!link[1]) 517 + goto detach; 518 + link[2] = 
attach_lookup_prog(skel->progs.lookup_drop); 519 + if (!link[2]) 520 + goto detach; 521 + 522 + err = bpf_prog_query(net_fd, BPF_SK_LOOKUP, 0 /* query flags */, 523 + &attach_flags, prog_ids, &prog_cnt); 524 + if (CHECK(err, "bpf_prog_query", "failed\n")) { 525 + log_err("failed to query lookup prog"); 526 + goto detach; 527 + } 528 + 529 + errno = 0; 530 + if (CHECK(attach_flags != 0, "bpf_prog_query", 531 + "wrong attach_flags on query: %u", attach_flags)) 532 + goto detach; 533 + if (CHECK(prog_cnt != 3, "bpf_prog_query", 534 + "wrong program count on query: %u", prog_cnt)) 535 + goto detach; 536 + prog_id = link_info_prog_id(link[0]); 537 + CHECK(prog_ids[0] != prog_id, "bpf_prog_query", 538 + "invalid program #0 id on query: %u != %u\n", 539 + prog_ids[0], prog_id); 540 + prog_id = link_info_prog_id(link[1]); 541 + CHECK(prog_ids[1] != prog_id, "bpf_prog_query", 542 + "invalid program #1 id on query: %u != %u\n", 543 + prog_ids[1], prog_id); 544 + prog_id = link_info_prog_id(link[2]); 545 + CHECK(prog_ids[2] != prog_id, "bpf_prog_query", 546 + "invalid program #2 id on query: %u != %u\n", 547 + prog_ids[2], prog_id); 548 + 549 + detach: 550 + if (link[2]) 551 + bpf_link__destroy(link[2]); 552 + if (link[1]) 553 + bpf_link__destroy(link[1]); 554 + if (link[0]) 555 + bpf_link__destroy(link[0]); 556 + close: 557 + close(net_fd); 558 + } 559 + 560 + static void run_lookup_prog(const struct test *t) 561 + { 562 + int client_fd, server_fds[MAX_SERVERS] = { -1 }; 563 + struct bpf_link *lookup_link; 564 + int i, err; 565 + 566 + lookup_link = attach_lookup_prog(t->lookup_prog); 567 + if (!lookup_link) 568 + return; 569 + 570 + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { 571 + server_fds[i] = make_server(t->sotype, t->listen_at.ip, 572 + t->listen_at.port, 573 + t->reuseport_prog); 574 + if (server_fds[i] < 0) 575 + goto close; 576 + 577 + err = update_lookup_map(t->sock_map, i, server_fds[i]); 578 + if (err) 579 + goto close; 580 + 581 + /* want just one server for 
non-reuseport test */ 582 + if (!t->reuseport_prog) 583 + break; 584 + } 585 + 586 + client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port); 587 + if (client_fd < 0) 588 + goto close; 589 + 590 + if (t->sotype == SOCK_STREAM) 591 + tcp_echo_test(client_fd, server_fds[t->accept_on]); 592 + else 593 + udp_echo_test(client_fd, server_fds[t->accept_on]); 594 + 595 + close(client_fd); 596 + close: 597 + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { 598 + if (server_fds[i] != -1) 599 + close(server_fds[i]); 600 + } 601 + bpf_link__destroy(lookup_link); 602 + } 603 + 604 + static void test_redirect_lookup(struct test_sk_lookup *skel) 605 + { 606 + const struct test tests[] = { 607 + { 608 + .desc = "TCP IPv4 redir port", 609 + .lookup_prog = skel->progs.redir_port, 610 + .sock_map = skel->maps.redir_map, 611 + .sotype = SOCK_STREAM, 612 + .connect_to = { EXT_IP4, EXT_PORT }, 613 + .listen_at = { EXT_IP4, INT_PORT }, 614 + }, 615 + { 616 + .desc = "TCP IPv4 redir addr", 617 + .lookup_prog = skel->progs.redir_ip4, 618 + .sock_map = skel->maps.redir_map, 619 + .sotype = SOCK_STREAM, 620 + .connect_to = { EXT_IP4, EXT_PORT }, 621 + .listen_at = { INT_IP4, EXT_PORT }, 622 + }, 623 + { 624 + .desc = "TCP IPv4 redir with reuseport", 625 + .lookup_prog = skel->progs.select_sock_a, 626 + .reuseport_prog = skel->progs.select_sock_b, 627 + .sock_map = skel->maps.redir_map, 628 + .sotype = SOCK_STREAM, 629 + .connect_to = { EXT_IP4, EXT_PORT }, 630 + .listen_at = { INT_IP4, INT_PORT }, 631 + .accept_on = SERVER_B, 632 + }, 633 + { 634 + .desc = "TCP IPv4 redir skip reuseport", 635 + .lookup_prog = skel->progs.select_sock_a_no_reuseport, 636 + .reuseport_prog = skel->progs.select_sock_b, 637 + .sock_map = skel->maps.redir_map, 638 + .sotype = SOCK_STREAM, 639 + .connect_to = { EXT_IP4, EXT_PORT }, 640 + .listen_at = { INT_IP4, INT_PORT }, 641 + .accept_on = SERVER_A, 642 + }, 643 + { 644 + .desc = "TCP IPv6 redir port", 645 + .lookup_prog = skel->progs.redir_port, 
646 + .sock_map = skel->maps.redir_map, 647 + .sotype = SOCK_STREAM, 648 + .connect_to = { EXT_IP6, EXT_PORT }, 649 + .listen_at = { EXT_IP6, INT_PORT }, 650 + }, 651 + { 652 + .desc = "TCP IPv6 redir addr", 653 + .lookup_prog = skel->progs.redir_ip6, 654 + .sock_map = skel->maps.redir_map, 655 + .sotype = SOCK_STREAM, 656 + .connect_to = { EXT_IP6, EXT_PORT }, 657 + .listen_at = { INT_IP6, EXT_PORT }, 658 + }, 659 + { 660 + .desc = "TCP IPv4->IPv6 redir port", 661 + .lookup_prog = skel->progs.redir_port, 662 + .sock_map = skel->maps.redir_map, 663 + .sotype = SOCK_STREAM, 664 + .connect_to = { EXT_IP4, EXT_PORT }, 665 + .listen_at = { INT_IP4_V6, INT_PORT }, 666 + }, 667 + { 668 + .desc = "TCP IPv6 redir with reuseport", 669 + .lookup_prog = skel->progs.select_sock_a, 670 + .reuseport_prog = skel->progs.select_sock_b, 671 + .sock_map = skel->maps.redir_map, 672 + .sotype = SOCK_STREAM, 673 + .connect_to = { EXT_IP6, EXT_PORT }, 674 + .listen_at = { INT_IP6, INT_PORT }, 675 + .accept_on = SERVER_B, 676 + }, 677 + { 678 + .desc = "TCP IPv6 redir skip reuseport", 679 + .lookup_prog = skel->progs.select_sock_a_no_reuseport, 680 + .reuseport_prog = skel->progs.select_sock_b, 681 + .sock_map = skel->maps.redir_map, 682 + .sotype = SOCK_STREAM, 683 + .connect_to = { EXT_IP6, EXT_PORT }, 684 + .listen_at = { INT_IP6, INT_PORT }, 685 + .accept_on = SERVER_A, 686 + }, 687 + { 688 + .desc = "UDP IPv4 redir port", 689 + .lookup_prog = skel->progs.redir_port, 690 + .sock_map = skel->maps.redir_map, 691 + .sotype = SOCK_DGRAM, 692 + .connect_to = { EXT_IP4, EXT_PORT }, 693 + .listen_at = { EXT_IP4, INT_PORT }, 694 + }, 695 + { 696 + .desc = "UDP IPv4 redir addr", 697 + .lookup_prog = skel->progs.redir_ip4, 698 + .sock_map = skel->maps.redir_map, 699 + .sotype = SOCK_DGRAM, 700 + .connect_to = { EXT_IP4, EXT_PORT }, 701 + .listen_at = { INT_IP4, EXT_PORT }, 702 + }, 703 + { 704 + .desc = "UDP IPv4 redir with reuseport", 705 + .lookup_prog = skel->progs.select_sock_a, 706 + 
.reuseport_prog = skel->progs.select_sock_b, 707 + .sock_map = skel->maps.redir_map, 708 + .sotype = SOCK_DGRAM, 709 + .connect_to = { EXT_IP4, EXT_PORT }, 710 + .listen_at = { INT_IP4, INT_PORT }, 711 + .accept_on = SERVER_B, 712 + }, 713 + { 714 + .desc = "UDP IPv4 redir skip reuseport", 715 + .lookup_prog = skel->progs.select_sock_a_no_reuseport, 716 + .reuseport_prog = skel->progs.select_sock_b, 717 + .sock_map = skel->maps.redir_map, 718 + .sotype = SOCK_DGRAM, 719 + .connect_to = { EXT_IP4, EXT_PORT }, 720 + .listen_at = { INT_IP4, INT_PORT }, 721 + .accept_on = SERVER_A, 722 + }, 723 + { 724 + .desc = "UDP IPv6 redir port", 725 + .lookup_prog = skel->progs.redir_port, 726 + .sock_map = skel->maps.redir_map, 727 + .sotype = SOCK_DGRAM, 728 + .connect_to = { EXT_IP6, EXT_PORT }, 729 + .listen_at = { EXT_IP6, INT_PORT }, 730 + }, 731 + { 732 + .desc = "UDP IPv6 redir addr", 733 + .lookup_prog = skel->progs.redir_ip6, 734 + .sock_map = skel->maps.redir_map, 735 + .sotype = SOCK_DGRAM, 736 + .connect_to = { EXT_IP6, EXT_PORT }, 737 + .listen_at = { INT_IP6, EXT_PORT }, 738 + }, 739 + { 740 + .desc = "UDP IPv4->IPv6 redir port", 741 + .lookup_prog = skel->progs.redir_port, 742 + .sock_map = skel->maps.redir_map, 743 + .sotype = SOCK_DGRAM, 744 + .listen_at = { INT_IP4_V6, INT_PORT }, 745 + .connect_to = { EXT_IP4, EXT_PORT }, 746 + }, 747 + { 748 + .desc = "UDP IPv6 redir and reuseport", 749 + .lookup_prog = skel->progs.select_sock_a, 750 + .reuseport_prog = skel->progs.select_sock_b, 751 + .sock_map = skel->maps.redir_map, 752 + .sotype = SOCK_DGRAM, 753 + .connect_to = { EXT_IP6, EXT_PORT }, 754 + .listen_at = { INT_IP6, INT_PORT }, 755 + .accept_on = SERVER_B, 756 + }, 757 + { 758 + .desc = "UDP IPv6 redir skip reuseport", 759 + .lookup_prog = skel->progs.select_sock_a_no_reuseport, 760 + .reuseport_prog = skel->progs.select_sock_b, 761 + .sock_map = skel->maps.redir_map, 762 + .sotype = SOCK_DGRAM, 763 + .connect_to = { EXT_IP6, EXT_PORT }, 764 + .listen_at = 
{ INT_IP6, INT_PORT }, 765 + .accept_on = SERVER_A, 766 + }, 767 + }; 768 + const struct test *t; 769 + 770 + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 771 + if (test__start_subtest(t->desc)) 772 + run_lookup_prog(t); 773 + } 774 + } 775 + 776 + static void drop_on_lookup(const struct test *t) 777 + { 778 + struct sockaddr_storage dst = {}; 779 + int client_fd, server_fd, err; 780 + struct bpf_link *lookup_link; 781 + ssize_t n; 782 + 783 + lookup_link = attach_lookup_prog(t->lookup_prog); 784 + if (!lookup_link) 785 + return; 786 + 787 + server_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port, 788 + t->reuseport_prog); 789 + if (server_fd < 0) 790 + goto detach; 791 + 792 + client_fd = make_socket(t->sotype, t->connect_to.ip, 793 + t->connect_to.port, &dst); 794 + if (client_fd < 0) 795 + goto close_srv; 796 + 797 + err = connect(client_fd, (void *)&dst, inetaddr_len(&dst)); 798 + if (t->sotype == SOCK_DGRAM) { 799 + err = send_byte(client_fd); 800 + if (err) 801 + goto close_all; 802 + 803 + /* Read out asynchronous error */ 804 + n = recv(client_fd, NULL, 0, 0); 805 + err = n == -1; 806 + } 807 + if (CHECK(!err || errno != ECONNREFUSED, "connect", 808 + "unexpected success or error\n")) 809 + log_err("expected ECONNREFUSED on connect"); 810 + 811 + close_all: 812 + close(client_fd); 813 + close_srv: 814 + close(server_fd); 815 + detach: 816 + bpf_link__destroy(lookup_link); 817 + } 818 + 819 + static void test_drop_on_lookup(struct test_sk_lookup *skel) 820 + { 821 + const struct test tests[] = { 822 + { 823 + .desc = "TCP IPv4 drop on lookup", 824 + .lookup_prog = skel->progs.lookup_drop, 825 + .sotype = SOCK_STREAM, 826 + .connect_to = { EXT_IP4, EXT_PORT }, 827 + .listen_at = { EXT_IP4, EXT_PORT }, 828 + }, 829 + { 830 + .desc = "TCP IPv6 drop on lookup", 831 + .lookup_prog = skel->progs.lookup_drop, 832 + .sotype = SOCK_STREAM, 833 + .connect_to = { EXT_IP6, EXT_PORT }, 834 + .listen_at = { EXT_IP6, EXT_PORT }, 835 + }, 836 + { 837 
+ .desc = "UDP IPv4 drop on lookup", 838 + .lookup_prog = skel->progs.lookup_drop, 839 + .sotype = SOCK_DGRAM, 840 + .connect_to = { EXT_IP4, EXT_PORT }, 841 + .listen_at = { EXT_IP4, EXT_PORT }, 842 + }, 843 + { 844 + .desc = "UDP IPv6 drop on lookup", 845 + .lookup_prog = skel->progs.lookup_drop, 846 + .sotype = SOCK_DGRAM, 847 + .connect_to = { EXT_IP6, EXT_PORT }, 848 + .listen_at = { EXT_IP6, INT_PORT }, 849 + }, 850 + }; 851 + const struct test *t; 852 + 853 + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 854 + if (test__start_subtest(t->desc)) 855 + drop_on_lookup(t); 856 + } 857 + } 858 + 859 + static void drop_on_reuseport(const struct test *t) 860 + { 861 + struct sockaddr_storage dst = { 0 }; 862 + int client, server1, server2, err; 863 + struct bpf_link *lookup_link; 864 + ssize_t n; 865 + 866 + lookup_link = attach_lookup_prog(t->lookup_prog); 867 + if (!lookup_link) 868 + return; 869 + 870 + server1 = make_server(t->sotype, t->listen_at.ip, t->listen_at.port, 871 + t->reuseport_prog); 872 + if (server1 < 0) 873 + goto detach; 874 + 875 + err = update_lookup_map(t->sock_map, SERVER_A, server1); 876 + if (err) 877 + goto detach; 878 + 879 + /* second server on destination address we should never reach */ 880 + server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port, 881 + NULL /* reuseport prog */); 882 + if (server2 < 0) 883 + goto close_srv1; 884 + 885 + client = make_socket(t->sotype, t->connect_to.ip, 886 + t->connect_to.port, &dst); 887 + if (client < 0) 888 + goto close_srv2; 889 + 890 + err = connect(client, (void *)&dst, inetaddr_len(&dst)); 891 + if (t->sotype == SOCK_DGRAM) { 892 + err = send_byte(client); 893 + if (err) 894 + goto close_all; 895 + 896 + /* Read out asynchronous error */ 897 + n = recv(client, NULL, 0, 0); 898 + err = n == -1; 899 + } 900 + if (CHECK(!err || errno != ECONNREFUSED, "connect", 901 + "unexpected success or error\n")) 902 + log_err("expected ECONNREFUSED on connect"); 903 + 904 + close_all: 
905 + close(client); 906 + close_srv2: 907 + close(server2); 908 + close_srv1: 909 + close(server1); 910 + detach: 911 + bpf_link__destroy(lookup_link); 912 + } 913 + 914 + static void test_drop_on_reuseport(struct test_sk_lookup *skel) 915 + { 916 + const struct test tests[] = { 917 + { 918 + .desc = "TCP IPv4 drop on reuseport", 919 + .lookup_prog = skel->progs.select_sock_a, 920 + .reuseport_prog = skel->progs.reuseport_drop, 921 + .sock_map = skel->maps.redir_map, 922 + .sotype = SOCK_STREAM, 923 + .connect_to = { EXT_IP4, EXT_PORT }, 924 + .listen_at = { INT_IP4, INT_PORT }, 925 + }, 926 + { 927 + .desc = "TCP IPv6 drop on reuseport", 928 + .lookup_prog = skel->progs.select_sock_a, 929 + .reuseport_prog = skel->progs.reuseport_drop, 930 + .sock_map = skel->maps.redir_map, 931 + .sotype = SOCK_STREAM, 932 + .connect_to = { EXT_IP6, EXT_PORT }, 933 + .listen_at = { INT_IP6, INT_PORT }, 934 + }, 935 + { 936 + .desc = "UDP IPv4 drop on reuseport", 937 + .lookup_prog = skel->progs.select_sock_a, 938 + .reuseport_prog = skel->progs.reuseport_drop, 939 + .sock_map = skel->maps.redir_map, 940 + .sotype = SOCK_DGRAM, 941 + .connect_to = { EXT_IP4, EXT_PORT }, 942 + .listen_at = { INT_IP4, INT_PORT }, 943 + }, 944 + { 945 + .desc = "TCP IPv6 drop on reuseport", 946 + .lookup_prog = skel->progs.select_sock_a, 947 + .reuseport_prog = skel->progs.reuseport_drop, 948 + .sock_map = skel->maps.redir_map, 949 + .sotype = SOCK_STREAM, 950 + .connect_to = { EXT_IP6, EXT_PORT }, 951 + .listen_at = { INT_IP6, INT_PORT }, 952 + }, 953 + }; 954 + const struct test *t; 955 + 956 + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 957 + if (test__start_subtest(t->desc)) 958 + drop_on_reuseport(t); 959 + } 960 + } 961 + 962 + static void run_sk_assign(struct test_sk_lookup *skel, 963 + struct bpf_program *lookup_prog, 964 + const char *listen_ip, const char *connect_ip) 965 + { 966 + int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 }; 967 + struct bpf_link *lookup_link; 968 + 
int i, err; 969 + 970 + lookup_link = attach_lookup_prog(lookup_prog); 971 + if (!lookup_link) 972 + return; 973 + 974 + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { 975 + server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL); 976 + if (server_fds[i] < 0) 977 + goto close_servers; 978 + 979 + err = update_lookup_map(skel->maps.redir_map, i, 980 + server_fds[i]); 981 + if (err) 982 + goto close_servers; 983 + } 984 + 985 + client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT); 986 + if (client_fd < 0) 987 + goto close_servers; 988 + 989 + peer_fd = accept(server_fds[SERVER_B], NULL, NULL); 990 + if (CHECK(peer_fd < 0, "accept", "failed\n")) 991 + goto close_client; 992 + 993 + close(peer_fd); 994 + close_client: 995 + close(client_fd); 996 + close_servers: 997 + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { 998 + if (server_fds[i] != -1) 999 + close(server_fds[i]); 1000 + } 1001 + bpf_link__destroy(lookup_link); 1002 + } 1003 + 1004 + static void run_sk_assign_v4(struct test_sk_lookup *skel, 1005 + struct bpf_program *lookup_prog) 1006 + { 1007 + run_sk_assign(skel, lookup_prog, INT_IP4, EXT_IP4); 1008 + } 1009 + 1010 + static void run_sk_assign_v6(struct test_sk_lookup *skel, 1011 + struct bpf_program *lookup_prog) 1012 + { 1013 + run_sk_assign(skel, lookup_prog, INT_IP6, EXT_IP6); 1014 + } 1015 + 1016 + static void run_sk_assign_connected(struct test_sk_lookup *skel, 1017 + int sotype) 1018 + { 1019 + int err, client_fd, connected_fd, server_fd; 1020 + struct bpf_link *lookup_link; 1021 + 1022 + server_fd = make_server(sotype, EXT_IP4, EXT_PORT, NULL); 1023 + if (server_fd < 0) 1024 + return; 1025 + 1026 + connected_fd = make_client(sotype, EXT_IP4, EXT_PORT); 1027 + if (connected_fd < 0) 1028 + goto out_close_server; 1029 + 1030 + /* Put a connected socket in redirect map */ 1031 + err = update_lookup_map(skel->maps.redir_map, SERVER_A, connected_fd); 1032 + if (err) 1033 + goto out_close_connected; 1034 + 1035 + lookup_link = 
attach_lookup_prog(skel->progs.sk_assign_esocknosupport); 1036 + if (!lookup_link) 1037 + goto out_close_connected; 1038 + 1039 + /* Try to redirect TCP SYN / UDP packet to a connected socket */ 1040 + client_fd = make_client(sotype, EXT_IP4, EXT_PORT); 1041 + if (client_fd < 0) 1042 + goto out_unlink_prog; 1043 + if (sotype == SOCK_DGRAM) { 1044 + send_byte(client_fd); 1045 + recv_byte(server_fd); 1046 + } 1047 + 1048 + close(client_fd); 1049 + out_unlink_prog: 1050 + bpf_link__destroy(lookup_link); 1051 + out_close_connected: 1052 + close(connected_fd); 1053 + out_close_server: 1054 + close(server_fd); 1055 + } 1056 + 1057 + static void test_sk_assign_helper(struct test_sk_lookup *skel) 1058 + { 1059 + if (test__start_subtest("sk_assign returns EEXIST")) 1060 + run_sk_assign_v4(skel, skel->progs.sk_assign_eexist); 1061 + if (test__start_subtest("sk_assign honors F_REPLACE")) 1062 + run_sk_assign_v4(skel, skel->progs.sk_assign_replace_flag); 1063 + if (test__start_subtest("sk_assign accepts NULL socket")) 1064 + run_sk_assign_v4(skel, skel->progs.sk_assign_null); 1065 + if (test__start_subtest("access ctx->sk")) 1066 + run_sk_assign_v4(skel, skel->progs.access_ctx_sk); 1067 + if (test__start_subtest("narrow access to ctx v4")) 1068 + run_sk_assign_v4(skel, skel->progs.ctx_narrow_access); 1069 + if (test__start_subtest("narrow access to ctx v6")) 1070 + run_sk_assign_v6(skel, skel->progs.ctx_narrow_access); 1071 + if (test__start_subtest("sk_assign rejects TCP established")) 1072 + run_sk_assign_connected(skel, SOCK_STREAM); 1073 + if (test__start_subtest("sk_assign rejects UDP connected")) 1074 + run_sk_assign_connected(skel, SOCK_DGRAM); 1075 + } 1076 + 1077 + struct test_multi_prog { 1078 + const char *desc; 1079 + struct bpf_program *prog1; 1080 + struct bpf_program *prog2; 1081 + struct bpf_map *redir_map; 1082 + struct bpf_map *run_map; 1083 + int expect_errno; 1084 + struct inet_addr listen_at; 1085 + }; 1086 + 1087 + static void run_multi_prog_lookup(const 
struct test_multi_prog *t) 1088 + { 1089 + struct sockaddr_storage dst = {}; 1090 + int map_fd, server_fd, client_fd; 1091 + struct bpf_link *link1, *link2; 1092 + int prog_idx, done, err; 1093 + 1094 + map_fd = bpf_map__fd(t->run_map); 1095 + 1096 + done = 0; 1097 + prog_idx = PROG1; 1098 + err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY); 1099 + if (CHECK(err, "bpf_map_update_elem", "failed\n")) 1100 + return; 1101 + prog_idx = PROG2; 1102 + err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY); 1103 + if (CHECK(err, "bpf_map_update_elem", "failed\n")) 1104 + return; 1105 + 1106 + link1 = attach_lookup_prog(t->prog1); 1107 + if (!link1) 1108 + return; 1109 + link2 = attach_lookup_prog(t->prog2); 1110 + if (!link2) 1111 + goto out_unlink1; 1112 + 1113 + server_fd = make_server(SOCK_STREAM, t->listen_at.ip, 1114 + t->listen_at.port, NULL); 1115 + if (server_fd < 0) 1116 + goto out_unlink2; 1117 + 1118 + err = update_lookup_map(t->redir_map, SERVER_A, server_fd); 1119 + if (err) 1120 + goto out_close_server; 1121 + 1122 + client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst); 1123 + if (client_fd < 0) 1124 + goto out_close_server; 1125 + 1126 + err = connect(client_fd, (void *)&dst, inetaddr_len(&dst)); 1127 + if (CHECK(err && !t->expect_errno, "connect", 1128 + "unexpected error %d\n", errno)) 1129 + goto out_close_client; 1130 + if (CHECK(err && t->expect_errno && errno != t->expect_errno, 1131 + "connect", "unexpected error %d\n", errno)) 1132 + goto out_close_client; 1133 + 1134 + done = 0; 1135 + prog_idx = PROG1; 1136 + err = bpf_map_lookup_elem(map_fd, &prog_idx, &done); 1137 + CHECK(err, "bpf_map_lookup_elem", "failed\n"); 1138 + CHECK(!done, "bpf_map_lookup_elem", "PROG1 !done\n"); 1139 + 1140 + done = 0; 1141 + prog_idx = PROG2; 1142 + err = bpf_map_lookup_elem(map_fd, &prog_idx, &done); 1143 + CHECK(err, "bpf_map_lookup_elem", "failed\n"); 1144 + CHECK(!done, "bpf_map_lookup_elem", "PROG2 !done\n"); 1145 + 1146 + 
out_close_client: 1147 + close(client_fd); 1148 + out_close_server: 1149 + close(server_fd); 1150 + out_unlink2: 1151 + bpf_link__destroy(link2); 1152 + out_unlink1: 1153 + bpf_link__destroy(link1); 1154 + } 1155 + 1156 + static void test_multi_prog_lookup(struct test_sk_lookup *skel) 1157 + { 1158 + struct test_multi_prog tests[] = { 1159 + { 1160 + .desc = "multi prog - pass, pass", 1161 + .prog1 = skel->progs.multi_prog_pass1, 1162 + .prog2 = skel->progs.multi_prog_pass2, 1163 + .listen_at = { EXT_IP4, EXT_PORT }, 1164 + }, 1165 + { 1166 + .desc = "multi prog - drop, drop", 1167 + .prog1 = skel->progs.multi_prog_drop1, 1168 + .prog2 = skel->progs.multi_prog_drop2, 1169 + .listen_at = { EXT_IP4, EXT_PORT }, 1170 + .expect_errno = ECONNREFUSED, 1171 + }, 1172 + { 1173 + .desc = "multi prog - pass, drop", 1174 + .prog1 = skel->progs.multi_prog_pass1, 1175 + .prog2 = skel->progs.multi_prog_drop2, 1176 + .listen_at = { EXT_IP4, EXT_PORT }, 1177 + .expect_errno = ECONNREFUSED, 1178 + }, 1179 + { 1180 + .desc = "multi prog - drop, pass", 1181 + .prog1 = skel->progs.multi_prog_drop1, 1182 + .prog2 = skel->progs.multi_prog_pass2, 1183 + .listen_at = { EXT_IP4, EXT_PORT }, 1184 + .expect_errno = ECONNREFUSED, 1185 + }, 1186 + { 1187 + .desc = "multi prog - pass, redir", 1188 + .prog1 = skel->progs.multi_prog_pass1, 1189 + .prog2 = skel->progs.multi_prog_redir2, 1190 + .listen_at = { INT_IP4, INT_PORT }, 1191 + }, 1192 + { 1193 + .desc = "multi prog - redir, pass", 1194 + .prog1 = skel->progs.multi_prog_redir1, 1195 + .prog2 = skel->progs.multi_prog_pass2, 1196 + .listen_at = { INT_IP4, INT_PORT }, 1197 + }, 1198 + { 1199 + .desc = "multi prog - drop, redir", 1200 + .prog1 = skel->progs.multi_prog_drop1, 1201 + .prog2 = skel->progs.multi_prog_redir2, 1202 + .listen_at = { INT_IP4, INT_PORT }, 1203 + }, 1204 + { 1205 + .desc = "multi prog - redir, drop", 1206 + .prog1 = skel->progs.multi_prog_redir1, 1207 + .prog2 = skel->progs.multi_prog_drop2, 1208 + .listen_at = { 
INT_IP4, INT_PORT }, 1209 + }, 1210 + { 1211 + .desc = "multi prog - redir, redir", 1212 + .prog1 = skel->progs.multi_prog_redir1, 1213 + .prog2 = skel->progs.multi_prog_redir2, 1214 + .listen_at = { INT_IP4, INT_PORT }, 1215 + }, 1216 + }; 1217 + struct test_multi_prog *t; 1218 + 1219 + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1220 + t->redir_map = skel->maps.redir_map; 1221 + t->run_map = skel->maps.run_map; 1222 + if (test__start_subtest(t->desc)) 1223 + run_multi_prog_lookup(t); 1224 + } 1225 + } 1226 + 1227 + static void run_tests(struct test_sk_lookup *skel) 1228 + { 1229 + if (test__start_subtest("query lookup prog")) 1230 + query_lookup_prog(skel); 1231 + test_redirect_lookup(skel); 1232 + test_drop_on_lookup(skel); 1233 + test_drop_on_reuseport(skel); 1234 + test_sk_assign_helper(skel); 1235 + test_multi_prog_lookup(skel); 1236 + } 1237 + 1238 + static int switch_netns(void) 1239 + { 1240 + static const char * const setup_script[] = { 1241 + "ip -6 addr add dev lo " EXT_IP6 "/128 nodad", 1242 + "ip -6 addr add dev lo " INT_IP6 "/128 nodad", 1243 + "ip link set dev lo up", 1244 + NULL, 1245 + }; 1246 + const char * const *cmd; 1247 + int err; 1248 + 1249 + err = unshare(CLONE_NEWNET); 1250 + if (CHECK(err, "unshare", "failed\n")) { 1251 + log_err("unshare(CLONE_NEWNET)"); 1252 + return -1; 1253 + } 1254 + 1255 + for (cmd = setup_script; *cmd; cmd++) { 1256 + err = system(*cmd); 1257 + if (CHECK(err, "system", "failed\n")) { 1258 + log_err("system(%s)", *cmd); 1259 + return -1; 1260 + } 1261 + } 1262 + 1263 + return 0; 1264 + } 1265 + 1266 + void test_sk_lookup(void) 1267 + { 1268 + struct test_sk_lookup *skel; 1269 + int err; 1270 + 1271 + err = switch_netns(); 1272 + if (err) 1273 + return; 1274 + 1275 + skel = test_sk_lookup__open_and_load(); 1276 + if (CHECK(!skel, "skel open_and_load", "failed\n")) 1277 + return; 1278 + 1279 + run_tests(skel); 1280 + 1281 + test_sk_lookup__destroy(skel); 1282 + }
+1 -2
tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
··· 193 193 if (CHECK_FAIL(server_fd < 0)) 194 194 goto close_bpf_object; 195 195 196 + pthread_mutex_lock(&server_started_mtx); 196 197 if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, 197 198 (void *)&server_fd))) 198 199 goto close_server_fd; 199 - 200 - pthread_mutex_lock(&server_started_mtx); 201 200 pthread_cond_wait(&server_started, &server_started_mtx); 202 201 pthread_mutex_unlock(&server_started_mtx); 203 202
+70
tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <uapi/linux/bpf.h> 3 + #include <linux/if_link.h> 4 + #include <test_progs.h> 5 + 6 + #include "test_xdp_with_cpumap_helpers.skel.h" 7 + 8 + #define IFINDEX_LO 1 9 + 10 + void test_xdp_with_cpumap_helpers(void) 11 + { 12 + struct test_xdp_with_cpumap_helpers *skel; 13 + struct bpf_prog_info info = {}; 14 + struct bpf_cpumap_val val = { 15 + .qsize = 192, 16 + }; 17 + __u32 duration = 0, idx = 0; 18 + __u32 len = sizeof(info); 19 + int err, prog_fd, map_fd; 20 + 21 + skel = test_xdp_with_cpumap_helpers__open_and_load(); 22 + if (CHECK_FAIL(!skel)) { 23 + perror("test_xdp_with_cpumap_helpers__open_and_load"); 24 + return; 25 + } 26 + 27 + /* can not attach program with cpumaps that allow programs 28 + * as xdp generic 29 + */ 30 + prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); 31 + err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); 32 + CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP", 33 + "should have failed\n"); 34 + 35 + prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); 36 + map_fd = bpf_map__fd(skel->maps.cpu_map); 37 + err = bpf_obj_get_info_by_fd(prog_fd, &info, &len); 38 + if (CHECK_FAIL(err)) 39 + goto out_close; 40 + 41 + val.bpf_prog.fd = prog_fd; 42 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 43 + CHECK(err, "Add program to cpumap entry", "err %d errno %d\n", 44 + err, errno); 45 + 46 + err = bpf_map_lookup_elem(map_fd, &idx, &val); 47 + CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno); 48 + CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry", 49 + "expected %u read %u\n", info.id, val.bpf_prog.id); 50 + 51 + /* can not attach BPF_XDP_CPUMAP program to a device */ 52 + err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE); 53 + CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program", 54 + "should have failed\n"); 55 + 56 + val.qsize = 192; 57 + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog); 
58 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 59 + CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry", 60 + "should have failed\n"); 61 + 62 + out_close: 63 + test_xdp_with_cpumap_helpers__destroy(skel); 64 + } 65 + 66 + void test_xdp_cpumap_attach(void) 67 + { 68 + if (test__start_subtest("cpumap_with_progs")) 69 + test_xdp_with_cpumap_helpers(); 70 + }
+3 -3
tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
··· 36 36 if (!nlk->groups) { 37 37 group = 0; 38 38 } else { 39 - /* FIXME: temporary use bpf_probe_read here, needs 39 + /* FIXME: temporary use bpf_probe_read_kernel here, needs 40 40 * verifier support to do direct access. 41 41 */ 42 - bpf_probe_read(&group, sizeof(group), &nlk->groups[0]); 42 + bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]); 43 43 } 44 44 BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ", 45 45 nlk->portid, (u32)group, ··· 56 56 * with current verifier. 57 57 */ 58 58 inode = SOCK_INODE(sk); 59 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 59 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 60 60 } 61 61 BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino); 62 62
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
··· 57 57 return 0; 58 58 59 59 inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; 60 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 60 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 61 61 return ino; 62 62 } 63 63
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
··· 57 57 return 0; 58 58 59 59 inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; 60 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 60 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 61 61 return ino; 62 62 } 63 63
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
··· 18 18 return 0; 19 19 20 20 inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; 21 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 21 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 22 22 return ino; 23 23 } 24 24
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
··· 25 25 return 0; 26 26 27 27 inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; 28 - bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); 28 + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino); 29 29 return ino; 30 30 } 31 31
+641
tools/testing/selftests/bpf/progs/test_sk_lookup.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 + // Copyright (c) 2020 Cloudflare 3 + 4 + #include <errno.h> 5 + #include <stdbool.h> 6 + #include <stddef.h> 7 + #include <linux/bpf.h> 8 + #include <linux/in.h> 9 + #include <sys/socket.h> 10 + 11 + #include <bpf/bpf_endian.h> 12 + #include <bpf/bpf_helpers.h> 13 + 14 + #define IP4(a, b, c, d) \ 15 + bpf_htonl((((__u32)(a) & 0xffU) << 24) | \ 16 + (((__u32)(b) & 0xffU) << 16) | \ 17 + (((__u32)(c) & 0xffU) << 8) | \ 18 + (((__u32)(d) & 0xffU) << 0)) 19 + #define IP6(aaaa, bbbb, cccc, dddd) \ 20 + { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) } 21 + 22 + #define MAX_SOCKS 32 23 + 24 + struct { 25 + __uint(type, BPF_MAP_TYPE_SOCKMAP); 26 + __uint(max_entries, MAX_SOCKS); 27 + __type(key, __u32); 28 + __type(value, __u64); 29 + } redir_map SEC(".maps"); 30 + 31 + struct { 32 + __uint(type, BPF_MAP_TYPE_ARRAY); 33 + __uint(max_entries, 2); 34 + __type(key, int); 35 + __type(value, int); 36 + } run_map SEC(".maps"); 37 + 38 + enum { 39 + PROG1 = 0, 40 + PROG2, 41 + }; 42 + 43 + enum { 44 + SERVER_A = 0, 45 + SERVER_B, 46 + }; 47 + 48 + /* Addressable key/value constants for convenience */ 49 + static const int KEY_PROG1 = PROG1; 50 + static const int KEY_PROG2 = PROG2; 51 + static const int PROG_DONE = 1; 52 + 53 + static const __u32 KEY_SERVER_A = SERVER_A; 54 + static const __u32 KEY_SERVER_B = SERVER_B; 55 + 56 + static const __u16 DST_PORT = 7007; /* Host byte order */ 57 + static const __u32 DST_IP4 = IP4(127, 0, 0, 1); 58 + static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001); 59 + 60 + SEC("sk_lookup/lookup_pass") 61 + int lookup_pass(struct bpf_sk_lookup *ctx) 62 + { 63 + return SK_PASS; 64 + } 65 + 66 + SEC("sk_lookup/lookup_drop") 67 + int lookup_drop(struct bpf_sk_lookup *ctx) 68 + { 69 + return SK_DROP; 70 + } 71 + 72 + SEC("sk_reuseport/reuse_pass") 73 + int reuseport_pass(struct sk_reuseport_md *ctx) 74 + { 75 + return SK_PASS; 76 + } 77 + 78 + 
SEC("sk_reuseport/reuse_drop") 79 + int reuseport_drop(struct sk_reuseport_md *ctx) 80 + { 81 + return SK_DROP; 82 + } 83 + 84 + /* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */ 85 + SEC("sk_lookup/redir_port") 86 + int redir_port(struct bpf_sk_lookup *ctx) 87 + { 88 + struct bpf_sock *sk; 89 + int err; 90 + 91 + if (ctx->local_port != DST_PORT) 92 + return SK_PASS; 93 + 94 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 95 + if (!sk) 96 + return SK_PASS; 97 + 98 + err = bpf_sk_assign(ctx, sk, 0); 99 + bpf_sk_release(sk); 100 + return err ? SK_DROP : SK_PASS; 101 + } 102 + 103 + /* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */ 104 + SEC("sk_lookup/redir_ip4") 105 + int redir_ip4(struct bpf_sk_lookup *ctx) 106 + { 107 + struct bpf_sock *sk; 108 + int err; 109 + 110 + if (ctx->family != AF_INET) 111 + return SK_PASS; 112 + if (ctx->local_port != DST_PORT) 113 + return SK_PASS; 114 + if (ctx->local_ip4 != DST_IP4) 115 + return SK_PASS; 116 + 117 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 118 + if (!sk) 119 + return SK_PASS; 120 + 121 + err = bpf_sk_assign(ctx, sk, 0); 122 + bpf_sk_release(sk); 123 + return err ? SK_DROP : SK_PASS; 124 + } 125 + 126 + /* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */ 127 + SEC("sk_lookup/redir_ip6") 128 + int redir_ip6(struct bpf_sk_lookup *ctx) 129 + { 130 + struct bpf_sock *sk; 131 + int err; 132 + 133 + if (ctx->family != AF_INET6) 134 + return SK_PASS; 135 + if (ctx->local_port != DST_PORT) 136 + return SK_PASS; 137 + if (ctx->local_ip6[0] != DST_IP6[0] || 138 + ctx->local_ip6[1] != DST_IP6[1] || 139 + ctx->local_ip6[2] != DST_IP6[2] || 140 + ctx->local_ip6[3] != DST_IP6[3]) 141 + return SK_PASS; 142 + 143 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 144 + if (!sk) 145 + return SK_PASS; 146 + 147 + err = bpf_sk_assign(ctx, sk, 0); 148 + bpf_sk_release(sk); 149 + return err ? 
SK_DROP : SK_PASS; 150 + } 151 + 152 + SEC("sk_lookup/select_sock_a") 153 + int select_sock_a(struct bpf_sk_lookup *ctx) 154 + { 155 + struct bpf_sock *sk; 156 + int err; 157 + 158 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 159 + if (!sk) 160 + return SK_PASS; 161 + 162 + err = bpf_sk_assign(ctx, sk, 0); 163 + bpf_sk_release(sk); 164 + return err ? SK_DROP : SK_PASS; 165 + } 166 + 167 + SEC("sk_lookup/select_sock_a_no_reuseport") 168 + int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx) 169 + { 170 + struct bpf_sock *sk; 171 + int err; 172 + 173 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 174 + if (!sk) 175 + return SK_DROP; 176 + 177 + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT); 178 + bpf_sk_release(sk); 179 + return err ? SK_DROP : SK_PASS; 180 + } 181 + 182 + SEC("sk_reuseport/select_sock_b") 183 + int select_sock_b(struct sk_reuseport_md *ctx) 184 + { 185 + __u32 key = KEY_SERVER_B; 186 + int err; 187 + 188 + err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0); 189 + return err ? SK_DROP : SK_PASS; 190 + } 191 + 192 + /* Check that bpf_sk_assign() returns -EEXIST if socket already selected. 
*/ 193 + SEC("sk_lookup/sk_assign_eexist") 194 + int sk_assign_eexist(struct bpf_sk_lookup *ctx) 195 + { 196 + struct bpf_sock *sk; 197 + int err, ret; 198 + 199 + ret = SK_DROP; 200 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 201 + if (!sk) 202 + goto out; 203 + err = bpf_sk_assign(ctx, sk, 0); 204 + if (err) 205 + goto out; 206 + bpf_sk_release(sk); 207 + 208 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 209 + if (!sk) 210 + goto out; 211 + err = bpf_sk_assign(ctx, sk, 0); 212 + if (err != -EEXIST) { 213 + bpf_printk("sk_assign returned %d, expected %d\n", 214 + err, -EEXIST); 215 + goto out; 216 + } 217 + 218 + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ 219 + out: 220 + if (sk) 221 + bpf_sk_release(sk); 222 + return ret; 223 + } 224 + 225 + /* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */ 226 + SEC("sk_lookup/sk_assign_replace_flag") 227 + int sk_assign_replace_flag(struct bpf_sk_lookup *ctx) 228 + { 229 + struct bpf_sock *sk; 230 + int err, ret; 231 + 232 + ret = SK_DROP; 233 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 234 + if (!sk) 235 + goto out; 236 + err = bpf_sk_assign(ctx, sk, 0); 237 + if (err) 238 + goto out; 239 + bpf_sk_release(sk); 240 + 241 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 242 + if (!sk) 243 + goto out; 244 + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); 245 + if (err) { 246 + bpf_printk("sk_assign returned %d, expected 0\n", err); 247 + goto out; 248 + } 249 + 250 + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ 251 + out: 252 + if (sk) 253 + bpf_sk_release(sk); 254 + return ret; 255 + } 256 + 257 + /* Check that bpf_sk_assign(sk=NULL) is accepted. 
*/ 258 + SEC("sk_lookup/sk_assign_null") 259 + int sk_assign_null(struct bpf_sk_lookup *ctx) 260 + { 261 + struct bpf_sock *sk = NULL; 262 + int err, ret; 263 + 264 + ret = SK_DROP; 265 + 266 + err = bpf_sk_assign(ctx, NULL, 0); 267 + if (err) { 268 + bpf_printk("sk_assign returned %d, expected 0\n", err); 269 + goto out; 270 + } 271 + 272 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 273 + if (!sk) 274 + goto out; 275 + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); 276 + if (err) { 277 + bpf_printk("sk_assign returned %d, expected 0\n", err); 278 + goto out; 279 + } 280 + 281 + if (ctx->sk != sk) 282 + goto out; 283 + err = bpf_sk_assign(ctx, NULL, 0); 284 + if (err != -EEXIST) 285 + goto out; 286 + err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE); 287 + if (err) 288 + goto out; 289 + err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE); 290 + if (err) 291 + goto out; 292 + 293 + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ 294 + out: 295 + if (sk) 296 + bpf_sk_release(sk); 297 + return ret; 298 + } 299 + 300 + /* Check that selected sk is accessible through context. 
*/ 301 + SEC("sk_lookup/access_ctx_sk") 302 + int access_ctx_sk(struct bpf_sk_lookup *ctx) 303 + { 304 + struct bpf_sock *sk1 = NULL, *sk2 = NULL; 305 + int err, ret; 306 + 307 + ret = SK_DROP; 308 + 309 + /* Try accessing unassigned (NULL) ctx->sk field */ 310 + if (ctx->sk && ctx->sk->family != AF_INET) 311 + goto out; 312 + 313 + /* Assign a value to ctx->sk */ 314 + sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 315 + if (!sk1) 316 + goto out; 317 + err = bpf_sk_assign(ctx, sk1, 0); 318 + if (err) 319 + goto out; 320 + if (ctx->sk != sk1) 321 + goto out; 322 + 323 + /* Access ctx->sk fields */ 324 + if (ctx->sk->family != AF_INET || 325 + ctx->sk->type != SOCK_STREAM || 326 + ctx->sk->state != BPF_TCP_LISTEN) 327 + goto out; 328 + 329 + /* Reset selection */ 330 + err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE); 331 + if (err) 332 + goto out; 333 + if (ctx->sk) 334 + goto out; 335 + 336 + /* Assign another socket */ 337 + sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 338 + if (!sk2) 339 + goto out; 340 + err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE); 341 + if (err) 342 + goto out; 343 + if (ctx->sk != sk2) 344 + goto out; 345 + 346 + /* Access reassigned ctx->sk fields */ 347 + if (ctx->sk->family != AF_INET || 348 + ctx->sk->type != SOCK_STREAM || 349 + ctx->sk->state != BPF_TCP_LISTEN) 350 + goto out; 351 + 352 + ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */ 353 + out: 354 + if (sk1) 355 + bpf_sk_release(sk1); 356 + if (sk2) 357 + bpf_sk_release(sk2); 358 + return ret; 359 + } 360 + 361 + /* Check narrow loads from ctx fields that support them. 362 + * 363 + * Narrow loads of size >= target field size from a non-zero offset 364 + * are not covered because they give bogus results, that is the 365 + * verifier ignores the offset. 
366 + */ 367 + SEC("sk_lookup/ctx_narrow_access") 368 + int ctx_narrow_access(struct bpf_sk_lookup *ctx) 369 + { 370 + struct bpf_sock *sk; 371 + int err, family; 372 + __u16 *half; 373 + __u8 *byte; 374 + bool v4; 375 + 376 + v4 = (ctx->family == AF_INET); 377 + 378 + /* Narrow loads from family field */ 379 + byte = (__u8 *)&ctx->family; 380 + half = (__u16 *)&ctx->family; 381 + if (byte[0] != (v4 ? AF_INET : AF_INET6) || 382 + byte[1] != 0 || byte[2] != 0 || byte[3] != 0) 383 + return SK_DROP; 384 + if (half[0] != (v4 ? AF_INET : AF_INET6)) 385 + return SK_DROP; 386 + 387 + byte = (__u8 *)&ctx->protocol; 388 + if (byte[0] != IPPROTO_TCP || 389 + byte[1] != 0 || byte[2] != 0 || byte[3] != 0) 390 + return SK_DROP; 391 + half = (__u16 *)&ctx->protocol; 392 + if (half[0] != IPPROTO_TCP) 393 + return SK_DROP; 394 + 395 + /* Narrow loads from remote_port field. Expect non-0 value. */ 396 + byte = (__u8 *)&ctx->remote_port; 397 + if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0) 398 + return SK_DROP; 399 + half = (__u16 *)&ctx->remote_port; 400 + if (half[0] == 0) 401 + return SK_DROP; 402 + 403 + /* Narrow loads from local_port field. Expect DST_PORT. 
*/ 404 + byte = (__u8 *)&ctx->local_port; 405 + if (byte[0] != ((DST_PORT >> 0) & 0xff) || 406 + byte[1] != ((DST_PORT >> 8) & 0xff) || 407 + byte[2] != 0 || byte[3] != 0) 408 + return SK_DROP; 409 + half = (__u16 *)&ctx->local_port; 410 + if (half[0] != DST_PORT) 411 + return SK_DROP; 412 + 413 + /* Narrow loads from IPv4 fields */ 414 + if (v4) { 415 + /* Expect non-0.0.0.0 in remote_ip4 */ 416 + byte = (__u8 *)&ctx->remote_ip4; 417 + if (byte[0] == 0 && byte[1] == 0 && 418 + byte[2] == 0 && byte[3] == 0) 419 + return SK_DROP; 420 + half = (__u16 *)&ctx->remote_ip4; 421 + if (half[0] == 0 && half[1] == 0) 422 + return SK_DROP; 423 + 424 + /* Expect DST_IP4 in local_ip4 */ 425 + byte = (__u8 *)&ctx->local_ip4; 426 + if (byte[0] != ((DST_IP4 >> 0) & 0xff) || 427 + byte[1] != ((DST_IP4 >> 8) & 0xff) || 428 + byte[2] != ((DST_IP4 >> 16) & 0xff) || 429 + byte[3] != ((DST_IP4 >> 24) & 0xff)) 430 + return SK_DROP; 431 + half = (__u16 *)&ctx->local_ip4; 432 + if (half[0] != ((DST_IP4 >> 0) & 0xffff) || 433 + half[1] != ((DST_IP4 >> 16) & 0xffff)) 434 + return SK_DROP; 435 + } else { 436 + /* Expect 0.0.0.0 IPs when family != AF_INET */ 437 + byte = (__u8 *)&ctx->remote_ip4; 438 + if (byte[0] != 0 || byte[1] != 0 && 439 + byte[2] != 0 || byte[3] != 0) 440 + return SK_DROP; 441 + half = (__u16 *)&ctx->remote_ip4; 442 + if (half[0] != 0 || half[1] != 0) 443 + return SK_DROP; 444 + 445 + byte = (__u8 *)&ctx->local_ip4; 446 + if (byte[0] != 0 || byte[1] != 0 && 447 + byte[2] != 0 || byte[3] != 0) 448 + return SK_DROP; 449 + half = (__u16 *)&ctx->local_ip4; 450 + if (half[0] != 0 || half[1] != 0) 451 + return SK_DROP; 452 + } 453 + 454 + /* Narrow loads from IPv6 fields */ 455 + if (!v4) { 456 + /* Expenct non-:: IP in remote_ip6 */ 457 + byte = (__u8 *)&ctx->remote_ip6; 458 + if (byte[0] == 0 && byte[1] == 0 && 459 + byte[2] == 0 && byte[3] == 0 && 460 + byte[4] == 0 && byte[5] == 0 && 461 + byte[6] == 0 && byte[7] == 0 && 462 + byte[8] == 0 && byte[9] == 0 && 463 + byte[10] 
== 0 && byte[11] == 0 && 464 + byte[12] == 0 && byte[13] == 0 && 465 + byte[14] == 0 && byte[15] == 0) 466 + return SK_DROP; 467 + half = (__u16 *)&ctx->remote_ip6; 468 + if (half[0] == 0 && half[1] == 0 && 469 + half[2] == 0 && half[3] == 0 && 470 + half[4] == 0 && half[5] == 0 && 471 + half[6] == 0 && half[7] == 0) 472 + return SK_DROP; 473 + 474 + /* Expect DST_IP6 in local_ip6 */ 475 + byte = (__u8 *)&ctx->local_ip6; 476 + if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) || 477 + byte[1] != ((DST_IP6[0] >> 8) & 0xff) || 478 + byte[2] != ((DST_IP6[0] >> 16) & 0xff) || 479 + byte[3] != ((DST_IP6[0] >> 24) & 0xff) || 480 + byte[4] != ((DST_IP6[1] >> 0) & 0xff) || 481 + byte[5] != ((DST_IP6[1] >> 8) & 0xff) || 482 + byte[6] != ((DST_IP6[1] >> 16) & 0xff) || 483 + byte[7] != ((DST_IP6[1] >> 24) & 0xff) || 484 + byte[8] != ((DST_IP6[2] >> 0) & 0xff) || 485 + byte[9] != ((DST_IP6[2] >> 8) & 0xff) || 486 + byte[10] != ((DST_IP6[2] >> 16) & 0xff) || 487 + byte[11] != ((DST_IP6[2] >> 24) & 0xff) || 488 + byte[12] != ((DST_IP6[3] >> 0) & 0xff) || 489 + byte[13] != ((DST_IP6[3] >> 8) & 0xff) || 490 + byte[14] != ((DST_IP6[3] >> 16) & 0xff) || 491 + byte[15] != ((DST_IP6[3] >> 24) & 0xff)) 492 + return SK_DROP; 493 + half = (__u16 *)&ctx->local_ip6; 494 + if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) || 495 + half[1] != ((DST_IP6[0] >> 16) & 0xffff) || 496 + half[2] != ((DST_IP6[1] >> 0) & 0xffff) || 497 + half[3] != ((DST_IP6[1] >> 16) & 0xffff) || 498 + half[4] != ((DST_IP6[2] >> 0) & 0xffff) || 499 + half[5] != ((DST_IP6[2] >> 16) & 0xffff) || 500 + half[6] != ((DST_IP6[3] >> 0) & 0xffff) || 501 + half[7] != ((DST_IP6[3] >> 16) & 0xffff)) 502 + return SK_DROP; 503 + } else { 504 + /* Expect :: IPs when family != AF_INET6 */ 505 + byte = (__u8 *)&ctx->remote_ip6; 506 + if (byte[0] != 0 || byte[1] != 0 || 507 + byte[2] != 0 || byte[3] != 0 || 508 + byte[4] != 0 || byte[5] != 0 || 509 + byte[6] != 0 || byte[7] != 0 || 510 + byte[8] != 0 || byte[9] != 0 || 511 + byte[10] != 0 || 
byte[11] != 0 || 512 + byte[12] != 0 || byte[13] != 0 || 513 + byte[14] != 0 || byte[15] != 0) 514 + return SK_DROP; 515 + half = (__u16 *)&ctx->remote_ip6; 516 + if (half[0] != 0 || half[1] != 0 || 517 + half[2] != 0 || half[3] != 0 || 518 + half[4] != 0 || half[5] != 0 || 519 + half[6] != 0 || half[7] != 0) 520 + return SK_DROP; 521 + 522 + byte = (__u8 *)&ctx->local_ip6; 523 + if (byte[0] != 0 || byte[1] != 0 || 524 + byte[2] != 0 || byte[3] != 0 || 525 + byte[4] != 0 || byte[5] != 0 || 526 + byte[6] != 0 || byte[7] != 0 || 527 + byte[8] != 0 || byte[9] != 0 || 528 + byte[10] != 0 || byte[11] != 0 || 529 + byte[12] != 0 || byte[13] != 0 || 530 + byte[14] != 0 || byte[15] != 0) 531 + return SK_DROP; 532 + half = (__u16 *)&ctx->local_ip6; 533 + if (half[0] != 0 || half[1] != 0 || 534 + half[2] != 0 || half[3] != 0 || 535 + half[4] != 0 || half[5] != 0 || 536 + half[6] != 0 || half[7] != 0) 537 + return SK_DROP; 538 + } 539 + 540 + /* Success, redirect to KEY_SERVER_B */ 541 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B); 542 + if (sk) { 543 + bpf_sk_assign(ctx, sk, 0); 544 + bpf_sk_release(sk); 545 + } 546 + return SK_PASS; 547 + } 548 + 549 + /* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */ 550 + SEC("sk_lookup/sk_assign_esocknosupport") 551 + int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx) 552 + { 553 + struct bpf_sock *sk; 554 + int err, ret; 555 + 556 + ret = SK_DROP; 557 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 558 + if (!sk) 559 + goto out; 560 + 561 + err = bpf_sk_assign(ctx, sk, 0); 562 + if (err != -ESOCKTNOSUPPORT) { 563 + bpf_printk("sk_assign returned %d, expected %d\n", 564 + err, -ESOCKTNOSUPPORT); 565 + goto out; 566 + } 567 + 568 + ret = SK_PASS; /* Success, pass to regular lookup */ 569 + out: 570 + if (sk) 571 + bpf_sk_release(sk); 572 + return ret; 573 + } 574 + 575 + SEC("sk_lookup/multi_prog_pass1") 576 + int multi_prog_pass1(struct bpf_sk_lookup *ctx) 577 + { 578 + 
bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); 579 + return SK_PASS; 580 + } 581 + 582 + SEC("sk_lookup/multi_prog_pass2") 583 + int multi_prog_pass2(struct bpf_sk_lookup *ctx) 584 + { 585 + bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); 586 + return SK_PASS; 587 + } 588 + 589 + SEC("sk_lookup/multi_prog_drop1") 590 + int multi_prog_drop1(struct bpf_sk_lookup *ctx) 591 + { 592 + bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); 593 + return SK_DROP; 594 + } 595 + 596 + SEC("sk_lookup/multi_prog_drop2") 597 + int multi_prog_drop2(struct bpf_sk_lookup *ctx) 598 + { 599 + bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); 600 + return SK_DROP; 601 + } 602 + 603 + static __always_inline int select_server_a(struct bpf_sk_lookup *ctx) 604 + { 605 + struct bpf_sock *sk; 606 + int err; 607 + 608 + sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A); 609 + if (!sk) 610 + return SK_DROP; 611 + 612 + err = bpf_sk_assign(ctx, sk, 0); 613 + bpf_sk_release(sk); 614 + if (err) 615 + return SK_DROP; 616 + 617 + return SK_PASS; 618 + } 619 + 620 + SEC("sk_lookup/multi_prog_redir1") 621 + int multi_prog_redir1(struct bpf_sk_lookup *ctx) 622 + { 623 + int ret; 624 + 625 + ret = select_server_a(ctx); 626 + bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY); 627 + return SK_PASS; 628 + } 629 + 630 + SEC("sk_lookup/multi_prog_redir2") 631 + int multi_prog_redir2(struct bpf_sk_lookup *ctx) 632 + { 633 + int ret; 634 + 635 + ret = select_server_a(ctx); 636 + bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY); 637 + return SK_PASS; 638 + } 639 + 640 + char _license[] SEC("license") = "Dual BSD/GPL"; 641 + __u32 _version SEC("version") = 1;
+36
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + 6 + #define IFINDEX_LO 1 7 + 8 + struct { 9 + __uint(type, BPF_MAP_TYPE_CPUMAP); 10 + __uint(key_size, sizeof(__u32)); 11 + __uint(value_size, sizeof(struct bpf_cpumap_val)); 12 + __uint(max_entries, 4); 13 + } cpu_map SEC(".maps"); 14 + 15 + SEC("xdp_redir") 16 + int xdp_redir_prog(struct xdp_md *ctx) 17 + { 18 + return bpf_redirect_map(&cpu_map, 1, 0); 19 + } 20 + 21 + SEC("xdp_dummy") 22 + int xdp_dummy_prog(struct xdp_md *ctx) 23 + { 24 + return XDP_PASS; 25 + } 26 + 27 + SEC("xdp_cpumap/dummy_cm") 28 + int xdp_dummy_cm(struct xdp_md *ctx) 29 + { 30 + if (ctx->ingress_ifindex == IFINDEX_LO) 31 + return XDP_DROP; 32 + 33 + return XDP_PASS; 34 + } 35 + 36 + char _license[] SEC("license") = "GPL";
+9 -3
tools/testing/selftests/bpf/test_kmod.sh
··· 10 10 exit $ksft_skip 11 11 fi 12 12 13 - SRC_TREE=../../../../ 13 + if [ "$building_out_of_srctree" ]; then 14 + # We are in linux-build/kselftest/bpf 15 + OUTPUT=../../ 16 + else 17 + # We are in linux/tools/testing/selftests/bpf 18 + OUTPUT=../../../../ 19 + fi 14 20 15 21 test_run() 16 22 { ··· 25 19 26 20 echo "[ JIT enabled:$1 hardened:$2 ]" 27 21 dmesg -C 28 - if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then 29 - insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null 22 + if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then 23 + insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null 30 24 if [ $? -ne 0 ]; then 31 25 rc=1 32 26 fi
+1 -1
tools/testing/selftests/bpf/test_lwt_seg6local.sh
··· 140 140 ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE & 141 141 ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" 142 142 sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment 143 - kill -INT $! 143 + kill -TERM $! 144 144 145 145 if [[ $(< $TMP_FILE) != "foobar" ]]; then 146 146 exit 1
+492
tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
··· 1 + { 2 + "valid 1,2,4,8-byte reads from bpf_sk_lookup", 3 + .insns = { 4 + /* 1-byte read from family field */ 5 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 6 + offsetof(struct bpf_sk_lookup, family)), 7 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 8 + offsetof(struct bpf_sk_lookup, family) + 1), 9 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 10 + offsetof(struct bpf_sk_lookup, family) + 2), 11 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 12 + offsetof(struct bpf_sk_lookup, family) + 3), 13 + /* 2-byte read from family field */ 14 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 15 + offsetof(struct bpf_sk_lookup, family)), 16 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 17 + offsetof(struct bpf_sk_lookup, family) + 2), 18 + /* 4-byte read from family field */ 19 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 20 + offsetof(struct bpf_sk_lookup, family)), 21 + 22 + /* 1-byte read from protocol field */ 23 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 24 + offsetof(struct bpf_sk_lookup, protocol)), 25 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 26 + offsetof(struct bpf_sk_lookup, protocol) + 1), 27 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 28 + offsetof(struct bpf_sk_lookup, protocol) + 2), 29 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 30 + offsetof(struct bpf_sk_lookup, protocol) + 3), 31 + /* 2-byte read from protocol field */ 32 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 33 + offsetof(struct bpf_sk_lookup, protocol)), 34 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 35 + offsetof(struct bpf_sk_lookup, protocol) + 2), 36 + /* 4-byte read from protocol field */ 37 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 38 + offsetof(struct bpf_sk_lookup, protocol)), 39 + 40 + /* 1-byte read from remote_ip4 field */ 41 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 42 + offsetof(struct bpf_sk_lookup, remote_ip4)), 43 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 44 + offsetof(struct bpf_sk_lookup, remote_ip4) + 1), 45 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 46 + offsetof(struct bpf_sk_lookup, 
remote_ip4) + 2), 47 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 48 + offsetof(struct bpf_sk_lookup, remote_ip4) + 3), 49 + /* 2-byte read from remote_ip4 field */ 50 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 51 + offsetof(struct bpf_sk_lookup, remote_ip4)), 52 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 53 + offsetof(struct bpf_sk_lookup, remote_ip4) + 2), 54 + /* 4-byte read from remote_ip4 field */ 55 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 56 + offsetof(struct bpf_sk_lookup, remote_ip4)), 57 + 58 + /* 1-byte read from remote_ip6 field */ 59 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 60 + offsetof(struct bpf_sk_lookup, remote_ip6)), 61 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 62 + offsetof(struct bpf_sk_lookup, remote_ip6) + 1), 63 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 64 + offsetof(struct bpf_sk_lookup, remote_ip6) + 2), 65 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 66 + offsetof(struct bpf_sk_lookup, remote_ip6) + 3), 67 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 68 + offsetof(struct bpf_sk_lookup, remote_ip6) + 4), 69 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 70 + offsetof(struct bpf_sk_lookup, remote_ip6) + 5), 71 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 72 + offsetof(struct bpf_sk_lookup, remote_ip6) + 6), 73 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 74 + offsetof(struct bpf_sk_lookup, remote_ip6) + 7), 75 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 76 + offsetof(struct bpf_sk_lookup, remote_ip6) + 8), 77 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 78 + offsetof(struct bpf_sk_lookup, remote_ip6) + 9), 79 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 80 + offsetof(struct bpf_sk_lookup, remote_ip6) + 10), 81 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 82 + offsetof(struct bpf_sk_lookup, remote_ip6) + 11), 83 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 84 + offsetof(struct bpf_sk_lookup, remote_ip6) + 12), 85 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 86 + offsetof(struct bpf_sk_lookup, remote_ip6) + 13), 87 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 88 
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14), 89 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 90 + offsetof(struct bpf_sk_lookup, remote_ip6) + 15), 91 + /* 2-byte read from remote_ip6 field */ 92 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 93 + offsetof(struct bpf_sk_lookup, remote_ip6)), 94 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 95 + offsetof(struct bpf_sk_lookup, remote_ip6) + 2), 96 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 97 + offsetof(struct bpf_sk_lookup, remote_ip6) + 4), 98 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 99 + offsetof(struct bpf_sk_lookup, remote_ip6) + 6), 100 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 101 + offsetof(struct bpf_sk_lookup, remote_ip6) + 8), 102 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 103 + offsetof(struct bpf_sk_lookup, remote_ip6) + 10), 104 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 105 + offsetof(struct bpf_sk_lookup, remote_ip6) + 12), 106 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 107 + offsetof(struct bpf_sk_lookup, remote_ip6) + 14), 108 + /* 4-byte read from remote_ip6 field */ 109 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 110 + offsetof(struct bpf_sk_lookup, remote_ip6)), 111 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 112 + offsetof(struct bpf_sk_lookup, remote_ip6) + 4), 113 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 114 + offsetof(struct bpf_sk_lookup, remote_ip6) + 8), 115 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 116 + offsetof(struct bpf_sk_lookup, remote_ip6) + 12), 117 + 118 + /* 1-byte read from remote_port field */ 119 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 120 + offsetof(struct bpf_sk_lookup, remote_port)), 121 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 122 + offsetof(struct bpf_sk_lookup, remote_port) + 1), 123 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 124 + offsetof(struct bpf_sk_lookup, remote_port) + 2), 125 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 126 + offsetof(struct bpf_sk_lookup, remote_port) + 3), 127 + /* 2-byte read from remote_port field */ 128 + BPF_LDX_MEM(BPF_H, BPF_REG_0, 
BPF_REG_1, 129 + offsetof(struct bpf_sk_lookup, remote_port)), 130 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 131 + offsetof(struct bpf_sk_lookup, remote_port) + 2), 132 + /* 4-byte read from remote_port field */ 133 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 134 + offsetof(struct bpf_sk_lookup, remote_port)), 135 + 136 + /* 1-byte read from local_ip4 field */ 137 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 138 + offsetof(struct bpf_sk_lookup, local_ip4)), 139 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 140 + offsetof(struct bpf_sk_lookup, local_ip4) + 1), 141 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 142 + offsetof(struct bpf_sk_lookup, local_ip4) + 2), 143 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 144 + offsetof(struct bpf_sk_lookup, local_ip4) + 3), 145 + /* 2-byte read from local_ip4 field */ 146 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 147 + offsetof(struct bpf_sk_lookup, local_ip4)), 148 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 149 + offsetof(struct bpf_sk_lookup, local_ip4) + 2), 150 + /* 4-byte read from local_ip4 field */ 151 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 152 + offsetof(struct bpf_sk_lookup, local_ip4)), 153 + 154 + /* 1-byte read from local_ip6 field */ 155 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 156 + offsetof(struct bpf_sk_lookup, local_ip6)), 157 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 158 + offsetof(struct bpf_sk_lookup, local_ip6) + 1), 159 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 160 + offsetof(struct bpf_sk_lookup, local_ip6) + 2), 161 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 162 + offsetof(struct bpf_sk_lookup, local_ip6) + 3), 163 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 164 + offsetof(struct bpf_sk_lookup, local_ip6) + 4), 165 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 166 + offsetof(struct bpf_sk_lookup, local_ip6) + 5), 167 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 168 + offsetof(struct bpf_sk_lookup, local_ip6) + 6), 169 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 170 + offsetof(struct bpf_sk_lookup, local_ip6) + 
7), 171 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 172 + offsetof(struct bpf_sk_lookup, local_ip6) + 8), 173 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 174 + offsetof(struct bpf_sk_lookup, local_ip6) + 9), 175 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 176 + offsetof(struct bpf_sk_lookup, local_ip6) + 10), 177 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 178 + offsetof(struct bpf_sk_lookup, local_ip6) + 11), 179 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 180 + offsetof(struct bpf_sk_lookup, local_ip6) + 12), 181 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 182 + offsetof(struct bpf_sk_lookup, local_ip6) + 13), 183 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 184 + offsetof(struct bpf_sk_lookup, local_ip6) + 14), 185 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 186 + offsetof(struct bpf_sk_lookup, local_ip6) + 15), 187 + /* 2-byte read from local_ip6 field */ 188 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 189 + offsetof(struct bpf_sk_lookup, local_ip6)), 190 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 191 + offsetof(struct bpf_sk_lookup, local_ip6) + 2), 192 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 193 + offsetof(struct bpf_sk_lookup, local_ip6) + 4), 194 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 195 + offsetof(struct bpf_sk_lookup, local_ip6) + 6), 196 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 197 + offsetof(struct bpf_sk_lookup, local_ip6) + 8), 198 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 199 + offsetof(struct bpf_sk_lookup, local_ip6) + 10), 200 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 201 + offsetof(struct bpf_sk_lookup, local_ip6) + 12), 202 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 203 + offsetof(struct bpf_sk_lookup, local_ip6) + 14), 204 + /* 4-byte read from local_ip6 field */ 205 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 206 + offsetof(struct bpf_sk_lookup, local_ip6)), 207 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 208 + offsetof(struct bpf_sk_lookup, local_ip6) + 4), 209 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 210 + offsetof(struct bpf_sk_lookup, 
local_ip6) + 8), 211 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 212 + offsetof(struct bpf_sk_lookup, local_ip6) + 12), 213 + 214 + /* 1-byte read from local_port field */ 215 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 216 + offsetof(struct bpf_sk_lookup, local_port)), 217 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 218 + offsetof(struct bpf_sk_lookup, local_port) + 1), 219 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 220 + offsetof(struct bpf_sk_lookup, local_port) + 2), 221 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 222 + offsetof(struct bpf_sk_lookup, local_port) + 3), 223 + /* 2-byte read from local_port field */ 224 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 225 + offsetof(struct bpf_sk_lookup, local_port)), 226 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 227 + offsetof(struct bpf_sk_lookup, local_port) + 2), 228 + /* 4-byte read from local_port field */ 229 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 230 + offsetof(struct bpf_sk_lookup, local_port)), 231 + 232 + /* 8-byte read from sk field */ 233 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 234 + offsetof(struct bpf_sk_lookup, sk)), 235 + 236 + BPF_MOV32_IMM(BPF_REG_0, 0), 237 + BPF_EXIT_INSN(), 238 + }, 239 + .result = ACCEPT, 240 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 241 + .expected_attach_type = BPF_SK_LOOKUP, 242 + }, 243 + /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */ 244 + { 245 + "invalid 8-byte read from bpf_sk_lookup family field", 246 + .insns = { 247 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 248 + offsetof(struct bpf_sk_lookup, family)), 249 + BPF_MOV32_IMM(BPF_REG_0, 0), 250 + BPF_EXIT_INSN(), 251 + }, 252 + .errstr = "invalid bpf_context access", 253 + .result = REJECT, 254 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 255 + .expected_attach_type = BPF_SK_LOOKUP, 256 + }, 257 + { 258 + "invalid 8-byte read from bpf_sk_lookup protocol field", 259 + .insns = { 260 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 261 + offsetof(struct bpf_sk_lookup, protocol)), 262 + BPF_MOV32_IMM(BPF_REG_0, 
0), 263 + BPF_EXIT_INSN(), 264 + }, 265 + .errstr = "invalid bpf_context access", 266 + .result = REJECT, 267 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 268 + .expected_attach_type = BPF_SK_LOOKUP, 269 + }, 270 + { 271 + "invalid 8-byte read from bpf_sk_lookup remote_ip4 field", 272 + .insns = { 273 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 274 + offsetof(struct bpf_sk_lookup, remote_ip4)), 275 + BPF_MOV32_IMM(BPF_REG_0, 0), 276 + BPF_EXIT_INSN(), 277 + }, 278 + .errstr = "invalid bpf_context access", 279 + .result = REJECT, 280 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 281 + .expected_attach_type = BPF_SK_LOOKUP, 282 + }, 283 + { 284 + "invalid 8-byte read from bpf_sk_lookup remote_ip6 field", 285 + .insns = { 286 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 287 + offsetof(struct bpf_sk_lookup, remote_ip6)), 288 + BPF_MOV32_IMM(BPF_REG_0, 0), 289 + BPF_EXIT_INSN(), 290 + }, 291 + .errstr = "invalid bpf_context access", 292 + .result = REJECT, 293 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 294 + .expected_attach_type = BPF_SK_LOOKUP, 295 + }, 296 + { 297 + "invalid 8-byte read from bpf_sk_lookup remote_port field", 298 + .insns = { 299 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 300 + offsetof(struct bpf_sk_lookup, remote_port)), 301 + BPF_MOV32_IMM(BPF_REG_0, 0), 302 + BPF_EXIT_INSN(), 303 + }, 304 + .errstr = "invalid bpf_context access", 305 + .result = REJECT, 306 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 307 + .expected_attach_type = BPF_SK_LOOKUP, 308 + }, 309 + { 310 + "invalid 8-byte read from bpf_sk_lookup local_ip4 field", 311 + .insns = { 312 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 313 + offsetof(struct bpf_sk_lookup, local_ip4)), 314 + BPF_MOV32_IMM(BPF_REG_0, 0), 315 + BPF_EXIT_INSN(), 316 + }, 317 + .errstr = "invalid bpf_context access", 318 + .result = REJECT, 319 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 320 + .expected_attach_type = BPF_SK_LOOKUP, 321 + }, 322 + { 323 + "invalid 8-byte read from bpf_sk_lookup local_ip6 field", 324 + .insns = { 325 + 
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 326 + offsetof(struct bpf_sk_lookup, local_ip6)), 327 + BPF_MOV32_IMM(BPF_REG_0, 0), 328 + BPF_EXIT_INSN(), 329 + }, 330 + .errstr = "invalid bpf_context access", 331 + .result = REJECT, 332 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 333 + .expected_attach_type = BPF_SK_LOOKUP, 334 + }, 335 + { 336 + "invalid 8-byte read from bpf_sk_lookup local_port field", 337 + .insns = { 338 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 339 + offsetof(struct bpf_sk_lookup, local_port)), 340 + BPF_MOV32_IMM(BPF_REG_0, 0), 341 + BPF_EXIT_INSN(), 342 + }, 343 + .errstr = "invalid bpf_context access", 344 + .result = REJECT, 345 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 346 + .expected_attach_type = BPF_SK_LOOKUP, 347 + }, 348 + /* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */ 349 + { 350 + "invalid 4-byte read from bpf_sk_lookup sk field", 351 + .insns = { 352 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 353 + offsetof(struct bpf_sk_lookup, sk)), 354 + BPF_MOV32_IMM(BPF_REG_0, 0), 355 + BPF_EXIT_INSN(), 356 + }, 357 + .errstr = "invalid bpf_context access", 358 + .result = REJECT, 359 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 360 + .expected_attach_type = BPF_SK_LOOKUP, 361 + }, 362 + { 363 + "invalid 2-byte read from bpf_sk_lookup sk field", 364 + .insns = { 365 + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, 366 + offsetof(struct bpf_sk_lookup, sk)), 367 + BPF_MOV32_IMM(BPF_REG_0, 0), 368 + BPF_EXIT_INSN(), 369 + }, 370 + .errstr = "invalid bpf_context access", 371 + .result = REJECT, 372 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 373 + .expected_attach_type = BPF_SK_LOOKUP, 374 + }, 375 + { 376 + "invalid 1-byte read from bpf_sk_lookup sk field", 377 + .insns = { 378 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 379 + offsetof(struct bpf_sk_lookup, sk)), 380 + BPF_MOV32_IMM(BPF_REG_0, 0), 381 + BPF_EXIT_INSN(), 382 + }, 383 + .errstr = "invalid bpf_context access", 384 + .result = REJECT, 385 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 386 + 
.expected_attach_type = BPF_SK_LOOKUP, 387 + }, 388 + /* out of bounds and unaligned reads from bpf_sk_lookup */ 389 + { 390 + "invalid 4-byte read past end of bpf_sk_lookup", 391 + .insns = { 392 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 393 + sizeof(struct bpf_sk_lookup)), 394 + BPF_MOV32_IMM(BPF_REG_0, 0), 395 + BPF_EXIT_INSN(), 396 + }, 397 + .errstr = "invalid bpf_context access", 398 + .result = REJECT, 399 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 400 + .expected_attach_type = BPF_SK_LOOKUP, 401 + }, 402 + { 403 + "invalid 4-byte unaligned read from bpf_sk_lookup at odd offset", 404 + .insns = { 405 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1), 406 + BPF_MOV32_IMM(BPF_REG_0, 0), 407 + BPF_EXIT_INSN(), 408 + }, 409 + .errstr = "invalid bpf_context access", 410 + .result = REJECT, 411 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 412 + .expected_attach_type = BPF_SK_LOOKUP, 413 + }, 414 + { 415 + "invalid 4-byte unaligned read from bpf_sk_lookup at even offset", 416 + .insns = { 417 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2), 418 + BPF_MOV32_IMM(BPF_REG_0, 0), 419 + BPF_EXIT_INSN(), 420 + }, 421 + .errstr = "invalid bpf_context access", 422 + .result = REJECT, 423 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 424 + .expected_attach_type = BPF_SK_LOOKUP, 425 + }, 426 + /* in-bound and out-of-bound writes to bpf_sk_lookup */ 427 + { 428 + "invalid 8-byte write to bpf_sk_lookup", 429 + .insns = { 430 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 431 + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), 432 + BPF_MOV32_IMM(BPF_REG_0, 0), 433 + BPF_EXIT_INSN(), 434 + }, 435 + .errstr = "invalid bpf_context access", 436 + .result = REJECT, 437 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 438 + .expected_attach_type = BPF_SK_LOOKUP, 439 + }, 440 + { 441 + "invalid 4-byte write to bpf_sk_lookup", 442 + .insns = { 443 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 444 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), 445 + BPF_MOV32_IMM(BPF_REG_0, 0), 446 + BPF_EXIT_INSN(), 447 + }, 448 + .errstr = 
"invalid bpf_context access", 449 + .result = REJECT, 450 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 451 + .expected_attach_type = BPF_SK_LOOKUP, 452 + }, 453 + { 454 + "invalid 2-byte write to bpf_sk_lookup", 455 + .insns = { 456 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 457 + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0), 458 + BPF_MOV32_IMM(BPF_REG_0, 0), 459 + BPF_EXIT_INSN(), 460 + }, 461 + .errstr = "invalid bpf_context access", 462 + .result = REJECT, 463 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 464 + .expected_attach_type = BPF_SK_LOOKUP, 465 + }, 466 + { 467 + "invalid 1-byte write to bpf_sk_lookup", 468 + .insns = { 469 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 470 + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), 471 + BPF_MOV32_IMM(BPF_REG_0, 0), 472 + BPF_EXIT_INSN(), 473 + }, 474 + .errstr = "invalid bpf_context access", 475 + .result = REJECT, 476 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 477 + .expected_attach_type = BPF_SK_LOOKUP, 478 + }, 479 + { 480 + "invalid 4-byte write past end of bpf_sk_lookup", 481 + .insns = { 482 + BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U), 483 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 484 + sizeof(struct bpf_sk_lookup)), 485 + BPF_MOV32_IMM(BPF_REG_0, 0), 486 + BPF_EXIT_INSN(), 487 + }, 488 + .errstr = "invalid bpf_context access", 489 + .result = REJECT, 490 + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, 491 + .expected_attach_type = BPF_SK_LOOKUP, 492 + },