Merge tag 'bpf-next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Pull bpf updates from Alexei Starovoitov:
"For this merge window we're splitting BPF pull request into three for
higher visibility: main changes, res_spin_lock, try_alloc_pages.

These are the main BPF changes:

- Add DFA-based live registers analysis to improve verification of
programs with loops (Eduard Zingerman)

- Introduce load_acquire and store_release BPF instructions and add
x86, arm64 JIT support (Peilin Ye)

- Fix loop detection logic in the verifier (Eduard Zingerman)

- Drop unnecessary lock in bpf_map_inc_not_zero() (Eric Dumazet)

- Add kfunc for populating cpumask bits (Emil Tsalapatis)

- Convert various shell based tests to selftests/bpf/test_progs
format (Bastien Curutchet)

- Allow passing referenced kptrs into struct_ops callbacks (Amery
Hung)

- Add a flag to LSM bpf hook to facilitate bpf program signing
(Blaise Boscaccy)

- Track arena arguments in kfuncs (Ihor Solodrai)

- Add copy_remote_vm_str() helper for reading strings from remote VM
and bpf_copy_from_user_task_str() kfunc (Jordan Rome)

- Add support for timed may_goto instruction (Kumar Kartikeya
Dwivedi)

- Allow bpf_get_netns_cookie() in cgroup_skb programs (Mahe Tardy)

- Reduce bpf_cgrp_storage_busy false positives when accessing cgroup
local storage (Martin KaFai Lau)

- Introduce bpf_dynptr_copy() kfunc (Mykyta Yatsenko)

- Allow retrieving BTF data with BTF token (Mykyta Yatsenko)

- Add BPF kfuncs to set and get xattrs with 'security.bpf.' prefix
(Song Liu)

- Reject attaching programs to noreturn functions (Yafang Shao)

- Introduce pre-order traversal of cgroup bpf programs (Yonghong
Song)"

* tag 'bpf-next-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (186 commits)
selftests/bpf: Add selftests for load-acquire/store-release when register number is invalid
bpf: Fix out-of-bounds read in check_atomic_load/store()
libbpf: Add namespace for errstr making it libbpf_errstr
bpf: Add struct_ops context information to struct bpf_prog_aux
selftests/bpf: Sanitize pointer prior fclose()
selftests/bpf: Migrate test_xdp_vlan.sh into test_progs
selftests/bpf: test_xdp_vlan: Rename BPF sections
bpf: clarify a misleading verifier error message
selftests/bpf: Add selftest for attaching fexit to __noreturn functions
bpf: Reject attaching fexit/fmod_ret to __noreturn functions
bpf: Only fails the busy counter check in bpf_cgrp_storage_get if it creates storage
bpf: Make perf_event_read_output accessible in all program types.
bpftool: Using the right format specifiers
bpftool: Add -Wformat-signedness flag to detect format errors
selftests/bpf: Test freplace from user namespace
libbpf: Pass BPF token from find_prog_btf_id to BPF_BTF_GET_FD_BY_ID
bpf: Return prog btf_id without capable check
bpf: BPF token support for BPF_BTF_GET_FD_BY_ID
bpf, x86: Fix objtool warning for timed may_goto
bpf: Check map->record at the beginning of check_and_free_fields()
...

+10649 -3503
+1 -1
Documentation/bpf/bpf_iterators.rst
··· 86 86 The following are a few examples of selftest BPF iterator programs: 87 87 88 88 * `bpf_iter_tcp4.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c>`_ 89 - * `bpf_iter_task_vma.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c>`_ 89 + * `bpf_iter_task_vmas.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c>`_ 90 90 * `bpf_iter_task_file.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c>`_ 91 91 92 92 Let us look at ``bpf_iter_task_file.c``, which runs in kernel space:
+21 -4
Documentation/bpf/btf.rst
··· 102 102 * bits 24-28: kind (e.g. int, ptr, array...etc) 103 103 * bits 29-30: unused 104 104 * bit 31: kind_flag, currently used by 105 - * struct, union, fwd, enum and enum64. 105 + * struct, union, enum, fwd, enum64, 106 + * decl_tag and type_tag 106 107 */ 107 108 __u32 info; 108 109 /* "size" is used by INT, ENUM, STRUCT, UNION and ENUM64. ··· 479 478 480 479 ``struct btf_type`` encoding requirement: 481 480 * ``name_off``: offset to a non-empty string 482 - * ``info.kind_flag``: 0 481 + * ``info.kind_flag``: 0 or 1 483 482 * ``info.kind``: BTF_KIND_DECL_TAG 484 483 * ``info.vlen``: 0 485 484 * ``type``: ``struct``, ``union``, ``func``, ``var`` or ``typedef`` ··· 490 489 __u32 component_idx; 491 490 }; 492 491 493 - The ``name_off`` encodes btf_decl_tag attribute string. 494 492 The ``type`` should be ``struct``, ``union``, ``func``, ``var`` or ``typedef``. 495 493 For ``var`` or ``typedef`` type, ``btf_decl_tag.component_idx`` must be ``-1``. 496 494 For the other three types, if the btf_decl_tag attribute is ··· 499 499 a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a 500 500 valid index (starting from 0) pointing to a member or an argument. 501 501 502 + If ``info.kind_flag`` is 0, then this is a normal decl tag, and the 503 + ``name_off`` encodes btf_decl_tag attribute string. 504 + 505 + If ``info.kind_flag`` is 1, then the decl tag represents an arbitrary 506 + __attribute__. In this case, ``name_off`` encodes a string 507 + representing the attribute-list of the attribute specifier. For 508 + example, for an ``__attribute__((aligned(4)))`` the string's contents 509 + is ``aligned(4)``. 510 + 502 511 2.2.18 BTF_KIND_TYPE_TAG 503 512 ~~~~~~~~~~~~~~~~~~~~~~~~ 504 513 505 514 ``struct btf_type`` encoding requirement: 506 515 * ``name_off``: offset to a non-empty string 507 - * ``info.kind_flag``: 0 516 + * ``info.kind_flag``: 0 or 1 508 517 * ``info.kind``: BTF_KIND_TYPE_TAG 509 518 * ``info.vlen``: 0 510 519 * ``type``: the type with ``btf_type_tag`` attribute ··· 530 521 type_tag, then zero or more const/volatile/restrict/typedef 531 522 and finally the base type. The base type is one of 532 523 int, ptr, array, struct, union, enum, func_proto and float types. 524 + 525 + Similarly to decl tags, if the ``info.kind_flag`` is 0, then this is a 526 + normal type tag, and the ``name_off`` encodes btf_type_tag attribute 527 + string. 528 + 529 + If ``info.kind_flag`` is 1, then the type tag represents an arbitrary 530 + __attribute__, and the ``name_off`` encodes a string representing the 531 + attribute-list of the attribute specifier. 533 532 534 533 2.2.19 BTF_KIND_ENUM64 535 534 ~~~~~~~~~~~~~~~~~~~~~~
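
To make the new kind_flag encoding concrete, here is a hedged illustration (not part of the documentation patch): with a compiler that records arbitrary attributes in BTF (an assumption), the aligned(4) below could be emitted as a BTF_KIND_DECL_TAG whose info.kind_flag is 1, whose name_off string is "aligned(4)", and whose component_idx selects the tagged member.

  /* Illustrative only; depends on assumed compiler support. */
  struct foo {
          int x;
          int y __attribute__((aligned(4)));      /* -> BTF_KIND_DECL_TAG, kind_flag=1,
                                                   *    name "aligned(4)", component_idx=1 */
  };
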
+14 -6
Documentation/bpf/standardization/instruction-set.rst
··· 324 324 325 325 .. table:: Arithmetic instructions 326 326 327 - ===== ===== ======= ========================================================== 327 + ===== ===== ======= =================================================================================== 328 328 name code offset description 329 - ===== ===== ======= ========================================================== 329 + ===== ===== ======= =================================================================================== 330 330 ADD 0x0 0 dst += src 331 331 SUB 0x1 0 dst -= src 332 332 MUL 0x2 0 dst \*= src 333 333 DIV 0x3 0 dst = (src != 0) ? (dst / src) : 0 334 - SDIV 0x3 1 dst = (src != 0) ? (dst s/ src) : 0 334 + SDIV 0x3 1 dst = (src == 0) ? 0 : ((src == -1 && dst == LLONG_MIN) ? LLONG_MIN : (dst s/ src)) 335 335 OR 0x4 0 dst \|= src 336 336 AND 0x5 0 dst &= src 337 337 LSH 0x6 0 dst <<= (src & mask) 338 338 RSH 0x7 0 dst >>= (src & mask) 339 339 NEG 0x8 0 dst = -dst 340 340 MOD 0x9 0 dst = (src != 0) ? (dst % src) : dst 341 - SMOD 0x9 1 dst = (src != 0) ? (dst s% src) : dst 341 + SMOD 0x9 1 dst = (src == 0) ? dst : ((src == -1 && dst == LLONG_MIN) ? 0: (dst s% src)) 342 342 XOR 0xa 0 dst ^= src 343 343 MOV 0xb 0 dst = src 344 344 MOVSX 0xb 8/16/32 dst = (s8,s16,s32)src 345 345 ARSH 0xc 0 :term:`sign extending<Sign Extend>` dst >>= (src & mask) 346 346 END 0xd 0 byte swap operations (see `Byte swap instructions`_ below) 347 - ===== ===== ======= ========================================================== 347 + ===== ===== ======= =================================================================================== 348 348 349 349 Underflow and overflow are allowed during arithmetic operations, meaning 350 350 the 64-bit or 32-bit value will wrap. If BPF program execution would 351 351 result in division by zero, the destination register is instead set to zero. 352 + Otherwise, for ``ALU64``, if execution would result in ``LLONG_MIN`` 353 + dividing -1, the desination register is instead set to ``LLONG_MIN``. For 354 + ``ALU``, if execution would result in ``INT_MIN`` dividing -1, the 355 + desination register is instead set to ``INT_MIN``. 356 + 352 357 If execution would result in modulo by zero, for ``ALU64`` the value of 353 358 the destination register is unchanged whereas for ``ALU`` the upper 354 - 32 bits of the destination register are zeroed. 359 + 32 bits of the destination register are zeroed. Otherwise, for ``ALU64``, 360 + if execution would resuslt in ``LLONG_MIN`` modulo -1, the destination 361 + register is instead set to 0. For ``ALU``, if execution would result in 362 + ``INT_MIN`` modulo -1, the destination register is instead set to 0. 355 363 356 364 ``{ADD, X, ALU}``, where 'code' = ``ADD``, 'source' = ``X``, and 'class' = ``ALU``, means:: 357 365
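
A minimal C sketch of the ALU64 signed division/modulo corner cases spelled out above (an illustrative reference only, not kernel code; the 32-bit ALU case is analogous with INT_MIN):

  #include <limits.h>

  /* SDIV: division by zero yields 0; LLONG_MIN s/ -1 saturates to LLONG_MIN
   * instead of overflowing. */
  static long long bpf_alu64_sdiv(long long dst, long long src)
  {
          if (src == 0)
                  return 0;
          if (src == -1 && dst == LLONG_MIN)
                  return LLONG_MIN;
          return dst / src;
  }

  /* SMOD: modulo by zero leaves dst unchanged; LLONG_MIN s% -1 yields 0. */
  static long long bpf_alu64_smod(long long dst, long long src)
  {
          if (src == 0)
                  return dst;
          if (src == -1 && dst == LLONG_MIN)
                  return 0;
          return dst % src;
  }
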
+10 -2
arch/arm64/include/asm/insn.h
··· 188 188 AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX, 189 189 AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, 190 190 AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, 191 + AARCH64_INSN_LDST_LOAD_ACQ, 191 192 AARCH64_INSN_LDST_LOAD_EX, 192 193 AARCH64_INSN_LDST_LOAD_ACQ_EX, 194 + AARCH64_INSN_LDST_STORE_REL, 193 195 AARCH64_INSN_LDST_STORE_EX, 194 196 AARCH64_INSN_LDST_STORE_REL_EX, 195 197 AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET, ··· 353 351 __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) 354 352 __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) 355 353 __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) 356 - __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) 357 - __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) 354 + __AARCH64_INSN_FUNCS(load_acq, 0x3FDFFC00, 0x08DFFC00) 355 + __AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00) 356 + __AARCH64_INSN_FUNCS(load_ex, 0x3FC00000, 0x08400000) 357 + __AARCH64_INSN_FUNCS(store_ex, 0x3FC00000, 0x08000000) 358 358 __AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400) 359 359 __AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000) 360 360 __AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000) ··· 606 602 int offset, 607 603 enum aarch64_insn_variant variant, 608 604 enum aarch64_insn_ldst_type type); 605 + u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg, 606 + enum aarch64_insn_register base, 607 + enum aarch64_insn_size_type size, 608 + enum aarch64_insn_ldst_type type); 609 609 u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, 610 610 enum aarch64_insn_register base, 611 611 enum aarch64_insn_register state,
+29
arch/arm64/lib/insn.c
··· 540 540 offset >> shift); 541 541 } 542 542 543 + u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg, 544 + enum aarch64_insn_register base, 545 + enum aarch64_insn_size_type size, 546 + enum aarch64_insn_ldst_type type) 547 + { 548 + u32 insn; 549 + 550 + switch (type) { 551 + case AARCH64_INSN_LDST_LOAD_ACQ: 552 + insn = aarch64_insn_get_load_acq_value(); 553 + break; 554 + case AARCH64_INSN_LDST_STORE_REL: 555 + insn = aarch64_insn_get_store_rel_value(); 556 + break; 557 + default: 558 + pr_err("%s: unknown load-acquire/store-release encoding %d\n", 559 + __func__, type); 560 + return AARCH64_BREAK_FAULT; 561 + } 562 + 563 + insn = aarch64_insn_encode_ldst_size(size, insn); 564 + 565 + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, 566 + reg); 567 + 568 + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, 569 + base); 570 + } 571 + 543 572 u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, 544 573 enum aarch64_insn_register base, 545 574 enum aarch64_insn_register state,
+20
arch/arm64/net/bpf_jit.h
··· 119 119 aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \ 120 120 AARCH64_INSN_LDST_STORE_REL_EX) 121 121 122 + /* Load-acquire & store-release */ 123 + #define A64_LDAR(Rt, Rn, size) \ 124 + aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \ 125 + AARCH64_INSN_LDST_LOAD_ACQ) 126 + #define A64_STLR(Rt, Rn, size) \ 127 + aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \ 128 + AARCH64_INSN_LDST_STORE_REL) 129 + 130 + /* Rt = [Rn] (load acquire) */ 131 + #define A64_LDARB(Wt, Xn) A64_LDAR(Wt, Xn, 8) 132 + #define A64_LDARH(Wt, Xn) A64_LDAR(Wt, Xn, 16) 133 + #define A64_LDAR32(Wt, Xn) A64_LDAR(Wt, Xn, 32) 134 + #define A64_LDAR64(Xt, Xn) A64_LDAR(Xt, Xn, 64) 135 + 136 + /* [Rn] = Rt (store release) */ 137 + #define A64_STLRB(Wt, Xn) A64_STLR(Wt, Xn, 8) 138 + #define A64_STLRH(Wt, Xn) A64_STLR(Wt, Xn, 16) 139 + #define A64_STLR32(Wt, Xn) A64_STLR(Wt, Xn, 32) 140 + #define A64_STLR64(Xt, Xn) A64_STLR(Xt, Xn, 64) 141 + 122 142 /* 123 143 * LSE atomics 124 144 *
+87 -5
arch/arm64/net/bpf_jit_comp.c
··· 272 272 { 273 273 if (is_addsub_imm(imm)) { 274 274 emit(A64_ADD_I(is64, dst, src, imm), ctx); 275 - } else if (is_addsub_imm(-imm)) { 275 + } else if (is_addsub_imm(-(u32)imm)) { 276 276 emit(A64_SUB_I(is64, dst, src, -imm), ctx); 277 277 } else { 278 278 emit_a64_mov_i(is64, tmp, imm, ctx); ··· 642 642 643 643 off = &ctx->image[ctx->idx] - branch3; 644 644 *branch3 = cpu_to_le32(A64_CBZ(1, prg, off)); 645 + } 646 + 647 + return 0; 648 + } 649 + 650 + static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx) 651 + { 652 + const s32 imm = insn->imm; 653 + const s16 off = insn->off; 654 + const u8 code = insn->code; 655 + const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC; 656 + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 657 + const u8 dst = bpf2a64[insn->dst_reg]; 658 + const u8 src = bpf2a64[insn->src_reg]; 659 + const u8 tmp = bpf2a64[TMP_REG_1]; 660 + u8 reg; 661 + 662 + switch (imm) { 663 + case BPF_LOAD_ACQ: 664 + reg = src; 665 + break; 666 + case BPF_STORE_REL: 667 + reg = dst; 668 + break; 669 + default: 670 + pr_err_once("unknown atomic load/store op code %02x\n", imm); 671 + return -EINVAL; 672 + } 673 + 674 + if (off) { 675 + emit_a64_add_i(1, tmp, reg, tmp, off, ctx); 676 + reg = tmp; 677 + } 678 + if (arena) { 679 + emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx); 680 + reg = tmp; 681 + } 682 + 683 + switch (imm) { 684 + case BPF_LOAD_ACQ: 685 + switch (BPF_SIZE(code)) { 686 + case BPF_B: 687 + emit(A64_LDARB(dst, reg), ctx); 688 + break; 689 + case BPF_H: 690 + emit(A64_LDARH(dst, reg), ctx); 691 + break; 692 + case BPF_W: 693 + emit(A64_LDAR32(dst, reg), ctx); 694 + break; 695 + case BPF_DW: 696 + emit(A64_LDAR64(dst, reg), ctx); 697 + break; 698 + } 699 + break; 700 + case BPF_STORE_REL: 701 + switch (BPF_SIZE(code)) { 702 + case BPF_B: 703 + emit(A64_STLRB(src, reg), ctx); 704 + break; 705 + case BPF_H: 706 + emit(A64_STLRH(src, reg), ctx); 707 + break; 708 + case BPF_W: 709 + emit(A64_STLR32(src, reg), ctx); 710 + break; 711 + case BPF_DW: 712 + emit(A64_STLR64(src, reg), ctx); 713 + break; 714 + } 715 + break; 716 + default: 717 + pr_err_once("unexpected atomic load/store op code %02x\n", 718 + imm); 719 + return -EINVAL; 645 720 } 646 721 647 722 return 0; ··· 1234 1159 case BPF_ALU64 | BPF_SUB | BPF_K: 1235 1160 if (is_addsub_imm(imm)) { 1236 1161 emit(A64_SUB_I(is64, dst, dst, imm), ctx); 1237 - } else if (is_addsub_imm(-imm)) { 1162 + } else if (is_addsub_imm(-(u32)imm)) { 1238 1163 emit(A64_ADD_I(is64, dst, dst, -imm), ctx); 1239 1164 } else { 1240 1165 emit_a64_mov_i(is64, tmp, imm, ctx); ··· 1405 1330 case BPF_JMP32 | BPF_JSLE | BPF_K: 1406 1331 if (is_addsub_imm(imm)) { 1407 1332 emit(A64_CMP_I(is64, dst, imm), ctx); 1408 - } else if (is_addsub_imm(-imm)) { 1333 + } else if (is_addsub_imm(-(u32)imm)) { 1409 1334 emit(A64_CMN_I(is64, dst, -imm), ctx); 1410 1335 } else { 1411 1336 emit_a64_mov_i(is64, tmp, imm, ctx); ··· 1716 1641 return ret; 1717 1642 break; 1718 1643 1644 + case BPF_STX | BPF_ATOMIC | BPF_B: 1645 + case BPF_STX | BPF_ATOMIC | BPF_H: 1719 1646 case BPF_STX | BPF_ATOMIC | BPF_W: 1720 1647 case BPF_STX | BPF_ATOMIC | BPF_DW: 1648 + case BPF_STX | BPF_PROBE_ATOMIC | BPF_B: 1649 + case BPF_STX | BPF_PROBE_ATOMIC | BPF_H: 1721 1650 case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: 1722 1651 case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: 1723 - if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) 1652 + if (bpf_atomic_is_load_store(insn)) 1653 + ret = emit_atomic_ld_st(insn, ctx); 1654 + else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) 1724 1655 
ret = emit_lse_atomic(insn, ctx); 1725 1656 else 1726 1657 ret = emit_ll_sc_atomic(insn, ctx); ··· 2750 2669 switch (insn->code) { 2751 2670 case BPF_STX | BPF_ATOMIC | BPF_W: 2752 2671 case BPF_STX | BPF_ATOMIC | BPF_DW: 2753 - if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) 2672 + if (!bpf_atomic_is_load_store(insn) && 2673 + !cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) 2754 2674 return false; 2755 2675 } 2756 2676 return true;
+10 -4
arch/s390/net/bpf_jit_comp.c
··· 2919 2919 2920 2920 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) 2921 2921 { 2922 - /* 2923 - * Currently the verifier uses this function only to check which 2924 - * atomic stores to arena are supported, and they all are. 2925 - */ 2922 + if (!in_arena) 2923 + return true; 2924 + switch (insn->code) { 2925 + case BPF_STX | BPF_ATOMIC | BPF_B: 2926 + case BPF_STX | BPF_ATOMIC | BPF_H: 2927 + case BPF_STX | BPF_ATOMIC | BPF_W: 2928 + case BPF_STX | BPF_ATOMIC | BPF_DW: 2929 + if (bpf_atomic_is_load_store(insn)) 2930 + return false; 2931 + } 2926 2932 return true; 2927 2933 } 2928 2934
+1 -1
arch/x86/net/Makefile
··· 6 6 ifeq ($(CONFIG_X86_32),y) 7 7 obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o 8 8 else 9 - obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o 9 + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o 10 10 endif
+87 -13
arch/x86/net/bpf_jit_comp.c
··· 1250 1250 emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm); 1251 1251 } 1252 1252 1253 - static int emit_atomic(u8 **pprog, u8 atomic_op, 1254 - u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) 1253 + static int emit_atomic_rmw(u8 **pprog, u32 atomic_op, 1254 + u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) 1255 1255 { 1256 1256 u8 *prog = *pprog; 1257 1257 ··· 1291 1291 return 0; 1292 1292 } 1293 1293 1294 - static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size, 1295 - u32 dst_reg, u32 src_reg, u32 index_reg, int off) 1294 + static int emit_atomic_rmw_index(u8 **pprog, u32 atomic_op, u32 size, 1295 + u32 dst_reg, u32 src_reg, u32 index_reg, 1296 + int off) 1296 1297 { 1297 1298 u8 *prog = *pprog; 1298 1299 ··· 1306 1305 EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg)); 1307 1306 break; 1308 1307 default: 1309 - pr_err("bpf_jit: 1 and 2 byte atomics are not supported\n"); 1308 + pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); 1310 1309 return -EFAULT; 1311 1310 } 1312 1311 ··· 1337 1336 } 1338 1337 emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off); 1339 1338 *pprog = prog; 1339 + return 0; 1340 + } 1341 + 1342 + static int emit_atomic_ld_st(u8 **pprog, u32 atomic_op, u32 dst_reg, 1343 + u32 src_reg, s16 off, u8 bpf_size) 1344 + { 1345 + switch (atomic_op) { 1346 + case BPF_LOAD_ACQ: 1347 + /* dst_reg = smp_load_acquire(src_reg + off16) */ 1348 + emit_ldx(pprog, bpf_size, dst_reg, src_reg, off); 1349 + break; 1350 + case BPF_STORE_REL: 1351 + /* smp_store_release(dst_reg + off16, src_reg) */ 1352 + emit_stx(pprog, bpf_size, dst_reg, src_reg, off); 1353 + break; 1354 + default: 1355 + pr_err("bpf_jit: unknown atomic load/store opcode %02x\n", 1356 + atomic_op); 1357 + return -EFAULT; 1358 + } 1359 + 1360 + return 0; 1361 + } 1362 + 1363 + static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size, 1364 + u32 dst_reg, u32 src_reg, u32 index_reg, 1365 + int off) 1366 + { 1367 + switch (atomic_op) { 1368 + case BPF_LOAD_ACQ: 1369 + /* dst_reg = smp_load_acquire(src_reg + idx_reg + off16) */ 1370 + emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off); 1371 + break; 1372 + case BPF_STORE_REL: 1373 + /* smp_store_release(dst_reg + idx_reg + off16, src_reg) */ 1374 + emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off); 1375 + break; 1376 + default: 1377 + pr_err("bpf_jit: unknown atomic load/store opcode %02x\n", 1378 + atomic_op); 1379 + return -EFAULT; 1380 + } 1381 + 1340 1382 return 0; 1341 1383 } 1342 1384 ··· 2165 2121 } 2166 2122 break; 2167 2123 2124 + case BPF_STX | BPF_ATOMIC | BPF_B: 2125 + case BPF_STX | BPF_ATOMIC | BPF_H: 2126 + if (!bpf_atomic_is_load_store(insn)) { 2127 + pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); 2128 + return -EFAULT; 2129 + } 2130 + fallthrough; 2168 2131 case BPF_STX | BPF_ATOMIC | BPF_W: 2169 2132 case BPF_STX | BPF_ATOMIC | BPF_DW: 2170 2133 if (insn->imm == (BPF_AND | BPF_FETCH) || ··· 2207 2156 EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)], 2208 2157 add_2reg(0xC0, AUX_REG, real_src_reg)); 2209 2158 /* Attempt to swap in new value */ 2210 - err = emit_atomic(&prog, BPF_CMPXCHG, 2211 - real_dst_reg, AUX_REG, 2212 - insn->off, 2213 - BPF_SIZE(insn->code)); 2159 + err = emit_atomic_rmw(&prog, BPF_CMPXCHG, 2160 + real_dst_reg, AUX_REG, 2161 + insn->off, 2162 + BPF_SIZE(insn->code)); 2214 2163 if (WARN_ON(err)) 2215 2164 return err; 2216 2165 /* ··· 2225 2174 break; 2226 2175 } 2227 2176 2228 - err = emit_atomic(&prog, insn->imm, dst_reg, src_reg, 2229 - 
insn->off, BPF_SIZE(insn->code)); 2177 + if (bpf_atomic_is_load_store(insn)) 2178 + err = emit_atomic_ld_st(&prog, insn->imm, dst_reg, src_reg, 2179 + insn->off, BPF_SIZE(insn->code)); 2180 + else 2181 + err = emit_atomic_rmw(&prog, insn->imm, dst_reg, src_reg, 2182 + insn->off, BPF_SIZE(insn->code)); 2230 2183 if (err) 2231 2184 return err; 2232 2185 break; 2233 2186 2187 + case BPF_STX | BPF_PROBE_ATOMIC | BPF_B: 2188 + case BPF_STX | BPF_PROBE_ATOMIC | BPF_H: 2189 + if (!bpf_atomic_is_load_store(insn)) { 2190 + pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); 2191 + return -EFAULT; 2192 + } 2193 + fallthrough; 2234 2194 case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: 2235 2195 case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: 2236 2196 start_of_ldx = prog; 2237 - err = emit_atomic_index(&prog, insn->imm, BPF_SIZE(insn->code), 2238 - dst_reg, src_reg, X86_REG_R12, insn->off); 2197 + 2198 + if (bpf_atomic_is_load_store(insn)) 2199 + err = emit_atomic_ld_st_index(&prog, insn->imm, 2200 + BPF_SIZE(insn->code), dst_reg, 2201 + src_reg, X86_REG_R12, insn->off); 2202 + else 2203 + err = emit_atomic_rmw_index(&prog, insn->imm, BPF_SIZE(insn->code), 2204 + dst_reg, src_reg, X86_REG_R12, 2205 + insn->off); 2239 2206 if (err) 2240 2207 return err; 2241 2208 goto populate_extable; ··· 3869 3800 u64 bpf_arch_uaddress_limit(void) 3870 3801 { 3871 3802 return 0; 3803 + } 3804 + 3805 + bool bpf_jit_supports_timed_may_goto(void) 3806 + { 3807 + return true; 3872 3808 }
+55
arch/x86/net/bpf_timed_may_goto.S
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include <linux/export.h> 5 + #include <linux/linkage.h> 6 + #include <asm/nospec-branch.h> 7 + 8 + .code64 9 + .section .text, "ax" 10 + 11 + SYM_FUNC_START(arch_bpf_timed_may_goto) 12 + ANNOTATE_NOENDBR 13 + 14 + /* 15 + * r10 passes us stack depth, load the pointer to count and timestamp 16 + * into r10 by adding it to BPF frame pointer. 17 + */ 18 + leaq (%rbp, %r10, 1), %r10 19 + 20 + /* Setup frame. */ 21 + pushq %rbp 22 + movq %rsp, %rbp 23 + 24 + /* Save r0-r5. */ 25 + pushq %rax 26 + pushq %rdi 27 + pushq %rsi 28 + pushq %rdx 29 + pushq %rcx 30 + pushq %r8 31 + 32 + /* 33 + * r10 has the pointer to count and timestamp, pass it as first 34 + * argument. 35 + */ 36 + movq %r10, %rdi 37 + 38 + /* Emit call depth accounting for call below. */ 39 + CALL_DEPTH_ACCOUNT 40 + call bpf_check_timed_may_goto 41 + 42 + /* BPF_REG_AX=r10 will be stored into count, so move return value to it. */ 43 + movq %rax, %r10 44 + 45 + /* Restore r5-r0. */ 46 + popq %r8 47 + popq %rcx 48 + popq %rdx 49 + popq %rsi 50 + popq %rdi 51 + popq %rax 52 + 53 + leave 54 + RET 55 + SYM_FUNC_END(arch_bpf_timed_may_goto)
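
The trampoline above only saves and restores the caller-saved BPF argument registers, passes the per-frame pointer to count and timestamp to bpf_check_timed_may_goto(), and moves the return value into BPF_REG_AX so it becomes the new may_goto count. A purely illustrative sketch of such a check follows; the in-kernel helper may differ, and the time budget used here is an assumption:

  /* Sketch only; not the kernel's implementation. */
  #define TIMED_MAY_GOTO_BUDGET_NS (250 * NSEC_PER_MSEC)  /* assumed budget */

  u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *p)
  {
          u64 time = ktime_get_mono_fast_ns();

          /* First call for this stack frame: stamp the start time and hand
           * out a fresh iteration budget. */
          if (!p->timestamp) {
                  p->timestamp = time;
                  return BPF_MAX_TIMED_LOOPS;
          }
          /* Out of time: return 0 so the may_goto branch exits the loop. */
          if (time - p->timestamp >= TIMED_MAY_GOTO_BUDGET_NS)
                  return 0;
          /* Still within budget: refill the per-frame count. */
          return BPF_MAX_TIMED_LOOPS;
  }
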
+214 -11
fs/bpf_fs_kfuncs.c
··· 2 2 /* Copyright (c) 2024 Google LLC. */ 3 3 4 4 #include <linux/bpf.h> 5 + #include <linux/bpf_lsm.h> 5 6 #include <linux/btf.h> 6 7 #include <linux/btf_ids.h> 7 8 #include <linux/dcache.h> 8 9 #include <linux/fs.h> 10 + #include <linux/fsnotify.h> 9 11 #include <linux/file.h> 10 12 #include <linux/mm.h> 11 13 #include <linux/xattr.h> ··· 95 93 return len; 96 94 } 97 95 96 + static bool match_security_bpf_prefix(const char *name__str) 97 + { 98 + return !strncmp(name__str, XATTR_NAME_BPF_LSM, XATTR_NAME_BPF_LSM_LEN); 99 + } 100 + 101 + static int bpf_xattr_read_permission(const char *name, struct inode *inode) 102 + { 103 + if (WARN_ON(!inode)) 104 + return -EINVAL; 105 + 106 + /* Allow reading xattr with user. and security.bpf. prefix */ 107 + if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && 108 + !match_security_bpf_prefix(name)) 109 + return -EPERM; 110 + 111 + return inode_permission(&nop_mnt_idmap, inode, MAY_READ); 112 + } 113 + 98 114 /** 99 115 * bpf_get_dentry_xattr - get xattr of a dentry 100 116 * @dentry: dentry to get xattr from ··· 121 101 * 122 102 * Get xattr *name__str* of *dentry* and store the output in *value_ptr*. 123 103 * 124 - * For security reasons, only *name__str* with prefix "user." is allowed. 104 + * For security reasons, only *name__str* with prefixes "user." or 105 + * "security.bpf." are allowed. 125 106 * 126 - * Return: 0 on success, a negative value on error. 107 + * Return: length of the xattr value on success, a negative value on error. 127 108 */ 128 109 __bpf_kfunc int bpf_get_dentry_xattr(struct dentry *dentry, const char *name__str, 129 110 struct bpf_dynptr *value_p) ··· 135 114 void *value; 136 115 int ret; 137 116 138 - if (WARN_ON(!inode)) 139 - return -EINVAL; 140 - 141 - if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) 142 - return -EPERM; 143 - 144 117 value_len = __bpf_dynptr_size(value_ptr); 145 118 value = __bpf_dynptr_data_rw(value_ptr, value_len); 146 119 if (!value) 147 120 return -EINVAL; 148 121 149 - ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ); 122 + ret = bpf_xattr_read_permission(name__str, inode); 150 123 if (ret) 151 124 return ret; 152 125 return __vfs_getxattr(dentry, inode, name__str, value, value_len); ··· 154 139 * 155 140 * Get xattr *name__str* of *file* and store the output in *value_ptr*. 156 141 * 157 - * For security reasons, only *name__str* with prefix "user." is allowed. 142 + * For security reasons, only *name__str* with prefixes "user." or 143 + * "security.bpf." are allowed. 158 144 * 159 - * Return: 0 on success, a negative value on error. 145 + * Return: length of the xattr value on success, a negative value on error. 160 146 */ 161 147 __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, 162 148 struct bpf_dynptr *value_p) ··· 170 154 171 155 __bpf_kfunc_end_defs(); 172 156 157 + static int bpf_xattr_write_permission(const char *name, struct inode *inode) 158 + { 159 + if (WARN_ON(!inode)) 160 + return -EINVAL; 161 + 162 + /* Only allow setting and removing security.bpf. 
xattrs */ 163 + if (!match_security_bpf_prefix(name)) 164 + return -EPERM; 165 + 166 + return inode_permission(&nop_mnt_idmap, inode, MAY_WRITE); 167 + } 168 + 169 + /** 170 + * bpf_set_dentry_xattr_locked - set a xattr of a dentry 171 + * @dentry: dentry to get xattr from 172 + * @name__str: name of the xattr 173 + * @value_p: xattr value 174 + * @flags: flags to pass into filesystem operations 175 + * 176 + * Set xattr *name__str* of *dentry* to the value in *value_ptr*. 177 + * 178 + * For security reasons, only *name__str* with prefix "security.bpf." 179 + * is allowed. 180 + * 181 + * The caller already locked dentry->d_inode. 182 + * 183 + * Return: 0 on success, a negative value on error. 184 + */ 185 + int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str, 186 + const struct bpf_dynptr *value_p, int flags) 187 + { 188 + 189 + struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p; 190 + struct inode *inode = d_inode(dentry); 191 + const void *value; 192 + u32 value_len; 193 + int ret; 194 + 195 + value_len = __bpf_dynptr_size(value_ptr); 196 + value = __bpf_dynptr_data(value_ptr, value_len); 197 + if (!value) 198 + return -EINVAL; 199 + 200 + ret = bpf_xattr_write_permission(name__str, inode); 201 + if (ret) 202 + return ret; 203 + 204 + ret = __vfs_setxattr(&nop_mnt_idmap, dentry, inode, name__str, 205 + value, value_len, flags); 206 + if (!ret) { 207 + fsnotify_xattr(dentry); 208 + 209 + /* This xattr is set by BPF LSM, so we do not call 210 + * security_inode_post_setxattr. Otherwise, we would 211 + * risk deadlocks by calling back to the same kfunc. 212 + * 213 + * This is the same as security_inode_setsecurity(). 214 + */ 215 + } 216 + return ret; 217 + } 218 + 219 + /** 220 + * bpf_remove_dentry_xattr_locked - remove a xattr of a dentry 221 + * @dentry: dentry to get xattr from 222 + * @name__str: name of the xattr 223 + * 224 + * Rmove xattr *name__str* of *dentry*. 225 + * 226 + * For security reasons, only *name__str* with prefix "security.bpf." 227 + * is allowed. 228 + * 229 + * The caller already locked dentry->d_inode. 230 + * 231 + * Return: 0 on success, a negative value on error. 232 + */ 233 + int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str) 234 + { 235 + struct inode *inode = d_inode(dentry); 236 + int ret; 237 + 238 + ret = bpf_xattr_write_permission(name__str, inode); 239 + if (ret) 240 + return ret; 241 + 242 + ret = __vfs_removexattr(&nop_mnt_idmap, dentry, name__str); 243 + if (!ret) { 244 + fsnotify_xattr(dentry); 245 + 246 + /* This xattr is removed by BPF LSM, so we do not call 247 + * security_inode_post_removexattr. Otherwise, we would 248 + * risk deadlocks by calling back to the same kfunc. 249 + */ 250 + } 251 + return ret; 252 + } 253 + 254 + __bpf_kfunc_start_defs(); 255 + 256 + /** 257 + * bpf_set_dentry_xattr - set a xattr of a dentry 258 + * @dentry: dentry to get xattr from 259 + * @name__str: name of the xattr 260 + * @value_p: xattr value 261 + * @flags: flags to pass into filesystem operations 262 + * 263 + * Set xattr *name__str* of *dentry* to the value in *value_ptr*. 264 + * 265 + * For security reasons, only *name__str* with prefix "security.bpf." 266 + * is allowed. 267 + * 268 + * The caller has not locked dentry->d_inode. 269 + * 270 + * Return: 0 on success, a negative value on error. 
271 + */ 272 + __bpf_kfunc int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str, 273 + const struct bpf_dynptr *value_p, int flags) 274 + { 275 + struct inode *inode = d_inode(dentry); 276 + int ret; 277 + 278 + inode_lock(inode); 279 + ret = bpf_set_dentry_xattr_locked(dentry, name__str, value_p, flags); 280 + inode_unlock(inode); 281 + return ret; 282 + } 283 + 284 + /** 285 + * bpf_remove_dentry_xattr - remove a xattr of a dentry 286 + * @dentry: dentry to get xattr from 287 + * @name__str: name of the xattr 288 + * 289 + * Rmove xattr *name__str* of *dentry*. 290 + * 291 + * For security reasons, only *name__str* with prefix "security.bpf." 292 + * is allowed. 293 + * 294 + * The caller has not locked dentry->d_inode. 295 + * 296 + * Return: 0 on success, a negative value on error. 297 + */ 298 + __bpf_kfunc int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) 299 + { 300 + struct inode *inode = d_inode(dentry); 301 + int ret; 302 + 303 + inode_lock(inode); 304 + ret = bpf_remove_dentry_xattr_locked(dentry, name__str); 305 + inode_unlock(inode); 306 + return ret; 307 + } 308 + 309 + __bpf_kfunc_end_defs(); 310 + 173 311 BTF_KFUNCS_START(bpf_fs_kfunc_set_ids) 174 312 BTF_ID_FLAGS(func, bpf_get_task_exe_file, 175 313 KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL) ··· 331 161 BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS) 332 162 BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) 333 163 BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) 164 + BTF_ID_FLAGS(func, bpf_set_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) 165 + BTF_ID_FLAGS(func, bpf_remove_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) 334 166 BTF_KFUNCS_END(bpf_fs_kfunc_set_ids) 335 167 336 168 static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id) ··· 341 169 prog->type == BPF_PROG_TYPE_LSM) 342 170 return 0; 343 171 return -EACCES; 172 + } 173 + 174 + /* bpf_[set|remove]_dentry_xattr.* hooks have KF_TRUSTED_ARGS and 175 + * KF_SLEEPABLE, so they are only available to sleepable hooks with 176 + * dentry arguments. 177 + * 178 + * Setting and removing xattr requires exclusive lock on dentry->d_inode. 179 + * Some hooks already locked d_inode, while some hooks have not locked 180 + * d_inode. Therefore, we need different kfuncs for different hooks. 181 + * Specifically, hooks in the following list (d_inode_locked_hooks) 182 + * should call bpf_[set|remove]_dentry_xattr_locked; while other hooks 183 + * should call bpf_[set|remove]_dentry_xattr. 184 + */ 185 + BTF_SET_START(d_inode_locked_hooks) 186 + BTF_ID(func, bpf_lsm_inode_post_removexattr) 187 + BTF_ID(func, bpf_lsm_inode_post_setattr) 188 + BTF_ID(func, bpf_lsm_inode_post_setxattr) 189 + BTF_ID(func, bpf_lsm_inode_removexattr) 190 + BTF_ID(func, bpf_lsm_inode_rmdir) 191 + BTF_ID(func, bpf_lsm_inode_setattr) 192 + BTF_ID(func, bpf_lsm_inode_setxattr) 193 + BTF_ID(func, bpf_lsm_inode_unlink) 194 + #ifdef CONFIG_SECURITY_PATH 195 + BTF_ID(func, bpf_lsm_path_unlink) 196 + BTF_ID(func, bpf_lsm_path_rmdir) 197 + #endif /* CONFIG_SECURITY_PATH */ 198 + BTF_SET_END(d_inode_locked_hooks) 199 + 200 + bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog) 201 + { 202 + return btf_id_set_contains(&d_inode_locked_hooks, prog->aux->attach_btf_id); 344 203 } 345 204 346 205 static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
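
A hedged sketch of how a sleepable BPF LSM program could use the new setter kfunc; the hook, xattr name and value, and the vmlinux.h-based build are illustrative assumptions, not taken from this series. Since bpf_lsm_inode_unlink is in the d_inode_locked_hooks list above, the kernel is expected to route the call to the _locked variant for this attachment.

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
                                  const struct bpf_dynptr *value_p, int flags) __ksym;

  char _license[] SEC("license") = "GPL";

  /* Writable .data buffer so it can back a local dynptr. */
  char tag_value[] = "v1";

  SEC("lsm.s/inode_unlink")
  int BPF_PROG(tag_on_unlink, struct inode *dir, struct dentry *victim)
  {
          struct bpf_dynptr value;

          /* Only "security.bpf."-prefixed names pass the write-permission
           * check in the kfunc. */
          bpf_dynptr_from_mem(tag_value, sizeof(tag_value), 0, &value);
          bpf_set_dentry_xattr(victim, "security.bpf.tag", &value, 0);
          return 0;
  }
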
+1
include/linux/bpf-cgroup.h
··· 111 111 struct bpf_prog *prog; 112 112 struct bpf_cgroup_link *link; 113 113 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; 114 + u32 flags; 114 115 }; 115 116 116 117 int cgroup_bpf_inherit(struct cgroup *cgrp);
+29 -2
include/linux/bpf.h
··· 968 968 struct { 969 969 struct btf *btf; 970 970 u32 btf_id; 971 + u32 ref_obj_id; 971 972 }; 972 973 }; 973 974 struct bpf_verifier_log *log; /* for verbose logs */ ··· 989 988 static inline bool bpf_pseudo_func(const struct bpf_insn *insn) 990 989 { 991 990 return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; 991 + } 992 + 993 + /* Given a BPF_ATOMIC instruction @atomic_insn, return true if it is an 994 + * atomic load or store, and false if it is a read-modify-write instruction. 995 + */ 996 + static inline bool 997 + bpf_atomic_is_load_store(const struct bpf_insn *atomic_insn) 998 + { 999 + switch (atomic_insn->imm) { 1000 + case BPF_LOAD_ACQ: 1001 + case BPF_STORE_REL: 1002 + return true; 1003 + default: 1004 + return false; 1005 + } 992 1006 } 993 1007 994 1008 struct bpf_prog_ops { ··· 1497 1481 enum bpf_reg_type reg_type; 1498 1482 struct btf *btf; 1499 1483 u32 btf_id; 1484 + u32 ref_obj_id; 1485 + bool refcounted; 1500 1486 }; 1501 1487 1502 1488 struct btf_mod_pair { ··· 1521 1503 u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ 1522 1504 u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ 1523 1505 u32 attach_btf_id; /* in-kernel BTF type id to attach to */ 1506 + u32 attach_st_ops_member_off; 1524 1507 u32 ctx_arg_info_size; 1525 1508 u32 max_rdonly_access; 1526 1509 u32 max_rdwr_access; 1527 1510 struct btf *attach_btf; 1528 - const struct bpf_ctx_arg_aux *ctx_arg_info; 1511 + struct bpf_ctx_arg_aux *ctx_arg_info; 1529 1512 void __percpu *priv_stack_ptr; 1530 1513 struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ 1531 1514 struct bpf_prog *dst_prog; ··· 1547 1528 bool jits_use_priv_stack; 1548 1529 bool priv_stack_requested; 1549 1530 bool changes_pkt_data; 1531 + bool might_sleep; 1550 1532 u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ 1551 1533 struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ 1552 1534 struct bpf_arena *arena; ··· 1567 1547 #endif 1568 1548 struct bpf_ksym ksym; 1569 1549 const struct bpf_prog_ops *ops; 1550 + const struct bpf_struct_ops *st_ops; 1570 1551 struct bpf_map **used_maps; 1571 1552 struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */ 1572 1553 struct btf_mod_pair *used_btfs; ··· 1966 1945 1967 1946 #endif 1968 1947 1948 + int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, 1949 + const struct bpf_ctx_arg_aux *info, u32 cnt); 1950 + 1969 1951 #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) 1970 1952 int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, 1971 1953 int cgroup_atype); ··· 2004 1980 */ 2005 1981 enum { 2006 1982 BPF_MAX_LOOPS = 8 * 1024 * 1024, 1983 + BPF_MAX_TIMED_LOOPS = 0xffff, 2007 1984 }; 2008 1985 2009 1986 #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ ··· 2060 2035 2061 2036 const struct bpf_func_proto *bpf_get_trace_printk_proto(void); 2062 2037 const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void); 2038 + 2039 + const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void); 2063 2040 2064 2041 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, 2065 2042 unsigned long off, unsigned long len); ··· 2573 2546 2574 2547 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); 2575 2548 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); 2576 - bool bpf_iter_prog_supported(struct bpf_prog *prog); 2549 + int bpf_iter_prog_supported(struct bpf_prog *prog); 2577 2550 
const struct bpf_func_proto * 2578 2551 bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); 2579 2552 int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
+18
include/linux/bpf_lsm.h
··· 48 48 49 49 int bpf_lsm_get_retval_range(const struct bpf_prog *prog, 50 50 struct bpf_retval_range *range); 51 + int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str, 52 + const struct bpf_dynptr *value_p, int flags); 53 + int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str); 54 + bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog); 55 + 51 56 #else /* !CONFIG_BPF_LSM */ 52 57 53 58 static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) ··· 90 85 struct bpf_retval_range *range) 91 86 { 92 87 return -EOPNOTSUPP; 88 + } 89 + static inline int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str, 90 + const struct bpf_dynptr *value_p, int flags) 91 + { 92 + return -EOPNOTSUPP; 93 + } 94 + static inline int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str) 95 + { 96 + return -EOPNOTSUPP; 97 + } 98 + static inline bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog) 99 + { 100 + return false; 93 101 } 94 102 #endif /* CONFIG_BPF_LSM */ 95 103
+23 -9
include/linux/bpf_verifier.h
··· 427 427 bool active_rcu_lock; 428 428 429 429 bool speculative; 430 - /* If this state was ever pointed-to by other state's loop_entry field 431 - * this flag would be set to true. Used to avoid freeing such states 432 - * while they are still in use. 433 - */ 434 - bool used_as_loop_entry; 435 430 bool in_sleepable; 436 431 437 432 /* first and last insn idx of this verifier state */ ··· 453 458 u32 dfs_depth; 454 459 u32 callback_unroll_depth; 455 460 u32 may_goto_depth; 461 + /* If this state was ever pointed-to by other state's loop_entry field 462 + * this flag would be set to true. Used to avoid freeing such states 463 + * while they are still in use. 464 + */ 465 + u32 used_as_loop_entry; 456 466 }; 457 467 458 468 #define bpf_get_spilled_reg(slot, frame, mask) \ ··· 498 498 /* linked list of verifier states used to prune search */ 499 499 struct bpf_verifier_state_list { 500 500 struct bpf_verifier_state state; 501 - struct bpf_verifier_state_list *next; 502 - int miss_cnt, hit_cnt; 501 + struct list_head node; 502 + u32 miss_cnt; 503 + u32 hit_cnt:31; 504 + u32 in_free_list:1; 503 505 }; 504 506 505 507 struct bpf_loop_inline_state { ··· 591 589 * accepts callback function as a parameter. 592 590 */ 593 591 bool calls_callback; 592 + /* registers alive before this instruction. */ 593 + u16 live_regs_before; 594 594 }; 595 595 596 596 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ ··· 669 665 /* true if bpf_fastcall stack region is used by functions that can't be inlined */ 670 666 bool keep_fastcall_stack: 1; 671 667 bool changes_pkt_data: 1; 668 + bool might_sleep: 1; 672 669 673 670 enum priv_stack_mode priv_stack_mode; 674 671 u8 arg_cnt; ··· 715 710 bool test_state_freq; /* test verifier with different pruning frequency */ 716 711 bool test_reg_invariants; /* fail verification on register invariants violations */ 717 712 struct bpf_verifier_state *cur_state; /* current verifier state */ 718 - struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ 719 - struct bpf_verifier_state_list *free_list; 713 + /* Search pruning optimization, array of list_heads for 714 + * lists of struct bpf_verifier_state_list. 715 + */ 716 + struct list_head *explored_states; 717 + struct list_head free_list; /* list of struct bpf_verifier_state_list */ 720 718 struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ 721 719 struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */ 722 720 u32 used_map_cnt; /* number of used maps */ ··· 750 742 struct { 751 743 int *insn_state; 752 744 int *insn_stack; 745 + /* vector of instruction indexes sorted in post-order */ 746 + int *insn_postorder; 753 747 int cur_stack; 748 + /* current position in the insn_postorder vector */ 749 + int cur_postorder; 754 750 } cfg; 755 751 struct backtrack_state bt; 756 752 struct bpf_insn_hist_entry *insn_hist; ··· 779 767 u32 peak_states; 780 768 /* longest register parentage chain walked for liveness marking */ 781 769 u32 longest_mark_read_walk; 770 + u32 free_list_size; 771 + u32 explored_states_size; 782 772 bpfptr_t fd_array; 783 773 784 774 /* bit mask to keep track of whether a register has been accessed
+3
include/linux/btf.h
··· 76 76 #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ 77 77 #define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ 78 78 #define KF_FASTCALL (1 << 12) /* kfunc supports bpf_fastcall protocol */ 79 + #define KF_ARENA_RET (1 << 13) /* kfunc returns an arena pointer */ 80 + #define KF_ARENA_ARG1 (1 << 14) /* kfunc takes an arena pointer as its first argument */ 81 + #define KF_ARENA_ARG2 (1 << 15) /* kfunc takes an arena pointer as its second argument */ 79 82 80 83 /* 81 84 * Tag marking a kernel function as a kfunc. This is meant to minimize the
+20
include/linux/filter.h
··· 364 364 * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); 365 365 * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) 366 366 * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) 367 + * BPF_LOAD_ACQ dst_reg = smp_load_acquire(src_reg + off16) 368 + * BPF_STORE_REL smp_store_release(dst_reg + off16, src_reg) 367 369 */ 368 370 369 371 #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ ··· 470 468 .src_reg = 0, \ 471 469 .off = 0, \ 472 470 .imm = BPF_CALL_IMM(FUNC) }) 471 + 472 + /* Kfunc call */ 473 + 474 + #define BPF_CALL_KFUNC(OFF, IMM) \ 475 + ((struct bpf_insn) { \ 476 + .code = BPF_JMP | BPF_CALL, \ 477 + .dst_reg = 0, \ 478 + .src_reg = BPF_PSEUDO_KFUNC_CALL, \ 479 + .off = OFF, \ 480 + .imm = IMM }) 473 481 474 482 /* Raw code statement block */ 475 483 ··· 670 658 u64_stats_t misses; 671 659 struct u64_stats_sync syncp; 672 660 } __aligned(2 * sizeof(u64)); 661 + 662 + struct bpf_timed_may_goto { 663 + u64 count; 664 + u64 timestamp; 665 + }; 673 666 674 667 struct sk_filter { 675 668 refcount_t refcnt; ··· 1137 1120 bool bpf_jit_supports_arena(void); 1138 1121 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); 1139 1122 bool bpf_jit_supports_private_stack(void); 1123 + bool bpf_jit_supports_timed_may_goto(void); 1140 1124 u64 bpf_arch_uaddress_limit(void); 1141 1125 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); 1126 + u64 arch_bpf_timed_may_goto(void); 1127 + u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *); 1142 1128 bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id); 1143 1129 1144 1130 static inline bool bpf_dump_raw_ok(const struct cred *cred)
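
As a hedged illustration of the encoding (for hand-crafted test programs; registers and offsets below are arbitrary), the new operations reuse the BPF_ATOMIC instruction class and are selected through the immediate, so the existing BPF_ATOMIC_OP() macro covers them:

  struct bpf_insn insns[] = {
          /* r2 = smp_load_acquire((u64 *)(r1 + 0)) */
          BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_1, 0),
          /* smp_store_release((u64 *)(r1 + 8), r2) */
          BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_1, BPF_REG_2, 8),
          BPF_MOV64_IMM(BPF_REG_0, 0),
          BPF_EXIT_INSN(),
  };
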
+3 -3
include/linux/lsm_hook_defs.h
··· 426 426 #endif /* CONFIG_AUDIT */ 427 427 428 428 #ifdef CONFIG_BPF_SYSCALL 429 - LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) 429 + LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 430 430 LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) 431 431 LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) 432 432 LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, 433 - struct bpf_token *token) 433 + struct bpf_token *token, bool kernel) 434 434 LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) 435 435 LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, 436 - struct bpf_token *token) 436 + struct bpf_token *token, bool kernel) 437 437 LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) 438 438 LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, 439 439 const struct path *path)
+5
include/linux/mm.h
··· 2477 2477 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, 2478 2478 void *buf, int len, unsigned int gup_flags); 2479 2479 2480 + #ifdef CONFIG_BPF_SYSCALL 2481 + extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, 2482 + void *buf, int len, unsigned int gup_flags); 2483 + #endif 2484 + 2480 2485 long get_user_pages_remote(struct mm_struct *mm, 2481 2486 unsigned long start, unsigned long nr_pages, 2482 2487 unsigned int gup_flags, struct page **pages,
+6 -6
include/linux/security.h
··· 2249 2249 struct bpf_prog; 2250 2250 struct bpf_token; 2251 2251 #ifdef CONFIG_SECURITY 2252 - extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); 2252 + extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size, bool kernel); 2253 2253 extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); 2254 2254 extern int security_bpf_prog(struct bpf_prog *prog); 2255 2255 extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, 2256 - struct bpf_token *token); 2256 + struct bpf_token *token, bool kernel); 2257 2257 extern void security_bpf_map_free(struct bpf_map *map); 2258 2258 extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, 2259 - struct bpf_token *token); 2259 + struct bpf_token *token, bool kernel); 2260 2260 extern void security_bpf_prog_free(struct bpf_prog *prog); 2261 2261 extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, 2262 2262 const struct path *path); ··· 2265 2265 extern int security_bpf_token_capable(const struct bpf_token *token, int cap); 2266 2266 #else 2267 2267 static inline int security_bpf(int cmd, union bpf_attr *attr, 2268 - unsigned int size) 2268 + unsigned int size, bool kernel) 2269 2269 { 2270 2270 return 0; 2271 2271 } ··· 2281 2281 } 2282 2282 2283 2283 static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, 2284 - struct bpf_token *token) 2284 + struct bpf_token *token, bool kernel) 2285 2285 { 2286 2286 return 0; 2287 2287 } ··· 2290 2290 { } 2291 2291 2292 2292 static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, 2293 - struct bpf_token *token) 2293 + struct bpf_token *token, bool kernel) 2294 2294 { 2295 2295 return 0; 2296 2296 }
+9 -1
include/uapi/linux/bpf.h
··· 51 51 #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ 52 52 #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ 53 53 54 + #define BPF_LOAD_ACQ 0x100 /* load-acquire */ 55 + #define BPF_STORE_REL 0x110 /* store-release */ 56 + 54 57 enum bpf_cond_pseudo_jmp { 55 58 BPF_MAY_GOTO = 0, 56 59 }; ··· 1210 1207 #define BPF_F_BEFORE (1U << 3) 1211 1208 #define BPF_F_AFTER (1U << 4) 1212 1209 #define BPF_F_ID (1U << 5) 1210 + #define BPF_F_PREORDER (1U << 6) 1213 1211 #define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ 1214 1212 1215 1213 /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the ··· 1652 1648 }; 1653 1649 __u32 next_id; 1654 1650 __u32 open_flags; 1651 + __s32 fd_by_id_token_fd; 1655 1652 }; 1656 1653 1657 1654 struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ ··· 6024 6019 FN(user_ringbuf_drain, 209, ##ctx) \ 6025 6020 FN(cgrp_storage_get, 210, ##ctx) \ 6026 6021 FN(cgrp_storage_delete, 211, ##ctx) \ 6027 - /* */ 6022 + /* This helper list is effectively frozen. If you are trying to \ 6023 + * add a new helper, you should add a kfunc instead which has \ 6024 + * less stability guarantees. See Documentation/bpf/kfuncs.rst \ 6025 + */ 6028 6026 6029 6027 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't 6030 6028 * know or care about integer value that is now passed as second argument
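
A hedged sketch of requesting the new pre-order behaviour from user space; the target cgroup, attach type, and flag combination are illustrative assumptions:

  #include <bpf/bpf.h>

  /* prog_fd and cgroup_fd are assumed to be valid descriptors obtained
   * elsewhere (e.g. a loaded skeleton and open(2) on the cgroup directory). */
  LIBBPF_OPTS(bpf_prog_attach_opts, opts,
          .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER,
  );
  int err = bpf_prog_attach_opts(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, &opts);
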
+2 -1
include/uapi/linux/btf.h
··· 36 36 * bits 24-28: kind (e.g. int, ptr, array...etc) 37 37 * bits 29-30: unused 38 38 * bit 31: kind_flag, currently used by 39 - * struct, union, enum, fwd and enum64 39 + * struct, union, enum, fwd, enum64, 40 + * decl_tag and type_tag 40 41 */ 41 42 __u32 info; 42 43 /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
+4
include/uapi/linux/xattr.h
··· 83 83 #define XATTR_CAPS_SUFFIX "capability" 84 84 #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX 85 85 86 + #define XATTR_BPF_LSM_SUFFIX "bpf." 87 + #define XATTR_NAME_BPF_LSM (XATTR_SECURITY_PREFIX XATTR_BPF_LSM_SUFFIX) 88 + #define XATTR_NAME_BPF_LSM_LEN (sizeof(XATTR_NAME_BPF_LSM) - 1) 89 + 86 90 #define XATTR_POSIX_ACL_ACCESS "posix_acl_access" 87 91 #define XATTR_NAME_POSIX_ACL_ACCESS XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_ACCESS 88 92 #define XATTR_POSIX_ACL_DEFAULT "posix_acl_default"
+2 -2
kernel/bpf/arena.c
··· 577 577 __bpf_kfunc_end_defs(); 578 578 579 579 BTF_KFUNCS_START(arena_kfuncs) 580 - BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE) 581 - BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE) 580 + BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2) 581 + BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2) 582 582 BTF_KFUNCS_END(arena_kfuncs) 583 583 584 584 static const struct btf_kfunc_id_set common_kfunc_set = {
+6 -5
kernel/bpf/bpf_cgrp_storage.c
··· 161 161 void *, value, u64, flags, gfp_t, gfp_flags) 162 162 { 163 163 struct bpf_local_storage_data *sdata; 164 + bool nobusy; 164 165 165 166 WARN_ON_ONCE(!bpf_rcu_lock_held()); 166 167 if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) ··· 170 169 if (!cgroup) 171 170 return (unsigned long)NULL; 172 171 173 - if (!bpf_cgrp_storage_trylock()) 174 - return (unsigned long)NULL; 172 + nobusy = bpf_cgrp_storage_trylock(); 175 173 176 - sdata = cgroup_storage_lookup(cgroup, map, true); 174 + sdata = cgroup_storage_lookup(cgroup, map, nobusy); 177 175 if (sdata) 178 176 goto unlock; 179 177 180 178 /* only allocate new storage, when the cgroup is refcounted */ 181 179 if (!percpu_ref_is_dying(&cgroup->self.refcnt) && 182 - (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) 180 + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) 183 181 sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, 184 182 value, BPF_NOEXIST, false, gfp_flags); 185 183 186 184 unlock: 187 - bpf_cgrp_storage_unlock(); 185 + if (nobusy) 186 + bpf_cgrp_storage_unlock(); 188 187 return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data; 189 188 } 190 189
+6 -7
kernel/bpf/bpf_iter.c
··· 335 335 tinfo->btf_id = prog->aux->attach_btf_id; 336 336 } 337 337 338 - bool bpf_iter_prog_supported(struct bpf_prog *prog) 338 + int bpf_iter_prog_supported(struct bpf_prog *prog) 339 339 { 340 340 const char *attach_fname = prog->aux->attach_func_name; 341 341 struct bpf_iter_target_info *tinfo = NULL, *iter; ··· 344 344 int prefix_len = strlen(prefix); 345 345 346 346 if (strncmp(attach_fname, prefix, prefix_len)) 347 - return false; 347 + return -EINVAL; 348 348 349 349 mutex_lock(&targets_mutex); 350 350 list_for_each_entry(iter, &targets, list) { ··· 360 360 } 361 361 mutex_unlock(&targets_mutex); 362 362 363 - if (tinfo) { 364 - prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size; 365 - prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info; 366 - } 363 + if (!tinfo) 364 + return -EINVAL; 367 365 368 - return tinfo != NULL; 366 + return bpf_prog_ctx_arg_info_init(prog, tinfo->reg_info->ctx_arg_info, 367 + tinfo->reg_info->ctx_arg_info_size); 369 368 } 370 369 371 370 const struct bpf_func_proto *
+2
kernel/bpf/bpf_lsm.c
··· 316 316 BTF_ID(func, bpf_lsm_inode_mknod) 317 317 BTF_ID(func, bpf_lsm_inode_need_killpriv) 318 318 BTF_ID(func, bpf_lsm_inode_post_setxattr) 319 + BTF_ID(func, bpf_lsm_inode_post_removexattr) 319 320 BTF_ID(func, bpf_lsm_inode_readlink) 321 + BTF_ID(func, bpf_lsm_inode_removexattr) 320 322 BTF_ID(func, bpf_lsm_inode_rename) 321 323 BTF_ID(func, bpf_lsm_inode_rmdir) 322 324 BTF_ID(func, bpf_lsm_inode_setattr)
+75 -60
kernel/bpf/bpf_struct_ops.c
··· 146 146 } 147 147 148 148 #define MAYBE_NULL_SUFFIX "__nullable" 149 - #define MAX_STUB_NAME 128 150 - 151 - /* Return the type info of a stub function, if it exists. 152 - * 153 - * The name of a stub function is made up of the name of the struct_ops and 154 - * the name of the function pointer member, separated by "__". For example, 155 - * if the struct_ops type is named "foo_ops" and the function pointer 156 - * member is named "bar", the stub function name would be "foo_ops__bar". 157 - */ 158 - static const struct btf_type * 159 - find_stub_func_proto(const struct btf *btf, const char *st_op_name, 160 - const char *member_name) 161 - { 162 - char stub_func_name[MAX_STUB_NAME]; 163 - const struct btf_type *func_type; 164 - s32 btf_id; 165 - int cp; 166 - 167 - cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s", 168 - st_op_name, member_name); 169 - if (cp >= MAX_STUB_NAME) { 170 - pr_warn("Stub function name too long\n"); 171 - return NULL; 172 - } 173 - btf_id = btf_find_by_name_kind(btf, stub_func_name, BTF_KIND_FUNC); 174 - if (btf_id < 0) 175 - return NULL; 176 - func_type = btf_type_by_id(btf, btf_id); 177 - if (!func_type) 178 - return NULL; 179 - 180 - return btf_type_by_id(btf, func_type->type); /* FUNC_PROTO */ 181 - } 149 + #define REFCOUNTED_SUFFIX "__ref" 182 150 183 151 /* Prepare argument info for every nullable argument of a member of a 184 152 * struct_ops type. ··· 171 203 static int prepare_arg_info(struct btf *btf, 172 204 const char *st_ops_name, 173 205 const char *member_name, 174 - const struct btf_type *func_proto, 206 + const struct btf_type *func_proto, void *stub_func_addr, 175 207 struct bpf_struct_ops_arg_info *arg_info) 176 208 { 177 209 const struct btf_type *stub_func_proto, *pointed_type; 210 + bool is_nullable = false, is_refcounted = false; 178 211 const struct btf_param *stub_args, *args; 179 212 struct bpf_ctx_arg_aux *info, *info_buf; 180 213 u32 nargs, arg_no, info_cnt = 0; 214 + char ksym[KSYM_SYMBOL_LEN]; 215 + const char *stub_fname; 216 + const char *suffix; 217 + s32 stub_func_id; 181 218 u32 arg_btf_id; 182 219 int offset; 183 220 184 - stub_func_proto = find_stub_func_proto(btf, st_ops_name, member_name); 185 - if (!stub_func_proto) 186 - return 0; 221 + stub_fname = kallsyms_lookup((unsigned long)stub_func_addr, NULL, NULL, NULL, ksym); 222 + if (!stub_fname) { 223 + pr_warn("Cannot find the stub function name for the %s in struct %s\n", 224 + member_name, st_ops_name); 225 + return -ENOENT; 226 + } 227 + 228 + stub_func_id = btf_find_by_name_kind(btf, stub_fname, BTF_KIND_FUNC); 229 + if (stub_func_id < 0) { 230 + pr_warn("Cannot find the stub function %s in btf\n", stub_fname); 231 + return -ENOENT; 232 + } 233 + 234 + stub_func_proto = btf_type_by_id(btf, stub_func_id); 235 + stub_func_proto = btf_type_by_id(btf, stub_func_proto->type); 187 236 188 237 /* Check if the number of arguments of the stub function is the same 189 238 * as the number of arguments of the function pointer. 
190 239 */ 191 240 nargs = btf_type_vlen(func_proto); 192 241 if (nargs != btf_type_vlen(stub_func_proto)) { 193 - pr_warn("the number of arguments of the stub function %s__%s does not match the number of arguments of the member %s of struct %s\n", 194 - st_ops_name, member_name, member_name, st_ops_name); 242 + pr_warn("the number of arguments of the stub function %s does not match the number of arguments of the member %s of struct %s\n", 243 + stub_fname, member_name, st_ops_name); 195 244 return -EINVAL; 196 245 } 197 246 ··· 226 241 info = info_buf; 227 242 for (arg_no = 0; arg_no < nargs; arg_no++) { 228 243 /* Skip arguments that is not suffixed with 229 - * "__nullable". 244 + * "__nullable or __ref". 230 245 */ 231 - if (!btf_param_match_suffix(btf, &stub_args[arg_no], 232 - MAYBE_NULL_SUFFIX)) 246 + is_nullable = btf_param_match_suffix(btf, &stub_args[arg_no], 247 + MAYBE_NULL_SUFFIX); 248 + is_refcounted = btf_param_match_suffix(btf, &stub_args[arg_no], 249 + REFCOUNTED_SUFFIX); 250 + 251 + if (is_nullable) 252 + suffix = MAYBE_NULL_SUFFIX; 253 + else if (is_refcounted) 254 + suffix = REFCOUNTED_SUFFIX; 255 + else 233 256 continue; 234 257 235 258 /* Should be a pointer to struct */ ··· 246 253 &arg_btf_id); 247 254 if (!pointed_type || 248 255 !btf_type_is_struct(pointed_type)) { 249 - pr_warn("stub function %s__%s has %s tagging to an unsupported type\n", 250 - st_ops_name, member_name, MAYBE_NULL_SUFFIX); 256 + pr_warn("stub function %s has %s tagging to an unsupported type\n", 257 + stub_fname, suffix); 251 258 goto err_out; 252 259 } 253 260 254 261 offset = btf_ctx_arg_offset(btf, func_proto, arg_no); 255 262 if (offset < 0) { 256 - pr_warn("stub function %s__%s has an invalid trampoline ctx offset for arg#%u\n", 257 - st_ops_name, member_name, arg_no); 263 + pr_warn("stub function %s has an invalid trampoline ctx offset for arg#%u\n", 264 + stub_fname, arg_no); 258 265 goto err_out; 259 266 } 260 267 261 268 if (args[arg_no].type != stub_args[arg_no].type) { 262 - pr_warn("arg#%u type in stub function %s__%s does not match with its original func_proto\n", 263 - arg_no, st_ops_name, member_name); 269 + pr_warn("arg#%u type in stub function %s does not match with its original func_proto\n", 270 + arg_no, stub_fname); 264 271 goto err_out; 265 272 } 266 273 267 274 /* Fill the information of the new argument */ 268 - info->reg_type = 269 - PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL; 270 275 info->btf_id = arg_btf_id; 271 276 info->btf = btf; 272 277 info->offset = offset; 278 + if (is_nullable) { 279 + info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL; 280 + } else if (is_refcounted) { 281 + info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID; 282 + info->refcounted = true; 283 + } 273 284 274 285 info++; 275 286 info_cnt++; ··· 319 322 return false; 320 323 321 324 return !strcmp(btf_name_by_offset(btf, t->name_off), "module"); 325 + } 326 + 327 + int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff) 328 + { 329 + void *func_ptr = *(void **)(st_ops->cfi_stubs + moff); 330 + 331 + return func_ptr ? 
0 : -ENOTSUPP; 322 332 } 323 333 324 334 int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, ··· 390 386 st_ops_desc->value_type = btf_type_by_id(btf, value_id); 391 387 392 388 for_each_member(i, t, member) { 393 - const struct btf_type *func_proto; 389 + const struct btf_type *func_proto, *ret_type; 390 + void **stub_func_addr; 391 + u32 moff; 394 392 393 + moff = __btf_member_bit_offset(t, member) / 8; 395 394 mname = btf_name_by_offset(btf, member->name_off); 396 395 if (!*mname) { 397 396 pr_warn("anon member in struct %s is not supported\n", ··· 420 413 func_proto = btf_type_resolve_func_ptr(btf, 421 414 member->type, 422 415 NULL); 423 - if (!func_proto) 416 + 417 + /* The member is not a function pointer or 418 + * the function pointer is not supported. 419 + */ 420 + if (!func_proto || bpf_struct_ops_supported(st_ops, moff)) 424 421 continue; 422 + 423 + if (func_proto->type) { 424 + ret_type = btf_type_resolve_ptr(btf, func_proto->type, NULL); 425 + if (ret_type && !__btf_type_is_struct(ret_type)) { 426 + pr_warn("func ptr %s in struct %s returns non-struct pointer, which is not supported\n", 427 + mname, st_ops->name); 428 + err = -EOPNOTSUPP; 429 + goto errout; 430 + } 431 + } 425 432 426 433 if (btf_distill_func_proto(log, btf, 427 434 func_proto, mname, ··· 446 425 goto errout; 447 426 } 448 427 428 + stub_func_addr = *(void **)(st_ops->cfi_stubs + moff); 449 429 err = prepare_arg_info(btf, st_ops->name, mname, 450 - func_proto, 430 + func_proto, stub_func_addr, 451 431 arg_info + i); 452 432 if (err) 453 433 goto errout; ··· 1172 1150 st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue); 1173 1151 1174 1152 bpf_map_put(&st_map->map); 1175 - } 1176 - 1177 - int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff) 1178 - { 1179 - void *func_ptr = *(void **)(st_ops->cfi_stubs + moff); 1180 - 1181 - return func_ptr ? 0 : -ENOTSUPP; 1182 1153 } 1183 1154 1184 1155 static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
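The prepare_arg_info() rework above now locates the stub through the address stored in ->cfi_stubs (via kallsyms) instead of building a name from the struct_ops and member names, and it recognizes a new "__ref" parameter suffix alongside "__nullable". A hypothetical struct_ops type and stub, only to illustrate the suffix convention (all identifiers below are made up):

#include <linux/bpf.h>
#include <linux/sched.h>

/* The "__ref" suffix on the stub's parameter marks the corresponding
 * callback argument as a trusted, refcounted kptr whose reference the
 * verifier tracks inside the BPF implementation of ->test_acquire();
 * "__nullable" instead marks a maybe-NULL argument.
 */
struct bpf_testops {
    int (*test_acquire)(struct task_struct *task);
};

static int bpf_testops__test_acquire(struct task_struct *task__ref)
{
    return 0;
}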
+113 -14
kernel/bpf/btf.c
··· 606 606 spin_unlock_bh(&btf_idr_lock); 607 607 return ret; 608 608 } 609 + EXPORT_SYMBOL_GPL(bpf_find_btf_id); 609 610 610 611 const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, 611 612 u32 id, u32 *res_id) ··· 2576 2575 return -EINVAL; 2577 2576 } 2578 2577 2579 - if (btf_type_kflag(t)) { 2578 + if (btf_type_kflag(t) && !btf_type_is_type_tag(t)) { 2580 2579 btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); 2581 2580 return -EINVAL; 2582 2581 } ··· 3333 3332 u32 off, int sz, struct btf_field_info *info, u32 field_mask) 3334 3333 { 3335 3334 enum btf_field_type type; 3335 + const char *tag_value; 3336 + bool is_type_tag; 3336 3337 u32 res_id; 3337 3338 3338 3339 /* Permit modifiers on the pointer itself */ ··· 3344 3341 if (!btf_type_is_ptr(t)) 3345 3342 return BTF_FIELD_IGNORE; 3346 3343 t = btf_type_by_id(btf, t->type); 3347 - 3348 - if (!btf_type_is_type_tag(t)) 3344 + is_type_tag = btf_type_is_type_tag(t) && !btf_type_kflag(t); 3345 + if (!is_type_tag) 3349 3346 return BTF_FIELD_IGNORE; 3350 3347 /* Reject extra tags */ 3351 3348 if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) 3352 3349 return -EINVAL; 3353 - if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off))) 3350 + tag_value = __btf_name_by_offset(btf, t->name_off); 3351 + if (!strcmp("kptr_untrusted", tag_value)) 3354 3352 type = BPF_KPTR_UNREF; 3355 - else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) 3353 + else if (!strcmp("kptr", tag_value)) 3356 3354 type = BPF_KPTR_REF; 3357 - else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off))) 3355 + else if (!strcmp("percpu_kptr", tag_value)) 3358 3356 type = BPF_KPTR_PERCPU; 3359 - else if (!strcmp("uptr", __btf_name_by_offset(btf, t->name_off))) 3357 + else if (!strcmp("uptr", tag_value)) 3360 3358 type = BPF_UPTR; 3361 3359 else 3362 3360 return -EINVAL; ··· 4948 4944 return -EINVAL; 4949 4945 } 4950 4946 4951 - if (btf_type_kflag(t)) { 4952 - btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); 4953 - return -EINVAL; 4954 - } 4955 - 4956 4947 component_idx = btf_type_decl_tag(t)->component_idx; 4957 4948 if (component_idx < -1) { 4958 4949 btf_verifier_log_type(env, t, "Invalid component_idx"); ··· 6506 6507 /* rxrpc */ 6507 6508 { "rxrpc_recvdata", 0x1 }, 6508 6509 { "rxrpc_resend", 0x10 }, 6510 + { "rxrpc_tq", 0x10 }, 6511 + { "rxrpc_client", 0x1 }, 6509 6512 /* skb */ 6510 6513 {"kfree_skb", 0x1000}, 6511 6514 /* sunrpc */ ··· 6530 6529 { "mr_integ_alloc", 0x2000 }, 6531 6530 /* bpf_testmod */ 6532 6531 { "bpf_testmod_test_read", 0x0 }, 6532 + /* amdgpu */ 6533 + { "amdgpu_vm_bo_map", 0x1 }, 6534 + { "amdgpu_vm_bo_unmap", 0x1 }, 6535 + /* netfs */ 6536 + { "netfs_folioq", 0x1 }, 6537 + /* xfs from xfs_defer_pending_class */ 6538 + { "xfs_defer_create_intent", 0x1 }, 6539 + { "xfs_defer_cancel_list", 0x1 }, 6540 + { "xfs_defer_pending_finish", 0x1 }, 6541 + { "xfs_defer_pending_abort", 0x1 }, 6542 + { "xfs_defer_relog_intent", 0x1 }, 6543 + { "xfs_defer_isolate_paused", 0x1 }, 6544 + { "xfs_defer_item_pause", 0x1 }, 6545 + { "xfs_defer_item_unpause", 0x1 }, 6546 + /* xfs from xfs_defer_pending_item_class */ 6547 + { "xfs_defer_add_item", 0x1 }, 6548 + { "xfs_defer_cancel_item", 0x1 }, 6549 + { "xfs_defer_finish_item", 0x1 }, 6550 + /* xfs from xfs_icwalk_class */ 6551 + { "xfs_ioc_free_eofblocks", 0x10 }, 6552 + { "xfs_blockgc_free_space", 0x10 }, 6553 + /* xfs from xfs_btree_cur_class */ 6554 + { "xfs_btree_updkeys", 0x100 }, 6555 + { "xfs_btree_overlapped_query_range", 0x100 }, 
6556 + /* xfs from xfs_imap_class*/ 6557 + { "xfs_map_blocks_found", 0x10000 }, 6558 + { "xfs_map_blocks_alloc", 0x10000 }, 6559 + { "xfs_iomap_alloc", 0x1000 }, 6560 + { "xfs_iomap_found", 0x1000 }, 6561 + /* xfs from xfs_fs_class */ 6562 + { "xfs_inodegc_flush", 0x1 }, 6563 + { "xfs_inodegc_push", 0x1 }, 6564 + { "xfs_inodegc_start", 0x1 }, 6565 + { "xfs_inodegc_stop", 0x1 }, 6566 + { "xfs_inodegc_queue", 0x1 }, 6567 + { "xfs_inodegc_throttle", 0x1 }, 6568 + { "xfs_fs_sync_fs", 0x1 }, 6569 + { "xfs_blockgc_start", 0x1 }, 6570 + { "xfs_blockgc_stop", 0x1 }, 6571 + { "xfs_blockgc_worker", 0x1 }, 6572 + { "xfs_blockgc_flush_all", 0x1 }, 6573 + /* xfs_scrub */ 6574 + { "xchk_nlinks_live_update", 0x10 }, 6575 + /* xfs_scrub from xchk_metapath_class */ 6576 + { "xchk_metapath_lookup", 0x100 }, 6577 + /* nfsd */ 6578 + { "nfsd_dirent", 0x1 }, 6579 + { "nfsd_file_acquire", 0x1001 }, 6580 + { "nfsd_file_insert_err", 0x1 }, 6581 + { "nfsd_file_cons_err", 0x1 }, 6582 + /* nfs4 */ 6583 + { "nfs4_setup_sequence", 0x1 }, 6584 + { "pnfs_update_layout", 0x10000 }, 6585 + { "nfs4_inode_callback_event", 0x200 }, 6586 + { "nfs4_inode_stateid_callback_event", 0x200 }, 6587 + /* nfs from pnfs_layout_event */ 6588 + { "pnfs_mds_fallback_pg_init_read", 0x10000 }, 6589 + { "pnfs_mds_fallback_pg_init_write", 0x10000 }, 6590 + { "pnfs_mds_fallback_pg_get_mirror_count", 0x10000 }, 6591 + { "pnfs_mds_fallback_read_done", 0x10000 }, 6592 + { "pnfs_mds_fallback_write_done", 0x10000 }, 6593 + { "pnfs_mds_fallback_read_pagelist", 0x10000 }, 6594 + { "pnfs_mds_fallback_write_pagelist", 0x10000 }, 6595 + /* coda */ 6596 + { "coda_dec_pic_run", 0x10 }, 6597 + { "coda_dec_pic_done", 0x10 }, 6598 + /* cfg80211 */ 6599 + { "cfg80211_scan_done", 0x11 }, 6600 + { "rdev_set_coalesce", 0x10 }, 6601 + { "cfg80211_report_wowlan_wakeup", 0x100 }, 6602 + { "cfg80211_inform_bss_frame", 0x100 }, 6603 + { "cfg80211_michael_mic_failure", 0x10000 }, 6604 + /* cfg80211 from wiphy_work_event */ 6605 + { "wiphy_work_queue", 0x10 }, 6606 + { "wiphy_work_run", 0x10 }, 6607 + { "wiphy_work_cancel", 0x10 }, 6608 + { "wiphy_work_flush", 0x10 }, 6609 + /* hugetlbfs */ 6610 + { "hugetlbfs_alloc_inode", 0x10 }, 6611 + /* spufs */ 6612 + { "spufs_context", 0x10 }, 6613 + /* kvm_hv */ 6614 + { "kvm_page_fault_enter", 0x100 }, 6615 + /* dpu */ 6616 + { "dpu_crtc_setup_mixer", 0x100 }, 6617 + /* binder */ 6618 + { "binder_transaction", 0x100 }, 6619 + /* bcachefs */ 6620 + { "btree_path_free", 0x100 }, 6621 + /* hfi1_tx */ 6622 + { "hfi1_sdma_progress", 0x1000 }, 6623 + /* iptfs */ 6624 + { "iptfs_ingress_postq_event", 0x1000 }, 6625 + /* neigh */ 6626 + { "neigh_update", 0x10 }, 6627 + /* snd_firewire_lib */ 6628 + { "amdtp_packet", 0x100 }, 6533 6629 }; 6534 6630 6535 6631 bool btf_ctx_access(int off, int size, enum bpf_access_type type, ··· 6777 6679 info->reg_type = ctx_arg_info->reg_type; 6778 6680 info->btf = ctx_arg_info->btf ? 
: btf_vmlinux; 6779 6681 info->btf_id = ctx_arg_info->btf_id; 6682 + info->ref_obj_id = ctx_arg_info->ref_obj_id; 6780 6683 return true; 6781 6684 } 6782 6685 } ··· 6844 6745 info->btf_id = t->type; 6845 6746 t = btf_type_by_id(btf, t->type); 6846 6747 6847 - if (btf_type_is_type_tag(t)) { 6748 + if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) { 6848 6749 tag_value = __btf_name_by_offset(btf, t->name_off); 6849 6750 if (strcmp(tag_value, "user") == 0) 6850 6751 info->reg_type |= MEM_USER; ··· 7103 7004 7104 7005 /* check type tag */ 7105 7006 t = btf_type_by_id(btf, mtype->type); 7106 - if (btf_type_is_type_tag(t)) { 7007 + if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) { 7107 7008 tag_value = __btf_name_by_offset(btf, t->name_off); 7108 7009 /* check __user tag */ 7109 7010 if (strcmp(tag_value, "user") == 0)
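The "kptr"/"percpu_kptr"/"uptr" strings matched in btf_find_kptr() above are what the convenience macros in libbpf's bpf_helpers.h encode as BTF type tags on map value fields (the kind_flag checks above distinguish these from the newer attribute-style tag encoding). A map-value sketch, assuming the __kptr/__uptr macros from bpf_helpers.h:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

struct user_data {
    __u64 a;
    __u64 b;
};

struct map_value {
    struct task_struct __kptr *task;  /* "kptr" type tag: referenced kernel pointer */
    struct user_data __uptr *udata;   /* "uptr" type tag: pinned user memory */
};

/* uptr fields are currently only accepted in task local storage values. */
struct {
    __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
    __uint(map_flags, BPF_F_NO_PREALLOC);
    __type(key, int);
    __type(value, struct map_value);
} datamap SEC(".maps");

char _license[] SEC("license") = "GPL";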
+25 -8
kernel/bpf/cgroup.c
··· 369 369 /* count number of elements in the list. 370 370 * it's slow but the list cannot be long 371 371 */ 372 - static u32 prog_list_length(struct hlist_head *head) 372 + static u32 prog_list_length(struct hlist_head *head, int *preorder_cnt) 373 373 { 374 374 struct bpf_prog_list *pl; 375 375 u32 cnt = 0; ··· 377 377 hlist_for_each_entry(pl, head, node) { 378 378 if (!prog_list_prog(pl)) 379 379 continue; 380 + if (preorder_cnt && (pl->flags & BPF_F_PREORDER)) 381 + (*preorder_cnt)++; 380 382 cnt++; 381 383 } 382 384 return cnt; ··· 402 400 403 401 if (flags & BPF_F_ALLOW_MULTI) 404 402 return true; 405 - cnt = prog_list_length(&p->bpf.progs[atype]); 403 + cnt = prog_list_length(&p->bpf.progs[atype], NULL); 406 404 WARN_ON_ONCE(cnt > 1); 407 405 if (cnt == 1) 408 406 return !!(flags & BPF_F_ALLOW_OVERRIDE); ··· 425 423 struct bpf_prog_array *progs; 426 424 struct bpf_prog_list *pl; 427 425 struct cgroup *p = cgrp; 428 - int cnt = 0; 426 + int i, j, cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart; 429 427 430 428 /* count number of effective programs by walking parents */ 431 429 do { 432 430 if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) 433 - cnt += prog_list_length(&p->bpf.progs[atype]); 431 + cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt); 434 432 p = cgroup_parent(p); 435 433 } while (p); 436 434 ··· 441 439 /* populate the array with effective progs */ 442 440 cnt = 0; 443 441 p = cgrp; 442 + fstart = preorder_cnt; 443 + bstart = preorder_cnt - 1; 444 444 do { 445 445 if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) 446 446 continue; 447 447 448 + init_bstart = bstart; 448 449 hlist_for_each_entry(pl, &p->bpf.progs[atype], node) { 449 450 if (!prog_list_prog(pl)) 450 451 continue; 451 452 452 - item = &progs->items[cnt]; 453 + if (pl->flags & BPF_F_PREORDER) { 454 + item = &progs->items[bstart]; 455 + bstart--; 456 + } else { 457 + item = &progs->items[fstart]; 458 + fstart++; 459 + } 453 460 item->prog = prog_list_prog(pl); 454 461 bpf_cgroup_storages_assign(item->cgroup_storage, 455 462 pl->storage); 456 463 cnt++; 457 464 } 465 + 466 + /* reverse pre-ordering progs at this cgroup level */ 467 + for (i = bstart + 1, j = init_bstart; i < j; i++, j--) 468 + swap(progs->items[i], progs->items[j]); 469 + 458 470 } while ((p = cgroup_parent(p))); 459 471 460 472 *array = progs; ··· 679 663 */ 680 664 return -EPERM; 681 665 682 - if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) 666 + if (prog_list_length(progs, NULL) >= BPF_CGROUP_MAX_PROGS) 683 667 return -E2BIG; 684 668 685 669 pl = find_attach_entry(progs, prog, link, replace_prog, ··· 714 698 715 699 pl->prog = prog; 716 700 pl->link = link; 701 + pl->flags = flags; 717 702 bpf_cgroup_storages_assign(pl->storage, storage); 718 703 cgrp->bpf.flags[atype] = saved_flags; 719 704 ··· 1090 1073 lockdep_is_held(&cgroup_mutex)); 1091 1074 total_cnt += bpf_prog_array_length(effective); 1092 1075 } else { 1093 - total_cnt += prog_list_length(&cgrp->bpf.progs[atype]); 1076 + total_cnt += prog_list_length(&cgrp->bpf.progs[atype], NULL); 1094 1077 } 1095 1078 } 1096 1079 ··· 1122 1105 u32 id; 1123 1106 1124 1107 progs = &cgrp->bpf.progs[atype]; 1125 - cnt = min_t(int, prog_list_length(progs), total_cnt); 1108 + cnt = min_t(int, prog_list_length(progs, NULL), total_cnt); 1126 1109 i = 0; 1127 1110 hlist_for_each_entry(pl, progs, node) { 1128 1111 prog = prog_list_prog(pl);
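Per the effective-array construction above, programs attached with the new BPF_F_PREORDER flag are ordered so that an ancestor cgroup's programs run before a descendant's, while the default ordering stays as before. A user-space attach sketch, assuming a libbpf and uapi recent enough to expose BPF_F_PREORDER:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Attach a cgroup program in pre-order; error handling trimmed. */
static int attach_preorder(int prog_fd, int cgroup_fd)
{
    LIBBPF_OPTS(bpf_prog_attach_opts, opts,
        .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER,
    );

    return bpf_prog_attach_opts(prog_fd, cgroup_fd,
                                BPF_CGROUP_INET_INGRESS, &opts);
}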
+107 -10
kernel/bpf/core.c
··· 1663 1663 INSN_3(JMP, JSET, K), \ 1664 1664 INSN_2(JMP, JA), \ 1665 1665 INSN_2(JMP32, JA), \ 1666 + /* Atomic operations. */ \ 1667 + INSN_3(STX, ATOMIC, B), \ 1668 + INSN_3(STX, ATOMIC, H), \ 1669 + INSN_3(STX, ATOMIC, W), \ 1670 + INSN_3(STX, ATOMIC, DW), \ 1666 1671 /* Store instructions. */ \ 1667 1672 /* Register based. */ \ 1668 1673 INSN_3(STX, MEM, B), \ 1669 1674 INSN_3(STX, MEM, H), \ 1670 1675 INSN_3(STX, MEM, W), \ 1671 1676 INSN_3(STX, MEM, DW), \ 1672 - INSN_3(STX, ATOMIC, W), \ 1673 - INSN_3(STX, ATOMIC, DW), \ 1674 1677 /* Immediate based. */ \ 1675 1678 INSN_3(ST, MEM, B), \ 1676 1679 INSN_3(ST, MEM, H), \ ··· 2155 2152 if (BPF_SIZE(insn->code) == BPF_W) \ 2156 2153 atomic_##KOP((u32) SRC, (atomic_t *)(unsigned long) \ 2157 2154 (DST + insn->off)); \ 2158 - else \ 2155 + else if (BPF_SIZE(insn->code) == BPF_DW) \ 2159 2156 atomic64_##KOP((u64) SRC, (atomic64_t *)(unsigned long) \ 2160 2157 (DST + insn->off)); \ 2158 + else \ 2159 + goto default_label; \ 2161 2160 break; \ 2162 2161 case BOP | BPF_FETCH: \ 2163 2162 if (BPF_SIZE(insn->code) == BPF_W) \ 2164 2163 SRC = (u32) atomic_fetch_##KOP( \ 2165 2164 (u32) SRC, \ 2166 2165 (atomic_t *)(unsigned long) (DST + insn->off)); \ 2167 - else \ 2166 + else if (BPF_SIZE(insn->code) == BPF_DW) \ 2168 2167 SRC = (u64) atomic64_fetch_##KOP( \ 2169 2168 (u64) SRC, \ 2170 2169 (atomic64_t *)(unsigned long) (DST + insn->off)); \ 2170 + else \ 2171 + goto default_label; \ 2171 2172 break; 2172 2173 2173 2174 STX_ATOMIC_DW: 2174 2175 STX_ATOMIC_W: 2176 + STX_ATOMIC_H: 2177 + STX_ATOMIC_B: 2175 2178 switch (IMM) { 2179 + /* Atomic read-modify-write instructions support only W and DW 2180 + * size modifiers. 2181 + */ 2176 2182 ATOMIC_ALU_OP(BPF_ADD, add) 2177 2183 ATOMIC_ALU_OP(BPF_AND, and) 2178 2184 ATOMIC_ALU_OP(BPF_OR, or) ··· 2193 2181 SRC = (u32) atomic_xchg( 2194 2182 (atomic_t *)(unsigned long) (DST + insn->off), 2195 2183 (u32) SRC); 2196 - else 2184 + else if (BPF_SIZE(insn->code) == BPF_DW) 2197 2185 SRC = (u64) atomic64_xchg( 2198 2186 (atomic64_t *)(unsigned long) (DST + insn->off), 2199 2187 (u64) SRC); 2188 + else 2189 + goto default_label; 2200 2190 break; 2201 2191 case BPF_CMPXCHG: 2202 2192 if (BPF_SIZE(insn->code) == BPF_W) 2203 2193 BPF_R0 = (u32) atomic_cmpxchg( 2204 2194 (atomic_t *)(unsigned long) (DST + insn->off), 2205 2195 (u32) BPF_R0, (u32) SRC); 2206 - else 2196 + else if (BPF_SIZE(insn->code) == BPF_DW) 2207 2197 BPF_R0 = (u64) atomic64_cmpxchg( 2208 2198 (atomic64_t *)(unsigned long) (DST + insn->off), 2209 2199 (u64) BPF_R0, (u64) SRC); 2200 + else 2201 + goto default_label; 2202 + break; 2203 + /* Atomic load and store instructions support all size 2204 + * modifiers. 
2205 + */ 2206 + case BPF_LOAD_ACQ: 2207 + switch (BPF_SIZE(insn->code)) { 2208 + #define LOAD_ACQUIRE(SIZEOP, SIZE) \ 2209 + case BPF_##SIZEOP: \ 2210 + DST = (SIZE)smp_load_acquire( \ 2211 + (SIZE *)(unsigned long)(SRC + insn->off)); \ 2212 + break; 2213 + LOAD_ACQUIRE(B, u8) 2214 + LOAD_ACQUIRE(H, u16) 2215 + LOAD_ACQUIRE(W, u32) 2216 + #ifdef CONFIG_64BIT 2217 + LOAD_ACQUIRE(DW, u64) 2218 + #endif 2219 + #undef LOAD_ACQUIRE 2220 + default: 2221 + goto default_label; 2222 + } 2223 + break; 2224 + case BPF_STORE_REL: 2225 + switch (BPF_SIZE(insn->code)) { 2226 + #define STORE_RELEASE(SIZEOP, SIZE) \ 2227 + case BPF_##SIZEOP: \ 2228 + smp_store_release( \ 2229 + (SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC); \ 2230 + break; 2231 + STORE_RELEASE(B, u8) 2232 + STORE_RELEASE(H, u16) 2233 + STORE_RELEASE(W, u32) 2234 + #ifdef CONFIG_64BIT 2235 + STORE_RELEASE(DW, u64) 2236 + #endif 2237 + #undef STORE_RELEASE 2238 + default: 2239 + goto default_label; 2240 + } 2210 2241 break; 2211 2242 2212 2243 default: ··· 2345 2290 insn->code = BPF_JMP | BPF_CALL_ARGS; 2346 2291 } 2347 2292 #endif 2348 - #else 2293 + #endif 2294 + 2349 2295 static unsigned int __bpf_prog_ret0_warn(const void *ctx, 2350 2296 const struct bpf_insn *insn) 2351 2297 { 2352 2298 /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON 2353 - * is not working properly, so warn about it! 2299 + * is not working properly, or interpreter is being used when 2300 + * prog->jit_requested is not 0, so warn about it! 2354 2301 */ 2355 2302 WARN_ON_ONCE(1); 2356 2303 return 0; 2357 2304 } 2358 - #endif 2359 2305 2360 2306 bool bpf_prog_map_compatible(struct bpf_map *map, 2361 2307 const struct bpf_prog *fp) ··· 2436 2380 { 2437 2381 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 2438 2382 u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); 2383 + u32 idx = (round_up(stack_depth, 32) / 32) - 1; 2439 2384 2440 - fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; 2385 + /* may_goto may cause stack size > 512, leading to idx out-of-bounds. 2386 + * But for non-JITed programs, we don't need bpf_func, so no bounds 2387 + * check needed. 2388 + */ 2389 + if (!fp->jit_requested && 2390 + !WARN_ON_ONCE(idx >= ARRAY_SIZE(interpreters))) { 2391 + fp->bpf_func = interpreters[idx]; 2392 + } else { 2393 + fp->bpf_func = __bpf_prog_ret0_warn; 2394 + } 2441 2395 #else 2442 2396 fp->bpf_func = __bpf_prog_ret0_warn; 2443 2397 #endif ··· 2972 2906 return NULL; 2973 2907 } 2974 2908 2909 + const struct bpf_func_proto * __weak bpf_get_perf_event_read_value_proto(void) 2910 + { 2911 + return NULL; 2912 + } 2913 + 2975 2914 u64 __weak 2976 2915 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, 2977 2916 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) ··· 3127 3056 3128 3057 void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie) 3129 3058 { 3059 + } 3060 + 3061 + bool __weak bpf_jit_supports_timed_may_goto(void) 3062 + { 3063 + return false; 3064 + } 3065 + 3066 + u64 __weak arch_bpf_timed_may_goto(void) 3067 + { 3068 + return 0; 3069 + } 3070 + 3071 + u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *p) 3072 + { 3073 + u64 time = ktime_get_mono_fast_ns(); 3074 + 3075 + /* Populate the timestamp for this stack frame, and refresh count. */ 3076 + if (!p->timestamp) { 3077 + p->timestamp = time; 3078 + return BPF_MAX_TIMED_LOOPS; 3079 + } 3080 + /* Check if we've exhausted our time slice, and zero count. 
*/ 3081 + if (time - p->timestamp >= (NSEC_PER_SEC / 4)) 3082 + return 0; 3083 + /* Refresh the count for the stack frame. */ 3084 + return BPF_MAX_TIMED_LOOPS; 3130 3085 } 3131 3086 3132 3087 /* for configs without MMU or 32-bit */
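The interpreter cases above define the encoding of the new instructions: load-acquire and store-release are BPF_STX | BPF_ATOMIC instructions selected by the immediate, and unlike the read-modify-write atomics they accept all four size modifiers. A minimal encoding sketch using the constants this series adds to the UAPI:

#include <linux/filter.h>

/* dst_reg = load_acquire((u32 *)(src_reg + off)) */
static const struct bpf_insn load_acq_w = {
    .code    = BPF_STX | BPF_ATOMIC | BPF_W,
    .dst_reg = BPF_REG_0,
    .src_reg = BPF_REG_1,
    .off     = 0,
    .imm     = BPF_LOAD_ACQ,
};

/* store_release((u64 *)(dst_reg + off), src_reg) */
static const struct bpf_insn store_rel_dw = {
    .code    = BPF_STX | BPF_ATOMIC | BPF_DW,
    .dst_reg = BPF_REG_1,
    .src_reg = BPF_REG_2,
    .off     = 0,
    .imm     = BPF_STORE_REL,
};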
+53
kernel/bpf/cpumask.c
··· 45 45 * 46 46 * bpf_cpumask_create() allocates memory using the BPF memory allocator, and 47 47 * will not block. It may return NULL if no memory is available. 48 + * 49 + * Return: 50 + * * A pointer to a new struct bpf_cpumask instance on success. 51 + * * NULL if the BPF memory allocator is out of memory. 48 52 */ 49 53 __bpf_kfunc struct bpf_cpumask *bpf_cpumask_create(void) 50 54 { ··· 75 71 * Acquires a reference to a BPF cpumask. The cpumask returned by this function 76 72 * must either be embedded in a map as a kptr, or freed with 77 73 * bpf_cpumask_release(). 74 + * 75 + * Return: 76 + * * The struct bpf_cpumask pointer passed to the function. 77 + * 78 78 */ 79 79 __bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) 80 80 { ··· 114 106 * 115 107 * Find the index of the first nonzero bit of the cpumask. A struct bpf_cpumask 116 108 * pointer may be safely passed to this function. 109 + * 110 + * Return: 111 + * * The index of the first nonzero bit in the struct cpumask. 117 112 */ 118 113 __bpf_kfunc u32 bpf_cpumask_first(const struct cpumask *cpumask) 119 114 { ··· 130 119 * 131 120 * Find the index of the first unset bit of the cpumask. A struct bpf_cpumask 132 121 * pointer may be safely passed to this function. 122 + * 123 + * Return: 124 + * * The index of the first zero bit in the struct cpumask. 133 125 */ 134 126 __bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) 135 127 { ··· 147 133 * 148 134 * Find the index of the first nonzero bit of the AND of two cpumasks. 149 135 * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. 136 + * 137 + * Return: 138 + * * The index of the first bit that is nonzero in both cpumask instances. 150 139 */ 151 140 __bpf_kfunc u32 bpf_cpumask_first_and(const struct cpumask *src1, 152 141 const struct cpumask *src2) ··· 431 414 * @cpumask: The cpumask being queried. 432 415 * 433 416 * Count the number of set bits in the given cpumask. 417 + * 418 + * Return: 419 + * * The number of bits set in the mask. 434 420 */ 435 421 __bpf_kfunc u32 bpf_cpumask_weight(const struct cpumask *cpumask) 436 422 { 437 423 return cpumask_weight(cpumask); 424 + } 425 + 426 + /** 427 + * bpf_cpumask_populate() - Populate the CPU mask from the contents of 428 + * a BPF memory region. 429 + * 430 + * @cpumask: The cpumask being populated. 431 + * @src: The BPF memory holding the bit pattern. 432 + * @src__sz: Length of the BPF memory region in bytes. 433 + * 434 + * Return: 435 + * * 0 if the struct cpumask * instance was populated successfully. 436 + * * -EACCES if the memory region is too small to populate the cpumask. 437 + * * -EINVAL if the memory region is not aligned to the size of a long 438 + * and the architecture does not support efficient unaligned accesses. 439 + */ 440 + __bpf_kfunc int bpf_cpumask_populate(struct cpumask *cpumask, void *src, size_t src__sz) 441 + { 442 + unsigned long source = (unsigned long)src; 443 + 444 + /* The memory region must be large enough to populate the entire CPU mask. */ 445 + if (src__sz < bitmap_size(nr_cpu_ids)) 446 + return -EACCES; 447 + 448 + /* If avoiding unaligned accesses, the input region must be aligned to the nearest long. 
*/ 449 + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && 450 + !IS_ALIGNED(source, sizeof(long))) 451 + return -EINVAL; 452 + 453 + bitmap_copy(cpumask_bits(cpumask), src, nr_cpu_ids); 454 + 455 + return 0; 438 456 } 439 457 440 458 __bpf_kfunc_end_defs(); ··· 500 448 BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU) 501 449 BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU) 502 450 BTF_ID_FLAGS(func, bpf_cpumask_weight, KF_RCU) 451 + BTF_ID_FLAGS(func, bpf_cpumask_populate, KF_RCU) 503 452 BTF_KFUNCS_END(cpumask_kfunc_btf_ids) 504 453 505 454 static const struct btf_kfunc_id_set cpumask_kfunc_set = {
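A BPF-side sketch of the new bpf_cpumask_populate() kfunc, with the kfunc declarations written out by hand (in practice they would come from vmlinux.h or a shared header); the bit pattern below marks CPUs 0 and 1:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
int bpf_cpumask_populate(struct cpumask *cpumask, void *src, unsigned long src__sz) __ksym;

char _license[] SEC("license") = "GPL";

SEC("tp_btf/task_newtask")
int BPF_PROG(populate_example, struct task_struct *task, u64 clone_flags)
{
    u64 pattern[4] = { 0x3, 0, 0, 0 };  /* CPUs 0 and 1; long-aligned on the stack */
    struct bpf_cpumask *mask;
    int err;

    mask = bpf_cpumask_create();
    if (!mask)
        return 0;

    /* Fails with -EACCES if the pattern is smaller than the system's cpumask. */
    err = bpf_cpumask_populate((struct cpumask *)mask, pattern, sizeof(pattern));
    if (err)
        bpf_printk("bpf_cpumask_populate: %d", err);

    bpf_cpumask_release(mask);
    return 0;
}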
+14 -2
kernel/bpf/disasm.c
··· 202 202 insn->dst_reg, class == BPF_ALU ? 'w' : 'r', 203 203 insn->dst_reg); 204 204 } else if (is_addr_space_cast(insn)) { 205 - verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n", 205 + verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %u, %u)\n", 206 206 insn->code, insn->dst_reg, 207 207 insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm); 208 208 } else if (is_mov_percpu_addr(insn)) { ··· 265 265 verbose(cbs->private_data, "(%02x) r%d = atomic%s_xchg((%s *)(r%d %+d), r%d)\n", 266 266 insn->code, insn->src_reg, 267 267 BPF_SIZE(insn->code) == BPF_DW ? "64" : "", 268 + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], 269 + insn->dst_reg, insn->off, insn->src_reg); 270 + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && 271 + insn->imm == BPF_LOAD_ACQ) { 272 + verbose(cbs->private_data, "(%02x) r%d = load_acquire((%s *)(r%d %+d))\n", 273 + insn->code, insn->dst_reg, 274 + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], 275 + insn->src_reg, insn->off); 276 + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && 277 + insn->imm == BPF_STORE_REL) { 278 + verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), r%d)\n", 279 + insn->code, 268 280 bpf_ldst_string[BPF_SIZE(insn->code) >> 3], 269 281 insn->dst_reg, insn->off, insn->src_reg); 270 282 } else { ··· 381 369 insn->code, class == BPF_JMP32 ? 'w' : 'r', 382 370 insn->dst_reg, 383 371 bpf_jmp_string[BPF_OP(insn->code) >> 4], 384 - insn->imm, insn->off); 372 + (u32)insn->imm, insn->off); 385 373 } 386 374 } else { 387 375 verbose(cbs->private_data, "(%02x) %s\n",
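With the format strings above, an xlated dump of the new instructions comes out along these lines (illustrative; the opcode byte varies with the size modifier):

    (c3) r1 = load_acquire((u32 *)(r2 +0))
    (db) store_release((u64 *)(r3 +8), r4)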
+6 -3
kernel/bpf/hashtab.c
··· 198 198 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, 199 199 void __percpu *pptr) 200 200 { 201 - *(void __percpu **)(l->key + key_size) = pptr; 201 + *(void __percpu **)(l->key + roundup(key_size, 8)) = pptr; 202 202 } 203 203 204 204 static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) 205 205 { 206 - return *(void __percpu **)(l->key + key_size); 206 + return *(void __percpu **)(l->key + roundup(key_size, 8)); 207 207 } 208 208 209 209 static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) ··· 787 787 static void check_and_free_fields(struct bpf_htab *htab, 788 788 struct htab_elem *elem) 789 789 { 790 + if (IS_ERR_OR_NULL(htab->map.record)) 791 + return; 792 + 790 793 if (htab_is_percpu(htab)) { 791 794 void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 792 795 int cpu; ··· 2357 2354 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2358 2355 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3); 2359 2356 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2360 - offsetof(struct htab_elem, key) + map->key_size); 2357 + offsetof(struct htab_elem, key) + roundup(map->key_size, 8)); 2361 2358 *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); 2362 2359 *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2363 2360
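The roundup() in the hunks above matters whenever key_size is not a multiple of 8: the per-CPU value pointer stored right after the key bytes must sit on an 8-byte boundary, so key_size 4 places it at offset 8 rather than 4, and key_size 12 at offset 16. A one-liner showing the offset rule, assuming the usual kernel roundup() macro:

#include <linux/math.h>

/* Offset of the per-CPU value pointer relative to the start of the key bytes. */
static inline unsigned int pcpu_ptr_off(unsigned int key_size)
{
    return roundup(key_size, 8);
}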
+119 -4
kernel/bpf/helpers.c
··· 1758 1758 .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE, 1759 1759 }; 1760 1760 1761 - BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, 1762 - u32, offset, u64, flags) 1761 + static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src, 1762 + u32 offset, u64 flags) 1763 1763 { 1764 1764 enum bpf_dynptr_type type; 1765 1765 int err; ··· 1792 1792 } 1793 1793 } 1794 1794 1795 + BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, 1796 + u32, offset, u64, flags) 1797 + { 1798 + return __bpf_dynptr_read(dst, len, src, offset, flags); 1799 + } 1800 + 1795 1801 static const struct bpf_func_proto bpf_dynptr_read_proto = { 1796 1802 .func = bpf_dynptr_read, 1797 1803 .gpl_only = false, ··· 1809 1803 .arg5_type = ARG_ANYTHING, 1810 1804 }; 1811 1805 1812 - BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src, 1813 - u32, len, u64, flags) 1806 + static int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, 1807 + u32 len, u64 flags) 1814 1808 { 1815 1809 enum bpf_dynptr_type type; 1816 1810 int err; ··· 1846 1840 WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type); 1847 1841 return -EFAULT; 1848 1842 } 1843 + } 1844 + 1845 + BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src, 1846 + u32, len, u64, flags) 1847 + { 1848 + return __bpf_dynptr_write(dst, offset, src, len, flags); 1849 1849 } 1850 1850 1851 1851 static const struct bpf_func_proto bpf_dynptr_write_proto = { ··· 2055 2043 return &bpf_task_pt_regs_proto; 2056 2044 case BPF_FUNC_trace_vprintk: 2057 2045 return bpf_get_trace_vprintk_proto(); 2046 + case BPF_FUNC_perf_event_read_value: 2047 + return bpf_get_perf_event_read_value_proto(); 2058 2048 default: 2059 2049 return NULL; 2060 2050 } ··· 2771 2757 return 0; 2772 2758 } 2773 2759 2760 + /** 2761 + * bpf_dynptr_copy() - Copy data from one dynptr to another. 2762 + * @dst_ptr: Destination dynptr - where data should be copied to 2763 + * @dst_off: Offset into the destination dynptr 2764 + * @src_ptr: Source dynptr - where data should be copied from 2765 + * @src_off: Offset into the source dynptr 2766 + * @size: Length of the data to copy from source to destination 2767 + * 2768 + * Copies data from source dynptr to destination dynptr. 2769 + * Returns 0 on success; negative error, otherwise. 
2770 + */ 2771 + __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, 2772 + struct bpf_dynptr *src_ptr, u32 src_off, u32 size) 2773 + { 2774 + struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr; 2775 + struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr; 2776 + void *src_slice, *dst_slice; 2777 + char buf[256]; 2778 + u32 off; 2779 + 2780 + src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size); 2781 + dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size); 2782 + 2783 + if (src_slice && dst_slice) { 2784 + memmove(dst_slice, src_slice, size); 2785 + return 0; 2786 + } 2787 + 2788 + if (src_slice) 2789 + return __bpf_dynptr_write(dst, dst_off, src_slice, size, 0); 2790 + 2791 + if (dst_slice) 2792 + return __bpf_dynptr_read(dst_slice, size, src, src_off, 0); 2793 + 2794 + if (bpf_dynptr_check_off_len(dst, dst_off, size) || 2795 + bpf_dynptr_check_off_len(src, src_off, size)) 2796 + return -E2BIG; 2797 + 2798 + off = 0; 2799 + while (off < size) { 2800 + u32 chunk_sz = min_t(u32, sizeof(buf), size - off); 2801 + int err; 2802 + 2803 + err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0); 2804 + if (err) 2805 + return err; 2806 + err = __bpf_dynptr_write(dst, dst_off + off, buf, chunk_sz, 0); 2807 + if (err) 2808 + return err; 2809 + 2810 + off += chunk_sz; 2811 + } 2812 + return 0; 2813 + } 2814 + 2774 2815 __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) 2775 2816 { 2776 2817 return obj; ··· 3135 3066 return ret + 1; 3136 3067 } 3137 3068 3069 + /** 3070 + * bpf_copy_from_user_task_str() - Copy a string from an task's address space 3071 + * @dst: Destination address, in kernel space. This buffer must be 3072 + * at least @dst__sz bytes long. 3073 + * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL. 3074 + * @unsafe_ptr__ign: Source address in the task's address space. 3075 + * @tsk: The task whose address space will be used 3076 + * @flags: The only supported flag is BPF_F_PAD_ZEROS 3077 + * 3078 + * Copies a NUL terminated string from a task's address space to @dst__sz 3079 + * buffer. If user string is too long this will still ensure zero termination 3080 + * in the @dst__sz buffer unless buffer size is 0. 3081 + * 3082 + * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst__sz to 0 on success 3083 + * and memset all of @dst__sz on failure. 3084 + * 3085 + * Return: The number of copied bytes on success including the NUL terminator. 3086 + * A negative error code on failure. 
3087 + */ 3088 + __bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz, 3089 + const void __user *unsafe_ptr__ign, 3090 + struct task_struct *tsk, u64 flags) 3091 + { 3092 + int ret; 3093 + 3094 + if (unlikely(flags & ~BPF_F_PAD_ZEROS)) 3095 + return -EINVAL; 3096 + 3097 + if (unlikely(dst__sz == 0)) 3098 + return 0; 3099 + 3100 + ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_ptr__ign, dst, dst__sz, 0); 3101 + if (ret < 0) { 3102 + if (flags & BPF_F_PAD_ZEROS) 3103 + memset(dst, 0, dst__sz); 3104 + return ret; 3105 + } 3106 + 3107 + if (flags & BPF_F_PAD_ZEROS) 3108 + memset(dst + ret, 0, dst__sz - ret); 3109 + 3110 + return ret + 1; 3111 + } 3112 + 3138 3113 /* Keep unsinged long in prototype so that kfunc is usable when emitted to 3139 3114 * vmlinux.h in BPF programs directly, but note that while in BPF prog, the 3140 3115 * unsigned long always points to 8-byte region on stack, the kernel may only ··· 3274 3161 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) 3275 3162 BTF_ID_FLAGS(func, bpf_dynptr_size) 3276 3163 BTF_ID_FLAGS(func, bpf_dynptr_clone) 3164 + BTF_ID_FLAGS(func, bpf_dynptr_copy) 3277 3165 #ifdef CONFIG_NET 3278 3166 BTF_ID_FLAGS(func, bpf_modify_return_test_tp) 3279 3167 #endif ··· 3287 3173 BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) 3288 3174 BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) 3289 3175 BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) 3176 + BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE) 3290 3177 BTF_ID_FLAGS(func, bpf_get_kmem_cache) 3291 3178 BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE) 3292 3179 BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
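A BPF-side sketch of the new bpf_dynptr_copy() kfunc operating on two local dynptrs, with the declaration written out by hand; bpf_copy_from_user_task_str() follows the same kfunc-declaration pattern:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, __u32 dst_off,
                    struct bpf_dynptr *src_ptr, __u32 src_off, __u32 size) __ksym;

char _license[] SEC("license") = "GPL";

SEC("tp_btf/sys_enter")
int copy_example(void *ctx)
{
    char src_buf[64] = "hello", dst_buf[64] = {};
    struct bpf_dynptr src, dst;
    int err;

    bpf_dynptr_from_mem(src_buf, sizeof(src_buf), 0, &src);
    bpf_dynptr_from_mem(dst_buf, sizeof(dst_buf), 0, &dst);

    /* Copy 32 bytes from offset 0 of src into offset 16 of dst. */
    err = bpf_dynptr_copy(&dst, 16, &src, 0, 32);
    if (err)
        bpf_printk("bpf_dynptr_copy: %d", err);
    return 0;
}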
+1
kernel/bpf/preload/bpf_preload_kern.c
··· 90 90 late_initcall(load); 91 91 module_exit(fini); 92 92 MODULE_LICENSE("GPL"); 93 + MODULE_DESCRIPTION("Embedded BPF programs for introspection in bpffs");
+33 -15
kernel/bpf/syscall.c
··· 1315 1315 1316 1316 #define BPF_MAP_CREATE_LAST_FIELD map_token_fd 1317 1317 /* called via syscall */ 1318 - static int map_create(union bpf_attr *attr) 1318 + static int map_create(union bpf_attr *attr, bool kernel) 1319 1319 { 1320 1320 const struct bpf_map_ops *ops; 1321 1321 struct bpf_token *token = NULL; ··· 1505 1505 attr->btf_vmlinux_value_type_id; 1506 1506 } 1507 1507 1508 - err = security_bpf_map_create(map, attr, token); 1508 + err = security_bpf_map_create(map, attr, token, kernel); 1509 1509 if (err) 1510 1510 goto free_map_sec; 1511 1511 ··· 1593 1593 1594 1594 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 1595 1595 { 1596 - spin_lock_bh(&map_idr_lock); 1597 - map = __bpf_map_inc_not_zero(map, false); 1598 - spin_unlock_bh(&map_idr_lock); 1599 - 1600 - return map; 1596 + lockdep_assert(rcu_read_lock_held()); 1597 + return __bpf_map_inc_not_zero(map, false); 1601 1598 } 1602 1599 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 1603 1600 ··· 2311 2314 kvfree(prog->aux->jited_linfo); 2312 2315 kvfree(prog->aux->linfo); 2313 2316 kfree(prog->aux->kfunc_tab); 2317 + kfree(prog->aux->ctx_arg_info); 2314 2318 if (prog->aux->attach_btf) 2315 2319 btf_put(prog->aux->attach_btf); 2316 2320 ··· 2942 2944 if (err < 0) 2943 2945 goto free_prog; 2944 2946 2945 - err = security_bpf_prog_load(prog, attr, token); 2947 + err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel); 2946 2948 if (err) 2947 2949 goto free_prog_sec; 2948 2950 ··· 4167 4169 #define BPF_F_ATTACH_MASK_BASE \ 4168 4170 (BPF_F_ALLOW_OVERRIDE | \ 4169 4171 BPF_F_ALLOW_MULTI | \ 4170 - BPF_F_REPLACE) 4172 + BPF_F_REPLACE | \ 4173 + BPF_F_PREORDER) 4171 4174 4172 4175 #define BPF_F_ATTACH_MASK_MPROG \ 4173 4176 (BPF_F_REPLACE | \ ··· 4732 4733 info.recursion_misses = stats.misses; 4733 4734 4734 4735 info.verified_insns = prog->aux->verified_insns; 4736 + if (prog->aux->btf) 4737 + info.btf_id = btf_obj_id(prog->aux->btf); 4735 4738 4736 4739 if (!bpf_capable()) { 4737 4740 info.jited_prog_len = 0; ··· 4880 4879 } 4881 4880 } 4882 4881 4883 - if (prog->aux->btf) 4884 - info.btf_id = btf_obj_id(prog->aux->btf); 4885 4882 info.attach_btf_id = prog->aux->attach_btf_id; 4886 4883 if (attach_btf) 4887 4884 info.attach_btf_obj_id = btf_obj_id(attach_btf); ··· 5120 5121 return btf_new_fd(attr, uattr, uattr_size); 5121 5122 } 5122 5123 5123 - #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id 5124 + #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd 5124 5125 5125 5126 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 5126 5127 { 5128 + struct bpf_token *token = NULL; 5129 + 5127 5130 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) 5128 5131 return -EINVAL; 5129 5132 5130 - if (!capable(CAP_SYS_ADMIN)) 5133 + if (attr->open_flags & ~BPF_F_TOKEN_FD) 5134 + return -EINVAL; 5135 + 5136 + if (attr->open_flags & BPF_F_TOKEN_FD) { 5137 + token = bpf_token_get_from_fd(attr->fd_by_id_token_fd); 5138 + if (IS_ERR(token)) 5139 + return PTR_ERR(token); 5140 + if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) { 5141 + bpf_token_put(token); 5142 + token = NULL; 5143 + } 5144 + } 5145 + 5146 + if (!bpf_token_capable(token, CAP_SYS_ADMIN)) { 5147 + bpf_token_put(token); 5131 5148 return -EPERM; 5149 + } 5150 + 5151 + bpf_token_put(token); 5132 5152 5133 5153 return btf_get_fd_by_id(attr->btf_id); 5134 5154 } ··· 5786 5768 if (copy_from_bpfptr(&attr, uattr, size) != 0) 5787 5769 return -EFAULT; 5788 5770 5789 - err = security_bpf(cmd, &attr, size); 5771 + err = security_bpf(cmd, &attr, size, uattr.is_kernel); 5790 5772 if 
(err < 0) 5791 5773 return err; 5792 5774 5793 5775 switch (cmd) { 5794 5776 case BPF_MAP_CREATE: 5795 - err = map_create(&attr); 5777 + err = map_create(&attr, uattr.is_kernel); 5796 5778 break; 5797 5779 case BPF_MAP_LOOKUP_ELEM: 5798 5780 err = map_lookup_elem(&attr);
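A user-space sketch of the BPF token path added above for BPF_BTF_GET_FD_BY_ID, using the raw syscall so as not to assume a particular libbpf version (error handling trimmed):

#include <unistd.h>
#include <string.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int btf_get_fd_by_id_with_token(__u32 btf_id, int token_fd)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.btf_id = btf_id;
    attr.open_flags = BPF_F_TOKEN_FD;
    attr.fd_by_id_token_fd = token_fd;

    return syscall(__NR_bpf, BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
}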
+1035 -325
kernel/bpf/verifier.c
··· 579 579 insn->imm == BPF_CMPXCHG; 580 580 } 581 581 582 + static bool is_atomic_load_insn(const struct bpf_insn *insn) 583 + { 584 + return BPF_CLASS(insn->code) == BPF_STX && 585 + BPF_MODE(insn->code) == BPF_ATOMIC && 586 + insn->imm == BPF_LOAD_ACQ; 587 + } 588 + 582 589 static int __get_spi(s32 off) 583 590 { 584 591 return (-off - 1) / BPF_REG_SIZE; ··· 1552 1545 return; 1553 1546 } 1554 1547 1548 + static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id) 1549 + { 1550 + int i; 1551 + 1552 + for (i = 0; i < state->acquired_refs; i++) 1553 + if (state->refs[i].id == ptr_id) 1554 + return true; 1555 + 1556 + return false; 1557 + } 1558 + 1555 1559 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr) 1556 1560 { 1557 1561 int i; ··· 1618 1600 return NULL; 1619 1601 } 1620 1602 1603 + static void update_peak_states(struct bpf_verifier_env *env) 1604 + { 1605 + u32 cur_states; 1606 + 1607 + cur_states = env->explored_states_size + env->free_list_size; 1608 + env->peak_states = max(env->peak_states, cur_states); 1609 + } 1610 + 1621 1611 static void free_func_state(struct bpf_func_state *state) 1622 1612 { 1623 1613 if (!state) ··· 1646 1620 kfree(state->refs); 1647 1621 if (free_self) 1648 1622 kfree(state); 1623 + } 1624 + 1625 + /* struct bpf_verifier_state->{parent,loop_entry} refer to states 1626 + * that are in either of env->{expored_states,free_list}. 1627 + * In both cases the state is contained in struct bpf_verifier_state_list. 1628 + */ 1629 + static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st) 1630 + { 1631 + if (st->parent) 1632 + return container_of(st->parent, struct bpf_verifier_state_list, state); 1633 + return NULL; 1634 + } 1635 + 1636 + static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st) 1637 + { 1638 + if (st->loop_entry) 1639 + return container_of(st->loop_entry, struct bpf_verifier_state_list, state); 1640 + return NULL; 1641 + } 1642 + 1643 + /* A state can be freed if it is no longer referenced: 1644 + * - is in the env->free_list; 1645 + * - has no children states; 1646 + * - is not used as loop_entry. 1647 + * 1648 + * Freeing a state can make it's loop_entry free-able. 
1649 + */ 1650 + static void maybe_free_verifier_state(struct bpf_verifier_env *env, 1651 + struct bpf_verifier_state_list *sl) 1652 + { 1653 + struct bpf_verifier_state_list *loop_entry_sl; 1654 + 1655 + while (sl && sl->in_free_list && 1656 + sl->state.branches == 0 && 1657 + sl->state.used_as_loop_entry == 0) { 1658 + loop_entry_sl = state_loop_entry_as_list(&sl->state); 1659 + if (loop_entry_sl) 1660 + loop_entry_sl->state.used_as_loop_entry--; 1661 + list_del(&sl->node); 1662 + free_verifier_state(&sl->state, false); 1663 + kfree(sl); 1664 + env->free_list_size--; 1665 + sl = loop_entry_sl; 1666 + } 1649 1667 } 1650 1668 1651 1669 /* copy verifier state from src to dst growing dst stack space ··· 1731 1661 dst_state->callback_unroll_depth = src->callback_unroll_depth; 1732 1662 dst_state->used_as_loop_entry = src->used_as_loop_entry; 1733 1663 dst_state->may_goto_depth = src->may_goto_depth; 1664 + dst_state->loop_entry = src->loop_entry; 1734 1665 for (i = 0; i <= src->curframe; i++) { 1735 1666 dst = dst_state->frame[i]; 1736 1667 if (!dst) { ··· 1752 1681 return env->prog->len; 1753 1682 } 1754 1683 1755 - static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx) 1684 + static struct list_head *explored_state(struct bpf_verifier_env *env, int idx) 1756 1685 { 1757 1686 struct bpf_verifier_state *cur = env->cur_state; 1758 1687 struct bpf_func_state *state = cur->frame[cur->curframe]; ··· 1860 1789 * # Find outermost loop entry known for n 1861 1790 * def get_loop_entry(n): 1862 1791 * h = entries.get(n, None) 1863 - * while h in entries and entries[h] != h: 1792 + * while h in entries: 1864 1793 * h = entries[h] 1865 1794 * return h 1866 1795 * 1867 - * # Update n's loop entry if h's outermost entry comes 1868 - * # before n's outermost entry in current DFS path. 1796 + * # Update n's loop entry if h comes before n in current DFS path. 1869 1797 * def update_loop_entry(n, h): 1870 - * n1 = get_loop_entry(n) or n 1871 - * h1 = get_loop_entry(h) or h 1872 - * if h1 in path and depths[h1] <= depths[n1]: 1798 + * if h in path and depths[entries.get(n, n)] < depths[n]: 1873 1799 * entries[n] = h1 1874 1800 * 1875 1801 * def dfs(n, depth): ··· 1878 1810 * # Case A: explore succ and update cur's loop entry 1879 1811 * # only if succ's entry is in current DFS path. 1880 1812 * dfs(succ, depth + 1) 1881 - * h = get_loop_entry(succ) 1813 + * h = entries.get(succ, None) 1882 1814 * update_loop_entry(n, h) 1883 1815 * else: 1884 1816 * # Case B or C depending on `h1 in path` check in update_loop_entry(). ··· 1890 1822 * and cur's loop entry has to be updated (case A), handle this in 1891 1823 * update_branch_counts(); 1892 1824 * - use st->branch > 0 as a signal that st is in the current DFS path; 1893 - * - handle cases B and C in is_state_visited(); 1894 - * - update topmost loop entry for intermediate states in get_loop_entry(). 1825 + * - handle cases B and C in is_state_visited(). 
1895 1826 */ 1896 - static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st) 1827 + static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env, 1828 + struct bpf_verifier_state *st) 1897 1829 { 1898 - struct bpf_verifier_state *topmost = st->loop_entry, *old; 1830 + struct bpf_verifier_state *topmost = st->loop_entry; 1831 + u32 steps = 0; 1899 1832 1900 - while (topmost && topmost->loop_entry && topmost != topmost->loop_entry) 1833 + while (topmost && topmost->loop_entry) { 1834 + if (steps++ > st->dfs_depth) { 1835 + WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n"); 1836 + verbose(env, "verifier bug: infinite loop in get_loop_entry()\n"); 1837 + return ERR_PTR(-EFAULT); 1838 + } 1901 1839 topmost = topmost->loop_entry; 1902 - /* Update loop entries for intermediate states to avoid this 1903 - * traversal in future get_loop_entry() calls. 1904 - */ 1905 - while (st && st->loop_entry != topmost) { 1906 - old = st->loop_entry; 1907 - st->loop_entry = topmost; 1908 - st = old; 1909 1840 } 1910 1841 return topmost; 1911 1842 } 1912 1843 1913 - static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr) 1844 + static void update_loop_entry(struct bpf_verifier_env *env, 1845 + struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr) 1914 1846 { 1915 - struct bpf_verifier_state *cur1, *hdr1; 1916 - 1917 - cur1 = get_loop_entry(cur) ?: cur; 1918 - hdr1 = get_loop_entry(hdr) ?: hdr; 1919 - /* The head1->branches check decides between cases B and C in 1920 - * comment for get_loop_entry(). If hdr1->branches == 0 then 1847 + /* The hdr->branches check decides between cases B and C in 1848 + * comment for get_loop_entry(). If hdr->branches == 0 then 1921 1849 * head's topmost loop entry is not in current DFS path, 1922 1850 * hence 'cur' and 'hdr' are not in the same loop and there is 1923 1851 * no need to update cur->loop_entry. 1924 1852 */ 1925 - if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) { 1853 + if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) { 1854 + if (cur->loop_entry) { 1855 + cur->loop_entry->used_as_loop_entry--; 1856 + maybe_free_verifier_state(env, state_loop_entry_as_list(cur)); 1857 + } 1926 1858 cur->loop_entry = hdr; 1927 - hdr->used_as_loop_entry = true; 1859 + hdr->used_as_loop_entry++; 1928 1860 } 1929 1861 } 1930 1862 1931 1863 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 1932 1864 { 1865 + struct bpf_verifier_state_list *sl = NULL, *parent_sl; 1866 + struct bpf_verifier_state *parent; 1867 + 1933 1868 while (st) { 1934 1869 u32 br = --st->branches; 1935 1870 ··· 1942 1871 * This is a part of 'case A' in get_loop_entry() comment. 1943 1872 */ 1944 1873 if (br == 0 && st->parent && st->loop_entry) 1945 - update_loop_entry(st->parent, st->loop_entry); 1874 + update_loop_entry(env, st->parent, st->loop_entry); 1946 1875 1947 1876 /* WARN_ON(br > 1) technically makes sense here, 1948 1877 * but see comment in push_stack(), hence: ··· 1952 1881 br); 1953 1882 if (br) 1954 1883 break; 1955 - st = st->parent; 1884 + parent = st->parent; 1885 + parent_sl = state_parent_as_list(st); 1886 + if (sl) 1887 + maybe_free_verifier_state(env, sl); 1888 + st = parent; 1889 + sl = parent_sl; 1956 1890 } 1957 1891 } 1958 1892 ··· 3282 3206 return res ? 
&res->func_model : NULL; 3283 3207 } 3284 3208 3209 + static int add_kfunc_in_insns(struct bpf_verifier_env *env, 3210 + struct bpf_insn *insn, int cnt) 3211 + { 3212 + int i, ret; 3213 + 3214 + for (i = 0; i < cnt; i++, insn++) { 3215 + if (bpf_pseudo_kfunc_call(insn)) { 3216 + ret = add_kfunc_call(env, insn->imm, insn->off); 3217 + if (ret < 0) 3218 + return ret; 3219 + } 3220 + } 3221 + return 0; 3222 + } 3223 + 3285 3224 static int add_subprog_and_kfunc(struct bpf_verifier_env *env) 3286 3225 { 3287 3226 struct bpf_subprog_info *subprog = env->subprog_info; ··· 3360 3269 return 0; 3361 3270 } 3362 3271 3272 + static int jmp_offset(struct bpf_insn *insn) 3273 + { 3274 + u8 code = insn->code; 3275 + 3276 + if (code == (BPF_JMP32 | BPF_JA)) 3277 + return insn->imm; 3278 + return insn->off; 3279 + } 3280 + 3363 3281 static int check_subprogs(struct bpf_verifier_env *env) 3364 3282 { 3365 3283 int i, subprog_start, subprog_end, off, cur_subprog = 0; ··· 3395 3295 goto next; 3396 3296 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) 3397 3297 goto next; 3398 - if (code == (BPF_JMP32 | BPF_JA)) 3399 - off = i + insn[i].imm + 1; 3400 - else 3401 - off = i + insn[i].off + 1; 3298 + off = i + jmp_offset(&insn[i]) + 1; 3402 3299 if (off < subprog_start || off >= subprog_end) { 3403 3300 verbose(env, "jump out of range from insn %d to %d\n", i, off); 3404 3301 return -EINVAL; ··· 3580 3483 } 3581 3484 3582 3485 if (class == BPF_STX) { 3583 - /* BPF_STX (including atomic variants) has multiple source 3486 + /* BPF_STX (including atomic variants) has one or more source 3584 3487 * operands, one of which is a ptr. Check whether the caller is 3585 3488 * asking about it. 3586 3489 */ ··· 3925 3828 return btf_name_by_offset(desc_btf, func->name_off); 3926 3829 } 3927 3830 3831 + static void verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn) 3832 + { 3833 + const struct bpf_insn_cbs cbs = { 3834 + .cb_call = disasm_kfunc_name, 3835 + .cb_print = verbose, 3836 + .private_data = env, 3837 + }; 3838 + 3839 + print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 3840 + } 3841 + 3928 3842 static inline void bt_init(struct backtrack_state *bt, u32 frame) 3929 3843 { 3930 3844 bt->frame = frame; ··· 4136 4028 static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, 4137 4029 struct bpf_insn_hist_entry *hist, struct backtrack_state *bt) 4138 4030 { 4139 - const struct bpf_insn_cbs cbs = { 4140 - .cb_call = disasm_kfunc_name, 4141 - .cb_print = verbose, 4142 - .private_data = env, 4143 - }; 4144 4031 struct bpf_insn *insn = env->prog->insnsi + idx; 4145 4032 u8 class = BPF_CLASS(insn->code); 4146 4033 u8 opcode = BPF_OP(insn->code); ··· 4153 4050 fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt)); 4154 4051 verbose(env, "stack=%s before ", env->tmp_str_buf); 4155 4052 verbose(env, "%d: ", idx); 4156 - print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 4053 + verbose_insn(env, insn); 4157 4054 } 4158 4055 4159 4056 /* If there is a history record that some registers gained range at this insn, ··· 4200 4097 * dreg still needs precision before this insn 4201 4098 */ 4202 4099 } 4203 - } else if (class == BPF_LDX) { 4100 + } else if (class == BPF_LDX || is_atomic_load_insn(insn)) { 4204 4101 if (!bt_is_reg_set(bt, dreg)) 4205 4102 return 0; 4206 4103 bt_clear_reg(bt, dreg); ··· 6085 5982 6086 5983 /* check access to 'struct bpf_context' fields. 
Supports fixed offsets only */ 6087 5984 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, 6088 - enum bpf_access_type t, enum bpf_reg_type *reg_type, 6089 - struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx) 5985 + enum bpf_access_type t, struct bpf_insn_access_aux *info) 6090 5986 { 6091 - struct bpf_insn_access_aux info = { 6092 - .reg_type = *reg_type, 6093 - .log = &env->log, 6094 - .is_retval = false, 6095 - .is_ldsx = is_ldsx, 6096 - }; 6097 - 6098 5987 if (env->ops->is_valid_access && 6099 - env->ops->is_valid_access(off, size, t, env->prog, &info)) { 5988 + env->ops->is_valid_access(off, size, t, env->prog, info)) { 6100 5989 /* A non zero info.ctx_field_size indicates that this field is a 6101 5990 * candidate for later verifier transformation to load the whole 6102 5991 * field and then apply a mask when accessed with a narrower ··· 6096 6001 * will only allow for whole field access and rejects any other 6097 6002 * type of narrower access. 6098 6003 */ 6099 - *reg_type = info.reg_type; 6100 - *is_retval = info.is_retval; 6101 - 6102 - if (base_type(*reg_type) == PTR_TO_BTF_ID) { 6103 - *btf = info.btf; 6104 - *btf_id = info.btf_id; 6004 + if (base_type(info->reg_type) == PTR_TO_BTF_ID) { 6005 + if (info->ref_obj_id && 6006 + !find_reference_state(env->cur_state, info->ref_obj_id)) { 6007 + verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n", 6008 + off); 6009 + return -EACCES; 6010 + } 6105 6011 } else { 6106 - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; 6012 + env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size; 6107 6013 } 6108 6014 /* remember the offset of last byte accessed in ctx */ 6109 6015 if (env->prog->aux->max_ctx_offset < off + size) ··· 6212 6116 const struct bpf_reg_state *reg = reg_state(env, regno); 6213 6117 6214 6118 return reg->type == PTR_TO_ARENA; 6119 + } 6120 + 6121 + /* Return false if @regno contains a pointer whose type isn't supported for 6122 + * atomic instruction @insn. 
6123 + */ 6124 + static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno, 6125 + struct bpf_insn *insn) 6126 + { 6127 + if (is_ctx_reg(env, regno)) 6128 + return false; 6129 + if (is_pkt_reg(env, regno)) 6130 + return false; 6131 + if (is_flow_key_reg(env, regno)) 6132 + return false; 6133 + if (is_sk_reg(env, regno)) 6134 + return false; 6135 + if (is_arena_reg(env, regno)) 6136 + return bpf_jit_supports_insn(insn, true); 6137 + 6138 + return true; 6215 6139 } 6216 6140 6217 6141 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { ··· 7481 7365 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) 7482 7366 mark_reg_unknown(env, regs, value_regno); 7483 7367 } else if (reg->type == PTR_TO_CTX) { 7484 - bool is_retval = false; 7485 7368 struct bpf_retval_range range; 7486 - enum bpf_reg_type reg_type = SCALAR_VALUE; 7487 - struct btf *btf = NULL; 7488 - u32 btf_id = 0; 7369 + struct bpf_insn_access_aux info = { 7370 + .reg_type = SCALAR_VALUE, 7371 + .is_ldsx = is_ldsx, 7372 + .log = &env->log, 7373 + }; 7489 7374 7490 7375 if (t == BPF_WRITE && value_regno >= 0 && 7491 7376 is_pointer_value(env, value_regno)) { ··· 7498 7381 if (err < 0) 7499 7382 return err; 7500 7383 7501 - err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, 7502 - &btf_id, &is_retval, is_ldsx); 7384 + err = check_ctx_access(env, insn_idx, off, size, t, &info); 7503 7385 if (err) 7504 7386 verbose_linfo(env, insn_idx, "; "); 7505 7387 if (!err && t == BPF_READ && value_regno >= 0) { ··· 7506 7390 * PTR_TO_PACKET[_META,_END]. In the latter 7507 7391 * case, we know the offset is zero. 7508 7392 */ 7509 - if (reg_type == SCALAR_VALUE) { 7510 - if (is_retval && get_func_retval_range(env->prog, &range)) { 7393 + if (info.reg_type == SCALAR_VALUE) { 7394 + if (info.is_retval && get_func_retval_range(env->prog, &range)) { 7511 7395 err = __mark_reg_s32_range(env, regs, value_regno, 7512 7396 range.minval, range.maxval); 7513 7397 if (err) ··· 7518 7402 } else { 7519 7403 mark_reg_known_zero(env, regs, 7520 7404 value_regno); 7521 - if (type_may_be_null(reg_type)) 7405 + if (type_may_be_null(info.reg_type)) 7522 7406 regs[value_regno].id = ++env->id_gen; 7523 7407 /* A load of ctx field could have different 7524 7408 * actual load size with the one encoded in the ··· 7526 7410 * a sub-register. 
7527 7411 */ 7528 7412 regs[value_regno].subreg_def = DEF_NOT_SUBREG; 7529 - if (base_type(reg_type) == PTR_TO_BTF_ID) { 7530 - regs[value_regno].btf = btf; 7531 - regs[value_regno].btf_id = btf_id; 7413 + if (base_type(info.reg_type) == PTR_TO_BTF_ID) { 7414 + regs[value_regno].btf = info.btf; 7415 + regs[value_regno].btf_id = info.btf_id; 7416 + regs[value_regno].ref_obj_id = info.ref_obj_id; 7532 7417 } 7533 7418 } 7534 - regs[value_regno].type = reg_type; 7419 + regs[value_regno].type = info.reg_type; 7535 7420 } 7536 7421 7537 7422 } else if (reg->type == PTR_TO_STACK) { ··· 7635 7518 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, 7636 7519 bool allow_trust_mismatch); 7637 7520 7638 - static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) 7521 + static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn, 7522 + bool strict_alignment_once, bool is_ldsx, 7523 + bool allow_trust_mismatch, const char *ctx) 7524 + { 7525 + struct bpf_reg_state *regs = cur_regs(env); 7526 + enum bpf_reg_type src_reg_type; 7527 + int err; 7528 + 7529 + /* check src operand */ 7530 + err = check_reg_arg(env, insn->src_reg, SRC_OP); 7531 + if (err) 7532 + return err; 7533 + 7534 + /* check dst operand */ 7535 + err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 7536 + if (err) 7537 + return err; 7538 + 7539 + src_reg_type = regs[insn->src_reg].type; 7540 + 7541 + /* Check if (src_reg + off) is readable. The state of dst_reg will be 7542 + * updated by this call. 7543 + */ 7544 + err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off, 7545 + BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, 7546 + strict_alignment_once, is_ldsx); 7547 + err = err ?: save_aux_ptr_type(env, src_reg_type, 7548 + allow_trust_mismatch); 7549 + err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx); 7550 + 7551 + return err; 7552 + } 7553 + 7554 + static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn, 7555 + bool strict_alignment_once) 7556 + { 7557 + struct bpf_reg_state *regs = cur_regs(env); 7558 + enum bpf_reg_type dst_reg_type; 7559 + int err; 7560 + 7561 + /* check src1 operand */ 7562 + err = check_reg_arg(env, insn->src_reg, SRC_OP); 7563 + if (err) 7564 + return err; 7565 + 7566 + /* check src2 operand */ 7567 + err = check_reg_arg(env, insn->dst_reg, SRC_OP); 7568 + if (err) 7569 + return err; 7570 + 7571 + dst_reg_type = regs[insn->dst_reg].type; 7572 + 7573 + /* Check if (dst_reg + off) is writeable. 
*/ 7574 + err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, 7575 + BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg, 7576 + strict_alignment_once, false); 7577 + err = err ?: save_aux_ptr_type(env, dst_reg_type, false); 7578 + 7579 + return err; 7580 + } 7581 + 7582 + static int check_atomic_rmw(struct bpf_verifier_env *env, 7583 + struct bpf_insn *insn) 7639 7584 { 7640 7585 int load_reg; 7641 7586 int err; 7642 - 7643 - switch (insn->imm) { 7644 - case BPF_ADD: 7645 - case BPF_ADD | BPF_FETCH: 7646 - case BPF_AND: 7647 - case BPF_AND | BPF_FETCH: 7648 - case BPF_OR: 7649 - case BPF_OR | BPF_FETCH: 7650 - case BPF_XOR: 7651 - case BPF_XOR | BPF_FETCH: 7652 - case BPF_XCHG: 7653 - case BPF_CMPXCHG: 7654 - break; 7655 - default: 7656 - verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm); 7657 - return -EINVAL; 7658 - } 7659 7587 7660 7588 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) { 7661 7589 verbose(env, "invalid atomic operand size\n"); ··· 7736 7574 return -EACCES; 7737 7575 } 7738 7576 7739 - if (is_ctx_reg(env, insn->dst_reg) || 7740 - is_pkt_reg(env, insn->dst_reg) || 7741 - is_flow_key_reg(env, insn->dst_reg) || 7742 - is_sk_reg(env, insn->dst_reg) || 7743 - (is_arena_reg(env, insn->dst_reg) && !bpf_jit_supports_insn(insn, true))) { 7577 + if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { 7744 7578 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", 7745 7579 insn->dst_reg, 7746 7580 reg_type_str(env, reg_state(env, insn->dst_reg)->type)); ··· 7763 7605 /* Check whether we can read the memory, with second call for fetch 7764 7606 * case to simulate the register fill. 7765 7607 */ 7766 - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, 7608 + err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, 7767 7609 BPF_SIZE(insn->code), BPF_READ, -1, true, false); 7768 7610 if (!err && load_reg >= 0) 7769 - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, 7770 - BPF_SIZE(insn->code), BPF_READ, load_reg, 7771 - true, false); 7611 + err = check_mem_access(env, env->insn_idx, insn->dst_reg, 7612 + insn->off, BPF_SIZE(insn->code), 7613 + BPF_READ, load_reg, true, false); 7772 7614 if (err) 7773 7615 return err; 7774 7616 ··· 7778 7620 return err; 7779 7621 } 7780 7622 /* Check whether we can write into the same memory. 
*/ 7781 - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, 7623 + err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, 7782 7624 BPF_SIZE(insn->code), BPF_WRITE, -1, true, false); 7783 7625 if (err) 7784 7626 return err; 7785 7627 return 0; 7628 + } 7629 + 7630 + static int check_atomic_load(struct bpf_verifier_env *env, 7631 + struct bpf_insn *insn) 7632 + { 7633 + int err; 7634 + 7635 + err = check_load_mem(env, insn, true, false, false, "atomic_load"); 7636 + if (err) 7637 + return err; 7638 + 7639 + if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) { 7640 + verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n", 7641 + insn->src_reg, 7642 + reg_type_str(env, reg_state(env, insn->src_reg)->type)); 7643 + return -EACCES; 7644 + } 7645 + 7646 + return 0; 7647 + } 7648 + 7649 + static int check_atomic_store(struct bpf_verifier_env *env, 7650 + struct bpf_insn *insn) 7651 + { 7652 + int err; 7653 + 7654 + err = check_store_reg(env, insn, true); 7655 + if (err) 7656 + return err; 7657 + 7658 + if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { 7659 + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", 7660 + insn->dst_reg, 7661 + reg_type_str(env, reg_state(env, insn->dst_reg)->type)); 7662 + return -EACCES; 7663 + } 7664 + 7665 + return 0; 7666 + } 7667 + 7668 + static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn) 7669 + { 7670 + switch (insn->imm) { 7671 + case BPF_ADD: 7672 + case BPF_ADD | BPF_FETCH: 7673 + case BPF_AND: 7674 + case BPF_AND | BPF_FETCH: 7675 + case BPF_OR: 7676 + case BPF_OR | BPF_FETCH: 7677 + case BPF_XOR: 7678 + case BPF_XOR | BPF_FETCH: 7679 + case BPF_XCHG: 7680 + case BPF_CMPXCHG: 7681 + return check_atomic_rmw(env, insn); 7682 + case BPF_LOAD_ACQ: 7683 + if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) { 7684 + verbose(env, 7685 + "64-bit load-acquires are only supported on 64-bit arches\n"); 7686 + return -EOPNOTSUPP; 7687 + } 7688 + return check_atomic_load(env, insn); 7689 + case BPF_STORE_REL: 7690 + if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) { 7691 + verbose(env, 7692 + "64-bit store-releases are only supported on 64-bit arches\n"); 7693 + return -EOPNOTSUPP; 7694 + } 7695 + return check_atomic_store(env, insn); 7696 + default: 7697 + verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", 7698 + insn->imm); 7699 + return -EINVAL; 7700 + } 7786 7701 } 7787 7702 7788 7703 /* When register 'regno' is used to read the stack (either directly or through ··· 8662 8431 { 8663 8432 struct bpf_verifier_state_list *sl; 8664 8433 struct bpf_verifier_state *st; 8434 + struct list_head *pos, *head; 8665 8435 8666 8436 /* Explored states are pushed in stack order, most recent states come first */ 8667 - sl = *explored_state(env, insn_idx); 8668 - for (; sl; sl = sl->next) { 8437 + head = explored_state(env, insn_idx); 8438 + list_for_each(pos, head) { 8439 + sl = container_of(pos, struct bpf_verifier_state_list, node); 8669 8440 /* If st->branches != 0 state is a part of current DFS verification path, 8670 8441 * hence cur & st for a loop. 
8671 8442 */ ··· 9899 9666 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) 9900 9667 goto error; 9901 9668 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) { 9902 - verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n"); 9669 + verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n"); 9903 9670 return -EINVAL; 9904 9671 } 9905 9672 break; ··· 10470 10237 if (subprog_is_global(env, subprog)) { 10471 10238 const char *sub_name = subprog_name(env, subprog); 10472 10239 10473 - /* Only global subprogs cannot be called with a lock held. */ 10474 10240 if (env->cur_state->active_locks) { 10475 10241 verbose(env, "global function calls are not allowed while holding a lock,\n" 10476 10242 "use static function instead\n"); 10477 10243 return -EINVAL; 10478 10244 } 10479 10245 10480 - /* Only global subprogs cannot be called with preemption disabled. */ 10481 - if (env->cur_state->active_preempt_locks) { 10482 - verbose(env, "global function calls are not allowed with preemption disabled,\n" 10483 - "use static function instead\n"); 10484 - return -EINVAL; 10485 - } 10486 - 10487 - if (env->cur_state->active_irq_id) { 10488 - verbose(env, "global function calls are not allowed with IRQs disabled,\n" 10489 - "use static function instead\n"); 10246 + if (env->subprog_info[subprog].might_sleep && 10247 + (env->cur_state->active_rcu_lock || env->cur_state->active_preempt_locks || 10248 + env->cur_state->active_irq_id || !in_sleepable(env))) { 10249 + verbose(env, "global functions that may sleep are not allowed in non-sleepable context,\n" 10250 + "i.e., in a RCU/IRQ/preempt-disabled section, or in\n" 10251 + "a non-sleepable BPF program context\n"); 10490 10252 return -EINVAL; 10491 10253 } 10492 10254 ··· 10980 10752 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit) 10981 10753 { 10982 10754 struct bpf_verifier_state *state = env->cur_state; 10755 + enum bpf_prog_type type = resolve_prog_type(env->prog); 10756 + struct bpf_reg_state *reg = reg_state(env, BPF_REG_0); 10983 10757 bool refs_lingering = false; 10984 10758 int i; 10985 10759 ··· 10990 10760 10991 10761 for (i = 0; i < state->acquired_refs; i++) { 10992 10762 if (state->refs[i].type != REF_TYPE_PTR) 10763 + continue; 10764 + /* Allow struct_ops programs to return a referenced kptr back to 10765 + * kernel. Type checks are performed later in check_return_code. 
10766 + */ 10767 + if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit && 10768 + reg->ref_obj_id == state->refs[i].id) 10993 10769 continue; 10994 10770 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", 10995 10771 state->refs[i].id, state->refs[i].insn_idx); ··· 12017 11781 KF_bpf_iter_num_new, 12018 11782 KF_bpf_iter_num_next, 12019 11783 KF_bpf_iter_num_destroy, 11784 + KF_bpf_set_dentry_xattr, 11785 + KF_bpf_remove_dentry_xattr, 12020 11786 }; 12021 11787 12022 11788 BTF_SET_START(special_kfunc_set) ··· 12047 11809 BTF_ID(func, bpf_wq_set_callback_impl) 12048 11810 #ifdef CONFIG_CGROUPS 12049 11811 BTF_ID(func, bpf_iter_css_task_new) 11812 + #endif 11813 + #ifdef CONFIG_BPF_LSM 11814 + BTF_ID(func, bpf_set_dentry_xattr) 11815 + BTF_ID(func, bpf_remove_dentry_xattr) 12050 11816 #endif 12051 11817 BTF_SET_END(special_kfunc_set) 12052 11818 ··· 12101 11859 BTF_ID(func, bpf_iter_num_new) 12102 11860 BTF_ID(func, bpf_iter_num_next) 12103 11861 BTF_ID(func, bpf_iter_num_destroy) 11862 + #ifdef CONFIG_BPF_LSM 11863 + BTF_ID(func, bpf_set_dentry_xattr) 11864 + BTF_ID(func, bpf_remove_dentry_xattr) 11865 + #else 11866 + BTF_ID_UNUSED 11867 + BTF_ID_UNUSED 11868 + #endif 12104 11869 12105 11870 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) 12106 11871 { ··· 16648 16399 const char *exit_ctx = "At program exit"; 16649 16400 struct tnum enforce_attach_type_range = tnum_unknown; 16650 16401 const struct bpf_prog *prog = env->prog; 16651 - struct bpf_reg_state *reg; 16402 + struct bpf_reg_state *reg = reg_state(env, regno); 16652 16403 struct bpf_retval_range range = retval_range(0, 1); 16653 16404 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 16654 16405 int err; 16655 16406 struct bpf_func_state *frame = env->cur_state->frame[0]; 16656 16407 const bool is_subprog = frame->subprogno; 16657 16408 bool return_32bit = false; 16409 + const struct btf_type *reg_type, *ret_type = NULL; 16658 16410 16659 16411 /* LSM and struct_ops func-ptr's return type could be "void" */ 16660 16412 if (!is_subprog || frame->in_exception_callback_fn) { ··· 16664 16414 if (prog->expected_attach_type == BPF_LSM_CGROUP) 16665 16415 /* See below, can be 0 or 0-1 depending on hook. */ 16666 16416 break; 16667 - fallthrough; 16417 + if (!prog->aux->attach_func_proto->type) 16418 + return 0; 16419 + break; 16668 16420 case BPF_PROG_TYPE_STRUCT_OPS: 16669 16421 if (!prog->aux->attach_func_proto->type) 16670 16422 return 0; 16423 + 16424 + if (frame->in_exception_callback_fn) 16425 + break; 16426 + 16427 + /* Allow a struct_ops program to return a referenced kptr if it 16428 + * matches the operator's return type and is in its unmodified 16429 + * form. A scalar zero (i.e., a null pointer) is also allowed. 16430 + */ 16431 + reg_type = reg->btf ? 
btf_type_by_id(reg->btf, reg->btf_id) : NULL; 16432 + ret_type = btf_type_resolve_ptr(prog->aux->attach_btf, 16433 + prog->aux->attach_func_proto->type, 16434 + NULL); 16435 + if (ret_type && ret_type == reg_type && reg->ref_obj_id) 16436 + return __check_ptr_off_reg(env, reg, regno, false); 16671 16437 break; 16672 16438 default: 16673 16439 break; ··· 16704 16438 verbose(env, "R%d leaks addr as return value\n", regno); 16705 16439 return -EACCES; 16706 16440 } 16707 - 16708 - reg = cur_regs(env) + regno; 16709 16441 16710 16442 if (frame->in_async_callback_fn) { 16711 16443 /* enforce return zero from async callbacks like timer */ ··· 16803 16539 case BPF_PROG_TYPE_NETFILTER: 16804 16540 range = retval_range(NF_DROP, NF_ACCEPT); 16805 16541 break; 16542 + case BPF_PROG_TYPE_STRUCT_OPS: 16543 + if (!ret_type) 16544 + return 0; 16545 + range = retval_range(0, 0); 16546 + break; 16806 16547 case BPF_PROG_TYPE_EXT: 16807 16548 /* freplace program can return anything as its return value 16808 16549 * depends on the to-be-replaced kernel func or bpf program. ··· 16851 16582 subprog->changes_pkt_data = true; 16852 16583 } 16853 16584 16585 + static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off) 16586 + { 16587 + struct bpf_subprog_info *subprog; 16588 + 16589 + subprog = find_containing_subprog(env, off); 16590 + subprog->might_sleep = true; 16591 + } 16592 + 16854 16593 /* 't' is an index of a call-site. 16855 16594 * 'w' is a callee entry point. 16856 16595 * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED. ··· 16872 16595 caller = find_containing_subprog(env, t); 16873 16596 callee = find_containing_subprog(env, w); 16874 16597 caller->changes_pkt_data |= callee->changes_pkt_data; 16598 + caller->might_sleep |= callee->might_sleep; 16875 16599 } 16876 16600 16877 16601 /* non-recursive DFS pseudo code ··· 17031 16753 /* Bitmask with 1s for all caller saved registers */ 17032 16754 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1) 17033 16755 17034 - /* Return a bitmask specifying which caller saved registers are 17035 - * clobbered by a call to a helper *as if* this helper follows 17036 - * bpf_fastcall contract: 17037 - * - includes R0 if function is non-void; 17038 - * - includes R1-R5 if corresponding parameter has is described 17039 - * in the function prototype. 17040 - */ 17041 - static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn) 17042 - { 17043 - u32 mask; 17044 - int i; 17045 - 17046 - mask = 0; 17047 - if (fn->ret_type != RET_VOID) 17048 - mask |= BIT(BPF_REG_0); 17049 - for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) 17050 - if (fn->arg_type[i] != ARG_DONTCARE) 17051 - mask |= BIT(BPF_REG_1 + i); 17052 - return mask; 17053 - } 17054 - 17055 16756 /* True if do_misc_fixups() replaces calls to helper number 'imm', 17056 16757 * replacement patch is presumed to follow bpf_fastcall contract 17057 16758 * (see mark_fastcall_pattern_for_call() below). 
··· 17047 16790 } 17048 16791 } 17049 16792 17050 - /* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */ 17051 - static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta) 17052 - { 17053 - u32 vlen, i, mask; 16793 + struct call_summary { 16794 + u8 num_params; 16795 + bool is_void; 16796 + bool fastcall; 16797 + }; 17054 16798 17055 - vlen = btf_type_vlen(meta->func_proto); 17056 - mask = 0; 17057 - if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type))) 17058 - mask |= BIT(BPF_REG_0); 17059 - for (i = 0; i < vlen; ++i) 17060 - mask |= BIT(BPF_REG_1 + i); 17061 - return mask; 17062 - } 17063 - 17064 - /* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */ 17065 - static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta) 16799 + /* If @call is a kfunc or helper call, fills @cs and returns true, 16800 + * otherwise returns false. 16801 + */ 16802 + static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call, 16803 + struct call_summary *cs) 17066 16804 { 17067 - return meta->kfunc_flags & KF_FASTCALL; 16805 + struct bpf_kfunc_call_arg_meta meta; 16806 + const struct bpf_func_proto *fn; 16807 + int i; 16808 + 16809 + if (bpf_helper_call(call)) { 16810 + 16811 + if (get_helper_proto(env, call->imm, &fn) < 0) 16812 + /* error would be reported later */ 16813 + return false; 16814 + cs->fastcall = fn->allow_fastcall && 16815 + (verifier_inlines_helper_call(env, call->imm) || 16816 + bpf_jit_inlines_helper_call(call->imm)); 16817 + cs->is_void = fn->ret_type == RET_VOID; 16818 + cs->num_params = 0; 16819 + for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) { 16820 + if (fn->arg_type[i] == ARG_DONTCARE) 16821 + break; 16822 + cs->num_params++; 16823 + } 16824 + return true; 16825 + } 16826 + 16827 + if (bpf_pseudo_kfunc_call(call)) { 16828 + int err; 16829 + 16830 + err = fetch_kfunc_meta(env, call, &meta, NULL); 16831 + if (err < 0) 16832 + /* error would be reported later */ 16833 + return false; 16834 + cs->num_params = btf_type_vlen(meta.func_proto); 16835 + cs->fastcall = meta.kfunc_flags & KF_FASTCALL; 16836 + cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type)); 16837 + return true; 16838 + } 16839 + 16840 + return false; 17068 16841 } 17069 16842 17070 16843 /* LLVM define a bpf_fastcall function attribute. 
··· 17177 16890 { 17178 16891 struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx; 17179 16892 struct bpf_insn *call = &env->prog->insnsi[insn_idx]; 17180 - const struct bpf_func_proto *fn; 17181 - u32 clobbered_regs_mask = ALL_CALLER_SAVED_REGS; 16893 + u32 clobbered_regs_mask; 16894 + struct call_summary cs; 17182 16895 u32 expected_regs_mask; 17183 - bool can_be_inlined = false; 17184 16896 s16 off; 17185 16897 int i; 17186 16898 17187 - if (bpf_helper_call(call)) { 17188 - if (get_helper_proto(env, call->imm, &fn) < 0) 17189 - /* error would be reported later */ 17190 - return; 17191 - clobbered_regs_mask = helper_fastcall_clobber_mask(fn); 17192 - can_be_inlined = fn->allow_fastcall && 17193 - (verifier_inlines_helper_call(env, call->imm) || 17194 - bpf_jit_inlines_helper_call(call->imm)); 17195 - } 17196 - 17197 - if (bpf_pseudo_kfunc_call(call)) { 17198 - struct bpf_kfunc_call_arg_meta meta; 17199 - int err; 17200 - 17201 - err = fetch_kfunc_meta(env, call, &meta, NULL); 17202 - if (err < 0) 17203 - /* error would be reported later */ 17204 - return; 17205 - 17206 - clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta); 17207 - can_be_inlined = is_fastcall_kfunc_call(&meta); 17208 - } 17209 - 17210 - if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS) 16899 + if (!get_call_summary(env, call, &cs)) 17211 16900 return; 17212 16901 16902 + /* A bitmask specifying which caller saved registers are clobbered 16903 + * by a call to a helper/kfunc *as if* this helper/kfunc follows 16904 + * bpf_fastcall contract: 16905 + * - includes R0 if function is non-void; 16906 + * - includes R1-R5 if corresponding parameter has is described 16907 + * in the function prototype. 16908 + */ 16909 + clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0); 17213 16910 /* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */ 17214 16911 expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS; 17215 16912 ··· 17251 16980 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills() 17252 16981 * does not remove spill/fill pair {4,6}. 17253 16982 */ 17254 - if (can_be_inlined) 16983 + if (cs.fastcall) 17255 16984 env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1; 17256 16985 else 17257 16986 subprog->keep_fastcall_stack = 1; ··· 17333 17062 mark_prune_point(env, t); 17334 17063 mark_jmp_point(env, t); 17335 17064 } 17336 - if (bpf_helper_call(insn) && bpf_helper_changes_pkt_data(insn->imm)) 17337 - mark_subprog_changes_pkt_data(env, t); 17338 - if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 17065 + if (bpf_helper_call(insn)) { 17066 + const struct bpf_func_proto *fp; 17067 + 17068 + ret = get_helper_proto(env, insn->imm, &fp); 17069 + /* If called in a non-sleepable context program will be 17070 + * rejected anyway, so we should end up with precise 17071 + * sleepable marks on subprogs, except for dead code 17072 + * elimination. 
17073 + */ 17074 + if (ret == 0 && fp->might_sleep) 17075 + mark_subprog_might_sleep(env, t); 17076 + if (bpf_helper_changes_pkt_data(insn->imm)) 17077 + mark_subprog_changes_pkt_data(env, t); 17078 + } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 17339 17079 struct bpf_kfunc_call_arg_meta meta; 17340 17080 17341 17081 ret = fetch_kfunc_meta(env, insn, &meta, NULL); ··· 17365 17083 */ 17366 17084 mark_force_checkpoint(env, t); 17367 17085 } 17086 + /* Same as helpers, if called in a non-sleepable context 17087 + * program will be rejected anyway, so we should end up 17088 + * with precise sleepable marks on subprogs, except for 17089 + * dead code elimination. 17090 + */ 17091 + if (ret == 0 && is_kfunc_sleepable(&meta)) 17092 + mark_subprog_might_sleep(env, t); 17368 17093 } 17369 17094 return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); 17370 17095 ··· 17414 17125 static int check_cfg(struct bpf_verifier_env *env) 17415 17126 { 17416 17127 int insn_cnt = env->prog->len; 17417 - int *insn_stack, *insn_state; 17128 + int *insn_stack, *insn_state, *insn_postorder; 17418 17129 int ex_insn_beg, i, ret = 0; 17419 - bool ex_done = false; 17420 17130 17421 17131 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 17422 17132 if (!insn_state) ··· 17426 17138 kvfree(insn_state); 17427 17139 return -ENOMEM; 17428 17140 } 17141 + 17142 + insn_postorder = env->cfg.insn_postorder = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); 17143 + if (!insn_postorder) { 17144 + kvfree(insn_state); 17145 + kvfree(insn_stack); 17146 + return -ENOMEM; 17147 + } 17148 + 17149 + ex_insn_beg = env->exception_callback_subprog 17150 + ? env->subprog_info[env->exception_callback_subprog].start 17151 + : 0; 17429 17152 17430 17153 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ 17431 17154 insn_stack[0] = 0; /* 0 is the first instruction */ ··· 17451 17152 case DONE_EXPLORING: 17452 17153 insn_state[t] = EXPLORED; 17453 17154 env->cfg.cur_stack--; 17155 + insn_postorder[env->cfg.cur_postorder++] = t; 17454 17156 break; 17455 17157 case KEEP_EXPLORING: 17456 17158 break; ··· 17470 17170 goto err_free; 17471 17171 } 17472 17172 17473 - if (env->exception_callback_subprog && !ex_done) { 17474 - ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start; 17475 - 17173 + if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) { 17476 17174 insn_state[ex_insn_beg] = DISCOVERED; 17477 17175 insn_stack[0] = ex_insn_beg; 17478 17176 env->cfg.cur_stack = 1; 17479 - ex_done = true; 17480 17177 goto walk_cfg; 17481 17178 } 17482 17179 ··· 17496 17199 } 17497 17200 ret = 0; /* cfg looks good */ 17498 17201 env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data; 17202 + env->prog->aux->might_sleep = env->subprog_info[0].might_sleep; 17499 17203 17500 17204 err_free: 17501 17205 kvfree(insn_state); ··· 18113 17815 static void clean_live_states(struct bpf_verifier_env *env, int insn, 18114 17816 struct bpf_verifier_state *cur) 18115 17817 { 17818 + struct bpf_verifier_state *loop_entry; 18116 17819 struct bpf_verifier_state_list *sl; 17820 + struct list_head *pos, *head; 18117 17821 18118 - sl = *explored_state(env, insn); 18119 - while (sl) { 17822 + head = explored_state(env, insn); 17823 + list_for_each(pos, head) { 17824 + sl = container_of(pos, struct bpf_verifier_state_list, node); 18120 17825 if (sl->state.branches) 18121 - goto next; 17826 + continue; 17827 + loop_entry = get_loop_entry(env, &sl->state); 17828 + if 
(!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches) 17829 + continue; 18122 17830 if (sl->state.insn_idx != insn || 18123 17831 !same_callsites(&sl->state, cur)) 18124 - goto next; 17832 + continue; 18125 17833 clean_verifier_state(env, &sl->state); 18126 - next: 18127 - sl = sl->next; 18128 17834 } 18129 17835 } 18130 17836 ··· 18512 18210 * the current state will reach 'bpf_exit' instruction safely 18513 18211 */ 18514 18212 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, 18515 - struct bpf_func_state *cur, enum exact_level exact) 18213 + struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact) 18516 18214 { 18517 - int i; 18215 + u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before; 18216 + u16 i; 18518 18217 18519 18218 if (old->callback_depth > cur->callback_depth) 18520 18219 return false; 18521 18220 18522 18221 for (i = 0; i < MAX_BPF_REG; i++) 18523 - if (!regsafe(env, &old->regs[i], &cur->regs[i], 18222 + if (((1 << i) & live_regs) && 18223 + !regsafe(env, &old->regs[i], &cur->regs[i], 18524 18224 &env->idmap_scratch, exact)) 18525 18225 return false; 18526 18226 ··· 18543 18239 struct bpf_verifier_state *cur, 18544 18240 enum exact_level exact) 18545 18241 { 18242 + u32 insn_idx; 18546 18243 int i; 18547 18244 18548 18245 if (old->curframe != cur->curframe) ··· 18567 18262 * and all frame states need to be equivalent 18568 18263 */ 18569 18264 for (i = 0; i <= old->curframe; i++) { 18265 + insn_idx = i == old->curframe 18266 + ? env->insn_idx 18267 + : old->frame[i + 1]->callsite; 18570 18268 if (old->frame[i]->callsite != cur->frame[i]->callsite) 18571 18269 return false; 18572 - if (!func_states_equal(env, old->frame[i], cur->frame[i], exact)) 18270 + if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact)) 18573 18271 return false; 18574 18272 } 18575 18273 return true; ··· 18825 18517 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) 18826 18518 { 18827 18519 struct bpf_verifier_state_list *new_sl; 18828 - struct bpf_verifier_state_list *sl, **pprev; 18520 + struct bpf_verifier_state_list *sl; 18829 18521 struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry; 18830 18522 int i, j, n, err, states_cnt = 0; 18831 18523 bool force_new_state, add_new_state, force_exact; 18524 + struct list_head *pos, *tmp, *head; 18832 18525 18833 18526 force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) || 18834 18527 /* Avoid accumulating infinitely long jmp history */ ··· 18848 18539 env->insn_processed - env->prev_insn_processed >= 8) 18849 18540 add_new_state = true; 18850 18541 18851 - pprev = explored_state(env, insn_idx); 18852 - sl = *pprev; 18853 - 18854 18542 clean_live_states(env, insn_idx, cur); 18855 18543 18856 - while (sl) { 18544 + head = explored_state(env, insn_idx); 18545 + list_for_each_safe(pos, tmp, head) { 18546 + sl = container_of(pos, struct bpf_verifier_state_list, node); 18857 18547 states_cnt++; 18858 18548 if (sl->state.insn_idx != insn_idx) 18859 - goto next; 18549 + continue; 18860 18550 18861 18551 if (sl->state.branches) { 18862 18552 struct bpf_func_state *frame = sl->state.frame[sl->state.curframe]; ··· 18929 18621 spi = __get_spi(iter_reg->off + iter_reg->var_off.value); 18930 18622 iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr; 18931 18623 if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) { 18932 - update_loop_entry(cur, &sl->state); 18624 + update_loop_entry(env, cur, &sl->state); 18933 18625 goto hit; 
18934 18626 } 18935 18627 } ··· 18938 18630 if (is_may_goto_insn_at(env, insn_idx)) { 18939 18631 if (sl->state.may_goto_depth != cur->may_goto_depth && 18940 18632 states_equal(env, &sl->state, cur, RANGE_WITHIN)) { 18941 - update_loop_entry(cur, &sl->state); 18633 + update_loop_entry(env, cur, &sl->state); 18942 18634 goto hit; 18943 18635 } 18944 18636 } ··· 19005 18697 * 19006 18698 * Additional details are in the comment before get_loop_entry(). 19007 18699 */ 19008 - loop_entry = get_loop_entry(&sl->state); 18700 + loop_entry = get_loop_entry(env, &sl->state); 18701 + if (IS_ERR(loop_entry)) 18702 + return PTR_ERR(loop_entry); 19009 18703 force_exact = loop_entry && loop_entry->branches > 0; 19010 18704 if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) { 19011 18705 if (force_exact) 19012 - update_loop_entry(cur, loop_entry); 18706 + update_loop_entry(env, cur, loop_entry); 19013 18707 hit: 19014 18708 sl->hit_cnt++; 19015 18709 /* reached equivalent register/stack state, ··· 19060 18750 /* the state is unlikely to be useful. Remove it to 19061 18751 * speed up verification 19062 18752 */ 19063 - *pprev = sl->next; 19064 - if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE && 19065 - !sl->state.used_as_loop_entry) { 19066 - u32 br = sl->state.branches; 19067 - 19068 - WARN_ONCE(br, 19069 - "BUG live_done but branches_to_explore %d\n", 19070 - br); 19071 - free_verifier_state(&sl->state, false); 19072 - kfree(sl); 19073 - env->peak_states--; 19074 - } else { 19075 - /* cannot free this state, since parentage chain may 19076 - * walk it later. Add it for free_list instead to 19077 - * be freed at the end of verification 19078 - */ 19079 - sl->next = env->free_list; 19080 - env->free_list = sl; 19081 - } 19082 - sl = *pprev; 19083 - continue; 18753 + sl->in_free_list = true; 18754 + list_del(&sl->node); 18755 + list_add(&sl->node, &env->free_list); 18756 + env->free_list_size++; 18757 + env->explored_states_size--; 18758 + maybe_free_verifier_state(env, sl); 19084 18759 } 19085 - next: 19086 - pprev = &sl->next; 19087 - sl = *pprev; 19088 18760 } 19089 18761 19090 18762 if (env->max_states_per_insn < states_cnt) ··· 19091 18799 if (!new_sl) 19092 18800 return -ENOMEM; 19093 18801 env->total_states++; 19094 - env->peak_states++; 18802 + env->explored_states_size++; 18803 + update_peak_states(env); 19095 18804 env->prev_jmps_processed = env->jmps_processed; 19096 18805 env->prev_insn_processed = env->insn_processed; 19097 18806 ··· 19116 18823 cur->first_insn_idx = insn_idx; 19117 18824 cur->insn_hist_start = cur->insn_hist_end; 19118 18825 cur->dfs_depth = new->dfs_depth + 1; 19119 - new_sl->next = *explored_state(env, insn_idx); 19120 - *explored_state(env, insn_idx) = new_sl; 18826 + list_add(&new_sl->node, head); 18827 + 19121 18828 /* connect new state to parentage chain. Current frame needs all 19122 18829 * registers connected. 
Only r6 - r9 of the callers are alive (pushed 19123 18830 * to the stack implicitly by JITs) so in callers' frames connect just ··· 19304 19011 } 19305 19012 19306 19013 if (env->log.level & BPF_LOG_LEVEL) { 19307 - const struct bpf_insn_cbs cbs = { 19308 - .cb_call = disasm_kfunc_name, 19309 - .cb_print = verbose, 19310 - .private_data = env, 19311 - }; 19312 - 19313 19014 if (verifier_state_scratched(env)) 19314 19015 print_insn_state(env, state, state->curframe); 19315 19016 19316 19017 verbose_linfo(env, env->insn_idx, "; "); 19317 19018 env->prev_log_pos = env->log.end_pos; 19318 19019 verbose(env, "%d: ", env->insn_idx); 19319 - print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 19020 + verbose_insn(env, insn); 19320 19021 env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos; 19321 19022 env->prev_log_pos = env->log.end_pos; 19322 19023 } ··· 19332 19045 return err; 19333 19046 19334 19047 } else if (class == BPF_LDX) { 19335 - enum bpf_reg_type src_reg_type; 19048 + bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX; 19336 19049 19337 - /* check for reserved fields is already done */ 19338 - 19339 - /* check src operand */ 19340 - err = check_reg_arg(env, insn->src_reg, SRC_OP); 19341 - if (err) 19342 - return err; 19343 - 19344 - err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 19345 - if (err) 19346 - return err; 19347 - 19348 - src_reg_type = regs[insn->src_reg].type; 19349 - 19350 - /* check that memory (src_reg + off) is readable, 19351 - * the state of dst_reg will be updated by this func 19050 + /* Check for reserved fields is already done in 19051 + * resolve_pseudo_ldimm64(). 19352 19052 */ 19353 - err = check_mem_access(env, env->insn_idx, insn->src_reg, 19354 - insn->off, BPF_SIZE(insn->code), 19355 - BPF_READ, insn->dst_reg, false, 19356 - BPF_MODE(insn->code) == BPF_MEMSX); 19357 - err = err ?: save_aux_ptr_type(env, src_reg_type, true); 19358 - err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], "ldx"); 19053 + err = check_load_mem(env, insn, false, is_ldsx, true, 19054 + "ldx"); 19359 19055 if (err) 19360 19056 return err; 19361 19057 } else if (class == BPF_STX) { 19362 - enum bpf_reg_type dst_reg_type; 19363 - 19364 19058 if (BPF_MODE(insn->code) == BPF_ATOMIC) { 19365 - err = check_atomic(env, env->insn_idx, insn); 19059 + err = check_atomic(env, insn); 19366 19060 if (err) 19367 19061 return err; 19368 19062 env->insn_idx++; ··· 19355 19087 return -EINVAL; 19356 19088 } 19357 19089 19358 - /* check src1 operand */ 19359 - err = check_reg_arg(env, insn->src_reg, SRC_OP); 19360 - if (err) 19361 - return err; 19362 - /* check src2 operand */ 19363 - err = check_reg_arg(env, insn->dst_reg, SRC_OP); 19364 - if (err) 19365 - return err; 19366 - 19367 - dst_reg_type = regs[insn->dst_reg].type; 19368 - 19369 - /* check that memory (dst_reg + off) is writeable */ 19370 - err = check_mem_access(env, env->insn_idx, insn->dst_reg, 19371 - insn->off, BPF_SIZE(insn->code), 19372 - BPF_WRITE, insn->src_reg, false, false); 19373 - if (err) 19374 - return err; 19375 - 19376 - err = save_aux_ptr_type(env, dst_reg_type, false); 19090 + err = check_store_reg(env, insn, false); 19377 19091 if (err) 19378 19092 return err; 19379 19093 } else if (class == BPF_ST) { ··· 19495 19245 return err; 19496 19246 break; 19497 19247 } else { 19248 + if (WARN_ON_ONCE(env->cur_state->loop_entry)) { 19249 + verbose(env, "verifier bug: env->cur_state->loop_entry != NULL\n"); 19250 + return -EFAULT; 19251 + } 19498 19252 do_print_state = true; 19499 19253 continue; 19500 
19254 } ··· 20588 20334 { 20589 20335 struct bpf_subprog_info *subprogs = env->subprog_info; 20590 20336 const struct bpf_verifier_ops *ops = env->ops; 20591 - int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0; 20337 + int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0; 20592 20338 const int insn_cnt = env->prog->len; 20593 20339 struct bpf_insn *epilogue_buf = env->epilogue_buf; 20594 20340 struct bpf_insn *insn_buf = env->insn_buf; ··· 20617 20363 return -ENOMEM; 20618 20364 env->prog = new_prog; 20619 20365 delta += cnt - 1; 20366 + 20367 + ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1); 20368 + if (ret < 0) 20369 + return ret; 20620 20370 } 20621 20371 } 20622 20372 ··· 20641 20383 20642 20384 env->prog = new_prog; 20643 20385 delta += cnt - 1; 20386 + 20387 + ret = add_kfunc_in_insns(env, insn_buf, cnt - 1); 20388 + if (ret < 0) 20389 + return ret; 20644 20390 } 20645 20391 } 20646 20392 ··· 20677 20415 insn->code == (BPF_ST | BPF_MEM | BPF_W) || 20678 20416 insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { 20679 20417 type = BPF_WRITE; 20680 - } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) || 20418 + } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) || 20419 + insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) || 20420 + insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) || 20681 20421 insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) && 20682 20422 env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) { 20683 20423 insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code); ··· 20987 20723 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; 20988 20724 func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb; 20989 20725 func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data; 20726 + func[i]->aux->might_sleep = env->subprog_info[i].might_sleep; 20990 20727 if (!i) 20991 20728 func[i]->aux->exception_boundary = env->seen_exception; 20992 20729 func[i] = bpf_int_jit_compile(func[i]); ··· 21204 20939 */ 21205 20940 env->seen_direct_write = seen_direct_write; 21206 20941 } 20942 + 20943 + if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr] && 20944 + bpf_lsm_has_d_inode_locked(prog)) 20945 + *addr = (unsigned long)bpf_set_dentry_xattr_locked; 20946 + 20947 + if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr] && 20948 + bpf_lsm_has_d_inode_locked(prog)) 20949 + *addr = (unsigned long)bpf_remove_dentry_xattr_locked; 21207 20950 } 21208 20951 21209 20952 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux, ··· 21646 21373 goto next_insn; 21647 21374 } 21648 21375 21649 - if (is_may_goto_insn(insn)) { 21376 + if (is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) { 21377 + int stack_off_cnt = -stack_depth - 16; 21378 + 21379 + /* 21380 + * Two 8 byte slots, depth-16 stores the count, and 21381 + * depth-8 stores the start timestamp of the loop. 21382 + * 21383 + * The starting value of count is BPF_MAX_TIMED_LOOPS 21384 + * (0xffff). Every iteration loads it and subs it by 1, 21385 + * until the value becomes 0 in AX (thus, 1 in stack), 21386 + * after which we call arch_bpf_timed_may_goto, which 21387 + * either sets AX to 0xffff to keep looping, or to 0 21388 + * upon timeout. AX is then stored into the stack. In 21389 + * the next iteration, we either see 0 and break out, or 21390 + * continue iterating until the next time value is 0 21391 + * after subtraction, rinse and repeat. 
21392 + */ 21393 + stack_depth_extra = 16; 21394 + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt); 21395 + if (insn->off >= 0) 21396 + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5); 21397 + else 21398 + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); 21399 + insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); 21400 + insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2); 21401 + /* 21402 + * AX is used as an argument to pass in stack_off_cnt 21403 + * (to add to r10/fp), and also as the return value of 21404 + * the call to arch_bpf_timed_may_goto. 21405 + */ 21406 + insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt); 21407 + insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto); 21408 + insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt); 21409 + cnt = 7; 21410 + 21411 + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); 21412 + if (!new_prog) 21413 + return -ENOMEM; 21414 + 21415 + delta += cnt - 1; 21416 + env->prog = prog = new_prog; 21417 + insn = new_prog->insnsi + i + delta; 21418 + goto next_insn; 21419 + } else if (is_may_goto_insn(insn)) { 21650 21420 int stack_off = -stack_depth - 8; 21651 21421 21652 21422 stack_depth_extra = 8; ··· 22213 21897 if (subprogs[cur_subprog + 1].start == i + delta + 1) { 22214 21898 subprogs[cur_subprog].stack_depth += stack_depth_extra; 22215 21899 subprogs[cur_subprog].stack_extra = stack_depth_extra; 21900 + 21901 + stack_depth = subprogs[cur_subprog].stack_depth; 21902 + if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) { 21903 + verbose(env, "stack size %d(extra %d) is too large\n", 21904 + stack_depth, stack_depth_extra); 21905 + return -EINVAL; 21906 + } 22216 21907 cur_subprog++; 22217 21908 stack_depth = subprogs[cur_subprog].stack_depth; 22218 21909 stack_depth_extra = 0; ··· 22230 21907 22231 21908 env->prog->aux->stack_depth = subprogs[0].stack_depth; 22232 21909 for (i = 0; i < env->subprog_cnt; i++) { 21910 + int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1; 22233 21911 int subprog_start = subprogs[i].start; 22234 21912 int stack_slots = subprogs[i].stack_extra / 8; 21913 + int slots = delta, cnt = 0; 22235 21914 22236 21915 if (!stack_slots) 22237 21916 continue; 22238 - if (stack_slots > 1) { 21917 + /* We need two slots in case timed may_goto is supported. 
*/ 21918 + if (stack_slots > slots) { 22239 21919 verbose(env, "verifier bug: stack_slots supports may_goto only\n"); 22240 21920 return -EFAULT; 22241 21921 } 22242 21922 22243 - /* Add ST insn to subprog prologue to init extra stack */ 22244 - insn_buf[0] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, 22245 - -subprogs[i].stack_depth, BPF_MAX_LOOPS); 21923 + stack_depth = subprogs[i].stack_depth; 21924 + if (bpf_jit_supports_timed_may_goto()) { 21925 + insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, 21926 + BPF_MAX_TIMED_LOOPS); 21927 + insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0); 21928 + } else { 21929 + /* Add ST insn to subprog prologue to init extra stack */ 21930 + insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth, 21931 + BPF_MAX_LOOPS); 21932 + } 22246 21933 /* Copy first actual insn to preserve it */ 22247 - insn_buf[1] = env->prog->insnsi[subprog_start]; 21934 + insn_buf[cnt++] = env->prog->insnsi[subprog_start]; 22248 21935 22249 - new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, 2); 21936 + new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt); 22250 21937 if (!new_prog) 22251 21938 return -ENOMEM; 22252 21939 env->prog = prog = new_prog; ··· 22266 21933 * to insn after BPF_ST that inits may_goto count. 22267 21934 * Adjustment will succeed because bpf_patch_insn_data() didn't fail. 22268 21935 */ 22269 - WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1)); 21936 + WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta)); 22270 21937 } 22271 21938 22272 21939 /* Since poke tab is now finalized, publish aux to tracker. */ ··· 22464 22131 22465 22132 static void free_states(struct bpf_verifier_env *env) 22466 22133 { 22467 - struct bpf_verifier_state_list *sl, *sln; 22134 + struct bpf_verifier_state_list *sl; 22135 + struct list_head *head, *pos, *tmp; 22468 22136 int i; 22469 22137 22470 - sl = env->free_list; 22471 - while (sl) { 22472 - sln = sl->next; 22138 + list_for_each_safe(pos, tmp, &env->free_list) { 22139 + sl = container_of(pos, struct bpf_verifier_state_list, node); 22473 22140 free_verifier_state(&sl->state, false); 22474 22141 kfree(sl); 22475 - sl = sln; 22476 22142 } 22477 - env->free_list = NULL; 22143 + INIT_LIST_HEAD(&env->free_list); 22478 22144 22479 22145 if (!env->explored_states) 22480 22146 return; 22481 22147 22482 22148 for (i = 0; i < state_htab_size(env); i++) { 22483 - sl = env->explored_states[i]; 22149 + head = &env->explored_states[i]; 22484 22150 22485 - while (sl) { 22486 - sln = sl->next; 22151 + list_for_each_safe(pos, tmp, head) { 22152 + sl = container_of(pos, struct bpf_verifier_state_list, node); 22487 22153 free_verifier_state(&sl->state, false); 22488 22154 kfree(sl); 22489 - sl = sln; 22490 22155 } 22491 - env->explored_states[i] = NULL; 22156 + INIT_LIST_HEAD(&env->explored_states[i]); 22492 22157 } 22493 22158 } 22494 22159 ··· 22494 22163 { 22495 22164 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); 22496 22165 struct bpf_subprog_info *sub = subprog_info(env, subprog); 22166 + struct bpf_prog_aux *aux = env->prog->aux; 22497 22167 struct bpf_verifier_state *state; 22498 22168 struct bpf_reg_state *regs; 22499 22169 int ret, i; ··· 22600 22268 /* 1st arg to a function */ 22601 22269 regs[BPF_REG_1].type = PTR_TO_CTX; 22602 22270 mark_reg_known_zero(env, regs, BPF_REG_1); 22271 + } 22272 + 22273 + /* Acquire references for struct_ops program arguments tagged with "__ref" */ 22274 + if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) { 22275 + for (i = 0; i < 
aux->ctx_arg_info_size; i++) 22276 + aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ? 22277 + acquire_reference(env, 0) : 0; 22603 22278 } 22604 22279 22605 22280 ret = do_check(env); ··· 22731 22392 env->peak_states, env->longest_mark_read_walk); 22732 22393 } 22733 22394 22395 + int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, 22396 + const struct bpf_ctx_arg_aux *info, u32 cnt) 22397 + { 22398 + prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL); 22399 + prog->aux->ctx_arg_info_size = cnt; 22400 + 22401 + return prog->aux->ctx_arg_info ? 0 : -ENOMEM; 22402 + } 22403 + 22734 22404 static int check_struct_ops_btf_id(struct bpf_verifier_env *env) 22735 22405 { 22736 22406 const struct btf_type *t, *func_proto; ··· 22747 22399 const struct bpf_struct_ops *st_ops; 22748 22400 const struct btf_member *member; 22749 22401 struct bpf_prog *prog = env->prog; 22750 - u32 btf_id, member_idx; 22402 + bool has_refcounted_arg = false; 22403 + u32 btf_id, member_idx, member_off; 22751 22404 struct btf *btf; 22752 22405 const char *mname; 22753 - int err; 22406 + int i, err; 22754 22407 22755 22408 if (!prog->gpl_compatible) { 22756 22409 verbose(env, "struct ops programs must have a GPL compatible license\n"); ··· 22799 22450 return -EINVAL; 22800 22451 } 22801 22452 22802 - err = bpf_struct_ops_supported(st_ops, __btf_member_bit_offset(t, member) / 8); 22453 + member_off = __btf_member_bit_offset(t, member) / 8; 22454 + err = bpf_struct_ops_supported(st_ops, member_off); 22803 22455 if (err) { 22804 22456 verbose(env, "attach to unsupported member %s of struct %s\n", 22805 22457 mname, st_ops->name); ··· 22822 22472 return -EACCES; 22823 22473 } 22824 22474 22825 - /* btf_ctx_access() used this to provide argument type info */ 22826 - prog->aux->ctx_arg_info = 22827 - st_ops_desc->arg_info[member_idx].info; 22828 - prog->aux->ctx_arg_info_size = 22829 - st_ops_desc->arg_info[member_idx].cnt; 22475 + for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) { 22476 + if (st_ops_desc->arg_info[member_idx].info->refcounted) { 22477 + has_refcounted_arg = true; 22478 + break; 22479 + } 22480 + } 22481 + 22482 + /* Tail call is not allowed for programs with refcounted arguments since we 22483 + * cannot guarantee that valid refcounted kptrs will be passed to the callee. 
22484 + */ 22485 + for (i = 0; i < env->subprog_cnt; i++) { 22486 + if (has_refcounted_arg && env->subprog_info[i].has_tail_call) { 22487 + verbose(env, "program with __ref argument cannot tail call\n"); 22488 + return -EINVAL; 22489 + } 22490 + } 22491 + 22492 + prog->aux->st_ops = st_ops; 22493 + prog->aux->attach_st_ops_member_off = member_off; 22830 22494 22831 22495 prog->aux->attach_func_proto = func_proto; 22832 22496 prog->aux->attach_func_name = mname; 22833 22497 env->ops = st_ops->verifier_ops; 22834 22498 22835 - return 0; 22499 + return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info, 22500 + st_ops_desc->arg_info[member_idx].cnt); 22836 22501 } 22837 22502 #define SECURITY_PREFIX "security_" 22838 22503 ··· 22923 22558 if (tgt_prog) { 22924 22559 struct bpf_prog_aux *aux = tgt_prog->aux; 22925 22560 bool tgt_changes_pkt_data; 22561 + bool tgt_might_sleep; 22926 22562 22927 22563 if (bpf_prog_is_dev_bound(prog->aux) && 22928 22564 !bpf_prog_dev_bound_match(prog, tgt_prog)) { ··· 22964 22598 if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) { 22965 22599 bpf_log(log, 22966 22600 "Extension program changes packet data, while original does not\n"); 22601 + return -EINVAL; 22602 + } 22603 + 22604 + tgt_might_sleep = aux->func 22605 + ? aux->func[subprog]->aux->might_sleep 22606 + : aux->might_sleep; 22607 + if (prog->aux->might_sleep && !tgt_might_sleep) { 22608 + bpf_log(log, 22609 + "Extension program may sleep, while original does not\n"); 22967 22610 return -EINVAL; 22968 22611 } 22969 22612 } ··· 23231 22856 #endif 23232 22857 BTF_SET_END(btf_id_deny) 23233 22858 22859 + /* fexit and fmod_ret can't be used to attach to __noreturn functions. 22860 + * Currently, we must manually list all __noreturn functions here. Once a more 22861 + * robust solution is implemented, this workaround can be removed. 
22862 + */ 22863 + BTF_SET_START(noreturn_deny) 22864 + #ifdef CONFIG_IA32_EMULATION 22865 + BTF_ID(func, __ia32_sys_exit) 22866 + BTF_ID(func, __ia32_sys_exit_group) 22867 + #endif 22868 + #ifdef CONFIG_KUNIT 22869 + BTF_ID(func, __kunit_abort) 22870 + BTF_ID(func, kunit_try_catch_throw) 22871 + #endif 22872 + #ifdef CONFIG_MODULES 22873 + BTF_ID(func, __module_put_and_kthread_exit) 22874 + #endif 22875 + #ifdef CONFIG_X86_64 22876 + BTF_ID(func, __x64_sys_exit) 22877 + BTF_ID(func, __x64_sys_exit_group) 22878 + #endif 22879 + BTF_ID(func, do_exit) 22880 + BTF_ID(func, do_group_exit) 22881 + BTF_ID(func, kthread_complete_and_exit) 22882 + BTF_ID(func, kthread_exit) 22883 + BTF_ID(func, make_task_dead) 22884 + BTF_SET_END(noreturn_deny) 22885 + 23234 22886 static bool can_be_sleepable(struct bpf_prog *prog) 23235 22887 { 23236 22888 if (prog->type == BPF_PROG_TYPE_TRACING) { ··· 23334 22932 prog->aux->attach_btf_trace = true; 23335 22933 return 0; 23336 22934 } else if (prog->expected_attach_type == BPF_TRACE_ITER) { 23337 - if (!bpf_iter_prog_supported(prog)) 23338 - return -EINVAL; 23339 - return 0; 22935 + return bpf_iter_prog_supported(prog); 23340 22936 } 23341 22937 23342 22938 if (prog->type == BPF_PROG_TYPE_LSM) { ··· 23343 22943 return ret; 23344 22944 } else if (prog->type == BPF_PROG_TYPE_TRACING && 23345 22945 btf_id_set_contains(&btf_id_deny, btf_id)) { 22946 + return -EINVAL; 22947 + } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT || 22948 + prog->expected_attach_type == BPF_MODIFY_RETURN) && 22949 + btf_id_set_contains(&noreturn_deny, btf_id)) { 22950 + verbose(env, "Attaching fexit/fmod_ret to __noreturn functions is rejected.\n"); 23346 22951 return -EINVAL; 23347 22952 } 23348 22953 ··· 23441 23036 return 0; 23442 23037 } 23443 23038 23039 + static bool can_fallthrough(struct bpf_insn *insn) 23040 + { 23041 + u8 class = BPF_CLASS(insn->code); 23042 + u8 opcode = BPF_OP(insn->code); 23043 + 23044 + if (class != BPF_JMP && class != BPF_JMP32) 23045 + return true; 23046 + 23047 + if (opcode == BPF_EXIT || opcode == BPF_JA) 23048 + return false; 23049 + 23050 + return true; 23051 + } 23052 + 23053 + static bool can_jump(struct bpf_insn *insn) 23054 + { 23055 + u8 class = BPF_CLASS(insn->code); 23056 + u8 opcode = BPF_OP(insn->code); 23057 + 23058 + if (class != BPF_JMP && class != BPF_JMP32) 23059 + return false; 23060 + 23061 + switch (opcode) { 23062 + case BPF_JA: 23063 + case BPF_JEQ: 23064 + case BPF_JNE: 23065 + case BPF_JLT: 23066 + case BPF_JLE: 23067 + case BPF_JGT: 23068 + case BPF_JGE: 23069 + case BPF_JSGT: 23070 + case BPF_JSGE: 23071 + case BPF_JSLT: 23072 + case BPF_JSLE: 23073 + case BPF_JCOND: 23074 + return true; 23075 + } 23076 + 23077 + return false; 23078 + } 23079 + 23080 + static int insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]) 23081 + { 23082 + struct bpf_insn *insn = &prog->insnsi[idx]; 23083 + int i = 0, insn_sz; 23084 + u32 dst; 23085 + 23086 + insn_sz = bpf_is_ldimm64(insn) ? 
2 : 1; 23087 + if (can_fallthrough(insn) && idx + 1 < prog->len) 23088 + succ[i++] = idx + insn_sz; 23089 + 23090 + if (can_jump(insn)) { 23091 + dst = idx + jmp_offset(insn) + 1; 23092 + if (i == 0 || succ[0] != dst) 23093 + succ[i++] = dst; 23094 + } 23095 + 23096 + return i; 23097 + } 23098 + 23099 + /* Each field is a register bitmask */ 23100 + struct insn_live_regs { 23101 + u16 use; /* registers read by instruction */ 23102 + u16 def; /* registers written by instruction */ 23103 + u16 in; /* registers that may be alive before instruction */ 23104 + u16 out; /* registers that may be alive after instruction */ 23105 + }; 23106 + 23107 + /* Bitmask with 1s for all caller saved registers */ 23108 + #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1) 23109 + 23110 + /* Compute info->{use,def} fields for the instruction */ 23111 + static void compute_insn_live_regs(struct bpf_verifier_env *env, 23112 + struct bpf_insn *insn, 23113 + struct insn_live_regs *info) 23114 + { 23115 + struct call_summary cs; 23116 + u8 class = BPF_CLASS(insn->code); 23117 + u8 code = BPF_OP(insn->code); 23118 + u8 mode = BPF_MODE(insn->code); 23119 + u16 src = BIT(insn->src_reg); 23120 + u16 dst = BIT(insn->dst_reg); 23121 + u16 r0 = BIT(0); 23122 + u16 def = 0; 23123 + u16 use = 0xffff; 23124 + 23125 + switch (class) { 23126 + case BPF_LD: 23127 + switch (mode) { 23128 + case BPF_IMM: 23129 + if (BPF_SIZE(insn->code) == BPF_DW) { 23130 + def = dst; 23131 + use = 0; 23132 + } 23133 + break; 23134 + case BPF_LD | BPF_ABS: 23135 + case BPF_LD | BPF_IND: 23136 + /* stick with defaults */ 23137 + break; 23138 + } 23139 + break; 23140 + case BPF_LDX: 23141 + switch (mode) { 23142 + case BPF_MEM: 23143 + case BPF_MEMSX: 23144 + def = dst; 23145 + use = src; 23146 + break; 23147 + } 23148 + break; 23149 + case BPF_ST: 23150 + switch (mode) { 23151 + case BPF_MEM: 23152 + def = 0; 23153 + use = dst; 23154 + break; 23155 + } 23156 + break; 23157 + case BPF_STX: 23158 + switch (mode) { 23159 + case BPF_MEM: 23160 + def = 0; 23161 + use = dst | src; 23162 + break; 23163 + case BPF_ATOMIC: 23164 + switch (insn->imm) { 23165 + case BPF_CMPXCHG: 23166 + use = r0 | dst | src; 23167 + def = r0; 23168 + break; 23169 + case BPF_LOAD_ACQ: 23170 + def = dst; 23171 + use = src; 23172 + break; 23173 + case BPF_STORE_REL: 23174 + def = 0; 23175 + use = dst | src; 23176 + break; 23177 + default: 23178 + use = dst | src; 23179 + if (insn->imm & BPF_FETCH) 23180 + def = src; 23181 + else 23182 + def = 0; 23183 + } 23184 + break; 23185 + } 23186 + break; 23187 + case BPF_ALU: 23188 + case BPF_ALU64: 23189 + switch (code) { 23190 + case BPF_END: 23191 + use = dst; 23192 + def = dst; 23193 + break; 23194 + case BPF_MOV: 23195 + def = dst; 23196 + if (BPF_SRC(insn->code) == BPF_K) 23197 + use = 0; 23198 + else 23199 + use = src; 23200 + break; 23201 + default: 23202 + def = dst; 23203 + if (BPF_SRC(insn->code) == BPF_K) 23204 + use = dst; 23205 + else 23206 + use = dst | src; 23207 + } 23208 + break; 23209 + case BPF_JMP: 23210 + case BPF_JMP32: 23211 + switch (code) { 23212 + case BPF_JA: 23213 + case BPF_JCOND: 23214 + def = 0; 23215 + use = 0; 23216 + break; 23217 + case BPF_EXIT: 23218 + def = 0; 23219 + use = r0; 23220 + break; 23221 + case BPF_CALL: 23222 + def = ALL_CALLER_SAVED_REGS; 23223 + use = def & ~BIT(BPF_REG_0); 23224 + if (get_call_summary(env, insn, &cs)) 23225 + use = GENMASK(cs.num_params, 1); 23226 + break; 23227 + default: 23228 + def = 0; 23229 + if (BPF_SRC(insn->code) == BPF_K) 23230 + use = dst; 23231 + 
else 23232 + use = dst | src; 23233 + } 23234 + break; 23235 + } 23236 + 23237 + info->def = def; 23238 + info->use = use; 23239 + } 23240 + 23241 + /* Compute may-live registers after each instruction in the program. 23242 + * The register is live after the instruction I if it is read by some 23243 + * instruction S following I during program execution and is not 23244 + * overwritten between I and S. 23245 + * 23246 + * Store result in env->insn_aux_data[i].live_regs. 23247 + */ 23248 + static int compute_live_registers(struct bpf_verifier_env *env) 23249 + { 23250 + struct bpf_insn_aux_data *insn_aux = env->insn_aux_data; 23251 + struct bpf_insn *insns = env->prog->insnsi; 23252 + struct insn_live_regs *state; 23253 + int insn_cnt = env->prog->len; 23254 + int err = 0, i, j; 23255 + bool changed; 23256 + 23257 + /* Use the following algorithm: 23258 + * - define the following: 23259 + * - I.use : a set of all registers read by instruction I; 23260 + * - I.def : a set of all registers written by instruction I; 23261 + * - I.in : a set of all registers that may be alive before I execution; 23262 + * - I.out : a set of all registers that may be alive after I execution; 23263 + * - insn_successors(I): a set of instructions S that might immediately 23264 + * follow I for some program execution; 23265 + * - associate separate empty sets 'I.in' and 'I.out' with each instruction; 23266 + * - visit each instruction in a postorder and update 23267 + * state[i].in, state[i].out as follows: 23268 + * 23269 + * state[i].out = U [state[s].in for S in insn_successors(i)] 23270 + * state[i].in = (state[i].out / state[i].def) U state[i].use 23271 + * 23272 + * (where U stands for set union, / stands for set difference) 23273 + * - repeat the computation while {in,out} fields changes for 23274 + * any instruction. 
23275 + */ 23276 + state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL); 23277 + if (!state) { 23278 + err = -ENOMEM; 23279 + goto out; 23280 + } 23281 + 23282 + for (i = 0; i < insn_cnt; ++i) 23283 + compute_insn_live_regs(env, &insns[i], &state[i]); 23284 + 23285 + changed = true; 23286 + while (changed) { 23287 + changed = false; 23288 + for (i = 0; i < env->cfg.cur_postorder; ++i) { 23289 + int insn_idx = env->cfg.insn_postorder[i]; 23290 + struct insn_live_regs *live = &state[insn_idx]; 23291 + int succ_num; 23292 + u32 succ[2]; 23293 + u16 new_out = 0; 23294 + u16 new_in = 0; 23295 + 23296 + succ_num = insn_successors(env->prog, insn_idx, succ); 23297 + for (int s = 0; s < succ_num; ++s) 23298 + new_out |= state[succ[s]].in; 23299 + new_in = (new_out & ~live->def) | live->use; 23300 + if (new_out != live->out || new_in != live->in) { 23301 + live->in = new_in; 23302 + live->out = new_out; 23303 + changed = true; 23304 + } 23305 + } 23306 + } 23307 + 23308 + for (i = 0; i < insn_cnt; ++i) 23309 + insn_aux[i].live_regs_before = state[i].in; 23310 + 23311 + if (env->log.level & BPF_LOG_LEVEL2) { 23312 + verbose(env, "Live regs before insn:\n"); 23313 + for (i = 0; i < insn_cnt; ++i) { 23314 + verbose(env, "%3d: ", i); 23315 + for (j = BPF_REG_0; j < BPF_REG_10; ++j) 23316 + if (insn_aux[i].live_regs_before & BIT(j)) 23317 + verbose(env, "%d", j); 23318 + else 23319 + verbose(env, "."); 23320 + verbose(env, " "); 23321 + verbose_insn(env, &insns[i]); 23322 + if (bpf_is_ldimm64(&insns[i])) 23323 + i++; 23324 + } 23325 + } 23326 + 23327 + out: 23328 + kvfree(state); 23329 + kvfree(env->cfg.insn_postorder); 23330 + env->cfg.insn_postorder = NULL; 23331 + env->cfg.cur_postorder = 0; 23332 + return err; 23333 + } 23334 + 23444 23335 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) 23445 23336 { 23446 23337 u64 start_time = ktime_get_ns(); ··· 23814 23113 env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; 23815 23114 23816 23115 env->explored_states = kvcalloc(state_htab_size(env), 23817 - sizeof(struct bpf_verifier_state_list *), 23116 + sizeof(struct list_head), 23818 23117 GFP_USER); 23819 23118 ret = -ENOMEM; 23820 23119 if (!env->explored_states) 23821 23120 goto skip_full_check; 23121 + 23122 + for (i = 0; i < state_htab_size(env); i++) 23123 + INIT_LIST_HEAD(&env->explored_states[i]); 23124 + INIT_LIST_HEAD(&env->free_list); 23822 23125 23823 23126 ret = check_btf_info_early(env, attr, uattr); 23824 23127 if (ret < 0) ··· 23856 23151 23857 23152 ret = check_attach_btf_id(env); 23858 23153 if (ret) 23154 + goto skip_full_check; 23155 + 23156 + ret = compute_live_registers(env); 23157 + if (ret < 0) 23859 23158 goto skip_full_check; 23860 23159 23861 23160 ret = mark_fastcall_patterns(env); ··· 24000 23291 vfree(env->insn_aux_data); 24001 23292 kvfree(env->insn_hist); 24002 23293 err_free_env: 23294 + kvfree(env->cfg.insn_postorder); 24003 23295 kvfree(env); 24004 23296 return ret; 24005 23297 }
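A note on the liveness pass added above: compute_live_registers() spells out its dataflow equations in the comment block (out[i] is the union of in[s] over insn_successors(i), and in[i] = (out[i] minus def[i]) union use[i], iterated until nothing changes). The self-contained userspace C sketch below runs the same fixed-point iteration over a hand-encoded four-instruction toy program; the toy_insn layout and the example program are invented purely for illustration, and the real pass derives use/def masks from actual BPF instructions via compute_insn_live_regs() and visits them in CFG postorder rather than in simple reverse program order.

/* toy_liveness.c - illustrative only, not kernel code. */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct toy_insn {
	uint16_t use;    /* registers read by the instruction */
	uint16_t def;    /* registers written by the instruction */
	int succ[2];     /* successor indices, -1 if unused */
};

#define NR_INSNS 4

int main(void)
{
	/* Toy program: r1 = r2; r2 = 0; if r1 goto insn 1; exit(r0) */
	struct toy_insn prog[NR_INSNS] = {
		{ .use = 1 << 2, .def = 1 << 1, .succ = {  1, -1 } },
		{ .use = 0,      .def = 1 << 2, .succ = {  2, -1 } },
		{ .use = 1 << 1, .def = 0,      .succ = {  3,  1 } },
		{ .use = 1 << 0, .def = 0,      .succ = { -1, -1 } },
	};
	uint16_t in[NR_INSNS] = {0}, out[NR_INSNS] = {0};
	bool changed = true;

	while (changed) {
		changed = false;
		for (int i = NR_INSNS - 1; i >= 0; i--) {
			uint16_t new_out = 0, new_in;

			/* out[i] = union of in[s] over successors of i */
			for (int s = 0; s < 2; s++)
				if (prog[i].succ[s] >= 0)
					new_out |= in[prog[i].succ[s]];
			/* in[i] = (out[i] \ def[i]) U use[i] */
			new_in = (new_out & ~prog[i].def) | prog[i].use;
			if (new_in != in[i] || new_out != out[i]) {
				in[i] = new_in;
				out[i] = new_out;
				changed = true;
			}
		}
	}

	for (int i = 0; i < NR_INSNS; i++)
		printf("insn %d: live before = %#x\n", i, (unsigned)in[i]);
	return 0;
}

The live-before mask per instruction is what the real pass stores in insn_aux_data[i].live_regs_before, which func_states_equal() above then uses to skip regsafe() checks for registers that are dead at the comparison point.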
+10 -4
kernel/trace/bpf_trace.c
··· 392 392 .arg2_type = ARG_CONST_SIZE, 393 393 }; 394 394 395 - static void __set_printk_clr_event(void) 395 + static void __set_printk_clr_event(struct work_struct *work) 396 396 { 397 397 /* 398 398 * This program might be calling bpf_trace_printk, ··· 405 405 if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1)) 406 406 pr_warn_ratelimited("could not enable bpf_trace_printk events"); 407 407 } 408 + static DECLARE_WORK(set_printk_work, __set_printk_clr_event); 408 409 409 410 const struct bpf_func_proto *bpf_get_trace_printk_proto(void) 410 411 { 411 - __set_printk_clr_event(); 412 + schedule_work(&set_printk_work); 412 413 return &bpf_trace_printk_proto; 413 414 } 414 415 ··· 452 451 453 452 const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) 454 453 { 455 - __set_printk_clr_event(); 454 + schedule_work(&set_printk_work); 456 455 return &bpf_trace_vprintk_proto; 457 456 } 458 457 ··· 606 605 .arg3_type = ARG_PTR_TO_UNINIT_MEM, 607 606 .arg4_type = ARG_CONST_SIZE, 608 607 }; 608 + 609 + const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void) 610 + { 611 + return &bpf_perf_event_read_value_proto; 612 + } 609 613 610 614 static __always_inline u64 611 615 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, ··· 849 843 if (unlikely(is_global_init(task))) 850 844 return -EPERM; 851 845 852 - if (!preemptible()) { 846 + if (preempt_count() != 0 || irqs_disabled()) { 853 847 /* Do an early check on signal validity. Otherwise, 854 848 * the error is lost in deferred irq_work. 855 849 */
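The change above defers enabling the bpf_trace_printk trace event to a workqueue: trace_set_clr_event() takes a mutex and may sleep, while bpf_get_trace_printk_proto() can be reached from contexts where sleeping is not allowed. A minimal sketch of the same DECLARE_WORK()/schedule_work() pattern, with illustrative names:

#include <linux/workqueue.h>
#include <linux/printk.h>

static void slow_enable_fn(struct work_struct *work)
{
	/* runs later in process context, where sleeping is allowed */
	pr_info("deferred enable ran\n");
}
static DECLARE_WORK(slow_enable_work, slow_enable_fn);

static void hot_path_hook(void)
{
	/* safe from atomic context: this only queues the work item */
	schedule_work(&slow_enable_work);
}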
+118
mm/memory.c
··· 6807 6807 } 6808 6808 EXPORT_SYMBOL_GPL(access_process_vm); 6809 6809 6810 + #ifdef CONFIG_BPF_SYSCALL 6811 + /* 6812 + * Copy a string from another process's address space as given in mm. 6813 + * If there is any error return -EFAULT. 6814 + */ 6815 + static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr, 6816 + void *buf, int len, unsigned int gup_flags) 6817 + { 6818 + void *old_buf = buf; 6819 + int err = 0; 6820 + 6821 + *(char *)buf = '\0'; 6822 + 6823 + if (mmap_read_lock_killable(mm)) 6824 + return -EFAULT; 6825 + 6826 + addr = untagged_addr_remote(mm, addr); 6827 + 6828 + /* Avoid triggering the temporary warning in __get_user_pages */ 6829 + if (!vma_lookup(mm, addr)) { 6830 + err = -EFAULT; 6831 + goto out; 6832 + } 6833 + 6834 + while (len) { 6835 + int bytes, offset, retval; 6836 + void *maddr; 6837 + struct page *page; 6838 + struct vm_area_struct *vma = NULL; 6839 + 6840 + page = get_user_page_vma_remote(mm, addr, gup_flags, &vma); 6841 + if (IS_ERR(page)) { 6842 + /* 6843 + * Treat as a total failure for now until we decide how 6844 + * to handle the CONFIG_HAVE_IOREMAP_PROT case and 6845 + * stack expansion. 6846 + */ 6847 + *(char *)buf = '\0'; 6848 + err = -EFAULT; 6849 + goto out; 6850 + } 6851 + 6852 + bytes = len; 6853 + offset = addr & (PAGE_SIZE - 1); 6854 + if (bytes > PAGE_SIZE - offset) 6855 + bytes = PAGE_SIZE - offset; 6856 + 6857 + maddr = kmap_local_page(page); 6858 + retval = strscpy(buf, maddr + offset, bytes); 6859 + if (retval >= 0) { 6860 + /* Found the end of the string */ 6861 + buf += retval; 6862 + unmap_and_put_page(page, maddr); 6863 + break; 6864 + } 6865 + 6866 + buf += bytes - 1; 6867 + /* 6868 + * Because strscpy always NUL terminates we need to 6869 + * copy the last byte in the page if we are going to 6870 + * load more pages 6871 + */ 6872 + if (bytes != len) { 6873 + addr += bytes - 1; 6874 + copy_from_user_page(vma, page, addr, buf, maddr + (PAGE_SIZE - 1), 1); 6875 + buf += 1; 6876 + addr += 1; 6877 + } 6878 + len -= bytes; 6879 + 6880 + unmap_and_put_page(page, maddr); 6881 + } 6882 + 6883 + out: 6884 + mmap_read_unlock(mm); 6885 + if (err) 6886 + return err; 6887 + return buf - old_buf; 6888 + } 6889 + 6890 + /** 6891 + * copy_remote_vm_str - copy a string from another process's address space. 6892 + * @tsk: the task of the target address space 6893 + * @addr: start address to read from 6894 + * @buf: destination buffer 6895 + * @len: number of bytes to copy 6896 + * @gup_flags: flags modifying lookup behaviour 6897 + * 6898 + * The caller must hold a reference on @mm. 6899 + * 6900 + * Return: number of bytes copied from @addr (source) to @buf (destination); 6901 + * not including the trailing NUL. Always guaranteed to leave NUL-terminated 6902 + * buffer. On any error, return -EFAULT. 6903 + */ 6904 + int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, 6905 + void *buf, int len, unsigned int gup_flags) 6906 + { 6907 + struct mm_struct *mm; 6908 + int ret; 6909 + 6910 + if (unlikely(len == 0)) 6911 + return 0; 6912 + 6913 + mm = get_task_mm(tsk); 6914 + if (!mm) { 6915 + *(char *)buf = '\0'; 6916 + return -EFAULT; 6917 + } 6918 + 6919 + ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags); 6920 + 6921 + mmput(mm); 6922 + 6923 + return ret; 6924 + } 6925 + EXPORT_SYMBOL_GPL(copy_remote_vm_str); 6926 + #endif /* CONFIG_BPF_SYSCALL */ 6927 + 6810 6928 /* 6811 6929 * Print the name of a VMA. 6812 6930 */
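A hedged sketch of a kernel-side caller of the new copy_remote_vm_str() helper; the wrapper name is made up and the prototype is assumed to be available from <linux/mm.h>, next to access_remote_vm().

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/printk.h>

/* hypothetical wrapper: read a NUL-terminated string from @tsk's user memory */
static int read_remote_string(struct task_struct *tsk, unsigned long uaddr,
			      char *buf, int buf_len)
{
	int n;

	/* returns bytes copied excluding the trailing NUL, or -EFAULT;
	 * buf is always left NUL-terminated */
	n = copy_remote_vm_str(tsk, uaddr, buf, buf_len, 0);
	if (n < 0)
		return n;

	pr_debug("read %d-byte string: %s\n", n, buf);
	return n;
}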
+79
mm/nommu.c
··· 1714 1714 } 1715 1715 EXPORT_SYMBOL_GPL(access_process_vm); 1716 1716 1717 + #ifdef CONFIG_BPF_SYSCALL 1718 + /* 1719 + * Copy a string from another process's address space as given in mm. 1720 + * If there is any error return -EFAULT. 1721 + */ 1722 + static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr, 1723 + void *buf, int len) 1724 + { 1725 + unsigned long addr_end; 1726 + struct vm_area_struct *vma; 1727 + int ret = -EFAULT; 1728 + 1729 + *(char *)buf = '\0'; 1730 + 1731 + if (mmap_read_lock_killable(mm)) 1732 + return ret; 1733 + 1734 + /* the access must start within one of the target process's mappings */ 1735 + vma = find_vma(mm, addr); 1736 + if (!vma) 1737 + goto out; 1738 + 1739 + if (check_add_overflow(addr, len, &addr_end)) 1740 + goto out; 1741 + 1742 + /* don't overrun this mapping */ 1743 + if (addr_end > vma->vm_end) 1744 + len = vma->vm_end - addr; 1745 + 1746 + /* only read mappings where it is permitted */ 1747 + if (vma->vm_flags & VM_MAYREAD) { 1748 + ret = strscpy(buf, (char *)addr, len); 1749 + if (ret < 0) 1750 + ret = len - 1; 1751 + } 1752 + 1753 + out: 1754 + mmap_read_unlock(mm); 1755 + return ret; 1756 + } 1757 + 1758 + /** 1759 + * copy_remote_vm_str - copy a string from another process's address space. 1760 + * @tsk: the task of the target address space 1761 + * @addr: start address to read from 1762 + * @buf: destination buffer 1763 + * @len: number of bytes to copy 1764 + * @gup_flags: flags modifying lookup behaviour (unused) 1765 + * 1766 + * The caller must hold a reference on @mm. 1767 + * 1768 + * Return: number of bytes copied from @addr (source) to @buf (destination); 1769 + * not including the trailing NUL. Always guaranteed to leave NUL-terminated 1770 + * buffer. On any error, return -EFAULT. 1771 + */ 1772 + int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, 1773 + void *buf, int len, unsigned int gup_flags) 1774 + { 1775 + struct mm_struct *mm; 1776 + int ret; 1777 + 1778 + if (unlikely(len == 0)) 1779 + return 0; 1780 + 1781 + mm = get_task_mm(tsk); 1782 + if (!mm) { 1783 + *(char *)buf = '\0'; 1784 + return -EFAULT; 1785 + } 1786 + 1787 + ret = __copy_remote_vm_str(mm, addr, buf, len); 1788 + 1789 + mmput(mm); 1790 + 1791 + return ret; 1792 + } 1793 + EXPORT_SYMBOL_GPL(copy_remote_vm_str); 1794 + #endif /* CONFIG_BPF_SYSCALL */ 1795 + 1717 1796 /** 1718 1797 * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode 1719 1798 * @inode: The inode to check
+4 -2
net/core/filter.c
··· 8137 8137 return &bpf_skb_load_bytes_relative_proto; 8138 8138 case BPF_FUNC_get_socket_cookie: 8139 8139 return &bpf_get_socket_cookie_proto; 8140 + case BPF_FUNC_get_netns_cookie: 8141 + return &bpf_get_netns_cookie_proto; 8140 8142 case BPF_FUNC_get_socket_uid: 8141 8143 return &bpf_get_socket_uid_proto; 8142 8144 case BPF_FUNC_perf_event_output: ··· 9699 9697 9700 9698 case offsetof(struct __sk_buff, queue_mapping): 9701 9699 if (type == BPF_WRITE) { 9702 - u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); 9700 + u32 offset = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); 9703 9701 9704 9702 if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) { 9705 9703 *insn++ = BPF_JMP_A(0); /* noop */ ··· 9708 9706 9709 9707 if (BPF_CLASS(si->code) == BPF_STX) 9710 9708 *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); 9711 - *insn++ = BPF_EMIT_STORE(BPF_H, si, off); 9709 + *insn++ = BPF_EMIT_STORE(BPF_H, si, offset); 9712 9710 } else { 9713 9711 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, 9714 9712 bpf_target_off(struct sk_buff,
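The first hunk above adds bpf_get_netns_cookie() to one of the per-program-type helper lists in filter.c. Assuming that switch is the cgroup-skb helper set, a program along these lines can now call the helper; the section name, hook direction and bpf_printk() logging are purely illustrative.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int log_netns_cookie(struct __sk_buff *skb)
{
	__u64 cookie = bpf_get_netns_cookie(skb);

	/* e.g. key a per-netns counter map with the cookie */
	bpf_printk("netns cookie %llu", cookie);
	return 1;	/* allow the packet */
}

char _license[] SEC("license") = "GPL";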
+1 -1
samples/bpf/Makefile
··· 307 307 308 308 VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ 309 309 $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ 310 - $(abspath ./vmlinux) 310 + $(abspath $(objtree)/vmlinux) 311 311 VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) 312 312 313 313 $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+1 -1
scripts/Makefile.btf
··· 24 24 pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j$(JOBS) --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs 25 25 26 26 ifneq ($(KBUILD_EXTMOD),) 27 - module-pahole-flags-$(call test-ge, $(pahole-ver), 126) += --btf_features=distilled_base 27 + module-pahole-flags-$(call test-ge, $(pahole-ver), 128) += --btf_features=distilled_base 28 28 endif 29 29 30 30 endif
+9 -6
security/security.c
··· 5627 5627 * @cmd: command 5628 5628 * @attr: bpf attribute 5629 5629 * @size: size 5630 + * @kernel: whether or not call originated from kernel 5630 5631 * 5631 5632 * Do a initial check for all bpf syscalls after the attribute is copied into 5632 5633 * the kernel. The actual security module can implement their own rules to ··· 5635 5634 * 5636 5635 * Return: Returns 0 if permission is granted. 5637 5636 */ 5638 - int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) 5637 + int security_bpf(int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 5639 5638 { 5640 - return call_int_hook(bpf, cmd, attr, size); 5639 + return call_int_hook(bpf, cmd, attr, size, kernel); 5641 5640 } 5642 5641 5643 5642 /** ··· 5674 5673 * @map: BPF map object 5675 5674 * @attr: BPF syscall attributes used to create BPF map 5676 5675 * @token: BPF token used to grant user access 5676 + * @kernel: whether or not call originated from kernel 5677 5677 * 5678 5678 * Do a check when the kernel creates a new BPF map. This is also the 5679 5679 * point where LSM blob is allocated for LSMs that need them. ··· 5682 5680 * Return: Returns 0 on success, error on failure. 5683 5681 */ 5684 5682 int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, 5685 - struct bpf_token *token) 5683 + struct bpf_token *token, bool kernel) 5686 5684 { 5687 - return call_int_hook(bpf_map_create, map, attr, token); 5685 + return call_int_hook(bpf_map_create, map, attr, token, kernel); 5688 5686 } 5689 5687 5690 5688 /** ··· 5692 5690 * @prog: BPF program object 5693 5691 * @attr: BPF syscall attributes used to create BPF program 5694 5692 * @token: BPF token used to grant user access to BPF subsystem 5693 + * @kernel: whether or not call originated from kernel 5695 5694 * 5696 5695 * Perform an access control check when the kernel loads a BPF program and 5697 5696 * allocates associated BPF program object. This hook is also responsible for ··· 5701 5698 * Return: Returns 0 on success, error on failure. 5702 5699 */ 5703 5700 int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, 5704 - struct bpf_token *token) 5701 + struct bpf_token *token, bool kernel) 5705 5702 { 5706 - return call_int_hook(bpf_prog_load, prog, attr, token); 5703 + return call_int_hook(bpf_prog_load, prog, attr, token, kernel); 5707 5704 } 5708 5705 5709 5706 /**
+3 -3
security/selinux/hooks.c
··· 6907 6907 6908 6908 #ifdef CONFIG_BPF_SYSCALL 6909 6909 static int selinux_bpf(int cmd, union bpf_attr *attr, 6910 - unsigned int size) 6910 + unsigned int size, bool kernel) 6911 6911 { 6912 6912 u32 sid = current_sid(); 6913 6913 int ret; ··· 6994 6994 } 6995 6995 6996 6996 static int selinux_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, 6997 - struct bpf_token *token) 6997 + struct bpf_token *token, bool kernel) 6998 6998 { 6999 6999 struct bpf_security_struct *bpfsec; 7000 7000 ··· 7017 7017 } 7018 7018 7019 7019 static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, 7020 - struct bpf_token *token) 7020 + struct bpf_token *token, bool kernel) 7021 7021 { 7022 7022 struct bpf_security_struct *bpfsec; 7023 7023
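Both security hunks above thread a new kernel flag through the bpf LSM hooks so a security module can distinguish requests originating inside the kernel from ordinary syscalls. A hedged sketch of a hook implementation using the flag; the function name, policy and capability check are invented, and hook registration is omitted.

#include <linux/bpf.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/lsm_hooks.h>

static int example_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
				 struct bpf_token *token, bool kernel)
{
	/* loads issued from within the kernel are trusted in this sketch */
	if (kernel)
		return 0;

	/* userspace-originated loads: apply a stricter, made-up policy */
	return capable(CAP_BPF) ? 0 : -EPERM;
}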
+6 -1
tools/bpf/bpftool/Makefile
··· 65 65 bash_compdir ?= /usr/share/bash-completion/completions 66 66 67 67 CFLAGS += -O2 68 - CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers 68 + CFLAGS += -W 69 + CFLAGS += -Wall 70 + CFLAGS += -Wextra 71 + CFLAGS += -Wformat-signedness 72 + CFLAGS += -Wno-unused-parameter 73 + CFLAGS += -Wno-missing-field-initializers 69 74 CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS)) 70 75 CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ 71 76 -I$(or $(OUTPUT),.) \
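-Wformat-signedness is what drives the %d/%u and %lld/%llu conversions in the bpftool sources that follow. A minimal user-space example of the mismatch it diagnoses:

#include <stdio.h>

int main(void)
{
	unsigned int id = 3000000000u;

	printf("id=%d\n", id);	/* flagged: signed conversion, unsigned argument */
	printf("id=%u\n", id);	/* correct */
	return 0;
}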
+7 -7
tools/bpf/bpftool/btf.c
··· 253 253 if (btf_kflag(t)) 254 254 printf("\n\t'%s' val=%d", name, v->val); 255 255 else 256 - printf("\n\t'%s' val=%u", name, v->val); 256 + printf("\n\t'%s' val=%u", name, (__u32)v->val); 257 257 } 258 258 } 259 259 if (json_output) ··· 1022 1022 for (i = 0; i < root_type_cnt; i++) { 1023 1023 if (root_type_ids[i] == root_id) { 1024 1024 err = -EINVAL; 1025 - p_err("duplicate root_id %d supplied", root_id); 1025 + p_err("duplicate root_id %u supplied", root_id); 1026 1026 goto done; 1027 1027 } 1028 1028 } ··· 1132 1132 break; 1133 1133 default: 1134 1134 err = -1; 1135 - p_err("unexpected object type: %d", type); 1135 + p_err("unexpected object type: %u", type); 1136 1136 goto err_free; 1137 1137 } 1138 1138 if (err) { ··· 1155 1155 break; 1156 1156 default: 1157 1157 err = -1; 1158 - p_err("unexpected object type: %d", type); 1158 + p_err("unexpected object type: %u", type); 1159 1159 goto err_free; 1160 1160 } 1161 1161 if (fd < 0) { ··· 1188 1188 break; 1189 1189 default: 1190 1190 err = -1; 1191 - p_err("unexpected object type: %d", type); 1191 + p_err("unexpected object type: %u", type); 1192 1192 goto err_free; 1193 1193 } 1194 1194 if (!btf_id) ··· 1254 1254 1255 1255 n = 0; 1256 1256 hashmap__for_each_key_entry(btf_prog_table, entry, info->id) { 1257 - printf("%s%lu", n++ == 0 ? " prog_ids " : ",", entry->value); 1257 + printf("%s%lu", n++ == 0 ? " prog_ids " : ",", (unsigned long)entry->value); 1258 1258 } 1259 1259 1260 1260 n = 0; 1261 1261 hashmap__for_each_key_entry(btf_map_table, entry, info->id) { 1262 - printf("%s%lu", n++ == 0 ? " map_ids " : ",", entry->value); 1262 + printf("%s%lu", n++ == 0 ? " map_ids " : ",", (unsigned long)entry->value); 1263 1263 } 1264 1264 1265 1265 emit_obj_refs_plain(refs_table, info->id, "\n\tpids ");
+1 -1
tools/bpf/bpftool/btf_dumper.c
··· 653 653 case BTF_KIND_ARRAY: 654 654 array = (struct btf_array *)(t + 1); 655 655 BTF_PRINT_TYPE(array->type); 656 - BTF_PRINT_ARG("[%d]", array->nelems); 656 + BTF_PRINT_ARG("[%u]", array->nelems); 657 657 break; 658 658 case BTF_KIND_PTR: 659 659 BTF_PRINT_TYPE(t->type);
+1 -1
tools/bpf/bpftool/cgroup.c
··· 191 191 if (attach_btf_name) 192 192 printf(" %-15s", attach_btf_name); 193 193 else if (info.attach_btf_id) 194 - printf(" attach_btf_obj_id=%d attach_btf_id=%d", 194 + printf(" attach_btf_obj_id=%u attach_btf_id=%u", 195 195 info.attach_btf_obj_id, info.attach_btf_id); 196 196 printf("\n"); 197 197 }
+4 -3
tools/bpf/bpftool/common.c
··· 461 461 p_err("can't read link type: %s", strerror(errno)); 462 462 return -1; 463 463 } 464 - if (n == sizeof(path)) { 464 + if (n == sizeof(buf)) { 465 465 p_err("can't read link type: path too long!"); 466 466 return -1; 467 467 } 468 + buf[n] = '\0'; 468 469 469 470 if (strstr(buf, "bpf-map")) 470 471 return BPF_OBJ_MAP; ··· 714 713 int vendor_id; 715 714 716 715 if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { 717 - p_err("Can't get net device name for ifindex %d: %s", ifindex, 716 + p_err("Can't get net device name for ifindex %u: %s", ifindex, 718 717 strerror(errno)); 719 718 return NULL; 720 719 } ··· 739 738 /* No NFP support in LLVM, we have no valid triple to return. */ 740 739 default: 741 740 p_err("Can't get arch name for device vendor id 0x%04x", 742 - vendor_id); 741 + (unsigned int)vendor_id); 743 742 return NULL; 744 743 } 745 744 }
+6 -6
tools/bpf/bpftool/gen.c
··· 670 670 continue; 671 671 if (bpf_map__is_internal(map) && 672 672 (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) 673 - printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zd);\n", 673 + printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zu);\n", 674 674 ident, bpf_map_mmap_sz(map)); 675 675 codegen("\ 676 676 \n\ ··· 984 984 985 985 offset = m->offset / 8; 986 986 if (next_offset < offset) 987 - printf("\t\t\tchar __padding_%d[%d];\n", i, offset - next_offset); 987 + printf("\t\t\tchar __padding_%d[%u];\n", i, offset - next_offset); 988 988 989 989 switch (btf_kind(member_type)) { 990 990 case BTF_KIND_INT: ··· 1052 1052 /* Cannot fail since it must be a struct type */ 1053 1053 size = btf__resolve_size(btf, map_type_id); 1054 1054 if (next_offset < (__u32)size) 1055 - printf("\t\t\tchar __padding_end[%d];\n", size - next_offset); 1055 + printf("\t\t\tchar __padding_end[%u];\n", size - next_offset); 1056 1056 1057 1057 out: 1058 1058 btf_dump__free(d); ··· 2095 2095 break; 2096 2096 /* tells if some other type needs to be handled */ 2097 2097 default: 2098 - p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id); 2098 + p_err("unsupported kind: %s (%u)", btf_kind_str(btf_type), type_id); 2099 2099 return -EINVAL; 2100 2100 } 2101 2101 ··· 2147 2147 btf_type = btf__type_by_id(btf, type_id); 2148 2148 break; 2149 2149 default: 2150 - p_err("unsupported kind: %s (%d)", 2150 + p_err("unsupported kind: %s (%u)", 2151 2151 btf_kind_str(btf_type), btf_type->type); 2152 2152 return -EINVAL; 2153 2153 } ··· 2246 2246 } 2247 2247 /* tells if some other type needs to be handled */ 2248 2248 default: 2249 - p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id); 2249 + p_err("unsupported kind: %s (%u)", btf_kind_str(btf_type), type_id); 2250 2250 return -EINVAL; 2251 2251 } 2252 2252
+2 -1
tools/bpf/bpftool/jit_disasm.c
··· 343 343 { 344 344 const struct bpf_line_info *linfo = NULL; 345 345 unsigned int nr_skip = 0; 346 - int count, i, pc = 0; 346 + int count, i; 347 + unsigned int pc = 0; 347 348 disasm_ctx_t ctx; 348 349 349 350 if (!len)
+7 -7
tools/bpf/bpftool/link.c
··· 107 107 108 108 fd = bpf_link_get_fd_by_id(id); 109 109 if (fd < 0) 110 - p_err("failed to get link with ID %d: %s", id, strerror(errno)); 110 + p_err("failed to get link with ID %u: %s", id, strerror(errno)); 111 111 return fd; 112 112 } else if (is_prefix(**argv, "pinned")) { 113 113 char *path; ··· 404 404 if (hw_cache) 405 405 snprintf(str, PERF_HW_CACHE_LEN, "%s-", hw_cache); 406 406 else 407 - snprintf(str, PERF_HW_CACHE_LEN, "%lld-", config & 0xff); 407 + snprintf(str, PERF_HW_CACHE_LEN, "%llu-", config & 0xff); 408 408 409 409 op = perf_event_name(evsel__hw_cache_op, (config >> 8) & 0xff); 410 410 if (op) ··· 412 412 "%s-", op); 413 413 else 414 414 snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str), 415 - "%lld-", (config >> 8) & 0xff); 415 + "%llu-", (config >> 8) & 0xff); 416 416 417 417 result = perf_event_name(evsel__hw_cache_result, config >> 16); 418 418 if (result) ··· 420 420 "%s", result); 421 421 else 422 422 snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str), 423 - "%lld", config >> 16); 423 + "%llu", config >> 16); 424 424 return str; 425 425 } 426 426 ··· 623 623 else 624 624 snprintf(devname, sizeof(devname), "(detached)"); 625 625 if (ret) 626 - snprintf(devname, sizeof(devname), "%s(%d)", 626 + snprintf(devname, sizeof(devname), "%s(%u)", 627 627 tmpname, ifindex); 628 628 printf("ifindex %s ", devname); 629 629 } ··· 699 699 if (pfname) 700 700 printf("\n\t%s", pfname); 701 701 else 702 - printf("\n\tpf: %d", pf); 702 + printf("\n\tpf: %u", pf); 703 703 704 704 if (hookname) 705 705 printf(" %s", hookname); ··· 773 773 printf("func_cnt %u ", info->uprobe_multi.count); 774 774 775 775 if (info->uprobe_multi.pid) 776 - printf("pid %d ", info->uprobe_multi.pid); 776 + printf("pid %u ", info->uprobe_multi.pid); 777 777 778 778 printf("\n\t%-16s %-16s %-16s", "offset", "ref_ctr_offset", "cookies"); 779 779 for (i = 0; i < info->uprobe_multi.count; i++) {
+4 -4
tools/bpf/bpftool/main.c
··· 152 152 BPFTOOL_MINOR_VERSION, BPFTOOL_PATCH_VERSION); 153 153 #endif 154 154 jsonw_name(json_wtr, "libbpf_version"); 155 - jsonw_printf(json_wtr, "\"%d.%d\"", 155 + jsonw_printf(json_wtr, "\"%u.%u\"", 156 156 libbpf_major_version(), libbpf_minor_version()); 157 157 158 158 jsonw_name(json_wtr, "features"); ··· 370 370 while ((cp = strstr(buf, "\\\n")) != NULL) { 371 371 if (!fgets(contline, sizeof(contline), fp) || 372 372 strlen(contline) == 0) { 373 - p_err("missing continuation line on command %d", 373 + p_err("missing continuation line on command %u", 374 374 lines); 375 375 err = -1; 376 376 goto err_close; ··· 381 381 *cp = '\0'; 382 382 383 383 if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) { 384 - p_err("command %d is too long", lines); 384 + p_err("command %u is too long", lines); 385 385 err = -1; 386 386 goto err_close; 387 387 } ··· 423 423 err = -1; 424 424 } else { 425 425 if (!json_output) 426 - printf("processed %d commands\n", lines); 426 + printf("processed %u commands\n", lines); 427 427 } 428 428 err_close: 429 429 if (fp != stdin)
+9 -5
tools/bpf/bpftool/map.c
··· 285 285 } 286 286 if (info->value_size) { 287 287 for (i = 0; i < n; i++) { 288 - printf("value (CPU %02d):%c", 288 + printf("value (CPU %02u):%c", 289 289 i, info->value_size > 16 ? '\n' : ' '); 290 290 fprint_hex(stdout, value + i * step, 291 291 info->value_size, " "); ··· 316 316 } 317 317 318 318 if (i != n) { 319 - p_err("%s expected %d bytes got %d", name, n, i); 319 + p_err("%s expected %u bytes got %u", name, n, i); 320 320 return NULL; 321 321 } 322 322 ··· 462 462 jsonw_string_field(wtr, "name", info->name); 463 463 464 464 jsonw_name(wtr, "flags"); 465 - jsonw_printf(wtr, "%d", info->map_flags); 465 + jsonw_printf(wtr, "%u", info->map_flags); 466 466 } 467 467 468 468 static int show_map_close_json(int fd, struct bpf_map_info *info) ··· 588 588 if (prog_type_str) 589 589 printf("owner_prog_type %s ", prog_type_str); 590 590 else 591 - printf("owner_prog_type %d ", prog_type); 591 + printf("owner_prog_type %u ", prog_type); 592 592 } 593 593 if (owner_jited) 594 594 printf("owner%s jited", ··· 615 615 printf("\n\t"); 616 616 617 617 if (info->btf_id) 618 - printf("btf_id %d", info->btf_id); 618 + printf("btf_id %u", info->btf_id); 619 619 620 620 if (frozen) 621 621 printf("%sfrozen", info->btf_id ? " " : ""); ··· 1270 1270 } else if (is_prefix(*argv, "name")) { 1271 1271 NEXT_ARG(); 1272 1272 map_name = GET_ARG(); 1273 + if (strlen(map_name) > BPF_OBJ_NAME_LEN - 1) { 1274 + p_info("Warning: map name is longer than %u characters, it will be truncated.", 1275 + BPF_OBJ_NAME_LEN - 1); 1276 + } 1273 1277 } else if (is_prefix(*argv, "key")) { 1274 1278 if (parse_u32_arg(&argc, &argv, &key_size, 1275 1279 "key size"))
+3 -3
tools/bpf/bpftool/map_perf_ring.c
··· 91 91 jsonw_end_object(json_wtr); 92 92 } else { 93 93 if (e->header.type == PERF_RECORD_SAMPLE) { 94 - printf("== @%lld.%09lld CPU: %d index: %d =====\n", 94 + printf("== @%llu.%09llu CPU: %d index: %d =====\n", 95 95 e->time / 1000000000ULL, e->time % 1000000000ULL, 96 96 cpu, idx); 97 97 fprint_hex(stdout, e->data, e->size, " "); 98 98 printf("\n"); 99 99 } else if (e->header.type == PERF_RECORD_LOST) { 100 - printf("lost %lld events\n", lost->lost); 100 + printf("lost %llu events\n", lost->lost); 101 101 } else { 102 - printf("unknown event type=%d size=%d\n", 102 + printf("unknown event type=%u size=%u\n", 103 103 e->header.type, e->header.size); 104 104 } 105 105 }
+2 -2
tools/bpf/bpftool/net.c
··· 476 476 for (i = 0; i < optq.count; i++) { 477 477 NET_START_OBJECT; 478 478 NET_DUMP_STR("devname", "%s", dev->devname); 479 - NET_DUMP_UINT("ifindex", "(%u)", dev->ifindex); 479 + NET_DUMP_UINT("ifindex", "(%u)", (unsigned int)dev->ifindex); 480 480 NET_DUMP_STR("kind", " %s", attach_loc_strings[loc]); 481 481 ret = __show_dev_tc_bpf_name(prog_ids[i], prog_name, 482 482 sizeof(prog_name)); ··· 831 831 if (err) { 832 832 if (errno == ENOENT) 833 833 break; 834 - p_err("can't get next link: %s (id %d)", strerror(errno), id); 834 + p_err("can't get next link: %s (id %u)", strerror(errno), id); 835 835 break; 836 836 } 837 837
+7 -6
tools/bpf/bpftool/prog.c
··· 521 521 print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); 522 522 printf("%s", info->gpl_compatible ? " gpl" : ""); 523 523 if (info->run_time_ns) 524 - printf(" run_time_ns %lld run_cnt %lld", 524 + printf(" run_time_ns %llu run_cnt %llu", 525 525 info->run_time_ns, info->run_cnt); 526 526 if (info->recursion_misses) 527 - printf(" recursion_misses %lld", info->recursion_misses); 527 + printf(" recursion_misses %llu", info->recursion_misses); 528 528 printf("\n"); 529 529 } 530 530 ··· 569 569 } 570 570 571 571 if (info->btf_id) 572 - printf("\n\tbtf_id %d", info->btf_id); 572 + printf("\n\tbtf_id %u", info->btf_id); 573 573 574 574 emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); 575 575 ··· 1164 1164 } 1165 1165 if (nb_read > buf_size - block_size) { 1166 1166 if (buf_size == UINT32_MAX) { 1167 - p_err("data_in/ctx_in is too long (max: %d)", 1167 + p_err("data_in/ctx_in is too long (max: %u)", 1168 1168 UINT32_MAX); 1169 1169 goto err_free; 1170 1170 } ··· 1928 1928 1929 1929 obj = bpf_object__open_file(file, &open_opts); 1930 1930 if (!obj) { 1931 + err = -1; 1931 1932 p_err("failed to open object file"); 1932 1933 goto err_close_obj; 1933 1934 } ··· 2252 2251 2253 2252 t = btf__type_by_id(btf, func_info.type_id); 2254 2253 if (!t) { 2255 - p_err("btf %d doesn't have type %d", 2254 + p_err("btf %u doesn't have type %u", 2256 2255 info.btf_id, func_info.type_id); 2257 2256 goto out; 2258 2257 } ··· 2330 2329 continue; 2331 2330 for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) { 2332 2331 if (profile_open_perf_event(m, cpu, map_fd)) { 2333 - p_err("failed to create event %s on cpu %d", 2332 + p_err("failed to create event %s on cpu %u", 2334 2333 metrics[m].name, cpu); 2335 2334 return -1; 2336 2335 }
+1 -1
tools/bpf/bpftool/tracelog.c
··· 78 78 return false; 79 79 80 80 /* Allow room for NULL terminating byte and pipe file name */ 81 - snprintf(format, sizeof(format), "%%*s %%%zds %%99s %%*s %%*d %%*d\\n", 81 + snprintf(format, sizeof(format), "%%*s %%%zus %%99s %%*s %%*d %%*d\\n", 82 82 PATH_MAX - strlen(pipe_name) - 1); 83 83 while (fscanf(fp, format, mnt, type) == 2) 84 84 if (strcmp(type, fstype) == 0) {
+3 -3
tools/bpf/bpftool/xlated_dumper.c
··· 199 199 200 200 if (insn->src_reg == BPF_PSEUDO_MAP_FD) 201 201 snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), 202 - "map[id:%u]", insn->imm); 202 + "map[id:%d]", insn->imm); 203 203 else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) 204 204 snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), 205 - "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm); 205 + "map[id:%d][0]+%d", insn->imm, (insn + 1)->imm); 206 206 else if (insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) 207 207 snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), 208 - "map[idx:%u]+%u", insn->imm, (insn + 1)->imm); 208 + "map[idx:%d]+%d", insn->imm, (insn + 1)->imm); 209 209 else if (insn->src_reg == BPF_PSEUDO_FUNC) 210 210 snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), 211 211 "subprog[%+d]", insn->imm);
+2 -1
tools/bpf/runqslower/Makefile
··· 6 6 BPFTOOL_OUTPUT := $(OUTPUT)bpftool/ 7 7 DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool 8 8 BPFTOOL ?= $(DEFAULT_BPFTOOL) 9 + BPF_TARGET_ENDIAN ?= --target=bpf 9 10 LIBBPF_SRC := $(abspath ../../lib/bpf) 10 11 BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ 11 12 BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a ··· 61 60 $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@ 62 61 63 62 $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT) 64 - $(QUIET_GEN)$(CLANG) -g -O2 --target=bpf $(INCLUDES) \ 63 + $(QUIET_GEN)$(CLANG) -g -O2 $(BPF_TARGET_ENDIAN) $(INCLUDES) \ 65 64 -c $(filter %.c,$^) -o $@ && \ 66 65 $(LLVM_STRIP) -g $@ 67 66
+9 -1
tools/include/uapi/linux/bpf.h
··· 51 51 #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ 52 52 #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ 53 53 54 + #define BPF_LOAD_ACQ 0x100 /* load-acquire */ 55 + #define BPF_STORE_REL 0x110 /* store-release */ 56 + 54 57 enum bpf_cond_pseudo_jmp { 55 58 BPF_MAY_GOTO = 0, 56 59 }; ··· 1210 1207 #define BPF_F_BEFORE (1U << 3) 1211 1208 #define BPF_F_AFTER (1U << 4) 1212 1209 #define BPF_F_ID (1U << 5) 1210 + #define BPF_F_PREORDER (1U << 6) 1213 1211 #define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ 1214 1212 1215 1213 /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the ··· 1652 1648 }; 1653 1649 __u32 next_id; 1654 1650 __u32 open_flags; 1651 + __s32 fd_by_id_token_fd; 1655 1652 }; 1656 1653 1657 1654 struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ ··· 6024 6019 FN(user_ringbuf_drain, 209, ##ctx) \ 6025 6020 FN(cgrp_storage_get, 210, ##ctx) \ 6026 6021 FN(cgrp_storage_delete, 211, ##ctx) \ 6027 - /* */ 6022 + /* This helper list is effectively frozen. If you are trying to \ 6023 + * add a new helper, you should add a kfunc instead which has \ 6024 + * less stability guarantees. See Documentation/bpf/kfuncs.rst \ 6025 + */ 6028 6026 6029 6027 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't 6030 6028 * know or care about integer value that is now passed as second argument
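A hedged sketch of how the two new operation codes above might be emitted by hand, assuming they reuse the BPF_STX | BPF_ATOMIC instruction class with the operation in the imm field and the dst = load_acquire(src + off) / store_release(dst + off, src) register convention; see the instruction-set documentation for the authoritative encoding.

#include <linux/bpf.h>

/* r0 = load_acquire((u64 *)(r1 + 0)) -- assumed layout */
static const struct bpf_insn load_acq_insn = {
	.code    = BPF_STX | BPF_DW | BPF_ATOMIC,
	.dst_reg = BPF_REG_0,
	.src_reg = BPF_REG_1,
	.off     = 0,
	.imm     = BPF_LOAD_ACQ,
};

/* store_release((u64 *)(r1 + 0), r2) -- assumed layout */
static const struct bpf_insn store_rel_insn = {
	.code    = BPF_STX | BPF_DW | BPF_ATOMIC,
	.dst_reg = BPF_REG_1,
	.src_reg = BPF_REG_2,
	.off     = 0,
	.imm     = BPF_STORE_REL,
};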
+2 -1
tools/include/uapi/linux/btf.h
··· 36 36 * bits 24-28: kind (e.g. int, ptr, array...etc) 37 37 * bits 29-30: unused 38 38 * bit 31: kind_flag, currently used by 39 - * struct, union, enum, fwd and enum64 39 + * struct, union, enum, fwd, enum64, 40 + * decl_tag and type_tag 40 41 */ 41 42 __u32 info; 42 43 /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
+2 -1
tools/lib/bpf/bpf.c
··· 1097 1097 int bpf_btf_get_fd_by_id_opts(__u32 id, 1098 1098 const struct bpf_get_fd_by_id_opts *opts) 1099 1099 { 1100 - const size_t attr_sz = offsetofend(union bpf_attr, open_flags); 1100 + const size_t attr_sz = offsetofend(union bpf_attr, fd_by_id_token_fd); 1101 1101 union bpf_attr attr; 1102 1102 int fd; 1103 1103 ··· 1107 1107 memset(&attr, 0, attr_sz); 1108 1108 attr.btf_id = id; 1109 1109 attr.open_flags = OPTS_GET(opts, open_flags, 0); 1110 + attr.fd_by_id_token_fd = OPTS_GET(opts, token_fd, 0); 1110 1111 1111 1112 fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz); 1112 1113 return libbpf_err_errno(fd);
+2 -1
tools/lib/bpf/bpf.h
··· 487 487 struct bpf_get_fd_by_id_opts { 488 488 size_t sz; /* size of this struct for forward/backward compatibility */ 489 489 __u32 open_flags; /* permissions requested for the operation on fd */ 490 + __u32 token_fd; 490 491 size_t :0; 491 492 }; 492 - #define bpf_get_fd_by_id_opts__last_field open_flags 493 + #define bpf_get_fd_by_id_opts__last_field token_fd 493 494 494 495 LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); 495 496 LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id,
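A short usage sketch for the new token_fd field: fetch BTF by ID under a BPF token (token creation not shown). BPF_F_TOKEN_FD is assumed to be required in open_flags, mirroring what btf_load_from_kernel() does in the libbpf btf.c hunk further below.

#include <bpf/bpf.h>

static int get_btf_fd_with_token(__u32 btf_id, int token_fd)
{
	LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts,
		.open_flags = BPF_F_TOKEN_FD,
		.token_fd = token_fd);

	return bpf_btf_get_fd_by_id_opts(btf_id, &opts);
}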
+78 -27
tools/lib/bpf/btf.c
··· 1619 1619 return btf; 1620 1620 } 1621 1621 1622 - struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) 1622 + struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd) 1623 1623 { 1624 1624 struct btf *btf; 1625 1625 int btf_fd; 1626 + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); 1626 1627 1627 - btf_fd = bpf_btf_get_fd_by_id(id); 1628 + if (token_fd) { 1629 + opts.open_flags |= BPF_F_TOKEN_FD; 1630 + opts.token_fd = token_fd; 1631 + } 1632 + 1633 + btf_fd = bpf_btf_get_fd_by_id_opts(id, &opts); 1628 1634 if (btf_fd < 0) 1629 1635 return libbpf_err_ptr(-errno); 1630 1636 ··· 1638 1632 close(btf_fd); 1639 1633 1640 1634 return libbpf_ptr(btf); 1635 + } 1636 + 1637 + struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) 1638 + { 1639 + return btf_load_from_kernel(id, base_btf, 0); 1641 1640 } 1642 1641 1643 1642 struct btf *btf__load_from_kernel_by_id(__u32 id) ··· 2101 2090 } 2102 2091 2103 2092 /* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */ 2104 - static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id) 2093 + static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id, int kflag) 2105 2094 { 2106 2095 struct btf_type *t; 2107 2096 int sz, name_off = 0; ··· 2124 2113 } 2125 2114 2126 2115 t->name_off = name_off; 2127 - t->info = btf_type_info(kind, 0, 0); 2116 + t->info = btf_type_info(kind, 0, kflag); 2128 2117 t->type = ref_type_id; 2129 2118 2130 2119 return btf_commit_type(btf, sz); ··· 2139 2128 */ 2140 2129 int btf__add_ptr(struct btf *btf, int ref_type_id) 2141 2130 { 2142 - return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id); 2131 + return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id, 0); 2143 2132 } 2144 2133 2145 2134 /* ··· 2517 2506 struct btf_type *t; 2518 2507 int id; 2519 2508 2520 - id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0); 2509 + id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0, 0); 2521 2510 if (id <= 0) 2522 2511 return id; 2523 2512 t = btf_type_by_id(btf, id); ··· 2547 2536 if (!name || !name[0]) 2548 2537 return libbpf_err(-EINVAL); 2549 2538 2550 - return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id); 2539 + return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id, 0); 2551 2540 } 2552 2541 2553 2542 /* ··· 2559 2548 */ 2560 2549 int btf__add_volatile(struct btf *btf, int ref_type_id) 2561 2550 { 2562 - return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id); 2551 + return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id, 0); 2563 2552 } 2564 2553 2565 2554 /* ··· 2571 2560 */ 2572 2561 int btf__add_const(struct btf *btf, int ref_type_id) 2573 2562 { 2574 - return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id); 2563 + return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id, 0); 2575 2564 } 2576 2565 2577 2566 /* ··· 2583 2572 */ 2584 2573 int btf__add_restrict(struct btf *btf, int ref_type_id) 2585 2574 { 2586 - return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id); 2575 + return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id, 0); 2587 2576 } 2588 2577 2589 2578 /* ··· 2599 2588 if (!value || !value[0]) 2600 2589 return libbpf_err(-EINVAL); 2601 2590 2602 - return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); 2591 + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 0); 2592 + } 2593 + 2594 + /* 2595 + * Append new BTF_KIND_TYPE_TAG type with: 2596 + * - *value*, 
non-empty/non-NULL tag value; 2597 + * - *ref_type_id* - referenced type ID, it might not exist yet; 2598 + * Set info->kflag to 1, indicating this tag is an __attribute__ 2599 + * Returns: 2600 + * - >0, type ID of newly added BTF type; 2601 + * - <0, on error. 2602 + */ 2603 + int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id) 2604 + { 2605 + if (!value || !value[0]) 2606 + return libbpf_err(-EINVAL); 2607 + 2608 + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 1); 2603 2609 } 2604 2610 2605 2611 /* ··· 2638 2610 linkage != BTF_FUNC_EXTERN) 2639 2611 return libbpf_err(-EINVAL); 2640 2612 2641 - id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id); 2613 + id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id, 0); 2642 2614 if (id > 0) { 2643 2615 struct btf_type *t = btf_type_by_id(btf, id); 2644 2616 ··· 2873 2845 return 0; 2874 2846 } 2875 2847 2876 - /* 2877 - * Append new BTF_KIND_DECL_TAG type with: 2878 - * - *value* - non-empty/non-NULL string; 2879 - * - *ref_type_id* - referenced type ID, it might not exist yet; 2880 - * - *component_idx* - -1 for tagging reference type, otherwise struct/union 2881 - * member or function argument index; 2882 - * Returns: 2883 - * - >0, type ID of newly added BTF type; 2884 - * - <0, on error. 2885 - */ 2886 - int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, 2887 - int component_idx) 2848 + static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id, 2849 + int component_idx, int kflag) 2888 2850 { 2889 2851 struct btf_type *t; 2890 2852 int sz, value_off; ··· 2898 2880 return value_off; 2899 2881 2900 2882 t->name_off = value_off; 2901 - t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false); 2883 + t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, kflag); 2902 2884 t->type = ref_type_id; 2903 2885 btf_decl_tag(t)->component_idx = component_idx; 2904 2886 2905 2887 return btf_commit_type(btf, sz); 2888 + } 2889 + 2890 + /* 2891 + * Append new BTF_KIND_DECL_TAG type with: 2892 + * - *value* - non-empty/non-NULL string; 2893 + * - *ref_type_id* - referenced type ID, it might not exist yet; 2894 + * - *component_idx* - -1 for tagging reference type, otherwise struct/union 2895 + * member or function argument index; 2896 + * Returns: 2897 + * - >0, type ID of newly added BTF type; 2898 + * - <0, on error. 2899 + */ 2900 + int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, 2901 + int component_idx) 2902 + { 2903 + return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 0); 2904 + } 2905 + 2906 + /* 2907 + * Append new BTF_KIND_DECL_TAG type with: 2908 + * - *value* - non-empty/non-NULL string; 2909 + * - *ref_type_id* - referenced type ID, it might not exist yet; 2910 + * - *component_idx* - -1 for tagging reference type, otherwise struct/union 2911 + * member or function argument index; 2912 + * Set info->kflag to 1, indicating this tag is an __attribute__ 2913 + * Returns: 2914 + * - >0, type ID of newly added BTF type; 2915 + * - <0, on error. 
2916 + */ 2917 + int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id, 2918 + int component_idx) 2919 + { 2920 + return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 1); 2906 2921 } 2907 2922 2908 2923 struct btf_ext_sec_info_param { ··· 3066 3015 .desc = "line_info", 3067 3016 }; 3068 3017 struct btf_ext_sec_info_param core_relo = { 3069 - .off = btf_ext->hdr->core_relo_off, 3070 - .len = btf_ext->hdr->core_relo_len, 3071 3018 .min_rec_size = sizeof(struct bpf_core_relo), 3072 3019 .ext_info = &btf_ext->core_relo_info, 3073 3020 .desc = "core_relo", ··· 3083 3034 if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) 3084 3035 return 0; /* skip core relos parsing */ 3085 3036 3037 + core_relo.off = btf_ext->hdr->core_relo_off; 3038 + core_relo.len = btf_ext->hdr->core_relo_len; 3086 3039 err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native); 3087 3040 if (err) 3088 3041 return err;
+3
tools/lib/bpf/btf.h
··· 227 227 LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); 228 228 LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); 229 229 LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id); 230 + LIBBPF_API int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id); 230 231 231 232 /* func and func_proto construction APIs */ 232 233 LIBBPF_API int btf__add_func(struct btf *btf, const char *name, ··· 244 243 /* tag construction API */ 245 244 LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, 246 245 int component_idx); 246 + LIBBPF_API int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id, 247 + int component_idx); 247 248 248 249 struct btf_dedup_opts { 249 250 size_t sz;
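A hedged usage sketch of the new attribute-flavoured tag constructors declared above; the attribute string and the types involved are arbitrary. With kflag set, btf_dump renders the tag as __attribute__((...)) instead of btf_type_tag(), as the btf_dump.c hunk below shows.

#include <bpf/btf.h>

static int add_attr_tagged_ptr(struct btf *btf)
{
	int int_id, attr_id;

	int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
	if (int_id < 0)
		return int_id;

	/* attribute-style type tag on the pointed-to type */
	attr_id = btf__add_type_attr(btf, "address_space(1)", int_id);
	if (attr_id < 0)
		return attr_id;

	return btf__add_ptr(btf, attr_id);
}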
+4 -1
tools/lib/bpf/btf_dump.c
··· 1494 1494 case BTF_KIND_TYPE_TAG: 1495 1495 btf_dump_emit_mods(d, decls); 1496 1496 name = btf_name_of(d, t->name_off); 1497 - btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); 1497 + if (btf_kflag(t)) 1498 + btf_dump_printf(d, " __attribute__((%s))", name); 1499 + else 1500 + btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); 1498 1501 break; 1499 1502 case BTF_KIND_ARRAY: { 1500 1503 const struct btf_array *a = btf_array(t);
+151 -86
tools/lib/bpf/libbpf.c
··· 670 670 671 671 struct usdt_manager; 672 672 673 + enum bpf_object_state { 674 + OBJ_OPEN, 675 + OBJ_PREPARED, 676 + OBJ_LOADED, 677 + }; 678 + 673 679 struct bpf_object { 674 680 char name[BPF_OBJ_NAME_LEN]; 675 681 char license[64]; 676 682 __u32 kern_version; 677 683 684 + enum bpf_object_state state; 678 685 struct bpf_program *programs; 679 686 size_t nr_programs; 680 687 struct bpf_map *maps; ··· 693 686 int nr_extern; 694 687 int kconfig_map_idx; 695 688 696 - bool loaded; 697 689 bool has_subcalls; 698 690 bool has_rodata; 699 691 ··· 1517 1511 obj->kconfig_map_idx = -1; 1518 1512 1519 1513 obj->kern_version = get_kernel_version(); 1520 - obj->loaded = false; 1514 + obj->state = OBJ_OPEN; 1521 1515 1522 1516 return obj; 1523 1517 } ··· 2112 2106 } 2113 2107 2114 2108 len = strlen(value); 2115 - if (value[len - 1] != '"') { 2109 + if (len < 2 || value[len - 1] != '"') { 2116 2110 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", 2117 2111 ext->name, value); 2118 2112 return -EINVAL; ··· 4851 4845 return 0; 4852 4846 } 4853 4847 4848 + static bool map_is_created(const struct bpf_map *map) 4849 + { 4850 + return map->obj->state >= OBJ_PREPARED || map->reused; 4851 + } 4852 + 4854 4853 bool bpf_map__autocreate(const struct bpf_map *map) 4855 4854 { 4856 4855 return map->autocreate; ··· 4863 4852 4864 4853 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) 4865 4854 { 4866 - if (map->obj->loaded) 4855 + if (map_is_created(map)) 4867 4856 return libbpf_err(-EBUSY); 4868 4857 4869 4858 map->autocreate = autocreate; ··· 4957 4946 4958 4947 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) 4959 4948 { 4960 - if (map->obj->loaded) 4949 + if (map_is_created(map)) 4961 4950 return libbpf_err(-EBUSY); 4962 4951 4963 4952 map->def.max_entries = max_entries; ··· 5201 5190 } 5202 5191 5203 5192 static void bpf_map__destroy(struct bpf_map *map); 5204 - 5205 - static bool map_is_created(const struct bpf_map *map) 5206 - { 5207 - return map->obj->loaded || map->reused; 5208 - } 5209 5193 5210 5194 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) 5211 5195 { ··· 7903 7897 7904 7898 for (i = 0; i < obj->nr_programs; i++) { 7905 7899 prog = &obj->programs[i]; 7906 - err = bpf_object__sanitize_prog(obj, prog); 7907 - if (err) 7908 - return err; 7909 - } 7910 - 7911 - for (i = 0; i < obj->nr_programs; i++) { 7912 - prog = &obj->programs[i]; 7913 7900 if (prog_is_subprog(obj, prog)) 7914 7901 continue; 7915 7902 if (!prog->autoload) { ··· 7923 7924 } 7924 7925 7925 7926 bpf_object__free_relocs(obj); 7927 + return 0; 7928 + } 7929 + 7930 + static int bpf_object_prepare_progs(struct bpf_object *obj) 7931 + { 7932 + struct bpf_program *prog; 7933 + size_t i; 7934 + int err; 7935 + 7936 + for (i = 0; i < obj->nr_programs; i++) { 7937 + prog = &obj->programs[i]; 7938 + err = bpf_object__sanitize_prog(obj, prog); 7939 + if (err) 7940 + return err; 7941 + } 7926 7942 return 0; 7927 7943 } 7928 7944 ··· 8557 8543 return 0; 8558 8544 } 8559 8545 8546 + static void bpf_object_unpin(struct bpf_object *obj) 8547 + { 8548 + int i; 8549 + 8550 + /* unpin any maps that were auto-pinned during load */ 8551 + for (i = 0; i < obj->nr_maps; i++) 8552 + if (obj->maps[i].pinned && !obj->maps[i].reused) 8553 + bpf_map__unpin(&obj->maps[i], NULL); 8554 + } 8555 + 8556 + static void bpf_object_post_load_cleanup(struct bpf_object *obj) 8557 + { 8558 + int i; 8559 + 8560 + /* clean up fd_array */ 8561 + zfree(&obj->fd_array); 8562 + 8563 + 
/* clean up module BTFs */ 8564 + for (i = 0; i < obj->btf_module_cnt; i++) { 8565 + close(obj->btf_modules[i].fd); 8566 + btf__free(obj->btf_modules[i].btf); 8567 + free(obj->btf_modules[i].name); 8568 + } 8569 + obj->btf_module_cnt = 0; 8570 + zfree(&obj->btf_modules); 8571 + 8572 + /* clean up vmlinux BTF */ 8573 + btf__free(obj->btf_vmlinux); 8574 + obj->btf_vmlinux = NULL; 8575 + } 8576 + 8577 + static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path) 8578 + { 8579 + int err; 8580 + 8581 + if (obj->state >= OBJ_PREPARED) { 8582 + pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name); 8583 + return -EINVAL; 8584 + } 8585 + 8586 + err = bpf_object_prepare_token(obj); 8587 + err = err ? : bpf_object__probe_loading(obj); 8588 + err = err ? : bpf_object__load_vmlinux_btf(obj, false); 8589 + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 8590 + err = err ? : bpf_object__sanitize_maps(obj); 8591 + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 8592 + err = err ? : bpf_object_adjust_struct_ops_autoload(obj); 8593 + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); 8594 + err = err ? : bpf_object__sanitize_and_load_btf(obj); 8595 + err = err ? : bpf_object__create_maps(obj); 8596 + err = err ? : bpf_object_prepare_progs(obj); 8597 + 8598 + if (err) { 8599 + bpf_object_unpin(obj); 8600 + bpf_object_unload(obj); 8601 + obj->state = OBJ_LOADED; 8602 + return err; 8603 + } 8604 + 8605 + obj->state = OBJ_PREPARED; 8606 + return 0; 8607 + } 8608 + 8560 8609 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) 8561 8610 { 8562 - int err, i; 8611 + int err; 8563 8612 8564 8613 if (!obj) 8565 8614 return libbpf_err(-EINVAL); 8566 8615 8567 - if (obj->loaded) { 8616 + if (obj->state >= OBJ_LOADED) { 8568 8617 pr_warn("object '%s': load can't be attempted twice\n", obj->name); 8569 8618 return libbpf_err(-EINVAL); 8570 8619 } ··· 8642 8565 return libbpf_err(-LIBBPF_ERRNO__ENDIAN); 8643 8566 } 8644 8567 8645 - err = bpf_object_prepare_token(obj); 8646 - err = err ? : bpf_object__probe_loading(obj); 8647 - err = err ? : bpf_object__load_vmlinux_btf(obj, false); 8648 - err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 8649 - err = err ? : bpf_object__sanitize_maps(obj); 8650 - err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 8651 - err = err ? : bpf_object_adjust_struct_ops_autoload(obj); 8652 - err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); 8653 - err = err ? : bpf_object__sanitize_and_load_btf(obj); 8654 - err = err ? : bpf_object__create_maps(obj); 8655 - err = err ? : bpf_object__load_progs(obj, extra_log_level); 8568 + if (obj->state < OBJ_PREPARED) { 8569 + err = bpf_object_prepare(obj, target_btf_path); 8570 + if (err) 8571 + return libbpf_err(err); 8572 + } 8573 + err = bpf_object__load_progs(obj, extra_log_level); 8656 8574 err = err ? : bpf_object_init_prog_arrays(obj); 8657 8575 err = err ? 
: bpf_object_prepare_struct_ops(obj); 8658 8576 ··· 8659 8587 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); 8660 8588 } 8661 8589 8662 - /* clean up fd_array */ 8663 - zfree(&obj->fd_array); 8590 + bpf_object_post_load_cleanup(obj); 8591 + obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */ 8664 8592 8665 - /* clean up module BTFs */ 8666 - for (i = 0; i < obj->btf_module_cnt; i++) { 8667 - close(obj->btf_modules[i].fd); 8668 - btf__free(obj->btf_modules[i].btf); 8669 - free(obj->btf_modules[i].name); 8593 + if (err) { 8594 + bpf_object_unpin(obj); 8595 + bpf_object_unload(obj); 8596 + pr_warn("failed to load object '%s'\n", obj->path); 8597 + return libbpf_err(err); 8670 8598 } 8671 - free(obj->btf_modules); 8672 - 8673 - /* clean up vmlinux BTF */ 8674 - btf__free(obj->btf_vmlinux); 8675 - obj->btf_vmlinux = NULL; 8676 - 8677 - obj->loaded = true; /* doesn't matter if successfully or not */ 8678 - 8679 - if (err) 8680 - goto out; 8681 8599 8682 8600 return 0; 8683 - out: 8684 - /* unpin any maps that were auto-pinned during load */ 8685 - for (i = 0; i < obj->nr_maps; i++) 8686 - if (obj->maps[i].pinned && !obj->maps[i].reused) 8687 - bpf_map__unpin(&obj->maps[i], NULL); 8601 + } 8688 8602 8689 - bpf_object_unload(obj); 8690 - pr_warn("failed to load object '%s'\n", obj->path); 8691 - return libbpf_err(err); 8603 + int bpf_object__prepare(struct bpf_object *obj) 8604 + { 8605 + return libbpf_err(bpf_object_prepare(obj, NULL)); 8692 8606 } 8693 8607 8694 8608 int bpf_object__load(struct bpf_object *obj) ··· 8924 8866 if (!obj) 8925 8867 return libbpf_err(-ENOENT); 8926 8868 8927 - if (!obj->loaded) { 8869 + if (obj->state < OBJ_PREPARED) { 8928 8870 pr_warn("object not yet loaded; load it first\n"); 8929 8871 return libbpf_err(-ENOENT); 8930 8872 } ··· 9003 8945 if (!obj) 9004 8946 return libbpf_err(-ENOENT); 9005 8947 9006 - if (!obj->loaded) { 8948 + if (obj->state < OBJ_LOADED) { 9007 8949 pr_warn("object not yet loaded; load it first\n"); 9008 8950 return libbpf_err(-ENOENT); 9009 8951 } ··· 9122 9064 if (IS_ERR_OR_NULL(obj)) 9123 9065 return; 9124 9066 9067 + /* 9068 + * if user called bpf_object__prepare() without ever getting to 9069 + * bpf_object__load(), we need to clean up stuff that is normally 9070 + * cleaned up at the end of loading step 9071 + */ 9072 + bpf_object_post_load_cleanup(obj); 9073 + 9125 9074 usdt_manager_free(obj->usdt_man); 9126 9075 obj->usdt_man = NULL; 9127 9076 ··· 9197 9132 9198 9133 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) 9199 9134 { 9200 - if (obj->loaded) 9135 + if (obj->state >= OBJ_LOADED) 9201 9136 return libbpf_err(-EINVAL); 9202 9137 9203 9138 obj->kern_version = kern_version; ··· 9210 9145 struct bpf_gen *gen; 9211 9146 9212 9147 if (!opts) 9213 - return -EFAULT; 9148 + return libbpf_err(-EFAULT); 9214 9149 if (!OPTS_VALID(opts, gen_loader_opts)) 9215 - return -EINVAL; 9150 + return libbpf_err(-EINVAL); 9216 9151 gen = calloc(sizeof(*gen), 1); 9217 9152 if (!gen) 9218 - return -ENOMEM; 9153 + return libbpf_err(-ENOMEM); 9219 9154 gen->opts = opts; 9220 9155 gen->swapped_endian = !is_native_endianness(obj); 9221 9156 obj->gen_loader = gen; ··· 9294 9229 9295 9230 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) 9296 9231 { 9297 - if (prog->obj->loaded) 9232 + if (prog->obj->state >= OBJ_LOADED) 9298 9233 return libbpf_err(-EINVAL); 9299 9234 9300 9235 prog->autoload = autoload; ··· 9326 9261 { 9327 9262 struct bpf_insn *insns; 9328 9263 9329 - if 
(prog->obj->loaded) 9330 - return -EBUSY; 9264 + if (prog->obj->state >= OBJ_LOADED) 9265 + return libbpf_err(-EBUSY); 9331 9266 9332 9267 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); 9333 9268 /* NULL is a valid return from reallocarray if the new count is zero */ 9334 9269 if (!insns && new_insn_cnt) { 9335 9270 pr_warn("prog '%s': failed to realloc prog code\n", prog->name); 9336 - return -ENOMEM; 9271 + return libbpf_err(-ENOMEM); 9337 9272 } 9338 9273 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); 9339 9274 ··· 9369 9304 9370 9305 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) 9371 9306 { 9372 - if (prog->obj->loaded) 9307 + if (prog->obj->state >= OBJ_LOADED) 9373 9308 return libbpf_err(-EBUSY); 9374 9309 9375 9310 /* if type is not changed, do nothing */ ··· 9400 9335 int bpf_program__set_expected_attach_type(struct bpf_program *prog, 9401 9336 enum bpf_attach_type type) 9402 9337 { 9403 - if (prog->obj->loaded) 9338 + if (prog->obj->state >= OBJ_LOADED) 9404 9339 return libbpf_err(-EBUSY); 9405 9340 9406 9341 prog->expected_attach_type = type; ··· 9414 9349 9415 9350 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) 9416 9351 { 9417 - if (prog->obj->loaded) 9352 + if (prog->obj->state >= OBJ_LOADED) 9418 9353 return libbpf_err(-EBUSY); 9419 9354 9420 9355 prog->prog_flags = flags; ··· 9428 9363 9429 9364 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) 9430 9365 { 9431 - if (prog->obj->loaded) 9366 + if (prog->obj->state >= OBJ_LOADED) 9432 9367 return libbpf_err(-EBUSY); 9433 9368 9434 9369 prog->log_level = log_level; ··· 9444 9379 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) 9445 9380 { 9446 9381 if (log_size && !log_buf) 9447 - return -EINVAL; 9382 + return libbpf_err(-EINVAL); 9448 9383 if (prog->log_size > UINT_MAX) 9449 - return -EINVAL; 9450 - if (prog->obj->loaded) 9451 - return -EBUSY; 9384 + return libbpf_err(-EINVAL); 9385 + if (prog->obj->state >= OBJ_LOADED) 9386 + return libbpf_err(-EBUSY); 9452 9387 9453 9388 prog->log_buf = log_buf; 9454 9389 prog->log_size = log_size; ··· 10024 9959 return libbpf_err(err); 10025 9960 } 10026 9961 10027 - static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) 9962 + static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd) 10028 9963 { 10029 9964 struct bpf_prog_info info; 10030 9965 __u32 info_len = sizeof(info); ··· 10044 9979 pr_warn("The target program doesn't have BTF\n"); 10045 9980 goto out; 10046 9981 } 10047 - btf = btf__load_from_kernel_by_id(info.btf_id); 9982 + btf = btf_load_from_kernel(info.btf_id, NULL, token_fd); 10048 9983 err = libbpf_get_error(btf); 10049 9984 if (err) { 10050 9985 pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err)); ··· 10127 10062 pr_warn("prog '%s': attach program FD is not set\n", prog->name); 10128 10063 return -EINVAL; 10129 10064 } 10130 - err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); 10065 + err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd); 10131 10066 if (err < 0) { 10132 10067 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n", 10133 10068 prog->name, attach_prog_fd, attach_name, errstr(err)); ··· 10364 10299 10365 10300 int bpf_map__set_value_size(struct bpf_map *map, __u32 size) 10366 10301 { 10367 - if (map->obj->loaded || map->reused) 10302 + if (map_is_created(map)) 10368 10303 return 
libbpf_err(-EBUSY); 10369 10304 10370 10305 if (map->mmaped) { ··· 10372 10307 int err; 10373 10308 10374 10309 if (map->def.type != BPF_MAP_TYPE_ARRAY) 10375 - return -EOPNOTSUPP; 10310 + return libbpf_err(-EOPNOTSUPP); 10376 10311 10377 10312 mmap_old_sz = bpf_map_mmap_sz(map); 10378 10313 mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); ··· 10380 10315 if (err) { 10381 10316 pr_warn("map '%s': failed to resize memory-mapped region: %s\n", 10382 10317 bpf_map__name(map), errstr(err)); 10383 - return err; 10318 + return libbpf_err(err); 10384 10319 } 10385 10320 err = map_btf_datasec_resize(map, size); 10386 10321 if (err && err != -ENOENT) { ··· 10410 10345 { 10411 10346 size_t actual_sz; 10412 10347 10413 - if (map->obj->loaded || map->reused) 10348 + if (map_is_created(map)) 10414 10349 return libbpf_err(-EBUSY); 10415 10350 10416 10351 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) ··· 12923 12858 if (target_fd) { 12924 12859 LIBBPF_OPTS(bpf_link_create_opts, target_opts); 12925 12860 12926 - btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); 12861 + btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd); 12927 12862 if (btf_id < 0) 12928 12863 return libbpf_err_ptr(btf_id); 12929 12864 ··· 13135 13070 int err; 13136 13071 13137 13072 if (!bpf_map__is_struct_ops(map)) 13138 - return -EINVAL; 13073 + return libbpf_err(-EINVAL); 13139 13074 13140 13075 if (map->fd < 0) { 13141 13076 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); 13142 - return -EINVAL; 13077 + return libbpf_err(-EINVAL); 13143 13078 } 13144 13079 13145 13080 st_ops_link = container_of(link, struct bpf_link_struct_ops, link); 13146 13081 /* Ensure the type of a link is correct */ 13147 13082 if (st_ops_link->map_fd < 0) 13148 - return -EINVAL; 13083 + return libbpf_err(-EINVAL); 13149 13084 13150 13085 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); 13151 13086 /* It can be EBUSY if the map has been used to create or ··· 13731 13666 if (!prog || attach_prog_fd < 0) 13732 13667 return libbpf_err(-EINVAL); 13733 13668 13734 - if (prog->obj->loaded) 13669 + if (prog->obj->state >= OBJ_LOADED) 13735 13670 return libbpf_err(-EINVAL); 13736 13671 13737 13672 if (attach_prog_fd && !attach_func_name) { ··· 13744 13679 13745 13680 if (attach_prog_fd) { 13746 13681 btf_id = libbpf_find_prog_btf_id(attach_func_name, 13747 - attach_prog_fd); 13682 + attach_prog_fd, prog->obj->token_fd); 13748 13683 if (btf_id < 0) 13749 13684 return libbpf_err(btf_id); 13750 13685 } else {
+13
tools/lib/bpf/libbpf.h
··· 242 242 const struct bpf_object_open_opts *opts); 243 243 244 244 /** 245 + * @brief **bpf_object__prepare()** prepares BPF object for loading: 246 + * performs ELF processing, relocations, prepares final state of BPF program 247 + * instructions (accessible with bpf_program__insns()), creates and 248 + * (potentially) pins maps. Leaves BPF object in the state ready for program 249 + * loading. 250 + * @param obj Pointer to a valid BPF object instance returned by 251 + * **bpf_object__open*()** API 252 + * @return 0, on success; negative error code, otherwise, error code is 253 + * stored in errno 254 + */ 255 + int bpf_object__prepare(struct bpf_object *obj); 256 + 257 + /** 245 258 * @brief **bpf_object__load()** loads BPF object into kernel. 246 259 * @param obj Pointer to a valid BPF object instance returned by 247 260 * **bpf_object__open*()** APIs
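
A minimal sketch of how the new prepare step can be used, assuming the usual open/load flow; inspecting the finalized instructions between prepare and load is just one possible use:

#include <errno.h>
#include <bpf/libbpf.h>

int prepare_then_load(const char *path)
{
	struct bpf_object *obj = bpf_object__open(path);
	struct bpf_program *prog;

	if (!obj)
		return -errno;

	/* ELF processing, relocations and map creation happen here ... */
	if (bpf_object__prepare(obj))
		goto err;

	/* ... so the final instructions are available before loading */
	bpf_object__for_each_program(prog, obj) {
		const struct bpf_insn *insns = bpf_program__insns(prog);
		size_t cnt = bpf_program__insn_cnt(prog);

		/* e.g. feed insns/cnt to an offline analysis tool */
		(void)insns; (void)cnt;
	}

	if (bpf_object__load(obj))
		goto err;
	return 0;
err:
	bpf_object__close(obj);
	return -errno;
}
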
+3
tools/lib/bpf/libbpf.map
··· 436 436 bpf_linker__add_buf; 437 437 bpf_linker__add_fd; 438 438 bpf_linker__new_fd; 439 + bpf_object__prepare; 440 + btf__add_decl_attr; 441 + btf__add_type_attr; 439 442 } LIBBPF_1.5.0;
+1
tools/lib/bpf/libbpf_internal.h
··· 409 409 int btf_load_into_kernel(struct btf *btf, 410 410 char *log_buf, size_t log_sz, __u32 log_level, 411 411 int token_fd); 412 + struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd); 412 413 413 414 struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); 414 415 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
+1 -1
tools/lib/bpf/linker.c
··· 2163 2163 2164 2164 obj->sym_map[src_sym_idx] = dst_sym_idx; 2165 2165 2166 - if (sym_type == STT_SECTION && dst_sym) { 2166 + if (sym_type == STT_SECTION && dst_sec) { 2167 2167 dst_sec->sec_sym_idx = dst_sym_idx; 2168 2168 dst_sym->st_value = 0; 2169 2169 }
+20 -4
tools/lib/bpf/relo_core.c
··· 683 683 { 684 684 const struct bpf_core_accessor *acc; 685 685 const struct btf_type *t; 686 - __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; 686 + __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id; 687 687 const struct btf_member *m; 688 688 const struct btf_type *mt; 689 689 bool bitfield; ··· 706 706 if (!acc->name) { 707 707 if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { 708 708 *val = spec->bit_offset / 8; 709 - /* remember field size for load/store mem size */ 710 - sz = btf__resolve_size(spec->btf, acc->type_id); 709 + /* remember field size for load/store mem size; 710 + * note, for arrays we care about individual element 711 + * sizes, not the overall array size 712 + */ 713 + t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id); 714 + while (btf_is_array(t)) 715 + t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id); 716 + sz = btf__resolve_size(spec->btf, elem_id); 711 717 if (sz < 0) 712 718 return -EINVAL; 713 719 *field_sz = sz; ··· 773 767 case BPF_CORE_FIELD_BYTE_OFFSET: 774 768 *val = byte_off; 775 769 if (!bitfield) { 776 - *field_sz = byte_sz; 770 + /* remember field size for load/store mem size; 771 + * note, for arrays we care about individual element 772 + * sizes, not the overall array size 773 + */ 774 + t = skip_mods_and_typedefs(spec->btf, field_type_id, &elem_id); 775 + while (btf_is_array(t)) 776 + t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id); 777 + sz = btf__resolve_size(spec->btf, elem_id); 778 + if (sz < 0) 779 + return -EINVAL; 780 + *field_sz = sz; 777 781 *type_id = field_type_id; 778 782 } 779 783 break;
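
The practical effect is easiest to see with a relocatable array access; a hypothetical sketch (the struct is a stand-in for whatever CO-RE target type a program mirrors):

/* illustrative local mirror of a target struct, relocated by CO-RE */
struct sample {
	__u32 vals[8];
} __attribute__((preserve_access_index));

static __u32 read_third(struct sample *s)
{
	/* the relocated access reads sizeof(__u32) == 4 bytes (one element),
	 * not sizeof(s->vals) == 32 bytes
	 */
	return s->vals[2];
}
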
+1 -1
tools/lib/bpf/str_error.c
··· 36 36 return dst; 37 37 } 38 38 39 - const char *errstr(int err) 39 + const char *libbpf_errstr(int err) 40 40 { 41 41 static __thread char buf[12]; 42 42
+5 -2
tools/lib/bpf/str_error.h
··· 7 7 char *libbpf_strerror_r(int err, char *dst, int len); 8 8 9 9 /** 10 - * @brief **errstr()** returns string corresponding to numeric errno 10 + * @brief **libbpf_errstr()** returns string corresponding to numeric errno 11 11 * @param err negative numeric errno 12 12 * @return pointer to string representation of the errno, that is invalidated 13 13 * upon the next call. 14 14 */ 15 - const char *errstr(int err); 15 + const char *libbpf_errstr(int err); 16 + 17 + #define errstr(err) libbpf_errstr(err) 18 + 16 19 #endif /* __LIBBPF_STR_ERROR_H */
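
Call sites keep the short spelling through the macro while the exported symbol becomes namespaced, e.g. (mirroring the libbpf.c call sites above):

	pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err));
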
+32
tools/lib/bpf/usdt.bpf.h
··· 108 108 return spec->arg_cnt; 109 109 } 110 110 111 + /* Returns the size in bytes of the #*arg_num* (zero-indexed) USDT argument. 112 + * Returns negative error if argument is not found or arg_num is invalid. 113 + */ 114 + static __always_inline 115 + int bpf_usdt_arg_size(struct pt_regs *ctx, __u64 arg_num) 116 + { 117 + struct __bpf_usdt_arg_spec *arg_spec; 118 + struct __bpf_usdt_spec *spec; 119 + int spec_id; 120 + 121 + spec_id = __bpf_usdt_spec_id(ctx); 122 + if (spec_id < 0) 123 + return -ESRCH; 124 + 125 + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); 126 + if (!spec) 127 + return -ESRCH; 128 + 129 + if (arg_num >= BPF_USDT_MAX_ARG_CNT) 130 + return -ENOENT; 131 + barrier_var(arg_num); 132 + if (arg_num >= spec->arg_cnt) 133 + return -ENOENT; 134 + 135 + arg_spec = &spec->args[arg_num]; 136 + 137 + /* arg_spec->arg_bitshift = 64 - arg_sz * 8 138 + * so: arg_sz = (64 - arg_spec->arg_bitshift) / 8 139 + */ 140 + return (unsigned int)(64 - arg_spec->arg_bitshift) / 8; 141 + } 142 + 111 143 /* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res. 112 144 * Returns 0 on success; negative error, otherwise. 113 145 * On error *res is guaranteed to be set to zero.
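
A minimal USDT program sketch using the new size query alongside the existing bpf_usdt_arg() fetcher (the attach target in SEC() is a placeholder):

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "usdt.bpf.h"

SEC("usdt/./mybin:myprovider:myprobe")   /* placeholder attach target */
int on_probe(struct pt_regs *ctx)
{
	long val = 0;
	int sz;

	sz = bpf_usdt_arg_size(ctx, 0);
	if (sz < 0)
		return 0;
	if (bpf_usdt_arg(ctx, 0, &val))
		return 0;
	bpf_printk("arg0 is %d bytes wide, value %ld", sz, val);
	return 0;
}

char _license[] SEC("license") = "GPL";
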
-9
tools/testing/selftests/bpf/DENYLIST.aarch64
··· 1 - bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 2 - bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 3 - kprobe_multi_bench_attach # needs CONFIG_FPROBE 4 - kprobe_multi_test # needs CONFIG_FPROBE 5 - module_attach # prog 'kprobe_multi': failed to auto-attach: -95 6 1 fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 7 2 fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 8 3 tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 9 - fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 10 - fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 11 - fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 12 - missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
+14 -14
tools/testing/selftests/bpf/Makefile
··· 95 95 TEST_INST_SUBDIRS += cpuv4 96 96 endif 97 97 98 - TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o 98 + TEST_GEN_FILES = test_tc_edt.bpf.o 99 99 TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) 100 100 101 101 # Order correspond to 'make run_tests' order 102 102 TEST_PROGS := test_kmod.sh \ 103 - test_xdp_redirect_multi.sh \ 104 - test_tunnel.sh \ 105 - test_lwt_seg6local.sh \ 106 103 test_lirc_mode2.sh \ 107 - test_xdp_vlan_mode_generic.sh \ 108 - test_xdp_vlan_mode_native.sh \ 109 - test_lwt_ip_encap.sh \ 110 104 test_tc_tunnel.sh \ 111 105 test_tc_edt.sh \ 112 106 test_xdping.sh \ ··· 111 117 test_xsk.sh \ 112 118 test_xdp_features.sh 113 119 114 - TEST_PROGS_EXTENDED := with_addr.sh \ 115 - with_tunnels.sh ima_setup.sh verify_sig_setup.sh \ 116 - test_xdp_vlan.sh test_bpftool.py 120 + TEST_PROGS_EXTENDED := \ 121 + ima_setup.sh verify_sig_setup.sh \ 122 + test_bpftool.py 117 123 118 124 TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ 119 125 bpf_test_modorder_y.ko ··· 129 135 veristat \ 130 136 xdp_features \ 131 137 xdp_hw_metadata \ 132 - xdp_redirect_multi \ 133 138 xdp_synproxy \ 134 139 xdping \ 135 140 xskxceiver ··· 177 184 LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets 178 185 # both llvm-config and lib.mk add -D_GNU_SOURCE, which ends up as conflict 179 186 LLVM_CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags)) 180 - LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS)) 181 - LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) 182 - LLVM_LDLIBS += -lstdc++ 187 + # Prefer linking statically if it's available, otherwise fallback to shared 188 + ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static) 189 + LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS)) 190 + LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) 191 + LLVM_LDLIBS += -lstdc++ 192 + else 193 + LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS)) 194 + endif 183 195 LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags) 184 196 endif 185 197 ··· 304 306 BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ 305 307 BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \ 306 308 BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ 309 + BPF_TARGET_ENDIAN=$(BPF_TARGET_ENDIAN) \ 307 310 EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ 308 311 EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \ 309 312 cp $(RUNQSLOWER_OUTPUT)runqslower $@ ··· 683 684 $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ 684 685 $(RESOLVE_BTFIDS) \ 685 686 $(TRUNNER_BPFTOOL) \ 687 + $(OUTPUT)/veristat \ 686 688 | $(TRUNNER_BINARY)-extras 687 689 $$(call msg,BINARY,,$$@) 688 690 $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@
+533
tools/testing/selftests/bpf/bpf_arena_spin_lock.h
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #ifndef BPF_ARENA_SPIN_LOCK_H 4 + #define BPF_ARENA_SPIN_LOCK_H 5 + 6 + #include <vmlinux.h> 7 + #include <bpf/bpf_helpers.h> 8 + #include "bpf_atomic.h" 9 + 10 + #define arch_mcs_spin_lock_contended_label(l, label) smp_cond_load_acquire_label(l, VAL, label) 11 + #define arch_mcs_spin_unlock_contended(l) smp_store_release((l), 1) 12 + 13 + #if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) 14 + 15 + #define EBUSY 16 16 + #define EOPNOTSUPP 95 17 + #define ETIMEDOUT 110 18 + 19 + #ifndef __arena 20 + #define __arena __attribute__((address_space(1))) 21 + #endif 22 + 23 + extern unsigned long CONFIG_NR_CPUS __kconfig; 24 + 25 + /* 26 + * Typically, we'd just rely on the definition in vmlinux.h for qspinlock, but 27 + * PowerPC overrides the definition to define lock->val as u32 instead of 28 + * atomic_t, leading to compilation errors. Import a local definition below so 29 + * that we don't depend on the vmlinux.h version. 30 + */ 31 + 32 + struct __qspinlock { 33 + union { 34 + atomic_t val; 35 + struct { 36 + u8 locked; 37 + u8 pending; 38 + }; 39 + struct { 40 + u16 locked_pending; 41 + u16 tail; 42 + }; 43 + }; 44 + }; 45 + 46 + #define arena_spinlock_t struct __qspinlock 47 + /* FIXME: Using typedef causes CO-RE relocation error */ 48 + /* typedef struct qspinlock arena_spinlock_t; */ 49 + 50 + struct arena_mcs_spinlock { 51 + struct arena_mcs_spinlock __arena *next; 52 + int locked; 53 + int count; 54 + }; 55 + 56 + struct arena_qnode { 57 + struct arena_mcs_spinlock mcs; 58 + }; 59 + 60 + #define _Q_MAX_NODES 4 61 + #define _Q_PENDING_LOOPS 1 62 + 63 + /* 64 + * Bitfields in the atomic value: 65 + * 66 + * 0- 7: locked byte 67 + * 8: pending 68 + * 9-15: not used 69 + * 16-17: tail index 70 + * 18-31: tail cpu (+1) 71 + */ 72 + #define _Q_MAX_CPUS 1024 73 + 74 + #define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ 75 + << _Q_ ## type ## _OFFSET) 76 + #define _Q_LOCKED_OFFSET 0 77 + #define _Q_LOCKED_BITS 8 78 + #define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) 79 + 80 + #define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) 81 + #define _Q_PENDING_BITS 8 82 + #define _Q_PENDING_MASK _Q_SET_MASK(PENDING) 83 + 84 + #define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) 85 + #define _Q_TAIL_IDX_BITS 2 86 + #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) 87 + 88 + #define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) 89 + #define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) 90 + #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) 91 + 92 + #define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET 93 + #define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK) 94 + 95 + #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) 96 + #define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) 97 + 98 + #define likely(x) __builtin_expect(!!(x), 1) 99 + #define unlikely(x) __builtin_expect(!!(x), 0) 100 + 101 + struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES]; 102 + 103 + static inline u32 encode_tail(int cpu, int idx) 104 + { 105 + u32 tail; 106 + 107 + tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET; 108 + tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ 109 + 110 + return tail; 111 + } 112 + 113 + static inline struct arena_mcs_spinlock __arena *decode_tail(u32 tail) 114 + { 115 + u32 cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; 116 + u32 idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; 117 + 118 + return &qnodes[cpu][idx].mcs; 119 + } 120 + 121 + static 
inline 122 + struct arena_mcs_spinlock __arena *grab_mcs_node(struct arena_mcs_spinlock __arena *base, int idx) 123 + { 124 + return &((struct arena_qnode __arena *)base + idx)->mcs; 125 + } 126 + 127 + #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) 128 + 129 + /** 130 + * xchg_tail - Put in the new queue tail code word & retrieve previous one 131 + * @lock : Pointer to queued spinlock structure 132 + * @tail : The new queue tail code word 133 + * Return: The previous queue tail code word 134 + * 135 + * xchg(lock, tail) 136 + * 137 + * p,*,* -> n,*,* ; prev = xchg(lock, node) 138 + */ 139 + static __always_inline u32 xchg_tail(arena_spinlock_t __arena *lock, u32 tail) 140 + { 141 + u32 old, new; 142 + 143 + old = atomic_read(&lock->val); 144 + do { 145 + new = (old & _Q_LOCKED_PENDING_MASK) | tail; 146 + /* 147 + * We can use relaxed semantics since the caller ensures that 148 + * the MCS node is properly initialized before updating the 149 + * tail. 150 + */ 151 + /* These loops are not expected to stall, but we still need to 152 + * prove to the verifier they will terminate eventually. 153 + */ 154 + cond_break_label(out); 155 + } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new)); 156 + 157 + return old; 158 + out: 159 + bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__); 160 + return old; 161 + } 162 + 163 + /** 164 + * clear_pending - clear the pending bit. 165 + * @lock: Pointer to queued spinlock structure 166 + * 167 + * *,1,* -> *,0,* 168 + */ 169 + static __always_inline void clear_pending(arena_spinlock_t __arena *lock) 170 + { 171 + WRITE_ONCE(lock->pending, 0); 172 + } 173 + 174 + /** 175 + * clear_pending_set_locked - take ownership and clear the pending bit. 176 + * @lock: Pointer to queued spinlock structure 177 + * 178 + * *,1,0 -> *,0,1 179 + * 180 + * Lock stealing is not allowed if this function is used. 181 + */ 182 + static __always_inline void clear_pending_set_locked(arena_spinlock_t __arena *lock) 183 + { 184 + WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); 185 + } 186 + 187 + /** 188 + * set_locked - Set the lock bit and own the lock 189 + * @lock: Pointer to queued spinlock structure 190 + * 191 + * *,*,0 -> *,0,1 192 + */ 193 + static __always_inline void set_locked(arena_spinlock_t __arena *lock) 194 + { 195 + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); 196 + } 197 + 198 + static __always_inline 199 + u32 arena_fetch_set_pending_acquire(arena_spinlock_t __arena *lock) 200 + { 201 + u32 old, new; 202 + 203 + old = atomic_read(&lock->val); 204 + do { 205 + new = old | _Q_PENDING_VAL; 206 + /* 207 + * These loops are not expected to stall, but we still need to 208 + * prove to the verifier they will terminate eventually. 
209 + */ 210 + cond_break_label(out); 211 + } while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new)); 212 + 213 + return old; 214 + out: 215 + bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__); 216 + return old; 217 + } 218 + 219 + /** 220 + * arena_spin_trylock - try to acquire the queued spinlock 221 + * @lock : Pointer to queued spinlock structure 222 + * Return: 1 if lock acquired, 0 if failed 223 + */ 224 + static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock) 225 + { 226 + int val = atomic_read(&lock->val); 227 + 228 + if (unlikely(val)) 229 + return 0; 230 + 231 + return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)); 232 + } 233 + 234 + __noinline 235 + int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val) 236 + { 237 + struct arena_mcs_spinlock __arena *prev, *next, *node0, *node; 238 + int ret = -ETIMEDOUT; 239 + u32 old, tail; 240 + int idx; 241 + 242 + /* 243 + * Wait for in-progress pending->locked hand-overs with a bounded 244 + * number of spins so that we guarantee forward progress. 245 + * 246 + * 0,1,0 -> 0,0,1 247 + */ 248 + if (val == _Q_PENDING_VAL) { 249 + int cnt = _Q_PENDING_LOOPS; 250 + val = atomic_cond_read_relaxed_label(&lock->val, 251 + (VAL != _Q_PENDING_VAL) || !cnt--, 252 + release_err); 253 + } 254 + 255 + /* 256 + * If we observe any contention; queue. 257 + */ 258 + if (val & ~_Q_LOCKED_MASK) 259 + goto queue; 260 + 261 + /* 262 + * trylock || pending 263 + * 264 + * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock 265 + */ 266 + val = arena_fetch_set_pending_acquire(lock); 267 + 268 + /* 269 + * If we observe contention, there is a concurrent locker. 270 + * 271 + * Undo and queue; our setting of PENDING might have made the 272 + * n,0,0 -> 0,0,0 transition fail and it will now be waiting 273 + * on @next to become !NULL. 274 + */ 275 + if (unlikely(val & ~_Q_LOCKED_MASK)) { 276 + 277 + /* Undo PENDING if we set it. */ 278 + if (!(val & _Q_PENDING_MASK)) 279 + clear_pending(lock); 280 + 281 + goto queue; 282 + } 283 + 284 + /* 285 + * We're pending, wait for the owner to go away. 286 + * 287 + * 0,1,1 -> *,1,0 288 + * 289 + * this wait loop must be a load-acquire such that we match the 290 + * store-release that clears the locked bit and create lock 291 + * sequentiality; this is because not all 292 + * clear_pending_set_locked() implementations imply full 293 + * barriers. 294 + */ 295 + if (val & _Q_LOCKED_MASK) 296 + smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); 297 + 298 + /* 299 + * take ownership and clear the pending bit. 300 + * 301 + * 0,1,0 -> 0,0,1 302 + */ 303 + clear_pending_set_locked(lock); 304 + return 0; 305 + 306 + /* 307 + * End of pending bit optimistic spinning and beginning of MCS 308 + * queuing. 309 + */ 310 + queue: 311 + node0 = &(qnodes[bpf_get_smp_processor_id()])[0].mcs; 312 + idx = node0->count++; 313 + tail = encode_tail(bpf_get_smp_processor_id(), idx); 314 + 315 + /* 316 + * 4 nodes are allocated based on the assumption that there will not be 317 + * nested NMIs taking spinlocks. That may not be true in some 318 + * architectures even though the chance of needing more than 4 nodes 319 + * will still be extremely unlikely. When that happens, we simply return 320 + * an error. Original qspinlock has a trylock fallback in this case. 
321 + */ 322 + if (unlikely(idx >= _Q_MAX_NODES)) { 323 + ret = -EBUSY; 324 + goto release_node_err; 325 + } 326 + 327 + node = grab_mcs_node(node0, idx); 328 + 329 + /* 330 + * Ensure that we increment the head node->count before initialising 331 + * the actual node. If the compiler is kind enough to reorder these 332 + * stores, then an IRQ could overwrite our assignments. 333 + */ 334 + barrier(); 335 + 336 + node->locked = 0; 337 + node->next = NULL; 338 + 339 + /* 340 + * We touched a (possibly) cold cacheline in the per-cpu queue node; 341 + * attempt the trylock once more in the hope someone let go while we 342 + * weren't watching. 343 + */ 344 + if (arena_spin_trylock(lock)) 345 + goto release; 346 + 347 + /* 348 + * Ensure that the initialisation of @node is complete before we 349 + * publish the updated tail via xchg_tail() and potentially link 350 + * @node into the waitqueue via WRITE_ONCE(prev->next, node) below. 351 + */ 352 + smp_wmb(); 353 + 354 + /* 355 + * Publish the updated tail. 356 + * We have already touched the queueing cacheline; don't bother with 357 + * pending stuff. 358 + * 359 + * p,*,* -> n,*,* 360 + */ 361 + old = xchg_tail(lock, tail); 362 + next = NULL; 363 + 364 + /* 365 + * if there was a previous node; link it and wait until reaching the 366 + * head of the waitqueue. 367 + */ 368 + if (old & _Q_TAIL_MASK) { 369 + prev = decode_tail(old); 370 + 371 + /* Link @node into the waitqueue. */ 372 + WRITE_ONCE(prev->next, node); 373 + 374 + arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); 375 + 376 + /* 377 + * While waiting for the MCS lock, the next pointer may have 378 + * been set by another lock waiter. We cannot prefetch here 379 + * due to lack of equivalent instruction in BPF ISA. 380 + */ 381 + next = READ_ONCE(node->next); 382 + } 383 + 384 + /* 385 + * we're at the head of the waitqueue, wait for the owner & pending to 386 + * go away. 387 + * 388 + * *,x,y -> *,0,0 389 + * 390 + * this wait loop must use a load-acquire such that we match the 391 + * store-release that clears the locked bit and create lock 392 + * sequentiality; this is because the set_locked() function below 393 + * does not imply a full barrier. 394 + */ 395 + val = atomic_cond_read_acquire_label(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK), 396 + release_node_err); 397 + 398 + /* 399 + * claim the lock: 400 + * 401 + * n,0,0 -> 0,0,1 : lock, uncontended 402 + * *,*,0 -> *,*,1 : lock, contended 403 + * 404 + * If the queue head is the only one in the queue (lock value == tail) 405 + * and nobody is pending, clear the tail code and grab the lock. 406 + * Otherwise, we only need to grab the lock. 407 + */ 408 + 409 + /* 410 + * In the PV case we might already have _Q_LOCKED_VAL set, because 411 + * of lock stealing; therefore we must also allow: 412 + * 413 + * n,0,1 -> 0,0,1 414 + * 415 + * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the 416 + * above wait condition, therefore any concurrent setting of 417 + * PENDING will make the uncontended transition fail. 418 + */ 419 + if ((val & _Q_TAIL_MASK) == tail) { 420 + if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL)) 421 + goto release; /* No contention */ 422 + } 423 + 424 + /* 425 + * Either somebody is queued behind us or _Q_PENDING_VAL got set 426 + * which will then detect the remaining tail and queue behind us 427 + * ensuring we'll see a @next. 428 + */ 429 + set_locked(lock); 430 + 431 + /* 432 + * contended path; wait for next if not observed yet, release. 
433 + */ 434 + if (!next) 435 + next = smp_cond_load_relaxed_label(&node->next, (VAL), release_node_err); 436 + 437 + arch_mcs_spin_unlock_contended(&next->locked); 438 + 439 + release:; 440 + /* 441 + * release the node 442 + * 443 + * Doing a normal dec vs this_cpu_dec is fine. An upper context always 444 + * decrements count it incremented before returning, thus we're fine. 445 + * For contexts interrupting us, they either observe our dec or not. 446 + * Just ensure the compiler doesn't reorder this statement, as a 447 + * this_cpu_dec implicitly implied that. 448 + */ 449 + barrier(); 450 + node0->count--; 451 + return 0; 452 + release_node_err: 453 + barrier(); 454 + node0->count--; 455 + goto release_err; 456 + release_err: 457 + return ret; 458 + } 459 + 460 + /** 461 + * arena_spin_lock - acquire a queued spinlock 462 + * @lock: Pointer to queued spinlock structure 463 + * 464 + * On error, returned value will be negative. 465 + * On success, zero is returned. 466 + * 467 + * The return value _must_ be tested against zero for success, 468 + * instead of checking it against negative, for passing the 469 + * BPF verifier. 470 + * 471 + * The user should do: 472 + * if (arena_spin_lock(...) != 0) // failure 473 + * or 474 + * if (arena_spin_lock(...) == 0) // success 475 + * or 476 + * if (arena_spin_lock(...)) // failure 477 + * or 478 + * if (!arena_spin_lock(...)) // success 479 + * instead of: 480 + * if (arena_spin_lock(...) < 0) // failure 481 + * 482 + * The return value can still be inspected later. 483 + */ 484 + static __always_inline int arena_spin_lock(arena_spinlock_t __arena *lock) 485 + { 486 + int val = 0; 487 + 488 + if (CONFIG_NR_CPUS > 1024) 489 + return -EOPNOTSUPP; 490 + 491 + bpf_preempt_disable(); 492 + if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) 493 + return 0; 494 + 495 + val = arena_spin_lock_slowpath(lock, val); 496 + /* FIXME: bpf_assert_range(-MAX_ERRNO, 0) once we have it working for all cases. */ 497 + if (val) 498 + bpf_preempt_enable(); 499 + return val; 500 + } 501 + 502 + /** 503 + * arena_spin_unlock - release a queued spinlock 504 + * @lock : Pointer to queued spinlock structure 505 + */ 506 + static __always_inline void arena_spin_unlock(arena_spinlock_t __arena *lock) 507 + { 508 + /* 509 + * unlock() needs release semantics: 510 + */ 511 + smp_store_release(&lock->locked, 0); 512 + bpf_preempt_enable(); 513 + } 514 + 515 + #define arena_spin_lock_irqsave(lock, flags) \ 516 + ({ \ 517 + int __ret; \ 518 + bpf_local_irq_save(&(flags)); \ 519 + __ret = arena_spin_lock((lock)); \ 520 + if (__ret) \ 521 + bpf_local_irq_restore(&(flags)); \ 522 + (__ret); \ 523 + }) 524 + 525 + #define arena_spin_unlock_irqrestore(lock, flags) \ 526 + ({ \ 527 + arena_spin_unlock((lock)); \ 528 + bpf_local_irq_restore(&(flags)); \ 529 + }) 530 + 531 + #endif 532 + 533 + #endif /* BPF_ARENA_SPIN_LOCK_H */
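
For context, a minimal BPF-side sketch of how this lock is intended to be used; it assumes an arena map defined in the usual selftest style, ENABLE_ATOMICS_TESTS, and a Clang with __BPF_FEATURE_ADDR_SPACE_CAST, and the return value of arena_spin_lock() must be tested against zero as the comment above explains:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "bpf_arena_spin_lock.h"

struct {
	__uint(type, BPF_MAP_TYPE_ARENA);
	__uint(map_flags, BPF_F_MMAPABLE);
	__uint(max_entries, 100);
} arena SEC(".maps");

#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
arena_spinlock_t __arena lock;
int counter;

SEC("tc")
int incr(struct __sk_buff *skb)
{
	/* must compare against zero, not against negative values */
	if (arena_spin_lock(&lock))
		return 0;
	counter++;
	arena_spin_unlock(&lock);
	return 0;
}
#endif

char _license[] SEC("license") = "GPL";
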
+140
tools/testing/selftests/bpf/bpf_atomic.h
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #ifndef BPF_ATOMIC_H 4 + #define BPF_ATOMIC_H 5 + 6 + #include <vmlinux.h> 7 + #include <bpf/bpf_helpers.h> 8 + #include "bpf_experimental.h" 9 + 10 + extern bool CONFIG_X86_64 __kconfig __weak; 11 + 12 + /* 13 + * __unqual_typeof(x) - Declare an unqualified scalar type, leaving 14 + * non-scalar types unchanged, 15 + * 16 + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' 17 + * is not type-compatible with 'signed char', and we define a separate case. 18 + * 19 + * This is copied verbatim from kernel's include/linux/compiler_types.h, but 20 + * with default expression (for pointers) changed from (x) to (typeof(x)0). 21 + * 22 + * This is because LLVM has a bug where for lvalue (x), it does not get rid of 23 + * an extra address_space qualifier, but does in case of rvalue (typeof(x)0). 24 + * Hence, for pointers, we need to create an rvalue expression to get the 25 + * desired type. See https://github.com/llvm/llvm-project/issues/53400. 26 + */ 27 + #define __scalar_type_to_expr_cases(type) \ 28 + unsigned type : (unsigned type)0, signed type : (signed type)0 29 + 30 + #define __unqual_typeof(x) \ 31 + typeof(_Generic((x), \ 32 + char: (char)0, \ 33 + __scalar_type_to_expr_cases(char), \ 34 + __scalar_type_to_expr_cases(short), \ 35 + __scalar_type_to_expr_cases(int), \ 36 + __scalar_type_to_expr_cases(long), \ 37 + __scalar_type_to_expr_cases(long long), \ 38 + default: (typeof(x))0)) 39 + 40 + /* No-op for BPF */ 41 + #define cpu_relax() ({}) 42 + 43 + #define READ_ONCE(x) (*(volatile typeof(x) *)&(x)) 44 + 45 + #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val)) 46 + 47 + #define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new) 48 + 49 + #define try_cmpxchg(p, pold, new) \ 50 + ({ \ 51 + __unqual_typeof(*(pold)) __o = *(pold); \ 52 + __unqual_typeof(*(p)) __r = cmpxchg(p, __o, new); \ 53 + if (__r != __o) \ 54 + *(pold) = __r; \ 55 + __r == __o; \ 56 + }) 57 + 58 + #define try_cmpxchg_relaxed(p, pold, new) try_cmpxchg(p, pold, new) 59 + 60 + #define try_cmpxchg_acquire(p, pold, new) try_cmpxchg(p, pold, new) 61 + 62 + #define smp_mb() \ 63 + ({ \ 64 + unsigned long __val; \ 65 + __sync_fetch_and_add(&__val, 0); \ 66 + }) 67 + 68 + #define smp_rmb() \ 69 + ({ \ 70 + if (!CONFIG_X86_64) \ 71 + smp_mb(); \ 72 + else \ 73 + barrier(); \ 74 + }) 75 + 76 + #define smp_wmb() \ 77 + ({ \ 78 + if (!CONFIG_X86_64) \ 79 + smp_mb(); \ 80 + else \ 81 + barrier(); \ 82 + }) 83 + 84 + /* Control dependency provides LOAD->STORE, provide LOAD->LOAD */ 85 + #define smp_acquire__after_ctrl_dep() ({ smp_rmb(); }) 86 + 87 + #define smp_load_acquire(p) \ 88 + ({ \ 89 + __unqual_typeof(*(p)) __v = READ_ONCE(*(p)); \ 90 + if (!CONFIG_X86_64) \ 91 + smp_mb(); \ 92 + barrier(); \ 93 + __v; \ 94 + }) 95 + 96 + #define smp_store_release(p, val) \ 97 + ({ \ 98 + if (!CONFIG_X86_64) \ 99 + smp_mb(); \ 100 + barrier(); \ 101 + WRITE_ONCE(*(p), val); \ 102 + }) 103 + 104 + #define smp_cond_load_relaxed_label(p, cond_expr, label) \ 105 + ({ \ 106 + typeof(p) __ptr = (p); \ 107 + __unqual_typeof(*(p)) VAL; \ 108 + for (;;) { \ 109 + VAL = (__unqual_typeof(*(p)))READ_ONCE(*__ptr); \ 110 + if (cond_expr) \ 111 + break; \ 112 + cond_break_label(label); \ 113 + cpu_relax(); \ 114 + } \ 115 + (typeof(*(p)))VAL; \ 116 + }) 117 + 118 + #define smp_cond_load_acquire_label(p, cond_expr, label) \ 119 + ({ \ 120 + __unqual_typeof(*p) __val = \ 121 + 
smp_cond_load_relaxed_label(p, cond_expr, label); \ 122 + smp_acquire__after_ctrl_dep(); \ 123 + (typeof(*(p)))__val; \ 124 + }) 125 + 126 + #define atomic_read(p) READ_ONCE((p)->counter) 127 + 128 + #define atomic_cond_read_relaxed_label(p, cond_expr, label) \ 129 + smp_cond_load_relaxed_label(&(p)->counter, cond_expr, label) 130 + 131 + #define atomic_cond_read_acquire_label(p, cond_expr, label) \ 132 + smp_cond_load_acquire_label(&(p)->counter, cond_expr, label) 133 + 134 + #define atomic_try_cmpxchg_relaxed(p, pold, new) \ 135 + try_cmpxchg_relaxed(&(p)->counter, pold, new) 136 + 137 + #define atomic_try_cmpxchg_acquire(p, pold, new) \ 138 + try_cmpxchg_acquire(&(p)->counter, pold, new) 139 + 140 + #endif /* BPF_ATOMIC_H */
+9 -6
tools/testing/selftests/bpf/bpf_experimental.h
··· 368 368 ret; \ 369 369 }) 370 370 371 - #define cond_break \ 371 + #define __cond_break(expr) \ 372 372 ({ __label__ l_break, l_continue; \ 373 373 asm volatile goto("may_goto %l[l_break]" \ 374 374 :::: l_break); \ 375 375 goto l_continue; \ 376 - l_break: break; \ 376 + l_break: expr; \ 377 377 l_continue:; \ 378 378 }) 379 379 #else ··· 392 392 ret; \ 393 393 }) 394 394 395 - #define cond_break \ 395 + #define __cond_break(expr) \ 396 396 ({ __label__ l_break, l_continue; \ 397 397 asm volatile goto("1:.byte 0xe5; \ 398 398 .byte 0; \ ··· 400 400 .short 0" \ 401 401 :::: l_break); \ 402 402 goto l_continue; \ 403 - l_break: break; \ 403 + l_break: expr; \ 404 404 l_continue:; \ 405 405 }) 406 406 #else ··· 418 418 ret; \ 419 419 }) 420 420 421 - #define cond_break \ 421 + #define __cond_break(expr) \ 422 422 ({ __label__ l_break, l_continue; \ 423 423 asm volatile goto("1:.byte 0xe5; \ 424 424 .byte 0; \ ··· 426 426 .short 0" \ 427 427 :::: l_break); \ 428 428 goto l_continue; \ 429 - l_break: break; \ 429 + l_break: expr; \ 430 430 l_continue:; \ 431 431 }) 432 432 #endif 433 433 #endif 434 + 435 + #define cond_break __cond_break(break) 436 + #define cond_break_label(label) __cond_break(goto label) 434 437 435 438 #ifndef bpf_nop_mov 436 439 #define bpf_nop_mov(var) \
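
cond_break_label() behaves like cond_break but jumps to a caller-supplied label once the verifier's may_goto budget is exhausted, which is what the arena spin lock code above relies on; a minimal sketch:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "bpf_experimental.h"

SEC("syscall")
int bounded_sum(void *ctx)
{
	int sum = 0, i;

	for (i = 0; i < 1000000; i++) {
		sum += i;
		/* when the may_goto budget runs out, jump to 'timeout' */
		cond_break_label(timeout);
	}
	return sum;
timeout:
	return 0;
}

char _license[] SEC("license") = "GPL";
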
+5
tools/testing/selftests/bpf/bpf_kfuncs.h
··· 87 87 */ 88 88 extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name, 89 89 struct bpf_dynptr *value_ptr) __ksym __weak; 90 + 91 + extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str, 92 + const struct bpf_dynptr *value_p, int flags) __ksym __weak; 93 + extern int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) __ksym __weak; 94 + 90 95 #endif
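
A sleepable LSM sketch of the new setter; the kernel gates which hooks may call these kfuncs, so the hook chosen here is an assumption, and only names under the "security.bpf." prefix are accepted:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_kfuncs.h"

char tag[] = "tagged";           /* global so it can back a dynptr */

SEC("lsm.s/inode_getxattr")      /* assumption: a sleepable hook that receives a dentry */
int BPF_PROG(tag_file, struct dentry *dentry, const char *name)
{
	struct bpf_dynptr value;

	bpf_dynptr_from_mem(tag, sizeof(tag), 0, &value);
	/* names outside the "security.bpf." prefix are rejected by the kernel */
	bpf_set_dentry_xattr(dentry, "security.bpf.example", &value, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";
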
+4 -4
tools/testing/selftests/bpf/cap_helpers.c
··· 19 19 20 20 err = capget(&hdr, data); 21 21 if (err) 22 - return err; 22 + return -errno; 23 23 24 24 if (old_caps) 25 25 *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective; ··· 32 32 data[1].effective |= cap1; 33 33 err = capset(&hdr, data); 34 34 if (err) 35 - return err; 35 + return -errno; 36 36 37 37 return 0; 38 38 } ··· 49 49 50 50 err = capget(&hdr, data); 51 51 if (err) 52 - return err; 52 + return -errno; 53 53 54 54 if (old_caps) 55 55 *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective; ··· 61 61 data[1].effective &= ~cap1; 62 62 err = capset(&hdr, data); 63 63 if (err) 64 - return err; 64 + return -errno; 65 65 66 66 return 0; 67 67 }
+1
tools/testing/selftests/bpf/cap_helpers.h
··· 4 4 5 5 #include <linux/types.h> 6 6 #include <linux/capability.h> 7 + #include <errno.h> 7 8 8 9 #ifndef CAP_PERFMON 9 10 #define CAP_PERFMON 38
+76 -35
tools/testing/selftests/bpf/network_helpers.c
··· 446 446 return "ping"; 447 447 } 448 448 449 + int append_tid(char *str, size_t sz) 450 + { 451 + size_t end; 452 + 453 + if (!str) 454 + return -1; 455 + 456 + end = strlen(str); 457 + if (end + 8 > sz) 458 + return -1; 459 + 460 + sprintf(&str[end], "%07d", gettid()); 461 + str[end + 7] = '\0'; 462 + 463 + return 0; 464 + } 465 + 449 466 int remove_netns(const char *name) 450 467 { 451 468 char *cmd; ··· 778 761 int pcap_fd; 779 762 }; 780 763 764 + static int __base_pr(const char *format, va_list args) 765 + { 766 + return vfprintf(stdout, format, args); 767 + } 768 + 769 + static tm_print_fn_t __tm_pr = __base_pr; 770 + 771 + tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn) 772 + { 773 + tm_print_fn_t old_print_fn; 774 + 775 + old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED); 776 + 777 + return old_print_fn; 778 + } 779 + 780 + void tm_print(const char *format, ...) 781 + { 782 + tm_print_fn_t print_fn; 783 + va_list args; 784 + 785 + print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED); 786 + if (!print_fn) 787 + return; 788 + 789 + va_start(args, format); 790 + print_fn(format, args); 791 + va_end(args); 792 + } 793 + 781 794 /* Is this packet captured with a Ethernet protocol type? */ 782 795 static bool is_ethernet(const u_char *packet) 783 796 { ··· 825 778 case 770: /* ARPHRD_FRAD */ 826 779 case 778: /* ARPHDR_IPGRE */ 827 780 case 803: /* ARPHRD_IEEE80211_RADIOTAP */ 828 - printf("Packet captured: arphdr_type=%d\n", arphdr_type); 781 + tm_print("Packet captured: arphdr_type=%d\n", arphdr_type); 829 782 return false; 830 783 } 831 784 return true; ··· 846 799 return "Unknown"; 847 800 } 848 801 802 + #define MAX_FLAGS_STRLEN 21 849 803 /* Show the information of the transport layer in the packet */ 850 804 static void show_transport(const u_char *packet, u16 len, u32 ifindex, 851 805 const char *src_addr, const char *dst_addr, 852 806 u16 proto, bool ipv6, u8 pkt_type) 853 807 { 854 - char *ifname, _ifname[IF_NAMESIZE]; 808 + char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = ""; 855 809 const char *transport_str; 856 810 u16 src_port, dst_port; 857 811 struct udphdr *udp; ··· 875 827 dst_port = ntohs(tcp->dest); 876 828 transport_str = "TCP"; 877 829 } else if (proto == IPPROTO_ICMP) { 878 - printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n", 879 - ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 880 - packet[0], packet[1]); 830 + tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n", 831 + ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 832 + packet[0], packet[1]); 881 833 return; 882 834 } else if (proto == IPPROTO_ICMPV6) { 883 - printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n", 884 - ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 885 - packet[0], packet[1]); 835 + tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n", 836 + ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 837 + packet[0], packet[1]); 886 838 return; 887 839 } else { 888 - printf("%-7s %-3s %s %s > %s: protocol %d\n", 889 - ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4", 890 - src_addr, dst_addr, proto); 840 + tm_print("%-7s %-3s %s %s > %s: protocol %d\n", 841 + ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4", 842 + src_addr, dst_addr, proto); 891 843 return; 892 844 } 893 845 894 846 /* TCP or UDP*/ 895 847 896 - flockfile(stdout); 848 + if (proto == IPPROTO_TCP) 849 + snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s", 850 + tcp->fin ? 
", FIN" : "", 851 + tcp->syn ? ", SYN" : "", 852 + tcp->rst ? ", RST" : "", 853 + tcp->ack ? ", ACK" : ""); 854 + 897 855 if (ipv6) 898 - printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d", 899 - ifname, pkt_type_str(pkt_type), src_addr, src_port, 900 - dst_addr, dst_port, transport_str, len); 856 + tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n", 857 + ifname, pkt_type_str(pkt_type), src_addr, src_port, 858 + dst_addr, dst_port, transport_str, len, flags); 901 859 else 902 - printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d", 903 - ifname, pkt_type_str(pkt_type), src_addr, src_port, 904 - dst_addr, dst_port, transport_str, len); 905 - 906 - if (proto == IPPROTO_TCP) { 907 - if (tcp->fin) 908 - printf(", FIN"); 909 - if (tcp->syn) 910 - printf(", SYN"); 911 - if (tcp->rst) 912 - printf(", RST"); 913 - if (tcp->ack) 914 - printf(", ACK"); 915 - } 916 - 917 - printf("\n"); 918 - funlockfile(stdout); 860 + tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n", 861 + ifname, pkt_type_str(pkt_type), src_addr, src_port, 862 + dst_addr, dst_port, transport_str, len, flags); 919 863 } 920 864 921 865 static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type) ··· 1022 982 ifname = _ifname; 1023 983 } 1024 984 1025 - printf("%-7s %-3s Unknown network protocol type 0x%x\n", 1026 - ifname, pkt_type_str(ptype), proto); 985 + tm_print("%-7s %-3s Unknown network protocol type 0x%x\n", 986 + ifname, pkt_type_str(ptype), proto); 1027 987 } 1028 988 } 1029 989 ··· 1223 1183 write(ctx->wake_fd, &w, sizeof(w)); 1224 1184 pthread_join(ctx->thread, NULL); 1225 1185 1226 - printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1); 1186 + tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1); 1227 1187 1228 1188 traffic_monitor_release(ctx); 1229 1189 } 1190 + 1230 1191 #endif /* TRAFFIC_MONITOR */
+21
tools/testing/selftests/bpf/network_helpers.h
··· 18 18 #include <netinet/udp.h> 19 19 #include <bpf/bpf_endian.h> 20 20 #include <net/if.h> 21 + #include <stdio.h> 21 22 22 23 #define MAGIC_VAL 0x1234 23 24 #define NUM_ITER 100000 ··· 101 100 int send_recv_data(int lfd, int fd, uint32_t total_bytes); 102 101 int make_netns(const char *name); 103 102 int remove_netns(const char *name); 103 + 104 + /** 105 + * append_tid() - Append thread ID to the given string. 106 + * 107 + * @str: string to extend 108 + * @sz: string's size 109 + * 110 + * 8 characters are used to append the thread ID (7 digits + '\0') 111 + * 112 + * Returns -1 on errors, 0 otherwise 113 + */ 114 + int append_tid(char *str, size_t sz); 104 115 105 116 static __u16 csum_fold(__u32 csum) 106 117 { ··· 253 240 254 241 struct tmonitor_ctx; 255 242 243 + typedef int (*tm_print_fn_t)(const char *format, va_list args); 244 + 256 245 #ifdef TRAFFIC_MONITOR 257 246 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, 258 247 const char *subtest_name); 259 248 void traffic_monitor_stop(struct tmonitor_ctx *ctx); 249 + tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn); 260 250 #else 261 251 static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, 262 252 const char *subtest_name) ··· 269 253 270 254 static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx) 271 255 { 256 + } 257 + 258 + static inline tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn) 259 + { 260 + return NULL; 272 261 } 273 262 #endif 274 263
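
For example, a test that needs per-thread network namespace names can size the buffer for the extra 8 bytes and do (illustrative only, inside a void test function):

	char ns_name[16] = "ns_test_";   /* 8 chars used + 8 spare for the tid */

	if (append_tid(ns_name, sizeof(ns_name)))
		return;                  /* would not fit */
	/* ns_name is now unique per thread, e.g. "ns_test_0001234" */
	make_netns(ns_name);
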
+10 -1
tools/testing/selftests/bpf/prog_tests/align.c
··· 610 610 .log_size = sizeof(bpf_vlog), 611 611 .log_level = 2, 612 612 ); 613 + const char *main_pass_start = "0: R1=ctx() R10=fp0"; 613 614 const char *line_ptr; 614 615 int cur_line = -1; 615 616 int prog_len, i; 617 + char *start; 616 618 int fd_prog; 617 619 int ret; 618 620 ··· 634 632 ret = 0; 635 633 /* We make a local copy so that we can strtok() it */ 636 634 strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy)); 637 - line_ptr = strtok(bpf_vlog_copy, "\n"); 635 + start = strstr(bpf_vlog_copy, main_pass_start); 636 + if (!start) { 637 + ret = 1; 638 + printf("Can't find initial line '%s'\n", main_pass_start); 639 + goto out; 640 + } 641 + line_ptr = strtok(start, "\n"); 638 642 for (i = 0; i < MAX_MATCHES; i++) { 639 643 struct bpf_reg_match m = test->matches[i]; 640 644 const char *p; ··· 690 682 break; 691 683 } 692 684 } 685 + out: 693 686 if (fd_prog >= 0) 694 687 close(fd_prog); 695 688 }
+65 -1
tools/testing/selftests/bpf/prog_tests/arena_atomics.c
··· 162 162 ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails"); 163 163 } 164 164 165 + static void test_load_acquire(struct arena_atomics *skel) 166 + { 167 + LIBBPF_OPTS(bpf_test_run_opts, topts); 168 + int err, prog_fd; 169 + 170 + if (skel->data->skip_lacq_srel_tests) { 171 + printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support load-acquire\n", 172 + __func__); 173 + test__skip(); 174 + return; 175 + } 176 + 177 + /* No need to attach it, just run it directly */ 178 + prog_fd = bpf_program__fd(skel->progs.load_acquire); 179 + err = bpf_prog_test_run_opts(prog_fd, &topts); 180 + if (!ASSERT_OK(err, "test_run_opts err")) 181 + return; 182 + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) 183 + return; 184 + 185 + ASSERT_EQ(skel->arena->load_acquire8_result, 0x12, 186 + "load_acquire8_result"); 187 + ASSERT_EQ(skel->arena->load_acquire16_result, 0x1234, 188 + "load_acquire16_result"); 189 + ASSERT_EQ(skel->arena->load_acquire32_result, 0x12345678, 190 + "load_acquire32_result"); 191 + ASSERT_EQ(skel->arena->load_acquire64_result, 0x1234567890abcdef, 192 + "load_acquire64_result"); 193 + } 194 + 195 + static void test_store_release(struct arena_atomics *skel) 196 + { 197 + LIBBPF_OPTS(bpf_test_run_opts, topts); 198 + int err, prog_fd; 199 + 200 + if (skel->data->skip_lacq_srel_tests) { 201 + printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support store-release\n", 202 + __func__); 203 + test__skip(); 204 + return; 205 + } 206 + 207 + /* No need to attach it, just run it directly */ 208 + prog_fd = bpf_program__fd(skel->progs.store_release); 209 + err = bpf_prog_test_run_opts(prog_fd, &topts); 210 + if (!ASSERT_OK(err, "test_run_opts err")) 211 + return; 212 + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) 213 + return; 214 + 215 + ASSERT_EQ(skel->arena->store_release8_result, 0x12, 216 + "store_release8_result"); 217 + ASSERT_EQ(skel->arena->store_release16_result, 0x1234, 218 + "store_release16_result"); 219 + ASSERT_EQ(skel->arena->store_release32_result, 0x12345678, 220 + "store_release32_result"); 221 + ASSERT_EQ(skel->arena->store_release64_result, 0x1234567890abcdef, 222 + "store_release64_result"); 223 + } 224 + 165 225 void test_arena_atomics(void) 166 226 { 167 227 struct arena_atomics *skel; ··· 231 171 if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open")) 232 172 return; 233 173 234 - if (skel->data->skip_tests) { 174 + if (skel->data->skip_all_tests) { 235 175 printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang", 236 176 __func__); 237 177 test__skip(); ··· 258 198 test_xchg(skel); 259 199 if (test__start_subtest("uaf")) 260 200 test_uaf(skel); 201 + if (test__start_subtest("load_acquire")) 202 + test_load_acquire(skel); 203 + if (test__start_subtest("store_release")) 204 + test_store_release(skel); 261 205 262 206 cleanup: 263 207 arena_atomics__destroy(skel);
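
On the BPF program side, such results come from ordinary __atomic builtins, which a sufficiently new Clang (one that advertises __BPF_FEATURE_LOAD_ACQ_STORE_REL) lowers to the new load-acquire/store-release instructions; a sketch using plain globals rather than the arena memory the selftest exercises:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

__u64 src64 = 0x1234567890abcdef;
__u64 dst64;

SEC("syscall")
int lacq_srel_example(void *ctx)
{
	/* 64-bit load-acquire ... */
	dst64 = __atomic_load_n(&src64, __ATOMIC_ACQUIRE);
	/* ... and the matching store-release */
	__atomic_store_n(&src64, 0x12, __ATOMIC_RELEASE);
	return 0;
}

char _license[] SEC("license") = "GPL";
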
+108
tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #include <test_progs.h> 4 + #include <network_helpers.h> 5 + #include <sys/sysinfo.h> 6 + 7 + struct __qspinlock { int val; }; 8 + typedef struct __qspinlock arena_spinlock_t; 9 + 10 + struct arena_qnode { 11 + unsigned long next; 12 + int count; 13 + int locked; 14 + }; 15 + 16 + #include "arena_spin_lock.skel.h" 17 + 18 + static long cpu; 19 + static int repeat; 20 + 21 + pthread_barrier_t barrier; 22 + 23 + static void *spin_lock_thread(void *arg) 24 + { 25 + int err, prog_fd = *(u32 *)arg; 26 + LIBBPF_OPTS(bpf_test_run_opts, topts, 27 + .data_in = &pkt_v4, 28 + .data_size_in = sizeof(pkt_v4), 29 + .repeat = repeat, 30 + ); 31 + cpu_set_t cpuset; 32 + 33 + CPU_ZERO(&cpuset); 34 + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); 35 + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset), "cpu affinity"); 36 + 37 + err = pthread_barrier_wait(&barrier); 38 + if (err != PTHREAD_BARRIER_SERIAL_THREAD && err != 0) 39 + ASSERT_FALSE(true, "pthread_barrier"); 40 + 41 + err = bpf_prog_test_run_opts(prog_fd, &topts); 42 + ASSERT_OK(err, "test_run err"); 43 + ASSERT_EQ((int)topts.retval, 0, "test_run retval"); 44 + 45 + pthread_exit(arg); 46 + } 47 + 48 + static void test_arena_spin_lock_size(int size) 49 + { 50 + LIBBPF_OPTS(bpf_test_run_opts, topts); 51 + struct arena_spin_lock *skel; 52 + pthread_t thread_id[16]; 53 + int prog_fd, i, err; 54 + void *ret; 55 + 56 + if (get_nprocs() < 2) { 57 + test__skip(); 58 + return; 59 + } 60 + 61 + skel = arena_spin_lock__open_and_load(); 62 + if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load")) 63 + return; 64 + if (skel->data->test_skip == 2) { 65 + test__skip(); 66 + goto end; 67 + } 68 + skel->bss->cs_count = size; 69 + skel->bss->limit = repeat * 16; 70 + 71 + ASSERT_OK(pthread_barrier_init(&barrier, NULL, 16), "barrier init"); 72 + 73 + prog_fd = bpf_program__fd(skel->progs.prog); 74 + for (i = 0; i < 16; i++) { 75 + err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); 76 + if (!ASSERT_OK(err, "pthread_create")) 77 + goto end_barrier; 78 + } 79 + 80 + for (i = 0; i < 16; i++) { 81 + if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) 82 + goto end_barrier; 83 + if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) 84 + goto end_barrier; 85 + } 86 + 87 + ASSERT_EQ(skel->bss->counter, repeat * 16, "check counter value"); 88 + 89 + end_barrier: 90 + pthread_barrier_destroy(&barrier); 91 + end: 92 + arena_spin_lock__destroy(skel); 93 + return; 94 + } 95 + 96 + void test_arena_spin_lock(void) 97 + { 98 + repeat = 1000; 99 + if (test__start_subtest("arena_spin_lock_1")) 100 + test_arena_spin_lock_size(1); 101 + cpu = 0; 102 + if (test__start_subtest("arena_spin_lock_1000")) 103 + test_arena_spin_lock_size(1000); 104 + cpu = 0; 105 + repeat = 100; 106 + if (test__start_subtest("arena_spin_lock_50000")) 107 + test_arena_spin_lock_size(50000); 108 + }
+5
tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
··· 6 6 #include <test_progs.h> 7 7 #include "bloom_filter_map.skel.h" 8 8 9 + #ifndef NUMA_NO_NODE 10 + #define NUMA_NO_NODE (-1) 11 + #endif 12 + 9 13 static void test_fail_cases(void) 10 14 { 11 15 LIBBPF_OPTS(bpf_map_create_opts, opts); ··· 73 69 74 70 /* Create a map */ 75 71 opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE; 72 + opts.numa_node = NUMA_NO_NODE; 76 73 fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts); 77 74 if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case")) 78 75 return;
+68
tools/testing/selftests/bpf/prog_tests/bpf_iter.c
··· 323 323 static void test_task_sleepable(void) 324 324 { 325 325 struct bpf_iter_tasks *skel; 326 + int pid, status, err, data_pipe[2], finish_pipe[2], c; 327 + char *test_data = NULL; 328 + char *test_data_long = NULL; 329 + char *data[2]; 330 + 331 + if (!ASSERT_OK(pipe(data_pipe), "data_pipe") || 332 + !ASSERT_OK(pipe(finish_pipe), "finish_pipe")) 333 + return; 326 334 327 335 skel = bpf_iter_tasks__open_and_load(); 328 336 if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load")) 329 337 return; 338 + 339 + pid = fork(); 340 + if (!ASSERT_GE(pid, 0, "fork")) 341 + return; 342 + 343 + if (pid == 0) { 344 + /* child */ 345 + close(data_pipe[0]); 346 + close(finish_pipe[1]); 347 + 348 + test_data = malloc(sizeof(char) * 10); 349 + strncpy(test_data, "test_data", 10); 350 + test_data[9] = '\0'; 351 + 352 + test_data_long = malloc(sizeof(char) * 5000); 353 + for (int i = 0; i < 5000; ++i) { 354 + if (i % 2 == 0) 355 + test_data_long[i] = 'b'; 356 + else 357 + test_data_long[i] = 'a'; 358 + } 359 + test_data_long[4999] = '\0'; 360 + 361 + data[0] = test_data; 362 + data[1] = test_data_long; 363 + 364 + write(data_pipe[1], &data, sizeof(data)); 365 + 366 + /* keep child alive until after the test */ 367 + err = read(finish_pipe[0], &c, 1); 368 + if (err != 1) 369 + exit(-1); 370 + 371 + close(data_pipe[1]); 372 + close(finish_pipe[0]); 373 + _exit(0); 374 + } 375 + 376 + /* parent */ 377 + close(data_pipe[1]); 378 + close(finish_pipe[0]); 379 + 380 + err = read(data_pipe[0], &data, sizeof(data)); 381 + ASSERT_EQ(err, sizeof(data), "read_check"); 382 + 383 + skel->bss->user_ptr = data[0]; 384 + skel->bss->user_ptr_long = data[1]; 385 + skel->bss->pid = pid; 330 386 331 387 do_dummy_read(skel->progs.dump_task_sleepable); 332 388 ··· 390 334 "num_expected_failure_copy_from_user_task"); 391 335 ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0, 392 336 "num_success_copy_from_user_task"); 337 + ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0, 338 + "num_expected_failure_copy_from_user_task_str"); 339 + ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0, 340 + "num_success_copy_from_user_task_str"); 393 341 394 342 bpf_iter_tasks__destroy(skel); 343 + 344 + write(finish_pipe[1], &c, 1); 345 + err = waitpid(pid, &status, 0); 346 + ASSERT_EQ(err, pid, "waitpid"); 347 + ASSERT_EQ(status, 0, "zero_child_exit"); 348 + 349 + close(data_pipe[0]); 350 + close(finish_pipe[1]); 395 351 } 396 352 397 353 static void test_task_stack(void)
+6 -3
tools/testing/selftests/bpf/prog_tests/bpf_nf.c
··· 72 72 if (!ASSERT_OK(system(cmd), cmd)) 73 73 goto end; 74 74 75 - srv_port = (mode == TEST_XDP) ? 5005 : 5006; 76 - srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS); 75 + srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS); 77 76 if (!ASSERT_GE(srv_fd, 0, "start_server")) 77 + goto end; 78 + 79 + srv_port = get_socket_local_port(srv_fd); 80 + if (!ASSERT_GE(srv_port, 0, "get_sock_local_port")) 78 81 goto end; 79 82 80 83 client_fd = connect_to_server(srv_fd); ··· 94 91 skel->bss->saddr = peer_addr.sin_addr.s_addr; 95 92 skel->bss->sport = peer_addr.sin_port; 96 93 skel->bss->daddr = peer_addr.sin_addr.s_addr; 97 - skel->bss->dport = htons(srv_port); 94 + skel->bss->dport = srv_port; 98 95 99 96 if (mode == TEST_XDP) 100 97 prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
+19 -4
tools/testing/selftests/bpf/prog_tests/btf.c
··· 3866 3866 .err_str = "vlen != 0", 3867 3867 }, 3868 3868 { 3869 - .descr = "decl_tag test #8, invalid kflag", 3869 + .descr = "decl_tag test #8, tag with kflag", 3870 3870 .raw_types = { 3871 3871 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ 3872 3872 BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ 3873 - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1), 3873 + BTF_DECL_ATTR_ENC(NAME_TBD, 2, -1), 3874 3874 BTF_END_RAW, 3875 3875 }, 3876 3876 BTF_STR_SEC("\0local\0tag1"), ··· 3881 3881 .key_type_id = 1, 3882 3882 .value_type_id = 1, 3883 3883 .max_entries = 1, 3884 - .btf_load_err = true, 3885 - .err_str = "Invalid btf_info kind_flag", 3886 3884 }, 3887 3885 { 3888 3886 .descr = "decl_tag test #9, var, invalid component_idx", ··· 4203 4205 .max_entries = 1, 4204 4206 .btf_load_err = true, 4205 4207 .err_str = "Type tags don't precede modifiers", 4208 + }, 4209 + { 4210 + .descr = "type_tag test #7, tag with kflag", 4211 + .raw_types = { 4212 + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ 4213 + BTF_TYPE_ATTR_ENC(NAME_TBD, 1), /* [2] */ 4214 + BTF_PTR_ENC(2), /* [3] */ 4215 + BTF_END_RAW, 4216 + }, 4217 + BTF_STR_SEC("\0tag"), 4218 + .map_type = BPF_MAP_TYPE_ARRAY, 4219 + .map_name = "tag_type_check_btf", 4220 + .key_size = sizeof(int), 4221 + .value_size = 4, 4222 + .key_type_id = 1, 4223 + .value_type_id = 1, 4224 + .max_entries = 1, 4206 4225 }, 4207 4226 { 4208 4227 .descr = "enum64 test #1, unsigned, size 8",
+110 -37
tools/testing/selftests/bpf/prog_tests/btf_dump.c
··· 126 126 return err; 127 127 } 128 128 129 - static char *dump_buf; 130 - static size_t dump_buf_sz; 131 - static FILE *dump_buf_file; 129 + struct test_ctx { 130 + struct btf *btf; 131 + struct btf_dump *d; 132 + char *dump_buf; 133 + size_t dump_buf_sz; 134 + FILE *dump_buf_file; 135 + }; 136 + 137 + static void test_ctx__free(struct test_ctx *t) 138 + { 139 + fclose(t->dump_buf_file); 140 + free(t->dump_buf); 141 + btf_dump__free(t->d); 142 + btf__free(t->btf); 143 + } 144 + 145 + static int test_ctx__init(struct test_ctx *t) 146 + { 147 + t->dump_buf_file = open_memstream(&t->dump_buf, &t->dump_buf_sz); 148 + if (!ASSERT_OK_PTR(t->dump_buf_file, "dump_memstream")) 149 + return -1; 150 + t->btf = btf__new_empty(); 151 + if (!ASSERT_OK_PTR(t->btf, "new_empty")) 152 + goto err_out; 153 + t->d = btf_dump__new(t->btf, btf_dump_printf, t->dump_buf_file, NULL); 154 + if (!ASSERT_OK(libbpf_get_error(t->d), "btf_dump__new")) 155 + goto err_out; 156 + 157 + return 0; 158 + 159 + err_out: 160 + test_ctx__free(t); 161 + return -1; 162 + } 163 + 164 + static void test_ctx__dump_and_compare(struct test_ctx *t, 165 + const char *expected_output, 166 + const char *message) 167 + { 168 + int i, err; 169 + 170 + for (i = 1; i < btf__type_cnt(t->btf); i++) { 171 + err = btf_dump__dump_type(t->d, i); 172 + ASSERT_OK(err, "dump_type_ok"); 173 + } 174 + 175 + fflush(t->dump_buf_file); 176 + t->dump_buf[t->dump_buf_sz] = 0; /* some libc implementations don't do this */ 177 + 178 + ASSERT_STREQ(t->dump_buf, expected_output, message); 179 + } 132 180 133 181 static void test_btf_dump_incremental(void) 134 182 { 135 - struct btf *btf = NULL; 136 - struct btf_dump *d = NULL; 137 - int id, err, i; 183 + struct test_ctx t = {}; 184 + struct btf *btf; 185 + int id, err; 138 186 139 - dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); 140 - if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) 187 + if (test_ctx__init(&t)) 141 188 return; 142 - btf = btf__new_empty(); 143 - if (!ASSERT_OK_PTR(btf, "new_empty")) 144 - goto err_out; 145 - d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL); 146 - if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new")) 147 - goto err_out; 189 + 190 + btf = t.btf; 148 191 149 192 /* First, generate BTF corresponding to the following C code: 150 193 * ··· 225 182 err = btf__add_field(btf, "x", 4, 0, 0); 226 183 ASSERT_OK(err, "field_ok"); 227 184 228 - for (i = 1; i < btf__type_cnt(btf); i++) { 229 - err = btf_dump__dump_type(d, i); 230 - ASSERT_OK(err, "dump_type_ok"); 231 - } 232 - 233 - fflush(dump_buf_file); 234 - dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ 235 - 236 - ASSERT_STREQ(dump_buf, 185 + test_ctx__dump_and_compare(&t, 237 186 "enum x;\n" 238 187 "\n" 239 188 "enum x {\n" ··· 256 221 * enum values don't conflict; 257 222 * 258 223 */ 259 - fseek(dump_buf_file, 0, SEEK_SET); 224 + fseek(t.dump_buf_file, 0, SEEK_SET); 260 225 261 226 id = btf__add_struct(btf, "s", 4); 262 227 ASSERT_EQ(id, 7, "struct_id"); ··· 267 232 err = btf__add_field(btf, "s", 6, 64, 0); 268 233 ASSERT_OK(err, "field_ok"); 269 234 270 - for (i = 1; i < btf__type_cnt(btf); i++) { 271 - err = btf_dump__dump_type(d, i); 272 - ASSERT_OK(err, "dump_type_ok"); 273 - } 274 - 275 - fflush(dump_buf_file); 276 - dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ 277 - ASSERT_STREQ(dump_buf, 235 + test_ctx__dump_and_compare(&t, 278 236 "struct s___2 {\n" 279 237 " enum x x;\n" 280 238 " enum {\n" ··· 276 248 " struct s s;\n" 277 249 "};\n\n" , 
"c_dump1"); 278 250 279 - err_out: 280 - fclose(dump_buf_file); 281 - free(dump_buf); 282 - btf_dump__free(d); 283 - btf__free(btf); 251 + test_ctx__free(&t); 252 + } 253 + 254 + static void test_btf_dump_type_tags(void) 255 + { 256 + struct test_ctx t = {}; 257 + struct btf *btf; 258 + int id, err; 259 + 260 + if (test_ctx__init(&t)) 261 + return; 262 + 263 + btf = t.btf; 264 + 265 + /* Generate BTF corresponding to the following C code: 266 + * 267 + * struct s { 268 + * void __attribute__((btf_type_tag(\"void_tag\"))) *p1; 269 + * void __attribute__((void_attr)) *p2; 270 + * }; 271 + * 272 + */ 273 + 274 + id = btf__add_type_tag(btf, "void_tag", 0); 275 + ASSERT_EQ(id, 1, "type_tag_id"); 276 + id = btf__add_ptr(btf, id); 277 + ASSERT_EQ(id, 2, "void_ptr_id1"); 278 + 279 + id = btf__add_type_attr(btf, "void_attr", 0); 280 + ASSERT_EQ(id, 3, "type_attr_id"); 281 + id = btf__add_ptr(btf, id); 282 + ASSERT_EQ(id, 4, "void_ptr_id2"); 283 + 284 + id = btf__add_struct(btf, "s", 8); 285 + ASSERT_EQ(id, 5, "struct_id"); 286 + err = btf__add_field(btf, "p1", 2, 0, 0); 287 + ASSERT_OK(err, "field_ok1"); 288 + err = btf__add_field(btf, "p2", 4, 0, 0); 289 + ASSERT_OK(err, "field_ok2"); 290 + 291 + test_ctx__dump_and_compare(&t, 292 + "struct s {\n" 293 + " void __attribute__((btf_type_tag(\"void_tag\"))) *p1;\n" 294 + " void __attribute__((void_attr)) *p2;\n" 295 + "};\n\n", "dump_and_compare"); 296 + 297 + test_ctx__free(&t); 284 298 } 285 299 286 300 #define STRSIZE 4096 ··· 943 873 } 944 874 if (test__start_subtest("btf_dump: incremental")) 945 875 test_btf_dump_incremental(); 876 + 877 + if (test__start_subtest("btf_dump: type_tags")) 878 + test_btf_dump_type_tags(); 946 879 947 880 btf = libbpf_find_kernel_btf(); 948 881 if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+128
tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #include <test_progs.h> 4 + #include "cgroup_helpers.h" 5 + #include "cgroup_preorder.skel.h" 6 + 7 + static int run_getsockopt_test(int cg_parent, int cg_child, int sock_fd, bool all_preorder) 8 + { 9 + LIBBPF_OPTS(bpf_prog_attach_opts, opts); 10 + enum bpf_attach_type prog_c_atype, prog_c2_atype, prog_p_atype, prog_p2_atype; 11 + int prog_c_fd, prog_c2_fd, prog_p_fd, prog_p2_fd; 12 + struct cgroup_preorder *skel = NULL; 13 + struct bpf_program *prog; 14 + __u8 *result, buf; 15 + socklen_t optlen; 16 + int err = 0; 17 + 18 + skel = cgroup_preorder__open_and_load(); 19 + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) 20 + return 0; 21 + 22 + buf = 0x00; 23 + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); 24 + if (!ASSERT_OK(err, "setsockopt")) 25 + goto close_skel; 26 + 27 + opts.flags = BPF_F_ALLOW_MULTI; 28 + if (all_preorder) 29 + opts.flags |= BPF_F_PREORDER; 30 + prog = skel->progs.child; 31 + prog_c_fd = bpf_program__fd(prog); 32 + prog_c_atype = bpf_program__expected_attach_type(prog); 33 + err = bpf_prog_attach_opts(prog_c_fd, cg_child, prog_c_atype, &opts); 34 + if (!ASSERT_OK(err, "bpf_prog_attach_opts-child")) 35 + goto close_skel; 36 + 37 + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER; 38 + prog = skel->progs.child_2; 39 + prog_c2_fd = bpf_program__fd(prog); 40 + prog_c2_atype = bpf_program__expected_attach_type(prog); 41 + err = bpf_prog_attach_opts(prog_c2_fd, cg_child, prog_c2_atype, &opts); 42 + if (!ASSERT_OK(err, "bpf_prog_attach_opts-child_2")) 43 + goto detach_child; 44 + 45 + optlen = 1; 46 + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); 47 + if (!ASSERT_OK(err, "getsockopt")) 48 + goto detach_child_2; 49 + 50 + result = skel->bss->result; 51 + if (all_preorder) 52 + ASSERT_TRUE(result[0] == 1 && result[1] == 2, "child only"); 53 + else 54 + ASSERT_TRUE(result[0] == 2 && result[1] == 1, "child only"); 55 + 56 + skel->bss->idx = 0; 57 + memset(result, 0, 4); 58 + 59 + opts.flags = BPF_F_ALLOW_MULTI; 60 + if (all_preorder) 61 + opts.flags |= BPF_F_PREORDER; 62 + prog = skel->progs.parent; 63 + prog_p_fd = bpf_program__fd(prog); 64 + prog_p_atype = bpf_program__expected_attach_type(prog); 65 + err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts); 66 + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent")) 67 + goto detach_child_2; 68 + 69 + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER; 70 + prog = skel->progs.parent_2; 71 + prog_p2_fd = bpf_program__fd(prog); 72 + prog_p2_atype = bpf_program__expected_attach_type(prog); 73 + err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts); 74 + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2")) 75 + goto detach_parent; 76 + 77 + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); 78 + if (!ASSERT_OK(err, "getsockopt")) 79 + goto detach_parent_2; 80 + 81 + if (all_preorder) 82 + ASSERT_TRUE(result[0] == 3 && result[1] == 4 && result[2] == 1 && result[3] == 2, 83 + "parent and child"); 84 + else 85 + ASSERT_TRUE(result[0] == 4 && result[1] == 2 && result[2] == 1 && result[3] == 3, 86 + "parent and child"); 87 + 88 + detach_parent_2: 89 + ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype), 90 + "bpf_prog_detach2-parent_2"); 91 + detach_parent: 92 + ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype), 93 + "bpf_prog_detach2-parent"); 94 + detach_child_2: 95 + ASSERT_OK(bpf_prog_detach2(prog_c2_fd, cg_child, prog_c2_atype), 
96 + "bpf_prog_detach2-child_2"); 97 + detach_child: 98 + ASSERT_OK(bpf_prog_detach2(prog_c_fd, cg_child, prog_c_atype), 99 + "bpf_prog_detach2-child"); 100 + close_skel: 101 + cgroup_preorder__destroy(skel); 102 + return err; 103 + } 104 + 105 + void test_cgroup_preorder(void) 106 + { 107 + int cg_parent = -1, cg_child = -1, sock_fd = -1; 108 + 109 + cg_parent = test__join_cgroup("/parent"); 110 + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) 111 + goto out; 112 + 113 + cg_child = test__join_cgroup("/parent/child"); 114 + if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child")) 115 + goto out; 116 + 117 + sock_fd = socket(AF_INET, SOCK_STREAM, 0); 118 + if (!ASSERT_GE(sock_fd, 0, "socket")) 119 + goto out; 120 + 121 + ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1"); 122 + ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2"); 123 + 124 + out: 125 + close(sock_fd); 126 + close(cg_child); 127 + close(cg_parent); 128 + }
+9 -4
tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
··· 10 10 static int run_test(int cgroup_fd, int server_fd, bool classid) 11 11 { 12 12 struct connect4_dropper *skel; 13 - int fd, err = 0; 13 + int fd, err = 0, port; 14 14 15 15 skel = connect4_dropper__open_and_load(); 16 16 if (!ASSERT_OK_PTR(skel, "skel_open")) 17 17 return -1; 18 + 19 + port = get_socket_local_port(server_fd); 20 + if (!ASSERT_GE(port, 0, "get_socket_local_port")) 21 + return -1; 22 + 23 + skel->bss->port = ntohs(port); 18 24 19 25 skel->links.connect_v4_dropper = 20 26 bpf_program__attach_cgroup(skel->progs.connect_v4_dropper, ··· 54 48 { 55 49 struct network_helper_opts opts = {}; 56 50 int server_fd, client_fd, cgroup_fd; 57 - static const int port = 60120; 58 51 59 52 /* Step 1: Check base connectivity works without any BPF. */ 60 - server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); 53 + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); 61 54 if (!ASSERT_GE(server_fd, 0, "server_fd")) 62 55 return; 63 56 client_fd = connect_to_fd_opts(server_fd, &opts); ··· 71 66 cgroup_fd = test__join_cgroup("/connect_dropper"); 72 67 if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd")) 73 68 return; 74 - server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); 69 + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); 75 70 if (!ASSERT_GE(server_fd, 0, "server_fd")) { 76 71 close(cgroup_fd); 77 72 return;
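The hunk above moves the test to an ephemeral port and passes the value to the BPF program through skel->bss->port. get_socket_local_port() is a selftests network_helpers routine; a minimal equivalent based on getsockname(), written here as an assumption rather than a copy of that helper, returns the bound port in network byte order, which is why the test applies ntohs() before storing it:

#include <netinet/in.h>
#include <sys/socket.h>

/* Return the socket's bound port in network byte order, or -1 on error */
static int local_port_of(int sock_fd)
{
	struct sockaddr_storage addr;
	socklen_t len = sizeof(addr);

	if (getsockname(sock_fd, (struct sockaddr *)&addr, &len) < 0)
		return -1;

	if (addr.ss_family == AF_INET)
		return ((struct sockaddr_in *)&addr)->sin_port;
	if (addr.ss_family == AF_INET6)
		return ((struct sockaddr_in6 *)&addr)->sin6_port;
	return -1;
}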
-107
tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #include "bpf/libbpf.h" 3 - #include "changes_pkt_data_freplace.skel.h" 4 - #include "changes_pkt_data.skel.h" 5 - #include <test_progs.h> 6 - 7 - static void print_verifier_log(const char *log) 8 - { 9 - if (env.verbosity >= VERBOSE_VERY) 10 - fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); 11 - } 12 - 13 - static void test_aux(const char *main_prog_name, 14 - const char *to_be_replaced, 15 - const char *replacement, 16 - bool expect_load) 17 - { 18 - struct changes_pkt_data_freplace *freplace = NULL; 19 - struct bpf_program *freplace_prog = NULL; 20 - struct bpf_program *main_prog = NULL; 21 - LIBBPF_OPTS(bpf_object_open_opts, opts); 22 - struct changes_pkt_data *main = NULL; 23 - char log[16*1024]; 24 - int err; 25 - 26 - opts.kernel_log_buf = log; 27 - opts.kernel_log_size = sizeof(log); 28 - if (env.verbosity >= VERBOSE_SUPER) 29 - opts.kernel_log_level = 1 | 2 | 4; 30 - main = changes_pkt_data__open_opts(&opts); 31 - if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) 32 - goto out; 33 - main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); 34 - if (!ASSERT_OK_PTR(main_prog, "main_prog")) 35 - goto out; 36 - bpf_program__set_autoload(main_prog, true); 37 - err = changes_pkt_data__load(main); 38 - print_verifier_log(log); 39 - if (!ASSERT_OK(err, "changes_pkt_data__load")) 40 - goto out; 41 - freplace = changes_pkt_data_freplace__open_opts(&opts); 42 - if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) 43 - goto out; 44 - freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); 45 - if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) 46 - goto out; 47 - bpf_program__set_autoload(freplace_prog, true); 48 - bpf_program__set_autoattach(freplace_prog, true); 49 - bpf_program__set_attach_target(freplace_prog, 50 - bpf_program__fd(main_prog), 51 - to_be_replaced); 52 - err = changes_pkt_data_freplace__load(freplace); 53 - print_verifier_log(log); 54 - if (expect_load) { 55 - ASSERT_OK(err, "changes_pkt_data_freplace__load"); 56 - } else { 57 - ASSERT_ERR(err, "changes_pkt_data_freplace__load"); 58 - ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); 59 - } 60 - 61 - out: 62 - changes_pkt_data_freplace__destroy(freplace); 63 - changes_pkt_data__destroy(main); 64 - } 65 - 66 - /* There are two global subprograms in both changes_pkt_data.skel.h: 67 - * - one changes packet data; 68 - * - another does not. 69 - * It is ok to freplace subprograms that change packet data with those 70 - * that either do or do not. It is only ok to freplace subprograms 71 - * that do not change packet data with those that do not as well. 72 - * The below tests check outcomes for each combination of such freplace. 73 - * Also test a case when main subprogram itself is replaced and is a single 74 - * subprogram in a program. 
75 - */ 76 - void test_changes_pkt_data_freplace(void) 77 - { 78 - struct { 79 - const char *main; 80 - const char *to_be_replaced; 81 - bool changes; 82 - } mains[] = { 83 - { "main_with_subprogs", "changes_pkt_data", true }, 84 - { "main_with_subprogs", "does_not_change_pkt_data", false }, 85 - { "main_changes", "main_changes", true }, 86 - { "main_does_not_change", "main_does_not_change", false }, 87 - }; 88 - struct { 89 - const char *func; 90 - bool changes; 91 - } replacements[] = { 92 - { "changes_pkt_data", true }, 93 - { "does_not_change_pkt_data", false } 94 - }; 95 - char buf[64]; 96 - 97 - for (int i = 0; i < ARRAY_SIZE(mains); ++i) { 98 - for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { 99 - snprintf(buf, sizeof(buf), "%s_with_%s", 100 - mains[i].to_be_replaced, replacements[j].func); 101 - if (!test__start_subtest(buf)) 102 - continue; 103 - test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, 104 - mains[i].changes || !replacements[j].changes); 105 - } 106 - } 107 - }
+9
tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include "compute_live_registers.skel.h" 4 + #include "test_progs.h" 5 + 6 + void test_compute_live_registers(void) 7 + { 8 + RUN_TESTS(compute_live_registers); 9 + }
+4 -2
tools/testing/selftests/bpf/prog_tests/core_reloc.c
··· 85 85 #define NESTING_ERR_CASE(name) { \ 86 86 NESTING_CASE_COMMON(name), \ 87 87 .fails = true, \ 88 - .run_btfgen_fails = true, \ 88 + .run_btfgen_fails = true, \ 89 89 } 90 90 91 91 #define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ 92 - .a = { [2] = 1 }, \ 92 + .a = { [2] = 1, [3] = 11 }, \ 93 93 .b = { [1] = { [2] = { [3] = 2 } } }, \ 94 94 .c = { [1] = { .c = 3 } }, \ 95 95 .d = { [0] = { [0] = { .d = 4 } } }, \ ··· 108 108 .input_len = sizeof(struct core_reloc_##name), \ 109 109 .output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \ 110 110 .a2 = 1, \ 111 + .a3 = 12, \ 111 112 .b123 = 2, \ 112 113 .c1c = 3, \ 113 114 .d00d = 4, \ ··· 603 602 ARRAYS_ERR_CASE(arrays___err_non_array), 604 603 ARRAYS_ERR_CASE(arrays___err_wrong_val_type), 605 604 ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr), 605 + ARRAYS_ERR_CASE(arrays___err_bad_signed_arr_elem_sz), 606 606 607 607 /* enum/ptr/int handling scenarios */ 608 608 PRIMITIVES_CASE(primitives),
+4 -1
tools/testing/selftests/bpf/prog_tests/cpumask.c
··· 25 25 "test_global_mask_nested_deep_rcu", 26 26 "test_global_mask_nested_deep_array_rcu", 27 27 "test_cpumask_weight", 28 + "test_refcount_null_tracking", 29 + "test_populate_reject_small_mask", 30 + "test_populate_reject_unaligned", 31 + "test_populate", 28 32 }; 29 33 30 34 static void verify_success(const char *prog_name) ··· 82 78 verify_success(cpumask_success_testcases[i]); 83 79 } 84 80 85 - RUN_TESTS(cpumask_success); 86 81 RUN_TESTS(cpumask_failure); 87 82 }
+21
tools/testing/selftests/bpf/prog_tests/dynptr.c
··· 10 10 SETUP_SYSCALL_SLEEP, 11 11 SETUP_SKB_PROG, 12 12 SETUP_SKB_PROG_TP, 13 + SETUP_XDP_PROG, 13 14 }; 14 15 15 16 static struct { ··· 19 18 } success_tests[] = { 20 19 {"test_read_write", SETUP_SYSCALL_SLEEP}, 21 20 {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, 21 + {"test_dynptr_copy", SETUP_SYSCALL_SLEEP}, 22 + {"test_dynptr_copy_xdp", SETUP_XDP_PROG}, 22 23 {"test_ringbuf", SETUP_SYSCALL_SLEEP}, 23 24 {"test_skb_readonly", SETUP_SKB_PROG}, 24 25 {"test_dynptr_skb_data", SETUP_SKB_PROG}, ··· 117 114 118 115 err = bpf_prog_test_run_opts(aux_prog_fd, &topts); 119 116 bpf_link__destroy(link); 117 + 118 + if (!ASSERT_OK(err, "test_run")) 119 + goto cleanup; 120 + 121 + break; 122 + } 123 + case SETUP_XDP_PROG: 124 + { 125 + char data[5000]; 126 + int err, prog_fd; 127 + LIBBPF_OPTS(bpf_test_run_opts, opts, 128 + .data_in = &data, 129 + .data_size_in = sizeof(data), 130 + .repeat = 1, 131 + ); 132 + 133 + prog_fd = bpf_program__fd(prog); 134 + err = bpf_prog_test_run_opts(prog_fd, &opts); 120 135 121 136 if (!ASSERT_OK(err, "test_run")) 122 137 goto cleanup;
+2 -2
tools/testing/selftests/bpf/prog_tests/fd_array.c
··· 83 83 int err; 84 84 85 85 memset(&info, 0, len); 86 - info.nr_map_ids = *nr_map_ids, 87 - info.map_ids = ptr_to_u64(map_ids), 86 + info.nr_map_ids = *nr_map_ids; 87 + info.map_ids = ptr_to_u64(map_ids); 88 88 89 89 err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); 90 90 if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+9
tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <test_progs.h> 4 + #include "fexit_noreturns.skel.h" 5 + 6 + void test_fexit_noreturns(void) 7 + { 8 + RUN_TESTS(fexit_noreturns); 9 + }
+152 -10
tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
··· 8 8 #include <unistd.h> 9 9 #include <test_progs.h> 10 10 #include "test_get_xattr.skel.h" 11 + #include "test_set_remove_xattr.skel.h" 11 12 #include "test_fsverity.skel.h" 12 13 13 14 static const char testfile[] = "/tmp/test_progs_fs_kfuncs"; 14 15 15 - static void test_xattr(void) 16 + static void test_get_xattr(const char *name, const char *value, bool allow_access) 16 17 { 17 18 struct test_get_xattr *skel = NULL; 18 19 int fd = -1, err; ··· 26 25 close(fd); 27 26 fd = -1; 28 27 29 - err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0); 28 + err = setxattr(testfile, name, value, strlen(value) + 1, 0); 30 29 if (err && errno == EOPNOTSUPP) { 31 30 printf("%s:SKIP:local fs doesn't support xattr (%d)\n" 32 31 "To run this test, make sure /tmp filesystem supports xattr.\n", ··· 49 48 goto out; 50 49 51 50 fd = open(testfile, O_RDONLY, 0644); 51 + 52 52 if (!ASSERT_GE(fd, 0, "open_file")) 53 53 goto out; 54 54 55 - ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file"); 56 - 57 55 /* Trigger security_inode_getxattr */ 58 - err = getxattr(testfile, "user.kfuncs", v, sizeof(v)); 59 - ASSERT_EQ(err, -1, "getxattr_return"); 60 - ASSERT_EQ(errno, EINVAL, "getxattr_errno"); 61 - ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry"); 56 + err = getxattr(testfile, name, v, sizeof(v)); 57 + 58 + if (allow_access) { 59 + ASSERT_EQ(err, -1, "getxattr_return"); 60 + ASSERT_EQ(errno, EINVAL, "getxattr_errno"); 61 + ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file"); 62 + ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry"); 63 + } else { 64 + ASSERT_EQ(err, strlen(value) + 1, "getxattr_return"); 65 + ASSERT_EQ(skel->bss->found_xattr_from_file, 0, "found_xattr_from_file"); 66 + ASSERT_EQ(skel->bss->found_xattr_from_dentry, 0, "found_xattr_from_dentry"); 67 + } 62 68 63 69 out: 64 70 close(fd); 65 71 test_get_xattr__destroy(skel); 72 + remove(testfile); 73 + } 74 + 75 + /* xattr value we will set to security.bpf.foo */ 76 + static const char value_foo[] = "hello"; 77 + 78 + static void read_and_validate_foo(struct test_set_remove_xattr *skel) 79 + { 80 + char value_out[32]; 81 + int err; 82 + 83 + err = getxattr(testfile, skel->rodata->xattr_foo, value_out, sizeof(value_out)); 84 + ASSERT_EQ(err, sizeof(value_foo), "getxattr size foo"); 85 + ASSERT_EQ(strncmp(value_out, value_foo, sizeof(value_foo)), 0, "strncmp value_foo"); 86 + } 87 + 88 + static void set_foo(struct test_set_remove_xattr *skel) 89 + { 90 + ASSERT_OK(setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0), 91 + "setxattr foo"); 92 + } 93 + 94 + static void validate_bar_match(struct test_set_remove_xattr *skel) 95 + { 96 + char value_out[32]; 97 + int err; 98 + 99 + err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out)); 100 + ASSERT_EQ(err, sizeof(skel->data->value_bar), "getxattr size bar"); 101 + ASSERT_EQ(strncmp(value_out, skel->data->value_bar, sizeof(skel->data->value_bar)), 0, 102 + "strncmp value_bar"); 103 + } 104 + 105 + static void validate_bar_removed(struct test_set_remove_xattr *skel) 106 + { 107 + char value_out[32]; 108 + int err; 109 + 110 + err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out)); 111 + ASSERT_LT(err, 0, "getxattr size bar should fail"); 112 + } 113 + 114 + static void test_set_remove_xattr(void) 115 + { 116 + struct test_set_remove_xattr *skel = NULL; 117 + int fd = -1, err; 118 + 119 + fd = open(testfile, O_CREAT | O_RDONLY, 
0644); 120 + if (!ASSERT_GE(fd, 0, "create_file")) 121 + return; 122 + 123 + close(fd); 124 + fd = -1; 125 + 126 + skel = test_set_remove_xattr__open_and_load(); 127 + if (!ASSERT_OK_PTR(skel, "test_set_remove_xattr__open_and_load")) 128 + return; 129 + 130 + /* Set security.bpf.foo to "hello" */ 131 + err = setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0); 132 + if (err && errno == EOPNOTSUPP) { 133 + printf("%s:SKIP:local fs doesn't support xattr (%d)\n" 134 + "To run this test, make sure /tmp filesystem supports xattr.\n", 135 + __func__, errno); 136 + test__skip(); 137 + goto out; 138 + } 139 + 140 + if (!ASSERT_OK(err, "setxattr")) 141 + goto out; 142 + 143 + skel->bss->monitored_pid = getpid(); 144 + err = test_set_remove_xattr__attach(skel); 145 + if (!ASSERT_OK(err, "test_set_remove_xattr__attach")) 146 + goto out; 147 + 148 + /* First, test not _locked version of the kfuncs with getxattr. */ 149 + 150 + /* Read security.bpf.foo and trigger test_inode_getxattr. This 151 + * bpf program will set security.bpf.bar to "world". 152 + */ 153 + read_and_validate_foo(skel); 154 + validate_bar_match(skel); 155 + 156 + /* Read security.bpf.foo and trigger test_inode_getxattr again. 157 + * This will remove xattr security.bpf.bar. 158 + */ 159 + read_and_validate_foo(skel); 160 + validate_bar_removed(skel); 161 + 162 + ASSERT_TRUE(skel->bss->set_security_bpf_bar_success, "set_security_bpf_bar_success"); 163 + ASSERT_TRUE(skel->bss->remove_security_bpf_bar_success, "remove_security_bpf_bar_success"); 164 + ASSERT_TRUE(skel->bss->set_security_selinux_fail, "set_security_selinux_fail"); 165 + ASSERT_TRUE(skel->bss->remove_security_selinux_fail, "remove_security_selinux_fail"); 166 + 167 + /* Second, test _locked version of the kfuncs, with setxattr */ 168 + 169 + /* Set security.bpf.foo and trigger test_inode_setxattr. This 170 + * bpf program will set security.bpf.bar to "world". 171 + */ 172 + set_foo(skel); 173 + validate_bar_match(skel); 174 + 175 + /* Set security.bpf.foo and trigger test_inode_setxattr again. 176 + * This will remove xattr security.bpf.bar. 
177 + */ 178 + set_foo(skel); 179 + validate_bar_removed(skel); 180 + 181 + ASSERT_TRUE(skel->bss->locked_set_security_bpf_bar_success, 182 + "locked_set_security_bpf_bar_success"); 183 + ASSERT_TRUE(skel->bss->locked_remove_security_bpf_bar_success, 184 + "locked_remove_security_bpf_bar_success"); 185 + ASSERT_TRUE(skel->bss->locked_set_security_selinux_fail, 186 + "locked_set_security_selinux_fail"); 187 + ASSERT_TRUE(skel->bss->locked_remove_security_selinux_fail, 188 + "locked_remove_security_selinux_fail"); 189 + 190 + out: 191 + close(fd); 192 + test_set_remove_xattr__destroy(skel); 66 193 remove(testfile); 67 194 } 68 195 ··· 270 141 271 142 void test_fs_kfuncs(void) 272 143 { 273 - if (test__start_subtest("xattr")) 274 - test_xattr(); 144 + /* Matches xattr_names in progs/test_get_xattr.c */ 145 + if (test__start_subtest("user_xattr")) 146 + test_get_xattr("user.kfuncs", "hello", true); 147 + 148 + if (test__start_subtest("security_bpf_xattr")) 149 + test_get_xattr("security.bpf.xxx", "hello", true); 150 + 151 + if (test__start_subtest("security_bpf_xattr_error")) 152 + test_get_xattr("security.bpf", "hello", false); 153 + 154 + if (test__start_subtest("security_selinux_xattr_error")) 155 + test_get_xattr("security.selinux", "hello", false); 156 + 157 + if (test__start_subtest("set_remove_xattr")) 158 + test_set_remove_xattr(); 275 159 276 160 if (test__start_subtest("fsverity")) 277 161 test_fsverity();
+43
tools/testing/selftests/bpf/prog_tests/kernel_flag.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Microsoft */ 3 + #include <test_progs.h> 4 + #include "kfunc_call_test.skel.h" 5 + #include "kfunc_call_test.lskel.h" 6 + #include "test_kernel_flag.skel.h" 7 + 8 + void test_kernel_flag(void) 9 + { 10 + struct test_kernel_flag *lsm_skel; 11 + struct kfunc_call_test *skel = NULL; 12 + struct kfunc_call_test_lskel *lskel = NULL; 13 + int ret; 14 + 15 + lsm_skel = test_kernel_flag__open_and_load(); 16 + if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel")) 17 + return; 18 + 19 + lsm_skel->bss->monitored_tid = gettid(); 20 + 21 + ret = test_kernel_flag__attach(lsm_skel); 22 + if (!ASSERT_OK(ret, "test_kernel_flag__attach")) 23 + goto close_prog; 24 + 25 + /* Test with skel. This should pass the gatekeeper */ 26 + skel = kfunc_call_test__open_and_load(); 27 + if (!ASSERT_OK_PTR(skel, "skel")) 28 + goto close_prog; 29 + 30 + /* Test with lskel. This should fail due to blocking kernel-based bpf() invocations */ 31 + lskel = kfunc_call_test_lskel__open_and_load(); 32 + if (!ASSERT_ERR_PTR(lskel, "lskel")) 33 + goto close_prog; 34 + 35 + close_prog: 36 + if (skel) 37 + kfunc_call_test__destroy(skel); 38 + if (lskel) 39 + kfunc_call_test_lskel__destroy(lskel); 40 + 41 + lsm_skel->bss->monitored_tid = 0; 42 + test_kernel_flag__destroy(lsm_skel); 43 + }
+540
tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #include <netinet/in.h> 3 + 4 + #include "network_helpers.h" 5 + #include "test_progs.h" 6 + 7 + #define BPF_FILE "test_lwt_ip_encap.bpf.o" 8 + 9 + #define NETNS_NAME_SIZE 32 10 + #define NETNS_BASE "ns-lwt-ip-encap" 11 + 12 + #define IP4_ADDR_1 "172.16.1.100" 13 + #define IP4_ADDR_2 "172.16.2.100" 14 + #define IP4_ADDR_3 "172.16.3.100" 15 + #define IP4_ADDR_4 "172.16.4.100" 16 + #define IP4_ADDR_5 "172.16.5.100" 17 + #define IP4_ADDR_6 "172.16.6.100" 18 + #define IP4_ADDR_7 "172.16.7.100" 19 + #define IP4_ADDR_8 "172.16.8.100" 20 + #define IP4_ADDR_GRE "172.16.16.100" 21 + 22 + #define IP4_ADDR_SRC IP4_ADDR_1 23 + #define IP4_ADDR_DST IP4_ADDR_4 24 + 25 + #define IP6_ADDR_1 "fb01::1" 26 + #define IP6_ADDR_2 "fb02::1" 27 + #define IP6_ADDR_3 "fb03::1" 28 + #define IP6_ADDR_4 "fb04::1" 29 + #define IP6_ADDR_5 "fb05::1" 30 + #define IP6_ADDR_6 "fb06::1" 31 + #define IP6_ADDR_7 "fb07::1" 32 + #define IP6_ADDR_8 "fb08::1" 33 + #define IP6_ADDR_GRE "fb10::1" 34 + 35 + #define IP6_ADDR_SRC IP6_ADDR_1 36 + #define IP6_ADDR_DST IP6_ADDR_4 37 + 38 + /* Setup/topology: 39 + * 40 + * NS1 NS2 NS3 41 + * veth1 <---> veth2 veth3 <---> veth4 (the top route) 42 + * veth5 <---> veth6 veth7 <---> veth8 (the bottom route) 43 + * 44 + * Each vethN gets IP[4|6]_ADDR_N address. 45 + * 46 + * IP*_ADDR_SRC = IP*_ADDR_1 47 + * IP*_ADDR_DST = IP*_ADDR_4 48 + * 49 + * All tests test pings from IP*_ADDR__SRC to IP*_ADDR_DST. 50 + * 51 + * By default, routes are configured to allow packets to go 52 + * IP*_ADDR_1 <=> IP*_ADDR_2 <=> IP*_ADDR_3 <=> IP*_ADDR_4 (the top route). 53 + * 54 + * A GRE device is installed in NS3 with IP*_ADDR_GRE, and 55 + * NS1/NS2 are configured to route packets to IP*_ADDR_GRE via IP*_ADDR_8 56 + * (the bottom route). 57 + * 58 + * Tests: 59 + * 60 + * 1. Routes NS2->IP*_ADDR_DST are brought down, so the only way a ping 61 + * from IP*_ADDR_SRC to IP*_ADDR_DST can work is via IP*_ADDR_GRE. 62 + * 63 + * 2a. In an egress test, a bpf LWT_XMIT program is installed on veth1 64 + * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE. 65 + * 66 + * ping: SRC->[encap at veth1:egress]->GRE:decap->DST 67 + * ping replies go DST->SRC directly 68 + * 69 + * 2b. In an ingress test, a bpf LWT_IN program is installed on veth2 70 + * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE. 
71 + * 72 + * ping: SRC->[encap at veth2:ingress]->GRE:decap->DST 73 + * ping replies go DST->SRC directly 74 + */ 75 + 76 + static int create_ns(char *name, size_t name_sz) 77 + { 78 + if (!name) 79 + goto fail; 80 + 81 + if (!ASSERT_OK(append_tid(name, name_sz), "append TID")) 82 + goto fail; 83 + 84 + SYS(fail, "ip netns add %s", name); 85 + 86 + /* rp_filter gets confused by what these tests are doing, so disable it */ 87 + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.all.rp_filter=0", name); 88 + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.default.rp_filter=0", name); 89 + /* Disable IPv6 DAD because it sometimes takes too long and fails tests */ 90 + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.accept_dad=0", name); 91 + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.default.accept_dad=0", name); 92 + 93 + return 0; 94 + fail: 95 + return -1; 96 + } 97 + 98 + static int set_top_addr(const char *ns1, const char *ns2, const char *ns3) 99 + { 100 + SYS(fail, "ip -n %s a add %s/24 dev veth1", ns1, IP4_ADDR_1); 101 + SYS(fail, "ip -n %s a add %s/24 dev veth2", ns2, IP4_ADDR_2); 102 + SYS(fail, "ip -n %s a add %s/24 dev veth3", ns2, IP4_ADDR_3); 103 + SYS(fail, "ip -n %s a add %s/24 dev veth4", ns3, IP4_ADDR_4); 104 + SYS(fail, "ip -n %s -6 a add %s/128 dev veth1", ns1, IP6_ADDR_1); 105 + SYS(fail, "ip -n %s -6 a add %s/128 dev veth2", ns2, IP6_ADDR_2); 106 + SYS(fail, "ip -n %s -6 a add %s/128 dev veth3", ns2, IP6_ADDR_3); 107 + SYS(fail, "ip -n %s -6 a add %s/128 dev veth4", ns3, IP6_ADDR_4); 108 + 109 + SYS(fail, "ip -n %s link set dev veth1 up", ns1); 110 + SYS(fail, "ip -n %s link set dev veth2 up", ns2); 111 + SYS(fail, "ip -n %s link set dev veth3 up", ns2); 112 + SYS(fail, "ip -n %s link set dev veth4 up", ns3); 113 + 114 + return 0; 115 + fail: 116 + return 1; 117 + } 118 + 119 + static int set_bottom_addr(const char *ns1, const char *ns2, const char *ns3) 120 + { 121 + SYS(fail, "ip -n %s a add %s/24 dev veth5", ns1, IP4_ADDR_5); 122 + SYS(fail, "ip -n %s a add %s/24 dev veth6", ns2, IP4_ADDR_6); 123 + SYS(fail, "ip -n %s a add %s/24 dev veth7", ns2, IP4_ADDR_7); 124 + SYS(fail, "ip -n %s a add %s/24 dev veth8", ns3, IP4_ADDR_8); 125 + SYS(fail, "ip -n %s -6 a add %s/128 dev veth5", ns1, IP6_ADDR_5); 126 + SYS(fail, "ip -n %s -6 a add %s/128 dev veth6", ns2, IP6_ADDR_6); 127 + SYS(fail, "ip -n %s -6 a add %s/128 dev veth7", ns2, IP6_ADDR_7); 128 + SYS(fail, "ip -n %s -6 a add %s/128 dev veth8", ns3, IP6_ADDR_8); 129 + 130 + SYS(fail, "ip -n %s link set dev veth5 up", ns1); 131 + SYS(fail, "ip -n %s link set dev veth6 up", ns2); 132 + SYS(fail, "ip -n %s link set dev veth7 up", ns2); 133 + SYS(fail, "ip -n %s link set dev veth8 up", ns3); 134 + 135 + return 0; 136 + fail: 137 + return 1; 138 + } 139 + 140 + static int configure_vrf(const char *ns1, const char *ns2) 141 + { 142 + if (!ns1 || !ns2) 143 + goto fail; 144 + 145 + SYS(fail, "ip -n %s link add red type vrf table 1001", ns1); 146 + SYS(fail, "ip -n %s link set red up", ns1); 147 + SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns1); 148 + SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns1); 149 + SYS(fail, "ip -n %s link set veth1 vrf red", ns1); 150 + SYS(fail, "ip -n %s link set veth5 vrf red", ns1); 151 + 152 + SYS(fail, "ip -n %s link add red type vrf table 1001", ns2); 153 + SYS(fail, "ip -n %s link set red up", ns2); 154 + SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns2); 155 + SYS(fail, "ip 
-n %s -6 route add table 1001 unreachable default metric 8192", ns2); 156 + SYS(fail, "ip -n %s link set veth2 vrf red", ns2); 157 + SYS(fail, "ip -n %s link set veth3 vrf red", ns2); 158 + SYS(fail, "ip -n %s link set veth6 vrf red", ns2); 159 + SYS(fail, "ip -n %s link set veth7 vrf red", ns2); 160 + 161 + return 0; 162 + fail: 163 + return -1; 164 + } 165 + 166 + static int configure_ns1(const char *ns1, const char *vrf) 167 + { 168 + struct nstoken *nstoken = NULL; 169 + 170 + if (!ns1 || !vrf) 171 + goto fail; 172 + 173 + nstoken = open_netns(ns1); 174 + if (!ASSERT_OK_PTR(nstoken, "open ns1")) 175 + goto fail; 176 + 177 + /* Top route */ 178 + SYS(fail, "ip route add %s/32 dev veth1 %s", IP4_ADDR_2, vrf); 179 + SYS(fail, "ip route add default dev veth1 via %s %s", IP4_ADDR_2, vrf); 180 + SYS(fail, "ip -6 route add %s/128 dev veth1 %s", IP6_ADDR_2, vrf); 181 + SYS(fail, "ip -6 route add default dev veth1 via %s %s", IP6_ADDR_2, vrf); 182 + /* Bottom route */ 183 + SYS(fail, "ip route add %s/32 dev veth5 %s", IP4_ADDR_6, vrf); 184 + SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_7, IP4_ADDR_6, vrf); 185 + SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_8, IP4_ADDR_6, vrf); 186 + SYS(fail, "ip -6 route add %s/128 dev veth5 %s", IP6_ADDR_6, vrf); 187 + SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_7, IP6_ADDR_6, vrf); 188 + SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_8, IP6_ADDR_6, vrf); 189 + 190 + close_netns(nstoken); 191 + return 0; 192 + fail: 193 + close_netns(nstoken); 194 + return -1; 195 + } 196 + 197 + static int configure_ns2(const char *ns2, const char *vrf) 198 + { 199 + struct nstoken *nstoken = NULL; 200 + 201 + if (!ns2 || !vrf) 202 + goto fail; 203 + 204 + nstoken = open_netns(ns2); 205 + if (!ASSERT_OK_PTR(nstoken, "open ns2")) 206 + goto fail; 207 + 208 + SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.ip_forward=1", ns2); 209 + SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.forwarding=1", ns2); 210 + 211 + /* Top route */ 212 + SYS(fail, "ip route add %s/32 dev veth2 %s", IP4_ADDR_1, vrf); 213 + SYS(fail, "ip route add %s/32 dev veth3 %s", IP4_ADDR_4, vrf); 214 + SYS(fail, "ip -6 route add %s/128 dev veth2 %s", IP6_ADDR_1, vrf); 215 + SYS(fail, "ip -6 route add %s/128 dev veth3 %s", IP6_ADDR_4, vrf); 216 + /* Bottom route */ 217 + SYS(fail, "ip route add %s/32 dev veth6 %s", IP4_ADDR_5, vrf); 218 + SYS(fail, "ip route add %s/32 dev veth7 %s", IP4_ADDR_8, vrf); 219 + SYS(fail, "ip -6 route add %s/128 dev veth6 %s", IP6_ADDR_5, vrf); 220 + SYS(fail, "ip -6 route add %s/128 dev veth7 %s", IP6_ADDR_8, vrf); 221 + 222 + close_netns(nstoken); 223 + return 0; 224 + fail: 225 + close_netns(nstoken); 226 + return -1; 227 + } 228 + 229 + static int configure_ns3(const char *ns3) 230 + { 231 + struct nstoken *nstoken = NULL; 232 + 233 + if (!ns3) 234 + goto fail; 235 + 236 + nstoken = open_netns(ns3); 237 + if (!ASSERT_OK_PTR(nstoken, "open ns3")) 238 + goto fail; 239 + 240 + /* Top route */ 241 + SYS(fail, "ip route add %s/32 dev veth4", IP4_ADDR_3); 242 + SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_1, IP4_ADDR_3); 243 + SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_2, IP4_ADDR_3); 244 + SYS(fail, "ip -6 route add %s/128 dev veth4", IP6_ADDR_3); 245 + SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_1, IP6_ADDR_3); 246 + SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_2, IP6_ADDR_3); 247 + /* Bottom route */ 248 + SYS(fail, "ip route add %s/32 dev 
veth8", IP4_ADDR_7); 249 + SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_5, IP4_ADDR_7); 250 + SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_6, IP4_ADDR_7); 251 + SYS(fail, "ip -6 route add %s/128 dev veth8", IP6_ADDR_7); 252 + SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_5, IP6_ADDR_7); 253 + SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_6, IP6_ADDR_7); 254 + 255 + /* Configure IPv4 GRE device */ 256 + SYS(fail, "ip tunnel add gre_dev mode gre remote %s local %s ttl 255", 257 + IP4_ADDR_1, IP4_ADDR_GRE); 258 + SYS(fail, "ip link set gre_dev up"); 259 + SYS(fail, "ip a add %s dev gre_dev", IP4_ADDR_GRE); 260 + 261 + /* Configure IPv6 GRE device */ 262 + SYS(fail, "ip tunnel add gre6_dev mode ip6gre remote %s local %s ttl 255", 263 + IP6_ADDR_1, IP6_ADDR_GRE); 264 + SYS(fail, "ip link set gre6_dev up"); 265 + SYS(fail, "ip a add %s dev gre6_dev", IP6_ADDR_GRE); 266 + 267 + close_netns(nstoken); 268 + return 0; 269 + fail: 270 + close_netns(nstoken); 271 + return -1; 272 + } 273 + 274 + static int setup_network(char *ns1, char *ns2, char *ns3, const char *vrf) 275 + { 276 + if (!ns1 || !ns2 || !ns3 || !vrf) 277 + goto fail; 278 + 279 + SYS(fail, "ip -n %s link add veth1 type veth peer name veth2 netns %s", ns1, ns2); 280 + SYS(fail, "ip -n %s link add veth3 type veth peer name veth4 netns %s", ns2, ns3); 281 + SYS(fail, "ip -n %s link add veth5 type veth peer name veth6 netns %s", ns1, ns2); 282 + SYS(fail, "ip -n %s link add veth7 type veth peer name veth8 netns %s", ns2, ns3); 283 + 284 + if (vrf[0]) { 285 + if (!ASSERT_OK(configure_vrf(ns1, ns2), "configure vrf")) 286 + goto fail; 287 + } 288 + if (!ASSERT_OK(set_top_addr(ns1, ns2, ns3), "set top addresses")) 289 + goto fail; 290 + 291 + if (!ASSERT_OK(set_bottom_addr(ns1, ns2, ns3), "set bottom addresses")) 292 + goto fail; 293 + 294 + if (!ASSERT_OK(configure_ns1(ns1, vrf), "configure ns1 routes")) 295 + goto fail; 296 + 297 + if (!ASSERT_OK(configure_ns2(ns2, vrf), "configure ns2 routes")) 298 + goto fail; 299 + 300 + if (!ASSERT_OK(configure_ns3(ns3), "configure ns3 routes")) 301 + goto fail; 302 + 303 + /* Link bottom route to the GRE tunnels */ 304 + SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s %s", 305 + ns1, IP4_ADDR_GRE, IP4_ADDR_6, vrf); 306 + SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s %s", 307 + ns2, IP4_ADDR_GRE, IP4_ADDR_8, vrf); 308 + SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s %s", 309 + ns1, IP6_ADDR_GRE, IP6_ADDR_6, vrf); 310 + SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s %s", 311 + ns2, IP6_ADDR_GRE, IP6_ADDR_8, vrf); 312 + 313 + return 0; 314 + fail: 315 + return -1; 316 + } 317 + 318 + static int remove_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf) 319 + { 320 + SYS(fail, "ip -n %s route del %s dev veth5 %s", ns1, IP4_ADDR_GRE, vrf); 321 + SYS(fail, "ip -n %s route del %s dev veth7 %s", ns2, IP4_ADDR_GRE, vrf); 322 + SYS(fail, "ip -n %s -6 route del %s/128 dev veth5 %s", ns1, IP6_ADDR_GRE, vrf); 323 + SYS(fail, "ip -n %s -6 route del %s/128 dev veth7 %s", ns2, IP6_ADDR_GRE, vrf); 324 + 325 + return 0; 326 + fail: 327 + return -1; 328 + } 329 + 330 + static int add_unreachable_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf) 331 + { 332 + SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns1, IP4_ADDR_GRE, vrf); 333 + SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns2, IP4_ADDR_GRE, vrf); 334 + SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns1, IP6_ADDR_GRE, 
vrf); 335 + SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns2, IP6_ADDR_GRE, vrf); 336 + 337 + return 0; 338 + fail: 339 + return -1; 340 + } 341 + 342 + #define GSO_SIZE 5000 343 + #define GSO_TCP_PORT 9000 344 + /* This tests the fix from commit ea0371f78799 ("net: fix GSO in bpf_lwt_push_ip_encap") */ 345 + static int test_gso_fix(const char *ns1, const char *ns3, int family) 346 + { 347 + const char *ip_addr = family == AF_INET ? IP4_ADDR_DST : IP6_ADDR_DST; 348 + char gso_packet[GSO_SIZE] = {}; 349 + struct nstoken *nstoken = NULL; 350 + int sfd, cfd, afd; 351 + ssize_t bytes; 352 + int ret = -1; 353 + 354 + if (!ns1 || !ns3) 355 + return ret; 356 + 357 + nstoken = open_netns(ns3); 358 + if (!ASSERT_OK_PTR(nstoken, "open ns3")) 359 + return ret; 360 + 361 + sfd = start_server_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL); 362 + if (!ASSERT_OK_FD(sfd, "start server")) 363 + goto close_netns; 364 + 365 + close_netns(nstoken); 366 + 367 + nstoken = open_netns(ns1); 368 + if (!ASSERT_OK_PTR(nstoken, "open ns1")) 369 + goto close_server; 370 + 371 + cfd = connect_to_addr_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL); 372 + if (!ASSERT_OK_FD(cfd, "connect to server")) 373 + goto close_server; 374 + 375 + close_netns(nstoken); 376 + nstoken = NULL; 377 + 378 + afd = accept(sfd, NULL, NULL); 379 + if (!ASSERT_OK_FD(afd, "accept")) 380 + goto close_client; 381 + 382 + /* Send a packet larger than MTU */ 383 + bytes = send(cfd, gso_packet, GSO_SIZE, 0); 384 + if (!ASSERT_EQ(bytes, GSO_SIZE, "send packet")) 385 + goto close_accept; 386 + 387 + /* Verify we received all expected bytes */ 388 + bytes = read(afd, gso_packet, GSO_SIZE); 389 + if (!ASSERT_EQ(bytes, GSO_SIZE, "receive packet")) 390 + goto close_accept; 391 + 392 + ret = 0; 393 + 394 + close_accept: 395 + close(afd); 396 + close_client: 397 + close(cfd); 398 + close_server: 399 + close(sfd); 400 + close_netns: 401 + close_netns(nstoken); 402 + 403 + return ret; 404 + } 405 + 406 + static int check_ping_ok(const char *ns1) 407 + { 408 + SYS(fail, "ip netns exec %s ping -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP4_ADDR_DST); 409 + SYS(fail, "ip netns exec %s ping6 -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP6_ADDR_DST); 410 + return 0; 411 + fail: 412 + return -1; 413 + } 414 + 415 + static int check_ping_fails(const char *ns1) 416 + { 417 + int ret; 418 + 419 + ret = SYS_NOFAIL("ip netns exec %s ping -c 1 -W1 -I veth1 %s", ns1, IP4_ADDR_DST); 420 + if (!ret) 421 + return -1; 422 + 423 + ret = SYS_NOFAIL("ip netns exec %s ping6 -c 1 -W1 -I veth1 %s", ns1, IP6_ADDR_DST); 424 + if (!ret) 425 + return -1; 426 + 427 + return 0; 428 + } 429 + 430 + #define EGRESS true 431 + #define INGRESS false 432 + #define IPV4_ENCAP true 433 + #define IPV6_ENCAP false 434 + static void lwt_ip_encap(bool ipv4_encap, bool egress, const char *vrf) 435 + { 436 + char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-"; 437 + char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-"; 438 + char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-"; 439 + char *sec = ipv4_encap ? 
"encap_gre" : "encap_gre6"; 440 + 441 + if (!vrf) 442 + return; 443 + 444 + if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1")) 445 + goto out; 446 + if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2")) 447 + goto out; 448 + if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3")) 449 + goto out; 450 + 451 + if (!ASSERT_OK(setup_network(ns1, ns2, ns3, vrf), "setup network")) 452 + goto out; 453 + 454 + /* By default, pings work */ 455 + if (!ASSERT_OK(check_ping_ok(ns1), "ping OK")) 456 + goto out; 457 + 458 + /* Remove NS2->DST routes, ping fails */ 459 + SYS(out, "ip -n %s route del %s/32 dev veth3 %s", ns2, IP4_ADDR_DST, vrf); 460 + SYS(out, "ip -n %s -6 route del %s/128 dev veth3 %s", ns2, IP6_ADDR_DST, vrf); 461 + if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail")) 462 + goto out; 463 + 464 + /* Install replacement routes (LWT/eBPF), pings succeed */ 465 + if (egress) { 466 + SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1 %s", 467 + ns1, IP4_ADDR_DST, BPF_FILE, sec, vrf); 468 + SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1 %s", 469 + ns1, IP6_ADDR_DST, BPF_FILE, sec, vrf); 470 + } else { 471 + SYS(out, "ip -n %s route add %s encap bpf in obj %s sec %s dev veth2 %s", 472 + ns2, IP4_ADDR_DST, BPF_FILE, sec, vrf); 473 + SYS(out, "ip -n %s -6 route add %s encap bpf in obj %s sec %s dev veth2 %s", 474 + ns2, IP6_ADDR_DST, BPF_FILE, sec, vrf); 475 + } 476 + 477 + if (!ASSERT_OK(check_ping_ok(ns1), "ping OK")) 478 + goto out; 479 + 480 + /* Skip GSO tests with VRF: VRF routing needs properly assigned 481 + * source IP/device, which is easy to do with ping but hard with TCP. 482 + */ 483 + if (egress && !vrf[0]) { 484 + if (!ASSERT_OK(test_gso_fix(ns1, ns3, AF_INET), "test GSO")) 485 + goto out; 486 + } 487 + 488 + /* Negative test: remove routes to GRE devices: ping fails */ 489 + if (!ASSERT_OK(remove_routes_to_gredev(ns1, ns2, vrf), "remove routes to gredev")) 490 + goto out; 491 + if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail")) 492 + goto out; 493 + 494 + /* Another negative test */ 495 + if (!ASSERT_OK(add_unreachable_routes_to_gredev(ns1, ns2, vrf), 496 + "add unreachable routes")) 497 + goto out; 498 + ASSERT_OK(check_ping_fails(ns1), "ping expected fail"); 499 + 500 + out: 501 + SYS_NOFAIL("ip netns del %s", ns1); 502 + SYS_NOFAIL("ip netns del %s", ns2); 503 + SYS_NOFAIL("ip netns del %s", ns3); 504 + } 505 + 506 + void test_lwt_ip_encap_vrf_ipv6(void) 507 + { 508 + if (test__start_subtest("egress")) 509 + lwt_ip_encap(IPV6_ENCAP, EGRESS, "vrf red"); 510 + 511 + if (test__start_subtest("ingress")) 512 + lwt_ip_encap(IPV6_ENCAP, INGRESS, "vrf red"); 513 + } 514 + 515 + void test_lwt_ip_encap_vrf_ipv4(void) 516 + { 517 + if (test__start_subtest("egress")) 518 + lwt_ip_encap(IPV4_ENCAP, EGRESS, "vrf red"); 519 + 520 + if (test__start_subtest("ingress")) 521 + lwt_ip_encap(IPV4_ENCAP, INGRESS, "vrf red"); 522 + } 523 + 524 + void test_lwt_ip_encap_ipv6(void) 525 + { 526 + if (test__start_subtest("egress")) 527 + lwt_ip_encap(IPV6_ENCAP, EGRESS, ""); 528 + 529 + if (test__start_subtest("ingress")) 530 + lwt_ip_encap(IPV6_ENCAP, INGRESS, ""); 531 + } 532 + 533 + void test_lwt_ip_encap_ipv4(void) 534 + { 535 + if (test__start_subtest("egress")) 536 + lwt_ip_encap(IPV4_ENCAP, EGRESS, ""); 537 + 538 + if (test__start_subtest("ingress")) 539 + lwt_ip_encap(IPV4_ENCAP, INGRESS, ""); 540 + }
+176
tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + 3 + /* Connects 6 network namespaces through veths. 4 + * Each NS may have different IPv6 global scope addresses : 5 + * 6 + * NS1 NS2 NS3 NS4 NS5 NS6 7 + * lo veth1 <-> veth2 veth3 <-> veth4 veth5 <-> veth6 lo veth7 <-> veth8 veth9 <-> veth10 lo 8 + * fb00 ::1 ::12 ::21 ::34 ::43 ::56 ::65 ::78 ::87 ::910 ::109 ::6 9 + * fd00 ::4 10 + * fc42 ::1 11 + * 12 + * All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a 13 + * IPv6 header with a Segment Routing Header, with segments : 14 + * fd00::1 -> fd00::2 -> fd00::3 -> fd00::4 15 + * 16 + * 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions : 17 + * - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1 18 + * - fd00::2 : remove the TLV, change the flags, add a tag 19 + * - fd00::3 : apply an End.T action to fd00::4, through routing table 117 20 + * 21 + * fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet. 22 + * Each End.BPF action will validate the operations applied on the SRH by the 23 + * previous BPF program in the chain, otherwise the packet is dropped. 24 + * 25 + * An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this 26 + * datagram can be read on NS6 when binding to fb00::6. 27 + */ 28 + 29 + #include "network_helpers.h" 30 + #include "test_progs.h" 31 + 32 + #define NETNS_BASE "lwt-seg6local-" 33 + #define BPF_FILE "test_lwt_seg6local.bpf.o" 34 + 35 + static void cleanup(void) 36 + { 37 + int ns; 38 + 39 + for (ns = 1; ns < 7; ns++) 40 + SYS_NOFAIL("ip netns del %s%d", NETNS_BASE, ns); 41 + } 42 + 43 + static int setup(void) 44 + { 45 + int ns; 46 + 47 + for (ns = 1; ns < 7; ns++) 48 + SYS(fail, "ip netns add %s%d", NETNS_BASE, ns); 49 + 50 + SYS(fail, "ip -n %s6 link set dev lo up", NETNS_BASE); 51 + 52 + for (ns = 1; ns < 6; ns++) { 53 + int local_id = ns * 2 - 1; 54 + int peer_id = ns * 2; 55 + int next_ns = ns + 1; 56 + 57 + SYS(fail, "ip -n %s%d link add veth%d type veth peer name veth%d netns %s%d", 58 + NETNS_BASE, ns, local_id, peer_id, NETNS_BASE, next_ns); 59 + 60 + SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, ns, local_id); 61 + SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, next_ns, peer_id); 62 + 63 + /* All link scope addresses to veths */ 64 + SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link", 65 + NETNS_BASE, ns, local_id, peer_id, local_id); 66 + SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link", 67 + NETNS_BASE, next_ns, peer_id, local_id, peer_id); 68 + } 69 + 70 + 71 + SYS(fail, "ip -n %s5 -6 route add fb00::109 table 117 dev veth9 scope link", NETNS_BASE); 72 + 73 + SYS(fail, "ip -n %s1 -6 addr add fb00::1/16 dev lo", NETNS_BASE); 74 + SYS(fail, "ip -n %s1 -6 route add fb00::6 dev veth1 via fb00::21", NETNS_BASE); 75 + 76 + SYS(fail, "ip -n %s2 -6 route add fb00::6 encap bpf in obj %s sec encap_srh dev veth2", 77 + NETNS_BASE, BPF_FILE); 78 + SYS(fail, "ip -n %s2 -6 route add fd00::1 dev veth3 via fb00::43 scope link", NETNS_BASE); 79 + 80 + SYS(fail, "ip -n %s3 -6 route add fc42::1 dev veth5 via fb00::65", NETNS_BASE); 81 + SYS(fail, 82 + "ip -n %s3 -6 route add fd00::1 encap seg6local action End.BPF endpoint obj %s sec add_egr_x dev veth4", 83 + NETNS_BASE, BPF_FILE); 84 + 85 + SYS(fail, 86 + "ip -n %s4 -6 route add fd00::2 encap seg6local action End.BPF endpoint obj %s sec pop_egr dev veth6", 87 + NETNS_BASE, BPF_FILE); 88 + SYS(fail, "ip -n %s4 -6 addr add fc42::1 dev lo", 
NETNS_BASE); 89 + SYS(fail, "ip -n %s4 -6 route add fd00::3 dev veth7 via fb00::87", NETNS_BASE); 90 + 91 + SYS(fail, "ip -n %s5 -6 route add fd00::4 table 117 dev veth9 via fb00::109", NETNS_BASE); 92 + SYS(fail, 93 + "ip -n %s5 -6 route add fd00::3 encap seg6local action End.BPF endpoint obj %s sec inspect_t dev veth8", 94 + NETNS_BASE, BPF_FILE); 95 + 96 + SYS(fail, "ip -n %s6 -6 addr add fb00::6/16 dev lo", NETNS_BASE); 97 + SYS(fail, "ip -n %s6 -6 addr add fd00::4/16 dev lo", NETNS_BASE); 98 + 99 + for (ns = 1; ns < 6; ns++) 100 + SYS(fail, "ip netns exec %s%d sysctl -wq net.ipv6.conf.all.forwarding=1", 101 + NETNS_BASE, ns); 102 + 103 + SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.all.seg6_enabled=1", NETNS_BASE); 104 + SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.lo.seg6_enabled=1", NETNS_BASE); 105 + SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.veth10.seg6_enabled=1", NETNS_BASE); 106 + 107 + return 0; 108 + fail: 109 + return -1; 110 + } 111 + 112 + #define SERVER_PORT 7330 113 + #define CLIENT_PORT 2121 114 + void test_lwt_seg6local(void) 115 + { 116 + struct sockaddr_in6 server_addr = {}; 117 + const char *ns1 = NETNS_BASE "1"; 118 + const char *ns6 = NETNS_BASE "6"; 119 + struct nstoken *nstoken = NULL; 120 + const char *foobar = "foobar"; 121 + ssize_t bytes; 122 + int sfd, cfd; 123 + char buf[7]; 124 + 125 + if (!ASSERT_OK(setup(), "setup")) 126 + goto out; 127 + 128 + nstoken = open_netns(ns6); 129 + if (!ASSERT_OK_PTR(nstoken, "open ns6")) 130 + goto out; 131 + 132 + sfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::6", SERVER_PORT, NULL); 133 + if (!ASSERT_OK_FD(sfd, "start server")) 134 + goto close_netns; 135 + 136 + close_netns(nstoken); 137 + 138 + nstoken = open_netns(ns1); 139 + if (!ASSERT_OK_PTR(nstoken, "open ns1")) 140 + goto close_server; 141 + 142 + cfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::1", CLIENT_PORT, NULL); 143 + if (!ASSERT_OK_FD(cfd, "start client")) 144 + goto close_server; 145 + 146 + close_netns(nstoken); 147 + nstoken = NULL; 148 + 149 + /* Send a packet larger than MTU */ 150 + server_addr.sin6_family = AF_INET6; 151 + server_addr.sin6_port = htons(SERVER_PORT); 152 + if (!ASSERT_EQ(inet_pton(AF_INET6, "fb00::6", &server_addr.sin6_addr), 1, 153 + "build target addr")) 154 + goto close_client; 155 + 156 + bytes = sendto(cfd, foobar, sizeof(foobar), 0, 157 + (struct sockaddr *)&server_addr, sizeof(server_addr)); 158 + if (!ASSERT_EQ(bytes, sizeof(foobar), "send packet")) 159 + goto close_client; 160 + 161 + /* Verify we received all expected bytes */ 162 + bytes = read(sfd, buf, sizeof(buf)); 163 + if (!ASSERT_EQ(bytes, sizeof(buf), "receive packet")) 164 + goto close_client; 165 + ASSERT_STREQ(buf, foobar, "check udp packet"); 166 + 167 + close_client: 168 + close(cfd); 169 + close_server: 170 + close(sfd); 171 + close_netns: 172 + close_netns(nstoken); 173 + 174 + out: 175 + cleanup(); 176 + }
+14 -7
tools/testing/selftests/bpf/prog_tests/netns_cookie.c
··· 33 33 34 34 skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup( 35 35 skel->progs.get_netns_cookie_sockops, cgroup_fd); 36 - if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach")) 36 + if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach_sockops")) 37 37 goto done; 38 38 39 39 verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg); 40 40 map = bpf_map__fd(skel->maps.sock_map); 41 41 err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0); 42 - if (!ASSERT_OK(err, "prog_attach")) 42 + if (!ASSERT_OK(err, "prog_attach_sk_msg")) 43 43 goto done; 44 44 45 45 tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx); 46 46 err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta); 47 - if (!ASSERT_OK(err, "prog_attach")) 47 + if (!ASSERT_OK(err, "prog_attach_tcx")) 48 48 goto done; 49 + 50 + skel->links.get_netns_cookie_cgroup_skb = bpf_program__attach_cgroup( 51 + skel->progs.get_netns_cookie_cgroup_skb, cgroup_fd); 52 + if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_cgroup_skb, "prog_attach_cgroup_skb")) 53 + goto cleanup_tc; 49 54 50 55 server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); 51 56 if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno)) ··· 74 69 if (!ASSERT_OK(err, "getsockopt")) 75 70 goto cleanup_tc; 76 71 77 - ASSERT_EQ(val, cookie_expected_value, "cookie_value"); 72 + ASSERT_EQ(val, cookie_expected_value, "cookie_value_sockops"); 78 73 79 74 err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies), 80 75 &client_fd, &val); 81 76 if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)")) 82 77 goto cleanup_tc; 83 78 84 - ASSERT_EQ(val, cookie_expected_value, "cookie_value"); 85 - ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value"); 86 - ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value"); 79 + ASSERT_EQ(val, cookie_expected_value, "cookie_value_sk_msg"); 80 + ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value_init_tcx"); 81 + ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value_tcx"); 82 + ASSERT_EQ(skel->bss->cgroup_skb_init_netns_cookie, cookie_expected_value, "cookie_value_init_cgroup_skb"); 83 + ASSERT_EQ(skel->bss->cgroup_skb_netns_cookie, cookie_expected_value, "cookie_value_cgroup_skb"); 87 84 88 85 cleanup_tc: 89 86 err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd);
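The new assertions above read cgroup_skb_init_netns_cookie and cgroup_skb_netns_cookie from the skeleton's BSS; the BPF side is not in this hunk. A plausible minimal sketch of such a cgroup_skb program, where the global names come from the test and the rest is an assumption, would be:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

__u64 cgroup_skb_init_netns_cookie;
__u64 cgroup_skb_netns_cookie;

SEC("cgroup_skb/ingress")
int get_netns_cookie_cgroup_skb(struct __sk_buff *skb)
{
	/* NULL returns the init netns cookie, the skb context returns the
	 * cookie of the netns the packet is being processed in.
	 */
	cgroup_skb_init_netns_cookie = bpf_get_netns_cookie(NULL);
	cgroup_skb_netns_cookie = bpf_get_netns_cookie(skb);
	return 1;	/* let the packet through */
}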
+18 -31
tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
··· 200 200 return; 201 201 } 202 202 203 - static void test_in_netns(int (*fn)(void *), void *arg) 204 - { 205 - struct nstoken *nstoken = NULL; 206 - 207 - SYS(cleanup, "ip netns add ns_current_pid_tgid"); 208 - SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up"); 209 - 210 - nstoken = open_netns("ns_current_pid_tgid"); 211 - if (!ASSERT_OK_PTR(nstoken, "open_netns")) 212 - goto cleanup; 213 - 214 - test_ns_current_pid_tgid_new_ns(fn, arg); 215 - 216 - cleanup: 217 - if (nstoken) 218 - close_netns(nstoken); 219 - SYS_NOFAIL("ip netns del ns_current_pid_tgid"); 220 - } 221 - 222 203 /* TODO: use a different tracepoint */ 223 - void serial_test_ns_current_pid_tgid(void) 204 + void serial_test_current_pid_tgid(void) 224 205 { 225 206 if (test__start_subtest("root_ns_tp")) 226 207 test_current_pid_tgid_tp(NULL); 227 208 if (test__start_subtest("new_ns_tp")) 228 209 test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL); 229 - if (test__start_subtest("new_ns_cgrp")) { 230 - int cgroup_fd = -1; 231 - 232 - cgroup_fd = test__join_cgroup("/sock_addr"); 233 - if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) { 234 - test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd); 235 - close(cgroup_fd); 236 - } 237 - } 238 - if (test__start_subtest("new_ns_sk_msg")) 239 - test_in_netns(test_current_pid_tgid_sk_msg, NULL); 240 210 } 211 + 212 + void test_ns_current_pid_tgid_cgrp(void) 213 + { 214 + int cgroup_fd = test__join_cgroup("/sock_addr"); 215 + 216 + if (ASSERT_OK_FD(cgroup_fd, "join_cgroup")) { 217 + test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_cgrp, &cgroup_fd); 218 + close(cgroup_fd); 219 + } 220 + } 221 + 222 + void test_ns_current_pid_tgid_sk_msg(void) 223 + { 224 + test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_sk_msg, NULL); 225 + } 226 + 227 +
+99
tools/testing/selftests/bpf/prog_tests/prepare.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta */ 3 + 4 + #include <test_progs.h> 5 + #include <network_helpers.h> 6 + #include "prepare.skel.h" 7 + 8 + static bool check_prepared(struct bpf_object *obj) 9 + { 10 + bool is_prepared = true; 11 + const struct bpf_map *map; 12 + 13 + bpf_object__for_each_map(map, obj) { 14 + if (bpf_map__fd(map) < 0) 15 + is_prepared = false; 16 + } 17 + 18 + return is_prepared; 19 + } 20 + 21 + static void test_prepare_no_load(void) 22 + { 23 + struct prepare *skel; 24 + int err; 25 + LIBBPF_OPTS(bpf_test_run_opts, topts, 26 + .data_in = &pkt_v4, 27 + .data_size_in = sizeof(pkt_v4), 28 + ); 29 + 30 + skel = prepare__open(); 31 + if (!ASSERT_OK_PTR(skel, "prepare__open")) 32 + return; 33 + 34 + if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared")) 35 + goto cleanup; 36 + 37 + err = bpf_object__prepare(skel->obj); 38 + 39 + if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared")) 40 + goto cleanup; 41 + 42 + if (!ASSERT_OK(err, "bpf_object__prepare")) 43 + goto cleanup; 44 + 45 + cleanup: 46 + prepare__destroy(skel); 47 + } 48 + 49 + static void test_prepare_load(void) 50 + { 51 + struct prepare *skel; 52 + int err, prog_fd; 53 + LIBBPF_OPTS(bpf_test_run_opts, topts, 54 + .data_in = &pkt_v4, 55 + .data_size_in = sizeof(pkt_v4), 56 + ); 57 + 58 + skel = prepare__open(); 59 + if (!ASSERT_OK_PTR(skel, "prepare__open")) 60 + return; 61 + 62 + if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared")) 63 + goto cleanup; 64 + 65 + err = bpf_object__prepare(skel->obj); 66 + if (!ASSERT_OK(err, "bpf_object__prepare")) 67 + goto cleanup; 68 + 69 + err = prepare__load(skel); 70 + if (!ASSERT_OK(err, "prepare__load")) 71 + goto cleanup; 72 + 73 + if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared")) 74 + goto cleanup; 75 + 76 + prog_fd = bpf_program__fd(skel->progs.program); 77 + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) 78 + goto cleanup; 79 + 80 + err = bpf_prog_test_run_opts(prog_fd, &topts); 81 + if (!ASSERT_OK(err, "test_run_opts err")) 82 + goto cleanup; 83 + 84 + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) 85 + goto cleanup; 86 + 87 + ASSERT_EQ(skel->bss->err, 0, "err"); 88 + 89 + cleanup: 90 + prepare__destroy(skel); 91 + } 92 + 93 + void test_prepare(void) 94 + { 95 + if (test__start_subtest("prepare_load")) 96 + test_prepare_load(); 97 + if (test__start_subtest("prepare_no_load")) 98 + test_prepare_no_load(); 99 + }
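The subtests above exercise the new bpf_object__prepare() step, which creates maps and performs relocations before any program is loaded. A hedged sketch of the same flow with the plain bpf_object API rather than a skeleton (the object path and the printed fields are placeholders):

#include <stdio.h>
#include <bpf/libbpf.h>

int main(void)
{
	/* "prog.bpf.o" is a placeholder object path */
	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
	const struct bpf_map *map;
	int err;

	if (!obj)
		return 1;

	/* Create maps and resolve relocations without loading programs;
	 * map FDs are valid from this point on, as check_prepared() above
	 * asserts, so they can be pinned or pre-populated early.
	 */
	err = bpf_object__prepare(obj);
	if (err)
		goto out;

	bpf_object__for_each_map(map, obj)
		printf("map %s fd=%d\n", bpf_map__name(map), bpf_map__fd(map));

	/* Second phase: verify and load the programs */
	err = bpf_object__load(obj);
out:
	bpf_object__close(obj);
	return err ? 1 : 0;
}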
+2
tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
··· 6 6 #include "epilogue_tailcall.skel.h" 7 7 #include "pro_epilogue_goto_start.skel.h" 8 8 #include "epilogue_exit.skel.h" 9 + #include "pro_epilogue_with_kfunc.skel.h" 9 10 10 11 struct st_ops_args { 11 12 __u64 a; ··· 56 55 RUN_TESTS(pro_epilogue); 57 56 RUN_TESTS(pro_epilogue_goto_start); 58 57 RUN_TESTS(epilogue_exit); 58 + RUN_TESTS(pro_epilogue_with_kfunc); 59 59 if (test__start_subtest("tailcall")) 60 60 test_tailcall(); 61 61 }
+3
tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
··· 81 81 "nested_rcu_region", 82 82 "rcu_read_lock_global_subprog_lock", 83 83 "rcu_read_lock_global_subprog_unlock", 84 + "rcu_read_lock_sleepable_helper_global_subprog", 85 + "rcu_read_lock_sleepable_kfunc_global_subprog", 86 + "rcu_read_lock_sleepable_global_subprog_indirect", 84 87 }; 85 88 86 89 static void test_inproper_region(void)
+1
tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
··· 24 24 { .name = "copy_from_user", .ret = -EFAULT }, 25 25 { .name = "copy_from_user_task", .ret = -EFAULT }, 26 26 { .name = "copy_from_user_str", .ret = -EFAULT }, 27 + { .name = "copy_from_user_task_str", .ret = -EFAULT }, 27 28 }; 28 29 29 30 void test_read_vsyscall(void)
+1 -1
tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
··· 202 202 void test_setget_sockopt(void) 203 203 { 204 204 cg_fd = test__join_cgroup(CG_NAME); 205 - if (cg_fd < 0) 205 + if (!ASSERT_OK_FD(cg_fd, "join cgroup")) 206 206 return; 207 207 208 208 if (create_netns())
+3
tools/testing/selftests/bpf/prog_tests/spin_lock.c
··· 50 50 { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, 51 51 { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" }, 52 52 { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" }, 53 + { "lock_global_sleepable_helper_subprog", "global function calls are not allowed while holding a lock" }, 54 + { "lock_global_sleepable_kfunc_subprog", "global function calls are not allowed while holding a lock" }, 55 + { "lock_global_sleepable_subprog_indirect", "global function calls are not allowed while holding a lock" }, 53 56 }; 54 57 55 58 static int match_regex(const char *pattern, const char *string)
+144
tools/testing/selftests/bpf/prog_tests/summarization.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include "bpf/libbpf.h" 3 + #include "summarization_freplace.skel.h" 4 + #include "summarization.skel.h" 5 + #include <test_progs.h> 6 + 7 + static void print_verifier_log(const char *log) 8 + { 9 + if (env.verbosity >= VERBOSE_VERY) 10 + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); 11 + } 12 + 13 + static void test_aux(const char *main_prog_name, 14 + const char *to_be_replaced, 15 + const char *replacement, 16 + bool expect_load, 17 + const char *err_msg) 18 + { 19 + struct summarization_freplace *freplace = NULL; 20 + struct bpf_program *freplace_prog = NULL; 21 + struct bpf_program *main_prog = NULL; 22 + LIBBPF_OPTS(bpf_object_open_opts, opts); 23 + struct summarization *main = NULL; 24 + char log[16*1024]; 25 + int err; 26 + 27 + opts.kernel_log_buf = log; 28 + opts.kernel_log_size = sizeof(log); 29 + if (env.verbosity >= VERBOSE_SUPER) 30 + opts.kernel_log_level = 1 | 2 | 4; 31 + main = summarization__open_opts(&opts); 32 + if (!ASSERT_OK_PTR(main, "summarization__open")) 33 + goto out; 34 + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); 35 + if (!ASSERT_OK_PTR(main_prog, "main_prog")) 36 + goto out; 37 + bpf_program__set_autoload(main_prog, true); 38 + err = summarization__load(main); 39 + print_verifier_log(log); 40 + if (!ASSERT_OK(err, "summarization__load")) 41 + goto out; 42 + freplace = summarization_freplace__open_opts(&opts); 43 + if (!ASSERT_OK_PTR(freplace, "summarization_freplace__open")) 44 + goto out; 45 + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); 46 + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) 47 + goto out; 48 + bpf_program__set_autoload(freplace_prog, true); 49 + bpf_program__set_autoattach(freplace_prog, true); 50 + bpf_program__set_attach_target(freplace_prog, 51 + bpf_program__fd(main_prog), 52 + to_be_replaced); 53 + err = summarization_freplace__load(freplace); 54 + print_verifier_log(log); 55 + 56 + /* The might_sleep extension doesn't work yet as sleepable calls are not 57 + * allowed, but preserve the check in case it's supported later and then 58 + * this particular combination can be enabled. 59 + */ 60 + if (!strcmp("might_sleep", replacement) && err) { 61 + ASSERT_HAS_SUBSTR(log, "helper call might sleep in a non-sleepable prog", "error log"); 62 + ASSERT_EQ(err, -EINVAL, "err"); 63 + test__skip(); 64 + goto out; 65 + } 66 + 67 + if (expect_load) { 68 + ASSERT_OK(err, "summarization_freplace__load"); 69 + } else { 70 + ASSERT_ERR(err, "summarization_freplace__load"); 71 + ASSERT_HAS_SUBSTR(log, err_msg, "error log"); 72 + } 73 + 74 + out: 75 + summarization_freplace__destroy(freplace); 76 + summarization__destroy(main); 77 + } 78 + 79 + /* There are two global subprograms in both summarization.skel.h: 80 + * - one changes packet data; 81 + * - another does not. 82 + * It is ok to freplace subprograms that change packet data with those 83 + * that either do or do not. It is only ok to freplace subprograms 84 + * that do not change packet data with those that do not as well. 85 + * The below tests check outcomes for each combination of such freplace. 86 + * Also test a case when main subprogram itself is replaced and is a single 87 + * subprogram in a program. 88 + * 89 + * This holds for might_sleep programs. It is ok to replace might_sleep with 90 + * might_sleep and with does_not_sleep, but does_not_sleep cannot be replaced 91 + * with might_sleep. 
92 + */ 93 + void test_summarization_freplace(void) 94 + { 95 + struct { 96 + const char *main; 97 + const char *to_be_replaced; 98 + bool has_side_effect; 99 + } mains[2][4] = { 100 + { 101 + { "main_changes_with_subprogs", "changes_pkt_data", true }, 102 + { "main_changes_with_subprogs", "does_not_change_pkt_data", false }, 103 + { "main_changes", "main_changes", true }, 104 + { "main_does_not_change", "main_does_not_change", false }, 105 + }, 106 + { 107 + { "main_might_sleep_with_subprogs", "might_sleep", true }, 108 + { "main_might_sleep_with_subprogs", "does_not_sleep", false }, 109 + { "main_might_sleep", "main_might_sleep", true }, 110 + { "main_does_not_sleep", "main_does_not_sleep", false }, 111 + }, 112 + }; 113 + const char *pkt_err = "Extension program changes packet data"; 114 + const char *slp_err = "Extension program may sleep"; 115 + struct { 116 + const char *func; 117 + bool has_side_effect; 118 + const char *err_msg; 119 + } replacements[2][2] = { 120 + { 121 + { "changes_pkt_data", true, pkt_err }, 122 + { "does_not_change_pkt_data", false, pkt_err }, 123 + }, 124 + { 125 + { "might_sleep", true, slp_err }, 126 + { "does_not_sleep", false, slp_err }, 127 + }, 128 + }; 129 + char buf[64]; 130 + 131 + for (int t = 0; t < 2; t++) { 132 + for (int i = 0; i < ARRAY_SIZE(mains[t]); ++i) { 133 + for (int j = 0; j < ARRAY_SIZE(replacements[t]); ++j) { 134 + snprintf(buf, sizeof(buf), "%s_with_%s", 135 + mains[t][i].to_be_replaced, replacements[t][j].func); 136 + if (!test__start_subtest(buf)) 137 + continue; 138 + test_aux(mains[t][i].main, mains[t][i].to_be_replaced, replacements[t][j].func, 139 + mains[t][i].has_side_effect || !replacements[t][j].has_side_effect, 140 + replacements[t][j].err_msg); 141 + } 142 + } 143 + } 144 + }
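A small sketch of the disallowed replacement direction described in the comment above, using illustrative function names; in the selftests the target subprogram and the extension live in separate objects and are wired together with bpf_program__set_attach_target() before load, as test_aux() shows:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* Target subprogram, summarized as not touching packet data (in the
	 * selftests it sits in the main object and is called from the main
	 * program).
	 */
	__noinline int does_not_change_pkt_data(struct __sk_buff *skb)
	{
		return 0;
	}

	SEC("freplace")
	int changes_pkt_data(struct __sk_buff *skb)
	{
		/* bpf_skb_pull_data() may reallocate packet memory, so this
		 * extension is summarized as changing packet data; replacing the
		 * subprogram above with it fails with "Extension program changes
		 * packet data". The opposite direction (a no-side-effect extension
		 * replacing a pkt-data-changing subprogram) is accepted.
		 */
		return bpf_skb_pull_data(skb, 0);
	}

	char _license[] SEC("license") = "GPL";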
+1
tools/testing/selftests/bpf/prog_tests/tailcalls.c
··· 1600 1600 goto out; 1601 1601 1602 1602 err = bpf_link__destroy(freplace_link); 1603 + freplace_link = NULL; 1603 1604 if (!ASSERT_OK(err, "destroy link")) 1604 1605 goto out; 1605 1606
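The one-line change above clears freplace_link right after the explicit bpf_link__destroy() so that the function's shared cleanup path cannot destroy the same link a second time. A minimal sketch of the pattern, with an illustrative function name:

	#include <bpf/libbpf.h>

	static int detach_then_continue(struct bpf_link *freplace_link)
	{
		int err;

		err = bpf_link__destroy(freplace_link);
		freplace_link = NULL;	/* the out: path below must not free it again */
		if (err)
			goto out;

		/* ... further steps that may also jump to out ... */

	out:
		bpf_link__destroy(freplace_link); /* NULL now; libbpf treats this as a no-op */
		return err;
	}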
+14 -14
tools/testing/selftests/bpf/prog_tests/tc_links.c
··· 13 13 #include "netlink_helpers.h" 14 14 #include "tc_helpers.h" 15 15 16 - void serial_test_tc_links_basic(void) 16 + void test_ns_tc_links_basic(void) 17 17 { 18 18 LIBBPF_OPTS(bpf_prog_query_opts, optq); 19 19 LIBBPF_OPTS(bpf_tcx_opts, optl); ··· 260 260 assert_mprog_count(target, 0); 261 261 } 262 262 263 - void serial_test_tc_links_before(void) 263 + void test_ns_tc_links_before(void) 264 264 { 265 265 test_tc_links_before_target(BPF_TCX_INGRESS); 266 266 test_tc_links_before_target(BPF_TCX_EGRESS); ··· 414 414 assert_mprog_count(target, 0); 415 415 } 416 416 417 - void serial_test_tc_links_after(void) 417 + void test_ns_tc_links_after(void) 418 418 { 419 419 test_tc_links_after_target(BPF_TCX_INGRESS); 420 420 test_tc_links_after_target(BPF_TCX_EGRESS); ··· 514 514 assert_mprog_count(target, 0); 515 515 } 516 516 517 - void serial_test_tc_links_revision(void) 517 + void test_ns_tc_links_revision(void) 518 518 { 519 519 test_tc_links_revision_target(BPF_TCX_INGRESS); 520 520 test_tc_links_revision_target(BPF_TCX_EGRESS); ··· 618 618 assert_mprog_count(target, 0); 619 619 } 620 620 621 - void serial_test_tc_links_chain_classic(void) 621 + void test_ns_tc_links_chain_classic(void) 622 622 { 623 623 test_tc_chain_classic(BPF_TCX_INGRESS, false); 624 624 test_tc_chain_classic(BPF_TCX_EGRESS, false); ··· 846 846 assert_mprog_count(target, 0); 847 847 } 848 848 849 - void serial_test_tc_links_replace(void) 849 + void test_ns_tc_links_replace(void) 850 850 { 851 851 test_tc_links_replace_target(BPF_TCX_INGRESS); 852 852 test_tc_links_replace_target(BPF_TCX_EGRESS); ··· 1158 1158 assert_mprog_count(target, 0); 1159 1159 } 1160 1160 1161 - void serial_test_tc_links_invalid(void) 1161 + void test_ns_tc_links_invalid(void) 1162 1162 { 1163 1163 test_tc_links_invalid_target(BPF_TCX_INGRESS); 1164 1164 test_tc_links_invalid_target(BPF_TCX_EGRESS); ··· 1314 1314 assert_mprog_count(target, 0); 1315 1315 } 1316 1316 1317 - void serial_test_tc_links_prepend(void) 1317 + void test_ns_tc_links_prepend(void) 1318 1318 { 1319 1319 test_tc_links_prepend_target(BPF_TCX_INGRESS); 1320 1320 test_tc_links_prepend_target(BPF_TCX_EGRESS); ··· 1470 1470 assert_mprog_count(target, 0); 1471 1471 } 1472 1472 1473 - void serial_test_tc_links_append(void) 1473 + void test_ns_tc_links_append(void) 1474 1474 { 1475 1475 test_tc_links_append_target(BPF_TCX_INGRESS); 1476 1476 test_tc_links_append_target(BPF_TCX_EGRESS); ··· 1568 1568 ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); 1569 1569 } 1570 1570 1571 - void serial_test_tc_links_dev_cleanup(void) 1571 + void test_ns_tc_links_dev_cleanup(void) 1572 1572 { 1573 1573 test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS); 1574 1574 test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS); ··· 1672 1672 test_tc_link__destroy(skel); 1673 1673 } 1674 1674 1675 - void serial_test_tc_links_chain_mixed(void) 1675 + void test_ns_tc_links_chain_mixed(void) 1676 1676 { 1677 1677 test_tc_chain_mixed(BPF_TCX_INGRESS); 1678 1678 test_tc_chain_mixed(BPF_TCX_EGRESS); ··· 1782 1782 assert_mprog_count(target, 0); 1783 1783 } 1784 1784 1785 - void serial_test_tc_links_ingress(void) 1785 + void test_ns_tc_links_ingress(void) 1786 1786 { 1787 1787 test_tc_links_ingress(BPF_TCX_INGRESS, true, true); 1788 1788 test_tc_links_ingress(BPF_TCX_INGRESS, true, false); ··· 1823 1823 return err; 1824 1824 } 1825 1825 1826 - void serial_test_tc_links_dev_chain0(void) 1826 + void test_ns_tc_links_dev_chain0(void) 1827 1827 { 1828 1828 int err, ifindex; 1829 1829 ··· 1955 1955 
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); 1956 1956 } 1957 1957 1958 - void serial_test_tc_links_dev_mixed(void) 1958 + void test_ns_tc_links_dev_mixed(void) 1959 1959 { 1960 1960 test_tc_links_dev_mixed(BPF_TCX_INGRESS); 1961 1961 test_tc_links_dev_mixed(BPF_TCX_EGRESS);
+20 -20
tools/testing/selftests/bpf/prog_tests/tc_opts.c
··· 10 10 #include "test_tc_link.skel.h" 11 11 #include "tc_helpers.h" 12 12 13 - void serial_test_tc_opts_basic(void) 13 + void test_ns_tc_opts_basic(void) 14 14 { 15 15 LIBBPF_OPTS(bpf_prog_attach_opts, opta); 16 16 LIBBPF_OPTS(bpf_prog_detach_opts, optd); ··· 254 254 test_tc_link__destroy(skel); 255 255 } 256 256 257 - void serial_test_tc_opts_before(void) 257 + void test_ns_tc_opts_before(void) 258 258 { 259 259 test_tc_opts_before_target(BPF_TCX_INGRESS); 260 260 test_tc_opts_before_target(BPF_TCX_EGRESS); ··· 445 445 test_tc_link__destroy(skel); 446 446 } 447 447 448 - void serial_test_tc_opts_after(void) 448 + void test_ns_tc_opts_after(void) 449 449 { 450 450 test_tc_opts_after_target(BPF_TCX_INGRESS); 451 451 test_tc_opts_after_target(BPF_TCX_EGRESS); ··· 554 554 test_tc_link__destroy(skel); 555 555 } 556 556 557 - void serial_test_tc_opts_revision(void) 557 + void test_ns_tc_opts_revision(void) 558 558 { 559 559 test_tc_opts_revision_target(BPF_TCX_INGRESS); 560 560 test_tc_opts_revision_target(BPF_TCX_EGRESS); ··· 655 655 assert_mprog_count(target, 0); 656 656 } 657 657 658 - void serial_test_tc_opts_chain_classic(void) 658 + void test_ns_tc_opts_chain_classic(void) 659 659 { 660 660 test_tc_chain_classic(BPF_TCX_INGRESS, false); 661 661 test_tc_chain_classic(BPF_TCX_EGRESS, false); ··· 864 864 test_tc_link__destroy(skel); 865 865 } 866 866 867 - void serial_test_tc_opts_replace(void) 867 + void test_ns_tc_opts_replace(void) 868 868 { 869 869 test_tc_opts_replace_target(BPF_TCX_INGRESS); 870 870 test_tc_opts_replace_target(BPF_TCX_EGRESS); ··· 1017 1017 test_tc_link__destroy(skel); 1018 1018 } 1019 1019 1020 - void serial_test_tc_opts_invalid(void) 1020 + void test_ns_tc_opts_invalid(void) 1021 1021 { 1022 1022 test_tc_opts_invalid_target(BPF_TCX_INGRESS); 1023 1023 test_tc_opts_invalid_target(BPF_TCX_EGRESS); ··· 1157 1157 test_tc_link__destroy(skel); 1158 1158 } 1159 1159 1160 - void serial_test_tc_opts_prepend(void) 1160 + void test_ns_tc_opts_prepend(void) 1161 1161 { 1162 1162 test_tc_opts_prepend_target(BPF_TCX_INGRESS); 1163 1163 test_tc_opts_prepend_target(BPF_TCX_EGRESS); ··· 1297 1297 test_tc_link__destroy(skel); 1298 1298 } 1299 1299 1300 - void serial_test_tc_opts_append(void) 1300 + void test_ns_tc_opts_append(void) 1301 1301 { 1302 1302 test_tc_opts_append_target(BPF_TCX_INGRESS); 1303 1303 test_tc_opts_append_target(BPF_TCX_EGRESS); ··· 1387 1387 ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); 1388 1388 } 1389 1389 1390 - void serial_test_tc_opts_dev_cleanup(void) 1390 + void test_ns_tc_opts_dev_cleanup(void) 1391 1391 { 1392 1392 test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS); 1393 1393 test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS); ··· 1563 1563 assert_mprog_count(target, 0); 1564 1564 } 1565 1565 1566 - void serial_test_tc_opts_mixed(void) 1566 + void test_ns_tc_opts_mixed(void) 1567 1567 { 1568 1568 test_tc_opts_mixed_target(BPF_TCX_INGRESS); 1569 1569 test_tc_opts_mixed_target(BPF_TCX_EGRESS); ··· 1642 1642 assert_mprog_count(target, 0); 1643 1643 } 1644 1644 1645 - void serial_test_tc_opts_demixed(void) 1645 + void test_ns_tc_opts_demixed(void) 1646 1646 { 1647 1647 test_tc_opts_demixed_target(BPF_TCX_INGRESS); 1648 1648 test_tc_opts_demixed_target(BPF_TCX_EGRESS); ··· 1813 1813 test_tc_link__destroy(skel); 1814 1814 } 1815 1815 1816 - void serial_test_tc_opts_detach(void) 1816 + void test_ns_tc_opts_detach(void) 1817 1817 { 1818 1818 test_tc_opts_detach_target(BPF_TCX_INGRESS); 1819 1819 test_tc_opts_detach_target(BPF_TCX_EGRESS); ··· 2020 
2020 test_tc_link__destroy(skel); 2021 2021 } 2022 2022 2023 - void serial_test_tc_opts_detach_before(void) 2023 + void test_ns_tc_opts_detach_before(void) 2024 2024 { 2025 2025 test_tc_opts_detach_before_target(BPF_TCX_INGRESS); 2026 2026 test_tc_opts_detach_before_target(BPF_TCX_EGRESS); ··· 2236 2236 test_tc_link__destroy(skel); 2237 2237 } 2238 2238 2239 - void serial_test_tc_opts_detach_after(void) 2239 + void test_ns_tc_opts_detach_after(void) 2240 2240 { 2241 2241 test_tc_opts_detach_after_target(BPF_TCX_INGRESS); 2242 2242 test_tc_opts_detach_after_target(BPF_TCX_EGRESS); ··· 2265 2265 assert_mprog_count(target, 0); 2266 2266 } 2267 2267 2268 - void serial_test_tc_opts_delete_empty(void) 2268 + void test_ns_tc_opts_delete_empty(void) 2269 2269 { 2270 2270 test_tc_opts_delete_empty(BPF_TCX_INGRESS, false); 2271 2271 test_tc_opts_delete_empty(BPF_TCX_EGRESS, false); ··· 2372 2372 test_tc_link__destroy(skel); 2373 2373 } 2374 2374 2375 - void serial_test_tc_opts_chain_mixed(void) 2375 + void test_ns_tc_opts_chain_mixed(void) 2376 2376 { 2377 2377 test_tc_chain_mixed(BPF_TCX_INGRESS); 2378 2378 test_tc_chain_mixed(BPF_TCX_EGRESS); ··· 2446 2446 ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); 2447 2447 } 2448 2448 2449 - void serial_test_tc_opts_max(void) 2449 + void test_ns_tc_opts_max(void) 2450 2450 { 2451 2451 test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false); 2452 2452 test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false); ··· 2748 2748 test_tc_link__destroy(skel); 2749 2749 } 2750 2750 2751 - void serial_test_tc_opts_query(void) 2751 + void test_ns_tc_opts_query(void) 2752 2752 { 2753 2753 test_tc_opts_query_target(BPF_TCX_INGRESS); 2754 2754 test_tc_opts_query_target(BPF_TCX_EGRESS); ··· 2807 2807 test_tc_link__destroy(skel); 2808 2808 } 2809 2809 2810 - void serial_test_tc_opts_query_attach(void) 2810 + void test_ns_tc_opts_query_attach(void) 2811 2811 { 2812 2812 test_tc_opts_query_attach_target(BPF_TCX_INGRESS); 2813 2813 test_tc_opts_query_attach_target(BPF_TCX_EGRESS);
+16
tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
··· 1 + #include <test_progs.h> 2 + 3 + #include "struct_ops_kptr_return.skel.h" 4 + #include "struct_ops_kptr_return_fail__wrong_type.skel.h" 5 + #include "struct_ops_kptr_return_fail__invalid_scalar.skel.h" 6 + #include "struct_ops_kptr_return_fail__nonzero_offset.skel.h" 7 + #include "struct_ops_kptr_return_fail__local_kptr.skel.h" 8 + 9 + void test_struct_ops_kptr_return(void) 10 + { 11 + RUN_TESTS(struct_ops_kptr_return); 12 + RUN_TESTS(struct_ops_kptr_return_fail__wrong_type); 13 + RUN_TESTS(struct_ops_kptr_return_fail__invalid_scalar); 14 + RUN_TESTS(struct_ops_kptr_return_fail__nonzero_offset); 15 + RUN_TESTS(struct_ops_kptr_return_fail__local_kptr); 16 + }
+14
tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
··· 1 + #include <test_progs.h> 2 + 3 + #include "struct_ops_refcounted.skel.h" 4 + #include "struct_ops_refcounted_fail__ref_leak.skel.h" 5 + #include "struct_ops_refcounted_fail__global_subprog.skel.h" 6 + #include "struct_ops_refcounted_fail__tail_call.skel.h" 7 + 8 + void test_struct_ops_refcounted(void) 9 + { 10 + RUN_TESTS(struct_ops_refcounted); 11 + RUN_TESTS(struct_ops_refcounted_fail__ref_leak); 12 + RUN_TESTS(struct_ops_refcounted_fail__global_subprog); 13 + RUN_TESTS(struct_ops_refcounted_fail__tail_call); 14 + }
+530 -103
tools/testing/selftests/bpf/prog_tests/test_tunnel.c
··· 71 71 #define IP4_ADDR2_VETH1 "172.16.1.20" 72 72 #define IP4_ADDR_TUNL_DEV0 "10.1.1.100" 73 73 #define IP4_ADDR_TUNL_DEV1 "10.1.1.200" 74 + #define IP6_ADDR_TUNL_DEV0 "fc80::100" 75 + #define IP6_ADDR_TUNL_DEV1 "fc80::200" 74 76 75 77 #define IP6_ADDR_VETH0 "::11" 76 78 #define IP6_ADDR1_VETH1 "::22" ··· 99 97 #define XFRM_ENC "0x22222222222222222222222222222222" 100 98 #define XFRM_SPI_IN_TO_OUT 0x1 101 99 #define XFRM_SPI_OUT_TO_IN 0x2 100 + 101 + #define GRE_TUNL_DEV0 "gre00" 102 + #define GRE_TUNL_DEV1 "gre11" 103 + 104 + #define IP6GRE_TUNL_DEV0 "ip6gre00" 105 + #define IP6GRE_TUNL_DEV1 "ip6gre11" 106 + 107 + #define ERSPAN_TUNL_DEV0 "erspan00" 108 + #define ERSPAN_TUNL_DEV1 "erspan11" 109 + 110 + #define IP6ERSPAN_TUNL_DEV0 "ip6erspan00" 111 + #define IP6ERSPAN_TUNL_DEV1 "ip6erspan11" 112 + 113 + #define GENEVE_TUNL_DEV0 "geneve00" 114 + #define GENEVE_TUNL_DEV1 "geneve11" 115 + 116 + #define IP6GENEVE_TUNL_DEV0 "ip6geneve00" 117 + #define IP6GENEVE_TUNL_DEV1 "ip6geneve11" 118 + 119 + #define IP6TNL_TUNL_DEV0 "ip6tnl00" 120 + #define IP6TNL_TUNL_DEV1 "ip6tnl11" 102 121 103 122 #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" 104 123 ··· 237 214 return 0; 238 215 fail: 239 216 return -1; 217 + } 218 + 219 + static int set_ipv4_addr(const char *dev0, const char *dev1) 220 + { 221 + SYS(fail, "ip -n at_ns0 link set dev %s up", dev0); 222 + SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0); 223 + SYS(fail, "ip link set dev %s up", dev1); 224 + SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1); 225 + 226 + return 0; 227 + fail: 228 + return 1; 240 229 } 241 230 242 231 static int add_ipip_tunnel(enum ipip_encap encap) ··· 391 356 IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); 392 357 } 393 358 359 + static int add_ipv4_tunnel(const char *dev0, const char *dev1, 360 + const char *type, const char *opt) 361 + { 362 + if (!type || !opt || !dev0 || !dev1) 363 + return -1; 364 + 365 + SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s", 366 + dev0, type, opt, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); 367 + 368 + SYS(fail, "ip link add dev %s type %s external", dev1, type); 369 + 370 + return set_ipv4_addr(dev0, dev1); 371 + fail: 372 + return -1; 373 + } 374 + 375 + static void delete_tunnel(const char *dev0, const char *dev1) 376 + { 377 + if (!dev0 || !dev1) 378 + return; 379 + 380 + SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", dev0); 381 + SYS_NOFAIL("ip link delete dev %s", dev1); 382 + } 383 + 384 + static int set_ipv6_addr(const char *dev0, const char *dev1) 385 + { 386 + /* disable IPv6 DAD because it might take too long and fail tests */ 387 + SYS(fail, "ip -n at_ns0 addr add %s/96 dev veth0 nodad", IP6_ADDR_VETH0); 388 + SYS(fail, "ip -n at_ns0 link set dev veth0 up"); 389 + SYS(fail, "ip addr add %s/96 dev veth1 nodad", IP6_ADDR1_VETH1); 390 + SYS(fail, "ip link set dev veth1 up"); 391 + 392 + SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0); 393 + SYS(fail, "ip -n at_ns0 addr add dev %s %s/96 nodad", dev0, IP6_ADDR_TUNL_DEV0); 394 + SYS(fail, "ip -n at_ns0 link set dev %s up", dev0); 395 + 396 + SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1); 397 + SYS(fail, "ip addr add dev %s %s/96 nodad", dev1, IP6_ADDR_TUNL_DEV1); 398 + SYS(fail, "ip link set dev %s up", dev1); 399 + return 0; 400 + fail: 401 + return 1; 402 + } 403 + 404 + static int add_ipv6_tunnel(const char *dev0, const char *dev1, 405 + const char *type, const char *opt) 406 + { 407 + if (!type || !opt || !dev0 || 
!dev1) 408 + return -1; 409 + 410 + SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s", 411 + dev0, type, opt, IP6_ADDR_VETH0, IP6_ADDR1_VETH1); 412 + 413 + SYS(fail, "ip link add dev %s type %s external", dev1, type); 414 + 415 + return set_ipv6_addr(dev0, dev1); 416 + fail: 417 + return -1; 418 + } 419 + 420 + static int add_geneve_tunnel(const char *dev0, const char *dev1, 421 + const char *type, const char *opt) 422 + { 423 + if (!type || !opt || !dev0 || !dev1) 424 + return -1; 425 + 426 + SYS(fail, "ip -n at_ns0 link add dev %s type %s id 2 %s remote %s", 427 + dev0, type, opt, IP4_ADDR1_VETH1); 428 + 429 + SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt); 430 + 431 + return set_ipv4_addr(dev0, dev1); 432 + fail: 433 + return -1; 434 + } 435 + 436 + static int add_ip6geneve_tunnel(const char *dev0, const char *dev1, 437 + const char *type, const char *opt) 438 + { 439 + if (!type || !opt || !dev0 || !dev1) 440 + return -1; 441 + 442 + SYS(fail, "ip -n at_ns0 link add dev %s type %s id 22 %s remote %s", 443 + dev0, type, opt, IP6_ADDR1_VETH1); 444 + 445 + SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt); 446 + 447 + return set_ipv6_addr(dev0, dev1); 448 + fail: 449 + return -1; 450 + } 451 + 394 452 static int test_ping(int family, const char *addr) 395 453 { 396 454 SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); ··· 492 364 return -1; 493 365 } 494 366 495 - static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) 367 + static void ping_dev0(void) 496 368 { 369 + /* ping from root namespace test */ 370 + test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); 371 + } 372 + 373 + static void ping_dev1(void) 374 + { 375 + struct nstoken *nstoken; 376 + 377 + /* ping from at_ns0 namespace test */ 378 + nstoken = open_netns("at_ns0"); 379 + if (!ASSERT_OK_PTR(nstoken, "setns")) 380 + return; 381 + 382 + test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); 383 + close_netns(nstoken); 384 + } 385 + 386 + static void ping6_veth0(void) 387 + { 388 + test_ping(AF_INET6, IP6_ADDR_VETH0); 389 + } 390 + 391 + static void ping6_dev0(void) 392 + { 393 + test_ping(AF_INET6, IP6_ADDR_TUNL_DEV0); 394 + } 395 + 396 + static void ping6_dev1(void) 397 + { 398 + struct nstoken *nstoken; 399 + 400 + /* ping from at_ns0 namespace test */ 401 + nstoken = open_netns("at_ns0"); 402 + if (!ASSERT_OK_PTR(nstoken, "setns")) 403 + return; 404 + 405 + test_ping(AF_INET, IP6_ADDR_TUNL_DEV1); 406 + close_netns(nstoken); 407 + } 408 + 409 + static int attach_tc_prog(int ifindex, int igr_fd, int egr_fd) 410 + { 411 + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex, 412 + .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS); 497 413 DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, 498 414 .priority = 1, .prog_fd = igr_fd); 499 415 DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, 500 416 .priority = 1, .prog_fd = egr_fd); 501 417 int ret; 502 418 503 - ret = bpf_tc_hook_create(hook); 419 + ret = bpf_tc_hook_create(&hook); 504 420 if (!ASSERT_OK(ret, "create tc hook")) 505 421 return ret; 506 422 507 423 if (igr_fd >= 0) { 508 - hook->attach_point = BPF_TC_INGRESS; 509 - ret = bpf_tc_attach(hook, &opts1); 424 + hook.attach_point = BPF_TC_INGRESS; 425 + ret = bpf_tc_attach(&hook, &opts1); 510 426 if (!ASSERT_OK(ret, "bpf_tc_attach")) { 511 - bpf_tc_hook_destroy(hook); 427 + bpf_tc_hook_destroy(&hook); 512 428 return ret; 513 429 } 514 430 } 515 431 516 432 if (egr_fd >= 0) { 517 - hook->attach_point = BPF_TC_EGRESS; 518 - ret = 
bpf_tc_attach(hook, &opts2); 433 + hook.attach_point = BPF_TC_EGRESS; 434 + ret = bpf_tc_attach(&hook, &opts2); 519 435 if (!ASSERT_OK(ret, "bpf_tc_attach")) { 520 - bpf_tc_hook_destroy(hook); 436 + bpf_tc_hook_destroy(&hook); 521 437 return ret; 522 438 } 523 439 } 524 440 525 441 return 0; 442 + } 443 + 444 + static int generic_attach(const char *dev, int igr_fd, int egr_fd) 445 + { 446 + int ifindex; 447 + 448 + if (!ASSERT_OK_FD(igr_fd, "check ingress fd")) 449 + return -1; 450 + if (!ASSERT_OK_FD(egr_fd, "check egress fd")) 451 + return -1; 452 + 453 + ifindex = if_nametoindex(dev); 454 + if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) 455 + return -1; 456 + 457 + return attach_tc_prog(ifindex, igr_fd, egr_fd); 458 + } 459 + 460 + static int generic_attach_igr(const char *dev, int igr_fd) 461 + { 462 + int ifindex; 463 + 464 + if (!ASSERT_OK_FD(igr_fd, "check ingress fd")) 465 + return -1; 466 + 467 + ifindex = if_nametoindex(dev); 468 + if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) 469 + return -1; 470 + 471 + return attach_tc_prog(ifindex, igr_fd, -1); 472 + } 473 + 474 + static int generic_attach_egr(const char *dev, int egr_fd) 475 + { 476 + int ifindex; 477 + 478 + if (!ASSERT_OK_FD(egr_fd, "check egress fd")) 479 + return -1; 480 + 481 + ifindex = if_nametoindex(dev); 482 + if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) 483 + return -1; 484 + 485 + return attach_tc_prog(ifindex, -1, egr_fd); 526 486 } 527 487 528 488 static void test_vxlan_tunnel(void) ··· 620 404 int local_ip_map_fd = -1; 621 405 int set_src_prog_fd, get_src_prog_fd; 622 406 int set_dst_prog_fd; 623 - int key = 0, ifindex = -1; 407 + int key = 0; 624 408 uint local_ip; 625 409 int err; 626 - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, 627 - .attach_point = BPF_TC_INGRESS); 628 410 629 411 /* add vxlan tunnel */ 630 412 err = add_vxlan_tunnel(); ··· 633 419 skel = test_tunnel_kern__open_and_load(); 634 420 if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 635 421 goto done; 636 - ifindex = if_nametoindex(VXLAN_TUNL_DEV1); 637 - if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex")) 638 - goto done; 639 - tc_hook.ifindex = ifindex; 640 422 get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src); 641 423 set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src); 642 - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) 643 - goto done; 644 - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) 645 - goto done; 646 - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) 424 + if (generic_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) 647 425 goto done; 648 426 649 427 /* load and attach bpf prog to veth dev tc hook point */ 650 - ifindex = if_nametoindex("veth1"); 651 - if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) 652 - goto done; 653 - tc_hook.ifindex = ifindex; 654 428 set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); 655 - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) 656 - goto done; 657 - if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1)) 429 + if (generic_attach_igr("veth1", set_dst_prog_fd)) 658 430 goto done; 659 431 660 432 /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ 661 433 nstoken = open_netns("at_ns0"); 662 434 if (!ASSERT_OK_PTR(nstoken, "setns src")) 663 435 goto done; 664 - ifindex = if_nametoindex(VXLAN_TUNL_DEV0); 665 - if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex")) 666 - goto done; 667 - tc_hook.ifindex = ifindex; 668 436 set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst); 669 - if 
(!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) 670 - goto done; 671 - if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) 437 + if (generic_attach_egr(VXLAN_TUNL_DEV0, set_dst_prog_fd)) 672 438 goto done; 673 439 close_netns(nstoken); 674 440 ··· 662 468 goto done; 663 469 664 470 /* ping test */ 665 - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); 666 - if (!ASSERT_OK(err, "test_ping")) 667 - goto done; 471 + ping_dev0(); 668 472 669 473 done: 670 474 /* delete vxlan tunnel */ ··· 680 488 int local_ip_map_fd = -1; 681 489 int set_src_prog_fd, get_src_prog_fd; 682 490 int set_dst_prog_fd; 683 - int key = 0, ifindex = -1; 491 + int key = 0; 684 492 uint local_ip; 685 493 int err; 686 - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, 687 - .attach_point = BPF_TC_INGRESS); 688 494 689 495 /* add vxlan tunnel */ 690 496 err = add_ip6vxlan_tunnel(); ··· 693 503 skel = test_tunnel_kern__open_and_load(); 694 504 if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 695 505 goto done; 696 - ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1); 697 - if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex")) 698 - goto done; 699 - tc_hook.ifindex = ifindex; 700 506 get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src); 701 507 set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src); 702 - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) 703 - goto done; 704 - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) 705 - goto done; 706 - if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) 508 + if (generic_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) 707 509 goto done; 708 510 709 511 /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ 710 512 nstoken = open_netns("at_ns0"); 711 513 if (!ASSERT_OK_PTR(nstoken, "setns src")) 712 514 goto done; 713 - ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0); 714 - if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex")) 715 - goto done; 716 - tc_hook.ifindex = ifindex; 717 515 set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst); 718 - if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) 719 - goto done; 720 - if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) 516 + if (generic_attach_egr(IP6VXLAN_TUNL_DEV0, set_dst_prog_fd)) 721 517 goto done; 722 518 close_netns(nstoken); 723 519 ··· 717 541 goto done; 718 542 719 543 /* ping test */ 720 - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); 721 - if (!ASSERT_OK(err, "test_ping")) 722 - goto done; 544 + ping_dev0(); 723 545 724 546 done: 725 547 /* delete ipv6 vxlan tunnel */ ··· 731 557 static void test_ipip_tunnel(enum ipip_encap encap) 732 558 { 733 559 struct test_tunnel_kern *skel = NULL; 734 - struct nstoken *nstoken; 735 560 int set_src_prog_fd, get_src_prog_fd; 736 - int ifindex = -1; 737 561 int err; 738 - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, 739 - .attach_point = BPF_TC_INGRESS); 740 562 741 563 /* add ipip tunnel */ 742 564 err = add_ipip_tunnel(encap); ··· 743 573 skel = test_tunnel_kern__open_and_load(); 744 574 if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 745 575 goto done; 746 - ifindex = if_nametoindex(IPIP_TUNL_DEV1); 747 - if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex")) 748 - goto done; 749 - tc_hook.ifindex = ifindex; 750 576 751 577 switch (encap) { 752 578 case FOU: ··· 764 598 skel->progs.ipip_set_tunnel); 765 599 } 766 600 767 - if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) 768 - goto done; 769 - if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) 770 - goto done; 771 - if 
(attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) 601 + if (generic_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) 772 602 goto done; 773 603 774 - /* ping from root namespace test */ 775 - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); 776 - if (!ASSERT_OK(err, "test_ping")) 777 - goto done; 778 - 779 - /* ping from at_ns0 namespace test */ 780 - nstoken = open_netns("at_ns0"); 781 - if (!ASSERT_OK_PTR(nstoken, "setns")) 782 - goto done; 783 - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); 784 - if (!ASSERT_OK(err, "test_ping")) 785 - goto done; 786 - close_netns(nstoken); 604 + ping_dev0(); 605 + ping_dev1(); 787 606 788 607 done: 789 608 /* delete ipip tunnel */ ··· 779 628 780 629 static void test_xfrm_tunnel(void) 781 630 { 782 - DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, 783 - .attach_point = BPF_TC_INGRESS); 784 631 LIBBPF_OPTS(bpf_xdp_attach_opts, opts); 785 632 struct test_tunnel_kern *skel = NULL; 786 - struct nstoken *nstoken; 787 633 int xdp_prog_fd; 788 634 int tc_prog_fd; 789 635 int ifindex; ··· 794 646 if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 795 647 goto done; 796 648 797 - ifindex = if_nametoindex("veth1"); 798 - if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) 799 - goto done; 800 649 801 650 /* attach tc prog to tunnel dev */ 802 - tc_hook.ifindex = ifindex; 803 651 tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); 804 - if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) 805 - goto done; 806 - if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) 652 + if (generic_attach_igr("veth1", tc_prog_fd)) 807 653 goto done; 808 654 809 655 /* attach xdp prog to tunnel dev */ 656 + ifindex = if_nametoindex("veth1"); 657 + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) 658 + goto done; 810 659 xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); 811 660 if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) 812 661 goto done; ··· 811 666 if (!ASSERT_OK(err, "bpf_xdp_attach")) 812 667 goto done; 813 668 814 - /* ping from at_ns0 namespace test */ 815 - nstoken = open_netns("at_ns0"); 816 - if (!ASSERT_OK_PTR(nstoken, "setns")) 817 - goto done; 818 - err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); 819 - close_netns(nstoken); 820 - if (!ASSERT_OK(err, "test_ping")) 821 - goto done; 669 + ping_dev1(); 822 670 823 671 if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) 824 672 goto done; ··· 826 688 delete_xfrm_tunnel(); 827 689 if (skel) 828 690 test_tunnel_kern__destroy(skel); 691 + } 692 + 693 + enum gre_test { 694 + GRE, 695 + GRE_NOKEY, 696 + GRETAP, 697 + GRETAP_NOKEY, 698 + }; 699 + 700 + static void test_gre_tunnel(enum gre_test test) 701 + { 702 + struct test_tunnel_kern *skel; 703 + int set_fd, get_fd; 704 + int err; 705 + 706 + skel = test_tunnel_kern__open_and_load(); 707 + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 708 + return; 709 + 710 + switch (test) { 711 + case GRE: 712 + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq"); 713 + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key); 714 + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); 715 + break; 716 + case GRE_NOKEY: 717 + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq key 2"); 718 + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel); 719 + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); 720 + break; 721 + case GRETAP: 722 + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq"); 723 + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key); 724 + get_fd = 
bpf_program__fd(skel->progs.gre_get_tunnel); 725 + break; 726 + case GRETAP_NOKEY: 727 + err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq key 2"); 728 + set_fd = bpf_program__fd(skel->progs.gre_set_tunnel); 729 + get_fd = bpf_program__fd(skel->progs.gre_get_tunnel); 730 + break; 731 + } 732 + if (!ASSERT_OK(err, "add tunnel")) 733 + goto done; 734 + 735 + if (generic_attach(GRE_TUNL_DEV1, get_fd, set_fd)) 736 + goto done; 737 + 738 + ping_dev0(); 739 + ping_dev1(); 740 + 741 + done: 742 + delete_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1); 743 + test_tunnel_kern__destroy(skel); 744 + } 745 + 746 + enum ip6gre_test { 747 + IP6GRE, 748 + IP6GRETAP 749 + }; 750 + 751 + static void test_ip6gre_tunnel(enum ip6gre_test test) 752 + { 753 + struct test_tunnel_kern *skel; 754 + int set_fd, get_fd; 755 + int err; 756 + 757 + skel = test_tunnel_kern__open_and_load(); 758 + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 759 + return; 760 + 761 + switch (test) { 762 + case IP6GRE: 763 + err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1, 764 + "ip6gre", "flowlabel 0xbcdef key 2"); 765 + break; 766 + case IP6GRETAP: 767 + err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1, 768 + "ip6gretap", "flowlabel 0xbcdef key 2"); 769 + break; 770 + } 771 + if (!ASSERT_OK(err, "add tunnel")) 772 + goto done; 773 + 774 + set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel); 775 + get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel); 776 + if (generic_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd)) 777 + goto done; 778 + 779 + ping6_veth0(); 780 + ping6_dev1(); 781 + ping_dev0(); 782 + ping_dev1(); 783 + done: 784 + delete_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1); 785 + test_tunnel_kern__destroy(skel); 786 + } 787 + 788 + enum erspan_test { 789 + V1, 790 + V2 791 + }; 792 + 793 + static void test_erspan_tunnel(enum erspan_test test) 794 + { 795 + struct test_tunnel_kern *skel; 796 + int set_fd, get_fd; 797 + int err; 798 + 799 + skel = test_tunnel_kern__open_and_load(); 800 + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 801 + return; 802 + 803 + switch (test) { 804 + case V1: 805 + err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1, 806 + "erspan", "seq key 2 erspan_ver 1 erspan 123"); 807 + break; 808 + case V2: 809 + err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1, 810 + "erspan", 811 + "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 3"); 812 + break; 813 + } 814 + if (!ASSERT_OK(err, "add tunnel")) 815 + goto done; 816 + 817 + set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel); 818 + get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel); 819 + if (generic_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd)) 820 + goto done; 821 + 822 + ping_dev0(); 823 + ping_dev1(); 824 + done: 825 + delete_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1); 826 + test_tunnel_kern__destroy(skel); 827 + } 828 + 829 + static void test_ip6erspan_tunnel(enum erspan_test test) 830 + { 831 + struct test_tunnel_kern *skel; 832 + int set_fd, get_fd; 833 + int err; 834 + 835 + skel = test_tunnel_kern__open_and_load(); 836 + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 837 + return; 838 + 839 + switch (test) { 840 + case V1: 841 + err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1, 842 + "ip6erspan", "seq key 2 erspan_ver 1 erspan 123"); 843 + break; 844 + case V2: 845 + err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1, 846 + "ip6erspan", 847 + "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 7"); 848 + break; 849 + } 850 + 
if (!ASSERT_OK(err, "add tunnel")) 851 + goto done; 852 + 853 + set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel); 854 + get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel); 855 + if (generic_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd)) 856 + goto done; 857 + 858 + ping6_veth0(); 859 + ping_dev1(); 860 + done: 861 + delete_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1); 862 + test_tunnel_kern__destroy(skel); 863 + } 864 + 865 + static void test_geneve_tunnel(void) 866 + { 867 + struct test_tunnel_kern *skel; 868 + int set_fd, get_fd; 869 + int err; 870 + 871 + skel = test_tunnel_kern__open_and_load(); 872 + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 873 + return; 874 + 875 + err = add_geneve_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1, 876 + "geneve", "dstport 6081"); 877 + if (!ASSERT_OK(err, "add tunnel")) 878 + goto done; 879 + 880 + set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel); 881 + get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel); 882 + if (generic_attach(GENEVE_TUNL_DEV1, get_fd, set_fd)) 883 + goto done; 884 + 885 + ping_dev0(); 886 + ping_dev1(); 887 + done: 888 + delete_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1); 889 + test_tunnel_kern__destroy(skel); 890 + } 891 + 892 + static void test_ip6geneve_tunnel(void) 893 + { 894 + struct test_tunnel_kern *skel; 895 + int set_fd, get_fd; 896 + int err; 897 + 898 + skel = test_tunnel_kern__open_and_load(); 899 + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 900 + return; 901 + 902 + err = add_ip6geneve_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1, 903 + "geneve", ""); 904 + if (!ASSERT_OK(err, "add tunnel")) 905 + goto done; 906 + 907 + set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel); 908 + get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel); 909 + if (generic_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd)) 910 + goto done; 911 + 912 + ping_dev0(); 913 + ping_dev1(); 914 + done: 915 + delete_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1); 916 + test_tunnel_kern__destroy(skel); 917 + } 918 + 919 + enum ip6tnl_test { 920 + IPIP6, 921 + IP6IP6 922 + }; 923 + 924 + static void test_ip6tnl_tunnel(enum ip6tnl_test test) 925 + { 926 + struct test_tunnel_kern *skel; 927 + int set_fd, get_fd; 928 + int err; 929 + 930 + skel = test_tunnel_kern__open_and_load(); 931 + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) 932 + return; 933 + 934 + err = add_ipv6_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1, "ip6tnl", ""); 935 + if (!ASSERT_OK(err, "add tunnel")) 936 + goto done; 937 + 938 + switch (test) { 939 + case IPIP6: 940 + set_fd = bpf_program__fd(skel->progs.ipip6_set_tunnel); 941 + get_fd = bpf_program__fd(skel->progs.ipip6_get_tunnel); 942 + break; 943 + case IP6IP6: 944 + set_fd = bpf_program__fd(skel->progs.ip6ip6_set_tunnel); 945 + get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel); 946 + break; 947 + } 948 + if (generic_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd)) 949 + goto done; 950 + 951 + ping6_veth0(); 952 + switch (test) { 953 + case IPIP6: 954 + ping_dev0(); 955 + ping_dev1(); 956 + break; 957 + case IP6IP6: 958 + ping6_dev0(); 959 + ping6_dev1(); 960 + break; 961 + } 962 + 963 + done: 964 + delete_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1); 965 + test_tunnel_kern__destroy(skel); 829 966 } 830 967 831 968 #define RUN_TEST(name, ...) 
\ ··· 1120 707 RUN_TEST(ipip_tunnel, FOU); 1121 708 RUN_TEST(ipip_tunnel, GUE); 1122 709 RUN_TEST(xfrm_tunnel); 710 + RUN_TEST(gre_tunnel, GRE); 711 + RUN_TEST(gre_tunnel, GRE_NOKEY); 712 + RUN_TEST(gre_tunnel, GRETAP); 713 + RUN_TEST(gre_tunnel, GRETAP_NOKEY); 714 + RUN_TEST(ip6gre_tunnel, IP6GRE); 715 + RUN_TEST(ip6gre_tunnel, IP6GRETAP); 716 + RUN_TEST(erspan_tunnel, V1); 717 + RUN_TEST(erspan_tunnel, V2); 718 + RUN_TEST(ip6erspan_tunnel, V1); 719 + RUN_TEST(ip6erspan_tunnel, V2); 720 + RUN_TEST(geneve_tunnel); 721 + RUN_TEST(ip6geneve_tunnel); 722 + RUN_TEST(ip6tnl_tunnel, IPIP6); 723 + RUN_TEST(ip6tnl_tunnel, IP6IP6); 1123 724 1124 725 return NULL; 1125 726 }
+139
tools/testing/selftests/bpf/prog_tests/test_veristat.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #include <test_progs.h> 4 + #include <string.h> 5 + #include <stdio.h> 6 + 7 + #define __CHECK_STR(str, name) \ 8 + do { \ 9 + if (!ASSERT_HAS_SUBSTR(fix->output, (str), (name))) \ 10 + goto out; \ 11 + } while (0) 12 + 13 + struct fixture { 14 + char tmpfile[80]; 15 + int fd; 16 + char *output; 17 + size_t sz; 18 + char veristat[80]; 19 + }; 20 + 21 + static struct fixture *init_fixture(void) 22 + { 23 + struct fixture *fix = malloc(sizeof(struct fixture)); 24 + 25 + /* for no_alu32 and cpuv4 veristat is in parent folder */ 26 + if (access("./veristat", F_OK) == 0) 27 + strcpy(fix->veristat, "./veristat"); 28 + else if (access("../veristat", F_OK) == 0) 29 + strcpy(fix->veristat, "../veristat"); 30 + else 31 + PRINT_FAIL("Can't find veristat binary"); 32 + 33 + snprintf(fix->tmpfile, sizeof(fix->tmpfile), "/tmp/test_veristat.XXXXXX"); 34 + fix->fd = mkstemp(fix->tmpfile); 35 + fix->sz = 1000000; 36 + fix->output = malloc(fix->sz); 37 + return fix; 38 + } 39 + 40 + static void teardown_fixture(struct fixture *fix) 41 + { 42 + free(fix->output); 43 + close(fix->fd); 44 + remove(fix->tmpfile); 45 + free(fix); 46 + } 47 + 48 + static void test_set_global_vars_succeeds(void) 49 + { 50 + struct fixture *fix = init_fixture(); 51 + 52 + SYS(out, 53 + "%s set_global_vars.bpf.o"\ 54 + " -G \"var_s64 = 0xf000000000000001\" "\ 55 + " -G \"var_u64 = 0xfedcba9876543210\" "\ 56 + " -G \"var_s32 = -0x80000000\" "\ 57 + " -G \"var_u32 = 0x76543210\" "\ 58 + " -G \"var_s16 = -32768\" "\ 59 + " -G \"var_u16 = 60652\" "\ 60 + " -G \"var_s8 = -128\" "\ 61 + " -G \"var_u8 = 255\" "\ 62 + " -G \"var_ea = EA2\" "\ 63 + " -G \"var_eb = EB2\" "\ 64 + " -G \"var_ec = EC2\" "\ 65 + " -G \"var_b = 1\" "\ 66 + "-vl2 > %s", fix->veristat, fix->tmpfile); 67 + 68 + read(fix->fd, fix->output, fix->sz); 69 + __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); 70 + __CHECK_STR("_w=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210"); 71 + __CHECK_STR("_w=0x80000000 ", "var_s32 = -0x80000000"); 72 + __CHECK_STR("_w=0x76543210 ", "var_u32 = 0x76543210"); 73 + __CHECK_STR("_w=0x8000 ", "var_s16 = -32768"); 74 + __CHECK_STR("_w=0xecec ", "var_u16 = 60652"); 75 + __CHECK_STR("_w=128 ", "var_s8 = -128"); 76 + __CHECK_STR("_w=255 ", "var_u8 = 255"); 77 + __CHECK_STR("_w=11 ", "var_ea = EA2"); 78 + __CHECK_STR("_w=12 ", "var_eb = EB2"); 79 + __CHECK_STR("_w=13 ", "var_ec = EC2"); 80 + __CHECK_STR("_w=1 ", "var_b = 1"); 81 + 82 + out: 83 + teardown_fixture(fix); 84 + } 85 + 86 + static void test_set_global_vars_from_file_succeeds(void) 87 + { 88 + struct fixture *fix = init_fixture(); 89 + char input_file[80]; 90 + const char *vars = "var_s16 = -32768\nvar_u16 = 60652"; 91 + int fd; 92 + 93 + snprintf(input_file, sizeof(input_file), "/tmp/veristat_input.XXXXXX"); 94 + fd = mkstemp(input_file); 95 + if (!ASSERT_GE(fd, 0, "valid fd")) 96 + goto out; 97 + 98 + write(fd, vars, strlen(vars)); 99 + syncfs(fd); 100 + SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s", 101 + fix->veristat, input_file, fix->tmpfile); 102 + read(fix->fd, fix->output, fix->sz); 103 + __CHECK_STR("_w=0x8000 ", "var_s16 = -32768"); 104 + __CHECK_STR("_w=0xecec ", "var_u16 = 60652"); 105 + 106 + out: 107 + close(fd); 108 + remove(input_file); 109 + teardown_fixture(fix); 110 + } 111 + 112 + static void test_set_global_vars_out_of_range(void) 113 + { 114 + struct fixture *fix = init_fixture(); 115 + 116 + SYS_FAIL(out, 117 + 
"%s set_global_vars.bpf.o -G \"var_s32 = 2147483648\" -vl2 2> %s", 118 + fix->veristat, fix->tmpfile); 119 + 120 + read(fix->fd, fix->output, fix->sz); 121 + __CHECK_STR("is out of range [-2147483648; 2147483647]", "out of range"); 122 + 123 + out: 124 + teardown_fixture(fix); 125 + } 126 + 127 + void test_veristat(void) 128 + { 129 + if (test__start_subtest("set_global_vars_succeeds")) 130 + test_set_global_vars_succeeds(); 131 + 132 + if (test__start_subtest("set_global_vars_out_of_range")) 133 + test_set_global_vars_out_of_range(); 134 + 135 + if (test__start_subtest("set_global_vars_from_file_succeeds")) 136 + test_set_global_vars_from_file_succeeds(); 137 + } 138 + 139 + #undef __CHECK_STR
+517 -131
tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
··· 3 3 /* Create 3 namespaces with 3 veth peers, and forward packets in-between using 4 4 * native XDP 5 5 * 6 - * XDP_TX 7 - * NS1(veth11) NS2(veth22) NS3(veth33) 8 - * | | | 9 - * | | | 10 - * (veth1, (veth2, (veth3, 11 - * id:111) id:122) id:133) 12 - * ^ | ^ | ^ | 13 - * | | XDP_REDIRECT | | XDP_REDIRECT | | 14 - * | ------------------ ------------------ | 15 - * ----------------------------------------- 16 - * XDP_REDIRECT 6 + * Network topology: 7 + * ---------- ---------- ---------- 8 + * | NS1 | | NS2 | | NS3 | 9 + * | veth11 | | veth22 | | veth33 | 10 + * ----|----- -----|---- -----|---- 11 + * | | | 12 + * ----|------------------|----------------|---- 13 + * | veth1 veth2 veth3 | 14 + * | | 15 + * | NSO | 16 + * --------------------------------------------- 17 + * 18 + * Test cases: 19 + * - [test_xdp_veth_redirect] : ping veth33 from veth11 20 + * 21 + * veth11 veth22 veth33 22 + * (XDP_PASS) (XDP_TX) (XDP_PASS) 23 + * | | | 24 + * | | | 25 + * veth1 veth2 veth3 26 + * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT) 27 + * ^ | ^ | ^ | 28 + * | | | | | | 29 + * | ------------------ ------------------ | 30 + * ----------------------------------------- 31 + * 32 + * - [test_xdp_veth_broadcast_redirect]: broadcast from veth11 33 + * - IPv4 ping : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS 34 + * -> echo request received by all except veth11 35 + * - IPv4 ping : BPF_F_BROADCAST 36 + * -> echo request received by all veth 37 + * - [test_xdp_veth_egress]: 38 + * - all src mac should be the magic mac 39 + * 40 + * veth11 veth22 veth33 41 + * (XDP_PASS) (XDP_PASS) (XDP_PASS) 42 + * | | | 43 + * | | | 44 + * veth1 veth2 veth3 45 + * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT) 46 + * | ^ ^ 47 + * | | | 48 + * ---------------------------------------- 49 + * 17 50 */ 18 51 19 52 #define _GNU_SOURCE ··· 55 22 #include "network_helpers.h" 56 23 #include "xdp_dummy.skel.h" 57 24 #include "xdp_redirect_map.skel.h" 25 + #include "xdp_redirect_multi_kern.skel.h" 58 26 #include "xdp_tx.skel.h" 27 + #include <uapi/linux/if_link.h> 59 28 60 29 #define VETH_PAIRS_COUNT 3 61 - #define NS_SUFFIX_LEN 6 62 - #define VETH_NAME_MAX_LEN 16 30 + #define VETH_NAME_MAX_LEN 32 31 + #define IP_MAX_LEN 16 63 32 #define IP_SRC "10.1.1.11" 64 33 #define IP_DST "10.1.1.33" 65 - #define IP_CMD_MAX_LEN 128 66 - 67 - struct skeletons { 68 - struct xdp_dummy *xdp_dummy; 69 - struct xdp_tx *xdp_tx; 70 - struct xdp_redirect_map *xdp_redirect_maps; 71 - }; 34 + #define IP_NEIGH "10.1.1.253" 35 + #define PROG_NAME_MAX_LEN 128 36 + #define NS_NAME_MAX_LEN 32 72 37 73 38 struct veth_configuration { 74 39 char local_veth[VETH_NAME_MAX_LEN]; /* Interface in main namespace */ 75 40 char remote_veth[VETH_NAME_MAX_LEN]; /* Peer interface in dedicated namespace*/ 76 - const char *namespace; /* Namespace for the remote veth */ 77 - char next_veth[VETH_NAME_MAX_LEN]; /* Local interface to redirect traffic to */ 78 - char *remote_addr; /* IP address of the remote veth */ 41 + char namespace[NS_NAME_MAX_LEN]; /* Namespace for the remote veth */ 42 + int next_veth; /* Local interface to redirect traffic to */ 43 + char remote_addr[IP_MAX_LEN]; /* IP address of the remote veth */ 79 44 }; 80 45 81 - static struct veth_configuration config[VETH_PAIRS_COUNT] = { 46 + struct net_configuration { 47 + char ns0_name[NS_NAME_MAX_LEN]; 48 + struct veth_configuration veth_cfg[VETH_PAIRS_COUNT]; 49 + }; 50 + 51 + static const struct net_configuration default_config = { 52 + .ns0_name = "ns0-", 82 53 { 83 - .local_veth = "veth1", 84 - .remote_veth = 
"veth11", 85 - .next_veth = "veth2", 86 - .remote_addr = IP_SRC, 87 - .namespace = "ns-veth11" 88 - }, 89 - { 90 - .local_veth = "veth2", 91 - .remote_veth = "veth22", 92 - .next_veth = "veth3", 93 - .remote_addr = NULL, 94 - .namespace = "ns-veth22" 95 - }, 96 - { 97 - .local_veth = "veth3", 98 - .remote_veth = "veth33", 99 - .next_veth = "veth1", 100 - .remote_addr = IP_DST, 101 - .namespace = "ns-veth33" 54 + { 55 + .local_veth = "veth1-", 56 + .remote_veth = "veth11", 57 + .next_veth = 1, 58 + .remote_addr = IP_SRC, 59 + .namespace = "ns-veth11-" 60 + }, 61 + { 62 + .local_veth = "veth2-", 63 + .remote_veth = "veth22", 64 + .next_veth = 2, 65 + .remote_addr = "", 66 + .namespace = "ns-veth22-" 67 + }, 68 + { 69 + .local_veth = "veth3-", 70 + .remote_veth = "veth33", 71 + .next_veth = 0, 72 + .remote_addr = IP_DST, 73 + .namespace = "ns-veth33-" 74 + } 102 75 } 103 76 }; 104 77 105 - static int attach_programs_to_veth_pair(struct skeletons *skeletons, int index) 78 + struct prog_configuration { 79 + char local_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to local_veth */ 80 + char remote_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to remote_veth */ 81 + u32 local_flags; /* XDP flags to use on local_veth */ 82 + u32 remote_flags; /* XDP flags to use on remote_veth */ 83 + }; 84 + 85 + static int attach_programs_to_veth_pair(struct bpf_object **objs, size_t nb_obj, 86 + struct net_configuration *net_config, 87 + struct prog_configuration *prog, int index) 106 88 { 107 89 struct bpf_program *local_prog, *remote_prog; 108 - struct bpf_link **local_link, **remote_link; 109 90 struct nstoken *nstoken; 110 - struct bpf_link *link; 111 - int interface; 91 + int interface, ret, i; 112 92 113 - switch (index) { 114 - case 0: 115 - local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_0; 116 - local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_0; 117 - remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog; 118 - remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog; 119 - break; 120 - case 1: 121 - local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_1; 122 - local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_1; 123 - remote_prog = skeletons->xdp_tx->progs.xdp_tx; 124 - remote_link = &skeletons->xdp_tx->links.xdp_tx; 125 - break; 126 - case 2: 127 - local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_2; 128 - local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_2; 129 - remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog; 130 - remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog; 131 - break; 93 + for (i = 0; i < nb_obj; i++) { 94 + local_prog = bpf_object__find_program_by_name(objs[i], prog[index].local_name); 95 + if (local_prog) 96 + break; 132 97 } 133 - interface = if_nametoindex(config[index].local_veth); 98 + if (!ASSERT_OK_PTR(local_prog, "find local program")) 99 + return -1; 100 + 101 + for (i = 0; i < nb_obj; i++) { 102 + remote_prog = bpf_object__find_program_by_name(objs[i], prog[index].remote_name); 103 + if (remote_prog) 104 + break; 105 + } 106 + if (!ASSERT_OK_PTR(remote_prog, "find remote program")) 107 + return -1; 108 + 109 + interface = if_nametoindex(net_config->veth_cfg[index].local_veth); 134 110 if (!ASSERT_NEQ(interface, 0, "non zero interface index")) 135 111 return -1; 136 - link = bpf_program__attach_xdp(local_prog, interface); 137 - if (!ASSERT_OK_PTR(link, "attach xdp program to local veth")) 112 + 113 + ret = bpf_xdp_attach(interface, 
bpf_program__fd(local_prog), 114 + prog[index].local_flags, NULL); 115 + if (!ASSERT_OK(ret, "attach xdp program to local veth")) 138 116 return -1; 139 - *local_link = link; 140 - nstoken = open_netns(config[index].namespace); 117 + 118 + nstoken = open_netns(net_config->veth_cfg[index].namespace); 141 119 if (!ASSERT_OK_PTR(nstoken, "switch to remote veth namespace")) 142 120 return -1; 143 - interface = if_nametoindex(config[index].remote_veth); 121 + 122 + interface = if_nametoindex(net_config->veth_cfg[index].remote_veth); 144 123 if (!ASSERT_NEQ(interface, 0, "non zero interface index")) { 145 124 close_netns(nstoken); 146 125 return -1; 147 126 } 148 - link = bpf_program__attach_xdp(remote_prog, interface); 149 - *remote_link = link; 150 - close_netns(nstoken); 151 - if (!ASSERT_OK_PTR(link, "attach xdp program to remote veth")) 152 - return -1; 153 127 128 + ret = bpf_xdp_attach(interface, bpf_program__fd(remote_prog), 129 + prog[index].remote_flags, NULL); 130 + if (!ASSERT_OK(ret, "attach xdp program to remote veth")) { 131 + close_netns(nstoken); 132 + return -1; 133 + } 134 + 135 + close_netns(nstoken); 154 136 return 0; 155 137 } 156 138 157 - static int configure_network(struct skeletons *skeletons) 139 + static int create_network(struct net_configuration *net_config) 158 140 { 159 - int interface_id; 160 - int map_fd; 161 - int err; 162 - int i = 0; 141 + struct nstoken *nstoken = NULL; 142 + int i, err; 163 143 164 - /* First create and configure all interfaces */ 165 - for (i = 0; i < VETH_PAIRS_COUNT; i++) { 166 - SYS(fail, "ip netns add %s", config[i].namespace); 167 - SYS(fail, "ip link add %s type veth peer name %s netns %s", 168 - config[i].local_veth, config[i].remote_veth, config[i].namespace); 169 - SYS(fail, "ip link set dev %s up", config[i].local_veth); 170 - if (config[i].remote_addr) 171 - SYS(fail, "ip -n %s addr add %s/24 dev %s", config[i].namespace, 172 - config[i].remote_addr, config[i].remote_veth); 173 - SYS(fail, "ip -n %s link set dev %s up", config[i].namespace, 174 - config[i].remote_veth); 175 - } 144 + memcpy(net_config, &default_config, sizeof(struct net_configuration)); 176 145 177 - /* Then configure the redirect map and attach programs to interfaces */ 178 - map_fd = bpf_map__fd(skeletons->xdp_redirect_maps->maps.tx_port); 179 - if (!ASSERT_GE(map_fd, 0, "open redirect map")) 146 + /* Create unique namespaces */ 147 + err = append_tid(net_config->ns0_name, NS_NAME_MAX_LEN); 148 + if (!ASSERT_OK(err, "append TID to ns0 name")) 180 149 goto fail; 150 + SYS(fail, "ip netns add %s", net_config->ns0_name); 151 + 181 152 for (i = 0; i < VETH_PAIRS_COUNT; i++) { 182 - interface_id = if_nametoindex(config[i].next_veth); 183 - if (!ASSERT_NEQ(interface_id, 0, "non zero interface index")) 153 + err = append_tid(net_config->veth_cfg[i].namespace, NS_NAME_MAX_LEN); 154 + if (!ASSERT_OK(err, "append TID to ns name")) 184 155 goto fail; 185 - err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY); 186 - if (!ASSERT_OK(err, "configure interface redirection through map")) 187 - goto fail; 188 - if (attach_programs_to_veth_pair(skeletons, i)) 189 - goto fail; 156 + SYS(fail, "ip netns add %s", net_config->veth_cfg[i].namespace); 190 157 } 191 158 159 + /* Create interfaces */ 160 + nstoken = open_netns(net_config->ns0_name); 161 + if (!nstoken) 162 + goto fail; 163 + 164 + for (i = 0; i < VETH_PAIRS_COUNT; i++) { 165 + SYS(fail, "ip link add %s type veth peer name %s netns %s", 166 + net_config->veth_cfg[i].local_veth, 
net_config->veth_cfg[i].remote_veth, 167 + net_config->veth_cfg[i].namespace); 168 + SYS(fail, "ip link set dev %s up", net_config->veth_cfg[i].local_veth); 169 + if (net_config->veth_cfg[i].remote_addr[0]) 170 + SYS(fail, "ip -n %s addr add %s/24 dev %s", 171 + net_config->veth_cfg[i].namespace, 172 + net_config->veth_cfg[i].remote_addr, 173 + net_config->veth_cfg[i].remote_veth); 174 + SYS(fail, "ip -n %s link set dev %s up", net_config->veth_cfg[i].namespace, 175 + net_config->veth_cfg[i].remote_veth); 176 + } 177 + 178 + close_netns(nstoken); 192 179 return 0; 193 180 194 181 fail: 182 + close_netns(nstoken); 195 183 return -1; 196 184 } 197 185 198 - static void cleanup_network(void) 186 + static void cleanup_network(struct net_configuration *net_config) 199 187 { 200 188 int i; 201 189 202 - /* Deleting namespaces is enough to automatically remove veth pairs as well 203 - */ 190 + SYS_NOFAIL("ip netns del %s", net_config->ns0_name); 204 191 for (i = 0; i < VETH_PAIRS_COUNT; i++) 205 - SYS_NOFAIL("ip netns del %s", config[i].namespace); 192 + SYS_NOFAIL("ip netns del %s", net_config->veth_cfg[i].namespace); 206 193 } 207 194 208 - static int check_ping(struct skeletons *skeletons) 195 + #define VETH_REDIRECT_SKEL_NB 3 196 + static void xdp_veth_redirect(u32 flags) 209 197 { 198 + struct prog_configuration ping_config[VETH_PAIRS_COUNT] = { 199 + { 200 + .local_name = "xdp_redirect_map_0", 201 + .remote_name = "xdp_dummy_prog", 202 + .local_flags = flags, 203 + .remote_flags = flags, 204 + }, 205 + { 206 + .local_name = "xdp_redirect_map_1", 207 + .remote_name = "xdp_tx", 208 + .local_flags = flags, 209 + .remote_flags = flags, 210 + }, 211 + { 212 + .local_name = "xdp_redirect_map_2", 213 + .remote_name = "xdp_dummy_prog", 214 + .local_flags = flags, 215 + .remote_flags = flags, 216 + } 217 + }; 218 + struct bpf_object *bpf_objs[VETH_REDIRECT_SKEL_NB]; 219 + struct xdp_redirect_map *xdp_redirect_map; 220 + struct net_configuration net_config; 221 + struct nstoken *nstoken = NULL; 222 + struct xdp_dummy *xdp_dummy; 223 + struct xdp_tx *xdp_tx; 224 + int map_fd; 225 + int i; 226 + 227 + xdp_dummy = xdp_dummy__open_and_load(); 228 + if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load")) 229 + return; 230 + 231 + xdp_tx = xdp_tx__open_and_load(); 232 + if (!ASSERT_OK_PTR(xdp_tx, "xdp_tx__open_and_load")) 233 + goto destroy_xdp_dummy; 234 + 235 + xdp_redirect_map = xdp_redirect_map__open_and_load(); 236 + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) 237 + goto destroy_xdp_tx; 238 + 239 + if (!ASSERT_OK(create_network(&net_config), "create network")) 240 + goto destroy_xdp_redirect_map; 241 + 242 + /* Then configure the redirect map and attach programs to interfaces */ 243 + map_fd = bpf_map__fd(xdp_redirect_map->maps.tx_port); 244 + if (!ASSERT_OK_FD(map_fd, "open redirect map")) 245 + goto destroy_xdp_redirect_map; 246 + 247 + bpf_objs[0] = xdp_dummy->obj; 248 + bpf_objs[1] = xdp_tx->obj; 249 + bpf_objs[2] = xdp_redirect_map->obj; 250 + 251 + nstoken = open_netns(net_config.ns0_name); 252 + if (!ASSERT_OK_PTR(nstoken, "open NS0")) 253 + goto destroy_xdp_redirect_map; 254 + 255 + for (i = 0; i < VETH_PAIRS_COUNT; i++) { 256 + int next_veth = net_config.veth_cfg[i].next_veth; 257 + int interface_id; 258 + int err; 259 + 260 + interface_id = if_nametoindex(net_config.veth_cfg[next_veth].local_veth); 261 + if (!ASSERT_NEQ(interface_id, 0, "non zero interface index")) 262 + goto destroy_xdp_redirect_map; 263 + err = bpf_map_update_elem(map_fd, &i, &interface_id, 
BPF_ANY); 264 + if (!ASSERT_OK(err, "configure interface redirection through map")) 265 + goto destroy_xdp_redirect_map; 266 + if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB, 267 + &net_config, ping_config, i)) 268 + goto destroy_xdp_redirect_map; 269 + } 270 + 210 271 /* Test: if all interfaces are properly configured, we must be able to ping 211 272 * veth33 from veth11 212 273 */ 213 - return SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null", 214 - config[0].namespace, IP_DST); 274 + ASSERT_OK(SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null", 275 + net_config.veth_cfg[0].namespace, IP_DST), "ping"); 276 + 277 + destroy_xdp_redirect_map: 278 + close_netns(nstoken); 279 + xdp_redirect_map__destroy(xdp_redirect_map); 280 + destroy_xdp_tx: 281 + xdp_tx__destroy(xdp_tx); 282 + destroy_xdp_dummy: 283 + xdp_dummy__destroy(xdp_dummy); 284 + 285 + cleanup_network(&net_config); 286 + } 287 + 288 + #define BROADCAST_REDIRECT_SKEL_NB 2 289 + static void xdp_veth_broadcast_redirect(u32 attach_flags, u64 redirect_flags) 290 + { 291 + struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = { 292 + { 293 + .local_name = "xdp_redirect_map_multi_prog", 294 + .remote_name = "xdp_count_0", 295 + .local_flags = attach_flags, 296 + .remote_flags = attach_flags, 297 + }, 298 + { 299 + .local_name = "xdp_redirect_map_multi_prog", 300 + .remote_name = "xdp_count_1", 301 + .local_flags = attach_flags, 302 + .remote_flags = attach_flags, 303 + }, 304 + { 305 + .local_name = "xdp_redirect_map_multi_prog", 306 + .remote_name = "xdp_count_2", 307 + .local_flags = attach_flags, 308 + .remote_flags = attach_flags, 309 + } 310 + }; 311 + struct bpf_object *bpf_objs[BROADCAST_REDIRECT_SKEL_NB]; 312 + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; 313 + struct xdp_redirect_map *xdp_redirect_map; 314 + struct bpf_devmap_val devmap_val = {}; 315 + struct net_configuration net_config; 316 + struct nstoken *nstoken = NULL; 317 + u16 protocol = ETH_P_IP; 318 + int group_map; 319 + int flags_map; 320 + int cnt_map; 321 + u64 cnt = 0; 322 + int i, err; 323 + 324 + xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); 325 + if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load")) 326 + return; 327 + 328 + xdp_redirect_map = xdp_redirect_map__open_and_load(); 329 + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) 330 + goto destroy_xdp_redirect_multi_kern; 331 + 332 + if (!ASSERT_OK(create_network(&net_config), "create network")) 333 + goto destroy_xdp_redirect_map; 334 + 335 + group_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_all); 336 + if (!ASSERT_OK_FD(group_map, "open map_all")) 337 + goto destroy_xdp_redirect_map; 338 + 339 + flags_map = bpf_map__fd(xdp_redirect_multi_kern->maps.redirect_flags); 340 + if (!ASSERT_OK_FD(group_map, "open map_all")) 341 + goto destroy_xdp_redirect_map; 342 + 343 + err = bpf_map_update_elem(flags_map, &protocol, &redirect_flags, BPF_NOEXIST); 344 + if (!ASSERT_OK(err, "init IP count")) 345 + goto destroy_xdp_redirect_map; 346 + 347 + cnt_map = bpf_map__fd(xdp_redirect_map->maps.rxcnt); 348 + if (!ASSERT_OK_FD(cnt_map, "open rxcnt map")) 349 + goto destroy_xdp_redirect_map; 350 + 351 + bpf_objs[0] = xdp_redirect_multi_kern->obj; 352 + bpf_objs[1] = xdp_redirect_map->obj; 353 + 354 + nstoken = open_netns(net_config.ns0_name); 355 + if (!ASSERT_OK_PTR(nstoken, "open NS0")) 356 + goto destroy_xdp_redirect_map; 357 + 358 + for (i = 0; i < VETH_PAIRS_COUNT; i++) { 359 + int 
ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth); 360 + 361 + if (attach_programs_to_veth_pair(bpf_objs, BROADCAST_REDIRECT_SKEL_NB, 362 + &net_config, prog_cfg, i)) 363 + goto destroy_xdp_redirect_map; 364 + 365 + SYS(destroy_xdp_redirect_map, 366 + "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s", 367 + net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth); 368 + 369 + devmap_val.ifindex = ifindex; 370 + err = bpf_map_update_elem(group_map, &ifindex, &devmap_val, 0); 371 + if (!ASSERT_OK(err, "bpf_map_update_elem")) 372 + goto destroy_xdp_redirect_map; 373 + 374 + } 375 + 376 + SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ", 377 + net_config.veth_cfg[0].namespace, IP_NEIGH); 378 + 379 + for (i = 0; i < VETH_PAIRS_COUNT; i++) { 380 + err = bpf_map_lookup_elem(cnt_map, &i, &cnt); 381 + if (!ASSERT_OK(err, "get IP cnt")) 382 + goto destroy_xdp_redirect_map; 383 + 384 + if (redirect_flags & BPF_F_EXCLUDE_INGRESS) 385 + /* veth11 shouldn't receive the ICMP requests; 386 + * others should 387 + */ 388 + ASSERT_EQ(cnt, i ? 4 : 0, "compare IP cnt"); 389 + else 390 + /* All remote veth should receive the ICMP requests */ 391 + ASSERT_EQ(cnt, 4, "compare IP cnt"); 392 + } 393 + 394 + destroy_xdp_redirect_map: 395 + close_netns(nstoken); 396 + xdp_redirect_map__destroy(xdp_redirect_map); 397 + destroy_xdp_redirect_multi_kern: 398 + xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern); 399 + 400 + cleanup_network(&net_config); 401 + } 402 + 403 + #define VETH_EGRESS_SKEL_NB 3 404 + static void xdp_veth_egress(u32 flags) 405 + { 406 + struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = { 407 + { 408 + .local_name = "xdp_redirect_map_all_prog", 409 + .remote_name = "xdp_dummy_prog", 410 + .local_flags = flags, 411 + .remote_flags = flags, 412 + }, 413 + { 414 + .local_name = "xdp_redirect_map_all_prog", 415 + .remote_name = "store_mac_1", 416 + .local_flags = flags, 417 + .remote_flags = flags, 418 + }, 419 + { 420 + .local_name = "xdp_redirect_map_all_prog", 421 + .remote_name = "store_mac_2", 422 + .local_flags = flags, 423 + .remote_flags = flags, 424 + } 425 + }; 426 + const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}; 427 + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; 428 + struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB]; 429 + struct xdp_redirect_map *xdp_redirect_map; 430 + struct bpf_devmap_val devmap_val = {}; 431 + struct net_configuration net_config; 432 + int mac_map, egress_map, res_map; 433 + struct nstoken *nstoken = NULL; 434 + struct xdp_dummy *xdp_dummy; 435 + int err; 436 + int i; 437 + 438 + xdp_dummy = xdp_dummy__open_and_load(); 439 + if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load")) 440 + return; 441 + 442 + xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); 443 + if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load")) 444 + goto destroy_xdp_dummy; 445 + 446 + xdp_redirect_map = xdp_redirect_map__open_and_load(); 447 + if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load")) 448 + goto destroy_xdp_redirect_multi_kern; 449 + 450 + if (!ASSERT_OK(create_network(&net_config), "create network")) 451 + goto destroy_xdp_redirect_map; 452 + 453 + mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map); 454 + if (!ASSERT_OK_FD(mac_map, "open mac_map")) 455 + goto destroy_xdp_redirect_map; 456 + 457 + egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress); 458 + if (!ASSERT_OK_FD(egress_map, "open 
map_egress")) 459 + goto destroy_xdp_redirect_map; 460 + 461 + devmap_val.bpf_prog.fd = bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog); 462 + 463 + bpf_objs[0] = xdp_dummy->obj; 464 + bpf_objs[1] = xdp_redirect_multi_kern->obj; 465 + bpf_objs[2] = xdp_redirect_map->obj; 466 + 467 + nstoken = open_netns(net_config.ns0_name); 468 + if (!ASSERT_OK_PTR(nstoken, "open NS0")) 469 + goto destroy_xdp_redirect_map; 470 + 471 + for (i = 0; i < VETH_PAIRS_COUNT; i++) { 472 + int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth); 473 + 474 + SYS(destroy_xdp_redirect_map, 475 + "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s", 476 + net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth); 477 + 478 + if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB, 479 + &net_config, prog_cfg, i)) 480 + goto destroy_xdp_redirect_map; 481 + 482 + err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0); 483 + if (!ASSERT_OK(err, "bpf_map_update_elem")) 484 + goto destroy_xdp_redirect_map; 485 + 486 + devmap_val.ifindex = ifindex; 487 + err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0); 488 + if (!ASSERT_OK(err, "bpf_map_update_elem")) 489 + goto destroy_xdp_redirect_map; 490 + } 491 + 492 + SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ", 493 + net_config.veth_cfg[0].namespace, IP_NEIGH); 494 + 495 + res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac); 496 + if (!ASSERT_OK_FD(res_map, "open rx_map")) 497 + goto destroy_xdp_redirect_map; 498 + 499 + for (i = 0; i < 2; i++) { 500 + u32 key = i; 501 + u64 res; 502 + 503 + err = bpf_map_lookup_elem(res_map, &key, &res); 504 + if (!ASSERT_OK(err, "get MAC res")) 505 + goto destroy_xdp_redirect_map; 506 + 507 + ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac"); 508 + } 509 + 510 + destroy_xdp_redirect_map: 511 + close_netns(nstoken); 512 + xdp_redirect_map__destroy(xdp_redirect_map); 513 + destroy_xdp_redirect_multi_kern: 514 + xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern); 515 + destroy_xdp_dummy: 516 + xdp_dummy__destroy(xdp_dummy); 517 + 518 + cleanup_network(&net_config); 215 519 } 216 520 217 521 void test_xdp_veth_redirect(void) 218 522 { 219 - struct skeletons skeletons = {}; 523 + if (test__start_subtest("0")) 524 + xdp_veth_redirect(0); 220 525 221 - skeletons.xdp_dummy = xdp_dummy__open_and_load(); 222 - if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load")) 223 - return; 526 + if (test__start_subtest("DRV_MODE")) 527 + xdp_veth_redirect(XDP_FLAGS_DRV_MODE); 224 528 225 - skeletons.xdp_tx = xdp_tx__open_and_load(); 226 - if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load")) 227 - goto destroy_xdp_dummy; 529 + if (test__start_subtest("SKB_MODE")) 530 + xdp_veth_redirect(XDP_FLAGS_SKB_MODE); 531 + } 228 532 229 - skeletons.xdp_redirect_maps = xdp_redirect_map__open_and_load(); 230 - if (!ASSERT_OK_PTR(skeletons.xdp_redirect_maps, "xdp_redirect_map__open_and_load")) 231 - goto destroy_xdp_tx; 533 + void test_xdp_veth_broadcast_redirect(void) 534 + { 535 + if (test__start_subtest("0/BROADCAST")) 536 + xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST); 232 537 233 - if (configure_network(&skeletons)) 234 - goto destroy_xdp_redirect_map; 538 + if (test__start_subtest("0/(BROADCAST | EXCLUDE_INGRESS)")) 539 + xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); 235 540 236 - ASSERT_OK(check_ping(&skeletons), "ping"); 541 + if (test__start_subtest("DRV_MODE/BROADCAST")) 542 + 
xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, BPF_F_BROADCAST); 237 543 238 - destroy_xdp_redirect_map: 239 - xdp_redirect_map__destroy(skeletons.xdp_redirect_maps); 240 - destroy_xdp_tx: 241 - xdp_tx__destroy(skeletons.xdp_tx); 242 - destroy_xdp_dummy: 243 - xdp_dummy__destroy(skeletons.xdp_dummy); 544 + if (test__start_subtest("DRV_MODE/(BROADCAST | EXCLUDE_INGRESS)")) 545 + xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, 546 + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); 244 547 245 - cleanup_network(); 548 + if (test__start_subtest("SKB_MODE/BROADCAST")) 549 + xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, BPF_F_BROADCAST); 550 + 551 + if (test__start_subtest("SKB_MODE/(BROADCAST | EXCLUDE_INGRESS)")) 552 + xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, 553 + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); 554 + } 555 + 556 + void test_xdp_veth_egress(void) 557 + { 558 + if (test__start_subtest("0/egress")) 559 + xdp_veth_egress(0); 560 + 561 + if (test__start_subtest("DRV_MODE/egress")) 562 + xdp_veth_egress(XDP_FLAGS_DRV_MODE); 563 + 564 + if (test__start_subtest("SKB_MODE/egress")) 565 + xdp_veth_egress(XDP_FLAGS_SKB_MODE); 246 566 }
+96 -1
tools/testing/selftests/bpf/prog_tests/token.c
··· 19 19 #include "priv_prog.skel.h" 20 20 #include "dummy_st_ops_success.skel.h" 21 21 #include "token_lsm.skel.h" 22 + #include "priv_freplace_prog.skel.h" 22 23 23 24 static inline int sys_mount(const char *dev_name, const char *dir_name, 24 25 const char *type, unsigned long flags, ··· 789 788 return 0; 790 789 } 791 790 791 + static int userns_obj_priv_freplace_setup(int mnt_fd, struct priv_freplace_prog **fr_skel, 792 + struct priv_prog **skel, int *tgt_fd) 793 + { 794 + LIBBPF_OPTS(bpf_object_open_opts, opts); 795 + int err; 796 + char buf[256]; 797 + 798 + /* use bpf_token_path to provide BPF FS path */ 799 + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); 800 + opts.bpf_token_path = buf; 801 + *skel = priv_prog__open_opts(&opts); 802 + if (!ASSERT_OK_PTR(*skel, "priv_prog__open_opts")) 803 + return -EINVAL; 804 + err = priv_prog__load(*skel); 805 + if (!ASSERT_OK(err, "priv_prog__load")) 806 + return -EINVAL; 807 + 808 + *fr_skel = priv_freplace_prog__open_opts(&opts); 809 + if (!ASSERT_OK_PTR(*fr_skel, "priv_freplace_prog__open_opts")) 810 + return -EINVAL; 811 + 812 + *tgt_fd = bpf_program__fd((*skel)->progs.xdp_prog1); 813 + return 0; 814 + } 815 + 816 + /* Verify that freplace works from user namespace, because bpf token is loaded 817 + * in bpf_object__prepare 818 + */ 819 + static int userns_obj_priv_freplace_prog(int mnt_fd, struct token_lsm *lsm_skel) 820 + { 821 + struct priv_freplace_prog *fr_skel = NULL; 822 + struct priv_prog *skel = NULL; 823 + int err, tgt_fd; 824 + 825 + err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd); 826 + if (!ASSERT_OK(err, "setup")) 827 + goto out; 828 + 829 + err = bpf_object__prepare(fr_skel->obj); 830 + if (!ASSERT_OK(err, "freplace__prepare")) 831 + goto out; 832 + 833 + err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1"); 834 + if (!ASSERT_OK(err, "set_attach_target")) 835 + goto out; 836 + 837 + err = priv_freplace_prog__load(fr_skel); 838 + ASSERT_OK(err, "priv_freplace_prog__load"); 839 + 840 + out: 841 + priv_freplace_prog__destroy(fr_skel); 842 + priv_prog__destroy(skel); 843 + return err; 844 + } 845 + 846 + /* Verify that freplace fails to set attach target from user namespace without bpf token */ 847 + static int userns_obj_priv_freplace_prog_fail(int mnt_fd, struct token_lsm *lsm_skel) 848 + { 849 + struct priv_freplace_prog *fr_skel = NULL; 850 + struct priv_prog *skel = NULL; 851 + int err, tgt_fd; 852 + 853 + err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd); 854 + if (!ASSERT_OK(err, "setup")) 855 + goto out; 856 + 857 + err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1"); 858 + if (ASSERT_ERR(err, "attach fails")) 859 + err = 0; 860 + else 861 + err = -EINVAL; 862 + 863 + out: 864 + priv_freplace_prog__destroy(fr_skel); 865 + priv_prog__destroy(skel); 866 + return err; 867 + } 868 + 792 869 /* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, 793 870 * which should cause struct_ops application to fail, as BTF won't be uploaded 794 871 * into the kernel, even if STRUCT_OPS programs themselves are allowed ··· 1083 1004 if (test__start_subtest("obj_priv_prog")) { 1084 1005 struct bpffs_opts opts = { 1085 1006 .cmds = bit(BPF_PROG_LOAD), 1086 - .progs = bit(BPF_PROG_TYPE_KPROBE), 1007 + .progs = bit(BPF_PROG_TYPE_XDP), 1087 1008 .attachs = ~0ULL, 1088 1009 }; 1089 1010 1090 1011 subtest_userns(&opts, userns_obj_priv_prog); 1012 + } 1013 + if
(test__start_subtest("obj_priv_freplace_prog")) { 1014 + struct bpffs_opts opts = { 1015 + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID), 1016 + .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP), 1017 + .attachs = ~0ULL, 1018 + }; 1019 + subtest_userns(&opts, userns_obj_priv_freplace_prog); 1020 + } 1021 + if (test__start_subtest("obj_priv_freplace_prog_fail")) { 1022 + struct bpffs_opts opts = { 1023 + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID), 1024 + .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP), 1025 + .attachs = ~0ULL, 1026 + }; 1027 + subtest_userns(&opts, userns_obj_priv_freplace_prog_fail); 1091 1028 } 1092 1029 if (test__start_subtest("obj_priv_btf_fail")) { 1093 1030 struct bpffs_opts opts = {
+10 -1
tools/testing/selftests/bpf/prog_tests/usdt.c
··· 45 45 LIBBPF_OPTS(bpf_usdt_opts, opts); 46 46 struct test_usdt *skel; 47 47 struct test_usdt__bss *bss; 48 - int err; 48 + int err, i; 49 49 50 50 skel = test_usdt__open_and_load(); 51 51 if (!ASSERT_OK_PTR(skel, "skel_open")) ··· 75 75 ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie"); 76 76 ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt"); 77 77 ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret"); 78 + ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size"); 78 79 79 80 /* auto-attached usdt3 gets default zero cookie value */ 80 81 ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie"); ··· 87 86 ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1"); 88 87 ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); 89 88 ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); 89 + ASSERT_EQ(bss->usdt3_arg_sizes[0], 4, "usdt3_arg1_size"); 90 + ASSERT_EQ(bss->usdt3_arg_sizes[1], 8, "usdt3_arg2_size"); 91 + ASSERT_EQ(bss->usdt3_arg_sizes[2], 8, "usdt3_arg3_size"); 90 92 91 93 /* auto-attached usdt12 gets default zero cookie value */ 92 94 ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie"); ··· 107 103 ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10"); 108 104 ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11"); 109 105 ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12"); 106 + 107 + int usdt12_expected_arg_sizes[12] = { 4, 4, 8, 8, 4, 8, 8, 8, 4, 2, 2, 1 }; 108 + 109 + for (i = 0; i < 12; i++) 110 + ASSERT_EQ(bss->usdt12_arg_sizes[i], usdt12_expected_arg_sizes[i], "usdt12_arg_size"); 110 111 111 112 /* trigger_func() is marked __always_inline, so USDT invocations will be 112 113 * inlined in two different places, meaning that each USDT will have
+6 -2
tools/testing/selftests/bpf/prog_tests/verifier.c
··· 45 45 #include "verifier_ldsx.skel.h" 46 46 #include "verifier_leak_ptr.skel.h" 47 47 #include "verifier_linked_scalars.skel.h" 48 + #include "verifier_load_acquire.skel.h" 48 49 #include "verifier_loops1.skel.h" 49 50 #include "verifier_lwt.skel.h" 50 51 #include "verifier_map_in_map.skel.h" ··· 81 80 #include "verifier_spill_fill.skel.h" 82 81 #include "verifier_spin_lock.skel.h" 83 82 #include "verifier_stack_ptr.skel.h" 83 + #include "verifier_store_release.skel.h" 84 84 #include "verifier_subprog_precision.skel.h" 85 85 #include "verifier_subreg.skel.h" 86 86 #include "verifier_tailcall_jit.skel.h" ··· 123 121 /* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */ 124 122 err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps); 125 123 if (err) { 126 - PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); 124 + PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err)); 127 125 return; 128 126 } 129 127 ··· 133 131 134 132 err = cap_enable_effective(old_caps, NULL); 135 133 if (err) 136 - PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err)); 134 + PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err)); 137 135 } 138 136 139 137 #define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL) ··· 175 173 void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } 176 174 void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } 177 175 void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); } 176 + void test_verifier_load_acquire(void) { RUN(verifier_load_acquire); } 178 177 void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } 179 178 void test_verifier_ldsx(void) { RUN(verifier_ldsx); } 180 179 void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } ··· 214 211 void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } 215 212 void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); } 216 213 void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } 214 + void test_verifier_store_release(void) { RUN(verifier_store_release); } 217 215 void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } 218 216 void test_verifier_subreg(void) { RUN(verifier_subreg); } 219 217 void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); }
+175
tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* 4 + * Network topology: 5 + * ----------- ----------- 6 + * | NS1 | | NS2 | 7 + * | veth0 -|--------|- veth0 | 8 + * ----------- ----------- 9 + * 10 + */ 11 + 12 + #define _GNU_SOURCE 13 + #include <net/if.h> 14 + #include <uapi/linux/if_link.h> 15 + 16 + #include "network_helpers.h" 17 + #include "test_progs.h" 18 + #include "test_xdp_vlan.skel.h" 19 + 20 + 21 + #define VETH_NAME "veth0" 22 + #define NS_MAX_SIZE 32 23 + #define NS1_NAME "ns-xdp-vlan-1-" 24 + #define NS2_NAME "ns-xdp-vlan-2-" 25 + #define NS1_IP_ADDR "100.64.10.1" 26 + #define NS2_IP_ADDR "100.64.10.2" 27 + #define VLAN_ID 4011 28 + 29 + static int setup_network(char *ns1, char *ns2) 30 + { 31 + if (!ASSERT_OK(append_tid(ns1, NS_MAX_SIZE), "create ns1 name")) 32 + goto fail; 33 + if (!ASSERT_OK(append_tid(ns2, NS_MAX_SIZE), "create ns2 name")) 34 + goto fail; 35 + 36 + SYS(fail, "ip netns add %s", ns1); 37 + SYS(fail, "ip netns add %s", ns2); 38 + SYS(fail, "ip -n %s link add %s type veth peer name %s netns %s", 39 + ns1, VETH_NAME, VETH_NAME, ns2); 40 + 41 + /* NOTICE: XDP require VLAN header inside packet payload 42 + * - Thus, disable VLAN offloading driver features 43 + */ 44 + SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns1, VETH_NAME); 45 + SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns2, VETH_NAME); 46 + 47 + /* NS1 configuration */ 48 + SYS(fail, "ip -n %s addr add %s/24 dev %s", ns1, NS1_IP_ADDR, VETH_NAME); 49 + SYS(fail, "ip -n %s link set %s up", ns1, VETH_NAME); 50 + 51 + /* NS2 configuration */ 52 + SYS(fail, "ip -n %s link add link %s name %s.%d type vlan id %d", 53 + ns2, VETH_NAME, VETH_NAME, VLAN_ID, VLAN_ID); 54 + SYS(fail, "ip -n %s addr add %s/24 dev %s.%d", ns2, NS2_IP_ADDR, VETH_NAME, VLAN_ID); 55 + SYS(fail, "ip -n %s link set %s up", ns2, VETH_NAME); 56 + SYS(fail, "ip -n %s link set %s.%d up", ns2, VETH_NAME, VLAN_ID); 57 + 58 + /* At this point ping should fail because VLAN tags are only used by NS2 */ 59 + return !SYS_NOFAIL("ip netns exec %s ping -W 1 -c1 %s", ns2, NS1_IP_ADDR); 60 + 61 + fail: 62 + return -1; 63 + } 64 + 65 + static void cleanup_network(const char *ns1, const char *ns2) 66 + { 67 + SYS_NOFAIL("ip netns del %s", ns1); 68 + SYS_NOFAIL("ip netns del %s", ns2); 69 + } 70 + 71 + static void xdp_vlan(struct bpf_program *xdp, struct bpf_program *tc, u32 flags) 72 + { 73 + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS); 74 + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); 75 + char ns1[NS_MAX_SIZE] = NS1_NAME; 76 + char ns2[NS_MAX_SIZE] = NS2_NAME; 77 + struct nstoken *nstoken = NULL; 78 + int interface; 79 + int ret; 80 + 81 + if (!ASSERT_OK(setup_network(ns1, ns2), "setup network")) 82 + goto cleanup; 83 + 84 + nstoken = open_netns(ns1); 85 + if (!ASSERT_OK_PTR(nstoken, "open NS1")) 86 + goto cleanup; 87 + 88 + interface = if_nametoindex(VETH_NAME); 89 + if (!ASSERT_NEQ(interface, 0, "get interface index")) 90 + goto cleanup; 91 + 92 + ret = bpf_xdp_attach(interface, bpf_program__fd(xdp), flags, NULL); 93 + if (!ASSERT_OK(ret, "attach xdp_vlan_change")) 94 + goto cleanup; 95 + 96 + tc_hook.ifindex = interface; 97 + ret = bpf_tc_hook_create(&tc_hook); 98 + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) 99 + goto detach_xdp; 100 + 101 + /* Now we'll use BPF programs to pop/push the VLAN tags */ 102 + tc_opts.prog_fd = bpf_program__fd(tc); 103 + ret = bpf_tc_attach(&tc_hook, &tc_opts); 104 + if (!ASSERT_OK(ret, "bpf_tc_attach")) 105 + goto detach_xdp; 106 + 
107 + close_netns(nstoken); 108 + nstoken = NULL; 109 + 110 + /* Now the namespaces can reach each other; test with pings */ 111 + SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns1, NS2_IP_ADDR); 112 + SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns2, NS1_IP_ADDR); 113 + 114 + 115 + detach_tc: 116 + bpf_tc_detach(&tc_hook, &tc_opts); 117 + detach_xdp: 118 + bpf_xdp_detach(interface, flags, NULL); 119 + cleanup: 120 + close_netns(nstoken); 121 + cleanup_network(ns1, ns2); 122 + } 123 + 124 + /* First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"; 125 + * on egress, use TC to add back VLAN tag 4011 126 + */ 127 + void test_xdp_vlan_change(void) 128 + { 129 + struct test_xdp_vlan *skel; 130 + 131 + skel = test_xdp_vlan__open_and_load(); 132 + if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load")) 133 + return; 134 + 135 + if (test__start_subtest("0")) 136 + xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 0); 137 + 138 + if (test__start_subtest("DRV_MODE")) 139 + xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 140 + XDP_FLAGS_DRV_MODE); 141 + 142 + if (test__start_subtest("SKB_MODE")) 143 + xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 144 + XDP_FLAGS_SKB_MODE); 145 + 146 + test_xdp_vlan__destroy(skel); 147 + } 148 + 149 + /* Second test: XDP prog fully removes the VLAN header 150 + * 151 + * Catch a kernel bug in generic-XDP that doesn't allow us to 152 + * remove a VLAN header, because skb->protocol still contains the VLAN 153 + * ETH_P_8021Q indication, and this causes our changes to be overwritten. 154 + */ 155 + void test_xdp_vlan_remove(void) 156 + { 157 + struct test_xdp_vlan *skel; 158 + 159 + skel = test_xdp_vlan__open_and_load(); 160 + if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load")) 161 + return; 162 + 163 + if (test__start_subtest("0")) 164 + xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 0); 165 + 166 + if (test__start_subtest("DRV_MODE")) 167 + xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 168 + XDP_FLAGS_DRV_MODE); 169 + 170 + if (test__start_subtest("SKB_MODE")) 171 + xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 172 + XDP_FLAGS_SKB_MODE); 173 + 174 + test_xdp_vlan__destroy(skel); 175 + }
+119 -2
tools/testing/selftests/bpf/progs/arena_atomics.c
··· 6 6 #include <stdbool.h> 7 7 #include <stdatomic.h> 8 8 #include "bpf_arena_common.h" 9 + #include "../../../include/linux/filter.h" 10 + #include "bpf_misc.h" 9 11 10 12 struct { 11 13 __uint(type, BPF_MAP_TYPE_ARENA); ··· 21 19 } arena SEC(".maps"); 22 20 23 21 #if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) 24 - bool skip_tests __attribute((__section__(".data"))) = false; 22 + bool skip_all_tests __attribute((__section__(".data"))) = false; 25 23 #else 26 - bool skip_tests = true; 24 + bool skip_all_tests = true; 25 + #endif 26 + 27 + #if defined(ENABLE_ATOMICS_TESTS) && \ 28 + defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ 29 + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) 30 + bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false; 31 + #else 32 + bool skip_lacq_srel_tests = true; 27 33 #endif 28 34 29 35 __u32 pid = 0; ··· 281 271 uaf_recovery_fails -= 1; 282 272 #endif 283 273 274 + return 0; 275 + } 276 + 277 + #if __clang_major__ >= 18 278 + __u8 __arena_global load_acquire8_value = 0x12; 279 + __u16 __arena_global load_acquire16_value = 0x1234; 280 + __u32 __arena_global load_acquire32_value = 0x12345678; 281 + __u64 __arena_global load_acquire64_value = 0x1234567890abcdef; 282 + 283 + __u8 __arena_global load_acquire8_result = 0; 284 + __u16 __arena_global load_acquire16_result = 0; 285 + __u32 __arena_global load_acquire32_result = 0; 286 + __u64 __arena_global load_acquire64_result = 0; 287 + #else 288 + /* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around 289 + * this issue by defining the below variables as 64-bit. 290 + */ 291 + __u64 __arena_global load_acquire8_value; 292 + __u64 __arena_global load_acquire16_value; 293 + __u64 __arena_global load_acquire32_value; 294 + __u64 __arena_global load_acquire64_value; 295 + 296 + __u64 __arena_global load_acquire8_result; 297 + __u64 __arena_global load_acquire16_result; 298 + __u64 __arena_global load_acquire32_result; 299 + __u64 __arena_global load_acquire64_result; 300 + #endif 301 + 302 + SEC("raw_tp/sys_enter") 303 + int load_acquire(const void *ctx) 304 + { 305 + #if defined(ENABLE_ATOMICS_TESTS) && \ 306 + defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ 307 + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) 308 + 309 + #define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \ 310 + { asm volatile ( \ 311 + "r1 = %[" #SRC "] ll;" \ 312 + "r1 = addr_space_cast(r1, 0x0, 0x1);" \ 313 + ".8byte %[load_acquire_insn];" \ 314 + "r3 = %[" #DST "] ll;" \ 315 + "r3 = addr_space_cast(r3, 0x0, 0x1);" \ 316 + "*(" #SIZE " *)(r3 + 0) = r2;" \ 317 + : \ 318 + : __imm_addr(SRC), \ 319 + __imm_insn(load_acquire_insn, \ 320 + BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_LOAD_ACQ, \ 321 + BPF_REG_2, BPF_REG_1, 0)), \ 322 + __imm_addr(DST) \ 323 + : __clobber_all); } \ 324 + 325 + LOAD_ACQUIRE_ARENA(B, u8, load_acquire8_value, load_acquire8_result) 326 + LOAD_ACQUIRE_ARENA(H, u16, load_acquire16_value, 327 + load_acquire16_result) 328 + LOAD_ACQUIRE_ARENA(W, u32, load_acquire32_value, 329 + load_acquire32_result) 330 + LOAD_ACQUIRE_ARENA(DW, u64, load_acquire64_value, 331 + load_acquire64_result) 332 + #undef LOAD_ACQUIRE_ARENA 333 + 334 + #endif 335 + return 0; 336 + } 337 + 338 + #if __clang_major__ >= 18 339 + __u8 __arena_global store_release8_result = 0; 340 + __u16 __arena_global store_release16_result = 0; 341 + __u32 __arena_global store_release32_result = 0; 342 + __u64 __arena_global store_release64_result = 0; 343 + #else 344 + /* clang-17 crashes if the 
.addr_space.1 ELF section has holes. Work around 345 + * this issue by defining the below variables as 64-bit. 346 + */ 347 + __u64 __arena_global store_release8_result; 348 + __u64 __arena_global store_release16_result; 349 + __u64 __arena_global store_release32_result; 350 + __u64 __arena_global store_release64_result; 351 + #endif 352 + 353 + SEC("raw_tp/sys_enter") 354 + int store_release(const void *ctx) 355 + { 356 + #if defined(ENABLE_ATOMICS_TESTS) && \ 357 + defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ 358 + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) 359 + 360 + #define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \ 361 + { asm volatile ( \ 362 + "r1 = " VAL ";" \ 363 + "r2 = %[" #DST "] ll;" \ 364 + "r2 = addr_space_cast(r2, 0x0, 0x1);" \ 365 + ".8byte %[store_release_insn];" \ 366 + : \ 367 + : __imm_addr(DST), \ 368 + __imm_insn(store_release_insn, \ 369 + BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_STORE_REL, \ 370 + BPF_REG_2, BPF_REG_1, 0)) \ 371 + : __clobber_all); } \ 372 + 373 + STORE_RELEASE_ARENA(B, store_release8_result, "0x12") 374 + STORE_RELEASE_ARENA(H, store_release16_result, "0x1234") 375 + STORE_RELEASE_ARENA(W, store_release32_result, "0x12345678") 376 + STORE_RELEASE_ARENA(DW, store_release64_result, 377 + "0x1234567890abcdef ll") 378 + #undef STORE_RELEASE_ARENA 379 + 380 + #endif 284 381 return 0; 285 382 } 286 383
+51
tools/testing/selftests/bpf/progs/arena_spin_lock.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #include <vmlinux.h> 4 + #include <bpf/bpf_tracing.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include "bpf_misc.h" 7 + #include "bpf_arena_spin_lock.h" 8 + 9 + struct { 10 + __uint(type, BPF_MAP_TYPE_ARENA); 11 + __uint(map_flags, BPF_F_MMAPABLE); 12 + __uint(max_entries, 100); /* number of pages */ 13 + #ifdef __TARGET_ARCH_arm64 14 + __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */ 15 + #else 16 + __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */ 17 + #endif 18 + } arena SEC(".maps"); 19 + 20 + int cs_count; 21 + 22 + #if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) 23 + arena_spinlock_t __arena lock; 24 + int test_skip = 1; 25 + #else 26 + int test_skip = 2; 27 + #endif 28 + 29 + int counter; 30 + int limit; 31 + 32 + SEC("tc") 33 + int prog(void *ctx) 34 + { 35 + int ret = -2; 36 + 37 + #if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) 38 + unsigned long flags; 39 + 40 + if ((ret = arena_spin_lock_irqsave(&lock, flags))) 41 + return ret; 42 + if (counter != limit) 43 + counter++; 44 + bpf_repeat(cs_count); 45 + ret = 0; 46 + arena_spin_unlock_irqrestore(&lock, flags); 47 + #endif 48 + return ret; 49 + } 50 + 51 + char _license[] SEC("license") = "GPL";
+110
tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
··· 9 9 uint32_t tid = 0; 10 10 int num_unknown_tid = 0; 11 11 int num_known_tid = 0; 12 + void *user_ptr = 0; 13 + void *user_ptr_long = 0; 14 + uint32_t pid = 0; 15 + 16 + static char big_str1[5000]; 17 + static char big_str2[5005]; 18 + static char big_str3[4996]; 12 19 13 20 SEC("iter/task") 14 21 int dump_task(struct bpf_iter__task *ctx) ··· 42 35 } 43 36 44 37 int num_expected_failure_copy_from_user_task = 0; 38 + int num_expected_failure_copy_from_user_task_str = 0; 45 39 int num_success_copy_from_user_task = 0; 40 + int num_success_copy_from_user_task_str = 0; 46 41 47 42 SEC("iter.s/task") 48 43 int dump_task_sleepable(struct bpf_iter__task *ctx) ··· 53 44 struct task_struct *task = ctx->task; 54 45 static const char info[] = " === END ==="; 55 46 struct pt_regs *regs; 47 + char task_str1[10] = "aaaaaaaaaa"; 48 + char task_str2[10], task_str3[10]; 49 + char task_str4[20] = "aaaaaaaaaaaaaaaaaaaa"; 56 50 void *ptr; 57 51 uint32_t user_data = 0; 58 52 int ret; ··· 90 78 BPF_SEQ_PRINTF(seq, "%s\n", info); 91 79 return 0; 92 80 } 81 + 93 82 ++num_success_copy_from_user_task; 83 + 84 + /* Read an invalid pointer and ensure we get an error */ 85 + ptr = NULL; 86 + ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, 0); 87 + if (ret >= 0 || task_str1[9] != 'a' || task_str1[0] != '\0') { 88 + BPF_SEQ_PRINTF(seq, "%s\n", info); 89 + return 0; 90 + } 91 + 92 + /* Read an invalid pointer and ensure we get error with pad zeros flag */ 93 + ptr = NULL; 94 + ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), 95 + ptr, task, BPF_F_PAD_ZEROS); 96 + if (ret >= 0 || task_str1[9] != '\0' || task_str1[0] != '\0') { 97 + BPF_SEQ_PRINTF(seq, "%s\n", info); 98 + return 0; 99 + } 100 + 101 + ++num_expected_failure_copy_from_user_task_str; 102 + 103 + /* Same length as the string */ 104 + ret = bpf_copy_from_user_task_str((char *)task_str2, 10, user_ptr, task, 0); 105 + /* only need to do the task pid check once */ 106 + if (bpf_strncmp(task_str2, 10, "test_data\0") != 0 || ret != 10 || task->tgid != pid) { 107 + BPF_SEQ_PRINTF(seq, "%s\n", info); 108 + return 0; 109 + } 110 + 111 + /* Shorter length than the string */ 112 + ret = bpf_copy_from_user_task_str((char *)task_str3, 2, user_ptr, task, 0); 113 + if (bpf_strncmp(task_str3, 2, "t\0") != 0 || ret != 2) { 114 + BPF_SEQ_PRINTF(seq, "%s\n", info); 115 + return 0; 116 + } 117 + 118 + /* Longer length than the string */ 119 + ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, 0); 120 + if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10 121 + || task_str4[sizeof(task_str4) - 1] != 'a') { 122 + BPF_SEQ_PRINTF(seq, "%s\n", info); 123 + return 0; 124 + } 125 + 126 + /* Longer length than the string with pad zeros flag */ 127 + ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, BPF_F_PAD_ZEROS); 128 + if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10 129 + || task_str4[sizeof(task_str4) - 1] != '\0') { 130 + BPF_SEQ_PRINTF(seq, "%s\n", info); 131 + return 0; 132 + } 133 + 134 + /* Longer length than the string past a page boundary */ 135 + ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr, task, 0); 136 + if (bpf_strncmp(big_str1, 10, "test_data\0") != 0 || ret != 10) { 137 + BPF_SEQ_PRINTF(seq, "%s\n", info); 138 + return 0; 139 + } 140 + 141 + /* String that crosses a page boundary */ 142 + ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr_long, task, BPF_F_PAD_ZEROS); 143 + if (bpf_strncmp(big_str1, 4, "baba") != 0 || 
ret != 5000 144 + || bpf_strncmp(big_str1 + 4996, 4, "bab\0") != 0) { 145 + BPF_SEQ_PRINTF(seq, "%s\n", info); 146 + return 0; 147 + } 148 + 149 + for (int i = 0; i < 4999; ++i) { 150 + if (i % 2 == 0) { 151 + if (big_str1[i] != 'b') { 152 + BPF_SEQ_PRINTF(seq, "%s\n", info); 153 + return 0; 154 + } 155 + } else { 156 + if (big_str1[i] != 'a') { 157 + BPF_SEQ_PRINTF(seq, "%s\n", info); 158 + return 0; 159 + } 160 + } 161 + } 162 + 163 + /* Longer length than the string that crosses a page boundary */ 164 + ret = bpf_copy_from_user_task_str(big_str2, 5005, user_ptr_long, task, BPF_F_PAD_ZEROS); 165 + if (bpf_strncmp(big_str2, 4, "baba") != 0 || ret != 5000 166 + || bpf_strncmp(big_str2 + 4996, 5, "bab\0\0") != 0) { 167 + BPF_SEQ_PRINTF(seq, "%s\n", info); 168 + return 0; 169 + } 170 + 171 + /* Shorter length than the string that crosses a page boundary */ 172 + ret = bpf_copy_from_user_task_str(big_str3, 4996, user_ptr_long, task, 0); 173 + if (bpf_strncmp(big_str3, 4, "baba") != 0 || ret != 4996 174 + || bpf_strncmp(big_str3 + 4992, 4, "bab\0") != 0) { 175 + BPF_SEQ_PRINTF(seq, "%s\n", info); 176 + return 0; 177 + } 178 + 179 + ++num_success_copy_from_user_task_str; 94 180 95 181 if (ctx->meta->seq_num == 0) 96 182 BPF_SEQ_PRINTF(seq, " tgid gid data\n");
+22
tools/testing/selftests/bpf/progs/bpf_misc.h
··· 135 135 #define __arch_arm64 __arch("ARM64") 136 136 #define __arch_riscv64 __arch("RISCV64") 137 137 #define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps)))) 138 + #define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited"))) 139 + #define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited"))) 138 140 139 141 /* Define common capabilities tested using __caps_unpriv */ 140 142 #define CAP_NET_ADMIN 12 ··· 174 172 #elif defined(__TARGET_ARCH_riscv) 175 173 #define SYSCALL_WRAPPER 1 176 174 #define SYS_PREFIX "__riscv_" 175 + #elif defined(__TARGET_ARCH_powerpc) 176 + #define SYSCALL_WRAPPER 1 177 + #define SYS_PREFIX "" 177 178 #else 178 179 #define SYSCALL_WRAPPER 0 179 180 #define SYS_PREFIX "__se_" ··· 211 206 212 207 #ifndef ARRAY_SIZE 213 208 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 209 + #endif 210 + 211 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 212 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ 213 + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ 214 + defined(__TARGET_ARCH_loongarch)) && \ 215 + __clang_major__ >= 18 216 + #define CAN_USE_GOTOL 217 + #endif 218 + 219 + #if __clang_major__ >= 18 220 + #define CAN_USE_BPF_ST 221 + #endif 222 + 223 + #if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ 224 + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) 225 + #define CAN_USE_LOAD_ACQ_STORE_REL 214 226 #endif 215 228 216 228 #endif
+4
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
··· 15 15 #define SO_KEEPALIVE 9 16 16 #define SO_PRIORITY 12 17 17 #define SO_REUSEPORT 15 18 + #if defined(__TARGET_ARCH_powerpc) 19 + #define SO_RCVLOWAT 16 20 + #else 18 21 #define SO_RCVLOWAT 18 22 + #endif 19 23 #define SO_BINDTODEVICE 25 20 24 #define SO_MARK 36 21 25 #define SO_MAX_PACING_RATE 47
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
··· 1 + #include "core_reloc_types.h" 2 + 3 + void f(struct core_reloc_arrays___err_bad_signed_arr_elem_sz x) {}
+41
tools/testing/selftests/bpf/progs/cgroup_preorder.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #include <vmlinux.h> 4 + #include <bpf/bpf_helpers.h> 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + unsigned int idx; 9 + __u8 result[4]; 10 + 11 + SEC("cgroup/getsockopt") 12 + int child(struct bpf_sockopt *ctx) 13 + { 14 + if (idx < 4) 15 + result[idx++] = 1; 16 + return 1; 17 + } 18 + 19 + SEC("cgroup/getsockopt") 20 + int child_2(struct bpf_sockopt *ctx) 21 + { 22 + if (idx < 4) 23 + result[idx++] = 2; 24 + return 1; 25 + } 26 + 27 + SEC("cgroup/getsockopt") 28 + int parent(struct bpf_sockopt *ctx) 29 + { 30 + if (idx < 4) 31 + result[idx++] = 3; 32 + return 1; 33 + } 34 + 35 + SEC("cgroup/getsockopt") 36 + int parent_2(struct bpf_sockopt *ctx) 37 + { 38 + if (idx < 4) 39 + result[idx++] = 4; 40 + return 1; 41 + }
-39
tools/testing/selftests/bpf/progs/changes_pkt_data.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - 3 - #include <linux/bpf.h> 4 - #include <bpf/bpf_helpers.h> 5 - 6 - __noinline 7 - long changes_pkt_data(struct __sk_buff *sk) 8 - { 9 - return bpf_skb_pull_data(sk, 0); 10 - } 11 - 12 - __noinline __weak 13 - long does_not_change_pkt_data(struct __sk_buff *sk) 14 - { 15 - return 0; 16 - } 17 - 18 - SEC("?tc") 19 - int main_with_subprogs(struct __sk_buff *sk) 20 - { 21 - changes_pkt_data(sk); 22 - does_not_change_pkt_data(sk); 23 - return 0; 24 - } 25 - 26 - SEC("?tc") 27 - int main_changes(struct __sk_buff *sk) 28 - { 29 - bpf_skb_pull_data(sk, 0); 30 - return 0; 31 - } 32 - 33 - SEC("?tc") 34 - int main_does_not_change(struct __sk_buff *sk) 35 - { 36 - return 0; 37 - } 38 - 39 - char _license[] SEC("license") = "GPL";
+16 -1
tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c => tools/testing/selftests/bpf/progs/summarization_freplace.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - #include <linux/bpf.h> 3 + #include <vmlinux.h> 4 4 #include <bpf/bpf_helpers.h> 5 5 6 6 SEC("?freplace") ··· 11 11 12 12 SEC("?freplace") 13 13 long does_not_change_pkt_data(struct __sk_buff *sk) 14 + { 15 + return 0; 16 + } 17 + 18 + SEC("?freplace") 19 + long might_sleep(struct pt_regs *ctx) 20 + { 21 + int i; 22 + 23 + bpf_copy_from_user(&i, sizeof(i), NULL); 24 + return i; 25 + } 26 + 27 + SEC("?freplace") 28 + long does_not_sleep(struct pt_regs *ctx) 14 29 { 15 30 return 0; 16 31 }
+424
tools/testing/selftests/bpf/progs/compute_live_registers.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include "../../../include/linux/filter.h" 6 + #include "bpf_arena_common.h" 7 + #include "bpf_misc.h" 8 + 9 + struct { 10 + __uint(type, BPF_MAP_TYPE_ARRAY); 11 + __uint(max_entries, 1); 12 + __type(key, __u32); 13 + __type(value, __u64); 14 + } test_map SEC(".maps"); 15 + 16 + struct { 17 + __uint(type, BPF_MAP_TYPE_ARENA); 18 + __uint(map_flags, BPF_F_MMAPABLE); 19 + __uint(max_entries, 1); 20 + } arena SEC(".maps"); 21 + 22 + SEC("socket") 23 + __log_level(2) 24 + __msg(" 0: .......... (b7) r0 = 42") 25 + __msg(" 1: 0......... (bf) r1 = r0") 26 + __msg(" 2: .1........ (bf) r2 = r1") 27 + __msg(" 3: ..2....... (bf) r3 = r2") 28 + __msg(" 4: ...3...... (bf) r4 = r3") 29 + __msg(" 5: ....4..... (bf) r5 = r4") 30 + __msg(" 6: .....5.... (bf) r6 = r5") 31 + __msg(" 7: ......6... (bf) r7 = r6") 32 + __msg(" 8: .......7.. (bf) r8 = r7") 33 + __msg(" 9: ........8. (bf) r9 = r8") 34 + __msg("10: .........9 (bf) r0 = r9") 35 + __msg("11: 0......... (95) exit") 36 + __naked void assign_chain(void) 37 + { 38 + asm volatile ( 39 + "r0 = 42;" 40 + "r1 = r0;" 41 + "r2 = r1;" 42 + "r3 = r2;" 43 + "r4 = r3;" 44 + "r5 = r4;" 45 + "r6 = r5;" 46 + "r7 = r6;" 47 + "r8 = r7;" 48 + "r9 = r8;" 49 + "r0 = r9;" 50 + "exit;" 51 + ::: __clobber_all); 52 + } 53 + 54 + SEC("socket") 55 + __log_level(2) 56 + __msg("0: .......... (b7) r1 = 7") 57 + __msg("1: .1........ (07) r1 += 7") 58 + __msg("2: .......... (b7) r2 = 7") 59 + __msg("3: ..2....... (b7) r3 = 42") 60 + __msg("4: ..23...... (0f) r2 += r3") 61 + __msg("5: .......... (b7) r0 = 0") 62 + __msg("6: 0......... (95) exit") 63 + __naked void arithmetics(void) 64 + { 65 + asm volatile ( 66 + "r1 = 7;" 67 + "r1 += 7;" 68 + "r2 = 7;" 69 + "r3 = 42;" 70 + "r2 += r3;" 71 + "r0 = 0;" 72 + "exit;" 73 + ::: __clobber_all); 74 + } 75 + 76 + #ifdef CAN_USE_BPF_ST 77 + SEC("socket") 78 + __log_level(2) 79 + __msg(" 1: .1........ (07) r1 += -8") 80 + __msg(" 2: .1........ (7a) *(u64 *)(r1 +0) = 7") 81 + __msg(" 3: .1........ (b7) r2 = 42") 82 + __msg(" 4: .12....... (7b) *(u64 *)(r1 +0) = r2") 83 + __msg(" 5: .12....... (7b) *(u64 *)(r1 +0) = r2") 84 + __msg(" 6: .......... (b7) r0 = 0") 85 + __naked void store(void) 86 + { 87 + asm volatile ( 88 + "r1 = r10;" 89 + "r1 += -8;" 90 + "*(u64 *)(r1 +0) = 7;" 91 + "r2 = 42;" 92 + "*(u64 *)(r1 +0) = r2;" 93 + "*(u64 *)(r1 +0) = r2;" 94 + "r0 = 0;" 95 + "exit;" 96 + ::: __clobber_all); 97 + } 98 + #endif 99 + 100 + SEC("socket") 101 + __log_level(2) 102 + __msg("1: ....4..... (07) r4 += -8") 103 + __msg("2: ....4..... (79) r5 = *(u64 *)(r4 +0)") 104 + __msg("3: ....45.... (07) r4 += -8") 105 + __naked void load(void) 106 + { 107 + asm volatile ( 108 + "r4 = r10;" 109 + "r4 += -8;" 110 + "r5 = *(u64 *)(r4 +0);" 111 + "r4 += -8;" 112 + "r0 = r5;" 113 + "exit;" 114 + ::: __clobber_all); 115 + } 116 + 117 + SEC("socket") 118 + __log_level(2) 119 + __msg("0: .1........ (61) r2 = *(u32 *)(r1 +0)") 120 + __msg("1: ..2....... (d4) r2 = le64 r2") 121 + __msg("2: ..2....... (bf) r0 = r2") 122 + __naked void endian(void) 123 + { 124 + asm volatile ( 125 + "r2 = *(u32 *)(r1 +0);" 126 + "r2 = le64 r2;" 127 + "r0 = r2;" 128 + "exit;" 129 + ::: __clobber_all); 130 + } 131 + 132 + SEC("socket") 133 + __log_level(2) 134 + __msg(" 8: 0......... (b7) r1 = 1") 135 + __msg(" 9: 01........ (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)") 136 + __msg("10: 01........ (c3) lock *(u32 *)(r0 +0) += r1") 137 + __msg("11: 01........ 
(db) r1 = atomic64_xchg((u64 *)(r0 +0), r1)") 138 + __msg("12: 01........ (bf) r2 = r0") 139 + __msg("13: .12....... (bf) r0 = r1") 140 + __msg("14: 012....... (db) r0 = atomic64_cmpxchg((u64 *)(r2 +0), r0, r1)") 141 + __naked void atomic(void) 142 + { 143 + asm volatile ( 144 + "r2 = r10;" 145 + "r2 += -8;" 146 + "r1 = 0;" 147 + "*(u64 *)(r2 +0) = r1;" 148 + "r1 = %[test_map] ll;" 149 + "call %[bpf_map_lookup_elem];" 150 + "if r0 == 0 goto 1f;" 151 + "r1 = 1;" 152 + "r1 = atomic_fetch_add((u64 *)(r0 +0), r1);" 153 + ".8byte %[add_nofetch];" /* same as "lock *(u32 *)(r0 +0) += r1;" */ 154 + "r1 = xchg_64(r0 + 0, r1);" 155 + "r2 = r0;" 156 + "r0 = r1;" 157 + "r0 = cmpxchg_64(r2 + 0, r0, r1);" 158 + "1: exit;" 159 + : 160 + : __imm(bpf_map_lookup_elem), 161 + __imm_addr(test_map), 162 + __imm_insn(add_nofetch, BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0)) 163 + : __clobber_all); 164 + } 165 + 166 + #ifdef CAN_USE_LOAD_ACQ_STORE_REL 167 + 168 + SEC("socket") 169 + __log_level(2) 170 + __msg("2: .12....... (db) store_release((u64 *)(r2 -8), r1)") 171 + __msg("3: .......... (bf) r3 = r10") 172 + __msg("4: ...3...... (db) r4 = load_acquire((u64 *)(r3 -8))") 173 + __naked void atomic_load_acq_store_rel(void) 174 + { 175 + asm volatile ( 176 + "r1 = 42;" 177 + "r2 = r10;" 178 + ".8byte %[store_release_insn];" /* store_release((u64 *)(r2 - 8), r1); */ 179 + "r3 = r10;" 180 + ".8byte %[load_acquire_insn];" /* r4 = load_acquire((u64 *)(r3 + 0)); */ 181 + "r0 = r4;" 182 + "exit;" 183 + : 184 + : __imm_insn(store_release_insn, 185 + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)), 186 + __imm_insn(load_acquire_insn, 187 + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_4, BPF_REG_3, -8)) 188 + : __clobber_all); 189 + } 190 + 191 + #endif /* CAN_USE_LOAD_ACQ_STORE_REL */ 192 + 193 + SEC("socket") 194 + __log_level(2) 195 + __msg("4: .12....7.. (85) call bpf_trace_printk#6") 196 + __msg("5: 0......7.. (0f) r0 += r7") 197 + __naked void regular_call(void) 198 + { 199 + asm volatile ( 200 + "r7 = 1;" 201 + "r1 = r10;" 202 + "r1 += -8;" 203 + "r2 = 1;" 204 + "call %[bpf_trace_printk];" 205 + "r0 += r7;" 206 + "exit;" 207 + : 208 + : __imm(bpf_trace_printk) 209 + : __clobber_all); 210 + } 211 + 212 + SEC("socket") 213 + __log_level(2) 214 + __msg("2: 012....... (25) if r1 > 0x7 goto pc+1") 215 + __msg("3: ..2....... (bf) r0 = r2") 216 + __naked void if1(void) 217 + { 218 + asm volatile ( 219 + "r0 = 1;" 220 + "r2 = 2;" 221 + "if r1 > 0x7 goto +1;" 222 + "r0 = r2;" 223 + "exit;" 224 + ::: __clobber_all); 225 + } 226 + 227 + SEC("socket") 228 + __log_level(2) 229 + __msg("3: 0123...... (2d) if r1 > r3 goto pc+1") 230 + __msg("4: ..2....... (bf) r0 = r2") 231 + __naked void if2(void) 232 + { 233 + asm volatile ( 234 + "r0 = 1;" 235 + "r2 = 2;" 236 + "r3 = 7;" 237 + "if r1 > r3 goto +1;" 238 + "r0 = r2;" 239 + "exit;" 240 + ::: __clobber_all); 241 + } 242 + 243 + SEC("socket") 244 + __log_level(2) 245 + __msg("0: .......... (b7) r1 = 0") 246 + __msg("1: .1........ (b7) r2 = 7") 247 + __msg("2: .12....... (25) if r1 > 0x7 goto pc+4") 248 + __msg("3: .12....... (07) r1 += 1") 249 + __msg("4: .12....... (27) r2 *= 2") 250 + __msg("5: .12....... (05) goto pc+0") 251 + __msg("6: .12....... (05) goto pc-5") 252 + __msg("7: .......... (b7) r0 = 0") 253 + __msg("8: 0......... 
(95) exit") 254 + __naked void loop(void) 255 + { 256 + asm volatile ( 257 + "r1 = 0;" 258 + "r2 = 7;" 259 + "if r1 > 0x7 goto +4;" 260 + "r1 += 1;" 261 + "r2 *= 2;" 262 + "goto +0;" 263 + "goto -5;" 264 + "r0 = 0;" 265 + "exit;" 266 + : 267 + : __imm(bpf_trace_printk) 268 + : __clobber_all); 269 + } 270 + 271 + #ifdef CAN_USE_GOTOL 272 + SEC("socket") 273 + __log_level(2) 274 + __msg("2: .123...... (25) if r1 > 0x7 goto pc+2") 275 + __msg("3: ..2....... (bf) r0 = r2") 276 + __msg("4: 0......... (06) gotol pc+1") 277 + __msg("5: ...3...... (bf) r0 = r3") 278 + __msg("6: 0......... (95) exit") 279 + __naked void gotol(void) 280 + { 281 + asm volatile ( 282 + "r2 = 42;" 283 + "r3 = 24;" 284 + "if r1 > 0x7 goto +2;" 285 + "r0 = r2;" 286 + "gotol +1;" 287 + "r0 = r3;" 288 + "exit;" 289 + : 290 + : __imm(bpf_trace_printk) 291 + : __clobber_all); 292 + } 293 + #endif 294 + 295 + SEC("socket") 296 + __log_level(2) 297 + __msg("0: .......... (b7) r1 = 1") 298 + __msg("1: .1........ (e5) may_goto pc+1") 299 + __msg("2: .......... (05) goto pc-3") 300 + __msg("3: .1........ (bf) r0 = r1") 301 + __msg("4: 0......... (95) exit") 302 + __naked void may_goto(void) 303 + { 304 + asm volatile ( 305 + "1: r1 = 1;" 306 + ".8byte %[may_goto];" 307 + "goto 1b;" 308 + "r0 = r1;" 309 + "exit;" 310 + : 311 + : __imm(bpf_get_smp_processor_id), 312 + __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0)) 313 + : __clobber_all); 314 + } 315 + 316 + SEC("socket") 317 + __log_level(2) 318 + __msg("1: 0......... (18) r2 = 0x7") 319 + __msg("3: 0.2....... (0f) r0 += r2") 320 + __naked void ldimm64(void) 321 + { 322 + asm volatile ( 323 + "r0 = 0;" 324 + "r2 = 0x7 ll;" 325 + "r0 += r2;" 326 + "exit;" 327 + : 328 + :: __clobber_all); 329 + } 330 + 331 + /* No rules specific for LD_ABS/LD_IND, default behaviour kicks in */ 332 + SEC("socket") 333 + __log_level(2) 334 + __msg("2: 0123456789 (30) r0 = *(u8 *)skb[42]") 335 + __msg("3: 012.456789 (0f) r7 += r0") 336 + __msg("4: 012.456789 (b7) r3 = 42") 337 + __msg("5: 0123456789 (50) r0 = *(u8 *)skb[r3 + 0]") 338 + __msg("6: 0......7.. (0f) r7 += r0") 339 + __naked void ldabs(void) 340 + { 341 + asm volatile ( 342 + "r6 = r1;" 343 + "r7 = 0;" 344 + "r0 = *(u8 *)skb[42];" 345 + "r7 += r0;" 346 + "r3 = 42;" 347 + ".8byte %[ld_ind];" /* same as "r0 = *(u8 *)skb[r3];" */ 348 + "r7 += r0;" 349 + "r0 = r7;" 350 + "exit;" 351 + : 352 + : __imm_insn(ld_ind, BPF_LD_IND(BPF_B, BPF_REG_3, 0)) 353 + : __clobber_all); 354 + } 355 + 356 + 357 + #ifdef __BPF_FEATURE_ADDR_SPACE_CAST 358 + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") 359 + __log_level(2) 360 + __msg(" 6: .12345.... (85) call bpf_arena_alloc_pages") 361 + __msg(" 7: 0......... (bf) r1 = addr_space_cast(r0, 0, 1)") 362 + __msg(" 8: .1........ (b7) r2 = 42") 363 + __naked void addr_space_cast(void) 364 + { 365 + asm volatile ( 366 + "r1 = %[arena] ll;" 367 + "r2 = 0;" 368 + "r3 = 1;" 369 + "r4 = 0;" 370 + "r5 = 0;" 371 + "call %[bpf_arena_alloc_pages];" 372 + "r1 = addr_space_cast(r0, 0, 1);" 373 + "r2 = 42;" 374 + "*(u64 *)(r1 +0) = r2;" 375 + "r0 = 0;" 376 + "exit;" 377 + : 378 + : __imm(bpf_arena_alloc_pages), 379 + __imm_addr(arena) 380 + : __clobber_all); 381 + } 382 + #endif 383 + 384 + static __used __naked int aux1(void) 385 + { 386 + asm volatile ( 387 + "r0 = r1;" 388 + "r0 += r2;" 389 + "exit;" 390 + ::: __clobber_all); 391 + } 392 + 393 + SEC("socket") 394 + __log_level(2) 395 + __msg("0: ....45.... (b7) r1 = 1") 396 + __msg("1: .1..45.... (b7) r2 = 2") 397 + __msg("2: .12.45.... 
(b7) r3 = 3") 398 + /* Conservative liveness for subprog parameters. */ 399 + __msg("3: .12345.... (85) call pc+2") 400 + __msg("4: .......... (b7) r0 = 0") 401 + __msg("5: 0......... (95) exit") 402 + __msg("6: .12....... (bf) r0 = r1") 403 + __msg("7: 0.2....... (0f) r0 += r2") 404 + /* Conservative liveness for subprog return value. */ 405 + __msg("8: 0......... (95) exit") 406 + __naked void subprog1(void) 407 + { 408 + asm volatile ( 409 + "r1 = 1;" 410 + "r2 = 2;" 411 + "r3 = 3;" 412 + "call aux1;" 413 + "r0 = 0;" 414 + "exit;" 415 + ::: __clobber_all); 416 + } 417 + 418 + /* to retain debug info for BTF generation */ 419 + void kfunc_root(void) 420 + { 421 + bpf_arena_alloc_pages(0, 0, 0, 0, 0); 422 + } 423 + 424 + char _license[] SEC("license") = "GPL";
+3 -1
tools/testing/selftests/bpf/progs/connect4_dropper.c
··· 13 13 #define VERDICT_REJECT 0 14 14 #define VERDICT_PROCEED 1 15 15 16 + int port; 17 + 16 18 SEC("cgroup/connect4") 17 19 int connect_v4_dropper(struct bpf_sock_addr *ctx) 18 20 { 19 21 if (ctx->type != SOCK_STREAM) 20 22 return VERDICT_PROCEED; 21 - if (ctx->user_port == bpf_htons(60120)) 23 + if (ctx->user_port == bpf_htons(port)) 22 24 return VERDICT_REJECT; 23 25 return VERDICT_PROCEED; 24 26 }
+10
tools/testing/selftests/bpf/progs/core_reloc_types.h
··· 347 347 */ 348 348 struct core_reloc_arrays_output { 349 349 int a2; 350 + int a3; 350 351 char b123; 351 352 int c1c; 352 353 int d00d; ··· 454 453 char b[2][3][4]; 455 454 struct core_reloc_arrays_substruct c[3]; 456 455 struct core_reloc_arrays_substruct d[1][2]; 456 + }; 457 + 458 + struct core_reloc_arrays___err_bad_signed_arr_elem_sz { 459 + /* int -> short (signed!): not supported case */ 460 + short a[5]; 461 + char b[2][3][4]; 462 + struct core_reloc_arrays_substruct c[3]; 463 + struct core_reloc_arrays_substruct d[1][2]; 464 + struct core_reloc_arrays_substruct f[][2]; 457 465 }; 458 466 459 467 /*
+1
tools/testing/selftests/bpf/progs/cpumask_common.h
··· 61 61 u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, 62 62 const struct cpumask *src2) __ksym __weak; 63 63 u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak; 64 + int bpf_cpumask_populate(struct cpumask *cpumask, void *src, size_t src__sz) __ksym __weak; 64 65 65 66 void bpf_rcu_read_lock(void) __ksym __weak; 66 67 void bpf_rcu_read_unlock(void) __ksym __weak;
+38
tools/testing/selftests/bpf/progs/cpumask_failure.c
··· 222 222 223 223 return 0; 224 224 } 225 + 226 + SEC("tp_btf/task_newtask") 227 + __failure __msg("type=scalar expected=fp") 228 + int BPF_PROG(test_populate_invalid_destination, struct task_struct *task, u64 clone_flags) 229 + { 230 + struct bpf_cpumask *invalid = (struct bpf_cpumask *)0x123456; 231 + u64 bits; 232 + int ret; 233 + 234 + ret = bpf_cpumask_populate((struct cpumask *)invalid, &bits, sizeof(bits)); 235 + if (!ret) 236 + err = 2; 237 + 238 + return 0; 239 + } 240 + 241 + SEC("tp_btf/task_newtask") 242 + __failure __msg("leads to invalid memory access") 243 + int BPF_PROG(test_populate_invalid_source, struct task_struct *task, u64 clone_flags) 244 + { 245 + void *garbage = (void *)0x123456; 246 + struct bpf_cpumask *local; 247 + int ret; 248 + 249 + local = create_cpumask(); 250 + if (!local) { 251 + err = 1; 252 + return 0; 253 + } 254 + 255 + ret = bpf_cpumask_populate((struct cpumask *)local, garbage, 8); 256 + if (!ret) 257 + err = 2; 258 + 259 + bpf_cpumask_release(local); 260 + 261 + return 0; 262 + }
+119 -1
tools/testing/selftests/bpf/progs/cpumask_success.c
··· 749 749 } 750 750 751 751 SEC("tp_btf/task_newtask") 752 - __success 753 752 int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags) 754 753 { 755 754 struct bpf_cpumask *mask1, *mask2; ··· 769 770 bpf_cpumask_release(mask2); 770 771 return 0; 771 772 } 773 + 774 + SEC("tp_btf/task_newtask") 775 + int BPF_PROG(test_populate_reject_small_mask, struct task_struct *task, u64 clone_flags) 776 + { 777 + struct bpf_cpumask *local; 778 + u8 toofewbits; 779 + int ret; 780 + 781 + if (!is_test_task()) 782 + return 0; 783 + 784 + local = create_cpumask(); 785 + if (!local) 786 + return 0; 787 + 788 + /* The kfunc should prevent this operation */ 789 + ret = bpf_cpumask_populate((struct cpumask *)local, &toofewbits, sizeof(toofewbits)); 790 + if (ret != -EACCES) 791 + err = 2; 792 + 793 + bpf_cpumask_release(local); 794 + 795 + return 0; 796 + } 797 + 798 + /* Mask is guaranteed to be large enough for bpf_cpumask_t. */ 799 + #define CPUMASK_TEST_MASKLEN (sizeof(cpumask_t)) 800 + 801 + /* Add an extra word for the test_populate_reject_unaligned test. */ 802 + u64 bits[CPUMASK_TEST_MASKLEN / 8 + 1]; 803 + extern bool CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS __kconfig __weak; 804 + 805 + SEC("tp_btf/task_newtask") 806 + int BPF_PROG(test_populate_reject_unaligned, struct task_struct *task, u64 clone_flags) 807 + { 808 + struct bpf_cpumask *mask; 809 + char *src; 810 + int ret; 811 + 812 + if (!is_test_task()) 813 + return 0; 814 + 815 + /* Skip if unaligned accesses are fine for this arch. */ 816 + if (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 817 + return 0; 818 + 819 + mask = bpf_cpumask_create(); 820 + if (!mask) { 821 + err = 1; 822 + return 0; 823 + } 824 + 825 + /* Misalign the source array by a byte. */ 826 + src = &((char *)bits)[1]; 827 + 828 + ret = bpf_cpumask_populate((struct cpumask *)mask, src, CPUMASK_TEST_MASKLEN); 829 + if (ret != -EINVAL) 830 + err = 2; 831 + 832 + bpf_cpumask_release(mask); 833 + 834 + return 0; 835 + } 836 + 837 + 838 + SEC("tp_btf/task_newtask") 839 + int BPF_PROG(test_populate, struct task_struct *task, u64 clone_flags) 840 + { 841 + struct bpf_cpumask *mask; 842 + bool bit; 843 + int ret; 844 + int i; 845 + 846 + if (!is_test_task()) 847 + return 0; 848 + 849 + /* Set only odd bits. */ 850 + __builtin_memset(bits, 0xaa, CPUMASK_TEST_MASKLEN); 851 + 852 + mask = bpf_cpumask_create(); 853 + if (!mask) { 854 + err = 1; 855 + return 0; 856 + } 857 + 858 + /* Pass the entire bits array, the kfunc will only copy the valid bits. */ 859 + ret = bpf_cpumask_populate((struct cpumask *)mask, bits, CPUMASK_TEST_MASKLEN); 860 + if (ret) { 861 + err = 2; 862 + goto out; 863 + } 864 + 865 + /* 866 + * Test is there to appease the verifier. We cannot directly 867 + * access NR_CPUS, the upper bound for nr_cpus, so we infer 868 + * it from the size of cpumask_t. 869 + */ 870 + if (nr_cpus < 0 || nr_cpus >= CPUMASK_TEST_MASKLEN * 8) { 871 + err = 3; 872 + goto out; 873 + } 874 + 875 + bpf_for(i, 0, nr_cpus) { 876 + /* Odd-numbered bits should be set, even ones unset. */ 877 + bit = bpf_cpumask_test_cpu(i, (const struct cpumask *)mask); 878 + if (bit == (i % 2 != 0)) 879 + continue; 880 + 881 + err = 4; 882 + break; 883 + } 884 + 885 + out: 886 + bpf_cpumask_release(mask); 887 + 888 + return 0; 889 + } 890 + 891 + #undef CPUMASK_TEST_MASKLEN
+118 -5
tools/testing/selftests/bpf/progs/dynptr_success.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2022 Facebook */ 3 3 4 + #include <vmlinux.h> 4 5 #include <string.h> 5 6 #include <stdbool.h> 6 - #include <linux/bpf.h> 7 7 #include <bpf/bpf_helpers.h> 8 8 #include <bpf/bpf_tracing.h> 9 9 #include "bpf_misc.h" 10 - #include "bpf_kfuncs.h" 11 10 #include "errno.h" 12 11 13 12 char _license[] SEC("license") = "GPL"; 14 13 15 14 int pid, err, val; 16 15 17 - struct sample { 16 + struct ringbuf_sample { 18 17 int pid; 19 18 int seq; 20 19 long value; ··· 120 121 121 122 static int ringbuf_callback(__u32 index, void *data) 122 123 { 123 - struct sample *sample; 124 + struct ringbuf_sample *sample; 124 125 125 126 struct bpf_dynptr *ptr = (struct bpf_dynptr *)data; 126 127 ··· 137 138 int test_ringbuf(void *ctx) 138 139 { 139 140 struct bpf_dynptr ptr; 140 - struct sample *sample; 141 + struct ringbuf_sample *sample; 141 142 142 143 if (bpf_get_current_pid_tgid() >> 32 != pid) 143 144 return 0; ··· 565 566 } 566 567 567 568 return 1; 569 + } 570 + 571 + static inline int bpf_memcmp(const char *a, const char *b, u32 size) 572 + { 573 + int i; 574 + 575 + bpf_for(i, 0, size) { 576 + if (a[i] != b[i]) 577 + return a[i] < b[i] ? -1 : 1; 578 + } 579 + return 0; 580 + } 581 + 582 + SEC("?tp/syscalls/sys_enter_nanosleep") 583 + int test_dynptr_copy(void *ctx) 584 + { 585 + char data[] = "hello there, world!!"; 586 + char buf[32] = {'\0'}; 587 + __u32 sz = sizeof(data); 588 + struct bpf_dynptr src, dst; 589 + 590 + bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &src); 591 + bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &dst); 592 + 593 + /* Test basic case of copying contiguous memory backed dynptrs */ 594 + err = bpf_dynptr_write(&src, 0, data, sz, 0); 595 + err = err ?: bpf_dynptr_copy(&dst, 0, &src, 0, sz); 596 + err = err ?: bpf_dynptr_read(buf, sz, &dst, 0, 0); 597 + err = err ?: bpf_memcmp(data, buf, sz); 598 + 599 + /* Test that offsets are handled correctly */ 600 + err = err ?: bpf_dynptr_copy(&dst, 3, &src, 5, sz - 5); 601 + err = err ?: bpf_dynptr_read(buf, sz - 5, &dst, 3, 0); 602 + err = err ?: bpf_memcmp(data + 5, buf, sz - 5); 603 + 604 + bpf_ringbuf_discard_dynptr(&src, 0); 605 + bpf_ringbuf_discard_dynptr(&dst, 0); 606 + return 0; 607 + } 608 + 609 + SEC("xdp") 610 + int test_dynptr_copy_xdp(struct xdp_md *xdp) 611 + { 612 + struct bpf_dynptr ptr_buf, ptr_xdp; 613 + char data[] = "qwertyuiopasdfghjkl"; 614 + char buf[32] = {'\0'}; 615 + __u32 len = sizeof(data); 616 + int i, chunks = 200; 617 + 618 + /* ptr_xdp is backed by non-contiguous memory */ 619 + bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); 620 + bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf); 621 + 622 + /* Destination dynptr is backed by non-contiguous memory */ 623 + bpf_for(i, 0, chunks) { 624 + err = bpf_dynptr_write(&ptr_buf, i * len, data, len, 0); 625 + if (err) 626 + goto out; 627 + } 628 + 629 + err = bpf_dynptr_copy(&ptr_xdp, 0, &ptr_buf, 0, len * chunks); 630 + if (err) 631 + goto out; 632 + 633 + bpf_for(i, 0, chunks) { 634 + __builtin_memset(buf, 0, sizeof(buf)); 635 + err = bpf_dynptr_read(&buf, len, &ptr_xdp, i * len, 0); 636 + if (err) 637 + goto out; 638 + if (bpf_memcmp(data, buf, len) != 0) 639 + goto out; 640 + } 641 + 642 + /* Source dynptr is backed by non-contiguous memory */ 643 + __builtin_memset(buf, 0, sizeof(buf)); 644 + bpf_for(i, 0, chunks) { 645 + err = bpf_dynptr_write(&ptr_buf, i * len, buf, len, 0); 646 + if (err) 647 + goto out; 648 + } 649 + 650 + err = bpf_dynptr_copy(&ptr_buf, 0, &ptr_xdp, 0, len * chunks); 651 + if 
(err) 652 + goto out; 653 + 654 + bpf_for(i, 0, chunks) { 655 + __builtin_memset(buf, 0, sizeof(buf)); 656 + err = bpf_dynptr_read(&buf, len, &ptr_buf, i * len, 0); 657 + if (err) 658 + goto out; 659 + if (bpf_memcmp(data, buf, len) != 0) 660 + goto out; 661 + } 662 + 663 + /* Both source and destination dynptrs are backed by non-contiguous memory */ 664 + err = bpf_dynptr_copy(&ptr_xdp, 2, &ptr_xdp, len, len * (chunks - 1)); 665 + if (err) 666 + goto out; 667 + 668 + bpf_for(i, 0, chunks - 1) { 669 + __builtin_memset(buf, 0, sizeof(buf)); 670 + err = bpf_dynptr_read(&buf, len, &ptr_xdp, 2 + i * len, 0); 671 + if (err) 672 + goto out; 673 + if (bpf_memcmp(data, buf, len) != 0) 674 + goto out; 675 + } 676 + 677 + if (bpf_dynptr_copy(&ptr_xdp, 2000, &ptr_xdp, 0, len * chunks) != -E2BIG) 678 + err = 1; 679 + 680 + out: 681 + bpf_ringbuf_discard_dynptr(&ptr_buf, 0); 682 + return XDP_DROP; 568 683 }
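Condensed to its essentials, the new kfunc copies a byte range between two dynptrs, contiguous or not. Below is a minimal sketch of the contiguous case from test_dynptr_copy above; the ringbuf map size and the bpf_dynptr_copy() prototype are restated here and should be treated as assumptions.

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>

  char _license[] SEC("license") = "GPL";

  /* Assumed prototype; matches the usage in the selftest above. */
  extern int bpf_dynptr_copy(struct bpf_dynptr *dst, __u32 dst_off,
                             struct bpf_dynptr *src, __u32 src_off,
                             __u32 size) __weak __ksym;

  struct {
          __uint(type, BPF_MAP_TYPE_RINGBUF);
          __uint(max_entries, 4096);
  } ringbuf SEC(".maps");

  SEC("tp/syscalls/sys_enter_nanosleep")
  int dynptr_copy_sketch(void *ctx)
  {
          char data[] = "hello there, world!!";
          char buf[32];
          __u32 sz = sizeof(data);
          struct bpf_dynptr src, dst;
          int err;

          bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &src);
          bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &dst);

          /* Copy sz bytes from src+0 into dst+0; the offsets allow slices. */
          err = bpf_dynptr_write(&src, 0, data, sz, 0);
          err = err ?: bpf_dynptr_copy(&dst, 0, &src, 0, sz);
          err = err ?: bpf_dynptr_read(buf, sz, &dst, 0, 0);

          bpf_ringbuf_discard_dynptr(&src, 0);
          bpf_ringbuf_discard_dynptr(&dst, 0);
          return err ? 1 : 0;
  }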
+15
tools/testing/selftests/bpf/progs/fexit_noreturns.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/bpf_tracing.h> 6 + #include "bpf_misc.h" 7 + 8 + char _license[] SEC("license") = "GPL"; 9 + 10 + SEC("fexit/do_exit") 11 + __failure __msg("Attaching fexit/fmod_ret to __noreturn functions is rejected.") 12 + int BPF_PROG(noreturns) 13 + { 14 + return 0; 15 + }
+70 -1
tools/testing/selftests/bpf/progs/irq.c
··· 222 222 } 223 223 224 224 SEC("?tc") 225 - __failure __msg("global function calls are not allowed with IRQs disabled") 225 + __success 226 226 int irq_global_subprog(struct __sk_buff *ctx) 227 227 { 228 228 unsigned long flags; ··· 438 438 bpf_local_irq_restore(&flags[1]); 439 439 bpf_local_irq_restore(&flags[2]); 440 440 bpf_local_irq_restore(&flags[0]); 441 + return 0; 442 + } 443 + 444 + int __noinline 445 + global_subprog(int i) 446 + { 447 + if (i) 448 + bpf_printk("%p", &i); 449 + return i; 450 + } 451 + 452 + int __noinline 453 + global_sleepable_helper_subprog(int i) 454 + { 455 + if (i) 456 + bpf_copy_from_user(&i, sizeof(i), NULL); 457 + return i; 458 + } 459 + 460 + int __noinline 461 + global_sleepable_kfunc_subprog(int i) 462 + { 463 + if (i) 464 + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); 465 + global_subprog(i); 466 + return i; 467 + } 468 + 469 + int __noinline 470 + global_subprog_calling_sleepable_global(int i) 471 + { 472 + if (!i) 473 + global_sleepable_kfunc_subprog(i); 474 + return i; 475 + } 476 + 477 + SEC("?syscall") 478 + __success 479 + int irq_non_sleepable_global_subprog(void *ctx) 480 + { 481 + unsigned long flags; 482 + 483 + bpf_local_irq_save(&flags); 484 + global_subprog(0); 485 + bpf_local_irq_restore(&flags); 486 + return 0; 487 + } 488 + 489 + SEC("?syscall") 490 + __failure __msg("global functions that may sleep are not allowed in non-sleepable context") 491 + int irq_sleepable_helper_global_subprog(void *ctx) 492 + { 493 + unsigned long flags; 494 + 495 + bpf_local_irq_save(&flags); 496 + global_sleepable_helper_subprog(0); 497 + bpf_local_irq_restore(&flags); 498 + return 0; 499 + } 500 + 501 + SEC("?syscall") 502 + __failure __msg("global functions that may sleep are not allowed in non-sleepable context") 503 + int irq_sleepable_global_subprog_indirect(void *ctx) 504 + { 505 + unsigned long flags; 506 + 507 + bpf_local_irq_save(&flags); 508 + global_subprog_calling_sleepable_global(0); 509 + bpf_local_irq_restore(&flags); 441 510 return 0; 442 511 } 443 512
+139
tools/testing/selftests/bpf/progs/iters.c
··· 7 7 #include "bpf_misc.h" 8 8 #include "bpf_compiler.h" 9 9 10 + #define unlikely(x) __builtin_expect(!!(x), 0) 11 + 10 12 static volatile int zero = 0; 11 13 12 14 int my_pid; ··· 1177 1175 } 1178 1176 1179 1177 SEC("?raw_tp") 1178 + __failure 1179 + __msg("math between fp pointer and register with unbounded") 1180 + __flag(BPF_F_TEST_STATE_FREQ) 1181 + __naked int loop_state_deps3(void) 1182 + { 1183 + /* This is equivalent to a C program below. 1184 + * 1185 + * if (random() != 24) { // assume false branch is placed first 1186 + * i = iter_new(); // fp[-8] 1187 + * while (iter_next(i)); 1188 + * iter_destroy(i); 1189 + * return; 1190 + * } 1191 + * 1192 + * for (i = 10; i > 0; i--); // increase dfs_depth for child states 1193 + * 1194 + * i = iter_new(); // fp[-8] 1195 + * b = -24; // r8 1196 + * for (;;) { // checkpoint (L) 1197 + * if (iter_next(i)) // checkpoint (N) 1198 + * break; 1199 + * if (random() == 77) { // assume false branch is placed first 1200 + * *(u64 *)(r10 + b) = 7; // this is not safe when b == -25 1201 + * iter_destroy(i); 1202 + * return; 1203 + * } 1204 + * if (random() == 42) { // assume false branch is placed first 1205 + * b = -25; 1206 + * } 1207 + * } 1208 + * iter_destroy(i); 1209 + * 1210 + * In case of a buggy verifier first loop might poison 1211 + * env->cur_state->loop_entry with a state having 0 branches 1212 + * and small dfs_depth. This would trigger NOT_EXACT states 1213 + * comparison for some states within second loop. 1214 + * Specifically, checkpoint (L) might be problematic if: 1215 + * - branch with '*(u64 *)(r10 + b) = 7' is not explored yet; 1216 + * - checkpoint (L) is first reached in state {b=-24}; 1217 + * - traversal is pruned at checkpoint (N) setting checkpoint's (L) 1218 + * branch count to 0, thus making it eligible for use in pruning; 1219 + * - checkpoint (L) is next reached in state {b=-25}, 1220 + * this would cause NOT_EXACT comparison with a state {b=-24} 1221 + * while 'b' is not marked precise yet. 
1222 + */ 1223 + asm volatile ( 1224 + "call %[bpf_get_prandom_u32];" 1225 + "if r0 == 24 goto 2f;" 1226 + "r1 = r10;" 1227 + "r1 += -8;" 1228 + "r2 = 0;" 1229 + "r3 = 5;" 1230 + "call %[bpf_iter_num_new];" 1231 + "1:" 1232 + "r1 = r10;" 1233 + "r1 += -8;" 1234 + "call %[bpf_iter_num_next];" 1235 + "if r0 != 0 goto 1b;" 1236 + "r1 = r10;" 1237 + "r1 += -8;" 1238 + "call %[bpf_iter_num_destroy];" 1239 + "r0 = 0;" 1240 + "exit;" 1241 + "2:" 1242 + /* loop to increase dfs_depth */ 1243 + "r0 = 10;" 1244 + "3:" 1245 + "r0 -= 1;" 1246 + "if r0 != 0 goto 3b;" 1247 + /* end of loop */ 1248 + "r1 = r10;" 1249 + "r1 += -8;" 1250 + "r2 = 0;" 1251 + "r3 = 10;" 1252 + "call %[bpf_iter_num_new];" 1253 + "r8 = -24;" 1254 + "main_loop_%=:" 1255 + "r1 = r10;" 1256 + "r1 += -8;" 1257 + "call %[bpf_iter_num_next];" 1258 + "if r0 == 0 goto main_loop_end_%=;" 1259 + /* first if */ 1260 + "call %[bpf_get_prandom_u32];" 1261 + "if r0 == 77 goto unsafe_write_%=;" 1262 + /* second if */ 1263 + "call %[bpf_get_prandom_u32];" 1264 + "if r0 == 42 goto poison_r8_%=;" 1265 + /* iterate */ 1266 + "goto main_loop_%=;" 1267 + "main_loop_end_%=:" 1268 + "r1 = r10;" 1269 + "r1 += -8;" 1270 + "call %[bpf_iter_num_destroy];" 1271 + "r0 = 0;" 1272 + "exit;" 1273 + 1274 + "unsafe_write_%=:" 1275 + "r0 = r10;" 1276 + "r0 += r8;" 1277 + "r1 = 7;" 1278 + "*(u64 *)(r0 + 0) = r1;" 1279 + "goto main_loop_end_%=;" 1280 + 1281 + "poison_r8_%=:" 1282 + "r8 = -25;" 1283 + "goto main_loop_%=;" 1284 + : 1285 + : __imm(bpf_get_prandom_u32), 1286 + __imm(bpf_iter_num_new), 1287 + __imm(bpf_iter_num_next), 1288 + __imm(bpf_iter_num_destroy) 1289 + : __clobber_all 1290 + ); 1291 + } 1292 + 1293 + SEC("?raw_tp") 1180 1294 __success 1181 1295 __naked int triple_continue(void) 1182 1296 { ··· 1627 1509 int iter_destroy_bad_arg(const void *ctx) 1628 1510 { 1629 1511 bpf_iter_num_destroy(&global_it); 1512 + return 0; 1513 + } 1514 + 1515 + SEC("raw_tp") 1516 + __success 1517 + int clean_live_states(const void *ctx) 1518 + { 1519 + char buf[1]; 1520 + int i, j, k, l, m, n, o; 1521 + 1522 + bpf_for(i, 0, 10) 1523 + bpf_for(j, 0, 10) 1524 + bpf_for(k, 0, 10) 1525 + bpf_for(l, 0, 10) 1526 + bpf_for(m, 0, 10) 1527 + bpf_for(n, 0, 10) 1528 + bpf_for(o, 0, 10) { 1529 + if (unlikely(bpf_get_prandom_u32())) 1530 + buf[0] = 42; 1531 + bpf_printk("%s", buf); 1532 + } 1630 1533 return 0; 1631 1534 } 1632 1535
+67 -1
tools/testing/selftests/bpf/progs/preempt_lock.c
··· 134 134 } 135 135 136 136 SEC("?tc") 137 - __failure __msg("global function calls are not allowed with preemption disabled") 137 + __success 138 138 int preempt_global_subprog_test(struct __sk_buff *ctx) 139 139 { 140 140 preempt_disable(); 141 141 preempt_global_subprog(); 142 + preempt_enable(); 143 + return 0; 144 + } 145 + 146 + int __noinline 147 + global_subprog(int i) 148 + { 149 + if (i) 150 + bpf_printk("%p", &i); 151 + return i; 152 + } 153 + 154 + int __noinline 155 + global_sleepable_helper_subprog(int i) 156 + { 157 + if (i) 158 + bpf_copy_from_user(&i, sizeof(i), NULL); 159 + return i; 160 + } 161 + 162 + int __noinline 163 + global_sleepable_kfunc_subprog(int i) 164 + { 165 + if (i) 166 + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); 167 + global_subprog(i); 168 + return i; 169 + } 170 + 171 + int __noinline 172 + global_subprog_calling_sleepable_global(int i) 173 + { 174 + if (!i) 175 + global_sleepable_kfunc_subprog(i); 176 + return i; 177 + } 178 + 179 + SEC("?syscall") 180 + __failure __msg("global functions that may sleep are not allowed in non-sleepable context") 181 + int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx) 182 + { 183 + preempt_disable(); 184 + if (ctx->mark) 185 + global_sleepable_helper_subprog(ctx->mark); 186 + preempt_enable(); 187 + return 0; 188 + } 189 + 190 + SEC("?syscall") 191 + __failure __msg("global functions that may sleep are not allowed in non-sleepable context") 192 + int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx) 193 + { 194 + preempt_disable(); 195 + if (ctx->mark) 196 + global_sleepable_kfunc_subprog(ctx->mark); 197 + preempt_enable(); 198 + return 0; 199 + } 200 + 201 + SEC("?syscall") 202 + __failure __msg("global functions that may sleep are not allowed in non-sleepable context") 203 + int preempt_global_sleepable_subprog_indirect(struct __sk_buff *ctx) 204 + { 205 + preempt_disable(); 206 + if (ctx->mark) 207 + global_subprog_calling_sleepable_global(ctx->mark); 142 208 preempt_enable(); 143 209 return 0; 144 210 }
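Together with the irq.c change above, the relaxed rule is: a global subprog that cannot sleep may now be called while preemption or IRQs are disabled, while any global subprog that may sleep is still rejected. A minimal sketch of the now-accepted shape; the preempt kfunc externs are assumptions, the selftests pull them in via bpf_experimental.h.

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>

  char _license[] SEC("license") = "GPL";

  /* Assumed kfunc externs (normally from bpf_experimental.h). */
  extern void bpf_preempt_disable(void) __ksym;
  extern void bpf_preempt_enable(void) __ksym;

  /* Non-sleepable global subprog: now accepted inside the critical section. */
  __noinline int global_nonsleepable(int i)
  {
          if (i)
                  bpf_printk("%d", i);
          return i;
  }

  SEC("tc")
  int preempt_nonsleepable_global(struct __sk_buff *ctx)
  {
          bpf_preempt_disable();
          global_nonsleepable(ctx->mark);   /* OK: cannot sleep */
          bpf_preempt_enable();
          return 0;
  }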
+28
tools/testing/selftests/bpf/progs/prepare.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta */ 3 + #include <vmlinux.h> 4 + #include <bpf/bpf_helpers.h> 5 + //#include <bpf/bpf_tracing.h> 6 + 7 + char _license[] SEC("license") = "GPL"; 8 + 9 + int err; 10 + 11 + struct { 12 + __uint(type, BPF_MAP_TYPE_RINGBUF); 13 + __uint(max_entries, 4096); 14 + } ringbuf SEC(".maps"); 15 + 16 + struct { 17 + __uint(type, BPF_MAP_TYPE_ARRAY); 18 + __uint(max_entries, 1); 19 + __type(key, __u32); 20 + __type(value, __u32); 21 + } array_map SEC(".maps"); 22 + 23 + SEC("cgroup_skb/egress") 24 + int program(struct __sk_buff *skb) 25 + { 26 + err = 0; 27 + return 0; 28 + }
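prepare.c itself is intentionally trivial; the point is the userspace side, which splits object setup from program loading. A sketch of that flow, assuming libbpf's new bpf_object__prepare() step and a generated prepare.skel.h (both are assumptions here; the prog_tests code is authoritative):

  #include <bpf/libbpf.h>
  #include "prepare.skel.h"

  static int prepare_sketch(void)
  {
          struct prepare *skel;
          int err;

          skel = prepare__open();
          if (!skel)
                  return -1;

          /* Assumed API: create maps and finish relocations without
           * loading the programs yet; the normal load step follows.
           */
          err = bpf_object__prepare(skel->obj);
          err = err ?: prepare__load(skel);

          prepare__destroy(skel);
          return err;
  }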
+13
tools/testing/selftests/bpf/progs/priv_freplace_prog.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include "vmlinux.h" 5 + #include <bpf/bpf_helpers.h> 6 + 7 + char _license[] SEC("license") = "GPL"; 8 + 9 + SEC("freplace/xdp_prog1") 10 + int new_xdp_prog2(struct xdp_md *xd) 11 + { 12 + return XDP_DROP; 13 + }
+3 -3
tools/testing/selftests/bpf/progs/priv_prog.c
··· 6 6 7 7 char _license[] SEC("license") = "GPL"; 8 8 9 - SEC("kprobe") 10 - int kprobe_prog(void *ctx) 9 + SEC("xdp") 10 + int xdp_prog1(struct xdp_md *xdp) 11 11 { 12 - return 1; 12 + return XDP_DROP; 13 13 }
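Switching priv_prog.c from kprobe to XDP pairs it with priv_freplace_prog.c above for the freplace-from-user-namespace tests. Driving such a pair from userspace looks roughly like the sketch below; the skeleton names are assumptions derived from the file names.

  #include <bpf/libbpf.h>
  #include "priv_prog.skel.h"
  #include "priv_freplace_prog.skel.h"

  static int freplace_sketch(void)
  {
          struct priv_freplace_prog *fr = NULL;
          struct priv_prog *tgt;
          struct bpf_link *link = NULL;
          int err = -1;

          tgt = priv_prog__open_and_load();
          if (!tgt)
                  return -1;

          fr = priv_freplace_prog__open();
          if (!fr)
                  goto out;

          /* Point new_xdp_prog2 at xdp_prog1 in the already-loaded target. */
          err = bpf_program__set_attach_target(fr->progs.new_xdp_prog2,
                                               bpf_program__fd(tgt->progs.xdp_prog1),
                                               "xdp_prog1");
          err = err ?: priv_freplace_prog__load(fr);
          if (!err) {
                  link = bpf_program__attach_freplace(fr->progs.new_xdp_prog2,
                                                      bpf_program__fd(tgt->progs.xdp_prog1),
                                                      "xdp_prog1");
                  err = link ? 0 : -1;
          }

          if (link)
                  bpf_link__destroy(link);
  out:
          priv_freplace_prog__destroy(fr);
          priv_prog__destroy(tgt);
          return err;
  }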
+88
tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include <vmlinux.h> 5 + #include <bpf/bpf_tracing.h> 6 + #include "bpf_misc.h" 7 + #include "../test_kmods/bpf_testmod.h" 8 + #include "../test_kmods/bpf_testmod_kfunc.h" 9 + 10 + char _license[] SEC("license") = "GPL"; 11 + 12 + void __kfunc_btf_root(void) 13 + { 14 + bpf_kfunc_st_ops_inc10(NULL); 15 + } 16 + 17 + static __noinline __used int subprog(struct st_ops_args *args) 18 + { 19 + args->a += 1; 20 + return args->a; 21 + } 22 + 23 + __success 24 + /* prologue */ 25 + __xlated("0: r8 = r1") 26 + __xlated("1: r1 = 0") 27 + __xlated("2: call kernel-function") 28 + __xlated("3: if r0 != 0x0 goto pc+5") 29 + __xlated("4: r6 = *(u64 *)(r8 +0)") 30 + __xlated("5: r7 = *(u64 *)(r6 +0)") 31 + __xlated("6: r7 += 1000") 32 + __xlated("7: *(u64 *)(r6 +0) = r7") 33 + __xlated("8: goto pc+2") 34 + __xlated("9: r1 = r0") 35 + __xlated("10: call kernel-function") 36 + __xlated("11: r1 = r8") 37 + /* save __u64 *ctx to stack */ 38 + __xlated("12: *(u64 *)(r10 -8) = r1") 39 + /* main prog */ 40 + __xlated("13: r1 = *(u64 *)(r1 +0)") 41 + __xlated("14: r6 = r1") 42 + __xlated("15: call kernel-function") 43 + __xlated("16: r1 = r6") 44 + __xlated("17: call pc+") 45 + /* epilogue */ 46 + __xlated("18: r1 = 0") 47 + __xlated("19: r6 = 0") 48 + __xlated("20: call kernel-function") 49 + __xlated("21: if r0 != 0x0 goto pc+6") 50 + __xlated("22: r1 = *(u64 *)(r10 -8)") 51 + __xlated("23: r1 = *(u64 *)(r1 +0)") 52 + __xlated("24: r6 = *(u64 *)(r1 +0)") 53 + __xlated("25: r6 += 10000") 54 + __xlated("26: *(u64 *)(r1 +0) = r6") 55 + __xlated("27: goto pc+2") 56 + __xlated("28: r1 = r0") 57 + __xlated("29: call kernel-function") 58 + __xlated("30: r0 = r6") 59 + __xlated("31: r0 *= 2") 60 + __xlated("32: exit") 61 + SEC("struct_ops/test_pro_epilogue") 62 + __naked int test_kfunc_pro_epilogue(void) 63 + { 64 + asm volatile ( 65 + "r1 = *(u64 *)(r1 +0);" 66 + "r6 = r1;" 67 + "call %[bpf_kfunc_st_ops_inc10];" 68 + "r1 = r6;" 69 + "call subprog;" 70 + "exit;" 71 + : 72 + : __imm(bpf_kfunc_st_ops_inc10) 73 + : __clobber_all); 74 + } 75 + 76 + SEC("syscall") 77 + __retval(22022) /* (PROLOGUE_A [1000] + KFUNC_INC10 + SUBPROG_A [1] + EPILOGUE_A [10000]) * 2 */ 78 + int syscall_pro_epilogue(void *ctx) 79 + { 80 + struct st_ops_args args = {}; 81 + 82 + return bpf_kfunc_st_ops_test_pro_epilogue(&args); 83 + } 84 + 85 + SEC(".struct_ops.link") 86 + struct bpf_testmod_st_ops pro_epilogue_with_kfunc = { 87 + .test_pro_epilogue = (void *)test_kfunc_pro_epilogue, 88 + };
+60 -1
tools/testing/selftests/bpf/progs/rcu_read_lock.c
··· 242 242 } 243 243 244 244 SEC("?lsm.s/bpf") 245 - int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size) 245 + int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size, 246 + bool kernel) 246 247 { 247 248 struct bpf_key *bkey; 248 249 ··· 438 437 439 438 bpf_rcu_read_lock(); 440 439 ret += global_subprog_unlock(ret); 440 + return 0; 441 + } 442 + 443 + int __noinline 444 + global_sleepable_helper_subprog(int i) 445 + { 446 + if (i) 447 + bpf_copy_from_user(&i, sizeof(i), NULL); 448 + return i; 449 + } 450 + 451 + int __noinline 452 + global_sleepable_kfunc_subprog(int i) 453 + { 454 + if (i) 455 + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); 456 + global_subprog(i); 457 + return i; 458 + } 459 + 460 + int __noinline 461 + global_subprog_calling_sleepable_global(int i) 462 + { 463 + if (!i) 464 + global_sleepable_kfunc_subprog(i); 465 + return i; 466 + } 467 + 468 + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") 469 + int rcu_read_lock_sleepable_helper_global_subprog(void *ctx) 470 + { 471 + volatile int ret = 0; 472 + 473 + bpf_rcu_read_lock(); 474 + ret += global_sleepable_helper_subprog(ret); 475 + bpf_rcu_read_unlock(); 476 + return 0; 477 + } 478 + 479 + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") 480 + int rcu_read_lock_sleepable_kfunc_global_subprog(void *ctx) 481 + { 482 + volatile int ret = 0; 483 + 484 + bpf_rcu_read_lock(); 485 + ret += global_sleepable_kfunc_subprog(ret); 486 + bpf_rcu_read_unlock(); 487 + return 0; 488 + } 489 + 490 + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") 491 + int rcu_read_lock_sleepable_global_subprog_indirect(void *ctx) 492 + { 493 + volatile int ret = 0; 494 + 495 + bpf_rcu_read_lock(); 496 + ret += global_subprog_calling_sleepable_global(ret); 497 + bpf_rcu_read_unlock(); 441 498 return 0; 442 499 }
+9 -2
tools/testing/selftests/bpf/progs/read_vsyscall.c
··· 8 8 9 9 int target_pid = 0; 10 10 void *user_ptr = 0; 11 - int read_ret[9]; 11 + int read_ret[10]; 12 12 13 13 char _license[] SEC("license") = "GPL"; 14 14 15 15 /* 16 - * This is the only kfunc, the others are helpers 16 + * These are the kfuncs, the others are helpers 17 17 */ 18 18 int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym; 19 + int bpf_copy_from_user_task_str(void *dst, u32, const void *, 20 + struct task_struct *, u64) __weak __ksym; 19 21 20 22 SEC("fentry/" SYS_PREFIX "sys_nanosleep") 21 23 int do_probe_read(void *ctx) ··· 49 47 read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr, 50 48 bpf_get_current_task_btf(), 0); 51 49 read_ret[8] = bpf_copy_from_user_str((char *)buf, sizeof(buf), user_ptr, 0); 50 + read_ret[9] = bpf_copy_from_user_task_str((char *)buf, 51 + sizeof(buf), 52 + user_ptr, 53 + bpf_get_current_task_btf(), 54 + 0); 52 55 53 56 return 0; 54 57 }
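Beyond the error-path coverage above, the typical use of the new kfunc is pulling a NUL-terminated string out of another task's address space. A sketch using the same declaration style as the selftest; the section name and globals are placeholders, and the task-lookup kfuncs are the existing bpf_task_from_pid()/bpf_task_release() pair.

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>
  #include "bpf_misc.h"

  char _license[] SEC("license") = "GPL";

  extern struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
  extern void bpf_task_release(struct task_struct *p) __ksym;
  extern int bpf_copy_from_user_task_str(void *dst, u32 dst__sz,
                                         const void *unsafe_ptr,
                                         struct task_struct *tsk,
                                         u64 flags) __weak __ksym;

  int target_pid;
  const void *remote_ptr;
  char remote_str[64];

  SEC("fentry/" SYS_PREFIX "sys_getpgid")
  int read_remote_str(void *ctx)
  {
          struct task_struct *task;
          int ret;

          task = bpf_task_from_pid(target_pid);
          if (!task)
                  return 0;

          /* Copies at most sizeof(remote_str) bytes and NUL-terminates dst. */
          ret = bpf_copy_from_user_task_str(remote_str, sizeof(remote_str),
                                            remote_ptr, task, 0);
          bpf_task_release(task);
          return ret < 0 ? 0 : 0;
  }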
+47
tools/testing/selftests/bpf/progs/set_global_vars.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #include "bpf_experimental.h" 4 + #include <bpf/bpf_helpers.h> 5 + #include "bpf_misc.h" 6 + #include <stdbool.h> 7 + 8 + char _license[] SEC("license") = "GPL"; 9 + 10 + enum Enum { EA1 = 0, EA2 = 11 }; 11 + enum Enumu64 {EB1 = 0llu, EB2 = 12llu }; 12 + enum Enums64 { EC1 = 0ll, EC2 = 13ll }; 13 + 14 + const volatile __s64 var_s64 = -1; 15 + const volatile __u64 var_u64 = 0; 16 + const volatile __s32 var_s32 = -1; 17 + const volatile __u32 var_u32 = 0; 18 + const volatile __s16 var_s16 = -1; 19 + const volatile __u16 var_u16 = 0; 20 + const volatile __s8 var_s8 = -1; 21 + const volatile __u8 var_u8 = 0; 22 + const volatile enum Enum var_ea = EA1; 23 + const volatile enum Enumu64 var_eb = EB1; 24 + const volatile enum Enums64 var_ec = EC1; 25 + const volatile bool var_b = false; 26 + 27 + char arr[4] = {0}; 28 + 29 + SEC("socket") 30 + int test_set_globals(void *ctx) 31 + { 32 + volatile __s8 a; 33 + 34 + a = var_s64; 35 + a = var_u64; 36 + a = var_s32; 37 + a = var_u32; 38 + a = var_s16; 39 + a = var_u16; 40 + a = var_s8; 41 + a = var_u8; 42 + a = var_ea; 43 + a = var_eb; 44 + a = var_ec; 45 + a = var_b; 46 + return a; 47 + }
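Globals declared const volatile like these sit in .rodata and are normally overridden from userspace before load. Below is a generic libbpf sketch of that pattern; this is the standard skeleton route, not necessarily the exact mechanism the new prog_tests code uses.

  #include <stdbool.h>
  #include "set_global_vars.skel.h"

  static int set_vars_sketch(void)
  {
          struct set_global_vars *skel;
          int err;

          skel = set_global_vars__open();
          if (!skel)
                  return -1;

          /* const volatile globals live in .rodata and stay writable until load. */
          skel->rodata->var_s64 = -42;
          skel->rodata->var_u32 = 7;
          skel->rodata->var_b = true;

          err = set_global_vars__load(skel);
          set_global_vars__destroy(skel);
          return err;
  }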
+4 -1
tools/testing/selftests/bpf/progs/strncmp_bench.c
··· 35 35 SEC("tp/syscalls/sys_enter_getpgid") 36 36 int strncmp_no_helper(void *ctx) 37 37 { 38 - if (local_strncmp(str, cmp_str_len + 1, target) < 0) 38 + const char *target_str = target; 39 + 40 + barrier_var(target_str); 41 + if (local_strncmp(str, cmp_str_len + 1, target_str) < 0) 39 42 __sync_add_and_fetch(&hits, 1); 40 43 return 0; 41 44 }
+30
tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
··· 1 + #include <vmlinux.h> 2 + #include <bpf/bpf_tracing.h> 3 + #include "../test_kmods/bpf_testmod.h" 4 + #include "bpf_misc.h" 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + void bpf_task_release(struct task_struct *p) __ksym; 9 + 10 + /* This tests struct_ops BPF programs returning a referenced kptr. The verifier should 11 + * allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to task 12 + * here is acquired automatically as the task argument is tagged with "__ref". 13 + */ 14 + SEC("struct_ops/test_return_ref_kptr") 15 + struct task_struct *BPF_PROG(kptr_return, int dummy, 16 + struct task_struct *task, struct cgroup *cgrp) 17 + { 18 + if (dummy % 2) { 19 + bpf_task_release(task); 20 + return NULL; 21 + } 22 + return task; 23 + } 24 + 25 + SEC(".struct_ops.link") 26 + struct bpf_testmod_ops testmod_kptr_return = { 27 + .test_return_ref_kptr = (void *)kptr_return, 28 + }; 29 + 30 +
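Registering this struct_ops map from the test runner follows the usual libbpf flow, assuming bpf_testmod is loaded as for the rest of these tests; the skeleton name below is an assumption derived from the file name.

  #include <bpf/libbpf.h>
  #include "struct_ops_kptr_return.skel.h"

  static void kptr_return_sketch(void)
  {
          struct struct_ops_kptr_return *skel;
          struct bpf_link *link;

          skel = struct_ops_kptr_return__open_and_load();
          if (!skel)
                  return;

          /* Registers the map declared under SEC(".struct_ops.link") above. */
          link = bpf_map__attach_struct_ops(skel->maps.testmod_kptr_return);
          if (link)
                  bpf_link__destroy(link);

          struct_ops_kptr_return__destroy(skel);
  }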
+26
tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
··· 1 + #include <vmlinux.h> 2 + #include <bpf/bpf_tracing.h> 3 + #include "../test_kmods/bpf_testmod.h" 4 + #include "bpf_misc.h" 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; 9 + void bpf_task_release(struct task_struct *p) __ksym; 10 + 11 + /* This test struct_ops BPF programs returning referenced kptr. The verifier should 12 + * reject programs returning a non-zero scalar value. 13 + */ 14 + SEC("struct_ops/test_return_ref_kptr") 15 + __failure __msg("At program exit the register R0 has smin=1 smax=1 should have been in [0, 0]") 16 + struct task_struct *BPF_PROG(kptr_return_fail__invalid_scalar, int dummy, 17 + struct task_struct *task, struct cgroup *cgrp) 18 + { 19 + bpf_task_release(task); 20 + return (struct task_struct *)1; 21 + } 22 + 23 + SEC(".struct_ops.link") 24 + struct bpf_testmod_ops testmod_kptr_return = { 25 + .test_return_ref_kptr = (void *)kptr_return_fail__invalid_scalar, 26 + };
+34
tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
··· 1 + #include <vmlinux.h> 2 + #include <bpf/bpf_tracing.h> 3 + #include "../test_kmods/bpf_testmod.h" 4 + #include "bpf_experimental.h" 5 + #include "bpf_misc.h" 6 + 7 + char _license[] SEC("license") = "GPL"; 8 + 9 + struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; 10 + void bpf_task_release(struct task_struct *p) __ksym; 11 + 12 + /* This test struct_ops BPF programs returning referenced kptr. The verifier should 13 + * reject programs returning a local kptr. 14 + */ 15 + SEC("struct_ops/test_return_ref_kptr") 16 + __failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)") 17 + struct task_struct *BPF_PROG(kptr_return_fail__local_kptr, int dummy, 18 + struct task_struct *task, struct cgroup *cgrp) 19 + { 20 + struct task_struct *t; 21 + 22 + bpf_task_release(task); 23 + 24 + t = bpf_obj_new(typeof(*task)); 25 + if (!t) 26 + return NULL; 27 + 28 + return t; 29 + } 30 + 31 + SEC(".struct_ops.link") 32 + struct bpf_testmod_ops testmod_kptr_return = { 33 + .test_return_ref_kptr = (void *)kptr_return_fail__local_kptr, 34 + };
+25
tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
··· 1 + #include <vmlinux.h> 2 + #include <bpf/bpf_tracing.h> 3 + #include "../test_kmods/bpf_testmod.h" 4 + #include "bpf_misc.h" 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; 9 + void bpf_task_release(struct task_struct *p) __ksym; 10 + 11 + /* This test struct_ops BPF programs returning referenced kptr. The verifier should 12 + * reject programs returning a modified referenced kptr. 13 + */ 14 + SEC("struct_ops/test_return_ref_kptr") 15 + __failure __msg("dereference of modified trusted_ptr_ ptr R0 off={{[0-9]+}} disallowed") 16 + struct task_struct *BPF_PROG(kptr_return_fail__nonzero_offset, int dummy, 17 + struct task_struct *task, struct cgroup *cgrp) 18 + { 19 + return (struct task_struct *)&task->jobctl; 20 + } 21 + 22 + SEC(".struct_ops.link") 23 + struct bpf_testmod_ops testmod_kptr_return = { 24 + .test_return_ref_kptr = (void *)kptr_return_fail__nonzero_offset, 25 + };
+30
tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
··· 1 + #include <vmlinux.h> 2 + #include <bpf/bpf_tracing.h> 3 + #include "../test_kmods/bpf_testmod.h" 4 + #include "bpf_misc.h" 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; 9 + void bpf_task_release(struct task_struct *p) __ksym; 10 + 11 + /* This test struct_ops BPF programs returning referenced kptr. The verifier should 12 + * reject programs returning a referenced kptr of the wrong type. 13 + */ 14 + SEC("struct_ops/test_return_ref_kptr") 15 + __failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)") 16 + struct task_struct *BPF_PROG(kptr_return_fail__wrong_type, int dummy, 17 + struct task_struct *task, struct cgroup *cgrp) 18 + { 19 + struct task_struct *ret; 20 + 21 + ret = (struct task_struct *)bpf_cgroup_acquire(cgrp); 22 + bpf_task_release(task); 23 + 24 + return ret; 25 + } 26 + 27 + SEC(".struct_ops.link") 28 + struct bpf_testmod_ops testmod_kptr_return = { 29 + .test_return_ref_kptr = (void *)kptr_return_fail__wrong_type, 30 + };
+31
tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
··· 1 + #include <vmlinux.h> 2 + #include <bpf/bpf_tracing.h> 3 + #include "../test_kmods/bpf_testmod.h" 4 + #include "bpf_misc.h" 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + __attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __ksym; 9 + 10 + /* This is a test BPF program that uses struct_ops to access a referenced 11 + * kptr argument. This is a test for the verifier to ensure that it 12 + * 1) recognizes the task as a referenced object (i.e., ref_obj_id > 0), and 13 + * 2) the same reference can be acquired from multiple paths as long as it 14 + * has not been released. 15 + */ 16 + SEC("struct_ops/test_refcounted") 17 + int BPF_PROG(refcounted, int dummy, struct task_struct *task) 18 + { 19 + if (dummy == 1) 20 + bpf_task_release(task); 21 + else 22 + bpf_task_release(task); 23 + return 0; 24 + } 25 + 26 + SEC(".struct_ops.link") 27 + struct bpf_testmod_ops testmod_refcounted = { 28 + .test_refcounted = (void *)refcounted, 29 + }; 30 + 31 +
+39
tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
··· 1 + #include <vmlinux.h> 2 + #include <bpf/bpf_tracing.h> 3 + #include "../test_kmods/bpf_testmod.h" 4 + #include "bpf_misc.h" 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + extern void bpf_task_release(struct task_struct *p) __ksym; 9 + 10 + __noinline int subprog_release(__u64 *ctx __arg_ctx) 11 + { 12 + struct task_struct *task = (struct task_struct *)ctx[1]; 13 + int dummy = (int)ctx[0]; 14 + 15 + bpf_task_release(task); 16 + 17 + return dummy + 1; 18 + } 19 + 20 + /* Test that the verifier rejects a program that contains a global 21 + * subprogram with referenced kptr arguments 22 + */ 23 + SEC("struct_ops/test_refcounted") 24 + __failure __log_level(2) 25 + __msg("Validating subprog_release() func#1...") 26 + __msg("invalid bpf_context access off=8. Reference may already be released") 27 + int refcounted_fail__global_subprog(unsigned long long *ctx) 28 + { 29 + struct task_struct *task = (struct task_struct *)ctx[1]; 30 + 31 + bpf_task_release(task); 32 + 33 + return subprog_release(ctx); 34 + } 35 + 36 + SEC(".struct_ops.link") 37 + struct bpf_testmod_ops testmod_ref_acquire = { 38 + .test_refcounted = (void *)refcounted_fail__global_subprog, 39 + };
+22
tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
··· 1 + #include <vmlinux.h> 2 + #include <bpf/bpf_tracing.h> 3 + #include "../test_kmods/bpf_testmod.h" 4 + #include "bpf_misc.h" 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + /* Test that the verifier rejects a program that acquires a referenced 9 + * kptr through context without releasing the reference 10 + */ 11 + SEC("struct_ops/test_refcounted") 12 + __failure __msg("Unreleased reference id=1 alloc_insn=0") 13 + int BPF_PROG(refcounted_fail__ref_leak, int dummy, 14 + struct task_struct *task) 15 + { 16 + return 0; 17 + } 18 + 19 + SEC(".struct_ops.link") 20 + struct bpf_testmod_ops testmod_ref_acquire = { 21 + .test_refcounted = (void *)refcounted_fail__ref_leak, 22 + };
+36
tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + #include <vmlinux.h> 4 + #include <bpf/bpf_tracing.h> 5 + #include "../test_kmods/bpf_testmod.h" 6 + #include "bpf_misc.h" 7 + 8 + char _license[] SEC("license") = "GPL"; 9 + 10 + struct { 11 + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 12 + __uint(max_entries, 1); 13 + __uint(key_size, sizeof(__u32)); 14 + __uint(value_size, sizeof(__u32)); 15 + } prog_array SEC(".maps"); 16 + 17 + /* Test that the verifier rejects a program with referenced kptr arguments 18 + * that tail call 19 + */ 20 + SEC("struct_ops/test_refcounted") 21 + __failure __msg("program with __ref argument cannot tail call") 22 + int refcounted_fail__tail_call(unsigned long long *ctx) 23 + { 24 + struct task_struct *task = (struct task_struct *)ctx[1]; 25 + 26 + bpf_task_release(task); 27 + bpf_tail_call(ctx, &prog_array, 0); 28 + 29 + return 0; 30 + } 31 + 32 + SEC(".struct_ops.link") 33 + struct bpf_testmod_ops testmod_ref_acquire = { 34 + .test_refcounted = (void *)refcounted_fail__tail_call, 35 + }; 36 +
+78
tools/testing/selftests/bpf/progs/summarization.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <vmlinux.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include "bpf_misc.h" 6 + 7 + __noinline 8 + long changes_pkt_data(struct __sk_buff *sk) 9 + { 10 + return bpf_skb_pull_data(sk, 0); 11 + } 12 + 13 + __noinline __weak 14 + long does_not_change_pkt_data(struct __sk_buff *sk) 15 + { 16 + return 0; 17 + } 18 + 19 + SEC("?tc") 20 + int main_changes_with_subprogs(struct __sk_buff *sk) 21 + { 22 + changes_pkt_data(sk); 23 + does_not_change_pkt_data(sk); 24 + return 0; 25 + } 26 + 27 + SEC("?tc") 28 + int main_changes(struct __sk_buff *sk) 29 + { 30 + bpf_skb_pull_data(sk, 0); 31 + return 0; 32 + } 33 + 34 + SEC("?tc") 35 + int main_does_not_change(struct __sk_buff *sk) 36 + { 37 + return 0; 38 + } 39 + 40 + __noinline 41 + long might_sleep(struct pt_regs *ctx __arg_ctx) 42 + { 43 + int i; 44 + 45 + bpf_copy_from_user(&i, sizeof(i), NULL); 46 + return i; 47 + } 48 + 49 + __noinline __weak 50 + long does_not_sleep(struct pt_regs *ctx __arg_ctx) 51 + { 52 + return 0; 53 + } 54 + 55 + SEC("?uprobe.s") 56 + int main_might_sleep_with_subprogs(struct pt_regs *ctx) 57 + { 58 + might_sleep(ctx); 59 + does_not_sleep(ctx); 60 + return 0; 61 + } 62 + 63 + SEC("?uprobe.s") 64 + int main_might_sleep(struct pt_regs *ctx) 65 + { 66 + int i; 67 + 68 + bpf_copy_from_user(&i, sizeof(i), NULL); 69 + return i; 70 + } 71 + 72 + SEC("?uprobe.s") 73 + int main_does_not_sleep(struct pt_regs *ctx) 74 + { 75 + return 0; 76 + } 77 + 78 + char _license[] SEC("license") = "GPL";
+2 -2
tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
··· 51 51 } 52 52 53 53 SEC("lsm/bpf") 54 - int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) 54 + int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 55 55 { 56 56 return bpf_link_create_verify(cmd); 57 57 } 58 58 59 59 SEC("lsm.s/bpf") 60 - int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size) 60 + int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 61 61 { 62 62 return bpf_link_create_verify(cmd); 63 63 }
+5
tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
··· 15 15 16 16 struct core_reloc_arrays_output { 17 17 int a2; 18 + int a3; 18 19 char b123; 19 20 int c1c; 20 21 int d00d; ··· 42 41 { 43 42 struct core_reloc_arrays *in = (void *)&data.in; 44 43 struct core_reloc_arrays_output *out = (void *)&data.out; 44 + int *a; 45 45 46 46 if (CORE_READ(&out->a2, &in->a[2])) 47 47 return 1; ··· 54 52 return 1; 55 53 if (CORE_READ(&out->f01c, &in->f[0][1].c)) 56 54 return 1; 55 + 56 + a = __builtin_preserve_access_index(({ in->a; })); 57 + out->a3 = a[0] + a[1] + a[2] + a[3]; 57 58 58 59 return 0; 59 60 }
+24 -4
tools/testing/selftests/bpf/progs/test_get_xattr.c
··· 6 6 #include <bpf/bpf_helpers.h> 7 7 #include <bpf/bpf_tracing.h> 8 8 #include "bpf_kfuncs.h" 9 + #include "bpf_misc.h" 9 10 10 11 char _license[] SEC("license") = "GPL"; 11 12 ··· 18 17 char value1[32]; 19 18 char value2[32]; 20 19 20 + /* Matches caller of test_get_xattr() in prog_tests/fs_kfuncs.c */ 21 + static const char xattr_names[][64] = { 22 + /* The following work. */ 23 + "user.kfuncs", 24 + "security.bpf.xxx", 25 + 26 + /* The following do not work. */ 27 + "security.bpf", 28 + "security.selinux" 29 + }; 30 + 21 31 SEC("lsm.s/file_open") 22 32 int BPF_PROG(test_file_open, struct file *f) 23 33 { 24 34 struct bpf_dynptr value_ptr; 25 35 __u32 pid; 26 - int ret; 36 + int ret, i; 27 37 28 38 pid = bpf_get_current_pid_tgid() >> 32; 29 39 if (pid != monitored_pid) ··· 42 30 43 31 bpf_dynptr_from_mem(value1, sizeof(value1), 0, &value_ptr); 44 32 45 - ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr); 33 + for (i = 0; i < ARRAY_SIZE(xattr_names); i++) { 34 + ret = bpf_get_file_xattr(f, xattr_names[i], &value_ptr); 35 + if (ret == sizeof(expected_value)) 36 + break; 37 + } 46 38 if (ret != sizeof(expected_value)) 47 39 return 0; 48 40 if (bpf_strncmp(value1, ret, expected_value)) ··· 60 44 { 61 45 struct bpf_dynptr value_ptr; 62 46 __u32 pid; 63 - int ret; 47 + int ret, i; 64 48 65 49 pid = bpf_get_current_pid_tgid() >> 32; 66 50 if (pid != monitored_pid) ··· 68 52 69 53 bpf_dynptr_from_mem(value2, sizeof(value2), 0, &value_ptr); 70 54 71 - ret = bpf_get_dentry_xattr(dentry, "user.kfuncs", &value_ptr); 55 + for (i = 0; i < ARRAY_SIZE(xattr_names); i++) { 56 + ret = bpf_get_dentry_xattr(dentry, xattr_names[i], &value_ptr); 57 + if (ret == sizeof(expected_value)) 58 + break; 59 + } 72 60 if (ret != sizeof(expected_value)) 73 61 return 0; 74 62 if (bpf_strncmp(value2, ret, expected_value))
+28
tools/testing/selftests/bpf/progs/test_kernel_flag.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* 4 + * Copyright (C) 2025 Microsoft Corporation 5 + * 6 + * Author: Blaise Boscaccy <bboscaccy@linux.microsoft.com> 7 + */ 8 + 9 + #include "vmlinux.h" 10 + #include <errno.h> 11 + #include <bpf/bpf_helpers.h> 12 + #include <bpf/bpf_tracing.h> 13 + 14 + char _license[] SEC("license") = "GPL"; 15 + 16 + __u32 monitored_tid; 17 + 18 + SEC("lsm.s/bpf") 19 + int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 20 + { 21 + __u32 tid; 22 + 23 + tid = bpf_get_current_pid_tgid() & 0xFFFFFFFF; 24 + if (!kernel || tid != monitored_tid) 25 + return 0; 26 + else 27 + return -EINVAL; 28 + }
+3 -3
tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
··· 36 36 37 37 SEC("?lsm.s/bpf") 38 38 __failure __msg("cannot pass in dynptr at an offset=-8") 39 - int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) 39 + int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 40 40 { 41 41 unsigned long val; 42 42 ··· 46 46 47 47 SEC("?lsm.s/bpf") 48 48 __failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") 49 - int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) 49 + int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 50 50 { 51 51 unsigned long val = 0; 52 52 ··· 55 55 } 56 56 57 57 SEC("lsm.s/bpf") 58 - int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size) 58 + int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 59 59 { 60 60 struct bpf_key *trusted_keyring; 61 61 struct bpf_dynptr ptr;
+1 -1
tools/testing/selftests/bpf/progs/test_lookup_key.c
··· 23 23 extern void bpf_key_put(struct bpf_key *key) __ksym; 24 24 25 25 SEC("lsm.s/bpf") 26 - int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size) 26 + int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 27 27 { 28 28 struct bpf_key *bkey; 29 29 __u32 pid;
+1 -1
tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
··· 7 7 char tp_name[128]; 8 8 9 9 SEC("lsm.s/bpf") 10 - int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) 10 + int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 11 11 { 12 12 switch (cmd) { 13 13 case BPF_RAW_TRACEPOINT_OPEN:
-1
tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2018 Facebook */ 3 3 4 - #include <stdlib.h> 5 4 #include <linux/in.h> 6 5 #include <linux/ip.h> 7 6 #include <linux/ipv6.h>
+133
tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include "vmlinux.h" 5 + #include <errno.h> 6 + #include <bpf/bpf_tracing.h> 7 + #include "bpf_kfuncs.h" 8 + #include "bpf_misc.h" 9 + 10 + char _license[] SEC("license") = "GPL"; 11 + 12 + __u32 monitored_pid; 13 + 14 + const char xattr_foo[] = "security.bpf.foo"; 15 + const char xattr_bar[] = "security.bpf.bar"; 16 + static const char xattr_selinux[] = "security.selinux"; 17 + char value_bar[] = "world"; 18 + char read_value[32]; 19 + 20 + bool set_security_bpf_bar_success; 21 + bool remove_security_bpf_bar_success; 22 + bool set_security_selinux_fail; 23 + bool remove_security_selinux_fail; 24 + 25 + char name_buf[32]; 26 + 27 + static inline bool name_match_foo(const char *name) 28 + { 29 + bpf_probe_read_kernel(name_buf, sizeof(name_buf), name); 30 + 31 + return !bpf_strncmp(name_buf, sizeof(xattr_foo), xattr_foo); 32 + } 33 + 34 + /* Test bpf_set_dentry_xattr and bpf_remove_dentry_xattr */ 35 + SEC("lsm.s/inode_getxattr") 36 + int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name) 37 + { 38 + struct bpf_dynptr value_ptr; 39 + __u32 pid; 40 + int ret; 41 + 42 + pid = bpf_get_current_pid_tgid() >> 32; 43 + if (pid != monitored_pid) 44 + return 0; 45 + 46 + /* Only do the following for security.bpf.foo */ 47 + if (!name_match_foo(name)) 48 + return 0; 49 + 50 + bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr); 51 + 52 + /* read security.bpf.bar */ 53 + ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr); 54 + 55 + if (ret < 0) { 56 + /* If security.bpf.bar doesn't exist, set it */ 57 + bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr); 58 + 59 + ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0); 60 + if (!ret) 61 + set_security_bpf_bar_success = true; 62 + ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0); 63 + if (ret) 64 + set_security_selinux_fail = true; 65 + } else { 66 + /* If security.bpf.bar exists, remove it */ 67 + ret = bpf_remove_dentry_xattr(dentry, xattr_bar); 68 + if (!ret) 69 + remove_security_bpf_bar_success = true; 70 + 71 + ret = bpf_remove_dentry_xattr(dentry, xattr_selinux); 72 + if (ret) 73 + remove_security_selinux_fail = true; 74 + } 75 + 76 + return 0; 77 + } 78 + 79 + bool locked_set_security_bpf_bar_success; 80 + bool locked_remove_security_bpf_bar_success; 81 + bool locked_set_security_selinux_fail; 82 + bool locked_remove_security_selinux_fail; 83 + 84 + /* Test bpf_set_dentry_xattr_locked and bpf_remove_dentry_xattr_locked. 85 + * It not necessary to differentiate the _locked version and the 86 + * not-_locked version in the BPF program. The verifier will fix them up 87 + * properly. 
88 + */ 89 + SEC("lsm.s/inode_setxattr") 90 + int BPF_PROG(test_inode_setxattr, struct mnt_idmap *idmap, 91 + struct dentry *dentry, const char *name, 92 + const void *value, size_t size, int flags) 93 + { 94 + struct bpf_dynptr value_ptr; 95 + __u32 pid; 96 + int ret; 97 + 98 + pid = bpf_get_current_pid_tgid() >> 32; 99 + if (pid != monitored_pid) 100 + return 0; 101 + 102 + /* Only do the following for security.bpf.foo */ 103 + if (!name_match_foo(name)) 104 + return 0; 105 + 106 + bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr); 107 + 108 + /* read security.bpf.bar */ 109 + ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr); 110 + 111 + if (ret < 0) { 112 + /* If security.bpf.bar doesn't exist, set it */ 113 + bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr); 114 + 115 + ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0); 116 + if (!ret) 117 + locked_set_security_bpf_bar_success = true; 118 + ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0); 119 + if (ret) 120 + locked_set_security_selinux_fail = true; 121 + } else { 122 + /* If security.bpf.bar exists, remove it */ 123 + ret = bpf_remove_dentry_xattr(dentry, xattr_bar); 124 + if (!ret) 125 + locked_remove_security_bpf_bar_success = true; 126 + 127 + ret = bpf_remove_dentry_xattr(dentry, xattr_selinux); 128 + if (ret) 129 + locked_remove_security_selinux_fail = true; 130 + } 131 + 132 + return 0; 133 + }
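The LSM programs above are driven by ordinary xattr syscalls issued by the monitored process, so the userspace side needs nothing BPF-specific. A sketch follows; the path and value are placeholders, and the process must have privilege to touch security.* xattrs, as the selftests do when run as root.

  #include <sys/xattr.h>

  /* Touching security.bpf.foo from the monitored pid drives the hooks above:
   * getxattr() enters lsm.s/inode_getxattr, setxattr() enters
   * lsm.s/inode_setxattr, and the BPF programs then add or remove
   * security.bpf.bar via the new kfuncs.
   */
  static int poke_xattrs(const char *path)
  {
          const char value[] = "hello";
          char buf[32];
          int err;

          err = setxattr(path, "security.bpf.foo", value, sizeof(value), 0);
          if (err)
                  return err;
          if (getxattr(path, "security.bpf.foo", buf, sizeof(buf)) < 0)
                  return -1;
          return removexattr(path, "security.bpf.foo");
  }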
+69
tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
··· 245 245 return ret; 246 246 } 247 247 248 + int __noinline 249 + global_subprog_int(int i) 250 + { 251 + if (i) 252 + bpf_printk("%p", &i); 253 + return i; 254 + } 255 + 256 + int __noinline 257 + global_sleepable_helper_subprog(int i) 258 + { 259 + if (i) 260 + bpf_copy_from_user(&i, sizeof(i), NULL); 261 + return i; 262 + } 263 + 264 + int __noinline 265 + global_sleepable_kfunc_subprog(int i) 266 + { 267 + if (i) 268 + bpf_copy_from_user_str(&i, sizeof(i), NULL, 0); 269 + global_subprog_int(i); 270 + return i; 271 + } 272 + 273 + int __noinline 274 + global_subprog_calling_sleepable_global(int i) 275 + { 276 + if (!i) 277 + global_sleepable_kfunc_subprog(i); 278 + return i; 279 + } 280 + 281 + SEC("?syscall") 282 + int lock_global_sleepable_helper_subprog(struct __sk_buff *ctx) 283 + { 284 + int ret = 0; 285 + 286 + bpf_spin_lock(&lockA); 287 + if (ctx->mark == 42) 288 + ret = global_sleepable_helper_subprog(ctx->mark); 289 + bpf_spin_unlock(&lockA); 290 + return ret; 291 + } 292 + 293 + SEC("?syscall") 294 + int lock_global_sleepable_kfunc_subprog(struct __sk_buff *ctx) 295 + { 296 + int ret = 0; 297 + 298 + bpf_spin_lock(&lockA); 299 + if (ctx->mark == 42) 300 + ret = global_sleepable_kfunc_subprog(ctx->mark); 301 + bpf_spin_unlock(&lockA); 302 + return ret; 303 + } 304 + 305 + SEC("?syscall") 306 + int lock_global_sleepable_subprog_indirect(struct __sk_buff *ctx) 307 + { 308 + int ret = 0; 309 + 310 + bpf_spin_lock(&lockA); 311 + if (ctx->mark == 42) 312 + ret = global_subprog_calling_sleepable_global(ctx->mark); 313 + bpf_spin_unlock(&lockA); 314 + return ret; 315 + } 316 + 248 317 char _license[] SEC("license") = "GPL";
+1 -1
tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
··· 49 49 } 50 50 51 51 SEC("lsm.s/bpf") 52 - int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) 52 + int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 53 53 { 54 54 struct cgroup *cgrp = NULL; 55 55 struct task_struct *task;
+14
tools/testing/selftests/bpf/progs/test_usdt.c
··· 11 11 u64 usdt0_cookie; 12 12 int usdt0_arg_cnt; 13 13 int usdt0_arg_ret; 14 + int usdt0_arg_size; 14 15 15 16 SEC("usdt") 16 17 int usdt0(struct pt_regs *ctx) ··· 27 26 usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx); 28 27 /* should return -ENOENT for any arg_num */ 29 28 usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp); 29 + usdt0_arg_size = bpf_usdt_arg_size(ctx, bpf_get_prandom_u32()); 30 30 return 0; 31 31 } 32 32 ··· 36 34 int usdt3_arg_cnt; 37 35 int usdt3_arg_rets[3]; 38 36 u64 usdt3_args[3]; 37 + int usdt3_arg_sizes[3]; 39 38 40 39 SEC("usdt//proc/self/exe:test:usdt3") 41 40 int usdt3(struct pt_regs *ctx) ··· 53 50 54 51 usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp); 55 52 usdt3_args[0] = (int)tmp; 53 + usdt3_arg_sizes[0] = bpf_usdt_arg_size(ctx, 0); 56 54 57 55 usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp); 58 56 usdt3_args[1] = (long)tmp; 57 + usdt3_arg_sizes[1] = bpf_usdt_arg_size(ctx, 1); 59 58 60 59 usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp); 61 60 usdt3_args[2] = (uintptr_t)tmp; 61 + usdt3_arg_sizes[2] = bpf_usdt_arg_size(ctx, 2); 62 62 63 63 return 0; 64 64 } ··· 70 64 u64 usdt12_cookie; 71 65 int usdt12_arg_cnt; 72 66 u64 usdt12_args[12]; 67 + int usdt12_arg_sizes[12]; 73 68 74 69 SEC("usdt//proc/self/exe:test:usdt12") 75 70 int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5, 76 71 long a6, __u64 a7, uintptr_t a8, int a9, short a10, 77 72 short a11, signed char a12) 78 73 { 74 + int i; 75 + 79 76 if (my_pid != (bpf_get_current_pid_tgid() >> 32)) 80 77 return 0; 81 78 ··· 99 90 usdt12_args[9] = a10; 100 91 usdt12_args[10] = a11; 101 92 usdt12_args[11] = a12; 93 + 94 + bpf_for(i, 0, 12) { 95 + usdt12_arg_sizes[i] = bpf_usdt_arg_size(ctx, i); 96 + } 97 + 102 98 return 0; 103 99 } 104 100
+1 -1
tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
··· 37 37 char _license[] SEC("license") = "GPL"; 38 38 39 39 SEC("lsm.s/bpf") 40 - int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size) 40 + int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) 41 41 { 42 42 struct bpf_dynptr data_ptr, sig_ptr; 43 43 struct data *data_val;
+10 -10
tools/testing/selftests/bpf/progs/test_xdp_vlan.c
··· 102 102 #define TESTVLAN 4011 /* 0xFAB */ 103 103 // #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */ 104 104 105 - SEC("xdp_drop_vlan_4011") 106 - int xdp_prognum0(struct xdp_md *ctx) 105 + SEC("xdp") 106 + int xdp_drop_vlan_4011(struct xdp_md *ctx) 107 107 { 108 108 void *data_end = (void *)(long)ctx->data_end; 109 109 void *data = (void *)(long)ctx->data; ··· 144 144 /* Changing VLAN to zero, have same practical effect as removing the VLAN. */ 145 145 #define TO_VLAN 0 146 146 147 - SEC("xdp_vlan_change") 148 - int xdp_prognum1(struct xdp_md *ctx) 147 + SEC("xdp") 148 + int xdp_vlan_change(struct xdp_md *ctx) 149 149 { 150 150 void *data_end = (void *)(long)ctx->data_end; 151 151 void *data = (void *)(long)ctx->data; ··· 178 178 #endif 179 179 #define VLAN_HDR_SZ 4 /* bytes */ 180 180 181 - SEC("xdp_vlan_remove_outer") 182 - int xdp_prognum2(struct xdp_md *ctx) 181 + SEC("xdp") 182 + int xdp_vlan_remove_outer(struct xdp_md *ctx) 183 183 { 184 184 void *data_end = (void *)(long)ctx->data_end; 185 185 void *data = (void *)(long)ctx->data; ··· 224 224 p[1] = p[0]; 225 225 } 226 226 227 - SEC("xdp_vlan_remove_outer2") 228 - int xdp_prognum3(struct xdp_md *ctx) 227 + SEC("xdp") 228 + int xdp_vlan_remove_outer2(struct xdp_md *ctx) 229 229 { 230 230 void *data_end = (void *)(long)ctx->data_end; 231 231 void *data = (void *)(long)ctx->data; ··· 254 254 * The TC-clsact eBPF programs (currently) need to be attach via TC commands 255 255 */ 256 256 257 - SEC("tc_vlan_push") 258 - int _tc_progA(struct __sk_buff *ctx) 257 + SEC("tc") 258 + int tc_vlan_push(struct __sk_buff *ctx) 259 259 { 260 260 bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN); 261 261
+48 -10
tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
··· 620 620 621 621 SEC("raw_tp") 622 622 __arch_x86_64 623 + __log_level(4) __msg("stack depth 24") 624 + /* may_goto counter at -24 */ 625 + __xlated("0: *(u64 *)(r10 -24) =") 626 + /* may_goto timestamp at -16 */ 627 + __xlated("1: *(u64 *)(r10 -16) =") 628 + __xlated("2: r1 = 1") 629 + __xlated("...") 630 + __xlated("4: r0 = &(void __percpu *)(r0)") 631 + __xlated("...") 632 + /* may_goto expansion starts */ 633 + __xlated("6: r11 = *(u64 *)(r10 -24)") 634 + __xlated("7: if r11 == 0x0 goto pc+6") 635 + __xlated("8: r11 -= 1") 636 + __xlated("9: if r11 != 0x0 goto pc+2") 637 + __xlated("10: r11 = -24") 638 + __xlated("11: call unknown") 639 + __xlated("12: *(u64 *)(r10 -24) = r11") 640 + /* may_goto expansion ends */ 641 + __xlated("13: *(u64 *)(r10 -8) = r1") 642 + __xlated("14: exit") 643 + __success 644 + __naked void may_goto_interaction_x86_64(void) 645 + { 646 + asm volatile ( 647 + "r1 = 1;" 648 + "*(u64 *)(r10 - 16) = r1;" 649 + "call %[bpf_get_smp_processor_id];" 650 + "r1 = *(u64 *)(r10 - 16);" 651 + ".8byte %[may_goto];" 652 + /* just touch some stack at -8 */ 653 + "*(u64 *)(r10 - 8) = r1;" 654 + "exit;" 655 + : 656 + : __imm(bpf_get_smp_processor_id), 657 + __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0)) 658 + : __clobber_all); 659 + } 660 + 661 + SEC("raw_tp") 662 + __arch_arm64 623 663 __log_level(4) __msg("stack depth 16") 624 664 /* may_goto counter at -16 */ 625 665 __xlated("0: *(u64 *)(r10 -16) =") 626 666 __xlated("1: r1 = 1") 627 - __xlated("...") 628 - __xlated("3: r0 = &(void __percpu *)(r0)") 629 - __xlated("...") 667 + __xlated("2: call bpf_get_smp_processor_id") 630 668 /* may_goto expansion starts */ 631 - __xlated("5: r11 = *(u64 *)(r10 -16)") 632 - __xlated("6: if r11 == 0x0 goto pc+3") 633 - __xlated("7: r11 -= 1") 634 - __xlated("8: *(u64 *)(r10 -16) = r11") 669 + __xlated("3: r11 = *(u64 *)(r10 -16)") 670 + __xlated("4: if r11 == 0x0 goto pc+3") 671 + __xlated("5: r11 -= 1") 672 + __xlated("6: *(u64 *)(r10 -16) = r11") 635 673 /* may_goto expansion ends */ 636 - __xlated("9: *(u64 *)(r10 -8) = r1") 637 - __xlated("10: exit") 674 + __xlated("7: *(u64 *)(r10 -8) = r1") 675 + __xlated("8: exit") 638 676 __success 639 - __naked void may_goto_interaction(void) 677 + __naked void may_goto_interaction_arm64(void) 640 678 { 641 679 asm volatile ( 642 680 "r1 = 1;"
+1 -5
tools/testing/selftests/bpf/progs/verifier_gotol.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ 9 - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ 10 - defined(__TARGET_ARCH_loongarch)) && \ 11 - __clang_major__ >= 18 7 + #ifdef CAN_USE_GOTOL 12 8 13 9 SEC("socket") 14 10 __description("gotol, small_imm")
+1 -5
tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
··· 407 407 : __clobber_all); 408 408 } 409 409 410 - #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 411 - (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ 412 - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ 413 - defined(__TARGET_ARCH_loongarch)) && \ 414 - __clang_major__ >= 18 410 + #ifdef CAN_USE_GOTOL 415 411 SEC("socket") 416 412 __success __retval(0) 417 413 __naked void gotol_and_may_goto(void)
+218
tools/testing/selftests/bpf/progs/verifier_load_acquire.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Google LLC. */ 3 + 4 + #include <linux/bpf.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include "../../../include/linux/filter.h" 7 + #include "bpf_misc.h" 8 + 9 + #ifdef CAN_USE_LOAD_ACQ_STORE_REL 10 + 11 + SEC("socket") 12 + __description("load-acquire, 8-bit") 13 + __success __success_unpriv __retval(0x12) 14 + __naked void load_acquire_8(void) 15 + { 16 + asm volatile ( 17 + "w1 = 0x12;" 18 + "*(u8 *)(r10 - 1) = w1;" 19 + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1)); 20 + "exit;" 21 + : 22 + : __imm_insn(load_acquire_insn, 23 + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1)) 24 + : __clobber_all); 25 + } 26 + 27 + SEC("socket") 28 + __description("load-acquire, 16-bit") 29 + __success __success_unpriv __retval(0x1234) 30 + __naked void load_acquire_16(void) 31 + { 32 + asm volatile ( 33 + "w1 = 0x1234;" 34 + "*(u16 *)(r10 - 2) = w1;" 35 + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2)); 36 + "exit;" 37 + : 38 + : __imm_insn(load_acquire_insn, 39 + BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2)) 40 + : __clobber_all); 41 + } 42 + 43 + SEC("socket") 44 + __description("load-acquire, 32-bit") 45 + __success __success_unpriv __retval(0x12345678) 46 + __naked void load_acquire_32(void) 47 + { 48 + asm volatile ( 49 + "w1 = 0x12345678;" 50 + "*(u32 *)(r10 - 4) = w1;" 51 + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4)); 52 + "exit;" 53 + : 54 + : __imm_insn(load_acquire_insn, 55 + BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4)) 56 + : __clobber_all); 57 + } 58 + 59 + SEC("socket") 60 + __description("load-acquire, 64-bit") 61 + __success __success_unpriv __retval(0x1234567890abcdef) 62 + __naked void load_acquire_64(void) 63 + { 64 + asm volatile ( 65 + "r1 = 0x1234567890abcdef ll;" 66 + "*(u64 *)(r10 - 8) = r1;" 67 + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8)); 68 + "exit;" 69 + : 70 + : __imm_insn(load_acquire_insn, 71 + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8)) 72 + : __clobber_all); 73 + } 74 + 75 + SEC("socket") 76 + __description("load-acquire with uninitialized src_reg") 77 + __failure __failure_unpriv __msg("R2 !read_ok") 78 + __naked void load_acquire_with_uninitialized_src_reg(void) 79 + { 80 + asm volatile ( 81 + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r2 + 0)); 82 + "exit;" 83 + : 84 + : __imm_insn(load_acquire_insn, 85 + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) 86 + : __clobber_all); 87 + } 88 + 89 + SEC("socket") 90 + __description("load-acquire with non-pointer src_reg") 91 + __failure __failure_unpriv __msg("R1 invalid mem access 'scalar'") 92 + __naked void load_acquire_with_non_pointer_src_reg(void) 93 + { 94 + asm volatile ( 95 + "r1 = 0;" 96 + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r1 + 0)); 97 + "exit;" 98 + : 99 + : __imm_insn(load_acquire_insn, 100 + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0)) 101 + : __clobber_all); 102 + } 103 + 104 + SEC("socket") 105 + __description("misaligned load-acquire") 106 + __failure __failure_unpriv __msg("misaligned stack access off") 107 + __flag(BPF_F_ANY_ALIGNMENT) 108 + __naked void load_acquire_misaligned(void) 109 + { 110 + asm volatile ( 111 + "r1 = 0;" 112 + "*(u64 *)(r10 - 8) = r1;" 113 + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 5)); 114 + "exit;" 115 + : 116 + : __imm_insn(load_acquire_insn, 117 + 
BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -5)) 118 + : __clobber_all); 119 + } 120 + 121 + SEC("socket") 122 + __description("load-acquire from ctx pointer") 123 + __failure __failure_unpriv __msg("BPF_ATOMIC loads from R1 ctx is not allowed") 124 + __naked void load_acquire_from_ctx_pointer(void) 125 + { 126 + asm volatile ( 127 + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r1 + 0)); 128 + "exit;" 129 + : 130 + : __imm_insn(load_acquire_insn, 131 + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0)) 132 + : __clobber_all); 133 + } 134 + 135 + SEC("xdp") 136 + __description("load-acquire from pkt pointer") 137 + __failure __msg("BPF_ATOMIC loads from R2 pkt is not allowed") 138 + __naked void load_acquire_from_pkt_pointer(void) 139 + { 140 + asm volatile ( 141 + "r2 = *(u32 *)(r1 + %[xdp_md_data]);" 142 + "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);" 143 + "r1 = r2;" 144 + "r1 += 8;" 145 + "if r1 >= r3 goto l0_%=;" 146 + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0)); 147 + "l0_%=: r0 = 0;" 148 + "exit;" 149 + : 150 + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), 151 + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)), 152 + __imm_insn(load_acquire_insn, 153 + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) 154 + : __clobber_all); 155 + } 156 + 157 + SEC("flow_dissector") 158 + __description("load-acquire from flow_keys pointer") 159 + __failure __msg("BPF_ATOMIC loads from R2 flow_keys is not allowed") 160 + __naked void load_acquire_from_flow_keys_pointer(void) 161 + { 162 + asm volatile ( 163 + "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);" 164 + ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0)); 165 + "exit;" 166 + : 167 + : __imm_const(__sk_buff_flow_keys, 168 + offsetof(struct __sk_buff, flow_keys)), 169 + __imm_insn(load_acquire_insn, 170 + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0)) 171 + : __clobber_all); 172 + } 173 + 174 + SEC("sk_reuseport") 175 + __description("load-acquire from sock pointer") 176 + __failure __msg("BPF_ATOMIC loads from R2 sock is not allowed") 177 + __naked void load_acquire_from_sock_pointer(void) 178 + { 179 + asm volatile ( 180 + "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);" 181 + // w0 = load_acquire((u8 *)(r2 + offsetof(struct bpf_sock, family))); 182 + ".8byte %[load_acquire_insn];" 183 + "exit;" 184 + : 185 + : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)), 186 + __imm_insn(load_acquire_insn, 187 + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 188 + offsetof(struct bpf_sock, family))) 189 + : __clobber_all); 190 + } 191 + 192 + SEC("socket") 193 + __description("load-acquire with invalid register R15") 194 + __failure __failure_unpriv __msg("R15 is invalid") 195 + __naked void load_acquire_with_invalid_reg(void) 196 + { 197 + asm volatile ( 198 + ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r15 + 0)); 199 + "exit;" 200 + : 201 + : __imm_insn(load_acquire_insn, 202 + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, 15 /* invalid reg */, 0)) 203 + : __clobber_all); 204 + } 205 + 206 + #else /* CAN_USE_LOAD_ACQ_STORE_REL */ 207 + 208 + SEC("socket") 209 + __description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support load-acquire, use a dummy test") 210 + __success 211 + int dummy_test(void) 212 + { 213 + return 0; 214 + } 215 + 216 + #endif /* CAN_USE_LOAD_ACQ_STORE_REL */ 217 + 218 + char _license[] SEC("license") = "GPL";
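The .8byte-encoded instructions above exist because older toolchains cannot emit load-acquire/store-release directly. With a compiler that can (the CAN_USE_LOAD_ACQ_STORE_REL guard), the same semantics are expected from ordinary acquire/release atomics; which compiler versions and flags actually lower the builtins to BPF_LOAD_ACQ/BPF_STORE_REL is an assumption here, with the selftest guard being authoritative.

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>

  char _license[] SEC("license") = "GPL";

  long shared;

  SEC("socket")
  int acq_rel_sketch(void *ctx)
  {
          long v;

          /* Intended to lower to store_release((u64 *)&shared, 42). */
          __atomic_store_n(&shared, 42, __ATOMIC_RELEASE);

          /* Intended to lower to v = load_acquire((u64 *)&shared). */
          v = __atomic_load_n(&shared, __ATOMIC_ACQUIRE);

          return v == 42 ? 0 : 1;
  }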
+32 -2
tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
··· 69 69 } 70 70 71 71 SEC("raw_tp") 72 - __description("may_goto batch with offsets 2/0") 72 + __description("may_goto batch with offsets 2/0 - x86_64") 73 73 __arch_x86_64 74 + __xlated("0: *(u64 *)(r10 -16) = 65535") 75 + __xlated("1: *(u64 *)(r10 -8) = 0") 76 + __xlated("2: r11 = *(u64 *)(r10 -16)") 77 + __xlated("3: if r11 == 0x0 goto pc+6") 78 + __xlated("4: r11 -= 1") 79 + __xlated("5: if r11 != 0x0 goto pc+2") 80 + __xlated("6: r11 = -16") 81 + __xlated("7: call unknown") 82 + __xlated("8: *(u64 *)(r10 -16) = r11") 83 + __xlated("9: r0 = 1") 84 + __xlated("10: r0 = 2") 85 + __xlated("11: exit") 86 + __success 87 + __naked void may_goto_batch_2_x86_64(void) 88 + { 89 + asm volatile ( 90 + ".8byte %[may_goto1];" 91 + ".8byte %[may_goto3];" 92 + "r0 = 1;" 93 + "r0 = 2;" 94 + "exit;" 95 + : 96 + : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)), 97 + __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) 98 + : __clobber_all); 99 + } 100 + 101 + SEC("raw_tp") 102 + __description("may_goto batch with offsets 2/0 - arm64") 103 + __arch_arm64 74 104 __xlated("0: *(u64 *)(r10 -8) = 8388608") 75 105 __xlated("1: r11 = *(u64 *)(r10 -8)") 76 106 __xlated("2: if r11 == 0x0 goto pc+3") ··· 110 80 __xlated("6: r0 = 2") 111 81 __xlated("7: exit") 112 82 __success 113 - __naked void may_goto_batch_2(void) 83 + __naked void may_goto_batch_2_arm64(void) 114 84 { 115 85 asm volatile ( 116 86 ".8byte %[may_goto1];"
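Note: the split into _x86_64/_arm64 variants reflects the timed may_goto work in this merge: on x86_64 the verifier now expands may_goto into a 16-byte stack pair (count plus timestamp) with an out-of-line helper call, while arm64 at this point still uses the plain BPF_MAX_LOOPS counter. A rough C restatement of the two __xlated sequences checked above (descriptive names only, not kernel identifiers):

	#include <stdbool.h>

	typedef unsigned long long u64;

	/* Stand-in for the arch helper behind "call unknown" in the x86_64 dump. */
	extern u64 refresh_count_from_timer(u64 *timestamp);

	/* x86_64 "timed" expansion: count at r10-16, timestamp at r10-8. */
	static bool may_goto_x86_64_timed(u64 *count, u64 *timestamp)
	{
		if (*count == 0)
			return true;		/* take the may_goto branch */
		*count -= 1;
		if (*count == 0)
			*count = refresh_count_from_timer(timestamp);
		return false;			/* count is written back to r10-16 */
	}

	/* arm64 expansion: single counter at r10-8, seeded with 8388608 (BPF_MAX_LOOPS). */
	static bool may_goto_arm64_plain(u64 *count)
	{
		if (*count == 0)
			return true;
		*count -= 1;
		return false;
	}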
+49
tools/testing/selftests/bpf/progs/verifier_precision.c
··· 2 2 /* Copyright (C) 2023 SUSE LLC */ 3 3 #include <linux/bpf.h> 4 4 #include <bpf/bpf_helpers.h> 5 + #include "../../../include/linux/filter.h" 5 6 #include "bpf_misc.h" 6 7 7 8 SEC("?raw_tp") ··· 91 90 ::: __clobber_all); 92 91 } 93 92 93 + #if defined(ENABLE_ATOMICS_TESTS) && \ 94 + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) 95 + 96 + SEC("?raw_tp") 97 + __success __log_level(2) 98 + __msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10") 99 + __msg("mark_precise: frame0: regs=r2 stack= before 2: (db) r2 = load_acquire((u64 *)(r10 -8))") 100 + __msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1") 101 + __msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") 102 + __naked int bpf_load_acquire(void) 103 + { 104 + asm volatile ( 105 + "r1 = 8;" 106 + "*(u64 *)(r10 - 8) = r1;" 107 + ".8byte %[load_acquire_insn];" /* r2 = load_acquire((u64 *)(r10 - 8)); */ 108 + "r3 = r10;" 109 + "r3 += r2;" /* mark_precise */ 110 + "r0 = 0;" 111 + "exit;" 112 + : 113 + : __imm_insn(load_acquire_insn, 114 + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8)) 115 + : __clobber_all); 116 + } 117 + 118 + SEC("?raw_tp") 119 + __success __log_level(2) 120 + __msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r2 = r10") 121 + __msg("mark_precise: frame0: regs=r1 stack= before 2: (79) r1 = *(u64 *)(r10 -8)") 122 + __msg("mark_precise: frame0: regs= stack=-8 before 1: (db) store_release((u64 *)(r10 -8), r1)") 123 + __msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8") 124 + __naked int bpf_store_release(void) 125 + { 126 + asm volatile ( 127 + "r1 = 8;" 128 + ".8byte %[store_release_insn];" /* store_release((u64 *)(r10 - 8), r1); */ 129 + "r1 = *(u64 *)(r10 - 8);" 130 + "r2 = r10;" 131 + "r2 += r1;" /* mark_precise */ 132 + "r0 = 0;" 133 + "exit;" 134 + : 135 + : __imm_insn(store_release_insn, 136 + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8)) 137 + : __clobber_all); 138 + } 139 + 140 + #endif /* load-acquire, store-release */ 94 141 #endif /* v4 instruction */ 95 142 96 143 SEC("?raw_tp")
+52
tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
··· 481 481 : __clobber_all); 482 482 } 483 483 484 + SEC("socket") 485 + __description("PTR_TO_STACK stack size > 512") 486 + __failure __msg("invalid write to stack R1 off=-520 size=8") 487 + __naked void stack_check_size_gt_512(void) 488 + { 489 + asm volatile (" \ 490 + r1 = r10; \ 491 + r1 += -520; \ 492 + r0 = 42; \ 493 + *(u64*)(r1 + 0) = r0; \ 494 + exit; \ 495 + " ::: __clobber_all); 496 + } 497 + 498 + #ifdef __BPF_FEATURE_MAY_GOTO 499 + SEC("socket") 500 + __description("PTR_TO_STACK stack size 512 with may_goto with jit") 501 + __load_if_JITed() 502 + __success __retval(42) 503 + __naked void stack_check_size_512_with_may_goto_jit(void) 504 + { 505 + asm volatile (" \ 506 + r1 = r10; \ 507 + r1 += -512; \ 508 + r0 = 42; \ 509 + *(u32*)(r1 + 0) = r0; \ 510 + may_goto l0_%=; \ 511 + r2 = 100; \ 512 + l0_%=: \ 513 + exit; \ 514 + " ::: __clobber_all); 515 + } 516 + 517 + SEC("socket") 518 + __description("PTR_TO_STACK stack size 512 with may_goto without jit") 519 + __load_if_no_JITed() 520 + __failure __msg("stack size 520(extra 8) is too large") 521 + __naked void stack_check_size_512_with_may_goto(void) 522 + { 523 + asm volatile (" \ 524 + r1 = r10; \ 525 + r1 += -512; \ 526 + r0 = 42; \ 527 + *(u32*)(r1 + 0) = r0; \ 528 + may_goto l0_%=; \ 529 + r2 = 100; \ 530 + l0_%=: \ 531 + exit; \ 532 + " ::: __clobber_all); 533 + } 534 + #endif 535 + 484 536 char _license[] SEC("license") = "GPL";
+286
tools/testing/selftests/bpf/progs/verifier_store_release.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Google LLC. */ 3 + 4 + #include <linux/bpf.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include "../../../include/linux/filter.h" 7 + #include "bpf_misc.h" 8 + 9 + #if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ 10 + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) 11 + 12 + SEC("socket") 13 + __description("store-release, 8-bit") 14 + __success __success_unpriv __retval(0x12) 15 + __naked void store_release_8(void) 16 + { 17 + asm volatile ( 18 + "w1 = 0x12;" 19 + ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1); 20 + "w0 = *(u8 *)(r10 - 1);" 21 + "exit;" 22 + : 23 + : __imm_insn(store_release_insn, 24 + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -1)) 25 + : __clobber_all); 26 + } 27 + 28 + SEC("socket") 29 + __description("store-release, 16-bit") 30 + __success __success_unpriv __retval(0x1234) 31 + __naked void store_release_16(void) 32 + { 33 + asm volatile ( 34 + "w1 = 0x1234;" 35 + ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1); 36 + "w0 = *(u16 *)(r10 - 2);" 37 + "exit;" 38 + : 39 + : __imm_insn(store_release_insn, 40 + BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -2)) 41 + : __clobber_all); 42 + } 43 + 44 + SEC("socket") 45 + __description("store-release, 32-bit") 46 + __success __success_unpriv __retval(0x12345678) 47 + __naked void store_release_32(void) 48 + { 49 + asm volatile ( 50 + "w1 = 0x12345678;" 51 + ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1); 52 + "w0 = *(u32 *)(r10 - 4);" 53 + "exit;" 54 + : 55 + : __imm_insn(store_release_insn, 56 + BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -4)) 57 + : __clobber_all); 58 + } 59 + 60 + SEC("socket") 61 + __description("store-release, 64-bit") 62 + __success __success_unpriv __retval(0x1234567890abcdef) 63 + __naked void store_release_64(void) 64 + { 65 + asm volatile ( 66 + "r1 = 0x1234567890abcdef ll;" 67 + ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); 68 + "r0 = *(u64 *)(r10 - 8);" 69 + "exit;" 70 + : 71 + : __imm_insn(store_release_insn, 72 + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8)) 73 + : __clobber_all); 74 + } 75 + 76 + SEC("socket") 77 + __description("store-release with uninitialized src_reg") 78 + __failure __failure_unpriv __msg("R2 !read_ok") 79 + __naked void store_release_with_uninitialized_src_reg(void) 80 + { 81 + asm volatile ( 82 + ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r2); 83 + "exit;" 84 + : 85 + : __imm_insn(store_release_insn, 86 + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_2, -8)) 87 + : __clobber_all); 88 + } 89 + 90 + SEC("socket") 91 + __description("store-release with uninitialized dst_reg") 92 + __failure __failure_unpriv __msg("R2 !read_ok") 93 + __naked void store_release_with_uninitialized_dst_reg(void) 94 + { 95 + asm volatile ( 96 + "r1 = 0;" 97 + ".8byte %[store_release_insn];" // store_release((u64 *)(r2 - 8), r1); 98 + "exit;" 99 + : 100 + : __imm_insn(store_release_insn, 101 + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)) 102 + : __clobber_all); 103 + } 104 + 105 + SEC("socket") 106 + __description("store-release with non-pointer dst_reg") 107 + __failure __failure_unpriv __msg("R1 invalid mem access 'scalar'") 108 + __naked void store_release_with_non_pointer_dst_reg(void) 109 + { 110 + asm volatile ( 111 + "r1 = 0;" 112 + ".8byte %[store_release_insn];" // store_release((u64 *)(r1 
+ 0), r1); 113 + "exit;" 114 + : 115 + : __imm_insn(store_release_insn, 116 + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_1, BPF_REG_1, 0)) 117 + : __clobber_all); 118 + } 119 + 120 + SEC("socket") 121 + __description("misaligned store-release") 122 + __failure __failure_unpriv __msg("misaligned stack access off") 123 + __flag(BPF_F_ANY_ALIGNMENT) 124 + __naked void store_release_misaligned(void) 125 + { 126 + asm volatile ( 127 + "w0 = 0;" 128 + ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 5), w0); 129 + "exit;" 130 + : 131 + : __imm_insn(store_release_insn, 132 + BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_0, -5)) 133 + : __clobber_all); 134 + } 135 + 136 + SEC("socket") 137 + __description("store-release to ctx pointer") 138 + __failure __failure_unpriv __msg("BPF_ATOMIC stores into R1 ctx is not allowed") 139 + __naked void store_release_to_ctx_pointer(void) 140 + { 141 + asm volatile ( 142 + "w0 = 0;" 143 + // store_release((u8 *)(r1 + offsetof(struct __sk_buff, cb[0])), w0); 144 + ".8byte %[store_release_insn];" 145 + "exit;" 146 + : 147 + : __imm_insn(store_release_insn, 148 + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_1, BPF_REG_0, 149 + offsetof(struct __sk_buff, cb[0]))) 150 + : __clobber_all); 151 + } 152 + 153 + SEC("xdp") 154 + __description("store-release to pkt pointer") 155 + __failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed") 156 + __naked void store_release_to_pkt_pointer(void) 157 + { 158 + asm volatile ( 159 + "w0 = 0;" 160 + "r2 = *(u32 *)(r1 + %[xdp_md_data]);" 161 + "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);" 162 + "r1 = r2;" 163 + "r1 += 8;" 164 + "if r1 >= r3 goto l0_%=;" 165 + ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0); 166 + "l0_%=: r0 = 0;" 167 + "exit;" 168 + : 169 + : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)), 170 + __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)), 171 + __imm_insn(store_release_insn, 172 + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0)) 173 + : __clobber_all); 174 + } 175 + 176 + SEC("flow_dissector") 177 + __description("store-release to flow_keys pointer") 178 + __failure __msg("BPF_ATOMIC stores into R2 flow_keys is not allowed") 179 + __naked void store_release_to_flow_keys_pointer(void) 180 + { 181 + asm volatile ( 182 + "w0 = 0;" 183 + "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);" 184 + ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0); 185 + "exit;" 186 + : 187 + : __imm_const(__sk_buff_flow_keys, 188 + offsetof(struct __sk_buff, flow_keys)), 189 + __imm_insn(store_release_insn, 190 + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0)) 191 + : __clobber_all); 192 + } 193 + 194 + SEC("sk_reuseport") 195 + __description("store-release to sock pointer") 196 + __failure __msg("R2 cannot write into sock") 197 + __naked void store_release_to_sock_pointer(void) 198 + { 199 + asm volatile ( 200 + "w0 = 0;" 201 + "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);" 202 + ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0); 203 + "exit;" 204 + : 205 + : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)), 206 + __imm_insn(store_release_insn, 207 + BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0)) 208 + : __clobber_all); 209 + } 210 + 211 + SEC("socket") 212 + __description("store-release, leak pointer to stack") 213 + __success __success_unpriv __retval(0) 214 + __naked void store_release_leak_pointer_to_stack(void) 215 + { 216 + asm volatile ( 217 + ".8byte 
%[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); 218 + "r0 = 0;" 219 + "exit;" 220 + : 221 + : __imm_insn(store_release_insn, 222 + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8)) 223 + : __clobber_all); 224 + } 225 + 226 + struct { 227 + __uint(type, BPF_MAP_TYPE_HASH); 228 + __uint(max_entries, 1); 229 + __type(key, long long); 230 + __type(value, long long); 231 + } map_hash_8b SEC(".maps"); 232 + 233 + SEC("socket") 234 + __description("store-release, leak pointer to map") 235 + __success __retval(0) 236 + __failure_unpriv __msg_unpriv("R6 leaks addr into map") 237 + __naked void store_release_leak_pointer_to_map(void) 238 + { 239 + asm volatile ( 240 + "r6 = r1;" 241 + "r1 = %[map_hash_8b] ll;" 242 + "r2 = 0;" 243 + "*(u64 *)(r10 - 8) = r2;" 244 + "r2 = r10;" 245 + "r2 += -8;" 246 + "call %[bpf_map_lookup_elem];" 247 + "if r0 == 0 goto l0_%=;" 248 + ".8byte %[store_release_insn];" // store_release((u64 *)(r0 + 0), r6); 249 + "l0_%=:" 250 + "r0 = 0;" 251 + "exit;" 252 + : 253 + : __imm_addr(map_hash_8b), 254 + __imm(bpf_map_lookup_elem), 255 + __imm_insn(store_release_insn, 256 + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_0, BPF_REG_6, 0)) 257 + : __clobber_all); 258 + } 259 + 260 + SEC("socket") 261 + __description("store-release with invalid register R15") 262 + __failure __failure_unpriv __msg("R15 is invalid") 263 + __naked void store_release_with_invalid_reg(void) 264 + { 265 + asm volatile ( 266 + ".8byte %[store_release_insn];" // store_release((u64 *)(r15 + 0), r1); 267 + "exit;" 268 + : 269 + : __imm_insn(store_release_insn, 270 + BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, 15 /* invalid reg */, BPF_REG_1, 0)) 271 + : __clobber_all); 272 + } 273 + 274 + #else 275 + 276 + SEC("socket") 277 + __description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test") 278 + __success 279 + int dummy_test(void) 280 + { 281 + return 0; 282 + } 283 + 284 + #endif 285 + 286 + char _license[] SEC("license") = "GPL";
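Note: the hand-encoded .8byte templates in these two new test files are only there so the objects still build with older toolchains; in ordinary BPF C, a compiler new enough to support the instructions lowers the standard atomic builtins to them (the exact Clang version and -mcpu gating is a toolchain detail assumed here, not taken from this diff):

	#include <linux/types.h>

	/* sketch of source-level usage that should lower to the new insns */
	static inline __u64 read_acquire64(__u64 *p)
	{
		return __atomic_load_n(p, __ATOMIC_ACQUIRE);
	}

	static inline void write_release64(__u64 *p, __u64 v)
	{
		__atomic_store_n(p, v, __ATOMIC_RELEASE);
	}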
+88
tools/testing/selftests/bpf/progs/xdp_redirect_map.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 + #include <linux/if_ether.h> 4 + 3 5 #include <linux/bpf.h> 4 6 #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_endian.h> 5 8 6 9 struct { 7 10 __uint(type, BPF_MAP_TYPE_DEVMAP); ··· 29 26 int xdp_redirect_map_2(struct xdp_md *xdp) 30 27 { 31 28 return bpf_redirect_map(&tx_port, 2, 0); 29 + } 30 + 31 + struct { 32 + __uint(type, BPF_MAP_TYPE_ARRAY); 33 + __uint(max_entries, 3); 34 + __type(key, __u32); 35 + __type(value, __u64); 36 + } rxcnt SEC(".maps"); 37 + 38 + static int xdp_count(struct xdp_md *xdp, __u32 key) 39 + { 40 + void *data_end = (void *)(long)xdp->data_end; 41 + void *data = (void *)(long)xdp->data; 42 + struct ethhdr *eth = data; 43 + __u64 *count; 44 + 45 + if (data + sizeof(*eth) > data_end) 46 + return XDP_DROP; 47 + 48 + if (bpf_htons(eth->h_proto) == ETH_P_IP) { 49 + /* We only count IPv4 packets */ 50 + count = bpf_map_lookup_elem(&rxcnt, &key); 51 + if (count) 52 + *count += 1; 53 + } 54 + 55 + return XDP_PASS; 56 + } 57 + 58 + SEC("xdp") 59 + int xdp_count_0(struct xdp_md *xdp) 60 + { 61 + return xdp_count(xdp, 0); 62 + } 63 + 64 + SEC("xdp") 65 + int xdp_count_1(struct xdp_md *xdp) 66 + { 67 + return xdp_count(xdp, 1); 68 + } 69 + 70 + SEC("xdp") 71 + int xdp_count_2(struct xdp_md *xdp) 72 + { 73 + return xdp_count(xdp, 2); 74 + } 75 + 76 + struct { 77 + __uint(type, BPF_MAP_TYPE_ARRAY); 78 + __uint(max_entries, 2); 79 + __type(key, __u32); 80 + __type(value, __be64); 81 + } rx_mac SEC(".maps"); 82 + 83 + static int store_mac(struct xdp_md *xdp, __u32 id) 84 + { 85 + void *data_end = (void *)(long)xdp->data_end; 86 + void *data = (void *)(long)xdp->data; 87 + struct ethhdr *eth = data; 88 + __u32 key = id; 89 + __be64 mac = 0; 90 + 91 + if (data + sizeof(*eth) > data_end) 92 + return XDP_DROP; 93 + 94 + /* Only store IPv4 MAC to avoid being polluted by IPv6 packets */ 95 + if (eth->h_proto == bpf_htons(ETH_P_IP)) { 96 + __builtin_memcpy(&mac, eth->h_source, ETH_ALEN); 97 + bpf_map_update_elem(&rx_mac, &key, &mac, 0); 98 + bpf_printk("%s - %x", __func__, mac); 99 + } 100 + 101 + return XDP_PASS; 102 + } 103 + 104 + SEC("xdp") 105 + int store_mac_1(struct xdp_md *xdp) 106 + { 107 + return store_mac(xdp, 0); 108 + } 109 + 110 + SEC("xdp") 111 + int store_mac_2(struct xdp_md *xdp) 112 + { 113 + return store_mac(xdp, 1); 32 114 } 33 115 34 116 char _license[] SEC("license") = "GPL";
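Note: the new rxcnt/rx_mac maps are meant to be read back from user space by the migrated test_progs test; a hypothetical libbpf-side check (how the map object is obtained is up to the test, only the map name "rxcnt" comes from the hunk above) could look like:

	#include <stdio.h>
	#include <bpf/libbpf.h>

	static void print_ipv4_count(struct bpf_map *rxcnt, __u32 prog_idx)
	{
		__u64 pkts = 0;

		if (!bpf_map__lookup_elem(rxcnt, &prog_idx, sizeof(prog_idx),
					  &pkts, sizeof(pkts), 0))
			printf("IPv4 packets counted by slot %u: %llu\n",
			       prog_idx, (unsigned long long)pkts);
	}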
+28 -11
tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
··· 34 34 __uint(max_entries, 128); 35 35 } mac_map SEC(".maps"); 36 36 37 + /* map to store redirect flags for each protocol*/ 38 + struct { 39 + __uint(type, BPF_MAP_TYPE_HASH); 40 + __type(key, __u16); 41 + __type(value, __u64); 42 + __uint(max_entries, 16); 43 + } redirect_flags SEC(".maps"); 44 + 37 45 SEC("xdp") 38 46 int xdp_redirect_map_multi_prog(struct xdp_md *ctx) 39 47 { ··· 49 41 void *data = (void *)(long)ctx->data; 50 42 int if_index = ctx->ingress_ifindex; 51 43 struct ethhdr *eth = data; 44 + __u64 *flags_from_map; 52 45 __u16 h_proto; 53 46 __u64 nh_off; 47 + __u64 flags; 54 48 55 49 nh_off = sizeof(*eth); 56 50 if (data + nh_off > data_end) 57 51 return XDP_DROP; 58 52 59 - h_proto = eth->h_proto; 53 + h_proto = bpf_htons(eth->h_proto); 60 54 61 - /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */ 62 - if (h_proto == bpf_htons(ETH_P_IP)) 63 - return bpf_redirect_map(&map_all, 0, 64 - BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); 65 - /* Using IPv6 for none flag testing */ 66 - else if (h_proto == bpf_htons(ETH_P_IPV6)) 67 - return bpf_redirect_map(&map_all, if_index, 0); 68 - /* All others for BPF_F_BROADCAST testing */ 69 - else 70 - return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST); 55 + flags_from_map = bpf_map_lookup_elem(&redirect_flags, &h_proto); 56 + 57 + /* Default flags for IPv4 : (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) */ 58 + if (h_proto == ETH_P_IP) { 59 + flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS; 60 + return bpf_redirect_map(&map_all, 0, flags); 61 + } 62 + /* Default flags for IPv6 : 0 */ 63 + if (h_proto == ETH_P_IPV6) { 64 + flags = flags_from_map ? *flags_from_map : 0; 65 + return bpf_redirect_map(&map_all, if_index, flags); 66 + } 67 + /* Default flags for others BPF_F_BROADCAST : 0 */ 68 + else { 69 + flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST; 70 + return bpf_redirect_map(&map_all, 0, flags); 71 + } 71 72 } 72 73 73 74 /* The following 2 progs are for 2nd devmap prog testing */
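Note: the program now takes its redirect flags from the new redirect_flags hash keyed by CPU-endian ethertype, falling back to the old hard-coded defaults when the map has no entry. A hedged sketch of the user-space side that would drive this (the function and the way the map object is obtained are illustrative, not from the tree):

	#include <linux/if_ether.h>
	#include <bpf/libbpf.h>

	/* e.g. test IPv4 broadcast without excluding the ingress interface */
	static int set_ipv4_redirect_flags(struct bpf_map *redirect_flags)
	{
		__u16 proto = ETH_P_IP;		/* CPU-endian, matching the program's key */
		__u64 flags = BPF_F_BROADCAST;

		return bpf_map__update_elem(redirect_flags, &proto, sizeof(proto),
					    &flags, sizeof(flags), BPF_ANY);
	}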
+6
tools/testing/selftests/bpf/test_btf.h
··· 72 72 #define BTF_TYPE_FLOAT_ENC(name, sz) \ 73 73 BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) 74 74 75 + #define BTF_DECL_ATTR_ENC(value, type, component_idx) \ 76 + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), type), (component_idx) 77 + 75 78 #define BTF_DECL_TAG_ENC(value, type, component_idx) \ 76 79 BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) 80 + 81 + #define BTF_TYPE_ATTR_ENC(value, type) \ 82 + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 1, 0), type) 77 83 78 84 #define BTF_TYPE_TAG_ENC(value, type) \ 79 85 BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
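Note: the new *_ATTR_ENC variants differ from the existing DECL_TAG/TYPE_TAG encoders only in setting kind_flag, which this series uses to distinguish attribute-style tags from plain btf_decl_tag/btf_type_tag ones. BTF_INFO_ENC (already defined earlier in this header) packs that bit as follows, reproduced roughly for orientation:

	#define BTF_INFO_ENC(kind, kind_flag, vlen) \
		((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))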
+108
tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
··· 1130 1130 }; 1131 1131 1132 1132 static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { 1133 + .get_func_proto = bpf_base_func_proto, 1133 1134 .is_valid_access = bpf_testmod_ops_is_valid_access, 1134 1135 }; 1135 1136 ··· 1177 1176 return 0; 1178 1177 } 1179 1178 1179 + static int bpf_testmod_ops__test_refcounted(int dummy, 1180 + struct task_struct *task__ref) 1181 + { 1182 + return 0; 1183 + } 1184 + 1185 + static struct task_struct * 1186 + bpf_testmod_ops__test_return_ref_kptr(int dummy, struct task_struct *task__ref, 1187 + struct cgroup *cgrp) 1188 + { 1189 + return NULL; 1190 + } 1191 + 1180 1192 static struct bpf_testmod_ops __bpf_testmod_ops = { 1181 1193 .test_1 = bpf_testmod_test_1, 1182 1194 .test_2 = bpf_testmod_test_2, 1183 1195 .test_maybe_null = bpf_testmod_ops__test_maybe_null, 1196 + .test_refcounted = bpf_testmod_ops__test_refcounted, 1197 + .test_return_ref_kptr = bpf_testmod_ops__test_return_ref_kptr, 1184 1198 }; 1185 1199 1186 1200 struct bpf_struct_ops bpf_bpf_testmod_ops = { ··· 1309 1293 return 0; 1310 1294 } 1311 1295 1296 + static int bpf_cgroup_from_id_id; 1297 + static int bpf_cgroup_release_id; 1298 + 1299 + static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct_write, 1300 + const struct bpf_prog *prog) 1301 + { 1302 + struct bpf_insn *insn = insn_buf; 1303 + 1304 + /* r8 = r1; // r8 will be "u64 *ctx". 1305 + * r1 = 0; 1306 + * r0 = bpf_cgroup_from_id(r1); 1307 + * if r0 != 0 goto pc+5; 1308 + * r6 = r8[0]; // r6 will be "struct st_ops *args". 1309 + * r7 = r6->a; 1310 + * r7 += 1000; 1311 + * r6->a = r7; 1312 + * goto pc+2; 1313 + * r1 = r0; 1314 + * bpf_cgroup_release(r1); 1315 + * r1 = r8; 1316 + */ 1317 + *insn++ = BPF_MOV64_REG(BPF_REG_8, BPF_REG_1); 1318 + *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0); 1319 + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id); 1320 + *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 5); 1321 + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_8, 0); 1322 + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a)); 1323 + *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000); 1324 + *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a)); 1325 + *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2); 1326 + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0); 1327 + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id), 1328 + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8); 1329 + *insn++ = prog->insnsi[0]; 1330 + 1331 + return insn - insn_buf; 1332 + } 1333 + 1334 + static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struct bpf_prog *prog, 1335 + s16 ctx_stack_off) 1336 + { 1337 + struct bpf_insn *insn = insn_buf; 1338 + 1339 + /* r1 = 0; 1340 + * r6 = 0; 1341 + * r0 = bpf_cgroup_from_id(r1); 1342 + * if r0 != 0 goto pc+6; 1343 + * r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx" 1344 + * r1 = r1[0]; // r1 will be "struct st_ops *args" 1345 + * r6 = r1->a; 1346 + * r6 += 10000; 1347 + * r1->a = r6; 1348 + * goto pc+2 1349 + * r1 = r0; 1350 + * bpf_cgroup_release(r1); 1351 + * r0 = r6; 1352 + * r0 *= 2; 1353 + * BPF_EXIT; 1354 + */ 1355 + *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0); 1356 + *insn++ = BPF_MOV64_IMM(BPF_REG_6, 0); 1357 + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id); 1358 + *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 6); 1359 + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off); 1360 + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0); 1361 + *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 
offsetof(struct st_ops_args, a)); 1362 + *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000); 1363 + *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a)); 1364 + *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2); 1365 + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0); 1366 + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id), 1367 + *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6); 1368 + *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2); 1369 + *insn++ = BPF_EXIT_INSN(); 1370 + 1371 + return insn - insn_buf; 1372 + } 1373 + 1374 + #define KFUNC_PRO_EPI_PREFIX "test_kfunc_" 1312 1375 static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write, 1313 1376 const struct bpf_prog *prog) 1314 1377 { ··· 1396 1301 if (strcmp(prog->aux->attach_func_name, "test_prologue") && 1397 1302 strcmp(prog->aux->attach_func_name, "test_pro_epilogue")) 1398 1303 return 0; 1304 + 1305 + if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX))) 1306 + return st_ops_gen_prologue_with_kfunc(insn_buf, direct_write, prog); 1399 1307 1400 1308 /* r6 = r1[0]; // r6 will be "struct st_ops *args". r1 is "u64 *ctx". 1401 1309 * r7 = r6->a; ··· 1422 1324 if (strcmp(prog->aux->attach_func_name, "test_epilogue") && 1423 1325 strcmp(prog->aux->attach_func_name, "test_pro_epilogue")) 1424 1326 return 0; 1327 + 1328 + if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX))) 1329 + return st_ops_gen_epilogue_with_kfunc(insn_buf, prog, ctx_stack_off); 1425 1330 1426 1331 /* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx" 1427 1332 * r1 = r1[0]; // r1 will be "struct st_ops *args" ··· 1496 1395 1497 1396 static int st_ops_init(struct btf *btf) 1498 1397 { 1398 + struct btf *kfunc_btf; 1399 + 1400 + bpf_cgroup_from_id_id = bpf_find_btf_id("bpf_cgroup_from_id", BTF_KIND_FUNC, &kfunc_btf); 1401 + bpf_cgroup_release_id = bpf_find_btf_id("bpf_cgroup_release", BTF_KIND_FUNC, &kfunc_btf); 1402 + if (bpf_cgroup_from_id_id < 0 || bpf_cgroup_release_id < 0) 1403 + return -EINVAL; 1404 + 1499 1405 return 0; 1500 1406 } 1501 1407
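Note: the kfunc-calling prologue/epilogue variants resolve the BTF ids of bpf_cgroup_from_id()/bpf_cgroup_release() at init time and then emit direct kfunc calls. BPF_CALL_KFUNC() is not defined in this hunk; presumably it builds a BPF_JMP | BPF_CALL instruction with src_reg = BPF_PSEUDO_KFUNC_CALL and the BTF id in imm, along the lines of the following sketch (an assumption, not quoted from the tree):

	#define BPF_CALL_KFUNC(OFF, IMM)			\
		((struct bpf_insn) {				\
			.code    = BPF_JMP | BPF_CALL,		\
			.dst_reg = 0,				\
			.src_reg = BPF_PSEUDO_KFUNC_CALL,	\
			.off     = OFF,				\
			.imm     = IMM })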
+6
tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
··· 6 6 #include <linux/types.h> 7 7 8 8 struct task_struct; 9 + struct cgroup; 9 10 10 11 struct bpf_testmod_test_read_ctx { 11 12 char *buf; ··· 37 36 /* Used to test nullable arguments. */ 38 37 int (*test_maybe_null)(int dummy, struct task_struct *task); 39 38 int (*unsupported_ops)(void); 39 + /* Used to test ref_acquired arguments. */ 40 + int (*test_refcounted)(int dummy, struct task_struct *task); 41 + /* Used to test returning referenced kptr. */ 42 + struct task_struct *(*test_return_ref_kptr)(int dummy, struct task_struct *task, 43 + struct cgroup *cgrp); 40 44 41 45 /* The following fields are used to test shadow copies. */ 42 46 char onebyte;
+29 -3
tools/testing/selftests/bpf/test_loader.c
··· 37 37 #define TEST_TAG_JITED_PFX "comment:test_jited=" 38 38 #define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv=" 39 39 #define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv=" 40 + #define TEST_TAG_LOAD_MODE_PFX "comment:load_mode=" 40 41 41 42 /* Warning: duplicated in bpf_misc.h */ 42 43 #define POINTER_VALUE 0xcafe4all ··· 54 53 enum mode { 55 54 PRIV = 1, 56 55 UNPRIV = 2 56 + }; 57 + 58 + enum load_mode { 59 + JITED = 1 << 0, 60 + NO_JITED = 1 << 1, 57 61 }; 58 62 59 63 struct expect_msg { ··· 93 87 int prog_flags; 94 88 int mode_mask; 95 89 int arch_mask; 90 + int load_mask; 96 91 bool auxiliary; 97 92 bool valid; 98 93 }; ··· 413 406 bool collect_jit = false; 414 407 int func_id, i, err = 0; 415 408 u32 arch_mask = 0; 409 + u32 load_mask = 0; 416 410 struct btf *btf; 417 411 enum arch arch; 418 412 ··· 588 580 if (err) 589 581 goto cleanup; 590 582 spec->mode_mask |= UNPRIV; 583 + } else if (str_has_pfx(s, TEST_TAG_LOAD_MODE_PFX)) { 584 + val = s + sizeof(TEST_TAG_LOAD_MODE_PFX) - 1; 585 + if (strcmp(val, "jited") == 0) { 586 + load_mask = JITED; 587 + } else if (strcmp(val, "no_jited") == 0) { 588 + load_mask = NO_JITED; 589 + } else { 590 + PRINT_FAIL("bad load spec: '%s'", val); 591 + err = -EINVAL; 592 + goto cleanup; 593 + } 591 594 } 592 595 } 593 596 594 597 spec->arch_mask = arch_mask ?: -1; 598 + spec->load_mask = load_mask ?: (JITED | NO_JITED); 595 599 596 600 if (spec->mode_mask == 0) 597 601 spec->mode_mask = PRIV; ··· 793 773 794 774 err = cap_disable_effective(caps_to_drop, &caps->old_caps); 795 775 if (err) { 796 - PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err)); 776 + PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(-err)); 797 777 return err; 798 778 } 799 779 ··· 810 790 811 791 err = cap_enable_effective(caps->old_caps, NULL); 812 792 if (err) 813 - PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err)); 793 + PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(-err)); 814 794 caps->initialized = false; 815 795 return err; 816 796 } ··· 948 928 bool unpriv) 949 929 { 950 930 struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv; 931 + int current_runtime = is_jit_enabled() ? JITED : NO_JITED; 951 932 struct bpf_program *tprog = NULL, *tprog_iter; 952 933 struct bpf_link *link, *links[32] = {}; 953 934 struct test_spec *spec_iter; ··· 967 946 return; 968 947 } 969 948 949 + if ((current_runtime & spec->load_mask) == 0) { 950 + test__skip(); 951 + return; 952 + } 953 + 970 954 if (unpriv) { 971 955 if (!can_execute_unpriv(tester, spec)) { 972 956 test__skip(); ··· 985 959 if (subspec->caps) { 986 960 err = cap_enable_effective(subspec->caps, NULL); 987 961 if (err) { 988 - PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err)); 962 + PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(-err)); 989 963 goto subtest_cleanup; 990 964 } 991 965 }
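Note: the new comment:load_mode= tag is what the __load_if_JITed()/__load_if_no_JITed() annotations used in verifier_stack_ptr.c above boil down to. Their bpf_misc.h definitions are not part of this diff, but following the pattern of the other test_loader tags they are presumably:

	#define __load_if_JITed()	__attribute__((btf_decl_tag("comment:load_mode=jited")))
	#define __load_if_no_JITed()	__attribute__((btf_decl_tag("comment:load_mode=no_jited")))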
-476
tools/testing/selftests/bpf/test_lwt_ip_encap.sh
··· 1 - #!/bin/bash 2 - # SPDX-License-Identifier: GPL-2.0 3 - # 4 - # Setup/topology: 5 - # 6 - # NS1 NS2 NS3 7 - # veth1 <---> veth2 veth3 <---> veth4 (the top route) 8 - # veth5 <---> veth6 veth7 <---> veth8 (the bottom route) 9 - # 10 - # each vethN gets IPv[4|6]_N address 11 - # 12 - # IPv*_SRC = IPv*_1 13 - # IPv*_DST = IPv*_4 14 - # 15 - # all tests test pings from IPv*_SRC to IPv*_DST 16 - # 17 - # by default, routes are configured to allow packets to go 18 - # IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route) 19 - # 20 - # a GRE device is installed in NS3 with IPv*_GRE, and 21 - # NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8 22 - # (the bottom route) 23 - # 24 - # Tests: 25 - # 26 - # 1. routes NS2->IPv*_DST are brought down, so the only way a ping 27 - # from IP*_SRC to IP*_DST can work is via IPv*_GRE 28 - # 29 - # 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1 30 - # that encaps the packets with an IP/GRE header to route to IPv*_GRE 31 - # 32 - # ping: SRC->[encap at veth1:egress]->GRE:decap->DST 33 - # ping replies go DST->SRC directly 34 - # 35 - # 2b. in an ingress test, a bpf LWT_IN program is installed on veth2 36 - # that encaps the packets with an IP/GRE header to route to IPv*_GRE 37 - # 38 - # ping: SRC->[encap at veth2:ingress]->GRE:decap->DST 39 - # ping replies go DST->SRC directly 40 - 41 - BPF_FILE="test_lwt_ip_encap.bpf.o" 42 - if [[ $EUID -ne 0 ]]; then 43 - echo "This script must be run as root" 44 - echo "FAIL" 45 - exit 1 46 - fi 47 - 48 - readonly NS1="ns1-$(mktemp -u XXXXXX)" 49 - readonly NS2="ns2-$(mktemp -u XXXXXX)" 50 - readonly NS3="ns3-$(mktemp -u XXXXXX)" 51 - 52 - readonly IPv4_1="172.16.1.100" 53 - readonly IPv4_2="172.16.2.100" 54 - readonly IPv4_3="172.16.3.100" 55 - readonly IPv4_4="172.16.4.100" 56 - readonly IPv4_5="172.16.5.100" 57 - readonly IPv4_6="172.16.6.100" 58 - readonly IPv4_7="172.16.7.100" 59 - readonly IPv4_8="172.16.8.100" 60 - readonly IPv4_GRE="172.16.16.100" 61 - 62 - readonly IPv4_SRC=$IPv4_1 63 - readonly IPv4_DST=$IPv4_4 64 - 65 - readonly IPv6_1="fb01::1" 66 - readonly IPv6_2="fb02::1" 67 - readonly IPv6_3="fb03::1" 68 - readonly IPv6_4="fb04::1" 69 - readonly IPv6_5="fb05::1" 70 - readonly IPv6_6="fb06::1" 71 - readonly IPv6_7="fb07::1" 72 - readonly IPv6_8="fb08::1" 73 - readonly IPv6_GRE="fb10::1" 74 - 75 - readonly IPv6_SRC=$IPv6_1 76 - readonly IPv6_DST=$IPv6_4 77 - 78 - TEST_STATUS=0 79 - TESTS_SUCCEEDED=0 80 - TESTS_FAILED=0 81 - 82 - TMPFILE="" 83 - 84 - process_test_results() 85 - { 86 - if [[ "${TEST_STATUS}" -eq 0 ]] ; then 87 - echo "PASS" 88 - TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1)) 89 - else 90 - echo "FAIL" 91 - TESTS_FAILED=$((TESTS_FAILED+1)) 92 - fi 93 - } 94 - 95 - print_test_summary_and_exit() 96 - { 97 - echo "passed tests: ${TESTS_SUCCEEDED}" 98 - echo "failed tests: ${TESTS_FAILED}" 99 - if [ "${TESTS_FAILED}" -eq "0" ] ; then 100 - exit 0 101 - else 102 - exit 1 103 - fi 104 - } 105 - 106 - setup() 107 - { 108 - set -e # exit on error 109 - TEST_STATUS=0 110 - 111 - # create devices and namespaces 112 - ip netns add "${NS1}" 113 - ip netns add "${NS2}" 114 - ip netns add "${NS3}" 115 - 116 - # rp_filter gets confused by what these tests are doing, so disable it 117 - ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 118 - ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 119 - ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 120 - ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0 121 - ip netns exec ${NS2} 
sysctl -wq net.ipv4.conf.default.rp_filter=0 122 - ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0 123 - 124 - # disable IPv6 DAD because it sometimes takes too long and fails tests 125 - ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0 126 - ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0 127 - ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0 128 - ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0 129 - ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0 130 - ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0 131 - 132 - ip link add veth1 type veth peer name veth2 133 - ip link add veth3 type veth peer name veth4 134 - ip link add veth5 type veth peer name veth6 135 - ip link add veth7 type veth peer name veth8 136 - 137 - ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1 138 - ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1 139 - 140 - ip link set veth1 netns ${NS1} 141 - ip link set veth2 netns ${NS2} 142 - ip link set veth3 netns ${NS2} 143 - ip link set veth4 netns ${NS3} 144 - ip link set veth5 netns ${NS1} 145 - ip link set veth6 netns ${NS2} 146 - ip link set veth7 netns ${NS2} 147 - ip link set veth8 netns ${NS3} 148 - 149 - if [ ! -z "${VRF}" ] ; then 150 - ip -netns ${NS1} link add red type vrf table 1001 151 - ip -netns ${NS1} link set red up 152 - ip -netns ${NS1} route add table 1001 unreachable default metric 8192 153 - ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192 154 - ip -netns ${NS1} link set veth1 vrf red 155 - ip -netns ${NS1} link set veth5 vrf red 156 - 157 - ip -netns ${NS2} link add red type vrf table 1001 158 - ip -netns ${NS2} link set red up 159 - ip -netns ${NS2} route add table 1001 unreachable default metric 8192 160 - ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192 161 - ip -netns ${NS2} link set veth2 vrf red 162 - ip -netns ${NS2} link set veth3 vrf red 163 - ip -netns ${NS2} link set veth6 vrf red 164 - ip -netns ${NS2} link set veth7 vrf red 165 - fi 166 - 167 - # configure addesses: the top route (1-2-3-4) 168 - ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1 169 - ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2 170 - ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3 171 - ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4 172 - ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1 173 - ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2 174 - ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3 175 - ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4 176 - 177 - # configure addresses: the bottom route (5-6-7-8) 178 - ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5 179 - ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6 180 - ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7 181 - ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8 182 - ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5 183 - ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6 184 - ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7 185 - ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8 186 - 187 - ip -netns ${NS1} link set dev veth1 up 188 - ip -netns ${NS2} link set dev veth2 up 189 - ip -netns ${NS2} link set dev veth3 up 190 - ip -netns ${NS3} link set dev veth4 up 191 - ip -netns ${NS1} link set dev veth5 up 192 - ip -netns ${NS2} link set dev veth6 up 193 - ip -netns ${NS2} link set dev veth7 up 194 - ip -netns ${NS3} link set dev veth8 up 195 - 
196 - # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default; 197 - # the bottom route to specific bottom addresses 198 - 199 - # NS1 200 - # top route 201 - ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF} 202 - ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default 203 - ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF} 204 - ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default 205 - # bottom route 206 - ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF} 207 - ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF} 208 - ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF} 209 - ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF} 210 - ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF} 211 - ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF} 212 - 213 - # NS2 214 - # top route 215 - ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF} 216 - ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF} 217 - ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF} 218 - ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF} 219 - # bottom route 220 - ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF} 221 - ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF} 222 - ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF} 223 - ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF} 224 - 225 - # NS3 226 - # top route 227 - ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4 228 - ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3} 229 - ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3} 230 - ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4 231 - ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3} 232 - ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3} 233 - # bottom route 234 - ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8 235 - ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7} 236 - ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7} 237 - ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8 238 - ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7} 239 - ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7} 240 - 241 - # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route 242 - ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255 243 - ip -netns ${NS3} link set gre_dev up 244 - ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev 245 - ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF} 246 - ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF} 247 - 248 - 249 - # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route 250 - ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255 251 - ip -netns ${NS3} link set gre6_dev up 252 - ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev 253 - ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF} 254 - ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF} 255 - 256 - TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX) 257 - 258 - sleep 1 # reduce flakiness 259 - set +e 260 - } 261 - 262 - cleanup() 263 - { 264 - if [ -f ${TMPFILE} ] ; then 265 - rm ${TMPFILE} 266 - fi 267 - 268 - ip netns del ${NS1} 2> 
/dev/null 269 - ip netns del ${NS2} 2> /dev/null 270 - ip netns del ${NS3} 2> /dev/null 271 - } 272 - 273 - trap cleanup EXIT 274 - 275 - remove_routes_to_gredev() 276 - { 277 - ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF} 278 - ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF} 279 - ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF} 280 - ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF} 281 - } 282 - 283 - add_unreachable_routes_to_gredev() 284 - { 285 - ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF} 286 - ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF} 287 - ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF} 288 - ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF} 289 - } 290 - 291 - test_ping() 292 - { 293 - local readonly PROTO=$1 294 - local readonly EXPECTED=$2 295 - local RET=0 296 - 297 - if [ "${PROTO}" == "IPv4" ] ; then 298 - ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null 299 - RET=$? 300 - elif [ "${PROTO}" == "IPv6" ] ; then 301 - ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null 302 - RET=$? 303 - else 304 - echo " test_ping: unknown PROTO: ${PROTO}" 305 - TEST_STATUS=1 306 - fi 307 - 308 - if [ "0" != "${RET}" ]; then 309 - RET=1 310 - fi 311 - 312 - if [ "${EXPECTED}" != "${RET}" ] ; then 313 - echo " test_ping failed: expected: ${EXPECTED}; got ${RET}" 314 - TEST_STATUS=1 315 - fi 316 - } 317 - 318 - test_gso() 319 - { 320 - local readonly PROTO=$1 321 - local readonly PKT_SZ=5000 322 - local IP_DST="" 323 - : > ${TMPFILE} # trim the capture file 324 - 325 - # check that nc is present 326 - command -v nc >/dev/null 2>&1 || \ 327 - { echo >&2 "nc is not available: skipping TSO tests"; return; } 328 - 329 - # listen on port 9000, capture TCP into $TMPFILE 330 - if [ "${PROTO}" == "IPv4" ] ; then 331 - IP_DST=${IPv4_DST} 332 - ip netns exec ${NS3} bash -c \ 333 - "nc -4 -l -p 9000 > ${TMPFILE} &" 334 - elif [ "${PROTO}" == "IPv6" ] ; then 335 - IP_DST=${IPv6_DST} 336 - ip netns exec ${NS3} bash -c \ 337 - "nc -6 -l -p 9000 > ${TMPFILE} &" 338 - RET=$? 
339 - else 340 - echo " test_gso: unknown PROTO: ${PROTO}" 341 - TEST_STATUS=1 342 - fi 343 - sleep 1 # let nc start listening 344 - 345 - # send a packet larger than MTU 346 - ip netns exec ${NS1} bash -c \ 347 - "dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null" 348 - sleep 2 # let the packet get delivered 349 - 350 - # verify we received all expected bytes 351 - SZ=$(stat -c %s ${TMPFILE}) 352 - if [ "$SZ" != "$PKT_SZ" ] ; then 353 - echo " test_gso failed: ${PROTO}" 354 - TEST_STATUS=1 355 - fi 356 - } 357 - 358 - test_egress() 359 - { 360 - local readonly ENCAP=$1 361 - echo "starting egress ${ENCAP} encap test ${VRF}" 362 - setup 363 - 364 - # by default, pings work 365 - test_ping IPv4 0 366 - test_ping IPv6 0 367 - 368 - # remove NS2->DST routes, ping fails 369 - ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF} 370 - ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF} 371 - test_ping IPv4 1 372 - test_ping IPv6 1 373 - 374 - # install replacement routes (LWT/eBPF), pings succeed 375 - if [ "${ENCAP}" == "IPv4" ] ; then 376 - ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ 377 - ${BPF_FILE} sec encap_gre dev veth1 ${VRF} 378 - ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ 379 - ${BPF_FILE} sec encap_gre dev veth1 ${VRF} 380 - elif [ "${ENCAP}" == "IPv6" ] ; then 381 - ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ 382 - ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} 383 - ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ 384 - ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} 385 - else 386 - echo " unknown encap ${ENCAP}" 387 - TEST_STATUS=1 388 - fi 389 - test_ping IPv4 0 390 - test_ping IPv6 0 391 - 392 - # skip GSO tests with VRF: VRF routing needs properly assigned 393 - # source IP/device, which is easy to do with ping and hard with dd/nc. 
394 - if [ -z "${VRF}" ] ; then 395 - test_gso IPv4 396 - test_gso IPv6 397 - fi 398 - 399 - # a negative test: remove routes to GRE devices: ping fails 400 - remove_routes_to_gredev 401 - test_ping IPv4 1 402 - test_ping IPv6 1 403 - 404 - # another negative test 405 - add_unreachable_routes_to_gredev 406 - test_ping IPv4 1 407 - test_ping IPv6 1 408 - 409 - cleanup 410 - process_test_results 411 - } 412 - 413 - test_ingress() 414 - { 415 - local readonly ENCAP=$1 416 - echo "starting ingress ${ENCAP} encap test ${VRF}" 417 - setup 418 - 419 - # need to wait a bit for IPv6 to autoconf, otherwise 420 - # ping6 sometimes fails with "unable to bind to address" 421 - 422 - # by default, pings work 423 - test_ping IPv4 0 424 - test_ping IPv6 0 425 - 426 - # remove NS2->DST routes, pings fail 427 - ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF} 428 - ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF} 429 - test_ping IPv4 1 430 - test_ping IPv6 1 431 - 432 - # install replacement routes (LWT/eBPF), pings succeed 433 - if [ "${ENCAP}" == "IPv4" ] ; then 434 - ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ 435 - ${BPF_FILE} sec encap_gre dev veth2 ${VRF} 436 - ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ 437 - ${BPF_FILE} sec encap_gre dev veth2 ${VRF} 438 - elif [ "${ENCAP}" == "IPv6" ] ; then 439 - ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ 440 - ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} 441 - ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ 442 - ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} 443 - else 444 - echo "FAIL: unknown encap ${ENCAP}" 445 - TEST_STATUS=1 446 - fi 447 - test_ping IPv4 0 448 - test_ping IPv6 0 449 - 450 - # a negative test: remove routes to GRE devices: ping fails 451 - remove_routes_to_gredev 452 - test_ping IPv4 1 453 - test_ping IPv6 1 454 - 455 - # another negative test 456 - add_unreachable_routes_to_gredev 457 - test_ping IPv4 1 458 - test_ping IPv6 1 459 - 460 - cleanup 461 - process_test_results 462 - } 463 - 464 - VRF="" 465 - test_egress IPv4 466 - test_egress IPv6 467 - test_ingress IPv4 468 - test_ingress IPv6 469 - 470 - VRF="vrf red" 471 - test_egress IPv4 472 - test_egress IPv6 473 - test_ingress IPv4 474 - test_ingress IPv6 475 - 476 - print_test_summary_and_exit
-156
tools/testing/selftests/bpf/test_lwt_seg6local.sh
··· 1 - #!/bin/bash 2 - # Connects 6 network namespaces through veths. 3 - # Each NS may have different IPv6 global scope addresses : 4 - # NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6 5 - # fb00::1 fd00::1 fd00::2 fd00::3 fb00::6 6 - # fc42::1 fd00::4 7 - # 8 - # All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a 9 - # IPv6 header with a Segment Routing Header, with segments : 10 - # fd00::1 -> fd00::2 -> fd00::3 -> fd00::4 11 - # 12 - # 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions : 13 - # - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1 14 - # - fd00::2 : remove the TLV, change the flags, add a tag 15 - # - fd00::3 : apply an End.T action to fd00::4, through routing table 117 16 - # 17 - # fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet. 18 - # Each End.BPF action will validate the operations applied on the SRH by the 19 - # previous BPF program in the chain, otherwise the packet is dropped. 20 - # 21 - # An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this 22 - # datagram can be read on NS6 when binding to fb00::6. 23 - 24 - # Kselftest framework requirement - SKIP code is 4. 25 - ksft_skip=4 26 - BPF_FILE="test_lwt_seg6local.bpf.o" 27 - readonly NS1="ns1-$(mktemp -u XXXXXX)" 28 - readonly NS2="ns2-$(mktemp -u XXXXXX)" 29 - readonly NS3="ns3-$(mktemp -u XXXXXX)" 30 - readonly NS4="ns4-$(mktemp -u XXXXXX)" 31 - readonly NS5="ns5-$(mktemp -u XXXXXX)" 32 - readonly NS6="ns6-$(mktemp -u XXXXXX)" 33 - 34 - msg="skip all tests:" 35 - if [ $UID != 0 ]; then 36 - echo $msg please run this as root >&2 37 - exit $ksft_skip 38 - fi 39 - 40 - TMP_FILE="/tmp/selftest_lwt_seg6local.txt" 41 - 42 - cleanup() 43 - { 44 - if [ "$?" = "0" ]; then 45 - echo "selftests: test_lwt_seg6local [PASS]"; 46 - else 47 - echo "selftests: test_lwt_seg6local [FAILED]"; 48 - fi 49 - 50 - set +e 51 - ip netns del ${NS1} 2> /dev/null 52 - ip netns del ${NS2} 2> /dev/null 53 - ip netns del ${NS3} 2> /dev/null 54 - ip netns del ${NS4} 2> /dev/null 55 - ip netns del ${NS5} 2> /dev/null 56 - ip netns del ${NS6} 2> /dev/null 57 - rm -f $TMP_FILE 58 - } 59 - 60 - set -e 61 - 62 - ip netns add ${NS1} 63 - ip netns add ${NS2} 64 - ip netns add ${NS3} 65 - ip netns add ${NS4} 66 - ip netns add ${NS5} 67 - ip netns add ${NS6} 68 - 69 - trap cleanup 0 2 3 6 9 70 - 71 - ip link add veth1 type veth peer name veth2 72 - ip link add veth3 type veth peer name veth4 73 - ip link add veth5 type veth peer name veth6 74 - ip link add veth7 type veth peer name veth8 75 - ip link add veth9 type veth peer name veth10 76 - 77 - ip link set veth1 netns ${NS1} 78 - ip link set veth2 netns ${NS2} 79 - ip link set veth3 netns ${NS2} 80 - ip link set veth4 netns ${NS3} 81 - ip link set veth5 netns ${NS3} 82 - ip link set veth6 netns ${NS4} 83 - ip link set veth7 netns ${NS4} 84 - ip link set veth8 netns ${NS5} 85 - ip link set veth9 netns ${NS5} 86 - ip link set veth10 netns ${NS6} 87 - 88 - ip netns exec ${NS1} ip link set dev veth1 up 89 - ip netns exec ${NS2} ip link set dev veth2 up 90 - ip netns exec ${NS2} ip link set dev veth3 up 91 - ip netns exec ${NS3} ip link set dev veth4 up 92 - ip netns exec ${NS3} ip link set dev veth5 up 93 - ip netns exec ${NS4} ip link set dev veth6 up 94 - ip netns exec ${NS4} ip link set dev veth7 up 95 - ip netns exec ${NS5} ip link set dev veth8 up 96 - ip netns exec ${NS5} ip link set dev veth9 up 97 - ip netns exec ${NS6} ip link set dev veth10 up 98 - ip netns exec ${NS6} ip link 
set dev lo up 99 - 100 - # All link scope addresses and routes required between veths 101 - ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link 102 - ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link 103 - ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link 104 - ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link 105 - ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link 106 - ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link 107 - ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link 108 - ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link 109 - ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link 110 - ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link 111 - ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link 112 - ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link 113 - ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link 114 - ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link 115 - ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link 116 - ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link 117 - 118 - ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo 119 - ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21 120 - 121 - ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2 122 - ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link 123 - 124 - ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65 125 - ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4 126 - 127 - ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6 128 - ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo 129 - ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87 130 - 131 - ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 132 - ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8 133 - 134 - ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo 135 - ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo 136 - 137 - ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null 138 - ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null 139 - ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null 140 - ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null 141 - ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null 142 - 143 - ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null 144 - ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null 145 - ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null 146 - 147 - ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE & 148 - ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" 149 - sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment 150 - kill -TERM $! 151 - 152 - if [[ $(< $TMP_FILE) != "foobar" ]]; then 153 - exit 1 154 - fi 155 - 156 - exit 0
+5 -4
tools/testing/selftests/bpf/test_maps.c
··· 1396 1396 #define MAX_DELAY_US 50000 1397 1397 #define MIN_DELAY_RANGE_US 5000 1398 1398 1399 - static bool retry_for_again_or_busy(int err) 1399 + static bool can_retry(int err) 1400 1400 { 1401 - return (err == EAGAIN || err == EBUSY); 1401 + return (err == EAGAIN || err == EBUSY || 1402 + (err == ENOMEM && map_opts.map_flags == BPF_F_NO_PREALLOC)); 1402 1403 } 1403 1404 1404 1405 int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, ··· 1452 1451 1453 1452 if (do_update) { 1454 1453 err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, 1455 - retry_for_again_or_busy); 1454 + can_retry); 1456 1455 if (err) 1457 1456 printf("error %d %d\n", err, errno); 1458 1457 assert(err == 0); 1459 1458 err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, 1460 - retry_for_again_or_busy); 1459 + can_retry); 1461 1460 if (err) 1462 1461 printf("error %d %d\n", err, errno); 1463 1462 assert(err == 0);
+52 -38
tools/testing/selftests/bpf/test_progs.c
··· 88 88 #endif 89 89 } 90 90 91 - static void stdio_restore_cleanup(void) 92 - { 93 - #ifdef __GLIBC__ 94 - if (verbose() && env.worker_id == -1) { 95 - /* nothing to do, output to stdout by default */ 96 - return; 97 - } 98 - 99 - fflush(stdout); 100 - 101 - if (env.subtest_state) { 102 - fclose(env.subtest_state->stdout_saved); 103 - env.subtest_state->stdout_saved = NULL; 104 - stdout = env.test_state->stdout_saved; 105 - stderr = env.test_state->stdout_saved; 106 - } else { 107 - fclose(env.test_state->stdout_saved); 108 - env.test_state->stdout_saved = NULL; 109 - } 110 - #endif 111 - } 91 + static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER; 112 92 113 93 static void stdio_restore(void) 114 94 { ··· 98 118 return; 99 119 } 100 120 101 - if (stdout == env.stdout_saved) 102 - return; 121 + fflush(stdout); 103 122 104 - stdio_restore_cleanup(); 123 + pthread_mutex_lock(&stdout_lock); 105 124 106 - stdout = env.stdout_saved; 107 - stderr = env.stderr_saved; 125 + if (env.subtest_state) { 126 + if (env.subtest_state->stdout_saved) 127 + fclose(env.subtest_state->stdout_saved); 128 + env.subtest_state->stdout_saved = NULL; 129 + stdout = env.test_state->stdout_saved; 130 + stderr = env.test_state->stdout_saved; 131 + } else { 132 + if (env.test_state->stdout_saved) 133 + fclose(env.test_state->stdout_saved); 134 + env.test_state->stdout_saved = NULL; 135 + stdout = env.stdout_saved; 136 + stderr = env.stderr_saved; 137 + } 138 + 139 + pthread_mutex_unlock(&stdout_lock); 108 140 #endif 141 + } 142 + 143 + static int traffic_monitor_print_fn(const char *format, va_list args) 144 + { 145 + pthread_mutex_lock(&stdout_lock); 146 + vfprintf(stdout, format, args); 147 + pthread_mutex_unlock(&stdout_lock); 148 + 149 + return 0; 109 150 } 110 151 111 152 /* Adapted from perf/util/string.c */ ··· 475 474 print_test_result(test, test_state); 476 475 } 477 476 478 - static void stdio_restore(void); 479 - 480 477 /* A bunch of tests set custom affinity per-thread and/or per-process. Reset 481 478 * it after each test/sub-test. 
482 479 */ ··· 489 490 490 491 err = sched_setaffinity(0, sizeof(cpuset), &cpuset); 491 492 if (err < 0) { 492 - stdio_restore(); 493 493 fprintf(stderr, "Failed to reset process affinity: %d!\n", err); 494 494 exit(EXIT_ERR_SETUP_INFRA); 495 495 } 496 496 err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); 497 497 if (err < 0) { 498 - stdio_restore(); 499 498 fprintf(stderr, "Failed to reset thread affinity: %d!\n", err); 500 499 exit(EXIT_ERR_SETUP_INFRA); 501 500 } ··· 511 514 static void restore_netns(void) 512 515 { 513 516 if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) { 514 - stdio_restore(); 515 517 perror("setns(CLONE_NEWNS)"); 516 518 exit(EXIT_ERR_SETUP_INFRA); 517 519 } ··· 537 541 test_result(subtest_state->error_cnt, 538 542 subtest_state->skipped)); 539 543 540 - stdio_restore_cleanup(); 544 + stdio_restore(); 545 + 541 546 env.subtest_state = NULL; 542 547 } 543 548 ··· 1267 1270 1268 1271 sz = backtrace(bt, ARRAY_SIZE(bt)); 1269 1272 1270 - if (env.stdout_saved) 1271 - stdio_restore(); 1273 + fflush(stdout); 1274 + stdout = env.stdout_saved; 1275 + stderr = env.stderr_saved; 1276 + 1272 1277 if (env.test) { 1273 1278 env.test_state->error_cnt++; 1274 1279 dump_test_log(env.test, env.test_state, true, false, NULL); ··· 1364 1365 return ret; 1365 1366 } 1366 1367 1368 + static bool ns_is_needed(const char *test_name) 1369 + { 1370 + if (strlen(test_name) < 3) 1371 + return false; 1372 + 1373 + return !strncmp(test_name, "ns_", 3); 1374 + } 1375 + 1367 1376 static void run_one_test(int test_num) 1368 1377 { 1369 1378 struct prog_test_def *test = &prog_test_defs[test_num]; 1370 1379 struct test_state *state = &test_states[test_num]; 1380 + struct netns_obj *ns = NULL; 1371 1381 1372 1382 env.test = test; 1373 1383 env.test_state = state; ··· 1384 1376 stdio_hijack(&state->log_buf, &state->log_cnt); 1385 1377 1386 1378 watchdog_start(); 1379 + if (ns_is_needed(test->test_name)) 1380 + ns = netns_new(test->test_name, true); 1387 1381 if (test->run_test) 1388 1382 test->run_test(); 1389 1383 else if (test->run_serial_test) 1390 1384 test->run_serial_test(); 1385 + netns_free(ns); 1391 1386 watchdog_stop(); 1392 1387 1393 1388 /* ensure last sub-test is finalized properly */ ··· 1398 1387 test__end_subtest(); 1399 1388 1400 1389 state->tested = true; 1390 + 1391 + stdio_restore(); 1401 1392 1402 1393 if (verbose() && env.worker_id == -1) 1403 1394 print_test_result(test, state); ··· 1409 1396 if (test->need_cgroup_cleanup) 1410 1397 cleanup_cgroup_environment(); 1411 1398 1412 - stdio_restore(); 1413 1399 free(stop_libbpf_log_capture()); 1414 1400 1415 1401 dump_test_log(test, state, false, false, NULL); ··· 1943 1931 1944 1932 sigaction(SIGSEGV, &sigact, NULL); 1945 1933 1934 + env.stdout_saved = stdout; 1935 + env.stderr_saved = stderr; 1936 + 1946 1937 env.secs_till_notify = 10; 1947 1938 env.secs_till_kill = 120; 1948 1939 err = argp_parse(&argp, argc, argv, 0, NULL, &env); ··· 1962 1947 libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 1963 1948 libbpf_set_print(libbpf_print_fn); 1964 1949 1950 + traffic_monitor_set_print(traffic_monitor_print_fn); 1951 + 1965 1952 srand(time(NULL)); 1966 1953 1967 1954 env.jit_enabled = is_jit_enabled(); ··· 1973 1956 env.nr_cpus); 1974 1957 return -1; 1975 1958 } 1976 - 1977 - env.stdout_saved = stdout; 1978 - env.stderr_saved = stderr; 1979 1959 1980 1960 env.has_testmod = true; 1981 1961 if (!env.list_test_names) {
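Note: besides the stdio locking rework, the harness now keys off a naming convention: any test whose name starts with "ns_" gets a fresh network namespace created around its run and freed afterwards. A hypothetical new test only has to follow the prefix:

	/* in some prog_tests/*.c file (name is illustrative) */
	void test_ns_my_feature(void)
	{
		/* runs with a dedicated netns already entered by the harness;
		 * network setup here no longer leaks into other tests.
		 */
	}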
+8
tools/testing/selftests/bpf/test_progs.h
··· 427 427 goto goto_label; \ 428 428 }) 429 429 430 + #define SYS_FAIL(goto_label, fmt, ...) \ 431 + ({ \ 432 + char cmd[1024]; \ 433 + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ 434 + if (!ASSERT_NEQ(0, system(cmd), cmd)) \ 435 + goto goto_label; \ 436 + }) 437 + 430 438 #define ALL_TO_DEV_NULL " >/dev/null 2>&1" 431 439 432 440 #define SYS_NOFAIL(fmt, ...) \
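The new SYS_FAIL() macro complements SYS(): both format and run a shell command, but SYS() asserts that the command succeeds while SYS_FAIL() asserts that it exits non-zero, jumping to the supplied label when the expectation is violated. A hedged usage sketch (the device names and the surrounding function are illustrative only, not from the tree):

#include "test_progs.h"

/* Illustrative negative test: adding the same link twice must be rejected,
 * which is exactly the kind of check SYS_FAIL() expresses in one line.
 */
void test_link_dup(void)
{
	SYS(out, "ip link add dev veth_x type veth peer name veth_y");
	SYS_FAIL(out, "ip link add dev veth_x type veth peer name veth_y");
out:
	SYS_NOFAIL("ip link del dev veth_x");
}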
-645
tools/testing/selftests/bpf/test_tunnel.sh
··· 1 - #!/bin/bash 2 - # SPDX-License-Identifier: GPL-2.0 3 - 4 - # End-to-end eBPF tunnel test suite 5 - # The script tests BPF network tunnel implementation. 6 - # 7 - # Topology: 8 - # --------- 9 - # root namespace | at_ns0 namespace 10 - # | 11 - # ----------- | ----------- 12 - # | tnl dev | | | tnl dev | (overlay network) 13 - # ----------- | ----------- 14 - # metadata-mode | native-mode 15 - # with bpf | 16 - # | 17 - # ---------- | ---------- 18 - # | veth1 | --------- | veth0 | (underlay network) 19 - # ---------- peer ---------- 20 - # 21 - # 22 - # Device Configuration 23 - # -------------------- 24 - # Root namespace with metadata-mode tunnel + BPF 25 - # Device names and addresses: 26 - # veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay) 27 - # tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay) 28 - # 29 - # Namespace at_ns0 with native tunnel 30 - # Device names and addresses: 31 - # veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) 32 - # tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay) 33 - # 34 - # 35 - # End-to-end ping packet flow 36 - # --------------------------- 37 - # Most of the tests start by namespace creation, device configuration, 38 - # then ping the underlay and overlay network. When doing 'ping 10.1.1.100' 39 - # from root namespace, the following operations happen: 40 - # 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev. 41 - # 2) Tnl device's egress BPF program is triggered and set the tunnel metadata, 42 - # with remote_ip=172.16.1.100 and others. 43 - # 3) Outer tunnel header is prepended and route the packet to veth1's egress 44 - # 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0 45 - # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet 46 - # 6) Forward the packet to the overlay tnl dev 47 - 48 - BPF_FILE="test_tunnel_kern.bpf.o" 49 - BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel" 50 - PING_ARG="-c 3 -w 10 -q" 51 - ret=0 52 - GREEN='\033[0;92m' 53 - RED='\033[0;31m' 54 - NC='\033[0m' # No Color 55 - 56 - config_device() 57 - { 58 - ip netns add at_ns0 59 - ip link add veth0 type veth peer name veth1 60 - ip link set veth0 netns at_ns0 61 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 62 - ip netns exec at_ns0 ip link set dev veth0 up 63 - ip link set dev veth1 up mtu 1500 64 - ip addr add dev veth1 172.16.1.200/24 65 - } 66 - 67 - add_gre_tunnel() 68 - { 69 - tun_key= 70 - if [ -n "$1" ]; then 71 - tun_key="key $1" 72 - fi 73 - 74 - # at_ns0 namespace 75 - ip netns exec at_ns0 \ 76 - ip link add dev $DEV_NS type $TYPE seq $tun_key \ 77 - local 172.16.1.100 remote 172.16.1.200 78 - ip netns exec at_ns0 ip link set dev $DEV_NS up 79 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 80 - 81 - # root namespace 82 - ip link add dev $DEV type $TYPE $tun_key external 83 - ip link set dev $DEV up 84 - ip addr add dev $DEV 10.1.1.200/24 85 - } 86 - 87 - add_ip6gretap_tunnel() 88 - { 89 - 90 - # assign ipv6 address 91 - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 92 - ip netns exec at_ns0 ip link set dev veth0 up 93 - ip addr add dev veth1 ::22/96 94 - ip link set dev veth1 up 95 - 96 - # at_ns0 namespace 97 - ip netns exec at_ns0 \ 98 - ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \ 99 - local ::11 remote ::22 100 - 101 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 102 - ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96 103 - ip netns exec at_ns0 ip link set dev $DEV_NS up 104 - 105 - # root 
namespace 106 - ip link add dev $DEV type $TYPE external 107 - ip addr add dev $DEV 10.1.1.200/24 108 - ip addr add dev $DEV fc80::200/24 109 - ip link set dev $DEV up 110 - } 111 - 112 - add_erspan_tunnel() 113 - { 114 - # at_ns0 namespace 115 - if [ "$1" == "v1" ]; then 116 - ip netns exec at_ns0 \ 117 - ip link add dev $DEV_NS type $TYPE seq key 2 \ 118 - local 172.16.1.100 remote 172.16.1.200 \ 119 - erspan_ver 1 erspan 123 120 - else 121 - ip netns exec at_ns0 \ 122 - ip link add dev $DEV_NS type $TYPE seq key 2 \ 123 - local 172.16.1.100 remote 172.16.1.200 \ 124 - erspan_ver 2 erspan_dir egress erspan_hwid 3 125 - fi 126 - ip netns exec at_ns0 ip link set dev $DEV_NS up 127 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 128 - 129 - # root namespace 130 - ip link add dev $DEV type $TYPE external 131 - ip link set dev $DEV up 132 - ip addr add dev $DEV 10.1.1.200/24 133 - } 134 - 135 - add_ip6erspan_tunnel() 136 - { 137 - 138 - # assign ipv6 address 139 - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 140 - ip netns exec at_ns0 ip link set dev veth0 up 141 - ip addr add dev veth1 ::22/96 142 - ip link set dev veth1 up 143 - 144 - # at_ns0 namespace 145 - if [ "$1" == "v1" ]; then 146 - ip netns exec at_ns0 \ 147 - ip link add dev $DEV_NS type $TYPE seq key 2 \ 148 - local ::11 remote ::22 \ 149 - erspan_ver 1 erspan 123 150 - else 151 - ip netns exec at_ns0 \ 152 - ip link add dev $DEV_NS type $TYPE seq key 2 \ 153 - local ::11 remote ::22 \ 154 - erspan_ver 2 erspan_dir egress erspan_hwid 7 155 - fi 156 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 157 - ip netns exec at_ns0 ip link set dev $DEV_NS up 158 - 159 - # root namespace 160 - ip link add dev $DEV type $TYPE external 161 - ip addr add dev $DEV 10.1.1.200/24 162 - ip link set dev $DEV up 163 - } 164 - 165 - add_geneve_tunnel() 166 - { 167 - # at_ns0 namespace 168 - ip netns exec at_ns0 \ 169 - ip link add dev $DEV_NS type $TYPE \ 170 - id 2 dstport 6081 remote 172.16.1.200 171 - ip netns exec at_ns0 ip link set dev $DEV_NS up 172 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 173 - 174 - # root namespace 175 - ip link add dev $DEV type $TYPE dstport 6081 external 176 - ip link set dev $DEV up 177 - ip addr add dev $DEV 10.1.1.200/24 178 - } 179 - 180 - add_ip6geneve_tunnel() 181 - { 182 - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 183 - ip netns exec at_ns0 ip link set dev veth0 up 184 - ip addr add dev veth1 ::22/96 185 - ip link set dev veth1 up 186 - 187 - # at_ns0 namespace 188 - ip netns exec at_ns0 \ 189 - ip link add dev $DEV_NS type $TYPE id 22 \ 190 - remote ::22 # geneve has no local option 191 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 192 - ip netns exec at_ns0 ip link set dev $DEV_NS up 193 - 194 - # root namespace 195 - ip link add dev $DEV type $TYPE external 196 - ip addr add dev $DEV 10.1.1.200/24 197 - ip link set dev $DEV up 198 - } 199 - 200 - add_ipip_tunnel() 201 - { 202 - # at_ns0 namespace 203 - ip netns exec at_ns0 \ 204 - ip link add dev $DEV_NS type $TYPE \ 205 - local 172.16.1.100 remote 172.16.1.200 206 - ip netns exec at_ns0 ip link set dev $DEV_NS up 207 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 208 - 209 - # root namespace 210 - ip link add dev $DEV type $TYPE external 211 - ip link set dev $DEV up 212 - ip addr add dev $DEV 10.1.1.200/24 213 - } 214 - 215 - add_ip6tnl_tunnel() 216 - { 217 - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 218 - ip netns exec at_ns0 ip link set dev veth0 up 219 - ip 
addr add dev veth1 ::22/96 220 - ip link set dev veth1 up 221 - 222 - # at_ns0 namespace 223 - ip netns exec at_ns0 \ 224 - ip link add dev $DEV_NS type $TYPE \ 225 - local ::11 remote ::22 226 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 227 - ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96 228 - ip netns exec at_ns0 ip link set dev $DEV_NS up 229 - 230 - # root namespace 231 - ip link add dev $DEV type $TYPE external 232 - ip addr add dev $DEV 10.1.1.200/24 233 - ip addr add dev $DEV 1::22/96 234 - ip link set dev $DEV up 235 - } 236 - 237 - test_gre() 238 - { 239 - TYPE=gretap 240 - DEV_NS=gretap00 241 - DEV=gretap11 242 - ret=0 243 - 244 - check $TYPE 245 - config_device 246 - add_gre_tunnel 2 247 - attach_bpf $DEV gre_set_tunnel gre_get_tunnel 248 - ping $PING_ARG 10.1.1.100 249 - check_err $? 250 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 251 - check_err $? 252 - cleanup 253 - 254 - if [ $ret -ne 0 ]; then 255 - echo -e ${RED}"FAIL: $TYPE"${NC} 256 - return 1 257 - fi 258 - echo -e ${GREEN}"PASS: $TYPE"${NC} 259 - } 260 - 261 - test_gre_no_tunnel_key() 262 - { 263 - TYPE=gre 264 - DEV_NS=gre00 265 - DEV=gre11 266 - ret=0 267 - 268 - check $TYPE 269 - config_device 270 - add_gre_tunnel 271 - attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel 272 - ping $PING_ARG 10.1.1.100 273 - check_err $? 274 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 275 - check_err $? 276 - cleanup 277 - 278 - if [ $ret -ne 0 ]; then 279 - echo -e ${RED}"FAIL: $TYPE"${NC} 280 - return 1 281 - fi 282 - echo -e ${GREEN}"PASS: $TYPE"${NC} 283 - } 284 - 285 - test_ip6gre() 286 - { 287 - TYPE=ip6gre 288 - DEV_NS=ip6gre00 289 - DEV=ip6gre11 290 - ret=0 291 - 292 - check $TYPE 293 - config_device 294 - # reuse the ip6gretap function 295 - add_ip6gretap_tunnel 296 - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel 297 - # underlay 298 - ping6 $PING_ARG ::11 299 - # overlay: ipv4 over ipv6 300 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 301 - ping $PING_ARG 10.1.1.100 302 - check_err $? 303 - # overlay: ipv6 over ipv6 304 - ip netns exec at_ns0 ping6 $PING_ARG fc80::200 305 - check_err $? 306 - cleanup 307 - 308 - if [ $ret -ne 0 ]; then 309 - echo -e ${RED}"FAIL: $TYPE"${NC} 310 - return 1 311 - fi 312 - echo -e ${GREEN}"PASS: $TYPE"${NC} 313 - } 314 - 315 - test_ip6gretap() 316 - { 317 - TYPE=ip6gretap 318 - DEV_NS=ip6gretap00 319 - DEV=ip6gretap11 320 - ret=0 321 - 322 - check $TYPE 323 - config_device 324 - add_ip6gretap_tunnel 325 - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel 326 - # underlay 327 - ping6 $PING_ARG ::11 328 - # overlay: ipv4 over ipv6 329 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 330 - ping $PING_ARG 10.1.1.100 331 - check_err $? 332 - # overlay: ipv6 over ipv6 333 - ip netns exec at_ns0 ping6 $PING_ARG fc80::200 334 - check_err $? 335 - cleanup 336 - 337 - if [ $ret -ne 0 ]; then 338 - echo -e ${RED}"FAIL: $TYPE"${NC} 339 - return 1 340 - fi 341 - echo -e ${GREEN}"PASS: $TYPE"${NC} 342 - } 343 - 344 - test_erspan() 345 - { 346 - TYPE=erspan 347 - DEV_NS=erspan00 348 - DEV=erspan11 349 - ret=0 350 - 351 - check $TYPE 352 - config_device 353 - add_erspan_tunnel $1 354 - attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel 355 - ping $PING_ARG 10.1.1.100 356 - check_err $? 357 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 358 - check_err $? 
359 - cleanup 360 - 361 - if [ $ret -ne 0 ]; then 362 - echo -e ${RED}"FAIL: $TYPE"${NC} 363 - return 1 364 - fi 365 - echo -e ${GREEN}"PASS: $TYPE"${NC} 366 - } 367 - 368 - test_ip6erspan() 369 - { 370 - TYPE=ip6erspan 371 - DEV_NS=ip6erspan00 372 - DEV=ip6erspan11 373 - ret=0 374 - 375 - check $TYPE 376 - config_device 377 - add_ip6erspan_tunnel $1 378 - attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel 379 - ping6 $PING_ARG ::11 380 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 381 - check_err $? 382 - cleanup 383 - 384 - if [ $ret -ne 0 ]; then 385 - echo -e ${RED}"FAIL: $TYPE"${NC} 386 - return 1 387 - fi 388 - echo -e ${GREEN}"PASS: $TYPE"${NC} 389 - } 390 - 391 - test_geneve() 392 - { 393 - TYPE=geneve 394 - DEV_NS=geneve00 395 - DEV=geneve11 396 - ret=0 397 - 398 - check $TYPE 399 - config_device 400 - add_geneve_tunnel 401 - attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel 402 - ping $PING_ARG 10.1.1.100 403 - check_err $? 404 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 405 - check_err $? 406 - cleanup 407 - 408 - if [ $ret -ne 0 ]; then 409 - echo -e ${RED}"FAIL: $TYPE"${NC} 410 - return 1 411 - fi 412 - echo -e ${GREEN}"PASS: $TYPE"${NC} 413 - } 414 - 415 - test_ip6geneve() 416 - { 417 - TYPE=geneve 418 - DEV_NS=ip6geneve00 419 - DEV=ip6geneve11 420 - ret=0 421 - 422 - check $TYPE 423 - config_device 424 - add_ip6geneve_tunnel 425 - attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel 426 - ping $PING_ARG 10.1.1.100 427 - check_err $? 428 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 429 - check_err $? 430 - cleanup 431 - 432 - if [ $ret -ne 0 ]; then 433 - echo -e ${RED}"FAIL: ip6$TYPE"${NC} 434 - return 1 435 - fi 436 - echo -e ${GREEN}"PASS: ip6$TYPE"${NC} 437 - } 438 - 439 - test_ipip() 440 - { 441 - TYPE=ipip 442 - DEV_NS=ipip00 443 - DEV=ipip11 444 - ret=0 445 - 446 - check $TYPE 447 - config_device 448 - add_ipip_tunnel 449 - ip link set dev veth1 mtu 1500 450 - attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel 451 - ping $PING_ARG 10.1.1.100 452 - check_err $? 453 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 454 - check_err $? 455 - cleanup 456 - 457 - if [ $ret -ne 0 ]; then 458 - echo -e ${RED}"FAIL: $TYPE"${NC} 459 - return 1 460 - fi 461 - echo -e ${GREEN}"PASS: $TYPE"${NC} 462 - } 463 - 464 - test_ipip6() 465 - { 466 - TYPE=ip6tnl 467 - DEV_NS=ipip6tnl00 468 - DEV=ipip6tnl11 469 - ret=0 470 - 471 - check $TYPE 472 - config_device 473 - add_ip6tnl_tunnel 474 - ip link set dev veth1 mtu 1500 475 - attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel 476 - # underlay 477 - ping6 $PING_ARG ::11 478 - # ip4 over ip6 479 - ping $PING_ARG 10.1.1.100 480 - check_err $? 481 - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 482 - check_err $? 483 - cleanup 484 - 485 - if [ $ret -ne 0 ]; then 486 - echo -e ${RED}"FAIL: $TYPE"${NC} 487 - return 1 488 - fi 489 - echo -e ${GREEN}"PASS: $TYPE"${NC} 490 - } 491 - 492 - test_ip6ip6() 493 - { 494 - TYPE=ip6tnl 495 - DEV_NS=ip6ip6tnl00 496 - DEV=ip6ip6tnl11 497 - ret=0 498 - 499 - check $TYPE 500 - config_device 501 - add_ip6tnl_tunnel 502 - ip link set dev veth1 mtu 1500 503 - attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel 504 - # underlay 505 - ping6 $PING_ARG ::11 506 - # ip6 over ip6 507 - ping6 $PING_ARG 1::11 508 - check_err $? 509 - ip netns exec at_ns0 ping6 $PING_ARG 1::22 510 - check_err $? 
511 - cleanup 512 - 513 - if [ $ret -ne 0 ]; then 514 - echo -e ${RED}"FAIL: ip6$TYPE"${NC} 515 - return 1 516 - fi 517 - echo -e ${GREEN}"PASS: ip6$TYPE"${NC} 518 - } 519 - 520 - attach_bpf() 521 - { 522 - DEV=$1 523 - SET=$2 524 - GET=$3 525 - mkdir -p ${BPF_PIN_TUNNEL_DIR} 526 - bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/ 527 - tc qdisc add dev $DEV clsact 528 - tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET 529 - tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET 530 - } 531 - 532 - cleanup() 533 - { 534 - rm -rf ${BPF_PIN_TUNNEL_DIR} 535 - 536 - ip netns delete at_ns0 2> /dev/null 537 - ip link del veth1 2> /dev/null 538 - ip link del ipip11 2> /dev/null 539 - ip link del ipip6tnl11 2> /dev/null 540 - ip link del ip6ip6tnl11 2> /dev/null 541 - ip link del gretap11 2> /dev/null 542 - ip link del gre11 2> /dev/null 543 - ip link del ip6gre11 2> /dev/null 544 - ip link del ip6gretap11 2> /dev/null 545 - ip link del geneve11 2> /dev/null 546 - ip link del ip6geneve11 2> /dev/null 547 - ip link del erspan11 2> /dev/null 548 - ip link del ip6erspan11 2> /dev/null 549 - } 550 - 551 - cleanup_exit() 552 - { 553 - echo "CATCH SIGKILL or SIGINT, cleanup and exit" 554 - cleanup 555 - exit 0 556 - } 557 - 558 - check() 559 - { 560 - ip link help 2>&1 | grep -q "\s$1\s" 561 - if [ $? -ne 0 ];then 562 - echo "SKIP $1: iproute2 not support" 563 - cleanup 564 - return 1 565 - fi 566 - } 567 - 568 - enable_debug() 569 - { 570 - echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control 571 - echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control 572 - echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control 573 - echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control 574 - } 575 - 576 - check_err() 577 - { 578 - if [ $ret -eq 0 ]; then 579 - ret=$1 580 - fi 581 - } 582 - 583 - bpf_tunnel_test() 584 - { 585 - local errors=0 586 - 587 - echo "Testing GRE tunnel..." 588 - test_gre 589 - errors=$(( $errors + $? )) 590 - 591 - echo "Testing GRE tunnel (without tunnel keys)..." 592 - test_gre_no_tunnel_key 593 - errors=$(( $errors + $? )) 594 - 595 - echo "Testing IP6GRE tunnel..." 596 - test_ip6gre 597 - errors=$(( $errors + $? )) 598 - 599 - echo "Testing IP6GRETAP tunnel..." 600 - test_ip6gretap 601 - errors=$(( $errors + $? )) 602 - 603 - echo "Testing ERSPAN tunnel..." 604 - test_erspan v2 605 - errors=$(( $errors + $? )) 606 - 607 - echo "Testing IP6ERSPAN tunnel..." 608 - test_ip6erspan v2 609 - errors=$(( $errors + $? )) 610 - 611 - echo "Testing GENEVE tunnel..." 612 - test_geneve 613 - errors=$(( $errors + $? )) 614 - 615 - echo "Testing IP6GENEVE tunnel..." 616 - test_ip6geneve 617 - errors=$(( $errors + $? )) 618 - 619 - echo "Testing IPIP tunnel..." 620 - test_ipip 621 - errors=$(( $errors + $? )) 622 - 623 - echo "Testing IPIP6 tunnel..." 624 - test_ipip6 625 - errors=$(( $errors + $? )) 626 - 627 - echo "Testing IP6IP6 tunnel..." 628 - test_ip6ip6 629 - errors=$(( $errors + $? )) 630 - 631 - return $errors 632 - } 633 - 634 - trap cleanup 0 3 6 635 - trap cleanup_exit 2 9 636 - 637 - cleanup 638 - bpf_tunnel_test 639 - 640 - if [ $? -ne 0 ]; then 641 - echo -e "$(basename $0): ${RED}FAIL${NC}" 642 - exit 1 643 - fi 644 - echo -e "$(basename $0): ${GREEN}PASS${NC}" 645 - exit 0
-214
tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
··· 1 - #!/bin/bash 2 - # SPDX-License-Identifier: GPL-2.0 3 - # 4 - # Test topology: 5 - # - - - - - - - - - - - - - - - - - - - 6 - # | veth1 veth2 veth3 | ns0 7 - # - -| - - - - - - | - - - - - - | - - 8 - # --------- --------- --------- 9 - # | veth0 | | veth0 | | veth0 | 10 - # --------- --------- --------- 11 - # ns1 ns2 ns3 12 - # 13 - # Test modules: 14 - # XDP modes: generic, native, native + egress_prog 15 - # 16 - # Test cases: 17 - # ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive 18 - # the redirects. 19 - # ns1 -> gw: ns1, ns2, ns3, should receive the arp request 20 - # IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress 21 - # interface should not receive the redirects. 22 - # ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects. 23 - # IPv6: Testing none flag, all the pkts should be redirected back 24 - # ping test: ns1 -> ns2 (block), echo requests will be redirect back 25 - # egress_prog: 26 - # all src mac should be egress interface's mac 27 - 28 - # netns numbers 29 - NUM=3 30 - IFACES="" 31 - DRV_MODE="xdpgeneric xdpdrv xdpegress" 32 - PASS=0 33 - FAIL=0 34 - LOG_DIR=$(mktemp -d) 35 - declare -a NS 36 - NS[0]="ns0-$(mktemp -u XXXXXX)" 37 - NS[1]="ns1-$(mktemp -u XXXXXX)" 38 - NS[2]="ns2-$(mktemp -u XXXXXX)" 39 - NS[3]="ns3-$(mktemp -u XXXXXX)" 40 - 41 - test_pass() 42 - { 43 - echo "Pass: $@" 44 - PASS=$((PASS + 1)) 45 - } 46 - 47 - test_fail() 48 - { 49 - echo "fail: $@" 50 - FAIL=$((FAIL + 1)) 51 - } 52 - 53 - clean_up() 54 - { 55 - for i in $(seq 0 $NUM); do 56 - ip netns del ${NS[$i]} 2> /dev/null 57 - done 58 - } 59 - 60 - # Kselftest framework requirement - SKIP code is 4. 61 - check_env() 62 - { 63 - ip link set dev lo xdpgeneric off &>/dev/null 64 - if [ $? -ne 0 ];then 65 - echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support" 66 - exit 4 67 - fi 68 - 69 - which tcpdump &>/dev/null 70 - if [ $? 
-ne 0 ];then 71 - echo "selftests: [SKIP] Could not run test without tcpdump" 72 - exit 4 73 - fi 74 - } 75 - 76 - setup_ns() 77 - { 78 - local mode=$1 79 - IFACES="" 80 - 81 - if [ "$mode" = "xdpegress" ]; then 82 - mode="xdpdrv" 83 - fi 84 - 85 - ip netns add ${NS[0]} 86 - for i in $(seq $NUM); do 87 - ip netns add ${NS[$i]} 88 - ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]} 89 - ip -n ${NS[$i]} link set veth0 up 90 - ip -n ${NS[0]} link set veth$i up 91 - 92 - ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0 93 - ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0 94 - # Add a neigh entry for IPv4 ping test 95 - ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 96 - ip -n ${NS[$i]} link set veth0 $mode obj \ 97 - xdp_dummy.bpf.o sec xdp &> /dev/null || \ 98 - { test_fail "Unable to load dummy xdp" && exit 1; } 99 - IFACES="$IFACES veth$i" 100 - veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}') 101 - done 102 - } 103 - 104 - do_egress_tests() 105 - { 106 - local mode=$1 107 - 108 - # mac test 109 - ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log & 110 - ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log & 111 - sleep 0.5 112 - ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null 113 - sleep 0.5 114 - pkill tcpdump 115 - 116 - # mac check 117 - grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \ 118 - test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2" 119 - grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-3_${mode}.log && \ 120 - test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3" 121 - } 122 - 123 - do_ping_tests() 124 - { 125 - local mode=$1 126 - 127 - # ping6 test: echo request should be redirect back to itself, not others 128 - ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 129 - 130 - ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log & 131 - ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log & 132 - ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log & 133 - sleep 0.5 134 - # ARP test 135 - ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254 136 - # IPv4 test 137 - ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null 138 - # IPv6 test 139 - ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null 140 - sleep 0.5 141 - pkill tcpdump 142 - 143 - # All netns should receive the redirect arp requests 144 - [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ 145 - test_pass "$mode arp(F_BROADCAST) ns1-1" || \ 146 - test_fail "$mode arp(F_BROADCAST) ns1-1" 147 - [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -eq 2 ] && \ 148 - test_pass "$mode arp(F_BROADCAST) ns1-2" || \ 149 - test_fail "$mode arp(F_BROADCAST) ns1-2" 150 - [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -eq 2 ] && \ 151 - test_pass "$mode arp(F_BROADCAST) ns1-3" || \ 152 - test_fail "$mode arp(F_BROADCAST) ns1-3" 153 - 154 - # ns1 should not receive the redirect echo request, others should 155 - [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ 156 - test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \ 157 - test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" 158 - [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 4 ] 
&& \ 159 - test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \ 160 - test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" 161 - [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-3_${mode}.log) -eq 4 ] && \ 162 - test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \ 163 - test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" 164 - 165 - # ns1 should receive the echo request, ns2 should not 166 - [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \ 167 - test_pass "$mode IPv6 (no flags) ns1-1" || \ 168 - test_fail "$mode IPv6 (no flags) ns1-1" 169 - [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 0 ] && \ 170 - test_pass "$mode IPv6 (no flags) ns1-2" || \ 171 - test_fail "$mode IPv6 (no flags) ns1-2" 172 - } 173 - 174 - do_tests() 175 - { 176 - local mode=$1 177 - local drv_p 178 - 179 - case ${mode} in 180 - xdpdrv) drv_p="-N";; 181 - xdpegress) drv_p="-X";; 182 - xdpgeneric) drv_p="-S";; 183 - esac 184 - 185 - ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log & 186 - xdp_pid=$! 187 - sleep 1 188 - if ! ps -p $xdp_pid > /dev/null; then 189 - test_fail "$mode xdp_redirect_multi start failed" 190 - return 1 191 - fi 192 - 193 - if [ "$mode" = "xdpegress" ]; then 194 - do_egress_tests $mode 195 - else 196 - do_ping_tests $mode 197 - fi 198 - 199 - kill $xdp_pid 200 - } 201 - 202 - check_env 203 - 204 - trap clean_up EXIT 205 - 206 - for mode in ${DRV_MODE}; do 207 - setup_ns $mode 208 - do_tests $mode 209 - clean_up 210 - done 211 - rm -rf ${LOG_DIR} 212 - 213 - echo "Summary: PASS $PASS, FAIL $FAIL" 214 - [ $FAIL -eq 0 ] && exit 0 || exit 1
-233
tools/testing/selftests/bpf/test_xdp_vlan.sh
··· 1 - #!/bin/bash 2 - # SPDX-License-Identifier: GPL-2.0 3 - # Author: Jesper Dangaard Brouer <hawk@kernel.org> 4 - 5 - # Kselftest framework requirement - SKIP code is 4. 6 - readonly KSFT_SKIP=4 7 - readonly NS1="ns1-$(mktemp -u XXXXXX)" 8 - readonly NS2="ns2-$(mktemp -u XXXXXX)" 9 - 10 - # Allow wrapper scripts to name test 11 - if [ -z "$TESTNAME" ]; then 12 - TESTNAME=xdp_vlan 13 - fi 14 - 15 - # Default XDP mode 16 - XDP_MODE=xdpgeneric 17 - 18 - usage() { 19 - echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME" 20 - echo "" 21 - echo "Usage: $0 [-vfh]" 22 - echo " -v | --verbose : Verbose" 23 - echo " --flush : Flush before starting (e.g. after --interactive)" 24 - echo " --interactive : Keep netns setup running after test-run" 25 - echo " --mode=XXX : Choose XDP mode (xdp | xdpgeneric | xdpdrv)" 26 - echo "" 27 - } 28 - 29 - valid_xdp_mode() 30 - { 31 - local mode=$1 32 - 33 - case "$mode" in 34 - xdpgeneric | xdpdrv | xdp) 35 - return 0 36 - ;; 37 - *) 38 - return 1 39 - esac 40 - } 41 - 42 - cleanup() 43 - { 44 - local status=$? 45 - 46 - if [ "$status" = "0" ]; then 47 - echo "selftests: $TESTNAME [PASS]"; 48 - else 49 - echo "selftests: $TESTNAME [FAILED]"; 50 - fi 51 - 52 - if [ -n "$INTERACTIVE" ]; then 53 - echo "Namespace setup still active explore with:" 54 - echo " ip netns exec ${NS1} bash" 55 - echo " ip netns exec ${NS2} bash" 56 - exit $status 57 - fi 58 - 59 - set +e 60 - ip link del veth1 2> /dev/null 61 - ip netns del ${NS1} 2> /dev/null 62 - ip netns del ${NS2} 2> /dev/null 63 - } 64 - 65 - # Using external program "getopt" to get --long-options 66 - OPTIONS=$(getopt -o hvfi: \ 67 - --long verbose,flush,help,interactive,debug,mode: -- "$@") 68 - if (( $? != 0 )); then 69 - usage 70 - echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?" 71 - exit 2 72 - fi 73 - eval set -- "$OPTIONS" 74 - 75 - ## --- Parse command line arguments / parameters --- 76 - while true; do 77 - case "$1" in 78 - -v | --verbose) 79 - export VERBOSE=yes 80 - shift 81 - ;; 82 - -i | --interactive | --debug ) 83 - INTERACTIVE=yes 84 - shift 85 - ;; 86 - -f | --flush ) 87 - cleanup 88 - shift 89 - ;; 90 - --mode ) 91 - shift 92 - XDP_MODE=$1 93 - shift 94 - ;; 95 - -- ) 96 - shift 97 - break 98 - ;; 99 - -h | --help ) 100 - usage; 101 - echo "selftests: $TESTNAME [SKIP] usage help info requested" 102 - exit $KSFT_SKIP 103 - ;; 104 - * ) 105 - shift 106 - break 107 - ;; 108 - esac 109 - done 110 - 111 - if [ "$EUID" -ne 0 ]; then 112 - echo "selftests: $TESTNAME [FAILED] need root privileges" 113 - exit 1 114 - fi 115 - 116 - valid_xdp_mode $XDP_MODE 117 - if [ $? -ne 0 ]; then 118 - echo "selftests: $TESTNAME [FAILED] unknown XDP mode ($XDP_MODE)" 119 - exit 1 120 - fi 121 - 122 - ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null 123 - if [ $? 
-ne 0 ]; then 124 - echo "selftests: $TESTNAME [SKIP] need ip xdp support" 125 - exit $KSFT_SKIP 126 - fi 127 - 128 - # Interactive mode likely require us to cleanup netns 129 - if [ -n "$INTERACTIVE" ]; then 130 - ip link del veth1 2> /dev/null 131 - ip netns del ${NS1} 2> /dev/null 132 - ip netns del ${NS2} 2> /dev/null 133 - fi 134 - 135 - # Exit on failure 136 - set -e 137 - 138 - # Some shell-tools dependencies 139 - which ip > /dev/null 140 - which tc > /dev/null 141 - which ethtool > /dev/null 142 - 143 - # Make rest of shell verbose, showing comments as doc/info 144 - if [ -n "$VERBOSE" ]; then 145 - set -v 146 - fi 147 - 148 - # Create two namespaces 149 - ip netns add ${NS1} 150 - ip netns add ${NS2} 151 - 152 - # Run cleanup if failing or on kill 153 - trap cleanup 0 2 3 6 9 154 - 155 - # Create veth pair 156 - ip link add veth1 type veth peer name veth2 157 - 158 - # Move veth1 and veth2 into the respective namespaces 159 - ip link set veth1 netns ${NS1} 160 - ip link set veth2 netns ${NS2} 161 - 162 - # NOTICE: XDP require VLAN header inside packet payload 163 - # - Thus, disable VLAN offloading driver features 164 - # - For veth REMEMBER TX side VLAN-offload 165 - # 166 - # Disable rx-vlan-offload (mostly needed on ns1) 167 - ip netns exec ${NS1} ethtool -K veth1 rxvlan off 168 - ip netns exec ${NS2} ethtool -K veth2 rxvlan off 169 - # 170 - # Disable tx-vlan-offload (mostly needed on ns2) 171 - ip netns exec ${NS2} ethtool -K veth2 txvlan off 172 - ip netns exec ${NS1} ethtool -K veth1 txvlan off 173 - 174 - export IPADDR1=100.64.41.1 175 - export IPADDR2=100.64.41.2 176 - 177 - # In ns1/veth1 add IP-addr on plain net_device 178 - ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1 179 - ip netns exec ${NS1} ip link set veth1 up 180 - 181 - # In ns2/veth2 create VLAN device 182 - export VLAN=4011 183 - export DEVNS2=veth2 184 - ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN 185 - ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN 186 - ip netns exec ${NS2} ip link set $DEVNS2 up 187 - ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up 188 - 189 - # Bringup lo in netns (to avoids confusing people using --interactive) 190 - ip netns exec ${NS1} ip link set lo up 191 - ip netns exec ${NS2} ip link set lo up 192 - 193 - # At this point, the hosts cannot reach each-other, 194 - # because ns2 are using VLAN tags on the packets. 
195 - 196 - ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"' 197 - 198 - 199 - # Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags 200 - # ---------------------------------------------------------------------- 201 - # In ns1: ingress use XDP to remove VLAN tags 202 - export DEVNS1=veth1 203 - export BPF_FILE=test_xdp_vlan.bpf.o 204 - 205 - # First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" 206 - export XDP_PROG=xdp_vlan_change 207 - ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG 208 - 209 - # In ns1: egress use TC to add back VLAN tag 4011 210 - # (del cmd) 211 - # tc qdisc del dev $DEVNS1 clsact 2> /dev/null 212 - # 213 - ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact 214 - ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \ 215 - prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push 216 - 217 - # Now the namespaces can reach each-other, test with ping: 218 - ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 219 - ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 220 - 221 - # Second test: Replace xdp prog, that fully remove vlan header 222 - # 223 - # Catch kernel bug for generic-XDP, that does didn't allow us to 224 - # remove a VLAN header, because skb->protocol still contain VLAN 225 - # ETH_P_8021Q indication, and this cause overwriting of our changes. 226 - # 227 - export XDP_PROG=xdp_vlan_remove_outer2 228 - ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off 229 - ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG 230 - 231 - # Now the namespaces should still be able reach each-other, test with ping: 232 - ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 233 - ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
-9
tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
··· 1 - #!/bin/bash
2 - # SPDX-License-Identifier: GPL-2.0
3 - 
4 - # Exit on failure
5 - set -e
6 - 
7 - # Wrapper script to test generic-XDP
8 - export TESTNAME=xdp_vlan_mode_generic
9 - ./test_xdp_vlan.sh --mode=xdpgeneric
-9
tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
··· 1 - #!/bin/bash
2 - # SPDX-License-Identifier: GPL-2.0
3 - 
4 - # Exit on failure
5 - set -e
6 - 
7 - # Wrapper script to test native-XDP
8 - export TESTNAME=xdp_vlan_mode_native
9 - ./test_xdp_vlan.sh --mode=xdpdrv
+355 -12
tools/testing/selftests/bpf/veristat.c
··· 3 3 #define _GNU_SOURCE 4 4 #include <argp.h> 5 5 #include <libgen.h> 6 + #include <ctype.h> 6 7 #include <string.h> 7 8 #include <stdlib.h> 8 9 #include <sched.h> ··· 155 154 bool abs; 156 155 }; 157 156 157 + struct var_preset { 158 + char *name; 159 + enum { INTEGRAL, ENUMERATOR } type; 160 + union { 161 + long long ivalue; 162 + char *svalue; 163 + }; 164 + bool applied; 165 + }; 166 + 158 167 static struct env { 159 168 char **filenames; 160 169 int filename_cnt; ··· 206 195 int progs_processed; 207 196 int progs_skipped; 208 197 int top_src_lines; 198 + struct var_preset *presets; 199 + int npresets; 209 200 } env; 210 201 211 202 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) ··· 259 246 { "test-reg-invariants", 'r', NULL, 0, 260 247 "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, 261 248 { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, 249 + { "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" }, 262 250 {}, 263 251 }; 264 252 265 253 static int parse_stats(const char *stats_str, struct stat_specs *specs); 266 254 static int append_filter(struct filter **filters, int *cnt, const char *str); 267 255 static int append_filter_file(const char *path); 256 + static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr); 257 + static int append_var_preset_file(const char *filename); 258 + static int append_file(const char *path); 259 + static int append_file_from_file(const char *path); 268 260 269 261 static error_t parse_arg(int key, char *arg, struct argp_state *state) 270 262 { 271 - void *tmp; 272 263 int err; 273 264 274 265 switch (key) { ··· 370 353 argp_usage(state); 371 354 } 372 355 break; 356 + case 'G': { 357 + if (arg[0] == '@') 358 + err = append_var_preset_file(arg + 1); 359 + else 360 + err = append_var_preset(&env.presets, &env.npresets, arg); 361 + if (err) { 362 + fprintf(stderr, "Failed to parse global variable presets: %s\n", arg); 363 + return err; 364 + } 365 + break; 366 + } 373 367 case ARGP_KEY_ARG: 374 - tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); 375 - if (!tmp) 376 - return -ENOMEM; 377 - env.filenames = tmp; 378 - env.filenames[env.filename_cnt] = strdup(arg); 379 - if (!env.filenames[env.filename_cnt]) 380 - return -ENOMEM; 381 - env.filename_cnt++; 368 + if (arg[0] == '@') 369 + err = append_file_from_file(arg + 1); 370 + else 371 + err = append_file(arg); 372 + if (err) { 373 + fprintf(stderr, "Failed to collect BPF object files: %d\n", err); 374 + return err; 375 + } 382 376 break; 383 377 default: 384 378 return ARGP_ERR_UNKNOWN; ··· 660 632 f = fopen(path, "r"); 661 633 if (!f) { 662 634 err = -errno; 663 - fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err); 635 + fprintf(stderr, "Failed to open filters in '%s': %s\n", path, strerror(-err)); 664 636 return err; 665 637 } 666 638 ··· 689 661 TOTAL_INSNS, TOTAL_STATES, SIZE, JITED_SIZE 690 662 }, 691 663 }; 664 + 665 + static int append_file(const char *path) 666 + { 667 + void *tmp; 668 + 669 + tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); 670 + if (!tmp) 671 + return -ENOMEM; 672 + env.filenames = tmp; 673 + env.filenames[env.filename_cnt] = strdup(path); 674 + if (!env.filenames[env.filename_cnt]) 675 + return -ENOMEM; 676 + env.filename_cnt++; 677 + return 0; 678 + } 679 + 680 + static int 
append_file_from_file(const char *path) 681 + { 682 + char buf[1024]; 683 + int err = 0; 684 + FILE *f; 685 + 686 + f = fopen(path, "r"); 687 + if (!f) { 688 + err = -errno; 689 + fprintf(stderr, "Failed to open object files list in '%s': %s\n", 690 + path, strerror(errno)); 691 + return err; 692 + } 693 + 694 + while (fscanf(f, " %1023[^\n]\n", buf) == 1) { 695 + /* lines starting with # are comments, skip them */ 696 + if (buf[0] == '\0' || buf[0] == '#') 697 + continue; 698 + err = append_file(buf); 699 + if (err) 700 + goto cleanup; 701 + } 702 + 703 + cleanup: 704 + fclose(f); 705 + return err; 706 + } 692 707 693 708 static const struct stat_specs default_csv_output_spec = { 694 709 .spec_cnt = 14, ··· 1234 1163 bpf_program__set_expected_attach_type(prog, attach_type); 1235 1164 1236 1165 if (!env.quiet) { 1237 - printf("Using guessed program type '%s' for %s/%s...\n", 1166 + fprintf(stderr, "Using guessed program type '%s' for %s/%s...\n", 1238 1167 libbpf_bpf_prog_type_str(prog_type), 1239 1168 filename, prog_name); 1240 1169 } 1241 1170 } else { 1242 1171 if (!env.quiet) { 1243 - printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n", 1172 + fprintf(stderr, "Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n", 1244 1173 ctx_name, filename, prog_name); 1245 1174 } 1246 1175 } ··· 1363 1292 return 0; 1364 1293 }; 1365 1294 1295 + static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr) 1296 + { 1297 + void *tmp; 1298 + struct var_preset *cur; 1299 + char var[256], val[256], *val_end; 1300 + long long value; 1301 + int n; 1302 + 1303 + tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets)); 1304 + if (!tmp) 1305 + return -ENOMEM; 1306 + *presets = tmp; 1307 + cur = &(*presets)[*cnt]; 1308 + memset(cur, 0, sizeof(*cur)); 1309 + (*cnt)++; 1310 + 1311 + if (sscanf(expr, "%s = %s %n", var, val, &n) != 2 || n != strlen(expr)) { 1312 + fprintf(stderr, "Failed to parse expression '%s'\n", expr); 1313 + return -EINVAL; 1314 + } 1315 + 1316 + if (val[0] == '-' || isdigit(val[0])) { 1317 + /* must be a number */ 1318 + errno = 0; 1319 + value = strtoll(val, &val_end, 0); 1320 + if (errno == ERANGE) { 1321 + errno = 0; 1322 + value = strtoull(val, &val_end, 0); 1323 + } 1324 + if (errno || *val_end != '\0') { 1325 + fprintf(stderr, "Failed to parse value '%s'\n", val); 1326 + return -EINVAL; 1327 + } 1328 + cur->ivalue = value; 1329 + cur->type = INTEGRAL; 1330 + } else { 1331 + /* if not a number, consider it enum value */ 1332 + cur->svalue = strdup(val); 1333 + if (!cur->svalue) 1334 + return -ENOMEM; 1335 + cur->type = ENUMERATOR; 1336 + } 1337 + 1338 + cur->name = strdup(var); 1339 + if (!cur->name) 1340 + return -ENOMEM; 1341 + 1342 + return 0; 1343 + } 1344 + 1345 + static int append_var_preset_file(const char *filename) 1346 + { 1347 + char buf[1024]; 1348 + FILE *f; 1349 + int err = 0; 1350 + 1351 + f = fopen(filename, "rt"); 1352 + if (!f) { 1353 + err = -errno; 1354 + fprintf(stderr, "Failed to open presets in '%s': %s\n", filename, strerror(-err)); 1355 + return -EINVAL; 1356 + } 1357 + 1358 + while (fscanf(f, " %1023[^\n]\n", buf) == 1) { 1359 + if (buf[0] == '\0' || buf[0] == '#') 1360 + continue; 1361 + 1362 + err = append_var_preset(&env.presets, &env.npresets, buf); 1363 + if (err) 1364 + goto cleanup; 1365 + } 1366 + 1367 + cleanup: 1368 + fclose(f); 
1369 + return err; 1370 + } 1371 + 1372 + static bool is_signed_type(const struct btf_type *t) 1373 + { 1374 + if (btf_is_int(t)) 1375 + return btf_int_encoding(t) & BTF_INT_SIGNED; 1376 + if (btf_is_any_enum(t)) 1377 + return btf_kflag(t); 1378 + return true; 1379 + } 1380 + 1381 + static int enum_value_from_name(const struct btf *btf, const struct btf_type *t, 1382 + const char *evalue, long long *retval) 1383 + { 1384 + if (btf_is_enum(t)) { 1385 + struct btf_enum *e = btf_enum(t); 1386 + int i, n = btf_vlen(t); 1387 + 1388 + for (i = 0; i < n; ++i, ++e) { 1389 + const char *cur_name = btf__name_by_offset(btf, e->name_off); 1390 + 1391 + if (strcmp(cur_name, evalue) == 0) { 1392 + *retval = e->val; 1393 + return 0; 1394 + } 1395 + } 1396 + } else if (btf_is_enum64(t)) { 1397 + struct btf_enum64 *e = btf_enum64(t); 1398 + int i, n = btf_vlen(t); 1399 + 1400 + for (i = 0; i < n; ++i, ++e) { 1401 + const char *cur_name = btf__name_by_offset(btf, e->name_off); 1402 + __u64 value = btf_enum64_value(e); 1403 + 1404 + if (strcmp(cur_name, evalue) == 0) { 1405 + *retval = value; 1406 + return 0; 1407 + } 1408 + } 1409 + } 1410 + return -EINVAL; 1411 + } 1412 + 1413 + static bool is_preset_supported(const struct btf_type *t) 1414 + { 1415 + return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); 1416 + } 1417 + 1418 + static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t, 1419 + struct bpf_map *map, struct btf_var_secinfo *sinfo, 1420 + struct var_preset *preset) 1421 + { 1422 + const struct btf_type *base_type; 1423 + void *ptr; 1424 + long long value = preset->ivalue; 1425 + size_t size; 1426 + 1427 + base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); 1428 + if (!base_type) { 1429 + fprintf(stderr, "Failed to resolve type %d\n", t->type); 1430 + return -EINVAL; 1431 + } 1432 + if (!is_preset_supported(base_type)) { 1433 + fprintf(stderr, "Setting value for type %s is not supported\n", 1434 + btf__name_by_offset(btf, base_type->name_off)); 1435 + return -EINVAL; 1436 + } 1437 + 1438 + if (preset->type == ENUMERATOR) { 1439 + if (btf_is_any_enum(base_type)) { 1440 + if (enum_value_from_name(btf, base_type, preset->svalue, &value)) { 1441 + fprintf(stderr, 1442 + "Failed to find integer value for enum element %s\n", 1443 + preset->svalue); 1444 + return -EINVAL; 1445 + } 1446 + } else { 1447 + fprintf(stderr, "Value %s is not supported for type %s\n", 1448 + preset->svalue, btf__name_by_offset(btf, base_type->name_off)); 1449 + return -EINVAL; 1450 + } 1451 + } 1452 + 1453 + /* Check if value fits into the target variable size */ 1454 + if (sinfo->size < sizeof(value)) { 1455 + bool is_signed = is_signed_type(base_type); 1456 + __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0); 1457 + long long max_val = 1ll << unsigned_bits; 1458 + 1459 + if (value >= max_val || value < -max_val) { 1460 + fprintf(stderr, 1461 + "Variable %s value %lld is out of range [%lld; %lld]\n", 1462 + btf__name_by_offset(btf, t->name_off), value, 1463 + is_signed ? 
-max_val : 0, max_val - 1); 1464 + return -EINVAL; 1465 + } 1466 + } 1467 + 1468 + ptr = bpf_map__initial_value(map, &size); 1469 + if (!ptr || sinfo->offset + sinfo->size > size) 1470 + return -EINVAL; 1471 + 1472 + if (__BYTE_ORDER == __LITTLE_ENDIAN) { 1473 + memcpy(ptr + sinfo->offset, &value, sinfo->size); 1474 + } else { /* __BYTE_ORDER == __BIG_ENDIAN */ 1475 + __u8 src_offset = sizeof(value) - sinfo->size; 1476 + 1477 + memcpy(ptr + sinfo->offset, (void *)&value + src_offset, sinfo->size); 1478 + } 1479 + return 0; 1480 + } 1481 + 1482 + static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets) 1483 + { 1484 + struct btf_var_secinfo *sinfo; 1485 + const char *sec_name; 1486 + const struct btf_type *t; 1487 + struct bpf_map *map; 1488 + struct btf *btf; 1489 + int i, j, k, n, cnt, err = 0; 1490 + 1491 + if (npresets == 0) 1492 + return 0; 1493 + 1494 + btf = bpf_object__btf(obj); 1495 + if (!btf) 1496 + return -EINVAL; 1497 + 1498 + cnt = btf__type_cnt(btf); 1499 + for (i = 1; i != cnt; ++i) { 1500 + t = btf__type_by_id(btf, i); 1501 + 1502 + if (!btf_is_datasec(t)) 1503 + continue; 1504 + 1505 + sinfo = btf_var_secinfos(t); 1506 + sec_name = btf__name_by_offset(btf, t->name_off); 1507 + map = bpf_object__find_map_by_name(obj, sec_name); 1508 + if (!map) 1509 + continue; 1510 + 1511 + n = btf_vlen(t); 1512 + for (j = 0; j < n; ++j, ++sinfo) { 1513 + const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); 1514 + const char *var_name; 1515 + 1516 + if (!btf_is_var(var_type)) 1517 + continue; 1518 + 1519 + var_name = btf__name_by_offset(btf, var_type->name_off); 1520 + 1521 + for (k = 0; k < npresets; ++k) { 1522 + if (strcmp(var_name, presets[k].name) != 0) 1523 + continue; 1524 + 1525 + if (presets[k].applied) { 1526 + fprintf(stderr, "Variable %s is set more than once", 1527 + var_name); 1528 + return -EINVAL; 1529 + } 1530 + 1531 + err = set_global_var(obj, btf, var_type, map, sinfo, presets + k); 1532 + if (err) 1533 + return err; 1534 + 1535 + presets[k].applied = true; 1536 + break; 1537 + } 1538 + } 1539 + } 1540 + for (i = 0; i < npresets; ++i) { 1541 + if (!presets[i].applied) { 1542 + fprintf(stderr, "Global variable preset %s has not been applied\n", 1543 + presets[i].name); 1544 + } 1545 + presets[i].applied = false; 1546 + } 1547 + return err; 1548 + } 1549 + 1366 1550 static int process_obj(const char *filename) 1367 1551 { 1368 1552 const char *base_filename = basename(strdupa(filename)); ··· 1667 1341 if (prog_cnt == 1) { 1668 1342 prog = bpf_object__next_program(obj, NULL); 1669 1343 bpf_program__set_autoload(prog, true); 1344 + err = set_global_vars(obj, env.presets, env.npresets); 1345 + if (err) { 1346 + fprintf(stderr, "Failed to set global variables %d\n", err); 1347 + goto cleanup; 1348 + } 1670 1349 process_prog(filename, obj, prog); 1671 1350 goto cleanup; 1672 1351 } ··· 1683 1352 if (!tobj) { 1684 1353 err = -errno; 1685 1354 fprintf(stderr, "Failed to open '%s': %d\n", filename, err); 1355 + goto cleanup; 1356 + } 1357 + 1358 + err = set_global_vars(tobj, env.presets, env.npresets); 1359 + if (err) { 1360 + fprintf(stderr, "Failed to set global variables %d\n", err); 1686 1361 goto cleanup; 1687 1362 } 1688 1363 ··· 2797 2460 free(env.deny_filters[i].prog_glob); 2798 2461 } 2799 2462 free(env.deny_filters); 2463 + for (i = 0; i < env.npresets; ++i) { 2464 + free(env.presets[i].name); 2465 + if (env.presets[i].type == ENUMERATOR) 2466 + free(env.presets[i].svalue); 2467 + } 2468 + free(env.presets); 2800 2469 
return -err; 2801 2470 }
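veristat's new -G/--set-global-vars option takes presets of the form "name = value" (written with spaces around '=', as in the option's help text), where the value is either an integer or an enumerator name; -G @file reads one preset per line, with '#' starting a comment. The matching global is patched in the object's datasec before the program is loaded for verification. Below is a minimal sketch of a BPF object whose globals such presets could target; all identifiers are illustrative, not from the tree.

/* sample.bpf.c - hypothetical globals for veristat --set-global-vars to
 * patch, e.g.:
 *   veristat -G "debug_level = 2" -G "pol = POL_STRICT" sample.bpf.o
 * or, with a presets file: veristat -G @presets.txt sample.bpf.o
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

enum policy { POL_LAX, POL_STRICT };

volatile int debug_level;		/* integer preset: "debug_level = 2" */
volatile enum policy pol = POL_LAX;	/* enum preset: "pol = POL_STRICT"   */

SEC("socket")
int sample(struct __sk_buff *skb)
{
	if (debug_level > 1 && pol == POL_STRICT)
		bpf_printk("strict mode");
	return 0;
}

char _license[] SEC("license") = "GPL";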
-54
tools/testing/selftests/bpf/with_addr.sh
··· 1 - #!/bin/bash 2 - # SPDX-License-Identifier: GPL-2.0 3 - # 4 - # add private ipv4 and ipv6 addresses to loopback 5 - 6 - readonly V6_INNER='100::a/128' 7 - readonly V4_INNER='192.168.0.1/32' 8 - 9 - if getopts ":s" opt; then 10 - readonly SIT_DEV_NAME='sixtofourtest0' 11 - readonly V6_SIT='2::/64' 12 - readonly V4_SIT='172.17.0.1/32' 13 - shift 14 - fi 15 - 16 - fail() { 17 - echo "error: $*" 1>&2 18 - exit 1 19 - } 20 - 21 - setup() { 22 - ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address' 23 - ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address' 24 - 25 - if [[ -n "${V6_SIT}" ]]; then 26 - ip link add "${SIT_DEV_NAME}" type sit remote any local any \ 27 - || fail 'failed to add sit' 28 - ip link set dev "${SIT_DEV_NAME}" up \ 29 - || fail 'failed to bring sit device up' 30 - ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \ 31 - || fail 'failed to setup v6 SIT address' 32 - ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \ 33 - || fail 'failed to setup v4 SIT address' 34 - fi 35 - 36 - sleep 2 # avoid race causing bind to fail 37 - } 38 - 39 - cleanup() { 40 - if [[ -n "${V6_SIT}" ]]; then 41 - ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}" 42 - ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}" 43 - ip link del "${SIT_DEV_NAME}" 44 - fi 45 - 46 - ip -4 addr del "${V4_INNER}" dev lo 47 - ip -6 addr del "${V6_INNER}" dev lo 48 - } 49 - 50 - trap cleanup EXIT 51 - 52 - setup 53 - "$@" 54 - exit "$?"
-36
tools/testing/selftests/bpf/with_tunnels.sh
··· 1 - #!/bin/bash 2 - # SPDX-License-Identifier: GPL-2.0 3 - # 4 - # setup tunnels for flow dissection test 5 - 6 - readonly SUFFIX="test_$(mktemp -u XXXX)" 7 - CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo" 8 - 9 - setup() { 10 - ip link add "ipip_${SUFFIX}" type ipip ${CONFIG} 11 - ip link add "gre_${SUFFIX}" type gre ${CONFIG} 12 - ip link add "sit_${SUFFIX}" type sit ${CONFIG} 13 - 14 - echo "tunnels before test:" 15 - ip tunnel show 16 - 17 - ip link set "ipip_${SUFFIX}" up 18 - ip link set "gre_${SUFFIX}" up 19 - ip link set "sit_${SUFFIX}" up 20 - } 21 - 22 - 23 - cleanup() { 24 - ip tunnel del "ipip_${SUFFIX}" 25 - ip tunnel del "gre_${SUFFIX}" 26 - ip tunnel del "sit_${SUFFIX}" 27 - 28 - echo "tunnels after test:" 29 - ip tunnel show 30 - } 31 - 32 - trap cleanup EXIT 33 - 34 - setup 35 - "$@" 36 - exit "$?"
-226
tools/testing/selftests/bpf/xdp_redirect_multi.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #include <linux/bpf.h> 3 - #include <linux/if_link.h> 4 - #include <assert.h> 5 - #include <errno.h> 6 - #include <signal.h> 7 - #include <stdio.h> 8 - #include <stdlib.h> 9 - #include <string.h> 10 - #include <net/if.h> 11 - #include <unistd.h> 12 - #include <libgen.h> 13 - #include <sys/ioctl.h> 14 - #include <sys/types.h> 15 - #include <sys/socket.h> 16 - #include <netinet/in.h> 17 - 18 - #include "bpf_util.h" 19 - #include <bpf/bpf.h> 20 - #include <bpf/libbpf.h> 21 - 22 - #define MAX_IFACE_NUM 32 23 - #define MAX_INDEX_NUM 1024 24 - 25 - static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 26 - static int ifaces[MAX_IFACE_NUM] = {}; 27 - 28 - static void int_exit(int sig) 29 - { 30 - __u32 prog_id = 0; 31 - int i; 32 - 33 - for (i = 0; ifaces[i] > 0; i++) { 34 - if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) { 35 - printf("bpf_xdp_query_id failed\n"); 36 - exit(1); 37 - } 38 - if (prog_id) 39 - bpf_xdp_detach(ifaces[i], xdp_flags, NULL); 40 - } 41 - 42 - exit(0); 43 - } 44 - 45 - static int get_mac_addr(unsigned int ifindex, void *mac_addr) 46 - { 47 - char ifname[IF_NAMESIZE]; 48 - struct ifreq ifr; 49 - int fd, ret = -1; 50 - 51 - fd = socket(AF_INET, SOCK_DGRAM, 0); 52 - if (fd < 0) 53 - return ret; 54 - 55 - if (!if_indextoname(ifindex, ifname)) 56 - goto err_out; 57 - 58 - strcpy(ifr.ifr_name, ifname); 59 - 60 - if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) 61 - goto err_out; 62 - 63 - memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char)); 64 - ret = 0; 65 - 66 - err_out: 67 - close(fd); 68 - return ret; 69 - } 70 - 71 - static void usage(const char *prog) 72 - { 73 - fprintf(stderr, 74 - "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n" 75 - "OPTS:\n" 76 - " -S use skb-mode\n" 77 - " -N enforce native mode\n" 78 - " -F force loading prog\n" 79 - " -X load xdp program on egress\n", 80 - prog); 81 - } 82 - 83 - int main(int argc, char **argv) 84 - { 85 - int prog_fd, group_all, mac_map; 86 - struct bpf_program *ingress_prog, *egress_prog; 87 - int i, err, ret, opt, egress_prog_fd = 0; 88 - struct bpf_devmap_val devmap_val; 89 - bool attach_egress_prog = false; 90 - unsigned char mac_addr[6]; 91 - char ifname[IF_NAMESIZE]; 92 - struct bpf_object *obj; 93 - unsigned int ifindex; 94 - char filename[256]; 95 - 96 - while ((opt = getopt(argc, argv, "SNFX")) != -1) { 97 - switch (opt) { 98 - case 'S': 99 - xdp_flags |= XDP_FLAGS_SKB_MODE; 100 - break; 101 - case 'N': 102 - /* default, set below */ 103 - break; 104 - case 'F': 105 - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; 106 - break; 107 - case 'X': 108 - attach_egress_prog = true; 109 - break; 110 - default: 111 - usage(basename(argv[0])); 112 - return 1; 113 - } 114 - } 115 - 116 - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) { 117 - xdp_flags |= XDP_FLAGS_DRV_MODE; 118 - } else if (attach_egress_prog) { 119 - printf("Load xdp program on egress with SKB mode not supported yet\n"); 120 - goto err_out; 121 - } 122 - 123 - if (optind == argc) { 124 - printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]); 125 - goto err_out; 126 - } 127 - 128 - printf("Get interfaces:"); 129 - for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) { 130 - ifaces[i] = if_nametoindex(argv[optind + i]); 131 - if (!ifaces[i]) 132 - ifaces[i] = strtoul(argv[optind + i], NULL, 0); 133 - if (!if_indextoname(ifaces[i], ifname)) { 134 - perror("Invalid interface name or i"); 135 - goto err_out; 136 - } 137 - if (ifaces[i] > MAX_INDEX_NUM) { 138 - printf(" interface index too large\n"); 
139 - goto err_out; 140 - } 141 - printf(" %d", ifaces[i]); 142 - } 143 - printf("\n"); 144 - 145 - snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]); 146 - obj = bpf_object__open_file(filename, NULL); 147 - err = libbpf_get_error(obj); 148 - if (err) 149 - goto err_out; 150 - err = bpf_object__load(obj); 151 - if (err) 152 - goto err_out; 153 - prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL)); 154 - 155 - if (attach_egress_prog) 156 - group_all = bpf_object__find_map_fd_by_name(obj, "map_egress"); 157 - else 158 - group_all = bpf_object__find_map_fd_by_name(obj, "map_all"); 159 - mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map"); 160 - 161 - if (group_all < 0 || mac_map < 0) { 162 - printf("bpf_object__find_map_fd_by_name failed\n"); 163 - goto err_out; 164 - } 165 - 166 - if (attach_egress_prog) { 167 - /* Find ingress/egress prog for 2nd xdp prog */ 168 - ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog"); 169 - egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog"); 170 - if (!ingress_prog || !egress_prog) { 171 - printf("finding ingress/egress_prog in obj file failed\n"); 172 - goto err_out; 173 - } 174 - prog_fd = bpf_program__fd(ingress_prog); 175 - egress_prog_fd = bpf_program__fd(egress_prog); 176 - if (prog_fd < 0 || egress_prog_fd < 0) { 177 - printf("find egress_prog fd failed\n"); 178 - goto err_out; 179 - } 180 - } 181 - 182 - signal(SIGINT, int_exit); 183 - signal(SIGTERM, int_exit); 184 - 185 - /* Init forward multicast groups and exclude group */ 186 - for (i = 0; ifaces[i] > 0; i++) { 187 - ifindex = ifaces[i]; 188 - 189 - if (attach_egress_prog) { 190 - ret = get_mac_addr(ifindex, mac_addr); 191 - if (ret < 0) { 192 - printf("get interface %d mac failed\n", ifindex); 193 - goto err_out; 194 - } 195 - ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0); 196 - if (ret) { 197 - perror("bpf_update_elem mac_map failed\n"); 198 - goto err_out; 199 - } 200 - } 201 - 202 - /* Add all the interfaces to group all */ 203 - devmap_val.ifindex = ifindex; 204 - devmap_val.bpf_prog.fd = egress_prog_fd; 205 - ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0); 206 - if (ret) { 207 - perror("bpf_map_update_elem"); 208 - goto err_out; 209 - } 210 - 211 - /* bind prog_fd to each interface */ 212 - ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL); 213 - if (ret) { 214 - printf("Set xdp fd failed on %d\n", ifindex); 215 - goto err_out; 216 - } 217 - } 218 - 219 - /* sleep some time for testing */ 220 - sleep(999); 221 - 222 - return 0; 223 - 224 - err_out: 225 - return 1; 226 - }