
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2022-04-27

We've added 85 non-merge commits during the last 18 day(s) which contain
a total of 163 files changed, 4499 insertions(+), 1521 deletions(-).

The main changes are:

1) Teach libbpf to enhance BPF verifier log with human-readable and relevant
information about failed CO-RE relocations, from Andrii Nakryiko.

2) Add typed pointer support in BPF maps and enable it for unreferenced pointers
(via probe read) and referenced ones that can be passed to in-kernel helpers,
from Kumar Kartikeya Dwivedi.
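
   As a rough sketch of what this enables on the BPF program side: a map value
   can now embed typed kernel pointers, declared with the kptr type-tag macros
   this series adds to bpf_helpers.h. The macros below are empty stand-ins so
   the struct compiles as plain C; in a real BPF program they expand to
   btf_type_tag attributes, and the field layout is what the verifier tracks.

```c
#include <assert.h>
#include <stddef.h>

/* Empty stand-ins for the series' bpf_helpers.h macros, which expand to
 * __attribute__((btf_type_tag(...))) under a BTF-capable clang. */
#define __kptr      /* unreferenced: dereference via probe read only */
#define __kptr_ref  /* referenced: may be passed to in-kernel helpers */

struct task_struct;

/* A map value carrying typed kernel pointers ("kptrs"). */
struct map_value {
	struct task_struct __kptr *task;      /* untrusted, probe-read only */
	struct task_struct __kptr_ref *owner; /* refcounted, helper-usable */
};

size_t map_value_size(void)
{
	return sizeof(struct map_value);
}
```

   The kernel records the offset of each such field in the map's kptr offset
   table (see the bpf_map_value_off changes in include/linux/bpf.h below) so it
   can zero, copy around, and eventually release them correctly.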

3) Improve xsk to break NAPI loop when rx queue gets full to allow for forward
progress to consume descriptors, from Maciej Fijalkowski & Björn Töpel.
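
   The driver-side dispatch this introduces can be sketched in userspace C
   (names here are illustrative, not the drivers'): a failed xdp_do_redirect()
   returning -ENOBUFS on a need_wakeup socket means the rx queue is full, so
   the NAPI loop exits instead of silently dropping the frame.

```c
#include <errno.h>
#include <stdbool.h>

enum zc_xdp_res { ZC_XDP_REDIR, ZC_XDP_CONSUMED, ZC_XDP_EXIT };

/* Mirror of the pattern added to i40e/ice/ixgbe in the diffs below. */
enum zc_xdp_res classify_redirect(int err, bool need_wakeup)
{
	if (!err)
		return ZC_XDP_REDIR;
	/* Full rx queue: break out of NAPI so the consumer can drain it. */
	if (need_wakeup && err == -ENOBUFS)
		return ZC_XDP_EXIT;
	/* Any other failure: drop the frame as before. */
	return ZC_XDP_CONSUMED;
}
```

   On ZC_XDP_EXIT the rx loop sets `failure` and breaks, which is exactly what
   the i40e_handle_xdp_result_zc() and ice_clean_rx_irq_zc() hunks below do.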

4) Fix a small RCU read-side race in BPF_PROG_RUN routines which dereferenced
the effective prog array before the rcu_read_lock, from Stanislav Fomichev.

5) Implement BPF atomic operations for RV64 JIT, and add libbpf parsing logic
for USDT arguments under riscv{32,64}, from Pu Lehui.
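
   The JIT emits standard RISC-V "A"-extension instructions for these. A
   self-contained reimplementation of the encoder helpers used in
   arch/riscv/net/bpf_jit.h below (same argument order; funct3 is 2 for the
   32-bit .w forms, 3 for .d):

```c
#include <stdint.h>

/* R-type AMO encoding: funct5 | aq | rl | rs2 | rs1 | funct3 | rd | opcode */
uint32_t rv_amo_insn(uint8_t funct5, uint8_t aq, uint8_t rl, uint8_t rs2,
		     uint8_t rs1, uint8_t funct3, uint8_t rd, uint8_t opcode)
{
	return (uint32_t)funct5 << 27 | (uint32_t)aq << 26 |
	       (uint32_t)rl << 25 | (uint32_t)rs2 << 20 |
	       (uint32_t)rs1 << 15 | (uint32_t)funct3 << 12 |
	       (uint32_t)rd << 7 | opcode;
}

/* funct5 values per the RISC-V spec: amoadd=0x0, amoand=0xc, amoor=0x8 */
uint32_t rv_amoadd_w(uint8_t rd, uint8_t rs2, uint8_t rs1, uint8_t aq, uint8_t rl)
{
	return rv_amo_insn(0x0, aq, rl, rs2, rs1, 2, rd, 0x2f);
}

uint32_t rv_amoand_w(uint8_t rd, uint8_t rs2, uint8_t rs1, uint8_t aq, uint8_t rl)
{
	return rv_amo_insn(0xc, aq, rl, rs2, rs1, 2, rd, 0x2f);
}
```

   For plain BPF_ADD/AND/OR/XOR the JIT discards the old value by using x0
   (RV_REG_ZERO) as rd; the BPF_FETCH variants write it back into the source
   register, and BPF_CMPXCHG is built from an lr/sc retry loop plus a fence.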

6) Implement libbpf parsing of USDT arguments under aarch64, from Alan Maguire.
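
   USDT arguments arrive as SystemTap-style spec strings in the ELF note, e.g.
   "-4@[sp, 76]" for a stack load or "-8@x0" for a register on arm64. A
   minimal, hypothetical parser sketch for just these two shapes (libbpf's real
   grammar per-arch is richer, and the function name here is invented):

```c
#include <stdio.h>

/* Parse "size@[reg, off]" or "size@reg". Returns 0 on success, -1 on error. */
int parse_usdt_arg(const char *spec, int *arg_sz, char reg[16], long *off)
{
	*off = 0;
	/* Memory-dereference form, e.g. "-4@[sp, 76]" */
	if (sscanf(spec, " %d @ [ %15[a-z0-9] , %ld ]", arg_sz, reg, off) == 3)
		return 0;
	/* Plain register form, e.g. "-8@x0" */
	if (sscanf(spec, " %d @ %15[a-z0-9]", arg_sz, reg) == 2)
		return 0;
	return -1;
}
```

   The sign of the size encodes signedness (negative means sign-extended), and
   the per-arch work in this series is exactly this kind of spec-to-location
   translation for aarch64 and riscv register names.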

7) Enable bpftool build for musl and remove nftw with FTW_ACTIONRETVAL usage
so it can be shipped under Alpine which is musl-based, from Dominique Martinet.

8) Clean up {sk,task,inode} local storage trace RCU handling as they do not
need to use call_rcu_tasks_trace() barrier, from KP Singh.

9) Improve libbpf API documentation and fix error return handling of various
API functions, from Grant Seltzer.

10) Enlarge offset check for bpf_skb_{load,store}_bytes() helpers given data
length of frags + frag_list may surpass old offset limit, from Liu Jian.
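
   A sketch of the widened bounds check, with hedged specifics: the old code
   capped the offset at 0xffff, which nonlinear skbs (frags + frag_list) can
   legitimately exceed; the series raises the limit while still guarding
   against overflow. The function below is an illustrative stand-in, not the
   kernel's exact code.

```c
#include <limits.h>

/* Returns 0 if (offset, len) is a sane window into skb_len bytes, -1 (standing
 * in for -EFAULT/-EINVAL) otherwise. Old code: offset > 0xffff was rejected. */
int check_skb_offset(unsigned long long offset, unsigned long long len,
		     unsigned long long skb_len)
{
	if (offset > INT_MAX || len > (unsigned long long)INT_MAX - offset)
		return -1;
	if (offset + len > skb_len)
		return -1;
	return 0;
}
```

   Note that an offset like 0x20000 into a large frag_list-backed skb is valid
   data but would have been rejected by the old 0xffff limit.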

11) Various improvements to prog_tests in area of logging, test execution
and by-name subtest selection, from Mykola Lysenko.

12) Simplify map_btf_id generation for all map types by moving this process
to build time with help of resolve_btfids infra, from Menglong Dong.

13) Fix a libbpf bug in probing when falling back to legacy bpf_probe_read*()
helpers; the probing always caused the old helpers to be used, from Runqing Yang.
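
   The bug class is worth spelling out: libbpf probes once and caches whether
   the kernel has the newer bpf_probe_read_kernel() helpers. A sketch of that
   tristate caching pattern (names hypothetical, not libbpf's internals); the
   fixed bug amounted to the cache recording the wrong branch, so every
   subsequent call took the legacy-helper path:

```c
#include <stdbool.h>

enum probe_res { PROBE_UNKNOWN = 0, PROBE_YES, PROBE_NO };

static enum probe_res probe_cache;

/* Probe once, then serve the cached answer; kernel_supports_it stands in for
 * the actual feature probe (e.g. loading a tiny test program). */
bool has_probe_read_kernel(bool kernel_supports_it)
{
	if (probe_cache == PROBE_UNKNOWN)
		probe_cache = kernel_supports_it ? PROBE_YES : PROBE_NO;
	return probe_cache == PROBE_YES;
}
```

   Once cached, later calls ignore the probe input entirely, which is why a
   wrongly recorded first result silently pins the legacy fallback.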

14) Add support for ARCompact and ARCv2 platforms for libbpf's PT_REGS
tracing macros, from Vladimir Isaev.

15) Cleanup BPF selftests to remove old & unneeded rlimit code given kernel
switched to memcg-based memory accounting a while ago, from Yafang Shao.

16) Refactor of BPF sysctl handlers to move them to BPF core, from Yan Zhu.

17) Fix BPF selftests on two occasions to work around regressions caused by latest
LLVM to unblock CI until their fixes are worked out, from Yonghong Song.

18) Misc cleanups all over the place, from various others.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (85 commits)
selftests/bpf: Add libbpf's log fixup logic selftests
libbpf: Fix up verifier log for unguarded failed CO-RE relos
libbpf: Simplify bpf_core_parse_spec() signature
libbpf: Refactor CO-RE relo human description formatting routine
libbpf: Record subprog-resolved CO-RE relocations unconditionally
selftests/bpf: Add CO-RE relos and SEC("?...") to linked_funcs selftests
libbpf: Avoid joining .BTF.ext data with BPF programs by section name
libbpf: Fix logic for finding matching program for CO-RE relocation
libbpf: Drop unhelpful "program too large" guess
libbpf: Fix anonymous type check in CO-RE logic
bpf: Compute map_btf_id during build time
selftests/bpf: Add test for strict BTF type check
selftests/bpf: Add verifier tests for kptr
selftests/bpf: Add C tests for kptr
libbpf: Add kptr type tag macros to bpf_helpers.h
bpf: Make BTF type match stricter for release arguments
bpf: Teach verifier about kptr_get kfunc helpers
bpf: Wire up freeing of referenced kptr
bpf: Populate pairs of btf_id and destructor kfunc in btf
bpf: Adapt copy_map_value for multiple offset case
...
====================

Link: https://lore.kernel.org/r/20220427224758.20976-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+4505 -1527
+1 -2
Documentation/bpf/libbpf/index.rst
··· 6 6 .. toctree:: 7 7 :maxdepth: 1 8 8 9 + API Documentation <https://libbpf.readthedocs.io/en/latest/api.html> 9 10 libbpf_naming_convention 10 11 libbpf_build 11 12 12 13 This is documentation for libbpf, a userspace library for loading and 13 14 interacting with bpf programs. 14 - 15 - For API documentation see the `versioned API documentation site <https://libbpf.readthedocs.io/en/latest/api.html>`_. 16 15 17 16 All general BPF questions, including kernel functionality, libbpf APIs and 18 17 their application, should be sent to bpf@vger.kernel.org mailing list.
+67
arch/riscv/net/bpf_jit.h
··· 535 535 return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); 536 536 } 537 537 538 + static inline u32 rv_amoand_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 539 + { 540 + return rv_amo_insn(0xc, aq, rl, rs2, rs1, 2, rd, 0x2f); 541 + } 542 + 543 + static inline u32 rv_amoor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 544 + { 545 + return rv_amo_insn(0x8, aq, rl, rs2, rs1, 2, rd, 0x2f); 546 + } 547 + 548 + static inline u32 rv_amoxor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 549 + { 550 + return rv_amo_insn(0x4, aq, rl, rs2, rs1, 2, rd, 0x2f); 551 + } 552 + 553 + static inline u32 rv_amoswap_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 554 + { 555 + return rv_amo_insn(0x1, aq, rl, rs2, rs1, 2, rd, 0x2f); 556 + } 557 + 558 + static inline u32 rv_lr_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 559 + { 560 + return rv_amo_insn(0x2, aq, rl, rs2, rs1, 2, rd, 0x2f); 561 + } 562 + 563 + static inline u32 rv_sc_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 564 + { 565 + return rv_amo_insn(0x3, aq, rl, rs2, rs1, 2, rd, 0x2f); 566 + } 567 + 568 + static inline u32 rv_fence(u8 pred, u8 succ) 569 + { 570 + u16 imm11_0 = pred << 4 | succ; 571 + 572 + return rv_i_insn(imm11_0, 0, 0, 0, 0xf); 573 + } 574 + 538 575 /* RVC instrutions. 
*/ 539 576 540 577 static inline u16 rvc_addi4spn(u8 rd, u32 imm10) ··· 788 751 static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 789 752 { 790 753 return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); 754 + } 755 + 756 + static inline u32 rv_amoand_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 757 + { 758 + return rv_amo_insn(0xc, aq, rl, rs2, rs1, 3, rd, 0x2f); 759 + } 760 + 761 + static inline u32 rv_amoor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 762 + { 763 + return rv_amo_insn(0x8, aq, rl, rs2, rs1, 3, rd, 0x2f); 764 + } 765 + 766 + static inline u32 rv_amoxor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 767 + { 768 + return rv_amo_insn(0x4, aq, rl, rs2, rs1, 3, rd, 0x2f); 769 + } 770 + 771 + static inline u32 rv_amoswap_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 772 + { 773 + return rv_amo_insn(0x1, aq, rl, rs2, rs1, 3, rd, 0x2f); 774 + } 775 + 776 + static inline u32 rv_lr_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 777 + { 778 + return rv_amo_insn(0x2, aq, rl, rs2, rs1, 3, rd, 0x2f); 779 + } 780 + 781 + static inline u32 rv_sc_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 782 + { 783 + return rv_amo_insn(0x3, aq, rl, rs2, rs1, 3, rd, 0x2f); 791 784 } 792 785 793 786 /* RV64-only RVC instructions. */
+86 -24
arch/riscv/net/bpf_jit_comp64.c
··· 455 455 return 0; 456 456 } 457 457 458 + static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, 459 + struct rv_jit_context *ctx) 460 + { 461 + u8 r0; 462 + int jmp_offset; 463 + 464 + if (off) { 465 + if (is_12b_int(off)) { 466 + emit_addi(RV_REG_T1, rd, off, ctx); 467 + } else { 468 + emit_imm(RV_REG_T1, off, ctx); 469 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 470 + } 471 + rd = RV_REG_T1; 472 + } 473 + 474 + switch (imm) { 475 + /* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */ 476 + case BPF_ADD: 477 + emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) : 478 + rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 479 + break; 480 + case BPF_AND: 481 + emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) : 482 + rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 483 + break; 484 + case BPF_OR: 485 + emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) : 486 + rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 487 + break; 488 + case BPF_XOR: 489 + emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) : 490 + rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 491 + break; 492 + /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */ 493 + case BPF_ADD | BPF_FETCH: 494 + emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) : 495 + rv_amoadd_w(rs, rs, rd, 0, 0), ctx); 496 + if (!is64) 497 + emit_zext_32(rs, ctx); 498 + break; 499 + case BPF_AND | BPF_FETCH: 500 + emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) : 501 + rv_amoand_w(rs, rs, rd, 0, 0), ctx); 502 + if (!is64) 503 + emit_zext_32(rs, ctx); 504 + break; 505 + case BPF_OR | BPF_FETCH: 506 + emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) : 507 + rv_amoor_w(rs, rs, rd, 0, 0), ctx); 508 + if (!is64) 509 + emit_zext_32(rs, ctx); 510 + break; 511 + case BPF_XOR | BPF_FETCH: 512 + emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) : 513 + rv_amoxor_w(rs, rs, rd, 0, 0), ctx); 514 + if (!is64) 515 + emit_zext_32(rs, ctx); 516 + break; 517 + /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */ 518 + case BPF_XCHG: 519 + emit(is64 ? 
rv_amoswap_d(rs, rs, rd, 0, 0) : 520 + rv_amoswap_w(rs, rs, rd, 0, 0), ctx); 521 + if (!is64) 522 + emit_zext_32(rs, ctx); 523 + break; 524 + /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */ 525 + case BPF_CMPXCHG: 526 + r0 = bpf_to_rv_reg(BPF_REG_0, ctx); 527 + emit(is64 ? rv_addi(RV_REG_T2, r0, 0) : 528 + rv_addiw(RV_REG_T2, r0, 0), ctx); 529 + emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) : 530 + rv_lr_w(r0, 0, rd, 0, 0), ctx); 531 + jmp_offset = ninsns_rvoff(8); 532 + emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx); 533 + emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) : 534 + rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx); 535 + jmp_offset = ninsns_rvoff(-6); 536 + emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx); 537 + emit(rv_fence(0x3, 0x3), ctx); 538 + break; 539 + } 540 + } 541 + 458 542 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) 459 543 #define BPF_FIXUP_REG_MASK GENMASK(31, 27) 460 544 ··· 1230 1146 break; 1231 1147 case BPF_STX | BPF_ATOMIC | BPF_W: 1232 1148 case BPF_STX | BPF_ATOMIC | BPF_DW: 1233 - if (insn->imm != BPF_ADD) { 1234 - pr_err("bpf-jit: not supported: atomic operation %02x ***\n", 1235 - insn->imm); 1236 - return -EINVAL; 1237 - } 1238 - 1239 - /* atomic_add: lock *(u32 *)(dst + off) += src 1240 - * atomic_add: lock *(u64 *)(dst + off) += src 1241 - */ 1242 - 1243 - if (off) { 1244 - if (is_12b_int(off)) { 1245 - emit_addi(RV_REG_T1, rd, off, ctx); 1246 - } else { 1247 - emit_imm(RV_REG_T1, off, ctx); 1248 - emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1249 - } 1250 - 1251 - rd = RV_REG_T1; 1252 - } 1253 - 1254 - emit(BPF_SIZE(code) == BPF_W ? 1255 - rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) : 1256 - rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx); 1149 + emit_atomic(rd, rs, off, imm, 1150 + BPF_SIZE(code) == BPF_DW, ctx); 1257 1151 break; 1258 1152 default: 1259 1153 pr_err("bpf-jit: unknown opcode %02x\n", code);
+6 -2
drivers/media/rc/bpf-lirc.c
··· 216 216 217 217 raw->bpf_sample = sample; 218 218 219 - if (raw->progs) 220 - BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, bpf_prog_run); 219 + if (raw->progs) { 220 + rcu_read_lock(); 221 + bpf_prog_run_array(rcu_dereference(raw->progs), 222 + &raw->bpf_sample, bpf_prog_run); 223 + rcu_read_unlock(); 224 + } 221 225 } 222 226 223 227 /*
+1
drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
··· 20 20 #define I40E_XDP_CONSUMED BIT(0) 21 21 #define I40E_XDP_TX BIT(1) 22 22 #define I40E_XDP_REDIR BIT(2) 23 + #define I40E_XDP_EXIT BIT(3) 23 24 24 25 /* 25 26 * build_ctob - Builds the Tx descriptor (cmd, offset and type) qword
+25 -14
drivers/net/ethernet/intel/i40e/i40e_xsk.c
··· 161 161 162 162 if (likely(act == XDP_REDIRECT)) { 163 163 err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); 164 - if (err) 165 - goto out_failure; 166 - return I40E_XDP_REDIR; 164 + if (!err) 165 + return I40E_XDP_REDIR; 166 + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) 167 + result = I40E_XDP_EXIT; 168 + else 169 + result = I40E_XDP_CONSUMED; 170 + goto out_failure; 167 171 } 168 172 169 173 switch (act) { ··· 179 175 if (result == I40E_XDP_CONSUMED) 180 176 goto out_failure; 181 177 break; 178 + case XDP_DROP: 179 + result = I40E_XDP_CONSUMED; 180 + break; 182 181 default: 183 182 bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); 184 183 fallthrough; 185 184 case XDP_ABORTED: 185 + result = I40E_XDP_CONSUMED; 186 186 out_failure: 187 187 trace_xdp_exception(rx_ring->netdev, xdp_prog, act); 188 - fallthrough; /* handle aborts by dropping packet */ 189 - case XDP_DROP: 190 - result = I40E_XDP_CONSUMED; 191 - break; 192 188 } 193 189 return result; 194 190 } ··· 275 271 unsigned int *rx_packets, 276 272 unsigned int *rx_bytes, 277 273 unsigned int size, 278 - unsigned int xdp_res) 274 + unsigned int xdp_res, 275 + bool *failure) 279 276 { 280 277 struct sk_buff *skb; 281 278 ··· 286 281 if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX) 287 282 return; 288 283 284 + if (xdp_res == I40E_XDP_EXIT) { 285 + *failure = true; 286 + return; 287 + } 288 + 289 289 if (xdp_res == I40E_XDP_CONSUMED) { 290 290 xsk_buff_free(xdp_buff); 291 291 return; 292 292 } 293 - 294 293 if (xdp_res == I40E_XDP_PASS) { 295 294 /* NB! 
We are not checking for errors using 296 295 * i40e_test_staterr with ··· 380 371 381 372 xdp_res = i40e_run_xdp_zc(rx_ring, bi); 382 373 i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets, 383 - &rx_bytes, size, xdp_res); 374 + &rx_bytes, size, xdp_res, &failure); 375 + if (failure) 376 + break; 384 377 total_rx_packets += rx_packets; 385 378 total_rx_bytes += rx_bytes; 386 379 xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR); ··· 393 382 cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask; 394 383 395 384 if (cleaned_count >= I40E_RX_BUFFER_WRITE) 396 - failure = !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); 385 + failure |= !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); 397 386 398 387 i40e_finalize_xdp_rx(rx_ring, xdp_xmit); 399 388 i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); ··· 605 594 return -ENETDOWN; 606 595 607 596 if (!i40e_enabled_xdp_vsi(vsi)) 608 - return -ENXIO; 597 + return -EINVAL; 609 598 610 599 if (queue_id >= vsi->num_queue_pairs) 611 - return -ENXIO; 600 + return -EINVAL; 612 601 613 602 if (!vsi->xdp_rings[queue_id]->xsk_pool) 614 - return -ENXIO; 603 + return -EINVAL; 615 604 616 605 ring = vsi->xdp_rings[queue_id]; 617 606
+1
drivers/net/ethernet/intel/ice/ice_txrx.h
··· 133 133 #define ICE_XDP_CONSUMED BIT(0) 134 134 #define ICE_XDP_TX BIT(1) 135 135 #define ICE_XDP_REDIR BIT(2) 136 + #define ICE_XDP_EXIT BIT(3) 136 137 137 138 #define ICE_RX_DMA_ATTR \ 138 139 (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+34 -21
drivers/net/ethernet/intel/ice/ice_xsk.c
··· 545 545 546 546 if (likely(act == XDP_REDIRECT)) { 547 547 err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); 548 - if (err) 549 - goto out_failure; 550 - return ICE_XDP_REDIR; 548 + if (!err) 549 + return ICE_XDP_REDIR; 550 + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) 551 + result = ICE_XDP_EXIT; 552 + else 553 + result = ICE_XDP_CONSUMED; 554 + goto out_failure; 551 555 } 552 556 553 557 switch (act) { ··· 562 558 if (result == ICE_XDP_CONSUMED) 563 559 goto out_failure; 564 560 break; 561 + case XDP_DROP: 562 + result = ICE_XDP_CONSUMED; 563 + break; 565 564 default: 566 565 bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); 567 566 fallthrough; 568 567 case XDP_ABORTED: 568 + result = ICE_XDP_CONSUMED; 569 569 out_failure: 570 570 trace_xdp_exception(rx_ring->netdev, xdp_prog, act); 571 - fallthrough; 572 - case XDP_DROP: 573 - result = ICE_XDP_CONSUMED; 574 571 break; 575 572 } 576 573 ··· 592 587 unsigned int xdp_xmit = 0; 593 588 struct bpf_prog *xdp_prog; 594 589 bool failure = false; 590 + int entries_to_alloc; 595 591 596 592 /* ZC patch is enabled only when XDP program is set, 597 593 * so here it can not be NULL ··· 640 634 xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool); 641 635 642 636 xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring); 643 - if (xdp_res) { 644 - if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) 645 - xdp_xmit |= xdp_res; 646 - else 647 - xsk_buff_free(xdp); 648 - 649 - total_rx_bytes += size; 650 - total_rx_packets++; 651 - 652 - ice_bump_ntc(rx_ring); 653 - continue; 637 + if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { 638 + xdp_xmit |= xdp_res; 639 + } else if (xdp_res == ICE_XDP_EXIT) { 640 + failure = true; 641 + break; 642 + } else if (xdp_res == ICE_XDP_CONSUMED) { 643 + xsk_buff_free(xdp); 644 + } else if (xdp_res == ICE_XDP_PASS) { 645 + goto construct_skb; 654 646 } 647 + 648 + total_rx_bytes += size; 649 + total_rx_packets++; 650 + 651 + ice_bump_ntc(rx_ring); 652 + 
continue; 653 + 655 654 construct_skb: 656 655 /* XDP_PASS path */ 657 656 skb = ice_construct_skb_zc(rx_ring, xdp); ··· 684 673 ice_receive_skb(rx_ring, skb, vlan_tag); 685 674 } 686 675 687 - failure = !ice_alloc_rx_bufs_zc(rx_ring, ICE_DESC_UNUSED(rx_ring)); 676 + entries_to_alloc = ICE_DESC_UNUSED(rx_ring); 677 + if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) 678 + failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc); 688 679 689 680 ice_finalize_xdp_rx(xdp_ring, xdp_xmit); 690 681 ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); ··· 942 929 return -ENETDOWN; 943 930 944 931 if (!ice_is_xdp_ena_vsi(vsi)) 945 - return -ENXIO; 932 + return -EINVAL; 946 933 947 934 if (queue_id >= vsi->num_txq) 948 - return -ENXIO; 935 + return -EINVAL; 949 936 950 937 if (!vsi->xdp_rings[queue_id]->xsk_pool) 951 - return -ENXIO; 938 + return -EINVAL; 952 939 953 940 ring = vsi->xdp_rings[queue_id]; 954 941
+1
drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
··· 8 8 #define IXGBE_XDP_CONSUMED BIT(0) 9 9 #define IXGBE_XDP_TX BIT(1) 10 10 #define IXGBE_XDP_REDIR BIT(2) 11 + #define IXGBE_XDP_EXIT BIT(3) 11 12 12 13 #define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ 13 14 IXGBE_TXD_CMD_RS)
+32 -23
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
··· 109 109 110 110 if (likely(act == XDP_REDIRECT)) { 111 111 err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); 112 - if (err) 113 - goto out_failure; 114 - return IXGBE_XDP_REDIR; 112 + if (!err) 113 + return IXGBE_XDP_REDIR; 114 + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) 115 + result = IXGBE_XDP_EXIT; 116 + else 117 + result = IXGBE_XDP_CONSUMED; 118 + goto out_failure; 115 119 } 116 120 117 121 switch (act) { ··· 134 130 if (result == IXGBE_XDP_CONSUMED) 135 131 goto out_failure; 136 132 break; 133 + case XDP_DROP: 134 + result = IXGBE_XDP_CONSUMED; 135 + break; 137 136 default: 138 137 bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); 139 138 fallthrough; 140 139 case XDP_ABORTED: 140 + result = IXGBE_XDP_CONSUMED; 141 141 out_failure: 142 142 trace_xdp_exception(rx_ring->netdev, xdp_prog, act); 143 - fallthrough; /* handle aborts by dropping packet */ 144 - case XDP_DROP: 145 - result = IXGBE_XDP_CONSUMED; 146 - break; 147 143 } 148 144 return result; 149 145 } ··· 307 303 xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool); 308 304 xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp); 309 305 310 - if (xdp_res) { 311 - if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) 312 - xdp_xmit |= xdp_res; 313 - else 314 - xsk_buff_free(bi->xdp); 315 - 316 - bi->xdp = NULL; 317 - total_rx_packets++; 318 - total_rx_bytes += size; 319 - 320 - cleaned_count++; 321 - ixgbe_inc_ntc(rx_ring); 322 - continue; 306 + if (likely(xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR))) { 307 + xdp_xmit |= xdp_res; 308 + } else if (xdp_res == IXGBE_XDP_EXIT) { 309 + failure = true; 310 + break; 311 + } else if (xdp_res == IXGBE_XDP_CONSUMED) { 312 + xsk_buff_free(bi->xdp); 313 + } else if (xdp_res == IXGBE_XDP_PASS) { 314 + goto construct_skb; 323 315 } 324 316 317 + bi->xdp = NULL; 318 + total_rx_packets++; 319 + total_rx_bytes += size; 320 + 321 + cleaned_count++; 322 + ixgbe_inc_ntc(rx_ring); 323 + continue; 324 + 325 + construct_skb: 325 326 /* 
XDP_PASS path */ 326 327 skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp); 327 328 if (!skb) { ··· 525 516 return -ENETDOWN; 526 517 527 518 if (!READ_ONCE(adapter->xdp_prog)) 528 - return -ENXIO; 519 + return -EINVAL; 529 520 530 521 if (qid >= adapter->num_xdp_queues) 531 - return -ENXIO; 522 + return -EINVAL; 532 523 533 524 ring = adapter->xdp_ring[qid]; 534 525 ··· 536 527 return -ENETDOWN; 537 528 538 529 if (!ring->xsk_pool) 539 - return -ENXIO; 530 + return -EINVAL; 540 531 541 532 if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { 542 533 u64 eics = BIT_ULL(ring->q_vector->v_idx);
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
··· 23 23 c = priv->channels.c[ix]; 24 24 25 25 if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))) 26 - return -ENXIO; 26 + return -EINVAL; 27 27 28 28 if (!napi_if_scheduled_mark_missed(&c->napi)) { 29 29 /* To avoid WQE overrun, don't post a NOP if async_icosq is not
+2 -2
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
··· 6559 6559 return -ENETDOWN; 6560 6560 6561 6561 if (!stmmac_xdp_is_enabled(priv)) 6562 - return -ENXIO; 6562 + return -EINVAL; 6563 6563 6564 6564 if (queue >= priv->plat->rx_queues_to_use || 6565 6565 queue >= priv->plat->tx_queues_to_use) ··· 6570 6570 ch = &priv->channel[queue]; 6571 6571 6572 6572 if (!rx_q->xsk_pool && !tx_q->xsk_pool) 6573 - return -ENXIO; 6573 + return -EINVAL; 6574 6574 6575 6575 if (!napi_if_scheduled_mark_missed(&ch->rxtx_napi)) { 6576 6576 /* EQoS does not have per-DMA channel SW interrupt,
+2 -6
include/linux/bpf-cgroup.h
··· 225 225 226 226 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \ 227 227 ({ \ 228 - u32 __unused_flags; \ 229 228 int __ret = 0; \ 230 229 if (cgroup_bpf_enabled(atype)) \ 231 230 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ 232 - NULL, \ 233 - &__unused_flags); \ 231 + NULL, NULL); \ 234 232 __ret; \ 235 233 }) 236 234 237 235 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \ 238 236 ({ \ 239 - u32 __unused_flags; \ 240 237 int __ret = 0; \ 241 238 if (cgroup_bpf_enabled(atype)) { \ 242 239 lock_sock(sk); \ 243 240 __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \ 244 - t_ctx, \ 245 - &__unused_flags); \ 241 + t_ctx, NULL); \ 246 242 release_sock(sk); \ 247 243 } \ 248 244 __ret; \
+94 -139
include/linux/bpf.h
··· 23 23 #include <linux/slab.h> 24 24 #include <linux/percpu-refcount.h> 25 25 #include <linux/bpfptr.h> 26 + #include <linux/btf.h> 26 27 27 28 struct bpf_verifier_env; 28 29 struct bpf_verifier_log; ··· 148 147 bpf_callback_t callback_fn, 149 148 void *callback_ctx, u64 flags); 150 149 151 - /* BTF name and id of struct allocated by map_alloc */ 152 - const char * const map_btf_name; 150 + /* BTF id of struct allocated by map_alloc */ 153 151 int *map_btf_id; 154 152 155 153 /* bpf_iter info used to open a seq_file */ 156 154 const struct bpf_iter_seq_info *iter_seq_info; 155 + }; 156 + 157 + enum { 158 + /* Support at most 8 pointers in a BPF map value */ 159 + BPF_MAP_VALUE_OFF_MAX = 8, 160 + BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX + 161 + 1 + /* for bpf_spin_lock */ 162 + 1, /* for bpf_timer */ 163 + }; 164 + 165 + enum bpf_kptr_type { 166 + BPF_KPTR_UNREF, 167 + BPF_KPTR_REF, 168 + }; 169 + 170 + struct bpf_map_value_off_desc { 171 + u32 offset; 172 + enum bpf_kptr_type type; 173 + struct { 174 + struct btf *btf; 175 + struct module *module; 176 + btf_dtor_kfunc_t dtor; 177 + u32 btf_id; 178 + } kptr; 179 + }; 180 + 181 + struct bpf_map_value_off { 182 + u32 nr_off; 183 + struct bpf_map_value_off_desc off[]; 184 + }; 185 + 186 + struct bpf_map_off_arr { 187 + u32 cnt; 188 + u32 field_off[BPF_MAP_OFF_ARR_MAX]; 189 + u8 field_sz[BPF_MAP_OFF_ARR_MAX]; 157 190 }; 158 191 159 192 struct bpf_map { ··· 206 171 u64 map_extra; /* any per-map-type extra fields */ 207 172 u32 map_flags; 208 173 int spin_lock_off; /* >=0 valid offset, <0 error */ 174 + struct bpf_map_value_off *kptr_off_tab; 209 175 int timer_off; /* >=0 valid offset, <0 error */ 210 176 u32 id; 211 177 int numa_node; ··· 218 182 struct mem_cgroup *memcg; 219 183 #endif 220 184 char name[BPF_OBJ_NAME_LEN]; 221 - bool bypass_spec_v1; 222 - bool frozen; /* write-once; write-protected by freeze_mutex */ 223 - /* 14 bytes hole */ 224 - 185 + struct bpf_map_off_arr *off_arr; 225 186 /* The 3rd and 4th 
cacheline with misc members to avoid false sharing 226 187 * particularly with refcounting. 227 188 */ ··· 238 205 bool jited; 239 206 bool xdp_has_frags; 240 207 } owner; 208 + bool bypass_spec_v1; 209 + bool frozen; /* write-once; write-protected by freeze_mutex */ 241 210 }; 242 211 243 212 static inline bool map_value_has_spin_lock(const struct bpf_map *map) ··· 252 217 return map->timer_off >= 0; 253 218 } 254 219 220 + static inline bool map_value_has_kptrs(const struct bpf_map *map) 221 + { 222 + return !IS_ERR_OR_NULL(map->kptr_off_tab); 223 + } 224 + 255 225 static inline void check_and_init_map_value(struct bpf_map *map, void *dst) 256 226 { 257 227 if (unlikely(map_value_has_spin_lock(map))) 258 228 memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); 259 229 if (unlikely(map_value_has_timer(map))) 260 230 memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); 231 + if (unlikely(map_value_has_kptrs(map))) { 232 + struct bpf_map_value_off *tab = map->kptr_off_tab; 233 + int i; 234 + 235 + for (i = 0; i < tab->nr_off; i++) 236 + *(u64 *)(dst + tab->off[i].offset) = 0; 237 + } 261 238 } 262 239 263 240 /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. 
*/ 264 241 static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) 265 242 { 266 - u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0; 243 + u32 curr_off = 0; 244 + int i; 267 245 268 - if (unlikely(map_value_has_spin_lock(map))) { 269 - s_off = map->spin_lock_off; 270 - s_sz = sizeof(struct bpf_spin_lock); 271 - } 272 - if (unlikely(map_value_has_timer(map))) { 273 - t_off = map->timer_off; 274 - t_sz = sizeof(struct bpf_timer); 275 - } 276 - 277 - if (unlikely(s_sz || t_sz)) { 278 - if (s_off < t_off || !s_sz) { 279 - swap(s_off, t_off); 280 - swap(s_sz, t_sz); 281 - } 282 - memcpy(dst, src, t_off); 283 - memcpy(dst + t_off + t_sz, 284 - src + t_off + t_sz, 285 - s_off - t_off - t_sz); 286 - memcpy(dst + s_off + s_sz, 287 - src + s_off + s_sz, 288 - map->value_size - s_off - s_sz); 289 - } else { 246 + if (likely(!map->off_arr)) { 290 247 memcpy(dst, src, map->value_size); 248 + return; 291 249 } 250 + 251 + for (i = 0; i < map->off_arr->cnt; i++) { 252 + u32 next_off = map->off_arr->field_off[i]; 253 + 254 + memcpy(dst + curr_off, src + curr_off, next_off - curr_off); 255 + curr_off += map->off_arr->field_sz[i]; 256 + } 257 + memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); 292 258 } 293 259 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, 294 260 bool lock_src); ··· 378 342 */ 379 343 MEM_PERCPU = BIT(4 + BPF_BASE_TYPE_BITS), 380 344 381 - __BPF_TYPE_LAST_FLAG = MEM_PERCPU, 345 + /* Indicates that the argument will be released. */ 346 + OBJ_RELEASE = BIT(5 + BPF_BASE_TYPE_BITS), 347 + 348 + /* PTR is not trusted. This is only used with PTR_TO_BTF_ID, to mark 349 + * unreferenced and referenced kptr loaded from map value using a load 350 + * instruction, so that they can only be dereferenced but not escape the 351 + * BPF program into the kernel (i.e. cannot be passed as arguments to 352 + * kfunc or bpf helpers). 
353 + */ 354 + PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), 355 + 356 + __BPF_TYPE_LAST_FLAG = PTR_UNTRUSTED, 382 357 }; 383 358 384 359 /* Max number of base types. */ ··· 438 391 ARG_PTR_TO_STACK, /* pointer to stack */ 439 392 ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ 440 393 ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ 394 + ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ 441 395 __BPF_ARG_TYPE_MAX, 442 396 443 397 /* Extended arg_types. */ ··· 448 400 ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, 449 401 ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, 450 402 ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, 403 + ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, 451 404 452 405 /* This must be the last entry. Its purpose is to ensure the enum is 453 406 * wide enough to hold the higher bits reserved for bpf_type_flag. ··· 1270 1221 /* an array of programs to be executed under rcu_lock. 1271 1222 * 1272 1223 * Typical usage: 1273 - * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run); 1224 + * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run); 1274 1225 * 1275 1226 * the structure returned by bpf_prog_array_alloc() should be populated 1276 1227 * with program pointers and the last pointer must be NULL. 
··· 1364 1315 1365 1316 typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); 1366 1317 1367 - static __always_inline int 1368 - BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, 1369 - const void *ctx, bpf_prog_run_fn run_prog, 1370 - int retval, u32 *ret_flags) 1371 - { 1372 - const struct bpf_prog_array_item *item; 1373 - const struct bpf_prog *prog; 1374 - const struct bpf_prog_array *array; 1375 - struct bpf_run_ctx *old_run_ctx; 1376 - struct bpf_cg_run_ctx run_ctx; 1377 - u32 func_ret; 1378 - 1379 - run_ctx.retval = retval; 1380 - migrate_disable(); 1381 - rcu_read_lock(); 1382 - array = rcu_dereference(array_rcu); 1383 - item = &array->items[0]; 1384 - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 1385 - while ((prog = READ_ONCE(item->prog))) { 1386 - run_ctx.prog_item = item; 1387 - func_ret = run_prog(prog, ctx); 1388 - if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval)) 1389 - run_ctx.retval = -EPERM; 1390 - *(ret_flags) |= (func_ret >> 1); 1391 - item++; 1392 - } 1393 - bpf_reset_run_ctx(old_run_ctx); 1394 - rcu_read_unlock(); 1395 - migrate_enable(); 1396 - return run_ctx.retval; 1397 - } 1398 - 1399 - static __always_inline int 1400 - BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, 1401 - const void *ctx, bpf_prog_run_fn run_prog, 1402 - int retval) 1403 - { 1404 - const struct bpf_prog_array_item *item; 1405 - const struct bpf_prog *prog; 1406 - const struct bpf_prog_array *array; 1407 - struct bpf_run_ctx *old_run_ctx; 1408 - struct bpf_cg_run_ctx run_ctx; 1409 - 1410 - run_ctx.retval = retval; 1411 - migrate_disable(); 1412 - rcu_read_lock(); 1413 - array = rcu_dereference(array_rcu); 1414 - item = &array->items[0]; 1415 - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 1416 - while ((prog = READ_ONCE(item->prog))) { 1417 - run_ctx.prog_item = item; 1418 - if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval)) 1419 - run_ctx.retval = -EPERM; 1420 - item++; 
1421 - } 1422 - bpf_reset_run_ctx(old_run_ctx); 1423 - rcu_read_unlock(); 1424 - migrate_enable(); 1425 - return run_ctx.retval; 1426 - } 1427 - 1428 1318 static __always_inline u32 1429 - BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu, 1319 + bpf_prog_run_array(const struct bpf_prog_array *array, 1430 1320 const void *ctx, bpf_prog_run_fn run_prog) 1431 1321 { 1432 1322 const struct bpf_prog_array_item *item; 1433 1323 const struct bpf_prog *prog; 1434 - const struct bpf_prog_array *array; 1435 1324 struct bpf_run_ctx *old_run_ctx; 1436 1325 struct bpf_trace_run_ctx run_ctx; 1437 1326 u32 ret = 1; 1438 1327 1439 - migrate_disable(); 1440 - rcu_read_lock(); 1441 - array = rcu_dereference(array_rcu); 1328 + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held"); 1329 + 1442 1330 if (unlikely(!array)) 1443 - goto out; 1331 + return ret; 1332 + 1333 + migrate_disable(); 1444 1334 old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 1445 1335 item = &array->items[0]; 1446 1336 while ((prog = READ_ONCE(item->prog))) { ··· 1388 1400 item++; 1389 1401 } 1390 1402 bpf_reset_run_ctx(old_run_ctx); 1391 - out: 1392 - rcu_read_unlock(); 1393 1403 migrate_enable(); 1394 1404 return ret; 1395 1405 } 1396 - 1397 - /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs 1398 - * so BPF programs can request cwr for TCP packets. 1399 - * 1400 - * Current cgroup skb programs can only return 0 or 1 (0 to drop the 1401 - * packet. This macro changes the behavior so the low order bit 1402 - * indicates whether the packet should be dropped (0) or not (1) 1403 - * and the next bit is a congestion notification bit. 
This could be 1404 - * used by TCP to call tcp_enter_cwr() 1405 - * 1406 - * Hence, new allowed return values of CGROUP EGRESS BPF programs are: 1407 - * 0: drop packet 1408 - * 1: keep packet 1409 - * 2: drop packet and cn 1410 - * 3: keep packet and cn 1411 - * 1412 - * This macro then converts it to one of the NET_XMIT or an error 1413 - * code that is then interpreted as drop packet (and no cn): 1414 - * 0: NET_XMIT_SUCCESS skb should be transmitted 1415 - * 1: NET_XMIT_DROP skb should be dropped and cn 1416 - * 2: NET_XMIT_CN skb should be transmitted and cn 1417 - * 3: -err skb should be dropped 1418 - */ 1419 - #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ 1420 - ({ \ 1421 - u32 _flags = 0; \ 1422 - bool _cn; \ 1423 - u32 _ret; \ 1424 - _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ 1425 - _cn = _flags & BPF_RET_SET_CN; \ 1426 - if (_ret && !IS_ERR_VALUE((long)_ret)) \ 1427 - _ret = -EFAULT; \ 1428 - if (!_ret) \ 1429 - _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ 1430 - else \ 1431 - _ret = (_cn ? 
NET_XMIT_DROP : _ret); \ 1432 - _ret; \ 1433 - }) 1434 1406 1435 1407 #ifdef CONFIG_BPF_SYSCALL 1436 1408 DECLARE_PER_CPU(int, bpf_prog_active); ··· 1444 1496 1445 1497 void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); 1446 1498 void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); 1499 + 1500 + struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset); 1501 + void bpf_map_free_kptr_off_tab(struct bpf_map *map); 1502 + struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map); 1503 + bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b); 1504 + void bpf_map_free_kptrs(struct bpf_map *map, void *map_value); 1447 1505 1448 1506 struct bpf_map *bpf_map_get(u32 ufd); 1449 1507 struct bpf_map *bpf_map_get_with_uref(u32 ufd); ··· 1747 1793 u32 *next_btf_id, enum bpf_type_flag *flag); 1748 1794 bool btf_struct_ids_match(struct bpf_verifier_log *log, 1749 1795 const struct btf *btf, u32 id, int off, 1750 - const struct btf *need_btf, u32 need_type_id); 1796 + const struct btf *need_btf, u32 need_type_id, 1797 + bool strict); 1751 1798 1752 1799 int btf_distill_func_proto(struct bpf_verifier_log *log, 1753 1800 struct btf *btf,
+2 -2
include/linux/bpf_local_storage.h
··· 143 143 144 144 bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, 145 145 struct bpf_local_storage_elem *selem, 146 - bool uncharge_omem); 146 + bool uncharge_omem, bool use_trace_rcu); 147 147 148 - void bpf_selem_unlink(struct bpf_local_storage_elem *selem); 148 + void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); 149 149 150 150 void bpf_selem_link_map(struct bpf_local_storage_map *smap, 151 151 struct bpf_local_storage_elem *selem);
+1 -2
include/linux/bpf_verifier.h
··· 523 523 const struct bpf_reg_state *reg, int regno); 524 524 int check_func_arg_reg_off(struct bpf_verifier_env *env, 525 525 const struct bpf_reg_state *reg, int regno, 526 - enum bpf_arg_type arg_type, 527 - bool is_release_func); 526 + enum bpf_arg_type arg_type); 528 527 int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 529 528 u32 regno); 530 529 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+23
include/linux/btf.h
··· 17 17 BTF_KFUNC_TYPE_ACQUIRE, 18 18 BTF_KFUNC_TYPE_RELEASE, 19 19 BTF_KFUNC_TYPE_RET_NULL, 20 + BTF_KFUNC_TYPE_KPTR_ACQUIRE, 20 21 BTF_KFUNC_TYPE_MAX, 21 22 }; 22 23 ··· 36 35 struct btf_id_set *acquire_set; 37 36 struct btf_id_set *release_set; 38 37 struct btf_id_set *ret_null_set; 38 + struct btf_id_set *kptr_acquire_set; 39 39 }; 40 40 struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; 41 41 }; 42 42 }; 43 + 44 + struct btf_id_dtor_kfunc { 45 + u32 btf_id; 46 + u32 kfunc_btf_id; 47 + }; 48 + 49 + typedef void (*btf_dtor_kfunc_t)(void *); 43 50 44 51 extern const struct file_operations btf_fops; 45 52 ··· 132 123 u32 expected_offset, u32 expected_size); 133 124 int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); 134 125 int btf_find_timer(const struct btf *btf, const struct btf_type *t); 126 + struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, 127 + const struct btf_type *t); 135 128 bool btf_type_is_void(const struct btf_type *t); 136 129 s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); 137 130 const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, ··· 355 344 enum btf_kfunc_type type, u32 kfunc_btf_id); 356 345 int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, 357 346 const struct btf_kfunc_id_set *s); 347 + s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); 348 + int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, 349 + struct module *owner); 358 350 #else 359 351 static inline const struct btf_type *btf_type_by_id(const struct btf *btf, 360 352 u32 type_id) ··· 378 364 } 379 365 static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, 380 366 const struct btf_kfunc_id_set *s) 367 + { 368 + return 0; 369 + } 370 + static inline s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id) 371 + { 372 + return -ENOENT; 373 + } 374 + static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, 375 + u32 
add_cnt, struct module *owner) 381 376 { 382 377 return 0; 383 378 }
+1 -1
include/linux/skbuff.h
··· 3925 3925 struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, 3926 3926 unsigned int offset); 3927 3927 struct sk_buff *skb_vlan_untag(struct sk_buff *skb); 3928 - int skb_ensure_writable(struct sk_buff *skb, int write_len); 3928 + int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); 3929 3929 int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); 3930 3930 int skb_vlan_pop(struct sk_buff *skb); 3931 3931 int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
+12
include/uapi/linux/bpf.h
··· 5143 5143 * The **hash_algo** is returned on success, 5144 5144 * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if 5145 5145 * invalid arguments are passed. 5146 + * 5147 + * void *bpf_kptr_xchg(void *map_value, void *ptr) 5148 + * Description 5149 + * Exchange kptr at pointer *map_value* with *ptr*, and return the 5150 + * old value. *ptr* can be NULL, otherwise it must be a referenced 5151 + * pointer which will be released when this helper is called. 5152 + * Return 5153 + * The old value of kptr (which can be NULL). The returned pointer 5154 + * if not NULL, is a reference which must be released using its 5155 + * corresponding release function, or moved into a BPF map before 5156 + * program exit. 5146 5157 */ 5147 5158 #define __BPF_FUNC_MAPPER(FN) \ 5148 5159 FN(unspec), \ ··· 5350 5339 FN(copy_from_user_task), \ 5351 5340 FN(skb_set_tstamp), \ 5352 5341 FN(ima_file_hash), \ 5342 + FN(kptr_xchg), \ 5353 5343 /* */ 5354 5344 5355 5345 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
+22 -22
kernel/bpf/arraymap.c
··· 11 11 #include <linux/perf_event.h> 12 12 #include <uapi/linux/btf.h> 13 13 #include <linux/rcupdate_trace.h> 14 + #include <linux/btf_ids.h> 14 15 15 16 #include "map_in_map.h" 16 17 ··· 288 287 return 0; 289 288 } 290 289 291 - static void check_and_free_timer_in_array(struct bpf_array *arr, void *val) 290 + static void check_and_free_fields(struct bpf_array *arr, void *val) 292 291 { 293 - if (unlikely(map_value_has_timer(&arr->map))) 292 + if (map_value_has_timer(&arr->map)) 294 293 bpf_timer_cancel_and_free(val + arr->map.timer_off); 294 + if (map_value_has_kptrs(&arr->map)) 295 + bpf_map_free_kptrs(&arr->map, val); 295 296 } 296 297 297 298 /* Called from syscall or from eBPF program */ ··· 330 327 copy_map_value_locked(map, val, value, false); 331 328 else 332 329 copy_map_value(map, val, value); 333 - check_and_free_timer_in_array(array, val); 330 + check_and_free_fields(array, val); 334 331 } 335 332 return 0; 336 333 } ··· 389 386 struct bpf_array *array = container_of(map, struct bpf_array, map); 390 387 int i; 391 388 392 - if (likely(!map_value_has_timer(map))) 389 + /* We don't reset or free kptr on uref dropping to zero. 
*/ 390 + if (!map_value_has_timer(map)) 393 391 return; 394 392 395 393 for (i = 0; i < array->map.max_entries; i++) ··· 402 398 static void array_map_free(struct bpf_map *map) 403 399 { 404 400 struct bpf_array *array = container_of(map, struct bpf_array, map); 401 + int i; 402 + 403 + if (map_value_has_kptrs(map)) { 404 + for (i = 0; i < array->map.max_entries; i++) 405 + bpf_map_free_kptrs(map, array->value + array->elem_size * i); 406 + bpf_map_free_kptr_off_tab(map); 407 + } 405 408 406 409 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 407 410 bpf_array_free_percpu(array); ··· 691 680 return num_elems; 692 681 } 693 682 694 - static int array_map_btf_id; 683 + BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array) 695 684 const struct bpf_map_ops array_map_ops = { 696 685 .map_meta_equal = array_map_meta_equal, 697 686 .map_alloc_check = array_map_alloc_check, ··· 712 701 .map_update_batch = generic_map_update_batch, 713 702 .map_set_for_each_callback_args = map_set_for_each_callback_args, 714 703 .map_for_each_callback = bpf_for_each_array_elem, 715 - .map_btf_name = "bpf_array", 716 - .map_btf_id = &array_map_btf_id, 704 + .map_btf_id = &array_map_btf_ids[0], 717 705 .iter_seq_info = &iter_seq_info, 718 706 }; 719 707 720 - static int percpu_array_map_btf_id; 721 708 const struct bpf_map_ops percpu_array_map_ops = { 722 709 .map_meta_equal = bpf_map_meta_equal, 723 710 .map_alloc_check = array_map_alloc_check, ··· 731 722 .map_update_batch = generic_map_update_batch, 732 723 .map_set_for_each_callback_args = map_set_for_each_callback_args, 733 724 .map_for_each_callback = bpf_for_each_array_elem, 734 - .map_btf_name = "bpf_array", 735 - .map_btf_id = &percpu_array_map_btf_id, 725 + .map_btf_id = &array_map_btf_ids[0], 736 726 .iter_seq_info = &iter_seq_info, 737 727 }; 738 728 ··· 1110 1102 * Thus, prog_array_map cannot be used as an inner_map 1111 1103 * and map_meta_equal is not implemented. 
1112 1104 */ 1113 - static int prog_array_map_btf_id; 1114 1105 const struct bpf_map_ops prog_array_map_ops = { 1115 1106 .map_alloc_check = fd_array_map_alloc_check, 1116 1107 .map_alloc = prog_array_map_alloc, ··· 1125 1118 .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, 1126 1119 .map_release_uref = prog_array_map_clear, 1127 1120 .map_seq_show_elem = prog_array_map_seq_show_elem, 1128 - .map_btf_name = "bpf_array", 1129 - .map_btf_id = &prog_array_map_btf_id, 1121 + .map_btf_id = &array_map_btf_ids[0], 1130 1122 }; 1131 1123 1132 1124 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, ··· 1214 1208 fd_array_map_free(map); 1215 1209 } 1216 1210 1217 - static int perf_event_array_map_btf_id; 1218 1211 const struct bpf_map_ops perf_event_array_map_ops = { 1219 1212 .map_meta_equal = bpf_map_meta_equal, 1220 1213 .map_alloc_check = fd_array_map_alloc_check, ··· 1226 1221 .map_fd_put_ptr = perf_event_fd_array_put_ptr, 1227 1222 .map_release = perf_event_fd_array_release, 1228 1223 .map_check_btf = map_check_no_btf, 1229 - .map_btf_name = "bpf_array", 1230 - .map_btf_id = &perf_event_array_map_btf_id, 1224 + .map_btf_id = &array_map_btf_ids[0], 1231 1225 }; 1232 1226 1233 1227 #ifdef CONFIG_CGROUPS ··· 1249 1245 fd_array_map_free(map); 1250 1246 } 1251 1247 1252 - static int cgroup_array_map_btf_id; 1253 1248 const struct bpf_map_ops cgroup_array_map_ops = { 1254 1249 .map_meta_equal = bpf_map_meta_equal, 1255 1250 .map_alloc_check = fd_array_map_alloc_check, ··· 1260 1257 .map_fd_get_ptr = cgroup_fd_array_get_ptr, 1261 1258 .map_fd_put_ptr = cgroup_fd_array_put_ptr, 1262 1259 .map_check_btf = map_check_no_btf, 1263 - .map_btf_name = "bpf_array", 1264 - .map_btf_id = &cgroup_array_map_btf_id, 1260 + .map_btf_id = &array_map_btf_ids[0], 1265 1261 }; 1266 1262 #endif 1267 1263 ··· 1334 1332 return insn - insn_buf; 1335 1333 } 1336 1334 1337 - static int array_of_maps_map_btf_id; 1338 1335 const struct bpf_map_ops array_of_maps_map_ops = 
{ 1339 1336 .map_alloc_check = fd_array_map_alloc_check, 1340 1337 .map_alloc = array_of_map_alloc, ··· 1346 1345 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, 1347 1346 .map_gen_lookup = array_of_map_gen_lookup, 1348 1347 .map_check_btf = map_check_no_btf, 1349 - .map_btf_name = "bpf_array", 1350 - .map_btf_id = &array_of_maps_map_btf_id, 1348 + .map_btf_id = &array_map_btf_ids[0], 1351 1349 };
+3 -3
kernel/bpf/bloom_filter.c
··· 7 7 #include <linux/err.h> 8 8 #include <linux/jhash.h> 9 9 #include <linux/random.h> 10 + #include <linux/btf_ids.h> 10 11 11 12 #define BLOOM_CREATE_FLAG_MASK \ 12 13 (BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK) ··· 193 192 return btf_type_is_void(key_type) ? 0 : -EINVAL; 194 193 } 195 194 196 - static int bpf_bloom_map_btf_id; 195 + BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) 197 196 const struct bpf_map_ops bloom_filter_map_ops = { 198 197 .map_meta_equal = bpf_map_meta_equal, 199 198 .map_alloc = bloom_map_alloc, ··· 206 205 .map_update_elem = bloom_map_update_elem, 207 206 .map_delete_elem = bloom_map_delete_elem, 208 207 .map_check_btf = bloom_map_check_btf, 209 - .map_btf_name = "bpf_bloom_filter", 210 - .map_btf_id = &bpf_bloom_map_btf_id, 208 + .map_btf_id = &bpf_bloom_map_btf_ids[0], 211 209 };
+5 -5
kernel/bpf/bpf_inode_storage.c
··· 90 90 */ 91 91 bpf_selem_unlink_map(selem); 92 92 free_inode_storage = bpf_selem_unlink_storage_nolock( 93 - local_storage, selem, false); 93 + local_storage, selem, false, false); 94 94 } 95 95 raw_spin_unlock_bh(&local_storage->lock); 96 96 rcu_read_unlock(); ··· 149 149 if (!sdata) 150 150 return -ENOENT; 151 151 152 - bpf_selem_unlink(SELEM(sdata)); 152 + bpf_selem_unlink(SELEM(sdata), true); 153 153 154 154 return 0; 155 155 } ··· 245 245 bpf_local_storage_map_free(smap, NULL); 246 246 } 247 247 248 - static int inode_storage_map_btf_id; 248 + BTF_ID_LIST_SINGLE(inode_storage_map_btf_ids, struct, 249 + bpf_local_storage_map) 249 250 const struct bpf_map_ops inode_storage_map_ops = { 250 251 .map_meta_equal = bpf_map_meta_equal, 251 252 .map_alloc_check = bpf_local_storage_map_alloc_check, ··· 257 256 .map_update_elem = bpf_fd_inode_storage_update_elem, 258 257 .map_delete_elem = bpf_fd_inode_storage_delete_elem, 259 258 .map_check_btf = bpf_local_storage_map_check_btf, 260 - .map_btf_name = "bpf_local_storage_map", 261 - .map_btf_id = &inode_storage_map_btf_id, 259 + .map_btf_id = &inode_storage_map_btf_ids[0], 262 260 .map_owner_storage_ptr = inode_storage_ptr, 263 261 }; 264 262
+1 -1
kernel/bpf/bpf_iter.c
··· 545 545 bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); 546 546 link->tinfo = tinfo; 547 547 548 - err = bpf_link_prime(&link->link, &link_primer); 548 + err = bpf_link_prime(&link->link, &link_primer); 549 549 if (err) { 550 550 kfree(link); 551 551 return err;
+19 -10
kernel/bpf/bpf_local_storage.c
··· 106 106 */ 107 107 bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, 108 108 struct bpf_local_storage_elem *selem, 109 - bool uncharge_mem) 109 + bool uncharge_mem, bool use_trace_rcu) 110 110 { 111 111 struct bpf_local_storage_map *smap; 112 112 bool free_local_storage; ··· 150 150 SDATA(selem)) 151 151 RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); 152 152 153 - call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); 153 + if (use_trace_rcu) 154 + call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); 155 + else 156 + kfree_rcu(selem, rcu); 157 + 154 158 return free_local_storage; 155 159 } 156 160 157 - static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) 161 + static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, 162 + bool use_trace_rcu) 158 163 { 159 164 struct bpf_local_storage *local_storage; 160 165 bool free_local_storage = false; ··· 174 169 raw_spin_lock_irqsave(&local_storage->lock, flags); 175 170 if (likely(selem_linked_to_storage(selem))) 176 171 free_local_storage = bpf_selem_unlink_storage_nolock( 177 - local_storage, selem, true); 172 + local_storage, selem, true, use_trace_rcu); 178 173 raw_spin_unlock_irqrestore(&local_storage->lock, flags); 179 174 180 - if (free_local_storage) 181 - call_rcu_tasks_trace(&local_storage->rcu, 175 + if (free_local_storage) { 176 + if (use_trace_rcu) 177 + call_rcu_tasks_trace(&local_storage->rcu, 182 178 bpf_local_storage_free_rcu); 179 + else 180 + kfree_rcu(local_storage, rcu); 181 + } 183 182 } 184 183 185 184 void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, ··· 223 214 raw_spin_unlock_irqrestore(&b->lock, flags); 224 215 } 225 216 226 - void bpf_selem_unlink(struct bpf_local_storage_elem *selem) 217 + void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) 227 218 { 228 219 /* Always unlink from map before unlinking from local_storage 229 220 * because selem will be 
freed after successfully unlinked from 230 221 * the local_storage. 231 222 */ 232 223 bpf_selem_unlink_map(selem); 233 - __bpf_selem_unlink_storage(selem); 224 + __bpf_selem_unlink_storage(selem, use_trace_rcu); 234 225 } 235 226 236 227 struct bpf_local_storage_data * ··· 475 466 if (old_sdata) { 476 467 bpf_selem_unlink_map(SELEM(old_sdata)); 477 468 bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), 478 - false); 469 + false, true); 479 470 } 480 471 481 472 unlock: ··· 557 548 migrate_disable(); 558 549 __this_cpu_inc(*busy_counter); 559 550 } 560 - bpf_selem_unlink(selem); 551 + bpf_selem_unlink(selem, false); 561 552 if (busy_counter) { 562 553 __this_cpu_dec(*busy_counter); 563 554 migrate_enable();
+5 -5
kernel/bpf/bpf_struct_ops.c
··· 10 10 #include <linux/seq_file.h> 11 11 #include <linux/refcount.h> 12 12 #include <linux/mutex.h> 13 + #include <linux/btf_ids.h> 13 14 14 15 enum bpf_struct_ops_state { 15 16 BPF_STRUCT_OPS_STATE_INIT, ··· 264 263 /* No lock is needed. state and refcnt do not need 265 264 * to be updated together under atomic context. 266 265 */ 267 - uvalue = (struct bpf_struct_ops_value *)value; 266 + uvalue = value; 268 267 memcpy(uvalue, st_map->uvalue, map->value_size); 269 268 uvalue->state = state; 270 269 refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt)); ··· 354 353 if (err) 355 354 return err; 356 355 357 - uvalue = (struct bpf_struct_ops_value *)value; 356 + uvalue = value; 358 357 err = check_zero_holes(t, uvalue->data); 359 358 if (err) 360 359 return err; ··· 613 612 return map; 614 613 } 615 614 616 - static int bpf_struct_ops_map_btf_id; 615 + BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map) 617 616 const struct bpf_map_ops bpf_struct_ops_map_ops = { 618 617 .map_alloc_check = bpf_struct_ops_map_alloc_check, 619 618 .map_alloc = bpf_struct_ops_map_alloc, ··· 623 622 .map_delete_elem = bpf_struct_ops_map_delete_elem, 624 623 .map_update_elem = bpf_struct_ops_map_update_elem, 625 624 .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem, 626 - .map_btf_name = "bpf_struct_ops_map", 627 - .map_btf_id = &bpf_struct_ops_map_btf_id, 625 + .map_btf_id = &bpf_struct_ops_map_btf_ids[0], 628 626 }; 629 627 630 628 /* "const void *" because some subsystem is
+4 -5
kernel/bpf/bpf_task_storage.c
··· 102 102 */ 103 103 bpf_selem_unlink_map(selem); 104 104 free_task_storage = bpf_selem_unlink_storage_nolock( 105 - local_storage, selem, false); 105 + local_storage, selem, false, false); 106 106 } 107 107 raw_spin_unlock_irqrestore(&local_storage->lock, flags); 108 108 bpf_task_storage_unlock(); ··· 192 192 if (!sdata) 193 193 return -ENOENT; 194 194 195 - bpf_selem_unlink(SELEM(sdata)); 195 + bpf_selem_unlink(SELEM(sdata), true); 196 196 197 197 return 0; 198 198 } ··· 307 307 bpf_local_storage_map_free(smap, &bpf_task_storage_busy); 308 308 } 309 309 310 - static int task_storage_map_btf_id; 310 + BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map) 311 311 const struct bpf_map_ops task_storage_map_ops = { 312 312 .map_meta_equal = bpf_map_meta_equal, 313 313 .map_alloc_check = bpf_local_storage_map_alloc_check, ··· 318 318 .map_update_elem = bpf_pid_task_storage_update_elem, 319 319 .map_delete_elem = bpf_pid_task_storage_delete_elem, 320 320 .map_check_btf = bpf_local_storage_map_check_btf, 321 - .map_btf_name = "bpf_local_storage_map", 322 - .map_btf_id = &task_storage_map_btf_id, 321 + .map_btf_id = &task_storage_map_btf_ids[0], 323 322 .map_owner_storage_ptr = task_storage_ptr, 324 323 }; 325 324
+548 -86
kernel/bpf/btf.c
··· 207 207 208 208 enum { 209 209 BTF_KFUNC_SET_MAX_CNT = 32, 210 + BTF_DTOR_KFUNC_MAX_CNT = 256, 210 211 }; 211 212 212 213 struct btf_kfunc_set_tab { 213 214 struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX]; 215 + }; 216 + 217 + struct btf_id_dtor_kfunc_tab { 218 + u32 cnt; 219 + struct btf_id_dtor_kfunc dtors[]; 214 220 }; 215 221 216 222 struct btf { ··· 234 228 u32 id; 235 229 struct rcu_head rcu; 236 230 struct btf_kfunc_set_tab *kfunc_set_tab; 231 + struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; 237 232 238 233 /* split BTF support */ 239 234 struct btf *base_btf; ··· 1623 1616 btf->kfunc_set_tab = NULL; 1624 1617 } 1625 1618 1619 + static void btf_free_dtor_kfunc_tab(struct btf *btf) 1620 + { 1621 + struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab; 1622 + 1623 + if (!tab) 1624 + return; 1625 + kfree(tab); 1626 + btf->dtor_kfunc_tab = NULL; 1627 + } 1628 + 1626 1629 static void btf_free(struct btf *btf) 1627 1630 { 1631 + btf_free_dtor_kfunc_tab(btf); 1628 1632 btf_free_kfunc_set_tab(btf); 1629 1633 kvfree(btf->types); 1630 1634 kvfree(btf->resolved_sizes); ··· 3181 3163 btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); 3182 3164 } 3183 3165 3166 + enum btf_field_type { 3167 + BTF_FIELD_SPIN_LOCK, 3168 + BTF_FIELD_TIMER, 3169 + BTF_FIELD_KPTR, 3170 + }; 3171 + 3172 + enum { 3173 + BTF_FIELD_IGNORE = 0, 3174 + BTF_FIELD_FOUND = 1, 3175 + }; 3176 + 3177 + struct btf_field_info { 3178 + u32 type_id; 3179 + u32 off; 3180 + enum bpf_kptr_type type; 3181 + }; 3182 + 3183 + static int btf_find_struct(const struct btf *btf, const struct btf_type *t, 3184 + u32 off, int sz, struct btf_field_info *info) 3185 + { 3186 + if (!__btf_type_is_struct(t)) 3187 + return BTF_FIELD_IGNORE; 3188 + if (t->size != sz) 3189 + return BTF_FIELD_IGNORE; 3190 + info->off = off; 3191 + return BTF_FIELD_FOUND; 3192 + } 3193 + 3194 + static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, 3195 + u32 off, int sz, struct 
btf_field_info *info) 3196 + { 3197 + enum bpf_kptr_type type; 3198 + u32 res_id; 3199 + 3200 + /* For PTR, sz is always == 8 */ 3201 + if (!btf_type_is_ptr(t)) 3202 + return BTF_FIELD_IGNORE; 3203 + t = btf_type_by_id(btf, t->type); 3204 + 3205 + if (!btf_type_is_type_tag(t)) 3206 + return BTF_FIELD_IGNORE; 3207 + /* Reject extra tags */ 3208 + if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) 3209 + return -EINVAL; 3210 + if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) 3211 + type = BPF_KPTR_UNREF; 3212 + else if (!strcmp("kptr_ref", __btf_name_by_offset(btf, t->name_off))) 3213 + type = BPF_KPTR_REF; 3214 + else 3215 + return -EINVAL; 3216 + 3217 + /* Get the base type */ 3218 + t = btf_type_skip_modifiers(btf, t->type, &res_id); 3219 + /* Only pointer to struct is allowed */ 3220 + if (!__btf_type_is_struct(t)) 3221 + return -EINVAL; 3222 + 3223 + info->type_id = res_id; 3224 + info->off = off; 3225 + info->type = type; 3226 + return BTF_FIELD_FOUND; 3227 + } 3228 + 3184 3229 static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t, 3185 - const char *name, int sz, int align) 3230 + const char *name, int sz, int align, 3231 + enum btf_field_type field_type, 3232 + struct btf_field_info *info, int info_cnt) 3186 3233 { 3187 3234 const struct btf_member *member; 3188 - u32 i, off = -ENOENT; 3235 + struct btf_field_info tmp; 3236 + int ret, idx = 0; 3237 + u32 i, off; 3189 3238 3190 3239 for_each_member(i, t, member) { 3191 3240 const struct btf_type *member_type = btf_type_by_id(btf, 3192 3241 member->type); 3193 - if (!__btf_type_is_struct(member_type)) 3242 + 3243 + if (name && strcmp(__btf_name_by_offset(btf, member_type->name_off), name)) 3194 3244 continue; 3195 - if (member_type->size != sz) 3196 - continue; 3197 - if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name)) 3198 - continue; 3199 - if (off != -ENOENT) 3200 - /* only one such field is allowed */ 3201 - return -E2BIG; 3245 + 3202 3246 off = 
__btf_member_bit_offset(t, member); 3203 3247 if (off % 8) 3204 3248 /* valid C code cannot generate such BTF */ ··· 3268 3188 off /= 8; 3269 3189 if (off % align) 3270 3190 return -EINVAL; 3191 + 3192 + switch (field_type) { 3193 + case BTF_FIELD_SPIN_LOCK: 3194 + case BTF_FIELD_TIMER: 3195 + ret = btf_find_struct(btf, member_type, off, sz, 3196 + idx < info_cnt ? &info[idx] : &tmp); 3197 + if (ret < 0) 3198 + return ret; 3199 + break; 3200 + case BTF_FIELD_KPTR: 3201 + ret = btf_find_kptr(btf, member_type, off, sz, 3202 + idx < info_cnt ? &info[idx] : &tmp); 3203 + if (ret < 0) 3204 + return ret; 3205 + break; 3206 + default: 3207 + return -EFAULT; 3208 + } 3209 + 3210 + if (ret == BTF_FIELD_IGNORE) 3211 + continue; 3212 + if (idx >= info_cnt) 3213 + return -E2BIG; 3214 + ++idx; 3271 3215 } 3272 - return off; 3216 + return idx; 3273 3217 } 3274 3218 3275 3219 static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, 3276 - const char *name, int sz, int align) 3220 + const char *name, int sz, int align, 3221 + enum btf_field_type field_type, 3222 + struct btf_field_info *info, int info_cnt) 3277 3223 { 3278 3224 const struct btf_var_secinfo *vsi; 3279 - u32 i, off = -ENOENT; 3225 + struct btf_field_info tmp; 3226 + int ret, idx = 0; 3227 + u32 i, off; 3280 3228 3281 3229 for_each_vsi(i, t, vsi) { 3282 3230 const struct btf_type *var = btf_type_by_id(btf, vsi->type); 3283 3231 const struct btf_type *var_type = btf_type_by_id(btf, var->type); 3284 3232 3285 - if (!__btf_type_is_struct(var_type)) 3286 - continue; 3287 - if (var_type->size != sz) 3233 + off = vsi->offset; 3234 + 3235 + if (name && strcmp(__btf_name_by_offset(btf, var_type->name_off), name)) 3288 3236 continue; 3289 3237 if (vsi->size != sz) 3290 3238 continue; 3291 - if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name)) 3292 - continue; 3293 - if (off != -ENOENT) 3294 - /* only one such field is allowed */ 3295 - return -E2BIG; 3296 - off = vsi->offset; 3297 3239 if 
(off % align) 3298 3240 return -EINVAL; 3241 + 3242 + switch (field_type) { 3243 + case BTF_FIELD_SPIN_LOCK: 3244 + case BTF_FIELD_TIMER: 3245 + ret = btf_find_struct(btf, var_type, off, sz, 3246 + idx < info_cnt ? &info[idx] : &tmp); 3247 + if (ret < 0) 3248 + return ret; 3249 + break; 3250 + case BTF_FIELD_KPTR: 3251 + ret = btf_find_kptr(btf, var_type, off, sz, 3252 + idx < info_cnt ? &info[idx] : &tmp); 3253 + if (ret < 0) 3254 + return ret; 3255 + break; 3256 + default: 3257 + return -EFAULT; 3258 + } 3259 + 3260 + if (ret == BTF_FIELD_IGNORE) 3261 + continue; 3262 + if (idx >= info_cnt) 3263 + return -E2BIG; 3264 + ++idx; 3299 3265 } 3300 - return off; 3266 + return idx; 3301 3267 } 3302 3268 3303 3269 static int btf_find_field(const struct btf *btf, const struct btf_type *t, 3304 - const char *name, int sz, int align) 3270 + enum btf_field_type field_type, 3271 + struct btf_field_info *info, int info_cnt) 3305 3272 { 3273 + const char *name; 3274 + int sz, align; 3275 + 3276 + switch (field_type) { 3277 + case BTF_FIELD_SPIN_LOCK: 3278 + name = "bpf_spin_lock"; 3279 + sz = sizeof(struct bpf_spin_lock); 3280 + align = __alignof__(struct bpf_spin_lock); 3281 + break; 3282 + case BTF_FIELD_TIMER: 3283 + name = "bpf_timer"; 3284 + sz = sizeof(struct bpf_timer); 3285 + align = __alignof__(struct bpf_timer); 3286 + break; 3287 + case BTF_FIELD_KPTR: 3288 + name = NULL; 3289 + sz = sizeof(u64); 3290 + align = 8; 3291 + break; 3292 + default: 3293 + return -EFAULT; 3294 + } 3306 3295 3307 3296 if (__btf_type_is_struct(t)) 3308 - return btf_find_struct_field(btf, t, name, sz, align); 3297 + return btf_find_struct_field(btf, t, name, sz, align, field_type, info, info_cnt); 3309 3298 else if (btf_type_is_datasec(t)) 3310 - return btf_find_datasec_var(btf, t, name, sz, align); 3299 + return btf_find_datasec_var(btf, t, name, sz, align, field_type, info, info_cnt); 3311 3300 return -EINVAL; 3312 3301 } 3313 3302 ··· 3386 3237 */ 3387 3238 int btf_find_spin_lock(const 
struct btf *btf, const struct btf_type *t) 3388 3239 { 3389 - return btf_find_field(btf, t, "bpf_spin_lock", 3390 - sizeof(struct bpf_spin_lock), 3391 - __alignof__(struct bpf_spin_lock)); 3240 + struct btf_field_info info; 3241 + int ret; 3242 + 3243 + ret = btf_find_field(btf, t, BTF_FIELD_SPIN_LOCK, &info, 1); 3244 + if (ret < 0) 3245 + return ret; 3246 + if (!ret) 3247 + return -ENOENT; 3248 + return info.off; 3392 3249 } 3393 3250 3394 3251 int btf_find_timer(const struct btf *btf, const struct btf_type *t) 3395 3252 { 3396 - return btf_find_field(btf, t, "bpf_timer", 3397 - sizeof(struct bpf_timer), 3398 - __alignof__(struct bpf_timer)); 3253 + struct btf_field_info info; 3254 + int ret; 3255 + 3256 + ret = btf_find_field(btf, t, BTF_FIELD_TIMER, &info, 1); 3257 + if (ret < 0) 3258 + return ret; 3259 + if (!ret) 3260 + return -ENOENT; 3261 + return info.off; 3262 + } 3263 + 3264 + struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, 3265 + const struct btf_type *t) 3266 + { 3267 + struct btf_field_info info_arr[BPF_MAP_VALUE_OFF_MAX]; 3268 + struct bpf_map_value_off *tab; 3269 + struct btf *kernel_btf = NULL; 3270 + struct module *mod = NULL; 3271 + int ret, i, nr_off; 3272 + 3273 + ret = btf_find_field(btf, t, BTF_FIELD_KPTR, info_arr, ARRAY_SIZE(info_arr)); 3274 + if (ret < 0) 3275 + return ERR_PTR(ret); 3276 + if (!ret) 3277 + return NULL; 3278 + 3279 + nr_off = ret; 3280 + tab = kzalloc(offsetof(struct bpf_map_value_off, off[nr_off]), GFP_KERNEL | __GFP_NOWARN); 3281 + if (!tab) 3282 + return ERR_PTR(-ENOMEM); 3283 + 3284 + for (i = 0; i < nr_off; i++) { 3285 + const struct btf_type *t; 3286 + s32 id; 3287 + 3288 + /* Find type in map BTF, and use it to look up the matching type 3289 + * in vmlinux or module BTFs, by name and kind. 
3290 + */ 3291 + t = btf_type_by_id(btf, info_arr[i].type_id); 3292 + id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info), 3293 + &kernel_btf); 3294 + if (id < 0) { 3295 + ret = id; 3296 + goto end; 3297 + } 3298 + 3299 + /* Find and stash the function pointer for the destruction function that 3300 + * needs to be eventually invoked from the map free path. 3301 + */ 3302 + if (info_arr[i].type == BPF_KPTR_REF) { 3303 + const struct btf_type *dtor_func; 3304 + const char *dtor_func_name; 3305 + unsigned long addr; 3306 + s32 dtor_btf_id; 3307 + 3308 + /* This call also serves as a whitelist of allowed objects that 3309 + * can be used as a referenced pointer and be stored in a map at 3310 + * the same time. 3311 + */ 3312 + dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id); 3313 + if (dtor_btf_id < 0) { 3314 + ret = dtor_btf_id; 3315 + goto end_btf; 3316 + } 3317 + 3318 + dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id); 3319 + if (!dtor_func) { 3320 + ret = -ENOENT; 3321 + goto end_btf; 3322 + } 3323 + 3324 + if (btf_is_module(kernel_btf)) { 3325 + mod = btf_try_get_module(kernel_btf); 3326 + if (!mod) { 3327 + ret = -ENXIO; 3328 + goto end_btf; 3329 + } 3330 + } 3331 + 3332 + /* We already verified dtor_func to be btf_type_is_func 3333 + * in register_btf_id_dtor_kfuncs. 
3334 + */ 3335 + dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off); 3336 + addr = kallsyms_lookup_name(dtor_func_name); 3337 + if (!addr) { 3338 + ret = -EINVAL; 3339 + goto end_mod; 3340 + } 3341 + tab->off[i].kptr.dtor = (void *)addr; 3342 + } 3343 + 3344 + tab->off[i].offset = info_arr[i].off; 3345 + tab->off[i].type = info_arr[i].type; 3346 + tab->off[i].kptr.btf_id = id; 3347 + tab->off[i].kptr.btf = kernel_btf; 3348 + tab->off[i].kptr.module = mod; 3349 + } 3350 + tab->nr_off = nr_off; 3351 + return tab; 3352 + end_mod: 3353 + module_put(mod); 3354 + end_btf: 3355 + btf_put(kernel_btf); 3356 + end: 3357 + while (i--) { 3358 + btf_put(tab->off[i].kptr.btf); 3359 + if (tab->off[i].kptr.module) 3360 + module_put(tab->off[i].kptr.module); 3361 + } 3362 + kfree(tab); 3363 + return ERR_PTR(ret); 3399 3364 } 3400 3365 3401 3366 static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, ··· 4804 4541 return 0; 4805 4542 } 4806 4543 4544 + static int btf_check_type_tags(struct btf_verifier_env *env, 4545 + struct btf *btf, int start_id) 4546 + { 4547 + int i, n, good_id = start_id - 1; 4548 + bool in_tags; 4549 + 4550 + n = btf_nr_types(btf); 4551 + for (i = start_id; i < n; i++) { 4552 + const struct btf_type *t; 4553 + u32 cur_id = i; 4554 + 4555 + t = btf_type_by_id(btf, i); 4556 + if (!t) 4557 + return -EINVAL; 4558 + if (!btf_type_is_modifier(t)) 4559 + continue; 4560 + 4561 + cond_resched(); 4562 + 4563 + in_tags = btf_type_is_type_tag(t); 4564 + while (btf_type_is_modifier(t)) { 4565 + if (btf_type_is_type_tag(t)) { 4566 + if (!in_tags) { 4567 + btf_verifier_log(env, "Type tags don't precede modifiers"); 4568 + return -EINVAL; 4569 + } 4570 + } else if (in_tags) { 4571 + in_tags = false; 4572 + } 4573 + if (cur_id <= good_id) 4574 + break; 4575 + /* Move to next type */ 4576 + cur_id = t->type; 4577 + t = btf_type_by_id(btf, cur_id); 4578 + if (!t) 4579 + return -EINVAL; 4580 + } 4581 + good_id = i; 4582 + } 4583 + 
return 0; 4584 + } 4585 + 4807 4586 static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, 4808 4587 u32 log_level, char __user *log_ubuf, u32 log_size) 4809 4588 { ··· 4910 4605 goto errout; 4911 4606 4912 4607 err = btf_parse_type_sec(env); 4608 + if (err) 4609 + goto errout; 4610 + 4611 + err = btf_check_type_tags(env, btf, 1); 4913 4612 if (err) 4914 4613 goto errout; 4915 4614 ··· 5025 4716 return ctx_type; 5026 4717 } 5027 4718 5028 - static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = { 5029 - #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 5030 - #define BPF_LINK_TYPE(_id, _name) 5031 - #define BPF_MAP_TYPE(_id, _ops) \ 5032 - [_id] = &_ops, 5033 - #include <linux/bpf_types.h> 5034 - #undef BPF_PROG_TYPE 5035 - #undef BPF_LINK_TYPE 5036 - #undef BPF_MAP_TYPE 5037 - }; 5038 - 5039 - static int btf_vmlinux_map_ids_init(const struct btf *btf, 5040 - struct bpf_verifier_log *log) 5041 - { 5042 - const struct bpf_map_ops *ops; 5043 - int i, btf_id; 5044 - 5045 - for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) { 5046 - ops = btf_vmlinux_map_ops[i]; 5047 - if (!ops || (!ops->map_btf_name && !ops->map_btf_id)) 5048 - continue; 5049 - if (!ops->map_btf_name || !ops->map_btf_id) { 5050 - bpf_log(log, "map type %d is misconfigured\n", i); 5051 - return -EINVAL; 5052 - } 5053 - btf_id = btf_find_by_name_kind(btf, ops->map_btf_name, 5054 - BTF_KIND_STRUCT); 5055 - if (btf_id < 0) 5056 - return btf_id; 5057 - *ops->map_btf_id = btf_id; 5058 - } 5059 - 5060 - return 0; 5061 - } 5062 - 5063 4719 static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, 5064 4720 struct btf *btf, 5065 4721 const struct btf_type *t, ··· 5083 4809 if (err) 5084 4810 goto errout; 5085 4811 4812 + err = btf_check_type_tags(env, btf, 1); 4813 + if (err) 4814 + goto errout; 4815 + 5086 4816 /* btf_parse_vmlinux() runs under bpf_verifier_lock */ 5087 4817 bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); 5088 - 5089 - /* find 
bpf map structs for map_ptr access checking */ 5090 - err = btf_vmlinux_map_ids_init(btf, log); 5091 - if (err < 0) 5092 - goto errout; 5093 4818 5094 4819 bpf_struct_ops_init(btf, log); 5095 4820 ··· 5164 4891 goto errout; 5165 4892 5166 4893 err = btf_check_all_metas(env); 4894 + if (err) 4895 + goto errout; 4896 + 4897 + err = btf_check_type_tags(env, btf, btf_nr_types(base_btf)); 5167 4898 if (err) 5168 4899 goto errout; 5169 4900 ··· 5706 5429 5707 5430 bool btf_struct_ids_match(struct bpf_verifier_log *log, 5708 5431 const struct btf *btf, u32 id, int off, 5709 - const struct btf *need_btf, u32 need_type_id) 5432 + const struct btf *need_btf, u32 need_type_id, 5433 + bool strict) 5710 5434 { 5711 5435 const struct btf_type *type; 5712 5436 enum bpf_type_flag flag; ··· 5716 5438 /* Are we already done? */ 5717 5439 if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id)) 5718 5440 return true; 5719 - 5441 + /* In case of strict type match, we do not walk struct, the top level 5442 + * type match must succeed. When strict is true, off should have already 5443 + * been 0. 
5444 + */ 5445 + if (strict) 5446 + return false; 5720 5447 again: 5721 5448 type = btf_type_by_id(btf, id); 5722 5449 if (!type) ··· 6055 5772 struct bpf_verifier_log *log = &env->log; 6056 5773 u32 i, nargs, ref_id, ref_obj_id = 0; 6057 5774 bool is_kfunc = btf_is_kernel(btf); 5775 + bool rel = false, kptr_get = false; 6058 5776 const char *func_name, *ref_tname; 6059 5777 const struct btf_type *t, *ref_t; 6060 5778 const struct btf_param *args; 6061 5779 int ref_regno = 0, ret; 6062 - bool rel = false; 6063 5780 6064 5781 t = btf_type_by_id(btf, func_id); 6065 5782 if (!t || !btf_type_is_func(t)) { ··· 6085 5802 return -EINVAL; 6086 5803 } 6087 5804 6088 - /* Only kfunc can be release func */ 6089 - if (is_kfunc) 5805 + if (is_kfunc) { 5806 + /* Only kfunc can be release func */ 6090 5807 rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), 6091 5808 BTF_KFUNC_TYPE_RELEASE, func_id); 5809 + kptr_get = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), 5810 + BTF_KFUNC_TYPE_KPTR_ACQUIRE, func_id); 5811 + } 5812 + 6092 5813 /* check that BTF function arguments match actual types that the 6093 5814 * verifier sees. 
6094 5815 */ 6095 5816 for (i = 0; i < nargs; i++) { 5817 + enum bpf_arg_type arg_type = ARG_DONTCARE; 6096 5818 u32 regno = i + 1; 6097 5819 struct bpf_reg_state *reg = &regs[regno]; 6098 5820 ··· 6118 5830 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); 6119 5831 ref_tname = btf_name_by_offset(btf, ref_t->name_off); 6120 5832 6121 - ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE, rel); 5833 + if (rel && reg->ref_obj_id) 5834 + arg_type |= OBJ_RELEASE; 5835 + ret = check_func_arg_reg_off(env, reg, regno, arg_type); 6122 5836 if (ret < 0) 6123 5837 return ret; 6124 5838 6125 - if (btf_get_prog_ctx_type(log, btf, t, 6126 - env->prog->type, i)) { 5839 + /* kptr_get is only true for kfunc */ 5840 + if (i == 0 && kptr_get) { 5841 + struct bpf_map_value_off_desc *off_desc; 5842 + 5843 + if (reg->type != PTR_TO_MAP_VALUE) { 5844 + bpf_log(log, "arg#0 expected pointer to map value\n"); 5845 + return -EINVAL; 5846 + } 5847 + 5848 + /* check_func_arg_reg_off allows var_off for 5849 + * PTR_TO_MAP_VALUE, but we need fixed offset to find 5850 + * off_desc. 
5851 + */ 5852 + if (!tnum_is_const(reg->var_off)) { 5853 + bpf_log(log, "arg#0 must have constant offset\n"); 5854 + return -EINVAL; 5855 + } 5856 + 5857 + off_desc = bpf_map_kptr_off_contains(reg->map_ptr, reg->off + reg->var_off.value); 5858 + if (!off_desc || off_desc->type != BPF_KPTR_REF) { 5859 + bpf_log(log, "arg#0 no referenced kptr at map value offset=%llu\n", 5860 + reg->off + reg->var_off.value); 5861 + return -EINVAL; 5862 + } 5863 + 5864 + if (!btf_type_is_ptr(ref_t)) { 5865 + bpf_log(log, "arg#0 BTF type must be a double pointer\n"); 5866 + return -EINVAL; 5867 + } 5868 + 5869 + ref_t = btf_type_skip_modifiers(btf, ref_t->type, &ref_id); 5870 + ref_tname = btf_name_by_offset(btf, ref_t->name_off); 5871 + 5872 + if (!btf_type_is_struct(ref_t)) { 5873 + bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n", 5874 + func_name, i, btf_type_str(ref_t), ref_tname); 5875 + return -EINVAL; 5876 + } 5877 + if (!btf_struct_ids_match(log, btf, ref_id, 0, off_desc->kptr.btf, 5878 + off_desc->kptr.btf_id, true)) { 5879 + bpf_log(log, "kernel function %s args#%d expected pointer to %s %s\n", 5880 + func_name, i, btf_type_str(ref_t), ref_tname); 5881 + return -EINVAL; 5882 + } 5883 + /* rest of the arguments can be anything, like normal kfunc */ 5884 + } else if (btf_get_prog_ctx_type(log, btf, t, env->prog->type, i)) { 6127 5885 /* If function expects ctx type in BTF check that caller 6128 5886 * is passing PTR_TO_CTX. 6129 5887 */ ··· 6196 5862 if (reg->type == PTR_TO_BTF_ID) { 6197 5863 reg_btf = reg->btf; 6198 5864 reg_ref_id = reg->btf_id; 6199 - /* Ensure only one argument is referenced 6200 - * PTR_TO_BTF_ID, check_func_arg_reg_off relies 6201 - * on only one referenced register being allowed 6202 - * for kfuncs. 
6203 - */ 5865 + /* Ensure only one argument is referenced PTR_TO_BTF_ID */ 6204 5866 if (reg->ref_obj_id) { 6205 5867 if (ref_obj_id) { 6206 5868 bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", ··· 6216 5886 reg_ref_tname = btf_name_by_offset(reg_btf, 6217 5887 reg_ref_t->name_off); 6218 5888 if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, 6219 - reg->off, btf, ref_id)) { 5889 + reg->off, btf, ref_id, rel && reg->ref_obj_id)) { 6220 5890 bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", 6221 5891 func_name, i, 6222 5892 btf_type_str(ref_t), ref_tname, ··· 7161 6831 return ret; 7162 6832 } 7163 6833 EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); 6834 + 6835 + s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id) 6836 + { 6837 + struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab; 6838 + struct btf_id_dtor_kfunc *dtor; 6839 + 6840 + if (!tab) 6841 + return -ENOENT; 6842 + /* Even though the size of tab->dtors[0] is > sizeof(u32), we only need 6843 + * to compare the first u32 with btf_id, so we can reuse btf_id_cmp_func. 
6844 + */ 6845 + BUILD_BUG_ON(offsetof(struct btf_id_dtor_kfunc, btf_id) != 0); 6846 + dtor = bsearch(&btf_id, tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func); 6847 + if (!dtor) 6848 + return -ENOENT; 6849 + return dtor->kfunc_btf_id; 6850 + } 6851 + 6852 + static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc *dtors, u32 cnt) 6853 + { 6854 + const struct btf_type *dtor_func, *dtor_func_proto, *t; 6855 + const struct btf_param *args; 6856 + s32 dtor_btf_id; 6857 + u32 nr_args, i; 6858 + 6859 + for (i = 0; i < cnt; i++) { 6860 + dtor_btf_id = dtors[i].kfunc_btf_id; 6861 + 6862 + dtor_func = btf_type_by_id(btf, dtor_btf_id); 6863 + if (!dtor_func || !btf_type_is_func(dtor_func)) 6864 + return -EINVAL; 6865 + 6866 + dtor_func_proto = btf_type_by_id(btf, dtor_func->type); 6867 + if (!dtor_func_proto || !btf_type_is_func_proto(dtor_func_proto)) 6868 + return -EINVAL; 6869 + 6870 + /* Make sure the prototype of the destructor kfunc is 'void func(type *)' */ 6871 + t = btf_type_by_id(btf, dtor_func_proto->type); 6872 + if (!t || !btf_type_is_void(t)) 6873 + return -EINVAL; 6874 + 6875 + nr_args = btf_type_vlen(dtor_func_proto); 6876 + if (nr_args != 1) 6877 + return -EINVAL; 6878 + args = btf_params(dtor_func_proto); 6879 + t = btf_type_by_id(btf, args[0].type); 6880 + /* Allow any pointer type, as width on targets Linux supports 6881 + * will be same for all pointer types (i.e. 
sizeof(void *)) 6882 + */ 6883 + if (!t || !btf_type_is_ptr(t)) 6884 + return -EINVAL; 6885 + } 6886 + return 0; 6887 + } 6888 + 6889 + /* This function must be invoked only from initcalls/module init functions */ 6890 + int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, 6891 + struct module *owner) 6892 + { 6893 + struct btf_id_dtor_kfunc_tab *tab; 6894 + struct btf *btf; 6895 + u32 tab_cnt; 6896 + int ret; 6897 + 6898 + btf = btf_get_module_btf(owner); 6899 + if (!btf) { 6900 + if (!owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { 6901 + pr_err("missing vmlinux BTF, cannot register dtor kfuncs\n"); 6902 + return -ENOENT; 6903 + } 6904 + if (owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) { 6905 + pr_err("missing module BTF, cannot register dtor kfuncs\n"); 6906 + return -ENOENT; 6907 + } 6908 + return 0; 6909 + } 6910 + if (IS_ERR(btf)) 6911 + return PTR_ERR(btf); 6912 + 6913 + if (add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) { 6914 + pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT); 6915 + ret = -E2BIG; 6916 + goto end; 6917 + } 6918 + 6919 + /* Ensure that the prototype of dtor kfuncs being registered is sane */ 6920 + ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt); 6921 + if (ret < 0) 6922 + goto end; 6923 + 6924 + tab = btf->dtor_kfunc_tab; 6925 + /* Only one call allowed for modules */ 6926 + if (WARN_ON_ONCE(tab && btf_is_module(btf))) { 6927 + ret = -EINVAL; 6928 + goto end; 6929 + } 6930 + 6931 + tab_cnt = tab ? 
tab->cnt : 0; 6932 + if (tab_cnt > U32_MAX - add_cnt) { 6933 + ret = -EOVERFLOW; 6934 + goto end; 6935 + } 6936 + if (tab_cnt + add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) { 6937 + pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT); 6938 + ret = -E2BIG; 6939 + goto end; 6940 + } 6941 + 6942 + tab = krealloc(btf->dtor_kfunc_tab, 6943 + offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]), 6944 + GFP_KERNEL | __GFP_NOWARN); 6945 + if (!tab) { 6946 + ret = -ENOMEM; 6947 + goto end; 6948 + } 6949 + 6950 + if (!btf->dtor_kfunc_tab) 6951 + tab->cnt = 0; 6952 + btf->dtor_kfunc_tab = tab; 6953 + 6954 + memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0])); 6955 + tab->cnt += add_cnt; 6956 + 6957 + sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); 6958 + 6959 + return 0; 6960 + end: 6961 + btf_free_dtor_kfunc_tab(btf); 6962 + btf_put(btf); 6963 + return ret; 6964 + } 6965 + EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs); 7164 6966 7165 6967 #define MAX_TYPES_ARE_COMPAT_DEPTH 2 7166 6968
+86 -20
kernel/bpf/cgroup.c
··· 22 22 DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE); 23 23 EXPORT_SYMBOL(cgroup_bpf_enabled_key); 24 24 25 + /* __always_inline is necessary to prevent indirect call through run_prog 26 + * function pointer. 27 + */ 28 + static __always_inline int 29 + bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp, 30 + enum cgroup_bpf_attach_type atype, 31 + const void *ctx, bpf_prog_run_fn run_prog, 32 + int retval, u32 *ret_flags) 33 + { 34 + const struct bpf_prog_array_item *item; 35 + const struct bpf_prog *prog; 36 + const struct bpf_prog_array *array; 37 + struct bpf_run_ctx *old_run_ctx; 38 + struct bpf_cg_run_ctx run_ctx; 39 + u32 func_ret; 40 + 41 + run_ctx.retval = retval; 42 + migrate_disable(); 43 + rcu_read_lock(); 44 + array = rcu_dereference(cgrp->effective[atype]); 45 + item = &array->items[0]; 46 + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 47 + while ((prog = READ_ONCE(item->prog))) { 48 + run_ctx.prog_item = item; 49 + func_ret = run_prog(prog, ctx); 50 + if (ret_flags) { 51 + *(ret_flags) |= (func_ret >> 1); 52 + func_ret &= 1; 53 + } 54 + if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval)) 55 + run_ctx.retval = -EPERM; 56 + item++; 57 + } 58 + bpf_reset_run_ctx(old_run_ctx); 59 + rcu_read_unlock(); 60 + migrate_enable(); 61 + return run_ctx.retval; 62 + } 63 + 25 64 void cgroup_bpf_offline(struct cgroup *cgrp) 26 65 { 27 66 cgroup_get(cgrp); ··· 1114 1075 bpf_compute_and_save_data_end(skb, &saved_data_end); 1115 1076 1116 1077 if (atype == CGROUP_INET_EGRESS) { 1117 - ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY( 1118 - cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb); 1078 + u32 flags = 0; 1079 + bool cn; 1080 + 1081 + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb, 1082 + __bpf_prog_run_save_cb, 0, &flags); 1083 + 1084 + /* Return values of CGROUP EGRESS BPF programs are: 1085 + * 0: drop packet 1086 + * 1: keep packet 1087 + * 2: drop packet and cn 1088 + * 3: keep packet and cn 1089 + * 1090 
+ * The returned value is then converted to one of the NET_XMIT 1091 + * or an error code that is then interpreted as drop packet 1092 + * (and no cn): 1093 + * 0: NET_XMIT_SUCCESS skb should be transmitted 1094 + * 1: NET_XMIT_DROP skb should be dropped and cn 1095 + * 2: NET_XMIT_CN skb should be transmitted and cn 1096 + * 3: -err skb should be dropped 1097 + */ 1098 + 1099 + cn = flags & BPF_RET_SET_CN; 1100 + if (ret && !IS_ERR_VALUE((long)ret)) 1101 + ret = -EFAULT; 1102 + if (!ret) 1103 + ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); 1104 + else 1105 + ret = (cn ? NET_XMIT_DROP : ret); 1119 1106 } else { 1120 - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb, 1121 - __bpf_prog_run_save_cb, 0); 1107 + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, 1108 + skb, __bpf_prog_run_save_cb, 0, 1109 + NULL); 1122 1110 if (ret && !IS_ERR_VALUE((long)ret)) 1123 1111 ret = -EFAULT; 1124 1112 } ··· 1175 1109 { 1176 1110 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1177 1111 1178 - return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, 1179 - bpf_prog_run, 0); 1112 + return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0, 1113 + NULL); 1180 1114 } 1181 1115 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); 1182 1116 ··· 1221 1155 } 1222 1156 1223 1157 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1224 - return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, 1225 - bpf_prog_run, 0, flags); 1158 + return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 1159 + 0, flags); 1226 1160 } 1227 1161 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); 1228 1162 ··· 1248 1182 { 1249 1183 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1250 1184 1251 - return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, 1252 - bpf_prog_run, 0); 1185 + return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run, 1186 + 0, NULL); 1253 1187 } 1254 1188 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); 1255 1189 ··· 
1266 1200 1267 1201 rcu_read_lock(); 1268 1202 cgrp = task_dfl_cgroup(current); 1269 - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, 1270 - bpf_prog_run, 0); 1203 + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0, 1204 + NULL); 1271 1205 rcu_read_unlock(); 1272 1206 1273 1207 return ret; ··· 1432 1366 1433 1367 rcu_read_lock(); 1434 1368 cgrp = task_dfl_cgroup(current); 1435 - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, 1436 - bpf_prog_run, 0); 1369 + ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0, 1370 + NULL); 1437 1371 rcu_read_unlock(); 1438 1372 1439 1373 kfree(ctx.cur_val); ··· 1525 1459 } 1526 1460 1527 1461 lock_sock(sk); 1528 - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT], 1529 - &ctx, bpf_prog_run, 0); 1462 + ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT, 1463 + &ctx, bpf_prog_run, 0, NULL); 1530 1464 release_sock(sk); 1531 1465 1532 1466 if (ret) ··· 1625 1559 } 1626 1560 1627 1561 lock_sock(sk); 1628 - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], 1629 - &ctx, bpf_prog_run, retval); 1562 + ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT, 1563 + &ctx, bpf_prog_run, retval, NULL); 1630 1564 release_sock(sk); 1631 1565 1632 1566 if (ret < 0) ··· 1674 1608 * be called if that data shouldn't be "exported". 1675 1609 */ 1676 1610 1677 - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], 1678 - &ctx, bpf_prog_run, retval); 1611 + ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT, 1612 + &ctx, bpf_prog_run, retval, NULL); 1679 1613 if (ret < 0) 1680 1614 return ret; 1681 1615
+3 -3
kernel/bpf/cpumap.c
··· 27 27 #include <linux/kthread.h> 28 28 #include <linux/capability.h> 29 29 #include <trace/events/xdp.h> 30 + #include <linux/btf_ids.h> 30 31 31 32 #include <linux/netdevice.h> /* netif_receive_skb_list */ 32 33 #include <linux/etherdevice.h> /* eth_type_trans */ ··· 674 673 __cpu_map_lookup_elem); 675 674 } 676 675 677 - static int cpu_map_btf_id; 676 + BTF_ID_LIST_SINGLE(cpu_map_btf_ids, struct, bpf_cpu_map) 678 677 const struct bpf_map_ops cpu_map_ops = { 679 678 .map_meta_equal = bpf_map_meta_equal, 680 679 .map_alloc = cpu_map_alloc, ··· 684 683 .map_lookup_elem = cpu_map_lookup_elem, 685 684 .map_get_next_key = cpu_map_get_next_key, 686 685 .map_check_btf = map_check_no_btf, 687 - .map_btf_name = "bpf_cpu_map", 688 - .map_btf_id = &cpu_map_btf_id, 686 + .map_btf_id = &cpu_map_btf_ids[0], 689 687 .map_redirect = cpu_map_redirect, 690 688 }; 691 689
+4 -6
kernel/bpf/devmap.c
··· 48 48 #include <net/xdp.h> 49 49 #include <linux/filter.h> 50 50 #include <trace/events/xdp.h> 51 + #include <linux/btf_ids.h> 51 52 52 53 #define DEV_CREATE_FLAG_MASK \ 53 54 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) ··· 1006 1005 __dev_map_hash_lookup_elem); 1007 1006 } 1008 1007 1009 - static int dev_map_btf_id; 1008 + BTF_ID_LIST_SINGLE(dev_map_btf_ids, struct, bpf_dtab) 1010 1009 const struct bpf_map_ops dev_map_ops = { 1011 1010 .map_meta_equal = bpf_map_meta_equal, 1012 1011 .map_alloc = dev_map_alloc, ··· 1016 1015 .map_update_elem = dev_map_update_elem, 1017 1016 .map_delete_elem = dev_map_delete_elem, 1018 1017 .map_check_btf = map_check_no_btf, 1019 - .map_btf_name = "bpf_dtab", 1020 - .map_btf_id = &dev_map_btf_id, 1018 + .map_btf_id = &dev_map_btf_ids[0], 1021 1019 .map_redirect = dev_map_redirect, 1022 1020 }; 1023 1021 1024 - static int dev_map_hash_map_btf_id; 1025 1022 const struct bpf_map_ops dev_map_hash_ops = { 1026 1023 .map_meta_equal = bpf_map_meta_equal, 1027 1024 .map_alloc = dev_map_alloc, ··· 1029 1030 .map_update_elem = dev_map_hash_update_elem, 1030 1031 .map_delete_elem = dev_map_hash_delete_elem, 1031 1032 .map_check_btf = map_check_no_btf, 1032 - .map_btf_name = "bpf_dtab", 1033 - .map_btf_id = &dev_map_hash_map_btf_id, 1033 + .map_btf_id = &dev_map_btf_ids[0], 1034 1034 .map_redirect = dev_hash_map_redirect, 1035 1035 }; 1036 1036
+56 -32
kernel/bpf/hashtab.c
··· 10 10 #include <linux/random.h> 11 11 #include <uapi/linux/btf.h> 12 12 #include <linux/rcupdate_trace.h> 13 + #include <linux/btf_ids.h> 13 14 #include "percpu_freelist.h" 14 15 #include "bpf_lru_list.h" 15 16 #include "map_in_map.h" ··· 239 238 u32 num_entries = htab->map.max_entries; 240 239 int i; 241 240 242 - if (likely(!map_value_has_timer(&htab->map))) 241 + if (!map_value_has_timer(&htab->map)) 243 242 return; 244 243 if (htab_has_extra_elems(htab)) 245 244 num_entries += num_possible_cpus(); ··· 251 250 bpf_timer_cancel_and_free(elem->key + 252 251 round_up(htab->map.key_size, 8) + 253 252 htab->map.timer_off); 253 + cond_resched(); 254 + } 255 + } 256 + 257 + static void htab_free_prealloced_kptrs(struct bpf_htab *htab) 258 + { 259 + u32 num_entries = htab->map.max_entries; 260 + int i; 261 + 262 + if (!map_value_has_kptrs(&htab->map)) 263 + return; 264 + if (htab_has_extra_elems(htab)) 265 + num_entries += num_possible_cpus(); 266 + 267 + for (i = 0; i < num_entries; i++) { 268 + struct htab_elem *elem; 269 + 270 + elem = get_htab_elem(htab, i); 271 + bpf_map_free_kptrs(&htab->map, elem->key + round_up(htab->map.key_size, 8)); 254 272 cond_resched(); 255 273 } 256 274 } ··· 745 725 return insn - insn_buf; 746 726 } 747 727 748 - static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem) 728 + static void check_and_free_fields(struct bpf_htab *htab, 729 + struct htab_elem *elem) 749 730 { 750 - if (unlikely(map_value_has_timer(&htab->map))) 751 - bpf_timer_cancel_and_free(elem->key + 752 - round_up(htab->map.key_size, 8) + 753 - htab->map.timer_off); 731 + void *map_value = elem->key + round_up(htab->map.key_size, 8); 732 + 733 + if (map_value_has_timer(&htab->map)) 734 + bpf_timer_cancel_and_free(map_value + htab->map.timer_off); 735 + if (map_value_has_kptrs(&htab->map)) 736 + bpf_map_free_kptrs(&htab->map, map_value); 754 737 } 755 738 756 739 /* It is called from the bpf_lru_list when the LRU needs to delete ··· 761 738 */ 
762 739 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) 763 740 { 764 - struct bpf_htab *htab = (struct bpf_htab *)arg; 741 + struct bpf_htab *htab = arg; 765 742 struct htab_elem *l = NULL, *tgt_l; 766 743 struct hlist_nulls_head *head; 767 744 struct hlist_nulls_node *n; ··· 780 757 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 781 758 if (l == tgt_l) { 782 759 hlist_nulls_del_rcu(&l->hash_node); 783 - check_and_free_timer(htab, l); 760 + check_and_free_fields(htab, l); 784 761 break; 785 762 } 786 763 ··· 852 829 { 853 830 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) 854 831 free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); 855 - check_and_free_timer(htab, l); 832 + check_and_free_fields(htab, l); 856 833 kfree(l); 857 834 } 858 835 ··· 880 857 htab_put_fd_value(htab, l); 881 858 882 859 if (htab_is_prealloc(htab)) { 883 - check_and_free_timer(htab, l); 860 + check_and_free_fields(htab, l); 884 861 __pcpu_freelist_push(&htab->freelist, &l->fnode); 885 862 } else { 886 863 atomic_dec(&htab->count); ··· 1127 1104 if (!htab_is_prealloc(htab)) 1128 1105 free_htab_elem(htab, l_old); 1129 1106 else 1130 - check_and_free_timer(htab, l_old); 1107 + check_and_free_fields(htab, l_old); 1131 1108 } 1132 1109 ret = 0; 1133 1110 err: ··· 1137 1114 1138 1115 static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) 1139 1116 { 1140 - check_and_free_timer(htab, elem); 1117 + check_and_free_fields(htab, elem); 1141 1118 bpf_lru_push_free(&htab->lru, &elem->lru_node); 1142 1119 } 1143 1120 ··· 1442 1419 struct hlist_nulls_node *n; 1443 1420 struct htab_elem *l; 1444 1421 1445 - hlist_nulls_for_each_entry(l, n, head, hash_node) 1446 - check_and_free_timer(htab, l); 1422 + hlist_nulls_for_each_entry(l, n, head, hash_node) { 1423 + /* We don't reset or free kptr on uref dropping to zero, 1424 + * hence just free timer. 
1425 + */ 1426 + bpf_timer_cancel_and_free(l->key + 1427 + round_up(htab->map.key_size, 8) + 1428 + htab->map.timer_off); 1429 + } 1447 1430 cond_resched_rcu(); 1448 1431 } 1449 1432 rcu_read_unlock(); ··· 1459 1430 { 1460 1431 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1461 1432 1462 - if (likely(!map_value_has_timer(&htab->map))) 1433 + /* We don't reset or free kptr on uref dropping to zero. */ 1434 + if (!map_value_has_timer(&htab->map)) 1463 1435 return; 1464 1436 if (!htab_is_prealloc(htab)) 1465 1437 htab_free_malloced_timers(htab); ··· 1483 1453 * not have executed. Wait for them. 1484 1454 */ 1485 1455 rcu_barrier(); 1486 - if (!htab_is_prealloc(htab)) 1456 + if (!htab_is_prealloc(htab)) { 1487 1457 delete_all_elements(htab); 1488 - else 1458 + } else { 1459 + htab_free_prealloced_kptrs(htab); 1489 1460 prealloc_destroy(htab); 1461 + } 1490 1462 1463 + bpf_map_free_kptr_off_tab(map); 1491 1464 free_percpu(htab->extra_elems); 1492 1465 bpf_map_area_free(htab->buckets); 1493 1466 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) ··· 2138 2105 return num_elems; 2139 2106 } 2140 2107 2141 - static int htab_map_btf_id; 2108 + BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab) 2142 2109 const struct bpf_map_ops htab_map_ops = { 2143 2110 .map_meta_equal = bpf_map_meta_equal, 2144 2111 .map_alloc_check = htab_map_alloc_check, ··· 2155 2122 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2156 2123 .map_for_each_callback = bpf_for_each_hash_elem, 2157 2124 BATCH_OPS(htab), 2158 - .map_btf_name = "bpf_htab", 2159 - .map_btf_id = &htab_map_btf_id, 2125 + .map_btf_id = &htab_map_btf_ids[0], 2160 2126 .iter_seq_info = &iter_seq_info, 2161 2127 }; 2162 2128 2163 - static int htab_lru_map_btf_id; 2164 2129 const struct bpf_map_ops htab_lru_map_ops = { 2165 2130 .map_meta_equal = bpf_map_meta_equal, 2166 2131 .map_alloc_check = htab_map_alloc_check, ··· 2176 2145 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2177 
2146 .map_for_each_callback = bpf_for_each_hash_elem, 2178 2147 BATCH_OPS(htab_lru), 2179 - .map_btf_name = "bpf_htab", 2180 - .map_btf_id = &htab_lru_map_btf_id, 2148 + .map_btf_id = &htab_map_btf_ids[0], 2181 2149 .iter_seq_info = &iter_seq_info, 2182 2150 }; 2183 2151 ··· 2282 2252 rcu_read_unlock(); 2283 2253 } 2284 2254 2285 - static int htab_percpu_map_btf_id; 2286 2255 const struct bpf_map_ops htab_percpu_map_ops = { 2287 2256 .map_meta_equal = bpf_map_meta_equal, 2288 2257 .map_alloc_check = htab_map_alloc_check, ··· 2296 2267 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2297 2268 .map_for_each_callback = bpf_for_each_hash_elem, 2298 2269 BATCH_OPS(htab_percpu), 2299 - .map_btf_name = "bpf_htab", 2300 - .map_btf_id = &htab_percpu_map_btf_id, 2270 + .map_btf_id = &htab_map_btf_ids[0], 2301 2271 .iter_seq_info = &iter_seq_info, 2302 2272 }; 2303 2273 2304 - static int htab_lru_percpu_map_btf_id; 2305 2274 const struct bpf_map_ops htab_lru_percpu_map_ops = { 2306 2275 .map_meta_equal = bpf_map_meta_equal, 2307 2276 .map_alloc_check = htab_map_alloc_check, ··· 2314 2287 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2315 2288 .map_for_each_callback = bpf_for_each_hash_elem, 2316 2289 BATCH_OPS(htab_lru_percpu), 2317 - .map_btf_name = "bpf_htab", 2318 - .map_btf_id = &htab_lru_percpu_map_btf_id, 2290 + .map_btf_id = &htab_map_btf_ids[0], 2319 2291 .iter_seq_info = &iter_seq_info, 2320 2292 }; 2321 2293 ··· 2438 2412 fd_htab_map_free(map); 2439 2413 } 2440 2414 2441 - static int htab_of_maps_map_btf_id; 2442 2415 const struct bpf_map_ops htab_of_maps_map_ops = { 2443 2416 .map_alloc_check = fd_htab_map_alloc_check, 2444 2417 .map_alloc = htab_of_map_alloc, ··· 2450 2425 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, 2451 2426 .map_gen_lookup = htab_of_map_gen_lookup, 2452 2427 .map_check_btf = map_check_no_btf, 2453 - .map_btf_name = "bpf_htab", 2454 - .map_btf_id = &htab_of_maps_map_btf_id, 2428 + .map_btf_id = 
&htab_map_btf_ids[0], 2455 2429 };
+24
kernel/bpf/helpers.c
··· 1374 1374 kfree(t); 1375 1375 } 1376 1376 1377 + BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) 1378 + { 1379 + unsigned long *kptr = map_value; 1380 + 1381 + return xchg(kptr, (unsigned long)ptr); 1382 + } 1383 + 1384 + /* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg() 1385 + * helper is determined dynamically by the verifier. 1386 + */ 1387 + #define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) 1388 + 1389 + const struct bpf_func_proto bpf_kptr_xchg_proto = { 1390 + .func = bpf_kptr_xchg, 1391 + .gpl_only = false, 1392 + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, 1393 + .ret_btf_id = BPF_PTR_POISON, 1394 + .arg1_type = ARG_PTR_TO_KPTR, 1395 + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, 1396 + .arg2_btf_id = BPF_PTR_POISON, 1397 + }; 1398 + 1377 1399 const struct bpf_func_proto bpf_get_current_task_proto __weak; 1378 1400 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; 1379 1401 const struct bpf_func_proto bpf_probe_read_user_proto __weak; ··· 1474 1452 return &bpf_timer_start_proto; 1475 1453 case BPF_FUNC_timer_cancel: 1476 1454 return &bpf_timer_cancel_proto; 1455 + case BPF_FUNC_kptr_xchg: 1456 + return &bpf_kptr_xchg_proto; 1477 1457 default: 1478 1458 break; 1479 1459 }
+4 -3
kernel/bpf/local_storage.c
··· 9 9 #include <linux/rbtree.h> 10 10 #include <linux/slab.h> 11 11 #include <uapi/linux/btf.h> 12 + #include <linux/btf_ids.h> 12 13 13 14 #ifdef CONFIG_CGROUP_BPF 14 15 ··· 447 446 rcu_read_unlock(); 448 447 } 449 448 450 - static int cgroup_storage_map_btf_id; 449 + BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct, 450 + bpf_cgroup_storage_map) 451 451 const struct bpf_map_ops cgroup_storage_map_ops = { 452 452 .map_alloc = cgroup_storage_map_alloc, 453 453 .map_free = cgroup_storage_map_free, ··· 458 456 .map_delete_elem = cgroup_storage_delete_elem, 459 457 .map_check_btf = cgroup_storage_check_btf, 460 458 .map_seq_show_elem = cgroup_storage_seq_show_elem, 461 - .map_btf_name = "bpf_cgroup_storage_map", 462 - .map_btf_id = &cgroup_storage_map_btf_id, 459 + .map_btf_id = &cgroup_storage_map_btf_ids[0], 463 460 }; 464 461 465 462 int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
+3 -3
kernel/bpf/lpm_trie.c
··· 14 14 #include <linux/vmalloc.h> 15 15 #include <net/ipv6.h> 16 16 #include <uapi/linux/btf.h> 17 + #include <linux/btf_ids.h> 17 18 18 19 /* Intermediate node */ 19 20 #define LPM_TREE_NODE_FLAG_IM BIT(0) ··· 720 719 -EINVAL : 0; 721 720 } 722 721 723 - static int trie_map_btf_id; 722 + BTF_ID_LIST_SINGLE(trie_map_btf_ids, struct, lpm_trie) 724 723 const struct bpf_map_ops trie_map_ops = { 725 724 .map_meta_equal = bpf_map_meta_equal, 726 725 .map_alloc = trie_alloc, ··· 733 732 .map_update_batch = generic_map_update_batch, 734 733 .map_delete_batch = generic_map_delete_batch, 735 734 .map_check_btf = trie_check_btf, 736 - .map_btf_name = "lpm_trie", 737 - .map_btf_id = &trie_map_btf_id, 735 + .map_btf_id = &trie_map_btf_ids[0], 738 736 };
+4 -1
kernel/bpf/map_in_map.c
··· 52 52 inner_map_meta->max_entries = inner_map->max_entries; 53 53 inner_map_meta->spin_lock_off = inner_map->spin_lock_off; 54 54 inner_map_meta->timer_off = inner_map->timer_off; 55 + inner_map_meta->kptr_off_tab = bpf_map_copy_kptr_off_tab(inner_map); 55 56 if (inner_map->btf) { 56 57 btf_get(inner_map->btf); 57 58 inner_map_meta->btf = inner_map->btf; ··· 72 71 73 72 void bpf_map_meta_free(struct bpf_map *map_meta) 74 73 { 74 + bpf_map_free_kptr_off_tab(map_meta); 75 75 btf_put(map_meta->btf); 76 76 kfree(map_meta); 77 77 } ··· 85 83 meta0->key_size == meta1->key_size && 86 84 meta0->value_size == meta1->value_size && 87 85 meta0->timer_off == meta1->timer_off && 88 - meta0->map_flags == meta1->map_flags; 86 + meta0->map_flags == meta1->map_flags && 87 + bpf_map_equal_kptr_off_tab(meta0, meta1); 89 88 } 90 89 91 90 void *bpf_map_fd_get_ptr(struct bpf_map *map,
+4 -6
kernel/bpf/queue_stack_maps.c
···
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/capability.h>
+#include <linux/btf_ids.h>
 #include "percpu_freelist.h"

 #define QUEUE_STACK_CREATE_FLAG_MASK \
···
 	return -EINVAL;
 }

-static int queue_map_btf_id;
+BTF_ID_LIST_SINGLE(queue_map_btf_ids, struct, bpf_queue_stack)
 const struct bpf_map_ops queue_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc_check = queue_stack_map_alloc_check,
···
 	.map_pop_elem = queue_map_pop_elem,
 	.map_peek_elem = queue_map_peek_elem,
 	.map_get_next_key = queue_stack_map_get_next_key,
-	.map_btf_name = "bpf_queue_stack",
-	.map_btf_id = &queue_map_btf_id,
+	.map_btf_id = &queue_map_btf_ids[0],
 };

-static int stack_map_btf_id;
 const struct bpf_map_ops stack_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc_check = queue_stack_map_alloc_check,
···
 	.map_pop_elem = stack_map_pop_elem,
 	.map_peek_elem = stack_map_peek_elem,
 	.map_get_next_key = queue_stack_map_get_next_key,
-	.map_btf_name = "bpf_queue_stack",
-	.map_btf_id = &stack_map_btf_id,
+	.map_btf_id = &queue_map_btf_ids[0],
 };
+3 -3
kernel/bpf/reuseport_array.c
···
 #include <linux/err.h>
 #include <linux/sock_diag.h>
 #include <net/sock_reuseport.h>
+#include <linux/btf_ids.h>

 struct reuseport_array {
 	struct bpf_map map;
···
 	return 0;
 }

-static int reuseport_array_map_btf_id;
+BTF_ID_LIST_SINGLE(reuseport_array_map_btf_ids, struct, reuseport_array)
 const struct bpf_map_ops reuseport_array_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc_check = reuseport_array_alloc_check,
···
 	.map_lookup_elem = reuseport_array_lookup_elem,
 	.map_get_next_key = reuseport_array_get_next_key,
 	.map_delete_elem = reuseport_array_delete_elem,
-	.map_btf_name = "reuseport_array",
-	.map_btf_id = &reuseport_array_map_btf_id,
+	.map_btf_id = &reuseport_array_map_btf_ids[0],
 };
+5 -5
kernel/bpf/ringbuf.c
···
 #include <linux/poll.h>
 #include <linux/kmemleak.h>
 #include <uapi/linux/btf.h>
+#include <linux/btf_ids.h>

 #define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
···
 	return 0;
 }

-static int ringbuf_map_btf_id;
+BTF_ID_LIST_SINGLE(ringbuf_map_btf_ids, struct, bpf_ringbuf_map)
 const struct bpf_map_ops ringbuf_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc = ringbuf_map_alloc,
···
 	.map_update_elem = ringbuf_map_update_elem,
 	.map_delete_elem = ringbuf_map_delete_elem,
 	.map_get_next_key = ringbuf_map_get_next_key,
-	.map_btf_name = "bpf_ringbuf_map",
-	.map_btf_id = &ringbuf_map_btf_id,
+	.map_btf_id = &ringbuf_map_btf_ids[0],
 };

 /* Given pointer to ring buffer record metadata and struct bpf_ringbuf itself,
···
 const struct bpf_func_proto bpf_ringbuf_submit_proto = {
 	.func = bpf_ringbuf_submit,
 	.ret_type = RET_VOID,
-	.arg1_type = ARG_PTR_TO_ALLOC_MEM,
+	.arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
 	.arg2_type = ARG_ANYTHING,
 };
···
 const struct bpf_func_proto bpf_ringbuf_discard_proto = {
 	.func = bpf_ringbuf_discard,
 	.ret_type = RET_VOID,
-	.arg1_type = ARG_PTR_TO_ALLOC_MEM,
+	.arg1_type = ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
 	.arg2_type = ARG_ANYTHING,
 };
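The `ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE` change above relies on the verifier's convention of encoding a base argument type in the low bits of one integer and OR-ing modifier flags in above them (the verifier hunks below use the matching `base_type()`/`type_flag()` accessors). A toy sketch of that composition, with made-up bit positions and values rather than the kernel's real ones:

```c
#include <assert.h>

/* Base argument types occupy the low bits; modifier flags are OR'd in
 * above them, mirroring compositions like ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE.
 * All values here are hypothetical, not the kernel's. */
enum {
	ARG_PTR_TO_ALLOC_MEM = 1,
	ARG_ANYTHING         = 2,
	TYPE_BITS            = 8,
	OBJ_RELEASE          = 1 << TYPE_BITS, /* hypothetical flag bit */
};

/* Strip flags, keeping only the base type. */
static int base_type(int t)
{
	return t & ((1 << TYPE_BITS) - 1);
}

/* Strip the base type, keeping only the flags. */
static int type_flag(int t)
{
	return t & ~((1 << TYPE_BITS) - 1);
}
```

This is what lets the series mark which argument of a release helper carries the reference without adding a new base type per combination.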
+2 -3
kernel/bpf/stackmap.c
···
 	put_callchain_buffers();
 }

-static int stack_trace_map_btf_id;
+BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map)
 const struct bpf_map_ops stack_trace_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc = stack_map_alloc,
···
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
 	.map_check_btf = map_check_no_btf,
-	.map_btf_name = "bpf_stack_map",
-	.map_btf_id = &stack_trace_map_btf_id,
+	.map_btf_id = &stack_trace_map_btf_ids[0],
 };
+373 -60
kernel/bpf/syscall.c
··· 6 6 #include <linux/bpf_trace.h> 7 7 #include <linux/bpf_lirc.h> 8 8 #include <linux/bpf_verifier.h> 9 + #include <linux/bsearch.h> 9 10 #include <linux/btf.h> 10 11 #include <linux/syscalls.h> 11 12 #include <linux/slab.h> ··· 30 29 #include <linux/pgtable.h> 31 30 #include <linux/bpf_lsm.h> 32 31 #include <linux/poll.h> 32 + #include <linux/sort.h> 33 33 #include <linux/bpf-netns.h> 34 34 #include <linux/rcupdate_trace.h> 35 35 #include <linux/memcontrol.h> ··· 475 473 } 476 474 #endif 477 475 476 + static int bpf_map_kptr_off_cmp(const void *a, const void *b) 477 + { 478 + const struct bpf_map_value_off_desc *off_desc1 = a, *off_desc2 = b; 479 + 480 + if (off_desc1->offset < off_desc2->offset) 481 + return -1; 482 + else if (off_desc1->offset > off_desc2->offset) 483 + return 1; 484 + return 0; 485 + } 486 + 487 + struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset) 488 + { 489 + /* Since members are iterated in btf_find_field in increasing order, 490 + * offsets appended to kptr_off_tab are in increasing order, so we can 491 + * do bsearch to find exact match. 
492 + */ 493 + struct bpf_map_value_off *tab; 494 + 495 + if (!map_value_has_kptrs(map)) 496 + return NULL; 497 + tab = map->kptr_off_tab; 498 + return bsearch(&offset, tab->off, tab->nr_off, sizeof(tab->off[0]), bpf_map_kptr_off_cmp); 499 + } 500 + 501 + void bpf_map_free_kptr_off_tab(struct bpf_map *map) 502 + { 503 + struct bpf_map_value_off *tab = map->kptr_off_tab; 504 + int i; 505 + 506 + if (!map_value_has_kptrs(map)) 507 + return; 508 + for (i = 0; i < tab->nr_off; i++) { 509 + if (tab->off[i].kptr.module) 510 + module_put(tab->off[i].kptr.module); 511 + btf_put(tab->off[i].kptr.btf); 512 + } 513 + kfree(tab); 514 + map->kptr_off_tab = NULL; 515 + } 516 + 517 + struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map) 518 + { 519 + struct bpf_map_value_off *tab = map->kptr_off_tab, *new_tab; 520 + int size, i; 521 + 522 + if (!map_value_has_kptrs(map)) 523 + return ERR_PTR(-ENOENT); 524 + size = offsetof(struct bpf_map_value_off, off[tab->nr_off]); 525 + new_tab = kmemdup(tab, size, GFP_KERNEL | __GFP_NOWARN); 526 + if (!new_tab) 527 + return ERR_PTR(-ENOMEM); 528 + /* Do a deep copy of the kptr_off_tab */ 529 + for (i = 0; i < tab->nr_off; i++) { 530 + btf_get(tab->off[i].kptr.btf); 531 + if (tab->off[i].kptr.module && !try_module_get(tab->off[i].kptr.module)) { 532 + while (i--) { 533 + if (tab->off[i].kptr.module) 534 + module_put(tab->off[i].kptr.module); 535 + btf_put(tab->off[i].kptr.btf); 536 + } 537 + kfree(new_tab); 538 + return ERR_PTR(-ENXIO); 539 + } 540 + } 541 + return new_tab; 542 + } 543 + 544 + bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b) 545 + { 546 + struct bpf_map_value_off *tab_a = map_a->kptr_off_tab, *tab_b = map_b->kptr_off_tab; 547 + bool a_has_kptr = map_value_has_kptrs(map_a), b_has_kptr = map_value_has_kptrs(map_b); 548 + int size; 549 + 550 + if (!a_has_kptr && !b_has_kptr) 551 + return true; 552 + if (a_has_kptr != b_has_kptr) 553 + return false; 554 + if 
(tab_a->nr_off != tab_b->nr_off) 555 + return false; 556 + size = offsetof(struct bpf_map_value_off, off[tab_a->nr_off]); 557 + return !memcmp(tab_a, tab_b, size); 558 + } 559 + 560 + /* Caller must ensure map_value_has_kptrs is true. Note that this function can 561 + * be called on a map value while the map_value is visible to BPF programs, as 562 + * it ensures the correct synchronization, and we already enforce the same using 563 + * the bpf_kptr_xchg helper on the BPF program side for referenced kptrs. 564 + */ 565 + void bpf_map_free_kptrs(struct bpf_map *map, void *map_value) 566 + { 567 + struct bpf_map_value_off *tab = map->kptr_off_tab; 568 + unsigned long *btf_id_ptr; 569 + int i; 570 + 571 + for (i = 0; i < tab->nr_off; i++) { 572 + struct bpf_map_value_off_desc *off_desc = &tab->off[i]; 573 + unsigned long old_ptr; 574 + 575 + btf_id_ptr = map_value + off_desc->offset; 576 + if (off_desc->type == BPF_KPTR_UNREF) { 577 + u64 *p = (u64 *)btf_id_ptr; 578 + 579 + WRITE_ONCE(p, 0); 580 + continue; 581 + } 582 + old_ptr = xchg(btf_id_ptr, 0); 583 + off_desc->kptr.dtor((void *)old_ptr); 584 + } 585 + } 586 + 478 587 /* called from workqueue */ 479 588 static void bpf_map_free_deferred(struct work_struct *work) 480 589 { 481 590 struct bpf_map *map = container_of(work, struct bpf_map, work); 482 591 483 592 security_bpf_map_free(map); 593 + kfree(map->off_arr); 484 594 bpf_map_release_memcg(map); 485 - /* implementation dependent freeing */ 595 + /* implementation dependent freeing, map_free callback also does 596 + * bpf_map_free_kptr_off_tab, if needed. 
597 + */ 486 598 map->ops->map_free(map); 487 599 } 488 600 ··· 756 640 int err; 757 641 758 642 if (!map->ops->map_mmap || map_value_has_spin_lock(map) || 759 - map_value_has_timer(map)) 643 + map_value_has_timer(map) || map_value_has_kptrs(map)) 760 644 return -ENOTSUPP; 761 645 762 646 if (!(vma->vm_flags & VM_SHARED)) ··· 883 767 return -ENOTSUPP; 884 768 } 885 769 770 + static int map_off_arr_cmp(const void *_a, const void *_b, const void *priv) 771 + { 772 + const u32 a = *(const u32 *)_a; 773 + const u32 b = *(const u32 *)_b; 774 + 775 + if (a < b) 776 + return -1; 777 + else if (a > b) 778 + return 1; 779 + return 0; 780 + } 781 + 782 + static void map_off_arr_swap(void *_a, void *_b, int size, const void *priv) 783 + { 784 + struct bpf_map *map = (struct bpf_map *)priv; 785 + u32 *off_base = map->off_arr->field_off; 786 + u32 *a = _a, *b = _b; 787 + u8 *sz_a, *sz_b; 788 + 789 + sz_a = map->off_arr->field_sz + (a - off_base); 790 + sz_b = map->off_arr->field_sz + (b - off_base); 791 + 792 + swap(*a, *b); 793 + swap(*sz_a, *sz_b); 794 + } 795 + 796 + static int bpf_map_alloc_off_arr(struct bpf_map *map) 797 + { 798 + bool has_spin_lock = map_value_has_spin_lock(map); 799 + bool has_timer = map_value_has_timer(map); 800 + bool has_kptrs = map_value_has_kptrs(map); 801 + struct bpf_map_off_arr *off_arr; 802 + u32 i; 803 + 804 + if (!has_spin_lock && !has_timer && !has_kptrs) { 805 + map->off_arr = NULL; 806 + return 0; 807 + } 808 + 809 + off_arr = kmalloc(sizeof(*map->off_arr), GFP_KERNEL | __GFP_NOWARN); 810 + if (!off_arr) 811 + return -ENOMEM; 812 + map->off_arr = off_arr; 813 + 814 + off_arr->cnt = 0; 815 + if (has_spin_lock) { 816 + i = off_arr->cnt; 817 + 818 + off_arr->field_off[i] = map->spin_lock_off; 819 + off_arr->field_sz[i] = sizeof(struct bpf_spin_lock); 820 + off_arr->cnt++; 821 + } 822 + if (has_timer) { 823 + i = off_arr->cnt; 824 + 825 + off_arr->field_off[i] = map->timer_off; 826 + off_arr->field_sz[i] = sizeof(struct bpf_timer); 827 + 
off_arr->cnt++; 828 + } 829 + if (has_kptrs) { 830 + struct bpf_map_value_off *tab = map->kptr_off_tab; 831 + u32 *off = &off_arr->field_off[off_arr->cnt]; 832 + u8 *sz = &off_arr->field_sz[off_arr->cnt]; 833 + 834 + for (i = 0; i < tab->nr_off; i++) { 835 + *off++ = tab->off[i].offset; 836 + *sz++ = sizeof(u64); 837 + } 838 + off_arr->cnt += tab->nr_off; 839 + } 840 + 841 + if (off_arr->cnt == 1) 842 + return 0; 843 + sort_r(off_arr->field_off, off_arr->cnt, sizeof(off_arr->field_off[0]), 844 + map_off_arr_cmp, map_off_arr_swap, map); 845 + return 0; 846 + } 847 + 886 848 static int map_check_btf(struct bpf_map *map, const struct btf *btf, 887 849 u32 btf_key_id, u32 btf_value_id) 888 850 { ··· 1014 820 return -EOPNOTSUPP; 1015 821 } 1016 822 1017 - if (map->ops->map_check_btf) 1018 - ret = map->ops->map_check_btf(map, btf, key_type, value_type); 823 + map->kptr_off_tab = btf_parse_kptrs(btf, value_type); 824 + if (map_value_has_kptrs(map)) { 825 + if (!bpf_capable()) { 826 + ret = -EPERM; 827 + goto free_map_tab; 828 + } 829 + if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) { 830 + ret = -EACCES; 831 + goto free_map_tab; 832 + } 833 + if (map->map_type != BPF_MAP_TYPE_HASH && 834 + map->map_type != BPF_MAP_TYPE_LRU_HASH && 835 + map->map_type != BPF_MAP_TYPE_ARRAY) { 836 + ret = -EOPNOTSUPP; 837 + goto free_map_tab; 838 + } 839 + } 1019 840 841 + if (map->ops->map_check_btf) { 842 + ret = map->ops->map_check_btf(map, btf, key_type, value_type); 843 + if (ret < 0) 844 + goto free_map_tab; 845 + } 846 + 847 + return ret; 848 + free_map_tab: 849 + bpf_map_free_kptr_off_tab(map); 1020 850 return ret; 1021 851 } 1022 852 ··· 1130 912 attr->btf_vmlinux_value_type_id; 1131 913 } 1132 914 1133 - err = security_bpf_map_alloc(map); 915 + err = bpf_map_alloc_off_arr(map); 1134 916 if (err) 1135 917 goto free_map; 918 + 919 + err = security_bpf_map_alloc(map); 920 + if (err) 921 + goto free_map_off_arr; 1136 922 1137 923 err = bpf_map_alloc_id(map); 1138 924 if 
(err) ··· 1160 938 1161 939 free_map_sec: 1162 940 security_bpf_map_free(map); 941 + free_map_off_arr: 942 + kfree(map->off_arr); 1163 943 free_map: 1164 944 btf_put(map->btf); 1165 945 map->ops->map_free(map); ··· 1863 1639 return PTR_ERR(map); 1864 1640 1865 1641 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || 1866 - map_value_has_timer(map)) { 1642 + map_value_has_timer(map) || map_value_has_kptrs(map)) { 1867 1643 fdput(f); 1868 1644 return -ENOTSUPP; 1869 1645 } ··· 3254 3030 } 3255 3031 #endif /* CONFIG_PERF_EVENTS */ 3256 3032 3257 - #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd 3258 - 3259 - static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 3033 + static int bpf_raw_tp_link_attach(struct bpf_prog *prog, 3034 + const char __user *user_tp_name) 3260 3035 { 3261 3036 struct bpf_link_primer link_primer; 3262 3037 struct bpf_raw_tp_link *link; 3263 3038 struct bpf_raw_event_map *btp; 3264 - struct bpf_prog *prog; 3265 3039 const char *tp_name; 3266 3040 char buf[128]; 3267 3041 int err; 3268 - 3269 - if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 3270 - return -EINVAL; 3271 - 3272 - prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 3273 - if (IS_ERR(prog)) 3274 - return PTR_ERR(prog); 3275 3042 3276 3043 switch (prog->type) { 3277 3044 case BPF_PROG_TYPE_TRACING: 3278 3045 case BPF_PROG_TYPE_EXT: 3279 3046 case BPF_PROG_TYPE_LSM: 3280 - if (attr->raw_tracepoint.name) { 3047 + if (user_tp_name) 3281 3048 /* The attach point for this category of programs 3282 3049 * should be specified via btf_id during program load. 
3283 3050 */ 3284 - err = -EINVAL; 3285 - goto out_put_prog; 3286 - } 3051 + return -EINVAL; 3287 3052 if (prog->type == BPF_PROG_TYPE_TRACING && 3288 3053 prog->expected_attach_type == BPF_TRACE_RAW_TP) { 3289 3054 tp_name = prog->aux->attach_func_name; 3290 3055 break; 3291 3056 } 3292 - err = bpf_tracing_prog_attach(prog, 0, 0); 3293 - if (err >= 0) 3294 - return err; 3295 - goto out_put_prog; 3057 + return bpf_tracing_prog_attach(prog, 0, 0); 3296 3058 case BPF_PROG_TYPE_RAW_TRACEPOINT: 3297 3059 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 3298 - if (strncpy_from_user(buf, 3299 - u64_to_user_ptr(attr->raw_tracepoint.name), 3300 - sizeof(buf) - 1) < 0) { 3301 - err = -EFAULT; 3302 - goto out_put_prog; 3303 - } 3060 + if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) 3061 + return -EFAULT; 3304 3062 buf[sizeof(buf) - 1] = 0; 3305 3063 tp_name = buf; 3306 3064 break; 3307 3065 default: 3308 - err = -EINVAL; 3309 - goto out_put_prog; 3066 + return -EINVAL; 3310 3067 } 3311 3068 3312 3069 btp = bpf_get_raw_tracepoint(tp_name); 3313 - if (!btp) { 3314 - err = -ENOENT; 3315 - goto out_put_prog; 3316 - } 3070 + if (!btp) 3071 + return -ENOENT; 3317 3072 3318 3073 link = kzalloc(sizeof(*link), GFP_USER); 3319 3074 if (!link) { ··· 3319 3116 3320 3117 out_put_btp: 3321 3118 bpf_put_raw_tracepoint(btp); 3322 - out_put_prog: 3323 - bpf_prog_put(prog); 3324 3119 return err; 3120 + } 3121 + 3122 + #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd 3123 + 3124 + static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 3125 + { 3126 + struct bpf_prog *prog; 3127 + int fd; 3128 + 3129 + if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) 3130 + return -EINVAL; 3131 + 3132 + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 3133 + if (IS_ERR(prog)) 3134 + return PTR_ERR(prog); 3135 + 3136 + fd = bpf_raw_tp_link_attach(prog, u64_to_user_ptr(attr->raw_tracepoint.name)); 3137 + if (fd < 0) 3138 + bpf_prog_put(prog); 3139 + return fd; 3325 3140 } 3326 3141 
3327 3142 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, ··· 3410 3189 case BPF_CGROUP_SETSOCKOPT: 3411 3190 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 3412 3191 case BPF_TRACE_ITER: 3192 + case BPF_TRACE_RAW_TP: 3193 + case BPF_TRACE_FENTRY: 3194 + case BPF_TRACE_FEXIT: 3195 + case BPF_MODIFY_RETURN: 3413 3196 return BPF_PROG_TYPE_TRACING; 3197 + case BPF_LSM_MAC: 3198 + return BPF_PROG_TYPE_LSM; 3414 3199 case BPF_SK_LOOKUP: 3415 3200 return BPF_PROG_TYPE_SK_LOOKUP; 3416 3201 case BPF_XDP: ··· 4473 4246 return err; 4474 4247 } 4475 4248 4476 - static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr, 4477 - struct bpf_prog *prog) 4478 - { 4479 - if (attr->link_create.attach_type != prog->expected_attach_type) 4480 - return -EINVAL; 4481 - 4482 - if (prog->expected_attach_type == BPF_TRACE_ITER) 4483 - return bpf_iter_link_attach(attr, uattr, prog); 4484 - else if (prog->type == BPF_PROG_TYPE_EXT) 4485 - return bpf_tracing_prog_attach(prog, 4486 - attr->link_create.target_fd, 4487 - attr->link_create.target_btf_id); 4488 - return -EINVAL; 4489 - } 4490 - 4491 4249 #define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies 4492 4250 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 4493 4251 { ··· 4494 4282 4495 4283 switch (prog->type) { 4496 4284 case BPF_PROG_TYPE_EXT: 4497 - ret = tracing_bpf_link_attach(attr, uattr, prog); 4498 - goto out; 4285 + break; 4499 4286 case BPF_PROG_TYPE_PERF_EVENT: 4500 4287 case BPF_PROG_TYPE_TRACEPOINT: 4501 4288 if (attr->link_create.attach_type != BPF_PERF_EVENT) { 4502 4289 ret = -EINVAL; 4503 4290 goto out; 4504 4291 } 4505 - ptype = prog->type; 4506 4292 break; 4507 4293 case BPF_PROG_TYPE_KPROBE: 4508 4294 if (attr->link_create.attach_type != BPF_PERF_EVENT && ··· 4508 4298 ret = -EINVAL; 4509 4299 goto out; 4510 4300 } 4511 - ptype = prog->type; 4512 4301 break; 4513 4302 default: 4514 4303 ptype = attach_type_to_prog_type(attr->link_create.attach_type); ··· 
4518 4309 break; 4519 4310 } 4520 4311 4521 - switch (ptype) { 4312 + switch (prog->type) { 4522 4313 case BPF_PROG_TYPE_CGROUP_SKB: 4523 4314 case BPF_PROG_TYPE_CGROUP_SOCK: 4524 4315 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: ··· 4528 4319 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4529 4320 ret = cgroup_bpf_link_attach(attr, prog); 4530 4321 break; 4322 + case BPF_PROG_TYPE_EXT: 4323 + ret = bpf_tracing_prog_attach(prog, 4324 + attr->link_create.target_fd, 4325 + attr->link_create.target_btf_id); 4326 + break; 4327 + case BPF_PROG_TYPE_LSM: 4531 4328 case BPF_PROG_TYPE_TRACING: 4532 - ret = tracing_bpf_link_attach(attr, uattr, prog); 4329 + if (attr->link_create.attach_type != prog->expected_attach_type) { 4330 + ret = -EINVAL; 4331 + goto out; 4332 + } 4333 + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) 4334 + ret = bpf_raw_tp_link_attach(prog, NULL); 4335 + else if (prog->expected_attach_type == BPF_TRACE_ITER) 4336 + ret = bpf_iter_link_attach(attr, uattr, prog); 4337 + else 4338 + ret = bpf_tracing_prog_attach(prog, 4339 + attr->link_create.target_fd, 4340 + attr->link_create.target_btf_id); 4533 4341 break; 4534 4342 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4535 4343 case BPF_PROG_TYPE_SK_LOOKUP: ··· 5134 4908 const struct bpf_prog_ops bpf_syscall_prog_ops = { 5135 4909 .test_run = bpf_prog_test_run_syscall, 5136 4910 }; 4911 + 4912 + #ifdef CONFIG_SYSCTL 4913 + static int bpf_stats_handler(struct ctl_table *table, int write, 4914 + void *buffer, size_t *lenp, loff_t *ppos) 4915 + { 4916 + struct static_key *key = (struct static_key *)table->data; 4917 + static int saved_val; 4918 + int val, ret; 4919 + struct ctl_table tmp = { 4920 + .data = &val, 4921 + .maxlen = sizeof(val), 4922 + .mode = table->mode, 4923 + .extra1 = SYSCTL_ZERO, 4924 + .extra2 = SYSCTL_ONE, 4925 + }; 4926 + 4927 + if (write && !capable(CAP_SYS_ADMIN)) 4928 + return -EPERM; 4929 + 4930 + mutex_lock(&bpf_stats_enabled_mutex); 4931 + val = saved_val; 4932 + ret = proc_dointvec_minmax(&tmp, write, 
buffer, lenp, ppos); 4933 + if (write && !ret && val != saved_val) { 4934 + if (val) 4935 + static_key_slow_inc(key); 4936 + else 4937 + static_key_slow_dec(key); 4938 + saved_val = val; 4939 + } 4940 + mutex_unlock(&bpf_stats_enabled_mutex); 4941 + return ret; 4942 + } 4943 + 4944 + void __weak unpriv_ebpf_notify(int new_state) 4945 + { 4946 + } 4947 + 4948 + static int bpf_unpriv_handler(struct ctl_table *table, int write, 4949 + void *buffer, size_t *lenp, loff_t *ppos) 4950 + { 4951 + int ret, unpriv_enable = *(int *)table->data; 4952 + bool locked_state = unpriv_enable == 1; 4953 + struct ctl_table tmp = *table; 4954 + 4955 + if (write && !capable(CAP_SYS_ADMIN)) 4956 + return -EPERM; 4957 + 4958 + tmp.data = &unpriv_enable; 4959 + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 4960 + if (write && !ret) { 4961 + if (locked_state && unpriv_enable != 1) 4962 + return -EPERM; 4963 + *(int *)table->data = unpriv_enable; 4964 + } 4965 + 4966 + unpriv_ebpf_notify(unpriv_enable); 4967 + 4968 + return ret; 4969 + } 4970 + 4971 + static struct ctl_table bpf_syscall_table[] = { 4972 + { 4973 + .procname = "unprivileged_bpf_disabled", 4974 + .data = &sysctl_unprivileged_bpf_disabled, 4975 + .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), 4976 + .mode = 0644, 4977 + .proc_handler = bpf_unpriv_handler, 4978 + .extra1 = SYSCTL_ZERO, 4979 + .extra2 = SYSCTL_TWO, 4980 + }, 4981 + { 4982 + .procname = "bpf_stats_enabled", 4983 + .data = &bpf_stats_enabled_key.key, 4984 + .maxlen = sizeof(bpf_stats_enabled_key), 4985 + .mode = 0644, 4986 + .proc_handler = bpf_stats_handler, 4987 + }, 4988 + { } 4989 + }; 4990 + 4991 + static int __init bpf_syscall_sysctl_init(void) 4992 + { 4993 + register_sysctl_init("kernel", bpf_syscall_table); 4994 + return 0; 4995 + } 4996 + late_initcall(bpf_syscall_sysctl_init); 4997 + #endif /* CONFIG_SYSCTL */
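The syscall.c hunks above build `map->off_arr` as two parallel arrays (`field_off`, `field_sz`) sorted together via `sort_r()` with a custom swap callback, and `bpf_map_kptr_off_contains()` then relies on the sorted order to `bsearch()` for an exact offset. A self-contained sketch of both halves of that scheme; the insertion sort stands in for `sort_r()` and only illustrates the "swap both arrays in lockstep" idea:

```c
#include <assert.h>
#include <stdlib.h>

/* Parallel arrays, as in struct bpf_map_off_arr: offsets and sizes must
 * stay paired. The kernel sorts with sort_r() plus a swap callback
 * (map_off_arr_swap); a tiny insertion sort shows the same lockstep swap. */
static void sort_off_arr(unsigned int *off, unsigned char *sz, int n)
{
	for (int i = 1; i < n; i++) {
		for (int j = i; j > 0 && off[j - 1] > off[j]; j--) {
			unsigned int to = off[j];
			unsigned char ts = sz[j];

			off[j] = off[j - 1]; off[j - 1] = to;
			sz[j] = sz[j - 1]; sz[j - 1] = ts;
		}
	}
}

/* Three-way comparison on offsets, like bpf_map_kptr_off_cmp(). */
static int off_cmp(const void *a, const void *b)
{
	unsigned int ka = *(const unsigned int *)a;
	unsigned int kb = *(const unsigned int *)b;

	return (ka > kb) - (ka < kb);
}

/* Once sorted, an exact-match lookup can use bsearch(), as
 * bpf_map_kptr_off_contains() does over kptr_off_tab. */
static unsigned int *find_off(unsigned int *off, int n, unsigned int key)
{
	return bsearch(&key, off, n, sizeof(*off), off_cmp);
}
```

Sorting once at map creation keeps every later per-access lookup logarithmic instead of linear over all special fields.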
-1
kernel/bpf/task_iter.c
···
 	if (!prog)
 		return 0;

-	meta.seq = seq;
 	ctx.meta = &meta;
 	ctx.task = task;
 	return bpf_iter_run_prog(prog, &ctx);
+352 -94
kernel/bpf/verifier.c
··· 245 245 struct bpf_map *map_ptr; 246 246 bool raw_mode; 247 247 bool pkt_access; 248 + u8 release_regno; 248 249 int regno; 249 250 int access_size; 250 251 int mem_size; ··· 258 257 struct btf *ret_btf; 259 258 u32 ret_btf_id; 260 259 u32 subprogno; 260 + struct bpf_map_value_off_desc *kptr_off_desc; 261 261 }; 262 262 263 263 struct btf *btf_vmlinux; ··· 473 471 return type & PTR_MAYBE_NULL; 474 472 } 475 473 476 - /* Determine whether the function releases some resources allocated by another 477 - * function call. The first reference type argument will be assumed to be 478 - * released by release_reference(). 479 - */ 480 - static bool is_release_function(enum bpf_func_id func_id) 481 - { 482 - return func_id == BPF_FUNC_sk_release || 483 - func_id == BPF_FUNC_ringbuf_submit || 484 - func_id == BPF_FUNC_ringbuf_discard; 485 - } 486 - 487 474 static bool may_be_acquire_function(enum bpf_func_id func_id) 488 475 { 489 476 return func_id == BPF_FUNC_sk_lookup_tcp || ··· 490 499 if (func_id == BPF_FUNC_sk_lookup_tcp || 491 500 func_id == BPF_FUNC_sk_lookup_udp || 492 501 func_id == BPF_FUNC_skc_lookup_tcp || 493 - func_id == BPF_FUNC_ringbuf_reserve) 502 + func_id == BPF_FUNC_ringbuf_reserve || 503 + func_id == BPF_FUNC_kptr_xchg) 494 504 return true; 495 505 496 506 if (func_id == BPF_FUNC_map_lookup_elem && ··· 567 575 strncpy(prefix, "user_", 32); 568 576 if (type & MEM_PERCPU) 569 577 strncpy(prefix, "percpu_", 32); 578 + if (type & PTR_UNTRUSTED) 579 + strncpy(prefix, "untrusted_", 32); 570 580 571 581 snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", 572 582 prefix, str[base_type(type)], postfix); ··· 3205 3211 return 0; 3206 3212 } 3207 3213 3208 - enum stack_access_src { 3214 + enum bpf_access_src { 3209 3215 ACCESS_DIRECT = 1, /* the access is performed by an instruction */ 3210 3216 ACCESS_HELPER = 2, /* the access is performed by a helper */ 3211 3217 }; ··· 3213 3219 static int check_stack_range_initialized(struct bpf_verifier_env *env, 3214 
3220 int regno, int off, int access_size, 3215 3221 bool zero_size_allowed, 3216 - enum stack_access_src type, 3222 + enum bpf_access_src type, 3217 3223 struct bpf_call_arg_meta *meta); 3218 3224 3219 3225 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) ··· 3463 3469 return 0; 3464 3470 } 3465 3471 3472 + static int __check_ptr_off_reg(struct bpf_verifier_env *env, 3473 + const struct bpf_reg_state *reg, int regno, 3474 + bool fixed_off_ok) 3475 + { 3476 + /* Access to this pointer-typed register or passing it to a helper 3477 + * is only allowed in its original, unmodified form. 3478 + */ 3479 + 3480 + if (reg->off < 0) { 3481 + verbose(env, "negative offset %s ptr R%d off=%d disallowed\n", 3482 + reg_type_str(env, reg->type), regno, reg->off); 3483 + return -EACCES; 3484 + } 3485 + 3486 + if (!fixed_off_ok && reg->off) { 3487 + verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", 3488 + reg_type_str(env, reg->type), regno, reg->off); 3489 + return -EACCES; 3490 + } 3491 + 3492 + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { 3493 + char tn_buf[48]; 3494 + 3495 + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3496 + verbose(env, "variable %s access var_off=%s disallowed\n", 3497 + reg_type_str(env, reg->type), tn_buf); 3498 + return -EACCES; 3499 + } 3500 + 3501 + return 0; 3502 + } 3503 + 3504 + int check_ptr_off_reg(struct bpf_verifier_env *env, 3505 + const struct bpf_reg_state *reg, int regno) 3506 + { 3507 + return __check_ptr_off_reg(env, reg, regno, false); 3508 + } 3509 + 3510 + static int map_kptr_match_type(struct bpf_verifier_env *env, 3511 + struct bpf_map_value_off_desc *off_desc, 3512 + struct bpf_reg_state *reg, u32 regno) 3513 + { 3514 + const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id); 3515 + int perm_flags = PTR_MAYBE_NULL; 3516 + const char *reg_name = ""; 3517 + 3518 + /* Only unreferenced case accepts untrusted pointers */ 3519 + if 
(off_desc->type == BPF_KPTR_UNREF) 3520 + perm_flags |= PTR_UNTRUSTED; 3521 + 3522 + if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags)) 3523 + goto bad_type; 3524 + 3525 + if (!btf_is_kernel(reg->btf)) { 3526 + verbose(env, "R%d must point to kernel BTF\n", regno); 3527 + return -EINVAL; 3528 + } 3529 + /* We need to verify reg->type and reg->btf, before accessing reg->btf */ 3530 + reg_name = kernel_type_name(reg->btf, reg->btf_id); 3531 + 3532 + /* For ref_ptr case, release function check should ensure we get one 3533 + * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the 3534 + * normal store of unreferenced kptr, we must ensure var_off is zero. 3535 + * Since ref_ptr cannot be accessed directly by BPF insns, checks for 3536 + * reg->off and reg->ref_obj_id are not needed here. 3537 + */ 3538 + if (__check_ptr_off_reg(env, reg, regno, true)) 3539 + return -EACCES; 3540 + 3541 + /* A full type match is needed, as BTF can be vmlinux or module BTF, and 3542 + * we also need to take into account the reg->off. 3543 + * 3544 + * We want to support cases like: 3545 + * 3546 + * struct foo { 3547 + * struct bar br; 3548 + * struct baz bz; 3549 + * }; 3550 + * 3551 + * struct foo *v; 3552 + * v = func(); // PTR_TO_BTF_ID 3553 + * val->foo = v; // reg->off is zero, btf and btf_id match type 3554 + * val->bar = &v->br; // reg->off is still zero, but we need to retry with 3555 + * // first member type of struct after comparison fails 3556 + * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked 3557 + * // to match type 3558 + * 3559 + * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off 3560 + * is zero. We must also ensure that btf_struct_ids_match does not walk 3561 + * the struct to match type against first member of struct, i.e. reject 3562 + * second case from above. Hence, when type is BPF_KPTR_REF, we set 3563 + * strict mode to true for type match. 
3564 +	 */
3565 +	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
3566 +				  off_desc->kptr.btf, off_desc->kptr.btf_id,
3567 +				  off_desc->type == BPF_KPTR_REF))
3568 +		goto bad_type;
3569 +	return 0;
3570 + bad_type:
3571 +	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
3572 +		reg_type_str(env, reg->type), reg_name);
3573 +	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
3574 +	if (off_desc->type == BPF_KPTR_UNREF)
3575 +		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
3576 +			targ_name);
3577 +	else
3578 +		verbose(env, "\n");
3579 +	return -EINVAL;
3580 + }
3581 +
3582 + static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
3583 +				 int value_regno, int insn_idx,
3584 +				 struct bpf_map_value_off_desc *off_desc)
3585 + {
3586 +	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3587 +	int class = BPF_CLASS(insn->code);
3588 +	struct bpf_reg_state *val_reg;
3589 +
3590 +	/* Things we already checked for in check_map_access and caller:
3591 +	 *  - Reject cases where variable offset may touch kptr
3592 +	 *  - size of access (must be BPF_DW)
3593 +	 *  - tnum_is_const(reg->var_off)
3594 +	 *  - off_desc->offset == off + reg->var_off.value
3595 +	 */
3596 +	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
3597 +	if (BPF_MODE(insn->code) != BPF_MEM) {
3598 +		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
3599 +		return -EACCES;
3600 +	}
3601 +
3602 +	/* We only allow loading referenced kptr, since it will be marked as
3603 +	 * untrusted, similar to unreferenced kptr.
3604 +	 */
3605 +	if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) {
3606 +		verbose(env, "store to referenced kptr disallowed\n");
3607 +		return -EACCES;
3608 +	}
3609 +
3610 +	if (class == BPF_LDX) {
3611 +		val_reg = reg_state(env, value_regno);
3612 +		/* We can simply mark the value_regno receiving the pointer
3613 +		 * value from map as PTR_TO_BTF_ID, with the correct type.
3614 +		 */
3615 +		mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf,
3616 +				off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
3617 +		/* For mark_ptr_or_null_reg */
3618 +		val_reg->id = ++env->id_gen;
3619 +	} else if (class == BPF_STX) {
3620 +		val_reg = reg_state(env, value_regno);
3621 +		if (!register_is_null(val_reg) &&
3622 +		    map_kptr_match_type(env, off_desc, val_reg, value_regno))
3623 +			return -EACCES;
3624 +	} else if (class == BPF_ST) {
3625 +		if (insn->imm) {
3626 +			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
3627 +				off_desc->offset);
3628 +			return -EACCES;
3629 +		}
3630 +	} else {
3631 +		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
3632 +		return -EACCES;
3633 +	}
3634 +	return 0;
3635 + }
3636 +
3466 3637 /* check read/write into a map element with possible variable offset */
3467 3638 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
3468      -			    int off, int size, bool zero_size_allowed)
3639 +			    int off, int size, bool zero_size_allowed,
3640 +			    enum bpf_access_src src)
3469 3641 {
3470 3642 	struct bpf_verifier_state *vstate = env->cur_state;
3471 3643 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
···
3665 3505 		    t < reg->umax_value + off + size) {
3666 3506 			verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
3667 3507 			return -EACCES;
3508 +		}
3509 +	}
3510 +	if (map_value_has_kptrs(map)) {
3511 +		struct bpf_map_value_off *tab = map->kptr_off_tab;
3512 +		int i;
3513 +
3514 +		for (i = 0; i < tab->nr_off; i++) {
3515 +			u32 p = tab->off[i].offset;
3516 +
3517 +			if (reg->smin_value + off < p + sizeof(u64) &&
3518 +			    p < reg->umax_value + off + size) {
3519 +				if (src != ACCESS_DIRECT) {
3520 +					verbose(env, "kptr cannot be accessed indirectly by helper\n");
3521 +					return -EACCES;
3522 +				}
3523 +				if (!tnum_is_const(reg->var_off)) {
3524 +					verbose(env, "kptr access cannot have variable offset\n");
3525 +					return -EACCES;
3526 +				}
3527 +				if (p != off + reg->var_off.value) {
3528 +					verbose(env, "kptr access misaligned expected=%u off=%llu\n",
3529 +						p, off + reg->var_off.value);
3530 +					return -EACCES;
3531 +				}
3532 +				if (size != bpf_size_to_bytes(BPF_DW)) {
3533 +					verbose(env, "kptr access size must be BPF_DW\n");
3534 +					return -EACCES;
3535 +				}
3536 +				break;
3537 +			}
3668 3538 		}
3669 3539 	}
3670 3540 	return err;
···
4170 3980 }
4171 3981 #endif
4172 3982
4173 - static int __check_ptr_off_reg(struct bpf_verifier_env *env,
4174 -			       const struct bpf_reg_state *reg, int regno,
4175 -			       bool fixed_off_ok)
4176 - {
4177 -	/* Access to this pointer-typed register or passing it to a helper
4178 -	 * is only allowed in its original, unmodified form.
4179 -	 */
4180 -
4181 -	if (reg->off < 0) {
4182 -		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
4183 -			reg_type_str(env, reg->type), regno, reg->off);
4184 -		return -EACCES;
4185 -	}
4186 -
4187 -	if (!fixed_off_ok && reg->off) {
4188 -		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
4189 -			reg_type_str(env, reg->type), regno, reg->off);
4190 -		return -EACCES;
4191 -	}
4192 -
4193 -	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4194 -		char tn_buf[48];
4195 -
4196 -		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4197 -		verbose(env, "variable %s access var_off=%s disallowed\n",
4198 -			reg_type_str(env, reg->type), tn_buf);
4199 -		return -EACCES;
4200 -	}
4201 -
4202 -	return 0;
4203 - }
4204 -
4205 - int check_ptr_off_reg(struct bpf_verifier_env *env,
4206 -		      const struct bpf_reg_state *reg, int regno)
4207 - {
4208 -	return __check_ptr_off_reg(env, reg, regno, false);
4209 - }
4210 -
4211 3983 static int __check_buffer_access(struct bpf_verifier_env *env,
4212 3984 				 const char *buf_info,
4213 3985 				 const struct bpf_reg_state *reg,
···
4376 4224 	if (ret < 0)
4377 4225 		return ret;
4378 4226
4227 +	/* If this is an untrusted pointer, all pointers formed by walking it
4228 +	 * also inherit the untrusted flag.
4229 +	 */
4230 +	if (type_flag(reg->type) & PTR_UNTRUSTED)
4231 +		flag |= PTR_UNTRUSTED;
4232 +
4379 4233 	if (atype == BPF_READ && value_regno >= 0)
4380 4234 		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
4381 4235
···
4474 4316 static int check_stack_access_within_bounds(
4475 4317 	struct bpf_verifier_env *env,
4476 4318 	int regno, int off, int access_size,
4477 -	enum stack_access_src src, enum bpf_access_type type)
4319 +	enum bpf_access_src src, enum bpf_access_type type)
4478 4320 {
4479 4321 	struct bpf_reg_state *regs = cur_regs(env);
4480 4322 	struct bpf_reg_state *reg = regs + regno;
···
4570 4412 		if (value_regno >= 0)
4571 4413 			mark_reg_unknown(env, regs, value_regno);
4572 4414 	} else if (reg->type == PTR_TO_MAP_VALUE) {
4415 +		struct bpf_map_value_off_desc *kptr_off_desc = NULL;
4416 +
4573 4417 		if (t == BPF_WRITE && value_regno >= 0 &&
4574 4418 		    is_pointer_value(env, value_regno)) {
4575 4419 			verbose(env, "R%d leaks addr into map\n", value_regno);
···
4580 4420 		err = check_map_access_type(env, regno, off, size, t);
4581 4421 		if (err)
4582 4422 			return err;
4583 -		err = check_map_access(env, regno, off, size, false);
4584 -		if (!err && t == BPF_READ && value_regno >= 0) {
4423 +		err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
4424 +		if (err)
4425 +			return err;
4426 +		if (tnum_is_const(reg->var_off))
4427 +			kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr,
4428 +								  off + reg->var_off.value);
4429 +		if (kptr_off_desc) {
4430 +			err = check_map_kptr_access(env, regno, value_regno, insn_idx,
4431 +						    kptr_off_desc);
4432 +		} else if (t == BPF_READ && value_regno >= 0) {
4585 4433 			struct bpf_map *map = reg->map_ptr;
4586 4434
4587 4435 			/* if map is read-only, track its contents as scalars */
···
4892 4724 static int check_stack_range_initialized(
4893 4725 	struct bpf_verifier_env *env, int regno, int off,
4894 4726 	int access_size, bool zero_size_allowed,
4895 -	enum stack_access_src type, struct bpf_call_arg_meta *meta)
4727 +	enum bpf_access_src type, struct bpf_call_arg_meta *meta)
4896 4728 {
4897 4729 	struct bpf_reg_state *reg = reg_state(env, regno);
4898 4730 	struct bpf_func_state *state = func(env, reg);
···
5042 4874 				 BPF_READ))
5043 4875 			return -EACCES;
5044 4876 		return check_map_access(env, regno, reg->off, access_size,
5045 -					zero_size_allowed);
4877 +					zero_size_allowed, ACCESS_HELPER);
5046 4878 	case PTR_TO_MEM:
5047 4879 		if (type_is_rdonly_mem(reg->type)) {
5048 4880 			if (meta && meta->raw_mode) {
···
5331 5163 	return 0;
5332 5164 }
5333 5165
5166 + static int process_kptr_func(struct bpf_verifier_env *env, int regno,
5167 +			     struct bpf_call_arg_meta *meta)
5168 + {
5169 +	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5170 +	struct bpf_map_value_off_desc *off_desc;
5171 +	struct bpf_map *map_ptr = reg->map_ptr;
5172 +	u32 kptr_off;
5173 +	int ret;
5174 +
5175 +	if (!tnum_is_const(reg->var_off)) {
5176 +		verbose(env,
5177 +			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
5178 +			regno);
5179 +		return -EINVAL;
5180 +	}
5181 +	if (!map_ptr->btf) {
5182 +		verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
5183 +			map_ptr->name);
5184 +		return -EINVAL;
5185 +	}
5186 +	if (!map_value_has_kptrs(map_ptr)) {
5187 +		ret = PTR_ERR(map_ptr->kptr_off_tab);
5188 +		if (ret == -E2BIG)
5189 +			verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name,
5190 +				BPF_MAP_VALUE_OFF_MAX);
5191 +		else if (ret == -EEXIST)
5192 +			verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name);
5193 +		else
5194 +			verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
5195 +		return -EINVAL;
5196 +	}
5197 +
5198 +	meta->map_ptr = map_ptr;
5199 +	kptr_off = reg->off + reg->var_off.value;
5200 +	off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off);
5201 +	if (!off_desc) {
5202 +		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
5203 +		return -EACCES;
5204 +	}
5205 +	if (off_desc->type != BPF_KPTR_REF) {
5206 +		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
5207 +		return -EACCES;
5208 +	}
5209 +	meta->kptr_off_desc = off_desc;
5210 +	return 0;
5211 + }
5212 +
5334 5213 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
5335 5214 {
5336 5215 	return base_type(type) == ARG_PTR_TO_MEM ||
···
5399 5184 {
5400 5185 	return type == ARG_PTR_TO_INT ||
5401 5186 	       type == ARG_PTR_TO_LONG;
5187 + }
5188 +
5189 + static bool arg_type_is_release(enum bpf_arg_type type)
5190 + {
5191 +	return type & OBJ_RELEASE;
5402 5192 }
5403 5193
5404 5194 static int int_ptr_type_to_size(enum bpf_arg_type type)
···
5518 5298 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
5519 5299 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
5520 5300 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
5301 + static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
5521 5302
5522 5303 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
5523 5304 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
···
5546 5325 	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
5547 5326 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
5548 5327 	[ARG_PTR_TO_TIMER]		= &timer_types,
5328 +	[ARG_PTR_TO_KPTR]		= &kptr_types,
5549 5329 };
5550 5330
5551 5331 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
5552 5332 			  enum bpf_arg_type arg_type,
5553 -			  const u32 *arg_btf_id)
5333 +			  const u32 *arg_btf_id,
5334 +			  struct bpf_call_arg_meta *meta)
5554 5335 {
5555 5336 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5556 5337 	enum bpf_reg_type expected, type = reg->type;
···
5597 5374
5598 5375 found:
5599 5376 	if (reg->type == PTR_TO_BTF_ID) {
5377 +		/* For bpf_sk_release, it needs to match against first member
5378 +		 * 'struct sock_common', hence make an exception for it. This
5379 +		 * allows bpf_sk_release to work for multiple socket types.
5380 +		 */
5381 +		bool strict_type_match = arg_type_is_release(arg_type) &&
5382 +					 meta->func_id != BPF_FUNC_sk_release;
5383 +
5600 5384 		if (!arg_btf_id) {
5601 5385 			if (!compatible->btf_id) {
5602 5386 				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
···
5612 5382 			arg_btf_id = compatible->btf_id;
5613 5383 		}
5614 5384
5615 -		if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5616 -					  btf_vmlinux, *arg_btf_id)) {
5385 +		if (meta->func_id == BPF_FUNC_kptr_xchg) {
5386 +			if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno))
5387 +				return -EACCES;
5388 +		} else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5389 +						 btf_vmlinux, *arg_btf_id,
5390 +						 strict_type_match)) {
5617 5391 			verbose(env, "R%d is of type %s but %s is expected\n",
5618 5392 				regno, kernel_type_name(reg->btf, reg->btf_id),
5619 5393 				kernel_type_name(btf_vmlinux, *arg_btf_id));
···
5630 5396
5631 5397 int check_func_arg_reg_off(struct bpf_verifier_env *env,
5632 5398 			   const struct bpf_reg_state *reg, int regno,
5633 -			   enum bpf_arg_type arg_type,
5634 -			   bool is_release_func)
5399 +			   enum bpf_arg_type arg_type)
5635 5400 {
5636 -	bool fixed_off_ok = false, release_reg;
5637 5401 	enum bpf_reg_type type = reg->type;
5402 +	bool fixed_off_ok = false;
5638 5403
5639 5404 	switch ((u32)type) {
5640 5405 	case SCALAR_VALUE:
···
5651 5418 		/* Some of the argument types nevertheless require a
5652 5419 		 * zero register offset.
5653 5420 		 */
5654 -		if (arg_type != ARG_PTR_TO_ALLOC_MEM)
5421 +		if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM)
5655 5422 			return 0;
5656 5423 		break;
5657 5424 	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
···
5659 5426 	 */
5660 5427 	case PTR_TO_BTF_ID:
5661 5428 		/* When referenced PTR_TO_BTF_ID is passed to release function,
5662 -		 * it's fixed offset must be 0. We rely on the property that
5663 -		 * only one referenced register can be passed to BPF helpers and
5664 -		 * kfuncs. In the other cases, fixed offset can be non-zero.
5429 +		 * it's fixed offset must be 0. In the other cases, fixed offset
5430 +		 * can be non-zero.
5665 5431 		 */
5666 -		release_reg = is_release_func && reg->ref_obj_id;
5667 -		if (release_reg && reg->off) {
5432 +		if (arg_type_is_release(arg_type) && reg->off) {
5668 5433 			verbose(env, "R%d must have zero offset when passed to release func\n",
5669 5434 				regno);
5670 5435 			return -EINVAL;
5671 5436 		}
5672 -		/* For release_reg == true, fixed_off_ok must be false, but we
5673 -		 * already checked and rejected reg->off != 0 above, so set to
5674 -		 * true to allow fixed offset for all other cases.
5437 +		/* For arg is release pointer, fixed_off_ok must be false, but
5438 +		 * we already checked and rejected reg->off != 0 above, so set
5439 +		 * to true to allow fixed offset for all other cases.
5675 5440 		 */
5676 5441 		fixed_off_ok = true;
5677 5442 		break;
···
5724 5493 		 */
5725 5494 		goto skip_type_check;
5726 5495
5727 -	err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
5496 +	err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta);
5728 5497 	if (err)
5729 5498 		return err;
5730 5499
5731 -	err = check_func_arg_reg_off(env, reg, regno, arg_type, is_release_function(meta->func_id));
5500 +	err = check_func_arg_reg_off(env, reg, regno, arg_type);
5732 5501 	if (err)
5733 5502 		return err;
5734 5503
5735 5504 skip_type_check:
5736 -	/* check_func_arg_reg_off relies on only one referenced register being
5737 -	 * allowed for BPF helpers.
5738 -	 */
5505 +	if (arg_type_is_release(arg_type)) {
5506 +		if (!reg->ref_obj_id && !register_is_null(reg)) {
5507 +			verbose(env, "R%d must be referenced when passed to release function\n",
5508 +				regno);
5509 +			return -EINVAL;
5510 +		}
5511 +		if (meta->release_regno) {
5512 +			verbose(env, "verifier internal error: more than one release argument\n");
5513 +			return -EFAULT;
5514 +		}
5515 +		meta->release_regno = regno;
5516 +	}
5517 +
5739 5518 	if (reg->ref_obj_id) {
5740 5519 		if (meta->ref_obj_id) {
5741 5520 			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
···
5883 5642 	}
5884 5643
5885 5644 	err = check_map_access(env, regno, reg->off,
5886 -			       map->value_size - reg->off, false);
5645 +			       map->value_size - reg->off, false,
5646 +			       ACCESS_HELPER);
5887 5647 	if (err)
5888 5648 		return err;
5889 5649
···
5900 5658 			verbose(env, "string is not zero-terminated\n");
5901 5659 			return -EINVAL;
5902 5660 		}
5661 +	} else if (arg_type == ARG_PTR_TO_KPTR) {
5662 +		if (process_kptr_func(env, regno, meta))
5663 +			return -EACCES;
5903 5664 	}
5904 5665
5905 5666 	return err;
···
6245 6000 	int i;
6246 6001
6247 6002 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
6248 -		if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
6003 +		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
6249 6004 			return false;
6250 6005
6251 -		if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
6006 +		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
6252 6007 			return false;
6253 6008 	}
6254 6009
6255 6010 	return true;
6256 6011 }
6257 6012
6258 - static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
6013 + static int check_func_proto(const struct bpf_func_proto *fn, int func_id,
6014 +			    struct bpf_call_arg_meta *meta)
6259 6015 {
6260 6016 	return check_raw_mode_ok(fn) &&
6261 6017 	       check_arg_pair_ok(fn) &&
···
6940 6694 	memset(&meta, 0, sizeof(meta));
6941 6695 	meta.pkt_access = fn->pkt_access;
6942 6696
6943 -	err = check_func_proto(fn, func_id);
6697 +	err = check_func_proto(fn, func_id, &meta);
6944 6698 	if (err) {
6945 6699 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
6946 6700 			func_id_name(func_id), func_id);
···
6973 6727 		return err;
6974 6728 	}
6975 6729
6976 -	if (is_release_function(func_id)) {
6977 -		err = release_reference(env, meta.ref_obj_id);
6730 +	regs = cur_regs(env);
6731 +
6732 +	if (meta.release_regno) {
6733 +		err = -EINVAL;
6734 +		if (meta.ref_obj_id)
6735 +			err = release_reference(env, meta.ref_obj_id);
6736 +		/* meta.ref_obj_id can only be 0 if register that is meant to be
6737 +		 * released is NULL, which must be > R0.
6738 +		 */
6739 +		else if (register_is_null(&regs[meta.release_regno]))
6740 +			err = 0;
6978 6741 		if (err) {
6979 6742 			verbose(env, "func %s#%d reference has not been acquired before\n",
6980 6743 				func_id_name(func_id), func_id);
6981 6744 			return err;
6982 6745 		}
6983 6746 	}
6984 -
6985 -	regs = cur_regs(env);
6986 6747
6987 6748 	switch (func_id) {
6988 6749 	case BPF_FUNC_tail_call:
···
7114 6861 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
7115 6862 		}
7116 6863 	} else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
6864 +		struct btf *ret_btf;
7117 6865 		int ret_btf_id;
7118 6866
7119 6867 		mark_reg_known_zero(env, regs, BPF_REG_0);
7120 6868 		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
7121 -		ret_btf_id = *fn->ret_btf_id;
6869 +		if (func_id == BPF_FUNC_kptr_xchg) {
6870 +			ret_btf = meta.kptr_off_desc->kptr.btf;
6871 +			ret_btf_id = meta.kptr_off_desc->kptr.btf_id;
6872 +		} else {
6873 +			ret_btf = btf_vmlinux;
6874 +			ret_btf_id = *fn->ret_btf_id;
6875 +		}
7122 6876 		if (ret_btf_id == 0) {
7123 6877 			verbose(env, "invalid return type %u of func %s#%d\n",
7124 6878 				base_type(ret_type), func_id_name(func_id),
7125 6879 				func_id);
7126 6880 			return -EINVAL;
7127 6881 		}
7128 -		/* current BPF helper definitions are only coming from
7129 -		 * built-in code with type IDs from vmlinux BTF
7130 -		 */
7131 -		regs[BPF_REG_0].btf = btf_vmlinux;
6882 +		regs[BPF_REG_0].btf = ret_btf;
7132 6883 		regs[BPF_REG_0].btf_id = ret_btf_id;
7133 6884 	} else {
7134 6885 		verbose(env, "unknown return type %u of func %s#%d\n",
···
7719 7462 			return -EACCES;
7720 7463 		break;
7721 7464 	case PTR_TO_MAP_VALUE:
7722 -		if (check_map_access(env, dst, dst_reg->off, 1, false)) {
7465 +		if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
7723 7466 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
7724 7467 				"prohibited for !root\n", dst);
7725 7468 			return -EACCES;
···
13108 12851 		if (!ctx_access)
13109 12852 			continue;
13110 12853
13111 -		switch (env->insn_aux_data[i + delta].ptr_type) {
12854 +		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
13112 12855 		case PTR_TO_CTX:
13113 12856 			if (!ops->convert_ctx_access)
13114 12857 				continue;
···
13125 12868 			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
13126 12869 			break;
13127 12870 		case PTR_TO_BTF_ID:
12871 +		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
13128 12872 			if (type == BPF_READ) {
13129 12873 				insn->code = BPF_LDX | BPF_PROBE_MEM |
13130 12874 					     BPF_SIZE((insn)->code);
-79
kernel/sysctl.c
···
62 62 #include <linux/binfmts.h>
63 63 #include <linux/sched/sysctl.h>
64 64 #include <linux/kexec.h>
65 - #include <linux/bpf.h>
66 65 #include <linux/mount.h>
67 66 #include <linux/userfaultfd_k.h>
68 67 #include <linux/latencytop.h>
···
146 147 #endif
147 148
148 149 #endif /* CONFIG_SYSCTL */
149 -
150 - #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
151 - static int bpf_stats_handler(struct ctl_table *table, int write,
152 -			     void *buffer, size_t *lenp, loff_t *ppos)
153 - {
154 -	struct static_key *key = (struct static_key *)table->data;
155 -	static int saved_val;
156 -	int val, ret;
157 -	struct ctl_table tmp = {
158 -		.data	= &val,
159 -		.maxlen	= sizeof(val),
160 -		.mode	= table->mode,
161 -		.extra1	= SYSCTL_ZERO,
162 -		.extra2	= SYSCTL_ONE,
163 -	};
164 -
165 -	if (write && !capable(CAP_SYS_ADMIN))
166 -		return -EPERM;
167 -
168 -	mutex_lock(&bpf_stats_enabled_mutex);
169 -	val = saved_val;
170 -	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
171 -	if (write && !ret && val != saved_val) {
172 -		if (val)
173 -			static_key_slow_inc(key);
174 -		else
175 -			static_key_slow_dec(key);
176 -		saved_val = val;
177 -	}
178 -	mutex_unlock(&bpf_stats_enabled_mutex);
179 -	return ret;
180 - }
181 -
182 - void __weak unpriv_ebpf_notify(int new_state)
183 - {
184 - }
185 -
186 - static int bpf_unpriv_handler(struct ctl_table *table, int write,
187 -			      void *buffer, size_t *lenp, loff_t *ppos)
188 - {
189 -	int ret, unpriv_enable = *(int *)table->data;
190 -	bool locked_state = unpriv_enable == 1;
191 -	struct ctl_table tmp = *table;
192 -
193 -	if (write && !capable(CAP_SYS_ADMIN))
194 -		return -EPERM;
195 -
196 -	tmp.data = &unpriv_enable;
197 -	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
198 -	if (write && !ret) {
199 -		if (locked_state && unpriv_enable != 1)
200 -			return -EPERM;
201 -		*(int *)table->data = unpriv_enable;
202 -	}
203 -
204 -	unpriv_ebpf_notify(unpriv_enable);
205 -
206 -	return ret;
207 - }
208 - #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
209 150
210 151 /*
211 152  * /proc/sys support
···
2236 2297 		.proc_handler	= timer_migration_handler,
2237 2298 		.extra1		= SYSCTL_ZERO,
2238 2299 		.extra2		= SYSCTL_ONE,
2239 -	},
2240 - #endif
2241 - #ifdef CONFIG_BPF_SYSCALL
2242 -	{
2243 -		.procname	= "unprivileged_bpf_disabled",
2244 -		.data		= &sysctl_unprivileged_bpf_disabled,
2245 -		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
2246 -		.mode		= 0644,
2247 -		.proc_handler	= bpf_unpriv_handler,
2248 -		.extra1		= SYSCTL_ZERO,
2249 -		.extra2		= SYSCTL_TWO,
2250 -	},
2251 -	{
2252 -		.procname	= "bpf_stats_enabled",
2253 -		.data		= &bpf_stats_enabled_key.key,
2254 -		.maxlen		= sizeof(bpf_stats_enabled_key),
2255 -		.mode		= 0644,
2256 -		.proc_handler	= bpf_stats_handler,
2257 2300 	},
2258 2301 #endif
2259 2302 #if defined(CONFIG_TREE_RCU)
+4 -1
kernel/trace/bpf_trace.c
···
129 129 	 * out of events when it was updated in between this and the
130 130 	 * rcu_dereference() which is accepted risk.
131 131 	 */
132 -	ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
132 +	rcu_read_lock();
133 +	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
134 +				 ctx, bpf_prog_run);
135 +	rcu_read_unlock();
133 136
134 137 out:
135 138 	__this_cpu_dec(bpf_prog_active);
+61 -6
net/bpf/test_run.c
···
550 550 	return sk;
551 551 }
552 552
553 + struct prog_test_member1 {
554 +	int a;
555 + };
556 +
553 557 struct prog_test_member {
554 -	u64 c;
558 +	struct prog_test_member1 m;
559 +	int c;
555 560 };
556 561
557 562 struct prog_test_ref_kfunc {
···
581 576 	return &prog_test_struct;
582 577 }
583 578
579 + noinline struct prog_test_member *
580 + bpf_kfunc_call_memb_acquire(void)
581 + {
582 +	return &prog_test_struct.memb;
583 + }
584 +
584 585 noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
585 586 {
586 587 }
587 588
588 589 noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
589 590 {
591 + }
592 +
593 + noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
594 + {
595 + }
596 +
597 + noinline struct prog_test_ref_kfunc *
598 + bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b)
599 + {
600 +	return &prog_test_struct;
590 601 }
591 602
592 603 struct prog_test_pass1 {
···
688 667 BTF_ID(func, bpf_kfunc_call_test2)
689 668 BTF_ID(func, bpf_kfunc_call_test3)
690 669 BTF_ID(func, bpf_kfunc_call_test_acquire)
670 + BTF_ID(func, bpf_kfunc_call_memb_acquire)
691 671 BTF_ID(func, bpf_kfunc_call_test_release)
692 672 BTF_ID(func, bpf_kfunc_call_memb_release)
673 + BTF_ID(func, bpf_kfunc_call_memb1_release)
674 + BTF_ID(func, bpf_kfunc_call_test_kptr_get)
693 675 BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
694 676 BTF_ID(func, bpf_kfunc_call_test_pass1)
695 677 BTF_ID(func, bpf_kfunc_call_test_pass2)
···
706 682
707 683 BTF_SET_START(test_sk_acquire_kfunc_ids)
708 684 BTF_ID(func, bpf_kfunc_call_test_acquire)
685 + BTF_ID(func, bpf_kfunc_call_memb_acquire)
686 + BTF_ID(func, bpf_kfunc_call_test_kptr_get)
709 687 BTF_SET_END(test_sk_acquire_kfunc_ids)
710 688
711 689 BTF_SET_START(test_sk_release_kfunc_ids)
712 690 BTF_ID(func, bpf_kfunc_call_test_release)
713 691 BTF_ID(func, bpf_kfunc_call_memb_release)
692 + BTF_ID(func, bpf_kfunc_call_memb1_release)
714 693 BTF_SET_END(test_sk_release_kfunc_ids)
715 694
716 695 BTF_SET_START(test_sk_ret_null_kfunc_ids)
717 696 BTF_ID(func, bpf_kfunc_call_test_acquire)
697 + BTF_ID(func, bpf_kfunc_call_memb_acquire)
698 + BTF_ID(func, bpf_kfunc_call_test_kptr_get)
718 699 BTF_SET_END(test_sk_ret_null_kfunc_ids)
700 +
701 + BTF_SET_START(test_sk_kptr_acquire_kfunc_ids)
702 + BTF_ID(func, bpf_kfunc_call_test_kptr_get)
703 + BTF_SET_END(test_sk_kptr_acquire_kfunc_ids)
719 704
720 705 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
721 706 			   u32 size, u32 headroom, u32 tailroom)
···
1612 1579
1613 1580 static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
1614 1581 	.owner        = THIS_MODULE,
1615 -	.check_set    = &test_sk_check_kfunc_ids,
1616 -	.acquire_set  = &test_sk_acquire_kfunc_ids,
1617 -	.release_set  = &test_sk_release_kfunc_ids,
1618 -	.ret_null_set = &test_sk_ret_null_kfunc_ids,
1582 +	.check_set        = &test_sk_check_kfunc_ids,
1583 +	.acquire_set      = &test_sk_acquire_kfunc_ids,
1584 +	.release_set      = &test_sk_release_kfunc_ids,
1585 +	.ret_null_set     = &test_sk_ret_null_kfunc_ids,
1586 +	.kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids
1619 1587 };
1588 +
1589 + BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids)
1590 + BTF_ID(struct, prog_test_ref_kfunc)
1591 + BTF_ID(func, bpf_kfunc_call_test_release)
1592 + BTF_ID(struct, prog_test_member)
1593 + BTF_ID(func, bpf_kfunc_call_memb_release)
1620 1594
1621 1595 static int __init bpf_prog_test_run_init(void)
1622 1596 {
1623 -	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
1597 +	const struct btf_id_dtor_kfunc bpf_prog_test_dtor_kfunc[] = {
1598 +		{
1599 +			.btf_id       = bpf_prog_test_dtor_kfunc_ids[0],
1600 +			.kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[1]
1601 +		},
1602 +		{
1603 +			.btf_id       = bpf_prog_test_dtor_kfunc_ids[2],
1604 +			.kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[3],
1605 +		},
1606 +	};
1607 +	int ret;
1608 +
1609 +	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
1610 +	return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc,
1611 +						  ARRAY_SIZE(bpf_prog_test_dtor_kfunc),
1612 +						  THIS_MODULE);
1624 1613 }
1625 1614 late_initcall(bpf_prog_test_run_init);
+5 -6
net/core/bpf_sk_storage.c
···
40 40 	if (!sdata)
41 41 		return -ENOENT;
42 42
43 -	bpf_selem_unlink(SELEM(sdata));
43 +	bpf_selem_unlink(SELEM(sdata), true);
44 44
45 45 	return 0;
46 46 }
···
75 75 		 * sk_storage.
76 76 		 */
77 77 		bpf_selem_unlink_map(selem);
78 -		free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage,
79 -								  selem, true);
78 +		free_sk_storage = bpf_selem_unlink_storage_nolock(
79 +			sk_storage, selem, true, false);
80 80 	}
81 81 	raw_spin_unlock_bh(&sk_storage->lock);
82 82 	rcu_read_unlock();
···
338 338 	return &sk->sk_bpf_storage;
339 339 }
340 340
341 - static int sk_storage_map_btf_id;
341 + BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map)
342 342 const struct bpf_map_ops sk_storage_map_ops = {
343 343 	.map_meta_equal = bpf_map_meta_equal,
344 344 	.map_alloc_check = bpf_local_storage_map_alloc_check,
···
349 349 	.map_update_elem = bpf_fd_sk_storage_update_elem,
350 350 	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
351 351 	.map_check_btf = bpf_local_storage_map_check_btf,
352 -	.map_btf_name = "bpf_local_storage_map",
353 -	.map_btf_id = &sk_storage_map_btf_id,
352 +	.map_btf_id = &sk_storage_map_btf_ids[0],
354 353 	.map_local_storage_charge = bpf_sk_storage_charge,
355 354 	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
356 355 	.map_owner_storage_ptr = bpf_sk_storage_ptr,
+3 -3
net/core/filter.c
···
1687 1687
1688 1688 	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
1689 1689 		return -EINVAL;
1690 -	if (unlikely(offset > 0xffff))
1690 +	if (unlikely(offset > INT_MAX))
1691 1691 		return -EFAULT;
1692 1692 	if (unlikely(bpf_try_make_writable(skb, offset + len)))
1693 1693 		return -EFAULT;
···
1722 1722 {
1723 1723 	void *ptr;
1724 1724
1725 -	if (unlikely(offset > 0xffff))
1725 +	if (unlikely(offset > INT_MAX))
1726 1726 		goto err_clear;
1727 1727
1728 1728 	ptr = skb_header_pointer(skb, offset, len, to);
···
6621 6621 	.func		= bpf_sk_release,
6622 6622 	.gpl_only	= false,
6623 6623 	.ret_type	= RET_INTEGER,
6624 -	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6624 +	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE,
6625 6625 };
6626 6626
6627 6627 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
+1 -1
net/core/skbuff.c
···
5602 5602 }
5603 5603 EXPORT_SYMBOL(skb_vlan_untag);
5604 5604
5605 - int skb_ensure_writable(struct sk_buff *skb, int write_len)
5605 + int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
5606 5606 {
5607 5607 	if (!pskb_may_pull(skb, write_len))
5608 5608 		return -ENOMEM;
+4 -6
net/core/sock_map.c
···
793 793 	.seq_priv_size		= sizeof(struct sock_map_seq_info),
794 794 };
795 795
796 - static int sock_map_btf_id;
796 + BTF_ID_LIST_SINGLE(sock_map_btf_ids, struct, bpf_stab)
797 797 const struct bpf_map_ops sock_map_ops = {
798 798 	.map_meta_equal		= bpf_map_meta_equal,
799 799 	.map_alloc		= sock_map_alloc,
···
805 805 	.map_lookup_elem	= sock_map_lookup,
806 806 	.map_release_uref	= sock_map_release_progs,
807 807 	.map_check_btf		= map_check_no_btf,
808 -	.map_btf_name		= "bpf_stab",
809 -	.map_btf_id		= &sock_map_btf_id,
808 +	.map_btf_id		= &sock_map_btf_ids[0],
810 809 	.iter_seq_info		= &sock_map_iter_seq_info,
811 810 };
812 811
···
1384 1385 	.seq_priv_size		= sizeof(struct sock_hash_seq_info),
1385 1386 };
1386 1387
1387 - static int sock_hash_map_btf_id;
1388 + BTF_ID_LIST_SINGLE(sock_hash_map_btf_ids, struct, bpf_shtab)
1388 1389 const struct bpf_map_ops sock_hash_ops = {
1389 1390 	.map_meta_equal		= bpf_map_meta_equal,
1390 1391 	.map_alloc		= sock_hash_alloc,
···
1396 1397 	.map_lookup_elem_sys_only = sock_hash_lookup_sys,
1397 1398 	.map_release_uref	= sock_hash_release_progs,
1398 1399 	.map_check_btf		= map_check_no_btf,
1399 -	.map_btf_name		= "bpf_shtab",
1400 -	.map_btf_id		= &sock_hash_map_btf_id,
1400 +	.map_btf_id		= &sock_hash_map_btf_ids[0],
1401 1401 	.iter_seq_info		= &sock_hash_iter_seq_info,
1402 1402 };
+2 -2
net/xdp/xsk.c
···
184 184 		xsk_xdp = xsk_buff_alloc(xs->pool);
185 185 		if (!xsk_xdp) {
186 186 			xs->rx_dropped++;
187 -			return -ENOSPC;
187 +			return -ENOMEM;
188 188 		}
189 189
190 190 		xsk_copy_xdp(xsk_xdp, xdp, len);
···
217 217 static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
218 218 {
219 219 	if (!xsk_is_bound(xs))
220 -		return -EINVAL;
220 +		return -ENXIO;
221 221
222 222 	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
223 223 		return -EINVAL;
+2 -2
net/xdp/xsk_queue.h
···
263 263
264 264 static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
265 265 {
266 -	return xskq_cons_nb_entries(q, cnt) >= cnt ? true : false;
266 +	return xskq_cons_nb_entries(q, cnt) >= cnt;
267 267 }
268 268
269 269 static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
···
382 382 	u32 idx;
383 383
384 384 	if (xskq_prod_is_full(q))
385 -		return -ENOSPC;
385 +		return -ENOBUFS;
386 386
387 387 	/* A, matches D */
388 388 	idx = q->cached_prod++ & q->ring_mask;
+3 -3
net/xdp/xskmap.c
···
9 9 #include <net/xdp_sock.h>
10 10 #include <linux/slab.h>
11 11 #include <linux/sched.h>
12 + #include <linux/btf_ids.h>
12 13
13 14 #include "xsk.h"
14 15
···
255 254 	       bpf_map_meta_equal(meta0, meta1);
256 255 }
257 256
258 - static int xsk_map_btf_id;
257 + BTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map)
259 258 const struct bpf_map_ops xsk_map_ops = {
260 259 	.map_meta_equal = xsk_map_meta_equal,
261 260 	.map_alloc = xsk_map_alloc,
···
267 266 	.map_update_elem = xsk_map_update_elem,
268 267 	.map_delete_elem = xsk_map_delete_elem,
269 268 	.map_check_btf = map_check_no_btf,
270 -	.map_btf_name = "xsk_map",
271 -	.map_btf_id = &xsk_map_btf_id,
269 +	.map_btf_id = &xsk_map_btf_ids[0],
272 270 	.map_redirect = xsk_map_redirect,
273 271 };
-1
samples/bpf/cpustat_user.c
···
13 13 #include <sys/types.h>
14 14 #include <sys/stat.h>
15 15 #include <sys/time.h>
16 - #include <sys/resource.h>
17 16 #include <sys/wait.h>
18 17
19 18 #include <bpf/bpf.h>
+3 -2
samples/bpf/hbm.c
···
34 34 #include <stdio.h>
35 35 #include <stdlib.h>
36 36 #include <assert.h>
37 - #include <sys/resource.h>
38 37 #include <sys/time.h>
39 38 #include <unistd.h>
40 39 #include <errno.h>
···
45 46 #include <bpf/bpf.h>
46 47 #include <getopt.h>
47 48
48 - #include "bpf_rlimit.h"
49 49 #include "cgroup_helpers.h"
50 50 #include "hbm.h"
51 51 #include "bpf_util.h"
···
507 509 	if (optind < argc)
508 510 		prog = argv[optind];
509 511 	printf("HBM prog: %s\n", prog != NULL ? prog : "NULL");
512 +
513 +	/* Use libbpf 1.0 API mode */
514 +	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
510 515
511 516 	return run_bpf_prog(prog, cg_id);
512 517 }
-1
samples/bpf/ibumad_user.c
···
19 19 #include <sys/types.h>
20 20 #include <limits.h>
21 21
22 - #include <sys/resource.h>
23 22 #include <getopt.h>
24 23 #include <net/if.h>
25 24
-1
samples/bpf/map_perf_test_user.c
···
13 13 #include <signal.h>
14 14 #include <string.h>
15 15 #include <time.h>
16 - #include <sys/resource.h>
17 16 #include <arpa/inet.h>
18 17 #include <errno.h>
19 18
-1
samples/bpf/offwaketime_user.c
···
8 8 #include <linux/perf_event.h>
9 9 #include <errno.h>
10 10 #include <stdbool.h>
11 - #include <sys/resource.h>
12 11 #include <bpf/libbpf.h>
13 12 #include <bpf/bpf.h>
14 13 #include "trace_helpers.h"
-1
samples/bpf/sockex2_user.c
···
7 7 #include "sock_example.h"
8 8 #include <unistd.h>
9 9 #include <arpa/inet.h>
10 - #include <sys/resource.h>
11 10
12 11 struct pair {
13 12 	__u64 packets;
-1
samples/bpf/sockex3_user.c
··· 6 6 #include "sock_example.h" 7 7 #include <unistd.h> 8 8 #include <arpa/inet.h> 9 - #include <sys/resource.h> 10 9 11 10 struct flow_key_record { 12 11 __be32 src;
-1
samples/bpf/spintest_user.c
··· 3 3 #include <unistd.h> 4 4 #include <string.h> 5 5 #include <assert.h> 6 - #include <sys/resource.h> 7 6 #include <bpf/libbpf.h> 8 7 #include <bpf/bpf.h> 9 8 #include "trace_helpers.h"
-1
samples/bpf/syscall_tp_user.c
··· 8 8 #include <string.h> 9 9 #include <linux/perf_event.h> 10 10 #include <errno.h> 11 - #include <sys/resource.h> 12 11 #include <bpf/libbpf.h> 13 12 #include <bpf/bpf.h> 14 13
-1
samples/bpf/task_fd_query_user.c
··· 10 10 #include <fcntl.h> 11 11 #include <linux/bpf.h> 12 12 #include <sys/ioctl.h> 13 - #include <sys/resource.h> 14 13 #include <sys/types.h> 15 14 #include <sys/stat.h> 16 15 #include <linux/perf_event.h>
-1
samples/bpf/test_lru_dist.c
··· 13 13 #include <sched.h> 14 14 #include <sys/wait.h> 15 15 #include <sys/stat.h> 16 - #include <sys/resource.h> 17 16 #include <fcntl.h> 18 17 #include <stdlib.h> 19 18 #include <time.h>
-1
samples/bpf/test_map_in_map_user.c
··· 2 2 /* 3 3 * Copyright (c) 2017 Facebook 4 4 */ 5 - #include <sys/resource.h> 6 5 #include <sys/socket.h> 7 6 #include <arpa/inet.h> 8 7 #include <stdint.h>
-1
samples/bpf/test_overhead_user.c
··· 16 16 #include <linux/bpf.h> 17 17 #include <string.h> 18 18 #include <time.h> 19 - #include <sys/resource.h> 20 19 #include <bpf/bpf.h> 21 20 #include <bpf/libbpf.h> 22 21
-1
samples/bpf/tracex2_user.c
··· 4 4 #include <stdlib.h> 5 5 #include <signal.h> 6 6 #include <string.h> 7 - #include <sys/resource.h> 8 7 9 8 #include <bpf/bpf.h> 10 9 #include <bpf/libbpf.h>
-1
samples/bpf/tracex3_user.c
··· 7 7 #include <unistd.h> 8 8 #include <stdbool.h> 9 9 #include <string.h> 10 - #include <sys/resource.h> 11 10 12 11 #include <bpf/bpf.h> 13 12 #include <bpf/libbpf.h>
-1
samples/bpf/tracex4_user.c
··· 8 8 #include <stdbool.h> 9 9 #include <string.h> 10 10 #include <time.h> 11 - #include <sys/resource.h> 12 11 13 12 #include <bpf/bpf.h> 14 13 #include <bpf/libbpf.h>
-1
samples/bpf/tracex5_user.c
··· 7 7 #include <sys/prctl.h> 8 8 #include <bpf/bpf.h> 9 9 #include <bpf/libbpf.h> 10 - #include <sys/resource.h> 11 10 #include "trace_helpers.h" 12 11 13 12 #ifdef __mips__
-1
samples/bpf/tracex6_user.c
··· 8 8 #include <stdio.h> 9 9 #include <stdlib.h> 10 10 #include <sys/ioctl.h> 11 - #include <sys/resource.h> 12 11 #include <sys/time.h> 13 12 #include <sys/types.h> 14 13 #include <sys/wait.h>
+1 -2
samples/bpf/xdp1_user.c
··· 11 11 #include <string.h> 12 12 #include <unistd.h> 13 13 #include <libgen.h> 14 - #include <sys/resource.h> 15 14 #include <net/if.h> 16 15 17 16 #include "bpf_util.h" ··· 160 161 } 161 162 prog_id = info.id; 162 163 163 - poll_stats(map_fd, 2); 164 + poll_stats(map_fd, 1); 164 165 165 166 return 0; 166 167 }
-1
samples/bpf/xdp_adjust_tail_user.c
··· 14 14 #include <stdlib.h> 15 15 #include <string.h> 16 16 #include <net/if.h> 17 - #include <sys/resource.h> 18 17 #include <arpa/inet.h> 19 18 #include <netinet/ether.h> 20 19 #include <unistd.h>
-1
samples/bpf/xdp_monitor_user.c
··· 17 17 #include <ctype.h> 18 18 #include <unistd.h> 19 19 #include <locale.h> 20 - #include <sys/resource.h> 21 20 #include <getopt.h> 22 21 #include <net/if.h> 23 22 #include <time.h>
-1
samples/bpf/xdp_redirect_cpu_user.c
··· 21 21 #include <string.h> 22 22 #include <unistd.h> 23 23 #include <locale.h> 24 - #include <sys/resource.h> 25 24 #include <sys/sysinfo.h> 26 25 #include <getopt.h> 27 26 #include <net/if.h>
-1
samples/bpf/xdp_redirect_map_multi_user.c
··· 15 15 #include <net/if.h> 16 16 #include <unistd.h> 17 17 #include <libgen.h> 18 - #include <sys/resource.h> 19 18 #include <sys/ioctl.h> 20 19 #include <sys/types.h> 21 20 #include <sys/socket.h>
-1
samples/bpf/xdp_redirect_user.c
··· 18 18 #include <unistd.h> 19 19 #include <libgen.h> 20 20 #include <getopt.h> 21 - #include <sys/resource.h> 22 21 #include <bpf/bpf.h> 23 22 #include <bpf/libbpf.h> 24 23 #include "bpf_util.h"
-1
samples/bpf/xdp_router_ipv4_user.c
··· 22 22 #include <sys/syscall.h> 23 23 #include "bpf_util.h" 24 24 #include <bpf/libbpf.h> 25 - #include <sys/resource.h> 26 25 #include <libgen.h> 27 26 #include <getopt.h> 28 27 #include <pthread.h>
-1
samples/bpf/xdp_rxq_info_user.c
··· 14 14 #include <string.h> 15 15 #include <unistd.h> 16 16 #include <locale.h> 17 - #include <sys/resource.h> 18 17 #include <getopt.h> 19 18 #include <net/if.h> 20 19 #include <time.h>
-1
samples/bpf/xdp_sample_pkts_user.c
··· 12 12 #include <signal.h> 13 13 #include <bpf/libbpf.h> 14 14 #include <bpf/bpf.h> 15 - #include <sys/resource.h> 16 15 #include <libgen.h> 17 16 #include <linux/if_link.h> 18 17
-1
samples/bpf/xdp_sample_user.c
··· 25 25 #include <string.h> 26 26 #include <sys/ioctl.h> 27 27 #include <sys/mman.h> 28 - #include <sys/resource.h> 29 28 #include <sys/signalfd.h> 30 29 #include <sys/sysinfo.h> 31 30 #include <sys/timerfd.h>
-1
samples/bpf/xdp_tx_iptunnel_user.c
··· 10 10 #include <stdlib.h> 11 11 #include <string.h> 12 12 #include <net/if.h> 13 - #include <sys/resource.h> 14 13 #include <arpa/inet.h> 15 14 #include <netinet/ether.h> 16 15 #include <unistd.h>
+2 -7
samples/bpf/xdpsock_user.c
··· 25 25 #include <string.h> 26 26 #include <sys/capability.h> 27 27 #include <sys/mman.h> 28 - #include <sys/resource.h> 29 28 #include <sys/socket.h> 30 29 #include <sys/types.h> 31 30 #include <sys/un.h> ··· 1885 1886 { 1886 1887 struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 }; 1887 1888 struct __user_cap_data_struct data[2] = { { 0 } }; 1888 - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 1889 1889 bool rx = false, tx = false; 1890 1890 struct sched_param schparam; 1891 1891 struct xsk_umem_info *umem; ··· 1915 1917 data[1].effective, data[1].inheritable, data[1].permitted); 1916 1918 } 1917 1919 } else { 1918 - if (setrlimit(RLIMIT_MEMLOCK, &r)) { 1919 - fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", 1920 - strerror(errno)); 1921 - exit(EXIT_FAILURE); 1922 - } 1920 + /* Use libbpf 1.0 API mode */ 1921 + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 1923 1922 1924 1923 if (opt_num_xsks > 1) 1925 1924 load_xdp_program(argv, &obj);
+2 -5
samples/bpf/xsk_fwd.c
··· 10 10 #include <stdlib.h> 11 11 #include <string.h> 12 12 #include <sys/mman.h> 13 - #include <sys/resource.h> 14 13 #include <sys/socket.h> 15 14 #include <sys/types.h> 16 15 #include <time.h> ··· 130 131 bpool_init(struct bpool_params *params, 131 132 struct xsk_umem_config *umem_cfg) 132 133 { 133 - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 134 134 u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved; 135 135 u64 slabs_size, slabs_reserved_size; 136 136 u64 buffers_size, buffers_reserved_size; ··· 138 140 u8 *p; 139 141 int status; 140 142 141 - /* mmap prep. */ 142 - if (setrlimit(RLIMIT_MEMLOCK, &r)) 143 - return NULL; 143 + /* Use libbpf 1.0 API mode */ 144 + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 144 145 145 146 /* bpool internals dimensioning. */ 146 147 n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) /
-8
tools/bpf/bpftool/common.c
··· 17 17 #include <linux/magic.h> 18 18 #include <net/if.h> 19 19 #include <sys/mount.h> 20 - #include <sys/resource.h> 21 20 #include <sys/stat.h> 22 21 #include <sys/vfs.h> 23 22 ··· 116 117 return false; 117 118 118 119 return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; 119 - } 120 - 121 - void set_max_rlimit(void) 122 - { 123 - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; 124 - 125 - setrlimit(RLIMIT_MEMLOCK, &rinf); 126 120 } 127 121 128 122 static int
-2
tools/bpf/bpftool/feature.c
··· 1136 1136 __u32 ifindex = 0; 1137 1137 char *ifname; 1138 1138 1139 - set_max_rlimit(); 1140 - 1141 1139 while (argc) { 1142 1140 if (is_prefix(*argv, "kernel")) { 1143 1141 if (target != COMPONENT_UNSPEC) {
+3 -3
tools/bpf/bpftool/main.c
··· 507 507 * It will still be rejected if users use LIBBPF_STRICT_ALL 508 508 * mode for loading generated skeleton. 509 509 */ 510 - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); 511 - if (ret) 512 - p_err("failed to enable libbpf strict mode: %d", ret); 510 + libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS); 511 + } else { 512 + libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK); 513 513 } 514 514 515 515 argc -= optind;
-2
tools/bpf/bpftool/main.h
··· 102 102 void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); 103 103 void usage(void) __noreturn; 104 104 105 - void set_max_rlimit(void); 106 - 107 105 int mount_tracefs(const char *target); 108 106 109 107 struct obj_ref {
-2
tools/bpf/bpftool/map.c
··· 1342 1342 goto exit; 1343 1343 } 1344 1344 1345 - set_max_rlimit(); 1346 - 1347 1345 fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); 1348 1346 if (fd < 0) { 1349 1347 p_err("map create failed: %s", strerror(errno));
+59 -57
tools/bpf/bpftool/perf.c
··· 11 11 #include <sys/stat.h> 12 12 #include <sys/types.h> 13 13 #include <unistd.h> 14 - #include <ftw.h> 14 + #include <dirent.h> 15 15 16 16 #include <bpf/bpf.h> 17 17 ··· 147 147 } 148 148 } 149 149 150 - static int show_proc(const char *fpath, const struct stat *sb, 151 - int tflag, struct FTW *ftwbuf) 150 + static int show_proc(void) 152 151 { 152 + struct dirent *proc_de, *pid_fd_de; 153 153 __u64 probe_offset, probe_addr; 154 154 __u32 len, prog_id, fd_type; 155 - int err, pid = 0, fd = 0; 155 + DIR *proc, *pid_fd; 156 + int err, pid, fd; 156 157 const char *pch; 157 158 char buf[4096]; 158 159 159 - /* prefix always /proc */ 160 - pch = fpath + 5; 161 - if (*pch == '\0') 162 - return 0; 160 + proc = opendir("/proc"); 161 + if (!proc) 162 + return -1; 163 163 164 - /* pid should be all numbers */ 165 - pch++; 166 - while (isdigit(*pch)) { 167 - pid = pid * 10 + *pch - '0'; 168 - pch++; 164 + while ((proc_de = readdir(proc))) { 165 + pid = 0; 166 + pch = proc_de->d_name; 167 + 168 + /* pid should be all numbers */ 169 + while (isdigit(*pch)) { 170 + pid = pid * 10 + *pch - '0'; 171 + pch++; 172 + } 173 + if (*pch != '\0') 174 + continue; 175 + 176 + err = snprintf(buf, sizeof(buf), "/proc/%s/fd", proc_de->d_name); 177 + if (err < 0 || err >= (int)sizeof(buf)) 178 + continue; 179 + 180 + pid_fd = opendir(buf); 181 + if (!pid_fd) 182 + continue; 183 + 184 + while ((pid_fd_de = readdir(pid_fd))) { 185 + fd = 0; 186 + pch = pid_fd_de->d_name; 187 + 188 + /* fd should be all numbers */ 189 + while (isdigit(*pch)) { 190 + fd = fd * 10 + *pch - '0'; 191 + pch++; 192 + } 193 + if (*pch != '\0') 194 + continue; 195 + 196 + /* query (pid, fd) for potential perf events */ 197 + len = sizeof(buf); 198 + err = bpf_task_fd_query(pid, fd, 0, buf, &len, 199 + &prog_id, &fd_type, 200 + &probe_offset, &probe_addr); 201 + if (err < 0) 202 + continue; 203 + 204 + if (json_output) 205 + print_perf_json(pid, fd, prog_id, fd_type, buf, 206 + probe_offset, probe_addr); 207 + else 
208 + print_perf_plain(pid, fd, prog_id, fd_type, buf, 209 + probe_offset, probe_addr); 210 + } 211 + closedir(pid_fd); 169 212 } 170 - if (*pch == '\0') 171 - return 0; 172 - if (*pch != '/') 173 - return FTW_SKIP_SUBTREE; 174 - 175 - /* check /proc/<pid>/fd directory */ 176 - pch++; 177 - if (strncmp(pch, "fd", 2)) 178 - return FTW_SKIP_SUBTREE; 179 - pch += 2; 180 - if (*pch == '\0') 181 - return 0; 182 - if (*pch != '/') 183 - return FTW_SKIP_SUBTREE; 184 - 185 - /* check /proc/<pid>/fd/<fd_num> */ 186 - pch++; 187 - while (isdigit(*pch)) { 188 - fd = fd * 10 + *pch - '0'; 189 - pch++; 190 - } 191 - if (*pch != '\0') 192 - return FTW_SKIP_SUBTREE; 193 - 194 - /* query (pid, fd) for potential perf events */ 195 - len = sizeof(buf); 196 - err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type, 197 - &probe_offset, &probe_addr); 198 - if (err < 0) 199 - return 0; 200 - 201 - if (json_output) 202 - print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset, 203 - probe_addr); 204 - else 205 - print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset, 206 - probe_addr); 207 - 213 + closedir(proc); 208 214 return 0; 209 215 } 210 216 211 217 static int do_show(int argc, char **argv) 212 218 { 213 - int flags = FTW_ACTIONRETVAL | FTW_PHYS; 214 - int err = 0, nopenfd = 16; 219 + int err; 215 220 216 221 if (!has_perf_query_support()) 217 222 return -1; 218 223 219 224 if (json_output) 220 225 jsonw_start_array(json_wtr); 221 - if (nftw("/proc", show_proc, nopenfd, flags) == -1) { 222 - p_err("%s", strerror(errno)); 223 - err = -1; 224 - } 226 + err = show_proc(); 225 227 if (json_output) 226 228 jsonw_end_array(json_wtr); 227 229
-1
tools/bpf/bpftool/pids.c
··· 108 108 p_err("failed to create hashmap for PID references"); 109 109 return -1; 110 110 } 111 - set_max_rlimit(); 112 111 113 112 skel = pid_iter_bpf__open(); 114 113 if (!skel) {
-3
tools/bpf/bpftool/prog.c
··· 1604 1604 } 1605 1605 } 1606 1606 1607 - set_max_rlimit(); 1608 - 1609 1607 if (verifier_logs) 1610 1608 /* log_level1 + log_level2 + stats, but not stable UAPI */ 1611 1609 open_opts.kernel_log_level = 1 + 2 + 4; ··· 2301 2303 } 2302 2304 } 2303 2305 2304 - set_max_rlimit(); 2305 2306 err = profiler_bpf__load(profile_obj); 2306 2307 if (err) { 2307 2308 p_err("failed to load profile_obj");
-2
tools/bpf/bpftool/struct_ops.c
··· 501 501 if (libbpf_get_error(obj)) 502 502 return -1; 503 503 504 - set_max_rlimit(); 505 - 506 504 if (bpf_object__load(obj)) { 507 505 bpf_object__close(obj); 508 506 return -1;
+1 -1
tools/bpf/bpftool/tracelog.c
··· 9 9 #include <string.h> 10 10 #include <unistd.h> 11 11 #include <linux/magic.h> 12 - #include <sys/fcntl.h> 12 + #include <fcntl.h> 13 13 #include <sys/vfs.h> 14 14 15 15 #include "main.h"
+2 -16
tools/bpf/runqslower/runqslower.c
··· 4 4 #include <stdio.h> 5 5 #include <stdlib.h> 6 6 #include <string.h> 7 - #include <sys/resource.h> 8 7 #include <time.h> 9 8 #include <bpf/libbpf.h> 10 9 #include <bpf/bpf.h> ··· 87 88 return vfprintf(stderr, format, args); 88 89 } 89 90 90 - static int bump_memlock_rlimit(void) 91 - { 92 - struct rlimit rlim_new = { 93 - .rlim_cur = RLIM_INFINITY, 94 - .rlim_max = RLIM_INFINITY, 95 - }; 96 - 97 - return setrlimit(RLIMIT_MEMLOCK, &rlim_new); 98 - } 99 - 100 91 void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) 101 92 { 102 93 const struct runq_event *e = data; ··· 122 133 123 134 libbpf_set_print(libbpf_print_fn); 124 135 125 - err = bump_memlock_rlimit(); 126 - if (err) { 127 - fprintf(stderr, "failed to increase rlimit: %d", err); 128 - return 1; 129 - } 136 + /* Use libbpf 1.0 API mode */ 137 + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 130 138 131 139 obj = runqslower_bpf__open(); 132 140 if (!obj) {
+2
tools/include/uapi/asm/bpf_perf_event.h
··· 1 1 #if defined(__aarch64__) 2 2 #include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h" 3 + #elif defined(__arc__) 4 + #include "../../arch/arc/include/uapi/asm/bpf_perf_event.h" 3 5 #elif defined(__s390__) 4 6 #include "../../arch/s390/include/uapi/asm/bpf_perf_event.h" 5 7 #elif defined(__riscv)
+12
tools/include/uapi/linux/bpf.h
··· 5143 5143 * The **hash_algo** is returned on success, 5144 5144 * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if 5145 5145 * invalid arguments are passed. 5146 + * 5147 + * void *bpf_kptr_xchg(void *map_value, void *ptr) 5148 + * Description 5149 + * Exchange kptr at pointer *map_value* with *ptr*, and return the 5150 + * old value. *ptr* can be NULL, otherwise it must be a referenced 5151 + * pointer which will be released when this helper is called. 5152 + * Return 5153 + * The old value of kptr (which can be NULL). The returned pointer 5154 + * if not NULL, is a reference which must be released using its 5155 + * corresponding release function, or moved into a BPF map before 5156 + * program exit. 5146 5157 */ 5147 5158 #define __BPF_FUNC_MAPPER(FN) \ 5148 5159 FN(unspec), \ ··· 5350 5339 FN(copy_from_user_task), \ 5351 5340 FN(skb_set_tstamp), \ 5352 5341 FN(ima_file_hash), \ 5342 + FN(kptr_xchg), \ 5353 5343 /* */ 5354 5344 5355 5345 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
+32 -2
tools/lib/bpf/bpf.c
··· 817 817 { 818 818 __u32 target_btf_id, iter_info_len; 819 819 union bpf_attr attr; 820 - int fd; 820 + int fd, err; 821 821 822 822 if (!OPTS_VALID(opts, bpf_link_create_opts)) 823 823 return libbpf_err(-EINVAL); ··· 870 870 } 871 871 proceed: 872 872 fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); 873 - return libbpf_err_errno(fd); 873 + if (fd >= 0) 874 + return fd; 875 + /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry 876 + * and other similar programs 877 + */ 878 + err = -errno; 879 + if (err != -EINVAL) 880 + return libbpf_err(err); 881 + 882 + /* if user used features not supported by 883 + * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately 884 + */ 885 + if (attr.link_create.target_fd || attr.link_create.target_btf_id) 886 + return libbpf_err(err); 887 + if (!OPTS_ZEROED(opts, sz)) 888 + return libbpf_err(err); 889 + 890 + /* otherwise, for few select kinds of programs that can be 891 + * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as 892 + * a fallback for older kernels 893 + */ 894 + switch (attach_type) { 895 + case BPF_TRACE_RAW_TP: 896 + case BPF_LSM_MAC: 897 + case BPF_TRACE_FENTRY: 898 + case BPF_TRACE_FEXIT: 899 + case BPF_MODIFY_RETURN: 900 + return bpf_raw_tracepoint_open(NULL, prog_fd); 901 + default: 902 + return libbpf_err(err); 903 + } 874 904 } 875 905 876 906 int bpf_link_detach(int link_fd)
+7
tools/lib/bpf/bpf_helpers.h
··· 149 149 150 150 #define __kconfig __attribute__((section(".kconfig"))) 151 151 #define __ksym __attribute__((section(".ksyms"))) 152 + #if __has_attribute(btf_type_tag) 153 + #define __kptr __attribute__((btf_type_tag("kptr"))) 154 + #define __kptr_ref __attribute__((btf_type_tag("kptr_ref"))) 155 + #else 156 + #define __kptr 157 + #define __kptr_ref 158 + #endif 152 159 153 160 #ifndef ___bpf_concat 154 161 #define ___bpf_concat(a, b) a ## b
+23
tools/lib/bpf/bpf_tracing.h
··· 27 27 #elif defined(__TARGET_ARCH_riscv) 28 28 #define bpf_target_riscv 29 29 #define bpf_target_defined 30 + #elif defined(__TARGET_ARCH_arc) 31 + #define bpf_target_arc 32 + #define bpf_target_defined 30 33 #else 31 34 32 35 /* Fall back to what the compiler says */ ··· 56 53 #define bpf_target_defined 57 54 #elif defined(__riscv) && __riscv_xlen == 64 58 55 #define bpf_target_riscv 56 + #define bpf_target_defined 57 + #elif defined(__arc__) 58 + #define bpf_target_arc 59 59 #define bpf_target_defined 60 60 #endif /* no compiler target */ 61 61 ··· 237 231 #define __PT_SP_REG sp 238 232 #define __PT_IP_REG pc 239 233 /* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */ 234 + #define PT_REGS_SYSCALL_REGS(ctx) ctx 235 + 236 + #elif defined(bpf_target_arc) 237 + 238 + /* arc provides struct user_pt_regs instead of struct pt_regs to userspace */ 239 + #define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) 240 + #define __PT_PARM1_REG scratch.r0 241 + #define __PT_PARM2_REG scratch.r1 242 + #define __PT_PARM3_REG scratch.r2 243 + #define __PT_PARM4_REG scratch.r3 244 + #define __PT_PARM5_REG scratch.r4 245 + #define __PT_RET_REG scratch.blink 246 + #define __PT_FP_REG __unsupported__ 247 + #define __PT_RC_REG scratch.r0 248 + #define __PT_SP_REG scratch.sp 249 + #define __PT_IP_REG scratch.ret 250 + /* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */ 240 251 #define PT_REGS_SYSCALL_REGS(ctx) ctx 241 252 242 253 #endif
+7 -2
tools/lib/bpf/btf.c
··· 2626 2626 const struct btf_ext_info_sec *sinfo; 2627 2627 struct btf_ext_info *ext_info; 2628 2628 __u32 info_left, record_size; 2629 + size_t sec_cnt = 0; 2629 2630 /* The start of the info sec (including the __u32 record_size). */ 2630 2631 void *info; 2631 2632 ··· 2690 2689 return -EINVAL; 2691 2690 } 2692 2691 2693 - total_record_size = sec_hdrlen + 2694 - (__u64)num_records * record_size; 2692 + total_record_size = sec_hdrlen + (__u64)num_records * record_size; 2695 2693 if (info_left < total_record_size) { 2696 2694 pr_debug("%s section has incorrect num_records in .BTF.ext\n", 2697 2695 ext_sec->desc); ··· 2699 2699 2700 2700 info_left -= total_record_size; 2701 2701 sinfo = (void *)sinfo + total_record_size; 2702 + sec_cnt++; 2702 2703 } 2703 2704 2704 2705 ext_info = ext_sec->ext_info; 2705 2706 ext_info->len = ext_sec->len - sizeof(__u32); 2706 2707 ext_info->rec_size = record_size; 2707 2708 ext_info->info = info + sizeof(__u32); 2709 + ext_info->sec_cnt = sec_cnt; 2708 2710 2709 2711 return 0; 2710 2712 } ··· 2790 2788 { 2791 2789 if (IS_ERR_OR_NULL(btf_ext)) 2792 2790 return; 2791 + free(btf_ext->func_info.sec_idxs); 2792 + free(btf_ext->line_info.sec_idxs); 2793 + free(btf_ext->core_relo_info.sec_idxs); 2793 2794 free(btf_ext->data); 2794 2795 free(btf_ext); 2795 2796 }
+250 -72
tools/lib/bpf/libbpf.c
··· 302 302 void *priv; 303 303 bpf_program_clear_priv_t clear_priv; 304 304 305 - bool load; 305 + bool autoload; 306 306 bool mark_btf_static; 307 307 enum bpf_prog_type type; 308 308 enum bpf_attach_type expected_attach_type; ··· 672 672 prog->insns_cnt = prog->sec_insn_cnt; 673 673 674 674 prog->type = BPF_PROG_TYPE_UNSPEC; 675 - prog->load = true; 675 + 676 + /* libbpf's convention for SEC("?abc...") is that it's just like 677 + * SEC("abc...") but the corresponding bpf_program starts out with 678 + * autoload set to false. 679 + */ 680 + if (sec_name[0] == '?') { 681 + prog->autoload = false; 682 + /* from now on forget there was ? in section name */ 683 + sec_name++; 684 + } else { 685 + prog->autoload = true; 686 + } 676 687 677 688 prog->instances.fds = NULL; 678 689 prog->instances.nr = -1; ··· 1233 1222 if (!obj->efile.elf) 1234 1223 return; 1235 1224 1236 - if (obj->efile.elf) { 1237 - elf_end(obj->efile.elf); 1238 - obj->efile.elf = NULL; 1239 - } 1225 + elf_end(obj->efile.elf); 1226 + obj->efile.elf = NULL; 1240 1227 obj->efile.symbols = NULL; 1241 1228 obj->efile.st_ops_data = NULL; 1242 1229 ··· 2765 2756 btf__set_pointer_size(obj->btf, 8); 2766 2757 } 2767 2758 if (btf_ext_data) { 2759 + struct btf_ext_info *ext_segs[3]; 2760 + int seg_num, sec_num; 2761 + 2768 2762 if (!obj->btf) { 2769 2763 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", 2770 2764 BTF_EXT_ELF_SEC, BTF_ELF_SEC); ··· 2780 2768 BTF_EXT_ELF_SEC, err); 2781 2769 obj->btf_ext = NULL; 2782 2770 goto out; 2771 + } 2772 + 2773 + /* setup .BTF.ext to ELF section mapping */ 2774 + ext_segs[0] = &obj->btf_ext->func_info; 2775 + ext_segs[1] = &obj->btf_ext->line_info; 2776 + ext_segs[2] = &obj->btf_ext->core_relo_info; 2777 + for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { 2778 + struct btf_ext_info *seg = ext_segs[seg_num]; 2779 + const struct btf_ext_info_sec *sec; 2780 + const char *sec_name; 2781 + Elf_Scn *scn; 2782 + 2783 + if 
(seg->sec_cnt == 0) 2784 + continue; 2785 + 2786 + seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); 2787 + if (!seg->sec_idxs) { 2788 + err = -ENOMEM; 2789 + goto out; 2790 + } 2791 + 2792 + sec_num = 0; 2793 + for_each_btf_ext_sec(seg, sec) { 2794 + /* preventively increment index to avoid doing 2795 + * this before every continue below 2796 + */ 2797 + sec_num++; 2798 + 2799 + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 2800 + if (str_is_empty(sec_name)) 2801 + continue; 2802 + scn = elf_sec_by_name(obj, sec_name); 2803 + if (!scn) 2804 + continue; 2805 + 2806 + seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); 2807 + } 2783 2808 } 2784 2809 } 2785 2810 out: ··· 2976 2927 } 2977 2928 2978 2929 bpf_object__for_each_program(prog, obj) { 2979 - if (!prog->load) 2930 + if (!prog->autoload) 2980 2931 continue; 2981 2932 if (prog_needs_vmlinux_btf(prog)) 2982 2933 return true; ··· 4643 4594 }; 4644 4595 int fd, insn_cnt = ARRAY_SIZE(insns); 4645 4596 4646 - fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); 4597 + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); 4647 4598 return probe_fd(fd); 4648 4599 } 4649 4600 ··· 5626 5577 return 0; 5627 5578 } 5628 5579 5580 + static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) 5581 + { 5582 + struct reloc_desc *relo; 5583 + int i; 5584 + 5585 + for (i = 0; i < prog->nr_reloc; i++) { 5586 + relo = &prog->reloc_desc[i]; 5587 + if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) 5588 + continue; 5589 + 5590 + return relo->core_relo; 5591 + } 5592 + 5593 + return NULL; 5594 + } 5595 + 5629 5596 static int bpf_core_resolve_relo(struct bpf_program *prog, 5630 5597 const struct bpf_core_relo *relo, 5631 5598 int relo_idx, ··· 5698 5633 struct bpf_program *prog; 5699 5634 struct bpf_insn *insn; 5700 5635 const char *sec_name; 5701 - int i, err = 0, insn_idx, sec_idx; 5636 + int i, err = 0, insn_idx, sec_idx, 
sec_num; 5702 5637 5703 5638 if (obj->btf_ext->core_relo_info.len == 0) 5704 5639 return 0; ··· 5719 5654 } 5720 5655 5721 5656 seg = &obj->btf_ext->core_relo_info; 5657 + sec_num = 0; 5722 5658 for_each_btf_ext_sec(seg, sec) { 5659 + sec_idx = seg->sec_idxs[sec_num]; 5660 + sec_num++; 5661 + 5723 5662 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 5724 5663 if (str_is_empty(sec_name)) { 5725 5664 err = -EINVAL; 5726 5665 goto out; 5727 5666 } 5728 - /* bpf_object's ELF is gone by now so it's not easy to find 5729 - * section index by section name, but we can find *any* 5730 - * bpf_program within desired section name and use it's 5731 - * prog->sec_idx to do a proper search by section index and 5732 - * instruction offset 5733 - */ 5734 - prog = NULL; 5735 - for (i = 0; i < obj->nr_programs; i++) { 5736 - prog = &obj->programs[i]; 5737 - if (strcmp(prog->sec_name, sec_name) == 0) 5738 - break; 5739 - } 5740 - if (!prog) { 5741 - pr_warn("sec '%s': failed to find a BPF program\n", sec_name); 5742 - return -ENOENT; 5743 - } 5744 - sec_idx = prog->sec_idx; 5745 5667 5746 - pr_debug("sec '%s': found %d CO-RE relocations\n", 5747 - sec_name, sec->num_info); 5668 + pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); 5748 5669 5749 5670 for_each_btf_ext_rec(seg, sec, i, rec) { 5750 5671 if (rec->insn_off % BPF_INSN_SZ) ··· 5753 5702 /* no need to apply CO-RE relocation if the program is 5754 5703 * not going to be loaded 5755 5704 */ 5756 - if (!prog->load) 5705 + if (!prog->autoload) 5757 5706 continue; 5758 5707 5759 5708 /* adjust insn_idx from section frame of reference to the local ··· 5765 5714 return -EINVAL; 5766 5715 insn = &prog->insns[insn_idx]; 5767 5716 5768 - if (prog->obj->gen_loader) { 5769 - err = record_relo_core(prog, rec, insn_idx); 5770 - if (err) { 5771 - pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", 5772 - prog->name, i, err); 5773 - goto out; 5774 - } 5775 - continue; 5717 + err = 
record_relo_core(prog, rec, insn_idx); 5718 + if (err) { 5719 + pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", 5720 + prog->name, i, err); 5721 + goto out; 5776 5722 } 5723 + 5724 + if (prog->obj->gen_loader) 5725 + continue; 5777 5726 5778 5727 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); 5779 5728 if (err) { ··· 5914 5863 void *rec, *rec_end, *new_prog_info; 5915 5864 const struct btf_ext_info_sec *sec; 5916 5865 size_t old_sz, new_sz; 5917 - const char *sec_name; 5918 - int i, off_adj; 5866 + int i, sec_num, sec_idx, off_adj; 5919 5867 5868 + sec_num = 0; 5920 5869 for_each_btf_ext_sec(ext_info, sec) { 5921 - sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 5922 - if (!sec_name) 5923 - return -EINVAL; 5924 - if (strcmp(sec_name, prog->sec_name) != 0) 5870 + sec_idx = ext_info->sec_idxs[sec_num]; 5871 + sec_num++; 5872 + if (prog->sec_idx != sec_idx) 5925 5873 continue; 5926 5874 5927 5875 for_each_btf_ext_rec(ext_info, sec, i, rec) { ··· 6315 6265 if (err) 6316 6266 return err; 6317 6267 6318 - 6319 6268 return 0; 6320 6269 } 6321 6270 ··· 6375 6326 err); 6376 6327 return err; 6377 6328 } 6378 - if (obj->gen_loader) 6379 - bpf_object__sort_relos(obj); 6329 + bpf_object__sort_relos(obj); 6380 6330 } 6381 6331 6382 6332 /* Before relocating calls pre-process relocations and mark ··· 6411 6363 */ 6412 6364 if (prog_is_subprog(obj, prog)) 6413 6365 continue; 6414 - if (!prog->load) 6366 + if (!prog->autoload) 6415 6367 continue; 6416 6368 6417 6369 err = bpf_object__relocate_calls(obj, prog); ··· 6426 6378 prog = &obj->programs[i]; 6427 6379 if (prog_is_subprog(obj, prog)) 6428 6380 continue; 6429 - if (!prog->load) 6381 + if (!prog->autoload) 6430 6382 continue; 6431 6383 err = bpf_object__relocate_data(obj, prog); 6432 6384 if (err) { ··· 6435 6387 return err; 6436 6388 } 6437 6389 } 6438 - if (!obj->gen_loader) 6439 - bpf_object__free_relocs(obj); 6390 + 6440 6391 return 0; 6441 6392 } 6442 6393 ··· 
6712 6665 return 0; 6713 6666 } 6714 6667 6668 + static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); 6669 + 6715 6670 static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, 6716 6671 struct bpf_insn *insns, int insns_cnt, 6717 6672 const char *license, __u32 kern_version, ··· 6860 6811 goto retry_load; 6861 6812 6862 6813 ret = -errno; 6814 + 6815 + /* post-process verifier log to improve error descriptions */ 6816 + fixup_verifier_log(prog, log_buf, log_buf_size); 6817 + 6863 6818 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 6864 6819 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); 6865 6820 pr_perm_msg(ret); ··· 6872 6819 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 6873 6820 prog->name, log_buf); 6874 6821 } 6875 - if (insns_cnt >= BPF_MAXINSNS) { 6876 - pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", 6877 - prog->name, insns_cnt, BPF_MAXINSNS); 6878 - } 6879 6822 6880 6823 out: 6881 6824 if (own_log_buf) 6882 6825 free(log_buf); 6883 6826 return ret; 6827 + } 6828 + 6829 + static char *find_prev_line(char *buf, char *cur) 6830 + { 6831 + char *p; 6832 + 6833 + if (cur == buf) /* end of a log buf */ 6834 + return NULL; 6835 + 6836 + p = cur - 1; 6837 + while (p - 1 >= buf && *(p - 1) != '\n') 6838 + p--; 6839 + 6840 + return p; 6841 + } 6842 + 6843 + static void patch_log(char *buf, size_t buf_sz, size_t log_sz, 6844 + char *orig, size_t orig_sz, const char *patch) 6845 + { 6846 + /* size of the remaining log content to the right from the to-be-replaced part */ 6847 + size_t rem_sz = (buf + log_sz) - (orig + orig_sz); 6848 + size_t patch_sz = strlen(patch); 6849 + 6850 + if (patch_sz != orig_sz) { 6851 + /* If patch line(s) are longer than original piece of verifier log, 6852 + * shift log contents by (patch_sz - orig_sz) bytes to the right 6853 + * starting from after to-be-replaced part of the log. 
6854 + * 6855 + * If patch line(s) are shorter than original piece of verifier log, 6856 + * shift log contents by (orig_sz - patch_sz) bytes to the left 6857 + * starting from after to-be-replaced part of the log 6858 + * 6859 + * We need to be careful about not overflowing available 6860 + * buf_sz capacity. If that's the case, we'll truncate the end 6861 + * of the original log, as necessary. 6862 + */ 6863 + if (patch_sz > orig_sz) { 6864 + if (orig + patch_sz >= buf + buf_sz) { 6865 + /* patch is big enough to cover remaining space completely */ 6866 + patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; 6867 + rem_sz = 0; 6868 + } else if (patch_sz - orig_sz > buf_sz - log_sz) { 6869 + /* patch causes part of remaining log to be truncated */ 6870 + rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); 6871 + } 6872 + } 6873 + /* shift remaining log to the right by calculated amount */ 6874 + memmove(orig + patch_sz, orig + orig_sz, rem_sz); 6875 + } 6876 + 6877 + memcpy(orig, patch, patch_sz); 6878 + } 6879 + 6880 + static void fixup_log_failed_core_relo(struct bpf_program *prog, 6881 + char *buf, size_t buf_sz, size_t log_sz, 6882 + char *line1, char *line2, char *line3) 6883 + { 6884 + /* Expected log for failed and not properly guarded CO-RE relocation: 6885 + * line1 -> 123: (85) call unknown#195896080 6886 + * line2 -> invalid func unknown#195896080 6887 + * line3 -> <anything else or end of buffer> 6888 + * 6889 + * "123" is the index of the instruction that was poisoned. We extract 6890 + * instruction index to find corresponding CO-RE relocation and 6891 + * replace this part of the log with more relevant information about 6892 + * failed CO-RE relocation. 
6893 + */ 6894 + const struct bpf_core_relo *relo; 6895 + struct bpf_core_spec spec; 6896 + char patch[512], spec_buf[256]; 6897 + int insn_idx, err; 6898 + 6899 + if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) 6900 + return; 6901 + 6902 + relo = find_relo_core(prog, insn_idx); 6903 + if (!relo) 6904 + return; 6905 + 6906 + err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); 6907 + if (err) 6908 + return; 6909 + 6910 + bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); 6911 + snprintf(patch, sizeof(patch), 6912 + "%d: <invalid CO-RE relocation>\n" 6913 + "failed to resolve CO-RE relocation %s\n", 6914 + insn_idx, spec_buf); 6915 + 6916 + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 6917 + } 6918 + 6919 + static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) 6920 + { 6921 + /* look for familiar error patterns in last N lines of the log */ 6922 + const size_t max_last_line_cnt = 10; 6923 + char *prev_line, *cur_line, *next_line; 6924 + size_t log_sz; 6925 + int i; 6926 + 6927 + if (!buf) 6928 + return; 6929 + 6930 + log_sz = strlen(buf) + 1; 6931 + next_line = buf + log_sz - 1; 6932 + 6933 + for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { 6934 + cur_line = find_prev_line(buf, next_line); 6935 + if (!cur_line) 6936 + return; 6937 + 6938 + /* failed CO-RE relocation case */ 6939 + if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { 6940 + prev_line = find_prev_line(buf, cur_line); 6941 + if (!prev_line) 6942 + continue; 6943 + 6944 + fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, 6945 + prev_line, cur_line, next_line); 6946 + return; 6947 + } 6948 + } 6884 6949 } 6885 6950 6886 6951 static int bpf_program_record_relos(struct bpf_program *prog) ··· 7146 6975 prog = &obj->programs[i]; 7147 6976 if (prog_is_subprog(obj, prog)) 7148 6977 continue; 7149 - if (!prog->load) { 6978 + if (!prog->autoload) { 7150 6979 pr_debug("prog '%s': skipped 
loading\n", prog->name); 7151 6980 continue; 7152 6981 } ··· 7155 6984 if (err) 7156 6985 return err; 7157 6986 } 7158 - if (obj->gen_loader) 7159 - bpf_object__free_relocs(obj); 6987 + 6988 + bpf_object__free_relocs(obj); 7160 6989 return 0; 7161 6990 } 7162 6991 ··· 7176 7005 continue; 7177 7006 } 7178 7007 7179 - bpf_program__set_type(prog, prog->sec_def->prog_type); 7180 - bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); 7008 + prog->type = prog->sec_def->prog_type; 7009 + prog->expected_attach_type = prog->sec_def->expected_attach_type; 7181 7010 7182 7011 #pragma GCC diagnostic push 7183 7012 #pragma GCC diagnostic ignored "-Wdeprecated-declarations" ··· 8626 8455 8627 8456 bool bpf_program__autoload(const struct bpf_program *prog) 8628 8457 { 8629 - return prog->load; 8458 + return prog->autoload; 8630 8459 } 8631 8460 8632 8461 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) ··· 8634 8463 if (prog->obj->loaded) 8635 8464 return libbpf_err(-EINVAL); 8636 8465 8637 - prog->load = autoload; 8466 + prog->autoload = autoload; 8638 8467 return 0; 8639 8468 } 8640 8469 ··· 8722 8551 return prog->type; 8723 8552 } 8724 8553 8725 - void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) 8554 + int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) 8726 8555 { 8556 + if (prog->obj->loaded) 8557 + return libbpf_err(-EBUSY); 8558 + 8727 8559 prog->type = type; 8560 + return 0; 8728 8561 } 8729 8562 8730 8563 static bool bpf_program__is_type(const struct bpf_program *prog, ··· 8742 8567 { \ 8743 8568 if (!prog) \ 8744 8569 return libbpf_err(-EINVAL); \ 8745 - bpf_program__set_type(prog, TYPE); \ 8746 - return 0; \ 8570 + return bpf_program__set_type(prog, TYPE); \ 8747 8571 } \ 8748 8572 \ 8749 8573 bool bpf_program__is_##NAME(const struct bpf_program *prog) \ ··· 8772 8598 return prog->expected_attach_type; 8773 8599 } 8774 8600 8775 - void 
bpf_program__set_expected_attach_type(struct bpf_program *prog, 8601 + int bpf_program__set_expected_attach_type(struct bpf_program *prog, 8776 8602 enum bpf_attach_type type) 8777 8603 { 8604 + if (prog->obj->loaded) 8605 + return libbpf_err(-EBUSY); 8606 + 8778 8607 prog->expected_attach_type = type; 8608 + return 0; 8779 8609 } 8780 8610 8781 8611 __u32 bpf_program__flags(const struct bpf_program *prog) ··· 9849 9671 * bpf_object__open guessed 9850 9672 */ 9851 9673 if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { 9852 - bpf_program__set_type(prog, attr->prog_type); 9853 - bpf_program__set_expected_attach_type(prog, 9854 - attach_type); 9674 + prog->type = attr->prog_type; 9675 + prog->expected_attach_type = attach_type; 9855 9676 } 9856 9677 if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) { 9857 9678 /* ··· 11159 10982 char resolved_path[512]; 11160 10983 struct bpf_object *obj = prog->obj; 11161 10984 struct bpf_link *link; 11162 - long usdt_cookie; 10985 + __u64 usdt_cookie; 11163 10986 int err; 11164 10987 11165 10988 if (!OPTS_VALID(opts, bpf_uprobe_opts)) ··· 11422 11245 return libbpf_err_ptr(-ENOMEM); 11423 11246 link->detach = &bpf_link__detach_fd; 11424 11247 11425 - pfd = bpf_raw_tracepoint_open(NULL, prog_fd); 11248 + /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ 11249 + pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), NULL); 11426 11250 if (pfd < 0) { 11427 11251 pfd = -errno; 11428 11252 free(link); ··· 11432 11254 return libbpf_err_ptr(pfd); 11433 11255 } 11434 11256 link->fd = pfd; 11435 - return (struct bpf_link *)link; 11257 + return link; 11436 11258 } 11437 11259 11438 11260 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) ··· 12843 12665 struct bpf_program *prog = *s->progs[i].prog; 12844 12666 struct bpf_link **link = s->progs[i].link; 12845 12667 12846 - if (!prog->load) 12668 + if (!prog->autoload) 12847 12669 continue; 12848 12670 12849 12671 /* 
auto-attaching not supported for this program */
tools/lib/bpf/libbpf.h | +79 -3
··· 378 378 LIBBPF_API struct bpf_link *bpf_link__open(const char *path); 379 379 LIBBPF_API int bpf_link__fd(const struct bpf_link *link); 380 380 LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); 381 + /** 382 + * @brief **bpf_link__pin()** pins the BPF link to a file 383 + * in the BPF FS specified by a path. This increments the links 384 + * reference count, allowing it to stay loaded after the process 385 + * which loaded it has exited. 386 + * 387 + * @param link BPF link to pin, must already be loaded 388 + * @param path file path in a BPF file system 389 + * @return 0, on success; negative error code, otherwise 390 + */ 391 + 381 392 LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); 393 + 394 + /** 395 + * @brief **bpf_link__unpin()** unpins the BPF link from a file 396 + * in the BPFFS specified by a path. This decrements the links 397 + * reference count. 398 + * 399 + * The file pinning the BPF link can also be unlinked by a different 400 + * process in which case this function will return an error. 401 + * 402 + * @param prog BPF program to unpin 403 + * @param path file path to the pin in a BPF file system 404 + * @return 0, on success; negative error code, otherwise 405 + */ 382 406 LIBBPF_API int bpf_link__unpin(struct bpf_link *link); 383 407 LIBBPF_API int bpf_link__update_program(struct bpf_link *link, 384 408 struct bpf_program *prog); ··· 410 386 LIBBPF_API int bpf_link__detach(struct bpf_link *link); 411 387 LIBBPF_API int bpf_link__destroy(struct bpf_link *link); 412 388 389 + /** 390 + * @brief **bpf_program__attach()** is a generic function for attaching 391 + * a BPF program based on auto-detection of program type, attach type, 392 + * and extra paremeters, where applicable. 
393 + * 394 + * @param prog BPF program to attach 395 + * @return Reference to the newly created BPF link; or NULL is returned on error, 396 + * error code is stored in errno 397 + * 398 + * This is supported for: 399 + * - kprobe/kretprobe (depends on SEC() definition) 400 + * - uprobe/uretprobe (depends on SEC() definition) 401 + * - tracepoint 402 + * - raw tracepoint 403 + * - tracing programs (typed raw TP/fentry/fexit/fmod_ret) 404 + */ 413 405 LIBBPF_API struct bpf_link * 414 406 bpf_program__attach(const struct bpf_program *prog); 415 407 ··· 726 686 LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); 727 687 728 688 LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog); 729 - LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, 730 - enum bpf_prog_type type); 689 + 690 + /** 691 + * @brief **bpf_program__set_type()** sets the program 692 + * type of the passed BPF program. 693 + * @param prog BPF program to set the program type for 694 + * @param type program type to set the BPF map to have 695 + * @return error code; or 0 if no error. An error occurs 696 + * if the object is already loaded. 697 + * 698 + * This must be called before the BPF object is loaded, 699 + * otherwise it has no effect and an error is returned. 700 + */ 701 + LIBBPF_API int bpf_program__set_type(struct bpf_program *prog, 702 + enum bpf_prog_type type); 731 703 732 704 LIBBPF_API enum bpf_attach_type 733 705 bpf_program__expected_attach_type(const struct bpf_program *prog); 734 - LIBBPF_API void 706 + 707 + /** 708 + * @brief **bpf_program__set_expected_attach_type()** sets the 709 + * attach type of the passed BPF program. This is used for 710 + * auto-detection of attachment when programs are loaded. 711 + * @param prog BPF program to set the attach type for 712 + * @param type attach type to set the BPF map to have 713 + * @return error code; or 0 if no error. An error occurs 714 + * if the object is already loaded. 
715 + * 716 + * This must be called before the BPF object is loaded, 717 + * otherwise it has no effect and an error is returned. 718 + */ 719 + LIBBPF_API int 735 720 bpf_program__set_expected_attach_type(struct bpf_program *prog, 736 721 enum bpf_attach_type type); 737 722 ··· 772 707 LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); 773 708 LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); 774 709 710 + /** 711 + * @brief **bpf_program__set_attach_target()** sets BTF-based attach target 712 + * for supported BPF program types: 713 + * - BTF-aware raw tracepoints (tp_btf); 714 + * - fentry/fexit/fmod_ret; 715 + * - lsm; 716 + * - freplace. 717 + * @param prog BPF program to set the attach type for 718 + * @param type attach type to set the BPF map to have 719 + * @return error code; or 0 if no error occurred. 720 + */ 775 721 LIBBPF_API int 776 722 bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, 777 723 const char *attach_func_name);
tools/lib/bpf/libbpf_internal.h | +8 -1
··· 376 376 void *info; 377 377 __u32 rec_size; 378 378 __u32 len; 379 + /* optional (maintained internally by libbpf) mapping between .BTF.ext 380 + * section and corresponding ELF section. This is used to join 381 + * information like CO-RE relocation records with corresponding BPF 382 + * programs defined in ELF sections 383 + */ 384 + __u32 *sec_idxs; 385 + int sec_cnt; 379 386 }; 380 387 381 388 #define for_each_btf_ext_sec(seg, sec) \ ··· 578 571 const struct bpf_program *prog, 579 572 pid_t pid, const char *path, 580 573 const char *usdt_provider, const char *usdt_name, 581 - long usdt_cookie); 574 + __u64 usdt_cookie); 582 575 583 576 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
tools/lib/bpf/relo_core.c | +56 -48
··· 178 178 * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access 179 179 * string to specify enumerator's value index that need to be relocated. 180 180 */ 181 - static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, 182 - __u32 type_id, 183 - const char *spec_str, 184 - enum bpf_core_relo_kind relo_kind, 185 - struct bpf_core_spec *spec) 181 + int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, 182 + const struct bpf_core_relo *relo, 183 + struct bpf_core_spec *spec) 186 184 { 187 185 int access_idx, parsed_len, i; 188 186 struct bpf_core_accessor *acc; 189 187 const struct btf_type *t; 190 - const char *name; 188 + const char *name, *spec_str; 191 189 __u32 id; 192 190 __s64 sz; 193 191 192 + spec_str = btf__name_by_offset(btf, relo->access_str_off); 194 193 if (str_is_empty(spec_str) || *spec_str == ':') 195 194 return -EINVAL; 196 195 197 196 memset(spec, 0, sizeof(*spec)); 198 197 spec->btf = btf; 199 - spec->root_type_id = type_id; 200 - spec->relo_kind = relo_kind; 198 + spec->root_type_id = relo->type_id; 199 + spec->relo_kind = relo->kind; 201 200 202 201 /* type-based relocations don't have a field access string */ 203 - if (core_relo_is_type_based(relo_kind)) { 202 + if (core_relo_is_type_based(relo->kind)) { 204 203 if (strcmp(spec_str, "0")) 205 204 return -EINVAL; 206 205 return 0; ··· 220 221 if (spec->raw_len == 0) 221 222 return -EINVAL; 222 223 223 - t = skip_mods_and_typedefs(btf, type_id, &id); 224 + t = skip_mods_and_typedefs(btf, relo->type_id, &id); 224 225 if (!t) 225 226 return -EINVAL; 226 227 ··· 230 231 acc->idx = access_idx; 231 232 spec->len++; 232 233 233 - if (core_relo_is_enumval_based(relo_kind)) { 234 + if (core_relo_is_enumval_based(relo->kind)) { 234 235 if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) 235 236 return -EINVAL; 236 237 ··· 239 240 return 0; 240 241 } 241 242 242 - if (!core_relo_is_field_based(relo_kind)) 243 + if 
(!core_relo_is_field_based(relo->kind)) 243 244 return -EINVAL; 244 245 245 246 sz = btf__resolve_size(btf, id); ··· 300 301 spec->bit_offset += access_idx * sz * 8; 301 302 } else { 302 303 pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", 303 - prog_name, type_id, spec_str, i, id, btf_kind_str(t)); 304 + prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t)); 304 305 return -EINVAL; 305 306 } 306 307 } ··· 1054 1055 * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, 1055 1056 * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b 1056 1057 */ 1057 - static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) 1058 + int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) 1058 1059 { 1059 1060 const struct btf_type *t; 1060 1061 const struct btf_enum *e; 1061 1062 const char *s; 1062 1063 __u32 type_id; 1063 - int i; 1064 + int i, len = 0; 1065 + 1066 + #define append_buf(fmt, args...) \ 1067 + ({ \ 1068 + int r; \ 1069 + r = snprintf(buf, buf_sz, fmt, ##args); \ 1070 + len += r; \ 1071 + if (r >= buf_sz) \ 1072 + r = buf_sz; \ 1073 + buf += r; \ 1074 + buf_sz -= r; \ 1075 + }) 1064 1076 1065 1077 type_id = spec->root_type_id; 1066 1078 t = btf_type_by_id(spec->btf, type_id); 1067 1079 s = btf__name_by_offset(spec->btf, t->name_off); 1068 1080 1069 - libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); 1081 + append_buf("<%s> [%u] %s %s", 1082 + core_relo_kind_str(spec->relo_kind), 1083 + type_id, btf_kind_str(t), str_is_empty(s) ? 
"<anon>" : s); 1070 1084 1071 1085 if (core_relo_is_type_based(spec->relo_kind)) 1072 - return; 1086 + return len; 1073 1087 1074 1088 if (core_relo_is_enumval_based(spec->relo_kind)) { 1075 1089 t = skip_mods_and_typedefs(spec->btf, type_id, NULL); 1076 1090 e = btf_enum(t) + spec->raw_spec[0]; 1077 1091 s = btf__name_by_offset(spec->btf, e->name_off); 1078 1092 1079 - libbpf_print(level, "::%s = %u", s, e->val); 1080 - return; 1093 + append_buf("::%s = %u", s, e->val); 1094 + return len; 1081 1095 } 1082 1096 1083 1097 if (core_relo_is_field_based(spec->relo_kind)) { 1084 1098 for (i = 0; i < spec->len; i++) { 1085 1099 if (spec->spec[i].name) 1086 - libbpf_print(level, ".%s", spec->spec[i].name); 1100 + append_buf(".%s", spec->spec[i].name); 1087 1101 else if (i > 0 || spec->spec[i].idx > 0) 1088 - libbpf_print(level, "[%u]", spec->spec[i].idx); 1102 + append_buf("[%u]", spec->spec[i].idx); 1089 1103 } 1090 1104 1091 - libbpf_print(level, " ("); 1105 + append_buf(" ("); 1092 1106 for (i = 0; i < spec->raw_len; i++) 1093 - libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); 1107 + append_buf("%s%d", i == 0 ? 
"" : ":", spec->raw_spec[i]); 1094 1108 1095 1109 if (spec->bit_offset % 8) 1096 - libbpf_print(level, " @ offset %u.%u)", 1097 - spec->bit_offset / 8, spec->bit_offset % 8); 1110 + append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8); 1098 1111 else 1099 - libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); 1100 - return; 1112 + append_buf(" @ offset %u)", spec->bit_offset / 8); 1113 + return len; 1101 1114 } 1115 + 1116 + return len; 1117 + #undef append_buf 1102 1118 } 1103 1119 1104 1120 /* ··· 1181 1167 const struct btf_type *local_type; 1182 1168 const char *local_name; 1183 1169 __u32 local_id; 1184 - const char *spec_str; 1170 + char spec_buf[256]; 1185 1171 int i, j, err; 1186 1172 1187 1173 local_id = relo->type_id; ··· 1190 1176 if (!local_name) 1191 1177 return -EINVAL; 1192 1178 1193 - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); 1194 - if (str_is_empty(spec_str)) 1195 - return -EINVAL; 1196 - 1197 - err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, 1198 - relo->kind, local_spec); 1179 + err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec); 1199 1180 if (err) { 1181 + const char *spec_str; 1182 + 1183 + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); 1200 1184 pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", 1201 1185 prog_name, relo_idx, local_id, btf_kind_str(local_type), 1202 1186 str_is_empty(local_name) ? 
"<anon>" : local_name, 1203 - spec_str, err); 1187 + spec_str ?: "<?>", err); 1204 1188 return -EINVAL; 1205 1189 } 1206 1190 1207 - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, 1208 - relo_idx, core_relo_kind_str(relo->kind), relo->kind); 1209 - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec); 1210 - libbpf_print(LIBBPF_DEBUG, "\n"); 1191 + bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec); 1192 + pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf); 1211 1193 1212 1194 /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ 1213 1195 if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { ··· 1217 1207 } 1218 1208 1219 1209 /* libbpf doesn't support candidate search for anonymous types */ 1220 - if (str_is_empty(spec_str)) { 1210 + if (str_is_empty(local_name)) { 1221 1211 pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", 1222 1212 prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); 1223 1213 return -EOPNOTSUPP; ··· 1227 1217 err = bpf_core_spec_match(local_spec, cands->cands[i].btf, 1228 1218 cands->cands[i].id, cand_spec); 1229 1219 if (err < 0) { 1230 - pr_warn("prog '%s': relo #%d: error matching candidate #%d ", 1231 - prog_name, relo_idx, i); 1232 - bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); 1233 - libbpf_print(LIBBPF_WARN, ": %d\n", err); 1220 + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); 1221 + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", 1222 + prog_name, relo_idx, i, spec_buf, err); 1234 1223 return err; 1235 1224 } 1236 1225 1237 - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, 1238 - relo_idx, err == 0 ? 
"non-matching" : "matching", i); 1239 - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); 1240 - libbpf_print(LIBBPF_DEBUG, "\n"); 1226 + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); 1227 + pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name, 1228 + relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf); 1241 1229 1242 1230 if (err == 0) 1243 1231 continue;
tools/lib/bpf/relo_core.h | +6
··· 84 84 int insn_idx, const struct bpf_core_relo *relo, 85 85 int relo_idx, const struct bpf_core_relo_res *res); 86 86 87 + int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, 88 + const struct bpf_core_relo *relo, 89 + struct bpf_core_spec *spec); 90 + 91 + int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec); 92 + 87 93 #endif
tools/lib/bpf/usdt.c | +187 -4
··· 10 10 #include <linux/ptrace.h> 11 11 #include <linux/kernel.h> 12 12 13 + /* s8 will be marked as poison while it's a reg of riscv */ 14 + #if defined(__riscv) 15 + #define rv_s8 s8 16 + #endif 17 + 13 18 #include "bpf.h" 14 19 #include "libbpf.h" 15 20 #include "libbpf_common.h" ··· 562 557 GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, 563 558 struct usdt_note *usdt_note); 564 559 565 - static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie); 560 + static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); 566 561 567 562 static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, 568 - const char *usdt_provider, const char *usdt_name, long usdt_cookie, 563 + const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, 569 564 struct usdt_target **out_targets, size_t *out_target_cnt) 570 565 { 571 566 size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0; ··· 944 939 struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, 945 940 pid_t pid, const char *path, 946 941 const char *usdt_provider, const char *usdt_name, 947 - long usdt_cookie) 942 + __u64 usdt_cookie) 948 943 { 949 944 int i, fd, err, spec_map_fd, ip_map_fd; 950 945 LIBBPF_OPTS(bpf_uprobe_opts, opts); ··· 1146 1141 1147 1142 static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg); 1148 1143 1149 - static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie) 1144 + static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie) 1150 1145 { 1151 1146 const char *s; 1152 1147 int len; ··· 1307 1302 arg->arg_type = USDT_ARG_CONST; 1308 1303 arg->val_off = off; 1309 1304 arg->reg_off = 0; 1305 + } else { 1306 + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); 1307 
+ return -EINVAL; 1308 + } 1309 + 1310 + arg->arg_signed = arg_sz < 0; 1311 + if (arg_sz < 0) 1312 + arg_sz = -arg_sz; 1313 + 1314 + switch (arg_sz) { 1315 + case 1: case 2: case 4: case 8: 1316 + arg->arg_bitshift = 64 - arg_sz * 8; 1317 + break; 1318 + default: 1319 + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", 1320 + arg_num, arg_str, arg_sz); 1321 + return -EINVAL; 1322 + } 1323 + 1324 + return len; 1325 + } 1326 + 1327 + #elif defined(__aarch64__) 1328 + 1329 + static int calc_pt_regs_off(const char *reg_name) 1330 + { 1331 + int reg_num; 1332 + 1333 + if (sscanf(reg_name, "x%d", &reg_num) == 1) { 1334 + if (reg_num >= 0 && reg_num < 31) 1335 + return offsetof(struct user_pt_regs, regs[reg_num]); 1336 + } else if (strcmp(reg_name, "sp") == 0) { 1337 + return offsetof(struct user_pt_regs, sp); 1338 + } 1339 + pr_warn("usdt: unrecognized register '%s'\n", reg_name); 1340 + return -ENOENT; 1341 + } 1342 + 1343 + static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) 1344 + { 1345 + char *reg_name = NULL; 1346 + int arg_sz, len, reg_off; 1347 + long off; 1348 + 1349 + if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, &reg_name, &off, &len) == 3) { 1350 + /* Memory dereference case, e.g., -4@[sp, 96] */ 1351 + arg->arg_type = USDT_ARG_REG_DEREF; 1352 + arg->val_off = off; 1353 + reg_off = calc_pt_regs_off(reg_name); 1354 + free(reg_name); 1355 + if (reg_off < 0) 1356 + return reg_off; 1357 + arg->reg_off = reg_off; 1358 + } else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, &reg_name, &len) == 2) { 1359 + /* Memory dereference case, e.g., -4@[sp] */ 1360 + arg->arg_type = USDT_ARG_REG_DEREF; 1361 + arg->val_off = 0; 1362 + reg_off = calc_pt_regs_off(reg_name); 1363 + free(reg_name); 1364 + if (reg_off < 0) 1365 + return reg_off; 1366 + arg->reg_off = reg_off; 1367 + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { 1368 + /* Constant value case, e.g., 4@5 */ 1369 + 
arg->arg_type = USDT_ARG_CONST; 1370 + arg->val_off = off; 1371 + arg->reg_off = 0; 1372 + } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) { 1373 + /* Register read case, e.g., -8@x4 */ 1374 + arg->arg_type = USDT_ARG_REG; 1375 + arg->val_off = 0; 1376 + reg_off = calc_pt_regs_off(reg_name); 1377 + free(reg_name); 1378 + if (reg_off < 0) 1379 + return reg_off; 1380 + arg->reg_off = reg_off; 1381 + } else { 1382 + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); 1383 + return -EINVAL; 1384 + } 1385 + 1386 + arg->arg_signed = arg_sz < 0; 1387 + if (arg_sz < 0) 1388 + arg_sz = -arg_sz; 1389 + 1390 + switch (arg_sz) { 1391 + case 1: case 2: case 4: case 8: 1392 + arg->arg_bitshift = 64 - arg_sz * 8; 1393 + break; 1394 + default: 1395 + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", 1396 + arg_num, arg_str, arg_sz); 1397 + return -EINVAL; 1398 + } 1399 + 1400 + return len; 1401 + } 1402 + 1403 + #elif defined(__riscv) 1404 + 1405 + static int calc_pt_regs_off(const char *reg_name) 1406 + { 1407 + static struct { 1408 + const char *name; 1409 + size_t pt_regs_off; 1410 + } reg_map[] = { 1411 + { "ra", offsetof(struct user_regs_struct, ra) }, 1412 + { "sp", offsetof(struct user_regs_struct, sp) }, 1413 + { "gp", offsetof(struct user_regs_struct, gp) }, 1414 + { "tp", offsetof(struct user_regs_struct, tp) }, 1415 + { "a0", offsetof(struct user_regs_struct, a0) }, 1416 + { "a1", offsetof(struct user_regs_struct, a1) }, 1417 + { "a2", offsetof(struct user_regs_struct, a2) }, 1418 + { "a3", offsetof(struct user_regs_struct, a3) }, 1419 + { "a4", offsetof(struct user_regs_struct, a4) }, 1420 + { "a5", offsetof(struct user_regs_struct, a5) }, 1421 + { "a6", offsetof(struct user_regs_struct, a6) }, 1422 + { "a7", offsetof(struct user_regs_struct, a7) }, 1423 + { "s0", offsetof(struct user_regs_struct, s0) }, 1424 + { "s1", offsetof(struct user_regs_struct, s1) }, 1425 + { "s2", offsetof(struct user_regs_struct, 
s2) }, 1426 + { "s3", offsetof(struct user_regs_struct, s3) }, 1427 + { "s4", offsetof(struct user_regs_struct, s4) }, 1428 + { "s5", offsetof(struct user_regs_struct, s5) }, 1429 + { "s6", offsetof(struct user_regs_struct, s6) }, 1430 + { "s7", offsetof(struct user_regs_struct, s7) }, 1431 + { "s8", offsetof(struct user_regs_struct, rv_s8) }, 1432 + { "s9", offsetof(struct user_regs_struct, s9) }, 1433 + { "s10", offsetof(struct user_regs_struct, s10) }, 1434 + { "s11", offsetof(struct user_regs_struct, s11) }, 1435 + { "t0", offsetof(struct user_regs_struct, t0) }, 1436 + { "t1", offsetof(struct user_regs_struct, t1) }, 1437 + { "t2", offsetof(struct user_regs_struct, t2) }, 1438 + { "t3", offsetof(struct user_regs_struct, t3) }, 1439 + { "t4", offsetof(struct user_regs_struct, t4) }, 1440 + { "t5", offsetof(struct user_regs_struct, t5) }, 1441 + { "t6", offsetof(struct user_regs_struct, t6) }, 1442 + }; 1443 + int i; 1444 + 1445 + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { 1446 + if (strcmp(reg_name, reg_map[i].name) == 0) 1447 + return reg_map[i].pt_regs_off; 1448 + } 1449 + 1450 + pr_warn("usdt: unrecognized register '%s'\n", reg_name); 1451 + return -ENOENT; 1452 + } 1453 + 1454 + static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) 1455 + { 1456 + char *reg_name = NULL; 1457 + int arg_sz, len, reg_off; 1458 + long off; 1459 + 1460 + if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, &reg_name, &len) == 3) { 1461 + /* Memory dereference case, e.g., -8@-88(s0) */ 1462 + arg->arg_type = USDT_ARG_REG_DEREF; 1463 + arg->val_off = off; 1464 + reg_off = calc_pt_regs_off(reg_name); 1465 + free(reg_name); 1466 + if (reg_off < 0) 1467 + return reg_off; 1468 + arg->reg_off = reg_off; 1469 + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { 1470 + /* Constant value case, e.g., 4@5 */ 1471 + arg->arg_type = USDT_ARG_CONST; 1472 + arg->val_off = off; 1473 + arg->reg_off = 0; 1474 + } else if 
(sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) { 1475 + /* Register read case, e.g., -8@a1 */ 1476 + arg->arg_type = USDT_ARG_REG; 1477 + arg->val_off = 0; 1478 + reg_off = calc_pt_regs_off(reg_name); 1479 + free(reg_name); 1480 + if (reg_off < 0) 1481 + return reg_off; 1482 + arg->reg_off = reg_off; 1310 1483 } else { 1311 1484 pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); 1312 1485 return -EINVAL;
tools/testing/selftests/bpf/bench.c | -1
··· 8 8 #include <fcntl.h> 9 9 #include <pthread.h> 10 10 #include <sys/sysinfo.h> 11 - #include <sys/resource.h> 12 11 #include <signal.h> 13 12 #include "bench.h" 14 13 #include "testing_helpers.h"
tools/testing/selftests/bpf/bpf_rlimit.h | -28
··· 1 - #include <sys/resource.h> 2 - #include <stdio.h> 3 - 4 - static __attribute__((constructor)) void bpf_rlimit_ctor(void) 5 - { 6 - struct rlimit rlim_old, rlim_new = { 7 - .rlim_cur = RLIM_INFINITY, 8 - .rlim_max = RLIM_INFINITY, 9 - }; 10 - 11 - getrlimit(RLIMIT_MEMLOCK, &rlim_old); 12 - /* For the sake of running the test cases, we temporarily 13 - * set rlimit to infinity in order for kernel to focus on 14 - * errors from actual test cases and not getting noise 15 - * from hitting memlock limits. The limit is on per-process 16 - * basis and not a global one, hence destructor not really 17 - * needed here. 18 - */ 19 - if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) { 20 - perror("Unable to lift memlock rlimit"); 21 - /* Trying out lower limit, but expect potential test 22 - * case failures from this! 23 - */ 24 - rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); 25 - rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); 26 - setrlimit(RLIMIT_MEMLOCK, &rlim_new); 27 - } 28 - }
tools/testing/selftests/bpf/flow_dissector_load.c | +2 -4
··· 11 11 #include <bpf/bpf.h> 12 12 #include <bpf/libbpf.h> 13 13 14 - #include "bpf_rlimit.h" 15 14 #include "flow_dissector_load.h" 16 15 17 16 const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; ··· 24 25 int prog_fd, ret; 25 26 struct bpf_object *obj; 26 27 27 - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 28 - if (ret) 29 - error(1, 0, "failed to enable libbpf strict mode: %d", ret); 28 + /* Use libbpf 1.0 API mode */ 29 + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 30 30 31 31 ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name, 32 32 cfg_map_name, NULL, &prog_fd, NULL);
tools/testing/selftests/bpf/get_cgroup_id_user.c | +3 -1
··· 20 20 21 21 #include "cgroup_helpers.h" 22 22 #include "testing_helpers.h" 23 - #include "bpf_rlimit.h" 24 23 25 24 #define CHECK(condition, tag, format...) ({ \ 26 25 int __ret = !!(condition); \ ··· 65 66 cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); 66 67 if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno)) 67 68 return 1; 69 + 70 + /* Use libbpf 1.0 API mode */ 71 + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 68 72 69 73 err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); 70 74 if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
tools/testing/selftests/bpf/prog_tests/arg_parsing.c | +107
··· 1 + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 + 3 + #include "test_progs.h" 4 + #include "testing_helpers.h" 5 + 6 + static void init_test_filter_set(struct test_filter_set *set) 7 + { 8 + set->cnt = 0; 9 + set->tests = NULL; 10 + } 11 + 12 + static void free_test_filter_set(struct test_filter_set *set) 13 + { 14 + int i, j; 15 + 16 + for (i = 0; i < set->cnt; i++) { 17 + for (j = 0; j < set->tests[i].subtest_cnt; j++) 18 + free((void *)set->tests[i].subtests[j]); 19 + free(set->tests[i].subtests); 20 + free(set->tests[i].name); 21 + } 22 + 23 + free(set->tests); 24 + init_test_filter_set(set); 25 + } 26 + 27 + static void test_parse_test_list(void) 28 + { 29 + struct test_filter_set set; 30 + 31 + init_test_filter_set(&set); 32 + 33 + ASSERT_OK(parse_test_list("arg_parsing", &set, true), "parsing"); 34 + if (!ASSERT_EQ(set.cnt, 1, "test filters count")) 35 + goto error; 36 + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) 37 + goto error; 38 + ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count"); 39 + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "subtest name"); 40 + free_test_filter_set(&set); 41 + 42 + ASSERT_OK(parse_test_list("arg_parsing,bpf_cookie", &set, true), 43 + "parsing"); 44 + if (!ASSERT_EQ(set.cnt, 2, "count of test filters")) 45 + goto error; 46 + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) 47 + goto error; 48 + ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count"); 49 + ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count"); 50 + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name"); 51 + ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name"); 52 + free_test_filter_set(&set); 53 + 54 + ASSERT_OK(parse_test_list("arg_parsing/arg_parsing,bpf_cookie", 55 + &set, 56 + true), 57 + "parsing"); 58 + if (!ASSERT_EQ(set.cnt, 2, "count of test filters")) 59 + goto error; 60 + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) 61 + goto error; 62 + 
if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count")) 63 + goto error; 64 + ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count"); 65 + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name"); 66 + ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]), 67 + "subtest name"); 68 + ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name"); 69 + free_test_filter_set(&set); 70 + 71 + ASSERT_OK(parse_test_list("arg_parsing/arg_parsing", &set, true), 72 + "parsing"); 73 + ASSERT_OK(parse_test_list("bpf_cookie", &set, true), "parsing"); 74 + ASSERT_OK(parse_test_list("send_signal", &set, true), "parsing"); 75 + if (!ASSERT_EQ(set.cnt, 3, "count of test filters")) 76 + goto error; 77 + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) 78 + goto error; 79 + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count")) 80 + goto error; 81 + ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count"); 82 + ASSERT_EQ(set.tests[2].subtest_cnt, 0, "subtest filters count"); 83 + ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name"); 84 + ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]), 85 + "subtest name"); 86 + ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name"); 87 + ASSERT_OK(strcmp("send_signal", set.tests[2].name), "test name"); 88 + free_test_filter_set(&set); 89 + 90 + ASSERT_OK(parse_test_list("bpf_cookie/trace", &set, false), "parsing"); 91 + if (!ASSERT_EQ(set.cnt, 1, "count of test filters")) 92 + goto error; 93 + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) 94 + goto error; 95 + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count")) 96 + goto error; 97 + ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name"); 98 + ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name"); 99 + error: 100 + free_test_filter_set(&set); 101 + } 102 + 103 + void test_arg_parsing(void) 104 + { 105 + if 
(test__start_subtest("test_parse_test_list")) 106 + test_parse_test_list(); 107 + }
+1 -3
tools/testing/selftests/bpf/prog_tests/bpf_iter.c
··· 1192 1192     *dst = '\0';
1193 1193 }
1194 1194 
1195 - #define min(a, b) ((a) < (b) ? (a) : (b))
1196 - 
1197 1195 static void test_task_vma(void)
1198 1196 {
1199 1197     int err, iter_fd = -1, proc_maps_fd = -1;
··· 1227 1229     len = 0;
1228 1230     while (len < CMP_BUFFER_SIZE) {
1229 1231         err = read_fd_into_buffer(iter_fd, task_vma_output + len,
1230 -                 min(read_size, CMP_BUFFER_SIZE - len));
1232 +                 MIN(read_size, CMP_BUFFER_SIZE - len));
1231 1233         if (!err)
1232 1234             break;
1233 1235         if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
+2 -2
tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
··· 36 36 	void (*bpf_destroy)(void *);
37 37 };
38 38 
39 - enum test_state {
39 + enum bpf_test_state {
40 40 	_TS_INVALID,
41 41 	TS_MODULE_LOAD,
42 42 	TS_MODULE_LOAD_FAIL,
43 43 };
44 44 
45 - static _Atomic enum test_state state = _TS_INVALID;
45 + static _Atomic enum bpf_test_state state = _TS_INVALID;
46 46 
47 47 static int sys_finit_module(int fd, const char *param_values, int flags)
48 48 {
+2 -4
tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
··· 10 10 #include "bpf_tcp_nogpl.skel.h"
11 11 #include "bpf_dctcp_release.skel.h"
12 12 
13 - #define min(a, b) ((a) < (b) ? (a) : (b))
14 - 
15 13 #ifndef ENOTSUPP
16 14 #define ENOTSUPP 524
17 15 #endif
··· 51 53 
52 54     while (bytes < total_bytes && !READ_ONCE(stop)) {
53 55         nr_sent = send(fd, &batch,
54 -                min(total_bytes - bytes, sizeof(batch)), 0);
56 +                MIN(total_bytes - bytes, sizeof(batch)), 0);
55 57         if (nr_sent == -1 && errno == EINTR)
56 58             continue;
57 59         if (nr_sent == -1) {
··· 144 146     /* recv total_bytes */
145 147     while (bytes < total_bytes && !READ_ONCE(stop)) {
146 148         nr_recv = recv(fd, &batch,
147 -                min(total_bytes - bytes, sizeof(batch)), 0);
149 +                MIN(total_bytes - bytes, sizeof(batch)), 0);
148 150         if (nr_recv == -1 && errno == EINTR)
149 151             continue;
150 152         if (nr_recv == -1)
+99 -1
tools/testing/selftests/bpf/prog_tests/btf.c
··· 8 8 #include <linux/filter.h> 9 9 #include <linux/unistd.h> 10 10 #include <bpf/bpf.h> 11 - #include <sys/resource.h> 12 11 #include <libelf.h> 13 12 #include <gelf.h> 14 13 #include <string.h> ··· 3972 3973 .key_type_id = 1, 3973 3974 .value_type_id = 1, 3974 3975 .max_entries = 1, 3976 + }, 3977 + { 3978 + .descr = "type_tag test #2, type tag order", 3979 + .raw_types = { 3980 + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ 3981 + BTF_CONST_ENC(3), /* [2] */ 3982 + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [3] */ 3983 + BTF_END_RAW, 3984 + }, 3985 + BTF_STR_SEC("\0tag"), 3986 + .map_type = BPF_MAP_TYPE_ARRAY, 3987 + .map_name = "tag_type_check_btf", 3988 + .key_size = sizeof(int), 3989 + .value_size = 4, 3990 + .key_type_id = 1, 3991 + .value_type_id = 1, 3992 + .max_entries = 1, 3993 + .btf_load_err = true, 3994 + .err_str = "Type tags don't precede modifiers", 3995 + }, 3996 + { 3997 + .descr = "type_tag test #3, type tag order", 3998 + .raw_types = { 3999 + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ 4000 + BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */ 4001 + BTF_CONST_ENC(4), /* [3] */ 4002 + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */ 4003 + BTF_END_RAW, 4004 + }, 4005 + BTF_STR_SEC("\0tag\0tag"), 4006 + .map_type = BPF_MAP_TYPE_ARRAY, 4007 + .map_name = "tag_type_check_btf", 4008 + .key_size = sizeof(int), 4009 + .value_size = 4, 4010 + .key_type_id = 1, 4011 + .value_type_id = 1, 4012 + .max_entries = 1, 4013 + .btf_load_err = true, 4014 + .err_str = "Type tags don't precede modifiers", 4015 + }, 4016 + { 4017 + .descr = "type_tag test #4, type tag order", 4018 + .raw_types = { 4019 + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ 4020 + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [2] */ 4021 + BTF_CONST_ENC(4), /* [3] */ 4022 + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */ 4023 + BTF_END_RAW, 4024 + }, 4025 + BTF_STR_SEC("\0tag\0tag"), 4026 + .map_type = BPF_MAP_TYPE_ARRAY, 4027 + .map_name = "tag_type_check_btf", 4028 + .key_size = sizeof(int), 
4029 + .value_size = 4, 4030 + .key_type_id = 1, 4031 + .value_type_id = 1, 4032 + .max_entries = 1, 4033 + .btf_load_err = true, 4034 + .err_str = "Type tags don't precede modifiers", 4035 + }, 4036 + { 4037 + .descr = "type_tag test #5, type tag order", 4038 + .raw_types = { 4039 + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ 4040 + BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */ 4041 + BTF_CONST_ENC(1), /* [3] */ 4042 + BTF_TYPE_TAG_ENC(NAME_TBD, 2), /* [4] */ 4043 + BTF_END_RAW, 4044 + }, 4045 + BTF_STR_SEC("\0tag\0tag"), 4046 + .map_type = BPF_MAP_TYPE_ARRAY, 4047 + .map_name = "tag_type_check_btf", 4048 + .key_size = sizeof(int), 4049 + .value_size = 4, 4050 + .key_type_id = 1, 4051 + .value_type_id = 1, 4052 + .max_entries = 1, 4053 + }, 4054 + { 4055 + .descr = "type_tag test #6, type tag order", 4056 + .raw_types = { 4057 + BTF_PTR_ENC(2), /* [1] */ 4058 + BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */ 4059 + BTF_CONST_ENC(4), /* [3] */ 4060 + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */ 4061 + BTF_PTR_ENC(6), /* [5] */ 4062 + BTF_CONST_ENC(2), /* [6] */ 4063 + BTF_END_RAW, 4064 + }, 4065 + BTF_STR_SEC("\0tag"), 4066 + .map_type = BPF_MAP_TYPE_ARRAY, 4067 + .map_name = "tag_type_check_btf", 4068 + .key_size = sizeof(int), 4069 + .value_size = 4, 4070 + .key_type_id = 1, 4071 + .value_type_id = 1, 4072 + .max_entries = 1, 4073 + .btf_load_err = true, 4074 + .err_str = "Type tags don't precede modifiers", 3975 4075 }, 3976 4076 3977 4077 }; /* struct btf_raw_test raw_tests[] */
+1 -1
tools/testing/selftests/bpf/prog_tests/fexit_stress.c
··· 53 53                      &trace_opts);
54 54         if (!ASSERT_GE(fexit_fd[i], 0, "fexit load"))
55 55             goto out;
56 -         link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
56 +         link_fd[i] = bpf_link_create(fexit_fd[i], 0, BPF_TRACE_FEXIT, NULL);
57 57         if (!ASSERT_GE(link_fd[i], 0, "fexit attach"))
58 58             goto out;
59 59     }
+4 -6
tools/testing/selftests/bpf/prog_tests/helper_restricted.c
··· 6 6 void test_helper_restricted(void)
7 7 {
8 8     int prog_i = 0, prog_cnt;
9 -     int duration = 0;
10 9 
11 10     do {
12 11         struct test_helper_restricted *test;
13 -         int maybeOK;
12 +         int err;
14 13 
15 14         test = test_helper_restricted__open();
16 15         if (!ASSERT_OK_PTR(test, "open"))
··· 20 21         for (int j = 0; j < prog_cnt; ++j) {
21 22             struct bpf_program *prog = *test->skeleton->progs[j].prog;
22 23 
23 -             maybeOK = bpf_program__set_autoload(prog, prog_i == j);
24 -             ASSERT_OK(maybeOK, "set autoload");
24 +             bpf_program__set_autoload(prog, true);
25 25         }
26 26 
27 -         maybeOK = test_helper_restricted__load(test);
28 -         CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted");
27 +         err = test_helper_restricted__load(test);
28 +         ASSERT_ERR(err, "load_should_fail");
29 29 
30 30         test_helper_restricted__destroy(test);
31 31     } while (++prog_i < prog_cnt);
+6
tools/testing/selftests/bpf/prog_tests/linked_funcs.c
··· 14 14     if (!ASSERT_OK_PTR(skel, "skel_open"))
15 15         return;
16 16 
17 +     /* handler1 and handler2 are marked as SEC("?raw_tp/sys_enter") and
18 +      * are set to not autoload by default
19 +      */
20 +     bpf_program__set_autoload(skel->progs.handler1, true);
21 +     bpf_program__set_autoload(skel->progs.handler2, true);
22 + 
17 23     skel->rodata->my_tid = syscall(SYS_gettid);
18 24     skel->bss->syscall_id = SYS_getpgid;
19 25 
+114
tools/testing/selftests/bpf/prog_tests/log_fixup.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ 3 + #include <test_progs.h> 4 + #include <bpf/btf.h> 5 + 6 + #include "test_log_fixup.skel.h" 7 + 8 + enum trunc_type { 9 + TRUNC_NONE, 10 + TRUNC_PARTIAL, 11 + TRUNC_FULL, 12 + }; 13 + 14 + static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type) 15 + { 16 + char log_buf[8 * 1024]; 17 + struct test_log_fixup* skel; 18 + int err; 19 + 20 + skel = test_log_fixup__open(); 21 + if (!ASSERT_OK_PTR(skel, "skel_open")) 22 + return; 23 + 24 + bpf_program__set_autoload(skel->progs.bad_relo, true); 25 + memset(log_buf, 0, sizeof(log_buf)); 26 + bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf)); 27 + 28 + err = test_log_fixup__load(skel); 29 + if (!ASSERT_ERR(err, "load_fail")) 30 + goto cleanup; 31 + 32 + ASSERT_HAS_SUBSTR(log_buf, 33 + "0: <invalid CO-RE relocation>\n" 34 + "failed to resolve CO-RE relocation <byte_sz> ", 35 + "log_buf_part1"); 36 + 37 + switch (trunc_type) { 38 + case TRUNC_NONE: 39 + ASSERT_HAS_SUBSTR(log_buf, 40 + "struct task_struct___bad.fake_field (0:1 @ offset 4)\n", 41 + "log_buf_part2"); 42 + ASSERT_HAS_SUBSTR(log_buf, 43 + "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n", 44 + "log_buf_end"); 45 + break; 46 + case TRUNC_PARTIAL: 47 + /* we should get full libbpf message patch */ 48 + ASSERT_HAS_SUBSTR(log_buf, 49 + "struct task_struct___bad.fake_field (0:1 @ offset 4)\n", 50 + "log_buf_part2"); 51 + /* we shouldn't get full end of BPF verifier log */ 52 + ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"), 53 + "log_buf_end"); 54 + break; 55 + case TRUNC_FULL: 56 + /* we shouldn't get second part of libbpf message patch */ 57 + ASSERT_NULL(strstr(log_buf, "struct task_struct___bad.fake_field (0:1 @ offset 4)\n"), 58 + "log_buf_part2"); 59 + /* we shouldn't get full end of BPF verifier log */ 60 + 
ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"), 61 + "log_buf_end"); 62 + break; 63 + } 64 + 65 + if (env.verbosity > VERBOSE_NONE) 66 + printf("LOG: \n=================\n%s=================\n", log_buf); 67 + cleanup: 68 + test_log_fixup__destroy(skel); 69 + } 70 + 71 + static void bad_core_relo_subprog(void) 72 + { 73 + char log_buf[8 * 1024]; 74 + struct test_log_fixup* skel; 75 + int err; 76 + 77 + skel = test_log_fixup__open(); 78 + if (!ASSERT_OK_PTR(skel, "skel_open")) 79 + return; 80 + 81 + bpf_program__set_autoload(skel->progs.bad_relo_subprog, true); 82 + bpf_program__set_log_buf(skel->progs.bad_relo_subprog, log_buf, sizeof(log_buf)); 83 + 84 + err = test_log_fixup__load(skel); 85 + if (!ASSERT_ERR(err, "load_fail")) 86 + goto cleanup; 87 + 88 + /* there should be no prog loading log because we specified per-prog log buf */ 89 + ASSERT_HAS_SUBSTR(log_buf, 90 + ": <invalid CO-RE relocation>\n" 91 + "failed to resolve CO-RE relocation <byte_off> ", 92 + "log_buf"); 93 + ASSERT_HAS_SUBSTR(log_buf, 94 + "struct task_struct___bad.fake_field_subprog (0:2 @ offset 8)\n", 95 + "log_buf"); 96 + 97 + if (env.verbosity > VERBOSE_NONE) 98 + printf("LOG: \n=================\n%s=================\n", log_buf); 99 + 100 + cleanup: 101 + test_log_fixup__destroy(skel); 102 + } 103 + 104 + void test_log_fixup(void) 105 + { 106 + if (test__start_subtest("bad_core_relo_trunc_none")) 107 + bad_core_relo(0, TRUNC_NONE /* full buf */); 108 + if (test__start_subtest("bad_core_relo_trunc_partial")) 109 + bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); 110 + if (test__start_subtest("bad_core_relo_trunc_full")) 111 + bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */); 112 + if (test__start_subtest("bad_core_relo_subprog")) 113 + bad_core_relo_subprog(); 114 + }
+37
tools/testing/selftests/bpf/prog_tests/map_kptr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <test_progs.h> 3 + 4 + #include "map_kptr.skel.h" 5 + 6 + void test_map_kptr(void) 7 + { 8 + struct map_kptr *skel; 9 + int key = 0, ret; 10 + char buf[24]; 11 + 12 + skel = map_kptr__open_and_load(); 13 + if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) 14 + return; 15 + 16 + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); 17 + ASSERT_OK(ret, "array_map update"); 18 + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); 19 + ASSERT_OK(ret, "array_map update2"); 20 + 21 + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_map), &key, buf, 0); 22 + ASSERT_OK(ret, "hash_map update"); 23 + ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_map), &key); 24 + ASSERT_OK(ret, "hash_map delete"); 25 + 26 + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key, buf, 0); 27 + ASSERT_OK(ret, "hash_malloc_map update"); 28 + ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key); 29 + ASSERT_OK(ret, "hash_malloc_map delete"); 30 + 31 + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.lru_hash_map), &key, buf, 0); 32 + ASSERT_OK(ret, "lru_hash_map update"); 33 + ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.lru_hash_map), &key); 34 + ASSERT_OK(ret, "lru_hash_map delete"); 35 + 36 + map_kptr__destroy(skel); 37 + }
+56
tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
··· 1 + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 + 3 + #include "test_progs.h" 4 + #include "testing_helpers.h" 5 + 6 + static void clear_test_state(struct test_state *state) 7 + { 8 + state->error_cnt = 0; 9 + state->sub_succ_cnt = 0; 10 + state->skip_cnt = 0; 11 + } 12 + 13 + void test_prog_tests_framework(void) 14 + { 15 + struct test_state *state = env.test_state; 16 + 17 + /* in all the ASSERT calls below we need to return on the first 18 + * error due to the fact that we are cleaning the test state after 19 + * each dummy subtest 20 + */ 21 + 22 + /* test we properly count skipped tests with subtests */ 23 + if (test__start_subtest("test_good_subtest")) 24 + test__end_subtest(); 25 + if (!ASSERT_EQ(state->skip_cnt, 0, "skip_cnt_check")) 26 + return; 27 + if (!ASSERT_EQ(state->error_cnt, 0, "error_cnt_check")) 28 + return; 29 + if (!ASSERT_EQ(state->subtest_num, 1, "subtest_num_check")) 30 + return; 31 + clear_test_state(state); 32 + 33 + if (test__start_subtest("test_skip_subtest")) { 34 + test__skip(); 35 + test__end_subtest(); 36 + } 37 + if (test__start_subtest("test_skip_subtest")) { 38 + test__skip(); 39 + test__end_subtest(); 40 + } 41 + if (!ASSERT_EQ(state->skip_cnt, 2, "skip_cnt_check")) 42 + return; 43 + if (!ASSERT_EQ(state->subtest_num, 3, "subtest_num_check")) 44 + return; 45 + clear_test_state(state); 46 + 47 + if (test__start_subtest("test_fail_subtest")) { 48 + test__fail(); 49 + test__end_subtest(); 50 + } 51 + if (!ASSERT_EQ(state->error_cnt, 1, "error_cnt_check")) 52 + return; 53 + if (!ASSERT_EQ(state->subtest_num, 4, "subtest_num_check")) 54 + return; 55 + clear_test_state(state); 56 + }
+7 -16
tools/testing/selftests/bpf/prog_tests/reference_tracking.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0
2 2 #include <test_progs.h>
3 3 
4 - static void toggle_object_autoload_progs(const struct bpf_object *obj,
5 -                      const char *name_load)
6 - {
7 -     struct bpf_program *prog;
8 - 
9 -     bpf_object__for_each_program(prog, obj) {
10 -         const char *name = bpf_program__name(prog);
11 - 
12 -         if (!strcmp(name_load, name))
13 -             bpf_program__set_autoload(prog, true);
14 -         else
15 -             bpf_program__set_autoload(prog, false);
16 -     }
17 - }
18 - 
19 4 void test_reference_tracking(void)
20 5 {
21 6     const char *file = "test_sk_lookup_kern.o";
··· 24 39         goto cleanup;
25 40 
26 41     bpf_object__for_each_program(prog, obj_iter) {
42 +         struct bpf_program *p;
27 43         const char *name;
28 44 
29 45         name = bpf_program__name(prog);
··· 35 49         if (!ASSERT_OK_PTR(obj, "obj_open_file"))
36 50             goto cleanup;
37 51 
38 -         toggle_object_autoload_progs(obj, name);
52 +         /* all programs are not loaded by default, so just set
53 +          * autoload to true for the single prog under test
54 +          */
55 +         p = bpf_object__find_program_by_name(obj, name);
56 +         bpf_program__set_autoload(p, true);
57 + 
39 58         /* Expect verifier failure if test name has 'err' */
40 59         if (strncmp(name, "err_", sizeof("err_") - 1) == 0) {
41 60             libbpf_print_fn_t old_print_fn;
+45
tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <test_progs.h> 3 + #include <network_helpers.h> 4 + #include "skb_load_bytes.skel.h" 5 + 6 + void test_skb_load_bytes(void) 7 + { 8 + struct skb_load_bytes *skel; 9 + int err, prog_fd, test_result; 10 + struct __sk_buff skb = { 0 }; 11 + 12 + LIBBPF_OPTS(bpf_test_run_opts, tattr, 13 + .data_in = &pkt_v4, 14 + .data_size_in = sizeof(pkt_v4), 15 + .ctx_in = &skb, 16 + .ctx_size_in = sizeof(skb), 17 + ); 18 + 19 + skel = skb_load_bytes__open_and_load(); 20 + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) 21 + return; 22 + 23 + prog_fd = bpf_program__fd(skel->progs.skb_process); 24 + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) 25 + goto out; 26 + 27 + skel->bss->load_offset = (uint32_t)(-1); 28 + err = bpf_prog_test_run_opts(prog_fd, &tattr); 29 + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) 30 + goto out; 31 + test_result = skel->bss->test_result; 32 + if (!ASSERT_EQ(test_result, -EFAULT, "offset -1")) 33 + goto out; 34 + 35 + skel->bss->load_offset = (uint32_t)10; 36 + err = bpf_prog_test_run_opts(prog_fd, &tattr); 37 + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) 38 + goto out; 39 + test_result = skel->bss->test_result; 40 + if (!ASSERT_EQ(test_result, 0, "offset 10")) 41 + goto out; 42 + 43 + out: 44 + skb_load_bytes__destroy(skel); 45 + }
+1 -3
tools/testing/selftests/bpf/prog_tests/snprintf.c
··· 83 83     test_snprintf__destroy(skel);
84 84 }
85 85 
86 - #define min(a, b) ((a) < (b) ? (a) : (b))
87 - 
88 86 /* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
89 87 static int load_single_snprintf(char *fmt)
90 88 {
··· 93 95     if (!skel)
94 96         return -EINVAL;
95 97 
96 -     memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10));
98 +     memcpy(skel->rodata->fmt, fmt, MIN(strlen(fmt) + 1, 10));
97 99 
98 100     ret = test_snprintf_single__load(skel);
99 101     test_snprintf_single__destroy(skel);
-1
tools/testing/selftests/bpf/prog_tests/tc_redirect.c
··· 949 949         return -1;
950 950     }
951 951 
952 - #define MAX(a, b) ((a) > (b) ? (a) : (b))
953 952 enum {
954 953     SRC_TO_TARGET = 0,
955 954     TARGET_TO_SRC = 1,
+3 -22
tools/testing/selftests/bpf/prog_tests/test_strncmp.c
··· 44 44 static void test_strncmp_ret(void)
45 45 {
46 46     struct strncmp_test *skel;
47 -     struct bpf_program *prog;
48 47     int err, got;
49 48 
50 49     skel = strncmp_test__open();
51 50     if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
52 51         return;
53 - 
54 -     bpf_object__for_each_program(prog, skel->obj)
55 -         bpf_program__set_autoload(prog, false);
56 52 
57 53     bpf_program__set_autoload(skel->progs.do_strncmp, true);
58 54 
··· 87 91 static void test_strncmp_bad_not_const_str_size(void)
88 92 {
89 93     struct strncmp_test *skel;
90 -     struct bpf_program *prog;
91 94     int err;
92 95 
93 96     skel = strncmp_test__open();
94 97     if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
95 98         return;
96 99 
97 -     bpf_object__for_each_program(prog, skel->obj)
98 -         bpf_program__set_autoload(prog, false);
99 - 
100 -     bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size,
101 -                   true);
100 +     bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, true);
102 101 
103 102     err = strncmp_test__load(skel);
104 103     ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size");
··· 104 113 static void test_strncmp_bad_writable_target(void)
105 114 {
106 115     struct strncmp_test *skel;
107 -     struct bpf_program *prog;
108 116     int err;
109 117 
110 118     skel = strncmp_test__open();
111 119     if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
112 120         return;
113 121 
114 -     bpf_object__for_each_program(prog, skel->obj)
115 -         bpf_program__set_autoload(prog, false);
116 - 
117 -     bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target,
118 -                   true);
122 +     bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, true);
119 123 
120 124     err = strncmp_test__load(skel);
121 125     ASSERT_ERR(err, "strncmp_test load bad_writable_target");
··· 121 135 static void test_strncmp_bad_not_null_term_target(void)
122 136 {
123 137     struct strncmp_test *skel;
124 -     struct bpf_program *prog;
125 138     int err;
126 139 
127 140     skel = strncmp_test__open();
128 141     if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
129 142         return;
130 143 
131 -     bpf_object__for_each_program(prog, skel->obj)
132 -         bpf_program__set_autoload(prog, false);
133 - 
134 -     bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target,
135 -                   true);
144 +     bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, true);
136 145 
137 146     err = strncmp_test__load(skel);
138 147     ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target");
+2 -1
tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
··· 34 34 
35 35     /* trigger & validate shared library u[ret]probes attached by name */
36 36     mem = malloc(malloc_sz);
37 -     free(mem);
38 37 
39 38     ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1");
40 39     ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
··· 43 44     ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
44 45     ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc");
45 46     ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
47 + 
48 +     free(mem);
46 49 cleanup:
47 50     test_uprobe_autoattach__destroy(skel);
48 51 }
+13 -2
tools/testing/selftests/bpf/progs/exhandler_kern.c
··· 7 7 #include <bpf/bpf_tracing.h>
8 8 #include <bpf/bpf_core_read.h>
9 9 
10 + #define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
11 + 
12 12 char _license[] SEC("license") = "GPL";
13 14 
14 14 unsigned int exception_triggered;
··· 39 37      */
40 38     work = task->task_works;
41 39     func = work->func;
42 -     if (!work && !func)
43 -         exception_triggered++;
40 +     /* Currently verifier will fail for `btf_ptr |= btf_ptr` * instruction.
41 +      * To workaround the issue, use barrier_var() and rewrite as below to
42 +      * prevent compiler from generating verifier-unfriendly code.
43 +      */
44 +     barrier_var(work);
45 +     if (work)
46 +         return 0;
47 +     barrier_var(func);
48 +     if (func)
49 +         return 0;
50 +     exception_triggered++;
44 51     return 0;
45 52 }
+6 -1
tools/testing/selftests/bpf/progs/linked_funcs1.c
··· 61 61 /* here we'll force set_output_ctx2() to be __hidden in the final obj file */
62 62 __hidden extern void set_output_ctx2(__u64 *ctx);
63 63 
64 - SEC("raw_tp/sys_enter")
64 + SEC("?raw_tp/sys_enter")
65 65 int BPF_PROG(handler1, struct pt_regs *regs, long id)
66 66 {
67 +     static volatile int whatever;
68 + 
67 69     if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
68 70         return 0;
71 + 
72 +     /* make sure we have CO-RE relocations in main program */
73 +     whatever = bpf_core_type_size(struct task_struct);
69 74 
70 75     set_output_val2(1000);
71 76     set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
+6 -1
tools/testing/selftests/bpf/progs/linked_funcs2.c
··· 61 61 /* here we'll force set_output_ctx1() to be __hidden in the final obj file */
62 62 __hidden extern void set_output_ctx1(__u64 *ctx);
63 63 
64 - SEC("raw_tp/sys_enter")
64 + SEC("?raw_tp/sys_enter")
65 65 int BPF_PROG(handler2, struct pt_regs *regs, long id)
66 66 {
67 +     static volatile int whatever;
68 + 
67 69     if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
68 70         return 0;
71 + 
72 +     /* make sure we have CO-RE relocations in main program */
73 +     whatever = bpf_core_type_size(struct task_struct);
69 74 
70 75     set_output_val1(2000);
71 76     set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
+190
tools/testing/selftests/bpf/progs/map_kptr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <vmlinux.h> 3 + #include <bpf/bpf_tracing.h> 4 + #include <bpf/bpf_helpers.h> 5 + 6 + struct map_value { 7 + struct prog_test_ref_kfunc __kptr *unref_ptr; 8 + struct prog_test_ref_kfunc __kptr_ref *ref_ptr; 9 + }; 10 + 11 + struct array_map { 12 + __uint(type, BPF_MAP_TYPE_ARRAY); 13 + __type(key, int); 14 + __type(value, struct map_value); 15 + __uint(max_entries, 1); 16 + } array_map SEC(".maps"); 17 + 18 + struct hash_map { 19 + __uint(type, BPF_MAP_TYPE_HASH); 20 + __type(key, int); 21 + __type(value, struct map_value); 22 + __uint(max_entries, 1); 23 + } hash_map SEC(".maps"); 24 + 25 + struct hash_malloc_map { 26 + __uint(type, BPF_MAP_TYPE_HASH); 27 + __type(key, int); 28 + __type(value, struct map_value); 29 + __uint(max_entries, 1); 30 + __uint(map_flags, BPF_F_NO_PREALLOC); 31 + } hash_malloc_map SEC(".maps"); 32 + 33 + struct lru_hash_map { 34 + __uint(type, BPF_MAP_TYPE_LRU_HASH); 35 + __type(key, int); 36 + __type(value, struct map_value); 37 + __uint(max_entries, 1); 38 + } lru_hash_map SEC(".maps"); 39 + 40 + #define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \ 41 + struct { \ 42 + __uint(type, map_type); \ 43 + __uint(max_entries, 1); \ 44 + __uint(key_size, sizeof(int)); \ 45 + __uint(value_size, sizeof(int)); \ 46 + __array(values, struct inner_map_type); \ 47 + } name SEC(".maps") = { \ 48 + .values = { [0] = &inner_map_type }, \ 49 + } 50 + 51 + DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps); 52 + DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps); 53 + DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps); 54 + DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps); 55 + DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps); 56 + DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps); 57 + 
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps); 58 + DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps); 59 + 60 + extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 61 + extern struct prog_test_ref_kfunc * 62 + bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; 63 + extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 64 + 65 + static void test_kptr_unref(struct map_value *v) 66 + { 67 + struct prog_test_ref_kfunc *p; 68 + 69 + p = v->unref_ptr; 70 + /* store untrusted_ptr_or_null_ */ 71 + v->unref_ptr = p; 72 + if (!p) 73 + return; 74 + if (p->a + p->b > 100) 75 + return; 76 + /* store untrusted_ptr_ */ 77 + v->unref_ptr = p; 78 + /* store NULL */ 79 + v->unref_ptr = NULL; 80 + } 81 + 82 + static void test_kptr_ref(struct map_value *v) 83 + { 84 + struct prog_test_ref_kfunc *p; 85 + 86 + p = v->ref_ptr; 87 + /* store ptr_or_null_ */ 88 + v->unref_ptr = p; 89 + if (!p) 90 + return; 91 + if (p->a + p->b > 100) 92 + return; 93 + /* store NULL */ 94 + p = bpf_kptr_xchg(&v->ref_ptr, NULL); 95 + if (!p) 96 + return; 97 + if (p->a + p->b > 100) { 98 + bpf_kfunc_call_test_release(p); 99 + return; 100 + } 101 + /* store ptr_ */ 102 + v->unref_ptr = p; 103 + bpf_kfunc_call_test_release(p); 104 + 105 + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); 106 + if (!p) 107 + return; 108 + /* store ptr_ */ 109 + p = bpf_kptr_xchg(&v->ref_ptr, p); 110 + if (!p) 111 + return; 112 + if (p->a + p->b > 100) { 113 + bpf_kfunc_call_test_release(p); 114 + return; 115 + } 116 + bpf_kfunc_call_test_release(p); 117 + } 118 + 119 + static void test_kptr_get(struct map_value *v) 120 + { 121 + struct prog_test_ref_kfunc *p; 122 + 123 + p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); 124 + if (!p) 125 + return; 126 + if (p->a + p->b > 100) { 127 + bpf_kfunc_call_test_release(p); 128 + return; 129 + } 130 + 
bpf_kfunc_call_test_release(p); 131 + } 132 + 133 + static void test_kptr(struct map_value *v) 134 + { 135 + test_kptr_unref(v); 136 + test_kptr_ref(v); 137 + test_kptr_get(v); 138 + } 139 + 140 + SEC("tc") 141 + int test_map_kptr(struct __sk_buff *ctx) 142 + { 143 + struct map_value *v; 144 + int i, key = 0; 145 + 146 + #define TEST(map) \ 147 + v = bpf_map_lookup_elem(&map, &key); \ 148 + if (!v) \ 149 + return 0; \ 150 + test_kptr(v) 151 + 152 + TEST(array_map); 153 + TEST(hash_map); 154 + TEST(hash_malloc_map); 155 + TEST(lru_hash_map); 156 + 157 + #undef TEST 158 + return 0; 159 + } 160 + 161 + SEC("tc") 162 + int test_map_in_map_kptr(struct __sk_buff *ctx) 163 + { 164 + struct map_value *v; 165 + int i, key = 0; 166 + void *map; 167 + 168 + #define TEST(map_in_map) \ 169 + map = bpf_map_lookup_elem(&map_in_map, &key); \ 170 + if (!map) \ 171 + return 0; \ 172 + v = bpf_map_lookup_elem(map, &key); \ 173 + if (!v) \ 174 + return 0; \ 175 + test_kptr(v) 176 + 177 + TEST(array_of_array_maps); 178 + TEST(array_of_hash_maps); 179 + TEST(array_of_hash_malloc_maps); 180 + TEST(array_of_lru_hash_maps); 181 + TEST(hash_of_array_maps); 182 + TEST(hash_of_hash_maps); 183 + TEST(hash_of_hash_malloc_maps); 184 + TEST(hash_of_lru_hash_maps); 185 + 186 + #undef TEST 187 + return 0; 188 + } 189 + 190 + char _license[] SEC("license") = "GPL";
+4
tools/testing/selftests/bpf/progs/pyperf.h
··· 299 299 #ifdef NO_UNROLL
300 300 #pragma clang loop unroll(disable)
301 301 #else
302 + #ifdef UNROLL_COUNT
303 + #pragma clang loop unroll_count(UNROLL_COUNT)
304 + #else
302 305 #pragma clang loop unroll(full)
306 + #endif
303 307 #endif /* NO_UNROLL */
304 308     /* Unwind python stack */
305 309     for (int i = 0; i < STACK_MAX_LEN; ++i) {
+7 -4
tools/testing/selftests/bpf/progs/pyperf600.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0
2 2 // Copyright (c) 2019 Facebook
3 3 #define STACK_MAX_LEN 600
4 - /* clang will not unroll the loop 600 times.
5 - * Instead it will unroll it to the amount it deemed
6 - * appropriate, but the loop will still execute 600 times.
7 - * Total program size is around 90k insns
4 + /* Full unroll of 600 iterations will have total
5 + * program size close to 298k insns and this may
6 + * cause BPF_JMP insn out of 16-bit integer range.
7 + * So limit the unroll size to 150 so the
8 + * total program size is around 80k insns but
9 + * the loop will still execute 600 times.
8 10 */
11 + #define UNROLL_COUNT 150
9 12 #include "pyperf.h"
+19
tools/testing/selftests/bpf/progs/skb_load_bytes.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + __u32 load_offset = 0; 9 + int test_result = 0; 10 + 11 + SEC("tc") 12 + int skb_process(struct __sk_buff *skb) 13 + { 14 + char buf[16]; 15 + 16 + test_result = bpf_skb_load_bytes(skb, load_offset, buf, 10); 17 + 18 + return 0; 19 + }
+4 -4
tools/testing/selftests/bpf/progs/strncmp_test.c
···

 char _license[] SEC("license") = "GPL";

-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int do_strncmp(void *ctx)
 {
 	if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
···
 	return 0;
 }

-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int strncmp_bad_not_const_str_size(void *ctx)
 {
 	/* The value of string size is not const, so will fail */
···
 	return 0;
 }

-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int strncmp_bad_writable_target(void *ctx)
 {
 	/* Compared target is not read-only, so will fail */
···
 	return 0;
 }

-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int strncmp_bad_not_null_term_target(void *ctx)
 {
 	/* Compared target is not null-terminated, so will fail */
+8 -8
tools/testing/selftests/bpf/progs/test_helper_restricted.c
···
 	}
 }

-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int raw_tp_timer(void *ctx)
 {
 	timer_work();
···
 	return 0;
 }

-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int tp_timer(void *ctx)
 {
 	timer_work();
···
 	return 0;
 }

-SEC("kprobe/sys_nanosleep")
+SEC("?kprobe/sys_nanosleep")
 int kprobe_timer(void *ctx)
 {
 	timer_work();
···
 	return 0;
 }

-SEC("perf_event")
+SEC("?perf_event")
 int perf_event_timer(void *ctx)
 {
 	timer_work();
···
 	return 0;
 }

-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int raw_tp_spin_lock(void *ctx)
 {
 	spin_lock_work();
···
 	return 0;
 }

-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int tp_spin_lock(void *ctx)
 {
 	spin_lock_work();
···
 	return 0;
 }

-SEC("kprobe/sys_nanosleep")
+SEC("?kprobe/sys_nanosleep")
 int kprobe_spin_lock(void *ctx)
 {
 	spin_lock_work();
···
 	return 0;
 }

-SEC("perf_event")
+SEC("?perf_event")
 int perf_event_spin_lock(void *ctx)
 {
 	spin_lock_work();
+38
tools/testing/selftests/bpf/progs/test_log_fixup.c
···
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct___bad {
+	int pid;
+	int fake_field;
+	void *fake_field_subprog;
+} __attribute__((preserve_access_index));
+
+SEC("?raw_tp/sys_enter")
+int bad_relo(const void *ctx)
+{
+	static struct task_struct___bad *t;
+
+	return bpf_core_field_size(t->fake_field);
+}
+
+static __noinline int bad_subprog(void)
+{
+	static struct task_struct___bad *t;
+
+	/* ugliness below is a field offset relocation */
+	return (void *)&t->fake_field_subprog - (void *)t;
+}
+
+SEC("?raw_tp/sys_enter")
+int bad_relo_subprog(const void *ctx)
+{
+	static struct task_struct___bad *t;
+
+	return bad_subprog() + bpf_core_field_size(t->pid);
+}
+
+char _license[] SEC("license") = "GPL";
+9 -9
tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
···
 	return result;
 }

-SEC("tc")
+SEC("?tc")
 int sk_lookup_success(struct __sk_buff *skb)
 {
 	void *data_end = (void *)(long)skb->data_end;
···
 	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
 }

-SEC("tc")
+SEC("?tc")
 int sk_lookup_success_simple(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
···
 	return 0;
 }

-SEC("tc")
+SEC("?tc")
 int err_use_after_free(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
···
 	return family;
 }

-SEC("tc")
+SEC("?tc")
 int err_modify_sk_pointer(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
···
 	return 0;
 }

-SEC("tc")
+SEC("?tc")
 int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
···
 	return 0;
 }

-SEC("tc")
+SEC("?tc")
 int err_no_release(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
···
 	return 0;
 }

-SEC("tc")
+SEC("?tc")
 int err_release_twice(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
···
 	return 0;
 }

-SEC("tc")
+SEC("?tc")
 int err_release_unchecked(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
···
 	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }

-SEC("tc")
+SEC("?tc")
 int err_no_release_subcall(struct __sk_buff *skb)
 {
 	lookup_no_release(skb);
+3 -1
tools/testing/selftests/bpf/test_cgroup_storage.c
···
 #include <stdlib.h>
 #include <sys/sysinfo.h>

-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
···
 		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
 		goto err;
 	}
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

 	map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key),
 				sizeof(value), 0, NULL);
+3 -1
tools/testing/selftests/bpf/test_dev_cgroup.c
···

 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
-#include "bpf_rlimit.h"

 #define DEV_CGROUP_PROG "./dev_cgroup.o"
···
 	int error = EXIT_FAILURE;
 	int prog_fd, cgroup_fd;
 	__u32 prog_cnt;
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

 	if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
 			       &obj, &prog_fd)) {
+16 -27
tools/testing/selftests/bpf/test_lpm_map.c
···
 #include <bpf/bpf.h>

 #include "bpf_util.h"
-#include "bpf_rlimit.h"

 struct tlpm_node {
 	struct tlpm_node *next;
···

 	/* Test some lookups that should not match any entry */
 	inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
-	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);

 	inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
-	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);

 	inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
-	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -ENOENT);

 	close(map_fd_ipv4);
 	close(map_fd_ipv6);
···
 	/* remove non-existent node */
 	key->prefixlen = 32;
 	inet_pton(AF_INET, "10.0.0.1", key->data);
-	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);

 	key->prefixlen = 30; // unused prefix so far
 	inet_pton(AF_INET, "192.255.0.0", key->data);
-	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);

 	key->prefixlen = 16; // same prefix as the root node
 	inet_pton(AF_INET, "192.255.0.0", key->data);
-	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);

 	/* assert initial lookup */
 	key->prefixlen = 32;
···

 	key->prefixlen = 32;
 	inet_pton(AF_INET, "192.168.128.1", key->data);
-	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);

 	close(map_fd);
 }
···
 	assert(map_fd >= 0);

 	/* empty tree. get_next_key should return ENOENT */
-	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -ENOENT);

 	/* get and verify the first key, get the second one should fail. */
 	key_p->prefixlen = 16;
···
 	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
 	       key_p->data[1] == 168);

-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);

 	/* no exact matching key should get the first one in post order. */
 	key_p->prefixlen = 8;
···
 	       next_key_p->data[1] == 168);

 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);

 	/* Add one more element (total three) */
 	key_p->prefixlen = 24;
···
 	       next_key_p->data[1] == 168);

 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);

 	/* Add one more element (total four) */
 	key_p->prefixlen = 24;
···
 	       next_key_p->data[1] == 168);

 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);

 	/* Add one more element (total five) */
 	key_p->prefixlen = 28;
···
 	       next_key_p->data[1] == 168);

 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);

 	/* no exact matching key should return the first one in post order */
 	key_p->prefixlen = 22;
···

 	/* we want predictable, pseudo random tests */
 	srand(0xf00ba1);
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

 	test_lpm_basic();
 	test_lpm_order();
+27 -43
tools/testing/selftests/bpf/test_lru_map.c
···
 #include <bpf/libbpf.h>

 #include "bpf_util.h"
-#include "bpf_rlimit.h"
 #include "../../../include/linux/filter.h"

 #define LOCAL_FREE_TARGET (128)
···
 				    BPF_NOEXIST));

 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
-	       /* key=1 already exists */
-	       && errno == EEXIST);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+	/* key=1 already exists */

-	assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
-	       errno == EINVAL);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -EINVAL);

 	/* insert key=2 element */

 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);

 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
-	       /* key=2 is not there */
-	       errno == ENOENT);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+	/* key=2 is not there */

 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
···

 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);

 	/* check that key=1 can be found and mark the ref bit to
 	 * stop LRU from removing key=1
···

 	/* key=2 has been removed from the LRU */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);

 	/* lookup elem key=1 and delete it, then check it doesn't exist */
 	key = 1;
···
 	end_key = 1 + batch_size;
 	value[0] = 4321;
 	for (key = 1; key < end_key; key++) {
-		assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-		       errno == ENOENT);
+		assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
···
 	assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value));

 	/* Cannot find the last key because it was removed by LRU */
-	assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -ENOENT);
 }

 /* Test map with only one element */
···
 				    BPF_NOEXIST));

 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
-	       /* key=1 already exists */
-	       && errno == EEXIST);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+	/* key=1 already exists */

 	/* insert key=2 element */

 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);

 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
-	       /* key=2 is not there */
-	       errno == ENOENT);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+	/* key=2 is not there */

 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
···

 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);

 	/* check that key=1 can be found and mark the ref bit to
 	 * stop LRU from removing key=1
···

 	/* key=2 has been removed from the LRU */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);

 	assert(map_equal(lru_map_fd, expected_map_fd));
···
 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));

 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
-	       /* key=1 already exists */
-	       && errno == EEXIST);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+	/* key=1 already exists */

 	/* insert key=2 element */

 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);

 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
-	       /* key=2 is not there */
-	       errno == ENOENT);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+	/* key=2 is not there */

 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 	assert(!bpf_map_update_elem(expected_map_fd, &key, value,
···

 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);

 	/* check that key=1 can be found and do _not_ mark ref bit.
 	 * this will be evicted on next update.
···

 	/* key=1 has been removed from the LRU */
 	key = 1;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);

 	assert(map_equal(lru_map_fd, expected_map_fd));
···
 	nr_cpus = bpf_num_possible_cpus();
 	assert(nr_cpus != -1);
 	printf("nr_cpus:%d\n\n", nr_cpus);
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

 	for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
 		unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
+189 -292
tools/testing/selftests/bpf/test_progs.c
···
  */
 #define _GNU_SOURCE
 #include "test_progs.h"
+#include "testing_helpers.h"
 #include "cgroup_helpers.h"
 #include <argp.h>
 #include <pthread.h>
···
 	int test_num;
 	void (*run_test)(void);
 	void (*run_serial_test)(void);
-	bool force_log;
-	int error_cnt;
-	int skip_cnt;
-	int sub_succ_cnt;
 	bool should_run;
-	bool tested;
 	bool need_cgroup_cleanup;
-
-	char *subtest_name;
-	int subtest_num;
-
-	/* store counts before subtest started */
-	int old_error_cnt;
 };

 /* Override C runtime library's usleep() implementation to ensure nanosleep()
···
 	int i;

 	for (i = 0; i < sel->blacklist.cnt; i++) {
-		if (glob_match(name, sel->blacklist.strs[i]))
+		if (glob_match(name, sel->blacklist.tests[i].name) &&
+		    !sel->blacklist.tests[i].subtest_cnt)
 			return false;
 	}

 	for (i = 0; i < sel->whitelist.cnt; i++) {
-		if (glob_match(name, sel->whitelist.strs[i]))
+		if (glob_match(name, sel->whitelist.tests[i].name))
 			return true;
 	}

···
 	return num < sel->num_set_len && sel->num_set[num];
 }

-static void dump_test_log(const struct prog_test_def *test, bool failed)
+static bool should_run_subtest(struct test_selector *sel,
+			       struct test_selector *subtest_sel,
+			       int subtest_num,
+			       const char *test_name,
+			       const char *subtest_name)
 {
-	if (stdout == env.stdout)
-		return;
+	int i, j;
+
+	for (i = 0; i < sel->blacklist.cnt; i++) {
+		if (glob_match(test_name, sel->blacklist.tests[i].name)) {
+			if (!sel->blacklist.tests[i].subtest_cnt)
+				return false;
+
+			for (j = 0; j < sel->blacklist.tests[i].subtest_cnt; j++) {
+				if (glob_match(subtest_name,
+					       sel->blacklist.tests[i].subtests[j]))
+					return false;
+			}
+		}
+	}
+
+	for (i = 0; i < sel->whitelist.cnt; i++) {
+		if (glob_match(test_name, sel->whitelist.tests[i].name)) {
+			if (!sel->whitelist.tests[i].subtest_cnt)
+				return true;
+
+			for (j = 0; j < sel->whitelist.tests[i].subtest_cnt; j++) {
+				if (glob_match(subtest_name,
+					       sel->whitelist.tests[i].subtests[j]))
+					return true;
+			}
+		}
+	}
+
+	if (!sel->whitelist.cnt && !subtest_sel->num_set)
+		return true;
+
+	return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num];
+}
+
+static void dump_test_log(const struct prog_test_def *test,
+			  const struct test_state *test_state,
+			  bool force_failed)
+{
+	bool failed = test_state->error_cnt > 0 || force_failed;

 	/* worker always holds log */
 	if (env.worker_id != -1)
 		return;

-	fflush(stdout); /* exports env.log_buf & env.log_cnt */
+	fflush(stdout); /* exports test_state->log_buf & test_state->log_cnt */

-	if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
-		if (env.log_cnt) {
-			env.log_buf[env.log_cnt] = '\0';
-			fprintf(env.stdout, "%s", env.log_buf);
-			if (env.log_buf[env.log_cnt - 1] != '\n')
+	fprintf(env.stdout, "#%-3d %s:%s\n",
+		test->test_num, test->test_name,
+		failed ? "FAIL" : (test_state->skip_cnt ? "SKIP" : "OK"));
+
+	if (env.verbosity > VERBOSE_NONE || test_state->force_log || failed) {
+		if (test_state->log_cnt) {
+			test_state->log_buf[test_state->log_cnt] = '\0';
+			fprintf(env.stdout, "%s", test_state->log_buf);
+			if (test_state->log_buf[test_state->log_cnt - 1] != '\n')
 				fprintf(env.stdout, "\n");
 		}
-	}
-}
-
-static void skip_account(void)
-{
-	if (env.test->skip_cnt) {
-		env.skip_cnt++;
-		env.test->skip_cnt = 0;
 	}
 }

···
  */
 static void reset_affinity(void)
 {
-
 	cpu_set_t cpuset;
 	int i, err;

···
 void test__end_subtest(void)
 {
 	struct prog_test_def *test = env.test;
-	int sub_error_cnt = test->error_cnt - test->old_error_cnt;
-
-	dump_test_log(test, sub_error_cnt);
+	struct test_state *state = env.test_state;
+	int sub_error_cnt = state->error_cnt - state->old_error_cnt;

 	fprintf(stdout, "#%d/%d %s/%s:%s\n",
-		test->test_num, test->subtest_num, test->test_name, test->subtest_name,
-		sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+		test->test_num, state->subtest_num, test->test_name, state->subtest_name,
+		sub_error_cnt ? "FAIL" : (state->subtest_skip_cnt ? "SKIP" : "OK"));

-	if (sub_error_cnt)
-		test->error_cnt++;
-	else if (test->skip_cnt == 0)
-		test->sub_succ_cnt++;
-	skip_account();
+	if (sub_error_cnt == 0) {
+		if (state->subtest_skip_cnt == 0) {
+			state->sub_succ_cnt++;
+		} else {
+			state->subtest_skip_cnt = 0;
+			state->skip_cnt++;
+		}
+	}

-	free(test->subtest_name);
-	test->subtest_name = NULL;
+	free(state->subtest_name);
+	state->subtest_name = NULL;
 }

-bool test__start_subtest(const char *name)
+bool test__start_subtest(const char *subtest_name)
 {
 	struct prog_test_def *test = env.test;
+	struct test_state *state = env.test_state;

-	if (test->subtest_name)
+	if (state->subtest_name)
 		test__end_subtest();

-	test->subtest_num++;
+	state->subtest_num++;

-	if (!name || !name[0]) {
+	if (!subtest_name || !subtest_name[0]) {
 		fprintf(env.stderr,
 			"Subtest #%d didn't provide sub-test name!\n",
-			test->subtest_num);
+			state->subtest_num);
 		return false;
 	}

-	if (!should_run(&env.subtest_selector, test->subtest_num, name))
+	if (!should_run_subtest(&env.test_selector,
+				&env.subtest_selector,
+				state->subtest_num,
+				test->test_name,
+				subtest_name))
 		return false;

-	test->subtest_name = strdup(name);
-	if (!test->subtest_name) {
+	state->subtest_name = strdup(subtest_name);
+	if (!state->subtest_name) {
 		fprintf(env.stderr,
 			"Subtest #%d: failed to copy subtest name!\n",
-			test->subtest_num);
+			state->subtest_num);
 		return false;
 	}
-	env.test->old_error_cnt = env.test->error_cnt;
+	state->old_error_cnt = state->error_cnt;

 	return true;
 }

 void test__force_log(void)
 {
-	env.test->force_log = true;
+	env.test_state->force_log = true;
 }

 void test__skip(void)
 {
-	env.test->skip_cnt++;
+	if (env.test_state->subtest_name)
+		env.test_state->subtest_skip_cnt++;
+	else
+		env.test_state->skip_cnt++;
 }

 void test__fail(void)
 {
-	env.test->error_cnt++;
+	env.test_state->error_cnt++;
 }

 int test__join_cgroup(const char *path)
···
 #include <prog_tests/tests.h>
 #undef DEFINE_TEST
 };
+
 static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
+
+static struct test_state test_states[ARRAY_SIZE(prog_test_defs)];

 const char *argp_program_version = "test_progs 0.1";
 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
···
 	return 0;
 }

-static void free_str_set(const struct str_set *set)
+static void free_test_filter_set(const struct test_filter_set *set)
 {
-	int i;
+	int i, j;

 	if (!set)
 		return;

-	for (i = 0; i < set->cnt; i++)
-		free((void *)set->strs[i]);
-	free(set->strs);
-}
+	for (i = 0; i < set->cnt; i++) {
+		free((void *)set->tests[i].name);
+		for (j = 0; j < set->tests[i].subtest_cnt; j++)
+			free((void *)set->tests[i].subtests[j]);

-static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern)
-{
-	char *input, *state = NULL, *next, **tmp, **strs = NULL;
-	int i, cnt = 0;
-
-	input = strdup(s);
-	if (!input)
-		return -ENOMEM;
-
-	while ((next = strtok_r(state ? NULL : input, ",", &state))) {
-		tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
-		if (!tmp)
-			goto err;
-		strs = tmp;
-
-		if (is_glob_pattern) {
-			strs[cnt] = strdup(next);
-			if (!strs[cnt])
-				goto err;
-		} else {
-			strs[cnt] = malloc(strlen(next) + 2 + 1);
-			if (!strs[cnt])
-				goto err;
-			sprintf(strs[cnt], "*%s*", next);
-		}
-
-		cnt++;
+		free((void *)set->tests[i].subtests);
 	}

-	tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt));
-	if (!tmp)
-		goto err;
-	memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt);
-	set->strs = (const char **)tmp;
-	set->cnt += cnt;
+	free((void *)set->tests);
+}

-	free(input);
-	free(strs);
-	return 0;
-err:
-	for (i = 0; i < cnt; i++)
-		free(strs[i]);
-	free(strs);
-	free(input);
-	return -ENOMEM;
+static void free_test_selector(struct test_selector *test_selector)
+{
+	free_test_filter_set(&test_selector->blacklist);
+	free_test_filter_set(&test_selector->whitelist);
+	free(test_selector->num_set);
 }

 extern int extra_prog_load_log_flags;
···
 	}
 	case ARG_TEST_NAME_GLOB_ALLOWLIST:
 	case ARG_TEST_NAME: {
-		char *subtest_str = strchr(arg, '/');
-
-		if (subtest_str) {
-			*subtest_str = '\0';
-			if (parse_str_list(subtest_str + 1,
-					   &env->subtest_selector.whitelist,
-					   key == ARG_TEST_NAME_GLOB_ALLOWLIST))
-				return -ENOMEM;
-		}
-		if (parse_str_list(arg, &env->test_selector.whitelist,
-				   key == ARG_TEST_NAME_GLOB_ALLOWLIST))
+		if (parse_test_list(arg,
+				    &env->test_selector.whitelist,
+				    key == ARG_TEST_NAME_GLOB_ALLOWLIST))
 			return -ENOMEM;
 		break;
 	}
 	case ARG_TEST_NAME_GLOB_DENYLIST:
 	case ARG_TEST_NAME_BLACKLIST: {
-		char *subtest_str = strchr(arg, '/');
-
-		if (subtest_str) {
-			*subtest_str = '\0';
-			if (parse_str_list(subtest_str + 1,
-					   &env->subtest_selector.blacklist,
-					   key == ARG_TEST_NAME_GLOB_DENYLIST))
-				return -ENOMEM;
-		}
-		if (parse_str_list(arg, &env->test_selector.blacklist,
-				   key == ARG_TEST_NAME_GLOB_DENYLIST))
+		if (parse_test_list(arg,
+				    &env->test_selector.blacklist,
+				    key == ARG_TEST_NAME_GLOB_DENYLIST))
 			return -ENOMEM;
 		break;
 	}
···
 	return 0;
 }

-static void stdio_hijack(void)
+static void stdio_hijack(char **log_buf, size_t *log_cnt)
 {
 #ifdef __GLIBC__
 	env.stdout = stdout;
···
 	/* stdout and stderr -> buffer */
 	fflush(stdout);

-	stdout = open_memstream(&env.log_buf, &env.log_cnt);
+	stdout = open_memstream(log_buf, log_cnt);
 	if (!stdout) {
 		stdout = env.stdout;
 		perror("open_memstream");
···
 	sz = backtrace(bt, ARRAY_SIZE(bt));

 	if (env.test)
-		dump_test_log(env.test, true);
+		dump_test_log(env.test, env.test_state, true);
 	if (env.stdout)
 		stdio_restore();
 	if (env.worker_id != -1)
···
 static int current_test_idx;
 static pthread_mutex_t current_test_lock;
 static pthread_mutex_t stdout_output_lock;
-
-struct test_result {
-	int error_cnt;
-	int skip_cnt;
-	int sub_succ_cnt;
-
-	size_t log_cnt;
-	char *log_buf;
-};
-
-static struct test_result test_results[ARRAY_SIZE(prog_test_defs)];

 static inline const char *str_msg(const struct msg *msg, char *buf)
 {
···
 static void run_one_test(int test_num)
 {
 	struct prog_test_def *test = &prog_test_defs[test_num];
+	struct test_state *state = &test_states[test_num];

 	env.test = test;
+	env.test_state = state;
+
+	stdio_hijack(&state->log_buf, &state->log_cnt);

 	if (test->run_test)
 		test->run_test();
···
 		test->run_serial_test();

 	/* ensure last sub-test is finalized properly */
-	if (test->subtest_name)
+	if (state->subtest_name)
 		test__end_subtest();

-	test->tested = true;
+	state->tested = true;

-	dump_test_log(test, test->error_cnt);
+	dump_test_log(test, state, false);

 	reset_affinity();
 	restore_netns();
 	if (test->need_cgroup_cleanup)
 		cleanup_cgroup_environment();
+
+	stdio_restore();
 }

 struct dispatch_data {
···
 	while (true) {
 		int test_to_run = -1;
 		struct prog_test_def *test;
-		struct test_result *result;
+		struct test_state *state;

 		/* grab a test */
 		{
···
 		if (test_to_run != msg_test_done.test_done.test_num)
 			goto error;

-		test->tested = true;
-		result = &test_results[test_to_run];
-
-		result->error_cnt = msg_test_done.test_done.error_cnt;
-		result->skip_cnt = msg_test_done.test_done.skip_cnt;
-		result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt;
+		state = &test_states[test_to_run];
+		state->tested = true;
+		state->error_cnt = msg_test_done.test_done.error_cnt;
+		state->skip_cnt = msg_test_done.test_done.skip_cnt;
+		state->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt;

 		/* collect all logs */
 		if (msg_test_done.test_done.have_log) {
-			log_fp = open_memstream(&result->log_buf, &result->log_cnt);
+			log_fp = open_memstream(&state->log_buf, &state->log_cnt);
 			if (!log_fp)
 				goto error;

···
 			fclose(log_fp);
 			log_fp = NULL;
 		}
-		/* output log */
-		{
-			pthread_mutex_lock(&stdout_output_lock);
-
-			if (result->log_cnt) {
-				result->log_buf[result->log_cnt] = '\0';
-				fprintf(stdout, "%s", result->log_buf);
-				if (result->log_buf[result->log_cnt - 1] != '\n')
-					fprintf(stdout, "\n");
-			}
-
-			fprintf(stdout, "#%d %s:%s\n",
-				test->test_num, test->test_name,
-				result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
-
-			pthread_mutex_unlock(&stdout_output_lock);
-		}
-
 		} /* wait for test done */
+
+		pthread_mutex_lock(&stdout_output_lock);
+		dump_test_log(test, state, false);
+		pthread_mutex_unlock(&stdout_output_lock);
 	} /* while (true) */
 error:
 	if (env.debug)
···
 	return NULL;
 }

-static void print_all_error_logs(void)
+static void calculate_summary_and_print_errors(struct test_env *env)
 {
 	int i;
+	int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0;

-	if (env.fail_cnt)
-		fprintf(stdout, "\nAll error logs:\n");
+	for (i = 0; i < prog_test_cnt; i++) {
+		struct test_state *state = &test_states[i];
+
+		if (!state->tested)
+			continue;
+
+		sub_succ_cnt += state->sub_succ_cnt;
+		skip_cnt += state->skip_cnt;
+
+		if (state->error_cnt)
+			fail_cnt++;
+		else
+			succ_cnt++;
+	}
+
+	if (fail_cnt)
+		printf("\nAll error logs:\n");

 	/* print error logs again */
 	for (i = 0; i < prog_test_cnt; i++) {
-		struct prog_test_def *test;
-		struct test_result *result;
+		struct prog_test_def *test = &prog_test_defs[i];
+		struct test_state *state = &test_states[i];

-		test = &prog_test_defs[i];
-		result = &test_results[i];
-
-		if (!test->tested || !result->error_cnt)
+		if (!state->tested || !state->error_cnt)
 			continue;

-		fprintf(stdout, "\n#%d %s:%s\n",
-			test->test_num, test->test_name,
-			result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
-
-		if (result->log_cnt) {
-			result->log_buf[result->log_cnt] = '\0';
-			fprintf(stdout, "%s", result->log_buf);
-			if (result->log_buf[result->log_cnt - 1] != '\n')
-				fprintf(stdout, "\n");
-		}
+		dump_test_log(test, state, true);
 	}
+
+	printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+	       succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt);
+
+	env->succ_cnt = succ_cnt;
+	env->sub_succ_cnt = sub_succ_cnt;
+	env->fail_cnt = fail_cnt;
+	env->skip_cnt = skip_cnt;
 }

-static int server_main(void)
+static void server_main(void)
 {
 	pthread_t *dispatcher_threads;
 	struct dispatch_data *data;
···

 	for (int i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
-		struct test_result *result = &test_results[i];

 		if (!test->should_run || !test->run_serial_test)
 			continue;

-		stdio_hijack();
-
 		run_one_test(i);
-
-		stdio_restore();
-		if (env.log_buf) {
-			result->log_cnt = env.log_cnt;
-			result->log_buf = strdup(env.log_buf);
-
-			free(env.log_buf);
-			env.log_buf = NULL;
-			env.log_cnt = 0;
-		}
-		restore_netns();
-
-		fprintf(stdout, "#%d %s:%s\n",
-			test->test_num, test->test_name,
-			test->error_cnt ? "FAIL" : (test->skip_cnt ?
"SKIP" : "OK")); 1157 - 1158 - result->error_cnt = test->error_cnt; 1159 - result->skip_cnt = test->skip_cnt; 1160 - result->sub_succ_cnt = test->sub_succ_cnt; 1161 1157 } 1162 1158 1163 1159 /* generate summary */ 1164 1160 fflush(stderr); 1165 1161 fflush(stdout); 1166 1162 1167 - for (i = 0; i < prog_test_cnt; i++) { 1168 - struct prog_test_def *current_test; 1169 - struct test_result *result; 1170 - 1171 - current_test = &prog_test_defs[i]; 1172 - result = &test_results[i]; 1173 - 1174 - if (!current_test->tested) 1175 - continue; 1176 - 1177 - env.succ_cnt += result->error_cnt ? 0 : 1; 1178 - env.skip_cnt += result->skip_cnt; 1179 - if (result->error_cnt) 1180 - env.fail_cnt++; 1181 - env.sub_succ_cnt += result->sub_succ_cnt; 1182 - } 1183 - 1184 - print_all_error_logs(); 1185 - 1186 - fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", 1187 - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); 1163 + calculate_summary_and_print_errors(&env); 1188 1164 1189 1165 /* reap all workers */ 1190 1166 for (i = 0; i < env.workers; i++) { ··· 1152 1212 if (pid != env.worker_pids[i]) 1153 1213 perror("Unable to reap worker"); 1154 1214 } 1155 - 1156 - return 0; 1157 1215 } 1158 1216 1159 1217 static int worker_main(int sock) ··· 1172 1234 env.worker_id); 1173 1235 goto out; 1174 1236 case MSG_DO_TEST: { 1175 - int test_to_run; 1176 - struct prog_test_def *test; 1237 + int test_to_run = msg.do_test.test_num; 1238 + struct prog_test_def *test = &prog_test_defs[test_to_run]; 1239 + struct test_state *state = &test_states[test_to_run]; 1177 1240 struct msg msg_done; 1178 - 1179 - test_to_run = msg.do_test.test_num; 1180 - test = &prog_test_defs[test_to_run]; 1181 1241 1182 1242 if (env.debug) 1183 1243 fprintf(stderr, "[%d]: #%d:%s running.\n", ··· 1183 1247 test_to_run + 1, 1184 1248 test->test_name); 1185 1249 1186 - stdio_hijack(); 1187 - 1188 1250 run_one_test(test_to_run); 1189 - 1190 - stdio_restore(); 1191 1251 1192 1252 memset(&msg_done, 0, 
sizeof(msg_done)); 1193 1253 msg_done.type = MSG_TEST_DONE; 1194 1254 msg_done.test_done.test_num = test_to_run; 1195 - msg_done.test_done.error_cnt = test->error_cnt; 1196 - msg_done.test_done.skip_cnt = test->skip_cnt; 1197 - msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt; 1255 + msg_done.test_done.error_cnt = state->error_cnt; 1256 + msg_done.test_done.skip_cnt = state->skip_cnt; 1257 + msg_done.test_done.sub_succ_cnt = state->sub_succ_cnt; 1198 1258 msg_done.test_done.have_log = false; 1199 1259 1200 - if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) { 1201 - if (env.log_cnt) 1260 + if (env.verbosity > VERBOSE_NONE || state->force_log || state->error_cnt) { 1261 + if (state->log_cnt) 1202 1262 msg_done.test_done.have_log = true; 1203 1263 } 1204 1264 if (send_message(sock, &msg_done) < 0) { ··· 1207 1275 char *src; 1208 1276 size_t slen; 1209 1277 1210 - src = env.log_buf; 1211 - slen = env.log_cnt; 1278 + src = state->log_buf; 1279 + slen = state->log_cnt; 1212 1280 while (slen) { 1213 1281 struct msg msg_log; 1214 1282 char *dest; ··· 1228 1296 assert(send_message(sock, &msg_log) >= 0); 1229 1297 } 1230 1298 } 1231 - if (env.log_buf) { 1232 - free(env.log_buf); 1233 - env.log_buf = NULL; 1234 - env.log_cnt = 0; 1299 + if (state->log_buf) { 1300 + free(state->log_buf); 1301 + state->log_buf = NULL; 1302 + state->log_cnt = 0; 1235 1303 } 1236 1304 if (env.debug) 1237 1305 fprintf(stderr, "[%d]: #%d:%s done.\n", ··· 1362 1430 1363 1431 for (i = 0; i < prog_test_cnt; i++) { 1364 1432 struct prog_test_def *test = &prog_test_defs[i]; 1365 - struct test_result *result; 1366 1433 1367 1434 if (!test->should_run) 1368 1435 continue; ··· 1377 1446 continue; 1378 1447 } 1379 1448 1380 - stdio_hijack(); 1381 - 1382 1449 run_one_test(i); 1383 - 1384 - stdio_restore(); 1385 - 1386 - fprintf(env.stdout, "#%d %s:%s\n", 1387 - test->test_num, test->test_name, 1388 - test->error_cnt ? "FAIL" : (test->skip_cnt ? 
"SKIP" : "OK")); 1389 - 1390 - result = &test_results[i]; 1391 - result->error_cnt = test->error_cnt; 1392 - if (env.log_buf) { 1393 - result->log_buf = strdup(env.log_buf); 1394 - result->log_cnt = env.log_cnt; 1395 - 1396 - free(env.log_buf); 1397 - env.log_buf = NULL; 1398 - env.log_cnt = 0; 1399 - } 1400 - 1401 - if (test->error_cnt) 1402 - env.fail_cnt++; 1403 - else 1404 - env.succ_cnt++; 1405 - 1406 - skip_account(); 1407 - env.sub_succ_cnt += test->sub_succ_cnt; 1408 1450 } 1409 1451 1410 1452 if (env.get_test_cnt) { ··· 1388 1484 if (env.list_test_names) 1389 1485 goto out; 1390 1486 1391 - print_all_error_logs(); 1392 - 1393 - fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", 1394 - env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); 1487 + calculate_summary_and_print_errors(&env); 1395 1488 1396 1489 close(env.saved_netns_fd); 1397 1490 out: 1398 1491 if (!env.list_test_names && env.has_testmod) 1399 1492 unload_bpf_testmod(); 1400 - free_str_set(&env.test_selector.blacklist); 1401 - free_str_set(&env.test_selector.whitelist); 1402 - free(env.test_selector.num_set); 1403 - free_str_set(&env.subtest_selector.blacklist); 1404 - free_str_set(&env.subtest_selector.whitelist); 1405 - free(env.subtest_selector.num_set); 1493 + 1494 + free_test_selector(&env.test_selector); 1406 1495 1407 1496 if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) 1408 1497 return EXIT_NO_TEST;
tools/testing/selftests/bpf/test_progs.h (+49 -13)
···
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/time.h>
+#include <sys/param.h>
#include <fcntl.h>
#include <pthread.h>
#include <linux/bpf.h>
···
#include <bpf/bpf_endian.h>
#include "trace_helpers.h"
#include "testing_helpers.h"
-#include "flow_dissector_load.h"

enum verbosity {
	VERBOSE_NONE,
···
	VERBOSE_SUPER,
};

-struct str_set {
-	const char **strs;
+struct test_filter {
+	char *name;
+	char **subtests;
+	int subtest_cnt;
+};
+
+struct test_filter_set {
+	struct test_filter *tests;
	int cnt;
};

struct test_selector {
-	struct str_set whitelist;
-	struct str_set blacklist;
+	struct test_filter_set whitelist;
+	struct test_filter_set blacklist;
	bool *num_set;
	int num_set_len;
+};
+
+struct test_state {
+	bool tested;
+	bool force_log;
+
+	int error_cnt;
+	int skip_cnt;
+	int subtest_skip_cnt;
+	int sub_succ_cnt;
+
+	char *subtest_name;
+	int subtest_num;
+
+	/* store counts before subtest started */
+	int old_error_cnt;
+
+	size_t log_cnt;
+	char *log_buf;
};

struct test_env {
···
	bool get_test_cnt;
	bool list_test_names;

-	struct prog_test_def *test; /* current running tests */
+	struct prog_test_def *test; /* current running test */
+	struct test_state *test_state; /* current running test result */

	FILE *stdout;
	FILE *stderr;
-	char *log_buf;
-	size_t log_cnt;
	int nr_cpus;

	int succ_cnt; /* successful tests */
···

extern struct test_env env;

-extern void test__force_log();
-extern bool test__start_subtest(const char *name);
-extern void test__skip(void);
-extern void test__fail(void);
-extern int test__join_cgroup(const char *path);
+void test__force_log(void);
+bool test__start_subtest(const char *name);
+void test__end_subtest(void);
+void test__skip(void);
+void test__fail(void);
+int test__join_cgroup(const char *path);

#define PRINT_FAIL(format...) \
	({ \
···
	CHECK(!___ok, (name), \
	      "unexpected %s: actual '%.*s' != expected '%.*s'\n", \
	      (name), ___len, ___act, ___len, ___exp); \
	___ok; \
})

+#define ASSERT_HAS_SUBSTR(str, substr, name) ({ \
+	static int duration = 0; \
+	const char *___str = str; \
+	const char *___substr = substr; \
+	bool ___ok = strstr(___str, ___substr) != NULL; \
+	CHECK(!___ok, (name), \
+	      "unexpected %s: '%s' is not a substring of '%s'\n", \
+	      (name), ___substr, ___str); \
+	___ok; \
+})
tools/testing/selftests/bpf/test_skb_cgroup_id_user.c (+3 -1)
···
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"

#define CGROUP_PATH "/skb_cgroup_test"
···
		fprintf(stderr, "Usage: %s iface prog_id\n", argv[0]);
		exit(EXIT_FAILURE);
	}
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

	cgfd = cgroup_setup_and_join(CGROUP_PATH);
	if (cgfd < 0)
tools/testing/selftests/bpf/test_sock.c (+4 -2)
···
#include "cgroup_helpers.h"
#include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"

#define CG_PATH "/foo"
···
		goto err;
	}

-	if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+	if (attach_sock_prog(cgfd, progfd, test->attach_type) < 0) {
		if (test->result == ATTACH_REJECT)
			goto out;
		else
···
	cgfd = cgroup_setup_and_join(CG_PATH);
	if (cgfd < 0)
		goto err;
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

	if (run_tests(cgfd))
		goto err;
tools/testing/selftests/bpf/test_sock_addr.c (+3 -1)
···
#include <bpf/libbpf.h>

#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
#include "bpf_util.h"

#ifndef ENOTSUPP
···
	cgfd = cgroup_setup_and_join(CG_PATH);
	if (cgfd < 0)
		goto err;
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

	if (run_tests(cgfd))
		goto err;
tools/testing/selftests/bpf/test_sockmap.c (+3 -2)
···
#include <sched.h>

#include <sys/time.h>
-#include <sys/resource.h>
#include <sys/types.h>
#include <sys/sendfile.h>
···
#include <bpf/libbpf.h>

#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"

int running;
···
			return cg_fd;
		cg_created = 1;
	}
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

	if (test == SELFTESTS) {
		err = test_selftest(cg_fd, &options);
tools/testing/selftests/bpf/test_sysctl.c (+4 -2)
···
#include <bpf/libbpf.h>

#include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "testing_helpers.h"
···
		goto err;
	}

-	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
+	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) {
		if (test->result == ATTACH_REJECT)
			goto out;
		else
···
	cgfd = cgroup_setup_and_join(CG_PATH);
	if (cgfd < 0)
		goto err;
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

	if (run_tests(cgfd))
		goto err;
tools/testing/selftests/bpf/test_tag.c (+3 -1)
···
#include <bpf/bpf.h>

#include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
#include "testing_helpers.h"

static struct bpf_insn prog[BPF_MAXINSNS];
···
	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
	uint32_t tests = 0;
	int i, fd_map;
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

	fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
				sizeof(int), 1, &opts);
tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c (+3 -1)
···
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"

static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
···
		fprintf(stderr, "Usage: %s prog_id\n", argv[0]);
		exit(1);
	}
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

	results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp);
	if (results < 0) {
tools/testing/selftests/bpf/test_tcpnotify_user.c (-1)
···
#include <linux/perf_event.h>
#include <linux/err.h>

-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
tools/testing/selftests/bpf/test_verifier.c (+53 -2)
···
#define MAX_INSNS	BPF_MAXINSNS
#define MAX_TEST_INSNS	1000000
#define MAX_FIXUPS	8
-#define MAX_NR_MAPS	22
+#define MAX_NR_MAPS	23
#define MAX_TEST_RUNS	8
#define POINTER_VALUE	0xcafe4all
#define TEST_DATA_LEN	64
···
	int fixup_map_reuseport_array[MAX_FIXUPS];
	int fixup_map_ringbuf[MAX_FIXUPS];
	int fixup_map_timer[MAX_FIXUPS];
+	int fixup_map_kptr[MAX_FIXUPS];
	struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
	/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
	 * Can be a tab-separated sequence of expected strings. An empty string
···
 * struct timer {
 *   struct bpf_timer t;
 * };
+ * struct btf_ptr {
+ *   struct prog_test_ref_kfunc __kptr *ptr;
+ *   struct prog_test_ref_kfunc __kptr_ref *ptr;
+ *   struct prog_test_member __kptr_ref *ptr;
+ * }
 */
-static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t";
+static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"
+				  "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref"
+				  "\0prog_test_member";
static __u32 btf_raw_types[] = {
	/* int */
	BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
···
	/* struct timer */ /* [5] */
	BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16),
	BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
+	/* struct prog_test_ref_kfunc */ /* [6] */
+	BTF_STRUCT_ENC(51, 0, 0),
+	BTF_STRUCT_ENC(89, 0, 0), /* [7] */
+	/* type tag "kptr" */
+	BTF_TYPE_TAG_ENC(75, 6), /* [8] */
+	/* type tag "kptr_ref" */
+	BTF_TYPE_TAG_ENC(80, 6), /* [9] */
+	BTF_TYPE_TAG_ENC(80, 7), /* [10] */
+	BTF_PTR_ENC(8), /* [11] */
+	BTF_PTR_ENC(9), /* [12] */
+	BTF_PTR_ENC(10), /* [13] */
+	/* struct btf_ptr */ /* [14] */
+	BTF_STRUCT_ENC(43, 3, 24),
+	BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */
+	BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */
+	BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
};

static int load_btf(void)
···
	return fd;
}

+static int create_map_kptr(void)
+{
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		.btf_key_type_id = 1,
+		.btf_value_type_id = 14,
+	);
+	int fd, btf_fd;
+
+	btf_fd = load_btf();
+	if (btf_fd < 0)
+		return -1;
+
+	opts.btf_fd = btf_fd;
+	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 24, 1, &opts);
+	if (fd < 0)
+		printf("Failed to create map with btf_id pointer\n");
+	return fd;
+}
+
static char bpf_vlog[UINT_MAX >> 8];

static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
···
	int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
	int *fixup_map_ringbuf = test->fixup_map_ringbuf;
	int *fixup_map_timer = test->fixup_map_timer;
+	int *fixup_map_kptr = test->fixup_map_kptr;
	struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;

	if (test->fill_helper) {
···
			prog[*fixup_map_timer].imm = map_fds[21];
			fixup_map_timer++;
		} while (*fixup_map_timer);
	}
+	if (*fixup_map_kptr) {
+		map_fds[22] = create_map_kptr();
+		do {
+			prog[*fixup_map_kptr].imm = map_fds[22];
+			fixup_map_kptr++;
+		} while (*fixup_map_kptr);
+	}

	/* Patch in kfunc BTF IDs */
tools/testing/selftests/bpf/test_verifier_log.c (+3 -2)
···
#include <bpf/bpf.h>

-#include "bpf_rlimit.h"
-
#define LOG_SIZE (1 << 20)

#define err(str...)	printf("ERROR: " str)
···
	int i;

	memset(log, 1, LOG_SIZE);
+
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

	/* Test incorrect attr */
	printf("Test log_level 0...\n");
tools/testing/selftests/bpf/testing_helpers.c (+89)
···
#include <errno.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "test_progs.h"
#include "testing_helpers.h"

int parse_num_list(const char *s, bool **num_set, int *num_set_len)
···
	*num_set_len = set_len;

	return 0;
}
+
+int parse_test_list(const char *s,
+		    struct test_filter_set *set,
+		    bool is_glob_pattern)
+{
+	char *input, *state = NULL, *next;
+	struct test_filter *tmp, *tests = NULL;
+	int i, j, cnt = 0;
+
+	input = strdup(s);
+	if (!input)
+		return -ENOMEM;
+
+	while ((next = strtok_r(state ? NULL : input, ",", &state))) {
+		char *subtest_str = strchr(next, '/');
+		char *pattern = NULL;
+		int glob_chars = 0;
+
+		tmp = realloc(tests, sizeof(*tests) * (cnt + 1));
+		if (!tmp)
+			goto err;
+		tests = tmp;
+
+		tests[cnt].subtest_cnt = 0;
+		tests[cnt].subtests = NULL;
+
+		if (is_glob_pattern) {
+			pattern = "%s";
+		} else {
+			pattern = "*%s*";
+			glob_chars = 2;
+		}
+
+		if (subtest_str) {
+			char **tmp_subtests = NULL;
+			int subtest_cnt = tests[cnt].subtest_cnt;
+
+			*subtest_str = '\0';
+			subtest_str += 1;
+			tmp_subtests = realloc(tests[cnt].subtests,
+					       sizeof(*tmp_subtests) *
+					       (subtest_cnt + 1));
+			if (!tmp_subtests)
+				goto err;
+			tests[cnt].subtests = tmp_subtests;
+
+			tests[cnt].subtests[subtest_cnt] =
+				malloc(strlen(subtest_str) + glob_chars + 1);
+			if (!tests[cnt].subtests[subtest_cnt])
+				goto err;
+			sprintf(tests[cnt].subtests[subtest_cnt],
+				pattern,
+				subtest_str);
+
+			tests[cnt].subtest_cnt++;
+		}
+
+		tests[cnt].name = malloc(strlen(next) + glob_chars + 1);
+		if (!tests[cnt].name)
+			goto err;
+		sprintf(tests[cnt].name, pattern, next);
+
+		cnt++;
+	}
+
+	tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt));
+	if (!tmp)
+		goto err;
+
+	memcpy(tmp + set->cnt, tests, sizeof(*tests) * cnt);
+	set->tests = tmp;
+	set->cnt += cnt;
+
+	free(tests);
+	free(input);
+	return 0;
+
+err:
+	for (i = 0; i < cnt; i++) {
+		for (j = 0; j < tests[i].subtest_cnt; j++)
+			free(tests[i].subtests[j]);
+
+		free(tests[i].name);
+	}
+	free(tests);
+	free(input);
+	return -ENOMEM;
+}

__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
tools/testing/selftests/bpf/testing_helpers.h (+8)
···
		     size_t insns_cnt, const char *license,
		     __u32 kern_version, char *log_buf,
		     size_t log_buf_sz);
+
+/*
+ * below function is exported for testing in prog_test test
+ */
+struct test_filter_set;
+int parse_test_list(const char *s,
+		    struct test_filter_set *test_set,
+		    bool is_glob_pattern);
tools/testing/selftests/bpf/verifier/calls.c (+20)
···
	},
},
{
+	"calls: invalid kfunc call: don't match first member type when passed to release kfunc",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "kernel function bpf_kfunc_call_memb1_release args#0 expected pointer",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_memb_acquire", 1 },
+		{ "bpf_kfunc_call_memb1_release", 5 },
+	},
+},
+{
	"calls: invalid kfunc call: PTR_TO_BTF_ID with negative offset",
	.insns = {
	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
tools/testing/selftests/bpf/verifier/map_kptr.c (new file, +469)
+/* Common tests */
+{
+	"map_kptr: BPF_ST imm != 0",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "BPF_ST imm must be 0 when storing to kptr at off=0",
+},
+{
+	"map_kptr: size != bpf_size_to_bytes(BPF_DW)",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr access size must be BPF_DW",
+},
+{
+	"map_kptr: map_value non-const var_off",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr access cannot have variable offset",
+},
+{
+	"map_kptr: bpf_kptr_xchg non-const var_off",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_3),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R1 doesn't have constant offset. kptr has to be at the constant offset",
+},
+{
+	"map_kptr: unaligned boundary load/store",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 7),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr access misaligned expected=0 off=7",
+},
+{
+	"map_kptr: reject var_off != 0",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+	BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "variable untrusted_ptr_ access var_off=(0x0; 0x7) disallowed",
+},
+/* Tests for unreferenced PTR_TO_BTF_ID */
+{
+	"map_kptr: unref: reject btf_struct_ids_match == false",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test",
+},
+{
+	"map_kptr: unref: loaded pointer marked as untrusted",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R0 invalid mem access 'untrusted_ptr_or_null_'",
+},
+{
+	"map_kptr: unref: correct in kernel type size",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 24),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "access beyond struct prog_test_ref_kfunc at off 24 size 8",
+},
+{
+	"map_kptr: unref: inherit PTR_UNTRUSTED on struct walk",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 16),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R1 type=untrusted_ptr_ expected=percpu_ptr_",
+},
+{
+	"map_kptr: unref: no reference state created",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = ACCEPT,
+},
+{
+	"map_kptr: unref: bpf_kptr_xchg rejected",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "off=0 kptr isn't referenced kptr",
+},
+{
+	"map_kptr: unref: bpf_kfunc_call_test_kptr_get rejected",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "arg#0 no referenced kptr at map value offset=0",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_kptr_get", 13 },
+	}
+},
+/* Tests for referenced PTR_TO_BTF_ID */
+{
+	"map_kptr: ref: loaded pointer marked as untrusted",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_",
+},
+{
+	"map_kptr: ref: reject off != 0",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
370 + .errstr = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member", 371 + }, 372 + { 373 + "map_kptr: ref: reference state created and released on xchg", 374 + .insns = { 375 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 376 + BPF_LD_MAP_FD(BPF_REG_6, 0), 377 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), 378 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), 379 + BPF_MOV64_IMM(BPF_REG_0, 0), 380 + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), 381 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), 382 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 383 + BPF_EXIT_INSN(), 384 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), 385 + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), 386 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 387 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), 388 + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), 389 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), 390 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 391 + BPF_EXIT_INSN(), 392 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), 393 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), 394 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg), 395 + BPF_MOV64_IMM(BPF_REG_0, 0), 396 + BPF_EXIT_INSN(), 397 + }, 398 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 399 + .fixup_map_kptr = { 1 }, 400 + .result = REJECT, 401 + .errstr = "Unreleased reference id=5 alloc_insn=20", 402 + .fixup_kfunc_btf_id = { 403 + { "bpf_kfunc_call_test_acquire", 15 }, 404 + } 405 + }, 406 + { 407 + "map_kptr: ref: reject STX", 408 + .insns = { 409 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 410 + BPF_LD_MAP_FD(BPF_REG_6, 0), 411 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), 412 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), 413 + BPF_MOV64_IMM(BPF_REG_0, 0), 414 + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), 415 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), 416 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 417 + BPF_EXIT_INSN(), 418 + BPF_MOV64_REG(BPF_REG_1, 0), 419 + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), 420 + BPF_EXIT_INSN(), 421 + }, 422 + 
.prog_type = BPF_PROG_TYPE_SCHED_CLS, 423 + .fixup_map_kptr = { 1 }, 424 + .result = REJECT, 425 + .errstr = "store to referenced kptr disallowed", 426 + }, 427 + { 428 + "map_kptr: ref: reject ST", 429 + .insns = { 430 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 431 + BPF_LD_MAP_FD(BPF_REG_6, 0), 432 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), 433 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), 434 + BPF_MOV64_IMM(BPF_REG_0, 0), 435 + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), 436 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), 437 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 438 + BPF_EXIT_INSN(), 439 + BPF_ST_MEM(BPF_DW, BPF_REG_0, 8, 0), 440 + BPF_EXIT_INSN(), 441 + }, 442 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 443 + .fixup_map_kptr = { 1 }, 444 + .result = REJECT, 445 + .errstr = "store to referenced kptr disallowed", 446 + }, 447 + { 448 + "map_kptr: reject helper access to kptr", 449 + .insns = { 450 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 451 + BPF_LD_MAP_FD(BPF_REG_6, 0), 452 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), 453 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), 454 + BPF_MOV64_IMM(BPF_REG_0, 0), 455 + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), 456 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), 457 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), 458 + BPF_EXIT_INSN(), 459 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), 460 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), 461 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), 462 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), 463 + BPF_EXIT_INSN(), 464 + }, 465 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 466 + .fixup_map_kptr = { 1 }, 467 + .result = REJECT, 468 + .errstr = "kptr cannot be accessed indirectly by helper", 469 + },
+1 -1
tools/testing/selftests/bpf/verifier/ref_tracking.c
···
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 	},
 	{
 	/* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
+3 -3
tools/testing/selftests/bpf/verifier/sock.c
···
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 	},
 	{
 	"bpf_sk_release(bpf_sk_fullsock(skb->sk))",
···
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 	},
 	{
 	"bpf_sk_release(bpf_tcp_sock(skb->sk))",
···
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 	},
 	{
 	"sk_storage_get(map, skb->sk, NULL, 0): value == NULL",
-1
tools/testing/selftests/bpf/xdp_redirect_multi.c
···
 #include <net/if.h>
 #include <unistd.h>
 #include <libgen.h>
-#include <sys/resource.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/socket.h>
+2 -6
tools/testing/selftests/bpf/xdping.c
···
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
-#include <sys/resource.h>
 #include <net/if.h>
 #include <sys/types.h>
 #include <sys/socket.h>
···
 {
 	__u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
 	struct addrinfo *a, hints = { .ai_family = AF_INET };
-	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	__u16 count = XDPING_DEFAULT_COUNT;
 	struct pinginfo pinginfo = { 0 };
 	const char *optstr = "c:I:NsS";
···
 		freeaddrinfo(a);
 	}

-	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
-		perror("setrlimit(RLIMIT_MEMLOCK)");
-		return 1;
-	}
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+2 -4
tools/testing/selftests/bpf/xdpxceiver.c
···
 #include <string.h>
 #include <stddef.h>
 #include <sys/mman.h>
-#include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <time.h>
···

 int main(int argc, char **argv)
 {
-	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
 	struct pkt_stream *pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
 	struct test_spec test;
 	u32 i, j;

-	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
-		exit_with_error(errno);
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

 	ifobj_tx = ifobject_create();
 	if (!ifobj_tx)