Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'bpf-introduce-file-dynptr'

Mykyta Yatsenko says:

====================
bpf: Introduce file dynptr

From: Mykyta Yatsenko <yatsenko@meta.com>

This series adds a new dynptr kind, file dynptr, which enables BPF
programs to perform safe reads from files in a structured way.
Initial motivations include:
* Parsing the executable’s ELF to locate thread-local variable symbols
* Capturing stack traces when frame pointers are disabled

By leveraging the existing dynptr abstraction, we reuse the verifier’s
lifetime/size checks and keep the API consistent with existing dynptr
read helpers.

Technical details:
1. Reuses the existing freader library to read files a folio at a time.
2. bpf_dynptr_slice() and bpf_dynptr_read() always copy data from folios
into a program-provided buffer; zero-copy access is intentionally not
supported to keep it simple.
3. Reads may sleep if the requested folios are not in the page cache.
4. A few verifier changes are required:
* Support dynptr destruction in kfuncs
* Add kfunc address substitution based on whether the program runs in
a sleepable or non-sleepable context.

Testing:
The final patch adds a selftest that validates that a BPF program reads
the same data as userspace, and that page faults are enabled in sleepable
context and disabled in non-sleepable context.

Changelog:
---
v4 -> v5
v4: https://lore.kernel.org/all/20251021200334.220542-1-mykyta.yatsenko5@gmail.com/
* Inlined and removed kfunc_call_imm(), run overflow check for call_imm
only if !bpf_jit_supports_far_kfunc_call().

v3 -> v4
v3: https://lore.kernel.org/bpf/20251020222538.932915-1-mykyta.yatsenko5@gmail.com/
* Remove ringbuf usage from selftests
* bpf_dynptr_set_null(ptr) when discarding file dynptr
* call kfunc_call_imm() in specialize_kfunc() only, removed
call from add_kfunc_call()

v2 -> v3
v2: https://lore.kernel.org/bpf/20251015161155.120148-1-mykyta.yatsenko5@gmail.com/
* Add negative tests
* Rewrote tests to use LSM for bpf_get_task_exe_file()
* Move call_imm overflow check into kfunc_call_imm()

v1 -> v2
v1: https://lore.kernel.org/bpf/20251003160416.585080-1-mykyta.yatsenko5@gmail.com/
* Remove ELF parsing selftest
* Expanded u32 -> u64 refactoring, changes in include/uapi/linux/bpf.h
* Removed freader.{c,h}, instead move freader definitions into
buildid.h.
* Small refactoring of the multiple folios reading algorithm
* Directly return error after unmark_stack_slots_dynptr().
* Make kfuncs receive trusted arguments.
* Remove enum bpf_is_sleepable, use bool instead
* Remove unnecessary sorting from specialize_kfunc()
* Remove bool kfunc_in_sleepable_ctx; field from the struct
bpf_insn_aux_data, rely on non_sleepable field introduced by Kumar
* Refactor selftests, do madvise(...MADV_PAGEOUT) for all pages read by
the test
* Introduce the test for non-sleepable case, verify it fails with -EFAULT
====================

Link: https://lore.kernel.org/r/20251026203853.135105-1-mykyta.yatsenko5@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+648 -194
+1
MAINTAINERS
··· 4648 4648 F: arch/*/net/* 4649 4649 F: include/linux/bpf* 4650 4650 F: include/linux/btf* 4651 + F: include/linux/buildid.h 4651 4652 F: include/linux/filter.h 4652 4653 F: include/trace/events/xdp.h 4653 4654 F: include/uapi/linux/bpf*
+19 -11
include/linux/bpf.h
··· 670 670 671 671 void bpf_map_free_internal_structs(struct bpf_map *map, void *obj); 672 672 673 + int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, 674 + struct bpf_dynptr *ptr__uninit); 675 + 673 676 extern const struct bpf_map_ops bpf_map_offload_ops; 674 677 675 678 /* bpf_type_flag contains a set of flags that are applicable to the values of ··· 795 792 /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ 796 793 DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), 797 794 795 + /* DYNPTR points to file */ 796 + DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS), 797 + 798 798 __BPF_TYPE_FLAG_MAX, 799 799 __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, 800 800 }; 801 801 802 802 #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ 803 - | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) 803 + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE) 804 804 805 805 /* Max number of base types. */ 806 806 #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) ··· 1391 1385 BPF_DYNPTR_TYPE_XDP, 1392 1386 /* Points to skb_metadata_end()-skb_metadata_len() */ 1393 1387 BPF_DYNPTR_TYPE_SKB_META, 1388 + /* Underlying data is a file */ 1389 + BPF_DYNPTR_TYPE_FILE, 1394 1390 }; 1395 1391 1396 - int bpf_dynptr_check_size(u32 size); 1397 - u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); 1398 - const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); 1399 - void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); 1392 + int bpf_dynptr_check_size(u64 size); 1393 + u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); 1394 + const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len); 1395 + void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len); 1400 1396 bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); 1401 - int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, 1402 - void *src, u32 len, u64 flags); 1403 - 
void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, 1404 - void *buffer__opt, u32 buffer__szk); 1397 + int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, 1398 + void *src, u64 len, u64 flags); 1399 + void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, 1400 + void *buffer__opt, u64 buffer__szk); 1405 1401 1406 - static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) 1402 + static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len) 1407 1403 { 1408 - u32 size = __bpf_dynptr_size(ptr); 1404 + u64 size = __bpf_dynptr_size(ptr); 1409 1405 1410 1406 if (len > size || offset > size - len) 1411 1407 return -E2BIG;
+25
include/linux/buildid.h
··· 18 18 static inline void init_vmlinux_build_id(void) { } 19 19 #endif 20 20 21 + struct freader { 22 + void *buf; 23 + u32 buf_sz; 24 + int err; 25 + union { 26 + struct { 27 + struct file *file; 28 + struct folio *folio; 29 + void *addr; 30 + loff_t folio_off; 31 + bool may_fault; 32 + }; 33 + struct { 34 + const char *data; 35 + u64 data_sz; 36 + }; 37 + }; 38 + }; 39 + 40 + void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, 41 + struct file *file, bool may_fault); 42 + void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz); 43 + const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz); 44 + void freader_cleanup(struct freader *r); 45 + 21 46 #endif
+4 -4
include/uapi/linux/bpf.h
··· 5618 5618 * Return 5619 5619 * *sk* if casting is valid, or **NULL** otherwise. 5620 5620 * 5621 - * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) 5621 + * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr) 5622 5622 * Description 5623 5623 * Get a dynptr to local memory *data*. 5624 5624 * ··· 5661 5661 * Return 5662 5662 * Nothing. Always succeeds. 5663 5663 * 5664 - * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) 5664 + * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags) 5665 5665 * Description 5666 5666 * Read *len* bytes from *src* into *dst*, starting from *offset* 5667 5667 * into *src*. ··· 5671 5671 * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if 5672 5672 * *flags* is not 0. 5673 5673 * 5674 - * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) 5674 + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags) 5675 5675 * Description 5676 5676 * Write *len* bytes from *src* into *dst*, starting from *offset* 5677 5677 * into *dst*. ··· 5692 5692 * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, 5693 5693 * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). 5694 5694 * 5695 - * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) 5695 + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len) 5696 5696 * Description 5697 5697 * Get a pointer to the underlying dynptr data. 5698 5698 *
+139 -34
kernel/bpf/helpers.c
··· 28 28 #include <linux/verification.h> 29 29 #include <linux/task_work.h> 30 30 #include <linux/irq_work.h> 31 + #include <linux/buildid.h> 31 32 32 33 #include "../../lib/kstrtox.h" 33 34 ··· 1657 1656 .arg2_btf_id = BPF_PTR_POISON, 1658 1657 }; 1659 1658 1659 + struct bpf_dynptr_file_impl { 1660 + struct freader freader; 1661 + /* 64 bit offset and size overriding 32 bit ones in bpf_dynptr_kern */ 1662 + u64 offset; 1663 + u64 size; 1664 + }; 1665 + 1660 1666 /* Since the upper 8 bits of dynptr->size is reserved, the 1661 1667 * maximum supported size is 2^24 - 1. 1662 1668 */ ··· 1692 1684 return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT; 1693 1685 } 1694 1686 1695 - u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) 1687 + u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) 1696 1688 { 1689 + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) { 1690 + struct bpf_dynptr_file_impl *df = ptr->data; 1691 + 1692 + return df->size; 1693 + } 1694 + 1697 1695 return ptr->size & DYNPTR_SIZE_MASK; 1698 1696 } 1699 1697 1700 - static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size) 1698 + static void bpf_dynptr_advance_offset(struct bpf_dynptr_kern *ptr, u64 off) 1699 + { 1700 + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) { 1701 + struct bpf_dynptr_file_impl *df = ptr->data; 1702 + 1703 + df->offset += off; 1704 + return; 1705 + } 1706 + ptr->offset += off; 1707 + } 1708 + 1709 + static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u64 new_size) 1701 1710 { 1702 1711 u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK; 1703 1712 1704 - ptr->size = new_size | metadata; 1713 + if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) { 1714 + struct bpf_dynptr_file_impl *df = ptr->data; 1715 + 1716 + df->size = new_size; 1717 + return; 1718 + } 1719 + ptr->size = (u32)new_size | metadata; 1705 1720 } 1706 1721 1707 - int bpf_dynptr_check_size(u32 size) 1722 + int bpf_dynptr_check_size(u64 size) 1708 1723 { 1709 1724 
return size > DYNPTR_MAX_SIZE ? -E2BIG : 0; 1725 + } 1726 + 1727 + static int bpf_file_fetch_bytes(struct bpf_dynptr_file_impl *df, u64 offset, void *buf, u64 len) 1728 + { 1729 + const void *ptr; 1730 + 1731 + if (!buf) 1732 + return -EINVAL; 1733 + 1734 + df->freader.buf = buf; 1735 + df->freader.buf_sz = len; 1736 + ptr = freader_fetch(&df->freader, offset + df->offset, len); 1737 + if (!ptr) 1738 + return df->freader.err; 1739 + 1740 + if (ptr != buf) /* Force copying into the buffer */ 1741 + memcpy(buf, ptr, len); 1742 + 1743 + return 0; 1710 1744 } 1711 1745 1712 1746 void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, ··· 1765 1715 memset(ptr, 0, sizeof(*ptr)); 1766 1716 } 1767 1717 1768 - BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) 1718 + BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u64, size, u64, flags, struct bpf_dynptr_kern *, ptr) 1769 1719 { 1770 1720 int err; 1771 1721 ··· 1800 1750 .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE, 1801 1751 }; 1802 1752 1803 - static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src, 1804 - u32 offset, u64 flags) 1753 + static int __bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr_kern *src, 1754 + u64 offset, u64 flags) 1805 1755 { 1806 1756 enum bpf_dynptr_type type; 1807 1757 int err; ··· 1831 1781 case BPF_DYNPTR_TYPE_SKB_META: 1832 1782 memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len); 1833 1783 return 0; 1784 + case BPF_DYNPTR_TYPE_FILE: 1785 + return bpf_file_fetch_bytes(src->data, offset, dst, len); 1834 1786 default: 1835 1787 WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type); 1836 1788 return -EFAULT; 1837 1789 } 1838 1790 } 1839 1791 1840 - BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, 1841 - u32, offset, u64, flags) 1792 + BPF_CALL_5(bpf_dynptr_read, void *, dst, u64, len, const struct 
bpf_dynptr_kern *, src, 1793 + u64, offset, u64, flags) 1842 1794 { 1843 1795 return __bpf_dynptr_read(dst, len, src, offset, flags); 1844 1796 } ··· 1856 1804 .arg5_type = ARG_ANYTHING, 1857 1805 }; 1858 1806 1859 - int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src, 1860 - u32 len, u64 flags) 1807 + int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, void *src, 1808 + u64 len, u64 flags) 1861 1809 { 1862 1810 enum bpf_dynptr_type type; 1863 1811 int err; ··· 1900 1848 } 1901 1849 } 1902 1850 1903 - BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src, 1904 - u32, len, u64, flags) 1851 + BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u64, offset, void *, src, 1852 + u64, len, u64, flags) 1905 1853 { 1906 1854 return __bpf_dynptr_write(dst, offset, src, len, flags); 1907 1855 } ··· 1917 1865 .arg5_type = ARG_ANYTHING, 1918 1866 }; 1919 1867 1920 - BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) 1868 + BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u64, offset, u64, len) 1921 1869 { 1922 1870 enum bpf_dynptr_type type; 1923 1871 int err; ··· 2732 2680 * provided buffer, with its contents containing the data, if unable to obtain 2733 2681 * direct pointer) 2734 2682 */ 2735 - __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, 2736 - void *buffer__opt, u32 buffer__szk) 2683 + __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset, 2684 + void *buffer__opt, u64 buffer__szk) 2737 2685 { 2738 2686 const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; 2739 2687 enum bpf_dynptr_type type; 2740 - u32 len = buffer__szk; 2688 + u64 len = buffer__szk; 2741 2689 int err; 2742 2690 2743 2691 if (!ptr->data) ··· 2771 2719 } 2772 2720 case BPF_DYNPTR_TYPE_SKB_META: 2773 2721 return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset); 2722 + case BPF_DYNPTR_TYPE_FILE: 
2723 + err = bpf_file_fetch_bytes(ptr->data, offset, buffer__opt, buffer__szk); 2724 + return err ? NULL : buffer__opt; 2774 2725 default: 2775 2726 WARN_ONCE(true, "unknown dynptr type %d\n", type); 2776 2727 return NULL; ··· 2822 2767 * provided buffer, with its contents containing the data, if unable to obtain 2823 2768 * direct pointer) 2824 2769 */ 2825 - __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, 2826 - void *buffer__opt, u32 buffer__szk) 2770 + __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, 2771 + void *buffer__opt, u64 buffer__szk) 2827 2772 { 2828 2773 const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; 2829 2774 ··· 2855 2800 return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk); 2856 2801 } 2857 2802 2858 - __bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end) 2803 + __bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end) 2859 2804 { 2860 2805 struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; 2861 - u32 size; 2806 + u64 size; 2862 2807 2863 2808 if (!ptr->data || start > end) 2864 2809 return -EINVAL; ··· 2868 2813 if (start > size || end > size) 2869 2814 return -ERANGE; 2870 2815 2871 - ptr->offset += start; 2816 + bpf_dynptr_advance_offset(ptr, start); 2872 2817 bpf_dynptr_set_size(ptr, end - start); 2873 2818 2874 2819 return 0; ··· 2891 2836 return __bpf_dynptr_is_rdonly(ptr); 2892 2837 } 2893 2838 2894 - __bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p) 2839 + __bpf_kfunc u64 bpf_dynptr_size(const struct bpf_dynptr *p) 2895 2840 { 2896 2841 struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; 2897 2842 ··· 2928 2873 * Copies data from source dynptr to destination dynptr. 2929 2874 * Returns 0 on success; negative error, otherwise. 
2930 2875 */ 2931 - __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, 2932 - struct bpf_dynptr *src_ptr, u32 src_off, u32 size) 2876 + __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u64 dst_off, 2877 + struct bpf_dynptr *src_ptr, u64 src_off, u64 size) 2933 2878 { 2934 2879 struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr; 2935 2880 struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr; 2936 2881 void *src_slice, *dst_slice; 2937 2882 char buf[256]; 2938 - u32 off; 2883 + u64 off; 2939 2884 2940 2885 src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size); 2941 2886 dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size); ··· 2957 2902 2958 2903 off = 0; 2959 2904 while (off < size) { 2960 - u32 chunk_sz = min_t(u32, sizeof(buf), size - off); 2905 + u64 chunk_sz = min_t(u64, sizeof(buf), size - off); 2961 2906 int err; 2962 2907 2963 2908 err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0); ··· 2983 2928 * at @offset with the constant byte @val. 2984 2929 * Returns 0 on success; negative error, otherwise. 
2985 2930 */ 2986 - __bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u32 offset, u32 size, u8 val) 2987 - { 2931 + __bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u64 offset, u64 size, u8 val) 2932 + { 2988 2933 struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; 2989 - u32 chunk_sz, write_off; 2934 + u64 chunk_sz, write_off; 2990 2935 char buf[256]; 2991 2936 void* slice; 2992 2937 int err; ··· 3005 2950 return err; 3006 2951 3007 2952 /* Non-linear data under the dynptr, write from a local buffer */ 3008 - chunk_sz = min_t(u32, sizeof(buf), size); 2953 + chunk_sz = min_t(u64, sizeof(buf), size); 3009 2954 memset(buf, val, chunk_sz); 3010 2955 3011 2956 for (write_off = 0; write_off < size; write_off += chunk_sz) { 3012 - chunk_sz = min_t(u32, sizeof(buf), size - write_off); 2957 + chunk_sz = min_t(u64, sizeof(buf), size - write_off); 3013 2958 err = __bpf_dynptr_write(ptr, offset + write_off, buf, chunk_sz, 0); 3014 2959 if (err) 3015 2960 return err; ··· 4307 4252 return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME); 4308 4253 } 4309 4254 4255 + static int make_file_dynptr(struct file *file, u32 flags, bool may_sleep, 4256 + struct bpf_dynptr_kern *ptr) 4257 + { 4258 + struct bpf_dynptr_file_impl *state; 4259 + 4260 + /* flags is currently unsupported */ 4261 + if (flags) { 4262 + bpf_dynptr_set_null(ptr); 4263 + return -EINVAL; 4264 + } 4265 + 4266 + state = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_dynptr_file_impl)); 4267 + if (!state) { 4268 + bpf_dynptr_set_null(ptr); 4269 + return -ENOMEM; 4270 + } 4271 + state->offset = 0; 4272 + state->size = U64_MAX; /* Don't restrict size, as file may change anyways */ 4273 + freader_init_from_file(&state->freader, NULL, 0, file, may_sleep); 4274 + bpf_dynptr_init(ptr, state, BPF_DYNPTR_TYPE_FILE, 0, 0); 4275 + bpf_dynptr_set_rdonly(ptr); 4276 + return 0; 4277 + } 4278 + 4279 + __bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dynptr 
*ptr__uninit) 4280 + { 4281 + return make_file_dynptr(file, flags, false, (struct bpf_dynptr_kern *)ptr__uninit); 4282 + } 4283 + 4284 + int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit) 4285 + { 4286 + return make_file_dynptr(file, flags, true, (struct bpf_dynptr_kern *)ptr__uninit); 4287 + } 4288 + 4289 + __bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr) 4290 + { 4291 + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)dynptr; 4292 + struct bpf_dynptr_file_impl *df = ptr->data; 4293 + 4294 + if (!df) 4295 + return 0; 4296 + 4297 + freader_cleanup(&df->freader); 4298 + bpf_mem_free(&bpf_global_ma, df); 4299 + bpf_dynptr_set_null(ptr); 4300 + return 0; 4301 + } 4302 + 4310 4303 __bpf_kfunc_end_defs(); 4311 4304 4312 4305 static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work) ··· 4532 4429 BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) 4533 4430 BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS) 4534 4431 BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS) 4432 + BTF_ID_FLAGS(func, bpf_dynptr_from_file, KF_TRUSTED_ARGS) 4433 + BTF_ID_FLAGS(func, bpf_dynptr_file_discard) 4535 4434 BTF_KFUNCS_END(common_btf_ids) 4536 4435 4537 4436 static const struct btf_kfunc_id_set common_kfunc_set = { ··· 4574 4469 /* Get a pointer to dynptr data up to len bytes for read only access. If 4575 4470 * the dynptr doesn't have continuous data up to len bytes, return NULL. 4576 4471 */ 4577 - const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) 4472 + const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len) 4578 4473 { 4579 4474 const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr; 4580 4475 ··· 4585 4480 * the dynptr doesn't have continuous data up to len bytes, or the dynptr 4586 4481 * is read only, return NULL. 
4587 4482 */ 4588 - void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len) 4483 + void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len) 4589 4484 { 4590 4485 if (__bpf_dynptr_is_rdonly(ptr)) 4591 4486 return NULL;
+2
kernel/bpf/log.c
··· 500 500 return "xdp"; 501 501 case BPF_DYNPTR_TYPE_SKB_META: 502 502 return "skb_meta"; 503 + case BPF_DYNPTR_TYPE_FILE: 504 + return "file"; 503 505 case BPF_DYNPTR_TYPE_INVALID: 504 506 return "<invalid>"; 505 507 default:
+91 -62
kernel/bpf/verifier.c
··· 209 209 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); 210 210 static int ref_set_non_owning(struct bpf_verifier_env *env, 211 211 struct bpf_reg_state *reg); 212 - static void specialize_kfunc(struct bpf_verifier_env *env, 213 - u32 func_id, u16 offset, unsigned long *addr); 214 212 static bool is_trusted_reg(const struct bpf_reg_state *reg); 215 213 216 214 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) ··· 690 692 return BPF_DYNPTR_TYPE_XDP; 691 693 case DYNPTR_TYPE_SKB_META: 692 694 return BPF_DYNPTR_TYPE_SKB_META; 695 + case DYNPTR_TYPE_FILE: 696 + return BPF_DYNPTR_TYPE_FILE; 693 697 default: 694 698 return BPF_DYNPTR_TYPE_INVALID; 695 699 } ··· 710 710 return DYNPTR_TYPE_XDP; 711 711 case BPF_DYNPTR_TYPE_SKB_META: 712 712 return DYNPTR_TYPE_SKB_META; 713 + case BPF_DYNPTR_TYPE_FILE: 714 + return DYNPTR_TYPE_FILE; 713 715 default: 714 716 return 0; 715 717 } ··· 719 717 720 718 static bool dynptr_type_refcounted(enum bpf_dynptr_type type) 721 719 { 722 - return type == BPF_DYNPTR_TYPE_RINGBUF; 720 + return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE; 723 721 } 724 722 725 723 static void __mark_dynptr_reg(struct bpf_reg_state *reg, ··· 830 828 struct bpf_func_state *state = func(env, reg); 831 829 int spi, ref_obj_id, i; 832 830 831 + /* 832 + * This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot 833 + * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr 834 + * is safe to do directly. 
835 + */ 836 + if (reg->type == CONST_PTR_TO_DYNPTR) { 837 + verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released"); 838 + return -EFAULT; 839 + } 833 840 spi = dynptr_get_spi(env, reg); 834 841 if (spi < 0) 835 842 return spi; ··· 3124 3113 u32 nr_descs; 3125 3114 }; 3126 3115 3116 + static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, 3117 + int insn_idx); 3118 + 3127 3119 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) 3128 3120 { 3129 3121 const struct bpf_kfunc_desc *d0 = a; ··· 3144 3130 return d0->offset - d1->offset; 3145 3131 } 3146 3132 3147 - static const struct bpf_kfunc_desc * 3133 + static struct bpf_kfunc_desc * 3148 3134 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) 3149 3135 { 3150 3136 struct bpf_kfunc_desc desc = { ··· 3267 3253 { 3268 3254 const struct btf_type *func, *func_proto; 3269 3255 struct bpf_kfunc_btf_tab *btf_tab; 3256 + struct btf_func_model func_model; 3270 3257 struct bpf_kfunc_desc_tab *tab; 3271 3258 struct bpf_prog_aux *prog_aux; 3272 3259 struct bpf_kfunc_desc *desc; 3273 3260 const char *func_name; 3274 3261 struct btf *desc_btf; 3275 - unsigned long call_imm; 3276 3262 unsigned long addr; 3277 3263 int err; 3278 3264 ··· 3356 3342 func_name); 3357 3343 return -EINVAL; 3358 3344 } 3359 - specialize_kfunc(env, func_id, offset, &addr); 3360 - 3361 - if (bpf_jit_supports_far_kfunc_call()) { 3362 - call_imm = func_id; 3363 - } else { 3364 - call_imm = BPF_CALL_IMM(addr); 3365 - /* Check whether the relative offset overflows desc->imm */ 3366 - if ((unsigned long)(s32)call_imm != call_imm) { 3367 - verbose(env, "address of kernel function %s is out of range\n", 3368 - func_name); 3369 - return -EINVAL; 3370 - } 3371 - } 3372 3345 3373 3346 if (bpf_dev_bound_kfunc_id(func_id)) { 3374 3347 err = bpf_dev_bound_kfunc_check(&env->log, prog_aux); ··· 3363 3362 return err; 3364 3363 } 3365 3364 3366 - desc = &tab->descs[tab->nr_descs++]; 3367 - desc->func_id 
= func_id; 3368 - desc->imm = call_imm; 3369 - desc->offset = offset; 3370 - desc->addr = addr; 3371 3365 err = btf_distill_func_proto(&env->log, desc_btf, 3372 3366 func_proto, func_name, 3373 - &desc->func_model); 3374 - if (!err) 3375 - sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 3376 - kfunc_desc_cmp_by_id_off, NULL); 3377 - return err; 3367 + &func_model); 3368 + if (err) 3369 + return err; 3370 + 3371 + desc = &tab->descs[tab->nr_descs++]; 3372 + desc->func_id = func_id; 3373 + desc->offset = offset; 3374 + desc->addr = addr; 3375 + desc->func_model = func_model; 3376 + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 3377 + kfunc_desc_cmp_by_id_off, NULL); 3378 + return 0; 3378 3379 } 3379 3380 3380 3381 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b) ··· 11517 11514 11518 11515 if (meta.release_regno) { 11519 11516 err = -EINVAL; 11520 - /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot 11521 - * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr 11522 - * is safe to do directly. 
11523 - */ 11524 11517 if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) { 11525 - if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) { 11526 - verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released"); 11527 - return -EFAULT; 11528 - } 11529 11518 err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]); 11530 11519 } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) { 11531 11520 u32 ref_obj_id = meta.ref_obj_id; ··· 12285 12290 KF_bpf_res_spin_unlock, 12286 12291 KF_bpf_res_spin_lock_irqsave, 12287 12292 KF_bpf_res_spin_unlock_irqrestore, 12293 + KF_bpf_dynptr_from_file, 12294 + KF_bpf_dynptr_file_discard, 12288 12295 KF___bpf_trap, 12289 12296 KF_bpf_task_work_schedule_signal, 12290 12297 KF_bpf_task_work_schedule_resume, ··· 12359 12362 BTF_ID(func, bpf_res_spin_unlock) 12360 12363 BTF_ID(func, bpf_res_spin_lock_irqsave) 12361 12364 BTF_ID(func, bpf_res_spin_unlock_irqrestore) 12365 + BTF_ID(func, bpf_dynptr_from_file) 12366 + BTF_ID(func, bpf_dynptr_file_discard) 12362 12367 BTF_ID(func, __bpf_trap) 12363 12368 BTF_ID(func, bpf_task_work_schedule_signal) 12364 12369 BTF_ID(func, bpf_task_work_schedule_resume) ··· 13324 13325 dynptr_arg_type |= DYNPTR_TYPE_XDP; 13325 13326 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { 13326 13327 dynptr_arg_type |= DYNPTR_TYPE_SKB_META; 13328 + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { 13329 + dynptr_arg_type |= DYNPTR_TYPE_FILE; 13330 + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) { 13331 + dynptr_arg_type |= DYNPTR_TYPE_FILE; 13332 + meta->release_regno = regno; 13327 13333 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && 13328 13334 (dynptr_arg_type & MEM_UNINIT)) { 13329 13335 enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type; ··· 14009 14005 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. 
14010 14006 */ 14011 14007 if (meta.release_regno) { 14012 - err = release_reference(env, regs[meta.release_regno].ref_obj_id); 14013 - if (err) { 14014 - verbose(env, "kfunc %s#%d reference has not been acquired before\n", 14015 - func_name, meta.func_id); 14016 - return err; 14008 + struct bpf_reg_state *reg = &regs[meta.release_regno]; 14009 + 14010 + if (meta.initialized_dynptr.ref_obj_id) { 14011 + err = unmark_stack_slots_dynptr(env, reg); 14012 + } else { 14013 + err = release_reference(env, reg->ref_obj_id); 14014 + if (err) 14015 + verbose(env, "kfunc %s#%d reference has not been acquired before\n", 14016 + func_name, meta.func_id); 14017 14017 } 14018 + if (err) 14019 + return err; 14018 14020 } 14019 14021 14020 14022 if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || ··· 21870 21860 } 21871 21861 21872 21862 /* replace a generic kfunc with a specialized version if necessary */ 21873 - static void specialize_kfunc(struct bpf_verifier_env *env, 21874 - u32 func_id, u16 offset, unsigned long *addr) 21863 + static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx) 21875 21864 { 21876 21865 struct bpf_prog *prog = env->prog; 21877 21866 bool seen_direct_write; 21878 21867 void *xdp_kfunc; 21879 21868 bool is_rdonly; 21869 + u32 func_id = desc->func_id; 21870 + u16 offset = desc->offset; 21871 + unsigned long addr = desc->addr, call_imm; 21872 + 21873 + if (offset) /* return if module BTF is used */ 21874 + goto set_imm; 21880 21875 21881 21876 if (bpf_dev_bound_kfunc_id(func_id)) { 21882 21877 xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id); 21883 - if (xdp_kfunc) { 21884 - *addr = (unsigned long)xdp_kfunc; 21885 - return; 21886 - } 21878 + if (xdp_kfunc) 21879 + addr = (unsigned long)xdp_kfunc; 21887 21880 /* fallback to default kfunc when not supported by netdev */ 21888 - } 21889 - 21890 - if (offset) 21891 - return; 21892 - 21893 - if (func_id == 
special_kfunc_list[KF_bpf_dynptr_from_skb]) { 21881 + } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { 21894 21882 seen_direct_write = env->seen_direct_write; 21895 21883 is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE); 21896 21884 21897 21885 if (is_rdonly) 21898 - *addr = (unsigned long)bpf_dynptr_from_skb_rdonly; 21886 + addr = (unsigned long)bpf_dynptr_from_skb_rdonly; 21899 21887 21900 21888 /* restore env->seen_direct_write to its original value, since 21901 21889 * may_access_direct_pkt_data mutates it 21902 21890 */ 21903 21891 env->seen_direct_write = seen_direct_write; 21892 + } else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) { 21893 + if (bpf_lsm_has_d_inode_locked(prog)) 21894 + addr = (unsigned long)bpf_set_dentry_xattr_locked; 21895 + } else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) { 21896 + if (bpf_lsm_has_d_inode_locked(prog)) 21897 + addr = (unsigned long)bpf_remove_dentry_xattr_locked; 21898 + } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { 21899 + if (!env->insn_aux_data[insn_idx].non_sleepable) 21900 + addr = (unsigned long)bpf_dynptr_from_file_sleepable; 21904 21901 } 21905 21902 21906 - if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr] && 21907 - bpf_lsm_has_d_inode_locked(prog)) 21908 - *addr = (unsigned long)bpf_set_dentry_xattr_locked; 21909 - 21910 - if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr] && 21911 - bpf_lsm_has_d_inode_locked(prog)) 21912 - *addr = (unsigned long)bpf_remove_dentry_xattr_locked; 21903 + set_imm: 21904 + if (bpf_jit_supports_far_kfunc_call()) { 21905 + call_imm = func_id; 21906 + } else { 21907 + call_imm = BPF_CALL_IMM(addr); 21908 + /* Check whether the relative offset overflows desc->imm */ 21909 + if ((unsigned long)(s32)call_imm != call_imm) { 21910 + verbose(env, "address of kernel func_id %u is out of range\n", func_id); 21911 + return -EINVAL; 21912 + } 21913 + } 21914 + desc->imm = call_imm; 
21915 + desc->addr = addr; 21916 + return 0; 21913 21917 } 21914 21918 21915 21919 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux, ··· 21946 21922 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 21947 21923 struct bpf_insn *insn_buf, int insn_idx, int *cnt) 21948 21924 { 21949 - const struct bpf_kfunc_desc *desc; 21925 + struct bpf_kfunc_desc *desc; 21926 + int err; 21950 21927 21951 21928 if (!insn->imm) { 21952 21929 verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); ··· 21966 21941 insn->imm); 21967 21942 return -EFAULT; 21968 21943 } 21944 + 21945 + err = specialize_kfunc(env, desc, insn_idx); 21946 + if (err) 21947 + return err; 21969 21948 21970 21949 if (!bpf_jit_supports_far_kfunc_call()) 21971 21950 insn->imm = BPF_CALL_IMM(desc->addr);
+23 -23
kernel/trace/bpf_trace.c
··· 3372 3372 * direct calls into all the specific callback implementations 3373 3373 * (copy_user_data_sleepable, copy_user_data_nofault, and so on) 3374 3374 */ 3375 - static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size, 3375 + static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size, 3376 3376 const void *unsafe_src, 3377 3377 copy_fn_t str_copy_fn, 3378 3378 struct task_struct *tsk) 3379 3379 { 3380 3380 struct bpf_dynptr_kern *dst; 3381 - u32 chunk_sz, off; 3381 + u64 chunk_sz, off; 3382 3382 void *dst_slice; 3383 3383 int cnt, err; 3384 3384 char buf[256]; ··· 3392 3392 return -E2BIG; 3393 3393 3394 3394 for (off = 0; off < size; off += chunk_sz - 1) { 3395 - chunk_sz = min_t(u32, sizeof(buf), size - off); 3395 + chunk_sz = min_t(u64, sizeof(buf), size - off); 3396 3396 /* Expect str_copy_fn to return count of copied bytes, including 3397 3397 * zero terminator. Next iteration increment off by chunk_sz - 1 to 3398 3398 * overwrite NUL. 
··· 3409 3409 return off; 3410 3410 } 3411 3411 3412 - static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff, 3413 - u32 size, const void *unsafe_src, 3412 + static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff, 3413 + u64 size, const void *unsafe_src, 3414 3414 copy_fn_t copy_fn, struct task_struct *tsk) 3415 3415 { 3416 3416 struct bpf_dynptr_kern *dst; 3417 3417 void *dst_slice; 3418 3418 char buf[256]; 3419 - u32 off, chunk_sz; 3419 + u64 off, chunk_sz; 3420 3420 int err; 3421 3421 3422 3422 dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size); ··· 3428 3428 return -E2BIG; 3429 3429 3430 3430 for (off = 0; off < size; off += chunk_sz) { 3431 - chunk_sz = min_t(u32, sizeof(buf), size - off); 3431 + chunk_sz = min_t(u64, sizeof(buf), size - off); 3432 3432 err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk); 3433 3433 if (err) 3434 3434 return err; ··· 3514 3514 return bpf_send_signal_common(sig, type, task, value); 3515 3515 } 3516 3516 3517 - __bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off, 3518 - u32 size, const void __user *unsafe_ptr__ign) 3517 + __bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off, 3518 + u64 size, const void __user *unsafe_ptr__ign) 3519 3519 { 3520 3520 return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, 3521 3521 copy_user_data_nofault, NULL); 3522 3522 } 3523 3523 3524 - __bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off, 3525 - u32 size, const void *unsafe_ptr__ign) 3524 + __bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off, 3525 + u64 size, const void *unsafe_ptr__ign) 3526 3526 { 3527 3527 return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign, 3528 3528 copy_kernel_data_nofault, NULL); 3529 3529 } 3530 3530 3531 - __bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off, 3532 - u32 size, const void __user 
*unsafe_ptr__ign) 3531 + __bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, 3532 + u64 size, const void __user *unsafe_ptr__ign) 3533 3533 { 3534 3534 return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, 3535 3535 copy_user_str_nofault, NULL); 3536 3536 } 3537 3537 3538 - __bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off, 3539 - u32 size, const void *unsafe_ptr__ign) 3538 + __bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off, 3539 + u64 size, const void *unsafe_ptr__ign) 3540 3540 { 3541 3541 return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign, 3542 3542 copy_kernel_str_nofault, NULL); 3543 3543 } 3544 3544 3545 - __bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off, 3546 - u32 size, const void __user *unsafe_ptr__ign) 3545 + __bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off, 3546 + u64 size, const void __user *unsafe_ptr__ign) 3547 3547 { 3548 3548 return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign, 3549 3549 copy_user_data_sleepable, NULL); 3550 3550 } 3551 3551 3552 - __bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off, 3553 - u32 size, const void __user *unsafe_ptr__ign) 3552 + __bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off, 3553 + u64 size, const void __user *unsafe_ptr__ign) 3554 3554 { 3555 3555 return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign, 3556 3556 copy_user_str_sleepable, NULL); 3557 3557 } 3558 3558 3559 - __bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off, 3560 - u32 size, const void __user *unsafe_ptr__ign, 3559 + __bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off, 3560 + u64 size, const void __user *unsafe_ptr__ign, 3561 3561 struct task_struct *tsk) 3562 3562 { 3563 3563 return __bpf_dynptr_copy(dptr, off, size, 
(const void *)unsafe_ptr__ign, 3564 3564 copy_user_data_sleepable, tsk); 3565 3565 } 3566 3566 3567 - __bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off, 3568 - u32 size, const void __user *unsafe_ptr__ign, 3567 + __bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off, 3568 + u64 size, const void __user *unsafe_ptr__ign, 3569 3569 struct task_struct *tsk) 3570 3570 { 3571 3571 return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+18 -34
lib/buildid.c
··· 11 11 12 12 #define MAX_PHDR_CNT 256 13 13 14 - struct freader { 15 - void *buf; 16 - u32 buf_sz; 17 - int err; 18 - union { 19 - struct { 20 - struct file *file; 21 - struct folio *folio; 22 - void *addr; 23 - loff_t folio_off; 24 - bool may_fault; 25 - }; 26 - struct { 27 - const char *data; 28 - u64 data_sz; 29 - }; 30 - }; 31 - }; 32 - 33 - static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, 34 - struct file *file, bool may_fault) 14 + void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, 15 + struct file *file, bool may_fault) 35 16 { 36 17 memset(r, 0, sizeof(*r)); 37 18 r->buf = buf; ··· 21 40 r->may_fault = may_fault; 22 41 } 23 42 24 - static void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz) 43 + void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz) 25 44 { 26 45 memset(r, 0, sizeof(*r)); 27 46 r->data = data; ··· 73 92 return 0; 74 93 } 75 94 76 - static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) 95 + const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) 77 96 { 78 97 size_t folio_sz; 79 98 ··· 108 127 */ 109 128 folio_sz = folio_size(r->folio); 110 129 if (file_off + sz > r->folio_off + folio_sz) { 111 - int part_sz = r->folio_off + folio_sz - file_off; 130 + u64 part_sz = r->folio_off + folio_sz - file_off, off; 112 131 113 - /* copy the part that resides in the current folio */ 114 - memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz); 132 + memcpy(r->buf, r->addr + file_off - r->folio_off, part_sz); 133 + off = part_sz; 115 134 116 - /* fetch next folio */ 117 - r->err = freader_get_folio(r, r->folio_off + folio_sz); 118 - if (r->err) 119 - return NULL; 120 - 121 - /* copy the rest of requested data */ 122 - memcpy(r->buf + part_sz, r->addr, sz - part_sz); 135 + while (off < sz) { 136 + /* fetch next folio */ 137 + r->err = freader_get_folio(r, r->folio_off + folio_sz); 138 + if (r->err) 139 + return 
NULL; 140 + folio_sz = folio_size(r->folio); 141 + part_sz = min_t(u64, sz - off, folio_sz); 142 + memcpy(r->buf + off, r->addr, part_sz); 143 + off += part_sz; 144 + } 123 145 124 146 return r->buf; 125 147 } ··· 131 147 return r->addr + (file_off - r->folio_off); 132 148 } 133 149 134 - static void freader_cleanup(struct freader *r) 150 + void freader_cleanup(struct freader *r) 135 151 { 136 152 if (!r->buf) 137 153 return; /* non-file-backed mode */
+4 -4
tools/include/uapi/linux/bpf.h
··· 5618 5618 * Return 5619 5619 * *sk* if casting is valid, or **NULL** otherwise. 5620 5620 * 5621 - * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) 5621 + * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr) 5622 5622 * Description 5623 5623 * Get a dynptr to local memory *data*. 5624 5624 * ··· 5661 5661 * Return 5662 5662 * Nothing. Always succeeds. 5663 5663 * 5664 - * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) 5664 + * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags) 5665 5665 * Description 5666 5666 * Read *len* bytes from *src* into *dst*, starting from *offset* 5667 5667 * into *src*. ··· 5671 5671 * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if 5672 5672 * *flags* is not 0. 5673 5673 * 5674 - * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) 5674 + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags) 5675 5675 * Description 5676 5676 * Write *len* bytes from *src* into *dst*, starting from *offset* 5677 5677 * into *dst*. ··· 5692 5692 * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, 5693 5693 * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). 5694 5694 * 5695 - * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) 5695 + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len) 5696 5696 * Description 5697 5697 * Get a pointer to the underlying dynptr data. 5698 5698 *
+6 -6
tools/testing/selftests/bpf/bpf_kfuncs.h
··· 28 28 * Either a direct pointer to the dynptr data or a pointer to the user-provided 29 29 * buffer if unable to obtain a direct pointer 30 30 */ 31 - extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, 32 - void *buffer, __u32 buffer__szk) __ksym __weak; 31 + extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset, 32 + void *buffer, __u64 buffer__szk) __ksym __weak; 33 33 34 34 /* Description 35 35 * Obtain a read-write pointer to the dynptr's data ··· 37 37 * Either a direct pointer to the dynptr data or a pointer to the user-provided 38 38 * buffer if unable to obtain a direct pointer 39 39 */ 40 - extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, 41 - void *buffer, __u32 buffer__szk) __ksym __weak; 40 + extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer, 41 + __u64 buffer__szk) __ksym __weak; 42 42 43 - extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak; 43 + extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak; 44 44 extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak; 45 45 extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak; 46 - extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; 46 + extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; 47 47 extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak; 48 48 49 49 /* Description
+113
tools/testing/selftests/bpf/prog_tests/file_reader.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include <test_progs.h> 5 + #include <network_helpers.h> 6 + #include "file_reader.skel.h" 7 + #include "file_reader_fail.skel.h" 8 + #include <dlfcn.h> 9 + #include <sys/mman.h> 10 + 11 + const char *user_ptr = "hello world"; 12 + char file_contents[256000]; 13 + 14 + void *get_executable_base_addr(void) 15 + { 16 + Dl_info info; 17 + 18 + if (!dladdr((void *)&get_executable_base_addr, &info)) { 19 + fprintf(stderr, "dladdr failed\n"); 20 + return NULL; 21 + } 22 + 23 + return info.dli_fbase; 24 + } 25 + 26 + static int initialize_file_contents(void) 27 + { 28 + int fd, page_sz = sysconf(_SC_PAGESIZE); 29 + ssize_t n = 0, cur, off; 30 + void *addr; 31 + 32 + fd = open("/proc/self/exe", O_RDONLY); 33 + if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n")) 34 + return 1; 35 + 36 + do { 37 + cur = read(fd, file_contents + n, sizeof(file_contents) - n); 38 + if (!ASSERT_GT(cur, 0, "read success")) 39 + break; 40 + n += cur; 41 + } while (n < sizeof(file_contents)); 42 + 43 + close(fd); 44 + 45 + if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n")) 46 + return 1; 47 + 48 + addr = get_executable_base_addr(); 49 + if (!ASSERT_NEQ(addr, NULL, "get executable address")) 50 + return 1; 51 + 52 + /* page-align base file address */ 53 + addr = (void *)((unsigned long)addr & ~(page_sz - 1)); 54 + 55 + for (off = 0; off < sizeof(file_contents); off += page_sz) { 56 + if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT), 57 + "madvise pageout")) 58 + return errno; 59 + } 60 + 61 + return 0; 62 + } 63 + 64 + static void run_test(const char *prog_name) 65 + { 66 + struct file_reader *skel; 67 + struct bpf_program *prog; 68 + int err, fd; 69 + 70 + err = initialize_file_contents(); 71 + if (!ASSERT_OK(err, "initialize file contents")) 72 + return; 73 + 74 + skel = file_reader__open(); 75 + if (!ASSERT_OK_PTR(skel, "file_reader__open")) 76 + return; 77 + 
78 + bpf_object__for_each_program(prog, skel->obj) { 79 + bpf_program__set_autoload(prog, strcmp(bpf_program__name(prog), prog_name) == 0); 80 + } 81 + 82 + memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents)); 83 + skel->bss->pid = getpid(); 84 + 85 + err = file_reader__load(skel); 86 + if (!ASSERT_OK(err, "file_reader__load")) 87 + goto cleanup; 88 + 89 + err = file_reader__attach(skel); 90 + if (!ASSERT_OK(err, "file_reader__attach")) 91 + goto cleanup; 92 + 93 + fd = open("/proc/self/exe", O_RDONLY); 94 + if (fd >= 0) 95 + close(fd); 96 + 97 + ASSERT_EQ(skel->bss->err, 0, "err"); 98 + ASSERT_EQ(skel->bss->run_success, 1, "run_success"); 99 + cleanup: 100 + file_reader__destroy(skel); 101 + } 102 + 103 + void test_file_reader(void) 104 + { 105 + if (test__start_subtest("on_open_expect_fault")) 106 + run_test("on_open_expect_fault"); 107 + 108 + if (test__start_subtest("on_open_validate_file_read")) 109 + run_test("on_open_validate_file_read"); 110 + 111 + if (test__start_subtest("negative")) 112 + RUN_TESTS(file_reader_fail); 113 + }
+6 -6
tools/testing/selftests/bpf/progs/dynptr_success.c
··· 914 914 char expected_str[384]; 915 915 __u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257}; 916 916 917 - typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off, 918 - u32 size, const void *unsafe_ptr); 917 + typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off, 918 + u64 size, const void *unsafe_ptr); 919 919 920 920 /* Returns the offset just before the end of the maximum sized xdp fragment. 921 921 * Any write larger than 32 bytes will be split between 2 fragments. ··· 1106 1106 return 0; 1107 1107 } 1108 1108 1109 - static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off, 1110 - u32 size, const void *unsafe_ptr) 1109 + static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off, 1110 + u64 size, const void *unsafe_ptr) 1111 1111 { 1112 1112 struct task_struct *task = bpf_get_current_task_btf(); 1113 1113 1114 1114 return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task); 1115 1115 } 1116 1116 1117 - static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off, 1118 - u32 size, const void *unsafe_ptr) 1117 + static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off, 1118 + u64 size, const void *unsafe_ptr) 1119 1119 { 1120 1120 struct task_struct *task = bpf_get_current_task_btf(); 1121 1121
+145
tools/testing/selftests/bpf/progs/file_reader.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include <vmlinux.h> 5 + #include <string.h> 6 + #include <stdbool.h> 7 + #include <bpf/bpf_tracing.h> 8 + #include "bpf_misc.h" 9 + #include "errno.h" 10 + 11 + char _license[] SEC("license") = "GPL"; 12 + 13 + struct { 14 + __uint(type, BPF_MAP_TYPE_ARRAY); 15 + __uint(max_entries, 1); 16 + __type(key, int); 17 + __type(value, struct elem); 18 + } arrmap SEC(".maps"); 19 + 20 + struct elem { 21 + struct file *file; 22 + struct bpf_task_work tw; 23 + }; 24 + 25 + char user_buf[256000]; 26 + char tmp_buf[256000]; 27 + 28 + int pid = 0; 29 + int err, run_success = 0; 30 + 31 + static int validate_file_read(struct file *file); 32 + static int task_work_callback(struct bpf_map *map, void *key, void *value); 33 + 34 + SEC("lsm/file_open") 35 + int on_open_expect_fault(void *c) 36 + { 37 + struct bpf_dynptr dynptr; 38 + struct file *file; 39 + int local_err = 1; 40 + __u32 user_buf_sz = sizeof(user_buf); 41 + 42 + if (bpf_get_current_pid_tgid() >> 32 != pid) 43 + return 0; 44 + 45 + file = bpf_get_task_exe_file(bpf_get_current_task_btf()); 46 + if (!file) 47 + return 0; 48 + 49 + if (bpf_dynptr_from_file(file, 0, &dynptr)) 50 + goto out; 51 + 52 + local_err = bpf_dynptr_read(tmp_buf, user_buf_sz, &dynptr, 0, 0); 53 + if (local_err == -EFAULT) { /* Expect page fault */ 54 + local_err = 0; 55 + run_success = 1; 56 + } 57 + out: 58 + bpf_dynptr_file_discard(&dynptr); 59 + if (local_err) 60 + err = local_err; 61 + bpf_put_file(file); 62 + return 0; 63 + } 64 + 65 + SEC("lsm/file_open") 66 + int on_open_validate_file_read(void *c) 67 + { 68 + struct task_struct *task = bpf_get_current_task_btf(); 69 + struct elem *work; 70 + int key = 0; 71 + 72 + if (bpf_get_current_pid_tgid() >> 32 != pid) 73 + return 0; 74 + 75 + work = bpf_map_lookup_elem(&arrmap, &key); 76 + if (!work) { 77 + err = 1; 78 + return 0; 79 + } 80 + bpf_task_work_schedule_signal(task, 
&work->tw, &arrmap, task_work_callback, NULL); 81 + return 0; 82 + } 83 + 84 + /* Called in a sleepable context, read 256K bytes, cross check with user space read data */ 85 + static int task_work_callback(struct bpf_map *map, void *key, void *value) 86 + { 87 + struct task_struct *task = bpf_get_current_task_btf(); 88 + struct file *file = bpf_get_task_exe_file(task); 89 + 90 + if (!file) 91 + return 0; 92 + 93 + err = validate_file_read(file); 94 + if (!err) 95 + run_success = 1; 96 + bpf_put_file(file); 97 + return 0; 98 + } 99 + 100 + static int verify_dynptr_read(struct bpf_dynptr *ptr, u32 off, char *user_buf, u32 len) 101 + { 102 + int i; 103 + 104 + if (bpf_dynptr_read(tmp_buf, len, ptr, off, 0)) 105 + return 1; 106 + 107 + /* Verify file contents read from BPF is the same as the one read from userspace */ 108 + bpf_for(i, 0, len) 109 + { 110 + if (tmp_buf[i] != user_buf[i]) 111 + return 1; 112 + } 113 + return 0; 114 + } 115 + 116 + static int validate_file_read(struct file *file) 117 + { 118 + struct bpf_dynptr dynptr; 119 + int loc_err = 1, off; 120 + __u32 user_buf_sz = sizeof(user_buf); 121 + 122 + if (bpf_dynptr_from_file(file, 0, &dynptr)) 123 + goto cleanup; 124 + 125 + loc_err = verify_dynptr_read(&dynptr, 0, user_buf, user_buf_sz); 126 + off = 1; 127 + loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off); 128 + off = user_buf_sz - 1; 129 + loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off); 130 + /* Read file with random offset and length */ 131 + off = 4097; 132 + loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, 100); 133 + 134 + /* Adjust dynptr, verify read */ 135 + loc_err = loc_err ?: bpf_dynptr_adjust(&dynptr, off, off + 1); 136 + loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 1); 137 + /* Can't read more than 1 byte */ 138 + loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 2) == 0; 139 + /* Can't read with 
far offset */ 140 + loc_err = loc_err ?: verify_dynptr_read(&dynptr, 1, user_buf + off, 1) == 0; 141 + 142 + cleanup: 143 + bpf_dynptr_file_discard(&dynptr); 144 + return loc_err; 145 + }
+52
tools/testing/selftests/bpf/progs/file_reader_fail.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include <vmlinux.h> 5 + #include <string.h> 6 + #include <stdbool.h> 7 + #include <bpf/bpf_tracing.h> 8 + #include "bpf_misc.h" 9 + 10 + char _license[] SEC("license") = "GPL"; 11 + 12 + int err; 13 + void *user_ptr; 14 + 15 + SEC("lsm/file_open") 16 + __failure 17 + __msg("Unreleased reference id=") 18 + int on_nanosleep_unreleased_ref(void *ctx) 19 + { 20 + struct task_struct *task = bpf_get_current_task_btf(); 21 + struct file *file = bpf_get_task_exe_file(task); 22 + struct bpf_dynptr dynptr; 23 + 24 + if (!file) 25 + return 0; 26 + 27 + err = bpf_dynptr_from_file(file, 0, &dynptr); 28 + return err ? 1 : 0; 29 + } 30 + 31 + SEC("xdp") 32 + __failure 33 + __msg("Expected a dynptr of type file as arg #0") 34 + int xdp_wrong_dynptr_type(struct xdp_md *xdp) 35 + { 36 + struct bpf_dynptr dynptr; 37 + 38 + bpf_dynptr_from_xdp(xdp, 0, &dynptr); 39 + bpf_dynptr_file_discard(&dynptr); 40 + return 0; 41 + } 42 + 43 + SEC("xdp") 44 + __failure 45 + __msg("Expected an initialized dynptr as arg #0") 46 + int xdp_no_dynptr_type(struct xdp_md *xdp) 47 + { 48 + struct bpf_dynptr dynptr; 49 + 50 + bpf_dynptr_file_discard(&dynptr); 51 + return 0; 52 + }
-5
tools/testing/selftests/bpf/progs/ip_check_defrag.c
··· 12 12 #define IP_OFFSET 0x1FFF 13 13 #define NEXTHDR_FRAGMENT 44 14 14 15 - extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, 16 - struct bpf_dynptr *ptr__uninit) __ksym; 17 - extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, 18 - void *buffer, uint32_t buffer__sz) __ksym; 19 - 20 15 volatile int shootdowns = 0; 21 16 22 17 static bool is_frag_v4(struct iphdr *iph)
-5
tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
··· 79 79 return NF_ACCEPT; 80 80 } 81 81 82 - extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, 83 - struct bpf_dynptr *ptr__uninit) __ksym; 84 - extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, 85 - void *buffer, uint32_t buffer__sz) __ksym; 86 - 87 82 SEC("netfilter") 88 83 __description("netfilter test prog with skb and state read access") 89 84 __success __failure_unpriv