Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Select proper size for bpf_prog_pack

Using HPAGE_PMD_SIZE as the size for bpf_prog_pack is not ideal in some
cases. Specifically, for NUMA systems, __vmalloc_node_range requires
PMD_SIZE * num_online_nodes() to allocate huge pages. Also, if the system
does not support huge pages (i.e., with cmdline option nohugevmalloc), it
is better to use PAGE_SIZE packs.

Add logic to select proper size for bpf_prog_pack. This solution is not
ideal, as it makes assumptions about the behavior of module_alloc and
__vmalloc_node_range. However, it appears to be the easiest solution as
it doesn't require changes in module_alloc and vmalloc code.

Fixes: 57631054fae6 ("bpf: Introduce bpf_prog_pack allocator")
Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220311201135.3573610-1-song@kernel.org

authored by

Song Liu and committed by
Alexei Starovoitov
ef078600 46e9244b

+47 -23
+47 -23
kernel/bpf/core.c
··· 33 33 #include <linux/extable.h> 34 34 #include <linux/log2.h> 35 35 #include <linux/bpf_verifier.h> 36 + #include <linux/nodemask.h> 36 37 37 38 #include <asm/barrier.h> 38 39 #include <asm/unaligned.h> ··· 816 815 * allocator. The prog_pack allocator uses HPAGE_PMD_SIZE page (2MB on x86) 817 816 * to host BPF programs. 818 817 */ 819 - #ifdef CONFIG_TRANSPARENT_HUGEPAGE 820 - #define BPF_PROG_PACK_SIZE HPAGE_PMD_SIZE 821 - #else 822 - #define BPF_PROG_PACK_SIZE PAGE_SIZE 823 - #endif 824 818 #define BPF_PROG_CHUNK_SHIFT 6 825 819 #define BPF_PROG_CHUNK_SIZE (1 << BPF_PROG_CHUNK_SHIFT) 826 820 #define BPF_PROG_CHUNK_MASK (~(BPF_PROG_CHUNK_SIZE - 1)) 827 - #define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE) 828 821 829 822 struct bpf_prog_pack { 830 823 struct list_head list; ··· 826 831 unsigned long bitmap[]; 827 832 }; 828 833 829 - #define BPF_PROG_MAX_PACK_PROG_SIZE BPF_PROG_PACK_SIZE 830 834 #define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE) 835 + 836 + static size_t bpf_prog_pack_size = -1; 837 + 838 + static int bpf_prog_chunk_count(void) 839 + { 840 + WARN_ON_ONCE(bpf_prog_pack_size == -1); 841 + return bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE; 842 + } 831 843 832 844 static DEFINE_MUTEX(pack_mutex); 833 845 static LIST_HEAD(pack_list); 846 + 847 + static size_t select_bpf_prog_pack_size(void) 848 + { 849 + size_t size; 850 + void *ptr; 851 + 852 + size = PMD_SIZE * num_online_nodes(); 853 + ptr = module_alloc(size); 854 + 855 + /* Test whether we can get huge pages. If not just use PAGE_SIZE 856 + * packs. 
857 + */ 858 + if (!ptr || !is_vm_area_hugepages(ptr)) 859 + size = PAGE_SIZE; 860 + 861 + vfree(ptr); 862 + return size; 863 + } 834 864 835 865 static struct bpf_prog_pack *alloc_new_pack(void) 836 866 { 837 867 struct bpf_prog_pack *pack; 838 868 839 - pack = kzalloc(sizeof(*pack) + BITS_TO_BYTES(BPF_PROG_CHUNK_COUNT), GFP_KERNEL); 869 + pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(bpf_prog_chunk_count())), 870 + GFP_KERNEL); 840 871 if (!pack) 841 872 return NULL; 842 - pack->ptr = module_alloc(BPF_PROG_PACK_SIZE); 873 + pack->ptr = module_alloc(bpf_prog_pack_size); 843 874 if (!pack->ptr) { 844 875 kfree(pack); 845 876 return NULL; 846 877 } 847 - bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE); 878 + bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE); 848 879 list_add_tail(&pack->list, &pack_list); 849 880 850 881 set_vm_flush_reset_perms(pack->ptr); 851 - set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); 852 - set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); 882 + set_memory_ro((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE); 883 + set_memory_x((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE); 853 884 return pack; 854 885 } 855 886 ··· 886 865 unsigned long pos; 887 866 void *ptr = NULL; 888 867 889 - if (size > BPF_PROG_MAX_PACK_PROG_SIZE) { 868 + mutex_lock(&pack_mutex); 869 + if (bpf_prog_pack_size == -1) 870 + bpf_prog_pack_size = select_bpf_prog_pack_size(); 871 + 872 + if (size > bpf_prog_pack_size) { 890 873 size = round_up(size, PAGE_SIZE); 891 874 ptr = module_alloc(size); 892 875 if (ptr) { ··· 898 873 set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); 899 874 set_memory_x((unsigned long)ptr, size / PAGE_SIZE); 900 875 } 901 - return ptr; 876 + goto out; 902 877 } 903 - mutex_lock(&pack_mutex); 904 878 list_for_each_entry(pack, &pack_list, list) { 905 - pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, 879 + pos = 
bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0, 906 880 nbits, 0); 907 - if (pos < BPF_PROG_CHUNK_COUNT) 881 + if (pos < bpf_prog_chunk_count()) 908 882 goto found_free_area; 909 883 } 910 884 ··· 929 905 unsigned long pos; 930 906 void *pack_ptr; 931 907 932 - if (hdr->size > BPF_PROG_MAX_PACK_PROG_SIZE) { 908 + mutex_lock(&pack_mutex); 909 + if (hdr->size > bpf_prog_pack_size) { 933 910 module_memfree(hdr); 934 - return; 911 + goto out; 935 912 } 936 913 937 - pack_ptr = (void *)((unsigned long)hdr & ~(BPF_PROG_PACK_SIZE - 1)); 938 - mutex_lock(&pack_mutex); 914 + pack_ptr = (void *)((unsigned long)hdr & ~(bpf_prog_pack_size - 1)); 939 915 940 916 list_for_each_entry(tmp, &pack_list, list) { 941 917 if (tmp->ptr == pack_ptr) { ··· 951 927 pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT; 952 928 953 929 bitmap_clear(pack->bitmap, pos, nbits); 954 - if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, 955 - BPF_PROG_CHUNK_COUNT, 0) == 0) { 930 + if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0, 931 + bpf_prog_chunk_count(), 0) == 0) { 956 932 list_del(&pack->list); 957 933 module_memfree(pack->ptr); 958 934 kfree(pack);