Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: introduce execmem_alloc() and execmem_free()

module_alloc() is used everywhere as a means to allocate memory for code.

Besides being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF, to modules and
puts the burden of code allocation on the modules code.

Several architectures override module_alloc() because of various
constraints on where the executable memory can be located, and this causes
additional obstacles for improvements of code allocation.

Start splitting code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs.

Initially, execmem_alloc() is a wrapper for module_alloc() and
execmem_free() is a replacement of module_memfree() to allow updating all
call sites to use the new APIs.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem_alloc() takes
a type argument that will be used to identify the calling subsystem and to
allow architectures to define parameters for ranges suitable for that
subsystem.

No functional changes.

Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Song Liu <song@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>

authored by

Mike Rapoport (IBM) and committed by
Luis Chamberlain
12af2b83 bc6b94d3

+128 -45
+3 -3
arch/powerpc/kernel/kprobes.c
··· 19 19 #include <linux/extable.h> 20 20 #include <linux/kdebug.h> 21 21 #include <linux/slab.h> 22 - #include <linux/moduleloader.h> 23 22 #include <linux/set_memory.h> 23 + #include <linux/execmem.h> 24 24 #include <asm/code-patching.h> 25 25 #include <asm/cacheflush.h> 26 26 #include <asm/sstep.h> ··· 130 130 { 131 131 void *page; 132 132 133 - page = module_alloc(PAGE_SIZE); 133 + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); 134 134 if (!page) 135 135 return NULL; 136 136 ··· 142 142 } 143 143 return page; 144 144 error: 145 - module_memfree(page); 145 + execmem_free(page); 146 146 return NULL; 147 147 } 148 148
+2 -2
arch/s390/kernel/ftrace.c
··· 7 7 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 8 8 */ 9 9 10 - #include <linux/moduleloader.h> 11 10 #include <linux/hardirq.h> 12 11 #include <linux/uaccess.h> 13 12 #include <linux/ftrace.h> 14 13 #include <linux/kernel.h> 15 14 #include <linux/types.h> 16 15 #include <linux/kprobes.h> 16 + #include <linux/execmem.h> 17 17 #include <trace/syscall.h> 18 18 #include <asm/asm-offsets.h> 19 19 #include <asm/text-patching.h> ··· 220 220 { 221 221 const char *start, *end; 222 222 223 - ftrace_plt = module_alloc(PAGE_SIZE); 223 + ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE); 224 224 if (!ftrace_plt) 225 225 panic("cannot allocate ftrace plt\n"); 226 226
+2 -2
arch/s390/kernel/kprobes.c
··· 9 9 10 10 #define pr_fmt(fmt) "kprobes: " fmt 11 11 12 - #include <linux/moduleloader.h> 13 12 #include <linux/kprobes.h> 14 13 #include <linux/ptrace.h> 15 14 #include <linux/preempt.h> ··· 20 21 #include <linux/slab.h> 21 22 #include <linux/hardirq.h> 22 23 #include <linux/ftrace.h> 24 + #include <linux/execmem.h> 23 25 #include <asm/set_memory.h> 24 26 #include <asm/sections.h> 25 27 #include <asm/dis.h> ··· 38 38 { 39 39 void *page; 40 40 41 - page = module_alloc(PAGE_SIZE); 41 + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); 42 42 if (!page) 43 43 return NULL; 44 44 set_memory_rox((unsigned long)page, 1);
+3 -2
arch/s390/kernel/module.c
··· 21 21 #include <linux/moduleloader.h> 22 22 #include <linux/bug.h> 23 23 #include <linux/memory.h> 24 + #include <linux/execmem.h> 24 25 #include <asm/alternative.h> 25 26 #include <asm/nospec-branch.h> 26 27 #include <asm/facility.h> ··· 77 76 #ifdef CONFIG_FUNCTION_TRACER 78 77 void module_arch_cleanup(struct module *mod) 79 78 { 80 - module_memfree(mod->arch.trampolines_start); 79 + execmem_free(mod->arch.trampolines_start); 81 80 } 82 81 #endif 83 82 ··· 511 510 512 511 size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size); 513 512 numpages = DIV_ROUND_UP(size, PAGE_SIZE); 514 - start = module_alloc(numpages * PAGE_SIZE); 513 + start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE); 515 514 if (!start) 516 515 return -ENOMEM; 517 516 set_memory_rox((unsigned long)start, numpages);
+4 -4
arch/sparc/net/bpf_jit_comp_32.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 - #include <linux/moduleloader.h> 3 2 #include <linux/workqueue.h> 4 3 #include <linux/netdevice.h> 5 4 #include <linux/filter.h> 6 5 #include <linux/cache.h> 7 6 #include <linux/if_vlan.h> 7 + #include <linux/execmem.h> 8 8 9 9 #include <asm/cacheflush.h> 10 10 #include <asm/ptrace.h> ··· 713 713 if (unlikely(proglen + ilen > oldproglen)) { 714 714 pr_err("bpb_jit_compile fatal error\n"); 715 715 kfree(addrs); 716 - module_memfree(image); 716 + execmem_free(image); 717 717 return; 718 718 } 719 719 memcpy(image + proglen, temp, ilen); ··· 736 736 break; 737 737 } 738 738 if (proglen == oldproglen) { 739 - image = module_alloc(proglen); 739 + image = execmem_alloc(EXECMEM_BPF, proglen); 740 740 if (!image) 741 741 goto out; 742 742 } ··· 758 758 void bpf_jit_free(struct bpf_prog *fp) 759 759 { 760 760 if (fp->jited) 761 - module_memfree(fp->bpf_func); 761 + execmem_free(fp->bpf_func); 762 762 763 763 bpf_prog_unlock_free(fp); 764 764 }
+3 -3
arch/x86/kernel/ftrace.c
··· 25 25 #include <linux/memory.h> 26 26 #include <linux/vmalloc.h> 27 27 #include <linux/set_memory.h> 28 + #include <linux/execmem.h> 28 29 29 30 #include <trace/syscall.h> 30 31 ··· 262 261 #ifdef CONFIG_X86_64 263 262 264 263 #ifdef CONFIG_MODULES 265 - #include <linux/moduleloader.h> 266 264 /* Module allocation simplifies allocating memory for code */ 267 265 static inline void *alloc_tramp(unsigned long size) 268 266 { 269 - return module_alloc(size); 267 + return execmem_alloc(EXECMEM_FTRACE, size); 270 268 } 271 269 static inline void tramp_free(void *tramp) 272 270 { 273 - module_memfree(tramp); 271 + execmem_free(tramp); 274 272 } 275 273 #else 276 274 /* Trampolines can only be created if modules are supported */
+2 -2
arch/x86/kernel/kprobes/core.c
··· 40 40 #include <linux/kgdb.h> 41 41 #include <linux/ftrace.h> 42 42 #include <linux/kasan.h> 43 - #include <linux/moduleloader.h> 44 43 #include <linux/objtool.h> 45 44 #include <linux/vmalloc.h> 46 45 #include <linux/pgtable.h> 47 46 #include <linux/set_memory.h> 48 47 #include <linux/cfi.h> 48 + #include <linux/execmem.h> 49 49 50 50 #include <asm/text-patching.h> 51 51 #include <asm/cacheflush.h> ··· 495 495 { 496 496 void *page; 497 497 498 - page = module_alloc(PAGE_SIZE); 498 + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); 499 499 if (!page) 500 500 return NULL; 501 501
+57
include/linux/execmem.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_EXECMEM_ALLOC_H 3 + #define _LINUX_EXECMEM_ALLOC_H 4 + 5 + #include <linux/types.h> 6 + #include <linux/moduleloader.h> 7 + 8 + /** 9 + * enum execmem_type - types of executable memory ranges 10 + * 11 + * There are several subsystems that allocate executable memory. 12 + * Architectures define different restrictions on placement, 13 + * permissions, alignment and other parameters for memory that can be used 14 + * by these subsystems. 15 + * Types in this enum identify subsystems that allocate executable memory 16 + * and let architectures define parameters for ranges suitable for 17 + * allocations by each subsystem. 18 + * 19 + * @EXECMEM_DEFAULT: default parameters that would be used for types that 20 + * are not explicitly defined. 21 + * @EXECMEM_MODULE_TEXT: parameters for module text sections 22 + * @EXECMEM_KPROBES: parameters for kprobes 23 + * @EXECMEM_FTRACE: parameters for ftrace 24 + * @EXECMEM_BPF: parameters for BPF 25 + * @EXECMEM_TYPE_MAX: 26 + */ 27 + enum execmem_type { 28 + EXECMEM_DEFAULT, 29 + EXECMEM_MODULE_TEXT = EXECMEM_DEFAULT, 30 + EXECMEM_KPROBES, 31 + EXECMEM_FTRACE, 32 + EXECMEM_BPF, 33 + EXECMEM_TYPE_MAX, 34 + }; 35 + 36 + /** 37 + * execmem_alloc - allocate executable memory 38 + * @type: type of the allocation 39 + * @size: how many bytes of memory are required 40 + * 41 + * Allocates memory that will contain executable code, either generated or 42 + * loaded from kernel modules. 43 + * 44 + * The memory will have protections defined by architecture for executable 45 + * region of the @type. 46 + * 47 + * Return: a pointer to the allocated memory or %NULL 48 + */ 49 + void *execmem_alloc(enum execmem_type type, size_t size); 50 + 51 + /** 52 + * execmem_free - free executable memory 53 + * @ptr: pointer to the memory that should be freed 54 + */ 55 + void execmem_free(void *ptr); 56 + 57 + #endif /* _LINUX_EXECMEM_ALLOC_H */
-3
include/linux/moduleloader.h
··· 29 29 sections. Returns NULL on failure. */ 30 30 void *module_alloc(unsigned long size); 31 31 32 - /* Free memory returned from module_alloc. */ 33 - void module_memfree(void *module_region); 34 - 35 32 /* Determines if the section name is an init section (that is only used during 36 33 * module loading). 37 34 */
+3 -3
kernel/bpf/core.c
··· 22 22 #include <linux/skbuff.h> 23 23 #include <linux/vmalloc.h> 24 24 #include <linux/random.h> 25 - #include <linux/moduleloader.h> 26 25 #include <linux/bpf.h> 27 26 #include <linux/btf.h> 28 27 #include <linux/objtool.h> ··· 36 37 #include <linux/nospec.h> 37 38 #include <linux/bpf_mem_alloc.h> 38 39 #include <linux/memcontrol.h> 40 + #include <linux/execmem.h> 39 41 40 42 #include <asm/barrier.h> 41 43 #include <asm/unaligned.h> ··· 1050 1050 1051 1051 void *__weak bpf_jit_alloc_exec(unsigned long size) 1052 1052 { 1053 - return module_alloc(size); 1053 + return execmem_alloc(EXECMEM_BPF, size); 1054 1054 } 1055 1055 1056 1056 void __weak bpf_jit_free_exec(void *addr) 1057 1057 { 1058 - module_memfree(addr); 1058 + execmem_free(addr); 1059 1059 } 1060 1060 1061 1061 struct bpf_binary_header *
+4 -4
kernel/kprobes.c
··· 26 26 #include <linux/slab.h> 27 27 #include <linux/stddef.h> 28 28 #include <linux/export.h> 29 - #include <linux/moduleloader.h> 30 29 #include <linux/kallsyms.h> 31 30 #include <linux/freezer.h> 32 31 #include <linux/seq_file.h> ··· 38 39 #include <linux/jump_label.h> 39 40 #include <linux/static_call.h> 40 41 #include <linux/perf_event.h> 42 + #include <linux/execmem.h> 41 43 42 44 #include <asm/sections.h> 43 45 #include <asm/cacheflush.h> ··· 113 113 void __weak *alloc_insn_page(void) 114 114 { 115 115 /* 116 - * Use module_alloc() so this page is within +/- 2GB of where the 116 + * Use execmem_alloc() so this page is within +/- 2GB of where the 117 117 * kernel image and loaded module images reside. This is required 118 118 * for most of the architectures. 119 119 * (e.g. x86-64 needs this to handle the %rip-relative fixups.) 120 120 */ 121 - return module_alloc(PAGE_SIZE); 121 + return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); 122 122 } 123 123 124 124 static void free_insn_page(void *page) 125 125 { 126 - module_memfree(page); 126 + execmem_free(page); 127 127 } 128 128 129 129 struct kprobe_insn_cache kprobe_insn_slots = {
+1
kernel/module/Kconfig
··· 2 2 menuconfig MODULES 3 3 bool "Enable loadable module support" 4 4 modules 5 + select EXECMEM 5 6 help 6 7 Kernel modules are small pieces of compiled code which can 7 8 be inserted in the running kernel, rather than being
+8 -17
kernel/module/main.c
··· 57 57 #include <linux/audit.h> 58 58 #include <linux/cfi.h> 59 59 #include <linux/debugfs.h> 60 + #include <linux/execmem.h> 60 61 #include <uapi/linux/module.h> 61 62 #include "internal.h" 62 63 ··· 1180 1179 return ksym; 1181 1180 } 1182 1181 1183 - void __weak module_memfree(void *module_region) 1184 - { 1185 - /* 1186 - * This memory may be RO, and freeing RO memory in an interrupt is not 1187 - * supported by vmalloc. 1188 - */ 1189 - WARN_ON(in_interrupt()); 1190 - vfree(module_region); 1191 - } 1192 - 1193 1182 void __weak module_arch_cleanup(struct module *mod) 1194 1183 { 1195 1184 } ··· 1204 1213 if (mod_mem_use_vmalloc(type)) 1205 1214 ptr = vmalloc(size); 1206 1215 else 1207 - ptr = module_alloc(size); 1216 + ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size); 1208 1217 1209 1218 if (!ptr) 1210 1219 return -ENOMEM; ··· 1235 1244 if (mod_mem_use_vmalloc(type)) 1236 1245 vfree(ptr); 1237 1246 else 1238 - module_memfree(ptr); 1247 + execmem_free(ptr); 1239 1248 } 1240 1249 1241 1250 static void free_mod_mem(struct module *mod) ··· 2487 2496 2488 2497 llist_for_each_safe(pos, n, list) { 2489 2498 initfree = container_of(pos, struct mod_initfree, node); 2490 - module_memfree(initfree->init_text); 2491 - module_memfree(initfree->init_data); 2492 - module_memfree(initfree->init_rodata); 2499 + execmem_free(initfree->init_text); 2500 + execmem_free(initfree->init_data); 2501 + execmem_free(initfree->init_rodata); 2493 2502 kfree(initfree); 2494 2503 } 2495 2504 } ··· 2599 2608 * We want to free module_init, but be aware that kallsyms may be 2600 2609 * walking this with preempt disabled. In all the failure paths, we 2601 2610 * call synchronize_rcu(), but we don't want to slow down the success 2602 - * path. module_memfree() cannot be called in an interrupt, so do the 2611 + * path. execmem_free() cannot be called in an interrupt, so do the 2603 2612 * work and call synchronize_rcu() in a work queue. 
2604 2613 * 2605 - * Note that module_alloc() on most architectures creates W+X page 2614 + * Note that execmem_alloc() on most architectures creates W+X page 2606 2615 * mappings which won't be cleaned up until do_free_init() runs. Any 2607 2616 * code such as mark_rodata_ro() which depends on those mappings to 2608 2617 * be cleaned up needs to sync with the queued work by invoking
+3
mm/Kconfig
··· 1240 1240 config IOMMU_MM_DATA 1241 1241 bool 1242 1242 1243 + config EXECMEM 1244 + bool 1245 + 1243 1246 source "mm/damon/Kconfig" 1244 1247 1245 1248 endmenu
+1
mm/Makefile
··· 133 133 obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o 134 134 obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o 135 135 obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o 136 + obj-$(CONFIG_EXECMEM) += execmem.o
+32
mm/execmem.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2002 Richard Henderson 4 + * Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM. 5 + * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org> 6 + * Copyright (C) 2024 Mike Rapoport IBM. 7 + */ 8 + 9 + #include <linux/mm.h> 10 + #include <linux/vmalloc.h> 11 + #include <linux/execmem.h> 12 + #include <linux/moduleloader.h> 13 + 14 + static void *__execmem_alloc(size_t size) 15 + { 16 + return module_alloc(size); 17 + } 18 + 19 + void *execmem_alloc(enum execmem_type type, size_t size) 20 + { 21 + return __execmem_alloc(size); 22 + } 23 + 24 + void execmem_free(void *ptr) 25 + { 26 + /* 27 + * This memory may be RO, and freeing RO memory in an interrupt is not 28 + * supported by vmalloc. 29 + */ 30 + WARN_ON(in_interrupt()); 31 + vfree(ptr); 32 + }