Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: 8220/1: allow modules outside of bl range

Loading modules far away from the kernel in memory is problematic
because the 'bl' instruction only has limited reach, and modules are not
built with PLTs. Instead of using the -mlong-calls option (which affects
all compiler emitted bl instructions, but not the ones in assembler),
this patch allocates some additional space at module load time, and
populates it with PLT like veneers when encountering relocations that
are out of range.

This should work with all relocations against symbols exported by the
kernel, including those resulting from GCC generated implicit function
calls for ftrace etc.

The module memory size increases by about 5% on average, regardless of
whether any PLT entries were actually needed. However, due to the page
based rounding that occurs when allocating module memory, the average
memory footprint increase is negligible.

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Authored by Ard Biesheuvel; committed by Russell King.
7d485f64 e748994f

+248 -3
+16 -1
arch/arm/Kconfig
··· 60 60 select HAVE_KPROBES if !XIP_KERNEL 61 61 select HAVE_KRETPROBES if (HAVE_KPROBES) 62 62 select HAVE_MEMBLOCK 63 - select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND 63 + select HAVE_MOD_ARCH_SPECIFIC 64 64 select HAVE_OPROFILE if (HAVE_PERF_EVENTS) 65 65 select HAVE_OPTPROBES if !THUMB2_KERNEL 66 66 select HAVE_PERF_EVENTS ··· 1680 1680 1681 1681 config ARCH_WANT_GENERAL_HUGETLB 1682 1682 def_bool y 1683 + 1684 + config ARM_MODULE_PLTS 1685 + bool "Use PLTs to allow module memory to spill over into vmalloc area" 1686 + depends on MODULES 1687 + help 1688 + Allocate PLTs when loading modules so that jumps and calls whose 1689 + targets are too far away for their relative offsets to be encoded 1690 + in the instructions themselves can be bounced via veneers in the 1691 + module's PLT. This allows modules to be allocated in the generic 1692 + vmalloc area after the dedicated module memory area has been 1693 + exhausted. The modules will use slightly more memory, but after 1694 + rounding up to page size, the actual memory footprint is usually 1695 + the same. 1696 + 1697 + Say y if you are getting out of memory errors while loading modules 1683 1698 1684 1699 source "mm/Kconfig" 1685 1700
+4
arch/arm/Makefile
··· 19 19 LDFLAGS_MODULE += --be8 20 20 endif 21 21 22 + ifeq ($(CONFIG_ARM_MODULE_PLTS),y) 23 + LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds 24 + endif 25 + 22 26 OBJCOPYFLAGS :=-O binary -R .comment -S 23 27 GZFLAGS :=-9 24 28 #KBUILD_CFLAGS +=-pipe
+11 -1
arch/arm/include/asm/module.h
··· 16 16 ARM_SEC_UNLIKELY, 17 17 ARM_SEC_MAX, 18 18 }; 19 + #endif 19 20 20 21 struct mod_arch_specific { 22 + #ifdef CONFIG_ARM_UNWIND 21 23 struct unwind_table *unwind[ARM_SEC_MAX]; 22 - }; 23 24 #endif 25 + #ifdef CONFIG_ARM_MODULE_PLTS 26 + struct elf32_shdr *core_plt; 27 + struct elf32_shdr *init_plt; 28 + int core_plt_count; 29 + int init_plt_count; 30 + #endif 31 + }; 32 + 33 + u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val); 24 34 25 35 /* 26 36 * Add the ARM architecture version to the version magic string
+1
arch/arm/kernel/Makefile
··· 34 34 obj-$(CONFIG_ISA_DMA_API) += dma.o 35 35 obj-$(CONFIG_FIQ) += fiq.o fiqasm.o 36 36 obj-$(CONFIG_MODULES) += armksyms.o module.o 37 + obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o 37 38 obj-$(CONFIG_ISA_DMA) += dma-isa.o 38 39 obj-$(CONFIG_PCI) += bios32.o isa.o 39 40 obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o
+181
arch/arm/kernel/module-plts.c
··· 1 + /* 2 + * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + #include <linux/elf.h> 10 + #include <linux/kernel.h> 11 + #include <linux/module.h> 12 + 13 + #include <asm/cache.h> 14 + #include <asm/opcodes.h> 15 + 16 + #define PLT_ENT_STRIDE L1_CACHE_BYTES 17 + #define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) 18 + #define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) 19 + 20 + #ifdef CONFIG_THUMB2_KERNEL 21 + #define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \ 22 + (PLT_ENT_STRIDE - 4)) 23 + #else 24 + #define PLT_ENT_LDR __opcode_to_mem_arm(0xe59ff000 | \ 25 + (PLT_ENT_STRIDE - 8)) 26 + #endif 27 + 28 + struct plt_entries { 29 + u32 ldr[PLT_ENT_COUNT]; 30 + u32 lit[PLT_ENT_COUNT]; 31 + }; 32 + 33 + static bool in_init(const struct module *mod, u32 addr) 34 + { 35 + return addr - (u32)mod->module_init < mod->init_size; 36 + } 37 + 38 + u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) 39 + { 40 + struct plt_entries *plt, *plt_end; 41 + int c, *count; 42 + 43 + if (in_init(mod, loc)) { 44 + plt = (void *)mod->arch.init_plt->sh_addr; 45 + plt_end = (void *)plt + mod->arch.init_plt->sh_size; 46 + count = &mod->arch.init_plt_count; 47 + } else { 48 + plt = (void *)mod->arch.core_plt->sh_addr; 49 + plt_end = (void *)plt + mod->arch.core_plt->sh_size; 50 + count = &mod->arch.core_plt_count; 51 + } 52 + 53 + /* Look for an existing entry pointing to 'val' */ 54 + for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) { 55 + int i; 56 + 57 + if (!c) { 58 + /* Populate a new set of entries */ 59 + *plt = (struct plt_entries){ 60 + { [0 ... 
PLT_ENT_COUNT - 1] = PLT_ENT_LDR, }, 61 + { val, } 62 + }; 63 + ++*count; 64 + return (u32)plt->ldr; 65 + } 66 + for (i = 0; i < PLT_ENT_COUNT; i++) { 67 + if (!plt->lit[i]) { 68 + plt->lit[i] = val; 69 + ++*count; 70 + } 71 + if (plt->lit[i] == val) 72 + return (u32)&plt->ldr[i]; 73 + } 74 + } 75 + BUG(); 76 + } 77 + 78 + static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num, 79 + u32 mask) 80 + { 81 + u32 *loc1, *loc2; 82 + int i; 83 + 84 + for (i = 0; i < num; i++) { 85 + if (rel[i].r_info != rel[num].r_info) 86 + continue; 87 + 88 + /* 89 + * Identical relocation types against identical symbols can 90 + * still result in different PLT entries if the addend in the 91 + * place is different. So resolve the target of the relocation 92 + * to compare the values. 93 + */ 94 + loc1 = (u32 *)(base + rel[i].r_offset); 95 + loc2 = (u32 *)(base + rel[num].r_offset); 96 + if (((*loc1 ^ *loc2) & mask) == 0) 97 + return 1; 98 + } 99 + return 0; 100 + } 101 + 102 + /* Count how many PLT entries we may need */ 103 + static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num) 104 + { 105 + unsigned int ret = 0; 106 + int i; 107 + 108 + /* 109 + * Sure, this is order(n^2), but it's usually short, and not 110 + * time critical 111 + */ 112 + for (i = 0; i < num; i++) 113 + switch (ELF32_R_TYPE(rel[i].r_info)) { 114 + case R_ARM_CALL: 115 + case R_ARM_PC24: 116 + case R_ARM_JUMP24: 117 + if (!duplicate_rel(base, rel, i, 118 + __opcode_to_mem_arm(0x00ffffff))) 119 + ret++; 120 + break; 121 + case R_ARM_THM_CALL: 122 + case R_ARM_THM_JUMP24: 123 + if (!duplicate_rel(base, rel, i, 124 + __opcode_to_mem_thumb32(0x07ff2fff))) 125 + ret++; 126 + } 127 + return ret; 128 + } 129 + 130 + int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, 131 + char *secstrings, struct module *mod) 132 + { 133 + unsigned long core_plts = 0, init_plts = 0; 134 + Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; 135 + 136 + /* 137 + * To store the PLTs, 
we expand the .text section for core module code 138 + * and the .init.text section for initialization code. 139 + */ 140 + for (s = sechdrs; s < sechdrs_end; ++s) 141 + if (strcmp(".core.plt", secstrings + s->sh_name) == 0) 142 + mod->arch.core_plt = s; 143 + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) 144 + mod->arch.init_plt = s; 145 + 146 + if (!mod->arch.core_plt || !mod->arch.init_plt) { 147 + pr_err("%s: sections missing\n", mod->name); 148 + return -ENOEXEC; 149 + } 150 + 151 + for (s = sechdrs + 1; s < sechdrs_end; ++s) { 152 + const Elf32_Rel *rels = (void *)ehdr + s->sh_offset; 153 + int numrels = s->sh_size / sizeof(Elf32_Rel); 154 + Elf32_Shdr *dstsec = sechdrs + s->sh_info; 155 + 156 + if (s->sh_type != SHT_REL) 157 + continue; 158 + 159 + if (strstr(secstrings + s->sh_name, ".init")) 160 + init_plts += count_plts(dstsec->sh_addr, rels, numrels); 161 + else 162 + core_plts += count_plts(dstsec->sh_addr, rels, numrels); 163 + } 164 + 165 + mod->arch.core_plt->sh_type = SHT_NOBITS; 166 + mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; 167 + mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES; 168 + mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, 169 + sizeof(struct plt_entries)); 170 + mod->arch.core_plt_count = 0; 171 + 172 + mod->arch.init_plt->sh_type = SHT_NOBITS; 173 + mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; 174 + mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES; 175 + mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, 176 + sizeof(struct plt_entries)); 177 + mod->arch.init_plt_count = 0; 178 + pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__, 179 + mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size); 180 + return 0; 181 + }
+31 -1
arch/arm/kernel/module.c
··· 40 40 #ifdef CONFIG_MMU 41 41 void *module_alloc(unsigned long size) 42 42 { 43 - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, 43 + void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, 44 + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, 45 + __builtin_return_address(0)); 46 + if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p) 47 + return p; 48 + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, 44 49 GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, 45 50 __builtin_return_address(0)); 46 51 } ··· 115 110 offset -= 0x04000000; 116 111 117 112 offset += sym->st_value - loc; 113 + 114 + /* 115 + * Route through a PLT entry if 'offset' exceeds the 116 + * supported range. Note that 'offset + loc + 8' 117 + * contains the absolute jump target, i.e., 118 + * @sym + addend, corrected for the +8 PC bias. 119 + */ 120 + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && 121 + (offset <= (s32)0xfe000000 || 122 + offset >= (s32)0x02000000)) 123 + offset = get_module_plt(module, loc, 124 + offset + loc + 8) 125 + - loc - 8; 126 + 118 127 if (offset <= (s32)0xfe000000 || 119 128 offset >= (s32)0x02000000) { 120 129 pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", ··· 221 202 if (offset & 0x01000000) 222 203 offset -= 0x02000000; 223 204 offset += sym->st_value - loc; 205 + 206 + /* 207 + * Route through a PLT entry if 'offset' exceeds the 208 + * supported range. 209 + */ 210 + if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) && 211 + (offset <= (s32)0xff000000 || 212 + offset >= (s32)0x01000000)) 213 + offset = get_module_plt(module, loc, 214 + offset + loc + 4) 215 + - loc - 4; 224 216 225 217 if (offset <= (s32)0xff000000 || 226 218 offset >= (s32)0x01000000) {
+4
arch/arm/kernel/module.lds
··· 1 + SECTIONS { 2 + .core.plt : { BYTE(0) } 3 + .init.plt : { BYTE(0) } 4 + }