Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: add support for kernel ASLR

This adds support for KASLR, implemented based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.

If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.

If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

authored by

Ard Biesheuvel and committed by
Catalin Marinas
f80fb3a3 1e48ef7f

+329 -22
+29
arch/arm64/Kconfig
··· 798 798 relocation pass at runtime even if the kernel is loaded at the 799 799 same address it was linked at. 800 800 801 + config RANDOMIZE_BASE 802 + bool "Randomize the address of the kernel image" 803 + select ARM64_MODULE_PLTS 804 + select RELOCATABLE 805 + help 806 + Randomizes the virtual address at which the kernel image is 807 + loaded, as a security feature that deters exploit attempts 808 + relying on knowledge of the location of kernel internals. 809 + 810 + It is the bootloader's job to provide entropy, by passing a 811 + random u64 value in /chosen/kaslr-seed at kernel entry. 812 + 813 + If unsure, say N. 814 + 815 + config RANDOMIZE_MODULE_REGION_FULL 816 + bool "Randomize the module region independently from the core kernel" 817 + depends on RANDOMIZE_BASE 818 + default y 819 + help 820 + Randomizes the location of the module region without considering the 821 + location of the core kernel. This way, it is impossible for modules 822 + to leak information about the location of core kernel data structures 823 + but it does imply that function calls between modules and the core 824 + kernel will need to be resolved via veneers in the module PLT. 825 + 826 + When this option is not set, the module region will be randomized over 827 + a limited range that contains the [_stext, _etext] interval of the 828 + core kernel, so branch relocations are always in range. 829 + 801 830 endmenu 802 831 803 832 menu "Boot options"
+4 -1
arch/arm64/include/asm/memory.h
··· 53 53 #define KIMAGE_VADDR (MODULES_END) 54 54 #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) 55 55 #define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) 56 - #define MODULES_VSIZE (SZ_64M) 56 + #define MODULES_VSIZE (SZ_128M) 57 57 #define PCI_IO_END (PAGE_OFFSET - SZ_2M) 58 58 #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) 59 59 #define FIXADDR_TOP (PCI_IO_START - SZ_2M) ··· 138 138 extern phys_addr_t memstart_addr; 139 139 /* PHYS_OFFSET - the physical address of the start of memory. */ 140 140 #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) 141 + 142 + /* the virtual base of the kernel image (minus TEXT_OFFSET) */ 143 + extern u64 kimage_vaddr; 141 144 142 145 /* the offset between the kernel virtual and physical mappings */ 143 146 extern u64 kimage_voffset;
+6
arch/arm64/include/asm/module.h
··· 31 31 u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, 32 32 Elf64_Sym *sym); 33 33 34 + #ifdef CONFIG_RANDOMIZE_BASE 35 + extern u64 module_alloc_base; 36 + #else 37 + #define module_alloc_base ((u64)_etext - MODULES_VSIZE) 38 + #endif 39 + 34 40 #endif /* __ASM_MODULE_H */
+1
arch/arm64/kernel/Makefile
··· 44 44 arm64-obj-$(CONFIG_ACPI) += acpi.o 45 45 arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o 46 46 arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o 47 + arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o 47 48 48 49 obj-y += $(arm64-obj-y) vdso/ 49 50 obj-m += $(arm64-obj-m)
+51 -8
arch/arm64/kernel/head.S
··· 210 210 ENTRY(stext) 211 211 bl preserve_boot_args 212 212 bl el2_setup // Drop to EL1, w20=cpu_boot_mode 213 + mov x23, xzr // KASLR offset, defaults to 0 213 214 adrp x24, __PHYS_OFFSET 214 215 bl set_cpu_boot_mode_flag 215 216 bl __create_page_tables // x25=TTBR0, x26=TTBR1 ··· 314 313 __create_page_tables: 315 314 adrp x25, idmap_pg_dir 316 315 adrp x26, swapper_pg_dir 317 - mov x27, lr 316 + mov x28, lr 318 317 319 318 /* 320 319 * Invalidate the idmap and swapper page tables to avoid potential ··· 393 392 */ 394 393 mov x0, x26 // swapper_pg_dir 395 394 ldr x5, =KIMAGE_VADDR 395 + add x5, x5, x23 // add KASLR displacement 396 396 create_pgd_entry x0, x5, x3, x6 397 397 ldr w6, kernel_img_size 398 398 add x6, x6, x5 ··· 410 408 dmb sy 411 409 bl __inval_cache_range 412 410 413 - mov lr, x27 414 - ret 411 + ret x28 415 412 ENDPROC(__create_page_tables) 416 413 417 414 kernel_img_size: ··· 422 421 */ 423 422 .set initial_sp, init_thread_union + THREAD_START_SP 424 423 __mmap_switched: 424 + mov x28, lr // preserve LR 425 425 adr_l x8, vectors // load VBAR_EL1 with virtual 426 426 msr vbar_el1, x8 // vector table address 427 427 isb ··· 451 449 ldr x13, [x9, #-8] 452 450 cmp w12, #R_AARCH64_RELATIVE 453 451 b.ne 1f 454 - str x13, [x11] 452 + add x13, x13, x23 // relocate 453 + str x13, [x11, x23] 455 454 b 0b 456 455 457 456 1: cmp w12, #R_AARCH64_ABS64 458 457 b.ne 0b 459 458 add x12, x12, x12, lsl #1 // symtab offset: 24x top word 460 459 add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word 460 + ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx 461 461 ldr x15, [x12, #8] // Elf64_Sym::st_value 462 + cmp w14, #-0xf // SHN_ABS (0xfff1) ? 
463 + add x14, x15, x23 // relocate 464 + csel x15, x14, x15, ne 462 465 add x15, x13, x15 463 - str x15, [x11] 466 + str x15, [x11, x23] 464 467 b 0b 465 468 466 - 2: 469 + 2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr 470 + dc cvac, x8 // value visible to secondaries 471 + dsb sy // with MMU off 467 472 #endif 468 473 469 474 adr_l sp, initial_sp, x4 ··· 479 470 msr sp_el0, x4 // Save thread_info 480 471 str_l x21, __fdt_pointer, x5 // Save FDT pointer 481 472 482 - ldr x4, =KIMAGE_VADDR // Save the offset between 473 + ldr_l x4, kimage_vaddr // Save the offset between 483 474 sub x4, x4, x24 // the kernel virtual and 484 475 str_l x4, kimage_voffset, x5 // physical mappings 485 476 486 477 mov x29, #0 487 478 #ifdef CONFIG_KASAN 488 479 bl kasan_early_init 480 + #endif 481 + #ifdef CONFIG_RANDOMIZE_BASE 482 + cbnz x23, 0f // already running randomized? 483 + mov x0, x21 // pass FDT address in x0 484 + bl kaslr_early_init // parse FDT for KASLR options 485 + cbz x0, 0f // KASLR disabled? just proceed 486 + mov x23, x0 // record KASLR offset 487 + ret x28 // we must enable KASLR, return 488 + // to __enable_mmu() 489 + 0: 489 490 #endif 490 491 b start_kernel 491 492 ENDPROC(__mmap_switched) ··· 505 486 * hotplug and needs to have the same protections as the text region 506 487 */ 507 488 .section ".text","ax" 489 + 490 + ENTRY(kimage_vaddr) 491 + .quad _text - TEXT_OFFSET 492 + 508 493 /* 509 494 * If we're fortunate enough to boot at EL2, ensure that the world is 510 495 * sane before dropping to EL1. 
··· 674 651 adrp x26, swapper_pg_dir 675 652 bl __cpu_setup // initialise processor 676 653 677 - ldr x8, =KIMAGE_VADDR 654 + ldr x8, kimage_vaddr 678 655 ldr w9, 0f 679 656 sub x27, x8, w9, sxtw // address to jump to after enabling the MMU 680 657 b __enable_mmu ··· 707 684 */ 708 685 .section ".idmap.text", "ax" 709 686 __enable_mmu: 687 + mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value 710 688 mrs x1, ID_AA64MMFR0_EL1 711 689 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 712 690 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED ··· 725 701 ic iallu 726 702 dsb nsh 727 703 isb 704 + #ifdef CONFIG_RANDOMIZE_BASE 705 + mov x19, x0 // preserve new SCTLR_EL1 value 706 + blr x27 707 + 708 + /* 709 + * If we return here, we have a KASLR displacement in x23 which we need 710 + * to take into account by discarding the current kernel mapping and 711 + * creating a new one. 712 + */ 713 + msr sctlr_el1, x18 // disable the MMU 714 + isb 715 + bl __create_page_tables // recreate kernel mapping 716 + 717 + msr sctlr_el1, x19 // re-enable the MMU 718 + isb 719 + ic ialluis // flush instructions fetched 720 + isb // via old mapping 721 + add x27, x27, x23 // relocated __mmap_switched 722 + #endif 728 723 br x27 729 724 ENDPROC(__enable_mmu) 730 725
+173
arch/arm64/kernel/kaslr.c
··· 1 + /* 2 + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + #include <linux/crc32.h> 10 + #include <linux/init.h> 11 + #include <linux/libfdt.h> 12 + #include <linux/mm_types.h> 13 + #include <linux/sched.h> 14 + #include <linux/types.h> 15 + 16 + #include <asm/fixmap.h> 17 + #include <asm/kernel-pgtable.h> 18 + #include <asm/memory.h> 19 + #include <asm/mmu.h> 20 + #include <asm/pgtable.h> 21 + #include <asm/sections.h> 22 + 23 + u64 __read_mostly module_alloc_base; 24 + 25 + static __init u64 get_kaslr_seed(void *fdt) 26 + { 27 + int node, len; 28 + u64 *prop; 29 + u64 ret; 30 + 31 + node = fdt_path_offset(fdt, "/chosen"); 32 + if (node < 0) 33 + return 0; 34 + 35 + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); 36 + if (!prop || len != sizeof(u64)) 37 + return 0; 38 + 39 + ret = fdt64_to_cpu(*prop); 40 + *prop = 0; 41 + return ret; 42 + } 43 + 44 + static __init const u8 *get_cmdline(void *fdt) 45 + { 46 + static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; 47 + 48 + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { 49 + int node; 50 + const u8 *prop; 51 + 52 + node = fdt_path_offset(fdt, "/chosen"); 53 + if (node < 0) 54 + goto out; 55 + 56 + prop = fdt_getprop(fdt, node, "bootargs", NULL); 57 + if (!prop) 58 + goto out; 59 + return prop; 60 + } 61 + out: 62 + return default_cmdline; 63 + } 64 + 65 + extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, 66 + pgprot_t prot); 67 + 68 + /* 69 + * This routine will be executed with the kernel mapped at its default virtual 70 + * address, and if it returns successfully, the kernel will be remapped, and 71 + * start_kernel() will be executed from a randomized virtual offset. 
The 72 + * relocation will result in all absolute references (e.g., static variables 73 + * containing function pointers) to be reinitialized, and zero-initialized 74 + * .bss variables will be reset to 0. 75 + */ 76 + u64 __init kaslr_early_init(u64 dt_phys) 77 + { 78 + void *fdt; 79 + u64 seed, offset, mask, module_range; 80 + const u8 *cmdline, *str; 81 + int size; 82 + 83 + /* 84 + * Set a reasonable default for module_alloc_base in case 85 + * we end up running with module randomization disabled. 86 + */ 87 + module_alloc_base = (u64)_etext - MODULES_VSIZE; 88 + 89 + /* 90 + * Try to map the FDT early. If this fails, we simply bail, 91 + * and proceed with KASLR disabled. We will make another 92 + * attempt at mapping the FDT in setup_machine() 93 + */ 94 + early_fixmap_init(); 95 + fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); 96 + if (!fdt) 97 + return 0; 98 + 99 + /* 100 + * Retrieve (and wipe) the seed from the FDT 101 + */ 102 + seed = get_kaslr_seed(fdt); 103 + if (!seed) 104 + return 0; 105 + 106 + /* 107 + * Check if 'nokaslr' appears on the command line, and 108 + * return 0 if that is the case. 109 + */ 110 + cmdline = get_cmdline(fdt); 111 + str = strstr(cmdline, "nokaslr"); 112 + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) 113 + return 0; 114 + 115 + /* 116 + * OK, so we are proceeding with KASLR enabled. Calculate a suitable 117 + * kernel image offset from the seed. Let's place the kernel in the 118 + * lower half of the VMALLOC area (VA_BITS - 2). 119 + * Even if we could randomize at page granularity for 16k and 64k pages, 120 + * let's always round to 2 MB so we don't interfere with the ability to 121 + * map using contiguous PTEs 122 + */ 123 + mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); 124 + offset = seed & mask; 125 + 126 + /* 127 + * The kernel Image should not extend across a 1GB/32MB/512MB alignment 128 + * boundary (for 4KB/16KB/64KB granule kernels, respectively). 
If this 129 + * happens, increase the KASLR offset by the size of the kernel image. 130 + */ 131 + if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != 132 + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) 133 + offset = (offset + (u64)(_end - _text)) & mask; 134 + 135 + if (IS_ENABLED(CONFIG_KASAN)) 136 + /* 137 + * KASAN does not expect the module region to intersect the 138 + * vmalloc region, since shadow memory is allocated for each 139 + * module at load time, whereas the vmalloc region is shadowed 140 + * by KASAN zero pages. So keep modules out of the vmalloc 141 + * region if KASAN is enabled. 142 + */ 143 + return offset; 144 + 145 + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { 146 + /* 147 + * Randomize the module region independently from the core 148 + * kernel. This prevents modules from leaking any information 149 + * about the address of the kernel itself, but results in 150 + * branches between modules and the core kernel that are 151 + * resolved via PLTs. (Branches between modules will be 152 + * resolved normally.) 153 + */ 154 + module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE; 155 + module_alloc_base = VMALLOC_START; 156 + } else { 157 + /* 158 + * Randomize the module region by setting module_alloc_base to 159 + * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, 160 + * _stext) . This guarantees that the resulting region still 161 + * covers [_stext, _etext], and that all relative branches can 162 + * be resolved without veneers. 163 + */ 164 + module_range = MODULES_VSIZE - (u64)(_etext - _stext); 165 + module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; 166 + } 167 + 168 + /* use the lower 21 bits to randomize the base of the module region */ 169 + module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; 170 + module_alloc_base &= PAGE_MASK; 171 + 172 + return offset; 173 + }
+2 -1
arch/arm64/kernel/module.c
··· 34 34 { 35 35 void *p; 36 36 37 - p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, 37 + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, 38 + module_alloc_base + MODULES_VSIZE, 38 39 GFP_KERNEL, PAGE_KERNEL_EXEC, 0, 39 40 NUMA_NO_NODE, __builtin_return_address(0)); 40 41
+29
arch/arm64/kernel/setup.c
··· 388 388 return 0; 389 389 } 390 390 subsys_initcall(topology_init); 391 + 392 + /* 393 + * Dump out kernel offset information on panic. 394 + */ 395 + static int dump_kernel_offset(struct notifier_block *self, unsigned long v, 396 + void *p) 397 + { 398 + u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR; 399 + 400 + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) { 401 + pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n", 402 + kaslr_offset, KIMAGE_VADDR); 403 + } else { 404 + pr_emerg("Kernel Offset: disabled\n"); 405 + } 406 + return 0; 407 + } 408 + 409 + static struct notifier_block kernel_offset_notifier = { 410 + .notifier_call = dump_kernel_offset 411 + }; 412 + 413 + static int __init register_kernel_offset_dumper(void) 414 + { 415 + atomic_notifier_chain_register(&panic_notifier_list, 416 + &kernel_offset_notifier); 417 + return 0; 418 + } 419 + __initcall(register_kernel_offset_dumper);
+14 -3
arch/arm64/mm/kasan_init.c
··· 129 129 void __init kasan_init(void) 130 130 { 131 131 u64 kimg_shadow_start, kimg_shadow_end; 132 + u64 mod_shadow_start, mod_shadow_end; 132 133 struct memblock_region *reg; 133 134 int i; 134 135 135 136 kimg_shadow_start = (u64)kasan_mem_to_shadow(_text); 136 137 kimg_shadow_end = (u64)kasan_mem_to_shadow(_end); 138 + 139 + mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); 140 + mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); 137 141 138 142 /* 139 143 * We are going to perform proper setup of shadow memory. ··· 162 158 * with PMD table mappings at the edges of the shadow region for the 163 159 * kernel image. 164 160 */ 165 - if (ARM64_SWAPPER_USES_SECTION_MAPS) 161 + if (ARM64_SWAPPER_USES_SECTION_MAPS) { 162 + kimg_shadow_start = round_down(kimg_shadow_start, 163 + SWAPPER_BLOCK_SIZE); 166 164 kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE); 165 + } 167 166 168 167 kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, 169 - kasan_mem_to_shadow((void *)MODULES_VADDR)); 168 + (void *)mod_shadow_start); 170 169 kasan_populate_zero_shadow((void *)kimg_shadow_end, 171 - kasan_mem_to_shadow((void *)PAGE_OFFSET)); 170 + kasan_mem_to_shadow((void *)PAGE_OFFSET)); 171 + 172 + if (kimg_shadow_start > mod_shadow_end) 173 + kasan_populate_zero_shadow((void *)mod_shadow_end, 174 + (void *)kimg_shadow_start); 172 175 173 176 for_each_memblock(memory, reg) { 174 177 void *start = (void *)__phys_to_virt(reg->base);
+20 -9
arch/arm64/mm/mmu.c
··· 678 678 unsigned long addr = FIXADDR_START; 679 679 680 680 pgd = pgd_offset_k(addr); 681 - if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) { 681 + if (CONFIG_PGTABLE_LEVELS > 3 && 682 + !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) { 682 683 /* 683 684 * We only end up here if the kernel mapping and the fixmap 684 685 * share the top level pgd entry, which should only happen on ··· 736 735 } 737 736 } 738 737 739 - void *__init fixmap_remap_fdt(phys_addr_t dt_phys) 738 + void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) 740 739 { 741 740 const u64 dt_virt_base = __fix_to_virt(FIX_FDT); 742 - pgprot_t prot = PAGE_KERNEL_RO; 743 - int size, offset; 741 + int offset; 744 742 void *dt_virt; 745 743 746 744 /* ··· 778 778 if (fdt_check_header(dt_virt) != 0) 779 779 return NULL; 780 780 781 - size = fdt_totalsize(dt_virt); 782 - if (size > MAX_FDT_SIZE) 781 + *size = fdt_totalsize(dt_virt); 782 + if (*size > MAX_FDT_SIZE) 783 783 return NULL; 784 784 785 - if (offset + size > SWAPPER_BLOCK_SIZE) 785 + if (offset + *size > SWAPPER_BLOCK_SIZE) 786 786 create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, 787 - round_up(offset + size, SWAPPER_BLOCK_SIZE), prot); 787 + round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot); 788 + 789 + return dt_virt; 790 + } 791 + 792 + void *__init fixmap_remap_fdt(phys_addr_t dt_phys) 793 + { 794 + void *dt_virt; 795 + int size; 796 + 797 + dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); 798 + if (!dt_virt) 799 + return NULL; 788 800 789 801 memblock_reserve(dt_phys, size); 790 - 791 802 return dt_virt; 792 803 } 793 804