Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

riscv: Introduce CONFIG_RELOCATABLE

This config allows compiling a 64-bit kernel as a PIE and relocating it to
any virtual address at runtime: this paves the way to KASLR.
Runtime relocation is possible because the relocation metadata are embedded
in the kernel.

Note that relocating at runtime introduces an overhead even if the
kernel is loaded at the same address it was linked at, and that the compiler
options are those used by arm64, which uses the same RELA relocation
format.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20230329045329.64565-4-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

authored by

Alexandre Ghiti and committed by
Palmer Dabbelt
39b33072 69a90d2f

+91 -5
+14
arch/riscv/Kconfig
··· 561 561 562 562 If you want to execute 32-bit userspace applications, say Y. 563 563 564 + config RELOCATABLE 565 + bool "Build a relocatable kernel" 566 + depends on MMU && 64BIT && !XIP_KERNEL 567 + help 568 + This builds a kernel as a Position Independent Executable (PIE), 569 + which retains all relocation metadata required to relocate the 570 + kernel binary at runtime to a different virtual address than the 571 + address it was linked at. 572 + Since RISCV uses the RELA relocation format, this requires a 573 + relocation pass at runtime even if the kernel is loaded at the 574 + same address it was linked at. 575 + 576 + If unsure, say N. 577 + 564 578 endmenu # "Kernel features" 565 579 566 580 menu "Boot options"
+5 -2
arch/riscv/Makefile
··· 7 7 # 8 8 9 9 OBJCOPYFLAGS := -O binary 10 - LDFLAGS_vmlinux := 10 + ifeq ($(CONFIG_RELOCATABLE),y) 11 + LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro 12 + KBUILD_CFLAGS += -fPIE 13 + endif 11 14 ifeq ($(CONFIG_DYNAMIC_FTRACE),y) 12 - LDFLAGS_vmlinux := --no-relax 15 + LDFLAGS_vmlinux += --no-relax 13 16 KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY 14 17 ifeq ($(CONFIG_RISCV_ISA_C),y) 15 18 CC_FLAGS_FTRACE := -fpatchable-function-entry=4
+15 -2
arch/riscv/kernel/vmlinux.lds.S
··· 122 122 *(.sdata*) 123 123 } 124 124 125 - .rela.dyn : { 126 - *(.rela*) 125 + .rela.dyn : ALIGN(8) { 126 + __rela_dyn_start = .; 127 + *(.rela .rela*) 128 + __rela_dyn_end = .; 127 129 } 130 + 131 + #ifdef CONFIG_RELOCATABLE 132 + .data.rel : { *(.data.rel*) } 133 + .got : { *(.got*) } 134 + .plt : { *(.plt) } 135 + .dynamic : { *(.dynamic) } 136 + .dynsym : { *(.dynsym) } 137 + .dynstr : { *(.dynstr) } 138 + .hash : { *(.hash) } 139 + .gnu.hash : { *(.gnu.hash) } 140 + #endif 128 141 129 142 #ifdef CONFIG_EFI 130 143 .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
+4
arch/riscv/mm/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 3 3 CFLAGS_init.o := -mcmodel=medany 4 + ifdef CONFIG_RELOCATABLE 5 + CFLAGS_init.o += -fno-pie 6 + endif 7 + 4 8 ifdef CONFIG_FTRACE 5 9 CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) 6 10 CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
+53 -1
arch/riscv/mm/init.c
··· 20 20 #include <linux/dma-map-ops.h> 21 21 #include <linux/crash_dump.h> 22 22 #include <linux/hugetlb.h> 23 + #ifdef CONFIG_RELOCATABLE 24 + #include <linux/elf.h> 25 + #endif 23 26 24 27 #include <asm/fixmap.h> 25 28 #include <asm/tlbflush.h> ··· 149 146 print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END); 150 147 #endif 151 148 152 - print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR, 149 + print_ml("kernel", (unsigned long)kernel_map.virt_addr, 153 150 (unsigned long)ADDRESS_SPACE_END); 154 151 } 155 152 } ··· 823 820 #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." 824 821 #endif 825 822 823 + #ifdef CONFIG_RELOCATABLE 824 + extern unsigned long __rela_dyn_start, __rela_dyn_end; 825 + 826 + static void __init relocate_kernel(void) 827 + { 828 + Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start; 829 + /* 830 + * This holds the offset between the linked virtual address and the 831 + * relocated virtual address. 832 + */ 833 + uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR; 834 + /* 835 + * This holds the offset between kernel linked virtual address and 836 + * physical address. 837 + */ 838 + uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; 839 + 840 + for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) { 841 + Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); 842 + Elf64_Addr relocated_addr = rela->r_addend; 843 + 844 + if (rela->r_info != R_RISCV_RELATIVE) 845 + continue; 846 + 847 + /* 848 + * Make sure to not relocate vdso symbols like rt_sigreturn 849 + * which are linked from the address 0 in vmlinux since 850 + * vdso symbol addresses are actually used as an offset from 851 + * mm->context.vdso in VDSO_OFFSET macro. 
852 + */ 853 + if (relocated_addr >= KERNEL_LINK_ADDR) 854 + relocated_addr += reloc_offset; 855 + 856 + *(Elf64_Addr *)addr = relocated_addr; 857 + } 858 + } 859 + #endif /* CONFIG_RELOCATABLE */ 860 + 826 861 #ifdef CONFIG_XIP_KERNEL 827 862 static void __init create_kernel_page_table(pgd_t *pgdir, 828 863 __always_unused bool early) ··· 1046 1005 * of IS_ERR_VALUE macro. 1047 1006 */ 1048 1007 BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); 1008 + #endif 1009 + 1010 + #ifdef CONFIG_RELOCATABLE 1011 + /* 1012 + * Early page table uses only one PUD, which makes it possible 1013 + * to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset 1014 + * makes the kernel cross over a PUD_SIZE boundary, raise a bug 1015 + * since a part of the kernel would not get mapped. 1016 + */ 1017 + BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size); 1018 + relocate_kernel(); 1049 1019 #endif 1050 1020 1051 1021 apply_early_boot_alternatives();