Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-next/boot' into for-next/core

* for-next/boot: (34 commits)
arm64: fix KASAN_INLINE
arm64: Add an override for ID_AA64SMFR0_EL1.FA64
arm64: Add the arm64.nosve command line option
arm64: Add the arm64.nosme command line option
arm64: Expose a __check_override primitive for oddball features
arm64: Allow the idreg override to deal with variable field width
arm64: Factor out checking of a feature against the override into a macro
arm64: Allow sticky E2H when entering EL1
arm64: Save state of HCR_EL2.E2H before switch to EL1
arm64: Rename the VHE switch to "finalise_el2"
arm64: mm: fix booting with 52-bit address space
arm64: head: remove __PHYS_OFFSET
arm64: lds: use PROVIDE instead of conditional definitions
arm64: setup: drop early FDT pointer helpers
arm64: head: avoid relocating the kernel twice for KASLR
arm64: kaslr: defer initialization to initcall where permitted
arm64: head: record CPU boot mode after enabling the MMU
arm64: head: populate kernel page tables with MMU and caches on
arm64: head: factor out TTBR1 assignment into a macro
arm64: idreg-override: use early FDT mapping in ID map
...

+753 -615
+6
Documentation/admin-guide/kernel-parameters.txt
··· 400 400 arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension 401 401 support 402 402 403 + arm64.nosve [ARM64] Unconditionally disable Scalable Vector 404 + Extension support 405 + 406 + arm64.nosme [ARM64] Unconditionally disable Scalable Matrix 407 + Extension support 408 + 403 409 ataflop= [HW,M68k] 404 410 405 411 atarimouse= [HW,MOUSE] Atari Mouse
+6 -5
Documentation/virt/kvm/arm/hyp-abi.rst
··· 60 60 61 61 * :: 62 62 63 - x0 = HVC_VHE_RESTART (arm64 only) 63 + x0 = HVC_FINALISE_EL2 (arm64 only) 64 64 65 - Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling 66 - the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU 67 - being off, and VHE not being disabled by any other means (command line 68 - option, for example). 65 + Finish configuring EL2 depending on the command-line options, 66 + including an attempt to upgrade the kernel's exception level from 67 + EL1 to EL2 by enabling the VHE mode. This is conditioned by the CPU 68 + supporting VHE, the EL2 MMU being off, and VHE not being disabled by 69 + any other means (command line option, for example). 69 70 70 71 Any other value of r0/x0 triggers a hypervisor-specific handling, 71 72 which is not documented here.
+27 -4
arch/arm64/include/asm/assembler.h
··· 360 360 .endm 361 361 362 362 /* 363 + * idmap_get_t0sz - get the T0SZ value needed to cover the ID map 364 + * 365 + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the 366 + * entire ID map region can be mapped. As T0SZ == (64 - #bits used), 367 + * this number conveniently equals the number of leading zeroes in 368 + * the physical address of _end. 369 + */ 370 + .macro idmap_get_t0sz, reg 371 + adrp \reg, _end 372 + orr \reg, \reg, #(1 << VA_BITS_MIN) - 1 373 + clz \reg, \reg 374 + .endm 375 + 376 + /* 363 377 * tcr_compute_pa_size - set TCR.(I)PS to the highest supported 364 378 * ID_AA64MMFR0_EL1.PARange value 365 379 * ··· 480 466 .endm 481 467 482 468 /* 469 + * load_ttbr1 - install @pgtbl as a TTBR1 page table 470 + * pgtbl preserved 471 + * tmp1/tmp2 clobbered, either may overlap with pgtbl 472 + */ 473 + .macro load_ttbr1, pgtbl, tmp1, tmp2 474 + phys_to_ttbr \tmp1, \pgtbl 475 + offset_ttbr1 \tmp1, \tmp2 476 + msr ttbr1_el1, \tmp1 477 + isb 478 + .endm 479 + 480 + /* 483 481 * To prevent the possibility of old and new partial table walks being visible 484 482 * in the tlb, switch the ttbr to a zero page when we invalidate the old 485 483 * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i ··· 504 478 isb 505 479 tlbi vmalle1 506 480 dsb nsh 507 - phys_to_ttbr \tmp, \page_table 508 - offset_ttbr1 \tmp, \tmp2 509 - msr ttbr1_el1, \tmp 510 - isb 481 + load_ttbr1 \page_table, \tmp, \tmp2 511 482 .endm 512 483 513 484 /*
+3
arch/arm64/include/asm/cpufeature.h
··· 908 908 } 909 909 910 910 extern struct arm64_ftr_override id_aa64mmfr1_override; 911 + extern struct arm64_ftr_override id_aa64pfr0_override; 911 912 extern struct arm64_ftr_override id_aa64pfr1_override; 913 + extern struct arm64_ftr_override id_aa64zfr0_override; 914 + extern struct arm64_ftr_override id_aa64smfr0_override; 912 915 extern struct arm64_ftr_override id_aa64isar1_override; 913 916 extern struct arm64_ftr_override id_aa64isar2_override; 914 917
-60
arch/arm64/include/asm/el2_setup.h
··· 129 129 msr cptr_el2, x0 // Disable copro. traps to EL2 130 130 .endm 131 131 132 - /* SVE register access */ 133 - .macro __init_el2_nvhe_sve 134 - mrs x1, id_aa64pfr0_el1 135 - ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 136 - cbz x1, .Lskip_sve_\@ 137 - 138 - bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps 139 - msr cptr_el2, x0 // Disable copro. traps to EL2 140 - isb 141 - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector 142 - msr_s SYS_ZCR_EL2, x1 // length for EL1. 143 - .Lskip_sve_\@: 144 - .endm 145 - 146 - /* SME register access and priority mapping */ 147 - .macro __init_el2_nvhe_sme 148 - mrs x1, id_aa64pfr1_el1 149 - ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4 150 - cbz x1, .Lskip_sme_\@ 151 - 152 - bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps 153 - msr cptr_el2, x0 // Disable copro. traps to EL2 154 - isb 155 - 156 - mrs x1, sctlr_el2 157 - orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps 158 - msr sctlr_el2, x1 159 - isb 160 - 161 - mov x1, #0 // SMCR controls 162 - 163 - mrs_s x2, SYS_ID_AA64SMFR0_EL1 164 - ubfx x2, x2, #ID_AA64SMFR0_EL1_FA64_SHIFT, #1 // Full FP in SM? 165 - cbz x2, .Lskip_sme_fa64_\@ 166 - 167 - orr x1, x1, SMCR_ELx_FA64_MASK 168 - .Lskip_sme_fa64_\@: 169 - 170 - orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector 171 - msr_s SYS_SMCR_EL2, x1 // length for EL1. 172 - 173 - mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? 174 - ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 175 - cbz x1, .Lskip_sme_\@ 176 - 177 - msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal 178 - 179 - mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? 180 - ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 181 - cbz x1, .Lskip_sme_\@ 182 - 183 - mrs_s x1, SYS_HCRX_EL2 184 - orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping 185 - msr_s SYS_HCRX_EL2, x1 186 - 187 - .Lskip_sme_\@: 188 - .endm 189 - 190 132 /* Disable any fine grained traps */ 191 133 .macro __init_el2_fgt 192 134 mrs x1, id_aa64mmfr0_el1 ··· 192 250 __init_el2_hstr 193 251 __init_el2_nvhe_idregs 194 252 __init_el2_nvhe_cptr 195 - __init_el2_nvhe_sve 196 - __init_el2_nvhe_sme 197 253 __init_el2_fgt 198 254 __init_el2_nvhe_prepare_eret 199 255 .endm
+13 -5
arch/arm64/include/asm/kernel-pgtable.h
··· 8 8 #ifndef __ASM_KERNEL_PGTABLE_H 9 9 #define __ASM_KERNEL_PGTABLE_H 10 10 11 + #include <asm/boot.h> 11 12 #include <asm/pgtable-hwdef.h> 12 13 #include <asm/sparsemem.h> 13 14 ··· 36 35 */ 37 36 #if ARM64_KERNEL_USES_PMD_MAPS 38 37 #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) 39 - #define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1) 40 38 #else 41 39 #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) 42 - #define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT)) 43 40 #endif 44 41 45 42 ··· 86 87 + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ 87 88 + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ 88 89 #define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) 89 - #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) 90 + 91 + /* the initial ID map may need two extra pages if it needs to be extended */ 92 + #if VA_BITS < 48 93 + #define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE) 94 + #else 95 + #define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE) 96 + #endif 97 + #define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE) 90 98 91 99 /* Initial memory map size */ 92 100 #if ARM64_KERNEL_USES_PMD_MAPS ··· 113 107 #define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) 114 108 115 109 #if ARM64_KERNEL_USES_PMD_MAPS 116 - #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) 110 + #define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) 111 + #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY) 117 112 #else 118 - #define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) 113 + #define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) 114 + #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) 119 115 #endif 120 116 121 117 /*
+4
arch/arm64/include/asm/memory.h
··· 174 174 #include <linux/types.h> 175 175 #include <asm/bug.h> 176 176 177 + #if VA_BITS > 48 177 178 extern u64 vabits_actual; 179 + #else 180 + #define vabits_actual ((u64)VA_BITS) 181 + #endif 178 182 179 183 extern s64 memstart_addr; 180 184 /* PHYS_OFFSET - the physical address of the start of memory. */
+10 -6
arch/arm64/include/asm/mmu_context.h
··· 60 60 * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in 61 61 * physical memory, in which case it will be smaller. 62 62 */ 63 - extern u64 idmap_t0sz; 64 - extern u64 idmap_ptrs_per_pgd; 63 + extern int idmap_t0sz; 65 64 66 65 /* 67 66 * Ensure TCR.T0SZ is set to the provided value. ··· 105 106 cpu_switch_mm(mm->pgd, mm); 106 107 } 107 108 108 - static inline void cpu_install_idmap(void) 109 + static inline void __cpu_install_idmap(pgd_t *idmap) 109 110 { 110 111 cpu_set_reserved_ttbr0(); 111 112 local_flush_tlb_all(); 112 113 cpu_set_idmap_tcr_t0sz(); 113 114 114 - cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm); 115 + cpu_switch_mm(lm_alias(idmap), &init_mm); 116 + } 117 + 118 + static inline void cpu_install_idmap(void) 119 + { 120 + __cpu_install_idmap(idmap_pg_dir); 115 121 } 116 122 117 123 /* ··· 147 143 * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, 148 144 * avoiding the possibility of conflicting TLB entries being allocated. 149 145 */ 150 - static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp) 146 + static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) 151 147 { 152 148 typedef void (ttbr_replace_func)(phys_addr_t); 153 149 extern ttbr_replace_func idmap_cpu_replace_ttbr1; ··· 170 166 171 167 replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1)); 172 168 173 - cpu_install_idmap(); 169 + __cpu_install_idmap(idmap); 174 170 replace_phys(ttbr1); 175 171 cpu_uninstall_idmap(); 176 172 }
+9 -2
arch/arm64/include/asm/virt.h
··· 36 36 #define HVC_RESET_VECTORS 2 37 37 38 38 /* 39 - * HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible 39 + * HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible 40 40 */ 41 - #define HVC_VHE_RESTART 3 41 + #define HVC_FINALISE_EL2 3 42 42 43 43 /* Max number of HYP stub hypercalls */ 44 44 #define HVC_STUB_HCALL_NR 4 ··· 48 48 49 49 #define BOOT_CPU_MODE_EL1 (0xe11) 50 50 #define BOOT_CPU_MODE_EL2 (0xe12) 51 + 52 + /* 53 + * Flags returned together with the boot mode, but not preserved in 54 + * __boot_cpu_mode. Used by the idreg override code to work out the 55 + * boot state. 56 + */ 57 + #define BOOT_CPU_FLAG_E2H BIT_ULL(32) 51 58 52 59 #ifndef __ASSEMBLY__ 53 60
+1 -1
arch/arm64/kernel/Makefile
··· 64 64 obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o 65 65 obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o 66 66 obj-$(CONFIG_PARAVIRT) += paravirt.o 67 - obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o 67 + obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/ 68 68 obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o 69 69 obj-$(CONFIG_ELF_CORE) += elfcore.o 70 70 obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
+10 -4
arch/arm64/kernel/cpufeature.c
··· 633 633 __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override) 634 634 635 635 struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; 636 + struct arm64_ftr_override __ro_after_init id_aa64pfr0_override; 636 637 struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; 638 + struct arm64_ftr_override __ro_after_init id_aa64zfr0_override; 639 + struct arm64_ftr_override __ro_after_init id_aa64smfr0_override; 637 640 struct arm64_ftr_override __ro_after_init id_aa64isar1_override; 638 641 struct arm64_ftr_override __ro_after_init id_aa64isar2_override; 639 642 ··· 673 670 ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5), 674 671 675 672 /* Op1 = 0, CRn = 0, CRm = 4 */ 676 - ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), 673 + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0, 674 + &id_aa64pfr0_override), 677 675 ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, 678 676 &id_aa64pfr1_override), 679 - ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), 680 - ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0), 677 + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0, 678 + &id_aa64zfr0_override), 679 + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0, 680 + &id_aa64smfr0_override), 681 681 682 682 /* Op1 = 0, CRn = 0, CRm = 5 */ 683 683 ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), ··· 3301 3295 3302 3296 static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) 3303 3297 { 3304 - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); 3298 + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir); 3305 3299 } 3306 3300 3307 3301 /*
+229 -304
arch/arm64/kernel/head.S
··· 37 37 38 38 #include "efi-header.S" 39 39 40 - #define __PHYS_OFFSET KERNEL_START 41 - 42 40 #if (PAGE_OFFSET & 0x1fffff) != 0 43 41 #error PAGE_OFFSET must be at least 2MB aligned 44 42 #endif ··· 48 50 * The requirements are: 49 51 * MMU = off, D-cache = off, I-cache = on or off, 50 52 * x0 = physical address to the FDT blob. 51 - * 52 - * This code is mostly position independent so you call this at 53 - * __pa(PAGE_OFFSET). 54 53 * 55 54 * Note that the callee-saved registers are used for storing variables 56 55 * that are useful before the MMU is enabled. The allocations are described ··· 77 82 * primary lowlevel boot path: 78 83 * 79 84 * Register Scope Purpose 85 + * x20 primary_entry() .. __primary_switch() CPU boot mode 80 86 * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0 87 + * x22 create_idmap() .. start_kernel() ID map VA of the DT blob 81 88 * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset 82 - * x28 __create_page_tables() callee preserved temp register 83 - * x19/x20 __primary_switch() callee preserved temp registers 84 - * x24 __primary_switch() .. relocate_kernel() current RELR displacement 89 + * x24 __primary_switch() linear map KASLR seed 90 + * x25 primary_entry() .. start_kernel() supported VA size 91 + * x28 create_idmap() callee preserved temp register 85 92 */ 86 93 SYM_CODE_START(primary_entry) 87 94 bl preserve_boot_args 88 95 bl init_kernel_el // w0=cpu_boot_mode 89 - adrp x23, __PHYS_OFFSET 90 - and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 91 - bl set_cpu_boot_mode_flag 92 - bl __create_page_tables 96 + mov x20, x0 97 + bl create_idmap 98 + 93 99 /* 94 100 * The following calls CPU setup code, see arch/arm64/mm/proc.S for 95 101 * details. 96 102 * On return, the CPU will be ready for the MMU to be turned on and 97 103 * the TCR will have been set. 98 104 */ 105 + #if VA_BITS > 48 106 + mrs_s x0, SYS_ID_AA64MMFR2_EL1 107 + tst x0, #0xf << ID_AA64MMFR2_LVA_SHIFT 108 + mov x0, #VA_BITS 109 + mov x25, #VA_BITS_MIN 110 + csel x25, x25, x0, eq 111 + mov x0, x25 112 + #endif 99 113 bl __cpu_setup // initialise processor 100 114 b __primary_switch 101 115 SYM_CODE_END(primary_entry) ··· 126 122 b dcache_inval_poc // tail call 127 123 SYM_CODE_END(preserve_boot_args) 128 124 129 - /* 130 - * Macro to create a table entry to the next page. 131 - * 132 - * tbl: page table address 133 - * virt: virtual address 134 - * shift: #imm page table shift 135 - * ptrs: #imm pointers per table page 136 - * 137 - * Preserves: virt 138 - * Corrupts: ptrs, tmp1, tmp2 139 - * Returns: tbl -> next level table page address 140 - */ 141 - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 142 - add \tmp1, \tbl, #PAGE_SIZE 143 - phys_to_pte \tmp2, \tmp1 144 - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type 145 - lsr \tmp1, \virt, #\shift 146 - sub \ptrs, \ptrs, #1 147 - and \tmp1, \tmp1, \ptrs // table index 148 - str \tmp2, [\tbl, \tmp1, lsl #3] 149 - add \tbl, \tbl, #PAGE_SIZE // next level table page 150 - .endm 125 + SYM_FUNC_START_LOCAL(clear_page_tables) 126 + /* 127 + * Clear the init page tables. 128 + */ 129 + adrp x0, init_pg_dir 130 + adrp x1, init_pg_end 131 + sub x2, x1, x0 132 + mov x1, xzr 133 + b __pi_memset // tail call 134 + SYM_FUNC_END(clear_page_tables) 151 135 152 136 /* 153 137 * Macro to populate page table entries, these entries can be pointers to the next level ··· 171 179 * vstart: virtual address of start of range 172 180 * vend: virtual address of end of range - we map [vstart, vend] 173 181 * shift: shift used to transform virtual address into index 174 - * ptrs: number of entries in page table 182 + * order: #imm 2log(number of entries in page table) 175 183 * istart: index in table corresponding to vstart 176 184 * iend: index in table corresponding to vend 177 185 * count: On entry: how many extra entries were required in previous level, scales 178 186 * our end index. 179 187 * On exit: returns how many extra entries required for next page table level 180 188 * 181 - * Preserves: vstart, vend, shift, ptrs 189 + * Preserves: vstart, vend 182 190 * Returns: istart, iend, count 183 191 */ 184 - .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count 185 - lsr \iend, \vend, \shift 186 - mov \istart, \ptrs 187 - sub \istart, \istart, #1 188 - and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1) 189 - mov \istart, \ptrs 190 - mul \istart, \istart, \count 191 - add \iend, \iend, \istart // iend += count * ptrs 192 - // our entries span multiple tables 193 - 194 - lsr \istart, \vstart, \shift 195 - mov \count, \ptrs 196 - sub \count, \count, #1 197 - and \istart, \istart, \count 198 - 192 + .macro compute_indices, vstart, vend, shift, order, istart, iend, count 193 + ubfx \istart, \vstart, \shift, \order 194 + ubfx \iend, \vend, \shift, \order 195 + add \iend, \iend, \count, lsl \order 199 196 sub \count, \iend, \istart 200 197 .endm 201 198 ··· 199 218 * vend: virtual address of end of range - we map [vstart, vend - 1] 200 219 * flags: flags to use to map last level entries 201 220 * phys: physical address corresponding to vstart - physical memory is contiguous 202 - * pgds: the number of pgd entries 221 + * order: #imm 2log(number of entries in PGD table) 222 + * 223 + * If extra_shift is set, an extra level will be populated if the end address does 224 + * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range. 203 225 * 204 226 * Temporaries: istart, iend, tmp, count, sv - these need to be different registers 205 227 * Preserves: vstart, flags 206 228 * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv 207 229 */ 208 - .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv 230 + .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift 209 231 sub \vend, \vend, #1 210 232 add \rtbl, \tbl, #PAGE_SIZE 211 - mov \sv, \rtbl 212 233 mov \count, #0 213 - compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count 234 + 235 + .ifnb \extra_shift 236 + tst \vend, #~((1 << (\extra_shift)) - 1) 237 + b.eq .L_\@ 238 + compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count 239 + mov \sv, \rtbl 214 240 populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp 215 241 mov \tbl, \sv 242 + .endif 243 + .L_\@: 244 + compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count 216 245 mov \sv, \rtbl 246 + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp 247 + mov \tbl, \sv 217 248 218 249 #if SWAPPER_PGTABLE_LEVELS > 3 219 - compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count 250 + compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count 251 + mov \sv, \rtbl 220 252 populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp 221 253 mov \tbl, \sv 222 - mov \sv, \rtbl 223 254 #endif 224 255 225 256 #if SWAPPER_PGTABLE_LEVELS > 2 226 - compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count 257 + compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count 258 + mov \sv, \rtbl 227 259 populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp 228 260 mov \tbl, \sv 229 261 #endif 230 262 231 - compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count 232 - bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 233 - populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp 263 + compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count 264 + bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1 265 + populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp 234 266 .endm 235 267 236 268 /* 237 - * Setup the initial page tables. We only setup the barest amount which is 238 - * required to get the kernel running. The following sections are required: 239 - * - identity mapping to enable the MMU (low address, TTBR0) 240 - * - first few MB of the kernel linear mapping to jump to once the MMU has 241 - * been enabled 269 + * Remap a subregion created with the map_memory macro with modified attributes 270 + * or output address. The entire remapped region must have been covered in the 271 + * invocation of map_memory. 272 + * 273 + * x0: last level table address (returned in first argument to map_memory) 274 + * x1: start VA of the existing mapping 275 + * x2: start VA of the region to update 276 + * x3: end VA of the region to update (exclusive) 277 + * x4: start PA associated with the region to update 278 + * x5: attributes to set on the updated region 279 + * x6: order of the last level mappings 242 280 */ 243 - SYM_FUNC_START_LOCAL(__create_page_tables) 281 + SYM_FUNC_START_LOCAL(remap_region) 282 + sub x3, x3, #1 // make end inclusive 283 + 284 + // Get the index offset for the start of the last level table 285 + lsr x1, x1, x6 286 + bfi x1, xzr, #0, #PAGE_SHIFT - 3 287 + 288 + // Derive the start and end indexes into the last level table 289 + // associated with the provided region 290 + lsr x2, x2, x6 291 + lsr x3, x3, x6 292 + sub x2, x2, x1 293 + sub x3, x3, x1 294 + 295 + mov x1, #1 296 + lsl x6, x1, x6 // block size at this level 297 + 298 + populate_entries x0, x4, x2, x3, x5, x6, x7 299 + ret 300 + SYM_FUNC_END(remap_region) 301 + 302 + SYM_FUNC_START_LOCAL(create_idmap) 244 303 mov x28, lr 245 - 246 304 /* 247 - * Invalidate the init page tables to avoid potential dirty cache lines 248 - * being evicted. Other page tables are allocated in rodata as part of 249 - * the kernel image, and thus are clean to the PoC per the boot 250 - * protocol. 251 - */ 252 - adrp x0, init_pg_dir 253 - adrp x1, init_pg_end 254 - bl dcache_inval_poc 255 - 256 - /* 257 - * Clear the init page tables. 258 - */ 259 - adrp x0, init_pg_dir 260 - adrp x1, init_pg_end 261 - sub x1, x1, x0 262 - 1: stp xzr, xzr, [x0], #16 263 - stp xzr, xzr, [x0], #16 264 - stp xzr, xzr, [x0], #16 265 - stp xzr, xzr, [x0], #16 266 - subs x1, x1, #64 267 - b.ne 1b 268 - 269 - mov x7, SWAPPER_MM_MMUFLAGS 270 - 271 - /* 272 - * Create the identity mapping. 273 - */ 274 - adrp x0, idmap_pg_dir 275 - adrp x3, __idmap_text_start // __pa(__idmap_text_start) 276 - 277 - #ifdef CONFIG_ARM64_VA_BITS_52 278 - mrs_s x6, SYS_ID_AA64MMFR2_EL1 279 - and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT) 280 - mov x5, #52 281 - cbnz x6, 1f 282 - #endif 283 - mov x5, #VA_BITS_MIN 284 - 1: 285 - adr_l x6, vabits_actual 286 - str x5, [x6] 287 - dmb sy 288 - dc ivac, x6 // Invalidate potentially stale cache line 289 - 290 - /* 291 - * VA_BITS may be too small to allow for an ID mapping to be created 292 - * that covers system RAM if that is located sufficiently high in the 293 - * physical address space. So for the ID map, use an extended virtual 294 - * range in that case, and configure an additional translation level 295 - * if needed. 305 + * The ID map carries a 1:1 mapping of the physical address range 306 + * covered by the loaded image, which could be anywhere in DRAM. This 307 + * means that the required size of the VA (== PA) space is decided at 308 + * boot time, and could be more than the configured size of the VA 309 + * space for ordinary kernel and user space mappings. 296 310 * 297 - * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the 298 - * entire ID map region can be mapped. As T0SZ == (64 - #bits used), 299 - * this number conveniently equals the number of leading zeroes in 300 - * the physical address of __idmap_text_end. 311 + * There are three cases to consider here: 312 + * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover 313 + * the placement of the image. In this case, we configure one extra 314 + * level of translation on the fly for the ID map only. (This case 315 + * also covers 42-bit VA/52-bit PA on 64k pages). 316 + * 317 + * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can 318 + * only happen when using 64k pages, in which case we need to extend 319 + * the root level table rather than add a level. Note that we can 320 + * treat this case as 'always extended' as long as we take care not 321 + * to program an unsupported T0SZ value into the TCR register. 322 + * 323 + * - Combinations that would require two additional levels of 324 + * translation are not supported, e.g., VA_BITS==36 on 16k pages, or 325 + * VA_BITS==39/4k pages with 5-level paging, where the input address 326 + * requires more than 47 or 48 bits, respectively. 301 327 */ 302 - adrp x5, __idmap_text_end 303 - clz x5, x5 304 - cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? 305 - b.ge 1f // .. then skip VA range extension 306 - 307 - adr_l x6, idmap_t0sz 308 - str x5, [x6] 309 - dmb sy 310 - dc ivac, x6 // Invalidate potentially stale cache line 311 - 312 328 #if (VA_BITS < 48) 329 + #define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT) 313 330 #define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) 314 - #define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT)) 315 331 316 332 /* 317 333 * If VA_BITS < 48, we have to configure an additional table level. ··· 320 342 #if VA_BITS != EXTRA_SHIFT 321 343 #error "Mismatch between VA_BITS and page size/number of translation levels" 322 344 #endif 323 - 324 - mov x4, EXTRA_PTRS 325 - create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 326 345 #else 346 + #define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT) 347 + #define EXTRA_SHIFT 327 348 /* 328 349 * If VA_BITS == 48, we don't have to configure an additional 329 350 * translation level, but the top-level table has more entries. 330 351 */ 331 - mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) 332 - str_l x4, idmap_ptrs_per_pgd, x5 333 352 #endif 334 - 1: 335 - ldr_l x4, idmap_ptrs_per_pgd 336 - adr_l x6, __idmap_text_end // __pa(__idmap_text_end) 353 + adrp x0, init_idmap_pg_dir 354 + adrp x3, _text 355 + adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE 356 + mov x7, SWAPPER_RX_MMUFLAGS 337 357 338 - map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14 358 + map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT 339 359 340 - /* 341 - * Map the kernel image (starting with PHYS_OFFSET). 342 - */ 343 - adrp x0, init_pg_dir 344 - mov_q x5, KIMAGE_VADDR // compile time __va(_text) 345 - add x5, x5, x23 // add KASLR displacement 346 - mov x4, PTRS_PER_PGD 347 - adrp x6, _end // runtime __pa(_end) 348 - adrp x3, _text // runtime __pa(_text) 349 - sub x6, x6, x3 // _end - _text 350 - add x6, x6, x5 // runtime __va(_end) 360 + /* Remap the kernel page tables r/w in the ID map */ 361 + adrp x1, _text 362 + adrp x2, init_pg_dir 363 + adrp x3, init_pg_end 364 + bic x4, x2, #SWAPPER_BLOCK_SIZE - 1 365 + mov x5, SWAPPER_RW_MMUFLAGS 366 + mov x6, #SWAPPER_BLOCK_SHIFT 367 + bl remap_region 351 368 352 - map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14 369 + /* Remap the FDT after the kernel image */ 370 + adrp x1, _text 371 + adrp x22, _end + SWAPPER_BLOCK_SIZE 372 + bic x2, x22, #SWAPPER_BLOCK_SIZE - 1 373 + bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address 374 + add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE 375 + bic x4, x21, #SWAPPER_BLOCK_SIZE - 1 376 + mov x5, SWAPPER_RW_MMUFLAGS 377 + mov x6, #SWAPPER_BLOCK_SHIFT 378 + bl remap_region 353 379 354 380 /* 355 381 * Since the page tables have been populated with non-cacheable ··· 362 380 */ 363 381 dmb sy 364 382 365 - adrp x0, idmap_pg_dir 366 - adrp x1, idmap_pg_end 383 + adrp x0, init_idmap_pg_dir 384 + adrp x1, init_idmap_pg_end 367 385 bl dcache_inval_poc 368 - 369 - adrp x0, init_pg_dir 370 - adrp x1, init_pg_end 371 - bl dcache_inval_poc 372 - 373 386 ret x28 374 - SYM_FUNC_END(__create_page_tables) 387 + SYM_FUNC_END(create_idmap) 388 + 389 + SYM_FUNC_START_LOCAL(create_kernel_mapping) 390 + adrp x0, init_pg_dir 391 + mov_q x5, KIMAGE_VADDR // compile time __va(_text) 392 + add x5, x5, x23 // add KASLR displacement 393 + adrp x6, _end // runtime __pa(_end) 394 + adrp x3, _text // runtime __pa(_text) 395 + sub x6, x6, x3 // _end - _text 396 + add x6, x6, x5 // runtime __va(_end) 397 + mov x7, SWAPPER_RW_MMUFLAGS 398 + 399 + map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14 400 + 401 + dsb ishst // sync with page table walker 402 + ret 403 + SYM_FUNC_END(create_kernel_mapping) 375 404 376 405 /* 377 406 * Initialize CPU registers with task-specific and cpu-specific context. ··· 413 420 /* 414 421 * The following fragment of code is executed with the MMU enabled. 415 422 * 416 - * x0 = __PHYS_OFFSET 423 + * x0 = __pa(KERNEL_START) 417 424 */ 418 425 SYM_FUNC_START_LOCAL(__primary_switched) 419 426 adr_l x4, init_task ··· 432 439 sub x4, x4, x0 // the kernel virtual and 433 440 str_l x4, kimage_voffset, x5 // physical mappings 434 441 442 + mov x0, x20 443 + bl set_cpu_boot_mode_flag 444 + 435 445 // Clear BSS 436 446 adr_l x0, __bss_start 437 447 mov x1, xzr ··· 443 447 bl __pi_memset 444 448 dsb ishst // Make zero page visible to PTW 445 449 450 + #if VA_BITS > 48 451 + adr_l x8, vabits_actual // Set this early so KASAN early init 452 + str x25, [x8] // ... observes the correct value 453 + dc civac, x8 // Make visible to booting secondaries 454 + #endif 455 + 456 + #ifdef CONFIG_RANDOMIZE_BASE 457 + adrp x5, memstart_offset_seed // Save KASLR linear map seed 458 + strh w24, [x5, :lo12:memstart_offset_seed] 459 + #endif 446 460 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) 447 461 bl kasan_early_init 448 462 #endif 449 463 mov x0, x21 // pass FDT address in x0 450 464 bl early_fdt_map // Try mapping the FDT early 465 + mov x0, x20 // pass the full boot status 451 466 bl init_feature_override // Parse cpu feature overrides 452 - #ifdef CONFIG_RANDOMIZE_BASE 453 - tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? 454 - b.ne 0f 455 - bl kaslr_early_init // parse FDT for KASLR options 456 - cbz x0, 0f // KASLR disabled? just proceed 457 - orr x23, x23, x0 // record KASLR offset 458 - ldp x29, x30, [sp], #16 // we must enable KASLR, return 459 - ret // to __primary_switch() 460 - 0: 461 - #endif 462 - bl switch_to_vhe // Prefer VHE if possible 467 + mov x0, x20 468 + bl finalise_el2 // Prefer VHE if possible 463 469 ldp x29, x30, [sp], #16 464 470 bl start_kernel 465 471 ASM_BUG() 466 472 SYM_FUNC_END(__primary_switched) 467 - 468 - .pushsection ".rodata", "a" 469 - SYM_DATA_START(kimage_vaddr) 470 - .quad _text 471 - SYM_DATA_END(kimage_vaddr) 472 - EXPORT_SYMBOL(kimage_vaddr) 473 - .popsection 474 473 475 474 /* 476 475 * end early head section, begin head code that is also used for ··· 481 490 * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if 482 491 * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET. 483 492 * 484 - * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if 485 - * booted in EL1 or EL2 respectively. 493 + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if 494 + * booted in EL1 or EL2 respectively, with the top 32 bits containing 495 + * potential context flags. These flags are *not* stored in __boot_cpu_mode. 486 496 */ 487 497 SYM_FUNC_START(init_kernel_el) 488 498 mrs x0, CurrentEL ··· 512 520 msr vbar_el2, x0 513 521 isb 514 522 523 + mov_q x1, INIT_SCTLR_EL1_MMU_OFF 524 + 515 525 /* 516 526 * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, 517 527 * making it impossible to start in nVHE mode. Is that ··· 523 529 and x0, x0, #HCR_E2H 524 530 cbz x0, 1f 525 531 526 - /* Switching to VHE requires a sane SCTLR_EL1 as a start */ 527 - mov_q x0, INIT_SCTLR_EL1_MMU_OFF 528 - msr_s SYS_SCTLR_EL12, x0 529 - 530 - /* 531 - * Force an eret into a helper "function", and let it return 532 - * to our original caller... This makes sure that we have 533 - * initialised the basic PSTATE state. 534 - */ 535 - mov x0, #INIT_PSTATE_EL2 536 - msr spsr_el1, x0 537 - adr x0, __cpu_stick_to_vhe 538 - msr elr_el1, x0 539 - eret 532 + /* Set a sane SCTLR_EL1, the VHE way */ 533 + msr_s SYS_SCTLR_EL12, x1 534 + mov x2, #BOOT_CPU_FLAG_E2H 535 + b 2f 540 536 541 537 1: 542 - mov_q x0, INIT_SCTLR_EL1_MMU_OFF 543 - msr sctlr_el1, x0 544 - 538 + msr sctlr_el1, x1 539 + mov x2, xzr 540 + 2: 545 541 msr elr_el2, lr 546 542 mov w0, #BOOT_CPU_MODE_EL2 543 + orr x0, x0, x2 547 544 eret 548 - 549 - __cpu_stick_to_vhe: 550 - mov x0, #HVC_VHE_RESTART 551 - hvc #0 552 - mov x0, #BOOT_CPU_MODE_EL2 553 - ret 554 545 SYM_FUNC_END(init_kernel_el) 555 546 556 547 /* ··· 548 569 b.ne 1f 549 570 add x1, x1, #4 550 571 1: str w0, [x1] // Save CPU boot mode 551 - dmb sy 552 - dc ivac, x1 // Invalidate potentially stale cache line 553 572 ret 554 573 SYM_FUNC_END(set_cpu_boot_mode_flag) 555 - 556 - /* 557 - * These values are written with the MMU off, but read with the MMU on. 558 - * Writers will invalidate the corresponding address, discarding up to a 559 - * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures 560 - * sufficient alignment that the CWG doesn't overlap another section. 561 - */ 562 - .pushsection ".mmuoff.data.write", "aw" 563 - /* 564 - * We need to find out the CPU boot mode long after boot, so we need to 565 - * store it in a writable variable. 566 - * 567 - * This is not in .bss, because we set it sufficiently early that the boot-time 568 - * zeroing of .bss would clobber it. 569 - */ 570 - SYM_DATA_START(__boot_cpu_mode) 571 - .long BOOT_CPU_MODE_EL2 572 - .long BOOT_CPU_MODE_EL1 573 - SYM_DATA_END(__boot_cpu_mode) 574 - /* 575 - * The booting CPU updates the failed status @__early_cpu_boot_status, 576 - * with MMU turned off. 577 - */ 578 - SYM_DATA_START(__early_cpu_boot_status) 579 - .quad 0 580 - SYM_DATA_END(__early_cpu_boot_status) 581 - 582 - .popsection 583 574 584 575 /* 585 576 * This provides a "holding pen" for platforms to hold all secondary ··· 557 608 */ 558 609 SYM_FUNC_START(secondary_holding_pen) 559 610 bl init_kernel_el // w0=cpu_boot_mode 560 - bl set_cpu_boot_mode_flag 561 - mrs x0, mpidr_el1 611 + mrs x2, mpidr_el1 562 612 mov_q x1, MPIDR_HWID_BITMASK 563 - and x0, x0, x1 613 + and x2, x2, x1 564 614 adr_l x3, secondary_holding_pen_release 565 615 pen: ldr x4, [x3] 566 - cmp x4, x0 616 + cmp x4, x2 567 617 b.eq secondary_startup 568 618 wfe 569 619 b pen ··· 574 626 */ 575 627 SYM_FUNC_START(secondary_entry) 576 628 bl init_kernel_el // w0=cpu_boot_mode 577 - bl set_cpu_boot_mode_flag 578 629 b secondary_startup 579 630 SYM_FUNC_END(secondary_entry) 580 631 ··· 581 634 /* 582 635 * Common entry point for secondary CPUs. 583 636 */ 584 - bl switch_to_vhe 637 + mov x20, x0 // preserve boot mode 638 + bl finalise_el2 585 639 bl __cpu_secondary_check52bitva 640 + #if VA_BITS > 48 641 + ldr_l x0, vabits_actual 642 + #endif 586 643 bl __cpu_setup // initialise processor 587 644 adrp x1, swapper_pg_dir 645 + adrp x2, idmap_pg_dir 588 646 bl __enable_mmu 589 647 ldr x8, =__secondary_switched 590 648 br x8 591 649 SYM_FUNC_END(secondary_startup) 592 650 593 651 SYM_FUNC_START_LOCAL(__secondary_switched) 652 + mov x0, x20 653 + bl set_cpu_boot_mode_flag 654 + str_l xzr, __early_cpu_boot_status, x3 594 655 adr_l x5, vectors 595 656 msr vbar_el1, x5 596 657 isb ··· 646 691 * 647 692 * x0 = SCTLR_EL1 value for turning on the MMU. 648 693 * x1 = TTBR1_EL1 value 694 + * x2 = ID map root table address 649 695 * 650 696 * Returns to the caller via x30/lr. This requires the caller to be covered 651 697 * by the .idmap.text section. ··· 655 699 * If it isn't, park the CPU 656 700 */ 657 701 SYM_FUNC_START(__enable_mmu) 658 - mrs x2, ID_AA64MMFR0_EL1 659 - ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 660 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN 702 + mrs x3, ID_AA64MMFR0_EL1 703 + ubfx x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4 704 + cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN 661 705 b.lt __no_granule_support 662 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 706 + cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 663 707 b.gt __no_granule_support 664 - update_early_cpu_boot_status 0, x2, x3 665 - adrp x2, idmap_pg_dir 666 - phys_to_ttbr x1, x1 667 708 phys_to_ttbr x2, x2 668 709 msr ttbr0_el1, x2 // load TTBR0 669 - offset_ttbr1 x1, x3 670 - msr ttbr1_el1, x1 // load TTBR1 671 - isb 710 + load_ttbr1 x1, x1, x3 672 711 673 712 set_sctlr_el1 x0 674 713 ··· 671 720 SYM_FUNC_END(__enable_mmu) 672 721 673 722 SYM_FUNC_START(__cpu_secondary_check52bitva) 674 - #ifdef CONFIG_ARM64_VA_BITS_52 723 + #if VA_BITS > 48 675 724 ldr_l x0, vabits_actual 676 725 cmp x0, #52 677 726 b.ne 2f ··· 706 755 * Iterate over each entry in the relocation table, and apply the 707 756 * relocations in place. 708 757 */ 709 - ldr w9, =__rela_offset // offset to reloc table 710 - ldr w10, =__rela_size // size of reloc table 711 - 758 + adr_l x9, __rela_start 759 + adr_l x10, __rela_end 712 760 mov_q x11, KIMAGE_VADDR // default virtual offset 713 761 add x11, x11, x23 // actual virtual offset 714 - add x9, x9, x11 // __va(.rela) 715 - add x10, x9, x10 // __va(.rela) + sizeof(.rela) 716 762 717 763 0: cmp x9, x10 718 764 b.hs 1f ··· 752 804 * entry in x9, the address being relocated by the current address or 753 805 * bitmap entry in x13 and the address being relocated by the current 754 806 * bit in x14. 755 - * 756 - * Because addends are stored in place in the binary, RELR relocations 757 - * cannot be applied idempotently. We use x24 to keep track of the 758 - * currently applied displacement so that we can correctly relocate if 759 - * __relocate_kernel is called twice with non-zero displacements (i.e. 760 - * if there is both a physical misalignment and a KASLR displacement). 761 807 */ 762 - ldr w9, =__relr_offset // offset to reloc table 763 - ldr w10, =__relr_size // size of reloc table 764 - add x9, x9, x11 // __va(.relr) 765 - add x10, x9, x10 // __va(.relr) + sizeof(.relr) 766 - 767 - sub x15, x23, x24 // delta from previous offset 768 - cbz x15, 7f // nothing to do if unchanged 769 - mov x24, x23 // save new offset 808 + adr_l x9, __relr_start 809 + adr_l x10, __relr_end 770 810 771 811 2: cmp x9, x10 772 812 b.hs 7f ··· 762 826 tbnz x11, #0, 3f // branch to handle bitmaps 763 827 add x13, x11, x23 764 828 ldr x12, [x13] // relocate address entry 765 - add x12, x12, x15 829 + add x12, x12, x23 766 830 str x12, [x13], #8 // adjust to start of bitmap 767 831 b 2b 768 832 ··· 771 835 cbz x11, 6f 772 836 tbz x11, #0, 5f // skip bit if not set 773 837 ldr x12, [x14] // relocate bit 774 - add x12, x12, x15 838 + add x12, x12, x23 775 839 str x12, [x14] 776 840 777 841 5: add x14, x14, #8 // move to next bit's address ··· 792 856 #endif 793 857 794 858 SYM_FUNC_START_LOCAL(__primary_switch) 795 - #ifdef CONFIG_RANDOMIZE_BASE 796 - mov x19, x0 // preserve new SCTLR_EL1 value 797 - mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value 798 - #endif 799 - 800 - adrp x1, init_pg_dir 859 + adrp x1, reserved_pg_dir 860 + adrp x2, init_idmap_pg_dir 801 861 bl __enable_mmu 802 862 #ifdef CONFIG_RELOCATABLE 803 - #ifdef CONFIG_RELR 804 - mov x24, #0 // no RELR displacement yet 805 - #endif 806 - bl __relocate_kernel 863 + adrp x23, KERNEL_START 864 + and x23, x23, MIN_KIMG_ALIGN - 1 807 865 #ifdef CONFIG_RANDOMIZE_BASE 808 - ldr x8, =__primary_switched 809 - adrp x0, __PHYS_OFFSET 810 - blr x8 866 + mov x0, x22 867 + adrp x1, init_pg_end 868 + mov sp, x1 869 + mov x29, xzr 870 + bl __pi_kaslr_early_init 871 + and x24, x0, #SZ_2M - 1 // capture memstart offset seed 872 + bic x0, x0, #SZ_2M - 1 873 + orr x23, x23, x0 // record kernel offset 874 + #endif 875 + #endif 876 + bl clear_page_tables 877 + bl create_kernel_mapping 811 878 812 - /* 813 - * If we return here, we have a KASLR displacement in x23 which we need 814 - * to take into account by discarding the current kernel mapping and 815 - * creating a new one. 816 - */ 817 - pre_disable_mmu_workaround 818 - msr sctlr_el1, x20 // disable the MMU 819 - isb 820 - bl __create_page_tables // recreate kernel mapping 821 - 822 - tlbi vmalle1 // Remove any stale TLB entries 823 - dsb nsh 824 - isb 825 - 826 - set_sctlr_el1 x19 // re-enable the MMU 827 - 879 + adrp x1, init_pg_dir 880 + load_ttbr1 x1, x1, x2 881 + #ifdef CONFIG_RELOCATABLE 828 882 bl __relocate_kernel 829 883 #endif 830 - #endif 831 884 ldr x8, =__primary_switched 832 - adrp x0, __PHYS_OFFSET 885 + adrp x0, KERNEL_START // __pa(KERNEL_START) 833 886 br x8 834 887 SYM_FUNC_END(__primary_switch)
+90 -27
arch/arm64/kernel/hyp-stub.S
··· 16 16 #include <asm/ptrace.h> 17 17 #include <asm/virt.h> 18 18 19 + // Warning, hardcoded register allocation 20 + // This will clobber x1 and x2, and expect x1 to contain 21 + // the id register value as read from the HW 22 + .macro __check_override idreg, fld, width, pass, fail 23 + ubfx x1, x1, #\fld, #\width 24 + cbz x1, \fail 25 + 26 + adr_l x1, \idreg\()_override 27 + ldr x2, [x1, FTR_OVR_VAL_OFFSET] 28 + ldr x1, [x1, FTR_OVR_MASK_OFFSET] 29 + ubfx x2, x2, #\fld, #\width 30 + ubfx x1, x1, #\fld, #\width 31 + cmp x1, xzr 32 + and x2, x2, x1 33 + csinv x2, x2, xzr, ne 34 + cbnz x2, \pass 35 + b \fail 36 + .endm 37 + 38 + .macro check_override idreg, fld, pass, fail 39 + mrs x1, \idreg\()_el1 40 + __check_override \idreg \fld 4 \pass \fail 41 + .endm 42 + 19 43 .text 20 44 .pushsection .hyp.text, "ax" 21 45 ··· 75 51 msr vbar_el2, x1 76 52 b 9f 77 53 78 - 1: cmp x0, #HVC_VHE_RESTART 79 - b.eq mutate_to_vhe 54 + 1: cmp x0, #HVC_FINALISE_EL2 55 + b.eq __finalise_el2 80 56 81 57 2: cmp x0, #HVC_SOFT_RESTART 82 58 b.ne 3f ··· 97 73 eret 98 74 SYM_CODE_END(elx_sync) 99 75 100 - // nVHE? No way! Give me the real thing! 101 - SYM_CODE_START_LOCAL(mutate_to_vhe) 76 + SYM_CODE_START_LOCAL(__finalise_el2) 77 + check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve 78 + 79 + .Linit_sve: /* SVE register access */ 80 + mrs x0, cptr_el2 // Disable SVE traps 81 + bic x0, x0, #CPTR_EL2_TZ 82 + msr cptr_el2, x0 83 + isb 84 + mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector 85 + msr_s SYS_ZCR_EL2, x1 // length for EL1. 86 + 87 + .Lskip_sve: 88 + check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme 89 + 90 + .Linit_sme: /* SME register access and priority mapping */ 91 + mrs x0, cptr_el2 // Disable SME traps 92 + bic x0, x0, #CPTR_EL2_TSM 93 + msr cptr_el2, x0 94 + isb 95 + 96 + mrs x1, sctlr_el2 97 + orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps 98 + msr sctlr_el2, x1 99 + isb 100 + 101 + mov x0, #0 // SMCR controls 102 + 103 + // Full FP in SM? 104 + mrs_s x1, SYS_ID_AA64SMFR0_EL1 105 + __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64 106 + 107 + .Linit_sme_fa64: 108 + orr x0, x0, SMCR_ELx_FA64_MASK 109 + .Lskip_sme_fa64: 110 + 111 + orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector 112 + msr_s SYS_SMCR_EL2, x0 // length for EL1. 113 + 114 + mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? 115 + ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 116 + cbz x1, .Lskip_sme 117 + 118 + msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal 119 + 120 + mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? 121 + ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4 122 + cbz x1, .Lskip_sme 123 + 124 + mrs_s x1, SYS_HCRX_EL2 125 + orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping 126 + msr_s SYS_HCRX_EL2, x1 127 + 128 + .Lskip_sme: 129 + 130 + // nVHE? No way! Give me the real thing! 102 131 // Sanity check: MMU *must* be off 103 132 mrs x1, sctlr_el2 104 133 tbnz x1, #0, 1f 105 134 106 135 // Needs to be VHE capable, obviously 107 - mrs x1, id_aa64mmfr1_el1 108 - ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 109 - cbz x1, 1f 110 - 111 - // Check whether VHE is disabled from the command line 112 - adr_l x1, id_aa64mmfr1_override 113 - ldr x2, [x1, FTR_OVR_VAL_OFFSET] 114 - ldr x1, [x1, FTR_OVR_MASK_OFFSET] 115 - ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 116 - ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 117 - cmp x1, xzr 118 - and x2, x2, x1 119 - csinv x2, x2, xzr, ne 120 - cbnz x2, 2f 136 + check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f 121 137 122 138 1: mov_q x0, HVC_STUB_ERR 123 139 eret ··· 204 140 msr spsr_el1, x0 205 141 206 142 b enter_vhe 207 - SYM_CODE_END(mutate_to_vhe) 143 + SYM_CODE_END(__finalise_el2) 208 144 209 145 // At the point where we reach enter_vhe(), we run with 210 - // the MMU off (which is enforced by mutate_to_vhe()). 146 + // the MMU off (which is enforced by __finalise_el2()). 211 147 // We thus need to be in the idmap, or everything will 212 148 // explode when enabling the MMU. 213 149 ··· 286 222 SYM_FUNC_END(__hyp_reset_vectors) 287 223 288 224 /* 289 - * Entry point to switch to VHE if deemed capable 225 + * Entry point to finalise EL2 and switch to VHE if deemed capable 226 + * 227 + * w0: boot mode, as returned by init_kernel_el() 290 228 */ 291 - SYM_FUNC_START(switch_to_vhe) 229 + SYM_FUNC_START(finalise_el2) 292 230 // Need to have booted at EL2 293 - adr_l x1, __boot_cpu_mode 294 - ldr w0, [x1] 295 231 cmp w0, #BOOT_CPU_MODE_EL2 296 232 b.ne 1f 297 233 ··· 300 236 cmp x0, #CurrentEL_EL1 301 237 b.ne 1f 302 238 303 - // Turn the world upside down 304 - mov x0, #HVC_VHE_RESTART 239 + mov x0, #HVC_FINALISE_EL2 305 240 hvc #0 306 241 1: 307 242 ret 308 - SYM_FUNC_END(switch_to_vhe) 243 + SYM_FUNC_END(finalise_el2)
+78 -15
arch/arm64/kernel/idreg-override.c
··· 19 19 #define FTR_ALIAS_NAME_LEN 30 20 20 #define FTR_ALIAS_OPTION_LEN 116 21 21 22 + static u64 __boot_status __initdata; 23 + 22 24 struct ftr_set_desc { 23 25 char name[FTR_DESC_NAME_LEN]; 24 26 struct arm64_ftr_override *override; 25 27 struct { 26 28 char name[FTR_DESC_FIELD_LEN]; 27 29 u8 shift; 30 + u8 width; 28 31 bool (*filter)(u64 val); 29 32 } fields[]; 30 33 }; 34 + 35 + #define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f } 31 36 32 37 static bool __init mmfr1_vh_filter(u64 val) 33 38 { ··· 42 37 * the user was trying to force nVHE on us, proceed with 43 38 * attitude adjustment. 44 39 */ 45 - return !(is_kernel_in_hyp_mode() && val == 0); 40 + return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) && 41 + val == 0); 46 42 } 47 43 48 44 static const struct ftr_set_desc mmfr1 __initconst = { 49 45 .name = "id_aa64mmfr1", 50 46 .override = &id_aa64mmfr1_override, 51 47 .fields = { 52 - { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter }, 48 + FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter), 53 49 {} 54 50 }, 55 51 }; 52 + 53 + static bool __init pfr0_sve_filter(u64 val) 54 + { 55 + /* 56 + * Disabling SVE also means disabling all the features that 57 + * are associated with it. The easiest way to do it is just to 58 + * override id_aa64zfr0_el1 to be 0. 59 + */ 60 + if (!val) { 61 + id_aa64zfr0_override.val = 0; 62 + id_aa64zfr0_override.mask = GENMASK(63, 0); 63 + } 64 + 65 + return true; 66 + } 67 + 68 + static const struct ftr_set_desc pfr0 __initconst = { 69 + .name = "id_aa64pfr0", 70 + .override = &id_aa64pfr0_override, 71 + .fields = { 72 + FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter), 73 + {} 74 + }, 75 + }; 76 + 77 + static bool __init pfr1_sme_filter(u64 val) 78 + { 79 + /* 80 + * Similarly to SVE, disabling SME also means disabling all 81 + * the features that are associated with it. Just set 82 + * id_aa64smfr0_el1 to 0 and don't look back. 83 + */ 84 + if (!val) { 85 + id_aa64smfr0_override.val = 0; 86 + id_aa64smfr0_override.mask = GENMASK(63, 0); 87 + } 88 + 89 + return true; 90 + } 56 91 57 92 static const struct ftr_set_desc pfr1 __initconst = { 58 93 .name = "id_aa64pfr1", 59 94 .override = &id_aa64pfr1_override, 60 95 .fields = { 61 - { "bt", ID_AA64PFR1_BT_SHIFT }, 62 - { "mte", ID_AA64PFR1_MTE_SHIFT}, 96 + FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ), 97 + FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL), 98 + FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter), 63 99 {} 64 100 }, 65 101 }; ··· 109 63 .name = "id_aa64isar1", 110 64 .override = &id_aa64isar1_override, 111 65 .fields = { 112 - { "gpi", ID_AA64ISAR1_EL1_GPI_SHIFT }, 113 - { "gpa", ID_AA64ISAR1_EL1_GPA_SHIFT }, 114 - { "api", ID_AA64ISAR1_EL1_API_SHIFT }, 115 - { "apa", ID_AA64ISAR1_EL1_APA_SHIFT }, 66 + FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL), 67 + FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL), 68 + FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL), 69 + FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL), 116 70 {} 117 71 }, 118 72 }; ··· 121 75 .name = "id_aa64isar2", 122 76 .override = &id_aa64isar2_override, 123 77 .fields = { 124 - { "gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT }, 125 - { "apa3", ID_AA64ISAR2_EL1_APA3_SHIFT }, 78 + FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL), 79 + FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL), 80 + {} 81 + }, 82 + }; 83 + 84 + static const struct ftr_set_desc smfr0 __initconst = { 85 + .name = "id_aa64smfr0", 86 + .override = &id_aa64smfr0_override, 87 + .fields = { 88 + /* FA64 is a one bit field... :-/ */ 89 + { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, }, 126 90 {} 127 91 }, 128 92 }; ··· 145 89 .override = &kaslr_feature_override, 146 90 #endif 147 91 .fields = { 148 - { "disabled", 0 }, 92 + FIELD("disabled", 0, NULL), 149 93 {} 150 94 }, 151 95 }; 152 96 153 97 static const struct ftr_set_desc * const regs[] __initconst = { 154 98 &mmfr1, 99 + &pfr0, 155 100 &pfr1, 156 101 &isar1, 157 102 &isar2, 103 + &smfr0, 158 104 &kaslr, 159 105 }; 160 106 ··· 166 108 } aliases[] __initconst = { 167 109 { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, 168 110 { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" }, 111 + { "arm64.nosve", "id_aa64pfr0.sve=0 id_aa64pfr1.sme=0" }, 112 + { "arm64.nosme", "id_aa64pfr1.sme=0" }, 169 113 { "arm64.nobti", "id_aa64pfr1.bt=0" }, 170 114 { "arm64.nopauth", 171 115 "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " ··· 204 144 205 145 for (f = 0; strlen(regs[i]->fields[f].name); f++) { 206 146 u64 shift = regs[i]->fields[f].shift; 207 - u64 mask = 0xfUL << shift; 147 + u64 width = regs[i]->fields[f].width ?: 4; 148 + u64 mask = GENMASK_ULL(shift + width - 1, shift); 208 149 u64 v; 209 150 210 151 if (find_field(cmdline, regs[i], f, &v)) ··· 213 152 214 153 /* 215 154 * If an override gets filtered out, advertise 216 - * it by setting the value to 0xf, but 155 + * it by setting the value to the all-ones while 217 156 * clearing the mask... Yes, this is fragile. 218 157 */ 219 158 if (regs[i]->fields[f].filter && ··· 295 234 } 296 235 297 236 /* Keep checkers quiet */ 298 - void init_feature_override(void); 237 + void init_feature_override(u64 boot_status); 299 238 300 - asmlinkage void __init init_feature_override(void) 239 + asmlinkage void __init init_feature_override(u64 boot_status) 301 240 { 302 241 int i; 303 242 ··· 307 246 regs[i]->override->mask = 0; 308 247 } 309 248 } 249 + 250 + __boot_status = boot_status; 310 251 311 252 parse_cmdline(); 312 253
+30 -27
arch/arm64/kernel/image-vars.h
··· 10 10 #error This file should only be included in vmlinux.lds.S 11 11 #endif 12 12 13 - #ifdef CONFIG_EFI 14 - 15 - __efistub_kernel_size = _edata - _text; 16 - __efistub_primary_entry_offset = primary_entry - _text; 17 - 13 + PROVIDE(__efistub_kernel_size = _edata - _text); 14 + PROVIDE(__efistub_primary_entry_offset = primary_entry - _text); 18 15 19 16 /* 20 17 * The EFI stub has its own symbol namespace prefixed by __efistub_, to ··· 22 25 * linked at. The routines below are all implemented in assembler in a 23 26 * position independent manner 24 27 */ 25 - __efistub_memcmp = __pi_memcmp; 26 - __efistub_memchr = __pi_memchr; 27 - __efistub_memcpy = __pi_memcpy; 28 - __efistub_memmove = __pi_memmove; 29 - __efistub_memset = __pi_memset; 30 - __efistub_strlen = __pi_strlen; 31 - __efistub_strnlen = __pi_strnlen; 32 - __efistub_strcmp = __pi_strcmp; 33 - __efistub_strncmp = __pi_strncmp; 34 - __efistub_strrchr = __pi_strrchr; 35 - __efistub_dcache_clean_poc = __pi_dcache_clean_poc; 28 + PROVIDE(__efistub_memcmp = __pi_memcmp); 29 + PROVIDE(__efistub_memchr = __pi_memchr); 30 + PROVIDE(__efistub_memcpy = __pi_memcpy); 31 + PROVIDE(__efistub_memmove = __pi_memmove); 32 + PROVIDE(__efistub_memset = __pi_memset); 33 + PROVIDE(__efistub_strlen = __pi_strlen); 34 + PROVIDE(__efistub_strnlen = __pi_strnlen); 35 + PROVIDE(__efistub_strcmp = __pi_strcmp); 36 + PROVIDE(__efistub_strncmp = __pi_strncmp); 37 + PROVIDE(__efistub_strrchr = __pi_strrchr); 38 + PROVIDE(__efistub_dcache_clean_poc = __pi_dcache_clean_poc); 36 39 37 - #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) 38 - __efistub___memcpy = __pi_memcpy; 39 - __efistub___memmove = __pi_memmove; 40 - __efistub___memset = __pi_memset; 41 - #endif 40 + PROVIDE(__efistub__text = _text); 41 + PROVIDE(__efistub__end = _end); 42 + PROVIDE(__efistub__edata = _edata); 43 + PROVIDE(__efistub_screen_info = screen_info); 44 + PROVIDE(__efistub__ctype = _ctype); 42 45 43 - __efistub__text = _text; 44 - __efistub__end = _end; 45 - __efistub__edata = _edata; 46 - __efistub_screen_info = screen_info; 47 - __efistub__ctype = _ctype; 46 + /* 47 + * The __ prefixed memcpy/memset/memmove symbols are provided by KASAN, which 48 + * instruments the conventional ones. Therefore, any references from the EFI 49 + * stub or other position independent, low level C code should be redirected to 50 + * the non-instrumented versions as well. 51 + */ 52 + PROVIDE(__efistub___memcpy = __pi_memcpy); 53 + PROVIDE(__efistub___memmove = __pi_memmove); 54 + PROVIDE(__efistub___memset = __pi_memset); 48 55 49 - #endif 56 + PROVIDE(__pi___memcpy = __pi_memcpy); 57 + PROVIDE(__pi___memmove = __pi_memmove); 58 + PROVIDE(__pi___memset = __pi_memset); 50 59 51 60 #ifdef CONFIG_KVM 52 61
+18 -131
arch/arm64/kernel/kaslr.c
··· 13 13 #include <linux/pgtable.h> 14 14 #include <linux/random.h> 15 15 16 - #include <asm/cacheflush.h> 17 16 #include <asm/fixmap.h> 18 17 #include <asm/kernel-pgtable.h> 19 18 #include <asm/memory.h> ··· 20 21 #include <asm/sections.h> 21 22 #include <asm/setup.h> 22 23 23 - enum kaslr_status { 24 - KASLR_ENABLED, 25 - KASLR_DISABLED_CMDLINE, 26 - KASLR_DISABLED_NO_SEED, 27 - KASLR_DISABLED_FDT_REMAP, 28 - }; 29 - 30 - static enum kaslr_status __initdata kaslr_status; 31 24 u64 __ro_after_init module_alloc_base; 32 25 u16 __initdata memstart_offset_seed; 33 26 34 - static __init u64 get_kaslr_seed(void *fdt) 35 - { 36 - int node, len; 37 - fdt64_t *prop; 38 - u64 ret; 39 - 40 - node = fdt_path_offset(fdt, "/chosen"); 41 - if (node < 0) 42 - return 0; 43 - 44 - prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); 45 - if (!prop || len != sizeof(u64)) 46 - return 0; 47 - 48 - ret = fdt64_to_cpu(*prop); 49 - *prop = 0; 50 - return ret; 51 - } 52 - 53 27 struct arm64_ftr_override kaslr_feature_override __initdata; 54 28 55 - /* 56 - * This routine will be executed with the kernel mapped at its default virtual 57 - * address, and if it returns successfully, the kernel will be remapped, and 58 - * start_kernel() will be executed from a randomized virtual offset. The 59 - * relocation will result in all absolute references (e.g., static variables 60 - * containing function pointers) to be reinitialized, and zero-initialized 61 - * .bss variables will be reset to 0. 62 - */ 63 - u64 __init kaslr_early_init(void) 29 + static int __init kaslr_init(void) 64 30 { 65 - void *fdt; 66 - u64 seed, offset, mask, module_range; 67 - unsigned long raw; 31 + u64 module_range; 32 + u32 seed; 68 33 69 34 /* 70 35 * Set a reasonable default for module_alloc_base in case 71 36 * we end up running with module randomization disabled. 72 37 */ 73 38 module_alloc_base = (u64)_etext - MODULES_VSIZE; 74 - dcache_clean_inval_poc((unsigned long)&module_alloc_base, 75 - (unsigned long)&module_alloc_base + 76 - sizeof(module_alloc_base)); 77 39 78 - /* 79 - * Try to map the FDT early. If this fails, we simply bail, 80 - * and proceed with KASLR disabled. We will make another 81 - * attempt at mapping the FDT in setup_machine() 82 - */ 83 - fdt = get_early_fdt_ptr(); 84 - if (!fdt) { 85 - kaslr_status = KASLR_DISABLED_FDT_REMAP; 86 - return 0; 87 - } 88 - 89 - /* 90 - * Retrieve (and wipe) the seed from the FDT 91 - */ 92 - seed = get_kaslr_seed(fdt); 93 - 94 - /* 95 - * Check if 'nokaslr' appears on the command line, and 96 - * return 0 if that is the case. 97 - */ 98 40 if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) { 99 - kaslr_status = KASLR_DISABLED_CMDLINE; 41 + pr_info("KASLR disabled on command line\n"); 100 42 return 0; 101 43 } 102 44 103 - /* 104 - * Mix in any entropy obtainable architecturally if enabled 105 - * and supported. 106 - */ 107 - 108 - if (arch_get_random_seed_long_early(&raw)) 109 - seed ^= raw; 110 - 111 - if (!seed) { 112 - kaslr_status = KASLR_DISABLED_NO_SEED; 45 + if (!kaslr_offset()) { 46 + pr_warn("KASLR disabled due to lack of seed\n"); 113 47 return 0; 114 48 } 115 49 50 + pr_info("KASLR enabled\n"); 51 + 116 52 /* 117 - * OK, so we are proceeding with KASLR enabled. Calculate a suitable 118 - * kernel image offset from the seed. Let's place the kernel in the 119 - * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of 120 - * the lower and upper quarters to avoid colliding with other 121 - * allocations. 122 - * Even if we could randomize at page granularity for 16k and 64k pages, 123 - * let's always round to 2 MB so we don't interfere with the ability to 124 - * map using contiguous PTEs 53 + * KASAN without KASAN_VMALLOC does not expect the module region to 54 + * intersect the vmalloc region, since shadow memory is allocated for 55 + * each module at load time, whereas the vmalloc region will already be 56 + * shadowed by KASAN zero pages. 125 57 */ 126 - mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1); 127 - offset = BIT(VA_BITS_MIN - 3) + (seed & mask); 58 + BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) || 59 + IS_ENABLED(CONFIG_KASAN_SW_TAGS)) && 60 + !IS_ENABLED(CONFIG_KASAN_VMALLOC)); 128 61 129 - /* use the top 16 bits to randomize the linear region */ 130 - memstart_offset_seed = seed >> 48; 131 - 132 - if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && 133 - (IS_ENABLED(CONFIG_KASAN_GENERIC) || 134 - IS_ENABLED(CONFIG_KASAN_SW_TAGS))) 135 - /* 136 - * KASAN without KASAN_VMALLOC does not expect the module region 137 - * to intersect the vmalloc region, since shadow memory is 138 - * allocated for each module at load time, whereas the vmalloc 139 - * region is shadowed by KASAN zero pages. So keep modules 140 - * out of the vmalloc region if KASAN is enabled without 141 - * KASAN_VMALLOC, and put the kernel well within 4 GB of the 142 - * module region. 143 - */ 144 - return offset % SZ_2G; 62 + seed = get_random_u32(); 145 63 146 64 if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { 147 65 /* ··· 70 154 * resolved normally.) 71 155 */ 72 156 module_range = SZ_2G - (u64)(_end - _stext); 73 - module_alloc_base = max((u64)_end + offset - SZ_2G, 74 - (u64)MODULES_VADDR); 157 + module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); 75 158 } else { 76 159 /* 77 160 * Randomize the module region by setting module_alloc_base to ··· 82 167 * when ARM64_MODULE_PLTS is enabled. 83 168 */ 84 169 module_range = MODULES_VSIZE - (u64)(_etext - _stext); 85 - module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; 86 170 } 87 171 88 172 /* use the lower 21 bits to randomize the base of the module region */ 89 173 module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; 90 174 module_alloc_base &= PAGE_MASK; 91 175 92 - dcache_clean_inval_poc((unsigned long)&module_alloc_base, 93 - (unsigned long)&module_alloc_base + 94 - sizeof(module_alloc_base)); 95 - dcache_clean_inval_poc((unsigned long)&memstart_offset_seed, 96 - (unsigned long)&memstart_offset_seed + 97 - sizeof(memstart_offset_seed)); 98 - 99 - return offset; 100 - } 101 - 102 - static int __init kaslr_init(void) 103 - { 104 - switch (kaslr_status) { 105 - case KASLR_ENABLED: 106 - pr_info("KASLR enabled\n"); 107 - break; 108 - case KASLR_DISABLED_CMDLINE: 109 - pr_info("KASLR disabled on command line\n"); 110 - break; 111 - case KASLR_DISABLED_NO_SEED: 112 - pr_warn("KASLR disabled due to lack of seed\n"); 113 - break; 114 - case KASLR_DISABLED_FDT_REMAP: 115 - pr_warn("KASLR disabled due to FDT remapping failure\n"); 116 - break; 117 - } 118 - 119 176 return 0; 120 177 } 121 - core_initcall(kaslr_init) 178 + subsys_initcall(kaslr_init)
+33
arch/arm64/kernel/pi/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + # Copyright 2022 Google LLC 3 + 4 + KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ 5 + -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ 6 + $(call cc-option,-mbranch-protection=none) \ 7 + -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ 8 + -include $(srctree)/include/linux/hidden.h \ 9 + -D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \ 10 + $(call cc-option,-fno-addrsig) 11 + 12 + # remove SCS flags from all objects in this directory 13 + KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) 14 + # disable LTO 15 + KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) 16 + 17 + GCOV_PROFILE := n 18 + KASAN_SANITIZE := n 19 + KCSAN_SANITIZE := n 20 + UBSAN_SANITIZE := n 21 + KCOV_INSTRUMENT := n 22 + 23 + $(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ 24 + --remove-section=.note.gnu.property \ 25 + --prefix-alloc-sections=.init 26 + $(obj)/%.pi.o: $(obj)/%.o FORCE 27 + $(call if_changed,objcopy) 28 + 29 + $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE 30 + $(call if_changed_rule,cc_o_c) 31 + 32 + obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o 33 + extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
+112
arch/arm64/kernel/pi/kaslr_early.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + // Copyright 2022 Google LLC 3 + // Author: Ard Biesheuvel <ardb@google.com> 4 + 5 + // NOTE: code in this file runs *very* early, and is not permitted to use 6 + // global variables or anything that relies on absolute addressing. 7 + 8 + #include <linux/libfdt.h> 9 + #include <linux/init.h> 10 + #include <linux/linkage.h> 11 + #include <linux/types.h> 12 + #include <linux/sizes.h> 13 + #include <linux/string.h> 14 + 15 + #include <asm/archrandom.h> 16 + #include <asm/memory.h> 17 + 18 + /* taken from lib/string.c */ 19 + static char *__strstr(const char *s1, const char *s2) 20 + { 21 + size_t l1, l2; 22 + 23 + l2 = strlen(s2); 24 + if (!l2) 25 + return (char *)s1; 26 + l1 = strlen(s1); 27 + while (l1 >= l2) { 28 + l1--; 29 + if (!memcmp(s1, s2, l2)) 30 + return (char *)s1; 31 + s1++; 32 + } 33 + return NULL; 34 + } 35 + static bool cmdline_contains_nokaslr(const u8 *cmdline) 36 + { 37 + const u8 *str; 38 + 39 + str = __strstr(cmdline, "nokaslr"); 40 + return str == cmdline || (str > cmdline && *(str - 1) == ' '); 41 + } 42 + 43 + static bool is_kaslr_disabled_cmdline(void *fdt) 44 + { 45 + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { 46 + int node; 47 + const u8 *prop; 48 + 49 + node = fdt_path_offset(fdt, "/chosen"); 50 + if (node < 0) 51 + goto out; 52 + 53 + prop = fdt_getprop(fdt, node, "bootargs", NULL); 54 + if (!prop) 55 + goto out; 56 + 57 + if (cmdline_contains_nokaslr(prop)) 58 + return true; 59 + 60 + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND)) 61 + goto out; 62 + 63 + return false; 64 + } 65 + out: 66 + return cmdline_contains_nokaslr(CONFIG_CMDLINE); 67 + } 68 + 69 + static u64 get_kaslr_seed(void *fdt) 70 + { 71 + int node, len; 72 + fdt64_t *prop; 73 + u64 ret; 74 + 75 + node = fdt_path_offset(fdt, "/chosen"); 76 + if (node < 0) 77 + return 0; 78 + 79 + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); 80 + if (!prop || len != sizeof(u64)) 81 + return 0; 82 + 83 + ret = fdt64_to_cpu(*prop); 84 + *prop = 0; 85 + return ret; 86 + } 87 + 88 + asmlinkage u64 kaslr_early_init(void *fdt) 89 + { 90 + u64 seed; 91 + 92 + if (is_kaslr_disabled_cmdline(fdt)) 93 + return 0; 94 + 95 + seed = get_kaslr_seed(fdt); 96 + if (!seed) { 97 + #ifdef CONFIG_ARCH_RANDOM 98 + if (!__early_cpu_has_rndr() || 99 + !__arm64_rndr((unsigned long *)&seed)) 100 + #endif 101 + return 0; 102 + } 103 + 104 + /* 105 + * OK, so we are proceeding with KASLR enabled. Calculate a suitable 106 + * kernel image offset from the seed. Let's place the kernel in the 107 + * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of 108 + * the lower and upper quarters to avoid colliding with other 109 + * allocations. 110 + */ 111 + return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0)); 112 + }
+2 -1
arch/arm64/kernel/sleep.S
··· 100 100 .pushsection ".idmap.text", "awx" 101 101 SYM_CODE_START(cpu_resume) 102 102 bl init_kernel_el 103 - bl switch_to_vhe 103 + bl finalise_el2 104 104 bl __cpu_setup 105 105 /* enable the MMU early - so we can access sleep_save_stash by va */ 106 106 adrp x1, swapper_pg_dir 107 + adrp x2, idmap_pg_dir 107 108 bl __enable_mmu 108 109 ldr x8, =_cpu_resume 109 110 br x8
+1 -1
arch/arm64/kernel/suspend.c
··· 52 52 53 53 /* Restore CnP bit in TTBR1_EL1 */ 54 54 if (system_supports_cnp()) 55 - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); 55 + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir); 56 56 57 57 /* 58 58 * PSTATE was not saved over suspend/resume, re-enable any detected
+9 -10
arch/arm64/kernel/vmlinux.lds.S
··· 199 199 } 200 200 201 201 idmap_pg_dir = .; 202 - . += IDMAP_DIR_SIZE; 203 - idmap_pg_end = .; 202 + . += PAGE_SIZE; 204 203 205 204 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 206 205 tramp_pg_dir = .; ··· 235 236 __inittext_end = .; 236 237 __initdata_begin = .; 237 238 239 + init_idmap_pg_dir = .; 240 + . += INIT_IDMAP_DIR_SIZE; 241 + init_idmap_pg_end = .; 242 + 238 243 .init.data : { 239 244 INIT_DATA 240 245 INIT_SETUP(16) ··· 257 254 HYPERVISOR_RELOC_SECTION 258 255 259 256 .rela.dyn : ALIGN(8) { 257 + __rela_start = .; 260 258 *(.rela .rela*) 259 + __rela_end = .; 261 260 } 262 261 263 - __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); 264 - __rela_size = SIZEOF(.rela.dyn); 265 - 266 - #ifdef CONFIG_RELR 267 262 .relr.dyn : ALIGN(8) { 263 + __relr_start = .; 268 264 *(.relr.dyn) 265 + __relr_end = .; 269 266 } 270 - 271 - __relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR); 272 - __relr_size = SIZEOF(.relr.dyn); 273 - #endif 274 267 275 268 . = ALIGN(SEGMENT_ALIGN); 276 269 __initdata_end = .;
+2 -2
arch/arm64/mm/kasan_init.c
··· 236 236 */ 237 237 memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); 238 238 dsb(ishst); 239 - cpu_replace_ttbr1(lm_alias(tmp_pg_dir)); 239 + cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir); 240 240 241 241 clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); 242 242 ··· 280 280 PAGE_KERNEL_RO)); 281 281 282 282 memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE); 283 - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); 283 + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir); 284 284 } 285 285 286 286 static void __init kasan_init_depth(void)
+51 -4
arch/arm64/mm/mmu.c
··· 43 43 #define NO_CONT_MAPPINGS BIT(1) 44 44 #define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ 45 45 46 - u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); 47 - u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; 46 + int idmap_t0sz __ro_after_init; 48 47 49 - u64 __section(".mmuoff.data.write") vabits_actual; 48 + #if VA_BITS > 48 49 + u64 vabits_actual __ro_after_init = VA_BITS_MIN; 50 50 EXPORT_SYMBOL(vabits_actual); 51 + #endif 52 + 53 + u64 kimage_vaddr __ro_after_init = (u64)&_text; 54 + EXPORT_SYMBOL(kimage_vaddr); 51 55 52 56 u64 kimage_voffset __ro_after_init; 53 57 EXPORT_SYMBOL(kimage_voffset); 58 + 59 + u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 }; 60 + 61 + /* 62 + * The booting CPU updates the failed status @__early_cpu_boot_status, 63 + * with MMU turned off. 64 + */ 65 + long __section(".mmuoff.data.write") __early_cpu_boot_status; 54 66 55 67 /* 56 68 * Empty_zero_page is a special page that is used for zero-initialized data ··· 775 763 kasan_copy_shadow(pgdp); 776 764 } 777 765 766 + static void __init create_idmap(void) 767 + { 768 + u64 start = __pa_symbol(__idmap_text_start); 769 + u64 size = __pa_symbol(__idmap_text_end) - start; 770 + pgd_t *pgd = idmap_pg_dir; 771 + u64 pgd_phys; 772 + 773 + /* check if we need an additional level of translation */ 774 + if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) { 775 + pgd_phys = early_pgtable_alloc(PAGE_SHIFT); 776 + set_pgd(&idmap_pg_dir[start >> VA_BITS], 777 + __pgd(pgd_phys | P4D_TYPE_TABLE)); 778 + pgd = __va(pgd_phys); 779 + } 780 + __create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX, 781 + early_pgtable_alloc, 0); 782 + 783 + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { 784 + extern u32 __idmap_kpti_flag; 785 + u64 pa = __pa_symbol(&__idmap_kpti_flag); 786 + 787 + /* 788 + * The KPTI G-to-nG conversion code needs a read-write mapping 789 + * of its synchronization flag in the ID map. 790 + */ 791 + __create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL, 792 + early_pgtable_alloc, 0); 793 + } 794 + } 795 + 778 796 void __init paging_init(void) 779 797 { 780 798 pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); 799 + extern pgd_t init_idmap_pg_dir[]; 800 + 801 + idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0)); 781 802 782 803 map_kernel(pgdp); 783 804 map_mem(pgdp); 784 805 785 806 pgd_clear_fixmap(); 786 807 787 - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); 808 + cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir); 788 809 init_mm.pgd = swapper_pg_dir; 789 810 790 811 memblock_phys_free(__pa_symbol(init_pg_dir), 791 812 __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); 792 813 793 814 memblock_allow_resize(); 815 + 816 + create_idmap(); 794 817 } 795 818 796 819 /*
+9 -6
arch/arm64/mm/proc.S
··· 249 249 * 250 250 * Called exactly once from stop_machine context by each CPU found during boot. 251 251 */ 252 - __idmap_kpti_flag: 253 - .long 1 252 + .pushsection ".data", "aw", %progbits 253 + SYM_DATA(__idmap_kpti_flag, .long 1) 254 + .popsection 255 + 254 256 SYM_FUNC_START(idmap_kpti_install_ng_mappings) 255 257 cpu .req w0 256 258 temp_pte .req x0 ··· 275 273 276 274 mov x5, x3 // preserve temp_pte arg 277 275 mrs swapper_ttb, ttbr1_el1 278 - adr flag_ptr, __idmap_kpti_flag 276 + adr_l flag_ptr, __idmap_kpti_flag 279 277 280 278 cbnz cpu, __idmap_kpti_secondary 281 279 ··· 398 396 * 399 397 * Initialise the processor for turning the MMU on. 400 398 * 399 + * Input: 400 + * x0 - actual number of VA bits (ignored unless VA_BITS > 48) 401 401 * Output: 402 402 * Return in x0 the value of the SCTLR_EL1 register. 403 403 */ ··· 469 465 tcr_clear_errata_bits tcr, x9, x5 470 466 471 467 #ifdef CONFIG_ARM64_VA_BITS_52 472 - ldr_l x9, vabits_actual 473 - sub x9, xzr, x9 468 + sub x9, xzr, x0 474 469 add x9, x9, #64 475 470 tcr_set_t1sz tcr, x9 476 471 #else 477 - ldr_l x9, idmap_t0sz 472 + idmap_get_t0sz x9 478 473 #endif 479 474 tcr_set_t0sz tcr, x9 480 475