Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-next/poe' into for-next/core

* for-next/poe: (31 commits)
arm64: pkeys: remove redundant WARN
kselftest/arm64: Add test case for POR_EL0 signal frame records
kselftest/arm64: parse POE_MAGIC in a signal frame
kselftest/arm64: add HWCAP test for FEAT_S1POE
selftests: mm: make protection_keys test work on arm64
selftests: mm: move fpregs printing
kselftest/arm64: move get_header()
arm64: add Permission Overlay Extension Kconfig
arm64: enable PKEY support for CPUs with S1POE
arm64: enable POE and PIE to coexist
arm64/ptrace: add support for FEAT_POE
arm64: add POE signal support
arm64: implement PKEYS support
arm64: add pte_access_permitted_no_overlay()
arm64: handle PKEY/POE faults
arm64: mask out POIndex when modifying a PTE
arm64: convert protection key into vm_flags and pgprot values
arm64: add POIndex defines
arm64: re-order MTE VM_ flags
arm64: enable the Permission Overlay Extension for EL0
...

+1053 -62
+2
Documentation/arch/arm64/elf_hwcaps.rst
··· 365 365 HWCAP2_SME_SF8DP4 366 366 Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1. 367 367 368 + HWCAP2_POE 369 + Functionality implied by ID_AA64MMFR3_EL1.S1POE == 0b0001. 368 370 369 371 4. Unused AT_HWCAP bits 370 372 -----------------------
+23
arch/arm64/Kconfig
··· 2138 2138 if the cpu does not implement the feature. 2139 2139 endmenu # "ARMv8.7 architectural features" 2140 2140 2141 + menu "ARMv8.9 architectural features" 2142 + 2143 + config ARM64_POE 2144 + prompt "Permission Overlay Extension" 2145 + def_bool y 2146 + select ARCH_USES_HIGH_VMA_FLAGS 2147 + select ARCH_HAS_PKEYS 2148 + help 2149 + The Permission Overlay Extension is used to implement Memory 2150 + Protection Keys. Memory Protection Keys provides a mechanism for 2151 + enforcing page-based protections, but without requiring modification 2152 + of the page tables when an application changes protection domains. 2153 + 2154 + For details, see Documentation/core-api/protection-keys.rst 2155 + 2156 + If unsure, say y. 2157 + 2158 + config ARCH_PKEY_BITS 2159 + int 2160 + default 3 2161 + 2162 + endmenu # "ARMv8.9 architectural features" 2163 + 2141 2164 config ARM64_SVE 2142 2165 bool "ARM Scalable Vector Extension support" 2143 2166 default y
+6
arch/arm64/include/asm/cpufeature.h
··· 832 832 return cpus_have_final_cap(ARM64_HAS_LPA2); 833 833 } 834 834 835 + static inline bool system_supports_poe(void) 836 + { 837 + return IS_ENABLED(CONFIG_ARM64_POE) && 838 + alternative_has_cap_unlikely(ARM64_HAS_S1POE); 839 + } 840 + 835 841 int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); 836 842 bool try_emulate_mrs(struct pt_regs *regs, u32 isn); 837 843
+8
arch/arm64/include/asm/el2_setup.h
··· 192 192 orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1 193 193 194 194 .Lskip_pie_fgt_\@: 195 + mrs_s x1, SYS_ID_AA64MMFR3_EL1 196 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1POE_SHIFT, #4 197 + cbz x1, .Lskip_poe_fgt_\@ 198 + 199 + /* Disable trapping of POR_EL0 */ 200 + orr x0, x0, #HFGxTR_EL2_nPOR_EL0 201 + 202 + .Lskip_poe_fgt_\@: 195 203 msr_s SYS_HFGRTR_EL2, x0 196 204 msr_s SYS_HFGWTR_EL2, x0 197 205 msr_s SYS_HFGITR_EL2, xzr
+1
arch/arm64/include/asm/hwcap.h
··· 157 157 #define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA) 158 158 #define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4) 159 159 #define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2) 160 + #define KERNEL_HWCAP_POE __khwcap2_feature(POE) 160 161 161 162 /* 162 163 * This yields a mask that user programs can use to figure out what
+2 -1
arch/arm64/include/asm/kvm_asm.h
··· 10 10 #include <asm/hyp_image.h> 11 11 #include <asm/insn.h> 12 12 #include <asm/virt.h> 13 + #include <asm/sysreg.h> 13 14 14 15 #define ARM_EXIT_WITH_SERROR_BIT 31 15 16 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) ··· 260 259 asm volatile( \ 261 260 " mrs %1, spsr_el2\n" \ 262 261 " mrs %2, elr_el2\n" \ 263 - "1: at "at_op", %3\n" \ 262 + "1: " __msr_s(at_op, "%3") "\n" \ 264 263 " isb\n" \ 265 264 " b 9f\n" \ 266 265 "2: msr spsr_el2, %1\n" \
+4
arch/arm64/include/asm/kvm_host.h
··· 446 446 GCR_EL1, /* Tag Control Register */ 447 447 TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ 448 448 449 + POR_EL0, /* Permission Overlay Register 0 (EL0) */ 450 + 449 451 /* 32bit specific registers. */ 450 452 DACR32_EL2, /* Domain Access Control Register */ 451 453 IFSR32_EL2, /* Instruction Fault Status Register */ ··· 518 516 /* Permission Indirection Extension registers */ 519 517 VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */ 520 518 VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */ 519 + 520 + VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */ 521 521 522 522 VNCR(HFGRTR_EL2), 523 523 VNCR(HFGWTR_EL2),
+9 -1
arch/arm64/include/asm/mman.h
··· 7 7 #include <uapi/asm/mman.h> 8 8 9 9 static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, 10 - unsigned long pkey __always_unused) 10 + unsigned long pkey) 11 11 { 12 12 unsigned long ret = 0; 13 13 ··· 16 16 17 17 if (system_supports_mte() && (prot & PROT_MTE)) 18 18 ret |= VM_MTE; 19 + 20 + #ifdef CONFIG_ARCH_HAS_PKEYS 21 + if (system_supports_poe()) { 22 + ret |= pkey & BIT(0) ? VM_PKEY_BIT0 : 0; 23 + ret |= pkey & BIT(1) ? VM_PKEY_BIT1 : 0; 24 + ret |= pkey & BIT(2) ? VM_PKEY_BIT2 : 0; 25 + } 26 + #endif 19 27 20 28 return ret; 21 29 }
+1
arch/arm64/include/asm/mmu.h
··· 25 25 refcount_t pinned; 26 26 void *vdso; 27 27 unsigned long flags; 28 + u8 pkey_allocation_map; 28 29 } mm_context_t; 29 30 30 31 /*
+45 -1
arch/arm64/include/asm/mmu_context.h
··· 15 15 #include <linux/sched/hotplug.h> 16 16 #include <linux/mm_types.h> 17 17 #include <linux/pgtable.h> 18 + #include <linux/pkeys.h> 18 19 19 20 #include <asm/cacheflush.h> 20 21 #include <asm/cpufeature.h> 21 22 #include <asm/daifflags.h> 22 23 #include <asm/proc-fns.h> 23 - #include <asm-generic/mm_hooks.h> 24 24 #include <asm/cputype.h> 25 25 #include <asm/sysreg.h> 26 26 #include <asm/tlbflush.h> ··· 175 175 { 176 176 atomic64_set(&mm->context.id, 0); 177 177 refcount_set(&mm->context.pinned, 0); 178 + 179 + /* pkey 0 is the default, so always reserve it. */ 180 + mm->context.pkey_allocation_map = BIT(0); 181 + 178 182 return 0; 183 + } 184 + 185 + static inline void arch_dup_pkeys(struct mm_struct *oldmm, 186 + struct mm_struct *mm) 187 + { 188 + /* Duplicate the oldmm pkey state in mm: */ 189 + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; 190 + } 191 + 192 + static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 193 + { 194 + arch_dup_pkeys(oldmm, mm); 195 + 196 + return 0; 197 + } 198 + 199 + static inline void arch_exit_mmap(struct mm_struct *mm) 200 + { 201 + } 202 + 203 + static inline void arch_unmap(struct mm_struct *mm, 204 + unsigned long start, unsigned long end) 205 + { 179 206 } 180 207 181 208 #ifdef CONFIG_ARM64_SW_TTBR0_PAN ··· 292 265 static inline unsigned long mm_untag_mask(struct mm_struct *mm) 293 266 { 294 267 return -1UL >> 8; 268 + } 269 + 270 + /* 271 + * Only enforce protection keys on the current process, because there is no 272 + * user context to access POR_EL0 for another address space. 
273 + */ 274 + static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, 275 + bool write, bool execute, bool foreign) 276 + { 277 + if (!system_supports_poe()) 278 + return true; 279 + 280 + /* allow access if the VMA is not one from this process */ 281 + if (foreign || vma_is_foreign(vma)) 282 + return true; 283 + 284 + return por_el0_allows_pkey(vma_pkey(vma), write, execute); 295 285 } 296 286 297 287 #include <asm-generic/mmu_context.h>
+10
arch/arm64/include/asm/pgtable-hwdef.h
··· 199 199 #define PTE_PI_IDX_3 54 /* UXN */ 200 200 201 201 /* 202 + * POIndex[2:0] encoding (Permission Overlay Extension) 203 + */ 204 + #define PTE_PO_IDX_0 (_AT(pteval_t, 1) << 60) 205 + #define PTE_PO_IDX_1 (_AT(pteval_t, 1) << 61) 206 + #define PTE_PO_IDX_2 (_AT(pteval_t, 1) << 62) 207 + 208 + #define PTE_PO_IDX_MASK GENMASK_ULL(62, 60) 209 + 210 + 211 + /* 202 212 * Memory Attribute override for Stage-2 (MemAttr[3:0]) 203 213 */ 204 214 #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
+4 -4
arch/arm64/include/asm/pgtable-prot.h
··· 154 154 155 155 #define PIE_E0 ( \ 156 156 PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \ 157 - PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \ 158 - PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \ 159 - PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \ 160 - PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW)) 157 + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \ 158 + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \ 159 + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \ 160 + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW_O)) 161 161 162 162 #define PIE_E1 ( \ 163 163 PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
+29 -5
arch/arm64/include/asm/pgtable.h
··· 34 34 35 35 #include <asm/cmpxchg.h> 36 36 #include <asm/fixmap.h> 37 + #include <asm/por.h> 37 38 #include <linux/mmdebug.h> 38 39 #include <linux/mm_types.h> 39 40 #include <linux/sched.h> ··· 150 149 #define pte_accessible(mm, pte) \ 151 150 (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) 152 151 152 + static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute) 153 + { 154 + u64 por; 155 + 156 + if (!system_supports_poe()) 157 + return true; 158 + 159 + por = read_sysreg_s(SYS_POR_EL0); 160 + 161 + if (write) 162 + return por_elx_allows_write(por, pkey); 163 + 164 + if (execute) 165 + return por_elx_allows_exec(por, pkey); 166 + 167 + return por_elx_allows_read(por, pkey); 168 + } 169 + 153 170 /* 154 171 * p??_access_permitted() is true for valid user mappings (PTE_USER 155 172 * bit set, subject to the write permission check). For execute-only ··· 175 156 * not set) must return false. PROT_NONE mappings do not have the 176 157 * PTE_VALID bit set. 177 158 */ 178 - #define pte_access_permitted(pte, write) \ 159 + #define pte_access_permitted_no_overlay(pte, write) \ 179 160 (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) 161 + #define pte_access_permitted(pte, write) \ 162 + (pte_access_permitted_no_overlay(pte, write) && \ 163 + por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false)) 180 164 #define pmd_access_permitted(pmd, write) \ 181 165 (pte_access_permitted(pmd_pte(pmd), (write))) 182 166 #define pud_access_permitted(pud, write) \ ··· 395 373 /* 396 374 * If the PTE would provide user space access to the tags associated 397 375 * with it then ensure that the MTE tags are synchronised. Although 398 - * pte_access_permitted() returns false for exec only mappings, they 399 - * don't expose tags (instruction fetches don't check tags). 
376 + * pte_access_permitted_no_overlay() returns false for exec only 377 + * mappings, they don't expose tags (instruction fetches don't check 378 + * tags). 400 379 */ 401 - if (system_supports_mte() && pte_access_permitted(pte, false) && 380 + if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) && 402 381 !pte_special(pte) && pte_tagged(pte)) 403 382 mte_sync_tags(pte, nr_pages); 404 383 } ··· 1126 1103 */ 1127 1104 const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | 1128 1105 PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE | 1129 - PTE_GP | PTE_ATTRINDX_MASK; 1106 + PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK; 1107 + 1130 1108 /* preserve the hardware dirty information */ 1131 1109 if (pte_hw_dirty(pte)) 1132 1110 pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+106
arch/arm64/include/asm/pkeys.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (C) 2023 Arm Ltd. 4 + * 5 + * Based on arch/x86/include/asm/pkeys.h 6 + */ 7 + 8 + #ifndef _ASM_ARM64_PKEYS_H 9 + #define _ASM_ARM64_PKEYS_H 10 + 11 + #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2) 12 + 13 + #define arch_max_pkey() 8 14 + 15 + int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, 16 + unsigned long init_val); 17 + 18 + static inline bool arch_pkeys_enabled(void) 19 + { 20 + return system_supports_poe(); 21 + } 22 + 23 + static inline int vma_pkey(struct vm_area_struct *vma) 24 + { 25 + return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT; 26 + } 27 + 28 + static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, 29 + int prot, int pkey) 30 + { 31 + if (pkey != -1) 32 + return pkey; 33 + 34 + return vma_pkey(vma); 35 + } 36 + 37 + static inline int execute_only_pkey(struct mm_struct *mm) 38 + { 39 + // Execute-only mappings are handled by EPAN/FEAT_PAN3. 40 + return -1; 41 + } 42 + 43 + #define mm_pkey_allocation_map(mm) (mm)->context.pkey_allocation_map 44 + #define mm_set_pkey_allocated(mm, pkey) do { \ 45 + mm_pkey_allocation_map(mm) |= (1U << pkey); \ 46 + } while (0) 47 + #define mm_set_pkey_free(mm, pkey) do { \ 48 + mm_pkey_allocation_map(mm) &= ~(1U << pkey); \ 49 + } while (0) 50 + 51 + static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) 52 + { 53 + /* 54 + * "Allocated" pkeys are those that have been returned 55 + * from pkey_alloc() or pkey 0 which is allocated 56 + * implicitly when the mm is created. 57 + */ 58 + if (pkey < 0 || pkey >= arch_max_pkey()) 59 + return false; 60 + 61 + return mm_pkey_allocation_map(mm) & (1U << pkey); 62 + } 63 + 64 + /* 65 + * Returns a positive, 3-bit key on success, or -1 on failure. 
66 + */ 67 + static inline int mm_pkey_alloc(struct mm_struct *mm) 68 + { 69 + /* 70 + * Note: this is the one and only place we make sure 71 + * that the pkey is valid as far as the hardware is 72 + * concerned. The rest of the kernel trusts that 73 + * only good, valid pkeys come out of here. 74 + */ 75 + u8 all_pkeys_mask = GENMASK(arch_max_pkey() - 1, 0); 76 + int ret; 77 + 78 + if (!arch_pkeys_enabled()) 79 + return -1; 80 + 81 + /* 82 + * Are we out of pkeys? We must handle this specially 83 + * because ffz() behavior is undefined if there are no 84 + * zeros. 85 + */ 86 + if (mm_pkey_allocation_map(mm) == all_pkeys_mask) 87 + return -1; 88 + 89 + ret = ffz(mm_pkey_allocation_map(mm)); 90 + 91 + mm_set_pkey_allocated(mm, ret); 92 + 93 + return ret; 94 + } 95 + 96 + static inline int mm_pkey_free(struct mm_struct *mm, int pkey) 97 + { 98 + if (!mm_pkey_is_allocated(mm, pkey)) 99 + return -EINVAL; 100 + 101 + mm_set_pkey_free(mm, pkey); 102 + 103 + return 0; 104 + } 105 + 106 + #endif /* _ASM_ARM64_PKEYS_H */
+33
arch/arm64/include/asm/por.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (C) 2023 Arm Ltd. 4 + */ 5 + 6 + #ifndef _ASM_ARM64_POR_H 7 + #define _ASM_ARM64_POR_H 8 + 9 + #define POR_BITS_PER_PKEY 4 10 + #define POR_ELx_IDX(por_elx, idx) (((por_elx) >> ((idx) * POR_BITS_PER_PKEY)) & 0xf) 11 + 12 + static inline bool por_elx_allows_read(u64 por, u8 pkey) 13 + { 14 + u8 perm = POR_ELx_IDX(por, pkey); 15 + 16 + return perm & POE_R; 17 + } 18 + 19 + static inline bool por_elx_allows_write(u64 por, u8 pkey) 20 + { 21 + u8 perm = POR_ELx_IDX(por, pkey); 22 + 23 + return perm & POE_W; 24 + } 25 + 26 + static inline bool por_elx_allows_exec(u64 por, u8 pkey) 27 + { 28 + u8 perm = POR_ELx_IDX(por, pkey); 29 + 30 + return perm & POE_X; 31 + } 32 + 33 + #endif /* _ASM_ARM64_POR_H */
+1
arch/arm64/include/asm/processor.h
··· 184 184 u64 sctlr_user; 185 185 u64 svcr; 186 186 u64 tpidr2_el0; 187 + u64 por_el0; 187 188 }; 188 189 189 190 static inline unsigned int thread_get_vl(struct thread_struct *thread,
+3
arch/arm64/include/asm/sysreg.h
··· 1076 1076 #define POE_RXW UL(0x7) 1077 1077 #define POE_MASK UL(0xf) 1078 1078 1079 + /* Initial value for Permission Overlay Extension for EL0 */ 1080 + #define POR_EL0_INIT POE_RXW 1081 + 1079 1082 #define ARM64_FEATURE_FIELD_BITS 4 1080 1083 1081 1084 /* Defined for compatibility only, do not add new users. */
+1
arch/arm64/include/asm/traps.h
··· 25 25 void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); 26 26 void arm64_notify_segfault(unsigned long addr); 27 27 void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); 28 + void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey); 28 29 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); 29 30 void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); 30 31
+1
arch/arm64/include/asm/vncr_mapping.h
··· 52 52 #define VNCR_PIRE0_EL1 0x290 53 53 #define VNCR_PIRE0_EL2 0x298 54 54 #define VNCR_PIR_EL1 0x2A0 55 + #define VNCR_POR_EL1 0x2A8 55 56 #define VNCR_ICH_LR0_EL2 0x400 56 57 #define VNCR_ICH_LR1_EL2 0x408 57 58 #define VNCR_ICH_LR2_EL2 0x410
+1
arch/arm64/include/uapi/asm/hwcap.h
··· 122 122 #define HWCAP2_SME_SF8FMA (1UL << 60) 123 123 #define HWCAP2_SME_SF8DP4 (1UL << 61) 124 124 #define HWCAP2_SME_SF8DP2 (1UL << 62) 125 + #define HWCAP2_POE (1UL << 63) 125 126 126 127 #endif /* _UAPI__ASM_HWCAP_H */
+9
arch/arm64/include/uapi/asm/mman.h
··· 7 7 #define PROT_BTI 0x10 /* BTI guarded page */ 8 8 #define PROT_MTE 0x20 /* Normal Tagged mapping */ 9 9 10 + /* Override any generic PKEY permission defines */ 11 + #define PKEY_DISABLE_EXECUTE 0x4 12 + #define PKEY_DISABLE_READ 0x8 13 + #undef PKEY_ACCESS_MASK 14 + #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ 15 + PKEY_DISABLE_WRITE |\ 16 + PKEY_DISABLE_READ |\ 17 + PKEY_DISABLE_EXECUTE) 18 + 10 19 #endif /* ! _UAPI__ASM_MMAN_H */
+7
arch/arm64/include/uapi/asm/sigcontext.h
··· 98 98 __u64 esr; 99 99 }; 100 100 101 + #define POE_MAGIC 0x504f4530 102 + 103 + struct poe_context { 104 + struct _aarch64_ctx head; 105 + __u64 por_el0; 106 + }; 107 + 101 108 /* 102 109 * extra_context: describes extra space in the signal frame for 103 110 * additional structures that don't fit in sigcontext.__reserved[].
+23
arch/arm64/kernel/cpufeature.c
··· 466 466 }; 467 467 468 468 static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = { 469 + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE), 470 + FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0), 469 471 ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0), 470 472 ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0), 471 473 ARM64_FTR_END, ··· 2350 2348 sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); 2351 2349 } 2352 2350 2351 + #ifdef CONFIG_ARM64_POE 2352 + static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused) 2353 + { 2354 + sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1x_E0POE); 2355 + sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_E0POE); 2356 + } 2357 + #endif 2358 + 2353 2359 /* Internal helper functions to match cpu capability type */ 2354 2360 static bool 2355 2361 cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) ··· 2880 2870 .matches = has_nv1, 2881 2871 ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1) 2882 2872 }, 2873 + #ifdef CONFIG_ARM64_POE 2874 + { 2875 + .desc = "Stage-1 Permission Overlay Extension (S1POE)", 2876 + .capability = ARM64_HAS_S1POE, 2877 + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, 2878 + .matches = has_cpuid_feature, 2879 + .cpu_enable = cpu_enable_poe, 2880 + ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP) 2881 + }, 2882 + #endif 2883 2883 {}, 2884 2884 }; 2885 2885 ··· 3054 3034 HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2), 3055 3035 HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3), 3056 3036 HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2), 3037 + #ifdef CONFIG_ARM64_POE 3038 + HWCAP_CAP(ID_AA64MMFR3_EL1, S1POE, IMP, CAP_HWCAP, KERNEL_HWCAP_POE), 3039 + #endif 3057 3040 {}, 3058 3041 }; 3059 3042
+1
arch/arm64/kernel/cpuinfo.c
··· 143 143 [KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma", 144 144 [KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4", 145 145 [KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2", 146 + [KERNEL_HWCAP_POE] = "poe", 146 147 }; 147 148 148 149 #ifdef CONFIG_COMPAT
+24
arch/arm64/kernel/process.c
··· 271 271 clear_thread_flag(TIF_TAGGED_ADDR); 272 272 } 273 273 274 + static void flush_poe(void) 275 + { 276 + if (!system_supports_poe()) 277 + return; 278 + 279 + write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0); 280 + } 281 + 274 282 void flush_thread(void) 275 283 { 276 284 fpsimd_flush_thread(); 277 285 tls_thread_flush(); 278 286 flush_ptrace_hw_breakpoint(current); 279 287 flush_tagged_addr_state(); 288 + flush_poe(); 280 289 } 281 290 282 291 void arch_release_task_struct(struct task_struct *tsk) ··· 379 370 *task_user_tls(p) = read_sysreg(tpidr_el0); 380 371 if (system_supports_tpidr2()) 381 372 p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0); 373 + 374 + if (system_supports_poe()) 375 + p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); 382 376 383 377 if (stack_start) { 384 378 if (is_compat_thread(task_thread_info(p))) ··· 507 495 preempt_enable(); 508 496 } 509 497 498 + static void permission_overlay_switch(struct task_struct *next) 499 + { 500 + if (!system_supports_poe()) 501 + return; 502 + 503 + current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); 504 + if (current->thread.por_el0 != next->thread.por_el0) { 505 + write_sysreg_s(next->thread.por_el0, SYS_POR_EL0); 506 + } 507 + } 508 + 510 509 /* 511 510 * __switch_to() checks current->thread.sctlr_user as an optimisation. Therefore 512 511 * this function must be called with preemption disabled and the update to ··· 553 530 ssbs_thread_switch(next); 554 531 erratum_1418040_thread_switch(next); 555 532 ptrauth_thread_switch_user(next); 533 + permission_overlay_switch(next); 556 534 557 535 /* 558 536 * Complete any pending TLB or cache maintenance on this CPU in case
+46
arch/arm64/kernel/ptrace.c
··· 1440 1440 } 1441 1441 #endif 1442 1442 1443 + #ifdef CONFIG_ARM64_POE 1444 + static int poe_get(struct task_struct *target, 1445 + const struct user_regset *regset, 1446 + struct membuf to) 1447 + { 1448 + if (!system_supports_poe()) 1449 + return -EINVAL; 1450 + 1451 + return membuf_write(&to, &target->thread.por_el0, 1452 + sizeof(target->thread.por_el0)); 1453 + } 1454 + 1455 + static int poe_set(struct task_struct *target, const struct 1456 + user_regset *regset, unsigned int pos, 1457 + unsigned int count, const void *kbuf, const 1458 + void __user *ubuf) 1459 + { 1460 + int ret; 1461 + long ctrl; 1462 + 1463 + if (!system_supports_poe()) 1464 + return -EINVAL; 1465 + 1466 + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); 1467 + if (ret) 1468 + return ret; 1469 + 1470 + target->thread.por_el0 = ctrl; 1471 + 1472 + return 0; 1473 + } 1474 + #endif 1475 + 1443 1476 enum aarch64_regset { 1444 1477 REGSET_GPR, 1445 1478 REGSET_FPR, ··· 1501 1468 #endif 1502 1469 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI 1503 1470 REGSET_TAGGED_ADDR_CTRL, 1471 + #endif 1472 + #ifdef CONFIG_ARM64_POE 1473 + REGSET_POE 1504 1474 #endif 1505 1475 }; 1506 1476 ··· 1662 1626 .align = sizeof(long), 1663 1627 .regset_get = tagged_addr_ctrl_get, 1664 1628 .set = tagged_addr_ctrl_set, 1629 + }, 1630 + #endif 1631 + #ifdef CONFIG_ARM64_POE 1632 + [REGSET_POE] = { 1633 + .core_note_type = NT_ARM_POE, 1634 + .n = 1, 1635 + .size = sizeof(long), 1636 + .align = sizeof(long), 1637 + .regset_get = poe_get, 1638 + .set = poe_set, 1665 1639 }, 1666 1640 #endif 1667 1641 };
+62
arch/arm64/kernel/signal.c
··· 61 61 unsigned long za_offset; 62 62 unsigned long zt_offset; 63 63 unsigned long fpmr_offset; 64 + unsigned long poe_offset; 64 65 unsigned long extra_offset; 65 66 unsigned long end_offset; 66 67 }; ··· 186 185 u32 zt_size; 187 186 struct fpmr_context __user *fpmr; 188 187 u32 fpmr_size; 188 + struct poe_context __user *poe; 189 + u32 poe_size; 189 190 }; 190 191 191 192 static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) ··· 257 254 __get_user_error(fpmr, &user->fpmr->fpmr, err); 258 255 if (!err) 259 256 write_sysreg_s(fpmr, SYS_FPMR); 257 + 258 + return err; 259 + } 260 + 261 + static int preserve_poe_context(struct poe_context __user *ctx) 262 + { 263 + int err = 0; 264 + 265 + __put_user_error(POE_MAGIC, &ctx->head.magic, err); 266 + __put_user_error(sizeof(*ctx), &ctx->head.size, err); 267 + __put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err); 268 + 269 + return err; 270 + } 271 + 272 + static int restore_poe_context(struct user_ctxs *user) 273 + { 274 + u64 por_el0; 275 + int err = 0; 276 + 277 + if (user->poe_size != sizeof(*user->poe)) 278 + return -EINVAL; 279 + 280 + __get_user_error(por_el0, &(user->poe->por_el0), err); 281 + if (!err) 282 + write_sysreg_s(por_el0, SYS_POR_EL0); 260 283 261 284 return err; 262 285 } ··· 650 621 user->za = NULL; 651 622 user->zt = NULL; 652 623 user->fpmr = NULL; 624 + user->poe = NULL; 653 625 654 626 if (!IS_ALIGNED((unsigned long)base, 16)) 655 627 goto invalid; ··· 699 669 700 670 case ESR_MAGIC: 701 671 /* ignore */ 672 + break; 673 + 674 + case POE_MAGIC: 675 + if (!system_supports_poe()) 676 + goto invalid; 677 + 678 + if (user->poe) 679 + goto invalid; 680 + 681 + user->poe = (struct poe_context __user *)head; 682 + user->poe_size = size; 702 683 break; 703 684 704 685 case SVE_MAGIC: ··· 898 857 if (err == 0 && system_supports_sme2() && user.zt) 899 858 err = restore_zt_context(&user); 900 859 860 + if (err == 0 && system_supports_poe() && user.poe) 861 + err = 
restore_poe_context(&user); 862 + 901 863 return err; 902 864 } 903 865 ··· 1024 980 return err; 1025 981 } 1026 982 983 + if (system_supports_poe()) { 984 + err = sigframe_alloc(user, &user->poe_offset, 985 + sizeof(struct poe_context)); 986 + if (err) 987 + return err; 988 + } 989 + 1027 990 return sigframe_alloc_end(user); 1028 991 } 1029 992 ··· 1092 1041 apply_user_offset(user, user->fpmr_offset); 1093 1042 err |= preserve_fpmr_context(fpmr_ctx); 1094 1043 } 1044 + 1045 + if (system_supports_poe() && err == 0 && user->poe_offset) { 1046 + struct poe_context __user *poe_ctx = 1047 + apply_user_offset(user, user->poe_offset); 1048 + 1049 + err |= preserve_poe_context(poe_ctx); 1050 + } 1051 + 1095 1052 1096 1053 /* ZA state if present */ 1097 1054 if (system_supports_sme() && err == 0 && user->za_offset) { ··· 1236 1177 SVCR_SM_MASK); 1237 1178 sme_smstop(); 1238 1179 } 1180 + 1181 + if (system_supports_poe()) 1182 + write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0); 1239 1183 1240 1184 if (ka->sa.sa_flags & SA_RESTORER) 1241 1185 sigtramp = ka->sa.sa_restorer;
+6
arch/arm64/kernel/traps.c
··· 273 273 force_sig_fault(signo, code, (void __user *)far); 274 274 } 275 275 276 + void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey) 277 + { 278 + arm64_show_signal(SIGSEGV, str); 279 + force_sig_pkuerr((void __user *)far, pkey); 280 + } 281 + 276 282 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, 277 283 const char *str) 278 284 {
+4 -1
arch/arm64/kvm/hyp/include/hyp/fault.h
··· 14 14 15 15 static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) 16 16 { 17 + int ret; 17 18 u64 par, tmp; 18 19 19 20 /* ··· 28 27 * saved the guest context yet, and we may return early... 29 28 */ 30 29 par = read_sysreg_par(); 31 - if (!__kvm_at("s1e1r", far)) 30 + ret = system_supports_poe() ? __kvm_at(OP_AT_S1E1A, far) : 31 + __kvm_at(OP_AT_S1E1R, far); 32 + if (!ret) 32 33 tmp = read_sysreg_par(); 33 34 else 34 35 tmp = SYS_PAR_EL1_F; /* back to the guest */
+27
arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
··· 16 16 #include <asm/kvm_hyp.h> 17 17 #include <asm/kvm_mmu.h> 18 18 19 + static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt); 20 + 19 21 static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) 20 22 { 21 23 ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); 24 + 25 + // POR_EL0 can affect uaccess, so must be saved/restored early. 26 + if (ctxt_has_s1poe(ctxt)) 27 + ctxt_sys_reg(ctxt, POR_EL0) = read_sysreg_s(SYS_POR_EL0); 22 28 } 23 29 24 30 static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) ··· 72 66 return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, TCRX, IMP); 73 67 } 74 68 69 + static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt) 70 + { 71 + struct kvm_vcpu *vcpu; 72 + 73 + if (!system_supports_poe()) 74 + return false; 75 + 76 + vcpu = ctxt_to_vcpu(ctxt); 77 + return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1POE, IMP); 78 + } 79 + 75 80 static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) 76 81 { 77 82 ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); ··· 97 80 ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR); 98 81 ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0); 99 82 } 83 + 84 + if (ctxt_has_s1poe(ctxt)) 85 + ctxt_sys_reg(ctxt, POR_EL1) = read_sysreg_el1(SYS_POR); 100 86 } 101 87 ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); 102 88 ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); ··· 140 120 static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) 141 121 { 142 122 write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); 123 + 124 + // POR_EL0 can affect uaccess, so must be saved/restored early. 
125 + if (ctxt_has_s1poe(ctxt)) 126 + write_sysreg_s(ctxt_sys_reg(ctxt, POR_EL0), SYS_POR_EL0); 143 127 } 144 128 145 129 static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) ··· 182 158 write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR); 183 159 write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0); 184 160 } 161 + 162 + if (ctxt_has_s1poe(ctxt)) 163 + write_sysreg_el1(ctxt_sys_reg(ctxt, POR_EL1), SYS_POR); 185 164 } 186 165 write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); 187 166 write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
+22 -3
arch/arm64/kvm/sys_regs.c
··· 1557 1557 case SYS_ID_AA64MMFR2_EL1: 1558 1558 val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK; 1559 1559 break; 1560 + case SYS_ID_AA64MMFR3_EL1: 1561 + val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE; 1562 + break; 1560 1563 case SYS_ID_MMFR4_EL1: 1561 1564 val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); 1562 1565 break; ··· 2259 2256 return true; 2260 2257 } 2261 2258 2259 + static unsigned int s1poe_visibility(const struct kvm_vcpu *vcpu, 2260 + const struct sys_reg_desc *rd) 2261 + { 2262 + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) 2263 + return 0; 2264 + 2265 + return REG_HIDDEN; 2266 + } 2267 + 2262 2268 /* 2263 2269 * Architected system registers. 2264 2270 * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 ··· 2431 2419 ID_AA64MMFR2_EL1_IDS | 2432 2420 ID_AA64MMFR2_EL1_NV | 2433 2421 ID_AA64MMFR2_EL1_CCIDX)), 2434 - ID_SANITISED(ID_AA64MMFR3_EL1), 2422 + ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX | 2423 + ID_AA64MMFR3_EL1_S1POE)), 2435 2424 ID_SANITISED(ID_AA64MMFR4_EL1), 2436 2425 ID_UNALLOCATED(7,5), 2437 2426 ID_UNALLOCATED(7,6), ··· 2506 2493 { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, 2507 2494 { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 }, 2508 2495 { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 }, 2496 + { SYS_DESC(SYS_POR_EL1), NULL, reset_unknown, POR_EL1, 2497 + .visibility = s1poe_visibility }, 2509 2498 { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, 2510 2499 2511 2500 { SYS_DESC(SYS_LORSA_EL1), trap_loregion }, ··· 2594 2579 .access = access_pmovs, .reg = PMOVSSET_EL0, 2595 2580 .get_user = get_pmreg, .set_user = set_pmreg }, 2596 2581 2582 + { SYS_DESC(SYS_POR_EL0), NULL, reset_unknown, POR_EL0, 2583 + .visibility = s1poe_visibility }, 2597 2584 { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, 2598 2585 { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, 2599 2586 { SYS_DESC(SYS_TPIDR2_EL0), undef_access }, ··· 
4586 4569 kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1 | 4587 4570 HFGxTR_EL2_nMAIR2_EL1 | 4588 4571 HFGxTR_EL2_nS2POR_EL1 | 4589 - HFGxTR_EL2_nPOR_EL1 | 4590 - HFGxTR_EL2_nPOR_EL0 | 4591 4572 HFGxTR_EL2_nACCDATA_EL1 | 4592 4573 HFGxTR_EL2_nSMPRI_EL1_MASK | 4593 4574 HFGxTR_EL2_nTPIDR2_EL0_MASK); ··· 4619 4604 if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) 4620 4605 kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 | 4621 4606 HFGxTR_EL2_nPIR_EL1); 4607 + 4608 + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) 4609 + kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPOR_EL1 | 4610 + HFGxTR_EL2_nPOR_EL0); 4622 4611 4623 4612 if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP)) 4624 4613 kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 |
+54 -1
arch/arm64/mm/fault.c
··· 23 23 #include <linux/sched/debug.h> 24 24 #include <linux/highmem.h> 25 25 #include <linux/perf_event.h> 26 + #include <linux/pkeys.h> 26 27 #include <linux/preempt.h> 27 28 #include <linux/hugetlb.h> 28 29 ··· 487 486 } 488 487 } 489 488 489 + static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, 490 + unsigned int mm_flags) 491 + { 492 + unsigned long iss2 = ESR_ELx_ISS2(esr); 493 + 494 + if (!system_supports_poe()) 495 + return false; 496 + 497 + if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay)) 498 + return true; 499 + 500 + return !arch_vma_access_permitted(vma, 501 + mm_flags & FAULT_FLAG_WRITE, 502 + mm_flags & FAULT_FLAG_INSTRUCTION, 503 + false); 504 + } 505 + 490 506 static bool is_el0_instruction_abort(unsigned long esr) 491 507 { 492 508 return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; ··· 529 511 unsigned long addr = untagged_addr(far); 530 512 struct vm_area_struct *vma; 531 513 int si_code; 514 + int pkey = -1; 532 515 533 516 if (kprobe_page_fault(regs, esr)) 534 517 return 0; ··· 594 575 count_vm_vma_lock_event(VMA_LOCK_SUCCESS); 595 576 goto bad_area; 596 577 } 578 + 579 + if (fault_from_pkey(esr, vma, mm_flags)) { 580 + pkey = vma_pkey(vma); 581 + vma_end_read(vma); 582 + fault = 0; 583 + si_code = SEGV_PKUERR; 584 + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); 585 + goto bad_area; 586 + } 587 + 597 588 fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs); 598 589 if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) 599 590 vma_end_read(vma); ··· 639 610 goto bad_area; 640 611 } 641 612 613 + if (fault_from_pkey(esr, vma, mm_flags)) { 614 + pkey = vma_pkey(vma); 615 + mmap_read_unlock(mm); 616 + fault = 0; 617 + si_code = SEGV_PKUERR; 618 + goto bad_area; 619 + } 620 + 642 621 fault = handle_mm_fault(vma, addr, mm_flags, regs); 622 + 643 623 /* Quick path to respond to signals */ 644 624 if (fault_signal_pending(fault, regs)) { 645 625 if (!user_mode(regs)) ··· 707 669 708 670 
arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name); 709 671 	} else { 672 + 		/* 673 + 		 * The pkey value that we return to userspace can be different 674 + 		 * from the pkey that caused the fault. 675 + 		 * 676 + 		 * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); 677 + 		 * 2. T1 : set POR_EL0 to deny access to pkey=4, touches page 678 + 		 * 3. T1 : faults... 679 + 		 * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); 680 + 		 * 5. T1 : enters fault handler, takes mmap_lock, etc... 681 + 		 * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really 682 + 		 * faulted on a pte with its pkey=4. 683 + 		 */ 710 684 		/* Something tried to access memory that out of memory map */ 711 - 		arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name); 685 + 		if (si_code == SEGV_PKUERR) 686 + 			arm64_force_sig_fault_pkey(far, inf->name, pkey); 687 + 		else 688 + 			arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name); 712 689 	} 713 690 714 691 	return 0;
+11
arch/arm64/mm/mmap.c
··· 102 102 if (vm_flags & VM_MTE) 103 103 prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED); 104 104 105 + #ifdef CONFIG_ARCH_HAS_PKEYS 106 + if (system_supports_poe()) { 107 + if (vm_flags & VM_PKEY_BIT0) 108 + prot |= PTE_PO_IDX_0; 109 + if (vm_flags & VM_PKEY_BIT1) 110 + prot |= PTE_PO_IDX_1; 111 + if (vm_flags & VM_PKEY_BIT2) 112 + prot |= PTE_PO_IDX_2; 113 + } 114 + #endif 115 + 105 116 return __pgprot(prot); 106 117 } 107 118 EXPORT_SYMBOL(vm_get_page_prot);
+45
arch/arm64/mm/mmu.c
··· 25 25 #include <linux/vmalloc.h> 26 26 #include <linux/set_memory.h> 27 27 #include <linux/kfence.h> 28 + #include <linux/pkeys.h> 28 29 29 30 #include <asm/barrier.h> 30 31 #include <asm/cputype.h> ··· 1550 1549 1551 1550 cpu_uninstall_idmap(); 1552 1551 } 1552 + 1553 + #ifdef CONFIG_ARCH_HAS_PKEYS 1554 + int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) 1555 + { 1556 + u64 new_por = POE_RXW; 1557 + u64 old_por; 1558 + u64 pkey_shift; 1559 + 1560 + if (!system_supports_poe()) 1561 + return -ENOSPC; 1562 + 1563 + /* 1564 + * This code should only be called with valid 'pkey' 1565 + * values originating from in-kernel users. Complain 1566 + * if a bad value is observed. 1567 + */ 1568 + if (WARN_ON_ONCE(pkey >= arch_max_pkey())) 1569 + return -EINVAL; 1570 + 1571 + /* Set the bits we need in POR: */ 1572 + new_por = POE_RXW; 1573 + if (init_val & PKEY_DISABLE_WRITE) 1574 + new_por &= ~POE_W; 1575 + if (init_val & PKEY_DISABLE_ACCESS) 1576 + new_por &= ~POE_RW; 1577 + if (init_val & PKEY_DISABLE_READ) 1578 + new_por &= ~POE_R; 1579 + if (init_val & PKEY_DISABLE_EXECUTE) 1580 + new_por &= ~POE_X; 1581 + 1582 + /* Shift the bits in to the correct place in POR for pkey: */ 1583 + pkey_shift = pkey * POR_BITS_PER_PKEY; 1584 + new_por <<= pkey_shift; 1585 + 1586 + /* Get old POR and mask off any old bits in place: */ 1587 + old_por = read_sysreg_s(SYS_POR_EL0); 1588 + old_por &= ~(POE_MASK << pkey_shift); 1589 + 1590 + /* Write old part along with new part: */ 1591 + write_sysreg_s(old_por | new_por, SYS_POR_EL0); 1592 + 1593 + return 0; 1594 + } 1595 + #endif
+1
arch/arm64/tools/cpucaps
··· 45 45 HAS_NESTED_VIRT 46 46 HAS_PAN 47 47 HAS_S1PIE 48 + HAS_S1POE 48 49 HAS_RAS_EXTN 49 50 HAS_RNG 50 51 HAS_SB
+4
arch/powerpc/Kconfig
··· 1026 1026 1027 1027 If unsure, say y. 1028 1028 1029 + config ARCH_PKEY_BITS 1030 + int 1031 + default 5 1032 + 1029 1033 config PPC_SECURE_BOOT 1030 1034 prompt "Enable secure boot support" 1031 1035 bool
+4
arch/x86/Kconfig
··· 1889 1889 1890 1890 If unsure, say y. 1891 1891 1892 + config ARCH_PKEY_BITS 1893 + int 1894 + default 4 1895 + 1892 1896 choice 1893 1897 prompt "TSX enable mode" 1894 1898 depends on CPU_SUP_INTEL
+2
fs/proc/task_mmu.c
··· 976 976 [ilog2(VM_PKEY_BIT0)] = "", 977 977 [ilog2(VM_PKEY_BIT1)] = "", 978 978 [ilog2(VM_PKEY_BIT2)] = "", 979 + #if VM_PKEY_BIT3 979 980 [ilog2(VM_PKEY_BIT3)] = "", 981 + #endif 980 982 #if VM_PKEY_BIT4 981 983 [ilog2(VM_PKEY_BIT4)] = "", 982 984 #endif
+12 -8
include/linux/mm.h
··· 330 330 #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ 331 331 332 332 #ifdef CONFIG_ARCH_HAS_PKEYS 333 - # define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 334 - # define VM_PKEY_BIT0 VM_HIGH_ARCH_0 /* A protection key is a 4-bit value */ 335 - # define VM_PKEY_BIT1 VM_HIGH_ARCH_1 /* on x86 and 5-bit value on ppc64 */ 336 - # define VM_PKEY_BIT2 VM_HIGH_ARCH_2 337 - # define VM_PKEY_BIT3 VM_HIGH_ARCH_3 338 - #ifdef CONFIG_PPC 333 + # define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 334 + # define VM_PKEY_BIT0 VM_HIGH_ARCH_0 335 + # define VM_PKEY_BIT1 VM_HIGH_ARCH_1 336 + # define VM_PKEY_BIT2 VM_HIGH_ARCH_2 337 + #if CONFIG_ARCH_PKEY_BITS > 3 338 + # define VM_PKEY_BIT3 VM_HIGH_ARCH_3 339 + #else 340 + # define VM_PKEY_BIT3 0 341 + #endif 342 + #if CONFIG_ARCH_PKEY_BITS > 4 339 343 # define VM_PKEY_BIT4 VM_HIGH_ARCH_4 340 344 #else 341 345 # define VM_PKEY_BIT4 0 ··· 378 374 #endif 379 375 380 376 #if defined(CONFIG_ARM64_MTE) 381 - # define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */ 382 - # define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */ 377 + # define VM_MTE VM_HIGH_ARCH_4 /* Use Tagged memory for access control */ 378 + # define VM_MTE_ALLOWED VM_HIGH_ARCH_5 /* Tagged memory permitted */ 383 379 #else 384 380 # define VM_MTE VM_NONE 385 381 # define VM_MTE_ALLOWED VM_NONE
+1
include/uapi/linux/elf.h
··· 441 441 #define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ 442 442 #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ 443 443 #define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ 444 + #define NT_ARM_POE 0x40f /* ARM POE registers */ 444 445 #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ 445 446 #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ 446 447 #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
+14
tools/testing/selftests/arm64/abi/hwcap.c
··· 156 156 asm volatile(".inst 0x0ee0e000" : : : ); 157 157 } 158 158 159 + static void poe_sigill(void) 160 + { 161 + /* mrs x0, POR_EL0 */ 162 + asm volatile("mrs x0, S3_3_C10_C2_4" : : : "x0"); 163 + } 164 + 159 165 static void rng_sigill(void) 160 166 { 161 167 asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0"); ··· 606 600 .hwcap_bit = HWCAP_PMULL, 607 601 .cpuinfo = "pmull", 608 602 .sigill_fn = pmull_sigill, 603 + }, 604 + { 605 + .name = "POE", 606 + .at_hwcap = AT_HWCAP2, 607 + .hwcap_bit = HWCAP2_POE, 608 + .cpuinfo = "poe", 609 + .sigill_fn = poe_sigill, 610 + .sigill_reliable = true, 609 611 }, 610 612 { 611 613 .name = "RNG",
+1
tools/testing/selftests/arm64/signal/.gitignore
··· 2 2 mangle_* 3 3 fake_sigreturn_* 4 4 fpmr_* 5 + poe_* 5 6 sme_* 6 7 ssve_* 7 8 sve_*
+86
tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2023 Arm Limited 4 + * 5 + * Verify that the POR_EL0 register context in signal frames is set up as 6 + * expected. 7 + */ 8 + 9 + #include <signal.h> 10 + #include <ucontext.h> 11 + #include <sys/auxv.h> 12 + #include <sys/prctl.h> 13 + #include <unistd.h> 14 + #include <asm/sigcontext.h> 15 + 16 + #include "test_signals_utils.h" 17 + #include "testcases.h" 18 + 19 + static union { 20 + ucontext_t uc; 21 + char buf[1024 * 128]; 22 + } context; 23 + 24 + #define SYS_POR_EL0 "S3_3_C10_C2_4" 25 + 26 + static uint64_t get_por_el0(void) 27 + { 28 + uint64_t val; 29 + 30 + asm volatile( 31 + "mrs %0, " SYS_POR_EL0 "\n" 32 + : "=r"(val) 33 + : 34 + : ); 35 + 36 + return val; 37 + } 38 + 39 + int poe_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc) 40 + { 41 + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); 42 + struct poe_context *poe_ctx; 43 + size_t offset; 44 + bool in_sigframe; 45 + bool have_poe; 46 + __u64 orig_poe; 47 + 48 + have_poe = getauxval(AT_HWCAP2) & HWCAP2_POE; 49 + if (have_poe) 50 + orig_poe = get_por_el0(); 51 + 52 + if (!get_current_context(td, &context.uc, sizeof(context))) 53 + return 1; 54 + 55 + poe_ctx = (struct poe_context *) 56 + get_header(head, POE_MAGIC, td->live_sz, &offset); 57 + 58 + in_sigframe = poe_ctx != NULL; 59 + 60 + fprintf(stderr, "POR_EL0 sigframe %s on system %s POE\n", 61 + in_sigframe ? "present" : "absent", 62 + have_poe ? "with" : "without"); 63 + 64 + td->pass = (in_sigframe == have_poe); 65 + 66 + /* 67 + * Check that the value we read back was the one present at 68 + * the time that the signal was triggered. 
69 + */ 70 + if (have_poe && poe_ctx) { 71 + if (poe_ctx->por_el0 != orig_poe) { 72 + fprintf(stderr, "POR_EL0 in frame is %llx, was %llx\n", 73 + poe_ctx->por_el0, orig_poe); 74 + td->pass = false; 75 + } 76 + } 77 + 78 + return 0; 79 + } 80 + 81 + struct tdescr tde = { 82 + .name = "POR_EL0", 83 + .descr = "Validate that POR_EL0 is present as expected", 84 + .timeout = 3, 85 + .run = poe_present, 86 + };
+4 -23
tools/testing/selftests/arm64/signal/testcases/testcases.c
··· 6 6 7 7 #include "testcases.h" 8 8 9 - struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, 10 - size_t resv_sz, size_t *offset) 11 - { 12 - size_t offs = 0; 13 - struct _aarch64_ctx *found = NULL; 14 - 15 - if (!head || resv_sz < HDR_SZ) 16 - return found; 17 - 18 - while (offs <= resv_sz - HDR_SZ && 19 - head->magic != magic && head->magic) { 20 - offs += head->size; 21 - head = GET_RESV_NEXT_HEAD(head); 22 - } 23 - if (head->magic == magic) { 24 - found = head; 25 - if (offset) 26 - *offset = offs; 27 - } 28 - 29 - return found; 30 - } 31 - 32 9 bool validate_extra_context(struct extra_context *extra, char **err, 33 10 void **extra_data, size_t *extra_size) 34 11 { ··· 160 183 case ESR_MAGIC: 161 184 if (head->size != sizeof(struct esr_context)) 162 185 *err = "Bad size for esr_context"; 186 + break; 187 + case POE_MAGIC: 188 + if (head->size != sizeof(struct poe_context)) 189 + *err = "Bad size for poe_context"; 163 190 break; 164 191 case TPIDR2_MAGIC: 165 192 if (head->size != sizeof(struct tpidr2_context))
+26 -2
tools/testing/selftests/arm64/signal/testcases/testcases.h
··· 26 26 #define HDR_SZ \ 27 27 sizeof(struct _aarch64_ctx) 28 28 29 + #define GET_UC_RESV_HEAD(uc) \ 30 + (struct _aarch64_ctx *)(&(uc->uc_mcontext.__reserved)) 31 + 29 32 #define GET_SF_RESV_HEAD(sf) \ 30 33 (struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved) 31 34 ··· 91 88 92 89 bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err); 93 90 94 - struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, 95 - size_t resv_sz, size_t *offset); 91 + static inline struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, 92 + size_t resv_sz, size_t *offset) 93 + { 94 + size_t offs = 0; 95 + struct _aarch64_ctx *found = NULL; 96 + 97 + if (!head || resv_sz < HDR_SZ) 98 + return found; 99 + 100 + while (offs <= resv_sz - HDR_SZ && 101 + head->magic != magic && head->magic) { 102 + offs += head->size; 103 + head = GET_RESV_NEXT_HEAD(head); 104 + } 105 + if (head->magic == magic) { 106 + found = head; 107 + if (offset) 108 + *offset = offs; 109 + } 110 + 111 + return found; 112 + } 113 + 96 114 97 115 static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head, 98 116 size_t resv_sz,
+14
tools/testing/selftests/kvm/aarch64/get-reg-list.c
··· 40 40 ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ 41 41 4, 42 42 1 43 + }, 44 + { 45 + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ 46 + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ 47 + 16, 48 + 1 49 + }, 50 + { 51 + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ 52 + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ 53 + 16, 54 + 1 43 55 } 44 56 }; 45 57 ··· 480 468 ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ 481 469 ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ 482 470 ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ 471 + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ 483 472 ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ 484 473 ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ 485 474 ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ ··· 488 475 ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ 489 476 ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ 490 477 ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ 478 + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ 491 479 ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ 492 480 ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ 493 481 ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
+1 -1
tools/testing/selftests/mm/Makefile
··· 104 104 endif 105 105 else 106 106 107 - ifneq (,$(findstring $(ARCH),powerpc)) 107 + ifneq (,$(filter $(ARCH),arm64 powerpc)) 108 108 TEST_GEN_FILES += protection_keys 109 109 endif 110 110
+139
tools/testing/selftests/mm/pkey-arm64.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (C) 2023 Arm Ltd. 4 + */ 5 + 6 + #ifndef _PKEYS_ARM64_H 7 + #define _PKEYS_ARM64_H 8 + 9 + #include "vm_util.h" 10 + /* for signal frame parsing */ 11 + #include "../arm64/signal/testcases/testcases.h" 12 + 13 + #ifndef SYS_mprotect_key 14 + # define SYS_mprotect_key 288 15 + #endif 16 + #ifndef SYS_pkey_alloc 17 + # define SYS_pkey_alloc 289 18 + # define SYS_pkey_free 290 19 + #endif 20 + #define MCONTEXT_IP(mc) mc.pc 21 + #define MCONTEXT_TRAPNO(mc) -1 22 + 23 + #define PKEY_MASK 0xf 24 + 25 + #define POE_NONE 0x0 26 + #define POE_X 0x2 27 + #define POE_RX 0x3 28 + #define POE_RWX 0x7 29 + 30 + #define NR_PKEYS 8 31 + #define NR_RESERVED_PKEYS 1 /* pkey-0 */ 32 + 33 + #define PKEY_ALLOW_ALL 0x77777777 34 + 35 + #define PKEY_BITS_PER_PKEY 4 36 + #define PAGE_SIZE sysconf(_SC_PAGESIZE) 37 + #undef HPAGE_SIZE 38 + #define HPAGE_SIZE default_huge_page_size() 39 + 40 + /* 4-byte instructions * 16384 = 64K page */ 41 + #define __page_o_noops() asm(".rept 16384 ; nop; .endr") 42 + 43 + static inline u64 __read_pkey_reg(void) 44 + { 45 + u64 pkey_reg = 0; 46 + 47 + // POR_EL0 48 + asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (pkey_reg)); 49 + 50 + return pkey_reg; 51 + } 52 + 53 + static inline void __write_pkey_reg(u64 pkey_reg) 54 + { 55 + u64 por = pkey_reg; 56 + 57 + dprintf4("%s() changing %016llx to %016llx\n", 58 + __func__, __read_pkey_reg(), pkey_reg); 59 + 60 + // POR_EL0 61 + asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (por) :); 62 + 63 + dprintf4("%s() pkey register after changing %016llx to %016llx\n", 64 + __func__, __read_pkey_reg(), pkey_reg); 65 + } 66 + 67 + static inline int cpu_has_pkeys(void) 68 + { 69 + /* No simple way to determine this */ 70 + return 1; 71 + } 72 + 73 + static inline u32 pkey_bit_position(int pkey) 74 + { 75 + return pkey * PKEY_BITS_PER_PKEY; 76 + } 77 + 78 + static inline int get_arch_reserved_keys(void) 79 + { 80 + return NR_RESERVED_PKEYS; 81 + } 82 + 
83 + void expect_fault_on_read_execonly_key(void *p1, int pkey) 84 + { 85 + } 86 + 87 + void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) 88 + { 89 + return PTR_ERR_ENOTSUP; 90 + } 91 + 92 + #define set_pkey_bits set_pkey_bits 93 + static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) 94 + { 95 + u32 shift = pkey_bit_position(pkey); 96 + u64 new_val = POE_RWX; 97 + 98 + /* mask out bits from pkey in old value */ 99 + reg &= ~((u64)PKEY_MASK << shift); 100 + 101 + if (flags & PKEY_DISABLE_ACCESS) 102 + new_val = POE_X; 103 + else if (flags & PKEY_DISABLE_WRITE) 104 + new_val = POE_RX; 105 + 106 + /* OR in new bits for pkey */ 107 + reg |= new_val << shift; 108 + 109 + return reg; 110 + } 111 + 112 + #define get_pkey_bits get_pkey_bits 113 + static inline u64 get_pkey_bits(u64 reg, int pkey) 114 + { 115 + u32 shift = pkey_bit_position(pkey); 116 + /* 117 + * shift down the relevant bits to the lowest four, then 118 + * mask off all the other higher bits 119 + */ 120 + u32 perm = (reg >> shift) & PKEY_MASK; 121 + 122 + if (perm == POE_X) 123 + return PKEY_DISABLE_ACCESS; 124 + if (perm == POE_RX) 125 + return PKEY_DISABLE_WRITE; 126 + return 0; 127 + } 128 + 129 + static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) 130 + { 131 + struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt); 132 + struct poe_context *poe_ctx = 133 + (struct poe_context *) get_header(ctx, POE_MAGIC, 134 + sizeof(uctxt->uc_mcontext), NULL); 135 + if (poe_ctx) 136 + poe_ctx->por_el0 = pkey; 137 + } 138 + 139 + #endif /* _PKEYS_ARM64_H */
+8
tools/testing/selftests/mm/pkey-helpers.h
··· 91 91 #include "pkey-x86.h" 92 92 #elif defined(__powerpc64__) /* arch */ 93 93 #include "pkey-powerpc.h" 94 + #elif defined(__aarch64__) /* arch */ 95 + #include "pkey-arm64.h" 94 96 #else /* arch */ 95 97 #error Architecture not supported 96 98 #endif /* arch */ 97 99 100 + #ifndef PKEY_MASK 98 101 #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) 102 + #endif 99 103 104 + #ifndef set_pkey_bits 100 105 static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) 101 106 { 102 107 u32 shift = pkey_bit_position(pkey); ··· 111 106 reg |= (flags & PKEY_MASK) << shift; 112 107 return reg; 113 108 } 109 + #endif 114 110 111 + #ifndef get_pkey_bits 115 112 static inline u64 get_pkey_bits(u64 reg, int pkey) 116 113 { 117 114 u32 shift = pkey_bit_position(pkey); ··· 123 116 */ 124 117 return ((reg >> shift) & PKEY_MASK); 125 118 } 119 + #endif 126 120 127 121 extern u64 shadow_pkey_reg; 128 122
+3
tools/testing/selftests/mm/pkey-powerpc.h
··· 8 8 # define SYS_pkey_free 385 9 9 #endif 10 10 #define REG_IP_IDX PT_NIP 11 + #define MCONTEXT_IP(mc) mc.gp_regs[REG_IP_IDX] 12 + #define MCONTEXT_TRAPNO(mc) mc.gp_regs[REG_TRAPNO] 11 13 #define REG_TRAPNO PT_TRAP 14 + #define MCONTEXT_FPREGS 12 15 #define gregs gp_regs 13 16 #define fpregs fp_regs 14 17 #define si_pkey_offset 0x20
+4
tools/testing/selftests/mm/pkey-x86.h
··· 15 15 16 16 #endif 17 17 18 + #define MCONTEXT_IP(mc) mc.gregs[REG_IP_IDX] 19 + #define MCONTEXT_TRAPNO(mc) mc.gregs[REG_TRAPNO] 20 + #define MCONTEXT_FPREGS 21 + 18 22 #ifndef PKEY_DISABLE_ACCESS 19 23 # define PKEY_DISABLE_ACCESS 0x1 20 24 #endif
+98 -11
tools/testing/selftests/mm/protection_keys.c
··· 147 147 * will then fault, which makes sure that the fault code handles 148 148 * execute-only memory properly. 149 149 */ 150 - #ifdef __powerpc64__ 150 + #if defined(__powerpc64__) || defined(__aarch64__) 151 151 /* This way, both 4K and 64K alignment are maintained */ 152 152 __attribute__((__aligned__(65536))) 153 153 #else ··· 212 212 unsigned long syscall_flags = 0; 213 213 int ret; 214 214 int pkey_rights; 215 - u64 orig_pkey_reg = read_pkey_reg(); 216 215 217 216 dprintf1("START->%s(%d, 0x%x)\n", __func__, 218 217 pkey, flags); ··· 241 242 242 243 dprintf1("%s(%d) pkey_reg: 0x%016llx\n", 243 244 __func__, pkey, read_pkey_reg()); 244 - if (flags) 245 - pkey_assert(read_pkey_reg() >= orig_pkey_reg); 246 245 dprintf1("END<---%s(%d, 0x%x)\n", __func__, 247 246 pkey, flags); 248 247 } ··· 250 253 unsigned long syscall_flags = 0; 251 254 int ret; 252 255 int pkey_rights = hw_pkey_get(pkey, syscall_flags); 253 - u64 orig_pkey_reg = read_pkey_reg(); 254 256 255 257 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 256 258 ··· 269 273 270 274 dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, 271 275 pkey, read_pkey_reg()); 272 - if (flags) 273 - assert(read_pkey_reg() <= orig_pkey_reg); 274 276 } 275 277 276 278 void pkey_write_allow(int pkey) ··· 308 314 ucontext_t *uctxt = vucontext; 309 315 int trapno; 310 316 unsigned long ip; 317 + #ifdef MCONTEXT_FPREGS 311 318 char *fpregs; 319 + #endif 312 320 #if defined(__i386__) || defined(__x86_64__) /* arch */ 313 321 u32 *pkey_reg_ptr; 314 322 int pkey_reg_offset; ··· 324 328 __func__, __LINE__, 325 329 __read_pkey_reg(), shadow_pkey_reg); 326 330 327 - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; 328 - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; 331 + trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext); 332 + ip = MCONTEXT_IP(uctxt->uc_mcontext); 333 + #ifdef MCONTEXT_FPREGS 329 334 fpregs = (char *) uctxt->uc_mcontext.fpregs; 335 + #endif 330 336 331 337 dprintf2("%s() trapno: %d ip: 0x%016lx 
info->si_code: %s/%d\n", 332 338 __func__, trapno, ip, si_code_str(si->si_code), ··· 357 359 #endif /* arch */ 358 360 359 361 dprintf1("siginfo: %p\n", si); 362 + #ifdef MCONTEXT_FPREGS 360 363 dprintf1(" fpregs: %p\n", fpregs); 364 + #endif 361 365 362 366 if ((si->si_code == SEGV_MAPERR) || 363 367 (si->si_code == SEGV_ACCERR) || ··· 389 389 #elif defined(__powerpc64__) /* arch */ 390 390 /* restore access and let the faulting instruction continue */ 391 391 pkey_access_allow(siginfo_pkey); 392 + #elif defined(__aarch64__) 393 + aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL); 392 394 #endif /* arch */ 393 395 pkey_faults++; 394 396 dprintf1("<<<<==================================================\n"); ··· 904 902 * test program continue. We now have to restore it. 905 903 */ 906 904 if (__read_pkey_reg() != 0) 907 - #else /* arch */ 905 + #elif defined(__aarch64__) 906 + if (__read_pkey_reg() != PKEY_ALLOW_ALL) 907 + #else 908 908 if (__read_pkey_reg() != shadow_pkey_reg) 909 909 #endif /* arch */ 910 910 pkey_assert(0); ··· 1496 1492 lots_o_noops_around_write(&scratch); 1497 1493 do_not_expect_pkey_fault("executing on PROT_EXEC memory"); 1498 1494 expect_fault_on_read_execonly_key(p1, pkey); 1495 + 1496 + // Reset back to PROT_EXEC | PROT_READ for architectures that support 1497 + // non-PKEY execute-only permissions. 1498 + ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey); 1499 + pkey_assert(!ret); 1499 1500 } 1500 1501 1501 1502 void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) ··· 1674 1665 } 1675 1666 #endif 1676 1667 1668 + #if defined(__aarch64__) 1669 + void test_ptrace_modifies_pkru(int *ptr, u16 pkey) 1670 + { 1671 + pid_t child; 1672 + int status, ret; 1673 + struct iovec iov; 1674 + u64 trace_pkey; 1675 + /* Just a random pkey value.. 
*/ 1676 + u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) | 1677 + (POE_NONE << PKEY_BITS_PER_PKEY) | 1678 + POE_RWX; 1679 + 1680 + child = fork(); 1681 + pkey_assert(child >= 0); 1682 + dprintf3("[%d] fork() ret: %d\n", getpid(), child); 1683 + if (!child) { 1684 + ptrace(PTRACE_TRACEME, 0, 0, 0); 1685 + 1686 + /* Stop and allow the tracer to modify PKRU directly */ 1687 + raise(SIGSTOP); 1688 + 1689 + /* 1690 + * need __read_pkey_reg() version so we do not do shadow_pkey_reg 1691 + * checking 1692 + */ 1693 + if (__read_pkey_reg() != new_pkey) 1694 + exit(1); 1695 + 1696 + raise(SIGSTOP); 1697 + 1698 + exit(0); 1699 + } 1700 + 1701 + pkey_assert(child == waitpid(child, &status, 0)); 1702 + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1703 + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); 1704 + 1705 + iov.iov_base = &trace_pkey; 1706 + iov.iov_len = 8; 1707 + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); 1708 + pkey_assert(ret == 0); 1709 + pkey_assert(trace_pkey == read_pkey_reg()); 1710 + 1711 + trace_pkey = new_pkey; 1712 + 1713 + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov); 1714 + pkey_assert(ret == 0); 1715 + 1716 + /* Test that the modification is visible in ptrace before any execution */ 1717 + memset(&trace_pkey, 0, sizeof(trace_pkey)); 1718 + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); 1719 + pkey_assert(ret == 0); 1720 + pkey_assert(trace_pkey == new_pkey); 1721 + 1722 + /* Execute the tracee */ 1723 + ret = ptrace(PTRACE_CONT, child, 0, 0); 1724 + pkey_assert(ret == 0); 1725 + 1726 + /* Test that the tracee saw the PKRU value change */ 1727 + pkey_assert(child == waitpid(child, &status, 0)); 1728 + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1729 + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); 1730 + 1731 + /* Test that the modification is visible in ptrace after execution */ 1732 + memset(&trace_pkey, 0, 
sizeof(trace_pkey)); 1733 + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); 1734 + pkey_assert(ret == 0); 1735 + pkey_assert(trace_pkey == new_pkey); 1736 + 1737 + ret = ptrace(PTRACE_CONT, child, 0, 0); 1738 + pkey_assert(ret == 0); 1739 + pkey_assert(child == waitpid(child, &status, 0)); 1740 + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); 1741 + pkey_assert(WIFEXITED(status)); 1742 + pkey_assert(WEXITSTATUS(status) == 0); 1743 + } 1744 + #endif 1745 + 1677 1746 void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) 1678 1747 { 1679 1748 int size = PAGE_SIZE; ··· 1787 1700 test_pkey_syscalls_bad_args, 1788 1701 test_pkey_alloc_exhaust, 1789 1702 test_pkey_alloc_free_attach_pkey0, 1790 - #if defined(__i386__) || defined(__x86_64__) 1703 + #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) 1791 1704 test_ptrace_modifies_pkru, 1792 1705 #endif 1793 1706 };