Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: uaccess: remove set_fs()

Now that the uaccess primitives don't take addr_limit into account, we
have no need to manipulate this via set_fs() and get_fs(). Remove
support for these, along with some infrastructure this renders
redundant.

We no longer need to flip UAO to access kernel memory under KERNEL_DS,
and head.S unconditionally clears UAO for all kernel configurations via
an ERET in init_kernel_el. Thus, we don't need to dynamically flip UAO,
nor do we need to context-switch it. However, we still need to adjust
PAN during SDEI entry.

Masking of __user pointers no longer needs to use the dynamic value of
addr_limit, and can use a constant derived from the maximum possible
userspace task size. A new TASK_SIZE_MAX constant is introduced for
this, which is also used by core code. In configurations supporting
52-bit VAs, this may include a region of unusable VA space above a
48-bit TTBR0 limit, but never includes any portion of TTBR1.

Note that TASK_SIZE_MAX is an exclusive limit, while USER_DS and
KERNEL_DS were inclusive limits, and is converted to a mask by
subtracting one.

As the SDEI entry code repurposes the otherwise unnecessary
pt_regs::orig_addr_limit field to store the TTBR1 of the interrupted
context, for now we rename that to pt_regs::sdei_ttbr1. In future we can
consider factoring that out.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: James Morse <james.morse@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201202131558.39270-10-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

authored by

Mark Rutland and committed by
Catalin Marinas
3d2403fd 7b90dc40

+13 -92
-1
arch/arm64/Kconfig
··· 195 195 select PCI_SYSCALL if PCI 196 196 select POWER_RESET 197 197 select POWER_SUPPLY 198 - select SET_FS 199 198 select SPARSE_IRQ 200 199 select SWIOTLB 201 200 select SYSCTL_EXCEPTION_TRACE
-1
arch/arm64/include/asm/exec.h
··· 10 10 #include <linux/sched.h> 11 11 12 12 extern unsigned long arch_align_stack(unsigned long sp); 13 - void uao_thread_switch(struct task_struct *next); 14 13 15 14 #endif /* __ASM_EXEC_H */
+1 -3
arch/arm64/include/asm/processor.h
··· 8 8 #ifndef __ASM_PROCESSOR_H 9 9 #define __ASM_PROCESSOR_H 10 10 11 - #define KERNEL_DS UL(-1) 12 - #define USER_DS ((UL(1) << VA_BITS) - 1) 13 - 14 11 /* 15 12 * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is 16 13 * no point in shifting all network buffers by 2 bytes just to make some IP ··· 45 48 46 49 #define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS_MIN) 47 50 #define TASK_SIZE_64 (UL(1) << vabits_actual) 51 + #define TASK_SIZE_MAX (UL(1) << VA_BITS) 48 52 49 53 #ifdef CONFIG_COMPAT 50 54 #if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS)
+1 -2
arch/arm64/include/asm/ptrace.h
··· 193 193 s32 syscallno; 194 194 u32 unused2; 195 195 #endif 196 - 197 - u64 orig_addr_limit; 196 + u64 sdei_ttbr1; 198 197 /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ 199 198 u64 pmr_save; 200 199 u64 stackframe[2];
-4
arch/arm64/include/asm/thread_info.h
··· 18 18 #include <asm/stack_pointer.h> 19 19 #include <asm/types.h> 20 20 21 - typedef unsigned long mm_segment_t; 22 - 23 21 /* 24 22 * low level task data that entry.S needs immediate access to. 25 23 */ 26 24 struct thread_info { 27 25 unsigned long flags; /* low level flags */ 28 - mm_segment_t addr_limit; /* address limit */ 29 26 #ifdef CONFIG_ARM64_SW_TTBR0_PAN 30 27 u64 ttbr0; /* saved TTBR0_EL1 */ 31 28 #endif ··· 116 119 { \ 117 120 .flags = _TIF_FOREIGN_FPSTATE, \ 118 121 .preempt_count = INIT_PREEMPT_COUNT, \ 119 - .addr_limit = KERNEL_DS, \ 120 122 INIT_SCS \ 121 123 } 122 124
+6 -35
arch/arm64/include/asm/uaccess.h
··· 26 26 27 27 #define HAVE_GET_KERNEL_NOFAULT 28 28 29 - #define get_fs() (current_thread_info()->addr_limit) 30 - 31 - static inline void set_fs(mm_segment_t fs) 32 - { 33 - current_thread_info()->addr_limit = fs; 34 - 35 - /* 36 - * Prevent a mispredicted conditional call to set_fs from forwarding 37 - * the wrong address limit to access_ok under speculation. 38 - */ 39 - spec_bar(); 40 - 41 - /* On user-mode return, check fs is correct */ 42 - set_thread_flag(TIF_FSCHECK); 43 - 44 - /* 45 - * Enable/disable UAO so that copy_to_user() etc can access 46 - * kernel memory with the unprivileged instructions. 47 - */ 48 - if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS) 49 - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); 50 - else 51 - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO, 52 - CONFIG_ARM64_UAO)); 53 - } 54 - 55 - #define uaccess_kernel() (get_fs() == KERNEL_DS) 56 - 57 29 /* 58 30 * Test whether a block of memory is a valid user space address. 59 31 * Returns 1 if the range is valid, 0 otherwise. 60 32 * 61 33 * This is equivalent to the following test: 62 - * (u65)addr + (u65)size <= (u65)current->addr_limit + 1 34 + * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX 63 35 */ 64 36 static inline unsigned long __range_ok(const void __user *addr, unsigned long size) 65 37 { 66 - unsigned long ret, limit = current_thread_info()->addr_limit; 38 + unsigned long ret, limit = TASK_SIZE_MAX - 1; 67 39 68 40 /* 69 41 * Asynchronous I/O running in a kernel thread does not have the ··· 68 96 } 69 97 70 98 #define access_ok(addr, size) __range_ok(addr, size) 71 - #define user_addr_max get_fs 72 99 73 100 #define _ASM_EXTABLE(from, to) \ 74 101 " .pushsection __ex_table, \"a\"\n" \ ··· 197 226 } 198 227 199 228 /* 200 - * Sanitise a uaccess pointer such that it becomes NULL if above the 201 - * current addr_limit. In case the pointer is tagged (has the top byte set), 202 - * untag the pointer before checking. 
229 + * Sanitise a uaccess pointer such that it becomes NULL if above the maximum 230 + * user address. In case the pointer is tagged (has the top byte set), untag 231 + * the pointer before checking. 203 232 */ 204 233 #define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) 205 234 static inline void __user *__uaccess_mask_ptr(const void __user *ptr) ··· 210 239 " bics xzr, %3, %2\n" 211 240 " csel %0, %1, xzr, eq\n" 212 241 : "=&r" (safe_ptr) 213 - : "r" (ptr), "r" (current_thread_info()->addr_limit), 242 + : "r" (ptr), "r" (TASK_SIZE_MAX - 1), 214 243 "r" (untagged_addr(ptr)) 215 244 : "cc"); 216 245
+1 -2
arch/arm64/kernel/asm-offsets.c
··· 30 30 BLANK(); 31 31 DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); 32 32 DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); 33 - DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); 34 33 #ifdef CONFIG_ARM64_SW_TTBR0_PAN 35 34 DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); 36 35 #endif ··· 69 70 DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); 70 71 DEFINE(S_PC, offsetof(struct pt_regs, pc)); 71 72 DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); 72 - DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); 73 + DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); 73 74 DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); 74 75 DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); 75 76 DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
-4
arch/arm64/kernel/cpufeature.c
··· 1777 1777 .sys_reg = SYS_ID_AA64MMFR2_EL1, 1778 1778 .field_pos = ID_AA64MMFR2_UAO_SHIFT, 1779 1779 .min_field_value = 1, 1780 - /* 1781 - * We rely on stop_machine() calling uao_thread_switch() to set 1782 - * UAO immediately after patching. 1783 - */ 1784 1780 }, 1785 1781 #endif /* CONFIG_ARM64_UAO */ 1786 1782 #ifdef CONFIG_ARM64_PAN
+3 -16
arch/arm64/kernel/entry.S
··· 216 216 .else 217 217 add x21, sp, #S_FRAME_SIZE 218 218 get_current_task tsk 219 - /* Save the task's original addr_limit and set USER_DS */ 220 - ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] 221 - str x20, [sp, #S_ORIG_ADDR_LIMIT] 222 - mov x20, #USER_DS 223 - str x20, [tsk, #TSK_TI_ADDR_LIMIT] 224 - /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ 225 219 .endif /* \el == 0 */ 226 220 mrs x22, elr_el1 227 221 mrs x23, spsr_el1 ··· 273 279 .macro kernel_exit, el 274 280 .if \el != 0 275 281 disable_daif 276 - 277 - /* Restore the task's original addr_limit. */ 278 - ldr x20, [sp, #S_ORIG_ADDR_LIMIT] 279 - str x20, [tsk, #TSK_TI_ADDR_LIMIT] 280 - 281 - /* No need to restore UAO, it will be restored from SPSR_EL1 */ 282 282 .endif 283 283 284 284 /* Restore pmr */ ··· 987 999 mov x4, xzr 988 1000 989 1001 /* 990 - * Use reg->interrupted_regs.addr_limit to remember whether to unmap 991 - * the kernel on exit. 1002 + * Remember whether to unmap the kernel on exit. 992 1003 */ 993 - 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] 1004 + 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)] 994 1005 995 1006 #ifdef CONFIG_RANDOMIZE_BASE 996 1007 adr x4, tramp_vectors + PAGE_SIZE ··· 1010 1023 * x4: struct sdei_registered_event argument from registration time. 1011 1024 */ 1012 1025 SYM_CODE_START(__sdei_asm_exit_trampoline) 1013 - ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] 1026 + ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)] 1014 1027 cbnz x4, 1f 1015 1028 1016 1029 tramp_unmap_kernel tmp=x4
-12
arch/arm64/kernel/process.c
··· 460 460 write_sysreg(*task_user_tls(next), tpidr_el0); 461 461 } 462 462 463 - /* Restore the UAO state depending on next's addr_limit */ 464 - void uao_thread_switch(struct task_struct *next) 465 - { 466 - if (IS_ENABLED(CONFIG_ARM64_UAO)) { 467 - if (task_thread_info(next)->addr_limit == KERNEL_DS) 468 - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); 469 - else 470 - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO)); 471 - } 472 - } 473 - 474 463 /* 475 464 * Force SSBS state on context-switch, since it may be lost after migrating 476 465 * from a CPU which treats the bit as RES0 in a heterogeneous system. ··· 543 554 hw_breakpoint_thread_switch(next); 544 555 contextidr_thread_switch(next); 545 556 entry_task_switch(next); 546 - uao_thread_switch(next); 547 557 ssbs_thread_switch(next); 548 558 erratum_1418040_thread_switch(prev, next); 549 559
+1 -6
arch/arm64/kernel/sdei.c
··· 242 242 __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) 243 243 { 244 244 unsigned long ret; 245 - mm_segment_t orig_addr_limit; 246 245 247 246 /* 248 247 * We didn't take an exception to get here, so the HW hasn't 249 - * set/cleared bits in PSTATE that we may rely on. Initialize PAN, then 250 - * use force_uaccess_begin() to reset addr_limit. 248 + * set/cleared bits in PSTATE that we may rely on. Initialize PAN. 251 249 */ 252 250 __sdei_pstate_entry(); 253 - orig_addr_limit = force_uaccess_begin(); 254 251 255 252 nmi_enter(); 256 253 257 254 ret = _sdei_handler(regs, arg); 258 255 259 256 nmi_exit(); 260 - 261 - force_uaccess_end(orig_addr_limit); 262 257 263 258 return ret; 264 259 }
-1
arch/arm64/kernel/suspend.c
··· 58 58 * features that might not have been set correctly. 59 59 */ 60 60 __uaccess_enable_hw_pan(); 61 - uao_thread_switch(current); 62 61 63 62 /* 64 63 * Restore HW breakpoint registers to sane values
-5
arch/arm64/mm/fault.c
··· 479 479 } 480 480 481 481 if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) { 482 - /* regs->orig_addr_limit may be 0 if we entered from EL0 */ 483 - if (regs->orig_addr_limit == KERNEL_DS) 484 - die_kernel_fault("access to user memory with fs=KERNEL_DS", 485 - addr, esr, regs); 486 - 487 482 if (is_el1_instruction_abort(esr)) 488 483 die_kernel_fault("execution of user memory", 489 484 addr, esr, regs);