Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/pti updates from Thomas Gleixner:
"Yet another pile of melted spectrum related updates:

- Finally drop native vsyscall support, as it causes more trouble than
  it is worth.

- Make microcode loading more robust. A few issues, especially around
  late loading, are now surfacing because late loading of the IB*
  microcode updates that address the Spectre issues has become more
  widely used.

- Simplify and robustify the syscall handling in the entry code.

- Prevent kprobes on the entry trampoline code, which led to kernel
  crashes when a probe hit before CR3 was updated.

- Don't check microcode versions when running on hypervisors, as they
  lie about the microcode version anyway.

- Fix the 32-bit objtool build and a comment typo"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/kprobes: Fix kernel crash when probing .entry_trampoline code
x86/pti: Fix a comment typo
x86/microcode: Synchronize late microcode loading
x86/microcode: Request microcode on the BSP
x86/microcode/intel: Look into the patch cache first
x86/microcode: Do not upload microcode if CPUs are offline
x86/microcode/intel: Writeback and invalidate caches before updating microcode
x86/microcode/intel: Check microcode revision before updating sibling threads
x86/microcode: Get rid of struct apply_microcode_ctx
x86/spectre_v2: Don't check microcode versions when running under hypervisors
x86/vsyscall/64: Drop "native" vsyscalls
x86/entry/64/compat: Save one instruction in entry_INT80_compat()
x86/entry: Do not special-case clone(2) in compat entry
x86/syscalls: Use COMPAT_SYSCALL_DEFINEx() macros for x86-only compat syscalls
x86/syscalls: Use proper syscall definition for sys_ioperm()
x86/entry: Remove stale syscall prototype
x86/syscalls/32: Simplify $entry == $compat entries
objtool: Fix 32-bit build

Changed files: +292 -183
+1 -10
arch/x86/Kconfig
··· 2307 2307 it can be used to assist security vulnerability exploitation. 2308 2308 2309 2309 This setting can be changed at boot time via the kernel command 2310 - line parameter vsyscall=[native|emulate|none]. 2310 + line parameter vsyscall=[emulate|none]. 2311 2311 2312 2312 On a system with recent enough glibc (2.14 or newer) and no 2313 2313 static binaries, you can say None without a performance penalty 2314 2314 to improve security. 2315 2315 2316 2316 If unsure, select "Emulate". 2317 - 2318 - config LEGACY_VSYSCALL_NATIVE 2319 - bool "Native" 2320 - help 2321 - Actual executable code is located in the fixed vsyscall 2322 - address mapping, implementing time() efficiently. Since 2323 - this makes the mapping executable, it can be used during 2324 - security vulnerability exploitation (traditionally as 2325 - ROP gadgets). This configuration is not recommended. 2326 2317 2327 2318 config LEGACY_VSYSCALL_EMULATE 2328 2319 bool "Emulate"
+1 -15
arch/x86/entry/entry_64_compat.S
··· 363 363 pushq 2*8(%rdi) /* regs->ip */ 364 364 pushq 1*8(%rdi) /* regs->orig_ax */ 365 365 366 - movq (%rdi), %rdi /* restore %rdi */ 367 - 368 - pushq %rdi /* pt_regs->di */ 366 + pushq (%rdi) /* pt_regs->di */ 369 367 pushq %rsi /* pt_regs->si */ 370 368 pushq %rdx /* pt_regs->dx */ 371 369 pushq %rcx /* pt_regs->cx */ ··· 404 406 TRACE_IRQS_ON 405 407 jmp swapgs_restore_regs_and_return_to_usermode 406 408 END(entry_INT80_compat) 407 - 408 - ENTRY(stub32_clone) 409 - /* 410 - * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr). 411 - * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val). 412 - * 413 - * The native 64-bit kernel's sys_clone() implements the latter, 414 - * so we need to swap arguments here before calling it: 415 - */ 416 - xchg %r8, %rcx 417 - jmp sys_clone 418 - ENDPROC(stub32_clone)
+19 -19
arch/x86/entry/syscalls/syscall_32.tbl
··· 8 8 # 9 9 0 i386 restart_syscall sys_restart_syscall 10 10 1 i386 exit sys_exit 11 - 2 i386 fork sys_fork sys_fork 11 + 2 i386 fork sys_fork 12 12 3 i386 read sys_read 13 13 4 i386 write sys_write 14 14 5 i386 open sys_open compat_sys_open 15 15 6 i386 close sys_close 16 - 7 i386 waitpid sys_waitpid sys32_waitpid 16 + 7 i386 waitpid sys_waitpid compat_sys_x86_waitpid 17 17 8 i386 creat sys_creat 18 18 9 i386 link sys_link 19 19 10 i386 unlink sys_unlink ··· 78 78 69 i386 ssetmask sys_ssetmask 79 79 70 i386 setreuid sys_setreuid16 80 80 71 i386 setregid sys_setregid16 81 - 72 i386 sigsuspend sys_sigsuspend sys_sigsuspend 81 + 72 i386 sigsuspend sys_sigsuspend 82 82 73 i386 sigpending sys_sigpending compat_sys_sigpending 83 83 74 i386 sethostname sys_sethostname 84 84 75 i386 setrlimit sys_setrlimit compat_sys_setrlimit ··· 96 96 87 i386 swapon sys_swapon 97 97 88 i386 reboot sys_reboot 98 98 89 i386 readdir sys_old_readdir compat_sys_old_readdir 99 - 90 i386 mmap sys_old_mmap sys32_mmap 99 + 90 i386 mmap sys_old_mmap compat_sys_x86_mmap 100 100 91 i386 munmap sys_munmap 101 101 92 i386 truncate sys_truncate compat_sys_truncate 102 102 93 i386 ftruncate sys_ftruncate compat_sys_ftruncate ··· 126 126 117 i386 ipc sys_ipc compat_sys_ipc 127 127 118 i386 fsync sys_fsync 128 128 119 i386 sigreturn sys_sigreturn sys32_sigreturn 129 - 120 i386 clone sys_clone stub32_clone 129 + 120 i386 clone sys_clone compat_sys_x86_clone 130 130 121 i386 setdomainname sys_setdomainname 131 131 122 i386 uname sys_newuname 132 132 123 i386 modify_ldt sys_modify_ldt ··· 186 186 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 187 187 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 188 188 179 i386 rt_sigsuspend sys_rt_sigsuspend 189 - 180 i386 pread64 sys_pread64 sys32_pread 190 - 181 i386 pwrite64 sys_pwrite64 sys32_pwrite 189 + 180 i386 pread64 sys_pread64 compat_sys_x86_pread 190 + 181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite 191 191 182 i386 chown sys_chown16 192 192 183 i386 getcwd sys_getcwd 193 193 184 i386 capget sys_capget ··· 196 196 187 i386 sendfile sys_sendfile compat_sys_sendfile 197 197 188 i386 getpmsg 198 198 189 i386 putpmsg 199 - 190 i386 vfork sys_vfork sys_vfork 199 + 190 i386 vfork sys_vfork 200 200 191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit 201 201 192 i386 mmap2 sys_mmap_pgoff 202 - 193 i386 truncate64 sys_truncate64 sys32_truncate64 203 - 194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64 204 - 195 i386 stat64 sys_stat64 sys32_stat64 205 - 196 i386 lstat64 sys_lstat64 sys32_lstat64 206 - 197 i386 fstat64 sys_fstat64 sys32_fstat64 202 + 193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64 203 + 194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64 204 + 195 i386 stat64 sys_stat64 compat_sys_x86_stat64 205 + 196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64 206 + 197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64 207 207 198 i386 lchown32 sys_lchown 208 208 199 i386 getuid32 sys_getuid 209 209 200 i386 getgid32 sys_getgid ··· 231 231 # 222 is unused 232 232 # 223 is unused 233 233 224 i386 gettid sys_gettid 234 - 225 i386 readahead sys_readahead sys32_readahead 234 + 225 i386 readahead sys_readahead compat_sys_x86_readahead 235 235 226 i386 setxattr sys_setxattr 236 236 227 i386 lsetxattr sys_lsetxattr 237 237 228 i386 fsetxattr sys_fsetxattr ··· 256 256 247 i386 io_getevents sys_io_getevents compat_sys_io_getevents 257 257 248 i386 io_submit sys_io_submit compat_sys_io_submit 258 258 249 i386 
io_cancel sys_io_cancel 259 - 250 i386 fadvise64 sys_fadvise64 sys32_fadvise64 259 + 250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64 260 260 # 251 is available for reuse (was briefly sys_set_zone_reclaim) 261 261 252 i386 exit_group sys_exit_group 262 262 253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie ··· 278 278 269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 279 279 270 i386 tgkill sys_tgkill 280 280 271 i386 utimes sys_utimes compat_sys_utimes 281 - 272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64 281 + 272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64 282 282 273 i386 vserver 283 283 274 i386 mbind sys_mbind 284 284 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy ··· 306 306 297 i386 mknodat sys_mknodat 307 307 298 i386 fchownat sys_fchownat 308 308 299 i386 futimesat sys_futimesat compat_sys_futimesat 309 - 300 i386 fstatat64 sys_fstatat64 sys32_fstatat 309 + 300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat 310 310 301 i386 unlinkat sys_unlinkat 311 311 302 i386 renameat sys_renameat 312 312 303 i386 linkat sys_linkat ··· 320 320 311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list 321 321 312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list 322 322 313 i386 splice sys_splice 323 - 314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range 323 + 314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range 324 324 315 i386 tee sys_tee 325 325 316 i386 vmsplice sys_vmsplice compat_sys_vmsplice 326 326 317 i386 move_pages sys_move_pages compat_sys_move_pages ··· 330 330 321 i386 signalfd sys_signalfd compat_sys_signalfd 331 331 322 i386 timerfd_create sys_timerfd_create 332 332 323 i386 eventfd sys_eventfd 333 - 324 i386 fallocate sys_fallocate sys32_fallocate 333 + 324 i386 fallocate sys_fallocate compat_sys_x86_fallocate 334 334 325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime 335 335 326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime 336 336 327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
+3 -13
arch/x86/entry/vsyscall/vsyscall_64.c
··· 42 42 #define CREATE_TRACE_POINTS 43 43 #include "vsyscall_trace.h" 44 44 45 - static enum { EMULATE, NATIVE, NONE } vsyscall_mode = 46 - #if defined(CONFIG_LEGACY_VSYSCALL_NATIVE) 47 - NATIVE; 48 - #elif defined(CONFIG_LEGACY_VSYSCALL_NONE) 45 + static enum { EMULATE, NONE } vsyscall_mode = 46 + #ifdef CONFIG_LEGACY_VSYSCALL_NONE 49 47 NONE; 50 48 #else 51 49 EMULATE; ··· 54 56 if (str) { 55 57 if (!strcmp("emulate", str)) 56 58 vsyscall_mode = EMULATE; 57 - else if (!strcmp("native", str)) 58 - vsyscall_mode = NATIVE; 59 59 else if (!strcmp("none", str)) 60 60 vsyscall_mode = NONE; 61 61 else ··· 134 138 */ 135 139 136 140 WARN_ON_ONCE(address != regs->ip); 137 - 138 - /* This should be unreachable in NATIVE mode. */ 139 - if (WARN_ON(vsyscall_mode == NATIVE)) 140 - return false; 141 141 142 142 if (vsyscall_mode == NONE) { 143 143 warn_bad_vsyscall(KERN_INFO, regs, ··· 362 370 363 371 if (vsyscall_mode != NONE) { 364 372 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, 365 - vsyscall_mode == NATIVE 366 - ? PAGE_KERNEL_VSYSCALL 367 - : PAGE_KERNEL_VVAR); 373 + PAGE_KERNEL_VVAR); 368 374 set_vsyscall_pgtable_user_bits(swapper_pg_dir); 369 375 } 370 376
+44 -30
arch/x86/ia32/sys_ia32.c
··· 51 51 #define AA(__x) ((unsigned long)(__x)) 52 52 53 53 54 - asmlinkage long sys32_truncate64(const char __user *filename, 55 - unsigned long offset_low, 56 - unsigned long offset_high) 54 + COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename, 55 + unsigned long, offset_low, unsigned long, offset_high) 57 56 { 58 57 return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); 59 58 } 60 59 61 - asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, 62 - unsigned long offset_high) 60 + COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd, 61 + unsigned long, offset_low, unsigned long, offset_high) 63 62 { 64 63 return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); 65 64 } ··· 95 96 return 0; 96 97 } 97 98 98 - asmlinkage long sys32_stat64(const char __user *filename, 99 - struct stat64 __user *statbuf) 99 + COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename, 100 + struct stat64 __user *, statbuf) 100 101 { 101 102 struct kstat stat; 102 103 int ret = vfs_stat(filename, &stat); ··· 106 107 return ret; 107 108 } 108 109 109 - asmlinkage long sys32_lstat64(const char __user *filename, 110 - struct stat64 __user *statbuf) 110 + COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename, 111 + struct stat64 __user *, statbuf) 111 112 { 112 113 struct kstat stat; 113 114 int ret = vfs_lstat(filename, &stat); ··· 116 117 return ret; 117 118 } 118 119 119 - asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) 120 + COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd, 121 + struct stat64 __user *, statbuf) 120 122 { 121 123 struct kstat stat; 122 124 int ret = vfs_fstat(fd, &stat); ··· 126 126 return ret; 127 127 } 128 128 129 - asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename, 130 - struct stat64 __user *statbuf, int flag) 129 + COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd, 130 + const char __user *, filename, 131 + struct stat64 __user *, statbuf, int, flag) 131 132 { 132 133 struct kstat stat; 133 134 int error; ··· 154 153 unsigned int offset; 155 154 }; 156 155 157 - asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) 156 + COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg) 158 157 { 159 158 struct mmap_arg_struct32 a; 160 159 ··· 168 167 a.offset>>PAGE_SHIFT); 169 168 } 170 169 171 - asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, 172 - int options) 170 + COMPAT_SYSCALL_DEFINE3(x86_waitpid, compat_pid_t, pid, unsigned int __user *, 171 + stat_addr, int, options) 173 172 { 174 173 return compat_sys_wait4(pid, stat_addr, options, NULL); 175 174 } 176 175 177 176 /* warning: next two assume little endian */ 178 - asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, 179 - u32 poslo, u32 poshi) 177 + COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf, 178 + u32, count, u32, poslo, u32, poshi) 180 179 { 181 180 return sys_pread64(fd, ubuf, count, 182 181 ((loff_t)AA(poshi) << 32) | AA(poslo)); 183 182 } 184 183 185 - asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, 186 - u32 count, u32 poslo, u32 poshi) 184 + COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf, 185 + u32, count, u32, poslo, u32, poshi) 187 186 { 188 187 return sys_pwrite64(fd, ubuf, count, 189 188 ((loff_t)AA(poshi) << 32) | AA(poslo)); ··· 194 193 * Some system calls that need sign extended arguments. 
This could be 195 194 * done by a generic wrapper. 196 195 */ 197 - long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, 198 - __u32 len_low, __u32 len_high, int advice) 196 + COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low, 197 + __u32, offset_high, __u32, len_low, __u32, len_high, 198 + int, advice) 199 199 { 200 200 return sys_fadvise64_64(fd, 201 201 (((u64)offset_high)<<32) | offset_low, ··· 204 202 advice); 205 203 } 206 204 207 - asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, 208 - size_t count) 205 + COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo, 206 + unsigned int, off_hi, size_t, count) 209 207 { 210 208 return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); 211 209 } 212 210 213 - asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, 214 - unsigned n_low, unsigned n_hi, int flags) 211 + COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low, 212 + unsigned int, off_hi, unsigned int, n_low, 213 + unsigned int, n_hi, int, flags) 215 214 { 216 215 return sys_sync_file_range(fd, 217 216 ((u64)off_hi << 32) | off_low, 218 217 ((u64)n_hi << 32) | n_low, flags); 219 218 } 220 219 221 - asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, 222 - size_t len, int advice) 220 + COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo, 221 + unsigned int, offset_hi, size_t, len, int, advice) 223 222 { 224 223 return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, 225 224 len, advice); 226 225 } 227 226 228 - asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo, 229 - unsigned offset_hi, unsigned len_lo, 230 - unsigned len_hi) 227 + COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode, 228 + unsigned int, offset_lo, unsigned int, offset_hi, 229 + unsigned int, len_lo, unsigned int, len_hi) 231 230 { 232 231 return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, 233 232 ((u64)len_hi << 32) | len_lo); 233 + } 234 + 235 + /* 236 + * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS 237 + */ 238 + COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, 239 + unsigned long, newsp, int __user *, parent_tidptr, 240 + unsigned long, tls_val, int __user *, child_tidptr) 241 + { 242 + return sys_clone(clone_flags, newsp, parent_tidptr, child_tidptr, 243 + tls_val); 234 244 }
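
For illustration only (not part of the patch): the wrappers above mainly reassemble 64-bit values that 32-bit userspace passes as two register-sized halves. A raw 32-bit caller of pread64 (i386 syscall 180 per the table above) looks roughly like the sketch below; the lo/hi offset words are what compat_sys_x86_pread() stitches back into a loff_t.

/*
 * Sketch, build with gcc -m32 on x86: a 32-bit process passes the 64-bit
 * file offset to pread64 as two 32-bit halves (low word first).
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	unsigned long long off = 0;	/* 64-bit offset, split below */
	int fd = open("/etc/hostname", O_RDONLY);
	long n;

	if (fd < 0)
		return 1;

	/* i386 syscall 180: pread64(fd, buf, count, pos_lo, pos_hi) */
	n = syscall(180, fd, buf, sizeof(buf) - 1,
		    (unsigned long)(off & 0xffffffffUL),
		    (unsigned long)(off >> 32));
	if (n > 0) {
		buf[n] = '\0';
		printf("read %ld bytes: %s", n, buf);
	}

	close(fd);
	return 0;
}
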
-2
arch/x86/include/asm/pgtable_types.h
··· 174 174 #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) 175 175 #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) 176 176 #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) 177 - #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) 178 177 #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) 179 178 #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) 180 179 #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) ··· 205 206 #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) 206 207 #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) 207 208 #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) 208 - #define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC) 209 209 #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) 210 210 211 211 #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
+1
arch/x86/include/asm/sections.h
··· 10 10 11 11 #if defined(CONFIG_X86_64) 12 12 extern char __end_rodata_hpage_align[]; 13 + extern char __entry_trampoline_start[], __entry_trampoline_end[]; 13 14 #endif 14 15 15 16 #endif /* _ASM_X86_SECTIONS_H */
+30 -18
arch/x86/include/asm/sys_ia32.h
··· 20 20 #include <asm/ia32.h> 21 21 22 22 /* ia32/sys_ia32.c */ 23 - asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long); 24 - asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); 23 + asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long, 24 + unsigned long); 25 + asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long, 26 + unsigned long); 25 27 26 - asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *); 27 - asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *); 28 - asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); 29 - asmlinkage long sys32_fstatat(unsigned int, const char __user *, 28 + asmlinkage long compat_sys_x86_stat64(const char __user *, 29 + struct stat64 __user *); 30 + asmlinkage long compat_sys_x86_lstat64(const char __user *, 31 + struct stat64 __user *); 32 + asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *); 33 + asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *, 30 34 struct stat64 __user *, int); 31 35 struct mmap_arg_struct32; 32 - asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); 36 + asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); 33 37 34 - asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); 38 + asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *, 39 + int); 35 40 36 - asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); 37 - asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); 41 + asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, 42 + u32); 43 + asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32, 44 + u32, u32); 38 45 39 - long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); 40 - long sys32_vm86_warning(void); 46 + asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32, 47 + int); 41 48 42 - asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); 43 - asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, 44 - unsigned, unsigned, int); 45 - asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); 46 - asmlinkage long sys32_fallocate(int, int, unsigned, 47 - unsigned, unsigned, unsigned); 49 + asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int, 50 + size_t); 51 + asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int, 52 + unsigned int, unsigned int, 53 + int); 54 + asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, 55 + size_t, int); 56 + asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, 57 + unsigned int, unsigned int); 58 + asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *, 59 + unsigned long, int __user *); 48 60 49 61 /* ia32/ia32_signal.c */ 50 62 asmlinkage long sys32_sigreturn(void);
+7
arch/x86/kernel/cpu/intel.c
··· 144 144 { 145 145 int i; 146 146 147 + /* 148 + * We know that the hypervisor lie to us on the microcode version so 149 + * we may as well hope that it is running the correct version. 150 + */ 151 + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) 152 + return false; 153 + 147 154 for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { 148 155 if (c->x86_model == spectre_bad_microcodes[i].model && 149 156 c->x86_stepping == spectre_bad_microcodes[i].stepping)
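
For context (not part of the patch): X86_FEATURE_HYPERVISOR reflects CPUID leaf 1, ECX bit 31, which hypervisors set for their guests. A minimal userspace sketch of the same check:

/* Minimal sketch of the hypervisor-present check (CPUID.1:ECX[31]). */
#include <stdbool.h>
#include <stdio.h>
#include <cpuid.h>

static bool running_on_hypervisor(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return false;

	return ecx & (1U << 31);
}

int main(void)
{
	printf("hypervisor bit: %s\n",
	       running_on_hypervisor() ? "set" : "clear");
	return 0;
}
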
+120 -40
arch/x86/kernel/cpu/microcode/core.c
··· 22 22 #define pr_fmt(fmt) "microcode: " fmt 23 23 24 24 #include <linux/platform_device.h> 25 + #include <linux/stop_machine.h> 25 26 #include <linux/syscore_ops.h> 26 27 #include <linux/miscdevice.h> 27 28 #include <linux/capability.h> 28 29 #include <linux/firmware.h> 29 30 #include <linux/kernel.h> 31 + #include <linux/delay.h> 30 32 #include <linux/mutex.h> 31 33 #include <linux/cpu.h> 34 + #include <linux/nmi.h> 32 35 #include <linux/fs.h> 33 36 #include <linux/mm.h> 34 37 ··· 66 63 * updated at any particular moment of time. 67 64 */ 68 65 static DEFINE_MUTEX(microcode_mutex); 66 + 67 + /* 68 + * Serialize late loading so that CPUs get updated one-by-one. 69 + */ 70 + static DEFINE_SPINLOCK(update_lock); 69 71 70 72 struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; 71 73 ··· 381 373 return ret; 382 374 } 383 375 384 - struct apply_microcode_ctx { 385 - enum ucode_state err; 386 - }; 387 - 388 376 static void apply_microcode_local(void *arg) 389 377 { 390 - struct apply_microcode_ctx *ctx = arg; 378 + enum ucode_state *err = arg; 391 379 392 - ctx->err = microcode_ops->apply_microcode(smp_processor_id()); 380 + *err = microcode_ops->apply_microcode(smp_processor_id()); 393 381 } 394 382 395 383 static int apply_microcode_on_target(int cpu) 396 384 { 397 - struct apply_microcode_ctx ctx = { .err = 0 }; 385 + enum ucode_state err; 398 386 int ret; 399 387 400 - ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); 401 - if (!ret) 402 - ret = ctx.err; 403 - 388 + ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1); 389 + if (!ret) { 390 + if (err == UCODE_ERROR) 391 + ret = 1; 392 + } 404 393 return ret; 405 394 } 406 395 ··· 494 489 /* fake device for request_firmware */ 495 490 static struct platform_device *microcode_pdev; 496 491 497 - static enum ucode_state reload_for_cpu(int cpu) 492 + /* 493 + * Late loading dance. Why the heavy-handed stomp_machine effort? 494 + * 495 + * - HT siblings must be idle and not execute other code while the other sibling 496 + * is loading microcode in order to avoid any negative interactions caused by 497 + * the loading. 498 + * 499 + * - In addition, microcode update on the cores must be serialized until this 500 + * requirement can be relaxed in the future. Right now, this is conservative 501 + * and good. 502 + */ 503 + #define SPINUNIT 100 /* 100 nsec */ 504 + 505 + static int check_online_cpus(void) 498 506 { 499 - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 500 - enum ucode_state ustate; 507 + if (num_online_cpus() == num_present_cpus()) 508 + return 0; 501 509 502 - if (!uci->valid) 503 - return UCODE_OK; 510 + pr_err("Not all CPUs online, aborting microcode update.\n"); 504 511 505 - ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true); 506 - if (ustate != UCODE_OK) 507 - return ustate; 512 + return -EINVAL; 513 + } 508 514 509 - return apply_microcode_on_target(cpu); 515 + static atomic_t late_cpus; 516 + 517 + /* 518 + * Returns: 519 + * < 0 - on error 520 + * 0 - no update done 521 + * 1 - microcode was updated 522 + */ 523 + static int __reload_late(void *info) 524 + { 525 + unsigned int timeout = NSEC_PER_SEC; 526 + int all_cpus = num_online_cpus(); 527 + int cpu = smp_processor_id(); 528 + enum ucode_state err; 529 + int ret = 0; 530 + 531 + atomic_dec(&late_cpus); 532 + 533 + /* 534 + * Wait for all CPUs to arrive. A load will not be attempted unless all 535 + * CPUs show up. 
536 + * */ 537 + while (atomic_read(&late_cpus)) { 538 + if (timeout < SPINUNIT) { 539 + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", 540 + atomic_read(&late_cpus)); 541 + return -1; 542 + } 543 + 544 + ndelay(SPINUNIT); 545 + timeout -= SPINUNIT; 546 + 547 + touch_nmi_watchdog(); 548 + } 549 + 550 + spin_lock(&update_lock); 551 + apply_microcode_local(&err); 552 + spin_unlock(&update_lock); 553 + 554 + if (err > UCODE_NFOUND) { 555 + pr_warn("Error reloading microcode on CPU %d\n", cpu); 556 + ret = -1; 557 + } else if (err == UCODE_UPDATED) { 558 + ret = 1; 559 + } 560 + 561 + atomic_inc(&late_cpus); 562 + 563 + while (atomic_read(&late_cpus) != all_cpus) 564 + cpu_relax(); 565 + 566 + return ret; 567 + } 568 + 569 + /* 570 + * Reload microcode late on all CPUs. Wait for a sec until they 571 + * all gather together. 572 + */ 573 + static int microcode_reload_late(void) 574 + { 575 + int ret; 576 + 577 + atomic_set(&late_cpus, num_online_cpus()); 578 + 579 + ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); 580 + if (ret < 0) 581 + return ret; 582 + else if (ret > 0) 583 + microcode_check(); 584 + 585 + return ret; 510 586 } 511 587 512 588 static ssize_t reload_store(struct device *dev, ··· 595 509 const char *buf, size_t size) 596 510 { 597 511 enum ucode_state tmp_ret = UCODE_OK; 598 - bool do_callback = false; 512 + int bsp = boot_cpu_data.cpu_index; 599 513 unsigned long val; 600 514 ssize_t ret = 0; 601 - int cpu; 602 515 603 516 ret = kstrtoul(buf, 0, &val); 604 517 if (ret) ··· 606 521 if (val != 1) 607 522 return size; 608 523 524 + tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true); 525 + if (tmp_ret != UCODE_OK) 526 + return size; 527 + 609 528 get_online_cpus(); 529 + 530 + ret = check_online_cpus(); 531 + if (ret) 532 + goto put; 533 + 610 534 mutex_lock(&microcode_mutex); 611 - for_each_online_cpu(cpu) { 612 - tmp_ret = reload_for_cpu(cpu); 613 - if (tmp_ret > UCODE_NFOUND) { 614 - pr_warn("Error reloading microcode on CPU %d\n", cpu); 615 - 616 - /* set retval for the first encountered reload error */ 617 - if (!ret) 618 - ret = -EINVAL; 619 - } 620 - 621 - if (tmp_ret == UCODE_UPDATED) 622 - do_callback = true; 623 - } 624 - 625 - if (!ret && do_callback) 626 - microcode_check(); 627 - 535 + ret = microcode_reload_late(); 628 536 mutex_unlock(&microcode_mutex); 537 + 538 + put: 629 539 put_online_cpus(); 630 540 631 - if (!ret) 541 + if (ret >= 0) 632 542 ret = size; 633 543 634 544 return ret;
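
As a usage note (illustrative, assuming the standard sysfs location): late loading is still triggered through the microcode reload attribute. With this series the write first requests the firmware on the BSP, refuses to proceed unless all present CPUs are online, and then applies the update under stop_machine().

/* Sketch: trigger late microcode loading from userspace (needs root). */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/microcode/reload", "w");

	if (!f) {
		perror("reload");
		return 1;
	}
	if (fputs("1", f) == EOF) {
		perror("write");
		fclose(f);
		return 1;
	}
	return fclose(f) == EOF;
}
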
+40 -8
arch/x86/kernel/cpu/microcode/intel.c
··· 589 589 if (!mc) 590 590 return 0; 591 591 592 + /* 593 + * Save us the MSR write below - which is a particular expensive 594 + * operation - when the other hyperthread has updated the microcode 595 + * already. 596 + */ 597 + rev = intel_get_microcode_revision(); 598 + if (rev >= mc->hdr.rev) { 599 + uci->cpu_sig.rev = rev; 600 + return UCODE_OK; 601 + } 602 + 603 + /* 604 + * Writeback and invalidate caches before updating microcode to avoid 605 + * internal issues depending on what the microcode is updating. 606 + */ 607 + native_wbinvd(); 608 + 592 609 /* write microcode via MSR 0x79 */ 593 610 native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); 594 611 ··· 791 774 792 775 static enum ucode_state apply_microcode_intel(int cpu) 793 776 { 777 + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 778 + struct cpuinfo_x86 *c = &cpu_data(cpu); 794 779 struct microcode_intel *mc; 795 - struct ucode_cpu_info *uci; 796 - struct cpuinfo_x86 *c; 797 780 static int prev_rev; 798 781 u32 rev; 799 782 ··· 801 784 if (WARN_ON(raw_smp_processor_id() != cpu)) 802 785 return UCODE_ERROR; 803 786 804 - uci = ucode_cpu_info + cpu; 805 - mc = uci->mc; 787 + /* Look for a newer patch in our cache: */ 788 + mc = find_patch(uci); 806 789 if (!mc) { 807 - /* Look for a newer patch in our cache: */ 808 - mc = find_patch(uci); 790 + mc = uci->mc; 809 791 if (!mc) 810 792 return UCODE_NFOUND; 811 793 } 794 + 795 + /* 796 + * Save us the MSR write below - which is a particular expensive 797 + * operation - when the other hyperthread has updated the microcode 798 + * already. 799 + */ 800 + rev = intel_get_microcode_revision(); 801 + if (rev >= mc->hdr.rev) { 802 + uci->cpu_sig.rev = rev; 803 + c->microcode = rev; 804 + return UCODE_OK; 805 + } 806 + 807 + /* 808 + * Writeback and invalidate caches before updating microcode to avoid 809 + * internal issues depending on what the microcode is updating. 810 + */ 811 + native_wbinvd(); 812 812 813 813 /* write microcode via MSR 0x79 */ 814 814 wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); ··· 846 812 (mc->hdr.date >> 16) & 0xff); 847 813 prev_rev = rev; 848 814 } 849 - 850 - c = &cpu_data(cpu); 851 815 852 816 uci->cpu_sig.rev = rev; 853 817 c->microcode = rev;
+1 -1
arch/x86/kernel/ioport.c
··· 23 23 /* 24 24 * this changes the io permissions bitmap in the current task. 25 25 */ 26 - asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) 26 + SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) 27 27 { 28 28 struct thread_struct *t = &current->thread; 29 29 struct tss_struct *tss;
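
Userspace is unaffected by the switch to SYSCALL_DEFINE3(); for reference, a minimal (illustrative) caller of ioperm(2), which needs CAP_SYS_RAWIO:

/* Illustrative ioperm(2) user: grant, use and drop access to port 0x80. */
#include <stdio.h>
#include <sys/io.h>

int main(void)
{
	if (ioperm(0x80, 1, 1)) {	/* enable access to one port */
		perror("ioperm");
		return 1;
	}

	outb(0x42, 0x80);		/* harmless write to the POST/debug port */
	ioperm(0x80, 1, 0);		/* drop access again */
	return 0;
}
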
+9 -1
arch/x86/kernel/kprobes/core.c
··· 1168 1168 1169 1169 bool arch_within_kprobe_blacklist(unsigned long addr) 1170 1170 { 1171 + bool is_in_entry_trampoline_section = false; 1172 + 1173 + #ifdef CONFIG_X86_64 1174 + is_in_entry_trampoline_section = 1175 + (addr >= (unsigned long)__entry_trampoline_start && 1176 + addr < (unsigned long)__entry_trampoline_end); 1177 + #endif 1171 1178 return (addr >= (unsigned long)__kprobes_text_start && 1172 1179 addr < (unsigned long)__kprobes_text_end) || 1173 1180 (addr >= (unsigned long)__entry_text_start && 1174 - addr < (unsigned long)__entry_text_end); 1181 + addr < (unsigned long)__entry_text_end) || 1182 + is_in_entry_trampoline_section; 1175 1183 } 1176 1184 1177 1185 int __init arch_init_kprobes(void)
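
With the trampoline range blacklisted, register_kprobe() now rejects such probes instead of letting them fire before CR3 has been switched and crash the kernel. A rough module sketch to observe the rejection; the probed symbol name is only an example of an address inside the trampoline:

/*
 * Rough sketch: try to probe the entry trampoline; register_kprobe() is
 * expected to fail now that the range is in the kprobe blacklist.
 */
#include <linux/module.h>
#include <linux/kprobes.h>

static struct kprobe kp = {
	.symbol_name = "entry_SYSCALL_64_trampoline",	/* example target */
};

static int __init blacklist_demo_init(void)
{
	int ret = register_kprobe(&kp);

	if (ret < 0) {
		pr_info("probe rejected as expected: %d\n", ret);
		return 0;
	}

	/* Should not happen on a kernel with this fix applied. */
	unregister_kprobe(&kp);
	return 0;
}

static void __exit blacklist_demo_exit(void)
{
}

module_init(blacklist_demo_init);
module_exit(blacklist_demo_exit);
MODULE_LICENSE("GPL");
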
+2
arch/x86/kernel/vmlinux.lds.S
··· 118 118 119 119 #ifdef CONFIG_X86_64 120 120 . = ALIGN(PAGE_SIZE); 121 + VMLINUX_SYMBOL(__entry_trampoline_start) = .; 121 122 _entry_trampoline = .; 122 123 *(.entry_trampoline) 123 124 . = ALIGN(PAGE_SIZE); 125 + VMLINUX_SYMBOL(__entry_trampoline_end) = .; 124 126 ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); 125 127 #endif 126 128
+1 -1
arch/x86/mm/pti.c
··· 332 332 } 333 333 334 334 /* 335 - * Clone the ESPFIX P4D into the user space visinble page table 335 + * Clone the ESPFIX P4D into the user space visible page table 336 336 */ 337 337 static void __init pti_setup_espfix64(void) 338 338 {
+7 -20
tools/objtool/check.c
··· 1116 1116 1117 1117 static int read_retpoline_hints(struct objtool_file *file) 1118 1118 { 1119 - struct section *sec, *relasec; 1119 + struct section *sec; 1120 1120 struct instruction *insn; 1121 1121 struct rela *rela; 1122 - int i; 1123 1122 1124 - sec = find_section_by_name(file->elf, ".discard.retpoline_safe"); 1123 + sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe"); 1125 1124 if (!sec) 1126 1125 return 0; 1127 1126 1128 - relasec = sec->rela; 1129 - if (!relasec) { 1130 - WARN("missing .rela.discard.retpoline_safe section"); 1131 - return -1; 1132 - } 1133 - 1134 - if (sec->len % sizeof(unsigned long)) { 1135 - WARN("retpoline_safe size mismatch: %d %ld", sec->len, sizeof(unsigned long)); 1136 - return -1; 1137 - } 1138 - 1139 - for (i = 0; i < sec->len / sizeof(unsigned long); i++) { 1140 - rela = find_rela_by_dest(sec, i * sizeof(unsigned long)); 1141 - if (!rela) { 1142 - WARN("can't find rela for retpoline_safe[%d]", i); 1127 + list_for_each_entry(rela, &sec->rela_list, list) { 1128 + if (rela->sym->type != STT_SECTION) { 1129 + WARN("unexpected relocation symbol type in %s", sec->name); 1143 1130 return -1; 1144 1131 } 1145 1132 1146 1133 insn = find_insn(file, rela->sym->sec, rela->addend); 1147 1134 if (!insn) { 1148 - WARN("can't find insn for retpoline_safe[%d]", i); 1135 + WARN("bad .discard.retpoline_safe entry"); 1149 1136 return -1; 1150 1137 } 1151 1138 1152 1139 if (insn->type != INSN_JUMP_DYNAMIC && 1153 1140 insn->type != INSN_CALL_DYNAMIC) { 1154 - WARN_FUNC("retpoline_safe hint not a indirect jump/call", 1141 + WARN_FUNC("retpoline_safe hint not an indirect jump/call", 1155 1142 insn->sec, insn->offset); 1156 1143 return -1; 1157 1144 }
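
For context on what objtool is parsing here (simplified, not the kernel's literal macro): a retpoline-safe annotation records the address of an indirect branch that is deliberately left unconverted by emitting a pointer-sized entry into .discard.retpoline_safe, and objtool resolves those addresses through the section's relocations, which is why it now opens ".rela.discard.retpoline_safe" and iterates the rela entries directly.

/*
 * Simplified sketch of a retpoline-safe annotation: a local label marks the
 * next instruction and a pointer to it lands in .discard.retpoline_safe.
 */
#define ANNOTATE_RETPOLINE_SAFE				\
	"999:\n\t"					\
	".pushsection .discard.retpoline_safe\n\t"	\
	".quad 999b\n\t"	/* pointer-sized on x86-64 */	\
	".popsection\n\t"
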
+6 -5
tools/testing/selftests/x86/test_vsyscall.c
··· 450 450 num_vsyscall_traps++; 451 451 } 452 452 453 - static int test_native_vsyscall(void) 453 + static int test_emulation(void) 454 454 { 455 455 time_t tmp; 456 456 bool is_native; ··· 458 458 if (!vtime) 459 459 return 0; 460 460 461 - printf("[RUN]\tchecking for native vsyscall\n"); 461 + printf("[RUN]\tchecking that vsyscalls are emulated\n"); 462 462 sethandler(SIGTRAP, sigtrap, 0); 463 463 set_eflags(get_eflags() | X86_EFLAGS_TF); 464 464 vtime(&tmp); ··· 474 474 */ 475 475 is_native = (num_vsyscall_traps > 1); 476 476 477 - printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", 477 + printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n", 478 + (is_native ? "FAIL" : "OK"), 478 479 (is_native ? "native" : "emulated"), 479 480 (int)num_vsyscall_traps); 480 481 481 - return 0; 482 + return is_native; 482 483 } 483 484 #endif 484 485 ··· 499 498 nerrs += test_vsys_r(); 500 499 501 500 #ifdef __x86_64__ 502 - nerrs += test_native_vsyscall(); 501 + nerrs += test_emulation(); 503 502 #endif 504 503 505 504 return nerrs ? 1 : 0;
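
The selftest drives the legacy vsyscall page through its fixed addresses. As a standalone illustration (x86-64 only, not part of the selftest), this is the kind of call it single-steps through; with vsyscall=emulate it traps into the kernel and is emulated, with vsyscall=none it raises SIGSEGV:

/*
 * Illustration: call the legacy time() vsyscall at its fixed address.
 * The vsyscall page sits at 0xffffffffff600000; time() is at offset 0x400.
 */
#include <stdio.h>
#include <time.h>

typedef time_t (*vsys_time_t)(time_t *);

int main(void)
{
	vsys_time_t vtime = (vsys_time_t)0xffffffffff600400UL;
	time_t t = vtime(NULL);

	printf("vsyscall time() returned %ld\n", (long)t);
	return 0;
}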