Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'x86_cpu_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cpu updates from Borislav Petkov:

- Print the CPU number at segfault time.

The number printed is not always accurate (preemption is enabled at
that time), but the printed string contains "likely" and, after a lot
of back-and-forth on this, that was the consensus reached. See the
thread at [1].

- After a *lot* of testing and polishing, finally land the clear_user()
improvements that inline REP; STOSB by default

Link: https://lore.kernel.org/r/5d62c1d0-7425-d5bb-ecb5-1dc3b4d7d245@intel.com [1]

* tag 'x86_cpu_for_v6.1_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mm: Print likely CPU at segfault time
x86/clear_user: Make it faster

+198 -43
+2 -3
arch/x86/include/asm/uaccess.h
··· 502 502 503 503 extern __must_check long strnlen_user(const char __user *str, long n); 504 504 505 - unsigned long __must_check clear_user(void __user *mem, unsigned long len); 506 - unsigned long __must_check __clear_user(void __user *mem, unsigned long len); 507 - 508 505 #ifdef CONFIG_ARCH_HAS_COPY_MC 509 506 unsigned long __must_check 510 507 copy_mc_to_kernel(void *to, const void *from, unsigned len); ··· 523 526 #define ARCH_HAS_NOCACHE_UACCESS 1 524 527 525 528 #ifdef CONFIG_X86_32 529 + unsigned long __must_check clear_user(void __user *mem, unsigned long len); 530 + unsigned long __must_check __clear_user(void __user *mem, unsigned long len); 526 531 # include <asm/uaccess_32.h> 527 532 #else 528 533 # include <asm/uaccess_64.h>
+45
arch/x86/include/asm/uaccess_64.h
··· 79 79 kasan_check_write(dst, size); 80 80 return __copy_user_flushcache(dst, src, size); 81 81 } 82 + 83 + /* 84 + * Zero Userspace. 85 + */ 86 + 87 + __must_check unsigned long 88 + clear_user_original(void __user *addr, unsigned long len); 89 + __must_check unsigned long 90 + clear_user_rep_good(void __user *addr, unsigned long len); 91 + __must_check unsigned long 92 + clear_user_erms(void __user *addr, unsigned long len); 93 + 94 + static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size) 95 + { 96 + might_fault(); 97 + stac(); 98 + 99 + /* 100 + * No memory constraint because it doesn't change any memory gcc 101 + * knows about. 102 + */ 103 + asm volatile( 104 + "1:\n\t" 105 + ALTERNATIVE_3("rep stosb", 106 + "call clear_user_erms", ALT_NOT(X86_FEATURE_FSRM), 107 + "call clear_user_rep_good", ALT_NOT(X86_FEATURE_ERMS), 108 + "call clear_user_original", ALT_NOT(X86_FEATURE_REP_GOOD)) 109 + "2:\n" 110 + _ASM_EXTABLE_UA(1b, 2b) 111 + : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT 112 + : "a" (0) 113 + /* rep_good clobbers %rdx */ 114 + : "rdx"); 115 + 116 + clac(); 117 + 118 + return size; 119 + } 120 + 121 + static __always_inline unsigned long clear_user(void __user *to, unsigned long n) 122 + { 123 + if (access_ok(to, n)) 124 + return __clear_user(to, n); 125 + return n; 126 + } 82 127 #endif /* _ASM_X86_UACCESS_64_H */
+138
arch/x86/lib/clear_page_64.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 #include <linux/linkage.h> 3 + #include <asm/asm.h> 3 4 #include <asm/export.h> 4 5 5 6 /* ··· 51 50 RET 52 51 SYM_FUNC_END(clear_page_erms) 53 52 EXPORT_SYMBOL_GPL(clear_page_erms) 53 + 54 + /* 55 + * Default clear user-space. 56 + * Input: 57 + * rdi destination 58 + * rcx count 59 + * 60 + * Output: 61 + * rcx: uncleared bytes or 0 if successful. 62 + */ 63 + SYM_FUNC_START(clear_user_original) 64 + /* 65 + * Copy only the lower 32 bits of size as that is enough to handle the rest bytes, 66 + * i.e., no need for a 'q' suffix and thus a REX prefix. 67 + */ 68 + mov %ecx,%eax 69 + shr $3,%rcx 70 + jz .Lrest_bytes 71 + 72 + # do the qwords first 73 + .p2align 4 74 + .Lqwords: 75 + movq $0,(%rdi) 76 + lea 8(%rdi),%rdi 77 + dec %rcx 78 + jnz .Lqwords 79 + 80 + .Lrest_bytes: 81 + and $7, %eax 82 + jz .Lexit 83 + 84 + # now do the rest bytes 85 + .Lbytes: 86 + movb $0,(%rdi) 87 + inc %rdi 88 + dec %eax 89 + jnz .Lbytes 90 + 91 + .Lexit: 92 + /* 93 + * %rax still needs to be cleared in the exception case because this function is called 94 + * from inline asm and the compiler expects %rax to be zero when exiting the inline asm, 95 + * in case it might reuse it somewhere. 96 + */ 97 + xor %eax,%eax 98 + RET 99 + 100 + .Lqwords_exception: 101 + # convert remaining qwords back into bytes to return to caller 102 + shl $3, %rcx 103 + and $7, %eax 104 + add %rax,%rcx 105 + jmp .Lexit 106 + 107 + .Lbytes_exception: 108 + mov %eax,%ecx 109 + jmp .Lexit 110 + 111 + _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception) 112 + _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception) 113 + SYM_FUNC_END(clear_user_original) 114 + EXPORT_SYMBOL(clear_user_original) 115 + 116 + /* 117 + * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is 118 + * present. 119 + * Input: 120 + * rdi destination 121 + * rcx count 122 + * 123 + * Output: 124 + * rcx: uncleared bytes or 0 if successful. 
125 + */ 126 + SYM_FUNC_START(clear_user_rep_good) 127 + # call the original thing for less than a cacheline 128 + cmp $64, %rcx 129 + jb clear_user_original 130 + 131 + .Lprep: 132 + # copy lower 32-bits for rest bytes 133 + mov %ecx, %edx 134 + shr $3, %rcx 135 + jz .Lrep_good_rest_bytes 136 + 137 + .Lrep_good_qwords: 138 + rep stosq 139 + 140 + .Lrep_good_rest_bytes: 141 + and $7, %edx 142 + jz .Lrep_good_exit 143 + 144 + .Lrep_good_bytes: 145 + mov %edx, %ecx 146 + rep stosb 147 + 148 + .Lrep_good_exit: 149 + # see .Lexit comment above 150 + xor %eax, %eax 151 + RET 152 + 153 + .Lrep_good_qwords_exception: 154 + # convert remaining qwords back into bytes to return to caller 155 + shl $3, %rcx 156 + and $7, %edx 157 + add %rdx, %rcx 158 + jmp .Lrep_good_exit 159 + 160 + _ASM_EXTABLE_UA(.Lrep_good_qwords, .Lrep_good_qwords_exception) 161 + _ASM_EXTABLE_UA(.Lrep_good_bytes, .Lrep_good_exit) 162 + SYM_FUNC_END(clear_user_rep_good) 163 + EXPORT_SYMBOL(clear_user_rep_good) 164 + 165 + /* 166 + * Alternative clear user-space when CPU feature X86_FEATURE_ERMS is present. 167 + * Input: 168 + * rdi destination 169 + * rcx count 170 + * 171 + * Output: 172 + * rcx: uncleared bytes or 0 if successful. 173 + * 174 + */ 175 + SYM_FUNC_START(clear_user_erms) 176 + # call the original thing for less than a cacheline 177 + cmp $64, %rcx 178 + jb clear_user_original 179 + 180 + .Lerms_bytes: 181 + rep stosb 182 + 183 + .Lerms_exit: 184 + xorl %eax,%eax 185 + RET 186 + 187 + _ASM_EXTABLE_UA(.Lerms_bytes, .Lerms_exit) 188 + SYM_FUNC_END(clear_user_erms) 189 + EXPORT_SYMBOL(clear_user_erms)
-40
arch/x86/lib/usercopy_64.c
··· 14 14 * Zero Userspace 15 15 */ 16 16 17 - unsigned long __clear_user(void __user *addr, unsigned long size) 18 - { 19 - long __d0; 20 - might_fault(); 21 - /* no memory constraint because it doesn't change any memory gcc knows 22 - about */ 23 - stac(); 24 - asm volatile( 25 - " testq %[size8],%[size8]\n" 26 - " jz 4f\n" 27 - " .align 16\n" 28 - "0: movq $0,(%[dst])\n" 29 - " addq $8,%[dst]\n" 30 - " decl %%ecx ; jnz 0b\n" 31 - "4: movq %[size1],%%rcx\n" 32 - " testl %%ecx,%%ecx\n" 33 - " jz 2f\n" 34 - "1: movb $0,(%[dst])\n" 35 - " incq %[dst]\n" 36 - " decl %%ecx ; jnz 1b\n" 37 - "2:\n" 38 - 39 - _ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN8, %[size1]) 40 - _ASM_EXTABLE_UA(1b, 2b) 41 - 42 - : [size8] "=&c"(size), [dst] "=&D" (__d0) 43 - : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr)); 44 - clac(); 45 - return size; 46 - } 47 - EXPORT_SYMBOL(__clear_user); 48 - 49 - unsigned long clear_user(void __user *to, unsigned long n) 50 - { 51 - if (access_ok(to, n)) 52 - return __clear_user(to, n); 53 - return n; 54 - } 55 - EXPORT_SYMBOL(clear_user); 56 - 57 17 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE 58 18 /** 59 19 * clean_cache_range - write back a cache range with CLWB
+10
arch/x86/mm/fault.c
··· 769 769 unsigned long address, struct task_struct *tsk) 770 770 { 771 771 const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG; 772 + /* This is a racy snapshot, but it's better than nothing. */ 773 + int cpu = raw_smp_processor_id(); 772 774 773 775 if (!unhandled_signal(tsk, SIGSEGV)) 774 776 return; ··· 783 781 (void *)regs->ip, (void *)regs->sp, error_code); 784 782 785 783 print_vma_addr(KERN_CONT " in ", regs->ip); 784 + 785 + /* 786 + * Dump the likely CPU where the fatal segfault happened. 787 + * This can help identify faulty hardware. 788 + */ 789 + printk(KERN_CONT " likely on CPU %d (core %d, socket %d)", cpu, 790 + topology_core_id(cpu), topology_physical_package_id(cpu)); 791 + 786 792 787 793 printk(KERN_CONT "\n"); 788 794
+3
tools/objtool/check.c
··· 1073 1073 "copy_mc_fragile_handle_tail", 1074 1074 "copy_mc_enhanced_fast_string", 1075 1075 "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ 1076 + "clear_user_erms", 1077 + "clear_user_rep_good", 1078 + "clear_user_original", 1076 1079 NULL 1077 1080 }; 1078 1081