Linux kernel mirror: git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

x86/mmx_32: Remove X86_USE_3DNOW

This code puts an exception table entry on the PREFETCH instruction so
that, when the prefetch faults, the fixup overwrites it in place with a
JMP.d8 that skips the rest of the prefetch block. Except, of course,
kernel text is no longer writable at runtime, and rewriting an
instruction that another CPU may be executing was never SMP-safe either.

Instead of fixing this broken mess, simply take it out.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/YZKQzUmeNuwyvZpk@hirez.programming.kicks-ass.net
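[Editorial note: for context on the trick described above, the patch words
written by the removed fixups ("movw $0x1AEB, 1b" and "movw $0x05EB, 1b"
in mmx_32.c below) decode as JMP rel8: opcode 0xEB in the low byte, an
8-bit displacement in the high byte, stored low byte first because x86 is
little-endian. The following minimal userspace sketch is an illustration
only, not kernel code; decode_jmp_d8() is a hypothetical helper name, and
the block sizes come from the comments in the removed file.]

#include <stdint.h>
#include <stdio.h>

/*
 * Decode the patch word a fixup wrote over a faulting PREFETCH:
 * "movw $0x1AEB, 1b" stores bytes EB 1A, i.e. the two-byte
 * instruction JMP rel8 with displacement 0x1A (26).
 */
static void decode_jmp_d8(uint16_t patch, unsigned block_bytes)
{
	unsigned opcode = patch & 0xff;	/* stored first: 0xEB = JMP rel8 */
	unsigned disp   = patch >> 8;	/* 8-bit forward displacement */

	printf("movw $0x%04X -> %02X %02X = jmp .+%u ",
	       patch, opcode, disp, disp);
	/* 2 bytes of the JMP itself + displacement must equal the block */
	printf("(2 + %u == %u, block is %u bytes)\n",
	       disp, 2 + disp, block_bytes);
}

int main(void)
{
	decode_jmp_d8(0x1AEB, 28);	/* five PREFETCHes: "jmp on 26 bytes" */
	decode_jmp_d8(0x05EB, 7);	/* one PREFETCH: "jmp on 5 bytes" */
	return 0;
}

[The decode makes the commit's point concrete: the fixup only works by
writing two bytes into live kernel text, which read-only text and SMP
both rule out.]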

+1 -469
+1 -1
arch/x86/Kconfig
@@ -1957,7 +1957,7 @@
 
 config EFI_STUB
 	bool "EFI stub support"
-	depends on EFI && !X86_USE_3DNOW
+	depends on EFI
 	depends on $(cc-option,-mabi=ms) || X86_32
 	select RELOCATABLE
 	help
-4
arch/x86/Kconfig.cpu
@@ -342,10 +342,6 @@
 	def_bool y
 	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
 
-config X86_USE_3DNOW
-	def_bool y
-	depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML
-
 #
 # P6_NOPs are a relatively minor optimization that require a family >=
 # 6 processor, except that it is broken on certain VIA chips.
-15
arch/x86/include/asm/mmx.h
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MMX_H
-#define _ASM_X86_MMX_H
-
-/*
- *	MMX 3Dnow! helper operations
- */
-
-#include <linux/types.h>
-
-extern void *_mmx_memcpy(void *to, const void *from, size_t size);
-extern void mmx_clear_page(void *page);
-extern void mmx_copy_page(void *to, void *from);
-
-#endif /* _ASM_X86_MMX_H */
-14
arch/x86/include/asm/page_32.h
@@ -19,19 +19,6 @@
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif /* CONFIG_FLATMEM */
 
-#ifdef CONFIG_X86_USE_3DNOW
-#include <asm/mmx.h>
-
-static inline void clear_page(void *page)
-{
-	mmx_clear_page(page);
-}
-
-static inline void copy_page(void *to, void *from)
-{
-	mmx_copy_page(to, from);
-}
-#else	/* !CONFIG_X86_USE_3DNOW */
 #include <linux/string.h>
 
 static inline void clear_page(void *page)
@@ -43,7 +30,6 @@ static inline void copy_page(void *to, void *from)
 {
 	memcpy(to, from, PAGE_SIZE);
 }
-#endif	/* CONFIG_X86_USE_3DNOW */
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PAGE_32_H */
-4
arch/x86/include/asm/required-features.h
@@ -35,11 +35,7 @@
 # define NEED_CMOV	0
 #endif
 
-#ifdef CONFIG_X86_USE_3DNOW
-# define NEED_3DNOW	(1<<(X86_FEATURE_3DNOW & 31))
-#else
 # define NEED_3DNOW	0
-#endif
 
 #if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
 # define NEED_NOPL	(1<<(X86_FEATURE_NOPL & 31))
-33
arch/x86/include/asm/string_32.h
@@ -146,42 +146,9 @@
 extern void *memcpy(void *, const void *, size_t);
 
 #ifndef CONFIG_FORTIFY_SOURCE
-#ifdef CONFIG_X86_USE_3DNOW
-
-#include <asm/mmx.h>
-
-/*
- *	This CPU favours 3DNow strongly (eg AMD Athlon)
- */
-
-static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
-{
-	if (len < 512)
-		return __constant_memcpy(to, from, len);
-	return _mmx_memcpy(to, from, len);
-}
-
-static inline void *__memcpy3d(void *to, const void *from, size_t len)
-{
-	if (len < 512)
-		return __memcpy(to, from, len);
-	return _mmx_memcpy(to, from, len);
-}
-
-#define memcpy(t, f, n)				\
-	(__builtin_constant_p((n))		\
-	 ? __constant_memcpy3d((t), (f), (n))	\
-	 : __memcpy3d((t), (f), (n)))
-
-#else
-
-/*
- * No 3D Now!
- */
 
 #define memcpy(t, f, n) __builtin_memcpy(t, f, n)
 
-#endif
 #endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMMOVE
-1
arch/x86/lib/Makefile
@@ -63,7 +63,6 @@
 ifneq ($(CONFIG_X86_CMPXCHG64),y)
         lib-y += cmpxchg8b_emu.o atomic64_386_32.o
 endif
-        lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else
         obj-y += iomap_copy_64.o
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
-4
arch/x86/lib/memcpy_32.c
@@ -7,11 +7,7 @@
 
 __visible void *memcpy(void *to, const void *from, size_t n)
 {
-#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
-	return __memcpy3d(to, from, n);
-#else
 	return __memcpy(to, from, n);
-#endif
 }
 EXPORT_SYMBOL(memcpy);
 
-388
arch/x86/lib/mmx_32.c
@@ -1,388 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *	MMX 3DNow! library helper functions
- *
- *	To do:
- *	We can use MMX just for prefetch in IRQ's. This may be a win.
- *		(reported so on K6-III)
- *	We should use a better code neutral filler for the short jump
- *		leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
- *	We also want to clobber the filler register so we don't get any
- *		register forwarding stalls on the filler.
- *
- *	Add *user handling. Checksums are not a win with MMX on any CPU
- *	tested so far for any MMX solution figured.
- *
- *	22/09/2000 - Arjan van de Ven
- *		Improved for non-engineering-sample Athlons
- *
- */
-#include <linux/hardirq.h>
-#include <linux/string.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-
-#include <asm/fpu/api.h>
-#include <asm/asm.h>
-
-/*
- * Use KFPU_387.  MMX instructions are not affected by MXCSR,
- * but both AMD and Intel documentation states that even integer MMX
- * operations will result in #MF if an exception is pending in FCW.
- *
- * EMMS is not needed afterwards because, after calling kernel_fpu_end(),
- * any subsequent user of the 387 stack will reinitialize it using
- * KFPU_387.
- */
-
-void *_mmx_memcpy(void *to, const void *from, size_t len)
-{
-	void *p;
-	int i;
-
-	if (unlikely(in_interrupt()))
-		return __memcpy(to, from, len);
-
-	p = to;
-	i = len >> 6; /* len/64 */
-
-	kernel_fpu_begin_mask(KFPU_387);
-
-	__asm__ __volatile__ (
-		"1: prefetch (%0)\n"		/* This set is 28 bytes */
-		"   prefetch 64(%0)\n"
-		"   prefetch 128(%0)\n"
-		"   prefetch 192(%0)\n"
-		"   prefetch 256(%0)\n"
-		"2:  \n"
-		".section .fixup, \"ax\"\n"
-		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
-		"   jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE(1b, 3b)
-		: : "r" (from));
-
-	for ( ; i > 5; i--) {
-		__asm__ __volatile__ (
-		"1: prefetch 320(%0)\n"
-		"2: movq (%0), %%mm0\n"
-		"   movq 8(%0), %%mm1\n"
-		"   movq 16(%0), %%mm2\n"
-		"   movq 24(%0), %%mm3\n"
-		"   movq %%mm0, (%1)\n"
-		"   movq %%mm1, 8(%1)\n"
-		"   movq %%mm2, 16(%1)\n"
-		"   movq %%mm3, 24(%1)\n"
-		"   movq 32(%0), %%mm0\n"
-		"   movq 40(%0), %%mm1\n"
-		"   movq 48(%0), %%mm2\n"
-		"   movq 56(%0), %%mm3\n"
-		"   movq %%mm0, 32(%1)\n"
-		"   movq %%mm1, 40(%1)\n"
-		"   movq %%mm2, 48(%1)\n"
-		"   movq %%mm3, 56(%1)\n"
-		".section .fixup, \"ax\"\n"
-		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
-		"   jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE(1b, 3b)
-		: : "r" (from), "r" (to) : "memory");
-
-		from += 64;
-		to += 64;
-	}
-
-	for ( ; i > 0; i--) {
-		__asm__ __volatile__ (
-		"   movq (%0), %%mm0\n"
-		"   movq 8(%0), %%mm1\n"
-		"   movq 16(%0), %%mm2\n"
-		"   movq 24(%0), %%mm3\n"
-		"   movq %%mm0, (%1)\n"
-		"   movq %%mm1, 8(%1)\n"
-		"   movq %%mm2, 16(%1)\n"
-		"   movq %%mm3, 24(%1)\n"
-		"   movq 32(%0), %%mm0\n"
-		"   movq 40(%0), %%mm1\n"
-		"   movq 48(%0), %%mm2\n"
-		"   movq 56(%0), %%mm3\n"
-		"   movq %%mm0, 32(%1)\n"
-		"   movq %%mm1, 40(%1)\n"
-		"   movq %%mm2, 48(%1)\n"
-		"   movq %%mm3, 56(%1)\n"
-		: : "r" (from), "r" (to) : "memory");
-
-		from += 64;
-		to += 64;
-	}
-	/*
-	 * Now do the tail of the block:
-	 */
-	__memcpy(to, from, len & 63);
-	kernel_fpu_end();
-
-	return p;
-}
-EXPORT_SYMBOL(_mmx_memcpy);
-
-#ifdef CONFIG_MK7
-
-/*
- *	The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
- *	other MMX using processors do not.
- */
-
-static void fast_clear_page(void *page)
-{
-	int i;
-
-	kernel_fpu_begin_mask(KFPU_387);
-
-	__asm__ __volatile__ (
-		"  pxor %%mm0, %%mm0\n" : :
-	);
-
-	for (i = 0; i < 4096/64; i++) {
-		__asm__ __volatile__ (
-		"  movntq %%mm0, (%0)\n"
-		"  movntq %%mm0, 8(%0)\n"
-		"  movntq %%mm0, 16(%0)\n"
-		"  movntq %%mm0, 24(%0)\n"
-		"  movntq %%mm0, 32(%0)\n"
-		"  movntq %%mm0, 40(%0)\n"
-		"  movntq %%mm0, 48(%0)\n"
-		"  movntq %%mm0, 56(%0)\n"
-		: : "r" (page) : "memory");
-		page += 64;
-	}
-
-	/*
-	 * Since movntq is weakly-ordered, a "sfence" is needed to become
-	 * ordered again:
-	 */
-	__asm__ __volatile__("sfence\n"::);
-
-	kernel_fpu_end();
-}
-
-static void fast_copy_page(void *to, void *from)
-{
-	int i;
-
-	kernel_fpu_begin_mask(KFPU_387);
-
-	/*
-	 * maybe the prefetch stuff can go before the expensive fnsave...
-	 * but that is for later. -AV
-	 */
-	__asm__ __volatile__ (
-		"1: prefetch (%0)\n"
-		"   prefetch 64(%0)\n"
-		"   prefetch 128(%0)\n"
-		"   prefetch 192(%0)\n"
-		"   prefetch 256(%0)\n"
-		"2:  \n"
-		".section .fixup, \"ax\"\n"
-		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
-		"   jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE(1b, 3b) : : "r" (from));
-
-	for (i = 0; i < (4096-320)/64; i++) {
-		__asm__ __volatile__ (
-		"1: prefetch 320(%0)\n"
-		"2: movq (%0), %%mm0\n"
-		"   movntq %%mm0, (%1)\n"
-		"   movq 8(%0), %%mm1\n"
-		"   movntq %%mm1, 8(%1)\n"
-		"   movq 16(%0), %%mm2\n"
-		"   movntq %%mm2, 16(%1)\n"
-		"   movq 24(%0), %%mm3\n"
-		"   movntq %%mm3, 24(%1)\n"
-		"   movq 32(%0), %%mm4\n"
-		"   movntq %%mm4, 32(%1)\n"
-		"   movq 40(%0), %%mm5\n"
-		"   movntq %%mm5, 40(%1)\n"
-		"   movq 48(%0), %%mm6\n"
-		"   movntq %%mm6, 48(%1)\n"
-		"   movq 56(%0), %%mm7\n"
-		"   movntq %%mm7, 56(%1)\n"
-		".section .fixup, \"ax\"\n"
-		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
-		"   jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");
-
-		from += 64;
-		to += 64;
-	}
-
-	for (i = (4096-320)/64; i < 4096/64; i++) {
-		__asm__ __volatile__ (
-		"2: movq (%0), %%mm0\n"
-		"   movntq %%mm0, (%1)\n"
-		"   movq 8(%0), %%mm1\n"
-		"   movntq %%mm1, 8(%1)\n"
-		"   movq 16(%0), %%mm2\n"
-		"   movntq %%mm2, 16(%1)\n"
-		"   movq 24(%0), %%mm3\n"
-		"   movntq %%mm3, 24(%1)\n"
-		"   movq 32(%0), %%mm4\n"
-		"   movntq %%mm4, 32(%1)\n"
-		"   movq 40(%0), %%mm5\n"
-		"   movntq %%mm5, 40(%1)\n"
-		"   movq 48(%0), %%mm6\n"
-		"   movntq %%mm6, 48(%1)\n"
-		"   movq 56(%0), %%mm7\n"
-		"   movntq %%mm7, 56(%1)\n"
-		: : "r" (from), "r" (to) : "memory");
-		from += 64;
-		to += 64;
-	}
-	/*
-	 * Since movntq is weakly-ordered, a "sfence" is needed to become
-	 * ordered again:
-	 */
-	__asm__ __volatile__("sfence \n"::);
-	kernel_fpu_end();
-}
-
-#else /* CONFIG_MK7 */
-
-/*
- *	Generic MMX implementation without K7 specific streaming
- */
-static void fast_clear_page(void *page)
-{
-	int i;
-
-	kernel_fpu_begin_mask(KFPU_387);
-
-	__asm__ __volatile__ (
-		"  pxor %%mm0, %%mm0\n" : :
-	);
-
-	for (i = 0; i < 4096/128; i++) {
-		__asm__ __volatile__ (
-		"  movq %%mm0, (%0)\n"
-		"  movq %%mm0, 8(%0)\n"
-		"  movq %%mm0, 16(%0)\n"
-		"  movq %%mm0, 24(%0)\n"
-		"  movq %%mm0, 32(%0)\n"
-		"  movq %%mm0, 40(%0)\n"
-		"  movq %%mm0, 48(%0)\n"
-		"  movq %%mm0, 56(%0)\n"
-		"  movq %%mm0, 64(%0)\n"
-		"  movq %%mm0, 72(%0)\n"
-		"  movq %%mm0, 80(%0)\n"
-		"  movq %%mm0, 88(%0)\n"
-		"  movq %%mm0, 96(%0)\n"
-		"  movq %%mm0, 104(%0)\n"
-		"  movq %%mm0, 112(%0)\n"
-		"  movq %%mm0, 120(%0)\n"
-		: : "r" (page) : "memory");
-		page += 128;
-	}
-
-	kernel_fpu_end();
-}
-
-static void fast_copy_page(void *to, void *from)
-{
-	int i;
-
-	kernel_fpu_begin_mask(KFPU_387);
-
-	__asm__ __volatile__ (
-		"1: prefetch (%0)\n"
-		"   prefetch 64(%0)\n"
-		"   prefetch 128(%0)\n"
-		"   prefetch 192(%0)\n"
-		"   prefetch 256(%0)\n"
-		"2:  \n"
-		".section .fixup, \"ax\"\n"
-		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
-		"   jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE(1b, 3b) : : "r" (from));
-
-	for (i = 0; i < 4096/64; i++) {
-		__asm__ __volatile__ (
-		"1: prefetch 320(%0)\n"
-		"2: movq (%0), %%mm0\n"
-		"   movq 8(%0), %%mm1\n"
-		"   movq 16(%0), %%mm2\n"
-		"   movq 24(%0), %%mm3\n"
-		"   movq %%mm0, (%1)\n"
-		"   movq %%mm1, 8(%1)\n"
-		"   movq %%mm2, 16(%1)\n"
-		"   movq %%mm3, 24(%1)\n"
-		"   movq 32(%0), %%mm0\n"
-		"   movq 40(%0), %%mm1\n"
-		"   movq 48(%0), %%mm2\n"
-		"   movq 56(%0), %%mm3\n"
-		"   movq %%mm0, 32(%1)\n"
-		"   movq %%mm1, 40(%1)\n"
-		"   movq %%mm2, 48(%1)\n"
-		"   movq %%mm3, 56(%1)\n"
-		".section .fixup, \"ax\"\n"
-		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
-		"   jmp 2b\n"
-		".previous\n"
-		_ASM_EXTABLE(1b, 3b)
-		: : "r" (from), "r" (to) : "memory");
-
-		from += 64;
-		to += 64;
-	}
-	kernel_fpu_end();
-}
-
-#endif /* !CONFIG_MK7 */
-
-/*
- * Favour MMX for page clear and copy:
- */
-static void slow_zero_page(void *page)
-{
-	int d0, d1;
-
-	__asm__ __volatile__(
-		"cld\n\t"
-		"rep ; stosl"
-
-			: "=&c" (d0), "=&D" (d1)
-			:"a" (0), "1" (page), "0" (1024)
-			:"memory");
-}
-
-void mmx_clear_page(void *page)
-{
-	if (unlikely(in_interrupt()))
-		slow_zero_page(page);
-	else
-		fast_clear_page(page);
-}
-EXPORT_SYMBOL(mmx_clear_page);
-
-static void slow_copy_page(void *to, void *from)
-{
-	int d0, d1, d2;
-
-	__asm__ __volatile__(
-		"cld\n\t"
-		"rep ; movsl"
-		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
-		: "0" (1024), "1" ((long) to), "2" ((long) from)
-		: "memory");
-}
-
-void mmx_copy_page(void *to, void *from)
-{
-	if (unlikely(in_interrupt()))
-		slow_copy_page(to, from);
-	else
-		fast_copy_page(to, from);
-}
-EXPORT_SYMBOL(mmx_copy_page);
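[Editorial note: the K7 fast paths above pair MOVNTQ non-temporal stores
with a trailing SFENCE, exactly as the removed comments explain. The
same technique is still usable from userspace via intrinsics. Below is a
hedged sketch, not kernel code: stream_clear_page() is a hypothetical
name, and it assumes an SSE-capable x86 built with GCC or Clang.]

#include <stdint.h>
#include <stdio.h>
#include <mmintrin.h>	/* __m64, _mm_setzero_si64, _mm_empty */
#include <xmmintrin.h>	/* _mm_stream_pi, _mm_sfence */

/* Clear a 4 KiB page with non-temporal 8-byte stores (MOVNTQ). */
static void stream_clear_page(void *page)
{
	__m64 zero = _mm_setzero_si64();
	__m64 *p = page;
	int i;

	for (i = 0; i < 4096 / 8; i++)
		_mm_stream_pi(&p[i], zero);	/* movntq: bypasses the cache */

	/*
	 * movntq is weakly ordered: fence before anything reads the
	 * page through ordinary loads.
	 */
	_mm_sfence();
	_mm_empty();	/* emms: release the MMX/x87 register stack */
}

int main(void)
{
	_Alignas(64) static uint8_t page[4096];

	page[0] = page[4095] = 0xff;
	stream_clear_page(page);
	printf("%u %u\n", page[0], page[4095]);	/* prints: 0 0 */
	return 0;
}

[In-kernel, the kernel_fpu_begin_mask(KFPU_387)/kernel_fpu_end()
bracketing seen above was what made MMX usable at all, since the FPU
state otherwise belongs to user tasks; a userspace version needs no such
save/restore.]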
-1
arch/x86/lib/usercopy_32.c
@@ -8,7 +8,6 @@
  */
 #include <linux/export.h>
 #include <linux/uaccess.h>
-#include <asm/mmx.h>
 #include <asm/asm.h>
 
 #ifdef CONFIG_X86_INTEL_USERCOPY
-4
tools/arch/x86/include/asm/required-features.h
@@ -35,11 +35,7 @@
 # define NEED_CMOV	0
 #endif
 
-#ifdef CONFIG_X86_USE_3DNOW
-# define NEED_3DNOW	(1<<(X86_FEATURE_3DNOW & 31))
-#else
 # define NEED_3DNOW	0
-#endif
 
 #if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
 # define NEED_NOPL	(1<<(X86_FEATURE_NOPL & 31))