Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 FPU updates from Ingo Molnar:
"The main changes in this cycle were:

- do a large round of simplifications after all CPUs do 'eager' FPU
context switching in v4.9: remove CR0 twiddling, remove leftover
eager/lazy bits, etc (Andy Lutomirski)

- more FPU code simplifications: remove struct fpu::counter, clarify
nomenclature, remove unnecessary arguments/functions and better
structure the code (Rik van Riel)"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/fpu: Remove clts()
x86/fpu: Remove stts()
x86/fpu: Handle #NM without FPU emulation as an error
x86/fpu, lguest: Remove CR0.TS support
x86/fpu, kvm: Remove host CR0.TS manipulation
x86/fpu: Remove irq_ts_save() and irq_ts_restore()
x86/fpu: Stop saving and restoring CR0.TS in fpu__init_check_bugs()
x86/fpu: Get rid of two redundant clts() calls
x86/fpu: Finish excising 'eagerfpu'
x86/fpu: Split old_fpu & new_fpu handling into separate functions
x86/fpu: Remove 'cpu' argument from __cpu_invalidate_fpregs_state()
x86/fpu: Split old & new FPU code paths
x86/fpu: Remove __fpregs_(de)activate()
x86/fpu: Rename lazy restore functions to "register state valid"
x86/fpu, kvm: Remove KVM vcpu->fpu_counter
x86/fpu: Remove struct fpu::counter
x86/fpu: Remove use_eager_fpu()
x86/fpu: Remove the XFEATURE_MASK_EAGER/LAZY distinction
x86/fpu: Hard-disable lazy FPU mode
x86/crypto, x86/fpu: Remove X86_FEATURE_EAGER_FPU #ifdef from the crc32c code

+105 -547
-6
Documentation/kernel-parameters.txt
··· 1079 1079 nopku [X86] Disable Memory Protection Keys CPU feature found 1080 1080 in some Intel CPUs. 1081 1081 1082 - eagerfpu= [X86] 1083 - on enable eager fpu restore 1084 - off disable eager fpu restore 1085 - auto selects the default scheme, which automatically 1086 - enables eagerfpu restore for xsaveopt. 1087 - 1088 1082 module.async_probe [KNL] 1089 1083 Enable asynchronous probe on this module. 1090 1084
+4 -18
arch/x86/crypto/crc32c-intel_glue.c
··· 48 48 #ifdef CONFIG_X86_64 49 49 /* 50 50 * use carryless multiply version of crc32c when buffer 51 - * size is >= 512 (when eager fpu is enabled) or 52 - * >= 1024 (when eager fpu is disabled) to account 51 + * size is >= 512 to account 53 52 * for fpu state save/restore overhead. 54 53 */ 55 - #define CRC32C_PCL_BREAKEVEN_EAGERFPU 512 56 - #define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024 54 + #define CRC32C_PCL_BREAKEVEN 512 57 55 58 56 asmlinkage unsigned int crc_pcl(const u8 *buffer, int len, 59 57 unsigned int crc_init); 60 - static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU; 61 - #if defined(X86_FEATURE_EAGER_FPU) 62 - #define set_pcl_breakeven_point() \ 63 - do { \ 64 - if (!use_eager_fpu()) \ 65 - crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \ 66 - } while (0) 67 - #else 68 - #define set_pcl_breakeven_point() \ 69 - (crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU) 70 - #endif 71 58 #endif /* CONFIG_X86_64 */ 72 59 73 60 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) ··· 177 190 * use faster PCL version if datasize is large enough to 178 191 * overcome kernel fpu state save/restore overhead 179 192 */ 180 - if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { 193 + if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { 181 194 kernel_fpu_begin(); 182 195 *crcp = crc_pcl(data, len, *crcp); 183 196 kernel_fpu_end(); ··· 189 202 static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, 190 203 u8 *out) 191 204 { 192 - if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { 205 + if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { 193 206 kernel_fpu_begin(); 194 207 *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); 195 208 kernel_fpu_end(); ··· 248 261 alg.update = crc32c_pcl_intel_update; 249 262 alg.finup = crc32c_pcl_intel_finup; 250 263 alg.digest = crc32c_pcl_intel_digest; 251 - set_pcl_breakeven_point(); 252 264 } 253 265 #endif 254 266 return 
crypto_register_shash(&alg);
-1
arch/x86/include/asm/cpufeatures.h
··· 104 104 #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ 105 105 #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ 106 106 #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ 107 - #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ 108 107 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ 109 108 110 109 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-10
arch/x86/include/asm/fpu/api.h
··· 27 27 extern bool irq_fpu_usable(void); 28 28 29 29 /* 30 - * Some instructions like VIA's padlock instructions generate a spurious 31 - * DNA fault but don't modify SSE registers. And these instructions 32 - * get used from interrupt context as well. To prevent these kernel instructions 33 - * in interrupt context interacting wrongly with other user/kernel fpu usage, we 34 - * should use them only in the context of irq_ts_save/restore() 35 - */ 36 - extern int irq_ts_save(void); 37 - extern void irq_ts_restore(int TS_state); 38 - 39 - /* 40 30 * Query the presence of one or more xfeatures. Works on any legacy CPU as well. 41 31 * 42 32 * If 'feature_name' is set then put a human-readable description of
+42 -97
arch/x86/include/asm/fpu/internal.h
··· 60 60 /* 61 61 * FPU related CPU feature flag helper routines: 62 62 */ 63 - static __always_inline __pure bool use_eager_fpu(void) 64 - { 65 - return static_cpu_has(X86_FEATURE_EAGER_FPU); 66 - } 67 - 68 63 static __always_inline __pure bool use_xsaveopt(void) 69 64 { 70 65 return static_cpu_has(X86_FEATURE_XSAVEOPT); ··· 479 484 DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); 480 485 481 486 /* 482 - * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, 483 - * on this CPU. 487 + * The in-register FPU state for an FPU context on a CPU is assumed to be 488 + * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx 489 + * matches the FPU. 484 490 * 485 - * This will disable any lazy FPU state restore of the current FPU state, 486 - * but if the current thread owns the FPU, it will still be saved by. 491 + * If the FPU register state is valid, the kernel can skip restoring the 492 + * FPU state from memory. 493 + * 494 + * Any code that clobbers the FPU registers or updates the in-memory 495 + * FPU state for a task MUST let the rest of the kernel know that the 496 + * FPU registers are no longer valid for this task. 497 + * 498 + * Either one of these invalidation functions is enough. Invalidate 499 + * a resource you control: CPU if using the CPU for something else 500 + * (with preemption disabled), FPU for the current task, or a task that 501 + * is prevented from running by the current task. 
487 502 */ 488 - static inline void __cpu_disable_lazy_restore(unsigned int cpu) 503 + static inline void __cpu_invalidate_fpregs_state(void) 489 504 { 490 - per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; 505 + __this_cpu_write(fpu_fpregs_owner_ctx, NULL); 491 506 } 492 507 493 - static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) 508 + static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) 509 + { 510 + fpu->last_cpu = -1; 511 + } 512 + 513 + static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) 494 514 { 495 515 return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; 496 516 } 497 517 498 - 499 518 /* 500 - * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation' 501 - * idiom, which is then paired with the sw-flag (fpregs_active) later on: 519 + * These generally need preemption protection to work, 520 + * do try to avoid using these on their own: 502 521 */ 503 - 504 - static inline void __fpregs_activate_hw(void) 505 - { 506 - if (!use_eager_fpu()) 507 - clts(); 508 - } 509 - 510 - static inline void __fpregs_deactivate_hw(void) 511 - { 512 - if (!use_eager_fpu()) 513 - stts(); 514 - } 515 - 516 - /* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */ 517 - static inline void __fpregs_deactivate(struct fpu *fpu) 522 + static inline void fpregs_deactivate(struct fpu *fpu) 518 523 { 519 524 WARN_ON_FPU(!fpu->fpregs_active); 520 525 ··· 523 528 trace_x86_fpu_regs_deactivated(fpu); 524 529 } 525 530 526 - /* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ 527 - static inline void __fpregs_activate(struct fpu *fpu) 531 + static inline void fpregs_activate(struct fpu *fpu) 528 532 { 529 533 WARN_ON_FPU(fpu->fpregs_active); 530 534 ··· 548 554 } 549 555 550 556 /* 551 - * Encapsulate the CR0.TS handling together with the 552 - * software flag. 
553 - * 554 - * These generally need preemption protection to work, 555 - * do try to avoid using these on their own. 556 - */ 557 - static inline void fpregs_activate(struct fpu *fpu) 558 - { 559 - __fpregs_activate_hw(); 560 - __fpregs_activate(fpu); 561 - } 562 - 563 - static inline void fpregs_deactivate(struct fpu *fpu) 564 - { 565 - __fpregs_deactivate(fpu); 566 - __fpregs_deactivate_hw(); 567 - } 568 - 569 - /* 570 557 * FPU state switching for scheduling. 571 558 * 572 559 * This is a two-stage process: 573 560 * 574 - * - switch_fpu_prepare() saves the old state and 575 - * sets the new state of the CR0.TS bit. This is 576 - * done within the context of the old process. 561 + * - switch_fpu_prepare() saves the old state. 562 + * This is done within the context of the old process. 577 563 * 578 564 * - switch_fpu_finish() restores the new state as 579 565 * necessary. 580 566 */ 581 - typedef struct { int preload; } fpu_switch_t; 582 - 583 - static inline fpu_switch_t 584 - switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) 567 + static inline void 568 + switch_fpu_prepare(struct fpu *old_fpu, int cpu) 585 569 { 586 - fpu_switch_t fpu; 587 - 588 - /* 589 - * If the task has used the math, pre-load the FPU on xsave processors 590 - * or if the past 5 consecutive context-switches used math. 591 - */ 592 - fpu.preload = static_cpu_has(X86_FEATURE_FPU) && 593 - new_fpu->fpstate_active && 594 - (use_eager_fpu() || new_fpu->counter > 5); 595 - 596 570 if (old_fpu->fpregs_active) { 597 571 if (!copy_fpregs_to_fpstate(old_fpu)) 598 572 old_fpu->last_cpu = -1; ··· 570 608 /* But leave fpu_fpregs_owner_ctx! */ 571 609 old_fpu->fpregs_active = 0; 572 610 trace_x86_fpu_regs_deactivated(old_fpu); 573 - 574 - /* Don't change CR0.TS if we just switch! 
*/ 575 - if (fpu.preload) { 576 - new_fpu->counter++; 577 - __fpregs_activate(new_fpu); 578 - trace_x86_fpu_regs_activated(new_fpu); 579 - prefetch(&new_fpu->state); 580 - } else { 581 - __fpregs_deactivate_hw(); 582 - } 583 - } else { 584 - old_fpu->counter = 0; 611 + } else 585 612 old_fpu->last_cpu = -1; 586 - if (fpu.preload) { 587 - new_fpu->counter++; 588 - if (fpu_want_lazy_restore(new_fpu, cpu)) 589 - fpu.preload = 0; 590 - else 591 - prefetch(&new_fpu->state); 592 - fpregs_activate(new_fpu); 593 - } 594 - } 595 - return fpu; 596 613 } 597 614 598 615 /* ··· 579 638 */ 580 639 581 640 /* 582 - * By the time this gets called, we've already cleared CR0.TS and 583 - * given the process the FPU if we are going to preload the FPU 584 - * state - all we need to do is to conditionally restore the register 585 - * state itself. 641 + * Set up the userspace FPU context for the new task, if the task 642 + * has used the FPU. 586 643 */ 587 - static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) 644 + static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) 588 645 { 589 - if (fpu_switch.preload) 590 - copy_kernel_to_fpregs(&new_fpu->state); 646 + bool preload = static_cpu_has(X86_FEATURE_FPU) && 647 + new_fpu->fpstate_active; 648 + 649 + if (preload) { 650 + if (!fpregs_state_valid(new_fpu, cpu)) 651 + copy_kernel_to_fpregs(&new_fpu->state); 652 + fpregs_activate(new_fpu); 653 + } 591 654 } 592 655 593 656 /*
-34
arch/x86/include/asm/fpu/types.h
··· 322 322 unsigned char fpregs_active; 323 323 324 324 /* 325 - * @counter: 326 - * 327 - * This counter contains the number of consecutive context switches 328 - * during which the FPU stays used. If this is over a threshold, the 329 - * lazy FPU restore logic becomes eager, to save the trap overhead. 330 - * This is an unsigned char so that after 256 iterations the counter 331 - * wraps and the context switch behavior turns lazy again; this is to 332 - * deal with bursty apps that only use the FPU for a short time: 333 - */ 334 - unsigned char counter; 335 - /* 336 325 * @state: 337 326 * 338 327 * In-memory copy of all FPU registers that we save/restore ··· 329 340 * the registers in the FPU are more recent than this state 330 341 * copy. If the task context-switches away then they get 331 342 * saved here and represent the FPU state. 332 - * 333 - * After context switches there may be a (short) time period 334 - * during which the in-FPU hardware registers are unchanged 335 - * and still perfectly match this state, if the tasks 336 - * scheduled afterwards are not using the FPU. 337 - * 338 - * This is the 'lazy restore' window of optimization, which 339 - * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. 340 - * 341 - * We detect whether a subsequent task uses the FPU via setting 342 - * CR0::TS to 1, which causes any FPU use to raise a #NM fault. 343 - * 344 - * During this window, if the task gets scheduled again, we 345 - * might be able to skip having to do a restore from this 346 - * memory buffer to the hardware registers - at the cost of 347 - * incurring the overhead of #NM fault traps. 348 - * 349 - * Note that on modern CPUs that support the XSAVEOPT (or other 350 - * optimized XSAVE instructions), we don't use #NM traps anymore, 351 - * as the hardware can track whether FPU registers need saving 352 - * or not. 
On such CPUs we activate the non-lazy ('eagerfpu') 353 - * logic, which unconditionally saves/restores all FPU state 354 - * across context switches. (if FPU state exists.) 355 343 */ 356 344 union fpregs_state state; 357 345 /*
+6 -11
arch/x86/include/asm/fpu/xstate.h
··· 21 21 /* Supervisor features */ 22 22 #define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) 23 23 24 - /* Supported features which support lazy state saving */ 25 - #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ 24 + /* All currently supported features */ 25 + #define XCNTXT_MASK (XFEATURE_MASK_FP | \ 26 26 XFEATURE_MASK_SSE | \ 27 27 XFEATURE_MASK_YMM | \ 28 28 XFEATURE_MASK_OPMASK | \ 29 29 XFEATURE_MASK_ZMM_Hi256 | \ 30 - XFEATURE_MASK_Hi16_ZMM) 31 - 32 - /* Supported features which require eager state saving */ 33 - #define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \ 34 - XFEATURE_MASK_BNDCSR | \ 35 - XFEATURE_MASK_PKRU) 36 - 37 - /* All currently supported features */ 38 - #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) 30 + XFEATURE_MASK_Hi16_ZMM | \ 31 + XFEATURE_MASK_PKRU | \ 32 + XFEATURE_MASK_BNDREGS | \ 33 + XFEATURE_MASK_BNDCSR) 39 34 40 35 #ifdef CONFIG_X86_64 41 36 #define REX_PREFIX "0x48, "
-1
arch/x86/include/asm/lguest_hcall.h
··· 9 9 #define LHCALL_FLUSH_TLB 5 10 10 #define LHCALL_LOAD_IDT_ENTRY 6 11 11 #define LHCALL_SET_STACK 7 12 - #define LHCALL_TS 8 13 12 #define LHCALL_SET_CLOCKEVENT 9 14 13 #define LHCALL_HALT 10 15 14 #define LHCALL_SET_PMD 13
-5
arch/x86/include/asm/paravirt.h
··· 41 41 PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); 42 42 } 43 43 44 - static inline void clts(void) 45 - { 46 - PVOP_VCALL0(pv_cpu_ops.clts); 47 - } 48 - 49 44 static inline unsigned long read_cr0(void) 50 45 { 51 46 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
-2
arch/x86/include/asm/paravirt_types.h
··· 103 103 unsigned long (*get_debugreg)(int regno); 104 104 void (*set_debugreg)(int regno, unsigned long value); 105 105 106 - void (*clts)(void); 107 - 108 106 unsigned long (*read_cr0)(void); 109 107 void (*write_cr0)(unsigned long); 110 108
-13
arch/x86/include/asm/special_insns.h
··· 6 6 7 7 #include <asm/nops.h> 8 8 9 - static inline void native_clts(void) 10 - { 11 - asm volatile("clts"); 12 - } 13 - 14 9 /* 15 10 * Volatile isn't enough to prevent the compiler from reordering the 16 11 * read/write functions for the control registers and messing everything up. ··· 203 208 204 209 #endif 205 210 206 - /* Clear the 'TS' bit */ 207 - static inline void clts(void) 208 - { 209 - native_clts(); 210 - } 211 - 212 211 #endif/* CONFIG_PARAVIRT */ 213 - 214 - #define stts() write_cr0(read_cr0() | X86_CR0_TS) 215 212 216 213 static inline void clflush(volatile void *__p) 217 214 {
+1 -4
arch/x86/include/asm/trace/fpu.h
··· 14 14 __field(struct fpu *, fpu) 15 15 __field(bool, fpregs_active) 16 16 __field(bool, fpstate_active) 17 - __field(int, counter) 18 17 __field(u64, xfeatures) 19 18 __field(u64, xcomp_bv) 20 19 ), ··· 22 23 __entry->fpu = fpu; 23 24 __entry->fpregs_active = fpu->fpregs_active; 24 25 __entry->fpstate_active = fpu->fpstate_active; 25 - __entry->counter = fpu->counter; 26 26 if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { 27 27 __entry->xfeatures = fpu->state.xsave.header.xfeatures; 28 28 __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; 29 29 } 30 30 ), 31 - TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx", 31 + TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", 32 32 __entry->fpu, 33 33 __entry->fpregs_active, 34 34 __entry->fpstate_active, 35 - __entry->counter, 36 35 __entry->xfeatures, 37 36 __entry->xcomp_bv 38 37 )
-7
arch/x86/kernel/fpu/bugs.c
··· 23 23 */ 24 24 void __init fpu__init_check_bugs(void) 25 25 { 26 - u32 cr0_saved; 27 26 s32 fdiv_bug; 28 27 29 28 /* kernel_fpu_begin/end() relies on patched alternative instructions. */ 30 29 if (!boot_cpu_has(X86_FEATURE_FPU)) 31 30 return; 32 - 33 - /* We might have CR0::TS set already, clear it: */ 34 - cr0_saved = read_cr0(); 35 - write_cr0(cr0_saved & ~X86_CR0_TS); 36 31 37 32 kernel_fpu_begin(); 38 33 ··· 50 55 : "m" (*&x), "m" (*&y)); 51 56 52 57 kernel_fpu_end(); 53 - 54 - write_cr0(cr0_saved); 55 58 56 59 if (fdiv_bug) { 57 60 set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
+7 -67
arch/x86/kernel/fpu/core.c
··· 58 58 return this_cpu_read(in_kernel_fpu); 59 59 } 60 60 61 - /* 62 - * Were we in an interrupt that interrupted kernel mode? 63 - * 64 - * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that 65 - * pair does nothing at all: the thread must not have fpu (so 66 - * that we don't try to save the FPU state), and TS must 67 - * be set (so that the clts/stts pair does nothing that is 68 - * visible in the interrupted kernel thread). 69 - * 70 - * Except for the eagerfpu case when we return true; in the likely case 71 - * the thread has FPU but we are not going to set/clear TS. 72 - */ 73 61 static bool interrupted_kernel_fpu_idle(void) 74 62 { 75 - if (kernel_fpu_disabled()) 76 - return false; 77 - 78 - if (use_eager_fpu()) 79 - return true; 80 - 81 - return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS); 63 + return !kernel_fpu_disabled(); 82 64 } 83 65 84 66 /* ··· 107 125 */ 108 126 copy_fpregs_to_fpstate(fpu); 109 127 } else { 110 - this_cpu_write(fpu_fpregs_owner_ctx, NULL); 111 - __fpregs_activate_hw(); 128 + __cpu_invalidate_fpregs_state(); 112 129 } 113 130 } 114 131 EXPORT_SYMBOL(__kernel_fpu_begin); ··· 118 137 119 138 if (fpu->fpregs_active) 120 139 copy_kernel_to_fpregs(&fpu->state); 121 - else 122 - __fpregs_deactivate_hw(); 123 140 124 141 kernel_fpu_enable(); 125 142 } ··· 138 159 EXPORT_SYMBOL_GPL(kernel_fpu_end); 139 160 140 161 /* 141 - * CR0::TS save/restore functions: 142 - */ 143 - int irq_ts_save(void) 144 - { 145 - /* 146 - * If in process context and not atomic, we can take a spurious DNA fault. 
147 - * Otherwise, doing clts() in process context requires disabling preemption 148 - * or some heavy lifting like kernel_fpu_begin() 149 - */ 150 - if (!in_atomic()) 151 - return 0; 152 - 153 - if (read_cr0() & X86_CR0_TS) { 154 - clts(); 155 - return 1; 156 - } 157 - 158 - return 0; 159 - } 160 - EXPORT_SYMBOL_GPL(irq_ts_save); 161 - 162 - void irq_ts_restore(int TS_state) 163 - { 164 - if (TS_state) 165 - stts(); 166 - } 167 - EXPORT_SYMBOL_GPL(irq_ts_restore); 168 - 169 - /* 170 162 * Save the FPU state (mark it for reload if necessary): 171 163 * 172 164 * This only ever gets called for the current task. ··· 150 200 trace_x86_fpu_before_save(fpu); 151 201 if (fpu->fpregs_active) { 152 202 if (!copy_fpregs_to_fpstate(fpu)) { 153 - if (use_eager_fpu()) 154 - copy_kernel_to_fpregs(&fpu->state); 155 - else 156 - fpregs_deactivate(fpu); 203 + copy_kernel_to_fpregs(&fpu->state); 157 204 } 158 205 } 159 206 trace_x86_fpu_after_save(fpu); ··· 194 247 195 248 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) 196 249 { 197 - dst_fpu->counter = 0; 198 250 dst_fpu->fpregs_active = 0; 199 251 dst_fpu->last_cpu = -1; 200 252 ··· 206 260 * Don't let 'init optimized' areas of the XSAVE area 207 261 * leak into the child task: 208 262 */ 209 - if (use_eager_fpu()) 210 - memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); 263 + memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); 211 264 212 265 /* 213 266 * Save current FPU registers directly into the child ··· 228 283 memcpy(&src_fpu->state, &dst_fpu->state, 229 284 fpu_kernel_xstate_size); 230 285 231 - if (use_eager_fpu()) 232 - copy_kernel_to_fpregs(&src_fpu->state); 233 - else 234 - fpregs_deactivate(src_fpu); 286 + copy_kernel_to_fpregs(&src_fpu->state); 235 287 } 236 288 preempt_enable(); 237 289 ··· 308 366 309 367 if (fpu->fpstate_active) { 310 368 /* Invalidate any lazy state: */ 311 - fpu->last_cpu = -1; 369 + __fpu_invalidate_fpregs_state(fpu); 312 370 } else { 313 371 fpstate_init(&fpu->state); 
314 372 trace_x86_fpu_init_state(fpu); ··· 351 409 * ensures we will not be lazy and skip a XRSTOR in the 352 410 * future. 353 411 */ 354 - fpu->last_cpu = -1; 412 + __fpu_invalidate_fpregs_state(fpu); 355 413 } 356 414 357 415 /* ··· 401 459 trace_x86_fpu_before_restore(fpu); 402 460 fpregs_activate(fpu); 403 461 copy_kernel_to_fpregs(&fpu->state); 404 - fpu->counter++; 405 462 trace_x86_fpu_after_restore(fpu); 406 463 kernel_fpu_enable(); 407 464 } ··· 418 477 void fpu__drop(struct fpu *fpu) 419 478 { 420 479 preempt_disable(); 421 - fpu->counter = 0; 422 480 423 481 if (fpu->fpregs_active) { 424 482 /* Ignore delayed exceptions from user space */
+2 -105
arch/x86/kernel/fpu/init.c
··· 10 10 #include <linux/init.h> 11 11 12 12 /* 13 - * Initialize the TS bit in CR0 according to the style of context-switches 14 - * we are using: 15 - */ 16 - static void fpu__init_cpu_ctx_switch(void) 17 - { 18 - if (!boot_cpu_has(X86_FEATURE_EAGER_FPU)) 19 - stts(); 20 - else 21 - clts(); 22 - } 23 - 24 - /* 25 13 * Initialize the registers found in all CPUs, CR0 and CR4: 26 14 */ 27 15 static void fpu__init_cpu_generic(void) ··· 46 58 { 47 59 fpu__init_cpu_generic(); 48 60 fpu__init_cpu_xstate(); 49 - fpu__init_cpu_ctx_switch(); 50 61 } 51 62 52 63 /* ··· 220 233 } 221 234 222 235 /* 223 - * FPU context switching strategies: 224 - * 225 - * Against popular belief, we don't do lazy FPU saves, due to the 226 - * task migration complications it brings on SMP - we only do 227 - * lazy FPU restores. 228 - * 229 - * 'lazy' is the traditional strategy, which is based on setting 230 - * CR0::TS to 1 during context-switch (instead of doing a full 231 - * restore of the FPU state), which causes the first FPU instruction 232 - * after the context switch (whenever it is executed) to fault - at 233 - * which point we lazily restore the FPU state into FPU registers. 234 - * 235 - * Tasks are of course under no obligation to execute FPU instructions, 236 - * so it can easily happen that another context-switch occurs without 237 - * a single FPU instruction being executed. If we eventually switch 238 - * back to the original task (that still owns the FPU) then we have 239 - * not only saved the restores along the way, but we also have the 240 - * FPU ready to be used for the original task. 241 - * 242 - * 'lazy' is deprecated because it's almost never a performance win 243 - * and it's much more complicated than 'eager'. 244 - * 245 - * 'eager' switching is by default on all CPUs, there we switch the FPU 246 - * state during every context switch, regardless of whether the task 247 - * has used FPU instructions in that time slice or not. 
This is done 248 - * because modern FPU context saving instructions are able to optimize 249 - * state saving and restoration in hardware: they can detect both 250 - * unused and untouched FPU state and optimize accordingly. 251 - * 252 - * [ Note that even in 'lazy' mode we might optimize context switches 253 - * to use 'eager' restores, if we detect that a task is using the FPU 254 - * frequently. See the fpu->counter logic in fpu/internal.h for that. ] 255 - */ 256 - static enum { ENABLE, DISABLE } eagerfpu = ENABLE; 257 - 258 - /* 259 236 * Find supported xfeatures based on cpu features and command-line input. 260 237 * This must be called after fpu__init_parse_early_param() is called and 261 238 * xfeatures_mask is enumerated. 262 239 */ 263 240 u64 __init fpu__get_supported_xfeatures_mask(void) 264 241 { 265 - /* Support all xfeatures known to us */ 266 - if (eagerfpu != DISABLE) 267 - return XCNTXT_MASK; 268 - 269 - /* Warning of xfeatures being disabled for no eagerfpu mode */ 270 - if (xfeatures_mask & XFEATURE_MASK_EAGER) { 271 - pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", 272 - xfeatures_mask & XFEATURE_MASK_EAGER); 273 - } 274 - 275 - /* Return a mask that masks out all features requiring eagerfpu mode */ 276 - return ~XFEATURE_MASK_EAGER; 242 + return XCNTXT_MASK; 277 243 } 278 244 279 - /* 280 - * Disable features dependent on eagerfpu. 281 - */ 282 - static void __init fpu__clear_eager_fpu_features(void) 283 - { 284 - setup_clear_cpu_cap(X86_FEATURE_MPX); 285 - } 286 - 287 - /* 288 - * Pick the FPU context switching strategy: 289 - * 290 - * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of 291 - * the following is true: 292 - * 293 - * (1) the cpu has xsaveopt, as it has the optimization and doing eager 294 - * FPU switching has a relatively low cost compared to a plain xsave; 295 - * (2) the cpu has xsave features (e.g. MPX) that depend on eager FPU 296 - * switching. 
Should the kernel boot with noxsaveopt, we support MPX 297 - * with eager FPU switching at a higher cost. 298 - */ 245 + /* Legacy code to initialize eager fpu mode. */ 299 246 static void __init fpu__init_system_ctx_switch(void) 300 247 { 301 248 static bool on_boot_cpu __initdata = 1; ··· 238 317 on_boot_cpu = 0; 239 318 240 319 WARN_ON_FPU(current->thread.fpu.fpstate_active); 241 - 242 - if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE) 243 - eagerfpu = ENABLE; 244 - 245 - if (xfeatures_mask & XFEATURE_MASK_EAGER) 246 - eagerfpu = ENABLE; 247 - 248 - if (eagerfpu == ENABLE) 249 - setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); 250 - 251 - printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); 252 320 } 253 321 254 322 /* ··· 246 336 */ 247 337 static void __init fpu__init_parse_early_param(void) 248 338 { 249 - if (cmdline_find_option_bool(boot_command_line, "eagerfpu=off")) { 250 - eagerfpu = DISABLE; 251 - fpu__clear_eager_fpu_features(); 252 - } 253 - 254 339 if (cmdline_find_option_bool(boot_command_line, "no387")) 255 340 setup_clear_cpu_cap(X86_FEATURE_FPU); 256 341 ··· 279 374 * later FPU init activities: 280 375 */ 281 376 fpu__init_cpu(); 282 - 283 - /* 284 - * But don't leave CR0::TS set yet, as some of the FPU setup 285 - * methods depend on being able to execute FPU instructions 286 - * that will fault on a set TS, such as the FXSAVE in 287 - * fpu__init_system_mxcsr(). 288 - */ 289 - clts(); 290 377 291 378 fpu__init_system_generic(); 292 379 fpu__init_system_xstate_size_legacy();
+3 -5
arch/x86/kernel/fpu/signal.c
··· 340 340 } 341 341 342 342 fpu->fpstate_active = 1; 343 - if (use_eager_fpu()) { 344 - preempt_disable(); 345 - fpu__restore(fpu); 346 - preempt_enable(); 347 - } 343 + preempt_disable(); 344 + fpu__restore(fpu); 345 + preempt_enable(); 348 346 349 347 return err; 350 348 } else {
-9
arch/x86/kernel/fpu/xstate.c
··· 892 892 */ 893 893 if (!boot_cpu_has(X86_FEATURE_OSPKE)) 894 894 return -EINVAL; 895 - /* 896 - * For most XSAVE components, this would be an arduous task: 897 - * brining fpstate up to date with fpregs, updating fpstate, 898 - * then re-populating fpregs. But, for components that are 899 - * never lazily managed, we can just access the fpregs 900 - * directly. PKRU is never managed lazily, so we can just 901 - * manipulate it directly. Make sure it stays that way. 902 - */ 903 - WARN_ON_ONCE(!use_eager_fpu()); 904 895 905 896 /* Set the bits we need in PKRU: */ 906 897 if (init_val & PKEY_DISABLE_ACCESS)
-1
arch/x86/kernel/paravirt.c
··· 328 328 .cpuid = native_cpuid, 329 329 .get_debugreg = native_get_debugreg, 330 330 .set_debugreg = native_set_debugreg, 331 - .clts = native_clts, 332 331 .read_cr0 = native_read_cr0, 333 332 .write_cr0 = native_write_cr0, 334 333 .read_cr4 = native_read_cr4,
-2
arch/x86/kernel/paravirt_patch_32.c
··· 8 8 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); 9 9 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); 10 10 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); 11 - DEF_NATIVE(pv_cpu_ops, clts, "clts"); 12 11 13 12 #if defined(CONFIG_PARAVIRT_SPINLOCKS) 14 13 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); ··· 49 50 PATCH_SITE(pv_mmu_ops, read_cr2); 50 51 PATCH_SITE(pv_mmu_ops, read_cr3); 51 52 PATCH_SITE(pv_mmu_ops, write_cr3); 52 - PATCH_SITE(pv_cpu_ops, clts); 53 53 #if defined(CONFIG_PARAVIRT_SPINLOCKS) 54 54 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): 55 55 if (pv_is_native_spin_unlock()) {
-2
arch/x86/kernel/paravirt_patch_64.c
··· 10 10 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); 11 11 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); 12 12 DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); 13 - DEF_NATIVE(pv_cpu_ops, clts, "clts"); 14 13 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); 15 14 16 15 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); ··· 59 60 PATCH_SITE(pv_mmu_ops, read_cr2); 60 61 PATCH_SITE(pv_mmu_ops, read_cr3); 61 62 PATCH_SITE(pv_mmu_ops, write_cr3); 62 - PATCH_SITE(pv_cpu_ops, clts); 63 63 PATCH_SITE(pv_mmu_ops, flush_tlb_single); 64 64 PATCH_SITE(pv_cpu_ops, wbinvd); 65 65 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
+2 -3
arch/x86/kernel/process_32.c
··· 231 231 struct fpu *next_fpu = &next->fpu; 232 232 int cpu = smp_processor_id(); 233 233 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 234 - fpu_switch_t fpu_switch; 235 234 236 235 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 237 236 238 - fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); 237 + switch_fpu_prepare(prev_fpu, cpu); 239 238 240 239 /* 241 240 * Save away %gs. No need to save %fs, as it was saved on the ··· 293 294 if (prev->gs | next->gs) 294 295 lazy_load_gs(next->gs); 295 296 296 - switch_fpu_finish(next_fpu, fpu_switch); 297 + switch_fpu_finish(next_fpu, cpu); 297 298 298 299 this_cpu_write(current_task, next_p); 299 300
+2 -3
arch/x86/kernel/process_64.c
··· 270 270 int cpu = smp_processor_id(); 271 271 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 272 272 unsigned prev_fsindex, prev_gsindex; 273 - fpu_switch_t fpu_switch; 274 273 275 - fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); 274 + switch_fpu_prepare(prev_fpu, cpu); 276 275 277 276 /* We must save %fs and %gs before load_TLS() because 278 277 * %fs and %gs may be cleared by load_TLS(). ··· 421 422 prev->gsbase = 0; 422 423 prev->gsindex = prev_gsindex; 423 424 424 - switch_fpu_finish(next_fpu, fpu_switch); 425 + switch_fpu_finish(next_fpu, cpu); 425 426 426 427 /* 427 428 * Switch the PDA and FPU contexts.
+1 -1
arch/x86/kernel/smpboot.c
··· 1132 1132 return err; 1133 1133 1134 1134 /* the FPU context is blank, nobody can own it */ 1135 - __cpu_disable_lazy_restore(cpu); 1135 + per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; 1136 1136 1137 1137 common_cpu_up(cpu, tidle); 1138 1138
+16 -4
arch/x86/kernel/traps.c
··· 853 853 dotraplinkage void 854 854 do_device_not_available(struct pt_regs *regs, long error_code) 855 855 { 856 + unsigned long cr0; 857 + 856 858 RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); 857 859 858 860 #ifdef CONFIG_MATH_EMULATION ··· 868 866 return; 869 867 } 870 868 #endif 871 - fpu__restore(&current->thread.fpu); /* interrupts still off */ 872 - #ifdef CONFIG_X86_32 873 - cond_local_irq_enable(regs); 874 - #endif 869 + 870 + /* This should not happen. */ 871 + cr0 = read_cr0(); 872 + if (WARN(cr0 & X86_CR0_TS, "CR0.TS was set")) { 873 + /* Try to fix it up and carry on. */ 874 + write_cr0(cr0 & ~X86_CR0_TS); 875 + } else { 876 + /* 877 + * Something terrible happened, and we're better off trying 878 + * to kill the task than getting stuck in a never-ending 879 + * loop of #NM faults. 880 + */ 881 + die("unexpected #NM exception", regs, error_code); 882 + } 875 883 } 876 884 NOKPROBE_SYMBOL(do_device_not_available); 877 885
+1 -3
arch/x86/kvm/cpuid.c
··· 16 16 #include <linux/export.h> 17 17 #include <linux/vmalloc.h> 18 18 #include <linux/uaccess.h> 19 - #include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */ 20 19 #include <asm/user.h> 21 20 #include <asm/fpu/xstate.h> 22 21 #include "cpuid.h" ··· 113 114 if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) 114 115 best->ebx = xstate_required_size(vcpu->arch.xcr0, true); 115 116 116 - if (use_eager_fpu()) 117 - kvm_x86_ops->fpu_activate(vcpu); 117 + kvm_x86_ops->fpu_activate(vcpu); 118 118 119 119 /* 120 120 * The existing code assumes virtual address is 48-bit in the canonical
+4 -8
arch/x86/kvm/vmx.c
··· 2145 2145 #endif 2146 2146 if (vmx->host_state.msr_host_bndcfgs) 2147 2147 wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); 2148 - /* 2149 - * If the FPU is not active (through the host task or 2150 - * the guest vcpu), then restore the cr0.TS bit. 2151 - */ 2152 - if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded) 2153 - stts(); 2154 2148 load_gdt(this_cpu_ptr(&host_gdt)); 2155 2149 } 2156 2150 ··· 4839 4845 u32 low32, high32; 4840 4846 unsigned long tmpl; 4841 4847 struct desc_ptr dt; 4842 - unsigned long cr4; 4848 + unsigned long cr0, cr4; 4843 4849 4844 - vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ 4850 + cr0 = read_cr0(); 4851 + WARN_ON(cr0 & X86_CR0_TS); 4852 + vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */ 4845 4853 vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ 4846 4854 4847 4855 /* Save the most likely value for this task's CR4 in the VMCS. */
+1 -18
arch/x86/kvm/x86.c
··· 5097 5097 { 5098 5098 preempt_disable(); 5099 5099 kvm_load_guest_fpu(emul_to_vcpu(ctxt)); 5100 - /* 5101 - * CR0.TS may reference the host fpu state, not the guest fpu state, 5102 - * so it may be clear at this point. 5103 - */ 5104 - clts(); 5105 5100 } 5106 5101 5107 5102 static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) ··· 7418 7423 7419 7424 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 7420 7425 { 7421 - if (!vcpu->guest_fpu_loaded) { 7422 - vcpu->fpu_counter = 0; 7426 + if (!vcpu->guest_fpu_loaded) 7423 7427 return; 7424 - } 7425 7428 7426 7429 vcpu->guest_fpu_loaded = 0; 7427 7430 copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); 7428 7431 __kernel_fpu_end(); 7429 7432 ++vcpu->stat.fpu_reload; 7430 - /* 7431 - * If using eager FPU mode, or if the guest is a frequent user 7432 - * of the FPU, just leave the FPU active for next time. 7433 - * Every 255 times fpu_counter rolls over to 0; a guest that uses 7434 - * the FPU in bursts will revert to loading it on demand. 7435 - */ 7436 - if (!use_eager_fpu()) { 7437 - if (++vcpu->fpu_counter < 5) 7438 - kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); 7439 - } 7440 7433 trace_kvm_fpu(0); 7441 7434 } 7442 7435
+7 -22
arch/x86/lguest/boot.c
··· 497 497 * a whole series of functions like read_cr0() and write_cr0(). 498 498 * 499 499 * We start with cr0. cr0 allows you to turn on and off all kinds of basic 500 - * features, but Linux only really cares about one: the horrifically-named Task 501 - * Switched (TS) bit at bit 3 (ie. 8) 500 + * features, but the only cr0 bit that Linux ever used at runtime was the 501 + * horrifically-named Task Switched (TS) bit at bit 3 (ie. 8) 502 502 * 503 503 * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if 504 504 * the floating point unit is used. Which allows us to restore FPU state 505 - * lazily after a task switch, and Linux uses that gratefully, but wouldn't a 506 - * name like "FPUTRAP bit" be a little less cryptic? 505 + * lazily after a task switch if we wanted to, but wouldn't a name like 506 + * "FPUTRAP bit" be a little less cryptic? 507 507 * 508 - * We store cr0 locally because the Host never changes it. The Guest sometimes 509 - * wants to read it and we'd prefer not to bother the Host unnecessarily. 508 + * Fortunately, Linux keeps it simple and doesn't use TS, so we can ignore 509 + * cr0. 510 510 */ 511 - static unsigned long current_cr0; 512 511 static void lguest_write_cr0(unsigned long val) 513 512 { 514 - lazy_hcall1(LHCALL_TS, val & X86_CR0_TS); 515 - current_cr0 = val; 516 513 } 517 514 518 515 static unsigned long lguest_read_cr0(void) 519 516 { 520 - return current_cr0; 521 - } 522 - 523 - /* 524 - * Intel provided a special instruction to clear the TS bit for people too cool 525 - * to use write_cr0() to do it. This "clts" instruction is faster, because all 526 - * the vowels have been optimized out. 
527 - */ 528 - static void lguest_clts(void) 529 - { 530 - lazy_hcall1(LHCALL_TS, 0); 531 - current_cr0 &= ~X86_CR0_TS; 517 + return 0; 532 518 } 533 519 534 520 /* ··· 1418 1432 pv_cpu_ops.load_tls = lguest_load_tls; 1419 1433 pv_cpu_ops.get_debugreg = lguest_get_debugreg; 1420 1434 pv_cpu_ops.set_debugreg = lguest_set_debugreg; 1421 - pv_cpu_ops.clts = lguest_clts; 1422 1435 pv_cpu_ops.read_cr0 = lguest_read_cr0; 1423 1436 pv_cpu_ops.write_cr0 = lguest_write_cr0; 1424 1437 pv_cpu_ops.read_cr4 = lguest_read_cr4;
+1 -2
arch/x86/mm/pkeys.c
··· 141 141 * Called from the FPU code when creating a fresh set of FPU 142 142 * registers. This is called from a very specific context where 143 143 * we know the FPU regstiers are safe for use and we can use PKRU 144 - * directly. The fact that PKRU is only available when we are 145 - * using eagerfpu mode makes this possible. 144 + * directly. 146 145 */ 147 146 void copy_init_pkru_to_fpregs(void) 148 147 {
-13
arch/x86/xen/enlighten.c
··· 980 980 { 981 981 } 982 982 983 - static void xen_clts(void) 984 - { 985 - struct multicall_space mcs; 986 - 987 - mcs = xen_mc_entry(0); 988 - 989 - MULTI_fpu_taskswitch(mcs.mc, 0); 990 - 991 - xen_mc_issue(PARAVIRT_LAZY_CPU); 992 - } 993 - 994 983 static DEFINE_PER_CPU(unsigned long, xen_cr0_value); 995 984 996 985 static unsigned long xen_read_cr0(void) ··· 1221 1232 1222 1233 .set_debugreg = xen_set_debugreg, 1223 1234 .get_debugreg = xen_get_debugreg, 1224 - 1225 - .clts = xen_clts, 1226 1235 1227 1236 .read_cr0 = xen_read_cr0, 1228 1237 .write_cr0 = xen_write_cr0,
+2 -6
drivers/char/hw_random/via-rng.c
··· 70 70 * until we have 4 bytes, thus returning a u32 at a time, 71 71 * instead of the current u8-at-a-time. 72 72 * 73 - * Padlock instructions can generate a spurious DNA fault, so 74 - * we have to call them in the context of irq_ts_save/restore() 73 + * Padlock instructions can generate a spurious DNA fault, but the 74 + * kernel doesn't use CR0.TS, so this doesn't matter. 75 75 */ 76 76 77 77 static inline u32 xstore(u32 *addr, u32 edx_in) 78 78 { 79 79 u32 eax_out; 80 - int ts_state; 81 - 82 - ts_state = irq_ts_save(); 83 80 84 81 asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */" 85 82 : "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr)); 86 83 87 - irq_ts_restore(ts_state); 88 84 return eax_out; 89 85 } 90 86
+2 -21
drivers/crypto/padlock-aes.c
··· 183 183 184 184 /* 185 185 * While the padlock instructions don't use FP/SSE registers, they 186 - * generate a spurious DNA fault when cr0.ts is '1'. These instructions 187 - * should be used only inside the irq_ts_save/restore() context 186 + * generate a spurious DNA fault when CR0.TS is '1'. Fortunately, 187 + * the kernel doesn't use CR0.TS. 188 188 */ 189 189 190 190 static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key, ··· 298 298 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) 299 299 { 300 300 struct aes_ctx *ctx = aes_ctx(tfm); 301 - int ts_state; 302 301 303 302 padlock_reset_key(&ctx->cword.encrypt); 304 - ts_state = irq_ts_save(); 305 303 ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1); 306 - irq_ts_restore(ts_state); 307 304 padlock_store_cword(&ctx->cword.encrypt); 308 305 } 309 306 310 307 static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) 311 308 { 312 309 struct aes_ctx *ctx = aes_ctx(tfm); 313 - int ts_state; 314 310 315 311 padlock_reset_key(&ctx->cword.encrypt); 316 - ts_state = irq_ts_save(); 317 312 ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1); 318 - irq_ts_restore(ts_state); 319 313 padlock_store_cword(&ctx->cword.encrypt); 320 314 } 321 315 ··· 340 346 struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); 341 347 struct blkcipher_walk walk; 342 348 int err; 343 - int ts_state; 344 349 345 350 padlock_reset_key(&ctx->cword.encrypt); 346 351 347 352 blkcipher_walk_init(&walk, dst, src, nbytes); 348 353 err = blkcipher_walk_virt(desc, &walk); 349 354 350 - ts_state = irq_ts_save(); 351 355 while ((nbytes = walk.nbytes)) { 352 356 padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, 353 357 ctx->E, &ctx->cword.encrypt, ··· 353 361 nbytes &= AES_BLOCK_SIZE - 1; 354 362 err = blkcipher_walk_done(desc, &walk, nbytes); 355 363 } 356 - irq_ts_restore(ts_state); 357 364 358 365 padlock_store_cword(&ctx->cword.encrypt); 359 366 ··· 366 375 struct aes_ctx *ctx = 
blk_aes_ctx(desc->tfm); 367 376 struct blkcipher_walk walk; 368 377 int err; 369 - int ts_state; 370 378 371 379 padlock_reset_key(&ctx->cword.decrypt); 372 380 373 381 blkcipher_walk_init(&walk, dst, src, nbytes); 374 382 err = blkcipher_walk_virt(desc, &walk); 375 383 376 - ts_state = irq_ts_save(); 377 384 while ((nbytes = walk.nbytes)) { 378 385 padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr, 379 386 ctx->D, &ctx->cword.decrypt, ··· 379 390 nbytes &= AES_BLOCK_SIZE - 1; 380 391 err = blkcipher_walk_done(desc, &walk, nbytes); 381 392 } 382 - irq_ts_restore(ts_state); 383 393 384 394 padlock_store_cword(&ctx->cword.encrypt); 385 395 ··· 413 425 struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); 414 426 struct blkcipher_walk walk; 415 427 int err; 416 - int ts_state; 417 428 418 429 padlock_reset_key(&ctx->cword.encrypt); 419 430 420 431 blkcipher_walk_init(&walk, dst, src, nbytes); 421 432 err = blkcipher_walk_virt(desc, &walk); 422 433 423 - ts_state = irq_ts_save(); 424 434 while ((nbytes = walk.nbytes)) { 425 435 u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr, 426 436 walk.dst.virt.addr, ctx->E, ··· 428 442 nbytes &= AES_BLOCK_SIZE - 1; 429 443 err = blkcipher_walk_done(desc, &walk, nbytes); 430 444 } 431 - irq_ts_restore(ts_state); 432 445 433 446 padlock_store_cword(&ctx->cword.decrypt); 434 447 ··· 441 456 struct aes_ctx *ctx = blk_aes_ctx(desc->tfm); 442 457 struct blkcipher_walk walk; 443 458 int err; 444 - int ts_state; 445 459 446 460 padlock_reset_key(&ctx->cword.encrypt); 447 461 448 462 blkcipher_walk_init(&walk, dst, src, nbytes); 449 463 err = blkcipher_walk_virt(desc, &walk); 450 464 451 - ts_state = irq_ts_save(); 452 465 while ((nbytes = walk.nbytes)) { 453 466 padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr, 454 467 ctx->D, walk.iv, &ctx->cword.decrypt, ··· 454 471 nbytes &= AES_BLOCK_SIZE - 1; 455 472 err = blkcipher_walk_done(desc, &walk, nbytes); 456 473 } 457 - 458 - irq_ts_restore(ts_state); 459 474 460 475 
padlock_store_cword(&ctx->cword.encrypt); 461 476
-18
drivers/crypto/padlock-sha.c
··· 89 89 struct sha1_state state; 90 90 unsigned int space; 91 91 unsigned int leftover; 92 - int ts_state; 93 92 int err; 94 93 95 94 dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; ··· 119 120 120 121 memcpy(result, &state.state, SHA1_DIGEST_SIZE); 121 122 122 - /* prevent taking the spurious DNA fault with padlock. */ 123 - ts_state = irq_ts_save(); 124 123 asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */ 125 124 : \ 126 125 : "c"((unsigned long)state.count + count), \ 127 126 "a"((unsigned long)state.count), \ 128 127 "S"(in), "D"(result)); 129 - irq_ts_restore(ts_state); 130 128 131 129 padlock_output_block((uint32_t *)result, (uint32_t *)out, 5); 132 130 ··· 151 155 struct sha256_state state; 152 156 unsigned int space; 153 157 unsigned int leftover; 154 - int ts_state; 155 158 int err; 156 159 157 160 dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; ··· 181 186 182 187 memcpy(result, &state.state, SHA256_DIGEST_SIZE); 183 188 184 - /* prevent taking the spurious DNA fault with padlock. 
*/ 185 - ts_state = irq_ts_save(); 186 189 asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */ 187 190 : \ 188 191 : "c"((unsigned long)state.count + count), \ 189 192 "a"((unsigned long)state.count), \ 190 193 "S"(in), "D"(result)); 191 - irq_ts_restore(ts_state); 192 194 193 195 padlock_output_block((uint32_t *)result, (uint32_t *)out, 8); 194 196 ··· 304 312 u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ 305 313 ((aligned(STACK_ALIGN))); 306 314 u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); 307 - int ts_state; 308 315 309 316 partial = sctx->count & 0x3f; 310 317 sctx->count += len; ··· 319 328 memcpy(sctx->buffer + partial, data, 320 329 done + SHA1_BLOCK_SIZE); 321 330 src = sctx->buffer; 322 - ts_state = irq_ts_save(); 323 331 asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" 324 332 : "+S"(src), "+D"(dst) \ 325 333 : "a"((long)-1), "c"((unsigned long)1)); 326 - irq_ts_restore(ts_state); 327 334 done += SHA1_BLOCK_SIZE; 328 335 src = data + done; 329 336 } 330 337 331 338 /* Process the left bytes from the input data */ 332 339 if (len - done >= SHA1_BLOCK_SIZE) { 333 - ts_state = irq_ts_save(); 334 340 asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" 335 341 : "+S"(src), "+D"(dst) 336 342 : "a"((long)-1), 337 343 "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE))); 338 - irq_ts_restore(ts_state); 339 344 done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE); 340 345 src = data + done; 341 346 } ··· 388 401 u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ 389 402 ((aligned(STACK_ALIGN))); 390 403 u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); 391 - int ts_state; 392 404 393 405 partial = sctx->count & 0x3f; 394 406 sctx->count += len; ··· 403 417 memcpy(sctx->buf + partial, data, 404 418 done + SHA256_BLOCK_SIZE); 405 419 src = sctx->buf; 406 - ts_state = irq_ts_save(); 407 420 asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" 408 421 : "+S"(src), "+D"(dst) 409 422 : "a"((long)-1), "c"((unsigned long)1)); 410 - irq_ts_restore(ts_state); 411 
423 done += SHA256_BLOCK_SIZE; 412 424 src = data + done; 413 425 } 414 426 415 427 /* Process the left bytes from input data*/ 416 428 if (len - done >= SHA256_BLOCK_SIZE) { 417 - ts_state = irq_ts_save(); 418 429 asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" 419 430 : "+S"(src), "+D"(dst) 420 431 : "a"((long)-1), 421 432 "c"((unsigned long)((len - done) / 64))); 422 - irq_ts_restore(ts_state); 423 433 done += ((len - done) - (len - done) % 64); 424 434 src = data + done; 425 435 }
-4
drivers/lguest/hypercalls.c
··· 109 109 case LHCALL_SET_CLOCKEVENT: 110 110 guest_set_clockevent(cpu, args->arg1); 111 111 break; 112 - case LHCALL_TS: 113 - /* This sets the TS flag, as we saw used in run_guest(). */ 114 - cpu->ts = args->arg1; 115 - break; 116 112 case LHCALL_HALT: 117 113 /* Similarly, this sets the halted flag for run_guest(). */ 118 114 cpu->halted = 1;
-1
drivers/lguest/lg.h
··· 43 43 struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ 44 44 45 45 u32 cr2; 46 - int ts; 47 46 u32 esp1; 48 47 u16 ss1; 49 48
+1 -18
drivers/lguest/x86/core.c
··· 247 247 void lguest_arch_run_guest(struct lg_cpu *cpu) 248 248 { 249 249 /* 250 - * Remember the awfully-named TS bit? If the Guest has asked to set it 251 - * we set it now, so we can trap and pass that trap to the Guest if it 252 - * uses the FPU. 253 - */ 254 - if (cpu->ts && fpregs_active()) 255 - stts(); 256 - 257 - /* 258 250 * SYSENTER is an optimized way of doing system calls. We can't allow 259 251 * it because it always jumps to privilege level 0. A normal Guest 260 252 * won't try it because we don't advertise it in CPUID, but a malicious ··· 273 281 /* Restore SYSENTER if it's supposed to be on. */ 274 282 if (boot_cpu_has(X86_FEATURE_SEP)) 275 283 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 276 - 277 - /* Clear the host TS bit if it was set above. */ 278 - if (cpu->ts && fpregs_active()) 279 - clts(); 280 284 281 285 /* 282 286 * If the Guest page faulted, then the cr2 register will tell us the ··· 409 421 kill_guest(cpu, "Writing cr2"); 410 422 break; 411 423 case 7: /* We've intercepted a Device Not Available fault. */ 412 - /* 413 - * If the Guest doesn't want to know, we already restored the 414 - * Floating Point Unit, so we just continue without telling it. 415 - */ 416 - if (!cpu->ts) 417 - return; 424 + /* No special handling is needed here. */ 418 425 break; 419 426 case 32 ... 255: 420 427 /* This might be a syscall. */
-1
include/linux/kvm_host.h
··· 224 224 225 225 int fpu_active; 226 226 int guest_fpu_loaded, guest_xcr0_loaded; 227 - unsigned char fpu_counter; 228 227 struct swait_queue_head wq; 229 228 struct pid *pid; 230 229 int sigset_active;
-1
tools/arch/x86/include/asm/cpufeatures.h
··· 104 104 #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ 105 105 #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ 106 106 #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ 107 - #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ 108 107 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ 109 108 110 109 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */