Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/fpu: Allow setting of XSAVE state

We want to modify the Protection Key rights inside the kernel, so
we need to change PKRU's contents. But, if we do a plain
'wrpkru', when we return to userspace we might do an XRSTOR and
wipe out the kernel's 'wrpkru'. So, we need to go after PKRU in
the xsave buffer.

We do this by:

1. Ensuring that we have the XSAVE registers (fpregs) in the
kernel FPU buffer (fpstate)
2. Looking up the location of a given state in the buffer
3. Filling in the state
4. Ensuring that the hardware knows that state is present there
(basically that the 'init optimization' is not in place).
5. Copying the newly-modified state back to the registers if
necessary.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20160212210235.5A3139BF@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Dave Hansen and committed by
Ingo Molnar
b8b9b6ba 39a0526f

+161 -2
+2
arch/x86/include/asm/fpu/internal.h
··· 25 25 extern void fpu__activate_curr(struct fpu *fpu); 26 26 extern void fpu__activate_fpstate_read(struct fpu *fpu); 27 27 extern void fpu__activate_fpstate_write(struct fpu *fpu); 28 + extern void fpu__current_fpstate_write_begin(void); 29 + extern void fpu__current_fpstate_write_end(void); 28 30 extern void fpu__save(struct fpu *fpu); 29 31 extern void fpu__restore(struct fpu *fpu); 30 32 extern int fpu__restore_sig(void __user *buf, int ia32_frame);
+63
arch/x86/kernel/fpu/core.c
··· 354 354 } 355 355 356 356 /* 357 + * This function must be called before we write the current 358 + * task's fpstate. 359 + * 360 + * This call gets the current FPU register state and moves 361 + * it in to the 'fpstate'. Preemption is disabled so that 362 + * no writes to the 'fpstate' can occur from context 363 + * switches. 364 + * 365 + * Must be followed by a fpu__current_fpstate_write_end(). 366 + */ 367 + void fpu__current_fpstate_write_begin(void) 368 + { 369 + struct fpu *fpu = &current->thread.fpu; 370 + 371 + /* 372 + * Ensure that the context-switching code does not write 373 + * over the fpstate while we are doing our update. 374 + */ 375 + preempt_disable(); 376 + 377 + /* 378 + * Move the fpregs in to the fpu's 'fpstate'. 379 + */ 380 + fpu__activate_fpstate_read(fpu); 381 + 382 + /* 383 + * The caller is about to write to 'fpu'. Ensure that no 384 + * CPU thinks that its fpregs match the fpstate. This 385 + * ensures we will not be lazy and skip an XRSTOR in the 386 + * future. 387 + */ 388 + fpu->last_cpu = -1; 389 + } 390 + 391 + /* 392 + * This function must be paired with fpu__current_fpstate_write_begin() 393 + * 394 + * This will ensure that the modified fpstate gets placed back in 395 + * the fpregs if necessary. 396 + * 397 + * Note: This function may be called whether or not an _actual_ 398 + * write to the fpstate occurred. 399 + */ 400 + void fpu__current_fpstate_write_end(void) 401 + { 402 + struct fpu *fpu = &current->thread.fpu; 403 + 404 + /* 405 + * 'fpu' now has an updated copy of the state, but the 406 + * registers may still be out of date. Update them with 407 + * an XRSTOR if they are active. 408 + */ 409 + if (fpregs_active()) 410 + copy_kernel_to_fpregs(&fpu->state); 411 + 412 + /* 413 + * Our update is done and the fpregs/fpstate are in sync 414 + * if necessary. Context switches can happen again. 
415 + */ 416 + preempt_enable(); 417 + } 418 + 419 + /* 357 420 * 'fpu__restore()' is called to copy FPU registers from 358 421 * the FPU fpstate to the live hw registers and to activate 359 422 * access to the hardware registers, so that FPU instructions
+96 -2
arch/x86/kernel/fpu/xstate.c
··· 679 679 } 680 680 681 681 /* 682 + * Given an xstate feature mask, calculate where in the xsave 683 + * buffer the state is. Callers should ensure that the buffer 684 + * is valid. 685 + * 686 + * Note: does not work for compacted buffers. 687 + */ 688 + void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) 689 + { 690 + int feature_nr = fls64(xstate_feature_mask) - 1; 691 + 692 + return (void *)xsave + xstate_comp_offsets[feature_nr]; 693 + } 694 + /* 682 695 * Given the xsave area and a state inside, this function returns the 683 696 * address of the state. 684 697 * ··· 711 698 */ 712 699 void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) 713 700 { 714 - int feature_nr = fls64(xstate_feature) - 1; 715 701 /* 716 702 * Do we even *have* xsave state? 717 703 */ ··· 738 726 if (!(xsave->header.xfeatures & xstate_feature)) 739 727 return NULL; 740 728 741 - return (void *)xsave + xstate_comp_offsets[feature_nr]; 729 + return __raw_xsave_addr(xsave, xstate_feature); 742 730 } 743 731 EXPORT_SYMBOL_GPL(get_xsave_addr); 744 732 ··· 772 760 fpu__save(fpu); 773 761 774 762 return get_xsave_addr(&fpu->state.xsave, xsave_state); 763 + } 764 + 765 + 766 + /* 767 + * Set xfeatures (aka XSTATE_BV) bit for a feature that we want 768 + * to take out of its "init state". This will ensure that an 769 + * XRSTOR actually restores the state. 770 + */ 771 + static void fpu__xfeature_set_non_init(struct xregs_state *xsave, 772 + int xstate_feature_mask) 773 + { 774 + xsave->header.xfeatures |= xstate_feature_mask; 775 + } 776 + 777 + /* 778 + * This function is safe to call whether the FPU is in use or not. 779 + * 780 + * Note that this only works on the current task. 781 + * 782 + * Inputs: 783 + * @xstate_feature_mask: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, 784 + * XFEATURE_MASK_SSE, etc...) 785 + * @xstate_feature_src: a pointer to a copy of the state that you would 786 + * like written in to the current task's FPU xsave state. 
This pointer 787 + * must not be located in the current task's xsave area. 788 + * Output: 789 + * none (the function returns void). Unless a sanity check fails, 790 + * 'len' bytes are copied from the source and the feature leaves its 'init state'. 791 + */ 792 + static void fpu__xfeature_set_state(int xstate_feature_mask, 793 + void *xstate_feature_src, size_t len) 794 + { 795 + struct xregs_state *xsave = &current->thread.fpu.state.xsave; 796 + struct fpu *fpu = &current->thread.fpu; 797 + void *dst; 798 + 799 + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { 800 + WARN_ONCE(1, "%s() attempted with no xsave support", __func__); 801 + return; 802 + } 803 + 804 + /* 805 + * Tell the FPU code that we need the FPU state to be in 806 + * 'fpu' (not in the registers), and that we need it to 807 + * be stable while we write to it. 808 + */ 809 + fpu__current_fpstate_write_begin(); 810 + 811 + /* 812 + * This method *WILL* *NOT* work for compact-format 813 + * buffers. If the 'xstate_feature_mask' is unset in 814 + * xcomp_bv then we may need to move other feature state 815 + * "up" in the buffer. 816 + */ 817 + if (xsave->header.xcomp_bv & xstate_feature_mask) { 818 + WARN_ON_ONCE(1); 819 + goto out; 820 + } 821 + 822 + /* find the location in the xsave buffer of the desired state */ 823 + dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask); 824 + 825 + /* 826 + * Make sure that the pointer being passed in did not 827 + * come from the xsave buffer itself. 828 + */ 829 + WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself"); 830 + 831 + /* put the caller-provided data in the location */ 832 + memcpy(dst, xstate_feature_src, len); 833 + 834 + /* 835 + * Mark the xfeature so that the CPU knows there is state 836 + * in the buffer now. 837 + */ 838 + fpu__xfeature_set_non_init(xsave, xstate_feature_mask); 839 + out: 840 + /* 841 + * We are done writing to the 'fpu'. Reenable preemption 842 + * and (possibly) move the fpstate back in to the fpregs. 
843 + */ 844 + fpu__current_fpstate_write_end(); 775 845 }