Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/mm/pkeys: Allow kernel to modify user pkey rights register

The Protection Key Rights for User memory (PKRU) is a 32-bit
user-accessible register. It contains two bits for each
protection key: a write-disable (WD) bit that blocks writes to
memory covered by the key, and an access-disable (AD) bit that
blocks all data access (reads and writes) to it.

Userspace can read/write the register with the RDPKRU and WRPKRU
instructions. However, the register is also saved and restored
with the XSAVE family of instructions, which means we have to
treat it like a floating point register.

The kernel needs to write to the register if it wants to
implement execute-only memory or if it implements a system call
to change PKRU.

To do this, we need to create a 'pkru_state' buffer, read the old
contents into it, modify it, and then tell the FPU code that
there is modified data in there so it can (possibly) move the
buffer back into the registers.

This uses the fpu__xfeature_set_state() function that we defined
in the previous patch.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20160212210236.0BE13217@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Dave Hansen and committed by Ingo Molnar
84594296 b8b9b6ba

+85 -2
+3 -2
arch/x86/include/asm/pgtable.h
··· 921 921 922 922 #define PKRU_AD_BIT 0x1 923 923 #define PKRU_WD_BIT 0x2 924 + #define PKRU_BITS_PER_PKEY 2 924 925 925 926 static inline bool __pkru_allows_read(u32 pkru, u16 pkey) 926 927 { 927 - int pkru_pkey_bits = pkey * 2; 928 + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; 928 929 return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); 929 930 } 930 931 931 932 static inline bool __pkru_allows_write(u32 pkru, u16 pkey) 932 933 { 933 - int pkru_pkey_bits = pkey * 2; 934 + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; 934 935 /* 935 936 * Access-disable disables writes too so we need to check 936 937 * both bits here.
+3
arch/x86/include/asm/pkeys.h
··· 3 3 4 4 #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) 5 5 6 + extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, 7 + unsigned long init_val); 8 + 6 9 #endif /*_ASM_X86_PKEYS_H */
+74
arch/x86/kernel/fpu/xstate.c
··· 5 5 */ 6 6 #include <linux/compat.h> 7 7 #include <linux/cpu.h> 8 + #include <linux/pkeys.h> 8 9 9 10 #include <asm/fpu/api.h> 10 11 #include <asm/fpu/internal.h> ··· 855 854 * and (possibly) move the fpstate back in to the fpregs. 856 855 */ 857 856 fpu__current_fpstate_write_end(); 857 + } 858 + 859 + #define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2) 860 + #define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1) 861 + 862 + /* 863 + * This will go out and modify the XSAVE buffer so that PKRU is 864 + * set to a particular state for access to 'pkey'. 865 + * 866 + * PKRU state does affect kernel access to user memory. We do 867 + * not modfiy PKRU *itself* here, only the XSAVE state that will 868 + * be restored in to PKRU when we return back to userspace. 869 + */ 870 + int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, 871 + unsigned long init_val) 872 + { 873 + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; 874 + struct pkru_state *old_pkru_state; 875 + struct pkru_state new_pkru_state; 876 + int pkey_shift = (pkey * PKRU_BITS_PER_PKEY); 877 + u32 new_pkru_bits = 0; 878 + 879 + if (!validate_pkey(pkey)) 880 + return -EINVAL; 881 + /* 882 + * This check implies XSAVE support. OSPKE only gets 883 + * set if we enable XSAVE and we enable PKU in XCR0. 884 + */ 885 + if (!boot_cpu_has(X86_FEATURE_OSPKE)) 886 + return -EINVAL; 887 + 888 + /* Set the bits we need in PKRU */ 889 + if (init_val & PKEY_DISABLE_ACCESS) 890 + new_pkru_bits |= PKRU_AD_BIT; 891 + if (init_val & PKEY_DISABLE_WRITE) 892 + new_pkru_bits |= PKRU_WD_BIT; 893 + 894 + /* Shift the bits in to the correct place in PKRU for pkey. */ 895 + new_pkru_bits <<= pkey_shift; 896 + 897 + /* Locate old copy of the state in the xsave buffer */ 898 + old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); 899 + 900 + /* 901 + * When state is not in the buffer, it is in the init 902 + * state, set it manually. Otherwise, copy out the old 903 + * state. 
904 + */ 905 + if (!old_pkru_state) 906 + new_pkru_state.pkru = 0; 907 + else 908 + new_pkru_state.pkru = old_pkru_state->pkru; 909 + 910 + /* mask off any old bits in place */ 911 + new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); 912 + /* Set the newly-requested bits */ 913 + new_pkru_state.pkru |= new_pkru_bits; 914 + 915 + /* 916 + * We could theoretically live without zeroing pkru.pad. 917 + * The current XSAVE feature state definition says that 918 + * only bytes 0->3 are used. But we do not want to 919 + * chance leaking kernel stack out to userspace in case a 920 + * memcpy() of the whole xsave buffer was done. 921 + * 922 + * They're in the same cacheline anyway. 923 + */ 924 + new_pkru_state.pad = 0; 925 + 926 + fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, 927 + sizeof(new_pkru_state)); 928 + 929 + return 0; 858 930 }
+5
include/linux/pkeys.h
··· 4 4 #include <linux/mm_types.h> 5 5 #include <asm/mmu_context.h> 6 6 7 + #define PKEY_DISABLE_ACCESS 0x1 8 + #define PKEY_DISABLE_WRITE 0x2 9 + #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ 10 + PKEY_DISABLE_WRITE) 11 + 7 12 #ifdef CONFIG_ARCH_HAS_PKEYS 8 13 #include <asm/pkeys.h> 9 14 #else /* ! CONFIG_ARCH_HAS_PKEYS */