Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/mm/pkeys: Fill in pkey field in siginfo

This fills in the new siginfo field: si_pkey to indicate to
userspace which protection key was set on the PTE that we faulted
on.

Note though that *ALL* protection key faults have to be generated
by a valid, present PTE at some point. But this code does no PTE
lookups which seems odd. The reason is that we take advantage of
the way we generate PTEs from VMAs. All PTEs under a VMA share
some attributes. For instance, they are _all_ either PROT_READ
*OR* PROT_NONE. They also always share a protection key, so we
never have to walk the page tables; we just use the VMA.

Note that si_pkey is a 64-bit value. The current hardware only
supports 4-bit protection keys. We do this because there is
_plenty_ of space in _sigfault and it is possible that future
processors would support more than 4 bits of protection keys.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20160212210213.ABC488FA@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Dave Hansen and committed by
Ingo Molnar
019132ff cd0ea35f

+68 -1
+5
arch/x86/include/asm/pgtable_types.h
··· 65 65 #endif 66 66 #define __HAVE_ARCH_PTE_SPECIAL 67 67 68 + #define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \ 69 + _PAGE_PKEY_BIT1 | \ 70 + _PAGE_PKEY_BIT2 | \ 71 + _PAGE_PKEY_BIT3) 72 + 68 73 #ifdef CONFIG_KMEMCHECK 69 74 #define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) 70 75 #else
+63 -1
arch/x86/mm/fault.c
··· 15 15 #include <linux/context_tracking.h> /* exception_enter(), ... */ 16 16 #include <linux/uaccess.h> /* faulthandler_disabled() */ 17 17 18 + #include <asm/cpufeature.h> /* boot_cpu_has, ... */ 18 19 #include <asm/traps.h> /* dotraplinkage, ... */ 19 20 #include <asm/pgalloc.h> /* pgd_*(), ... */ 20 21 #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ 21 22 #include <asm/fixmap.h> /* VSYSCALL_ADDR */ 22 23 #include <asm/vsyscall.h> /* emulate_vsyscall */ 23 24 #include <asm/vm86.h> /* struct vm86 */ 25 + #include <asm/mmu_context.h> /* vma_pkey() */ 24 26 25 27 #define CREATE_TRACE_POINTS 26 28 #include <asm/trace/exceptions.h> ··· 171 169 return prefetch; 172 170 } 173 171 172 + /* 173 + * A protection key fault means that the PKRU value did not allow 174 + * access to some PTE. Userspace can figure out what PKRU was 175 + * from the XSAVE state, and this function fills out a field in 176 + * siginfo so userspace can discover which protection key was set 177 + * on the PTE. 178 + * 179 + * If we get here, we know that the hardware signaled a PF_PK 180 + * fault and that there was a VMA once we got in the fault 181 + * handler. It does *not* guarantee that the VMA we find here 182 + * was the one that we faulted on. 183 + * 184 + * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); 185 + * 2. T1 : set PKRU to deny access to pkey=4, touches page 186 + * 3. T1 : faults... 187 + * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); 188 + * 5. T1 : enters fault handler, takes mmap_sem, etc... 189 + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really 190 + * faulted on a pte with its pkey=4. 
191 + */ 192 + static void fill_sig_info_pkey(int si_code, siginfo_t *info, 193 + struct vm_area_struct *vma) 194 + { 195 + /* This is effectively an #ifdef */ 196 + if (!boot_cpu_has(X86_FEATURE_OSPKE)) 197 + return; 198 + 199 + /* Fault not from Protection Keys: nothing to do */ 200 + if (si_code != SEGV_PKUERR) 201 + return; 202 + /* 203 + * force_sig_info_fault() is called from a number of 204 + * contexts, some of which have a VMA and some of which 205 + * do not. The PF_PK handing happens after we have a 206 + * valid VMA, so we should never reach this without a 207 + * valid VMA. 208 + */ 209 + if (!vma) { 210 + WARN_ONCE(1, "PKU fault with no VMA passed in"); 211 + info->si_pkey = 0; 212 + return; 213 + } 214 + /* 215 + * si_pkey should be thought of as a strong hint, but not 216 + * absolutely guranteed to be 100% accurate because of 217 + * the race explained above. 218 + */ 219 + info->si_pkey = vma_pkey(vma); 220 + } 221 + 174 222 static void 175 223 force_sig_info_fault(int si_signo, int si_code, unsigned long address, 176 224 struct task_struct *tsk, struct vm_area_struct *vma, ··· 238 186 if (fault & VM_FAULT_HWPOISON) 239 187 lsb = PAGE_SHIFT; 240 188 info.si_addr_lsb = lsb; 189 + 190 + fill_sig_info_pkey(si_code, &info, vma); 241 191 242 192 force_sig_info(si_signo, &info, tsk); 243 193 } ··· 901 847 bad_area_access_error(struct pt_regs *regs, unsigned long error_code, 902 848 unsigned long address, struct vm_area_struct *vma) 903 849 { 904 - __bad_area(regs, error_code, address, vma, SEGV_ACCERR); 850 + /* 851 + * This OSPKE check is not strictly necessary at runtime. 852 + * But, doing it this way allows compiler optimizations 853 + * if pkeys are compiled out. 854 + */ 855 + if (boot_cpu_has(X86_FEATURE_OSPKE) && (error_code & PF_PK)) 856 + __bad_area(regs, error_code, address, vma, SEGV_PKUERR); 857 + else 858 + __bad_area(regs, error_code, address, vma, SEGV_ACCERR); 905 859 } 906 860 907 861 static void