Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.2-rc7 1506 lines 39 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) 4 * 5 * There are examples in here of: 6 * * how to set protection keys on memory 7 * * how to set/clear bits in PKRU (the rights register) 8 * * how to handle SEGV_PKRU signals and extract pkey-relevant 9 * information from the siginfo 10 * 11 * Things to add: 12 * make sure KSM and KSM COW breaking works 13 * prefault pages in at malloc, or not 14 * protect MPX bounds tables with protection keys? 15 * make sure VMA splitting/merging is working correctly 16 * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys 17 * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel 18 * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks 19 * 20 * Compile like this: 21 * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm 22 * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm 23 */ 24#define _GNU_SOURCE 25#include <errno.h> 26#include <linux/futex.h> 27#include <sys/time.h> 28#include <sys/syscall.h> 29#include <string.h> 30#include <stdio.h> 31#include <stdint.h> 32#include <stdbool.h> 33#include <signal.h> 34#include <assert.h> 35#include <stdlib.h> 36#include <ucontext.h> 37#include <sys/mman.h> 38#include <sys/types.h> 39#include <sys/wait.h> 40#include <sys/stat.h> 41#include <fcntl.h> 42#include <unistd.h> 43#include <sys/ptrace.h> 44#include <setjmp.h> 45 46#include "pkey-helpers.h" 47 48int iteration_nr = 1; 49int test_nr; 50 51unsigned int shadow_pkru; 52 53#define HPAGE_SIZE (1UL<<21) 54#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) 55#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) 56#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) 57#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) 58#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) 59#define __stringify_1(x...) #x 60#define __stringify(x...) __stringify_1(x) 61 62#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) 63 64int dprint_in_signal; 65char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; 66 67extern void abort_hooks(void); 68#define pkey_assert(condition) do { \ 69 if (!(condition)) { \ 70 dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ 71 __FILE__, __LINE__, \ 72 test_nr, iteration_nr); \ 73 dprintf0("errno at assert: %d", errno); \ 74 abort_hooks(); \ 75 exit(__LINE__); \ 76 } \ 77} while (0) 78 79void cat_into_file(char *str, char *file) 80{ 81 int fd = open(file, O_RDWR); 82 int ret; 83 84 dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); 85 /* 86 * these need to be raw because they are called under 87 * pkey_assert() 88 */ 89 if (fd < 0) { 90 fprintf(stderr, "error opening '%s'\n", str); 91 perror("error: "); 92 exit(__LINE__); 93 } 94 95 ret = write(fd, str, strlen(str)); 96 if (ret != strlen(str)) { 97 perror("write to file failed"); 98 fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); 99 exit(__LINE__); 100 } 101 close(fd); 102} 103 104#if CONTROL_TRACING > 0 105static int warned_tracing; 106int tracing_root_ok(void) 107{ 108 if (geteuid() != 0) { 109 if (!warned_tracing) 110 fprintf(stderr, "WARNING: not run as root, " 111 "can not do tracing control\n"); 112 warned_tracing = 1; 113 return 0; 114 } 115 return 1; 116} 117#endif 118 119void tracing_on(void) 120{ 121#if CONTROL_TRACING > 0 122#define TRACEDIR "/sys/kernel/debug/tracing" 123 char pidstr[32]; 124 125 if (!tracing_root_ok()) 126 return; 127 128 sprintf(pidstr, "%d", getpid()); 129 cat_into_file("0", TRACEDIR "/tracing_on"); 130 cat_into_file("\n", TRACEDIR "/trace"); 131 if (1) { 132 cat_into_file("function_graph", TRACEDIR "/current_tracer"); 133 cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); 134 } else { 135 cat_into_file("nop", TRACEDIR "/current_tracer"); 136 } 137 cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); 138 cat_into_file("1", TRACEDIR "/tracing_on"); 139 dprintf1("enabled tracing\n"); 140#endif 141} 142 143void tracing_off(void) 144{ 145#if CONTROL_TRACING > 0 146 if (!tracing_root_ok()) 147 return; 148 cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); 149#endif 150} 151 152void abort_hooks(void) 153{ 154 fprintf(stderr, "running %s()...\n", __func__); 155 tracing_off(); 156#ifdef SLEEP_ON_ABORT 157 sleep(SLEEP_ON_ABORT); 158#endif 159} 160 161static inline void __page_o_noops(void) 162{ 163 /* 8-bytes of instruction * 512 bytes = 1 page */ 164 asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); 165} 166 167/* 168 * This attempts to have roughly a page of instructions followed by a few 169 * instructions that do a write, and another page of instructions. That 170 * way, we are pretty sure that the write is in the second page of 171 * instructions and has at least a page of padding behind it. 172 * 173 * *That* lets us be sure to madvise() away the write instruction, which 174 * will then fault, which makes sure that the fault code handles 175 * execute-only memory properly. 176 */ 177__attribute__((__aligned__(PAGE_SIZE))) 178void lots_o_noops_around_write(int *write_to_me) 179{ 180 dprintf3("running %s()\n", __func__); 181 __page_o_noops(); 182 /* Assume this happens in the second page of instructions: */ 183 *write_to_me = __LINE__; 184 /* pad out by another page: */ 185 __page_o_noops(); 186 dprintf3("%s() done\n", __func__); 187} 188 189/* Define some kernel-like types */ 190#define u8 uint8_t 191#define u16 uint16_t 192#define u32 uint32_t 193#define u64 uint64_t 194 195#ifdef __i386__ 196 197#ifndef SYS_mprotect_key 198# define SYS_mprotect_key 380 199#endif 200 201#ifndef SYS_pkey_alloc 202# define SYS_pkey_alloc 381 203# define SYS_pkey_free 382 204#endif 205 206#define REG_IP_IDX REG_EIP 207#define si_pkey_offset 0x14 208 209#else 210 211#ifndef SYS_mprotect_key 212# define SYS_mprotect_key 329 213#endif 214 215#ifndef SYS_pkey_alloc 216# define SYS_pkey_alloc 330 217# define SYS_pkey_free 331 218#endif 219 220#define REG_IP_IDX REG_RIP 221#define si_pkey_offset 0x20 222 223#endif 224 225void dump_mem(void *dumpme, int len_bytes) 226{ 227 char *c = (void *)dumpme; 228 int i; 229 230 for (i = 0; i < len_bytes; i += sizeof(u64)) { 231 u64 *ptr = (u64 *)(c + i); 232 dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); 233 } 234} 235 236/* Failed address bound checks: */ 237#ifndef SEGV_BNDERR 238# define SEGV_BNDERR 3 239#endif 240 241#ifndef SEGV_PKUERR 242# define SEGV_PKUERR 4 243#endif 244 245static char *si_code_str(int si_code) 246{ 247 if (si_code == SEGV_MAPERR) 248 return "SEGV_MAPERR"; 249 if (si_code == SEGV_ACCERR) 250 return "SEGV_ACCERR"; 251 if (si_code == SEGV_BNDERR) 252 return "SEGV_BNDERR"; 253 if (si_code == SEGV_PKUERR) 254 return "SEGV_PKUERR"; 255 return "UNKNOWN"; 256} 257 258int pkru_faults; 259int last_si_pkey = -1; 260void signal_handler(int signum, siginfo_t *si, void *vucontext) 261{ 262 ucontext_t *uctxt = vucontext; 263 int trapno; 264 unsigned long ip; 265 char *fpregs; 266 u32 *pkru_ptr; 267 u64 siginfo_pkey; 268 u32 *si_pkey_ptr; 269 int pkru_offset; 270 fpregset_t fpregset; 271 272 dprint_in_signal = 1; 273 dprintf1(">>>>===============SIGSEGV============================\n"); 274 dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, 275 __rdpkru(), shadow_pkru); 276 277 trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; 278 ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; 279 fpregset = uctxt->uc_mcontext.fpregs; 280 fpregs = (void *)fpregset; 281 282 dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, 283 trapno, ip, si_code_str(si->si_code), si->si_code); 284#ifdef __i386__ 285 /* 286 * 32-bit has some extra padding so that userspace can tell whether 287 * the XSTATE header is present in addition to the "legacy" FPU 288 * state. We just assume that it is here. 289 */ 290 fpregs += 0x70; 291#endif 292 pkru_offset = pkru_xstate_offset(); 293 pkru_ptr = (void *)(&fpregs[pkru_offset]); 294 295 dprintf1("siginfo: %p\n", si); 296 dprintf1(" fpregs: %p\n", fpregs); 297 /* 298 * If we got a PKRU fault, we *HAVE* to have at least one bit set in 299 * here. 300 */ 301 dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); 302 if (DEBUG_LEVEL > 4) 303 dump_mem(pkru_ptr - 128, 256); 304 pkey_assert(*pkru_ptr); 305 306 if ((si->si_code == SEGV_MAPERR) || 307 (si->si_code == SEGV_ACCERR) || 308 (si->si_code == SEGV_BNDERR)) { 309 printf("non-PK si_code, exiting...\n"); 310 exit(4); 311 } 312 313 si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); 314 dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); 315 dump_mem((u8 *)si_pkey_ptr - 8, 24); 316 siginfo_pkey = *si_pkey_ptr; 317 pkey_assert(siginfo_pkey < NR_PKEYS); 318 last_si_pkey = siginfo_pkey; 319 320 dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); 321 /* need __rdpkru() version so we do not do shadow_pkru checking */ 322 dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); 323 dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); 324 *(u64 *)pkru_ptr = 0x00000000; 325 dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); 326 pkru_faults++; 327 dprintf1("<<<<==================================================\n"); 328 dprint_in_signal = 0; 329} 330 331int wait_all_children(void) 332{ 333 int status; 334 return waitpid(-1, &status, 0); 335} 336 337void sig_chld(int x) 338{ 339 dprint_in_signal = 1; 340 dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); 341 dprint_in_signal = 0; 342} 343 344void setup_sigsegv_handler(void) 345{ 346 int r, rs; 347 struct sigaction newact; 348 struct sigaction oldact; 349 350 /* #PF is mapped to sigsegv */ 351 int signum = SIGSEGV; 352 353 newact.sa_handler = 0; 354 newact.sa_sigaction = signal_handler; 355 356 /*sigset_t - signals to block while in the handler */ 357 /* get the old signal mask. */ 358 rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); 359 pkey_assert(rs == 0); 360 361 /* call sa_sigaction, not sa_handler*/ 362 newact.sa_flags = SA_SIGINFO; 363 364 newact.sa_restorer = 0; /* void(*)(), obsolete */ 365 r = sigaction(signum, &newact, &oldact); 366 r = sigaction(SIGALRM, &newact, &oldact); 367 pkey_assert(r == 0); 368} 369 370void setup_handlers(void) 371{ 372 signal(SIGCHLD, &sig_chld); 373 setup_sigsegv_handler(); 374} 375 376pid_t fork_lazy_child(void) 377{ 378 pid_t forkret; 379 380 forkret = fork(); 381 pkey_assert(forkret >= 0); 382 dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 383 384 if (!forkret) { 385 /* in the child */ 386 while (1) { 387 dprintf1("child sleeping...\n"); 388 sleep(30); 389 } 390 } 391 return forkret; 392} 393 394#ifndef PKEY_DISABLE_ACCESS 395# define PKEY_DISABLE_ACCESS 0x1 396#endif 397 398#ifndef PKEY_DISABLE_WRITE 399# define PKEY_DISABLE_WRITE 0x2 400#endif 401 402static u32 hw_pkey_get(int pkey, unsigned long flags) 403{ 404 u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); 405 u32 pkru = __rdpkru(); 406 u32 shifted_pkru; 407 u32 masked_pkru; 408 409 dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", 410 __func__, pkey, flags, 0, 0); 411 dprintf2("%s() raw pkru: %x\n", __func__, pkru); 412 413 shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); 414 dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); 415 masked_pkru = shifted_pkru & mask; 416 dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); 417 /* 418 * shift down the relevant bits to the lowest two, then 419 * mask off all the other high bits. 420 */ 421 return masked_pkru; 422} 423 424static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) 425{ 426 u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); 427 u32 old_pkru = __rdpkru(); 428 u32 new_pkru; 429 430 /* make sure that 'rights' only contains the bits we expect: */ 431 assert(!(rights & ~mask)); 432 433 /* copy old pkru */ 434 new_pkru = old_pkru; 435 /* mask out bits from pkey in old value: */ 436 new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); 437 /* OR in new bits for pkey: */ 438 new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); 439 440 __wrpkru(new_pkru); 441 442 dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", 443 __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); 444 return 0; 445} 446 447void pkey_disable_set(int pkey, int flags) 448{ 449 unsigned long syscall_flags = 0; 450 int ret; 451 int pkey_rights; 452 u32 orig_pkru = rdpkru(); 453 454 dprintf1("START->%s(%d, 0x%x)\n", __func__, 455 pkey, flags); 456 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 457 458 pkey_rights = hw_pkey_get(pkey, syscall_flags); 459 460 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 461 pkey, pkey, pkey_rights); 462 pkey_assert(pkey_rights >= 0); 463 464 pkey_rights |= flags; 465 466 ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); 467 assert(!ret); 468 /*pkru and flags have the same format */ 469 shadow_pkru |= flags << (pkey * 2); 470 dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); 471 472 pkey_assert(ret >= 0); 473 474 pkey_rights = hw_pkey_get(pkey, syscall_flags); 475 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 476 pkey, pkey, pkey_rights); 477 478 dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); 479 if (flags) 480 pkey_assert(rdpkru() > orig_pkru); 481 dprintf1("END<---%s(%d, 0x%x)\n", __func__, 482 pkey, flags); 483} 484 485void pkey_disable_clear(int pkey, int flags) 486{ 487 unsigned long syscall_flags = 0; 488 int ret; 489 int pkey_rights = hw_pkey_get(pkey, syscall_flags); 490 u32 orig_pkru = rdpkru(); 491 492 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 493 494 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 495 pkey, pkey, pkey_rights); 496 pkey_assert(pkey_rights >= 0); 497 498 pkey_rights |= flags; 499 500 ret = hw_pkey_set(pkey, pkey_rights, 0); 501 /* pkru and flags have the same format */ 502 shadow_pkru &= ~(flags << (pkey * 2)); 503 pkey_assert(ret >= 0); 504 505 pkey_rights = hw_pkey_get(pkey, syscall_flags); 506 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 507 pkey, pkey, pkey_rights); 508 509 dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); 510 if (flags) 511 assert(rdpkru() > orig_pkru); 512} 513 514void pkey_write_allow(int pkey) 515{ 516 pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); 517} 518void pkey_write_deny(int pkey) 519{ 520 pkey_disable_set(pkey, PKEY_DISABLE_WRITE); 521} 522void pkey_access_allow(int pkey) 523{ 524 pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); 525} 526void pkey_access_deny(int pkey) 527{ 528 pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); 529} 530 531int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 532 unsigned long pkey) 533{ 534 int sret; 535 536 dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, 537 ptr, size, orig_prot, pkey); 538 539 errno = 0; 540 sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); 541 if (errno) { 542 dprintf2("SYS_mprotect_key sret: %d\n", sret); 543 dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); 544 dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); 545 if (DEBUG_LEVEL >= 2) 546 perror("SYS_mprotect_pkey"); 547 } 548 return sret; 549} 550 551int sys_pkey_alloc(unsigned long flags, unsigned long init_val) 552{ 553 int ret = syscall(SYS_pkey_alloc, flags, init_val); 554 dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", 555 __func__, flags, init_val, ret, errno); 556 return ret; 557} 558 559int alloc_pkey(void) 560{ 561 int ret; 562 unsigned long init_val = 0x0; 563 564 dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", 565 __LINE__, __rdpkru(), shadow_pkru); 566 ret = sys_pkey_alloc(0, init_val); 567 /* 568 * pkey_alloc() sets PKRU, so we need to reflect it in 569 * shadow_pkru: 570 */ 571 dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", 572 __LINE__, ret, __rdpkru(), shadow_pkru); 573 if (ret) { 574 /* clear both the bits: */ 575 shadow_pkru &= ~(0x3 << (ret * 2)); 576 dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", 577 __LINE__, ret, __rdpkru(), shadow_pkru); 578 /* 579 * move the new state in from init_val 580 * (remember, we cheated and init_val == pkru format) 581 */ 582 shadow_pkru |= (init_val << (ret * 2)); 583 } 584 dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", 585 __LINE__, ret, __rdpkru(), shadow_pkru); 586 dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); 587 /* for shadow checking: */ 588 rdpkru(); 589 dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", 590 __LINE__, ret, __rdpkru(), shadow_pkru); 591 return ret; 592} 593 594int sys_pkey_free(unsigned long pkey) 595{ 596 int ret = syscall(SYS_pkey_free, pkey); 597 dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); 598 return ret; 599} 600 601/* 602 * I had a bug where pkey bits could be set by mprotect() but 603 * not cleared. This ensures we get lots of random bit sets 604 * and clears on the vma and pte pkey bits. 605 */ 606int alloc_random_pkey(void) 607{ 608 int max_nr_pkey_allocs; 609 int ret; 610 int i; 611 int alloced_pkeys[NR_PKEYS]; 612 int nr_alloced = 0; 613 int random_index; 614 memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); 615 616 /* allocate every possible key and make a note of which ones we got */ 617 max_nr_pkey_allocs = NR_PKEYS; 618 max_nr_pkey_allocs = 1; 619 for (i = 0; i < max_nr_pkey_allocs; i++) { 620 int new_pkey = alloc_pkey(); 621 if (new_pkey < 0) 622 break; 623 alloced_pkeys[nr_alloced++] = new_pkey; 624 } 625 626 pkey_assert(nr_alloced > 0); 627 /* select a random one out of the allocated ones */ 628 random_index = rand() % nr_alloced; 629 ret = alloced_pkeys[random_index]; 630 /* now zero it out so we don't free it next */ 631 alloced_pkeys[random_index] = 0; 632 633 /* go through the allocated ones that we did not want and free them */ 634 for (i = 0; i < nr_alloced; i++) { 635 int free_ret; 636 if (!alloced_pkeys[i]) 637 continue; 638 free_ret = sys_pkey_free(alloced_pkeys[i]); 639 pkey_assert(!free_ret); 640 } 641 dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, 642 __LINE__, ret, __rdpkru(), shadow_pkru); 643 return ret; 644} 645 646int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 647 unsigned long pkey) 648{ 649 int nr_iterations = random() % 100; 650 int ret; 651 652 while (0) { 653 int rpkey = alloc_random_pkey(); 654 ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 655 dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 656 ptr, size, orig_prot, pkey, ret); 657 if (nr_iterations-- < 0) 658 break; 659 660 dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, 661 __LINE__, ret, __rdpkru(), shadow_pkru); 662 sys_pkey_free(rpkey); 663 dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, 664 __LINE__, ret, __rdpkru(), shadow_pkru); 665 } 666 pkey_assert(pkey < NR_PKEYS); 667 668 ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 669 dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 670 ptr, size, orig_prot, pkey, ret); 671 pkey_assert(!ret); 672 dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, 673 __LINE__, ret, __rdpkru(), shadow_pkru); 674 return ret; 675} 676 677struct pkey_malloc_record { 678 void *ptr; 679 long size; 680 int prot; 681}; 682struct pkey_malloc_record *pkey_malloc_records; 683struct pkey_malloc_record *pkey_last_malloc_record; 684long nr_pkey_malloc_records; 685void record_pkey_malloc(void *ptr, long size, int prot) 686{ 687 long i; 688 struct pkey_malloc_record *rec = NULL; 689 690 for (i = 0; i < nr_pkey_malloc_records; i++) { 691 rec = &pkey_malloc_records[i]; 692 /* find a free record */ 693 if (rec) 694 break; 695 } 696 if (!rec) { 697 /* every record is full */ 698 size_t old_nr_records = nr_pkey_malloc_records; 699 size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); 700 size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); 701 dprintf2("new_nr_records: %zd\n", new_nr_records); 702 dprintf2("new_size: %zd\n", new_size); 703 pkey_malloc_records = realloc(pkey_malloc_records, new_size); 704 pkey_assert(pkey_malloc_records != NULL); 705 rec = &pkey_malloc_records[nr_pkey_malloc_records]; 706 /* 707 * realloc() does not initialize memory, so zero it from 708 * the first new record all the way to the end. 709 */ 710 for (i = 0; i < new_nr_records - old_nr_records; i++) 711 memset(rec + i, 0, sizeof(*rec)); 712 } 713 dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", 714 (int)(rec - pkey_malloc_records), rec, ptr, size); 715 rec->ptr = ptr; 716 rec->size = size; 717 rec->prot = prot; 718 pkey_last_malloc_record = rec; 719 nr_pkey_malloc_records++; 720} 721 722void free_pkey_malloc(void *ptr) 723{ 724 long i; 725 int ret; 726 dprintf3("%s(%p)\n", __func__, ptr); 727 for (i = 0; i < nr_pkey_malloc_records; i++) { 728 struct pkey_malloc_record *rec = &pkey_malloc_records[i]; 729 dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", 730 ptr, i, rec, rec->ptr, rec->size); 731 if ((ptr < rec->ptr) || 732 (ptr >= rec->ptr + rec->size)) 733 continue; 734 735 dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", 736 ptr, i, rec, rec->ptr, rec->size); 737 nr_pkey_malloc_records--; 738 ret = munmap(rec->ptr, rec->size); 739 dprintf3("munmap ret: %d\n", ret); 740 pkey_assert(!ret); 741 dprintf3("clearing rec->ptr, rec: %p\n", rec); 742 rec->ptr = NULL; 743 dprintf3("done clearing rec->ptr, rec: %p\n", rec); 744 return; 745 } 746 pkey_assert(false); 747} 748 749 750void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) 751{ 752 void *ptr; 753 int ret; 754 755 rdpkru(); 756 dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 757 size, prot, pkey); 758 pkey_assert(pkey < NR_PKEYS); 759 ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 760 pkey_assert(ptr != (void *)-1); 761 ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); 762 pkey_assert(!ret); 763 record_pkey_malloc(ptr, size, prot); 764 rdpkru(); 765 766 dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); 767 return ptr; 768} 769 770void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) 771{ 772 int ret; 773 void *ptr; 774 775 dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 776 size, prot, pkey); 777 /* 778 * Guarantee we can fit at least one huge page in the resulting 779 * allocation by allocating space for 2: 780 */ 781 size = ALIGN_UP(size, HPAGE_SIZE * 2); 782 ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 783 pkey_assert(ptr != (void *)-1); 784 record_pkey_malloc(ptr, size, prot); 785 mprotect_pkey(ptr, size, prot, pkey); 786 787 dprintf1("unaligned ptr: %p\n", ptr); 788 ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); 789 dprintf1(" aligned ptr: %p\n", ptr); 790 ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); 791 dprintf1("MADV_HUGEPAGE ret: %d\n", ret); 792 ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); 793 dprintf1("MADV_WILLNEED ret: %d\n", ret); 794 memset(ptr, 0, HPAGE_SIZE); 795 796 dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); 797 return ptr; 798} 799 800int hugetlb_setup_ok; 801#define GET_NR_HUGE_PAGES 10 802void setup_hugetlbfs(void) 803{ 804 int err; 805 int fd; 806 char buf[] = "123"; 807 808 if (geteuid() != 0) { 809 fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); 810 return; 811 } 812 813 cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); 814 815 /* 816 * Now go make sure that we got the pages and that they 817 * are 2M pages. Someone might have made 1G the default. 818 */ 819 fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); 820 if (fd < 0) { 821 perror("opening sysfs 2M hugetlb config"); 822 return; 823 } 824 825 /* -1 to guarantee leaving the trailing \0 */ 826 err = read(fd, buf, sizeof(buf)-1); 827 close(fd); 828 if (err <= 0) { 829 perror("reading sysfs 2M hugetlb config"); 830 return; 831 } 832 833 if (atoi(buf) != GET_NR_HUGE_PAGES) { 834 fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", 835 buf, GET_NR_HUGE_PAGES); 836 return; 837 } 838 839 hugetlb_setup_ok = 1; 840} 841 842void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) 843{ 844 void *ptr; 845 int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; 846 847 if (!hugetlb_setup_ok) 848 return PTR_ERR_ENOTSUP; 849 850 dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); 851 size = ALIGN_UP(size, HPAGE_SIZE * 2); 852 pkey_assert(pkey < NR_PKEYS); 853 ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); 854 pkey_assert(ptr != (void *)-1); 855 mprotect_pkey(ptr, size, prot, pkey); 856 857 record_pkey_malloc(ptr, size, prot); 858 859 dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); 860 return ptr; 861} 862 863void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) 864{ 865 void *ptr; 866 int fd; 867 868 dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 869 size, prot, pkey); 870 pkey_assert(pkey < NR_PKEYS); 871 fd = open("/dax/foo", O_RDWR); 872 pkey_assert(fd >= 0); 873 874 ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); 875 pkey_assert(ptr != (void *)-1); 876 877 mprotect_pkey(ptr, size, prot, pkey); 878 879 record_pkey_malloc(ptr, size, prot); 880 881 dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); 882 close(fd); 883 return ptr; 884} 885 886void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { 887 888 malloc_pkey_with_mprotect, 889 malloc_pkey_anon_huge, 890 malloc_pkey_hugetlb 891/* can not do direct with the pkey_mprotect() API: 892 malloc_pkey_mmap_direct, 893 malloc_pkey_mmap_dax, 894*/ 895}; 896 897void *malloc_pkey(long size, int prot, u16 pkey) 898{ 899 void *ret; 900 static int malloc_type; 901 int nr_malloc_types = ARRAY_SIZE(pkey_malloc); 902 903 pkey_assert(pkey < NR_PKEYS); 904 905 while (1) { 906 pkey_assert(malloc_type < nr_malloc_types); 907 908 ret = pkey_malloc[malloc_type](size, prot, pkey); 909 pkey_assert(ret != (void *)-1); 910 911 malloc_type++; 912 if (malloc_type >= nr_malloc_types) 913 malloc_type = (random()%nr_malloc_types); 914 915 /* try again if the malloc_type we tried is unsupported */ 916 if (ret == PTR_ERR_ENOTSUP) 917 continue; 918 919 break; 920 } 921 922 dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, 923 size, prot, pkey, ret); 924 return ret; 925} 926 927int last_pkru_faults; 928#define UNKNOWN_PKEY -2 929void expected_pk_fault(int pkey) 930{ 931 dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", 932 __func__, last_pkru_faults, pkru_faults); 933 dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); 934 pkey_assert(last_pkru_faults + 1 == pkru_faults); 935 936 /* 937 * For exec-only memory, we do not know the pkey in 938 * advance, so skip this check. 939 */ 940 if (pkey != UNKNOWN_PKEY) 941 pkey_assert(last_si_pkey == pkey); 942 943 /* 944 * The signal handler shold have cleared out PKRU to let the 945 * test program continue. We now have to restore it. 946 */ 947 if (__rdpkru() != 0) 948 pkey_assert(0); 949 950 __wrpkru(shadow_pkru); 951 dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", 952 __func__, shadow_pkru); 953 last_pkru_faults = pkru_faults; 954 last_si_pkey = -1; 955} 956 957#define do_not_expect_pk_fault(msg) do { \ 958 if (last_pkru_faults != pkru_faults) \ 959 dprintf0("unexpected PK fault: %s\n", msg); \ 960 pkey_assert(last_pkru_faults == pkru_faults); \ 961} while (0) 962 963int test_fds[10] = { -1 }; 964int nr_test_fds; 965void __save_test_fd(int fd) 966{ 967 pkey_assert(fd >= 0); 968 pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); 969 test_fds[nr_test_fds] = fd; 970 nr_test_fds++; 971} 972 973int get_test_read_fd(void) 974{ 975 int test_fd = open("/etc/passwd", O_RDONLY); 976 __save_test_fd(test_fd); 977 return test_fd; 978} 979 980void close_test_fds(void) 981{ 982 int i; 983 984 for (i = 0; i < nr_test_fds; i++) { 985 if (test_fds[i] < 0) 986 continue; 987 close(test_fds[i]); 988 test_fds[i] = -1; 989 } 990 nr_test_fds = 0; 991} 992 993#define barrier() __asm__ __volatile__("": : :"memory") 994__attribute__((noinline)) int read_ptr(int *ptr) 995{ 996 /* 997 * Keep GCC from optimizing this away somehow 998 */ 999 barrier(); 1000 return *ptr; 1001} 1002 1003void test_read_of_write_disabled_region(int *ptr, u16 pkey) 1004{ 1005 int ptr_contents; 1006 1007 dprintf1("disabling write access to PKEY[1], doing read\n"); 1008 pkey_write_deny(pkey); 1009 ptr_contents = read_ptr(ptr); 1010 dprintf1("*ptr: %d\n", ptr_contents); 1011 dprintf1("\n"); 1012} 1013void test_read_of_access_disabled_region(int *ptr, u16 pkey) 1014{ 1015 int ptr_contents; 1016 1017 dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); 1018 rdpkru(); 1019 pkey_access_deny(pkey); 1020 ptr_contents = read_ptr(ptr); 1021 dprintf1("*ptr: %d\n", ptr_contents); 1022 expected_pk_fault(pkey); 1023} 1024void test_write_of_write_disabled_region(int *ptr, u16 pkey) 1025{ 1026 dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); 1027 pkey_write_deny(pkey); 1028 *ptr = __LINE__; 1029 expected_pk_fault(pkey); 1030} 1031void test_write_of_access_disabled_region(int *ptr, u16 pkey) 1032{ 1033 dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); 1034 pkey_access_deny(pkey); 1035 *ptr = __LINE__; 1036 expected_pk_fault(pkey); 1037} 1038void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) 1039{ 1040 int ret; 1041 int test_fd = get_test_read_fd(); 1042 1043 dprintf1("disabling access to PKEY[%02d], " 1044 "having kernel read() to buffer\n", pkey); 1045 pkey_access_deny(pkey); 1046 ret = read(test_fd, ptr, 1); 1047 dprintf1("read ret: %d\n", ret); 1048 pkey_assert(ret); 1049} 1050void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) 1051{ 1052 int ret; 1053 int test_fd = get_test_read_fd(); 1054 1055 pkey_write_deny(pkey); 1056 ret = read(test_fd, ptr, 100); 1057 dprintf1("read ret: %d\n", ret); 1058 if (ret < 0 && (DEBUG_LEVEL > 0)) 1059 perror("verbose read result (OK for this to be bad)"); 1060 pkey_assert(ret); 1061} 1062 1063void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) 1064{ 1065 int pipe_ret, vmsplice_ret; 1066 struct iovec iov; 1067 int pipe_fds[2]; 1068 1069 pipe_ret = pipe(pipe_fds); 1070 1071 pkey_assert(pipe_ret == 0); 1072 dprintf1("disabling access to PKEY[%02d], " 1073 "having kernel vmsplice from buffer\n", pkey); 1074 pkey_access_deny(pkey); 1075 iov.iov_base = ptr; 1076 iov.iov_len = PAGE_SIZE; 1077 vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); 1078 dprintf1("vmsplice() ret: %d\n", vmsplice_ret); 1079 pkey_assert(vmsplice_ret == -1); 1080 1081 close(pipe_fds[0]); 1082 close(pipe_fds[1]); 1083} 1084 1085void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) 1086{ 1087 int ignored = 0xdada; 1088 int futex_ret; 1089 int some_int = __LINE__; 1090 1091 dprintf1("disabling write to PKEY[%02d], " 1092 "doing futex gunk in buffer\n", pkey); 1093 *ptr = some_int; 1094 pkey_write_deny(pkey); 1095 futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, 1096 &ignored, ignored); 1097 if (DEBUG_LEVEL > 0) 1098 perror("futex"); 1099 dprintf1("futex() ret: %d\n", futex_ret); 1100} 1101 1102/* Assumes that all pkeys other than 'pkey' are unallocated */ 1103void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) 1104{ 1105 int err; 1106 int i; 1107 1108 /* Note: 0 is the default pkey, so don't mess with it */ 1109 for (i = 1; i < NR_PKEYS; i++) { 1110 if (pkey == i) 1111 continue; 1112 1113 dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); 1114 err = sys_pkey_free(i); 1115 pkey_assert(err); 1116 1117 err = sys_pkey_free(i); 1118 pkey_assert(err); 1119 1120 err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); 1121 pkey_assert(err); 1122 } 1123} 1124 1125/* Assumes that all pkeys other than 'pkey' are unallocated */ 1126void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) 1127{ 1128 int err; 1129 int bad_pkey = NR_PKEYS+99; 1130 1131 /* pass a known-invalid pkey in: */ 1132 err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); 1133 pkey_assert(err); 1134} 1135 1136void become_child(void) 1137{ 1138 pid_t forkret; 1139 1140 forkret = fork(); 1141 pkey_assert(forkret >= 0); 1142 dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 1143 1144 if (!forkret) { 1145 /* in the child */ 1146 return; 1147 } 1148 exit(0); 1149} 1150 1151/* Assumes that all pkeys other than 'pkey' are unallocated */ 1152void test_pkey_alloc_exhaust(int *ptr, u16 pkey) 1153{ 1154 int err; 1155 int allocated_pkeys[NR_PKEYS] = {0}; 1156 int nr_allocated_pkeys = 0; 1157 int i; 1158 1159 for (i = 0; i < NR_PKEYS*3; i++) { 1160 int new_pkey; 1161 dprintf1("%s() alloc loop: %d\n", __func__, i); 1162 new_pkey = alloc_pkey(); 1163 dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, 1164 __LINE__, err, __rdpkru(), shadow_pkru); 1165 rdpkru(); /* for shadow checking */ 1166 dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); 1167 if ((new_pkey == -1) && (errno == ENOSPC)) { 1168 dprintf2("%s() failed to allocate pkey after %d tries\n", 1169 __func__, nr_allocated_pkeys); 1170 } else { 1171 /* 1172 * Ensure the number of successes never 1173 * exceeds the number of keys supported 1174 * in the hardware. 1175 */ 1176 pkey_assert(nr_allocated_pkeys < NR_PKEYS); 1177 allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1178 } 1179 1180 /* 1181 * Make sure that allocation state is properly 1182 * preserved across fork(). 1183 */ 1184 if (i == NR_PKEYS*2) 1185 become_child(); 1186 } 1187 1188 dprintf3("%s()::%d\n", __func__, __LINE__); 1189 1190 /* 1191 * There are 16 pkeys supported in hardware. Three are 1192 * allocated by the time we get here: 1193 * 1. The default key (0) 1194 * 2. One possibly consumed by an execute-only mapping. 1195 * 3. One allocated by the test code and passed in via 1196 * 'pkey' to this function. 1197 * Ensure that we can allocate at least another 13 (16-3). 1198 */ 1199 pkey_assert(i >= NR_PKEYS-3); 1200 1201 for (i = 0; i < nr_allocated_pkeys; i++) { 1202 err = sys_pkey_free(allocated_pkeys[i]); 1203 pkey_assert(!err); 1204 rdpkru(); /* for shadow checking */ 1205 } 1206} 1207 1208/* 1209 * pkey 0 is special. It is allocated by default, so you do not 1210 * have to call pkey_alloc() to use it first. Make sure that it 1211 * is usable. 1212 */ 1213void test_mprotect_with_pkey_0(int *ptr, u16 pkey) 1214{ 1215 long size; 1216 int prot; 1217 1218 assert(pkey_last_malloc_record); 1219 size = pkey_last_malloc_record->size; 1220 /* 1221 * This is a bit of a hack. But mprotect() requires 1222 * huge-page-aligned sizes when operating on hugetlbfs. 1223 * So, make sure that we use something that's a multiple 1224 * of a huge page when we can. 1225 */ 1226 if (size >= HPAGE_SIZE) 1227 size = HPAGE_SIZE; 1228 prot = pkey_last_malloc_record->prot; 1229 1230 /* Use pkey 0 */ 1231 mprotect_pkey(ptr, size, prot, 0); 1232 1233 /* Make sure that we can set it back to the original pkey. */ 1234 mprotect_pkey(ptr, size, prot, pkey); 1235} 1236 1237void test_ptrace_of_child(int *ptr, u16 pkey) 1238{ 1239 __attribute__((__unused__)) int peek_result; 1240 pid_t child_pid; 1241 void *ignored = 0; 1242 long ret; 1243 int status; 1244 /* 1245 * This is the "control" for our little expermient. Make sure 1246 * we can always access it when ptracing. 1247 */ 1248 int *plain_ptr_unaligned = malloc(HPAGE_SIZE); 1249 int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); 1250 1251 /* 1252 * Fork a child which is an exact copy of this process, of course. 1253 * That means we can do all of our tests via ptrace() and then plain 1254 * memory access and ensure they work differently. 1255 */ 1256 child_pid = fork_lazy_child(); 1257 dprintf1("[%d] child pid: %d\n", getpid(), child_pid); 1258 1259 ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); 1260 if (ret) 1261 perror("attach"); 1262 dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); 1263 pkey_assert(ret != -1); 1264 ret = waitpid(child_pid, &status, WUNTRACED); 1265 if ((ret != child_pid) || !(WIFSTOPPED(status))) { 1266 fprintf(stderr, "weird waitpid result %ld stat %x\n", 1267 ret, status); 1268 pkey_assert(0); 1269 } 1270 dprintf2("waitpid ret: %ld\n", ret); 1271 dprintf2("waitpid status: %d\n", status); 1272 1273 pkey_access_deny(pkey); 1274 pkey_write_deny(pkey); 1275 1276 /* Write access, untested for now: 1277 ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); 1278 pkey_assert(ret != -1); 1279 dprintf1("poke at %p: %ld\n", peek_at, ret); 1280 */ 1281 1282 /* 1283 * Try to access the pkey-protected "ptr" via ptrace: 1284 */ 1285 ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); 1286 /* expect it to work, without an error: */ 1287 pkey_assert(ret != -1); 1288 /* Now access from the current task, and expect an exception: */ 1289 peek_result = read_ptr(ptr); 1290 expected_pk_fault(pkey); 1291 1292 /* 1293 * Try to access the NON-pkey-protected "plain_ptr" via ptrace: 1294 */ 1295 ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); 1296 /* expect it to work, without an error: */ 1297 pkey_assert(ret != -1); 1298 /* Now access from the current task, and expect NO exception: */ 1299 peek_result = read_ptr(plain_ptr); 1300 do_not_expect_pk_fault("read plain pointer after ptrace"); 1301 1302 ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); 1303 pkey_assert(ret != -1); 1304 1305 ret = kill(child_pid, SIGKILL); 1306 pkey_assert(ret != -1); 1307 1308 wait(&status); 1309 1310 free(plain_ptr_unaligned); 1311} 1312 1313void *get_pointer_to_instructions(void) 1314{ 1315 void *p1; 1316 1317 p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); 1318 dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); 1319 /* lots_o_noops_around_write should be page-aligned already */ 1320 assert(p1 == &lots_o_noops_around_write); 1321 1322 /* Point 'p1' at the *second* page of the function: */ 1323 p1 += PAGE_SIZE; 1324 1325 /* 1326 * Try to ensure we fault this in on next touch to ensure 1327 * we get an instruction fault as opposed to a data one 1328 */ 1329 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1330 1331 return p1; 1332} 1333 1334void test_executing_on_unreadable_memory(int *ptr, u16 pkey) 1335{ 1336 void *p1; 1337 int scratch; 1338 int ptr_contents; 1339 int ret; 1340 1341 p1 = get_pointer_to_instructions(); 1342 lots_o_noops_around_write(&scratch); 1343 ptr_contents = read_ptr(p1); 1344 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1345 1346 ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); 1347 pkey_assert(!ret); 1348 pkey_access_deny(pkey); 1349 1350 dprintf2("pkru: %x\n", rdpkru()); 1351 1352 /* 1353 * Make sure this is an *instruction* fault 1354 */ 1355 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1356 lots_o_noops_around_write(&scratch); 1357 do_not_expect_pk_fault("executing on PROT_EXEC memory"); 1358 ptr_contents = read_ptr(p1); 1359 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1360 expected_pk_fault(pkey); 1361} 1362 1363void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) 1364{ 1365 void *p1; 1366 int scratch; 1367 int ptr_contents; 1368 int ret; 1369 1370 dprintf1("%s() start\n", __func__); 1371 1372 p1 = get_pointer_to_instructions(); 1373 lots_o_noops_around_write(&scratch); 1374 ptr_contents = read_ptr(p1); 1375 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1376 1377 /* Use a *normal* mprotect(), not mprotect_pkey(): */ 1378 ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); 1379 pkey_assert(!ret); 1380 1381 dprintf2("pkru: %x\n", rdpkru()); 1382 1383 /* Make sure this is an *instruction* fault */ 1384 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1385 lots_o_noops_around_write(&scratch); 1386 do_not_expect_pk_fault("executing on PROT_EXEC memory"); 1387 ptr_contents = read_ptr(p1); 1388 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1389 expected_pk_fault(UNKNOWN_PKEY); 1390 1391 /* 1392 * Put the memory back to non-PROT_EXEC. Should clear the 1393 * exec-only pkey off the VMA and allow it to be readable 1394 * again. Go to PROT_NONE first to check for a kernel bug 1395 * that did not clear the pkey when doing PROT_NONE. 1396 */ 1397 ret = mprotect(p1, PAGE_SIZE, PROT_NONE); 1398 pkey_assert(!ret); 1399 1400 ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); 1401 pkey_assert(!ret); 1402 ptr_contents = read_ptr(p1); 1403 do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); 1404} 1405 1406void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) 1407{ 1408 int size = PAGE_SIZE; 1409 int sret; 1410 1411 if (cpu_has_pku()) { 1412 dprintf1("SKIP: %s: no CPU support\n", __func__); 1413 return; 1414 } 1415 1416 sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); 1417 pkey_assert(sret < 0); 1418} 1419 1420void (*pkey_tests[])(int *ptr, u16 pkey) = { 1421 test_read_of_write_disabled_region, 1422 test_read_of_access_disabled_region, 1423 test_write_of_write_disabled_region, 1424 test_write_of_access_disabled_region, 1425 test_kernel_write_of_access_disabled_region, 1426 test_kernel_write_of_write_disabled_region, 1427 test_kernel_gup_of_access_disabled_region, 1428 test_kernel_gup_write_to_write_disabled_region, 1429 test_executing_on_unreadable_memory, 1430 test_implicit_mprotect_exec_only_memory, 1431 test_mprotect_with_pkey_0, 1432 test_ptrace_of_child, 1433 test_pkey_syscalls_on_non_allocated_pkey, 1434 test_pkey_syscalls_bad_args, 1435 test_pkey_alloc_exhaust, 1436}; 1437 1438void run_tests_once(void) 1439{ 1440 int *ptr; 1441 int prot = PROT_READ|PROT_WRITE; 1442 1443 for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { 1444 int pkey; 1445 int orig_pkru_faults = pkru_faults; 1446 1447 dprintf1("======================\n"); 1448 dprintf1("test %d preparing...\n", test_nr); 1449 1450 tracing_on(); 1451 pkey = alloc_random_pkey(); 1452 dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); 1453 ptr = malloc_pkey(PAGE_SIZE, prot, pkey); 1454 dprintf1("test %d starting...\n", test_nr); 1455 pkey_tests[test_nr](ptr, pkey); 1456 dprintf1("freeing test memory: %p\n", ptr); 1457 free_pkey_malloc(ptr); 1458 sys_pkey_free(pkey); 1459 1460 dprintf1("pkru_faults: %d\n", pkru_faults); 1461 dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); 1462 1463 tracing_off(); 1464 close_test_fds(); 1465 1466 printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); 1467 dprintf1("======================\n\n"); 1468 } 1469 iteration_nr++; 1470} 1471 1472void pkey_setup_shadow(void) 1473{ 1474 shadow_pkru = __rdpkru(); 1475} 1476 1477int main(void) 1478{ 1479 int nr_iterations = 22; 1480 1481 setup_handlers(); 1482 1483 printf("has pku: %d\n", cpu_has_pku()); 1484 1485 if (!cpu_has_pku()) { 1486 int size = PAGE_SIZE; 1487 int *ptr; 1488 1489 printf("running PKEY tests for unsupported CPU/OS\n"); 1490 1491 ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 1492 assert(ptr != (void *)-1); 1493 test_mprotect_pkey_on_unsupported_cpu(ptr, 1); 1494 exit(0); 1495 } 1496 1497 pkey_setup_shadow(); 1498 printf("startup pkru: %x\n", rdpkru()); 1499 setup_hugetlbfs(); 1500 1501 while (nr_iterations-- > 0) 1502 run_tests_once(); 1503 1504 printf("done (all tests OK)\n"); 1505 return 0; 1506}