Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.12-rc5 1580 lines 42 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst) 4 * 5 * There are examples in here of: 6 * * how to set protection keys on memory 7 * * how to set/clear bits in pkey registers (the rights register) 8 * * how to handle SEGV_PKUERR signals and extract pkey-relevant 9 * information from the siginfo 10 * 11 * Things to add: 12 * make sure KSM and KSM COW breaking works 13 * prefault pages in at malloc, or not 14 * protect MPX bounds tables with protection keys? 15 * make sure VMA splitting/merging is working correctly 16 * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys 17 * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel 18 * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks 19 * 20 * Compile like this: 21 * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm 22 * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm 23 */ 24#define _GNU_SOURCE 25#define __SANE_USERSPACE_TYPES__ 26#include <errno.h> 27#include <linux/futex.h> 28#include <time.h> 29#include <sys/time.h> 30#include <sys/syscall.h> 31#include <string.h> 32#include <stdio.h> 33#include <stdint.h> 34#include <stdbool.h> 35#include <signal.h> 36#include <assert.h> 37#include <stdlib.h> 38#include <ucontext.h> 39#include <sys/mman.h> 40#include <sys/types.h> 41#include <sys/wait.h> 42#include <sys/stat.h> 43#include <fcntl.h> 44#include <unistd.h> 45#include <sys/ptrace.h> 46#include <setjmp.h> 47 48#include "pkey-helpers.h" 49 50int iteration_nr = 1; 51int test_nr; 52 53u64 shadow_pkey_reg; 54int dprint_in_signal; 55char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; 56 57void cat_into_file(char *str, char *file) 58{ 59 int fd = open(file, O_RDWR); 60 int ret; 61 62 dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); 63 /* 64 * these need to be raw because they are called under 65 * pkey_assert() 66 */ 67 if (fd < 0) { 68 fprintf(stderr, "error opening '%s'\n", str); 69 perror("error: "); 70 exit(__LINE__); 71 } 72 73 ret = write(fd, str, strlen(str)); 74 if (ret != strlen(str)) { 75 perror("write to file failed"); 76 fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); 77 exit(__LINE__); 78 } 79 close(fd); 80} 81 82#if CONTROL_TRACING > 0 83static int warned_tracing; 84int tracing_root_ok(void) 85{ 86 if (geteuid() != 0) { 87 if (!warned_tracing) 88 fprintf(stderr, "WARNING: not run as root, " 89 "can not do tracing control\n"); 90 warned_tracing = 1; 91 return 0; 92 } 93 return 1; 94} 95#endif 96 97void tracing_on(void) 98{ 99#if CONTROL_TRACING > 0 100#define TRACEDIR "/sys/kernel/debug/tracing" 101 char pidstr[32]; 102 103 if (!tracing_root_ok()) 104 return; 105 106 sprintf(pidstr, "%d", getpid()); 107 cat_into_file("0", TRACEDIR "/tracing_on"); 108 cat_into_file("\n", TRACEDIR "/trace"); 109 if (1) { 110 cat_into_file("function_graph", TRACEDIR "/current_tracer"); 111 cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); 112 } else { 113 cat_into_file("nop", TRACEDIR "/current_tracer"); 114 } 115 cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); 116 cat_into_file("1", TRACEDIR "/tracing_on"); 117 dprintf1("enabled tracing\n"); 118#endif 119} 120 121void tracing_off(void) 122{ 123#if CONTROL_TRACING > 0 124 if (!tracing_root_ok()) 125 return; 126 cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); 127#endif 128} 129 130void abort_hooks(void) 131{ 132 fprintf(stderr, "running %s()...\n", __func__); 133 tracing_off(); 134#ifdef SLEEP_ON_ABORT 135 sleep(SLEEP_ON_ABORT); 136#endif 137} 138 139/* 140 * This attempts to have roughly a page of instructions followed by a few 141 * instructions that do a write, and another page of instructions. That 142 * way, we are pretty sure that the write is in the second page of 143 * instructions and has at least a page of padding behind it. 144 * 145 * *That* lets us be sure to madvise() away the write instruction, which 146 * will then fault, which makes sure that the fault code handles 147 * execute-only memory properly. 148 */ 149#ifdef __powerpc64__ 150/* This way, both 4K and 64K alignment are maintained */ 151__attribute__((__aligned__(65536))) 152#else 153__attribute__((__aligned__(PAGE_SIZE))) 154#endif 155void lots_o_noops_around_write(int *write_to_me) 156{ 157 dprintf3("running %s()\n", __func__); 158 __page_o_noops(); 159 /* Assume this happens in the second page of instructions: */ 160 *write_to_me = __LINE__; 161 /* pad out by another page: */ 162 __page_o_noops(); 163 dprintf3("%s() done\n", __func__); 164} 165 166void dump_mem(void *dumpme, int len_bytes) 167{ 168 char *c = (void *)dumpme; 169 int i; 170 171 for (i = 0; i < len_bytes; i += sizeof(u64)) { 172 u64 *ptr = (u64 *)(c + i); 173 dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr); 174 } 175} 176 177static u32 hw_pkey_get(int pkey, unsigned long flags) 178{ 179 u64 pkey_reg = __read_pkey_reg(); 180 181 dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", 182 __func__, pkey, flags, 0, 0); 183 dprintf2("%s() raw pkey_reg: %016llx\n", __func__, pkey_reg); 184 185 return (u32) get_pkey_bits(pkey_reg, pkey); 186} 187 188static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) 189{ 190 u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); 191 u64 old_pkey_reg = __read_pkey_reg(); 192 u64 new_pkey_reg; 193 194 /* make sure that 'rights' only contains the bits we expect: */ 195 assert(!(rights & ~mask)); 196 197 /* modify bits accordingly in old pkey_reg and assign it */ 198 new_pkey_reg = set_pkey_bits(old_pkey_reg, pkey, rights); 199 200 __write_pkey_reg(new_pkey_reg); 201 202 dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" 203 " pkey_reg now: %016llx old_pkey_reg: %016llx\n", 204 __func__, pkey, rights, flags, 0, __read_pkey_reg(), 205 old_pkey_reg); 206 return 0; 207} 208 209void pkey_disable_set(int pkey, int flags) 210{ 211 unsigned long syscall_flags = 0; 212 int ret; 213 int pkey_rights; 214 u64 orig_pkey_reg = read_pkey_reg(); 215 216 dprintf1("START->%s(%d, 0x%x)\n", __func__, 217 pkey, flags); 218 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 219 220 pkey_rights = hw_pkey_get(pkey, syscall_flags); 221 222 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 223 pkey, pkey, pkey_rights); 224 225 pkey_assert(pkey_rights >= 0); 226 227 pkey_rights |= flags; 228 229 ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); 230 assert(!ret); 231 /* pkey_reg and flags have the same format */ 232 shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights); 233 dprintf1("%s(%d) shadow: 0x%016llx\n", 234 __func__, pkey, shadow_pkey_reg); 235 236 pkey_assert(ret >= 0); 237 238 pkey_rights = hw_pkey_get(pkey, syscall_flags); 239 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 240 pkey, pkey, pkey_rights); 241 242 dprintf1("%s(%d) pkey_reg: 0x%016llx\n", 243 __func__, pkey, read_pkey_reg()); 244 if (flags) 245 pkey_assert(read_pkey_reg() >= orig_pkey_reg); 246 dprintf1("END<---%s(%d, 0x%x)\n", __func__, 247 pkey, flags); 248} 249 250void pkey_disable_clear(int pkey, int flags) 251{ 252 unsigned long syscall_flags = 0; 253 int ret; 254 int pkey_rights = hw_pkey_get(pkey, syscall_flags); 255 u64 orig_pkey_reg = read_pkey_reg(); 256 257 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 258 259 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 260 pkey, pkey, pkey_rights); 261 pkey_assert(pkey_rights >= 0); 262 263 pkey_rights &= ~flags; 264 265 ret = hw_pkey_set(pkey, pkey_rights, 0); 266 shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights); 267 pkey_assert(ret >= 0); 268 269 pkey_rights = hw_pkey_get(pkey, syscall_flags); 270 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, 271 pkey, pkey, pkey_rights); 272 273 dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, 274 pkey, read_pkey_reg()); 275 if (flags) 276 assert(read_pkey_reg() <= orig_pkey_reg); 277} 278 279void pkey_write_allow(int pkey) 280{ 281 pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); 282} 283void pkey_write_deny(int pkey) 284{ 285 pkey_disable_set(pkey, PKEY_DISABLE_WRITE); 286} 287void pkey_access_allow(int pkey) 288{ 289 pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); 290} 291void pkey_access_deny(int pkey) 292{ 293 pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); 294} 295 296/* Failed address bound checks: */ 297#ifndef SEGV_BNDERR 298# define SEGV_BNDERR 3 299#endif 300 301#ifndef SEGV_PKUERR 302# define SEGV_PKUERR 4 303#endif 304 305static char *si_code_str(int si_code) 306{ 307 if (si_code == SEGV_MAPERR) 308 return "SEGV_MAPERR"; 309 if (si_code == SEGV_ACCERR) 310 return "SEGV_ACCERR"; 311 if (si_code == SEGV_BNDERR) 312 return "SEGV_BNDERR"; 313 if (si_code == SEGV_PKUERR) 314 return "SEGV_PKUERR"; 315 return "UNKNOWN"; 316} 317 318int pkey_faults; 319int last_si_pkey = -1; 320void signal_handler(int signum, siginfo_t *si, void *vucontext) 321{ 322 ucontext_t *uctxt = vucontext; 323 int trapno; 324 unsigned long ip; 325 char *fpregs; 326#if defined(__i386__) || defined(__x86_64__) /* arch */ 327 u32 *pkey_reg_ptr; 328 int pkey_reg_offset; 329#endif /* arch */ 330 u64 siginfo_pkey; 331 u32 *si_pkey_ptr; 332 333 dprint_in_signal = 1; 334 dprintf1(">>>>===============SIGSEGV============================\n"); 335 dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", 336 __func__, __LINE__, 337 __read_pkey_reg(), shadow_pkey_reg); 338 339 trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; 340 ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; 341 fpregs = (char *) uctxt->uc_mcontext.fpregs; 342 343 dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", 344 __func__, trapno, ip, si_code_str(si->si_code), 345 si->si_code); 346 347#if defined(__i386__) || defined(__x86_64__) /* arch */ 348#ifdef __i386__ 349 /* 350 * 32-bit has some extra padding so that userspace can tell whether 351 * the XSTATE header is present in addition to the "legacy" FPU 352 * state. We just assume that it is here. 353 */ 354 fpregs += 0x70; 355#endif /* i386 */ 356 pkey_reg_offset = pkey_reg_xstate_offset(); 357 pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]); 358 359 /* 360 * If we got a PKEY fault, we *HAVE* to have at least one bit set in 361 * here. 362 */ 363 dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); 364 if (DEBUG_LEVEL > 4) 365 dump_mem(pkey_reg_ptr - 128, 256); 366 pkey_assert(*pkey_reg_ptr); 367#endif /* arch */ 368 369 dprintf1("siginfo: %p\n", si); 370 dprintf1(" fpregs: %p\n", fpregs); 371 372 if ((si->si_code == SEGV_MAPERR) || 373 (si->si_code == SEGV_ACCERR) || 374 (si->si_code == SEGV_BNDERR)) { 375 printf("non-PK si_code, exiting...\n"); 376 exit(4); 377 } 378 379 si_pkey_ptr = siginfo_get_pkey_ptr(si); 380 dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); 381 dump_mem((u8 *)si_pkey_ptr - 8, 24); 382 siginfo_pkey = *si_pkey_ptr; 383 pkey_assert(siginfo_pkey < NR_PKEYS); 384 last_si_pkey = siginfo_pkey; 385 386 /* 387 * need __read_pkey_reg() version so we do not do shadow_pkey_reg 388 * checking 389 */ 390 dprintf1("signal pkey_reg from pkey_reg: %016llx\n", 391 __read_pkey_reg()); 392 dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey); 393#if defined(__i386__) || defined(__x86_64__) /* arch */ 394 dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); 395 *(u64 *)pkey_reg_ptr = 0x00000000; 396 dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n"); 397#elif defined(__powerpc64__) /* arch */ 398 /* restore access and let the faulting instruction continue */ 399 pkey_access_allow(siginfo_pkey); 400#endif /* arch */ 401 pkey_faults++; 402 dprintf1("<<<<==================================================\n"); 403 dprint_in_signal = 0; 404} 405 406int wait_all_children(void) 407{ 408 int status; 409 return waitpid(-1, &status, 0); 410} 411 412void sig_chld(int x) 413{ 414 dprint_in_signal = 1; 415 dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); 416 dprint_in_signal = 0; 417} 418 419void setup_sigsegv_handler(void) 420{ 421 int r, rs; 422 struct sigaction newact; 423 struct sigaction oldact; 424 425 /* #PF is mapped to sigsegv */ 426 int signum = SIGSEGV; 427 428 newact.sa_handler = 0; 429 newact.sa_sigaction = signal_handler; 430 431 /*sigset_t - signals to block while in the handler */ 432 /* get the old signal mask. */ 433 rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); 434 pkey_assert(rs == 0); 435 436 /* call sa_sigaction, not sa_handler*/ 437 newact.sa_flags = SA_SIGINFO; 438 439 newact.sa_restorer = 0; /* void(*)(), obsolete */ 440 r = sigaction(signum, &newact, &oldact); 441 r = sigaction(SIGALRM, &newact, &oldact); 442 pkey_assert(r == 0); 443} 444 445void setup_handlers(void) 446{ 447 signal(SIGCHLD, &sig_chld); 448 setup_sigsegv_handler(); 449} 450 451pid_t fork_lazy_child(void) 452{ 453 pid_t forkret; 454 455 forkret = fork(); 456 pkey_assert(forkret >= 0); 457 dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 458 459 if (!forkret) { 460 /* in the child */ 461 while (1) { 462 dprintf1("child sleeping...\n"); 463 sleep(30); 464 } 465 } 466 return forkret; 467} 468 469int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 470 unsigned long pkey) 471{ 472 int sret; 473 474 dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, 475 ptr, size, orig_prot, pkey); 476 477 errno = 0; 478 sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); 479 if (errno) { 480 dprintf2("SYS_mprotect_key sret: %d\n", sret); 481 dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); 482 dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); 483 if (DEBUG_LEVEL >= 2) 484 perror("SYS_mprotect_pkey"); 485 } 486 return sret; 487} 488 489int sys_pkey_alloc(unsigned long flags, unsigned long init_val) 490{ 491 int ret = syscall(SYS_pkey_alloc, flags, init_val); 492 dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", 493 __func__, flags, init_val, ret, errno); 494 return ret; 495} 496 497int alloc_pkey(void) 498{ 499 int ret; 500 unsigned long init_val = 0x0; 501 502 dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", 503 __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); 504 ret = sys_pkey_alloc(0, init_val); 505 /* 506 * pkey_alloc() sets PKEY register, so we need to reflect it in 507 * shadow_pkey_reg: 508 */ 509 dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 510 " shadow: 0x%016llx\n", 511 __func__, __LINE__, ret, __read_pkey_reg(), 512 shadow_pkey_reg); 513 if (ret) { 514 /* clear both the bits: */ 515 shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret, 516 ~PKEY_MASK); 517 dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 518 " shadow: 0x%016llx\n", 519 __func__, 520 __LINE__, ret, __read_pkey_reg(), 521 shadow_pkey_reg); 522 /* 523 * move the new state in from init_val 524 * (remember, we cheated and init_val == pkey_reg format) 525 */ 526 shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret, 527 init_val); 528 } 529 dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 530 " shadow: 0x%016llx\n", 531 __func__, __LINE__, ret, __read_pkey_reg(), 532 shadow_pkey_reg); 533 dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); 534 /* for shadow checking: */ 535 read_pkey_reg(); 536 dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx" 537 " shadow: 0x%016llx\n", 538 __func__, __LINE__, ret, __read_pkey_reg(), 539 shadow_pkey_reg); 540 return ret; 541} 542 543int sys_pkey_free(unsigned long pkey) 544{ 545 int ret = syscall(SYS_pkey_free, pkey); 546 dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); 547 return ret; 548} 549 550/* 551 * I had a bug where pkey bits could be set by mprotect() but 552 * not cleared. This ensures we get lots of random bit sets 553 * and clears on the vma and pte pkey bits. 554 */ 555int alloc_random_pkey(void) 556{ 557 int max_nr_pkey_allocs; 558 int ret; 559 int i; 560 int alloced_pkeys[NR_PKEYS]; 561 int nr_alloced = 0; 562 int random_index; 563 memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); 564 srand((unsigned int)time(NULL)); 565 566 /* allocate every possible key and make a note of which ones we got */ 567 max_nr_pkey_allocs = NR_PKEYS; 568 for (i = 0; i < max_nr_pkey_allocs; i++) { 569 int new_pkey = alloc_pkey(); 570 if (new_pkey < 0) 571 break; 572 alloced_pkeys[nr_alloced++] = new_pkey; 573 } 574 575 pkey_assert(nr_alloced > 0); 576 /* select a random one out of the allocated ones */ 577 random_index = rand() % nr_alloced; 578 ret = alloced_pkeys[random_index]; 579 /* now zero it out so we don't free it next */ 580 alloced_pkeys[random_index] = 0; 581 582 /* go through the allocated ones that we did not want and free them */ 583 for (i = 0; i < nr_alloced; i++) { 584 int free_ret; 585 if (!alloced_pkeys[i]) 586 continue; 587 free_ret = sys_pkey_free(alloced_pkeys[i]); 588 pkey_assert(!free_ret); 589 } 590 dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 591 " shadow: 0x%016llx\n", __func__, 592 __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); 593 return ret; 594} 595 596int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 597 unsigned long pkey) 598{ 599 int nr_iterations = random() % 100; 600 int ret; 601 602 while (0) { 603 int rpkey = alloc_random_pkey(); 604 ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 605 dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 606 ptr, size, orig_prot, pkey, ret); 607 if (nr_iterations-- < 0) 608 break; 609 610 dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 611 " shadow: 0x%016llx\n", 612 __func__, __LINE__, ret, __read_pkey_reg(), 613 shadow_pkey_reg); 614 sys_pkey_free(rpkey); 615 dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 616 " shadow: 0x%016llx\n", 617 __func__, __LINE__, ret, __read_pkey_reg(), 618 shadow_pkey_reg); 619 } 620 pkey_assert(pkey < NR_PKEYS); 621 622 ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 623 dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 624 ptr, size, orig_prot, pkey, ret); 625 pkey_assert(!ret); 626 dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" 627 " shadow: 0x%016llx\n", __func__, 628 __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); 629 return ret; 630} 631 632struct pkey_malloc_record { 633 void *ptr; 634 long size; 635 int prot; 636}; 637struct pkey_malloc_record *pkey_malloc_records; 638struct pkey_malloc_record *pkey_last_malloc_record; 639long nr_pkey_malloc_records; 640void record_pkey_malloc(void *ptr, long size, int prot) 641{ 642 long i; 643 struct pkey_malloc_record *rec = NULL; 644 645 for (i = 0; i < nr_pkey_malloc_records; i++) { 646 rec = &pkey_malloc_records[i]; 647 /* find a free record */ 648 if (rec) 649 break; 650 } 651 if (!rec) { 652 /* every record is full */ 653 size_t old_nr_records = nr_pkey_malloc_records; 654 size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); 655 size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); 656 dprintf2("new_nr_records: %zd\n", new_nr_records); 657 dprintf2("new_size: %zd\n", new_size); 658 pkey_malloc_records = realloc(pkey_malloc_records, new_size); 659 pkey_assert(pkey_malloc_records != NULL); 660 rec = &pkey_malloc_records[nr_pkey_malloc_records]; 661 /* 662 * realloc() does not initialize memory, so zero it from 663 * the first new record all the way to the end. 664 */ 665 for (i = 0; i < new_nr_records - old_nr_records; i++) 666 memset(rec + i, 0, sizeof(*rec)); 667 } 668 dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", 669 (int)(rec - pkey_malloc_records), rec, ptr, size); 670 rec->ptr = ptr; 671 rec->size = size; 672 rec->prot = prot; 673 pkey_last_malloc_record = rec; 674 nr_pkey_malloc_records++; 675} 676 677void free_pkey_malloc(void *ptr) 678{ 679 long i; 680 int ret; 681 dprintf3("%s(%p)\n", __func__, ptr); 682 for (i = 0; i < nr_pkey_malloc_records; i++) { 683 struct pkey_malloc_record *rec = &pkey_malloc_records[i]; 684 dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", 685 ptr, i, rec, rec->ptr, rec->size); 686 if ((ptr < rec->ptr) || 687 (ptr >= rec->ptr + rec->size)) 688 continue; 689 690 dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", 691 ptr, i, rec, rec->ptr, rec->size); 692 nr_pkey_malloc_records--; 693 ret = munmap(rec->ptr, rec->size); 694 dprintf3("munmap ret: %d\n", ret); 695 pkey_assert(!ret); 696 dprintf3("clearing rec->ptr, rec: %p\n", rec); 697 rec->ptr = NULL; 698 dprintf3("done clearing rec->ptr, rec: %p\n", rec); 699 return; 700 } 701 pkey_assert(false); 702} 703 704 705void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) 706{ 707 void *ptr; 708 int ret; 709 710 read_pkey_reg(); 711 dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 712 size, prot, pkey); 713 pkey_assert(pkey < NR_PKEYS); 714 ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 715 pkey_assert(ptr != (void *)-1); 716 ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); 717 pkey_assert(!ret); 718 record_pkey_malloc(ptr, size, prot); 719 read_pkey_reg(); 720 721 dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); 722 return ptr; 723} 724 725void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) 726{ 727 int ret; 728 void *ptr; 729 730 dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 731 size, prot, pkey); 732 /* 733 * Guarantee we can fit at least one huge page in the resulting 734 * allocation by allocating space for 2: 735 */ 736 size = ALIGN_UP(size, HPAGE_SIZE * 2); 737 ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 738 pkey_assert(ptr != (void *)-1); 739 record_pkey_malloc(ptr, size, prot); 740 mprotect_pkey(ptr, size, prot, pkey); 741 742 dprintf1("unaligned ptr: %p\n", ptr); 743 ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); 744 dprintf1(" aligned ptr: %p\n", ptr); 745 ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); 746 dprintf1("MADV_HUGEPAGE ret: %d\n", ret); 747 ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); 748 dprintf1("MADV_WILLNEED ret: %d\n", ret); 749 memset(ptr, 0, HPAGE_SIZE); 750 751 dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); 752 return ptr; 753} 754 755int hugetlb_setup_ok; 756#define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages" 757#define GET_NR_HUGE_PAGES 10 758void setup_hugetlbfs(void) 759{ 760 int err; 761 int fd; 762 char buf[256]; 763 long hpagesz_kb; 764 long hpagesz_mb; 765 766 if (geteuid() != 0) { 767 fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); 768 return; 769 } 770 771 cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); 772 773 /* 774 * Now go make sure that we got the pages and that they 775 * are PMD-level pages. Someone might have made PUD-level 776 * pages the default. 777 */ 778 hpagesz_kb = HPAGE_SIZE / 1024; 779 hpagesz_mb = hpagesz_kb / 1024; 780 sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb); 781 fd = open(buf, O_RDONLY); 782 if (fd < 0) { 783 fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n", 784 hpagesz_mb, strerror(errno)); 785 return; 786 } 787 788 /* -1 to guarantee leaving the trailing \0 */ 789 err = read(fd, buf, sizeof(buf)-1); 790 close(fd); 791 if (err <= 0) { 792 fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n", 793 hpagesz_mb, strerror(errno)); 794 return; 795 } 796 797 if (atoi(buf) != GET_NR_HUGE_PAGES) { 798 fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n", 799 hpagesz_mb, buf, GET_NR_HUGE_PAGES); 800 return; 801 } 802 803 hugetlb_setup_ok = 1; 804} 805 806void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) 807{ 808 void *ptr; 809 int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; 810 811 if (!hugetlb_setup_ok) 812 return PTR_ERR_ENOTSUP; 813 814 dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); 815 size = ALIGN_UP(size, HPAGE_SIZE * 2); 816 pkey_assert(pkey < NR_PKEYS); 817 ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); 818 pkey_assert(ptr != (void *)-1); 819 mprotect_pkey(ptr, size, prot, pkey); 820 821 record_pkey_malloc(ptr, size, prot); 822 823 dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); 824 return ptr; 825} 826 827void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) 828{ 829 void *ptr; 830 int fd; 831 832 dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 833 size, prot, pkey); 834 pkey_assert(pkey < NR_PKEYS); 835 fd = open("/dax/foo", O_RDWR); 836 pkey_assert(fd >= 0); 837 838 ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); 839 pkey_assert(ptr != (void *)-1); 840 841 mprotect_pkey(ptr, size, prot, pkey); 842 843 record_pkey_malloc(ptr, size, prot); 844 845 dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); 846 close(fd); 847 return ptr; 848} 849 850void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { 851 852 malloc_pkey_with_mprotect, 853 malloc_pkey_with_mprotect_subpage, 854 malloc_pkey_anon_huge, 855 malloc_pkey_hugetlb 856/* can not do direct with the pkey_mprotect() API: 857 malloc_pkey_mmap_direct, 858 malloc_pkey_mmap_dax, 859*/ 860}; 861 862void *malloc_pkey(long size, int prot, u16 pkey) 863{ 864 void *ret; 865 static int malloc_type; 866 int nr_malloc_types = ARRAY_SIZE(pkey_malloc); 867 868 pkey_assert(pkey < NR_PKEYS); 869 870 while (1) { 871 pkey_assert(malloc_type < nr_malloc_types); 872 873 ret = pkey_malloc[malloc_type](size, prot, pkey); 874 pkey_assert(ret != (void *)-1); 875 876 malloc_type++; 877 if (malloc_type >= nr_malloc_types) 878 malloc_type = (random()%nr_malloc_types); 879 880 /* try again if the malloc_type we tried is unsupported */ 881 if (ret == PTR_ERR_ENOTSUP) 882 continue; 883 884 break; 885 } 886 887 dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, 888 size, prot, pkey, ret); 889 return ret; 890} 891 892int last_pkey_faults; 893#define UNKNOWN_PKEY -2 894void expected_pkey_fault(int pkey) 895{ 896 dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n", 897 __func__, last_pkey_faults, pkey_faults); 898 dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); 899 pkey_assert(last_pkey_faults + 1 == pkey_faults); 900 901 /* 902 * For exec-only memory, we do not know the pkey in 903 * advance, so skip this check. 904 */ 905 if (pkey != UNKNOWN_PKEY) 906 pkey_assert(last_si_pkey == pkey); 907 908#if defined(__i386__) || defined(__x86_64__) /* arch */ 909 /* 910 * The signal handler shold have cleared out PKEY register to let the 911 * test program continue. We now have to restore it. 912 */ 913 if (__read_pkey_reg() != 0) 914#else /* arch */ 915 if (__read_pkey_reg() != shadow_pkey_reg) 916#endif /* arch */ 917 pkey_assert(0); 918 919 __write_pkey_reg(shadow_pkey_reg); 920 dprintf1("%s() set pkey_reg=%016llx to restore state after signal " 921 "nuked it\n", __func__, shadow_pkey_reg); 922 last_pkey_faults = pkey_faults; 923 last_si_pkey = -1; 924} 925 926#define do_not_expect_pkey_fault(msg) do { \ 927 if (last_pkey_faults != pkey_faults) \ 928 dprintf0("unexpected PKey fault: %s\n", msg); \ 929 pkey_assert(last_pkey_faults == pkey_faults); \ 930} while (0) 931 932int test_fds[10] = { -1 }; 933int nr_test_fds; 934void __save_test_fd(int fd) 935{ 936 pkey_assert(fd >= 0); 937 pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); 938 test_fds[nr_test_fds] = fd; 939 nr_test_fds++; 940} 941 942int get_test_read_fd(void) 943{ 944 int test_fd = open("/etc/passwd", O_RDONLY); 945 __save_test_fd(test_fd); 946 return test_fd; 947} 948 949void close_test_fds(void) 950{ 951 int i; 952 953 for (i = 0; i < nr_test_fds; i++) { 954 if (test_fds[i] < 0) 955 continue; 956 close(test_fds[i]); 957 test_fds[i] = -1; 958 } 959 nr_test_fds = 0; 960} 961 962#define barrier() __asm__ __volatile__("": : :"memory") 963__attribute__((noinline)) int read_ptr(int *ptr) 964{ 965 /* 966 * Keep GCC from optimizing this away somehow 967 */ 968 barrier(); 969 return *ptr; 970} 971 972void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) 973{ 974 int i, err; 975 int max_nr_pkey_allocs; 976 int alloced_pkeys[NR_PKEYS]; 977 int nr_alloced = 0; 978 long size; 979 980 pkey_assert(pkey_last_malloc_record); 981 size = pkey_last_malloc_record->size; 982 /* 983 * This is a bit of a hack. But mprotect() requires 984 * huge-page-aligned sizes when operating on hugetlbfs. 985 * So, make sure that we use something that's a multiple 986 * of a huge page when we can. 987 */ 988 if (size >= HPAGE_SIZE) 989 size = HPAGE_SIZE; 990 991 /* allocate every possible key and make sure key-0 never got allocated */ 992 max_nr_pkey_allocs = NR_PKEYS; 993 for (i = 0; i < max_nr_pkey_allocs; i++) { 994 int new_pkey = alloc_pkey(); 995 pkey_assert(new_pkey != 0); 996 997 if (new_pkey < 0) 998 break; 999 alloced_pkeys[nr_alloced++] = new_pkey; 1000 } 1001 /* free all the allocated keys */ 1002 for (i = 0; i < nr_alloced; i++) { 1003 int free_ret; 1004 1005 if (!alloced_pkeys[i]) 1006 continue; 1007 free_ret = sys_pkey_free(alloced_pkeys[i]); 1008 pkey_assert(!free_ret); 1009 } 1010 1011 /* attach key-0 in various modes */ 1012 err = sys_mprotect_pkey(ptr, size, PROT_READ, 0); 1013 pkey_assert(!err); 1014 err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0); 1015 pkey_assert(!err); 1016 err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0); 1017 pkey_assert(!err); 1018 err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0); 1019 pkey_assert(!err); 1020 err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0); 1021 pkey_assert(!err); 1022} 1023 1024void test_read_of_write_disabled_region(int *ptr, u16 pkey) 1025{ 1026 int ptr_contents; 1027 1028 dprintf1("disabling write access to PKEY[1], doing read\n"); 1029 pkey_write_deny(pkey); 1030 ptr_contents = read_ptr(ptr); 1031 dprintf1("*ptr: %d\n", ptr_contents); 1032 dprintf1("\n"); 1033} 1034void test_read_of_access_disabled_region(int *ptr, u16 pkey) 1035{ 1036 int ptr_contents; 1037 1038 dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); 1039 read_pkey_reg(); 1040 pkey_access_deny(pkey); 1041 ptr_contents = read_ptr(ptr); 1042 dprintf1("*ptr: %d\n", ptr_contents); 1043 expected_pkey_fault(pkey); 1044} 1045 1046void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, 1047 u16 pkey) 1048{ 1049 int ptr_contents; 1050 1051 dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", 1052 pkey, ptr); 1053 ptr_contents = read_ptr(ptr); 1054 dprintf1("reading ptr before disabling the read : %d\n", 1055 ptr_contents); 1056 read_pkey_reg(); 1057 pkey_access_deny(pkey); 1058 ptr_contents = read_ptr(ptr); 1059 dprintf1("*ptr: %d\n", ptr_contents); 1060 expected_pkey_fault(pkey); 1061} 1062 1063void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, 1064 u16 pkey) 1065{ 1066 *ptr = __LINE__; 1067 dprintf1("disabling write access; after accessing the page, " 1068 "to PKEY[%02d], doing write\n", pkey); 1069 pkey_write_deny(pkey); 1070 *ptr = __LINE__; 1071 expected_pkey_fault(pkey); 1072} 1073 1074void test_write_of_write_disabled_region(int *ptr, u16 pkey) 1075{ 1076 dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); 1077 pkey_write_deny(pkey); 1078 *ptr = __LINE__; 1079 expected_pkey_fault(pkey); 1080} 1081void test_write_of_access_disabled_region(int *ptr, u16 pkey) 1082{ 1083 dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); 1084 pkey_access_deny(pkey); 1085 *ptr = __LINE__; 1086 expected_pkey_fault(pkey); 1087} 1088 1089void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, 1090 u16 pkey) 1091{ 1092 *ptr = __LINE__; 1093 dprintf1("disabling access; after accessing the page, " 1094 " to PKEY[%02d], doing write\n", pkey); 1095 pkey_access_deny(pkey); 1096 *ptr = __LINE__; 1097 expected_pkey_fault(pkey); 1098} 1099 1100void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) 1101{ 1102 int ret; 1103 int test_fd = get_test_read_fd(); 1104 1105 dprintf1("disabling access to PKEY[%02d], " 1106 "having kernel read() to buffer\n", pkey); 1107 pkey_access_deny(pkey); 1108 ret = read(test_fd, ptr, 1); 1109 dprintf1("read ret: %d\n", ret); 1110 pkey_assert(ret); 1111} 1112void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) 1113{ 1114 int ret; 1115 int test_fd = get_test_read_fd(); 1116 1117 pkey_write_deny(pkey); 1118 ret = read(test_fd, ptr, 100); 1119 dprintf1("read ret: %d\n", ret); 1120 if (ret < 0 && (DEBUG_LEVEL > 0)) 1121 perror("verbose read result (OK for this to be bad)"); 1122 pkey_assert(ret); 1123} 1124 1125void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) 1126{ 1127 int pipe_ret, vmsplice_ret; 1128 struct iovec iov; 1129 int pipe_fds[2]; 1130 1131 pipe_ret = pipe(pipe_fds); 1132 1133 pkey_assert(pipe_ret == 0); 1134 dprintf1("disabling access to PKEY[%02d], " 1135 "having kernel vmsplice from buffer\n", pkey); 1136 pkey_access_deny(pkey); 1137 iov.iov_base = ptr; 1138 iov.iov_len = PAGE_SIZE; 1139 vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); 1140 dprintf1("vmsplice() ret: %d\n", vmsplice_ret); 1141 pkey_assert(vmsplice_ret == -1); 1142 1143 close(pipe_fds[0]); 1144 close(pipe_fds[1]); 1145} 1146 1147void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) 1148{ 1149 int ignored = 0xdada; 1150 int futex_ret; 1151 int some_int = __LINE__; 1152 1153 dprintf1("disabling write to PKEY[%02d], " 1154 "doing futex gunk in buffer\n", pkey); 1155 *ptr = some_int; 1156 pkey_write_deny(pkey); 1157 futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, 1158 &ignored, ignored); 1159 if (DEBUG_LEVEL > 0) 1160 perror("futex"); 1161 dprintf1("futex() ret: %d\n", futex_ret); 1162} 1163 1164/* Assumes that all pkeys other than 'pkey' are unallocated */ 1165void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) 1166{ 1167 int err; 1168 int i; 1169 1170 /* Note: 0 is the default pkey, so don't mess with it */ 1171 for (i = 1; i < NR_PKEYS; i++) { 1172 if (pkey == i) 1173 continue; 1174 1175 dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); 1176 err = sys_pkey_free(i); 1177 pkey_assert(err); 1178 1179 err = sys_pkey_free(i); 1180 pkey_assert(err); 1181 1182 err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); 1183 pkey_assert(err); 1184 } 1185} 1186 1187/* Assumes that all pkeys other than 'pkey' are unallocated */ 1188void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) 1189{ 1190 int err; 1191 int bad_pkey = NR_PKEYS+99; 1192 1193 /* pass a known-invalid pkey in: */ 1194 err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); 1195 pkey_assert(err); 1196} 1197 1198void become_child(void) 1199{ 1200 pid_t forkret; 1201 1202 forkret = fork(); 1203 pkey_assert(forkret >= 0); 1204 dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 1205 1206 if (!forkret) { 1207 /* in the child */ 1208 return; 1209 } 1210 exit(0); 1211} 1212 1213/* Assumes that all pkeys other than 'pkey' are unallocated */ 1214void test_pkey_alloc_exhaust(int *ptr, u16 pkey) 1215{ 1216 int err; 1217 int allocated_pkeys[NR_PKEYS] = {0}; 1218 int nr_allocated_pkeys = 0; 1219 int i; 1220 1221 for (i = 0; i < NR_PKEYS*3; i++) { 1222 int new_pkey; 1223 dprintf1("%s() alloc loop: %d\n", __func__, i); 1224 new_pkey = alloc_pkey(); 1225 dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx" 1226 " shadow: 0x%016llx\n", 1227 __func__, __LINE__, err, __read_pkey_reg(), 1228 shadow_pkey_reg); 1229 read_pkey_reg(); /* for shadow checking */ 1230 dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); 1231 if ((new_pkey == -1) && (errno == ENOSPC)) { 1232 dprintf2("%s() failed to allocate pkey after %d tries\n", 1233 __func__, nr_allocated_pkeys); 1234 } else { 1235 /* 1236 * Ensure the number of successes never 1237 * exceeds the number of keys supported 1238 * in the hardware. 1239 */ 1240 pkey_assert(nr_allocated_pkeys < NR_PKEYS); 1241 allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1242 } 1243 1244 /* 1245 * Make sure that allocation state is properly 1246 * preserved across fork(). 1247 */ 1248 if (i == NR_PKEYS*2) 1249 become_child(); 1250 } 1251 1252 dprintf3("%s()::%d\n", __func__, __LINE__); 1253 1254 /* 1255 * On x86: 1256 * There are 16 pkeys supported in hardware. Three are 1257 * allocated by the time we get here: 1258 * 1. The default key (0) 1259 * 2. One possibly consumed by an execute-only mapping. 1260 * 3. One allocated by the test code and passed in via 1261 * 'pkey' to this function. 1262 * Ensure that we can allocate at least another 13 (16-3). 1263 * 1264 * On powerpc: 1265 * There are either 5, 28, 29 or 32 pkeys supported in 1266 * hardware depending on the page size (4K or 64K) and 1267 * platform (powernv or powervm). Four are allocated by 1268 * the time we get here. These include pkey-0, pkey-1, 1269 * exec-only pkey and the one allocated by the test code. 1270 * Ensure that we can allocate the remaining. 1271 */ 1272 pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1)); 1273 1274 for (i = 0; i < nr_allocated_pkeys; i++) { 1275 err = sys_pkey_free(allocated_pkeys[i]); 1276 pkey_assert(!err); 1277 read_pkey_reg(); /* for shadow checking */ 1278 } 1279} 1280 1281/* 1282 * pkey 0 is special. It is allocated by default, so you do not 1283 * have to call pkey_alloc() to use it first. Make sure that it 1284 * is usable. 1285 */ 1286void test_mprotect_with_pkey_0(int *ptr, u16 pkey) 1287{ 1288 long size; 1289 int prot; 1290 1291 assert(pkey_last_malloc_record); 1292 size = pkey_last_malloc_record->size; 1293 /* 1294 * This is a bit of a hack. But mprotect() requires 1295 * huge-page-aligned sizes when operating on hugetlbfs. 1296 * So, make sure that we use something that's a multiple 1297 * of a huge page when we can. 1298 */ 1299 if (size >= HPAGE_SIZE) 1300 size = HPAGE_SIZE; 1301 prot = pkey_last_malloc_record->prot; 1302 1303 /* Use pkey 0 */ 1304 mprotect_pkey(ptr, size, prot, 0); 1305 1306 /* Make sure that we can set it back to the original pkey. */ 1307 mprotect_pkey(ptr, size, prot, pkey); 1308} 1309 1310void test_ptrace_of_child(int *ptr, u16 pkey) 1311{ 1312 __attribute__((__unused__)) int peek_result; 1313 pid_t child_pid; 1314 void *ignored = 0; 1315 long ret; 1316 int status; 1317 /* 1318 * This is the "control" for our little expermient. Make sure 1319 * we can always access it when ptracing. 1320 */ 1321 int *plain_ptr_unaligned = malloc(HPAGE_SIZE); 1322 int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); 1323 1324 /* 1325 * Fork a child which is an exact copy of this process, of course. 1326 * That means we can do all of our tests via ptrace() and then plain 1327 * memory access and ensure they work differently. 1328 */ 1329 child_pid = fork_lazy_child(); 1330 dprintf1("[%d] child pid: %d\n", getpid(), child_pid); 1331 1332 ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); 1333 if (ret) 1334 perror("attach"); 1335 dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); 1336 pkey_assert(ret != -1); 1337 ret = waitpid(child_pid, &status, WUNTRACED); 1338 if ((ret != child_pid) || !(WIFSTOPPED(status))) { 1339 fprintf(stderr, "weird waitpid result %ld stat %x\n", 1340 ret, status); 1341 pkey_assert(0); 1342 } 1343 dprintf2("waitpid ret: %ld\n", ret); 1344 dprintf2("waitpid status: %d\n", status); 1345 1346 pkey_access_deny(pkey); 1347 pkey_write_deny(pkey); 1348 1349 /* Write access, untested for now: 1350 ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); 1351 pkey_assert(ret != -1); 1352 dprintf1("poke at %p: %ld\n", peek_at, ret); 1353 */ 1354 1355 /* 1356 * Try to access the pkey-protected "ptr" via ptrace: 1357 */ 1358 ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); 1359 /* expect it to work, without an error: */ 1360 pkey_assert(ret != -1); 1361 /* Now access from the current task, and expect an exception: */ 1362 peek_result = read_ptr(ptr); 1363 expected_pkey_fault(pkey); 1364 1365 /* 1366 * Try to access the NON-pkey-protected "plain_ptr" via ptrace: 1367 */ 1368 ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); 1369 /* expect it to work, without an error: */ 1370 pkey_assert(ret != -1); 1371 /* Now access from the current task, and expect NO exception: */ 1372 peek_result = read_ptr(plain_ptr); 1373 do_not_expect_pkey_fault("read plain pointer after ptrace"); 1374 1375 ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); 1376 pkey_assert(ret != -1); 1377 1378 ret = kill(child_pid, SIGKILL); 1379 pkey_assert(ret != -1); 1380 1381 wait(&status); 1382 1383 free(plain_ptr_unaligned); 1384} 1385 1386void *get_pointer_to_instructions(void) 1387{ 1388 void *p1; 1389 1390 p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); 1391 dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); 1392 /* lots_o_noops_around_write should be page-aligned already */ 1393 assert(p1 == &lots_o_noops_around_write); 1394 1395 /* Point 'p1' at the *second* page of the function: */ 1396 p1 += PAGE_SIZE; 1397 1398 /* 1399 * Try to ensure we fault this in on next touch to ensure 1400 * we get an instruction fault as opposed to a data one 1401 */ 1402 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1403 1404 return p1; 1405} 1406 1407void test_executing_on_unreadable_memory(int *ptr, u16 pkey) 1408{ 1409 void *p1; 1410 int scratch; 1411 int ptr_contents; 1412 int ret; 1413 1414 p1 = get_pointer_to_instructions(); 1415 lots_o_noops_around_write(&scratch); 1416 ptr_contents = read_ptr(p1); 1417 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1418 1419 ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); 1420 pkey_assert(!ret); 1421 pkey_access_deny(pkey); 1422 1423 dprintf2("pkey_reg: %016llx\n", read_pkey_reg()); 1424 1425 /* 1426 * Make sure this is an *instruction* fault 1427 */ 1428 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1429 lots_o_noops_around_write(&scratch); 1430 do_not_expect_pkey_fault("executing on PROT_EXEC memory"); 1431 expect_fault_on_read_execonly_key(p1, pkey); 1432} 1433 1434void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) 1435{ 1436 void *p1; 1437 int scratch; 1438 int ptr_contents; 1439 int ret; 1440 1441 dprintf1("%s() start\n", __func__); 1442 1443 p1 = get_pointer_to_instructions(); 1444 lots_o_noops_around_write(&scratch); 1445 ptr_contents = read_ptr(p1); 1446 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1447 1448 /* Use a *normal* mprotect(), not mprotect_pkey(): */ 1449 ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); 1450 pkey_assert(!ret); 1451 1452 dprintf2("pkey_reg: %016llx\n", read_pkey_reg()); 1453 1454 /* Make sure this is an *instruction* fault */ 1455 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1456 lots_o_noops_around_write(&scratch); 1457 do_not_expect_pkey_fault("executing on PROT_EXEC memory"); 1458 expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY); 1459 1460 /* 1461 * Put the memory back to non-PROT_EXEC. Should clear the 1462 * exec-only pkey off the VMA and allow it to be readable 1463 * again. Go to PROT_NONE first to check for a kernel bug 1464 * that did not clear the pkey when doing PROT_NONE. 1465 */ 1466 ret = mprotect(p1, PAGE_SIZE, PROT_NONE); 1467 pkey_assert(!ret); 1468 1469 ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); 1470 pkey_assert(!ret); 1471 ptr_contents = read_ptr(p1); 1472 do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); 1473} 1474 1475void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) 1476{ 1477 int size = PAGE_SIZE; 1478 int sret; 1479 1480 if (cpu_has_pkeys()) { 1481 dprintf1("SKIP: %s: no CPU support\n", __func__); 1482 return; 1483 } 1484 1485 sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); 1486 pkey_assert(sret < 0); 1487} 1488 1489void (*pkey_tests[])(int *ptr, u16 pkey) = { 1490 test_read_of_write_disabled_region, 1491 test_read_of_access_disabled_region, 1492 test_read_of_access_disabled_region_with_page_already_mapped, 1493 test_write_of_write_disabled_region, 1494 test_write_of_write_disabled_region_with_page_already_mapped, 1495 test_write_of_access_disabled_region, 1496 test_write_of_access_disabled_region_with_page_already_mapped, 1497 test_kernel_write_of_access_disabled_region, 1498 test_kernel_write_of_write_disabled_region, 1499 test_kernel_gup_of_access_disabled_region, 1500 test_kernel_gup_write_to_write_disabled_region, 1501 test_executing_on_unreadable_memory, 1502 test_implicit_mprotect_exec_only_memory, 1503 test_mprotect_with_pkey_0, 1504 test_ptrace_of_child, 1505 test_pkey_syscalls_on_non_allocated_pkey, 1506 test_pkey_syscalls_bad_args, 1507 test_pkey_alloc_exhaust, 1508 test_pkey_alloc_free_attach_pkey0, 1509}; 1510 1511void run_tests_once(void) 1512{ 1513 int *ptr; 1514 int prot = PROT_READ|PROT_WRITE; 1515 1516 for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { 1517 int pkey; 1518 int orig_pkey_faults = pkey_faults; 1519 1520 dprintf1("======================\n"); 1521 dprintf1("test %d preparing...\n", test_nr); 1522 1523 tracing_on(); 1524 pkey = alloc_random_pkey(); 1525 dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); 1526 ptr = malloc_pkey(PAGE_SIZE, prot, pkey); 1527 dprintf1("test %d starting...\n", test_nr); 1528 pkey_tests[test_nr](ptr, pkey); 1529 dprintf1("freeing test memory: %p\n", ptr); 1530 free_pkey_malloc(ptr); 1531 sys_pkey_free(pkey); 1532 1533 dprintf1("pkey_faults: %d\n", pkey_faults); 1534 dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults); 1535 1536 tracing_off(); 1537 close_test_fds(); 1538 1539 printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); 1540 dprintf1("======================\n\n"); 1541 } 1542 iteration_nr++; 1543} 1544 1545void pkey_setup_shadow(void) 1546{ 1547 shadow_pkey_reg = __read_pkey_reg(); 1548} 1549 1550int main(void) 1551{ 1552 int nr_iterations = 22; 1553 int pkeys_supported = is_pkeys_supported(); 1554 1555 setup_handlers(); 1556 1557 printf("has pkeys: %d\n", pkeys_supported); 1558 1559 if (!pkeys_supported) { 1560 int size = PAGE_SIZE; 1561 int *ptr; 1562 1563 printf("running PKEY tests for unsupported CPU/OS\n"); 1564 1565 ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 1566 assert(ptr != (void *)-1); 1567 test_mprotect_pkey_on_unsupported_cpu(ptr, 1); 1568 exit(0); 1569 } 1570 1571 pkey_setup_shadow(); 1572 printf("startup pkey_reg: %016llx\n", read_pkey_reg()); 1573 setup_hugetlbfs(); 1574 1575 while (nr_iterations-- > 0) 1576 run_tests_once(); 1577 1578 printf("done (all tests OK)\n"); 1579 return 0; 1580}