Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: selftests: allow using UFFD minor faults for demand paging

UFFD handling of MINOR faults is a new feature whose use case is to
speed up demand paging (compared to MISSING faults). So, it's
interesting to let this selftest exercise this new mode.

Modify the demand paging test to have the option of using UFFD minor
faults, as opposed to missing faults. Now, when turning on userfaultfd
with '-u', the desired mode has to be specified ("MISSING" or "MINOR").

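If we're in minor mode, before registering, prefault via the *alias*
(a second mapping of the same shared memory). This way the pages are
already populated when the guest first touches them through the
registered mapping, so the guest triggers minor faults instead of
missing faults, and we can UFFDIO_CONTINUE to resolve them.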

Modify the page fault handler function to use the right ioctl depending
on the mode we're running in. In MINOR mode, use UFFDIO_CONTINUE.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-10-axelrasmussen@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Authored by Axel Rasmussen and committed by Paolo Bonzini
a4b9722a 94f3f2b3

+80 -34
+80 -34
tools/testing/selftests/kvm/demand_paging_test.c
··· 73 73 return NULL; 74 74 } 75 75 76 - static int handle_uffd_page_request(int uffd, uint64_t addr) 76 + static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) 77 77 { 78 - pid_t tid; 78 + pid_t tid = syscall(__NR_gettid); 79 79 struct timespec start; 80 80 struct timespec ts_diff; 81 - struct uffdio_copy copy; 82 81 int r; 83 - 84 - tid = syscall(__NR_gettid); 85 - 86 - copy.src = (uint64_t)guest_data_prototype; 87 - copy.dst = addr; 88 - copy.len = demand_paging_size; 89 - copy.mode = 0; 90 82 91 83 clock_gettime(CLOCK_MONOTONIC, &start); 92 84 93 - r = ioctl(uffd, UFFDIO_COPY, &copy); 94 - if (r == -1) { 95 - pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n", 96 - addr, tid, errno); 97 - return r; 85 + if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) { 86 + struct uffdio_copy copy; 87 + 88 + copy.src = (uint64_t)guest_data_prototype; 89 + copy.dst = addr; 90 + copy.len = demand_paging_size; 91 + copy.mode = 0; 92 + 93 + r = ioctl(uffd, UFFDIO_COPY, &copy); 94 + if (r == -1) { 95 + pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n", 96 + addr, tid, errno); 97 + return r; 98 + } 99 + } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { 100 + struct uffdio_continue cont = {0}; 101 + 102 + cont.range.start = addr; 103 + cont.range.len = demand_paging_size; 104 + 105 + r = ioctl(uffd, UFFDIO_CONTINUE, &cont); 106 + if (r == -1) { 107 + pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n", 108 + addr, tid, errno); 109 + return r; 110 + } 111 + } else { 112 + TEST_FAIL("Invalid uffd mode %d", uffd_mode); 98 113 } 99 114 100 115 ts_diff = timespec_elapsed(start); 101 116 102 - PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, 117 + PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid, 103 118 timespec_to_ns(ts_diff)); 104 119 PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", 105 120 demand_paging_size, addr, tid); ··· 125 110 bool quit_uffd_thread; 126 111 127 112 struct 
uffd_handler_args { 113 + int uffd_mode; 128 114 int uffd; 129 115 int pipefd; 130 116 useconds_t delay; ··· 202 186 if (delay) 203 187 usleep(delay); 204 188 addr = msg.arg.pagefault.address; 205 - r = handle_uffd_page_request(uffd, addr); 189 + r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr); 206 190 if (r < 0) 207 191 return NULL; 208 192 pages++; ··· 218 202 219 203 static void setup_demand_paging(struct kvm_vm *vm, 220 204 pthread_t *uffd_handler_thread, int pipefd, 221 - useconds_t uffd_delay, 205 + int uffd_mode, useconds_t uffd_delay, 222 206 struct uffd_handler_args *uffd_args, 223 - void *hva, uint64_t len) 207 + void *hva, void *alias, uint64_t len) 224 208 { 209 + bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); 225 210 int uffd; 226 211 struct uffdio_api uffdio_api; 227 212 struct uffdio_register uffdio_register; 213 + uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; 214 + 215 + PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", 216 + is_minor ? "MINOR" : "MISSING", 217 + is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); 218 + 219 + /* In order to get minor faults, prefault via the alias. 
*/ 220 + if (is_minor) { 221 + size_t p; 222 + 223 + expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; 224 + 225 + TEST_ASSERT(alias != NULL, "Alias required for minor faults"); 226 + for (p = 0; p < (len / demand_paging_size); ++p) { 227 + memcpy(alias + (p * demand_paging_size), 228 + guest_data_prototype, demand_paging_size); 229 + } 230 + } 228 231 229 232 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 230 233 TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); ··· 256 221 257 222 uffdio_register.range.start = (uint64_t)hva; 258 223 uffdio_register.range.len = len; 259 - uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; 224 + uffdio_register.mode = uffd_mode; 260 225 TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, 261 226 "ioctl UFFDIO_REGISTER failed"); 262 - TEST_ASSERT((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) == 263 - UFFD_API_RANGE_IOCTLS, "unexpected userfaultfd ioctl set"); 227 + TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == 228 + expected_ioctls, "missing userfaultfd ioctls"); 264 229 230 + uffd_args->uffd_mode = uffd_mode; 265 231 uffd_args->uffd = uffd; 266 232 uffd_args->pipefd = pipefd; 267 233 uffd_args->delay = uffd_delay; ··· 274 238 } 275 239 276 240 struct test_params { 277 - bool use_uffd; 241 + int uffd_mode; 278 242 useconds_t uffd_delay; 279 243 enum vm_mem_backing_src_type src_type; 280 244 bool partition_vcpu_memory_access; ··· 311 275 perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, 312 276 p->partition_vcpu_memory_access); 313 277 314 - if (p->use_uffd) { 278 + if (p->uffd_mode) { 315 279 uffd_handler_threads = 316 280 malloc(nr_vcpus * sizeof(*uffd_handler_threads)); 317 281 TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); ··· 325 289 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 326 290 vm_paddr_t vcpu_gpa; 327 291 void *vcpu_hva; 292 + void *vcpu_alias; 328 293 uint64_t vcpu_mem_size; 329 294 330 295 ··· 340 303 PER_VCPU_DEBUG("Added 
VCPU %d with test mem gpa [%lx, %lx)\n", 341 304 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); 342 305 343 - /* Cache the HVA pointer of the region */ 306 + /* Cache the host addresses of the region */ 344 307 vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); 308 + vcpu_alias = addr_gpa2alias(vm, vcpu_gpa); 345 309 346 310 /* 347 311 * Set up user fault fd to handle demand paging ··· 353 315 TEST_ASSERT(!r, "Failed to set up pipefd"); 354 316 355 317 setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], 356 - pipefds[vcpu_id * 2], p->uffd_delay, 357 - &uffd_args[vcpu_id], vcpu_hva, 318 + pipefds[vcpu_id * 2], p->uffd_mode, 319 + p->uffd_delay, &uffd_args[vcpu_id], 320 + vcpu_hva, vcpu_alias, 358 321 vcpu_mem_size); 359 322 } 360 323 } ··· 384 345 385 346 pr_info("All vCPU threads joined\n"); 386 347 387 - if (p->use_uffd) { 348 + if (p->uffd_mode) { 388 349 char c; 389 350 390 351 /* Tell the user fault fd handler threads to quit */ ··· 406 367 407 368 free(guest_data_prototype); 408 369 free(vcpu_threads); 409 - if (p->use_uffd) { 370 + if (p->uffd_mode) { 410 371 free(uffd_handler_threads); 411 372 free(uffd_args); 412 373 free(pipefds); ··· 416 377 static void help(char *name) 417 378 { 418 379 puts(""); 419 - printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" 380 + printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" 420 381 " [-b memory] [-t type] [-v vcpus] [-o]\n", name); 421 382 guest_modes_help(); 422 - printf(" -u: use User Fault FD to handle vCPU page\n" 423 - " faults.\n"); 383 + printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" 384 + " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); 424 385 printf(" -d: add a delay in usec to the User Fault\n" 425 386 " FD handler to simulate demand paging\n" 426 387 " overheads. 
Ignored without -u.\n"); ··· 447 408 448 409 guest_modes_append_default(); 449 410 450 - while ((opt = getopt(argc, argv, "hm:ud:b:t:v:o")) != -1) { 411 + while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) { 451 412 switch (opt) { 452 413 case 'm': 453 414 guest_modes_cmdline(optarg); 454 415 break; 455 416 case 'u': 456 - p.use_uffd = true; 417 + if (!strcmp("MISSING", optarg)) 418 + p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING; 419 + else if (!strcmp("MINOR", optarg)) 420 + p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; 421 + TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'."); 457 422 break; 458 423 case 'd': 459 424 p.uffd_delay = strtoul(optarg, NULL, 0); ··· 483 440 break; 484 441 } 485 442 } 443 + 444 + TEST_ASSERT(p.uffd_mode != UFFDIO_REGISTER_MODE_MINOR || p.src_type == VM_MEM_SRC_SHMEM, 445 + "userfaultfd MINOR mode requires shared memory; pick a different -t"); 486 446 487 447 for_each_guest_mode(run_test, &p); 488 448