Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: selftests: Add a userfaultfd library

Move the generic userfaultfd code out of demand_paging_test.c into a
common library, userfaultfd_util. This library consists of a setup and a
stop function. The setup function starts a thread for handling page
faults using the handler callback function. This setup returns a
uffd_desc object which is then used in the stop function (to wait and
destroy the threads).

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-2-ricarkol@google.com

authored by

Ricardo Koller and committed by
Marc Zyngier
a93871d0 30a0b95b

+263 -199
+1
tools/testing/selftests/kvm/Makefile
··· 47 47 LIBKVM += lib/rbtree.c 48 48 LIBKVM += lib/sparsebit.c 49 49 LIBKVM += lib/test_util.c 50 + LIBKVM += lib/userfaultfd_util.c 50 51 51 52 LIBKVM_STRING += lib/string_override.c 52 53
+31 -199
tools/testing/selftests/kvm/demand_paging_test.c
··· 22 22 #include "test_util.h" 23 23 #include "perf_test_util.h" 24 24 #include "guest_modes.h" 25 + #include "userfaultfd_util.h" 25 26 26 27 #ifdef __NR_userfaultfd 27 28 28 - #ifdef PRINT_PER_PAGE_UPDATES 29 - #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) 30 - #else 31 - #define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) 32 - #endif 33 - 34 - #ifdef PRINT_PER_VCPU_UPDATES 35 - #define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) 36 - #else 37 - #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) 38 - #endif 39 - 40 29 static int nr_vcpus = 1; 41 30 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; 31 + 42 32 static size_t demand_paging_size; 43 33 static char *guest_data_prototype; 44 34 ··· 57 67 ts_diff.tv_sec, ts_diff.tv_nsec); 58 68 } 59 69 60 - static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) 70 + static int handle_uffd_page_request(int uffd_mode, int uffd, 71 + struct uffd_msg *msg) 61 72 { 62 73 pid_t tid = syscall(__NR_gettid); 74 + uint64_t addr = msg->arg.pagefault.address; 63 75 struct timespec start; 64 76 struct timespec ts_diff; 65 77 int r; ··· 108 116 return 0; 109 117 } 110 118 111 - bool quit_uffd_thread; 112 - 113 - struct uffd_handler_args { 114 - int uffd_mode; 115 - int uffd; 116 - int pipefd; 117 - useconds_t delay; 118 - }; 119 - 120 - static void *uffd_handler_thread_fn(void *arg) 121 - { 122 - struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; 123 - int uffd = uffd_args->uffd; 124 - int pipefd = uffd_args->pipefd; 125 - useconds_t delay = uffd_args->delay; 126 - int64_t pages = 0; 127 - struct timespec start; 128 - struct timespec ts_diff; 129 - 130 - clock_gettime(CLOCK_MONOTONIC, &start); 131 - while (!quit_uffd_thread) { 132 - struct uffd_msg msg; 133 - struct pollfd pollfd[2]; 134 - char tmp_chr; 135 - int r; 136 - uint64_t addr; 137 - 138 - pollfd[0].fd = uffd; 139 - pollfd[0].events = POLLIN; 140 - pollfd[1].fd = pipefd; 141 - pollfd[1].events = POLLIN; 142 - 143 - r = poll(pollfd, 2, -1); 144 - switch (r) { 145 - case -1: 146 - pr_info("poll err"); 147 - continue; 148 - case 0: 149 - continue; 150 - case 1: 151 - break; 152 - default: 153 - pr_info("Polling uffd returned %d", r); 154 - return NULL; 155 - } 156 - 157 - if (pollfd[0].revents & POLLERR) { 158 - pr_info("uffd revents has POLLERR"); 159 - return NULL; 160 - } 161 - 162 - if (pollfd[1].revents & POLLIN) { 163 - r = read(pollfd[1].fd, &tmp_chr, 1); 164 - TEST_ASSERT(r == 1, 165 - "Error reading pipefd in UFFD thread\n"); 166 - return NULL; 167 - } 168 - 169 - if (!(pollfd[0].revents & POLLIN)) 170 - continue; 171 - 172 - r = read(uffd, &msg, sizeof(msg)); 173 - if (r == -1) { 174 - if (errno == EAGAIN) 175 - continue; 176 - pr_info("Read of uffd got errno %d\n", errno); 177 - return NULL; 178 - } 179 - 180 - if (r != sizeof(msg)) { 181 - pr_info("Read on uffd returned unexpected size: %d bytes", r); 182 - return NULL; 183 - } 184 - 185 - if (!(msg.event & UFFD_EVENT_PAGEFAULT)) 186 - continue; 187 - 188 - if (delay) 189 - usleep(delay); 190 - addr = msg.arg.pagefault.address; 191 - r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr); 192 - if (r < 0) 193 - return NULL; 194 - pages++; 195 - } 196 - 197 - ts_diff = timespec_elapsed(start); 198 - PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", 199 - pages, ts_diff.tv_sec, ts_diff.tv_nsec, 200 - pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 201 - 202 - return NULL; 203 - } 204 - 205 - static void setup_demand_paging(struct kvm_vm *vm, 206 - pthread_t *uffd_handler_thread, int pipefd, 207 - int uffd_mode, useconds_t uffd_delay, 208 - struct uffd_handler_args *uffd_args, 209 - void *hva, void *alias, uint64_t len) 210 - { 211 - bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); 212 - int uffd; 213 - struct uffdio_api uffdio_api; 214 - struct uffdio_register uffdio_register; 215 - uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; 216 - int ret; 217 - 218 - PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", 219 - is_minor ? "MINOR" : "MISSING", 220 - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); 221 - 222 - /* In order to get minor faults, prefault via the alias. */ 223 - if (is_minor) { 224 - size_t p; 225 - 226 - expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; 227 - 228 - TEST_ASSERT(alias != NULL, "Alias required for minor faults"); 229 - for (p = 0; p < (len / demand_paging_size); ++p) { 230 - memcpy(alias + (p * demand_paging_size), 231 - guest_data_prototype, demand_paging_size); 232 - } 233 - } 234 - 235 - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 236 - TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd)); 237 - 238 - uffdio_api.api = UFFD_API; 239 - uffdio_api.features = 0; 240 - ret = ioctl(uffd, UFFDIO_API, &uffdio_api); 241 - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret)); 242 - 243 - uffdio_register.range.start = (uint64_t)hva; 244 - uffdio_register.range.len = len; 245 - uffdio_register.mode = uffd_mode; 246 - ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register); 247 - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret)); 248 - TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == 249 - expected_ioctls, "missing userfaultfd ioctls"); 250 - 251 - uffd_args->uffd_mode = uffd_mode; 252 - uffd_args->uffd = uffd; 253 - uffd_args->pipefd = pipefd; 254 - uffd_args->delay = uffd_delay; 255 - pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, 256 - uffd_args); 257 - 258 - PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", 259 - hva, hva + len); 260 - } 261 - 262 119 struct test_params { 263 120 int uffd_mode; 264 121 useconds_t uffd_delay; ··· 115 274 bool partition_vcpu_memory_access; 116 275 }; 117 276 277 + static void prefault_mem(void *alias, uint64_t len) 278 + { 279 + size_t p; 280 + 281 + TEST_ASSERT(alias != NULL, "Alias required for minor faults"); 282 + for (p = 0; p < (len / demand_paging_size); ++p) { 283 + memcpy(alias + (p * demand_paging_size), 284 + guest_data_prototype, demand_paging_size); 285 + } 286 + } 287 + 118 288 static void run_test(enum vm_guest_mode mode, void *arg) 119 289 { 120 290 struct test_params *p = arg; 121 - pthread_t *uffd_handler_threads = NULL; 122 - struct uffd_handler_args *uffd_args = NULL; 291 + struct uffd_desc **uffd_descs = NULL; 123 292 struct timespec start; 124 293 struct timespec ts_diff; 125 - int *pipefds = NULL; 126 294 struct kvm_vm *vm; 127 - int r, i; 295 + int i; 128 296 129 297 vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, 130 298 p->src_type, p->partition_vcpu_memory_access); ··· 146 296 memset(guest_data_prototype, 0xAB, demand_paging_size); 147 297 148 298 if (p->uffd_mode) { 149 - uffd_handler_threads = 150 - malloc(nr_vcpus * sizeof(*uffd_handler_threads)); 151 - TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); 152 - 153 - uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); 154 - TEST_ASSERT(uffd_args, "Memory allocation failed"); 155 - 156 - pipefds = malloc(sizeof(int) * nr_vcpus * 2); 157 - TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); 299 + uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); 300 + TEST_ASSERT(uffd_descs, "Memory allocation failed"); 158 301 159 302 for (i = 0; i < nr_vcpus; i++) { 160 303 struct perf_test_vcpu_args *vcpu_args; ··· 160 317 vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); 161 318 vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa); 162 319 320 + prefault_mem(vcpu_alias, 321 + vcpu_args->pages * perf_test_args.guest_page_size); 322 + 163 323 /* 164 324 * Set up user fault fd to handle demand paging 165 325 * requests. 166 326 */ 167 - r = pipe2(&pipefds[i * 2], 168 - O_CLOEXEC | O_NONBLOCK); 169 - TEST_ASSERT(!r, "Failed to set up pipefd"); 170 - 171 - setup_demand_paging(vm, &uffd_handler_threads[i], 172 - pipefds[i * 2], p->uffd_mode, 173 - p->uffd_delay, &uffd_args[i], 174 - vcpu_hva, vcpu_alias, 175 - vcpu_args->pages * perf_test_args.guest_page_size); 327 + uffd_descs[i] = uffd_setup_demand_paging( 328 + p->uffd_mode, p->uffd_delay, vcpu_hva, 329 + vcpu_args->pages * perf_test_args.guest_page_size, 330 + &handle_uffd_page_request); 176 331 } 177 332 } 178 333 ··· 185 344 pr_info("All vCPU threads joined\n"); 186 345 187 346 if (p->uffd_mode) { 188 - char c; 189 - 190 347 /* Tell the user fault fd handler threads to quit */ 191 - for (i = 0; i < nr_vcpus; i++) { 192 - r = write(pipefds[i * 2 + 1], &c, 1); 193 - TEST_ASSERT(r == 1, "Unable to write to pipefd"); 194 - 195 - pthread_join(uffd_handler_threads[i], NULL); 196 - } 348 + for (i = 0; i < nr_vcpus; i++) 349 + uffd_stop_demand_paging(uffd_descs[i]); 197 350 } 198 351 199 352 pr_info("Total guest execution time: %ld.%.9lds\n", ··· 199 364 perf_test_destroy_vm(vm); 200 365 201 366 free(guest_data_prototype); 202 - if (p->uffd_mode) { 203 - free(uffd_handler_threads); 204 - free(uffd_args); 205 - free(pipefds); 206 - } 367 + if (p->uffd_mode) 368 + free(uffd_descs); 207 369 } 208 370 209 371 static void help(char *name)
+45
tools/testing/selftests/kvm/include/userfaultfd_util.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * KVM userfaultfd util 4 + * 5 + * Copyright (C) 2018, Red Hat, Inc. 6 + * Copyright (C) 2019-2022 Google LLC 7 + */ 8 + 9 + #define _GNU_SOURCE /* for pipe2 */ 10 + 11 + #include <inttypes.h> 12 + #include <time.h> 13 + #include <pthread.h> 14 + #include <linux/userfaultfd.h> 15 + 16 + #include "test_util.h" 17 + 18 + typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); 19 + 20 + struct uffd_desc { 21 + int uffd_mode; 22 + int uffd; 23 + int pipefds[2]; 24 + useconds_t delay; 25 + uffd_handler_t handler; 26 + pthread_t thread; 27 + }; 28 + 29 + struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, 30 + void *hva, uint64_t len, 31 + uffd_handler_t handler); 32 + 33 + void uffd_stop_demand_paging(struct uffd_desc *uffd); 34 + 35 + #ifdef PRINT_PER_PAGE_UPDATES 36 + #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) 37 + #else 38 + #define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) 39 + #endif 40 + 41 + #ifdef PRINT_PER_VCPU_UPDATES 42 + #define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) 43 + #else 44 + #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) 45 + #endif
+186
tools/testing/selftests/kvm/lib/userfaultfd_util.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * KVM userfaultfd util 4 + * Adapted from demand_paging_test.c 5 + * 6 + * Copyright (C) 2018, Red Hat, Inc. 7 + * Copyright (C) 2019-2022 Google LLC 8 + */ 9 + 10 + #define _GNU_SOURCE /* for pipe2 */ 11 + 12 + #include <inttypes.h> 13 + #include <stdio.h> 14 + #include <stdlib.h> 15 + #include <time.h> 16 + #include <poll.h> 17 + #include <pthread.h> 18 + #include <linux/userfaultfd.h> 19 + #include <sys/syscall.h> 20 + 21 + #include "kvm_util.h" 22 + #include "test_util.h" 23 + #include "perf_test_util.h" 24 + #include "userfaultfd_util.h" 25 + 26 + #ifdef __NR_userfaultfd 27 + 28 + static void *uffd_handler_thread_fn(void *arg) 29 + { 30 + struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; 31 + int uffd = uffd_desc->uffd; 32 + int pipefd = uffd_desc->pipefds[0]; 33 + useconds_t delay = uffd_desc->delay; 34 + int64_t pages = 0; 35 + struct timespec start; 36 + struct timespec ts_diff; 37 + 38 + clock_gettime(CLOCK_MONOTONIC, &start); 39 + while (1) { 40 + struct uffd_msg msg; 41 + struct pollfd pollfd[2]; 42 + char tmp_chr; 43 + int r; 44 + 45 + pollfd[0].fd = uffd; 46 + pollfd[0].events = POLLIN; 47 + pollfd[1].fd = pipefd; 48 + pollfd[1].events = POLLIN; 49 + 50 + r = poll(pollfd, 2, -1); 51 + switch (r) { 52 + case -1: 53 + pr_info("poll err"); 54 + continue; 55 + case 0: 56 + continue; 57 + case 1: 58 + break; 59 + default: 60 + pr_info("Polling uffd returned %d", r); 61 + return NULL; 62 + } 63 + 64 + if (pollfd[0].revents & POLLERR) { 65 + pr_info("uffd revents has POLLERR"); 66 + return NULL; 67 + } 68 + 69 + if (pollfd[1].revents & POLLIN) { 70 + r = read(pollfd[1].fd, &tmp_chr, 1); 71 + TEST_ASSERT(r == 1, 72 + "Error reading pipefd in UFFD thread\n"); 73 + return NULL; 74 + } 75 + 76 + if (!(pollfd[0].revents & POLLIN)) 77 + continue; 78 + 79 + r = read(uffd, &msg, sizeof(msg)); 80 + if (r == -1) { 81 + if (errno == EAGAIN) 82 + continue; 83 + pr_info("Read of uffd got errno %d\n", errno); 84 + return NULL; 85 + } 86 + 87 + if (r != sizeof(msg)) { 88 + pr_info("Read on uffd returned unexpected size: %d bytes", r); 89 + return NULL; 90 + } 91 + 92 + if (!(msg.event & UFFD_EVENT_PAGEFAULT)) 93 + continue; 94 + 95 + if (delay) 96 + usleep(delay); 97 + r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); 98 + if (r < 0) 99 + return NULL; 100 + pages++; 101 + } 102 + 103 + ts_diff = timespec_elapsed(start); 104 + PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", 105 + pages, ts_diff.tv_sec, ts_diff.tv_nsec, 106 + pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 107 + 108 + return NULL; 109 + } 110 + 111 + struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, 112 + void *hva, uint64_t len, 113 + uffd_handler_t handler) 114 + { 115 + struct uffd_desc *uffd_desc; 116 + bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); 117 + int uffd; 118 + struct uffdio_api uffdio_api; 119 + struct uffdio_register uffdio_register; 120 + uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; 121 + int ret; 122 + 123 + PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", 124 + is_minor ? "MINOR" : "MISSING", 125 + is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); 126 + 127 + uffd_desc = malloc(sizeof(struct uffd_desc)); 128 + TEST_ASSERT(uffd_desc, "malloc failed"); 129 + 130 + /* In order to get minor faults, prefault via the alias. */ 131 + if (is_minor) 132 + expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; 133 + 134 + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 135 + TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); 136 + 137 + uffdio_api.api = UFFD_API; 138 + uffdio_api.features = 0; 139 + TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, 140 + "ioctl UFFDIO_API failed: %" PRIu64, 141 + (uint64_t)uffdio_api.api); 142 + 143 + uffdio_register.range.start = (uint64_t)hva; 144 + uffdio_register.range.len = len; 145 + uffdio_register.mode = uffd_mode; 146 + TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, 147 + "ioctl UFFDIO_REGISTER failed"); 148 + TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == 149 + expected_ioctls, "missing userfaultfd ioctls"); 150 + 151 + ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK); 152 + TEST_ASSERT(!ret, "Failed to set up pipefd"); 153 + 154 + uffd_desc->uffd_mode = uffd_mode; 155 + uffd_desc->uffd = uffd; 156 + uffd_desc->delay = delay; 157 + uffd_desc->handler = handler; 158 + pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn, 159 + uffd_desc); 160 + 161 + PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", 162 + hva, hva + len); 163 + 164 + return uffd_desc; 165 + } 166 + 167 + void uffd_stop_demand_paging(struct uffd_desc *uffd) 168 + { 169 + char c = 0; 170 + int ret; 171 + 172 + ret = write(uffd->pipefds[1], &c, 1); 173 + TEST_ASSERT(ret == 1, "Unable to write to pipefd"); 174 + 175 + ret = pthread_join(uffd->thread, NULL); 176 + TEST_ASSERT(ret == 0, "Pthread_join failed."); 177 + 178 + close(uffd->uffd); 179 + 180 + close(uffd->pipefds[1]); 181 + close(uffd->pipefds[0]); 182 + 183 + free(uffd); 184 + } 185 + 186 + #endif /* __NR_userfaultfd */