Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-x86-selftests-6.10' of https://github.com/kvm-x86/linux into HEAD

KVM selftests cleanups and fixes for 6.10:

- Enhance the demand paging test to allow for better reporting and stressing
of UFFD performance.

- Convert the steal time test to generate TAP-friendly output.

- Fix a flaky false positive in the xen_shinfo_test due to comparing elapsed
time across two different clock domains.

- Skip the MONITOR/MWAIT test if the host doesn't actually support MWAIT.

- Avoid unnecessary use of "sudo" in the NX hugepage test to play nice with
running in a minimal userspace environment.

- Allow skipping the RSEQ test's sanity check that the vCPU was able to
complete a reasonable number of KVM_RUNs, as the assert can fail on a
completely valid setup. If the test is run on a large-ish system that is
otherwise idle, and the test isn't affined to a low-ish number of CPUs, the
vCPU task can be repeatedly migrated to CPUs that are in deep sleep states,
which results in the vCPU having very little net runtime before the next
migration due to high wakeup latencies.

+282 -137
+2 -2
tools/testing/selftests/kvm/aarch64/page_fault_test.c
··· 375 375 *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, 376 376 pt_args.hva, 377 377 pt_args.paging_size, 378 - test->uffd_pt_handler); 378 + 1, test->uffd_pt_handler); 379 379 380 380 *data_uffd = NULL; 381 381 if (test->uffd_data_handler) 382 382 *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, 383 383 data_args.hva, 384 384 data_args.paging_size, 385 - test->uffd_data_handler); 385 + 1, test->uffd_data_handler); 386 386 } 387 387 388 388 static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
+72 -18
tools/testing/selftests/kvm/demand_paging_test.c
··· 13 13 #include <stdio.h> 14 14 #include <stdlib.h> 15 15 #include <time.h> 16 - #include <poll.h> 17 16 #include <pthread.h> 18 17 #include <linux/userfaultfd.h> 19 18 #include <sys/syscall.h> ··· 76 77 copy.mode = 0; 77 78 78 79 r = ioctl(uffd, UFFDIO_COPY, &copy); 79 - if (r == -1) { 80 - pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n", 80 + /* 81 + * With multiple vCPU threads fault on a single page and there are 82 + * multiple readers for the UFFD, at least one of the UFFDIO_COPYs 83 + * will fail with EEXIST: handle that case without signaling an 84 + * error. 85 + * 86 + * Note that this also suppress any EEXISTs occurring from, 87 + * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never 88 + * happens here, but a realistic VMM might potentially maintain 89 + * some external state to correctly surface EEXISTs to userspace 90 + * (or prevent duplicate COPY/CONTINUEs in the first place). 91 + */ 92 + if (r == -1 && errno != EEXIST) { 93 + pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n", 81 94 addr, tid, errno); 82 95 return r; 83 96 } ··· 100 89 cont.range.len = demand_paging_size; 101 90 102 91 r = ioctl(uffd, UFFDIO_CONTINUE, &cont); 103 - if (r == -1) { 104 - pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n", 92 + /* 93 + * With multiple vCPU threads fault on a single page and there are 94 + * multiple readers for the UFFD, at least one of the UFFDIO_COPYs 95 + * will fail with EEXIST: handle that case without signaling an 96 + * error. 97 + * 98 + * Note that this also suppress any EEXISTs occurring from, 99 + * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never 100 + * happens here, but a realistic VMM might potentially maintain 101 + * some external state to correctly surface EEXISTs to userspace 102 + * (or prevent duplicate COPY/CONTINUEs in the first place). 103 + */ 104 + if (r == -1 && errno != EEXIST) { 105 + pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n", 105 106 addr, tid, errno); 106 107 return r; 107 108 } ··· 133 110 134 111 struct test_params { 135 112 int uffd_mode; 113 + bool single_uffd; 136 114 useconds_t uffd_delay; 115 + int readers_per_uffd; 137 116 enum vm_mem_backing_src_type src_type; 138 117 bool partition_vcpu_memory_access; 139 118 }; ··· 156 131 struct memstress_vcpu_args *vcpu_args; 157 132 struct test_params *p = arg; 158 133 struct uffd_desc **uffd_descs = NULL; 134 + uint64_t uffd_region_size; 159 135 struct timespec start; 160 136 struct timespec ts_diff; 137 + double vcpu_paging_rate; 161 138 struct kvm_vm *vm; 162 - int i; 139 + int i, num_uffds = 0; 163 140 164 141 vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, 165 142 p->src_type, p->partition_vcpu_memory_access); ··· 174 147 memset(guest_data_prototype, 0xAB, demand_paging_size); 175 148 176 149 if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { 177 - for (i = 0; i < nr_vcpus; i++) { 150 + num_uffds = p->single_uffd ? 1 : nr_vcpus; 151 + for (i = 0; i < num_uffds; i++) { 178 152 vcpu_args = &memstress_args.vcpu_args[i]; 179 153 prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa), 180 154 vcpu_args->pages * memstress_args.guest_page_size); ··· 183 155 } 184 156 185 157 if (p->uffd_mode) { 186 - uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); 158 + num_uffds = p->single_uffd ? 1 : nr_vcpus; 159 + uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds; 160 + 161 + uffd_descs = malloc(num_uffds * sizeof(struct uffd_desc *)); 187 162 TEST_ASSERT(uffd_descs, "Memory allocation failed"); 188 - for (i = 0; i < nr_vcpus; i++) { 163 + for (i = 0; i < num_uffds; i++) { 164 + struct memstress_vcpu_args *vcpu_args; 189 165 void *vcpu_hva; 190 166 191 167 vcpu_args = &memstress_args.vcpu_args[i]; ··· 202 170 */ 203 171 uffd_descs[i] = uffd_setup_demand_paging( 204 172 p->uffd_mode, p->uffd_delay, vcpu_hva, 205 - vcpu_args->pages * memstress_args.guest_page_size, 173 + uffd_region_size, 174 + p->readers_per_uffd, 206 175 &handle_uffd_page_request); 207 176 } 208 177 } ··· 220 187 221 188 if (p->uffd_mode) { 222 189 /* Tell the user fault fd handler threads to quit */ 223 - for (i = 0; i < nr_vcpus; i++) 190 + for (i = 0; i < num_uffds; i++) 224 191 uffd_stop_demand_paging(uffd_descs[i]); 225 192 } 226 193 227 - pr_info("Total guest execution time: %ld.%.9lds\n", 194 + pr_info("Total guest execution time:\t%ld.%.9lds\n", 228 195 ts_diff.tv_sec, ts_diff.tv_nsec); 229 - pr_info("Overall demand paging rate: %f pgs/sec\n", 230 - memstress_args.vcpu_args[0].pages * nr_vcpus / 231 - ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC)); 196 + 197 + vcpu_paging_rate = memstress_args.vcpu_args[0].pages / 198 + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC); 199 + pr_info("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n", 200 + vcpu_paging_rate); 201 + pr_info("Overall demand paging rate:\t%f pgs/sec\n", 202 + vcpu_paging_rate * nr_vcpus); 232 203 233 204 memstress_destroy_vm(vm); 234 205 ··· 244 207 static void help(char *name) 245 208 { 246 209 puts(""); 247 - printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" 248 - " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); 210 + printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n" 211 + " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n" 212 + " [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); 249 213 guest_modes_help(); 250 214 printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" 251 215 " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); 252 216 kvm_print_vcpu_pinning_help(); 217 + printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n" 218 + " creating one for each region paged by a unique vCPU\n" 219 + " Set implicitly with -o, and no effect without -u.\n"); 253 220 printf(" -d: add a delay in usec to the User Fault\n" 254 221 " FD handler to simulate demand paging\n" 255 222 " overheads. Ignored without -u.\n"); 223 + printf(" -r: Set the number of reader threads per uffd.\n"); 256 224 printf(" -b: specify the size of the memory region which should be\n" 257 225 " demand paged by each vCPU. e.g. 10M or 3G.\n" 258 226 " Default: 1G\n"); ··· 276 234 struct test_params p = { 277 235 .src_type = DEFAULT_VM_MEM_SRC, 278 236 .partition_vcpu_memory_access = true, 237 + .readers_per_uffd = 1, 238 + .single_uffd = false, 279 239 }; 280 240 int opt; 281 241 282 242 guest_modes_append_default(); 283 243 284 - while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) { 244 + while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) { 285 245 switch (opt) { 286 246 case 'm': 287 247 guest_modes_cmdline(optarg); ··· 294 250 else if (!strcmp("MINOR", optarg)) 295 251 p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; 296 252 TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'."); 253 + break; 254 + case 'a': 255 + p.single_uffd = true; 297 256 break; 298 257 case 'd': 299 258 p.uffd_delay = strtoul(optarg, NULL, 0); ··· 318 271 break; 319 272 case 'o': 320 273 p.partition_vcpu_memory_access = false; 274 + p.single_uffd = true; 275 + break; 276 + case 'r': 277 + p.readers_per_uffd = atoi(optarg); 278 + TEST_ASSERT(p.readers_per_uffd >= 1, 279 + "Invalid number of readers per uffd %d: must be >=1", 280 + p.readers_per_uffd); 321 281 break; 322 282 case 'h': 323 283 default:
+13 -3
tools/testing/selftests/kvm/include/userfaultfd_util.h
··· 17 17 18 18 typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); 19 19 20 - struct uffd_desc { 20 + struct uffd_reader_args { 21 21 int uffd_mode; 22 22 int uffd; 23 - int pipefds[2]; 24 23 useconds_t delay; 25 24 uffd_handler_t handler; 26 - pthread_t thread; 25 + /* Holds the read end of the pipe for killing the reader. */ 26 + int pipe; 27 + }; 28 + 29 + struct uffd_desc { 30 + int uffd; 31 + uint64_t num_readers; 32 + /* Holds the write ends of the pipes for killing the readers. */ 33 + int *pipefds; 34 + pthread_t *readers; 35 + struct uffd_reader_args *reader_args; 27 36 }; 28 37 29 38 struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, 30 39 void *hva, uint64_t len, 40 + uint64_t num_readers, 31 41 uffd_handler_t handler); 32 42 33 43 void uffd_stop_demand_paging(struct uffd_desc *uffd);
+8
tools/testing/selftests/kvm/lib/kvm_util.c
··· 929 929 errno, strerror(errno)); 930 930 } 931 931 932 + #define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \ 933 + __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \ 934 + "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)") 935 + 932 936 int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 933 937 uint64_t gpa, uint64_t size, void *hva, 934 938 uint32_t guest_memfd, uint64_t guest_memfd_offset) ··· 946 942 .guest_memfd = guest_memfd, 947 943 .guest_memfd_offset = guest_memfd_offset, 948 944 }; 945 + 946 + TEST_REQUIRE_SET_USER_MEMORY_REGION2(); 949 947 950 948 return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region); 951 949 } ··· 974 968 size_t backing_src_pagesz = get_backing_src_pagesz(src_type); 975 969 size_t mem_size = npages * vm->page_size; 976 970 size_t alignment; 971 + 972 + TEST_REQUIRE_SET_USER_MEMORY_REGION2(); 977 973 978 974 TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, 979 975 "Number of guest pages is not compatible with the host. "
+88 -65
tools/testing/selftests/kvm/lib/userfaultfd_util.c
··· 16 16 #include <poll.h> 17 17 #include <pthread.h> 18 18 #include <linux/userfaultfd.h> 19 + #include <sys/epoll.h> 19 20 #include <sys/syscall.h> 20 21 21 22 #include "kvm_util.h" ··· 28 27 29 28 static void *uffd_handler_thread_fn(void *arg) 30 29 { 31 - struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; 32 - int uffd = uffd_desc->uffd; 33 - int pipefd = uffd_desc->pipefds[0]; 34 - useconds_t delay = uffd_desc->delay; 30 + struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg; 31 + int uffd = reader_args->uffd; 35 32 int64_t pages = 0; 36 33 struct timespec start; 37 34 struct timespec ts_diff; 35 + struct epoll_event evt; 36 + int epollfd; 37 + 38 + epollfd = epoll_create(1); 39 + TEST_ASSERT(epollfd >= 0, "Failed to create epollfd."); 40 + 41 + evt.events = EPOLLIN | EPOLLEXCLUSIVE; 42 + evt.data.u32 = 0; 43 + TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt), 44 + "Failed to add uffd to epollfd"); 45 + 46 + evt.events = EPOLLIN; 47 + evt.data.u32 = 1; 48 + TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt), 49 + "Failed to add pipe to epollfd"); 38 50 39 51 clock_gettime(CLOCK_MONOTONIC, &start); 40 52 while (1) { 41 53 struct uffd_msg msg; 42 - struct pollfd pollfd[2]; 43 - char tmp_chr; 44 54 int r; 45 55 46 - pollfd[0].fd = uffd; 47 - pollfd[0].events = POLLIN; 48 - pollfd[1].fd = pipefd; 49 - pollfd[1].events = POLLIN; 56 + r = epoll_wait(epollfd, &evt, 1, -1); 57 + TEST_ASSERT(r == 1, 58 + "Unexpected number of events (%d) from epoll, errno = %d", 59 + r, errno); 50 60 51 - r = poll(pollfd, 2, -1); 52 - switch (r) { 53 - case -1: 54 - pr_info("poll err"); 55 - continue; 56 - case 0: 57 - continue; 58 - case 1: 59 - break; 60 - default: 61 - pr_info("Polling uffd returned %d", r); 62 - return NULL; 63 - } 61 + if (evt.data.u32 == 1) { 62 + char tmp_chr; 64 63 65 - if (pollfd[0].revents & POLLERR) { 66 - pr_info("uffd revents has POLLERR"); 67 - return NULL; 68 - } 69 - 70 - if (pollfd[1].revents & POLLIN) { 71 - r = read(pollfd[1].fd, &tmp_chr, 1); 64 + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), 65 + "Reader thread received EPOLLERR or EPOLLHUP on pipe."); 66 + r = read(reader_args->pipe, &tmp_chr, 1); 72 67 TEST_ASSERT(r == 1, 73 - "Error reading pipefd in UFFD thread"); 68 + "Error reading pipefd in uffd reader thread"); 74 69 break; 75 70 } 76 71 77 - if (!(pollfd[0].revents & POLLIN)) 78 - continue; 72 + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), 73 + "Reader thread received EPOLLERR or EPOLLHUP on uffd."); 79 74 80 75 r = read(uffd, &msg, sizeof(msg)); 81 76 if (r == -1) { 82 - if (errno == EAGAIN) 83 - continue; 84 - pr_info("Read of uffd got errno %d\n", errno); 85 - return NULL; 77 + TEST_ASSERT(errno == EAGAIN, 78 + "Error reading from UFFD: errno = %d", errno); 79 + continue; 86 80 } 87 81 88 - if (r != sizeof(msg)) { 89 - pr_info("Read on uffd returned unexpected size: %d bytes", r); 90 - return NULL; 91 - } 82 + TEST_ASSERT(r == sizeof(msg), 83 + "Read on uffd returned unexpected number of bytes (%d)", r); 92 84 93 85 if (!(msg.event & UFFD_EVENT_PAGEFAULT)) 94 86 continue; 95 87 96 - if (delay) 97 - usleep(delay); 98 - r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); 99 - if (r < 0) 100 - return NULL; 88 + if (reader_args->delay) 89 + usleep(reader_args->delay); 90 + r = reader_args->handler(reader_args->uffd_mode, uffd, &msg); 91 + TEST_ASSERT(r >= 0, 92 + "Reader thread handler fn returned negative value %d", r); 101 93 pages++; 102 94 } 103 95 ··· 104 110 105 111 struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, 106 112 void *hva, uint64_t len, 113 + uint64_t num_readers, 107 114 uffd_handler_t handler) 108 115 { 109 116 struct uffd_desc *uffd_desc; ··· 113 118 struct uffdio_api uffdio_api; 114 119 struct uffdio_register uffdio_register; 115 120 uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; 116 - int ret; 121 + int ret, i; 117 122 118 123 PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", 119 124 is_minor ? "MINOR" : "MISSING", 120 125 is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); 121 126 122 127 uffd_desc = malloc(sizeof(struct uffd_desc)); 123 - TEST_ASSERT(uffd_desc, "malloc failed"); 128 + TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor"); 129 + 130 + uffd_desc->pipefds = calloc(sizeof(int), num_readers); 131 + TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes"); 132 + 133 + uffd_desc->readers = calloc(sizeof(pthread_t), num_readers); 134 + TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads"); 135 + 136 + uffd_desc->reader_args = calloc(sizeof(struct uffd_reader_args), num_readers); 137 + TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args"); 138 + 139 + uffd_desc->num_readers = num_readers; 124 140 125 141 /* In order to get minor faults, prefault via the alias. */ 126 142 if (is_minor) ··· 154 148 TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == 155 149 expected_ioctls, "missing userfaultfd ioctls"); 156 150 157 - ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK); 158 - TEST_ASSERT(!ret, "Failed to set up pipefd"); 159 - 160 - uffd_desc->uffd_mode = uffd_mode; 161 151 uffd_desc->uffd = uffd; 162 - uffd_desc->delay = delay; 163 - uffd_desc->handler = handler; 164 - pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn, 165 - uffd_desc); 152 + for (i = 0; i < uffd_desc->num_readers; ++i) { 153 + int pipes[2]; 166 154 167 - PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", 168 - hva, hva + len); 155 + ret = pipe2((int *) &pipes, O_CLOEXEC | O_NONBLOCK); 156 + TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p", 157 + i, uffd_desc); 158 + 159 + uffd_desc->pipefds[i] = pipes[1]; 160 + 161 + uffd_desc->reader_args[i].uffd_mode = uffd_mode; 162 + uffd_desc->reader_args[i].uffd = uffd; 163 + uffd_desc->reader_args[i].delay = delay; 164 + uffd_desc->reader_args[i].handler = handler; 165 + uffd_desc->reader_args[i].pipe = pipes[0]; 166 + 167 + pthread_create(&uffd_desc->readers[i], NULL, uffd_handler_thread_fn, 168 + &uffd_desc->reader_args[i]); 169 + 170 + PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n", 171 + i, hva, hva + len); 172 + } 169 173 170 174 return uffd_desc; 171 175 } ··· 183 167 void uffd_stop_demand_paging(struct uffd_desc *uffd) 184 168 { 185 169 char c = 0; 186 - int ret; 170 + int i; 187 171 188 - ret = write(uffd->pipefds[1], &c, 1); 189 - TEST_ASSERT(ret == 1, "Unable to write to pipefd"); 172 + for (i = 0; i < uffd->num_readers; ++i) 173 + TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1, 174 + "Unable to write to pipefd %i for uffd_desc %p", i, uffd); 190 175 191 - ret = pthread_join(uffd->thread, NULL); 192 - TEST_ASSERT(ret == 0, "Pthread_join failed."); 176 + for (i = 0; i < uffd->num_readers; ++i) 177 + TEST_ASSERT(!pthread_join(uffd->readers[i], NULL), 178 + "Pthread_join failed on reader %i for uffd_desc %p", i, uffd); 193 179 194 180 close(uffd->uffd); 195 181 196 - close(uffd->pipefds[1]); 197 - close(uffd->pipefds[0]); 182 + for (i = 0; i < uffd->num_readers; ++i) { 183 + close(uffd->pipefds[i]); 184 + close(uffd->reader_args[i].pipe); 185 + } 198 186 187 + free(uffd->pipefds); 188 + free(uffd->readers); 189 + free(uffd->reader_args); 199 190 free(uffd); 200 191 } 201 192
+33 -2
tools/testing/selftests/kvm/rseq_test.c
··· 186 186 "Only one usable CPU, task migration not possible"); 187 187 } 188 188 189 + static void help(const char *name) 190 + { 191 + puts(""); 192 + printf("usage: %s [-h] [-u]\n", name); 193 + printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); 194 + puts(""); 195 + exit(0); 196 + } 197 + 189 198 int main(int argc, char *argv[]) 190 199 { 200 + bool skip_sanity_check = false; 191 201 int r, i, snapshot; 192 202 struct kvm_vm *vm; 193 203 struct kvm_vcpu *vcpu; 194 204 u32 cpu, rseq_cpu; 205 + int opt; 206 + 207 + while ((opt = getopt(argc, argv, "hu")) != -1) { 208 + switch (opt) { 209 + case 'u': 210 + skip_sanity_check = true; 211 + break; 212 + case 'h': 213 + default: 214 + help(argv[0]); 215 + break; 216 + } 217 + } 195 218 196 219 r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); 197 220 TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, ··· 277 254 * getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly 278 255 * conservative ratio on x86-64, which can do _more_ KVM_RUNs than 279 256 * migrations given the 1us+ delay in the migration task. 257 + * 258 + * Another reason why it may have small migration:KVM_RUN ratio is that, 259 + * on systems with large low power mode wakeup latency, it may happen 260 + * quite often that the scheduler is not able to wake up the target CPU 261 + * before the vCPU thread is scheduled to another CPU. 280 262 */ 281 - TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), 282 - "Only performed %d KVM_RUNs, task stalled too much?", i); 263 + TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), 264 + "Only performed %d KVM_RUNs, task stalled too much?\n\n" 265 + " Try disabling deep sleep states to reduce CPU wakeup latency,\n" 266 + " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" 267 + " or run with -u to disable this sanity check.", i); 283 268 284 269 pthread_join(migration_thread, NULL); 285 270
+24 -23
tools/testing/selftests/kvm/steal_time.c
··· 85 85 static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) 86 86 { 87 87 struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); 88 - int i; 89 88 90 - pr_info("VCPU%d:\n", vcpu_idx); 91 - pr_info(" steal: %lld\n", st->steal); 92 - pr_info(" version: %d\n", st->version); 93 - pr_info(" flags: %d\n", st->flags); 94 - pr_info(" preempted: %d\n", st->preempted); 95 - pr_info(" u8_pad: "); 96 - for (i = 0; i < 3; ++i) 97 - pr_info("%d", st->u8_pad[i]); 98 - pr_info("\n pad: "); 99 - for (i = 0; i < 11; ++i) 100 - pr_info("%d", st->pad[i]); 101 - pr_info("\n"); 89 + ksft_print_msg("VCPU%d:\n", vcpu_idx); 90 + ksft_print_msg(" steal: %lld\n", st->steal); 91 + ksft_print_msg(" version: %d\n", st->version); 92 + ksft_print_msg(" flags: %d\n", st->flags); 93 + ksft_print_msg(" preempted: %d\n", st->preempted); 94 + ksft_print_msg(" u8_pad: %d %d %d\n", 95 + st->u8_pad[0], st->u8_pad[1], st->u8_pad[2]); 96 + ksft_print_msg(" pad: %d %d %d %d %d %d %d %d %d %d %d\n", 97 + st->pad[0], st->pad[1], st->pad[2], st->pad[3], 98 + st->pad[4], st->pad[5], st->pad[6], st->pad[7], 99 + st->pad[8], st->pad[9], st->pad[10]); 102 100 } 103 101 104 102 #elif defined(__aarch64__) ··· 199 201 { 200 202 struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); 201 203 202 - pr_info("VCPU%d:\n", vcpu_idx); 203 - pr_info(" rev: %d\n", st->rev); 204 - pr_info(" attr: %d\n", st->attr); 205 - pr_info(" st_time: %ld\n", st->st_time); 204 + ksft_print_msg("VCPU%d:\n", vcpu_idx); 205 + ksft_print_msg(" rev: %d\n", st->rev); 206 + ksft_print_msg(" attr: %d\n", st->attr); 207 + ksft_print_msg(" st_time: %ld\n", st->st_time); 206 208 } 207 209 208 210 #elif defined(__riscv) ··· 366 368 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); 367 369 virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); 368 370 371 + ksft_print_header(); 369 372 TEST_REQUIRE(is_steal_time_supported(vcpus[0])); 373 + ksft_set_plan(NR_VCPUS); 370 374 371 375 /* Run test on each VCPU */ 372 376 for (i = 0; i < NR_VCPUS; ++i) { ··· 409 409 run_delay, stolen_time); 410 410 411 411 if (verbose) { 412 - pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i, 413 - guest_stolen_time[i], stolen_time); 414 - if (stolen_time == run_delay) 415 - pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)"); 416 - pr_info("\n"); 412 + ksft_print_msg("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld%s\n", 413 + i, guest_stolen_time[i], stolen_time, 414 + stolen_time == run_delay ? 415 + " (BONUS: guest test-stolen-time even exactly matches test-run_delay)" : ""); 417 416 steal_time_dump(vm, i); 418 417 } 418 + ksft_test_result_pass("vcpu%d\n", i); 419 419 } 420 420 421 - return 0; 421 + /* Print results and exit() accordingly */ 422 + ksft_finished(); 422 423 }
+1
tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
··· 75 75 struct ucall uc; 76 76 int testcase; 77 77 78 + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); 78 79 TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); 79 80 80 81 vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+12 -1
tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
··· 13 13 NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms) 14 14 HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages) 15 15 16 + # If we're already root, the host might not have sudo. 17 + if [ $(whoami) == "root" ]; then 18 + function do_sudo () { 19 + "$@" 20 + } 21 + else 22 + function do_sudo () { 23 + sudo "$@" 24 + } 25 + fi 26 + 16 27 set +e 17 28 18 29 function sudo_echo () { 19 - echo "$1" | sudo tee -a "$2" > /dev/null 30 + echo "$1" | do_sudo tee -a "$2" > /dev/null 20 31 } 21 32 22 33 NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
+29 -23
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
··· 125 125 uint32_t state; 126 126 uint64_t state_entry_time; 127 127 uint64_t time[5]; 128 - } __attribute__((__packed__));; 128 + } __attribute__((__packed__)); 129 129 130 130 struct arch_vcpu_info { 131 131 unsigned long cr2; ··· 380 380 GUEST_SYNC(TEST_DONE); 381 381 } 382 382 383 - static int cmp_timespec(struct timespec *a, struct timespec *b) 384 - { 385 - if (a->tv_sec > b->tv_sec) 386 - return 1; 387 - else if (a->tv_sec < b->tv_sec) 388 - return -1; 389 - else if (a->tv_nsec > b->tv_nsec) 390 - return 1; 391 - else if (a->tv_nsec < b->tv_nsec) 392 - return -1; 393 - else 394 - return 0; 395 - } 396 - 397 383 static struct shared_info *shinfo; 398 384 static struct vcpu_info *vinfo; 399 385 static struct kvm_vcpu *vcpu; ··· 435 449 436 450 int main(int argc, char *argv[]) 437 451 { 438 - struct timespec min_ts, max_ts, vm_ts; 439 452 struct kvm_xen_hvm_attr evt_reset; 440 453 struct kvm_vm *vm; 441 454 pthread_t thread; ··· 452 467 bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); 453 468 bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); 454 469 bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA); 455 - 456 - clock_gettime(CLOCK_REALTIME, &min_ts); 457 470 458 471 vm = vm_create_with_one_vcpu(&vcpu, guest_code); 459 472 ··· 993 1010 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); 994 1011 995 1012 alarm(0); 996 - clock_gettime(CLOCK_REALTIME, &max_ts); 997 1013 998 1014 /* 999 1015 * Just a *really* basic check that things are being put in the ··· 1001 1019 */ 1002 1020 struct pvclock_wall_clock *wc; 1003 1021 struct pvclock_vcpu_time_info *ti, *ti2; 1022 + struct kvm_clock_data kcdata; 1023 + long long delta; 1004 1024 1005 1025 wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); 1006 1026 ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); ··· 1018 1034 ti2->tsc_shift, ti2->flags); 1019 1035 } 1020 1036 1021 - vm_ts.tv_sec = wc->sec; 1022 - vm_ts.tv_nsec = wc->nsec; 1023 1037 TEST_ASSERT(wc->version && !(wc->version & 1), 1024 1038 "Bad wallclock version %x", wc->version); 1025 - TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); 1026 - TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); 1039 + 1040 + vm_ioctl(vm, KVM_GET_CLOCK, &kcdata); 1041 + 1042 + if (kcdata.flags & KVM_CLOCK_REALTIME) { 1043 + if (verbose) { 1044 + printf("KVM_GET_CLOCK clock: %lld.%09lld\n", 1045 + kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC); 1046 + printf("KVM_GET_CLOCK realtime: %lld.%09lld\n", 1047 + kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC); 1048 + } 1049 + 1050 + delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock); 1051 + 1052 + /* 1053 + * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but 1054 + * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s 1055 + * delta as testing clock accuracy is not the goal here. The test just needs to 1056 + * check that the value in shinfo is somewhat sane. 1057 + */ 1058 + TEST_ASSERT(llabs(delta) < NSEC_PER_SEC, 1059 + "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld", 1060 + wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC, 1061 + (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC); 1062 + } else { 1063 + pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n"); 1064 + } 1027 1065 1028 1066 TEST_ASSERT(ti->version && !(ti->version & 1), 1029 1067 "Bad time_info version %x", ti->version);