Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: selftests: access_tracking_perf_test: Add option to skip the sanity check

Add an option to skip the sanity check on the number of still-idle pages,
and skip it by default when the test detects that it is running under a
hypervisor or that NUMA balancing is enabled.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Co-developed-by: James Houghton <jthoughton@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Link: https://lore.kernel.org/r/20250508184649.2576210-3-jthoughton@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>

authored by

Maxim Levitsky and committed by
Sean Christopherson
26dcdfa0 d761c14d

+61 -9
+53 -9
tools/testing/selftests/kvm/access_tracking_perf_test.c
··· 65 65 /* Whether to overlap the regions of memory vCPUs access. */ 66 66 static bool overlap_memory_access; 67 67 68 + /* 69 + * If the test should only warn if there are too many idle pages (i.e., it is 70 + * expected). 71 + * -1: Not yet set. 72 + * 0: We do not expect too many idle pages, so FAIL if too many idle pages. 73 + * 1: Having too many idle pages is expected, so merely print a warning if 74 + * too many idle pages are found. 75 + */ 76 + static int idle_pages_warn_only = -1; 77 + 68 78 struct test_params { 69 79 /* The backing source for the region of memory. */ 70 80 enum vm_mem_backing_src_type backing_src; ··· 187 177 * arbitrary; high enough that we ensure most memory access went through 188 178 * access tracking but low enough as to not make the test too brittle 189 179 * over time and across architectures. 190 - * 191 - * When running the guest as a nested VM, "warn" instead of asserting 192 - * as the TLB size is effectively unlimited and the KVM doesn't 193 - * explicitly flush the TLB when aging SPTEs. As a result, more pages 194 - * are cached and the guest won't see the "idle" bit cleared. 195 180 */ 196 181 if (still_idle >= pages / 10) { 197 - #ifdef __x86_64__ 198 - TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR), 182 + TEST_ASSERT(idle_pages_warn_only, 199 183 "vCPU%d: Too many pages still idle (%lu out of %lu)", 200 184 vcpu_idx, still_idle, pages); 201 - #endif 185 + 202 186 printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " 203 187 "this will affect performance results.\n", 204 188 vcpu_idx, still_idle, pages); ··· 332 328 memstress_destroy_vm(vm); 333 329 } 334 330 331 + static int access_tracking_unreliable(void) 332 + { 333 + #ifdef __x86_64__ 334 + /* 335 + * When running nested, the TLB size may be effectively unlimited (for 336 + * example, this is the case when running on KVM L0), and KVM doesn't 337 + * explicitly flush the TLB when aging SPTEs. 
As a result, more pages 338 + * are cached and the guest won't see the "idle" bit cleared. 339 + */ 340 + if (this_cpu_has(X86_FEATURE_HYPERVISOR)) { 341 + puts("Skipping idle page count sanity check, because the test is run nested"); 342 + return 1; 343 + } 344 + #endif 345 + /* 346 + * When NUMA balancing is enabled, guest memory will be unmapped to get 347 + * NUMA faults, dropping the Accessed bits. 348 + */ 349 + if (is_numa_balancing_enabled()) { 350 + puts("Skipping idle page count sanity check, because NUMA balancing is enabled"); 351 + return 1; 352 + } 353 + 354 + return 0; 355 + } 356 + 335 357 static void help(char *name) 336 358 { 337 359 puts(""); ··· 372 342 printf(" -v: specify the number of vCPUs to run.\n"); 373 343 printf(" -o: Overlap guest memory accesses instead of partitioning\n" 374 344 " them into a separate region of memory for each vCPU.\n"); 345 + printf(" -w: Control whether the test warns or fails if more than 10%%\n" 346 + " of pages are still seen as idle/old after accessing guest\n" 347 + " memory. >0 == warn only, 0 == fail, <0 == auto. 
For auto\n" 348 + " mode, the test fails by default, but switches to warn only\n" 349 + " if NUMA balancing is enabled or the test detects it's running\n" 350 + " in a VM.\n"); 375 351 backing_src_help("-s"); 376 352 puts(""); 377 353 exit(0); ··· 395 359 396 360 guest_modes_append_default(); 397 361 398 - while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { 362 + while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { 399 363 switch (opt) { 400 364 case 'm': 401 365 guest_modes_cmdline(optarg); ··· 412 376 case 's': 413 377 params.backing_src = parse_backing_src_type(optarg); 414 378 break; 379 + case 'w': 380 + idle_pages_warn_only = 381 + atoi_non_negative("Idle pages warning", 382 + optarg); 383 + break; 415 384 case 'h': 416 385 default: 417 386 help(argv[0]); ··· 428 387 __TEST_REQUIRE(page_idle_fd >= 0, 429 388 "CONFIG_IDLE_PAGE_TRACKING is not enabled"); 430 389 close(page_idle_fd); 390 + 391 + if (idle_pages_warn_only == -1) 392 + idle_pages_warn_only = access_tracking_unreliable(); 431 393 432 394 for_each_guest_mode(run_test, &params); 433 395
+1
tools/testing/selftests/kvm/include/test_util.h
··· 153 153 void backing_src_help(const char *flag); 154 154 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); 155 155 long get_run_delay(void); 156 + bool is_numa_balancing_enabled(void); 156 157 157 158 /* 158 159 * Whether or not the given source type is shared memory (as opposed to
+7
tools/testing/selftests/kvm/lib/test_util.c
··· 176 176 return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); 177 177 } 178 178 179 + bool is_numa_balancing_enabled(void) 180 + { 181 + if (!test_sysfs_path("/proc/sys/kernel/numa_balancing")) 182 + return false; 183 + return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1; 184 + } 185 + 179 186 size_t get_def_hugetlb_pagesz(void) 180 187 { 181 188 char buf[64];