Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: selftests: Add guest_memfd tests for mmap and NUMA policy support

Add tests for NUMA memory policy binding and NUMA-aware allocation in
guest_memfd. This extends the existing selftests by adding proper
validation for:
- KVM GMEM set_policy() and get_policy() vm_ops functionality using
mbind() and get_mempolicy()
- NUMA policy application before and after memory allocation

Run the NUMA mbind() test with and without INIT_SHARED, as KVM should allow
doing mbind(), madvise(), etc. on guest-private memory, e.g. so that
userspace can set NUMA policy for CoCo VMs.

Run the NUMA allocation test only for INIT_SHARED, i.e. only if the host can
fault-in memory (via direct access, madvise(), etc.), as move_pages()
returns -ENOENT if the page hasn't been faulted in (it walks the host page
tables to find the associated folio).

[sean: don't skip entire test when running on non-NUMA system, test mbind()
with private memory, provide more info in assert messages]

Signed-off-by: Shivank Garg <shivankg@amd.com>
Tested-by: Ashish Kalra <ashish.kalra@amd.com>
Link: https://lore.kernel.org/r/20251016172853.52451-12-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>

authored by

Shivank Garg and committed by
Sean Christopherson
38ccc50a e698e89b

+98
+98
tools/testing/selftests/kvm/guest_memfd_test.c
/*
 * Rendered diff hunks for guest_memfd_test.c.  The number pairs are old/new
 * file line numbers, "+" marks an added line and "···" separates hunks.  Only
 * test_mbind() and test_numa_allocation() appear in full; the code before
 * test_mbind() and the gmem_test() calls at the end are fragments of
 * definitions that are not fully visible here.
 *
 * test_mbind(fd, total_size):
 *   Returns immediately unless is_multi_numa_node_system() is true.
 *   Otherwise maps total_size bytes of fd (MAP_SHARED, read/write) and checks
 *   that a policy installed with kvm_mbind() is reported back by
 *   kvm_get_mempolicy() with MPOL_F_ADDR:
 *     - MPOL_INTERLEAVE with nodemask 0x1 on the first 2 * page_size bytes,
 *     - MPOL_BIND with nodemask 0x1 on the next 2 * page_size bytes,
 *     - MPOL_DEFAULT over the whole mapping, expecting an empty nodemask.
 *   It then calls mbind() directly with policy value 999 and expects -1 with
 *   errno == EINVAL, and finally unmaps the range.
 *   NOTE(review): policy is declared int but is printed with %u in the
 *   assert messages.
 *
 * test_numa_allocation(fd, total_size):
 *   Returns immediately unless is_multi_numa_node_system() is true.
 *   Otherwise maps total_size bytes of fd and records the addresses
 *   mem + page_size * i for i = 0..3 in pages[].
 *     1. Writes 0xaa over the first page_size bytes, binds that range to
 *        nodemask 0x1 (MPOL_BIND), then punches a hole over it with
 *        kvm_fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE).
 *     2. Binds pages[0..1] to nodemask 0x2 and pages[2..3] to nodemask 0x1,
 *        writes 0xaa over the whole mapping, queries the four pages with
 *        kvm_move_pages(0, 4, pages, NULL, status, 0) and asserts
 *        status == {1, 1, 0, 0}; the assert messages treat status[] as the
 *        node each page resides on.
 *     3. Punches a hole over the whole range, swaps the two bindings, writes
 *        the mapping again and asserts status == {0, 0, 1, 1}.
 *   Finally unmaps the range.
 *   NOTE(review): assumes total_size covers at least four pages and that
 *   nodes 0 and 1 both exist and can satisfy allocations -- TODO confirm
 *   against the caller and is_multi_numa_node_system().
 *
 * Call sites visible in the last hunk: gmem_test(numa_allocation, ...) is
 * added only inside the "flags & GUEST_MEMFD_FLAG_INIT_SHARED" branch, while
 * gmem_test(mbind, ...) is added next to gmem_test(mmap_cow, ...), outside
 * that inner branch.
 */
··· 19 19 #include <sys/stat.h> 20 20 21 21 #include "kvm_util.h" 22 + #include "numaif.h" 22 23 #include "test_util.h" 23 24 #include "ucall_common.h" 24 25 ··· 72 71 memset(mem, val, page_size); 73 72 for (i = 0; i < total_size; i++) 74 73 TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); 74 + 75 + kvm_munmap(mem, total_size); 76 + } 77 + 78 + static void test_mbind(int fd, size_t total_size) 79 + { 80 + const unsigned long nodemask_0 = 1; /* nid: 0 */ 81 + unsigned long nodemask = 0; 82 + unsigned long maxnode = 8; 83 + int policy; 84 + char *mem; 85 + int ret; 86 + 87 + if (!is_multi_numa_node_system()) 88 + return; 89 + 90 + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); 91 + 92 + /* Test MPOL_INTERLEAVE policy */ 93 + kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0); 94 + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); 95 + TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0, 96 + "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx", 97 + MPOL_INTERLEAVE, nodemask_0, policy, nodemask); 98 + 99 + /* Test basic MPOL_BIND policy */ 100 + kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0); 101 + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR); 102 + TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0, 103 + "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx", 104 + MPOL_BIND, nodemask_0, policy, nodemask); 105 + 106 + /* Test MPOL_DEFAULT policy */ 107 + kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0); 108 + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); 109 + TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask, 110 + "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx", 111 + MPOL_DEFAULT, policy, nodemask); 112 + 113 + /* Test with invalid policy */ 114 + ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0); 115 + TEST_ASSERT(ret == -1 && errno == EINVAL, 116 + "mbind with 
invalid policy should fail with EINVAL"); 117 + 118 + kvm_munmap(mem, total_size); 119 + } 120 + 121 + static void test_numa_allocation(int fd, size_t total_size) 122 + { 123 + unsigned long node0_mask = 1; /* Node 0 */ 124 + unsigned long node1_mask = 2; /* Node 1 */ 125 + unsigned long maxnode = 8; 126 + void *pages[4]; 127 + int status[4]; 128 + char *mem; 129 + int i; 130 + 131 + if (!is_multi_numa_node_system()) 132 + return; 133 + 134 + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); 135 + 136 + for (i = 0; i < 4; i++) 137 + pages[i] = (char *)mem + page_size * i; 138 + 139 + /* Set NUMA policy after allocation */ 140 + memset(mem, 0xaa, page_size); 141 + kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0); 142 + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size); 143 + 144 + /* Set NUMA policy before allocation */ 145 + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); 146 + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); 147 + memset(mem, 0xaa, total_size); 148 + 149 + /* Validate if pages are allocated on specified NUMA nodes */ 150 + kvm_move_pages(0, 4, pages, NULL, status, 0); 151 + TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]); 152 + TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]); 153 + TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]); 154 + TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]); 155 + 156 + /* Punch hole for all pages */ 157 + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size); 158 + 159 + /* Change NUMA policy nodes and reallocate */ 160 + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); 161 + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); 162 + memset(mem, 0xaa, total_size); 163 + 164 + kvm_move_pages(0, 4, 
pages, NULL, status, 0); 165 + TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]); 166 + TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]); 167 + TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]); 168 + TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]); 75 169 76 170 kvm_munmap(mem, total_size); 77 171 } ··· 369 273 if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) { 370 274 gmem_test(mmap_supported, vm, flags); 371 275 gmem_test(fault_overflow, vm, flags); 276 + gmem_test(numa_allocation, vm, flags); 372 277 } else { 373 278 gmem_test(fault_private, vm, flags); 374 279 } 375 280 376 281 gmem_test(mmap_cow, vm, flags); 282 + gmem_test(mbind, vm, flags); 377 283 } else { 378 284 gmem_test(mmap_not_supported, vm, flags); 379 285 }