Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: arm64: Instantiate guest stage-2 page-tables at EL2

Extend the initialisation of guest data structures within the pKVM
hypervisor at EL2 so that we instantiate a memory pool and a full
'struct kvm_s2_mmu' structure for each VM, with a stage-2 page-table
entirely independent from the one managed by the host at EL1.

The 'struct kvm_pgtable_mm_ops' used by the page-table code is populated
with a set of callbacks that can manage guest pages in the hypervisor
without any direct intervention from the host, allocating page-table
pages from the provided pool and returning these to the host on VM
teardown. To keep things simple, the stage-2 MMU for the guest is
configured identically to the host stage-2 in the VTCR register and so
the IPA size of the guest must match the PA size of the host.

For now, the new page-table is unused as there is no way for the host
to map anything into it. Yet.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-20-will@kernel.org

Authored by Quentin Perret and committed by Marc Zyngier
60dfe093 315775ff

+132 -3
+6
arch/arm64/kvm/hyp/include/nvhe/pkvm.h
··· 9 9 10 10 #include <asm/kvm_pkvm.h> 11 11 12 + #include <nvhe/gfp.h> 13 + #include <nvhe/spinlock.h> 14 + 12 15 /* 13 16 * Holds the relevant data for maintaining the vcpu state completely at hyp. 14 17 */ ··· 33 30 34 31 /* The guest's stage-2 page-table managed by the hypervisor. */ 35 32 struct kvm_pgtable pgt; 33 + struct kvm_pgtable_mm_ops mm_ops; 34 + struct hyp_pool pool; 35 + hyp_spinlock_t lock; 36 36 37 37 /* 38 38 * The number of vcpus initialized and ready to run.
+123 -2
arch/arm64/kvm/hyp/nvhe/mem_protect.c
··· 25 25 26 26 static struct hyp_pool host_s2_pool; 27 27 28 + static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm); 29 + #define current_vm (*this_cpu_ptr(&__current_vm)) 30 + 31 + static void guest_lock_component(struct pkvm_hyp_vm *vm) 32 + { 33 + hyp_spin_lock(&vm->lock); 34 + current_vm = vm; 35 + } 36 + 37 + static void guest_unlock_component(struct pkvm_hyp_vm *vm) 38 + { 39 + current_vm = NULL; 40 + hyp_spin_unlock(&vm->lock); 41 + } 42 + 28 43 static void host_lock_component(void) 29 44 { 30 45 hyp_spin_lock(&host_mmu.lock); ··· 155 140 return 0; 156 141 } 157 142 143 + static bool guest_stage2_force_pte_cb(u64 addr, u64 end, 144 + enum kvm_pgtable_prot prot) 145 + { 146 + return true; 147 + } 148 + 149 + static void *guest_s2_zalloc_pages_exact(size_t size) 150 + { 151 + void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size)); 152 + 153 + WARN_ON(size != (PAGE_SIZE << get_order(size))); 154 + hyp_split_page(hyp_virt_to_page(addr)); 155 + 156 + return addr; 157 + } 158 + 159 + static void guest_s2_free_pages_exact(void *addr, unsigned long size) 160 + { 161 + u8 order = get_order(size); 162 + unsigned int i; 163 + 164 + for (i = 0; i < (1 << order); i++) 165 + hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE)); 166 + } 167 + 168 + static void *guest_s2_zalloc_page(void *mc) 169 + { 170 + struct hyp_page *p; 171 + void *addr; 172 + 173 + addr = hyp_alloc_pages(&current_vm->pool, 0); 174 + if (addr) 175 + return addr; 176 + 177 + addr = pop_hyp_memcache(mc, hyp_phys_to_virt); 178 + if (!addr) 179 + return addr; 180 + 181 + memset(addr, 0, PAGE_SIZE); 182 + p = hyp_virt_to_page(addr); 183 + memset(p, 0, sizeof(*p)); 184 + p->refcount = 1; 185 + 186 + return addr; 187 + } 188 + 189 + static void guest_s2_get_page(void *addr) 190 + { 191 + hyp_get_page(&current_vm->pool, addr); 192 + } 193 + 194 + static void guest_s2_put_page(void *addr) 195 + { 196 + hyp_put_page(&current_vm->pool, addr); 197 + } 198 + 199 + static void 
clean_dcache_guest_page(void *va, size_t size) 200 + { 201 + __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); 202 + hyp_fixmap_unmap(); 203 + } 204 + 205 + static void invalidate_icache_guest_page(void *va, size_t size) 206 + { 207 + __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); 208 + hyp_fixmap_unmap(); 209 + } 210 + 158 211 int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) 159 212 { 160 - vm->pgt.pgd = pgd; 213 + struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu; 214 + unsigned long nr_pages; 215 + int ret; 216 + 217 + nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; 218 + ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); 219 + if (ret) 220 + return ret; 221 + 222 + hyp_spin_lock_init(&vm->lock); 223 + vm->mm_ops = (struct kvm_pgtable_mm_ops) { 224 + .zalloc_pages_exact = guest_s2_zalloc_pages_exact, 225 + .free_pages_exact = guest_s2_free_pages_exact, 226 + .zalloc_page = guest_s2_zalloc_page, 227 + .phys_to_virt = hyp_phys_to_virt, 228 + .virt_to_phys = hyp_virt_to_phys, 229 + .page_count = hyp_page_count, 230 + .get_page = guest_s2_get_page, 231 + .put_page = guest_s2_put_page, 232 + .dcache_clean_inval_poc = clean_dcache_guest_page, 233 + .icache_inval_pou = invalidate_icache_guest_page, 234 + }; 235 + 236 + guest_lock_component(vm); 237 + ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, 238 + guest_stage2_force_pte_cb); 239 + guest_unlock_component(vm); 240 + if (ret) 241 + return ret; 242 + 243 + vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd); 244 + 161 245 return 0; 162 246 } 163 247 164 248 void reclaim_guest_pages(struct pkvm_hyp_vm *vm) 165 249 { 250 + void *pgd = vm->pgt.pgd; 166 251 unsigned long nr_pages; 167 252 168 253 nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; 169 - WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(vm->pgt.pgd), nr_pages)); 254 + 255 + guest_lock_component(vm); 256 + kvm_pgtable_stage2_destroy(&vm->pgt); 
257 + vm->kvm.arch.mmu.pgd_phys = 0ULL; 258 + guest_unlock_component(vm); 259 + 260 + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pages)); 170 261 } 171 262 172 263 int __pkvm_prot_finalize(void)
+3 -1
arch/arm64/kvm/mmu.c
··· 693 693 return -EINVAL; 694 694 695 695 phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); 696 - if (phys_shift) { 696 + if (is_protected_kvm_enabled()) { 697 + phys_shift = kvm_ipa_limit; 698 + } else if (phys_shift) { 697 699 if (phys_shift > kvm_ipa_limit || 698 700 phys_shift < ARM64_MIN_PARANGE_BITS) 699 701 return -EINVAL;