Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch kvm-arm64/pkvm-np-guest into kvmarm-master/next

* kvm-arm64/pkvm-np-guest:
: .
: pKVM support for non-protected guests using the standard MM
: infrastructure, courtesy of Quentin Perret. From the cover letter:
:
: "This series moves the stage-2 page-table management of non-protected
: guests to EL2 when pKVM is enabled. This is only intended as an
: incremental step towards a 'feature-complete' pKVM, there is however a
: lot more that needs to come on top.
:
: With that series applied, pKVM provides near-parity with standard KVM
: from a functional perspective all while Linux no longer touches the
: stage-2 page-tables itself at EL1. The majority of mm-related KVM
: features work out of the box, including MMU notifiers, dirty logging,
: RO memslots and things of that nature. There are however two gotchas:
:
: - We don't support mapping devices into guests: this requires
: additional hypervisor support for tracking the 'state' of devices,
: which will come in a later series. No device assignment until then.
:
: - Stage-2 mappings are forced to page-granularity even when backed by a
: huge page for the sake of simplicity of this series. I'm only aiming
: at functional parity-ish (from userspace's PoV) for now, support for
: HP can be added on top later as a perf improvement."
: .
KVM: arm64: Plumb the pKVM MMU in KVM
KVM: arm64: Introduce the EL1 pKVM MMU
KVM: arm64: Introduce __pkvm_tlb_flush_vmid()
KVM: arm64: Introduce __pkvm_host_mkyoung_guest()
KVM: arm64: Introduce __pkvm_host_test_clear_young_guest()
KVM: arm64: Introduce __pkvm_host_wrprotect_guest()
KVM: arm64: Introduce __pkvm_host_relax_guest_perms()
KVM: arm64: Introduce __pkvm_host_unshare_guest()
KVM: arm64: Introduce __pkvm_host_share_guest()
KVM: arm64: Introduce __pkvm_vcpu_{load,put}()
KVM: arm64: Add {get,put}_pkvm_hyp_vm() helpers
KVM: arm64: Make kvm_pgtable_stage2_init() a static inline function
KVM: arm64: Pass walk flags to kvm_pgtable_stage2_relax_perms
KVM: arm64: Pass walk flags to kvm_pgtable_stage2_mkyoung
KVM: arm64: Move host page ownership tracking to the hyp vmemmap
KVM: arm64: Make hyp_page::order a u8
KVM: arm64: Move enum pkvm_page_state to memory.h
KVM: arm64: Change the layout of enum pkvm_page_state

Signed-off-by: Marc Zyngier <maz@kernel.org>

# Conflicts:
# arch/arm64/kvm/arm.c

+1005 -144
+9
arch/arm64/include/asm/kvm_asm.h
··· 64 64 /* Hypercalls available after pKVM finalisation */ 65 65 __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, 66 66 __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp, 67 + __KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest, 68 + __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest, 69 + __KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest, 70 + __KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest, 71 + __KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest, 72 + __KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest, 67 73 __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, 68 74 __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run, 69 75 __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, ··· 84 78 __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, 85 79 __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, 86 80 __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, 81 + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, 82 + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, 83 + __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, 87 84 }; 88 85 89 86 #define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
+4
arch/arm64/include/asm/kvm_host.h
··· 85 85 struct kvm_hyp_memcache { 86 86 phys_addr_t head; 87 87 unsigned long nr_pages; 88 + struct pkvm_mapping *mapping; /* only used from EL1 */ 88 89 }; 89 90 90 91 static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, ··· 775 774 776 775 /* Cache some mmu pages needed inside spinlock regions */ 777 776 struct kvm_mmu_memory_cache mmu_page_cache; 777 + 778 + /* Pages to top-up the pKVM/EL2 guest pool */ 779 + struct kvm_hyp_memcache pkvm_memcache; 778 780 779 781 /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ 780 782 u64 vsesr_el2;
+16
arch/arm64/include/asm/kvm_mmu.h
··· 353 353 return &kvm->arch.mmu != mmu; 354 354 } 355 355 356 + static inline void kvm_fault_lock(struct kvm *kvm) 357 + { 358 + if (is_protected_kvm_enabled()) 359 + write_lock(&kvm->mmu_lock); 360 + else 361 + read_lock(&kvm->mmu_lock); 362 + } 363 + 364 + static inline void kvm_fault_unlock(struct kvm *kvm) 365 + { 366 + if (is_protected_kvm_enabled()) 367 + write_unlock(&kvm->mmu_lock); 368 + else 369 + read_unlock(&kvm->mmu_lock); 370 + } 371 + 356 372 #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS 357 373 void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); 358 374 #else
+24 -12
arch/arm64/include/asm/kvm_pgtable.h
··· 412 412 * be used instead of block mappings. 413 413 */ 414 414 struct kvm_pgtable { 415 - u32 ia_bits; 416 - s8 start_level; 417 - kvm_pteref_t pgd; 418 - struct kvm_pgtable_mm_ops *mm_ops; 415 + union { 416 + struct rb_root pkvm_mappings; 417 + struct { 418 + u32 ia_bits; 419 + s8 start_level; 420 + kvm_pteref_t pgd; 421 + struct kvm_pgtable_mm_ops *mm_ops; 419 422 420 - /* Stage-2 only */ 421 - struct kvm_s2_mmu *mmu; 422 - enum kvm_pgtable_stage2_flags flags; 423 - kvm_pgtable_force_pte_cb_t force_pte_cb; 423 + /* Stage-2 only */ 424 + enum kvm_pgtable_stage2_flags flags; 425 + kvm_pgtable_force_pte_cb_t force_pte_cb; 426 + }; 427 + }; 428 + struct kvm_s2_mmu *mmu; 424 429 }; 425 430 426 431 /** ··· 531 526 enum kvm_pgtable_stage2_flags flags, 532 527 kvm_pgtable_force_pte_cb_t force_pte_cb); 533 528 534 - #define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \ 535 - __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL) 529 + static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, 530 + struct kvm_pgtable_mm_ops *mm_ops) 531 + { 532 + return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL); 533 + } 536 534 537 535 /** 538 536 * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. ··· 677 669 * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry. 678 670 * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 679 671 * @addr: Intermediate physical address to identify the page-table entry. 672 + * @flags: Flags to control the page-table walk (ex. a shared walk) 680 673 * 681 674 * The offset of @addr within a page is ignored. 682 675 * 683 676 * If there is a valid, leaf page-table entry used to translate @addr, then 684 677 * set the access flag in that entry. 685 678 */ 686 - void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr); 679 + void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, 680 + enum kvm_pgtable_walk_flags flags); 687 681 688 682 /** 689 683 * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access ··· 715 705 * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 716 706 * @addr: Intermediate physical address to identify the page-table entry. 717 707 * @prot: Additional permissions to grant for the mapping. 708 + * @flags: Flags to control the page-table walk (ex. a shared walk) 718 709 * 719 710 * The offset of @addr within a page is ignored. 720 711 * ··· 728 717 * Return: 0 on success, negative error code on failure. 729 718 */ 730 719 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, 731 - enum kvm_pgtable_prot prot); 720 + enum kvm_pgtable_prot prot, 721 + enum kvm_pgtable_walk_flags flags); 732 722 733 723 /** 734 724 * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
+26
arch/arm64/include/asm/kvm_pkvm.h
··· 137 137 SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl))); 138 138 } 139 139 140 + struct pkvm_mapping { 141 + struct rb_node node; 142 + u64 gfn; 143 + u64 pfn; 144 + }; 145 + 146 + int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, 147 + struct kvm_pgtable_mm_ops *mm_ops); 148 + void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); 149 + int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, 150 + enum kvm_pgtable_prot prot, void *mc, 151 + enum kvm_pgtable_walk_flags flags); 152 + int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); 153 + int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); 154 + int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); 155 + bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold); 156 + int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, 157 + enum kvm_pgtable_walk_flags flags); 158 + void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, 159 + enum kvm_pgtable_walk_flags flags); 160 + int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, 161 + struct kvm_mmu_memory_cache *mc); 162 + void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); 163 + kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, 164 + enum kvm_pgtable_prot prot, void *mc, 165 + bool force_pte); 140 166 #endif /* __ARM64_KVM_PKVM_H__ */
+22 -1
arch/arm64/kvm/arm.c
··· 500 500 501 501 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 502 502 { 503 - kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); 503 + if (!is_protected_kvm_enabled()) 504 + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); 505 + else 506 + free_hyp_memcache(&vcpu->arch.pkvm_memcache); 504 507 kvm_timer_vcpu_terminate(vcpu); 505 508 kvm_pmu_vcpu_destroy(vcpu); 506 509 kvm_vgic_vcpu_destroy(vcpu); ··· 575 572 struct kvm_s2_mmu *mmu; 576 573 int *last_ran; 577 574 575 + if (is_protected_kvm_enabled()) 576 + goto nommu; 577 + 578 578 if (vcpu_has_nv(vcpu)) 579 579 kvm_vcpu_load_hw_mmu(vcpu); 580 580 ··· 598 592 *last_ran = vcpu->vcpu_idx; 599 593 } 600 594 595 + nommu: 601 596 vcpu->cpu = cpu; 602 597 603 598 kvm_vgic_load(vcpu); ··· 623 616 624 617 vcpu_set_pauth_traps(vcpu); 625 618 619 + if (is_protected_kvm_enabled()) { 620 + kvm_call_hyp_nvhe(__pkvm_vcpu_load, 621 + vcpu->kvm->arch.pkvm.handle, 622 + vcpu->vcpu_idx, vcpu->arch.hcr_el2); 623 + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, 624 + &vcpu->arch.vgic_cpu.vgic_v3); 625 + } 626 + 626 627 if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) 627 628 vcpu_set_on_unsupported_cpu(vcpu); 628 629 } 629 630 630 631 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 631 632 { 633 + if (is_protected_kvm_enabled()) { 634 + kvm_call_hyp(__vgic_v3_save_vmcr_aprs, 635 + &vcpu->arch.vgic_cpu.vgic_v3); 636 + kvm_call_hyp_nvhe(__pkvm_vcpu_put); 637 + } 638 + 632 639 kvm_vcpu_put_debug(vcpu); 633 640 kvm_arch_vcpu_put_fp(vcpu); 634 641 if (has_vhe())
+3 -3
arch/arm64/kvm/hyp/include/nvhe/gfp.h
··· 7 7 #include <nvhe/memory.h> 8 8 #include <nvhe/spinlock.h> 9 9 10 - #define HYP_NO_ORDER USHRT_MAX 10 + #define HYP_NO_ORDER ((u8)(~0)) 11 11 12 12 struct hyp_pool { 13 13 /* ··· 19 19 struct list_head free_area[NR_PAGE_ORDERS]; 20 20 phys_addr_t range_start; 21 21 phys_addr_t range_end; 22 - unsigned short max_order; 22 + u8 max_order; 23 23 }; 24 24 25 25 /* Allocation */ 26 - void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); 26 + void *hyp_alloc_pages(struct hyp_pool *pool, u8 order); 27 27 void hyp_split_page(struct hyp_page *page); 28 28 void hyp_get_page(struct hyp_pool *pool, void *addr); 29 29 void hyp_put_page(struct hyp_pool *pool, void *addr);
+8 -31
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
··· 11 11 #include <asm/kvm_mmu.h> 12 12 #include <asm/kvm_pgtable.h> 13 13 #include <asm/virt.h> 14 + #include <nvhe/memory.h> 14 15 #include <nvhe/pkvm.h> 15 16 #include <nvhe/spinlock.h> 16 - 17 - /* 18 - * SW bits 0-1 are reserved to track the memory ownership state of each page: 19 - * 00: The page is owned exclusively by the page-table owner. 20 - * 01: The page is owned by the page-table owner, but is shared 21 - * with another entity. 22 - * 10: The page is shared with, but not owned by the page-table owner. 23 - * 11: Reserved for future use (lending). 24 - */ 25 - enum pkvm_page_state { 26 - PKVM_PAGE_OWNED = 0ULL, 27 - PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, 28 - PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, 29 - __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 | 30 - KVM_PGTABLE_PROT_SW1, 31 - 32 - /* Meta-states which aren't encoded directly in the PTE's SW bits */ 33 - PKVM_NOPAGE, 34 - }; 35 - 36 - #define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) 37 - static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, 38 - enum pkvm_page_state state) 39 - { 40 - return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state; 41 - } 42 - 43 - static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) 44 - { 45 - return prot & PKVM_PAGE_STATE_PROT_MASK; 46 - } 47 17 48 18 struct host_mmu { 49 19 struct kvm_arch arch; ··· 39 69 int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages); 40 70 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages); 41 71 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages); 72 + int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu, 73 + enum kvm_pgtable_prot prot); 74 + int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm); 75 + int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot); 76 + int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm); 77 + int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm); 78 + int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu); 42 79 43 80 bool addr_is_memory(phys_addr_t phys); 44 81 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
+47 -3
arch/arm64/kvm/hyp/include/nvhe/memory.h
··· 7 7 8 8 #include <linux/types.h> 9 9 10 + /* 11 + * Bits 0-1 are reserved to track the memory ownership state of each page: 12 + * 00: The page is owned exclusively by the page-table owner. 13 + * 01: The page is owned by the page-table owner, but is shared 14 + * with another entity. 15 + * 10: The page is shared with, but not owned by the page-table owner. 16 + * 11: Reserved for future use (lending). 17 + */ 18 + enum pkvm_page_state { 19 + PKVM_PAGE_OWNED = 0ULL, 20 + PKVM_PAGE_SHARED_OWNED = BIT(0), 21 + PKVM_PAGE_SHARED_BORROWED = BIT(1), 22 + __PKVM_PAGE_RESERVED = BIT(0) | BIT(1), 23 + 24 + /* Meta-states which aren't encoded directly in the PTE's SW bits */ 25 + PKVM_NOPAGE = BIT(2), 26 + }; 27 + #define PKVM_PAGE_META_STATES_MASK (~__PKVM_PAGE_RESERVED) 28 + 29 + #define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) 30 + static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, 31 + enum pkvm_page_state state) 32 + { 33 + prot &= ~PKVM_PAGE_STATE_PROT_MASK; 34 + prot |= FIELD_PREP(PKVM_PAGE_STATE_PROT_MASK, state); 35 + return prot; 36 + } 37 + 38 + static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) 39 + { 40 + return FIELD_GET(PKVM_PAGE_STATE_PROT_MASK, prot); 41 + } 42 + 10 43 struct hyp_page { 11 - unsigned short refcount; 12 - unsigned short order; 44 + u16 refcount; 45 + u8 order; 46 + 47 + /* Host (non-meta) state. Guarded by the host stage-2 lock. */ 48 + enum pkvm_page_state host_state : 8; 49 + 50 + u32 host_share_guest_count; 13 51 }; 14 52 15 53 extern u64 __hyp_vmemmap; ··· 67 29 68 30 #define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) 69 31 #define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT)) 70 - #define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)]) 32 + 33 + static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys) 34 + { 35 + BUILD_BUG_ON(sizeof(struct hyp_page) != sizeof(u64)); 36 + return &hyp_vmemmap[hyp_phys_to_pfn(phys)]; 37 + } 38 + 71 39 #define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt)) 72 40 #define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt)) 73 41
+16
arch/arm64/kvm/hyp/include/nvhe/pkvm.h
··· 20 20 21 21 /* Backpointer to the host's (untrusted) vCPU instance. */ 22 22 struct kvm_vcpu *host_vcpu; 23 + 24 + /* 25 + * If this hyp vCPU is loaded, then this is a backpointer to the 26 + * per-cpu pointer tracking us. Otherwise, NULL if not loaded. 27 + */ 28 + struct pkvm_hyp_vcpu **loaded_hyp_vcpu; 23 29 }; 24 30 25 31 /* ··· 64 58 return vcpu_is_protected(&hyp_vcpu->vcpu); 65 59 } 66 60 61 + static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm) 62 + { 63 + return kvm_vm_is_protected(&hyp_vm->kvm); 64 + } 65 + 67 66 void pkvm_hyp_vm_table_init(void *tbl); 68 67 69 68 int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, ··· 80 69 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, 81 70 unsigned int vcpu_idx); 82 71 void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu); 72 + struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void); 73 + 74 + struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle); 75 + struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle); 76 + void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm); 83 77 84 78 #endif /* __ARM64_KVM_NVHE_PKVM_H__ */
+190 -11
arch/arm64/kvm/hyp/nvhe/hyp-main.c
··· 103 103 /* Limit guest vector length to the maximum supported by the host. */ 104 104 hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl); 105 105 106 - hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; 107 - 108 106 hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; 109 107 hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE); 110 108 hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) & ··· 137 139 host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i]; 138 140 } 139 141 142 + static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt) 143 + { 144 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 145 + DECLARE_REG(unsigned int, vcpu_idx, host_ctxt, 2); 146 + DECLARE_REG(u64, hcr_el2, host_ctxt, 3); 147 + struct pkvm_hyp_vcpu *hyp_vcpu; 148 + 149 + if (!is_protected_kvm_enabled()) 150 + return; 151 + 152 + hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx); 153 + if (!hyp_vcpu) 154 + return; 155 + 156 + if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) { 157 + /* Propagate WFx trapping flags */ 158 + hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI); 159 + hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI); 160 + } 161 + } 162 + 163 + static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt) 164 + { 165 + struct pkvm_hyp_vcpu *hyp_vcpu; 166 + 167 + if (!is_protected_kvm_enabled()) 168 + return; 169 + 170 + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 171 + if (hyp_vcpu) 172 + pkvm_put_hyp_vcpu(hyp_vcpu); 173 + } 174 + 140 175 static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) 141 176 { 142 177 DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1); 143 178 int ret; 144 179 145 - host_vcpu = kern_hyp_va(host_vcpu); 146 - 147 180 if (unlikely(is_protected_kvm_enabled())) { 148 - struct pkvm_hyp_vcpu *hyp_vcpu; 149 - struct kvm *host_kvm; 181 + struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 150 182 151 183 /* 152 184 * KVM (and pKVM) doesn't support SME guests for now, and ··· 189 161 goto out; 190 162 } 191 163 192 - host_kvm = kern_hyp_va(host_vcpu->kvm); 193 - hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle, 194 - host_vcpu->vcpu_idx); 195 164 if (!hyp_vcpu) { 196 165 ret = -EINVAL; 197 166 goto out; ··· 199 174 ret = __kvm_vcpu_run(&hyp_vcpu->vcpu); 200 175 201 176 sync_hyp_vcpu(hyp_vcpu); 202 - pkvm_put_hyp_vcpu(hyp_vcpu); 203 177 } else { 204 178 /* The host is fully trusted, run its vCPU directly. */ 205 - ret = __kvm_vcpu_run(host_vcpu); 179 + ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu)); 206 180 } 181 + out: 182 + cpu_reg(host_ctxt, 1) = ret; 183 + } 207 184 185 + static int pkvm_refill_memcache(struct pkvm_hyp_vcpu *hyp_vcpu) 186 + { 187 + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; 188 + 189 + return refill_memcache(&hyp_vcpu->vcpu.arch.pkvm_memcache, 190 + host_vcpu->arch.pkvm_memcache.nr_pages, 191 + &host_vcpu->arch.pkvm_memcache); 192 + } 193 + 194 + static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt) 195 + { 196 + DECLARE_REG(u64, pfn, host_ctxt, 1); 197 + DECLARE_REG(u64, gfn, host_ctxt, 2); 198 + DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3); 199 + struct pkvm_hyp_vcpu *hyp_vcpu; 200 + int ret = -EINVAL; 201 + 202 + if (!is_protected_kvm_enabled()) 203 + goto out; 204 + 205 + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 206 + if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) 207 + goto out; 208 + 209 + ret = pkvm_refill_memcache(hyp_vcpu); 210 + if (ret) 211 + goto out; 212 + 213 + ret = __pkvm_host_share_guest(pfn, gfn, hyp_vcpu, prot); 214 + out: 215 + cpu_reg(host_ctxt, 1) = ret; 216 + } 217 + 218 + static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt) 219 + { 220 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 221 + DECLARE_REG(u64, gfn, host_ctxt, 2); 222 + struct pkvm_hyp_vm *hyp_vm; 223 + int ret = -EINVAL; 224 + 225 + if (!is_protected_kvm_enabled()) 226 + goto out; 227 + 228 + hyp_vm = get_np_pkvm_hyp_vm(handle); 229 + if (!hyp_vm) 230 + goto out; 231 + 232 + ret = __pkvm_host_unshare_guest(gfn, hyp_vm); 233 + put_pkvm_hyp_vm(hyp_vm); 234 + out: 235 + cpu_reg(host_ctxt, 1) = ret; 236 + } 237 + 238 + static void handle___pkvm_host_relax_perms_guest(struct kvm_cpu_context *host_ctxt) 239 + { 240 + DECLARE_REG(u64, gfn, host_ctxt, 1); 241 + DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 2); 242 + struct pkvm_hyp_vcpu *hyp_vcpu; 243 + int ret = -EINVAL; 244 + 245 + if (!is_protected_kvm_enabled()) 246 + goto out; 247 + 248 + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 249 + if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) 250 + goto out; 251 + 252 + ret = __pkvm_host_relax_perms_guest(gfn, hyp_vcpu, prot); 253 + out: 254 + cpu_reg(host_ctxt, 1) = ret; 255 + } 256 + 257 + static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt) 258 + { 259 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 260 + DECLARE_REG(u64, gfn, host_ctxt, 2); 261 + struct pkvm_hyp_vm *hyp_vm; 262 + int ret = -EINVAL; 263 + 264 + if (!is_protected_kvm_enabled()) 265 + goto out; 266 + 267 + hyp_vm = get_np_pkvm_hyp_vm(handle); 268 + if (!hyp_vm) 269 + goto out; 270 + 271 + ret = __pkvm_host_wrprotect_guest(gfn, hyp_vm); 272 + put_pkvm_hyp_vm(hyp_vm); 273 + out: 274 + cpu_reg(host_ctxt, 1) = ret; 275 + } 276 + 277 + static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *host_ctxt) 278 + { 279 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 280 + DECLARE_REG(u64, gfn, host_ctxt, 2); 281 + DECLARE_REG(bool, mkold, host_ctxt, 3); 282 + struct pkvm_hyp_vm *hyp_vm; 283 + int ret = -EINVAL; 284 + 285 + if (!is_protected_kvm_enabled()) 286 + goto out; 287 + 288 + hyp_vm = get_np_pkvm_hyp_vm(handle); 289 + if (!hyp_vm) 290 + goto out; 291 + 292 + ret = __pkvm_host_test_clear_young_guest(gfn, mkold, hyp_vm); 293 + put_pkvm_hyp_vm(hyp_vm); 294 + out: 295 + cpu_reg(host_ctxt, 1) = ret; 296 + } 297 + 298 + static void handle___pkvm_host_mkyoung_guest(struct kvm_cpu_context *host_ctxt) 299 + { 300 + DECLARE_REG(u64, gfn, host_ctxt, 1); 301 + struct pkvm_hyp_vcpu *hyp_vcpu; 302 + int ret = -EINVAL; 303 + 304 + if (!is_protected_kvm_enabled()) 305 + goto out; 306 + 307 + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 308 + if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) 309 + goto out; 310 + 311 + ret = __pkvm_host_mkyoung_guest(gfn, hyp_vcpu); 208 312 out: 209 313 cpu_reg(host_ctxt, 1) = ret; 210 314 } ··· 383 229 DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); 384 230 385 231 __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); 232 + } 233 + 234 + static void handle___pkvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) 235 + { 236 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 237 + struct pkvm_hyp_vm *hyp_vm; 238 + 239 + if (!is_protected_kvm_enabled()) 240 + return; 241 + 242 + hyp_vm = get_np_pkvm_hyp_vm(handle); 243 + if (!hyp_vm) 244 + return; 245 + 246 + __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu); 247 + put_pkvm_hyp_vm(hyp_vm); 386 248 } 387 249 388 250 static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt) ··· 557 387 558 388 HANDLE_FUNC(__pkvm_host_share_hyp), 559 389 HANDLE_FUNC(__pkvm_host_unshare_hyp), 390 + HANDLE_FUNC(__pkvm_host_share_guest), 391 + HANDLE_FUNC(__pkvm_host_unshare_guest), 392 + HANDLE_FUNC(__pkvm_host_relax_perms_guest), 393 + HANDLE_FUNC(__pkvm_host_wrprotect_guest), 394 + HANDLE_FUNC(__pkvm_host_test_clear_young_guest), 395 + HANDLE_FUNC(__pkvm_host_mkyoung_guest), 560 396 HANDLE_FUNC(__kvm_adjust_pc), 561 397 HANDLE_FUNC(__kvm_vcpu_run), 562 398 HANDLE_FUNC(__kvm_flush_vm_context), ··· 577 401 HANDLE_FUNC(__pkvm_init_vm), 578 402 HANDLE_FUNC(__pkvm_init_vcpu), 579 403 HANDLE_FUNC(__pkvm_teardown_vm), 404 + HANDLE_FUNC(__pkvm_vcpu_load), 405 + HANDLE_FUNC(__pkvm_vcpu_put), 406 + HANDLE_FUNC(__pkvm_tlb_flush_vmid), 580 407 }; 581 408 582 409 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
+290 -30
arch/arm64/kvm/hyp/nvhe/mem_protect.c
··· 201 201 202 202 memset(addr, 0, PAGE_SIZE); 203 203 p = hyp_virt_to_page(addr); 204 - memset(p, 0, sizeof(*p)); 205 204 p->refcount = 1; 205 + p->order = 0; 206 206 207 207 return addr; 208 208 } ··· 268 268 269 269 void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) 270 270 { 271 + struct hyp_page *page; 271 272 void *addr; 272 273 273 274 /* Dump all pgtable pages in the hyp_pool */ ··· 280 279 /* Drain the hyp_pool into the memcache */ 281 280 addr = hyp_alloc_pages(&vm->pool, 0); 282 281 while (addr) { 283 - memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page)); 282 + page = hyp_virt_to_page(addr); 283 + page->refcount = 0; 284 + page->order = 0; 284 285 push_hyp_memcache(mc, addr, hyp_virt_to_phys); 285 286 WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1)); 286 287 addr = hyp_alloc_pages(&vm->pool, 0); ··· 385 382 return !!find_mem_range(phys, &range); 386 383 } 387 384 388 - static bool addr_is_allowed_memory(phys_addr_t phys) 385 + static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) 386 + { 387 + return range->start <= addr && addr < range->end; 388 + } 389 + 390 + static int check_range_allowed_memory(u64 start, u64 end) 389 391 { 390 392 struct memblock_region *reg; 391 393 struct kvm_mem_range range; 392 394 393 - reg = find_mem_range(phys, &range); 395 + /* 396 + * Callers can't check the state of a range that overlaps memory and 397 + * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range. 398 + */ 399 + reg = find_mem_range(start, &range); 400 + if (!is_in_mem_range(end - 1, &range)) 401 + return -EINVAL; 394 402 395 - return reg && !(reg->flags & MEMBLOCK_NOMAP); 396 - } 403 + if (!reg || reg->flags & MEMBLOCK_NOMAP) 404 + return -EPERM; 397 405 398 - static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) 399 - { 400 - return range->start <= addr && addr < range->end; 406 + return 0; 401 407 } 402 408 403 409 static bool range_is_memory(u64 start, u64 end) ··· 466 454 if (kvm_pte_valid(pte)) 467 455 return -EAGAIN; 468 456 469 - if (pte) 457 + if (pte) { 458 + WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE); 470 459 return -EPERM; 460 + } 471 461 472 462 do { 473 463 u64 granule = kvm_granule_size(level); ··· 491 477 return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); 492 478 } 493 479 480 + static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state) 481 + { 482 + phys_addr_t end = addr + size; 483 + 484 + for (; addr < end; addr += PAGE_SIZE) 485 + hyp_phys_to_page(addr)->host_state = state; 486 + } 487 + 494 488 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) 495 489 { 496 - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, 497 - addr, size, &host_s2_pool, owner_id); 490 + int ret; 491 + 492 + if (!addr_is_memory(addr)) 493 + return -EPERM; 494 + 495 + ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, 496 + addr, size, &host_s2_pool, owner_id); 497 + if (ret) 498 + return ret; 499 + 500 + /* Don't forget to update the vmemmap tracking for the host */ 501 + if (owner_id == PKVM_ID_HOST) 502 + __host_update_page_state(addr, size, PKVM_PAGE_OWNED); 503 + else 504 + __host_update_page_state(addr, size, PKVM_NOPAGE); 505 + 506 + return 0; 498 507 } 499 508 500 509 static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) ··· 641 604 return kvm_pgtable_walk(pgt, addr, size, &walker); 642 605 } 643 606 644 - static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr) 645 - { 646 - if (!addr_is_allowed_memory(addr)) 647 - return PKVM_NOPAGE; 648 - 649 - if (!kvm_pte_valid(pte) && pte) 650 - return PKVM_NOPAGE; 651 - 652 - return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); 653 - } 654 - 655 607 static int __host_check_page_state_range(u64 addr, u64 size, 656 608 enum pkvm_page_state state) 657 609 { 658 - struct check_walk_data d = { 659 - .desired = state, 660 - .get_page_state = host_get_page_state, 661 - }; 610 + u64 end = addr + size; 611 + int ret; 612 + 613 + ret = check_range_allowed_memory(addr, end); 614 + if (ret) 615 + return ret; 662 616 663 617 hyp_assert_lock_held(&host_mmu.lock); 664 - return check_page_state_range(&host_mmu.pgt, addr, size, &d); 618 + for (; addr < end; addr += PAGE_SIZE) { 619 + if (hyp_phys_to_page(addr)->host_state != state) 620 + return -EPERM; 621 + } 622 + 623 + return 0; 665 624 } 666 625 667 626 static int __host_set_page_state_range(u64 addr, u64 size, 668 627 enum pkvm_page_state state) 669 628 { 670 - enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state); 629 + if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) { 630 + int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT); 671 631 672 - return host_stage2_idmap_locked(addr, size, prot); 632 + if (ret) 633 + return ret; 634 + } 635 + 636 + __host_update_page_state(addr, size, state); 637 + 638 + return 0; 673 639 } 674 640 675 641 static int host_request_owned_transition(u64 *completer_addr, ··· 865 825 enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); 866 826 867 827 return pkvm_create_mappings_locked(start, end, prot); 828 + } 829 + 830 + static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr) 831 + { 832 + if (!kvm_pte_valid(pte)) 833 + return PKVM_NOPAGE; 834 + 835 + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); 836 + } 837 + 838 + static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr, 839 + u64 size, enum pkvm_page_state state) 840 + { 841 + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); 842 + struct check_walk_data d = { 843 + .desired = state, 844 + .get_page_state = guest_get_page_state, 845 + }; 846 + 847 + hyp_assert_lock_held(&vm->lock); 848 + return check_page_state_range(&vm->pgt, addr, size, &d); 868 849 } 869 850 870 851 static int check_share(struct pkvm_mem_share *share) ··· 1366 1305 1367 1306 host_lock_component(); 1368 1307 ret = do_unshare(&share); 1308 + host_unlock_component(); 1309 + 1310 + return ret; 1311 + } 1312 + 1313 + int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu, 1314 + enum kvm_pgtable_prot prot) 1315 + { 1316 + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); 1317 + u64 phys = hyp_pfn_to_phys(pfn); 1318 + u64 ipa = hyp_pfn_to_phys(gfn); 1319 + struct hyp_page *page; 1320 + int ret; 1321 + 1322 + if (prot & ~KVM_PGTABLE_PROT_RWX) 1323 + return -EINVAL; 1324 + 1325 + ret = check_range_allowed_memory(phys, phys + PAGE_SIZE); 1326 + if (ret) 1327 + return ret; 1328 + 1329 + host_lock_component(); 1330 + guest_lock_component(vm); 1331 + 1332 + ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE); 1333 + if (ret) 1334 + goto unlock; 1335 + 1336 + page = hyp_phys_to_page(phys); 1337 + switch (page->host_state) { 1338 + case PKVM_PAGE_OWNED: 1339 + WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED)); 1340 + break; 1341 + case PKVM_PAGE_SHARED_OWNED: 1342 + if (page->host_share_guest_count) 1343 + break; 1344 + /* Only host to np-guest multi-sharing is tolerated */ 1345 + WARN_ON(1); 1346 + fallthrough; 1347 + default: 1348 + ret = -EPERM; 1349 + goto unlock; 1350 + } 1351 + 1352 + WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys, 1353 + pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED), 1354 + &vcpu->vcpu.arch.pkvm_memcache, 0)); 1355 + page->host_share_guest_count++; 1356 + 1357 + unlock: 1358 + guest_unlock_component(vm); 1359 + host_unlock_component(); 1360 + 1361 + return ret; 1362 + } 1363 + 1364 + static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa) 1365 + { 1366 + enum pkvm_page_state state; 1367 + struct hyp_page *page; 1368 + kvm_pte_t pte; 1369 + u64 phys; 1370 + s8 level; 1371 + int ret; 1372 + 1373 + ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); 1374 + if (ret) 1375 + return ret; 1376 + if (level != KVM_PGTABLE_LAST_LEVEL) 1377 + return -E2BIG; 1378 + if (!kvm_pte_valid(pte)) 1379 + return -ENOENT; 1380 + 1381 + state = guest_get_page_state(pte, ipa); 1382 + if (state != PKVM_PAGE_SHARED_BORROWED) 1383 + return -EPERM; 1384 + 1385 + phys = kvm_pte_to_phys(pte); 1386 + ret = check_range_allowed_memory(phys, phys + PAGE_SIZE); 1387 + if (WARN_ON(ret)) 1388 + return ret; 1389 + 1390 + page = hyp_phys_to_page(phys); 1391 + if (page->host_state != PKVM_PAGE_SHARED_OWNED) 1392 + return -EPERM; 1393 + if (WARN_ON(!page->host_share_guest_count)) 1394 + return -EINVAL; 1395 + 1396 + *__phys = phys; 1397 + 1398 + return 0; 1399 + } 1400 + 1401 + int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm) 1402 + { 1403 + u64 ipa = hyp_pfn_to_phys(gfn); 1404 + struct hyp_page *page; 1405 + u64 phys; 1406 + int ret; 1407 + 1408 + host_lock_component(); 1409 + guest_lock_component(vm); 1410 + 1411 + ret = __check_host_shared_guest(vm, &phys, ipa); 1412 + if (ret) 1413 + goto unlock; 1414 + 1415 + ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE); 1416 + if (ret) 1417 + goto unlock; 1418 + 1419 + page = hyp_phys_to_page(phys); 1420 + page->host_share_guest_count--; 1421 + if (!page->host_share_guest_count) 1422 + WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED)); 1423 + 1424 + unlock: 1425 + guest_unlock_component(vm); 1426 + host_unlock_component(); 1427 + 1428 + return ret; 1429 + } 1430 + 1431 + int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) 1432 + { 1433 + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); 1434 + u64 ipa = hyp_pfn_to_phys(gfn); 1435 + u64 phys; 1436 + int ret; 1437 + 1438 + if (prot & ~KVM_PGTABLE_PROT_RWX) 1439 + return -EINVAL; 1440 + 1441 + host_lock_component(); 1442 + guest_lock_component(vm); 1443 + 1444 + ret = __check_host_shared_guest(vm, &phys, ipa); 1445 + if (!ret) 1446 + ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); 1447 + 1448 + guest_unlock_component(vm); 1449 + host_unlock_component(); 1450 + 1451 + return ret; 1452 + } 1453 + 1454 + int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm) 1455 + { 1456 + u64 ipa = hyp_pfn_to_phys(gfn); 1457 + u64 phys; 1458 + int ret; 1459 + 1460 + host_lock_component(); 1461 + guest_lock_component(vm); 1462 + 1463 + ret = __check_host_shared_guest(vm, &phys, ipa); 1464 + if (!ret) 1465 + ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE); 1466 + 1467 + guest_unlock_component(vm); 1468 + host_unlock_component(); 1469 + 1470 + return ret; 1471 + } 1472 + 1473 + int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm) 1474 + { 1475 + u64 ipa = hyp_pfn_to_phys(gfn); 1476 + u64 phys; 1477 + int ret; 1478 + 1479 + host_lock_component(); 1480 + guest_lock_component(vm); 1481 + 1482 + ret = __check_host_shared_guest(vm, &phys, ipa); 1483 + if (!ret) 1484 + ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold); 1485 + 1486 + guest_unlock_component(vm); 1487 + host_unlock_component(); 1488 + 1489 + return ret; 1490 + } 1491 + 1492 + int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) 1493 + { 1494 + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); 1495 + u64 ipa = hyp_pfn_to_phys(gfn); 1496 + u64 phys; 1497 + int ret; 1498 + 1499 + host_lock_component(); 1500 + guest_lock_component(vm); 1501 + 1502 + ret = __check_host_shared_guest(vm, &phys, ipa); 1503 + if (!ret) 1504 + kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); 1505 + 1506 + guest_unlock_component(vm); 1369 1507 host_unlock_component(); 1370 1508 1371 1509 return ret;
+7 -7
arch/arm64/kvm/hyp/nvhe/page_alloc.c
··· 32 32 */ 33 33 static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, 34 34 struct hyp_page *p, 35 - unsigned short order) 35 + u8 order) 36 36 { 37 37 phys_addr_t addr = hyp_page_to_phys(p); 38 38 ··· 51 51 /* Find a buddy page currently available for allocation */ 52 52 static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, 53 53 struct hyp_page *p, 54 - unsigned short order) 54 + u8 order) 55 55 { 56 56 struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); 57 57 ··· 94 94 struct hyp_page *p) 95 95 { 96 96 phys_addr_t phys = hyp_page_to_phys(p); 97 - unsigned short order = p->order; 97 + u8 order = p->order; 98 98 struct hyp_page *buddy; 99 99 100 100 memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); ··· 129 129 130 130 static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, 131 131 struct hyp_page *p, 132 - unsigned short order) 132 + u8 order) 133 133 { 134 134 struct hyp_page *buddy; 135 135 ··· 183 183 184 184 void hyp_split_page(struct hyp_page *p) 185 185 { 186 - unsigned short order = p->order; 186 + u8 order = p->order; 187 187 unsigned int i; 188 188 189 189 p->order = 0; ··· 195 195 } 196 196 } 197 197 198 - void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) 198 + void *hyp_alloc_pages(struct hyp_pool *pool, u8 order) 199 199 { 200 - unsigned short i = order; 201 200 struct hyp_page *p; 201 + u8 i = order; 202 202 203 203 hyp_spin_lock(&pool->lock); 204 204
+69
arch/arm64/kvm/hyp/nvhe/pkvm.c
··· 24 24 unsigned int kvm_host_sve_max_vl; 25 25 26 26 /* 27 + * The currently loaded hyp vCPU for each physical CPU. Used only when 28 + * protected KVM is enabled, but for both protected and non-protected VMs. 29 + */ 30 + static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu); 31 + 32 + /* 27 33 * Set trap register values based on features in ID_AA64PFR0. 28 34 */ 29 35 static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) ··· 312 306 struct pkvm_hyp_vcpu *hyp_vcpu = NULL; 313 307 struct pkvm_hyp_vm *hyp_vm; 314 308 309 + /* Cannot load a new vcpu without putting the old one first. */ 310 + if (__this_cpu_read(loaded_hyp_vcpu)) 311 + return NULL; 312 + 315 313 hyp_spin_lock(&vm_table_lock); 316 314 hyp_vm = get_vm_by_handle(handle); 317 315 if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx) 318 316 goto unlock; 319 317 320 318 hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; 319 + 320 + /* Ensure vcpu isn't loaded on more than one cpu simultaneously. */ 321 + if (unlikely(hyp_vcpu->loaded_hyp_vcpu)) { 322 + hyp_vcpu = NULL; 323 + goto unlock; 324 + } 325 + 326 + hyp_vcpu->loaded_hyp_vcpu = this_cpu_ptr(&loaded_hyp_vcpu); 321 327 hyp_page_ref_inc(hyp_virt_to_page(hyp_vm)); 322 328 unlock: 323 329 hyp_spin_unlock(&vm_table_lock); 330 + 331 + if (hyp_vcpu) 332 + __this_cpu_write(loaded_hyp_vcpu, hyp_vcpu); 324 333 return hyp_vcpu; 325 334 } 326 335 ··· 344 323 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu); 345 324 346 325 hyp_spin_lock(&vm_table_lock); 326 + hyp_vcpu->loaded_hyp_vcpu = NULL; 327 + __this_cpu_write(loaded_hyp_vcpu, NULL); 347 328 hyp_page_ref_dec(hyp_virt_to_page(hyp_vm)); 348 329 hyp_spin_unlock(&vm_table_lock); 330 + } 331 + 332 + struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void) 333 + { 334 + return __this_cpu_read(loaded_hyp_vcpu); 335 + 336 + } 337 + 338 + struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle) 339 + { 340 + struct pkvm_hyp_vm *hyp_vm; 341 + 342 + hyp_spin_lock(&vm_table_lock); 343 + hyp_vm = get_vm_by_handle(handle); 344 + if (hyp_vm) 345 + hyp_page_ref_inc(hyp_virt_to_page(hyp_vm)); 346 + hyp_spin_unlock(&vm_table_lock); 347 + 348 + return hyp_vm; 349 + } 350 + 351 + void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm) 352 + { 353 + hyp_spin_lock(&vm_table_lock); 354 + hyp_page_ref_dec(hyp_virt_to_page(hyp_vm)); 355 + hyp_spin_unlock(&vm_table_lock); 356 + } 357 + 358 + struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle) 359 + { 360 + struct pkvm_hyp_vm *hyp_vm = get_pkvm_hyp_vm(handle); 361 + 362 + if (hyp_vm && pkvm_hyp_vm_is_protected(hyp_vm)) { 363 + put_pkvm_hyp_vm(hyp_vm); 364 + hyp_vm = NULL; 365 + } 366 + 367 + return hyp_vm; 349 368 } 350 369 351 370 static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm) ··· 807 746 /* Push the metadata pages to the teardown memcache */ 808 747 for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) { 809 748 struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx]; 749 + struct kvm_hyp_memcache *vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache; 750 + 751 + while (vcpu_mc->nr_pages) { 752 + void *addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt); 753 + 754 + push_hyp_memcache(mc, addr, hyp_virt_to_phys); 755 + unmap_donated_memory_noclear(addr, PAGE_SIZE); 756 + } 810 757 811 758 teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu)); 812 759 }
+3 -4
arch/arm64/kvm/hyp/nvhe/setup.c
··· 180 180 static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, 181 181 enum kvm_pgtable_walk_flags visit) 182 182 { 183 - enum kvm_pgtable_prot prot; 184 183 enum pkvm_page_state state; 185 184 phys_addr_t phys; 186 185 ··· 202 203 case PKVM_PAGE_OWNED: 203 204 return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); 204 205 case PKVM_PAGE_SHARED_OWNED: 205 - prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); 206 + hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED; 206 207 break; 207 208 case PKVM_PAGE_SHARED_BORROWED: 208 - prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); 209 + hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED; 209 210 break; 210 211 default: 211 212 return -EINVAL; 212 213 } 213 214 214 - return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); 215 + return 0; 215 216 } 216 217 217 218 static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx,
+5 -8
arch/arm64/kvm/hyp/pgtable.c
··· 1245 1245 NULL, NULL, 0); 1246 1246 } 1247 1247 1248 - void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) 1248 + void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, 1249 + enum kvm_pgtable_walk_flags flags) 1249 1250 { 1250 1251 int ret; 1251 1252 1252 1253 ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, 1253 - NULL, NULL, 1254 - KVM_PGTABLE_WALK_HANDLE_FAULT | 1255 - KVM_PGTABLE_WALK_SHARED); 1254 + NULL, NULL, flags); 1256 1255 if (!ret) 1257 1256 dsb(ishst); 1258 1257 } ··· 1307 1308 } 1308 1309 1309 1310 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, 1310 - enum kvm_pgtable_prot prot) 1311 + enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags) 1311 1312 { 1312 1313 int ret; 1313 1314 s8 level; ··· 1325 1326 if (prot & KVM_PGTABLE_PROT_X) 1326 1327 clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; 1327 1328 1328 - ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, 1329 - KVM_PGTABLE_WALK_HANDLE_FAULT | 1330 - KVM_PGTABLE_WALK_SHARED); 1329 + ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags); 1331 1330 if (!ret || ret == -EAGAIN) 1332 1331 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level); 1333 1332 return ret;
+61 -32
arch/arm64/kvm/mmu.c
··· 15 15 #include <asm/kvm_arm.h> 16 16 #include <asm/kvm_mmu.h> 17 17 #include <asm/kvm_pgtable.h> 18 + #include <asm/kvm_pkvm.h> 18 19 #include <asm/kvm_ras.h> 19 20 #include <asm/kvm_asm.h> 20 21 #include <asm/kvm_emulate.h> ··· 31 30 static phys_addr_t __ro_after_init hyp_idmap_vector; 32 31 33 32 static unsigned long __ro_after_init io_map_base; 33 + 34 + #define KVM_PGT_FN(fn) (!is_protected_kvm_enabled() ? fn : p ## fn) 34 35 35 36 static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end, 36 37 phys_addr_t size) ··· 150 147 return -EINVAL; 151 148 152 149 next = __stage2_range_addr_end(addr, end, chunk_size); 153 - ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache); 150 + ret = KVM_PGT_FN(kvm_pgtable_stage2_split)(pgt, addr, next - addr, cache); 154 151 if (ret) 155 152 break; 156 153 } while (addr = next, addr != end); ··· 171 168 */ 172 169 int kvm_arch_flush_remote_tlbs(struct kvm *kvm) 173 170 { 174 - kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); 171 + if (is_protected_kvm_enabled()) 172 + kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle); 173 + else 174 + kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); 175 175 return 0; 176 176 } 177 177 178 178 int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, 179 179 gfn_t gfn, u64 nr_pages) 180 180 { 181 - kvm_tlb_flush_vmid_range(&kvm->arch.mmu, 182 - gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); 181 + u64 size = nr_pages << PAGE_SHIFT; 182 + u64 addr = gfn << PAGE_SHIFT; 183 + 184 + if (is_protected_kvm_enabled()) 185 + kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle); 186 + else 187 + kvm_tlb_flush_vmid_range(&kvm->arch.mmu, addr, size); 183 188 return 0; 184 189 } 185 190 ··· 236 225 void *pgtable = page_to_virt(page); 237 226 s8 level = page_private(page); 238 227 239 - kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level); 228 + KVM_PGT_FN(kvm_pgtable_stage2_free_unlinked)(&kvm_s2_mm_ops, pgtable, level); 240 229 } 241 230 242 231 static void stage2_free_unlinked_table(void *addr, s8 level) ··· 335 324 336 325 lockdep_assert_held_write(&kvm->mmu_lock); 337 326 WARN_ON(size & ~PAGE_MASK); 338 - WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap, 327 + WARN_ON(stage2_apply_range(mmu, start, end, KVM_PGT_FN(kvm_pgtable_stage2_unmap), 339 328 may_block)); 340 329 } 341 330 ··· 347 336 348 337 void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) 349 338 { 350 - stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_flush); 339 + stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_flush)); 351 340 } 352 341 353 342 static void stage2_flush_memslot(struct kvm *kvm, ··· 953 942 return -ENOMEM; 954 943 955 944 mmu->arch = &kvm->arch; 956 - err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops); 945 + err = KVM_PGT_FN(kvm_pgtable_stage2_init)(pgt, mmu, &kvm_s2_mm_ops); 957 946 if (err) 958 947 goto out_free_pgtable; 948 + 949 + mmu->pgt = pgt; 950 + if (is_protected_kvm_enabled()) 951 + return 0; 959 952 960 953 mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); 961 954 if (!mmu->last_vcpu_ran) { ··· 974 959 mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT; 975 960 mmu->split_page_cache.gfp_zero = __GFP_ZERO; 976 961 977 - mmu->pgt = pgt; 978 962 mmu->pgd_phys = __pa(pgt->pgd); 979 963 980 964 if (kvm_is_nested_s2_mmu(kvm, mmu)) ··· 982 968 return 0; 983 969 984 970 out_destroy_pgtable: 985 - kvm_pgtable_stage2_destroy(pgt); 971 + KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); 986 972 out_free_pgtable: 987 973 kfree(pgt); 988 974 return err; ··· 1079 1065 write_unlock(&kvm->mmu_lock); 1080 1066 1081 1067 if (pgt) { 1082 - kvm_pgtable_stage2_destroy(pgt); 1068 + KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); 1083 1069 kfree(pgt); 1084 1070 } 1085 1071 } ··· 1096 1082 1097 1083 void free_hyp_memcache(struct kvm_hyp_memcache *mc) 1098 1084 { 1099 - if (is_protected_kvm_enabled()) 1100 - __free_hyp_memcache(mc, hyp_mc_free_fn, 1101 - kvm_host_va, NULL); 1085 + if (!is_protected_kvm_enabled()) 1086 + return; 1087 + 1088 + kfree(mc->mapping); 1089 + __free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, NULL); 1102 1090 } 1103 1091 1104 1092 int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages) 1105 1093 { 1106 1094 if (!is_protected_kvm_enabled()) 1107 1095 return 0; 1096 + 1097 + if (!mc->mapping) { 1098 + mc->mapping = kzalloc(sizeof(struct pkvm_mapping), GFP_KERNEL_ACCOUNT); 1099 + if (!mc->mapping) 1100 + return -ENOMEM; 1101 + } 1108 1102 1109 1103 return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn, 1110 1104 kvm_host_pa, NULL); ··· 1152 1130 break; 1153 1131 1154 1132 write_lock(&kvm->mmu_lock); 1155 - ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, 1156 - &cache, 0); 1133 + ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, addr, PAGE_SIZE, 1134 + pa, prot, &cache, 0); 1157 1135 write_unlock(&kvm->mmu_lock); 1158 1136 if (ret) 1159 1137 break; ··· 1173 1151 */ 1174 1152 void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) 1175 1153 { 1176 - stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect); 1154 + stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_wrprotect)); 1177 1155 } 1178 1156 1179 1157 /** ··· 1464 1442 unsigned long mmu_seq; 1465 1443 phys_addr_t ipa = fault_ipa; 1466 1444 struct kvm *kvm = vcpu->kvm; 1467 - struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; 1468 1445 struct vm_area_struct *vma; 1469 1446 short vma_shift; 1447 + void *memcache; 1470 1448 gfn_t gfn; 1471 1449 kvm_pfn_t pfn; 1472 1450 bool logging_active = memslot_is_logging(memslot); ··· 1474 1452 enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; 1475 1453 struct kvm_pgtable *pgt; 1476 1454 struct page *page; 1455 + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; 1477 1456 1478 1457 if (fault_is_perm) 1479 1458 fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu); ··· 1494 1471 * and a write fault needs to collapse a block entry into a table. 1495 1472 */ 1496 1473 if (!fault_is_perm || (logging_active && write_fault)) { 1497 - ret = kvm_mmu_topup_memory_cache(memcache, 1498 - kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); 1474 + int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); 1475 + 1476 + if (!is_protected_kvm_enabled()) { 1477 + memcache = &vcpu->arch.mmu_page_cache; 1478 + ret = kvm_mmu_topup_memory_cache(memcache, min_pages); 1479 + } else { 1480 + memcache = &vcpu->arch.pkvm_memcache; 1481 + ret = topup_hyp_memcache(memcache, min_pages); 1482 + } 1499 1483 if (ret) 1500 1484 return ret; 1501 1485 } ··· 1523 1493 * logging_active is guaranteed to never be true for VM_PFNMAP 1524 1494 * memslots. 1525 1495 */ 1526 - if (logging_active) { 1496 + if (logging_active || is_protected_kvm_enabled()) { 1527 1497 force_pte = true; 1528 1498 vma_shift = PAGE_SHIFT; 1529 1499 } else { ··· 1663 1633 prot |= kvm_encode_nested_level(nested); 1664 1634 } 1665 1635 1666 - read_lock(&kvm->mmu_lock); 1636 + kvm_fault_lock(kvm); 1667 1637 pgt = vcpu->arch.hw_mmu->pgt; 1668 1638 if (mmu_invalidate_retry(kvm, mmu_seq)) { 1669 1639 ret = -EAGAIN; ··· 1725 1695 * PTE, which will be preserved. 1726 1696 */ 1727 1697 prot &= ~KVM_NV_GUEST_MAP_SZ; 1728 - ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); 1698 + ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, fault_ipa, prot, flags); 1729 1699 } else { 1730 - ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, 1700 + ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, vma_pagesize, 1731 1701 __pfn_to_phys(pfn), prot, 1732 - memcache, 1733 - KVM_PGTABLE_WALK_HANDLE_FAULT | 1734 - KVM_PGTABLE_WALK_SHARED); 1702 + memcache, flags); 1735 1703 } 1736 1704 1737 1705 out_unlock: 1738 1706 kvm_release_faultin_page(kvm, page, !!ret, writable); 1739 - read_unlock(&kvm->mmu_lock); 1707 + kvm_fault_unlock(kvm); 1740 1708 1741 1709 /* Mark the page dirty only if the fault is handled successfully */ 1742 1710 if (writable && !ret) ··· 1746 1718 /* Resolve the access fault by making the page young again. */ 1747 1719 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) 1748 1720 { 1721 + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; 1749 1722 struct kvm_s2_mmu *mmu; 1750 1723 1751 1724 trace_kvm_access_fault(fault_ipa); 1752 1725 1753 1726 read_lock(&vcpu->kvm->mmu_lock); 1754 1727 mmu = vcpu->arch.hw_mmu; 1755 - kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa); 1728 + KVM_PGT_FN(kvm_pgtable_stage2_mkyoung)(mmu->pgt, fault_ipa, flags); 1756 1729 read_unlock(&vcpu->kvm->mmu_lock); 1757 1730 } 1758 1731 ··· 1793 1764 } 1794 1765 1795 1766 /* Falls between the IPA range and the PARange? */ 1796 - if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) { 1767 + if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { 1797 1768 fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); 1798 1769 1799 1770 if (is_iabt) ··· 1959 1930 if (!kvm->arch.mmu.pgt) 1960 1931 return false; 1961 1932 1962 - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, 1933 + return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt, 1963 1934 range->start << PAGE_SHIFT, 1964 1935 size, true); 1965 1936 /* ··· 1975 1946 if (!kvm->arch.mmu.pgt) 1976 1947 return false; 1977 1948 1978 - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, 1949 + return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt, 1979 1950 range->start << PAGE_SHIFT, 1980 1951 size, false); 1981 1952 }
+201
arch/arm64/kvm/pkvm.c
··· 7 7 #include <linux/init.h> 8 8 #include <linux/kmemleak.h> 9 9 #include <linux/kvm_host.h> 10 + #include <asm/kvm_mmu.h> 10 11 #include <linux/memblock.h> 11 12 #include <linux/mutex.h> 12 13 #include <linux/sort.h> ··· 269 268 return ret; 270 269 } 271 270 device_initcall_sync(finalize_pkvm); 271 + 272 + static int cmp_mappings(struct rb_node *node, const struct rb_node *parent) 273 + { 274 + struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node); 275 + struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node); 276 + 277 + if (a->gfn < b->gfn) 278 + return -1; 279 + if (a->gfn > b->gfn) 280 + return 1; 281 + return 0; 282 + } 283 + 284 + static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn) 285 + { 286 + struct rb_node *node = root->rb_node, *prev = NULL; 287 + struct pkvm_mapping *mapping; 288 + 289 + while (node) { 290 + mapping = rb_entry(node, struct pkvm_mapping, node); 291 + if (mapping->gfn == gfn) 292 + return node; 293 + prev = node; 294 + node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right; 295 + } 296 + 297 + return prev; 298 + } 299 + 300 + /* 301 + * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing 302 + * of __map inline. 303 + */ 304 + #define for_each_mapping_in_range_safe(__pgt, __start, __end, __map) \ 305 + for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings, \ 306 + ((__start) >> PAGE_SHIFT)); \ 307 + __tmp && ({ \ 308 + __map = rb_entry(__tmp, struct pkvm_mapping, node); \ 309 + __tmp = rb_next(__tmp); \ 310 + true; \ 311 + }); \ 312 + ) \ 313 + if (__map->gfn < ((__start) >> PAGE_SHIFT)) \ 314 + continue; \ 315 + else if (__map->gfn >= ((__end) >> PAGE_SHIFT)) \ 316 + break; \ 317 + else 318 + 319 + int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, 320 + struct kvm_pgtable_mm_ops *mm_ops) 321 + { 322 + pgt->pkvm_mappings = RB_ROOT; 323 + pgt->mmu = mmu; 324 + 325 + return 0; 326 + } 327 + 328 + void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) 329 + { 330 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 331 + pkvm_handle_t handle = kvm->arch.pkvm.handle; 332 + struct pkvm_mapping *mapping; 333 + struct rb_node *node; 334 + 335 + if (!handle) 336 + return; 337 + 338 + node = rb_first(&pgt->pkvm_mappings); 339 + while (node) { 340 + mapping = rb_entry(node, struct pkvm_mapping, node); 341 + kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn); 342 + node = rb_next(node); 343 + rb_erase(&mapping->node, &pgt->pkvm_mappings); 344 + kfree(mapping); 345 + } 346 + } 347 + 348 + int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, 349 + u64 phys, enum kvm_pgtable_prot prot, 350 + void *mc, enum kvm_pgtable_walk_flags flags) 351 + { 352 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 353 + struct pkvm_mapping *mapping = NULL; 354 + struct kvm_hyp_memcache *cache = mc; 355 + u64 gfn = addr >> PAGE_SHIFT; 356 + u64 pfn = phys >> PAGE_SHIFT; 357 + int ret; 358 + 359 + if (size != PAGE_SIZE) 360 + return -EINVAL; 361 + 362 + lockdep_assert_held_write(&kvm->mmu_lock); 363 + ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot); 364 + if (ret) { 365 + /* Is the gfn already mapped due to a racing vCPU? */ 366 + if (ret == -EPERM) 367 + return -EAGAIN; 368 + } 369 + 370 + swap(mapping, cache->mapping); 371 + mapping->gfn = gfn; 372 + mapping->pfn = pfn; 373 + WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings)); 374 + 375 + return ret; 376 + } 377 + 378 + int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) 379 + { 380 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 381 + pkvm_handle_t handle = kvm->arch.pkvm.handle; 382 + struct pkvm_mapping *mapping; 383 + int ret = 0; 384 + 385 + lockdep_assert_held_write(&kvm->mmu_lock); 386 + for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) { 387 + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn); 388 + if (WARN_ON(ret)) 389 + break; 390 + rb_erase(&mapping->node, &pgt->pkvm_mappings); 391 + kfree(mapping); 392 + } 393 + 394 + return ret; 395 + } 396 + 397 + int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) 398 + { 399 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 400 + pkvm_handle_t handle = kvm->arch.pkvm.handle; 401 + struct pkvm_mapping *mapping; 402 + int ret = 0; 403 + 404 + lockdep_assert_held(&kvm->mmu_lock); 405 + for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) { 406 + ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn); 407 + if (WARN_ON(ret)) 408 + break; 409 + } 410 + 411 + return ret; 412 + } 413 + 414 + int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) 415 + { 416 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 417 + struct pkvm_mapping *mapping; 418 + 419 + lockdep_assert_held(&kvm->mmu_lock); 420 + for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) 421 + __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE); 422 + 423 + return 0; 424 + } 425 + 426 + bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold) 427 + { 428 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 429 + pkvm_handle_t handle = kvm->arch.pkvm.handle; 430 + struct pkvm_mapping *mapping; 431 + bool young = false; 432 + 433 + lockdep_assert_held(&kvm->mmu_lock); 434 + for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) 435 + young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn, 436 + mkold); 437 + 438 + return young; 439 + } 440 + 441 + int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, 442 + enum kvm_pgtable_walk_flags flags) 443 + { 444 + return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot); 445 + } 446 + 447 + void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, 448 + enum kvm_pgtable_walk_flags flags) 449 + { 450 + WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT)); 451 + } 452 + 453 + void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) 454 + { 455 + WARN_ON_ONCE(1); 456 + } 457 + 458 + kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, 459 + enum kvm_pgtable_prot prot, void *mc, bool force_pte) 460 + { 461 + WARN_ON_ONCE(1); 462 + return NULL; 463 + } 464 + 465 + int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, 466 + struct kvm_mmu_memory_cache *mc) 467 + { 468 + WARN_ON_ONCE(1); 469 + return -EINVAL; 470 + }
+4 -2
arch/arm64/kvm/vgic/vgic-v3.c
··· 734 734 { 735 735 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 736 736 737 - kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); 737 + if (likely(!is_protected_kvm_enabled())) 738 + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); 738 739 739 740 if (has_vhe()) 740 741 __vgic_v3_activate_traps(cpu_if); ··· 747 746 { 748 747 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 749 748 750 - kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); 749 + if (likely(!is_protected_kvm_enabled())) 750 + kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); 751 751 WARN_ON(vgic_v4_put(vcpu)); 752 752 753 753 if (has_vhe())