Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: arm64: Use LPA2 page-tables for stage2 and hyp stage1

Implement a simple policy whereby if the HW supports FEAT_LPA2 for the
page size we are using, always use LPA2-style page-tables for stage 2
and hyp stage 1 (assuming an nvhe hyp), regardless of the VMM-requested
IPA size or HW-implemented PA size. When in use we can now support up to
52-bit IPA and PA sizes.

We use the previously created cpu feature to track whether LPA2 is
supported for deciding whether to use the LPA2 or classic pte format.

Note that FEAT_LPA2 brings support for bigger block mappings (512GB with
4KB, 64GB with 16KB). We explicitly don't enable these in the library
because stage2_apply_range() works on batch sizes of the largest used
block mapping, and increasing the size of the batch would lead to soft
lockups. See commit 5994bc9e05c2 ("KVM: arm64: Limit
stage2_apply_range() batch size to largest block").

With the addition of LPA2 support in the hypervisor, the PA size
supported by the HW must be capped with a runtime decision, rather than
simply using a compile-time decision based on PA_BITS. For example, on a
system that advertises 52-bit PA but does not support FEAT_LPA2, a 4KB
or 16KB kernel compiled with LPA2 support must still limit the PA size
to 48 bits.

Therefore, move the insertion of the PS field into TCR_EL2 out of
__kvm_hyp_init assembly code and instead do it in cpu_prepare_hyp_mode()
where the rest of TCR_EL2 is prepared. This allows us to figure out PS
with kvm_get_parange(), which has the appropriate logic to ensure the
above requirement. (and the PS field of VTCR_EL2 is already populated
this way).

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20231127111737.1897081-8-ryan.roberts@arm.com

authored by

Ryan Roberts and committed by
Marc Zyngier
bd412e2a d4fbbb26

+52 -17
+35 -10
arch/arm64/include/asm/kvm_pgtable.h
··· 25 25 #define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U 26 26 #endif 27 27 28 - #define kvm_lpa2_is_enabled() false 28 + #define kvm_lpa2_is_enabled() system_supports_lpa2() 29 + 30 + static inline u64 kvm_get_parange_max(void) 31 + { 32 + if (kvm_lpa2_is_enabled() || 33 + (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16)) 34 + return ID_AA64MMFR0_EL1_PARANGE_52; 35 + else 36 + return ID_AA64MMFR0_EL1_PARANGE_48; 37 + } 29 38 30 39 static inline u64 kvm_get_parange(u64 mmfr0) 31 40 { 41 + u64 parange_max = kvm_get_parange_max(); 32 42 u64 parange = cpuid_feature_extract_unsigned_field(mmfr0, 33 43 ID_AA64MMFR0_EL1_PARANGE_SHIFT); 34 - if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX) 35 - parange = ID_AA64MMFR0_EL1_PARANGE_MAX; 44 + if (parange > parange_max) 45 + parange = parange_max; 36 46 37 47 return parange; 38 48 } ··· 53 43 54 44 #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) 55 45 #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) 46 + #define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT) 47 + #define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8) 56 48 57 49 #define KVM_PHYS_INVALID (-1ULL) 58 50 ··· 65 53 66 54 static inline u64 kvm_pte_to_phys(kvm_pte_t pte) 67 55 { 68 - u64 pa = pte & KVM_PTE_ADDR_MASK; 56 + u64 pa; 69 57 70 - if (PAGE_SHIFT == 16) 71 - pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; 58 + if (kvm_lpa2_is_enabled()) { 59 + pa = pte & KVM_PTE_ADDR_MASK_LPA2; 60 + pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50; 61 + } else { 62 + pa = pte & KVM_PTE_ADDR_MASK; 63 + if (PAGE_SHIFT == 16) 64 + pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; 65 + } 72 66 73 67 return pa; 74 68 } 75 69 76 70 static inline kvm_pte_t kvm_phys_to_pte(u64 pa) 77 71 { 78 - kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; 72 + kvm_pte_t pte; 79 73 80 - if (PAGE_SHIFT == 16) { 81 - pa &= GENMASK(51, 48); 82 - pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); 74 + if (kvm_lpa2_is_enabled()) { 75 + pte = pa & KVM_PTE_ADDR_MASK_LPA2; 76 + pa &= GENMASK(51, 50); 77 + pte |= 
FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50); 78 + } else { 79 + pte = pa & KVM_PTE_ADDR_MASK; 80 + if (PAGE_SHIFT == 16) { 81 + pa &= GENMASK(51, 48); 82 + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); 83 + } 83 84 } 84 85 85 86 return pte;
+5
arch/arm64/kvm/arm.c
··· 1837 1837 static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) 1838 1838 { 1839 1839 struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); 1840 + u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); 1840 1841 unsigned long tcr; 1841 1842 1842 1843 /* ··· 1860 1859 } 1861 1860 tcr &= ~TCR_T0SZ_MASK; 1862 1861 tcr |= TCR_T0SZ(hyp_va_bits); 1862 + tcr &= ~TCR_EL2_PS_MASK; 1863 + tcr |= FIELD_PREP(TCR_EL2_PS_MASK, kvm_get_parange(mmfr0)); 1864 + if (kvm_lpa2_is_enabled()) 1865 + tcr |= TCR_EL2_DS; 1863 1866 params->tcr_el2 = tcr; 1864 1867 1865 1868 params->pgd_pa = kvm_mmu_get_httbr();
-4
arch/arm64/kvm/hyp/nvhe/hyp-init.S
··· 122 122 alternative_else_nop_endif 123 123 msr ttbr0_el2, x2 124 124 125 - /* 126 - * Set the PS bits in TCR_EL2. 127 - */ 128 125 ldr x0, [x0, #NVHE_INIT_TCR_EL2] 129 - tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 130 126 msr tcr_el2, x0 131 127 132 128 isb
+12 -3
arch/arm64/kvm/hyp/pgtable.c
··· 79 79 80 80 static bool kvm_phys_is_valid(u64 phys) 81 81 { 82 - return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); 82 + u64 parange_max = kvm_get_parange_max(); 83 + u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max); 84 + 85 + return phys < BIT(shift); 83 86 } 84 87 85 88 static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys) ··· 411 408 } 412 409 413 410 attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); 414 - attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); 411 + if (!kvm_lpa2_is_enabled()) 412 + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); 415 413 attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; 416 414 attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; 417 415 *ptep = attr; ··· 658 654 vtcr |= VTCR_EL2_HA; 659 655 #endif /* CONFIG_ARM64_HW_AFDBM */ 660 656 657 + if (kvm_lpa2_is_enabled()) 658 + vtcr |= VTCR_EL2_DS; 659 + 661 660 /* Set the vmid bits */ 662 661 vtcr |= (get_vmid_bits(mmfr1) == 16) ? 663 662 VTCR_EL2_VS_16BIT : ··· 718 711 if (prot & KVM_PGTABLE_PROT_W) 719 712 attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; 720 713 721 - attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); 714 + if (!kvm_lpa2_is_enabled()) 715 + attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); 716 + 722 717 attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; 723 718 attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; 724 719 *ptep = attr;