Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: arm64: Introduce MTE VM feature

Add a new VM feature 'KVM_CAP_ARM_MTE' which enables memory tagging
for a VM. This will expose the feature to the guest and automatically
tag memory pages touched by the VM as PG_mte_tagged (and clear the tag
storage) to ensure that the guest cannot see stale tags, and so that
the tags are correctly saved/restored across swap.

Actually exposing the new capability to user space happens in a later
patch.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
[maz: move VM_SHARED sampling into the critical section]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210621111716.37157-3-steven.price@arm.com

Authored by Steven Price, committed by Marc Zyngier
ea7fc1bb 69e3b846

+83 -2
+3
arch/arm64/include/asm/kvm_emulate.h
··· 84 84 if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || 85 85 vcpu_el1_is_32bit(vcpu)) 86 86 vcpu->arch.hcr_el2 |= HCR_TID2; 87 + 88 + if (kvm_has_mte(vcpu->kvm)) 89 + vcpu->arch.hcr_el2 |= HCR_ATA; 87 90 } 88 91 89 92 static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
+4
arch/arm64/include/asm/kvm_host.h
··· 132 132 133 133 u8 pfr0_csv2; 134 134 u8 pfr0_csv3; 135 + 136 + /* Memory Tagging Extension enabled for the guest */ 137 + bool mte_enabled; 135 138 }; 136 139 137 140 struct kvm_vcpu_fault_info { ··· 772 769 #define kvm_arm_vcpu_sve_finalized(vcpu) \ 773 770 ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) 774 771 772 + #define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled) 775 773 #define kvm_vcpu_has_pmu(vcpu) \ 776 774 (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) 777 775
+2 -1
arch/arm64/kvm/hyp/exception.c
··· 112 112 new |= (old & PSR_C_BIT); 113 113 new |= (old & PSR_V_BIT); 114 114 115 - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) 115 + if (kvm_has_mte(vcpu->kvm)) 116 + new |= PSR_TCO_BIT; 116 117 117 118 new |= (old & PSR_DIT_BIT); 118 119
+66 -1
arch/arm64/kvm/mmu.c
··· 822 822 return PAGE_SIZE; 823 823 } 824 824 825 + /* 826 + * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be 827 + * able to see the page's tags and therefore they must be initialised first. If 828 + * PG_mte_tagged is set, tags have already been initialised. 829 + * 830 + * The race in the test/set of the PG_mte_tagged flag is handled by: 831 + * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs 832 + * racing to santise the same page 833 + * - mmap_lock protects between a VM faulting a page in and the VMM performing 834 + * an mprotect() to add VM_MTE 835 + */ 836 + static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, 837 + unsigned long size) 838 + { 839 + unsigned long i, nr_pages = size >> PAGE_SHIFT; 840 + struct page *page; 841 + 842 + if (!kvm_has_mte(kvm)) 843 + return 0; 844 + 845 + /* 846 + * pfn_to_online_page() is used to reject ZONE_DEVICE pages 847 + * that may not support tags. 848 + */ 849 + page = pfn_to_online_page(pfn); 850 + 851 + if (!page) 852 + return -EFAULT; 853 + 854 + for (i = 0; i < nr_pages; i++, page++) { 855 + if (!test_bit(PG_mte_tagged, &page->flags)) { 856 + mte_clear_page_tags(page_address(page)); 857 + set_bit(PG_mte_tagged, &page->flags); 858 + } 859 + } 860 + 861 + return 0; 862 + } 863 + 825 864 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, 826 865 struct kvm_memory_slot *memslot, unsigned long hva, 827 866 unsigned long fault_status) ··· 869 830 bool write_fault, writable, force_pte = false; 870 831 bool exec_fault; 871 832 bool device = false; 833 + bool shared; 872 834 unsigned long mmu_seq; 873 835 struct kvm *kvm = vcpu->kvm; 874 836 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; ··· 912 872 force_pte = true; 913 873 vma_shift = PAGE_SHIFT; 914 874 } 875 + 876 + shared = (vma->vm_flags & VM_PFNMAP); 915 877 916 878 switch (vma_shift) { 917 879 #ifndef __PAGETABLE_PMD_FOLDED ··· 1013 971 if (writable) 1014 972 prot 
|= KVM_PGTABLE_PROT_W; 1015 973 1016 - if (fault_status != FSC_PERM && !device) 974 + if (fault_status != FSC_PERM && !device) { 975 + /* Check the VMM hasn't introduced a new VM_SHARED VMA */ 976 + if (kvm_has_mte(kvm) && shared) { 977 + ret = -EFAULT; 978 + goto out_unlock; 979 + } 980 + ret = sanitise_mte_tags(kvm, pfn, vma_pagesize); 981 + if (ret) 982 + goto out_unlock; 983 + 1017 984 clean_dcache_guest_page(pfn, vma_pagesize); 985 + } 1018 986 1019 987 if (exec_fault) { 1020 988 prot |= KVM_PGTABLE_PROT_X; ··· 1220 1168 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) 1221 1169 { 1222 1170 kvm_pfn_t pfn = pte_pfn(range->pte); 1171 + int ret; 1223 1172 1224 1173 if (!kvm->arch.mmu.pgt) 1225 1174 return false; 1226 1175 1227 1176 WARN_ON(range->end - range->start != 1); 1177 + 1178 + ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE); 1179 + if (ret) 1180 + return false; 1228 1181 1229 1182 /* 1230 1183 * We've moved a page around, probably through CoW, so let's treat it ··· 1437 1380 vma = find_vma_intersection(current->mm, hva, reg_end); 1438 1381 if (!vma) 1439 1382 break; 1383 + 1384 + /* 1385 + * VM_SHARED mappings are not allowed with MTE to avoid races 1386 + * when updating the PG_mte_tagged page flag, see 1387 + * sanitise_mte_tags for more details. 1388 + */ 1389 + if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) 1390 + return -EINVAL; 1440 1391 1441 1392 /* 1442 1393 * Take the intersection of this VMA with the memory region
+7
arch/arm64/kvm/sys_regs.c
··· 1047 1047 break; 1048 1048 case SYS_ID_AA64PFR1_EL1: 1049 1049 val &= ~FEATURE(ID_AA64PFR1_MTE); 1050 + if (kvm_has_mte(vcpu->kvm)) { 1051 + u64 pfr, mte; 1052 + 1053 + pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); 1054 + mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); 1055 + val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte); 1056 + } 1050 1057 break; 1051 1058 case SYS_ID_AA64ISAR1_EL1: 1052 1059 if (!vcpu_has_ptrauth(vcpu))
+1
include/uapi/linux/kvm.h
··· 1083 1083 #define KVM_CAP_SGX_ATTRIBUTE 196 1084 1084 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 1085 1085 #define KVM_CAP_PTP_KVM 198 1086 + #define KVM_CAP_ARM_MTE 199 1086 1087 1087 1088 #ifdef KVM_CAP_IRQ_ROUTING 1088 1089