Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"ARM:

- Fix EL2 Stage-1 MMIO mappings where a random address was used

- Fix SMCCC function number comparison when the SVE hint is set

RISC-V:

- Fix KVM_GET_REG_LIST API for ISA_EXT registers

- Fix reading ISA_EXT register of a missing extension

- Fix ISA_EXT register handling in get-reg-list test

- Fix filtering of AIA registers in get-reg-list test

x86:

- Fixes for TSC_AUX virtualization

- Stop zapping page tables asynchronously, since we don't zap them as
often as before"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX
KVM: SVM: Fix TSC_AUX virtualization setup
KVM: SVM: INTERCEPT_RDTSCP is never intercepted anyway
KVM: x86/mmu: Stop zapping invalidated TDP MMU roots asynchronously
KVM: x86/mmu: Do not filter address spaces in for_each_tdp_mmu_root_yield_safe()
KVM: x86/mmu: Open code leaf invalidation from mmu_notifier
KVM: riscv: selftests: Selectively filter-out AIA registers
KVM: riscv: selftests: Fix ISA_EXT register handling in get-reg-list
RISC-V: KVM: Fix riscv_vcpu_get_isa_ext_single() for missing extensions
RISC-V: KVM: Fix KVM_GET_REG_LIST API for ISA_EXT registers
KVM: selftests: Assert that vasprintf() is successful
KVM: arm64: nvhe: Ignore SVE hint in SMCCC function ID
KVM: arm64: Properly return allocated EL2 VA from hyp_alloc_private_va_range()

Changed files: +210 -162
+1 -1
arch/arm64/include/asm/kvm_hyp.h
···
 
 u64 __guest_enter(struct kvm_vcpu *vcpu);
 
-bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
 
 #ifdef __KVM_NVHE_HYPERVISOR__
 void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+1 -1
arch/arm64/kvm/hyp/include/nvhe/ffa.h
···
 #define FFA_MAX_FUNC_NUM 0x7F
 
 int hyp_ffa_init(void *pages);
-bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt);
+bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
 
 #endif /* __KVM_HYP_FFA_H */
+1 -2
arch/arm64/kvm/hyp/nvhe/ffa.c
···
         return true;
 }
 
-bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt)
+bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
 {
-        DECLARE_REG(u64, func_id, host_ctxt, 0);
         struct arm_smccc_res res;
 
         /*
+1
arch/arm64/kvm/hyp/nvhe/hyp-init.S
···
         cmp     x0, #HVC_STUB_HCALL_NR
         b.lo    __kvm_handle_stub_hvc
 
+        bic     x0, x0, #ARM_SMCCC_CALL_HINTS
         mov     x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init)
         cmp     x0, x3
         b.eq    1f
+6 -2
arch/arm64/kvm/hyp/nvhe/hyp-main.c
···
         if (static_branch_unlikely(&kvm_protected_mode_initialized))
                 hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
 
+        id &= ~ARM_SMCCC_CALL_HINTS;
         id -= KVM_HOST_SMCCC_ID(0);
 
         if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall)))
···
 
 static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
 {
+        DECLARE_REG(u64, func_id, host_ctxt, 0);
         bool handled;
 
-        handled = kvm_host_psci_handler(host_ctxt);
+        func_id &= ~ARM_SMCCC_CALL_HINTS;
+
+        handled = kvm_host_psci_handler(host_ctxt, func_id);
         if (!handled)
-                handled = kvm_host_ffa_handler(host_ctxt);
+                handled = kvm_host_ffa_handler(host_ctxt, func_id);
         if (!handled)
                 default_host_smc_handler(host_ctxt);
 
+1 -2
arch/arm64/kvm/hyp/nvhe/psci-relay.c
···
         }
 }
 
-bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt)
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
 {
-        DECLARE_REG(u64, func_id, host_ctxt, 0);
         unsigned long ret;
 
         switch (kvm_host_psci_config.version) {
+3
arch/arm64/kvm/mmu.c
···
 
         mutex_unlock(&kvm_hyp_pgd_mutex);
 
+        if (!ret)
+                *haddr = base;
+
         return ret;
 }
 
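The mmu.c hunk above is the "random address" fix from the commit message: the allocated EL2 VA is now published through the out-parameter only on success, so callers no longer consume whatever happened to be in their local variable. A toy standalone model of that failure mode and the fix, with all names invented for illustration (this is not the kernel code):

#include <stdio.h>

/*
 * Toy model: success is reported via the return value, the result via an
 * out-parameter. If *out is never written, the caller uses stale stack
 * data -- the "random address" the commit message refers to.
 */
static int alloc_range(unsigned long size, unsigned long *out)
{
        unsigned long base = 0x1000;    /* pretend allocation result */
        int ret = size ? 0 : -1;        /* pretend failure on size == 0 */

        if (!ret)
                *out = base;            /* the fix: publish only on success */

        return ret;
}

int main(void)
{
        unsigned long addr = 0xdeadbeef;        /* stand-in for uninitialized data */

        if (alloc_range(0x2000, &addr) == 0)
                printf("mapped at %#lx\n", addr);
        return 0;
}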
+5 -2
arch/riscv/kvm/vcpu_onereg.c
···
             reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
                 return -ENOENT;
 
-        *reg_val = 0;
         host_isa_ext = kvm_isa_ext_arr[reg_num];
+        if (!__riscv_isa_extension_available(NULL, host_isa_ext))
+                return -ENOENT;
+
+        *reg_val = 0;
         if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
                 *reg_val = 1; /* Mark the given extension as available */
 
···
                 u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
 
                 isa_ext = kvm_isa_ext_arr[i];
-                if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext))
+                if (!__riscv_isa_extension_available(NULL, isa_ext))
                         continue;
 
                 if (uindices) {
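The user-visible consequence of the vcpu_onereg.c change: reading an ISA_EXT register for an extension the host does not implement now fails with ENOENT instead of silently returning 0, while a host-supported but guest-disabled extension still reads back as 0. A minimal userspace sketch of that distinction, using the same register-ID layout as the diff above; the helper name, the vcpu_fd, and the extension index are assumptions for the example, not part of the patch:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>  /* KVM_GET_ONE_REG, struct kvm_one_reg, KVM_REG_RISCV, KVM_REG_SIZE_U64 */
#include <asm/kvm.h>    /* KVM_REG_RISCV_ISA_EXT (riscv uapi header) */

/* Hypothetical helper: vcpu_fd and ext_id are assumed to be set up elsewhere. */
static void report_isa_ext(int vcpu_fd, uint64_t ext_id)
{
        uint64_t value = 0;
        struct kvm_one_reg reg = {
                /* KVM_REG_SIZE_U64 matches the ULONG-sized register on riscv64 */
                .id   = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
                        KVM_REG_RISCV_ISA_EXT | ext_id,
                .addr = (uintptr_t)&value,
        };

        if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0) {
                if (errno == ENOENT)    /* after the fix: extension missing on the host */
                        printf("ext %llu: not present on host\n",
                               (unsigned long long)ext_id);
                else
                        printf("ext %llu: %s\n",
                               (unsigned long long)ext_id, strerror(errno));
                return;
        }

        /* 1 = enabled for the guest, 0 = present on the host but disabled */
        printf("ext %llu: %s\n", (unsigned long long)ext_id,
               value ? "enabled" : "disabled");
}

The get-reg-list selftest further down relies on exactly this ENOENT-versus-0 split to tell "not on the host" apart from "disabled for the guest".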
+1 -2
arch/x86/include/asm/kvm_host.h
···
          * the thread holds the MMU lock in write mode.
          */
         spinlock_t tdp_mmu_pages_lock;
-        struct workqueue_struct *tdp_mmu_zap_wq;
 #endif /* CONFIG_X86_64 */
 
         /*
···
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-int kvm_mmu_init_vm(struct kvm *kvm);
+void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 
 void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
+5 -16
arch/x86/kvm/mmu/mmu.c
···
         return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
 }
 
-int kvm_mmu_init_vm(struct kvm *kvm)
+void kvm_mmu_init_vm(struct kvm *kvm)
 {
-        int r;
-
         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
         INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
         INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
         spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
 
-        if (tdp_mmu_enabled) {
-                r = kvm_mmu_init_tdp_mmu(kvm);
-                if (r < 0)
-                        return r;
-        }
+        if (tdp_mmu_enabled)
+                kvm_mmu_init_tdp_mmu(kvm);
 
         kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
         kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
···
 
         kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
         kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO;
-
-        return 0;
 }
 
 static void mmu_free_vm_memory_caches(struct kvm *kvm)
···
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 {
         bool flush;
-        int i;
 
         if (WARN_ON_ONCE(gfn_end <= gfn_start))
                 return;
···
 
         flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
 
-        if (tdp_mmu_enabled) {
-                for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-                        flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start,
-                                                      gfn_end, true, flush);
-        }
+        if (tdp_mmu_enabled)
+                flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush);
 
         if (flush)
                 kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);
+7 -8
arch/x86/kvm/mmu/mmu_internal.h
···
 
         bool tdp_mmu_page;
         bool unsync;
-        u8 mmu_valid_gen;
+        union {
+                u8 mmu_valid_gen;
+
+                /* Only accessed under slots_lock. */
+                bool tdp_mmu_scheduled_root_to_zap;
+        };
 
         /*
          * The shadow page can't be replaced by an equivalent huge page
···
                 struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
                 tdp_ptep_t ptep;
         };
-        union {
-                DECLARE_BITMAP(unsync_child_bitmap, 512);
-                struct {
-                        struct work_struct tdp_mmu_async_work;
-                        void *tdp_mmu_async_data;
-                };
-        };
+        DECLARE_BITMAP(unsync_child_bitmap, 512);
 
         /*
          * Tracks shadow pages that, if zapped, would allow KVM to create an NX
+67 -85
arch/x86/kvm/mmu/tdp_mmu.c
···
 #include <trace/events/kvm.h>
 
 /* Initializes the TDP MMU for the VM, if enabled. */
-int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 {
-        struct workqueue_struct *wq;
-
-        wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0);
-        if (!wq)
-                return -ENOMEM;
-
         INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
         spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
-        kvm->arch.tdp_mmu_zap_wq = wq;
-        return 1;
 }
 
 /* Arbitrarily returns true so that this may be used in if statements. */
···
          * ultimately frees all roots.
          */
         kvm_tdp_mmu_invalidate_all_roots(kvm);
-
-        /*
-         * Destroying a workqueue also first flushes the workqueue, i.e. no
-         * need to invoke kvm_tdp_mmu_zap_invalidated_roots().
-         */
-        destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
+        kvm_tdp_mmu_zap_invalidated_roots(kvm);
 
         WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
         WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
 
         /*
          * Ensure that all the outstanding RCU callbacks to free shadow pages
-         * can run before the VM is torn down. Work items on tdp_mmu_zap_wq
-         * can call kvm_tdp_mmu_put_root and create new callbacks.
+         * can run before the VM is torn down. Putting the last reference to
+         * zapped roots will create new callbacks.
          */
         rcu_barrier();
 }
···
                                                  rcu_head);
 
         tdp_mmu_free_sp(sp);
-}
-
-static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
-                             bool shared);
-
-static void tdp_mmu_zap_root_work(struct work_struct *work)
-{
-        struct kvm_mmu_page *root = container_of(work, struct kvm_mmu_page,
-                                                 tdp_mmu_async_work);
-        struct kvm *kvm = root->tdp_mmu_async_data;
-
-        read_lock(&kvm->mmu_lock);
-
-        /*
-         * A TLB flush is not necessary as KVM performs a local TLB flush when
-         * allocating a new root (see kvm_mmu_load()), and when migrating vCPU
-         * to a different pCPU. Note, the local TLB flush on reuse also
-         * invalidates any paging-structure-cache entries, i.e. TLB entries for
-         * intermediate paging structures, that may be zapped, as such entries
-         * are associated with the ASID on both VMX and SVM.
-         */
-        tdp_mmu_zap_root(kvm, root, true);
-
-        /*
-         * Drop the refcount using kvm_tdp_mmu_put_root() to test its logic for
-         * avoiding an infinite loop. By design, the root is reachable while
-         * it's being asynchronously zapped, thus a different task can put its
-         * last reference, i.e. flowing through kvm_tdp_mmu_put_root() for an
-         * asynchronously zapped root is unavoidable.
-         */
-        kvm_tdp_mmu_put_root(kvm, root, true);
-
-        read_unlock(&kvm->mmu_lock);
-}
-
-static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root)
-{
-        root->tdp_mmu_async_data = kvm;
-        INIT_WORK(&root->tdp_mmu_async_work, tdp_mmu_zap_root_work);
-        queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work);
 }
 
 void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
···
 #define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared)    \
         __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
 
-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id)                   \
-        __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, false, false)
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared)                  \
+        for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false);             \
+             _root;                                                             \
+             _root = tdp_mmu_next_root(_kvm, _root, _shared, false))            \
+                if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) {         \
+                } else
 
 /*
  * Iterate over all TDP MMU roots. Requires that mmu_lock be held for write,
···
          * by a memslot update or by the destruction of the VM. Initialize the
          * refcount to two; one reference for the vCPU, and one reference for
          * the TDP MMU itself, which is held until the root is invalidated and
-         * is ultimately put by tdp_mmu_zap_root_work().
+         * is ultimately put by kvm_tdp_mmu_zap_invalidated_roots().
          */
         refcount_set(&root->tdp_mmu_root_count, 2);
 
···
  * true if a TLB flush is needed before releasing the MMU lock, i.e. if one or
  * more SPTEs were zapped since the MMU lock was last acquired.
  */
-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
-                           bool can_yield, bool flush)
+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
 {
         struct kvm_mmu_page *root;
 
-        for_each_tdp_mmu_root_yield_safe(kvm, root, as_id)
-                flush = tdp_mmu_zap_leafs(kvm, root, start, end, can_yield, flush);
+        for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+                flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
 
         return flush;
 }
···
 void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 {
         struct kvm_mmu_page *root;
-        int i;
 
         /*
          * Zap all roots, including invalid roots, as all SPTEs must be dropped
···
          * is being destroyed or the userspace VMM has exited. In both cases,
          * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request.
          */
-        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-                for_each_tdp_mmu_root_yield_safe(kvm, root, i)
-                        tdp_mmu_zap_root(kvm, root, false);
-        }
+        for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+                tdp_mmu_zap_root(kvm, root, false);
 }
 
 /*
···
  */
 void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
 {
-        flush_workqueue(kvm->arch.tdp_mmu_zap_wq);
+        struct kvm_mmu_page *root;
+
+        read_lock(&kvm->mmu_lock);
+
+        for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
+                if (!root->tdp_mmu_scheduled_root_to_zap)
+                        continue;
+
+                root->tdp_mmu_scheduled_root_to_zap = false;
+                KVM_BUG_ON(!root->role.invalid, kvm);
+
+                /*
+                 * A TLB flush is not necessary as KVM performs a local TLB
+                 * flush when allocating a new root (see kvm_mmu_load()), and
+                 * when migrating a vCPU to a different pCPU. Note, the local
+                 * TLB flush on reuse also invalidates paging-structure-cache
+                 * entries, i.e. TLB entries for intermediate paging structures,
+                 * that may be zapped, as such entries are associated with the
+                 * ASID on both VMX and SVM.
+                 */
+                tdp_mmu_zap_root(kvm, root, true);
+
+                /*
+                 * The referenced needs to be put *after* zapping the root, as
+                 * the root must be reachable by mmu_notifiers while it's being
+                 * zapped
+                 */
+                kvm_tdp_mmu_put_root(kvm, root, true);
+        }
+
+        read_unlock(&kvm->mmu_lock);
 }
 
 /*
  * Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that
  * is about to be zapped, e.g. in response to a memslots update. The actual
- * zapping is performed asynchronously. Using a separate workqueue makes it
- * easy to ensure that the destruction is performed before the "fast zap"
- * completes, without keeping a separate list of invalidated roots; the list is
- * effectively the list of work items in the workqueue.
+ * zapping is done separately so that it happens with mmu_lock with read,
+ * whereas invalidating roots must be done with mmu_lock held for write (unless
+ * the VM is being destroyed).
  *
- * Note, the asynchronous worker is gifted the TDP MMU's reference.
+ * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference.
  * See kvm_tdp_mmu_get_vcpu_root_hpa().
  */
 void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
···
         /*
          * As above, mmu_lock isn't held when destroying the VM! There can't
          * be other references to @kvm, i.e. nothing else can invalidate roots
-         * or be consuming roots, but walking the list of roots does need to be
-         * guarded against roots being deleted by the asynchronous zap worker.
+         * or get/put references to roots.
          */
-        rcu_read_lock();
-
-        list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) {
+        list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
+                /*
+                 * Note, invalid roots can outlive a memslot update! Invalid
+                 * roots must be *zapped* before the memslot update completes,
+                 * but a different task can acquire a reference and keep the
+                 * root alive after its been zapped
+                 */
                 if (!root->role.invalid) {
+                        root->tdp_mmu_scheduled_root_to_zap = true;
                         root->role.invalid = true;
-                        tdp_mmu_schedule_zap_root(kvm, root);
                 }
         }
-
-        rcu_read_unlock();
 }
 
 /*
···
 bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
                                  bool flush)
 {
-        return kvm_tdp_mmu_zap_leafs(kvm, range->slot->as_id, range->start,
-                                     range->end, range->may_block, flush);
+        struct kvm_mmu_page *root;
+
+        __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false)
+                flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
+                                          range->may_block, flush);
+
+        return flush;
 }
 
 typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter,
+2 -3
arch/x86/kvm/mmu/tdp_mmu.h
···
 
 #include "spte.h"
 
-int kvm_mmu_init_tdp_mmu(struct kvm *kvm);
+void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
···
 void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
                           bool shared);
 
-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start,
-                           gfn_t end, bool can_yield, bool flush);
+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
 bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm);
+26 -8
arch/x86/kvm/svm/sev.c
···
                                     count, in);
 }
 
+static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
+{
+        struct kvm_vcpu *vcpu = &svm->vcpu;
+
+        if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
+                bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
+                                 guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+
+                set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
+        }
+}
+
+void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
+{
+        struct kvm_vcpu *vcpu = &svm->vcpu;
+        struct kvm_cpuid_entry2 *best;
+
+        /* For sev guests, the memory encryption bit is not reserved in CR3. */
+        best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
+        if (best)
+                vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
+
+        if (sev_es_guest(svm->vcpu.kvm))
+                sev_es_vcpu_after_set_cpuid(svm);
+}
+
 static void sev_es_init_vmcb(struct vcpu_svm *svm)
 {
         struct vmcb *vmcb = svm->vmcb01.ptr;
···
         set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
         set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
         set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
-
-        if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
-            (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
-             guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) {
-                set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
-                if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
-                        svm_clr_intercept(svm, INTERCEPT_RDTSCP);
-        }
 }
 
 void sev_init_vmcb(struct vcpu_svm *svm)
+35 -8
arch/x86/kvm/svm/svm.c
···
 
         amd_pmu_enable_virt();
 
+        /*
+         * If TSC_AUX virtualization is supported, TSC_AUX becomes a swap type
+         * "B" field (see sev_es_prepare_switch_to_guest()) for SEV-ES guests.
+         * Since Linux does not change the value of TSC_AUX once set, prime the
+         * TSC_AUX field now to avoid a RDMSR on every vCPU run.
+         */
+        if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
+                struct sev_es_save_area *hostsa;
+                u32 msr_hi;
+
+                hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
+
+                rdmsr(MSR_TSC_AUX, hostsa->tsc_aux, msr_hi);
+        }
+
         return 0;
 }
···
         if (tsc_scaling)
                 __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
 
-        if (likely(tsc_aux_uret_slot >= 0))
+        /*
+         * TSC_AUX is always virtualized for SEV-ES guests when the feature is
+         * available. The user return MSR support is not required in this case
+         * because TSC_AUX is restored on #VMEXIT from the host save area
+         * (which has been initialized in svm_hardware_enable()).
+         */
+        if (likely(tsc_aux_uret_slot >= 0) &&
+            (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
                 kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
 
         svm->guest_state_loaded = true;
···
                 break;
         case MSR_TSC_AUX:
                 /*
+                 * TSC_AUX is always virtualized for SEV-ES guests when the
+                 * feature is available. The user return MSR support is not
+                 * required in this case because TSC_AUX is restored on #VMEXIT
+                 * from the host save area (which has been initialized in
+                 * svm_hardware_enable()).
+                 */
+                if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm))
+                        break;
+
+                /*
                  * TSC_AUX is usually changed only during boot and never read
                  * directly. Intercept TSC_AUX instead of exposing it to the
                  * guest via direct_access_msrs, and switch it via user return.
···
 static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
         struct vcpu_svm *svm = to_svm(vcpu);
-        struct kvm_cpuid_entry2 *best;
 
         /*
          * SVM doesn't provide a way to disable just XSAVES in the guest, KVM
···
         set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
                              !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
 
-        /* For sev guests, the memory encryption bit is not reserved in CR3. */
-        if (sev_guest(vcpu->kvm)) {
-                best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
-                if (best)
-                        vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
-        }
+        if (sev_guest(vcpu->kvm))
+                sev_vcpu_after_set_cpuid(svm);
 
         init_vmcb_after_set_cpuid(vcpu);
 }
+1
arch/x86/kvm/svm/svm.h
···
 void sev_hardware_unsetup(void);
 int sev_cpu_init(struct svm_cpu_data *sd);
 void sev_init_vmcb(struct vcpu_svm *svm);
+void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
 void sev_free_vcpu(struct kvm_vcpu *vcpu);
 int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
+1 -4
arch/x86/kvm/x86.c
···
         if (ret)
                 goto out;
 
-        ret = kvm_mmu_init_vm(kvm);
-        if (ret)
-                goto out_page_track;
+        kvm_mmu_init_vm(kvm);
 
         ret = static_call(kvm_x86_vm_init)(kvm);
         if (ret)
···
 
 out_uninit_mmu:
         kvm_mmu_uninit_vm(kvm);
-out_page_track:
         kvm_page_track_cleanup(kvm);
 out:
         return ret;
+2
include/linux/arm-smccc.h
···
 #define ARM_SMCCC_VERSION_1_3           0x10003
 
 #define ARM_SMCCC_1_3_SVE_HINT          0x10000
+#define ARM_SMCCC_CALL_HINTS            ARM_SMCCC_1_3_SVE_HINT
+
 
 #define ARM_SMCCC_VERSION_FUNC_ID                                       \
         ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,                         \
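For context on the new ARM_SMCCC_CALL_HINTS mask (used by the hyp-init.S and hyp-main.c hunks above): SMCCC v1.3 lets callers set hint bits such as the SVE-not-live hint in bit 16 of the function ID, so any exact match on a function number has to clear those bits first. A small standalone sketch of that pattern, with the constant values copied from this header and a made-up example function ID:

#include <stdint.h>
#include <stdio.h>

/* Values from include/linux/arm-smccc.h (see the hunk above). */
#define ARM_SMCCC_1_3_SVE_HINT  0x10000
#define ARM_SMCCC_CALL_HINTS    ARM_SMCCC_1_3_SVE_HINT

/* Hypothetical example: some fast-call function ID we want to match. */
#define EXAMPLE_FUNC_ID         0x84000001U

static int is_example_call(uint32_t func_id)
{
        /*
         * Strip the hint bits before comparing; otherwise a caller that
         * sets the SVE hint (func_id | 0x10000) would fail the match.
         */
        return (func_id & ~ARM_SMCCC_CALL_HINTS) == EXAMPLE_FUNC_ID;
}

int main(void)
{
        printf("plain   : %d\n", is_example_call(EXAMPLE_FUNC_ID));
        printf("SVE hint: %d\n",
               is_example_call(EXAMPLE_FUNC_ID | ARM_SMCCC_1_3_SVE_HINT));
        return 0;
}

Both calls report a match once the mask is applied; without it, the hinted ID would be rejected, which is exactly the SMCCC comparison bug the ARM fix addresses.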
+1 -1
tools/testing/selftests/kvm/lib/test_util.c
···
         char *str;
 
         va_start(ap, fmt);
-        vasprintf(&str, fmt, ap);
+        TEST_ASSERT(vasprintf(&str, fmt, ap) >= 0, "vasprintf() failed");
         va_end(ap);
 
         return str;
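The test_util.c fix matters because vasprintf() returns -1 on failure and leaves the output pointer undefined, so ignoring the return value can hand back an uninitialized pointer. A small standalone sketch of the checked pattern outside the selftest harness (the wrapper name xstrfmt is made up for the example):

#define _GNU_SOURCE
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical wrapper: abort loudly instead of returning an undefined pointer. */
static char *xstrfmt(const char *fmt, ...)
{
        va_list ap;
        char *str;
        int ret;

        va_start(ap, fmt);
        ret = vasprintf(&str, fmt, ap);
        va_end(ap);

        if (ret < 0) {
                fprintf(stderr, "vasprintf() failed\n");
                abort();
        }
        return str;
}

int main(void)
{
        char *msg = xstrfmt("guest mode = %d", 2);

        puts(msg);
        free(msg);
        return 0;
}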
+43 -17
tools/testing/selftests/kvm/riscv/get-reg-list.c
···
 
 #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK)
 
+static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
+
 bool filter_reg(__u64 reg)
 {
-        /*
-         * Some ISA extensions are optional and not present on all host,
-         * but they can't be disabled through ISA_EXT registers when present.
-         * So, to make life easy, just filtering out these kind of registers.
-         */
         switch (reg & ~REG_MASK) {
+        /*
+         * Same set of ISA_EXT registers are not present on all host because
+         * ISA_EXT registers are visible to the KVM user space based on the
+         * ISA extensions available on the host. Also, disabling an ISA
+         * extension using corresponding ISA_EXT register does not affect
+         * the visibility of the ISA_EXT register itself.
+         *
+         * Based on above, we should filter-out all ISA_EXT registers.
+         */
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT:
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC:
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL:
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ:
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB:
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V:
+        case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVNAPOT:
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA:
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS:
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR:
···
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI:
         case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM:
                 return true;
+        /* AIA registers are always available when Ssaia can't be disabled */
+        case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect):
+        case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1):
+        case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2):
+        case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh):
+        case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph):
+        case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h):
+        case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h):
+                return isa_ext_cant_disable[KVM_RISCV_ISA_EXT_SSAIA];
         default:
                 break;
         }
···
         unsigned long value;
 
         ret = __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(ext), &value);
-        if (ret) {
-                printf("Failed to get ext %d", ext);
-                return false;
-        }
-
-        return !!value;
+        return (ret) ? false : !!value;
 }
 
 void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
 {
+        unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
         struct vcpu_reg_sublist *s;
+        int rc;
+
+        for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++)
+                __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(i), &isa_ext_state[i]);
 
         /*
          * Disable all extensions which were enabled by default
          * if they were available in the risc-v host.
          */
-        for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++)
-                __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0);
+        for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+                rc = __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0);
+                if (rc && isa_ext_state[i])
+                        isa_ext_cant_disable[i] = true;
+        }
 
         for_each_sublist(c, s) {
                 if (!s->feature)
···
         KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time),
         KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare),
         KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
-        KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A,
-        KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C,
-        KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I,
-        KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M,
         KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01,
         KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME,
         KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI,