Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-x86-misc-6.18' of https://github.com/kvm-x86/linux into HEAD

KVM x86 changes for 6.18

- Don't (re)check L1 intercepts when completing userspace I/O to fix a flaw
where a misbehaving userspace (a.k.a. syzkaller) could swizzle L1's
intercepts and trigger a variety of WARNs in KVM.

- Emulate PERF_CNTR_GLOBAL_STATUS_SET for PerfMonV2 guests, as the MSR is
supposed to exist for v2 PMUs.

- Allow Centaur CPUID leaves (base 0xC000_0000) for Zhaoxin CPUs.

- Clean up KVM's vector hashing code for delivering lowest priority IRQs.

- Clean up the fastpath handler code to only handle IPIs and WRMSRs that are
actually "fast", as opposed to handling those that KVM _hopes_ are fast, and
in the process of doing so add fastpath support for TSC_DEADLINE writes on
AMD CPUs.

- Clean up a pile of PMU code in anticipation of adding support for mediated
vPMUs.

- Add support for the immediate forms of RDMSR and WRMSRNS, sans full
emulator support (KVM should never need to emulate the MSRs outside of
forced emulation and other contrived testing scenarios).

- Clean up the MSR APIs in preparation for CET and FRED virtualization, as
well as mediated vPMU support.

- Reject a fully in-kernel IRQCHIP if EOIs are protected, i.e. for TDX VMs,
as KVM can't faithfully emulate an I/O APIC for such guests.

- Rework KVM_REQ_MSR_FILTER_CHANGED into a generic RECALC_INTERCEPTS in preparation
for mediated vPMU support, as KVM will need to recalculate MSR intercepts in
response to PMU refreshes for guests with mediated vPMUs.

- Misc cleanups and minor fixes.

+715 -520
+6
Documentation/virt/kvm/api.rst
··· 3075 3075 Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2. 3076 3076 See KVM_GET_PIT2 for details on struct kvm_pit_state2. 3077 3077 3078 + .. Tip:: 3079 + ``KVM_SET_PIT2`` strictly adheres to the spec of Intel 8254 PIT. For example, 3080 + a ``count`` value of 0 in ``struct kvm_pit_channel_state`` is interpreted as 3081 + 65536, which is the maximum count value. Refer to `Intel 8254 programmable 3082 + interval timer <https://www.scs.stanford.edu/10wi-cs140/pintos/specs/8254.pdf>`_. 3083 + 3078 3084 This IOCTL replaces the obsolete KVM_SET_PIT. 3079 3085 3080 3086
+3 -3
Documentation/virt/kvm/x86/hypercalls.rst
··· 137 137 Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource, 138 138 or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. 139 139 140 - 6. KVM_HC_SEND_IPI 140 + 7. KVM_HC_SEND_IPI 141 141 ------------------ 142 142 143 143 :Architecture: x86 ··· 158 158 159 159 Returns the number of CPUs to which the IPIs were delivered successfully. 160 160 161 - 7. KVM_HC_SCHED_YIELD 161 + 8. KVM_HC_SCHED_YIELD 162 162 --------------------- 163 163 164 164 :Architecture: x86 ··· 170 170 :Usage example: When sending a call-function IPI-many to vCPUs, yield if 171 171 any of the IPI target vCPUs was preempted. 172 172 173 - 8. KVM_HC_MAP_GPA_RANGE 173 + 9. KVM_HC_MAP_GPA_RANGE 174 174 ------------------------- 175 175 :Architecture: x86 176 176 :Status: active
+1
arch/x86/include/asm/cpufeatures.h
··· 497 497 #define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ 498 498 #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ 499 499 #define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ 500 + #define X86_FEATURE_MSR_IMM (21*32+15) /* MSR immediate form instructions */ 500 501 501 502 /* 502 503 * BUG word(s)
+1 -1
arch/x86/include/asm/kvm-x86-ops.h
··· 138 138 KVM_X86_OP(apic_init_signal_blocked) 139 139 KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) 140 140 KVM_X86_OP_OPTIONAL(migrate_timers) 141 - KVM_X86_OP(recalc_msr_intercepts) 141 + KVM_X86_OP(recalc_intercepts) 142 142 KVM_X86_OP(complete_emulated_msr) 143 143 KVM_X86_OP(vcpu_deliver_sipi_vector) 144 144 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
+20 -11
arch/x86/include/asm/kvm_host.h
··· 120 120 #define KVM_REQ_TLB_FLUSH_GUEST \ 121 121 KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) 122 122 #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) 123 - #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) 123 + #define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29) 124 124 #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ 125 125 KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) 126 126 #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ ··· 545 545 #define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \ 546 546 KVM_MAX_NR_AMD_GP_COUNTERS) 547 547 548 - #define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3 549 - #define KVM_MAX_NR_AMD_FIXED_COUTNERS 0 550 - #define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \ 551 - KVM_MAX_NR_AMD_FIXED_COUTNERS) 548 + #define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3 549 + #define KVM_MAX_NR_AMD_FIXED_COUNTERS 0 550 + #define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \ 551 + KVM_MAX_NR_AMD_FIXED_COUNTERS) 552 552 553 553 struct kvm_pmu { 554 554 u8 version; ··· 578 578 }; 579 579 DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); 580 580 DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); 581 + 582 + DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX); 583 + DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX); 581 584 582 585 u64 ds_area; 583 586 u64 pebs_enable; ··· 774 771 CPUID_7_2_EDX, 775 772 CPUID_24_0_EBX, 776 773 CPUID_8000_0021_ECX, 774 + CPUID_7_1_ECX, 777 775 NR_KVM_CPU_CAPS, 778 776 779 777 NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, ··· 930 926 bool emulate_regs_need_sync_from_vcpu; 931 927 int (*complete_userspace_io)(struct kvm_vcpu *vcpu); 932 928 unsigned long cui_linear_rip; 929 + int cui_rdmsr_imm_reg; 933 930 934 931 gpa_t time; 935 932 s8 pvclock_tsc_shift; ··· 1386 1381 u8 vm_type; 1387 1382 bool has_private_mem; 1388 1383 bool has_protected_state; 1384 + bool has_protected_eoi; 1389 1385 bool pre_fault_allowed; 1390 1386 struct hlist_head 
*mmu_page_hash; 1391 1387 struct list_head active_mmu_pages; ··· 1927 1921 int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); 1928 1922 1929 1923 void (*migrate_timers)(struct kvm_vcpu *vcpu); 1930 - void (*recalc_msr_intercepts)(struct kvm_vcpu *vcpu); 1924 + void (*recalc_intercepts)(struct kvm_vcpu *vcpu); 1931 1925 int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); 1932 1926 1933 1927 void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); ··· 2168 2162 2169 2163 void kvm_enable_efer_bits(u64); 2170 2164 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); 2171 - int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data); 2172 - int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data); 2173 - int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); 2174 - int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); 2175 - int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); 2165 + int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); 2166 + int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); 2167 + int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); 2168 + int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); 2169 + int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data); 2170 + int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data); 2176 2171 int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); 2172 + int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); 2177 2173 int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); 2174 + int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); 2178 2175 int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); 2179 2176 int kvm_emulate_invd(struct kvm_vcpu *vcpu); 2180 2177 int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
+10 -6
arch/x86/include/asm/msr-index.h
··· 315 315 #define PERF_CAP_PT_IDX 16 316 316 317 317 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 318 - #define PERF_CAP_PEBS_TRAP BIT_ULL(6) 319 - #define PERF_CAP_ARCH_REG BIT_ULL(7) 320 - #define PERF_CAP_PEBS_FORMAT 0xf00 321 - #define PERF_CAP_PEBS_BASELINE BIT_ULL(14) 322 - #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ 323 - PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE) 318 + 319 + #define PERF_CAP_LBR_FMT 0x3f 320 + #define PERF_CAP_PEBS_TRAP BIT_ULL(6) 321 + #define PERF_CAP_ARCH_REG BIT_ULL(7) 322 + #define PERF_CAP_PEBS_FORMAT 0xf00 323 + #define PERF_CAP_FW_WRITES BIT_ULL(13) 324 + #define PERF_CAP_PEBS_BASELINE BIT_ULL(14) 325 + #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ 326 + PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE) 324 327 325 328 #define MSR_IA32_RTIT_CTL 0x00000570 326 329 #define RTIT_CTL_TRACEEN BIT(0) ··· 736 733 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 737 734 #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 738 735 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 736 + #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303 739 737 740 738 /* AMD Hardware Feedback Support MSRs */ 741 739 #define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500
+5 -1
arch/x86/include/uapi/asm/vmx.h
··· 94 94 #define EXIT_REASON_BUS_LOCK 74 95 95 #define EXIT_REASON_NOTIFY 75 96 96 #define EXIT_REASON_TDCALL 77 97 + #define EXIT_REASON_MSR_READ_IMM 84 98 + #define EXIT_REASON_MSR_WRITE_IMM 85 97 99 98 100 #define VMX_EXIT_REASONS \ 99 101 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ ··· 160 158 { EXIT_REASON_TPAUSE, "TPAUSE" }, \ 161 159 { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ 162 160 { EXIT_REASON_NOTIFY, "NOTIFY" }, \ 163 - { EXIT_REASON_TDCALL, "TDCALL" } 161 + { EXIT_REASON_TDCALL, "TDCALL" }, \ 162 + { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \ 163 + { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" } 164 164 165 165 #define VMX_EXIT_REASON_FLAGS \ 166 166 { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
+1
arch/x86/kernel/cpu/scattered.c
··· 27 27 { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, 28 28 { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, 29 29 { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, 30 + { X86_FEATURE_MSR_IMM, CPUID_ECX, 5, 0x00000007, 1 }, 30 31 { X86_FEATURE_APX, CPUID_EDX, 21, 0x00000007, 1 }, 31 32 { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, 32 33 { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
+10 -3
arch/x86/kvm/cpuid.c
··· 448 448 * adjustments to the reserved GPA bits. 449 449 */ 450 450 kvm_mmu_after_set_cpuid(vcpu); 451 + 452 + kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu); 451 453 } 452 454 453 455 int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) ··· 987 985 F(LAM), 988 986 ); 989 987 988 + kvm_cpu_cap_init(CPUID_7_1_ECX, 989 + SCATTERED_F(MSR_IMM), 990 + ); 991 + 990 992 kvm_cpu_cap_init(CPUID_7_1_EDX, 991 993 F(AVX_VNNI_INT8), 992 994 F(AVX_NE_CONVERT), ··· 1417 1411 goto out; 1418 1412 1419 1413 cpuid_entry_override(entry, CPUID_7_1_EAX); 1414 + cpuid_entry_override(entry, CPUID_7_1_ECX); 1420 1415 cpuid_entry_override(entry, CPUID_7_1_EDX); 1421 1416 entry->ebx = 0; 1422 - entry->ecx = 0; 1423 1417 } 1424 1418 if (max_idx >= 2) { 1425 1419 entry = do_host_cpuid(array, function, 2); ··· 1826 1820 int r; 1827 1821 1828 1822 if (func == CENTAUR_CPUID_SIGNATURE && 1829 - boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) 1823 + boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR && 1824 + boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) 1830 1825 return 0; 1831 1826 1832 1827 r = do_cpuid_func(array, func, type); ··· 2008 2001 if (function == 7 && index == 0) { 2009 2002 u64 data; 2010 2003 if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) && 2011 - !__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) && 2004 + !kvm_msr_read(vcpu, MSR_IA32_TSX_CTRL, &data) && 2012 2005 (data & TSX_CTRL_CPUID_CLEAR)) 2013 2006 *ebx &= ~(feature_bit(RTM) | feature_bit(HLE)); 2014 2007 } else if (function == 0x80000007) {
+6 -7
arch/x86/kvm/emulate.c
··· 4330 4330 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg), 4331 4331 G(ByteOp, group11), G(0, group11), 4332 4332 /* 0xC8 - 0xCF */ 4333 - I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter), 4334 - I(Stack | IsBranch, em_leave), 4333 + I(Stack | SrcImmU16 | Src2ImmByte, em_enter), 4334 + I(Stack, em_leave), 4335 4335 I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm), 4336 4336 I(ImplicitOps | IsBranch, em_ret_far), 4337 4337 D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn), ··· 5107 5107 ctxt->mem_read.end = 0; 5108 5108 } 5109 5109 5110 - int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) 5110 + int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts) 5111 5111 { 5112 5112 const struct x86_emulate_ops *ops = ctxt->ops; 5113 5113 int rc = X86EMUL_CONTINUE; 5114 5114 int saved_dst_type = ctxt->dst.type; 5115 - bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt); 5116 5115 5117 5116 ctxt->mem_read.pos = 0; 5118 5117 ··· 5159 5160 fetch_possible_mmx_operand(&ctxt->dst); 5160 5161 } 5161 5162 5162 - if (unlikely(is_guest_mode) && ctxt->intercept) { 5163 + if (unlikely(check_intercepts) && ctxt->intercept) { 5163 5164 rc = emulator_check_intercept(ctxt, ctxt->intercept, 5164 5165 X86_ICPT_PRE_EXCEPT); 5165 5166 if (rc != X86EMUL_CONTINUE) ··· 5188 5189 goto done; 5189 5190 } 5190 5191 5191 - if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) { 5192 + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { 5192 5193 rc = emulator_check_intercept(ctxt, ctxt->intercept, 5193 5194 X86_ICPT_POST_EXCEPT); 5194 5195 if (rc != X86EMUL_CONTINUE) ··· 5242 5243 5243 5244 special_insn: 5244 5245 5245 - if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) { 5246 + if (unlikely(check_intercepts) && (ctxt->d & Intercept)) { 5246 5247 rc = emulator_check_intercept(ctxt, ctxt->intercept, 5247 5248 X86_ICPT_POST_MEMACCESS); 5248 5249 if (rc != X86EMUL_CONTINUE)
+5 -7
arch/x86/kvm/hyperv.c
··· 1168 1168 BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); 1169 1169 BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); 1170 1170 1171 - mutex_lock(&hv->hv_lock); 1171 + guard(mutex)(&hv->hv_lock); 1172 1172 1173 1173 if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || 1174 1174 hv->hv_tsc_page_status == HV_TSC_PAGE_SET || 1175 1175 hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) 1176 - goto out_unlock; 1176 + return; 1177 1177 1178 1178 if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) 1179 - goto out_unlock; 1179 + return; 1180 1180 1181 1181 gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; 1182 1182 /* ··· 1192 1192 goto out_err; 1193 1193 1194 1194 hv->hv_tsc_page_status = HV_TSC_PAGE_SET; 1195 - goto out_unlock; 1195 + return; 1196 1196 } 1197 1197 1198 1198 /* ··· 1228 1228 goto out_err; 1229 1229 1230 1230 hv->hv_tsc_page_status = HV_TSC_PAGE_SET; 1231 - goto out_unlock; 1231 + return; 1232 1232 1233 1233 out_err: 1234 1234 hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; 1235 - out_unlock: 1236 - mutex_unlock(&hv->hv_lock); 1237 1235 } 1238 1236 1239 1237 void kvm_hv_request_tsc_page_update(struct kvm *kvm)
+1 -14
arch/x86/kvm/ioapic.c
··· 1 + // SPDX-License-Identifier: LGPL-2.1-or-later 1 2 /* 2 3 * Copyright (C) 2001 MandrakeSoft S.A. 3 4 * Copyright 2010 Red Hat, Inc. and/or its affiliates. ··· 8 7 * 75002 Paris - France 9 8 * http://www.linux-mandrake.com/ 10 9 * http://www.mandrakesoft.com/ 11 - * 12 - * This library is free software; you can redistribute it and/or 13 - * modify it under the terms of the GNU Lesser General Public 14 - * License as published by the Free Software Foundation; either 15 - * version 2 of the License, or (at your option) any later version. 16 - * 17 - * This library is distributed in the hope that it will be useful, 18 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 - * Lesser General Public License for more details. 21 - * 22 - * You should have received a copy of the GNU Lesser General Public 23 - * License along with this library; if not, write to the Free Software 24 - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 10 * 26 11 * Yunhong Jiang <yunhong.jiang@intel.com> 27 12 * Yaozu (Eddie) Dong <eddie.dong@intel.com>
-57
arch/x86/kvm/irq.c
··· 195 195 return irqchip_in_kernel(kvm); 196 196 } 197 197 198 - int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 199 - struct kvm_lapic_irq *irq, struct dest_map *dest_map) 200 - { 201 - int r = -1; 202 - struct kvm_vcpu *vcpu, *lowest = NULL; 203 - unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; 204 - unsigned int dest_vcpus = 0; 205 - 206 - if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) 207 - return r; 208 - 209 - if (irq->dest_mode == APIC_DEST_PHYSICAL && 210 - irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) { 211 - pr_info("apic: phys broadcast and lowest prio\n"); 212 - irq->delivery_mode = APIC_DM_FIXED; 213 - } 214 - 215 - memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); 216 - 217 - kvm_for_each_vcpu(i, vcpu, kvm) { 218 - if (!kvm_apic_present(vcpu)) 219 - continue; 220 - 221 - if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, 222 - irq->dest_id, irq->dest_mode)) 223 - continue; 224 - 225 - if (!kvm_lowest_prio_delivery(irq)) { 226 - if (r < 0) 227 - r = 0; 228 - r += kvm_apic_set_irq(vcpu, irq, dest_map); 229 - } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) { 230 - if (!kvm_vector_hashing_enabled()) { 231 - if (!lowest) 232 - lowest = vcpu; 233 - else if (kvm_apic_compare_prio(vcpu, lowest) < 0) 234 - lowest = vcpu; 235 - } else { 236 - __set_bit(i, dest_vcpu_bitmap); 237 - dest_vcpus++; 238 - } 239 - } 240 - } 241 - 242 - if (dest_vcpus != 0) { 243 - int idx = kvm_vector_to_index(irq->vector, dest_vcpus, 244 - dest_vcpu_bitmap, KVM_MAX_VCPUS); 245 - 246 - lowest = kvm_get_vcpu(kvm, idx); 247 - } 248 - 249 - if (lowest) 250 - r = kvm_apic_set_irq(lowest, irq, dest_map); 251 - 252 - return r; 253 - } 254 - 255 198 static void kvm_msi_to_lapic_irq(struct kvm *kvm, 256 199 struct kvm_kernel_irq_routing_entry *e, 257 200 struct kvm_lapic_irq *irq)
-4
arch/x86/kvm/irq.h
··· 121 121 122 122 int apic_has_pending_timer(struct kvm_vcpu *vcpu); 123 123 124 - int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 125 - struct kvm_lapic_irq *irq, 126 - struct dest_map *dest_map); 127 - 128 124 #endif
+1 -2
arch/x86/kvm/kvm_emulate.h
··· 235 235 void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); 236 236 237 237 bool (*is_smm)(struct x86_emulate_ctxt *ctxt); 238 - bool (*is_guest_mode)(struct x86_emulate_ctxt *ctxt); 239 238 int (*leave_smm)(struct x86_emulate_ctxt *ctxt); 240 239 void (*triple_fault)(struct x86_emulate_ctxt *ctxt); 241 240 int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr); ··· 520 521 #define EMULATION_RESTART 1 521 522 #define EMULATION_INTERCEPTED 2 522 523 void init_decode_cache(struct x86_emulate_ctxt *ctxt); 523 - int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); 524 + int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts); 524 525 int emulator_task_switch(struct x86_emulate_ctxt *ctxt, 525 526 u16 tss_selector, int idt_index, int reason, 526 527 bool has_error_code, u32 error_code);
+129 -44
arch/x86/kvm/lapic.c
··· 74 74 #define LAPIC_TIMER_ADVANCE_NS_MAX 5000 75 75 /* step-by-step approximation to mitigate fluctuation */ 76 76 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 77 + 78 + static bool __read_mostly vector_hashing_enabled = true; 79 + module_param_named(vector_hashing, vector_hashing_enabled, bool, 0444); 80 + 77 81 static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data); 78 82 static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data); 79 83 ··· 134 130 (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm)); 135 131 } 136 132 137 - bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) 133 + static bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) 138 134 { 139 135 return kvm_x86_ops.set_hv_timer 140 136 && !(kvm_mwait_in_guest(vcpu->kvm) || ··· 1067 1063 } 1068 1064 EXPORT_SYMBOL_GPL(kvm_apic_match_dest); 1069 1065 1070 - int kvm_vector_to_index(u32 vector, u32 dest_vcpus, 1071 - const unsigned long *bitmap, u32 bitmap_size) 1066 + static int kvm_vector_to_index(u32 vector, u32 dest_vcpus, 1067 + const unsigned long *bitmap, u32 bitmap_size) 1072 1068 { 1073 - u32 mod; 1074 - int i, idx = -1; 1069 + int idx = find_nth_bit(bitmap, bitmap_size, vector % dest_vcpus); 1075 1070 1076 - mod = vector % dest_vcpus; 1077 - 1078 - for (i = 0; i <= mod; i++) { 1079 - idx = find_next_bit(bitmap, bitmap_size, idx + 1); 1080 - BUG_ON(idx == bitmap_size); 1081 - } 1082 - 1071 + BUG_ON(idx >= bitmap_size); 1083 1072 return idx; 1084 1073 } 1085 1074 ··· 1101 1104 } 1102 1105 1103 1106 return false; 1107 + } 1108 + 1109 + static bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) 1110 + { 1111 + return (irq->delivery_mode == APIC_DM_LOWEST || irq->msi_redir_hint); 1112 + } 1113 + 1114 + static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 1115 + { 1116 + return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 1104 1117 } 1105 1118 1106 1119 /* Return true if the interrupt can be handled by using *bitmap as 
index mask ··· 1156 1149 if (!kvm_lowest_prio_delivery(irq)) 1157 1150 return true; 1158 1151 1159 - if (!kvm_vector_hashing_enabled()) { 1152 + if (!vector_hashing_enabled) { 1160 1153 lowest = -1; 1161 1154 for_each_set_bit(i, bitmap, 16) { 1162 1155 if (!(*dst)[i]) ··· 1263 1256 1264 1257 rcu_read_unlock(); 1265 1258 return ret; 1259 + } 1260 + 1261 + int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 1262 + struct kvm_lapic_irq *irq, struct dest_map *dest_map) 1263 + { 1264 + int r = -1; 1265 + struct kvm_vcpu *vcpu, *lowest = NULL; 1266 + unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; 1267 + unsigned int dest_vcpus = 0; 1268 + 1269 + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) 1270 + return r; 1271 + 1272 + if (irq->dest_mode == APIC_DEST_PHYSICAL && 1273 + irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) { 1274 + pr_info("apic: phys broadcast and lowest prio\n"); 1275 + irq->delivery_mode = APIC_DM_FIXED; 1276 + } 1277 + 1278 + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); 1279 + 1280 + kvm_for_each_vcpu(i, vcpu, kvm) { 1281 + if (!kvm_apic_present(vcpu)) 1282 + continue; 1283 + 1284 + if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, 1285 + irq->dest_id, irq->dest_mode)) 1286 + continue; 1287 + 1288 + if (!kvm_lowest_prio_delivery(irq)) { 1289 + if (r < 0) 1290 + r = 0; 1291 + r += kvm_apic_set_irq(vcpu, irq, dest_map); 1292 + } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) { 1293 + if (!vector_hashing_enabled) { 1294 + if (!lowest) 1295 + lowest = vcpu; 1296 + else if (kvm_apic_compare_prio(vcpu, lowest) < 0) 1297 + lowest = vcpu; 1298 + } else { 1299 + __set_bit(i, dest_vcpu_bitmap); 1300 + dest_vcpus++; 1301 + } 1302 + } 1303 + } 1304 + 1305 + if (dest_vcpus != 0) { 1306 + int idx = kvm_vector_to_index(irq->vector, dest_vcpus, 1307 + dest_vcpu_bitmap, KVM_MAX_VCPUS); 1308 + 1309 + lowest = kvm_get_vcpu(kvm, idx); 1310 + } 1311 + 1312 + if (lowest) 1313 + r = kvm_apic_set_irq(lowest, 
irq, dest_map); 1314 + 1315 + return r; 1266 1316 } 1267 1317 1268 1318 /* ··· 1465 1401 rcu_read_unlock(); 1466 1402 } 1467 1403 1468 - int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 1469 - { 1470 - return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 1471 - } 1472 - 1473 1404 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) 1474 1405 { 1475 1406 return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); ··· 1542 1483 } 1543 1484 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); 1544 1485 1486 + static void kvm_icr_to_lapic_irq(struct kvm_lapic *apic, u32 icr_low, 1487 + u32 icr_high, struct kvm_lapic_irq *irq) 1488 + { 1489 + /* KVM has no delay and should always clear the BUSY/PENDING flag. */ 1490 + WARN_ON_ONCE(icr_low & APIC_ICR_BUSY); 1491 + 1492 + irq->vector = icr_low & APIC_VECTOR_MASK; 1493 + irq->delivery_mode = icr_low & APIC_MODE_MASK; 1494 + irq->dest_mode = icr_low & APIC_DEST_MASK; 1495 + irq->level = (icr_low & APIC_INT_ASSERT) != 0; 1496 + irq->trig_mode = icr_low & APIC_INT_LEVELTRIG; 1497 + irq->shorthand = icr_low & APIC_SHORT_MASK; 1498 + irq->msi_redir_hint = false; 1499 + if (apic_x2apic_mode(apic)) 1500 + irq->dest_id = icr_high; 1501 + else 1502 + irq->dest_id = GET_XAPIC_DEST_FIELD(icr_high); 1503 + } 1504 + 1545 1505 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) 1546 1506 { 1547 1507 struct kvm_lapic_irq irq; 1548 1508 1549 - /* KVM has no delay and should always clear the BUSY/PENDING flag. 
*/ 1550 - WARN_ON_ONCE(icr_low & APIC_ICR_BUSY); 1551 - 1552 - irq.vector = icr_low & APIC_VECTOR_MASK; 1553 - irq.delivery_mode = icr_low & APIC_MODE_MASK; 1554 - irq.dest_mode = icr_low & APIC_DEST_MASK; 1555 - irq.level = (icr_low & APIC_INT_ASSERT) != 0; 1556 - irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; 1557 - irq.shorthand = icr_low & APIC_SHORT_MASK; 1558 - irq.msi_redir_hint = false; 1559 - if (apic_x2apic_mode(apic)) 1560 - irq.dest_id = icr_high; 1561 - else 1562 - irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high); 1509 + kvm_icr_to_lapic_irq(apic, icr_low, icr_high, &irq); 1563 1510 1564 1511 trace_kvm_apic_ipi(icr_low, irq.dest_id); 1565 1512 ··· 2500 2435 2501 2436 #define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) 2502 2437 2503 - int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) 2438 + static int __kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data, bool fast) 2504 2439 { 2505 2440 if (data & X2APIC_ICR_RESERVED_BITS) 2506 2441 return 1; ··· 2515 2450 */ 2516 2451 data &= ~APIC_ICR_BUSY; 2517 2452 2518 - kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); 2453 + if (fast) { 2454 + struct kvm_lapic_irq irq; 2455 + int ignored; 2456 + 2457 + kvm_icr_to_lapic_irq(apic, (u32)data, (u32)(data >> 32), &irq); 2458 + 2459 + if (!kvm_irq_delivery_to_apic_fast(apic->vcpu->kvm, apic, &irq, 2460 + &ignored, NULL)) 2461 + return -EWOULDBLOCK; 2462 + 2463 + trace_kvm_apic_ipi((u32)data, irq.dest_id); 2464 + } else { 2465 + kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); 2466 + } 2519 2467 if (kvm_x86_ops.x2apic_icr_is_split) { 2520 2468 kvm_lapic_set_reg(apic, APIC_ICR, data); 2521 2469 kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32); ··· 2537 2459 } 2538 2460 trace_kvm_apic_write(APIC_ICR, data); 2539 2461 return 0; 2462 + } 2463 + 2464 + static int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) 2465 + { 2466 + return __kvm_x2apic_icr_write(apic, data, false); 2467 + } 2468 + 2469 + int 
kvm_x2apic_icr_write_fast(struct kvm_lapic *apic, u64 data) 2470 + { 2471 + return __kvm_x2apic_icr_write(apic, data, true); 2540 2472 } 2541 2473 2542 2474 static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic) ··· 2749 2661 int kvm_alloc_apic_access_page(struct kvm *kvm) 2750 2662 { 2751 2663 void __user *hva; 2752 - int ret = 0; 2753 2664 2754 - mutex_lock(&kvm->slots_lock); 2665 + guard(mutex)(&kvm->slots_lock); 2666 + 2755 2667 if (kvm->arch.apic_access_memslot_enabled || 2756 2668 kvm->arch.apic_access_memslot_inhibited) 2757 - goto out; 2669 + return 0; 2758 2670 2759 2671 hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 2760 2672 APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); 2761 - if (IS_ERR(hva)) { 2762 - ret = PTR_ERR(hva); 2763 - goto out; 2764 - } 2673 + if (IS_ERR(hva)) 2674 + return PTR_ERR(hva); 2765 2675 2766 2676 kvm->arch.apic_access_memslot_enabled = true; 2767 - out: 2768 - mutex_unlock(&kvm->slots_lock); 2769 - return ret; 2677 + 2678 + return 0; 2770 2679 } 2771 2680 EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page); 2772 2681
+4 -11
arch/x86/kvm/lapic.h
··· 105 105 void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu); 106 106 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 107 107 int shorthand, unsigned int dest, int dest_mode); 108 - int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); 109 108 void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec); 110 109 bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr); 111 110 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr); ··· 118 119 119 120 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 120 121 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map); 122 + int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 123 + struct kvm_lapic_irq *irq, 124 + struct dest_map *dest_map); 121 125 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); 122 126 123 127 int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated); ··· 139 137 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); 140 138 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); 141 139 142 - int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data); 140 + int kvm_x2apic_icr_write_fast(struct kvm_lapic *apic, u64 data); 143 141 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); 144 142 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); 145 143 ··· 224 222 !kvm_x86_call(apic_init_signal_blocked)(vcpu); 225 223 } 226 224 227 - static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) 228 - { 229 - return (irq->delivery_mode == APIC_DM_LOWEST || 230 - irq->msi_redir_hint); 231 - } 232 - 233 225 static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) 234 226 { 235 227 return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); ··· 238 242 239 243 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, 240 244 struct 
kvm_vcpu **dest_vcpu); 241 - int kvm_vector_to_index(u32 vector, u32 dest_vcpus, 242 - const unsigned long *bitmap, u32 bitmap_size); 243 245 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu); 244 246 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); 245 247 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); 246 248 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); 247 249 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); 248 - bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu); 249 250 250 251 static inline enum lapic_mode kvm_apic_mode(u64 apic_base) 251 252 {
+137 -32
arch/x86/kvm/pmu.c
··· 26 26 /* This is enough to filter the vast majority of currently defined events. */ 27 27 #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 28 28 29 + /* Unadultered PMU capabilities of the host, i.e. of hardware. */ 30 + static struct x86_pmu_capability __read_mostly kvm_host_pmu; 31 + 32 + /* KVM's PMU capabilities, i.e. the intersection of KVM and hardware support. */ 29 33 struct x86_pmu_capability __read_mostly kvm_pmu_cap; 30 34 EXPORT_SYMBOL_GPL(kvm_pmu_cap); 31 35 32 - struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel; 33 - EXPORT_SYMBOL_GPL(kvm_pmu_eventsel); 36 + struct kvm_pmu_emulated_event_selectors { 37 + u64 INSTRUCTIONS_RETIRED; 38 + u64 BRANCH_INSTRUCTIONS_RETIRED; 39 + }; 40 + static struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel; 34 41 35 42 /* Precise Distribution of Instructions Retired (PDIR) */ 36 43 static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = { ··· 101 94 #define KVM_X86_PMU_OP_OPTIONAL __KVM_X86_PMU_OP 102 95 #include <asm/kvm-x86-pmu-ops.h> 103 96 #undef __KVM_X86_PMU_OP 97 + } 98 + 99 + void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) 100 + { 101 + bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; 102 + int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS; 103 + 104 + perf_get_x86_pmu_capability(&kvm_host_pmu); 105 + 106 + /* 107 + * Hybrid PMUs don't play nice with virtualization without careful 108 + * configuration by userspace, and KVM's APIs for reporting supported 109 + * vPMU features do not account for hybrid PMUs. Disable vPMU support 110 + * for hybrid PMUs until KVM gains a way to let userspace opt-in. 111 + */ 112 + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) 113 + enable_pmu = false; 114 + 115 + if (enable_pmu) { 116 + /* 117 + * WARN if perf did NOT disable hardware PMU if the number of 118 + * architecturally required GP counters aren't present, i.e. 
if 119 + * there are a non-zero number of counters, but fewer than what 120 + * is architecturally required. 121 + */ 122 + if (!kvm_host_pmu.num_counters_gp || 123 + WARN_ON_ONCE(kvm_host_pmu.num_counters_gp < min_nr_gp_ctrs)) 124 + enable_pmu = false; 125 + else if (is_intel && !kvm_host_pmu.version) 126 + enable_pmu = false; 127 + } 128 + 129 + if (!enable_pmu) { 130 + memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap)); 131 + return; 132 + } 133 + 134 + memcpy(&kvm_pmu_cap, &kvm_host_pmu, sizeof(kvm_host_pmu)); 135 + kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2); 136 + kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp, 137 + pmu_ops->MAX_NR_GP_COUNTERS); 138 + kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed, 139 + KVM_MAX_NR_FIXED_COUNTERS); 140 + 141 + kvm_pmu_eventsel.INSTRUCTIONS_RETIRED = 142 + perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS); 143 + kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED = 144 + perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); 104 145 } 105 146 106 147 static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) ··· 481 426 return true; 482 427 } 483 428 484 - static bool check_pmu_event_filter(struct kvm_pmc *pmc) 429 + static bool pmc_is_event_allowed(struct kvm_pmc *pmc) 485 430 { 486 431 struct kvm_x86_pmu_event_filter *filter; 487 432 struct kvm *kvm = pmc->vcpu->kvm; ··· 496 441 return is_fixed_event_allowed(filter, pmc->idx); 497 442 } 498 443 499 - static bool pmc_event_is_allowed(struct kvm_pmc *pmc) 500 - { 501 - return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) && 502 - check_pmu_event_filter(pmc); 503 - } 504 - 505 444 static int reprogram_counter(struct kvm_pmc *pmc) 506 445 { 507 446 struct kvm_pmu *pmu = pmc_to_pmu(pmc); ··· 506 457 507 458 emulate_overflow = pmc_pause_counter(pmc); 508 459 509 - if (!pmc_event_is_allowed(pmc)) 460 + if (!pmc_is_globally_enabled(pmc) || !pmc_is_locally_enabled(pmc) || 461 + !pmc_is_event_allowed(pmc)) 510 462 
return 0; 511 463 512 464 if (emulate_overflow) ··· 541 491 !(eventsel & ARCH_PERFMON_EVENTSEL_OS), 542 492 eventsel & ARCH_PERFMON_EVENTSEL_INT); 543 493 } 494 + 495 + static bool pmc_is_event_match(struct kvm_pmc *pmc, u64 eventsel) 496 + { 497 + /* 498 + * Ignore checks for edge detect (all events currently emulated by KVM 499 + * are always rising edges), pin control (unsupported by modern CPUs), 500 + * and counter mask and its invert flag (KVM doesn't emulate multiple 501 + * events in a single clock cycle). 502 + * 503 + * Note, the uppermost nibble of AMD's mask overlaps Intel's IN_TX (bit 504 + * 32) and IN_TXCP (bit 33), as well as two reserved bits (bits 35:34). 505 + * Checking the "in HLE/RTM transaction" flags is correct as the vCPU 506 + * can't be in a transaction if KVM is emulating an instruction. 507 + * 508 + * Checking the reserved bits might be wrong if they are defined in the 509 + * future, but so could ignoring them, so do the simple thing for now. 510 + */ 511 + return !((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB); 512 + } 513 + 514 + void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc) 515 + { 516 + bitmap_clear(pmu->pmc_counting_instructions, pmc->idx, 1); 517 + bitmap_clear(pmu->pmc_counting_branches, pmc->idx, 1); 518 + 519 + /* 520 + * Do NOT consult the PMU event filters, as the filters must be checked 521 + * at the time of emulation to ensure KVM uses fresh information, e.g. 522 + * omitting a PMC from a bitmap could result in a missed event if the 523 + * filter is changed to allow counting the event. 
524 + */ 525 + if (!pmc_is_locally_enabled(pmc)) 526 + return; 527 + 528 + if (pmc_is_event_match(pmc, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED)) 529 + bitmap_set(pmu->pmc_counting_instructions, pmc->idx, 1); 530 + 531 + if (pmc_is_event_match(pmc, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED)) 532 + bitmap_set(pmu->pmc_counting_branches, pmc->idx, 1); 533 + } 534 + EXPORT_SYMBOL_GPL(kvm_pmu_recalc_pmc_emulation); 544 535 545 536 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) 546 537 { ··· 618 527 */ 619 528 if (unlikely(pmu->need_cleanup)) 620 529 kvm_pmu_cleanup(vcpu); 530 + 531 + kvm_for_each_pmc(pmu, pmc, bit, bitmap) 532 + kvm_pmu_recalc_pmc_emulation(pmu, pmc); 621 533 } 622 534 623 535 int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx) ··· 744 650 msr_info->data = pmu->global_ctrl; 745 651 break; 746 652 case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: 653 + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: 747 654 case MSR_CORE_PERF_GLOBAL_OVF_CTRL: 748 655 msr_info->data = 0; 749 656 break; ··· 805 710 case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: 806 711 if (!msr_info->host_initiated) 807 712 pmu->global_status &= ~data; 713 + break; 714 + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: 715 + if (!msr_info->host_initiated) 716 + pmu->global_status |= data & ~pmu->global_status_rsvd; 808 717 break; 809 718 default: 810 719 kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index); ··· 888 789 */ 889 790 if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters) 890 791 pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0); 792 + 793 + bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); 794 + bitmap_set(pmu->all_valid_pmc_idx, KVM_FIXED_PMC_BASE_IDX, 795 + pmu->nr_arch_fixed_counters); 891 796 } 892 797 893 798 void kvm_pmu_init(struct kvm_vcpu *vcpu) ··· 916 813 pmu->pmc_in_use, X86_PMC_IDX_MAX); 917 814 918 815 kvm_for_each_pmc(pmu, pmc, i, bitmask) { 919 - if (pmc->perf_event && !pmc_speculative_in_use(pmc)) 816 + if 
(pmc->perf_event && !pmc_is_locally_enabled(pmc)) 920 817 pmc_stop_counter(pmc); 921 818 } 922 819 ··· 963 860 select_user; 964 861 } 965 862 966 - void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel) 863 + static void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, 864 + const unsigned long *event_pmcs) 967 865 { 968 866 DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX); 969 867 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); 970 868 struct kvm_pmc *pmc; 971 - int i; 869 + int i, idx; 972 870 973 871 BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX); 974 872 873 + if (bitmap_empty(event_pmcs, X86_PMC_IDX_MAX)) 874 + return; 875 + 975 876 if (!kvm_pmu_has_perf_global_ctrl(pmu)) 976 - bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX); 977 - else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx, 877 + bitmap_copy(bitmap, event_pmcs, X86_PMC_IDX_MAX); 878 + else if (!bitmap_and(bitmap, event_pmcs, 978 879 (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX)) 979 880 return; 980 881 882 + idx = srcu_read_lock(&vcpu->kvm->srcu); 981 883 kvm_for_each_pmc(pmu, pmc, i, bitmap) { 982 - /* 983 - * Ignore checks for edge detect (all events currently emulated 984 - * but KVM are always rising edges), pin control (unsupported 985 - * by modern CPUs), and counter mask and its invert flag (KVM 986 - * doesn't emulate multiple events in a single clock cycle). 987 - * 988 - * Note, the uppermost nibble of AMD's mask overlaps Intel's 989 - * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved 990 - * bits (bits 35:34). Checking the "in HLE/RTM transaction" 991 - * flags is correct as the vCPU can't be in a transaction if 992 - * KVM is emulating an instruction. Checking the reserved bits 993 - * might be wrong if they are defined in the future, but so 994 - * could ignoring them, so do the simple thing for now. 
995 - */ 996 - if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) || 997 - !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc)) 884 + if (!pmc_is_event_allowed(pmc) || !cpl_is_matched(pmc)) 998 885 continue; 999 886 1000 887 kvm_pmu_incr_counter(pmc); 1001 888 } 889 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 1002 890 } 1003 - EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event); 891 + 892 + void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu) 893 + { 894 + kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_instructions); 895 + } 896 + EXPORT_SYMBOL_GPL(kvm_pmu_instruction_retired); 897 + 898 + void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu) 899 + { 900 + kvm_pmu_trigger_event(vcpu, vcpu_to_pmu(vcpu)->pmc_counting_branches); 901 + } 902 + EXPORT_SYMBOL_GPL(kvm_pmu_branch_retired); 1004 903 1005 904 static bool is_masked_filter_valid(const struct kvm_x86_pmu_event_filter *filter) 1006 905 {
+7 -53
arch/x86/kvm/pmu.h
··· 23 23 24 24 #define KVM_FIXED_PMC_BASE_IDX INTEL_PMC_IDX_FIXED 25 25 26 - struct kvm_pmu_emulated_event_selectors { 27 - u64 INSTRUCTIONS_RETIRED; 28 - u64 BRANCH_INSTRUCTIONS_RETIRED; 29 - }; 30 - 31 26 struct kvm_pmu_ops { 32 27 struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu, 33 28 unsigned int idx, u64 *mask); ··· 160 165 return NULL; 161 166 } 162 167 163 - static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc) 168 + static inline bool pmc_is_locally_enabled(struct kvm_pmc *pmc) 164 169 { 165 170 struct kvm_pmu *pmu = pmc_to_pmu(pmc); 166 171 ··· 173 178 } 174 179 175 180 extern struct x86_pmu_capability kvm_pmu_cap; 176 - extern struct kvm_pmu_emulated_event_selectors kvm_pmu_eventsel; 177 181 178 - static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) 179 - { 180 - bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; 181 - int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS; 182 + void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops); 182 183 183 - /* 184 - * Hybrid PMUs don't play nice with virtualization without careful 185 - * configuration by userspace, and KVM's APIs for reporting supported 186 - * vPMU features do not account for hybrid PMUs. Disable vPMU support 187 - * for hybrid PMUs until KVM gains a way to let userspace opt-in. 188 - */ 189 - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) 190 - enable_pmu = false; 191 - 192 - if (enable_pmu) { 193 - perf_get_x86_pmu_capability(&kvm_pmu_cap); 194 - 195 - /* 196 - * WARN if perf did NOT disable hardware PMU if the number of 197 - * architecturally required GP counters aren't present, i.e. if 198 - * there are a non-zero number of counters, but fewer than what 199 - * is architecturally required. 
200 - */ 201 - if (!kvm_pmu_cap.num_counters_gp || 202 - WARN_ON_ONCE(kvm_pmu_cap.num_counters_gp < min_nr_gp_ctrs)) 203 - enable_pmu = false; 204 - else if (is_intel && !kvm_pmu_cap.version) 205 - enable_pmu = false; 206 - } 207 - 208 - if (!enable_pmu) { 209 - memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap)); 210 - return; 211 - } 212 - 213 - kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2); 214 - kvm_pmu_cap.num_counters_gp = min(kvm_pmu_cap.num_counters_gp, 215 - pmu_ops->MAX_NR_GP_COUNTERS); 216 - kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed, 217 - KVM_MAX_NR_FIXED_COUNTERS); 218 - 219 - kvm_pmu_eventsel.INSTRUCTIONS_RETIRED = 220 - perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS); 221 - kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED = 222 - perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); 223 - } 184 + void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc); 224 185 225 186 static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc) 226 187 { 188 + kvm_pmu_recalc_pmc_emulation(pmc_to_pmu(pmc), pmc); 189 + 227 190 set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); 228 191 kvm_make_request(KVM_REQ_PMU, pmc->vcpu); 229 192 } ··· 225 272 void kvm_pmu_cleanup(struct kvm_vcpu *vcpu); 226 273 void kvm_pmu_destroy(struct kvm_vcpu *vcpu); 227 274 int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp); 228 - void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel); 275 + void kvm_pmu_instruction_retired(struct kvm_vcpu *vcpu); 276 + void kvm_pmu_branch_retired(struct kvm_vcpu *vcpu); 229 277 230 278 bool is_vmware_backdoor_pmc(u32 pmc_idx); 231 279
+5
arch/x86/kvm/reverse_cpuid.h
··· 25 25 #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) 26 26 #define KVM_X86_FEATURE_SGX_EDECCSSA KVM_X86_FEATURE(CPUID_12_EAX, 11) 27 27 28 + /* Intel-defined sub-features, CPUID level 0x00000007:1 (ECX) */ 29 + #define KVM_X86_FEATURE_MSR_IMM KVM_X86_FEATURE(CPUID_7_1_ECX, 5) 30 + 28 31 /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */ 29 32 #define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4) 30 33 #define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) ··· 90 87 [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, 91 88 [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, 92 89 [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, 90 + [CPUID_7_1_ECX] = { 7, 1, CPUID_ECX}, 93 91 }; 94 92 95 93 /* ··· 132 128 KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); 133 129 KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO); 134 130 KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO); 131 + KVM_X86_TRANSLATE_FEATURE(MSR_IMM); 135 132 default: 136 133 return x86_feature; 137 134 }
+2 -2
arch/x86/kvm/smm.c
··· 529 529 530 530 vcpu->arch.smbase = smstate->smbase; 531 531 532 - if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA)) 532 + if (__kvm_emulate_msr_write(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA)) 533 533 return X86EMUL_UNHANDLEABLE; 534 534 535 535 rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR); ··· 620 620 621 621 /* And finally go back to 32-bit mode. */ 622 622 efer = 0; 623 - kvm_set_msr(vcpu, MSR_EFER, efer); 623 + __kvm_emulate_msr_write(vcpu, MSR_EFER, efer); 624 624 } 625 625 #endif 626 626
+4 -4
arch/x86/kvm/svm/pmu.c
··· 41 41 struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); 42 42 unsigned int idx; 43 43 44 - if (!vcpu->kvm->arch.enable_pmu) 44 + if (!pmu->version) 45 45 return NULL; 46 46 47 47 switch (msr) { ··· 113 113 case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: 114 114 case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: 115 115 case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: 116 + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: 116 117 return pmu->version > 1; 117 118 default: 118 119 if (msr > MSR_F15H_PERF_CTR5 && ··· 200 199 kvm_pmu_cap.num_counters_gp); 201 200 202 201 if (pmu->version > 1) { 203 - pmu->global_ctrl_rsvd = ~((1ull << pmu->nr_arch_gp_counters) - 1); 202 + pmu->global_ctrl_rsvd = ~(BIT_ULL(pmu->nr_arch_gp_counters) - 1); 204 203 pmu->global_status_rsvd = pmu->global_ctrl_rsvd; 205 204 } 206 205 207 - pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; 206 + pmu->counter_bitmask[KVM_PMC_GP] = BIT_ULL(48) - 1; 208 207 pmu->reserved_bits = 0xfffffff000280000ull; 209 208 pmu->raw_event_mask = AMD64_RAW_EVENT_MASK; 210 209 /* not applicable to AMD; but clean them to prevent any fall out */ 211 210 pmu->counter_bitmask[KVM_PMC_FIXED] = 0; 212 211 pmu->nr_arch_fixed_counters = 0; 213 - bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); 214 212 } 215 213 216 214 static void amd_pmu_init(struct kvm_vcpu *vcpu)
+21 -9
arch/x86/kvm/svm/svm.c
··· 1008 1008 } 1009 1009 } 1010 1010 1011 - static void svm_recalc_intercepts_after_set_cpuid(struct kvm_vcpu *vcpu) 1011 + static void svm_recalc_intercepts(struct kvm_vcpu *vcpu) 1012 1012 { 1013 1013 svm_recalc_instruction_intercepts(vcpu); 1014 1014 svm_recalc_msr_intercepts(vcpu); ··· 1156 1156 1157 1157 svm_hv_init_vmcb(vmcb); 1158 1158 1159 - svm_recalc_intercepts_after_set_cpuid(vcpu); 1159 + kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu); 1160 1160 1161 1161 vmcb_mark_all_dirty(vmcb); 1162 1162 ··· 4093 4093 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) 4094 4094 { 4095 4095 struct vcpu_svm *svm = to_svm(vcpu); 4096 + struct vmcb_control_area *control = &svm->vmcb->control; 4097 + 4098 + /* 4099 + * Next RIP must be provided as IRQs are disabled, and accessing guest 4100 + * memory to decode the instruction might fault, i.e. might sleep. 4101 + */ 4102 + if (!nrips || !control->next_rip) 4103 + return EXIT_FASTPATH_NONE; 4096 4104 4097 4105 if (is_guest_mode(vcpu)) 4098 4106 return EXIT_FASTPATH_NONE; 4099 4107 4100 - switch (svm->vmcb->control.exit_code) { 4108 + switch (control->exit_code) { 4101 4109 case SVM_EXIT_MSR: 4102 - if (!svm->vmcb->control.exit_info_1) 4110 + if (!control->exit_info_1) 4103 4111 break; 4104 - return handle_fastpath_set_msr_irqoff(vcpu); 4112 + return handle_fastpath_wrmsr(vcpu); 4105 4113 case SVM_EXIT_HLT: 4106 4114 return handle_fastpath_hlt(vcpu); 4115 + case SVM_EXIT_INVD: 4116 + return handle_fastpath_invd(vcpu); 4107 4117 default: 4108 4118 break; 4109 4119 } ··· 4390 4380 4391 4381 if (sev_guest(vcpu->kvm)) 4392 4382 sev_vcpu_after_set_cpuid(svm); 4393 - 4394 - svm_recalc_intercepts_after_set_cpuid(vcpu); 4395 4383 } 4396 4384 4397 4385 static bool svm_has_wbinvd_exit(void) ··· 5091 5083 5092 5084 .apic_init_signal_blocked = svm_apic_init_signal_blocked, 5093 5085 5094 - .recalc_msr_intercepts = svm_recalc_msr_intercepts, 5086 + .recalc_intercepts = svm_recalc_intercepts, 5095 5087 
.complete_emulated_msr = svm_complete_emulated_msr, 5096 5088 5097 5089 .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector, ··· 5221 5213 /* CPUID 0x8000001F (SME/SEV features) */ 5222 5214 sev_set_cpu_caps(); 5223 5215 5224 - /* Don't advertise Bus Lock Detect to guest if SVM support is absent */ 5216 + /* 5217 + * Clear capabilities that are automatically configured by common code, 5218 + * but that require explicit SVM support (that isn't yet implemented). 5219 + */ 5225 5220 kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT); 5221 + kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM); 5226 5222 } 5227 5223 5228 5224 static __init int svm_hardware_setup(void)
-3
arch/x86/kvm/vmx/capabilities.h
··· 20 20 #define PT_MODE_SYSTEM 0 21 21 #define PT_MODE_HOST_GUEST 1 22 22 23 - #define PMU_CAP_FW_WRITES (1ULL << 13) 24 - #define PMU_CAP_LBR_FMT 0x3f 25 - 26 23 struct nested_vmx_msrs { 27 24 /* 28 25 * We only store the "true" versions of the VMX capability MSRs. We
+7 -7
arch/x86/kvm/vmx/main.c
··· 188 188 return vmx_get_msr(vcpu, msr_info); 189 189 } 190 190 191 - static void vt_recalc_msr_intercepts(struct kvm_vcpu *vcpu) 191 + static void vt_recalc_intercepts(struct kvm_vcpu *vcpu) 192 192 { 193 193 /* 194 - * TDX doesn't allow VMM to configure interception of MSR accesses. 195 - * TDX guest requests MSR accesses by calling TDVMCALL. The MSR 196 - * filters will be applied when handling the TDVMCALL for RDMSR/WRMSR 197 - * if the userspace has set any. 194 + * TDX doesn't allow VMM to configure interception of instructions or 195 + * MSR accesses. TDX guest requests MSR accesses by calling TDVMCALL. 196 + * The MSR filters will be applied when handling the TDVMCALL for 197 + * RDMSR/WRMSR if the userspace has set any. 198 198 */ 199 199 if (is_td_vcpu(vcpu)) 200 200 return; 201 201 202 - vmx_recalc_msr_intercepts(vcpu); 202 + vmx_recalc_intercepts(vcpu); 203 203 } 204 204 205 205 static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err) ··· 996 996 .apic_init_signal_blocked = vt_op(apic_init_signal_blocked), 997 997 .migrate_timers = vmx_migrate_timers, 998 998 999 - .recalc_msr_intercepts = vt_op(recalc_msr_intercepts), 999 + .recalc_intercepts = vt_op(recalc_intercepts), 1000 1000 .complete_emulated_msr = vt_op(complete_emulated_msr), 1001 1001 1002 1002 .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
+19 -10
arch/x86/kvm/vmx/nested.c
··· 997 997 __func__, i, e.index, e.reserved); 998 998 goto fail; 999 999 } 1000 - if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) { 1000 + if (kvm_emulate_msr_write(vcpu, e.index, e.value)) { 1001 1001 pr_debug_ratelimited( 1002 1002 "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", 1003 1003 __func__, i, e.index, e.value); ··· 1033 1033 } 1034 1034 } 1035 1035 1036 - if (kvm_get_msr_with_filter(vcpu, msr_index, data)) { 1036 + if (kvm_emulate_msr_read(vcpu, msr_index, data)) { 1037 1037 pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, 1038 1038 msr_index); 1039 1039 return false; ··· 2770 2770 2771 2771 if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && 2772 2772 kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) && 2773 - WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, 2774 - vmcs12->guest_ia32_perf_global_ctrl))) { 2773 + WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, 2774 + vmcs12->guest_ia32_perf_global_ctrl))) { 2775 2775 *entry_failure_code = ENTRY_FAIL_DEFAULT; 2776 2776 return -EINVAL; 2777 2777 } ··· 3690 3690 return 1; 3691 3691 } 3692 3692 3693 - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED); 3693 + kvm_pmu_branch_retired(vcpu); 3694 3694 3695 3695 if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) 3696 3696 return nested_vmx_failInvalid(vcpu); ··· 4758 4758 } 4759 4759 if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && 4760 4760 kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu))) 4761 - WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, 4762 - vmcs12->host_ia32_perf_global_ctrl)); 4761 + WARN_ON_ONCE(__kvm_emulate_msr_write(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, 4762 + vmcs12->host_ia32_perf_global_ctrl)); 4763 4763 4764 4764 /* Set L1 segment info according to Intel SDM 4765 4765 27.5.2 Loading Host Segment and Descriptor-Table Registers */ ··· 4937 4937 goto vmabort; 4938 4938 } 4939 4939 4940 - if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) 
{ 4940 + if (kvm_emulate_msr_write(vcpu, h.index, h.value)) { 4941 4941 pr_debug_ratelimited( 4942 4942 "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", 4943 4943 __func__, j, h.index, h.value); ··· 6216 6216 struct vmcs12 *vmcs12, 6217 6217 union vmx_exit_reason exit_reason) 6218 6218 { 6219 - u32 msr_index = kvm_rcx_read(vcpu); 6219 + u32 msr_index; 6220 6220 gpa_t bitmap; 6221 6221 6222 6222 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) 6223 6223 return true; 6224 + 6225 + if (exit_reason.basic == EXIT_REASON_MSR_READ_IMM || 6226 + exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM) 6227 + msr_index = vmx_get_exit_qual(vcpu); 6228 + else 6229 + msr_index = kvm_rcx_read(vcpu); 6224 6230 6225 6231 /* 6226 6232 * The MSR_BITMAP page is divided into four 1024-byte bitmaps, ··· 6234 6228 * First we need to figure out which of the four to use: 6235 6229 */ 6236 6230 bitmap = vmcs12->msr_bitmap; 6237 - if (exit_reason.basic == EXIT_REASON_MSR_WRITE) 6231 + if (exit_reason.basic == EXIT_REASON_MSR_WRITE || 6232 + exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM) 6238 6233 bitmap += 2048; 6239 6234 if (msr_index >= 0xc0000000) { 6240 6235 msr_index -= 0xc0000000; ··· 6534 6527 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC); 6535 6528 case EXIT_REASON_MSR_READ: 6536 6529 case EXIT_REASON_MSR_WRITE: 6530 + case EXIT_REASON_MSR_READ_IMM: 6531 + case EXIT_REASON_MSR_WRITE_IMM: 6537 6532 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); 6538 6533 case EXIT_REASON_INVALID_STATE: 6539 6534 return true;
+35 -44
arch/x86/kvm/vmx/pmu_intel.c
··· 138 138 139 139 static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu) 140 140 { 141 - return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0; 141 + return (vcpu_get_perf_capabilities(vcpu) & PERF_CAP_FW_WRITES) != 0; 142 142 } 143 143 144 144 static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr) ··· 478 478 }; 479 479 u64 eventsel; 480 480 481 - BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUTNERS); 482 - BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUTNERS); 481 + BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_MAX_NR_INTEL_FIXED_COUNTERS); 482 + BUILD_BUG_ON(index >= KVM_MAX_NR_INTEL_FIXED_COUNTERS); 483 483 484 484 /* 485 485 * Yell if perf reports support for a fixed counter but perf doesn't ··· 536 536 kvm_pmu_cap.num_counters_gp); 537 537 eax.split.bit_width = min_t(int, eax.split.bit_width, 538 538 kvm_pmu_cap.bit_width_gp); 539 - pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; 539 + pmu->counter_bitmask[KVM_PMC_GP] = BIT_ULL(eax.split.bit_width) - 1; 540 540 eax.split.mask_length = min_t(int, eax.split.mask_length, 541 541 kvm_pmu_cap.events_mask_len); 542 - pmu->available_event_types = ~entry->ebx & 543 - ((1ull << eax.split.mask_length) - 1); 542 + pmu->available_event_types = ~entry->ebx & (BIT_ULL(eax.split.mask_length) - 1); 544 543 545 - if (pmu->version == 1) { 546 - pmu->nr_arch_fixed_counters = 0; 547 - } else { 548 - pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, 549 - kvm_pmu_cap.num_counters_fixed); 550 - edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed, 551 - kvm_pmu_cap.bit_width_fixed); 552 - pmu->counter_bitmask[KVM_PMC_FIXED] = 553 - ((u64)1 << edx.split.bit_width_fixed) - 1; 544 + entry = kvm_find_cpuid_entry_index(vcpu, 7, 0); 545 + if (entry && 546 + (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && 547 + (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) { 548 + pmu->reserved_bits ^= 
HSW_IN_TX; 549 + pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); 554 550 } 551 + 552 + perf_capabilities = vcpu_get_perf_capabilities(vcpu); 553 + if (intel_pmu_lbr_is_compatible(vcpu) && 554 + (perf_capabilities & PERF_CAP_LBR_FMT)) 555 + memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps)); 556 + else 557 + lbr_desc->records.nr = 0; 558 + 559 + if (lbr_desc->records.nr) 560 + bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1); 561 + 562 + if (pmu->version == 1) 563 + return; 564 + 565 + pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, 566 + kvm_pmu_cap.num_counters_fixed); 567 + edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed, 568 + kvm_pmu_cap.bit_width_fixed); 569 + pmu->counter_bitmask[KVM_PMC_FIXED] = BIT_ULL(edx.split.bit_width_fixed) - 1; 555 570 556 571 intel_pmu_enable_fixed_counter_bits(pmu, INTEL_FIXED_0_KERNEL | 557 572 INTEL_FIXED_0_USER | 558 573 INTEL_FIXED_0_ENABLE_PMI); 559 574 560 - counter_rsvd = ~(((1ull << pmu->nr_arch_gp_counters) - 1) | 561 - (((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX)); 575 + counter_rsvd = ~((BIT_ULL(pmu->nr_arch_gp_counters) - 1) | 576 + ((BIT_ULL(pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX)); 562 577 pmu->global_ctrl_rsvd = counter_rsvd; 563 578 564 579 /* ··· 588 573 pmu->global_status_rsvd &= 589 574 ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI; 590 575 591 - entry = kvm_find_cpuid_entry_index(vcpu, 7, 0); 592 - if (entry && 593 - (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && 594 - (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) { 595 - pmu->reserved_bits ^= HSW_IN_TX; 596 - pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); 597 - } 598 - 599 - bitmap_set(pmu->all_valid_pmc_idx, 600 - 0, pmu->nr_arch_gp_counters); 601 - bitmap_set(pmu->all_valid_pmc_idx, 602 - INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters); 603 - 604 - perf_capabilities = 
vcpu_get_perf_capabilities(vcpu); 605 - if (intel_pmu_lbr_is_compatible(vcpu) && 606 - (perf_capabilities & PMU_CAP_LBR_FMT)) 607 - memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps)); 608 - else 609 - lbr_desc->records.nr = 0; 610 - 611 - if (lbr_desc->records.nr) 612 - bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1); 613 - 614 576 if (perf_capabilities & PERF_CAP_PEBS_FORMAT) { 615 577 if (perf_capabilities & PERF_CAP_PEBS_BASELINE) { 616 578 pmu->pebs_enable_rsvd = counter_rsvd; ··· 595 603 pmu->pebs_data_cfg_rsvd = ~0xff00000full; 596 604 intel_pmu_enable_fixed_counter_bits(pmu, ICL_FIXED_0_ADAPTIVE); 597 605 } else { 598 - pmu->pebs_enable_rsvd = 599 - ~((1ull << pmu->nr_arch_gp_counters) - 1); 606 + pmu->pebs_enable_rsvd = ~(BIT_ULL(pmu->nr_arch_gp_counters) - 1); 600 607 } 601 608 } 602 609 } ··· 616 625 pmu->gp_counters[i].current_config = 0; 617 626 } 618 627 619 - for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUTNERS; i++) { 628 + for (i = 0; i < KVM_MAX_NR_INTEL_FIXED_COUNTERS; i++) { 620 629 pmu->fixed_counters[i].type = KVM_PMC_FIXED; 621 630 pmu->fixed_counters[i].vcpu = vcpu; 622 631 pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX; ··· 753 762 int bit, hw_idx; 754 763 755 764 kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) { 756 - if (!pmc_speculative_in_use(pmc) || 765 + if (!pmc_is_locally_enabled(pmc) || 757 766 !pmc_is_globally_enabled(pmc) || !pmc->perf_event) 758 767 continue; 759 768
+5
arch/x86/kvm/vmx/tdx.c
··· 629 629 struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); 630 630 631 631 kvm->arch.has_protected_state = true; 632 + /* 633 + * TDX Module doesn't allow the hypervisor to modify the EOI-bitmap, 634 + * i.e. all EOIs are accelerated and never trigger exits. 635 + */ 636 + kvm->arch.has_protected_eoi = true; 632 637 kvm->arch.has_private_mem = true; 633 638 kvm->arch.disabled_quirks |= KVM_X86_QUIRK_IGNORE_GUEST_PAT; 634 639
+59 -32
arch/x86/kvm/vmx/vmx.c
··· 2140 2140 (host_initiated || guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))) 2141 2141 debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; 2142 2142 2143 - if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) && 2143 + if ((kvm_caps.supported_perf_cap & PERF_CAP_LBR_FMT) && 2144 2144 (host_initiated || intel_pmu_lbr_is_enabled(vcpu))) 2145 2145 debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; 2146 2146 ··· 2425 2425 vmx->pt_desc.guest.addr_a[index / 2] = data; 2426 2426 break; 2427 2427 case MSR_IA32_PERF_CAPABILITIES: 2428 - if (data & PMU_CAP_LBR_FMT) { 2429 - if ((data & PMU_CAP_LBR_FMT) != 2430 - (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT)) 2428 + if (data & PERF_CAP_LBR_FMT) { 2429 + if ((data & PERF_CAP_LBR_FMT) != 2430 + (kvm_caps.supported_perf_cap & PERF_CAP_LBR_FMT)) 2431 2431 return 1; 2432 2432 if (!cpuid_model_is_consistent(vcpu)) 2433 2433 return 1; ··· 4081 4081 } 4082 4082 } 4083 4083 4084 - void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu) 4084 + static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu) 4085 4085 { 4086 4086 if (!cpu_has_vmx_msr_bitmap()) 4087 4087 return; ··· 4132 4132 * x2APIC and LBR MSR intercepts are modified on-demand and cannot be 4133 4133 * filtered by userspace. 
4134 4134 */ 4135 + } 4136 + 4137 + void vmx_recalc_intercepts(struct kvm_vcpu *vcpu) 4138 + { 4139 + vmx_recalc_msr_intercepts(vcpu); 4135 4140 } 4136 4141 4137 4142 static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, ··· 4322 4317 return pin_based_exec_ctrl; 4323 4318 } 4324 4319 4325 - static u32 vmx_vmentry_ctrl(void) 4320 + static u32 vmx_get_initial_vmentry_ctrl(void) 4326 4321 { 4327 4322 u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; 4328 4323 ··· 4339 4334 return vmentry_ctrl; 4340 4335 } 4341 4336 4342 - static u32 vmx_vmexit_ctrl(void) 4337 + static u32 vmx_get_initial_vmexit_ctrl(void) 4343 4338 { 4344 4339 u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; 4345 4340 ··· 4369 4364 4370 4365 pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); 4371 4366 4372 - if (kvm_vcpu_apicv_active(vcpu)) { 4373 - secondary_exec_controls_setbit(vmx, 4374 - SECONDARY_EXEC_APIC_REGISTER_VIRT | 4375 - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); 4376 - if (enable_ipiv) 4377 - tertiary_exec_controls_setbit(vmx, TERTIARY_EXEC_IPI_VIRT); 4378 - } else { 4379 - secondary_exec_controls_clearbit(vmx, 4380 - SECONDARY_EXEC_APIC_REGISTER_VIRT | 4381 - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); 4382 - if (enable_ipiv) 4383 - tertiary_exec_controls_clearbit(vmx, TERTIARY_EXEC_IPI_VIRT); 4384 - } 4367 + secondary_exec_controls_changebit(vmx, 4368 + SECONDARY_EXEC_APIC_REGISTER_VIRT | 4369 + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY, 4370 + kvm_vcpu_apicv_active(vcpu)); 4371 + if (enable_ipiv) 4372 + tertiary_exec_controls_changebit(vmx, TERTIARY_EXEC_IPI_VIRT, 4373 + kvm_vcpu_apicv_active(vcpu)); 4385 4374 4386 4375 vmx_update_msr_bitmap_x2apic(vcpu); 4387 4376 } ··· 4698 4699 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) 4699 4700 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); 4700 4701 4701 - vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); 4702 + vm_exit_controls_set(vmx, vmx_get_initial_vmexit_ctrl()); 4702 4703 4703 4704 /* 22.2.1, 20.8.1 */ 4704 - 
vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); 4705 + vm_entry_controls_set(vmx, vmx_get_initial_vmentry_ctrl()); 4705 4706 4706 4707 vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); 4707 4708 vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits); ··· 6022 6023 return 1; 6023 6024 } 6024 6025 6026 + static int vmx_get_msr_imm_reg(struct kvm_vcpu *vcpu) 6027 + { 6028 + return vmx_get_instr_info_reg(vmcs_read32(VMX_INSTRUCTION_INFO)); 6029 + } 6030 + 6031 + static int handle_rdmsr_imm(struct kvm_vcpu *vcpu) 6032 + { 6033 + return kvm_emulate_rdmsr_imm(vcpu, vmx_get_exit_qual(vcpu), 6034 + vmx_get_msr_imm_reg(vcpu)); 6035 + } 6036 + 6037 + static int handle_wrmsr_imm(struct kvm_vcpu *vcpu) 6038 + { 6039 + return kvm_emulate_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu), 6040 + vmx_get_msr_imm_reg(vcpu)); 6041 + } 6042 + 6025 6043 /* 6026 6044 * The exit handlers return 1 if the exit was handled fully and guest execution 6027 6045 * may resume. Otherwise they set the kvm_run parameter to indicate what needs ··· 6097 6081 [EXIT_REASON_ENCLS] = handle_encls, 6098 6082 [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, 6099 6083 [EXIT_REASON_NOTIFY] = handle_notify, 6084 + [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm, 6085 + [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm, 6100 6086 }; 6101 6087 6102 6088 static const int kvm_vmx_max_exit_handlers = ··· 6533 6515 #ifdef CONFIG_MITIGATION_RETPOLINE 6534 6516 if (exit_reason.basic == EXIT_REASON_MSR_WRITE) 6535 6517 return kvm_emulate_wrmsr(vcpu); 6518 + else if (exit_reason.basic == EXIT_REASON_MSR_WRITE_IMM) 6519 + return handle_wrmsr_imm(vcpu); 6536 6520 else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER) 6537 6521 return handle_preemption_timer(vcpu); 6538 6522 else if (exit_reason.basic == EXIT_REASON_INTERRUPT_WINDOW) ··· 7210 7190 7211 7191 switch (vmx_get_exit_reason(vcpu).basic) { 7212 7192 case EXIT_REASON_MSR_WRITE: 7213 - return handle_fastpath_set_msr_irqoff(vcpu); 7193 + 
return handle_fastpath_wrmsr(vcpu); 7194 + case EXIT_REASON_MSR_WRITE_IMM: 7195 + return handle_fastpath_wrmsr_imm(vcpu, vmx_get_exit_qual(vcpu), 7196 + vmx_get_msr_imm_reg(vcpu)); 7214 7197 case EXIT_REASON_PREEMPTION_TIMER: 7215 7198 return handle_fastpath_preemption_timer(vcpu, force_immediate_exit); 7216 7199 case EXIT_REASON_HLT: 7217 7200 return handle_fastpath_hlt(vcpu); 7201 + case EXIT_REASON_INVD: 7202 + return handle_fastpath_invd(vcpu); 7218 7203 default: 7219 7204 return EXIT_FASTPATH_NONE; 7220 7205 } ··· 7820 7795 vmx->msr_ia32_feature_control_valid_bits &= 7821 7796 ~FEAT_CTL_SGX_LC_ENABLED; 7822 7797 7823 - /* Recalc MSR interception to account for feature changes. */ 7824 - vmx_recalc_msr_intercepts(vcpu); 7825 - 7826 7798 /* Refresh #PF interception to account for MAXPHYADDR changes. */ 7827 7799 vmx_update_exception_bitmap(vcpu); 7828 7800 } 7829 7801 7830 7802 static __init u64 vmx_get_perf_capabilities(void) 7831 7803 { 7832 - u64 perf_cap = PMU_CAP_FW_WRITES; 7804 + u64 perf_cap = PERF_CAP_FW_WRITES; 7833 7805 u64 host_perf_cap = 0; 7834 7806 7835 7807 if (!enable_pmu) ··· 7846 7824 if (!vmx_lbr_caps.has_callstack) 7847 7825 memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps)); 7848 7826 else if (vmx_lbr_caps.nr) 7849 - perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; 7827 + perf_cap |= host_perf_cap & PERF_CAP_LBR_FMT; 7850 7828 } 7851 7829 7852 7830 if (vmx_pebs_supported()) { ··· 8375 8353 8376 8354 vmx_setup_user_return_msrs(); 8377 8355 8378 - if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) 8379 - return -EIO; 8380 8356 8381 8357 if (boot_cpu_has(X86_FEATURE_NX)) 8382 8358 kvm_enable_efer_bits(EFER_NX); ··· 8600 8580 return -EOPNOTSUPP; 8601 8581 8602 8582 /* 8603 - * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing 8604 - * to unwind if a later step fails. 8583 + * Note, VMCS and eVMCS configuration only touch VMX knobs/variables, 8584 + * i.e. there's nothing to unwind if a later step fails. 
8605 8585 */ 8606 8586 hv_init_evmcs(); 8587 + 8588 + /* 8589 + * Parse the VMCS config and VMX capabilities before anything else, so 8590 + * that the information is available to all setup flows. 8591 + */ 8592 + if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) 8593 + return -EIO; 8607 8594 8608 8595 r = kvm_x86_vendor_init(&vt_init_ops); 8609 8596 if (r)
+13
arch/x86/kvm/vmx/vmx.h
··· 608 608 { \ 609 609 BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_##uname | KVM_OPTIONAL_VMX_##uname))); \ 610 610 lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ 611 + } \ 612 + static __always_inline void lname##_controls_changebit(struct vcpu_vmx *vmx, u##bits val, \ 613 + bool set) \ 614 + { \ 615 + if (set) \ 616 + lname##_controls_setbit(vmx, val); \ 617 + else \ 618 + lname##_controls_clearbit(vmx, val); \ 611 619 } 612 620 BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS, 32) 613 621 BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS, 32) ··· 713 705 } 714 706 715 707 void dump_vmcs(struct kvm_vcpu *vcpu); 708 + 709 + static inline int vmx_get_instr_info_reg(u32 vmx_instr_info) 710 + { 711 + return (vmx_instr_info >> 3) & 0xf; 712 + } 716 713 717 714 static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info) 718 715 {
+1 -1
arch/x86/kvm/vmx/x86_ops.h
··· 52 52 int trig_mode, int vector); 53 53 void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu); 54 54 bool vmx_has_emulated_msr(struct kvm *kvm, u32 index); 55 - void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu); 55 + void vmx_recalc_intercepts(struct kvm_vcpu *vcpu); 56 56 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); 57 57 void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); 58 58 int vmx_get_feature_msr(u32 msr, u64 *data);
+189 -147
arch/x86/kvm/x86.c
··· 164 164 static u32 __read_mostly tsc_tolerance_ppm = 250; 165 165 module_param(tsc_tolerance_ppm, uint, 0644); 166 166 167 - static bool __read_mostly vector_hashing = true; 168 - module_param(vector_hashing, bool, 0444); 169 - 170 167 bool __read_mostly enable_vmware_backdoor = false; 171 168 module_param(enable_vmware_backdoor, bool, 0444); 172 169 EXPORT_SYMBOL_GPL(enable_vmware_backdoor); ··· 364 367 MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 365 368 MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, 366 369 MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, 370 + MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET, 367 371 }; 368 372 369 373 static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) + ··· 1577 1579 1578 1580 int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu) 1579 1581 { 1580 - u32 ecx = kvm_rcx_read(vcpu); 1582 + u32 pmc = kvm_rcx_read(vcpu); 1581 1583 u64 data; 1582 1584 1583 - if (kvm_pmu_rdpmc(vcpu, ecx, &data)) { 1585 + if (kvm_pmu_rdpmc(vcpu, pmc, &data)) { 1584 1586 kvm_inject_gp(vcpu, 0); 1585 1587 return 1; 1586 1588 } ··· 1903 1905 * Returns 0 on success, non-0 otherwise. 1904 1906 * Assumes vcpu_load() was already called. 
1905 1907 */ 1906 - int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, 1907 - bool host_initiated) 1908 + static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, 1909 + bool host_initiated) 1908 1910 { 1909 1911 struct msr_data msr; 1910 1912 int ret; ··· 1930 1932 return ret; 1931 1933 } 1932 1934 1935 + int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) 1936 + { 1937 + return __kvm_set_msr(vcpu, index, data, true); 1938 + } 1939 + 1940 + int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) 1941 + { 1942 + return __kvm_get_msr(vcpu, index, data, true); 1943 + } 1944 + 1933 1945 static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, 1934 1946 u32 index, u64 *data, bool host_initiated) 1935 1947 { ··· 1947 1939 __kvm_get_msr); 1948 1940 } 1949 1941 1950 - int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) 1942 + int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) 1943 + { 1944 + return kvm_get_msr_ignored_check(vcpu, index, data, false); 1945 + } 1946 + EXPORT_SYMBOL_GPL(__kvm_emulate_msr_read); 1947 + 1948 + int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) 1949 + { 1950 + return kvm_set_msr_ignored_check(vcpu, index, data, false); 1951 + } 1952 + EXPORT_SYMBOL_GPL(__kvm_emulate_msr_write); 1953 + 1954 + int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data) 1951 1955 { 1952 1956 if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) 1953 1957 return KVM_MSR_RET_FILTERED; 1954 - return kvm_get_msr_ignored_check(vcpu, index, data, false); 1955 - } 1956 - EXPORT_SYMBOL_GPL(kvm_get_msr_with_filter); 1957 1958 1958 - int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) 1959 + return __kvm_emulate_msr_read(vcpu, index, data); 1960 + } 1961 + EXPORT_SYMBOL_GPL(kvm_emulate_msr_read); 1962 + 1963 + int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data) 1959 1964 { 1960 1965 if (!kvm_msr_allowed(vcpu, index, 
KVM_MSR_FILTER_WRITE)) 1961 1966 return KVM_MSR_RET_FILTERED; 1962 - return kvm_set_msr_ignored_check(vcpu, index, data, false); 1963 - } 1964 - EXPORT_SYMBOL_GPL(kvm_set_msr_with_filter); 1965 1967 1966 - int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) 1967 - { 1968 - return kvm_get_msr_ignored_check(vcpu, index, data, false); 1968 + return __kvm_emulate_msr_write(vcpu, index, data); 1969 1969 } 1970 - EXPORT_SYMBOL_GPL(kvm_get_msr); 1970 + EXPORT_SYMBOL_GPL(kvm_emulate_msr_write); 1971 1971 1972 - int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) 1973 - { 1974 - return kvm_set_msr_ignored_check(vcpu, index, data, false); 1975 - } 1976 - EXPORT_SYMBOL_GPL(kvm_set_msr); 1977 1972 1978 1973 static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu) 1979 1974 { ··· 2005 1994 static int complete_fast_rdmsr(struct kvm_vcpu *vcpu) 2006 1995 { 2007 1996 complete_userspace_rdmsr(vcpu); 1997 + return complete_fast_msr_access(vcpu); 1998 + } 1999 + 2000 + static int complete_fast_rdmsr_imm(struct kvm_vcpu *vcpu) 2001 + { 2002 + if (!vcpu->run->msr.error) 2003 + kvm_register_write(vcpu, vcpu->arch.cui_rdmsr_imm_reg, 2004 + vcpu->run->msr.data); 2005 + 2008 2006 return complete_fast_msr_access(vcpu); 2009 2007 } 2010 2008 ··· 2051 2031 return 1; 2052 2032 } 2053 2033 2054 - int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) 2034 + static int __kvm_emulate_rdmsr(struct kvm_vcpu *vcpu, u32 msr, int reg, 2035 + int (*complete_rdmsr)(struct kvm_vcpu *)) 2055 2036 { 2056 - u32 ecx = kvm_rcx_read(vcpu); 2057 2037 u64 data; 2058 2038 int r; 2059 2039 2060 - r = kvm_get_msr_with_filter(vcpu, ecx, &data); 2040 + r = kvm_emulate_msr_read(vcpu, msr, &data); 2061 2041 2062 2042 if (!r) { 2063 - trace_kvm_msr_read(ecx, data); 2043 + trace_kvm_msr_read(msr, data); 2064 2044 2065 - kvm_rax_write(vcpu, data & -1u); 2066 - kvm_rdx_write(vcpu, (data >> 32) & -1u); 2045 + if (reg < 0) { 2046 + kvm_rax_write(vcpu, data & -1u); 2047 + kvm_rdx_write(vcpu, (data >> 32) & -1u); 
2048 + } else { 2049 + kvm_register_write(vcpu, reg, data); 2050 + } 2067 2051 } else { 2068 2052 /* MSR read failed? See if we should ask user space */ 2069 - if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0, 2070 - complete_fast_rdmsr, r)) 2053 + if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_RDMSR, 0, 2054 + complete_rdmsr, r)) 2071 2055 return 0; 2072 - trace_kvm_msr_read_ex(ecx); 2056 + trace_kvm_msr_read_ex(msr); 2073 2057 } 2074 2058 2075 2059 return kvm_x86_call(complete_emulated_msr)(vcpu, r); 2076 2060 } 2061 + 2062 + int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) 2063 + { 2064 + return __kvm_emulate_rdmsr(vcpu, kvm_rcx_read(vcpu), -1, 2065 + complete_fast_rdmsr); 2066 + } 2077 2067 EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr); 2078 2068 2079 - int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) 2069 + int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) 2080 2070 { 2081 - u32 ecx = kvm_rcx_read(vcpu); 2082 - u64 data = kvm_read_edx_eax(vcpu); 2071 + vcpu->arch.cui_rdmsr_imm_reg = reg; 2072 + 2073 + return __kvm_emulate_rdmsr(vcpu, msr, reg, complete_fast_rdmsr_imm); 2074 + } 2075 + EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr_imm); 2076 + 2077 + static int __kvm_emulate_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 2078 + { 2083 2079 int r; 2084 2080 2085 - r = kvm_set_msr_with_filter(vcpu, ecx, data); 2086 - 2081 + r = kvm_emulate_msr_write(vcpu, msr, data); 2087 2082 if (!r) { 2088 - trace_kvm_msr_write(ecx, data); 2083 + trace_kvm_msr_write(msr, data); 2089 2084 } else { 2090 2085 /* MSR write failed? 
See if we should ask user space */ 2091 - if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data, 2086 + if (kvm_msr_user_space(vcpu, msr, KVM_EXIT_X86_WRMSR, data, 2092 2087 complete_fast_msr_access, r)) 2093 2088 return 0; 2094 2089 /* Signal all other negative errors to userspace */ 2095 2090 if (r < 0) 2096 2091 return r; 2097 - trace_kvm_msr_write_ex(ecx, data); 2092 + trace_kvm_msr_write_ex(msr, data); 2098 2093 } 2099 2094 2100 2095 return kvm_x86_call(complete_emulated_msr)(vcpu, r); 2101 2096 } 2097 + 2098 + int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) 2099 + { 2100 + return __kvm_emulate_wrmsr(vcpu, kvm_rcx_read(vcpu), 2101 + kvm_read_edx_eax(vcpu)); 2102 + } 2102 2103 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); 2104 + 2105 + int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) 2106 + { 2107 + return __kvm_emulate_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); 2108 + } 2109 + EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr_imm); 2103 2110 2104 2111 int kvm_emulate_as_nop(struct kvm_vcpu *vcpu) 2105 2112 { ··· 2139 2092 return kvm_emulate_as_nop(vcpu); 2140 2093 } 2141 2094 EXPORT_SYMBOL_GPL(kvm_emulate_invd); 2095 + 2096 + fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu) 2097 + { 2098 + if (!kvm_emulate_invd(vcpu)) 2099 + return EXIT_FASTPATH_EXIT_USERSPACE; 2100 + 2101 + return EXIT_FASTPATH_REENTER_GUEST; 2102 + } 2103 + EXPORT_SYMBOL_GPL(handle_fastpath_invd); 2142 2104 2143 2105 int kvm_handle_invalid_op(struct kvm_vcpu *vcpu) 2144 2106 { ··· 2196 2140 kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); 2197 2141 } 2198 2142 2199 - /* 2200 - * The fast path for frequent and performance sensitive wrmsr emulation, 2201 - * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces 2202 - * the latency of virtual IPI by avoiding the expensive bits of transitioning 2203 - * from guest to host, e.g. reacquiring KVM's SRCU lock. 
In contrast to the 2204 - * other cases which must be called after interrupts are enabled on the host. 2205 - */ 2206 - static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data) 2143 + static fastpath_t __handle_fastpath_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 2207 2144 { 2208 - if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic)) 2209 - return 1; 2210 - 2211 - if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) && 2212 - ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) && 2213 - ((data & APIC_MODE_MASK) == APIC_DM_FIXED) && 2214 - ((u32)(data >> 32) != X2APIC_BROADCAST)) 2215 - return kvm_x2apic_icr_write(vcpu->arch.apic, data); 2216 - 2217 - return 1; 2218 - } 2219 - 2220 - static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data) 2221 - { 2222 - if (!kvm_can_use_hv_timer(vcpu)) 2223 - return 1; 2224 - 2225 - kvm_set_lapic_tscdeadline_msr(vcpu, data); 2226 - return 0; 2227 - } 2228 - 2229 - fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) 2230 - { 2231 - u32 msr = kvm_rcx_read(vcpu); 2232 - u64 data; 2233 - fastpath_t ret; 2234 - bool handled; 2235 - 2236 - kvm_vcpu_srcu_read_lock(vcpu); 2237 - 2238 2145 switch (msr) { 2239 2146 case APIC_BASE_MSR + (APIC_ICR >> 4): 2240 - data = kvm_read_edx_eax(vcpu); 2241 - handled = !handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); 2147 + if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic) || 2148 + kvm_x2apic_icr_write_fast(vcpu->arch.apic, data)) 2149 + return EXIT_FASTPATH_NONE; 2242 2150 break; 2243 2151 case MSR_IA32_TSC_DEADLINE: 2244 - data = kvm_read_edx_eax(vcpu); 2245 - handled = !handle_fastpath_set_tscdeadline(vcpu, data); 2152 + kvm_set_lapic_tscdeadline_msr(vcpu, data); 2246 2153 break; 2247 2154 default: 2248 - handled = false; 2249 - break; 2155 + return EXIT_FASTPATH_NONE; 2250 2156 } 2251 2157 2252 - if (handled) { 2253 - if (!kvm_skip_emulated_instruction(vcpu)) 2254 - ret = EXIT_FASTPATH_EXIT_USERSPACE; 2255 - else 
2256 - ret = EXIT_FASTPATH_REENTER_GUEST; 2257 - trace_kvm_msr_write(msr, data); 2258 - } else { 2259 - ret = EXIT_FASTPATH_NONE; 2260 - } 2158 + trace_kvm_msr_write(msr, data); 2261 2159 2262 - kvm_vcpu_srcu_read_unlock(vcpu); 2160 + if (!kvm_skip_emulated_instruction(vcpu)) 2161 + return EXIT_FASTPATH_EXIT_USERSPACE; 2263 2162 2264 - return ret; 2163 + return EXIT_FASTPATH_REENTER_GUEST; 2265 2164 } 2266 - EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); 2165 + 2166 + fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu) 2167 + { 2168 + return __handle_fastpath_wrmsr(vcpu, kvm_rcx_read(vcpu), 2169 + kvm_read_edx_eax(vcpu)); 2170 + } 2171 + EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr); 2172 + 2173 + fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg) 2174 + { 2175 + return __handle_fastpath_wrmsr(vcpu, msr, kvm_register_read(vcpu, reg)); 2176 + } 2177 + EXPORT_SYMBOL_GPL(handle_fastpath_wrmsr_imm); 2267 2178 2268 2179 /* 2269 2180 * Adapt set_msr() to msr_io()'s calling convention ··· 6801 6778 6802 6779 kvm_free_msr_filter(old_filter); 6803 6780 6804 - kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED); 6781 + /* 6782 + * Recalc MSR intercepts as userspace may want to intercept accesses to 6783 + * MSRs that KVM would otherwise pass through to the guest. 6784 + */ 6785 + kvm_make_all_cpus_request(kvm, KVM_REQ_RECALC_INTERCEPTS); 6805 6786 6806 6787 return 0; 6807 6788 } ··· 6998 6971 6999 6972 r = -EEXIST; 7000 6973 if (irqchip_in_kernel(kvm)) 6974 + goto create_irqchip_unlock; 6975 + 6976 + /* 6977 + * Disallow an in-kernel I/O APIC if the VM has protected EOIs, 6978 + * i.e. if KVM can't intercept EOIs and thus can't properly 6979 + * emulate level-triggered interrupts. 
6980 + */ 6981 + r = -ENOTTY; 6982 + if (kvm->arch.has_protected_eoi) 7001 6983 goto create_irqchip_unlock; 7002 6984 7003 6985 r = -EINVAL; ··· 7396 7360 case MSR_AMD64_PERF_CNTR_GLOBAL_CTL: 7397 7361 case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: 7398 7362 case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: 7363 + case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET: 7399 7364 if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) 7400 7365 return; 7401 7366 break; ··· 8397 8360 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 8398 8361 int r; 8399 8362 8400 - r = kvm_get_msr_with_filter(vcpu, msr_index, pdata); 8363 + r = kvm_emulate_msr_read(vcpu, msr_index, pdata); 8401 8364 if (r < 0) 8402 8365 return X86EMUL_UNHANDLEABLE; 8403 8366 ··· 8420 8383 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 8421 8384 int r; 8422 8385 8423 - r = kvm_set_msr_with_filter(vcpu, msr_index, data); 8386 + r = kvm_emulate_msr_write(vcpu, msr_index, data); 8424 8387 if (r < 0) 8425 8388 return X86EMUL_UNHANDLEABLE; 8426 8389 ··· 8440 8403 static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, 8441 8404 u32 msr_index, u64 *pdata) 8442 8405 { 8443 - return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); 8406 + return __kvm_emulate_msr_read(emul_to_vcpu(ctxt), msr_index, pdata); 8444 8407 } 8445 8408 8446 8409 static int emulator_check_rdpmc_early(struct x86_emulate_ctxt *ctxt, u32 pmc) ··· 8512 8475 static bool emulator_is_smm(struct x86_emulate_ctxt *ctxt) 8513 8476 { 8514 8477 return is_smm(emul_to_vcpu(ctxt)); 8515 - } 8516 - 8517 - static bool emulator_is_guest_mode(struct x86_emulate_ctxt *ctxt) 8518 - { 8519 - return is_guest_mode(emul_to_vcpu(ctxt)); 8520 8478 } 8521 8479 8522 8480 #ifndef CONFIG_KVM_SMM ··· 8597 8565 .guest_cpuid_is_intel_compatible = emulator_guest_cpuid_is_intel_compatible, 8598 8566 .set_nmi_mask = emulator_set_nmi_mask, 8599 8567 .is_smm = emulator_is_smm, 8600 - .is_guest_mode = emulator_is_guest_mode, 8601 8568 .leave_smm = emulator_leave_smm, 8602 8569 .triple_fault = 
emulator_triple_fault, 8603 8570 .set_xcr = emulator_set_xcr, ··· 8902 8871 if (unlikely(!r)) 8903 8872 return 0; 8904 8873 8905 - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED); 8874 + kvm_pmu_instruction_retired(vcpu); 8906 8875 8907 8876 /* 8908 8877 * rflags is the old, "raw" value of the flags. The new value has ··· 9181 9150 ctxt->exception.address = 0; 9182 9151 } 9183 9152 9184 - r = x86_emulate_insn(ctxt); 9153 + /* 9154 + * Check L1's instruction intercepts when emulating instructions for 9155 + * L2, unless KVM is re-emulating a previously decoded instruction, 9156 + * e.g. to complete userspace I/O, in which case KVM has already 9157 + * checked the intercepts. 9158 + */ 9159 + r = x86_emulate_insn(ctxt, is_guest_mode(vcpu) && 9160 + !(emulation_type & EMULTYPE_NO_DECODE)); 9185 9161 9186 9162 if (r == EMULATION_INTERCEPTED) 9187 9163 return 1; ··· 9243 9205 */ 9244 9206 if (!ctxt->have_exception || 9245 9207 exception_type(ctxt->exception.vector) == EXCPT_TRAP) { 9246 - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED); 9208 + kvm_pmu_instruction_retired(vcpu); 9247 9209 if (ctxt->is_branch) 9248 - kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED); 9210 + kvm_pmu_branch_retired(vcpu); 9249 9211 kvm_rip_write(vcpu, ctxt->eip); 9250 9212 if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) 9251 9213 r = kvm_vcpu_do_singlestep(vcpu); ··· 10841 10803 if (kvm_check_request(KVM_REQ_APF_READY, vcpu)) 10842 10804 kvm_check_async_pf_completion(vcpu); 10843 10805 10844 - /* 10845 - * Recalc MSR intercepts as userspace may want to intercept 10846 - * accesses to MSRs that KVM would otherwise pass through to 10847 - * the guest. 
10848 - */ 10849 - if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu)) 10850 - kvm_x86_call(recalc_msr_intercepts)(vcpu); 10806 + if (kvm_check_request(KVM_REQ_RECALC_INTERCEPTS, vcpu)) 10807 + kvm_x86_call(recalc_intercepts)(vcpu); 10851 10808 10852 10809 if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu)) 10853 10810 kvm_x86_call(update_cpu_dirty_logging)(vcpu); ··· 11343 11310 11344 11311 fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu) 11345 11312 { 11346 - int ret; 11347 - 11348 - kvm_vcpu_srcu_read_lock(vcpu); 11349 - ret = kvm_emulate_halt(vcpu); 11350 - kvm_vcpu_srcu_read_unlock(vcpu); 11351 - 11352 - if (!ret) 11313 + if (!kvm_emulate_halt(vcpu)) 11353 11314 return EXIT_FASTPATH_EXIT_USERSPACE; 11354 11315 11355 11316 if (kvm_vcpu_running(vcpu)) ··· 12422 12395 kvfree(vcpu->arch.cpuid_entries); 12423 12396 } 12424 12397 12398 + static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) 12399 + { 12400 + struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; 12401 + u64 xfeatures_mask; 12402 + int i; 12403 + 12404 + /* 12405 + * Guest FPU state is zero allocated and so doesn't need to be manually 12406 + * cleared on RESET, i.e. during vCPU creation. 12407 + */ 12408 + if (!init_event || !fpstate) 12409 + return; 12410 + 12411 + /* 12412 + * On INIT, only select XSTATE components are zeroed, most components 12413 + * are unchanged. Currently, the only components that are zeroed and 12414 + * supported by KVM are MPX related. 12415 + */ 12416 + xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) & 12417 + (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); 12418 + if (!xfeatures_mask) 12419 + return; 12420 + 12421 + BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX); 12422 + 12423 + /* 12424 + * All paths that lead to INIT are required to load the guest's FPU 12425 + * state (because most paths are buried in KVM_RUN). 
12426 + */ 12427 + kvm_put_guest_fpu(vcpu); 12428 + for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX) 12429 + fpstate_clear_xstate_component(fpstate, i); 12430 + kvm_load_guest_fpu(vcpu); 12431 + } 12432 + 12425 12433 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) 12426 12434 { 12427 12435 struct kvm_cpuid_entry2 *cpuid_0x1; ··· 12514 12452 kvm_async_pf_hash_reset(vcpu); 12515 12453 vcpu->arch.apf.halted = false; 12516 12454 12517 - if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) { 12518 - struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; 12519 - 12520 - /* 12521 - * All paths that lead to INIT are required to load the guest's 12522 - * FPU state (because most paths are buried in KVM_RUN). 12523 - */ 12524 - if (init_event) 12525 - kvm_put_guest_fpu(vcpu); 12526 - 12527 - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS); 12528 - fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR); 12529 - 12530 - if (init_event) 12531 - kvm_load_guest_fpu(vcpu); 12532 - } 12455 + kvm_xstate_reset(vcpu, init_event); 12533 12456 12534 12457 if (!init_event) { 12535 12458 vcpu->arch.smbase = 0x30000; ··· 12526 12479 MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; 12527 12480 12528 12481 __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP); 12529 - __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true); 12482 + kvm_msr_write(vcpu, MSR_IA32_XSS, 0); 12530 12483 } 12531 12484 12532 12485 /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ ··· 13572 13525 return atomic_read(&kvm->arch.noncoherent_dma_count); 13573 13526 } 13574 13527 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); 13575 - 13576 - bool kvm_vector_hashing_enabled(void) 13577 - { 13578 - return vector_hashing; 13579 - } 13580 13528 13581 13529 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) 13582 13530 {
+3 -2
arch/x86/kvm/x86.h
··· 431 431 432 432 int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); 433 433 int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); 434 - bool kvm_vector_hashing_enabled(void); 435 434 void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code); 436 435 int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, 437 436 void *insn, int insn_len); 438 437 int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, 439 438 int emulation_type, void *insn, int insn_len); 440 - fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); 439 + fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu); 440 + fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); 441 441 fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu); 442 + fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu); 442 443 443 444 extern struct kvm_caps kvm_caps; 444 445 extern struct kvm_host_values kvm_host;
+5 -3
tools/testing/selftests/kvm/x86/pmu_counters_test.c
··· 14 14 #define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) 15 15 16 16 /* 17 - * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 18 - * 1 LOOP. 17 + * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP, 18 + * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP. 19 19 */ 20 - #define NUM_INSNS_PER_LOOP 4 20 + #define NUM_INSNS_PER_LOOP 6 21 21 22 22 /* 23 23 * Number of "extra" instructions that will be counted, i.e. the number of ··· 226 226 __asm__ __volatile__("wrmsr\n\t" \ 227 227 " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ 228 228 "1:\n\t" \ 229 + FEP "enter $0, $0\n\t" \ 229 230 clflush "\n\t" \ 230 231 "mfence\n\t" \ 231 232 "mov %[m], %%eax\n\t" \ 233 + FEP "leave\n\t" \ 232 234 FEP "loop 1b\n\t" \ 233 235 FEP "mov %%edi, %%ecx\n\t" \ 234 236 FEP "xor %%eax, %%eax\n\t" \