Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"Bugfixes (arm and x86) and cleanups"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
selftests: kvm: Adding config fragments
KVM: selftests: Update gitignore file for latest changes
kvm: remove unnecessary PageReserved check
KVM: arm/arm64: vgic: Reevaluate level sensitive interrupts on enable
KVM: arm: Don't write junk to CP15 registers on reset
KVM: arm64: Don't write junk to sysregs on reset
KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block
x86: kvm: remove useless calls to kvm_para_available
KVM: no need to check return value of debugfs_create functions
KVM: remove kvm_arch_has_vcpu_debugfs()
KVM: Fix leak vCPU's VMCS value into other pCPU
KVM: Check preempted_in_kernel for involuntary preemption
KVM: LAPIC: Don't need to wakeup vCPU twice after timer fire
arm64: KVM: hyp: debug-sr: Mark expected switch fall-through
KVM: arm64: Update kvm_arm_exception_class and esr_class_str for new EC
KVM: arm: vgic-v3: Mark expected switch fall-through
arm64: KVM: regmap: Fix unexpected switch fall-through
KVM: arm/arm64: Introduce kvm_pmu_vcpu_init() to setup PMU counter index

+249 -142
+15 -8
arch/arm/kvm/coproc.c
··· 651 651 } 652 652 653 653 static void reset_coproc_regs(struct kvm_vcpu *vcpu, 654 - const struct coproc_reg *table, size_t num) 654 + const struct coproc_reg *table, size_t num, 655 + unsigned long *bmap) 655 656 { 656 657 unsigned long i; 657 658 658 659 for (i = 0; i < num; i++) 659 - if (table[i].reset) 660 + if (table[i].reset) { 661 + int reg = table[i].reg; 662 + 660 663 table[i].reset(vcpu, &table[i]); 664 + if (reg > 0 && reg < NR_CP15_REGS) { 665 + set_bit(reg, bmap); 666 + if (table[i].is_64bit) 667 + set_bit(reg + 1, bmap); 668 + } 669 + } 661 670 } 662 671 663 672 static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu) ··· 1441 1432 { 1442 1433 size_t num; 1443 1434 const struct coproc_reg *table; 1444 - 1445 - /* Catch someone adding a register without putting in reset entry. */ 1446 - memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15)); 1435 + DECLARE_BITMAP(bmap, NR_CP15_REGS) = { 0, }; 1447 1436 1448 1437 /* Generic chip reset first (so target could override). */ 1449 - reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); 1438 + reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs), bmap); 1450 1439 1451 1440 table = get_target_table(vcpu->arch.target, &num); 1452 - reset_coproc_regs(vcpu, table, num); 1441 + reset_coproc_regs(vcpu, table, num, bmap); 1453 1442 1454 1443 for (num = 1; num < NR_CP15_REGS; num++) 1455 - WARN(vcpu_cp15(vcpu, num) == 0x42424242, 1444 + WARN(!test_bit(num, bmap), 1456 1445 "Didn't reset vcpu_cp15(vcpu, %zi)", num); 1457 1446 }
+4 -3
arch/arm64/include/asm/kvm_arm.h
··· 316 316 317 317 #define kvm_arm_exception_class \ 318 318 ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ 319 - ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \ 320 - ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \ 321 - ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ 319 + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \ 320 + ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \ 321 + ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \ 322 + ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ 322 323 ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ 323 324 ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ 324 325 ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+1
arch/arm64/kernel/traps.c
··· 733 733 [ESR_ELx_EC_CP14_LS] = "CP14 LDC/STC", 734 734 [ESR_ELx_EC_FP_ASIMD] = "ASIMD", 735 735 [ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS", 736 + [ESR_ELx_EC_PAC] = "PAC", 736 737 [ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC", 737 738 [ESR_ELx_EC_ILL] = "PSTATE.IL", 738 739 [ESR_ELx_EC_SVC32] = "SVC (AArch32)",
+30
arch/arm64/kvm/hyp/debug-sr.c
··· 18 18 #define save_debug(ptr,reg,nr) \ 19 19 switch (nr) { \ 20 20 case 15: ptr[15] = read_debug(reg, 15); \ 21 + /* Fall through */ \ 21 22 case 14: ptr[14] = read_debug(reg, 14); \ 23 + /* Fall through */ \ 22 24 case 13: ptr[13] = read_debug(reg, 13); \ 25 + /* Fall through */ \ 23 26 case 12: ptr[12] = read_debug(reg, 12); \ 27 + /* Fall through */ \ 24 28 case 11: ptr[11] = read_debug(reg, 11); \ 29 + /* Fall through */ \ 25 30 case 10: ptr[10] = read_debug(reg, 10); \ 31 + /* Fall through */ \ 26 32 case 9: ptr[9] = read_debug(reg, 9); \ 33 + /* Fall through */ \ 27 34 case 8: ptr[8] = read_debug(reg, 8); \ 35 + /* Fall through */ \ 28 36 case 7: ptr[7] = read_debug(reg, 7); \ 37 + /* Fall through */ \ 29 38 case 6: ptr[6] = read_debug(reg, 6); \ 39 + /* Fall through */ \ 30 40 case 5: ptr[5] = read_debug(reg, 5); \ 41 + /* Fall through */ \ 31 42 case 4: ptr[4] = read_debug(reg, 4); \ 43 + /* Fall through */ \ 32 44 case 3: ptr[3] = read_debug(reg, 3); \ 45 + /* Fall through */ \ 33 46 case 2: ptr[2] = read_debug(reg, 2); \ 47 + /* Fall through */ \ 34 48 case 1: ptr[1] = read_debug(reg, 1); \ 49 + /* Fall through */ \ 35 50 default: ptr[0] = read_debug(reg, 0); \ 36 51 } 37 52 38 53 #define restore_debug(ptr,reg,nr) \ 39 54 switch (nr) { \ 40 55 case 15: write_debug(ptr[15], reg, 15); \ 56 + /* Fall through */ \ 41 57 case 14: write_debug(ptr[14], reg, 14); \ 58 + /* Fall through */ \ 42 59 case 13: write_debug(ptr[13], reg, 13); \ 60 + /* Fall through */ \ 43 61 case 12: write_debug(ptr[12], reg, 12); \ 62 + /* Fall through */ \ 44 63 case 11: write_debug(ptr[11], reg, 11); \ 64 + /* Fall through */ \ 45 65 case 10: write_debug(ptr[10], reg, 10); \ 66 + /* Fall through */ \ 46 67 case 9: write_debug(ptr[9], reg, 9); \ 68 + /* Fall through */ \ 47 69 case 8: write_debug(ptr[8], reg, 8); \ 70 + /* Fall through */ \ 48 71 case 7: write_debug(ptr[7], reg, 7); \ 72 + /* Fall through */ \ 49 73 case 6: write_debug(ptr[6], reg, 6); \ 74 + /* Fall through */ \ 
50 75 case 5: write_debug(ptr[5], reg, 5); \ 76 + /* Fall through */ \ 51 77 case 4: write_debug(ptr[4], reg, 4); \ 78 + /* Fall through */ \ 52 79 case 3: write_debug(ptr[3], reg, 3); \ 80 + /* Fall through */ \ 53 81 case 2: write_debug(ptr[2], reg, 2); \ 82 + /* Fall through */ \ 54 83 case 1: write_debug(ptr[1], reg, 1); \ 84 + /* Fall through */ \ 55 85 default: write_debug(ptr[0], reg, 0); \ 56 86 } 57 87
+5
arch/arm64/kvm/regmap.c
··· 178 178 switch (spsr_idx) { 179 179 case KVM_SPSR_SVC: 180 180 write_sysreg_el1(v, SYS_SPSR); 181 + break; 181 182 case KVM_SPSR_ABT: 182 183 write_sysreg(v, spsr_abt); 184 + break; 183 185 case KVM_SPSR_UND: 184 186 write_sysreg(v, spsr_und); 187 + break; 185 188 case KVM_SPSR_IRQ: 186 189 write_sysreg(v, spsr_irq); 190 + break; 187 191 case KVM_SPSR_FIQ: 188 192 write_sysreg(v, spsr_fiq); 193 + break; 189 194 } 190 195 }
+18 -14
arch/arm64/kvm/sys_regs.c
··· 632 632 */ 633 633 val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) 634 634 | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); 635 - __vcpu_sys_reg(vcpu, PMCR_EL0) = val; 635 + __vcpu_sys_reg(vcpu, r->reg) = val; 636 636 } 637 637 638 638 static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) ··· 981 981 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ 982 982 #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ 983 983 { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ 984 - trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ 984 + trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \ 985 985 { SYS_DESC(SYS_DBGBCRn_EL1(n)), \ 986 - trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ 986 + trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \ 987 987 { SYS_DESC(SYS_DBGWVRn_EL1(n)), \ 988 - trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ 988 + trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \ 989 989 { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ 990 - trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } 990 + trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } 991 991 992 992 /* Macro to expand the PMEVCNTRn_EL0 register */ 993 993 #define PMU_PMEVCNTR_EL0(n) \ ··· 1540 1540 { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, 1541 1541 { SYS_DESC(SYS_CTR_EL0), access_ctr }, 1542 1542 1543 - { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, 1543 + { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, 1544 1544 { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, 1545 1545 { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, 1546 1546 { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, ··· 2254 2254 } 2255 2255 2256 2256 static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, 2257 - const struct sys_reg_desc *table, size_t num) 2257 + const struct sys_reg_desc *table, size_t num, 2258 + unsigned long *bmap) 2258 2259 { 2259 2260 unsigned long i; 2260 2261 2261 2262 for (i = 0; i < num; i++) 2262 - if 
(table[i].reset) 2263 + if (table[i].reset) { 2264 + int reg = table[i].reg; 2265 + 2263 2266 table[i].reset(vcpu, &table[i]); 2267 + if (reg > 0 && reg < NR_SYS_REGS) 2268 + set_bit(reg, bmap); 2269 + } 2264 2270 } 2265 2271 2266 2272 /** ··· 2780 2774 { 2781 2775 size_t num; 2782 2776 const struct sys_reg_desc *table; 2783 - 2784 - /* Catch someone adding a register without putting in reset entry. */ 2785 - memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs)); 2777 + DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, }; 2786 2778 2787 2779 /* Generic chip reset first (so target could override). */ 2788 - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); 2780 + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap); 2789 2781 2790 2782 table = get_target_table(vcpu->arch.target, true, &num); 2791 - reset_sys_reg_descs(vcpu, table, num); 2783 + reset_sys_reg_descs(vcpu, table, num, bmap); 2792 2784 2793 2785 for (num = 1; num < NR_SYS_REGS; num++) { 2794 - if (WARN(__vcpu_sys_reg(vcpu, num) == 0x4242424242424242, 2786 + if (WARN(!test_bit(num, bmap), 2795 2787 "Didn't reset __vcpu_sys_reg(%zi)\n", num)) 2796 2788 break; 2797 2789 }
-10
arch/mips/kvm/mips.c
··· 150 150 return 0; 151 151 } 152 152 153 - bool kvm_arch_has_vcpu_debugfs(void) 154 - { 155 - return false; 156 - } 157 - 158 - int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 159 - { 160 - return 0; 161 - } 162 - 163 153 void kvm_mips_free_vcpus(struct kvm *kvm) 164 154 { 165 155 unsigned int i;
+5 -10
arch/powerpc/kvm/powerpc.c
··· 50 50 return !!(v->arch.pending_exceptions) || kvm_request_pending(v); 51 51 } 52 52 53 + bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) 54 + { 55 + return kvm_arch_vcpu_runnable(vcpu); 56 + } 57 + 53 58 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 54 59 { 55 60 return false; ··· 455 450 return kvmppc_core_init_vm(kvm); 456 451 err_out: 457 452 return -EINVAL; 458 - } 459 - 460 - bool kvm_arch_has_vcpu_debugfs(void) 461 - { 462 - return false; 463 - } 464 - 465 - int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 466 - { 467 - return 0; 468 453 } 469 454 470 455 void kvm_arch_destroy_vm(struct kvm *kvm)
-10
arch/s390/kvm/kvm-s390.c
··· 2516 2516 return rc; 2517 2517 } 2518 2518 2519 - bool kvm_arch_has_vcpu_debugfs(void) 2520 - { 2521 - return false; 2522 - } 2523 - 2524 - int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 2525 - { 2526 - return 0; 2527 - } 2528 - 2529 2519 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 2530 2520 { 2531 2521 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+3
arch/x86/include/asm/kvm_host.h
··· 35 35 #include <asm/kvm_vcpu_regs.h> 36 36 #include <asm/hyperv-tlfs.h> 37 37 38 + #define __KVM_HAVE_ARCH_VCPU_DEBUGFS 39 + 38 40 #define KVM_MAX_VCPUS 288 39 41 #define KVM_SOFT_MAX_VCPUS 240 40 42 #define KVM_MAX_VCPU_ID 1023 ··· 1177 1175 int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, 1178 1176 uint32_t guest_irq, bool set); 1179 1177 void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); 1178 + bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); 1180 1179 1181 1180 int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, 1182 1181 bool *expired);
-8
arch/x86/kernel/kvm.c
··· 308 308 309 309 static void kvm_guest_cpu_init(void) 310 310 { 311 - if (!kvm_para_available()) 312 - return; 313 - 314 311 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { 315 312 u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); 316 313 ··· 622 625 { 623 626 int i; 624 627 625 - if (!kvm_para_available()) 626 - return; 627 - 628 628 paravirt_ops_setup(); 629 629 register_reboot_notifier(&kvm_pv_reboot_nb); 630 630 for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) ··· 842 848 */ 843 849 void __init kvm_spinlock_init(void) 844 850 { 845 - if (!kvm_para_available()) 846 - return; 847 851 /* Does host kernel support KVM_FEATURE_PV_UNHALT? */ 848 852 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) 849 853 return;
+13 -33
arch/x86/kvm/debugfs.c
··· 8 8 #include <linux/debugfs.h> 9 9 #include "lapic.h" 10 10 11 - bool kvm_arch_has_vcpu_debugfs(void) 12 - { 13 - return true; 14 - } 15 - 16 11 static int vcpu_get_timer_advance_ns(void *data, u64 *val) 17 12 { 18 13 struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; ··· 43 48 44 49 DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n"); 45 50 46 - int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 51 + void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 47 52 { 48 - struct dentry *ret; 53 + debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu, 54 + &vcpu_tsc_offset_fops); 49 55 50 - ret = debugfs_create_file("tsc-offset", 0444, 51 - vcpu->debugfs_dentry, 52 - vcpu, &vcpu_tsc_offset_fops); 53 - if (!ret) 54 - return -ENOMEM; 55 - 56 - if (lapic_in_kernel(vcpu)) { 57 - ret = debugfs_create_file("lapic_timer_advance_ns", 0444, 58 - vcpu->debugfs_dentry, 59 - vcpu, &vcpu_timer_advance_ns_fops); 60 - if (!ret) 61 - return -ENOMEM; 62 - } 56 + if (lapic_in_kernel(vcpu)) 57 + debugfs_create_file("lapic_timer_advance_ns", 0444, 58 + vcpu->debugfs_dentry, vcpu, 59 + &vcpu_timer_advance_ns_fops); 63 60 64 61 if (kvm_has_tsc_control) { 65 - ret = debugfs_create_file("tsc-scaling-ratio", 0444, 66 - vcpu->debugfs_dentry, 67 - vcpu, &vcpu_tsc_scaling_fops); 68 - if (!ret) 69 - return -ENOMEM; 70 - ret = debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, 71 - vcpu->debugfs_dentry, 72 - vcpu, &vcpu_tsc_scaling_frac_fops); 73 - if (!ret) 74 - return -ENOMEM; 75 - 62 + debugfs_create_file("tsc-scaling-ratio", 0444, 63 + vcpu->debugfs_dentry, vcpu, 64 + &vcpu_tsc_scaling_fops); 65 + debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, 66 + vcpu->debugfs_dentry, vcpu, 67 + &vcpu_tsc_scaling_frac_fops); 76 68 } 77 - 78 - return 0; 79 69 }
-8
arch/x86/kvm/lapic.c
··· 1548 1548 static void apic_timer_expired(struct kvm_lapic *apic) 1549 1549 { 1550 1550 struct kvm_vcpu *vcpu = apic->vcpu; 1551 - struct swait_queue_head *q = &vcpu->wq; 1552 1551 struct kvm_timer *ktimer = &apic->lapic_timer; 1553 1552 1554 1553 if (atomic_read(&apic->lapic_timer.pending)) ··· 1565 1566 1566 1567 atomic_inc(&apic->lapic_timer.pending); 1567 1568 kvm_set_pending_timer(vcpu); 1568 - 1569 - /* 1570 - * For x86, the atomic_inc() is serialized, thus 1571 - * using swait_active() is safe. 1572 - */ 1573 - if (swait_active(q)) 1574 - swake_up_one(q); 1575 1569 } 1576 1570 1577 1571 static void start_sw_tscdeadline(struct kvm_lapic *apic)
+6
arch/x86/kvm/svm.c
··· 5190 5190 kvm_vcpu_wake_up(vcpu); 5191 5191 } 5192 5192 5193 + static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) 5194 + { 5195 + return false; 5196 + } 5197 + 5193 5198 static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) 5194 5199 { 5195 5200 unsigned long flags; ··· 7319 7314 7320 7315 .pmu_ops = &amd_pmu_ops, 7321 7316 .deliver_posted_interrupt = svm_deliver_avic_intr, 7317 + .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, 7322 7318 .update_pi_irte = svm_update_pi_irte, 7323 7319 .setup_mce = svm_setup_mce, 7324 7320
+6
arch/x86/kvm/vmx/vmx.c
··· 6117 6117 return max_irr; 6118 6118 } 6119 6119 6120 + static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) 6121 + { 6122 + return pi_test_on(vcpu_to_pi_desc(vcpu)); 6123 + } 6124 + 6120 6125 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 6121 6126 { 6122 6127 if (!kvm_vcpu_apicv_active(vcpu)) ··· 7731 7726 .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, 7732 7727 .sync_pir_to_irr = vmx_sync_pir_to_irr, 7733 7728 .deliver_posted_interrupt = vmx_deliver_posted_interrupt, 7729 + .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, 7734 7730 7735 7731 .set_tss_addr = vmx_set_tss_addr, 7736 7732 .set_identity_map_addr = vmx_set_identity_map_addr,
+16
arch/x86/kvm/x86.c
··· 9698 9698 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); 9699 9699 } 9700 9700 9701 + bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) 9702 + { 9703 + if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) 9704 + return true; 9705 + 9706 + if (kvm_test_request(KVM_REQ_NMI, vcpu) || 9707 + kvm_test_request(KVM_REQ_SMI, vcpu) || 9708 + kvm_test_request(KVM_REQ_EVENT, vcpu)) 9709 + return true; 9710 + 9711 + if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu)) 9712 + return true; 9713 + 9714 + return false; 9715 + } 9716 + 9701 9717 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 9702 9718 { 9703 9719 return vcpu->arch.preempted_in_kernel;
+2
include/kvm/arm_pmu.h
··· 34 34 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); 35 35 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); 36 36 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu); 37 + void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu); 37 38 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); 38 39 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); 39 40 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val); ··· 72 71 { 73 72 return 0; 74 73 } 74 + static inline void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) {} 75 75 static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} 76 76 static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} 77 77 static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {}
+1
include/kvm/arm_vgic.h
··· 350 350 351 351 void kvm_vgic_load(struct kvm_vcpu *vcpu); 352 352 void kvm_vgic_put(struct kvm_vcpu *vcpu); 353 + void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu); 353 354 354 355 #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) 355 356 #define vgic_initialized(k) ((k)->arch.vgic.initialized)
+4 -2
include/linux/kvm_host.h
··· 861 861 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); 862 862 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); 863 863 864 - bool kvm_arch_has_vcpu_debugfs(void); 865 - int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); 864 + #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS 865 + void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); 866 + #endif 866 867 867 868 int kvm_arch_hardware_enable(void); 868 869 void kvm_arch_hardware_disable(void); ··· 873 872 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); 874 873 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); 875 874 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); 875 + bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); 876 876 877 877 #ifndef __KVM_HAVE_ARCH_VM_ALLOC 878 878 /*
+2 -1
tools/testing/selftests/kvm/.gitignore
··· 1 + /s390x/sync_regs_test 1 2 /x86_64/cr4_cpuid_sync_test 2 3 /x86_64/evmcs_test 3 4 /x86_64/hyperv_cpuid 4 - /x86_64/kvm_create_max_vcpus 5 5 /x86_64/mmio_warning_test 6 6 /x86_64/platform_info_test 7 7 /x86_64/set_sregs_test ··· 13 13 /x86_64/vmx_tsc_adjust_test 14 14 /clear_dirty_log_test 15 15 /dirty_log_test 16 + /kvm_create_max_vcpus
+3
tools/testing/selftests/kvm/config
··· 1 + CONFIG_KVM=y 2 + CONFIG_KVM_INTEL=y 3 + CONFIG_KVM_AMD=y
+13 -5
virt/kvm/arm/arm.c
··· 144 144 return ret; 145 145 } 146 146 147 - bool kvm_arch_has_vcpu_debugfs(void) 148 - { 149 - return false; 150 - } 151 - 152 147 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 153 148 { 154 149 return 0; ··· 318 323 319 324 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) 320 325 { 326 + /* 327 + * If we're about to block (most likely because we've just hit a 328 + * WFI), we need to sync back the state of the GIC CPU interface 329 + * so that we have the lastest PMR and group enables. This ensures 330 + * that kvm_arch_vcpu_runnable has up-to-date data to decide 331 + * whether we have pending interrupts. 332 + */ 333 + preempt_disable(); 334 + kvm_vgic_vmcr_sync(vcpu); 335 + preempt_enable(); 336 + 321 337 kvm_vgic_v4_enable_doorbell(vcpu); 322 338 } 323 339 ··· 345 339 346 340 /* Set up the timer */ 347 341 kvm_timer_vcpu_init(vcpu); 342 + 343 + kvm_pmu_vcpu_init(vcpu); 348 344 349 345 kvm_arm_reset_debug_ptr(vcpu); 350 346
+8
virt/kvm/arm/hyp/vgic-v3-sr.c
··· 349 349 case 7: 350 350 cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); 351 351 cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); 352 + /* Fall through */ 352 353 case 6: 353 354 cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); 355 + /* Fall through */ 354 356 default: 355 357 cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); 356 358 } ··· 361 359 case 7: 362 360 cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); 363 361 cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); 362 + /* Fall through */ 364 363 case 6: 365 364 cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); 365 + /* Fall through */ 366 366 default: 367 367 cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); 368 368 } ··· 386 382 case 7: 387 383 __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); 388 384 __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); 385 + /* Fall through */ 389 386 case 6: 390 387 __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); 388 + /* Fall through */ 391 389 default: 392 390 __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); 393 391 } ··· 398 392 case 7: 399 393 __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); 400 394 __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); 395 + /* Fall through */ 401 396 case 6: 402 397 __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); 398 + /* Fall through */ 403 399 default: 404 400 __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); 405 401 }
+15 -3
virt/kvm/arm/pmu.c
··· 215 215 } 216 216 217 217 /** 218 + * kvm_pmu_vcpu_init - assign pmu counter idx for cpu 219 + * @vcpu: The vcpu pointer 220 + * 221 + */ 222 + void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) 223 + { 224 + int i; 225 + struct kvm_pmu *pmu = &vcpu->arch.pmu; 226 + 227 + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) 228 + pmu->pmc[i].idx = i; 229 + } 230 + 231 + /** 218 232 * kvm_pmu_vcpu_reset - reset pmu state for cpu 219 233 * @vcpu: The vcpu pointer 220 234 * ··· 238 224 int i; 239 225 struct kvm_pmu *pmu = &vcpu->arch.pmu; 240 226 241 - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { 227 + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) 242 228 kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); 243 - pmu->pmc[i].idx = i; 244 - } 245 229 246 230 bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); 247 231 }
+16
virt/kvm/arm/vgic/vgic-mmio.c
··· 113 113 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 114 114 115 115 raw_spin_lock_irqsave(&irq->irq_lock, flags); 116 + if (vgic_irq_is_mapped_level(irq)) { 117 + bool was_high = irq->line_level; 118 + 119 + /* 120 + * We need to update the state of the interrupt because 121 + * the guest might have changed the state of the device 122 + * while the interrupt was disabled at the VGIC level. 123 + */ 124 + irq->line_level = vgic_get_phys_line_level(irq); 125 + /* 126 + * Deactivate the physical interrupt so the GIC will let 127 + * us know when it is asserted again. 128 + */ 129 + if (!irq->active && was_high && !irq->line_level) 130 + vgic_irq_set_phys_active(irq, false); 131 + } 116 132 irq->enabled = true; 117 133 vgic_queue_irq_unlock(vcpu->kvm, irq, flags); 118 134
+8 -1
virt/kvm/arm/vgic/vgic-v2.c
··· 484 484 kvm_vgic_global_state.vctrl_base + GICH_APR); 485 485 } 486 486 487 - void vgic_v2_put(struct kvm_vcpu *vcpu) 487 + void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) 488 488 { 489 489 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 490 490 491 491 cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); 492 + } 493 + 494 + void vgic_v2_put(struct kvm_vcpu *vcpu) 495 + { 496 + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 497 + 498 + vgic_v2_vmcr_sync(vcpu); 492 499 cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); 493 500 }
+6 -1
virt/kvm/arm/vgic/vgic-v3.c
··· 662 662 __vgic_v3_activate_traps(vcpu); 663 663 } 664 664 665 - void vgic_v3_put(struct kvm_vcpu *vcpu) 665 + void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) 666 666 { 667 667 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 668 668 669 669 if (likely(cpu_if->vgic_sre)) 670 670 cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); 671 + } 672 + 673 + void vgic_v3_put(struct kvm_vcpu *vcpu) 674 + { 675 + vgic_v3_vmcr_sync(vcpu); 671 676 672 677 kvm_call_hyp(__vgic_v3_save_aprs, vcpu); 673 678
+11
virt/kvm/arm/vgic/vgic.c
··· 919 919 vgic_v3_put(vcpu); 920 920 } 921 921 922 + void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) 923 + { 924 + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 925 + return; 926 + 927 + if (kvm_vgic_global_state.type == VGIC_V2) 928 + vgic_v2_vmcr_sync(vcpu); 929 + else 930 + vgic_v3_vmcr_sync(vcpu); 931 + } 932 + 922 933 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) 923 934 { 924 935 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+2
virt/kvm/arm/vgic/vgic.h
··· 193 193 void vgic_v2_init_lrs(void); 194 194 void vgic_v2_load(struct kvm_vcpu *vcpu); 195 195 void vgic_v2_put(struct kvm_vcpu *vcpu); 196 + void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); 196 197 197 198 void vgic_v2_save_state(struct kvm_vcpu *vcpu); 198 199 void vgic_v2_restore_state(struct kvm_vcpu *vcpu); ··· 224 223 225 224 void vgic_v3_load(struct kvm_vcpu *vcpu); 226 225 void vgic_v3_put(struct kvm_vcpu *vcpu); 226 + void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); 227 227 228 228 bool vgic_has_its(struct kvm *kvm); 229 229 int kvm_vgic_register_its_device(void);
+36 -25
virt/kvm/kvm_main.c
··· 1855 1855 if (!kvm_is_reserved_pfn(pfn)) { 1856 1856 struct page *page = pfn_to_page(pfn); 1857 1857 1858 - if (!PageReserved(page)) 1859 - SetPageDirty(page); 1858 + SetPageDirty(page); 1860 1859 } 1861 1860 } 1862 1861 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); ··· 2476 2477 #endif 2477 2478 } 2478 2479 2480 + /* 2481 + * Unlike kvm_arch_vcpu_runnable, this function is called outside 2482 + * a vcpu_load/vcpu_put pair. However, for most architectures 2483 + * kvm_arch_vcpu_runnable does not require vcpu_load. 2484 + */ 2485 + bool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) 2486 + { 2487 + return kvm_arch_vcpu_runnable(vcpu); 2488 + } 2489 + 2490 + static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu) 2491 + { 2492 + if (kvm_arch_dy_runnable(vcpu)) 2493 + return true; 2494 + 2495 + #ifdef CONFIG_KVM_ASYNC_PF 2496 + if (!list_empty_careful(&vcpu->async_pf.done)) 2497 + return true; 2498 + #endif 2499 + 2500 + return false; 2501 + } 2502 + 2479 2503 void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) 2480 2504 { 2481 2505 struct kvm *kvm = me->kvm; ··· 2528 2506 continue; 2529 2507 if (vcpu == me) 2530 2508 continue; 2531 - if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) 2509 + if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu)) 2532 2510 continue; 2533 - if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) 2511 + if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && 2512 + !kvm_arch_vcpu_in_kernel(vcpu)) 2534 2513 continue; 2535 2514 if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) 2536 2515 continue; ··· 2614 2591 return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); 2615 2592 } 2616 2593 2617 - static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 2594 + static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) 2618 2595 { 2596 + #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS 2619 2597 char dir_name[ITOA_MAX_LEN * 2]; 2620 - int ret; 2621 - 2622 - if (!kvm_arch_has_vcpu_debugfs()) 2623 - return 0; 
2624 2598 2625 2599 if (!debugfs_initialized()) 2626 - return 0; 2600 + return; 2627 2601 2628 2602 snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); 2629 2603 vcpu->debugfs_dentry = debugfs_create_dir(dir_name, 2630 - vcpu->kvm->debugfs_dentry); 2631 - if (!vcpu->debugfs_dentry) 2632 - return -ENOMEM; 2604 + vcpu->kvm->debugfs_dentry); 2633 2605 2634 - ret = kvm_arch_create_vcpu_debugfs(vcpu); 2635 - if (ret < 0) { 2636 - debugfs_remove_recursive(vcpu->debugfs_dentry); 2637 - return ret; 2638 - } 2639 - 2640 - return 0; 2606 + kvm_arch_create_vcpu_debugfs(vcpu); 2607 + #endif 2641 2608 } 2642 2609 2643 2610 /* ··· 2662 2649 if (r) 2663 2650 goto vcpu_destroy; 2664 2651 2665 - r = kvm_create_vcpu_debugfs(vcpu); 2666 - if (r) 2667 - goto vcpu_destroy; 2652 + kvm_create_vcpu_debugfs(vcpu); 2668 2653 2669 2654 mutex_lock(&kvm->lock); 2670 2655 if (kvm_get_vcpu_by_id(kvm, id)) { ··· 4216 4205 { 4217 4206 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 4218 4207 4219 - vcpu->preempted = false; 4208 + WRITE_ONCE(vcpu->preempted, false); 4220 4209 WRITE_ONCE(vcpu->ready, false); 4221 4210 4222 4211 kvm_arch_sched_in(vcpu, cpu); ··· 4230 4219 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 4231 4220 4232 4221 if (current->state == TASK_RUNNING) { 4233 - vcpu->preempted = true; 4222 + WRITE_ONCE(vcpu->preempted, true); 4234 4223 WRITE_ONCE(vcpu->ready, true); 4235 4224 } 4236 4225 kvm_arch_vcpu_put(vcpu);