Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"s390:

- PCI interpretation compile fixes

RISC-V:

- fix unused variable warnings in vcpu_timer.c

- move extern sbi_ext declarations to a header

x86:

- check validity of argument to KVM_SET_MP_STATE

- use guest's global_ctrl to completely disable guest PEBS

- fix a memory leak on memory allocation failure

- mask off unsupported and unknown bits of IA32_ARCH_CAPABILITIES

- fix build failure with Clang integrated assembler

- fix MSR interception

- always flush TLBs when enabling dirty logging"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: check validity of argument to KVM_SET_MP_STATE
perf/x86/core: Completely disable guest PEBS via guest's global_ctrl
KVM: x86: fix memoryleak in kvm_arch_vcpu_create()
KVM: x86: Mask off unsupported and unknown bits of IA32_ARCH_CAPABILITIES
KVM: s390: pci: Hook to access KVM lowlevel from VFIO
riscv: kvm: move extern sbi_ext declarations to a header
riscv: kvm: vcpu_timer: fix unused variable warnings
KVM: selftests: Fix ambiguous mov in KVM_ASM_SAFE()
KVM: selftests: Fix KVM_EXCEPTION_MAGIC build with Clang
KVM: VMX: Heed the 'msr' argument in msr_write_intercepted()
kvm: x86: mmu: Always flush TLBs when enabling dirty logging
kvm: x86: mmu: Drop the need_remote_flush() function

Changed files: +151 -103
+12
arch/riscv/include/asm/kvm_vcpu_sbi.h
···
                                  u32 type, u64 flags);
 const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid);
 
+#ifdef CONFIG_RISCV_SBI_V01
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
+#endif
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
+
 #endif /* __RISCV_KVM_VCPU_SBI_H__ */
+1 -11
arch/riscv/kvm/vcpu_sbi.c
···
         };
 }
 
-#ifdef CONFIG_RISCV_SBI_V01
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
-#else
+#ifndef CONFIG_RISCV_SBI_V01
 static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
         .extid_start = -1UL,
         .extid_end = -1UL,
         .handler = NULL,
 };
 #endif
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
 
 static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
         &vcpu_sbi_ext_v01,
-4
arch/riscv/kvm/vcpu_timer.c
···
 
 void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
 {
-        struct kvm_vcpu_csr *csr;
         struct kvm_vcpu_timer *t = &vcpu->arch.timer;
 
         kvm_riscv_vcpu_update_timedelta(vcpu);
···
         if (!t->sstc_enabled)
                 return;
 
-        csr = &vcpu->arch.guest_csr;
 #if defined(CONFIG_32BIT)
         csr_write(CSR_VSTIMECMP, (u32)t->next_cycles);
         csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32));
···
 
 void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
 {
-        struct kvm_vcpu_csr *csr;
         struct kvm_vcpu_timer *t = &vcpu->arch.timer;
 
         if (!t->sstc_enabled)
                 return;
 
-        csr = &vcpu->arch.guest_csr;
         t = &vcpu->arch.timer;
 #if defined(CONFIG_32BIT)
         t->next_cycles = csr_read(CSR_VSTIMECMP);
+6 -11
arch/s390/include/asm/kvm_host.h
···
 #define __KVM_HAVE_ARCH_VM_FREE
 void kvm_arch_free_vm(struct kvm *kvm);
 
-#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
-int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm);
-void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev);
-#else
-static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev,
-                                            struct kvm *kvm)
-{
-        return -EPERM;
-}
-static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {}
-#endif
+struct zpci_kvm_hook {
+        int (*kvm_register)(void *opaque, struct kvm *kvm);
+        void (*kvm_unregister)(void *opaque);
+};
+
+extern struct zpci_kvm_hook zpci_kvm_hook;
 
 #endif
+8 -4
arch/s390/kvm/pci.c
···
  * available, enable them and let userspace indicate whether or not they will
  * be used (specify SHM bit to disable).
  */
-int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
+static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
 {
+        struct zpci_dev *zdev = opaque;
         int rc;
 
         if (!zdev)
···
         kvm_put_kvm(kvm);
         return rc;
 }
-EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm);
 
-void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
+static void kvm_s390_pci_unregister_kvm(void *opaque)
 {
+        struct zpci_dev *zdev = opaque;
         struct kvm *kvm;
 
         if (!zdev)
···
 
         kvm_put_kvm(kvm);
 }
-EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm);
 
 void kvm_s390_pci_init_list(struct kvm *kvm)
 {
···
 
         spin_lock_init(&aift->gait_lock);
         mutex_init(&aift->aift_lock);
+        zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
+        zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
 
         return 0;
 }
···
 void kvm_s390_pci_exit(void)
 {
         mutex_destroy(&aift->aift_lock);
+        zpci_kvm_hook.kvm_register = NULL;
+        zpci_kvm_hook.kvm_unregister = NULL;
 
         kfree(aift);
 }
+1 -1
arch/s390/pci/Makefile
···
 
 obj-$(CONFIG_PCI)       += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
                            pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
-                           pci_bus.o
+                           pci_bus.o pci_kvm_hook.o
 obj-$(CONFIG_PCI_IOV)   += pci_iov.o
+11
arch/s390/pci/pci_kvm_hook.c
···
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2022. All rights reserved.
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ */
+#include <linux/kvm_host.h>
+
+struct zpci_kvm_hook zpci_kvm_hook;
+EXPORT_SYMBOL_GPL(zpci_kvm_hook);
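Taken together, the s390 hunks above replace direct calls from vfio-pci-zdev into KVM with a function-pointer hook (zpci_kvm_hook) that the KVM module fills in at init time and clears on exit, so the always-built zPCI core no longer needs a link-time dependency on KVM. As a rough, standalone illustration of that pattern only (every name here — demo_hook, provider_init, consumer_open — is invented for the example and is not a kernel API):

#include <stdio.h>

/* A hook table owned by the always-built "core"; pointers start out NULL. */
struct demo_hook {
        int (*do_register)(void *opaque, int token);
        void (*do_unregister)(void *opaque);
};

static struct demo_hook demo_hook;

/* The optional "provider" module installs its callbacks at init time... */
static int provider_register(void *opaque, int token)
{
        printf("provider: registering %p with token %d\n", opaque, token);
        return 0;
}

static void provider_unregister(void *opaque)
{
        printf("provider: unregistering %p\n", opaque);
}

static void provider_init(void)
{
        demo_hook.do_register = provider_register;
        demo_hook.do_unregister = provider_unregister;
}

/* ...and the consumer only calls through the table if it was populated. */
static int consumer_open(void *dev, int token)
{
        if (demo_hook.do_register)
                return demo_hook.do_register(dev, token);
        return -1;      /* stands in for -ENOENT when no provider is loaded */
}

int main(void)
{
        int dev;

        consumer_open(&dev, 42);        /* no provider yet: falls back to -1 */
        provider_init();
        consumer_open(&dev, 42);        /* now dispatches through the hook */
        return 0;
}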
+2 -1
arch/x86/events/intel/core.c
···
                 /* Disable guest PEBS if host PEBS is enabled. */
                 arr[pebs_enable].guest = 0;
         } else {
-                /* Disable guest PEBS for cross-mapped PEBS counters. */
+                /* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */
                 arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
+                arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask;
                 /* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
                 arr[global_ctrl].guest |= arr[pebs_enable].guest;
         }
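The effect of the added arr[global_ctrl] masking is easiest to see with concrete bit values. The following toy program uses made-up mask values (it is not code from the patch) and simply mirrors the two &= ~host_cross_mapped_mask operations and the final |=:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Hypothetical state: guest enables counters 0 and 1 with PEBS, but
         * counter 0 is cross-mapped to a different counter on the host. */
        uint64_t host_cross_mapped_mask = 0x1;
        uint64_t pebs_enable_guest      = 0x3;
        uint64_t global_ctrl_guest      = 0x3;

        /* Previously only pebs_enable was masked, so the cross-mapped counter
         * could remain enabled via the guest's GLOBAL_CTRL; masking both
         * fields disables it completely for the guest. */
        pebs_enable_guest &= ~host_cross_mapped_mask;   /* -> 0x2 */
        global_ctrl_guest &= ~host_cross_mapped_mask;   /* -> 0x2 */

        /* Re-enable GLOBAL_CTRL bits only for counters that keep guest PEBS. */
        global_ctrl_guest |= pebs_enable_guest;

        printf("pebs_enable=%#llx global_ctrl=%#llx\n",
               (unsigned long long)pebs_enable_guest,
               (unsigned long long)global_ctrl_guest);
        return 0;
}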
+8 -52
arch/x86/kvm/mmu/mmu.c
···
         __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
 }
 
-static bool need_remote_flush(u64 old, u64 new)
-{
-        if (!is_shadow_present_pte(old))
-                return false;
-        if (!is_shadow_present_pte(new))
-                return true;
-        if ((old ^ new) & SPTE_BASE_ADDR_MASK)
-                return true;
-        old ^= shadow_nx_mask;
-        new ^= shadow_nx_mask;
-        return (old & ~new & SPTE_PERM_MASK) != 0;
-}
-
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
                                     int *bytes)
 {
···
                 mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
                 if (gentry && sp->role.level != PG_LEVEL_4K)
                         ++vcpu->kvm->stat.mmu_pde_zapped;
-                if (need_remote_flush(entry, *spte))
+                if (is_shadow_present_pte(entry))
                         flush = true;
                 ++spte;
         }
···
                                       const struct kvm_memory_slot *memslot,
                                       int start_level)
 {
-        bool flush = false;
-
         if (kvm_memslots_have_rmaps(kvm)) {
                 write_lock(&kvm->mmu_lock);
-                flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
-                                          start_level, KVM_MAX_HUGEPAGE_LEVEL,
-                                          false);
+                slot_handle_level(kvm, memslot, slot_rmap_write_protect,
+                                  start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
                 write_unlock(&kvm->mmu_lock);
         }
 
         if (is_tdp_mmu_enabled(kvm)) {
                 read_lock(&kvm->mmu_lock);
-                flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
+                kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
                 read_unlock(&kvm->mmu_lock);
         }
-
-        /*
-         * Flush TLBs if any SPTEs had to be write-protected to ensure that
-         * guest writes are reflected in the dirty bitmap before the memslot
-         * update completes, i.e. before enabling dirty logging is visible to
-         * userspace.
-         *
-         * Perform the TLB flush outside the mmu_lock to reduce the amount of
-         * time the lock is held. However, this does mean that another CPU can
-         * now grab mmu_lock and encounter a write-protected SPTE while CPUs
-         * still have a writable mapping for the associated GFN in their TLB.
-         *
-         * This is safe but requires KVM to be careful when making decisions
-         * based on the write-protection status of an SPTE. Specifically, KVM
-         * also write-protects SPTEs to monitor changes to guest page tables
-         * during shadow paging, and must guarantee no CPUs can write to those
-         * page before the lock is dropped. As mentioned in the previous
-         * paragraph, a write-protected SPTE is no guarantee that CPU cannot
-         * perform writes. So to determine if a TLB flush is truly required, KVM
-         * will clear a separate software-only bit (MMU-writable) and skip the
-         * flush if-and-only-if this bit was already clear.
-         *
-         * See is_writable_pte() for more details.
-         */
-        if (flush)
-                kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
···
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                    const struct kvm_memory_slot *memslot)
 {
-        bool flush = false;
-
         if (kvm_memslots_have_rmaps(kvm)) {
                 write_lock(&kvm->mmu_lock);
                 /*
                  * Clear dirty bits only on 4k SPTEs since the legacy MMU only
                  * support dirty logging at a 4k granularity.
                  */
-                flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
+                slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
                 write_unlock(&kvm->mmu_lock);
         }
 
         if (is_tdp_mmu_enabled(kvm)) {
                 read_lock(&kvm->mmu_lock);
-                flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
+                kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
                 read_unlock(&kvm->mmu_lock);
         }
 
         /*
+         * The caller will flush the TLBs after this function returns.
+         *
          * It's also safe to flush TLBs out of mmu lock here as currently this
          * function is only used for dirty logging, in which case flushing TLB
          * out of mmu lock also guarantees no dirty pages will be lost in
          * dirty_bitmap.
          */
-        if (flush)
-                kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 void kvm_mmu_zap_all(struct kvm *kvm)
+10 -4
arch/x86/kvm/mmu/spte.h
···
 }
 
 /*
- * An shadow-present leaf SPTE may be non-writable for 3 possible reasons:
+ * A shadow-present leaf SPTE may be non-writable for 4 possible reasons:
  *
  * 1. To intercept writes for dirty logging. KVM write-protects huge pages
  *    so that they can be split be split down into the dirty logging
···
  *    read-only memslot or guest memory backed by a read-only VMA. Writes to
  *    such pages are disallowed entirely.
  *
- * To keep track of why a given SPTE is write-protected, KVM uses 2
- * software-only bits in the SPTE:
+ * 4. To emulate the Accessed bit for SPTEs without A/D bits. Note, in this
+ *    case, the SPTE is access-protected, not just write-protected!
+ *
+ * For cases #1 and #4, KVM can safely make such SPTEs writable without taking
+ * mmu_lock as capturing the Accessed/Dirty state doesn't require taking it.
+ * To differentiate #1 and #4 from #2 and #3, KVM uses two software-only bits
+ * in the SPTE:
  *
  * shadow_mmu_writable_mask, aka MMU-writable -
  *   Cleared on SPTEs that KVM is currently write-protecting for shadow paging
···
  * shadow page tables between vCPUs. Write-protecting an SPTE for dirty logging
  * (which does not clear the MMU-writable bit), does not flush TLBs before
  * dropping the lock, as it only needs to synchronize guest writes with the
- * dirty bitmap.
+ * dirty bitmap. Similarly, making the SPTE inaccessible (and non-writable) for
+ * access-tracking via the clear_young() MMU notifier also does not flush TLBs.
  *
  * So, there is the problem: clearing the MMU-writable bit can encounter a
  * write-protected SPTE while CPUs still have writable mappings for that SPTE
+1 -2
arch/x86/kvm/vmx/vmx.c
···
         if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
                 return true;
 
-        return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
-                                         MSR_IA32_SPEC_CTRL);
+        return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
 }
 
 unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
+83 -9
arch/x86/kvm/x86.c
···
 static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
 static unsigned int num_msr_based_features;
 
+/*
+ * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
+ * does not yet virtualize. These include:
+ *   10 - MISC_PACKAGE_CTRLS
+ *   11 - ENERGY_FILTERING_CTL
+ *   12 - DOITM
+ *   18 - FB_CLEAR_CTRL
+ *   21 - XAPIC_DISABLE_STATUS
+ *   23 - OVERCLOCKING_STATUS
+ */
+
+#define KVM_SUPPORTED_ARCH_CAP \
+        (ARCH_CAP_RDCL_NO | ARCH_CAP_IBRS_ALL | ARCH_CAP_RSBA | \
+         ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
+         ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
+         ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
+         ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+
 static u64 kvm_get_arch_capabilities(void)
 {
         u64 data = 0;
 
-        if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+        if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
                 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
+                data &= KVM_SUPPORTED_ARCH_CAP;
+        }
 
         /*
          * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
···
          * using VERW to clear CPU buffers.
          */
 
-        /* Guests don't need to know "Fill buffer clear control" exists */
-        data &= ~ARCH_CAP_FB_CLEAR_CTRL;
-
         return data;
 }
···
         case KVM_MP_STATE_INIT_RECEIVED:
                 break;
         default:
-                return -EINTR;
+                WARN_ON_ONCE(1);
+                break;
         }
         return 1;
 }
···
 
         vcpu_load(vcpu);
 
-        if (!lapic_in_kernel(vcpu) &&
-            mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
+        switch (mp_state->mp_state) {
+        case KVM_MP_STATE_UNINITIALIZED:
+        case KVM_MP_STATE_HALTED:
+        case KVM_MP_STATE_AP_RESET_HOLD:
+        case KVM_MP_STATE_INIT_RECEIVED:
+        case KVM_MP_STATE_SIPI_RECEIVED:
+                if (!lapic_in_kernel(vcpu))
+                        goto out;
+                break;
+
+        case KVM_MP_STATE_RUNNABLE:
+                break;
+
+        default:
                 goto out;
+        }
 
         /*
          * KVM_MP_STATE_INIT_RECEIVED means the processor is in
···
         vcpu->arch.mci_ctl2_banks = kcalloc(KVM_MAX_MCE_BANKS, sizeof(u64),
                                             GFP_KERNEL_ACCOUNT);
         if (!vcpu->arch.mce_banks || !vcpu->arch.mci_ctl2_banks)
-                goto fail_free_pio_data;
+                goto fail_free_mce_banks;
         vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
         if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
···
 fail_free_mce_banks:
         kfree(vcpu->arch.mce_banks);
         kfree(vcpu->arch.mci_ctl2_banks);
-fail_free_pio_data:
         free_page((unsigned long)vcpu->arch.pio_data);
 fail_free_lapic:
         kvm_free_lapic(vcpu);
···
         } else {
                 kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
         }
+
+        /*
+         * Unconditionally flush the TLBs after enabling dirty logging.
+         * A flush is almost always going to be necessary (see below),
+         * and unconditionally flushing allows the helpers to omit
+         * the subtly complex checks when removing write access.
+         *
+         * Do the flush outside of mmu_lock to reduce the amount of
+         * time mmu_lock is held.  Flushing after dropping mmu_lock is
+         * safe as KVM only needs to guarantee the slot is fully
+         * write-protected before returning to userspace, i.e. before
+         * userspace can consume the dirty status.
+         *
+         * Flushing outside of mmu_lock requires KVM to be careful when
+         * making decisions based on writable status of an SPTE, e.g. a
+         * !writable SPTE doesn't guarantee a CPU can't perform writes.
+         *
+         * Specifically, KVM also write-protects guest page tables to
+         * monitor changes when using shadow paging, and must guarantee
+         * no CPUs can write to those page before mmu_lock is dropped.
+         * Because CPUs may have stale TLB entries at this point, a
+         * !writable SPTE doesn't guarantee CPUs can't perform writes.
+         *
+         * KVM also allows making SPTES writable outside of mmu_lock,
+         * e.g. to allow dirty logging without taking mmu_lock.
+         *
+         * To handle these scenarios, KVM uses a separate software-only
+         * bit (MMU-writable) to track if a SPTE is !writable due to
+         * a guest page table being write-protected (KVM clears the
+         * MMU-writable flag when write-protecting for shadow paging).
+         *
+         * The use of MMU-writable is also the primary motivation for
+         * the unconditional flush.  Because KVM must guarantee that a
+         * CPU doesn't contain stale, writable TLB entries for a
+         * !MMU-writable SPTE, KVM must flush if it encounters any
+         * MMU-writable SPTE regardless of whether the actual hardware
+         * writable bit was set.  I.e. KVM is almost guaranteed to need
+         * to flush, while unconditionally flushing allows the "remove
+         * write access" helpers to ignore MMU-writable entirely.
+         *
+         * See is_writable_pte() for more details (the case involving
+         * access-tracked SPTEs is particularly relevant).
+         */
+        kvm_arch_flush_remote_tlbs_memslot(kvm, new);
         }
 }
 
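The KVM_SET_MP_STATE argument check can be exercised from userspace with the standard KVM ioctls. Below is a minimal sketch (error handling trimmed, requires access to /dev/kvm) that passes a bogus mp_state value and expects the ioctl to fail once the validity check is in place:

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR);
        if (kvm < 0) {
                perror("open /dev/kvm");
                return 1;
        }

        int vm = ioctl(kvm, KVM_CREATE_VM, 0);
        int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);
        if (vm < 0 || vcpu < 0) {
                perror("create vm/vcpu");
                return 1;
        }

        /* 1234 is not one of the defined KVM_MP_STATE_* values. */
        struct kvm_mp_state st = { .mp_state = 1234 };

        if (ioctl(vcpu, KVM_SET_MP_STATE, &st) < 0)
                perror("KVM_SET_MP_STATE");     /* expected to fail with the check */
        else
                printf("bogus mp_state was accepted\n");
        return 0;
}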
+6 -2
drivers/vfio/pci/vfio_pci_zdev.c
···
         if (!vdev->vdev.kvm)
                 return 0;
 
-        return kvm_s390_pci_register_kvm(zdev, vdev->vdev.kvm);
+        if (zpci_kvm_hook.kvm_register)
+                return zpci_kvm_hook.kvm_register(zdev, vdev->vdev.kvm);
+
+        return -ENOENT;
 }
 
 void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
···
         if (!zdev || !vdev->vdev.kvm)
                 return;
 
-        kvm_s390_pci_unregister_kvm(zdev);
+        if (zpci_kvm_hook.kvm_unregister)
+                zpci_kvm_hook.kvm_unregister(zdev);
 }
+2 -2
tools/testing/selftests/kvm/include/x86_64/processor.h
···
                            void (*handler)(struct ex_regs *));
 
 /* If a toddler were to say "abracadabra". */
-#define KVM_EXCEPTION_MAGIC 0xabacadabaull
+#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
 
 /*
  * KVM selftest exception fixup uses registers to coordinate with the exception
···
         "lea 1f(%%rip), %%r10\n\t"                      \
         "lea 2f(%%rip), %%r11\n\t"                      \
         "1: " insn "\n\t"                               \
-        "mov $0, %[vector]\n\t"                         \
+        "movb $0, %[vector]\n\t"                        \
         "jmp 3f\n\t"                                    \
         "2:\n\t"                                        \
         "mov %%r9b, %[vector]\n\t"                      \
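On the assembler fix: with a memory destination and an immediate source, a bare "mov $0, ..." carries no operand-size information, and Clang's integrated assembler refuses to guess where GNU as would apply a default. A standalone x86-64 illustration of the explicit byte-sized form (not part of the selftest) is:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint8_t vector = 0xff;

        /*
         * "movb" pins the store to 8 bits. Writing "mov $0, %[vector]" here
         * would leave the width ambiguous for a memory operand, which the
         * Clang integrated assembler rejects.
         */
        __asm__ volatile("movb $0, %[vector]"
                         : [vector] "=m" (vector));

        printf("vector = %u\n", vector);        /* prints 0 */
        return 0;
}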