Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
"A few simple fixes for ARM, x86, PPC and generic code.

The x86 MMU fix is a bit larger because the surrounding code needed a
cleanup, but nothing worrisome"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: MMU: fix reserved bit check for ept=0/CR0.WP=0/CR4.SMEP=1/EFER.NX=0
KVM: MMU: fix ept=0/pte.u=1/pte.w=0/CR0.WP=0/CR4.SMEP=1/EFER.NX=0 combo
kvm: cap halt polling at exactly halt_poll_ns
KVM: s390: correct fprs on SIGP (STOP AND) STORE STATUS
KVM: VMX: disable PEBS before a guest entry
KVM: PPC: Book3S HV: Sanitize special-purpose register values on guest exit

Changed files (+53 -16):
Documentation/virtual/kvm/mmu.txt (+2 -1)
@@ -358,7 +358,8 @@
 - if CR4.SMEP is enabled: since we've turned the page into a kernel page,
   the kernel may now execute it. We handle this by also setting spte.nx.
   If we get a user fetch or read fault, we'll change spte.u=1 and
-  spte.nx=gpte.nx back.
+  spte.nx=gpte.nx back. For this to work, KVM forces EFER.NX to 1 when
+  shadow paging is in use.
 - if CR4.SMAP is disabled: since the page has been changed to a kernel
   page, it can not be reused when CR4.SMAP is enabled. We set
   CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
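The documentation change above is the crux of the whole series, so a compact illustration may help. What follows is a minimal C sketch of the permission juggling described in the text; the structure and function names are illustrative stand-ins, not KVM's real data structures:

#include <stdbool.h>

/* Illustrative only: the shadow-PTE bits the documentation talks about. */
struct spte_bits {
        bool user;      /* spte.u  */
        bool writable;  /* spte.w  */
        bool nx;        /* spte.nx */
};

/*
 * CR0.WP=0: a supervisor write to a read-only user page must succeed, so
 * the shadow PTE is turned into a writable kernel page.  With CR4.SMEP=1
 * the kernel must not execute that page either, hence spte.nx is also
 * set; that in turn only works if EFER.NX is enabled, which is why
 * shadow paging forces EFER.NX to 1.
 */
void kernel_write_with_wp0(struct spte_bits *spte, bool cr4_smep)
{
        spte->user = false;
        spte->writable = true;
        if (cr4_smep)
                spte->nx = true;
}

/* A later user fetch or read fault flips the page back to a user page. */
void user_refault(struct spte_bits *spte, bool gpte_nx)
{
        spte->user = true;
        spte->nx = gpte_nx;     /* spte.nx = gpte.nx, as documented */
}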
arch/powerpc/kvm/book3s_hv_rmhandlers.S (+14)
@@ -1370,6 +1370,20 @@
         std     r6, VCPU_ACOP(r9)
         stw     r7, VCPU_GUEST_PID(r9)
         std     r8, VCPU_WORT(r9)
+        /*
+         * Restore various registers to 0, where non-zero values
+         * set by the guest could disrupt the host.
+         */
+        li      r0, 0
+        mtspr   SPRN_IAMR, r0
+        mtspr   SPRN_CIABR, r0
+        mtspr   SPRN_DAWRX, r0
+        mtspr   SPRN_TCSCR, r0
+        mtspr   SPRN_WORT, r0
+        /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+        li      r0, 1
+        sldi    r0, r0, 31
+        mtspr   SPRN_MMCRS, r0
 8:

         /* Save and reset AMR and UAMOR before turning on the MMU */
arch/s390/kvm/kvm-s390.c (+1 -1)
@@ -2381,7 +2381,7 @@

         /* manually convert vector registers if necessary */
         if (MACHINE_HAS_VX) {
-                convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
                                      fprs, 128);
         } else {
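The conversion itself relies on the z/Architecture register layout: with the vector facility installed, floating-point register i occupies bits 0-63 of vector register i, so extracting the FPRs means taking the high doubleword of each vector register. A rough, self-contained C sketch of what a convert_vx_to_fp()-style helper does (the struct below is a stand-in, not the s390 headers):

#include <stdint.h>

/* Stand-in for a 128-bit vector register, big-endian bit numbering. */
struct vreg128 {
        uint64_t high;  /* bits 0-63: this doubleword is also FPR i */
        uint64_t low;   /* bits 64-127 */
};

/* Copy the FP view out of each of the 16 vector registers. */
void vx_to_fp_sketch(uint64_t fprs[16], const struct vreg128 vrs[16])
{
        for (int i = 0; i < 16; i++)
                fprs[i] = vrs[i].high;
}

The bug fixed here is the source of the data rather than the conversion: for SIGP STORE STATUS the registers of the target VCPU live in vcpu->run->s.regs.vrs, while current->thread.fpu holds the FPU state of whichever host thread happens to be executing, which need not be the VCPU whose status is being stored.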
arch/x86/kvm/mmu.c (+3 -1)
@@ -3721,13 +3721,15 @@
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
+        bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+
         /*
          * Passing "true" to the last argument is okay; it adds a check
          * on bit 8 of the SPTEs which KVM doesn't use anyway.
          */
         __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
                                 boot_cpu_data.x86_phys_bits,
-                                context->shadow_root_level, context->nx,
+                                context->shadow_root_level, uses_nx,
                                 guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
                                 true);
 }
arch/x86/kvm/vmx.c (+30 -13)
@@ -1813,6 +1813,13 @@
                         return;
                 }
                 break;
+        case MSR_IA32_PEBS_ENABLE:
+                /* PEBS needs a quiescent period after being disabled (to write
+                 * a record). Disabling PEBS through VMX MSR swapping doesn't
+                 * provide that period, so a CPU could write host's record into
+                 * guest's memory.
+                 */
+                wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
         }

         for (i = 0; i < m->nr; ++i)
@@ -1857,26 +1850,31 @@

 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 {
-        u64 guest_efer;
-        u64 ignore_bits;
+        u64 guest_efer = vmx->vcpu.arch.efer;
+        u64 ignore_bits = 0;

-        guest_efer = vmx->vcpu.arch.efer;
+        if (!enable_ept) {
+                /*
+                 * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
+                 * host CPUID is more efficient than testing guest CPUID
+                 * or CR4. Host SMEP is anyway a requirement for guest SMEP.
+                 */
+                if (boot_cpu_has(X86_FEATURE_SMEP))
+                        guest_efer |= EFER_NX;
+                else if (!(guest_efer & EFER_NX))
+                        ignore_bits |= EFER_NX;
+        }

         /*
-         * NX is emulated; LMA and LME handled by hardware; SCE meaningless
-         * outside long mode
+         * LMA and LME handled by hardware; SCE meaningless outside long mode.
          */
-        ignore_bits = EFER_NX | EFER_SCE;
+        ignore_bits |= EFER_SCE;
 #ifdef CONFIG_X86_64
         ignore_bits |= EFER_LMA | EFER_LME;
         /* SCE is meaningful only in long mode on Intel */
         if (guest_efer & EFER_LMA)
                 ignore_bits &= ~(u64)EFER_SCE;
 #endif
-        guest_efer &= ~ignore_bits;
-        guest_efer |= host_efer & ignore_bits;
-        vmx->guest_msrs[efer_offset].data = guest_efer;
-        vmx->guest_msrs[efer_offset].mask = ~ignore_bits;

         clear_atomic_switch_msr(vmx, MSR_EFER);

@@ -1892,16 +1880,21 @@
          */
         if (cpu_has_load_ia32_efer ||
             (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
-                guest_efer = vmx->vcpu.arch.efer;
                 if (!(guest_efer & EFER_LMA))
                         guest_efer &= ~EFER_LME;
                 if (guest_efer != host_efer)
                         add_atomic_switch_msr(vmx, MSR_EFER,
                                               guest_efer, host_efer);
                 return false;
-        }
+        } else {
+                guest_efer &= ~ignore_bits;
+                guest_efer |= host_efer & ignore_bits;

-        return true;
+                vmx->guest_msrs[efer_offset].data = guest_efer;
+                vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+
+                return true;
+        }
 }

 static unsigned long segment_base(u16 selector)
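As background for why the PEBS hunk needs an explicit wrmsrl() at all: add_atomic_switch_msr() and clear_atomic_switch_msr() manage the VMX MSR-autoload lists, which the CPU applies as part of VM-entry and VM-exit with no instruction boundary in between, so an MSR that is only "disabled" through that mechanism never gets a quiescent period. The following toy model of such a list illustrates the data structure only; it is not the vmx.c implementation:

#include <stdint.h>
#include <stddef.h>

/*
 * Toy MSR-autoload list: the CPU loads guest_value on VM-entry and
 * host_value on VM-exit, atomically with the transition itself.
 */
struct msr_autoload_entry {
        uint32_t index;         /* MSR number */
        uint64_t guest_value;   /* loaded on VM-entry */
        uint64_t host_value;    /* loaded on VM-exit */
};

struct msr_autoload {
        size_t nr;
        struct msr_autoload_entry entries[8];
};

/* Append or update an entry; a real implementation would also program
 * the VMCS MSR-load/store counts and addresses. */
int autoload_set(struct msr_autoload *m, uint32_t index,
                 uint64_t guest_value, uint64_t host_value)
{
        size_t i;

        for (i = 0; i < m->nr; i++)
                if (m->entries[i].index == index)
                        break;
        if (i == m->nr) {
                if (m->nr == 8)
                        return -1;      /* list is full */
                m->nr++;
        }
        m->entries[i].index = index;
        m->entries[i].guest_value = guest_value;
        m->entries[i].host_value = host_value;
        return 0;
}

The reworked update_transition_efer() above chooses between two such paths: switch EFER through the autoload list when the hardware can do so cheaply (or when EPT requires the real NX bit), and otherwise fold the differences into the lazily restored guest_msrs[] entry.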
virt/kvm/kvm_main.c (+3)
@@ -1952,6 +1952,9 @@
         else
                 val *= halt_poll_ns_grow;

+        if (val > halt_poll_ns)
+                val = halt_poll_ns;
+
         vcpu->halt_poll_ns = val;
         trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
 }
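The generic fix caps the polling window at exactly halt_poll_ns: before it, the grow path multiplied vcpu->halt_poll_ns without an upper bound, so the window could exceed the halt_poll_ns module parameter and leave a vcpu busy-polling far longer than the administrator configured. A simplified, standalone sketch of the capped grow step, using made-up parameter values rather than the real module parameters:

#include <stdio.h>

/* Stand-ins for the halt_poll_ns and halt_poll_ns_grow module parameters. */
static unsigned int halt_poll_ns = 500000;      /* 500 us ceiling */
static unsigned int halt_poll_ns_grow = 2;

static unsigned int grow_poll_window(unsigned int val)
{
        if (val == 0)
                val = 10000;            /* 10 us base, as in kvm_main.c */
        else
                val *= halt_poll_ns_grow;

        if (val > halt_poll_ns)         /* the cap added by this fix */
                val = halt_poll_ns;

        return val;
}

int main(void)
{
        unsigned int val = 0;

        /* Prints 10000, 20000, ..., 320000, then sticks at 500000. */
        for (int i = 0; i < 8; i++) {
                val = grow_poll_window(val);
                printf("%u\n", val);
        }
        return 0;
}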