Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM changes from Paolo Bonzini:
"Here are the 3.13 KVM changes. There was a lot of work on the PPC
side: that the HV and emulation flavors can now coexist in a single
kernel is probably the most interesting change from a user point of view.

On the x86 side there are nested virtualization improvements and a few
bugfixes.

ARM got transparent huge page support, improved overcommit, and
support for big endian guests.

Finally, there is a new interface to connect KVM with VFIO. This
helps with devices that use NoSnoop PCI transactions, letting the
driver in the guest execute WBINVD instructions. This includes some
nVidia cards on Windows, which fail to start without these patches and
the corresponding userspace changes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (146 commits)
kvm, vmx: Fix lazy FPU on nested guest
arm/arm64: KVM: PSCI: propagate caller endianness to the incoming vcpu
arm/arm64: KVM: MMIO support for BE guest
kvm, cpuid: Fix sparse warning
kvm: Delete prototype for non-existent function kvm_check_iopl
kvm: Delete prototype for non-existent function complete_pio
hung_task: add method to reset detector
pvclock: detect watchdog reset at pvclock read
kvm: optimize out smp_mb after srcu_read_unlock
srcu: API for barrier after srcu read unlock
KVM: remove vm mmap method
KVM: IOMMU: hva align mapping page size
KVM: x86: trace cpuid emulation when called from emulator
KVM: emulator: cleanup decode_register_operand() a bit
KVM: emulator: check rex prefix inside decode_register()
KVM: x86: fix emulation of "movzbl %bpl, %eax"
kvm_host: typo fix
KVM: x86: emulate SAHF instruction
MAINTAINERS: add tree for kvm.git
Documentation/kvm: add a 00-INDEX file
...

+5182 -2251
+24
Documentation/virtual/kvm/00-INDEX
··· 1 + 00-INDEX 2 + - this file. 3 + api.txt 4 + - KVM userspace API. 5 + cpuid.txt 6 + - KVM-specific cpuid leaves (x86). 7 + devices/ 8 + - KVM_CAP_DEVICE_CTRL userspace API. 9 + hypercalls.txt 10 + - KVM hypercalls. 11 + locking.txt 12 + - notes on KVM locks. 13 + mmu.txt 14 + - the x86 kvm shadow mmu. 15 + msr.txt 16 + - KVM-specific MSRs (x86). 17 + nested-vmx.txt 18 + - notes on nested virtualization for Intel x86 processors. 19 + ppc-pv.txt 20 + - the paravirtualization interface on PowerPC. 21 + review-checklist.txt 22 + - review checklist for KVM patches. 23 + timekeeping.txt 24 + - timekeeping virtualization for x86-based architectures.
+145 -7
Documentation/virtual/kvm/api.txt
··· 1122 1122 struct kvm_cpuid_entry2 entries[0]; 1123 1123 }; 1124 1124 1125 - #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 1126 - #define KVM_CPUID_FLAG_STATEFUL_FUNC 2 1127 - #define KVM_CPUID_FLAG_STATE_READ_NEXT 4 1125 + #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) 1126 + #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) 1127 + #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) 1128 1128 1129 1129 struct kvm_cpuid_entry2 { 1130 1130 __u32 function; ··· 1810 1810 PPC | KVM_REG_PPC_TLB3PS | 32 1811 1811 PPC | KVM_REG_PPC_EPTCFG | 32 1812 1812 PPC | KVM_REG_PPC_ICP_STATE | 64 1813 + PPC | KVM_REG_PPC_TB_OFFSET | 64 1814 + PPC | KVM_REG_PPC_SPMC1 | 32 1815 + PPC | KVM_REG_PPC_SPMC2 | 32 1816 + PPC | KVM_REG_PPC_IAMR | 64 1817 + PPC | KVM_REG_PPC_TFHAR | 64 1818 + PPC | KVM_REG_PPC_TFIAR | 64 1819 + PPC | KVM_REG_PPC_TEXASR | 64 1820 + PPC | KVM_REG_PPC_FSCR | 64 1821 + PPC | KVM_REG_PPC_PSPB | 32 1822 + PPC | KVM_REG_PPC_EBBHR | 64 1823 + PPC | KVM_REG_PPC_EBBRR | 64 1824 + PPC | KVM_REG_PPC_BESCR | 64 1825 + PPC | KVM_REG_PPC_TAR | 64 1826 + PPC | KVM_REG_PPC_DPDES | 64 1827 + PPC | KVM_REG_PPC_DAWR | 64 1828 + PPC | KVM_REG_PPC_DAWRX | 64 1829 + PPC | KVM_REG_PPC_CIABR | 64 1830 + PPC | KVM_REG_PPC_IC | 64 1831 + PPC | KVM_REG_PPC_VTB | 64 1832 + PPC | KVM_REG_PPC_CSIGR | 64 1833 + PPC | KVM_REG_PPC_TACR | 64 1834 + PPC | KVM_REG_PPC_TCSCR | 64 1835 + PPC | KVM_REG_PPC_PID | 64 1836 + PPC | KVM_REG_PPC_ACOP | 64 1837 + PPC | KVM_REG_PPC_VRSAVE | 32 1838 + PPC | KVM_REG_PPC_LPCR | 64 1839 + PPC | KVM_REG_PPC_PPR | 64 1840 + PPC | KVM_REG_PPC_ARCH_COMPAT 32 1841 + PPC | KVM_REG_PPC_TM_GPR0 | 64 1842 + ... 1843 + PPC | KVM_REG_PPC_TM_GPR31 | 64 1844 + PPC | KVM_REG_PPC_TM_VSR0 | 128 1845 + ... 
1846 + PPC | KVM_REG_PPC_TM_VSR63 | 128 1847 + PPC | KVM_REG_PPC_TM_CR | 64 1848 + PPC | KVM_REG_PPC_TM_LR | 64 1849 + PPC | KVM_REG_PPC_TM_CTR | 64 1850 + PPC | KVM_REG_PPC_TM_FPSCR | 64 1851 + PPC | KVM_REG_PPC_TM_AMR | 64 1852 + PPC | KVM_REG_PPC_TM_PPR | 64 1853 + PPC | KVM_REG_PPC_TM_VRSAVE | 64 1854 + PPC | KVM_REG_PPC_TM_VSCR | 32 1855 + PPC | KVM_REG_PPC_TM_DSCR | 64 1856 + PPC | KVM_REG_PPC_TM_TAR | 64 1813 1857 1814 1858 ARM registers are mapped using the lower 32 bits. The upper 16 of that 1815 1859 is the register group type, or coprocessor number: ··· 2348 2304 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). 2349 2305 2350 2306 2351 - 4.83 KVM_GET_REG_LIST 2307 + 4.83 KVM_ARM_PREFERRED_TARGET 2308 + 2309 + Capability: basic 2310 + Architectures: arm, arm64 2311 + Type: vm ioctl 2312 + Parameters: struct struct kvm_vcpu_init (out) 2313 + Returns: 0 on success; -1 on error 2314 + Errors: 2315 + ENODEV: no preferred target available for the host 2316 + 2317 + This queries KVM for preferred CPU target type which can be emulated 2318 + by KVM on underlying host. 2319 + 2320 + The ioctl returns struct kvm_vcpu_init instance containing information 2321 + about preferred CPU target type and recommended features for it. The 2322 + kvm_vcpu_init->features bitmap returned will have feature bits set if 2323 + the preferred target recommends setting these features, but this is 2324 + not mandatory. 2325 + 2326 + The information returned by this ioctl can be used to prepare an instance 2327 + of struct kvm_vcpu_init for KVM_ARM_VCPU_INIT ioctl which will result in 2328 + in VCPU matching underlying host. 2329 + 2330 + 2331 + 4.84 KVM_GET_REG_LIST 2352 2332 2353 2333 Capability: basic 2354 2334 Architectures: arm, arm64 ··· 2391 2323 This ioctl returns the guest registers that are supported for the 2392 2324 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 
2393 2325 2394 - 2395 - 4.84 KVM_ARM_SET_DEVICE_ADDR 2326 + 4.85 KVM_ARM_SET_DEVICE_ADDR 2396 2327 2397 2328 Capability: KVM_CAP_ARM_SET_DEVICE_ADDR 2398 2329 Architectures: arm, arm64 ··· 2429 2362 KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the 2430 2363 base addresses will return -EEXIST. 2431 2364 2432 - 4.85 KVM_PPC_RTAS_DEFINE_TOKEN 2365 + 4.86 KVM_PPC_RTAS_DEFINE_TOKEN 2433 2366 2434 2367 Capability: KVM_CAP_PPC_RTAS 2435 2368 Architectures: ppc ··· 2726 2659 for general purpose registers) 2727 2660 2728 2661 }; 2662 + 2663 + 2664 + 4.81 KVM_GET_EMULATED_CPUID 2665 + 2666 + Capability: KVM_CAP_EXT_EMUL_CPUID 2667 + Architectures: x86 2668 + Type: system ioctl 2669 + Parameters: struct kvm_cpuid2 (in/out) 2670 + Returns: 0 on success, -1 on error 2671 + 2672 + struct kvm_cpuid2 { 2673 + __u32 nent; 2674 + __u32 flags; 2675 + struct kvm_cpuid_entry2 entries[0]; 2676 + }; 2677 + 2678 + The member 'flags' is used for passing flags from userspace. 2679 + 2680 + #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) 2681 + #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) 2682 + #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) 2683 + 2684 + struct kvm_cpuid_entry2 { 2685 + __u32 function; 2686 + __u32 index; 2687 + __u32 flags; 2688 + __u32 eax; 2689 + __u32 ebx; 2690 + __u32 ecx; 2691 + __u32 edx; 2692 + __u32 padding[3]; 2693 + }; 2694 + 2695 + This ioctl returns x86 cpuid features which are emulated by 2696 + kvm.Userspace can use the information returned by this ioctl to query 2697 + which features are emulated by kvm instead of being present natively. 2698 + 2699 + Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2 2700 + structure with the 'nent' field indicating the number of entries in 2701 + the variable-size array 'entries'. If the number of entries is too low 2702 + to describe the cpu capabilities, an error (E2BIG) is returned. If the 2703 + number is too high, the 'nent' field is adjusted and an error (ENOMEM) 2704 + is returned. 
If the number is just right, the 'nent' field is adjusted 2705 + to the number of valid entries in the 'entries' array, which is then 2706 + filled. 2707 + 2708 + The entries returned are the set CPUID bits of the respective features 2709 + which kvm emulates, as returned by the CPUID instruction, with unknown 2710 + or unsupported feature bits cleared. 2711 + 2712 + Features like x2apic, for example, may not be present in the host cpu 2713 + but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be 2714 + emulated efficiently and thus not included here. 2715 + 2716 + The fields in each entry are defined as follows: 2717 + 2718 + function: the eax value used to obtain the entry 2719 + index: the ecx value used to obtain the entry (for entries that are 2720 + affected by ecx) 2721 + flags: an OR of zero or more of the following: 2722 + KVM_CPUID_FLAG_SIGNIFCANT_INDEX: 2723 + if the index field is valid 2724 + KVM_CPUID_FLAG_STATEFUL_FUNC: 2725 + if cpuid for this function returns different values for successive 2726 + invocations; there will be several entries with the same function, 2727 + all with this flag set 2728 + KVM_CPUID_FLAG_STATE_READ_NEXT: 2729 + for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is 2730 + the first entry to be read by a cpu 2731 + eax, ebx, ecx, edx: the values returned by the cpuid instruction for 2732 + this function/index combination 2729 2733 2730 2734 2731 2735 6. Capabilities that can be enabled
+7
Documentation/virtual/kvm/cpuid.txt
··· 43 43 KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by 44 44 || || writing to msr 0x4b564d02 45 45 ------------------------------------------------------------------------------ 46 + KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by 47 + || || writing to msr 0x4b564d03. 48 + ------------------------------------------------------------------------------ 49 + KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt 50 + || || handler can be enabled by writing 51 + || || to msr 0x4b564d04. 52 + ------------------------------------------------------------------------------ 46 53 KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit 47 54 || || before enabling paravirtualized 48 55 || || spinlock support.
+22
Documentation/virtual/kvm/devices/vfio.txt
··· 1 + VFIO virtual device 2 + =================== 3 + 4 + Device types supported: 5 + KVM_DEV_TYPE_VFIO 6 + 7 + Only one VFIO instance may be created per VM. The created device 8 + tracks VFIO groups in use by the VM and features of those groups 9 + important to the correctness and acceleration of the VM. As groups 10 + are enabled and disabled for use by the VM, KVM should be updated 11 + about their presence. When registered with KVM, a reference to the 12 + VFIO-group is held by KVM. 13 + 14 + Groups: 15 + KVM_DEV_VFIO_GROUP 16 + 17 + KVM_DEV_VFIO_GROUP attributes: 18 + KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking 19 + KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking 20 + 21 + For each, kvm_device_attr.addr points to an int32_t file descriptor 22 + for the VFIO group.
+17 -2
Documentation/virtual/kvm/locking.txt
··· 132 132 ------------ 133 133 134 134 Name: kvm_lock 135 - Type: raw_spinlock 135 + Type: spinlock_t 136 136 Arch: any 137 137 Protects: - vm_list 138 - - hardware virtualization enable/disable 138 + 139 + Name: kvm_count_lock 140 + Type: raw_spinlock_t 141 + Arch: any 142 + Protects: - hardware virtualization enable/disable 139 143 Comment: 'raw' because hardware enabling/disabling must be atomic /wrt 140 144 migration. 141 145 ··· 155 151 Arch: any 156 152 Protects: -shadow page/shadow tlb entry 157 153 Comment: it is a spinlock since it is used in mmu notifier. 154 + 155 + Name: kvm->srcu 156 + Type: srcu lock 157 + Arch: any 158 + Protects: - kvm->memslots 159 + - kvm->buses 160 + Comment: The srcu read lock must be held while accessing memslots (e.g. 161 + when using gfn_to_* functions) and while accessing in-kernel 162 + MMIO/PIO address->device structure mapping (kvm->buses). 163 + The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu 164 + if it is needed by multiple functions.
+2 -1
MAINTAINERS
··· 4871 4871 M: Gleb Natapov <gleb@redhat.com> 4872 4872 M: Paolo Bonzini <pbonzini@redhat.com> 4873 4873 L: kvm@vger.kernel.org 4874 - W: http://linux-kvm.org 4874 + W: http://www.linux-kvm.org 4875 + T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git 4875 4876 S: Supported 4876 4877 F: Documentation/*/kvm*.txt 4877 4878 F: Documentation/virtual/kvm/
+6 -3
arch/arm/include/asm/kvm_arm.h
··· 57 57 * TSC: Trap SMC 58 58 * TSW: Trap cache operations by set/way 59 59 * TWI: Trap WFI 60 + * TWE: Trap WFE 60 61 * TIDCP: Trap L2CTLR/L2ECTLR 61 62 * BSU_IS: Upgrade barriers to the inner shareable domain 62 63 * FB: Force broadcast of all maintainance operations ··· 68 67 */ 69 68 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ 70 69 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ 71 - HCR_SWIO | HCR_TIDCP) 70 + HCR_TWE | HCR_SWIO | HCR_TIDCP) 72 71 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) 73 72 74 73 /* System Control Register (SCTLR) bits */ ··· 96 95 #define TTBCR_IRGN1 (3 << 24) 97 96 #define TTBCR_EPD1 (1 << 23) 98 97 #define TTBCR_A1 (1 << 22) 99 - #define TTBCR_T1SZ (3 << 16) 98 + #define TTBCR_T1SZ (7 << 16) 100 99 #define TTBCR_SH0 (3 << 12) 101 100 #define TTBCR_ORGN0 (3 << 10) 102 101 #define TTBCR_IRGN0 (3 << 8) 103 102 #define TTBCR_EPD0 (1 << 7) 104 - #define TTBCR_T0SZ 3 103 + #define TTBCR_T0SZ (7 << 0) 105 104 #define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0) 106 105 107 106 /* Hyp System Trap Register */ ··· 208 207 #define HSR_EC_IABT_HYP (0x21) 209 208 #define HSR_EC_DABT (0x24) 210 209 #define HSR_EC_DABT_HYP (0x25) 210 + 211 + #define HSR_WFI_IS_WFE (1U << 0) 211 212 212 213 #define HSR_HVC_IMM_MASK ((1UL << 16) - 1) 213 214
+1 -1
arch/arm/include/asm/kvm_asm.h
··· 39 39 #define c6_IFAR 17 /* Instruction Fault Address Register */ 40 40 #define c7_PAR 18 /* Physical Address Register */ 41 41 #define c7_PAR_high 19 /* PAR top 32 bits */ 42 - #define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ 42 + #define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */ 43 43 #define c10_PRRR 21 /* Primary Region Remap Register */ 44 44 #define c10_NMRR 22 /* Normal Memory Remap Register */ 45 45 #define c12_VBAR 23 /* Vector Base Address Register */
+51
arch/arm/include/asm/kvm_emulate.h
··· 157 157 return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; 158 158 } 159 159 160 + static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) 161 + { 162 + return vcpu->arch.cp15[c0_MPIDR]; 163 + } 164 + 165 + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) 166 + { 167 + *vcpu_cpsr(vcpu) |= PSR_E_BIT; 168 + } 169 + 170 + static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) 171 + { 172 + return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT); 173 + } 174 + 175 + static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, 176 + unsigned long data, 177 + unsigned int len) 178 + { 179 + if (kvm_vcpu_is_be(vcpu)) { 180 + switch (len) { 181 + case 1: 182 + return data & 0xff; 183 + case 2: 184 + return be16_to_cpu(data & 0xffff); 185 + default: 186 + return be32_to_cpu(data); 187 + } 188 + } 189 + 190 + return data; /* Leave LE untouched */ 191 + } 192 + 193 + static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, 194 + unsigned long data, 195 + unsigned int len) 196 + { 197 + if (kvm_vcpu_is_be(vcpu)) { 198 + switch (len) { 199 + case 1: 200 + return data & 0xff; 201 + case 2: 202 + return cpu_to_be16(data & 0xffff); 203 + default: 204 + return cpu_to_be32(data); 205 + } 206 + } 207 + 208 + return data; /* Leave LE untouched */ 209 + } 210 + 160 211 #endif /* __ARM_KVM_EMULATE_H__ */
+1 -5
arch/arm/include/asm/kvm_host.h
··· 38 38 39 39 #define KVM_VCPU_MAX_FEATURES 1 40 40 41 - /* We don't currently support large pages. */ 42 - #define KVM_HPAGE_GFN_SHIFT(x) 0 43 - #define KVM_NR_PAGE_SIZES 1 44 - #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) 45 - 46 41 #include <kvm/arm_vgic.h> 47 42 48 43 struct kvm_vcpu; ··· 149 154 struct kvm_vcpu_init; 150 155 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 151 156 const struct kvm_vcpu_init *init); 157 + int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); 152 158 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 153 159 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 154 160 struct kvm_one_reg;
+14 -3
arch/arm/include/asm/kvm_mmu.h
··· 62 62 int kvm_mmu_init(void); 63 63 void kvm_clear_hyp_idmap(void); 64 64 65 + static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) 66 + { 67 + *pmd = new_pmd; 68 + flush_pmd_entry(pmd); 69 + } 70 + 65 71 static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) 66 72 { 67 73 *pte = new_pte; ··· 109 103 pte_val(*pte) |= L_PTE_S2_RDWR; 110 104 } 111 105 106 + static inline void kvm_set_s2pmd_writable(pmd_t *pmd) 107 + { 108 + pmd_val(*pmd) |= L_PMD_S2_RDWR; 109 + } 110 + 112 111 struct kvm; 113 112 114 - static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) 113 + static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, 114 + unsigned long size) 115 115 { 116 116 /* 117 117 * If we are going to insert an instruction page and the icache is ··· 132 120 * need any kind of flushing (DDI 0406C.b - Page B3-1392). 133 121 */ 134 122 if (icache_is_pipt()) { 135 - unsigned long hva = gfn_to_hva(kvm, gfn); 136 - __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); 123 + __cpuc_coherent_user_range(hva, hva + size); 137 124 } else if (!icache_is_vivt_asid_tagged()) { 138 125 /* any kind of VIPT cache */ 139 126 __flush_icache_all();
+2
arch/arm/include/asm/pgtable-3level.h
··· 126 126 #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ 127 127 #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ 128 128 129 + #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ 130 + 129 131 /* 130 132 * Hyp-mode PL2 PTE definitions for LPAE. 131 133 */
+2 -1
arch/arm/include/uapi/asm/kvm.h
··· 63 63 64 64 /* Supported Processor Types */ 65 65 #define KVM_ARM_TARGET_CORTEX_A15 0 66 - #define KVM_ARM_NUM_TARGETS 1 66 + #define KVM_ARM_TARGET_CORTEX_A7 1 67 + #define KVM_ARM_NUM_TARGETS 2 67 68 68 69 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ 69 70 #define KVM_ARM_DEVICE_TYPE_SHIFT 0
+1
arch/arm/kvm/Kconfig
··· 20 20 bool "Kernel-based Virtual Machine (KVM) support" 21 21 select PREEMPT_NOTIFIERS 22 22 select ANON_INODES 23 + select HAVE_KVM_CPU_RELAX_INTERCEPT 23 24 select KVM_MMIO 24 25 select KVM_ARM_HOST 25 26 depends on ARM_VIRT_EXT && ARM_LPAE
+1 -1
arch/arm/kvm/Makefile
··· 19 19 20 20 obj-y += kvm-arm.o init.o interrupts.o 21 21 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o 22 - obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o 22 + obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o 23 23 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o 24 24 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+16 -2
arch/arm/kvm/arm.c
··· 152 152 return VM_FAULT_SIGBUS; 153 153 } 154 154 155 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 155 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 156 156 struct kvm_memory_slot *dont) 157 157 { 158 158 } 159 159 160 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 160 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 161 + unsigned long npages) 161 162 { 162 163 return 0; 163 164 } ··· 797 796 if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) 798 797 return -EFAULT; 799 798 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); 799 + } 800 + case KVM_ARM_PREFERRED_TARGET: { 801 + int err; 802 + struct kvm_vcpu_init init; 803 + 804 + err = kvm_vcpu_preferred_target(&init); 805 + if (err) 806 + return err; 807 + 808 + if (copy_to_user(argp, &init, sizeof(init))) 809 + return -EFAULT; 810 + 811 + return 0; 800 812 } 801 813 default: 802 814 return -EINVAL;
+120
arch/arm/kvm/coproc.c
··· 71 71 return 1; 72 72 } 73 73 74 + static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 75 + { 76 + /* 77 + * Compute guest MPIDR. We build a virtual cluster out of the 78 + * vcpu_id, but we read the 'U' bit from the underlying 79 + * hardware directly. 80 + */ 81 + vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | 82 + ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | 83 + (vcpu->vcpu_id & 3)); 84 + } 85 + 86 + /* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ 87 + static bool access_actlr(struct kvm_vcpu *vcpu, 88 + const struct coproc_params *p, 89 + const struct coproc_reg *r) 90 + { 91 + if (p->is_write) 92 + return ignore_write(vcpu, p); 93 + 94 + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; 95 + return true; 96 + } 97 + 98 + /* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */ 99 + static bool access_cbar(struct kvm_vcpu *vcpu, 100 + const struct coproc_params *p, 101 + const struct coproc_reg *r) 102 + { 103 + if (p->is_write) 104 + return write_to_read_only(vcpu, p); 105 + return read_zero(vcpu, p); 106 + } 107 + 108 + /* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */ 109 + static bool access_l2ctlr(struct kvm_vcpu *vcpu, 110 + const struct coproc_params *p, 111 + const struct coproc_reg *r) 112 + { 113 + if (p->is_write) 114 + return ignore_write(vcpu, p); 115 + 116 + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; 117 + return true; 118 + } 119 + 120 + static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 121 + { 122 + u32 l2ctlr, ncores; 123 + 124 + asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); 125 + l2ctlr &= ~(3 << 24); 126 + ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; 127 + /* How many cores in the current cluster and the next ones */ 128 + ncores -= (vcpu->vcpu_id & ~3); 129 + /* Cap it to the maximum number of cores in a single cluster */ 130 + ncores = min(ncores, 3U); 131 + l2ctlr |= (ncores & 3) << 24; 132 + 133 + vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; 134 
+ } 135 + 136 + static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 137 + { 138 + u32 actlr; 139 + 140 + /* ACTLR contains SMP bit: make sure you create all cpus first! */ 141 + asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); 142 + /* Make the SMP bit consistent with the guest configuration */ 143 + if (atomic_read(&vcpu->kvm->online_vcpus) > 1) 144 + actlr |= 1U << 6; 145 + else 146 + actlr &= ~(1U << 6); 147 + 148 + vcpu->arch.cp15[c1_ACTLR] = actlr; 149 + } 150 + 151 + /* 152 + * TRM entries: A7:4.3.50, A15:4.3.49 153 + * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). 154 + */ 155 + static bool access_l2ectlr(struct kvm_vcpu *vcpu, 156 + const struct coproc_params *p, 157 + const struct coproc_reg *r) 158 + { 159 + if (p->is_write) 160 + return ignore_write(vcpu, p); 161 + 162 + *vcpu_reg(vcpu, p->Rt1) = 0; 163 + return true; 164 + } 165 + 74 166 /* See note at ARM ARM B1.14.4 */ 75 167 static bool access_dcsw(struct kvm_vcpu *vcpu, 76 168 const struct coproc_params *p, ··· 245 153 * registers preceding 32-bit ones. 246 154 */ 247 155 static const struct coproc_reg cp15_regs[] = { 156 + /* MPIDR: we use VMPIDR for guest access. */ 157 + { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, 158 + NULL, reset_mpidr, c0_MPIDR }, 159 + 248 160 /* CSSELR: swapped by interrupt.S. */ 249 161 { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32, 250 162 NULL, reset_unknown, c0_CSSELR }, 163 + 164 + /* ACTLR: trapped by HCR.TAC bit. */ 165 + { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, 166 + access_actlr, reset_actlr, c1_ACTLR }, 167 + 168 + /* CPACR: swapped by interrupt.S. */ 169 + { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, 170 + NULL, reset_val, c1_CPACR, 0x00000000 }, 251 171 252 172 /* TTBR0/TTBR1: swapped by interrupt.S. 
*/ 253 173 { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, ··· 298 194 { CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw}, 299 195 { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw}, 300 196 { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw}, 197 + /* 198 + * L2CTLR access (guest wants to know #CPUs). 199 + */ 200 + { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, 201 + access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, 202 + { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, 203 + 301 204 /* 302 205 * Dummy performance monitor implementation. 303 206 */ ··· 345 234 /* CNTKCTL: swapped by interrupt.S. */ 346 235 { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, 347 236 NULL, reset_val, c14_CNTKCTL, 0x00000000 }, 237 + 238 + /* The Configuration Base Address Register. */ 239 + { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, 348 240 }; 349 241 350 242 /* Target specific emulation tables */ ··· 355 241 356 242 void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) 357 243 { 244 + unsigned int i; 245 + 246 + for (i = 1; i < table->num; i++) 247 + BUG_ON(cmp_reg(&table->table[i-1], 248 + &table->table[i]) >= 0); 249 + 358 250 target_tables[table->target] = table; 359 251 } 360 252
+1 -116
arch/arm/kvm/coproc_a15.c
··· 17 17 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 18 */ 19 19 #include <linux/kvm_host.h> 20 - #include <asm/cputype.h> 21 - #include <asm/kvm_arm.h> 22 - #include <asm/kvm_host.h> 23 - #include <asm/kvm_emulate.h> 24 20 #include <asm/kvm_coproc.h> 21 + #include <asm/kvm_emulate.h> 25 22 #include <linux/init.h> 26 23 27 - static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 28 - { 29 - /* 30 - * Compute guest MPIDR: 31 - * (Even if we present only one VCPU to the guest on an SMP 32 - * host we don't set the U bit in the MPIDR, or vice versa, as 33 - * revealing the underlying hardware properties is likely to 34 - * be the best choice). 35 - */ 36 - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK) 37 - | (vcpu->vcpu_id & MPIDR_LEVEL_MASK); 38 - } 39 - 40 24 #include "coproc.h" 41 - 42 - /* A15 TRM 4.3.28: RO WI */ 43 - static bool access_actlr(struct kvm_vcpu *vcpu, 44 - const struct coproc_params *p, 45 - const struct coproc_reg *r) 46 - { 47 - if (p->is_write) 48 - return ignore_write(vcpu, p); 49 - 50 - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; 51 - return true; 52 - } 53 - 54 - /* A15 TRM 4.3.60: R/O. */ 55 - static bool access_cbar(struct kvm_vcpu *vcpu, 56 - const struct coproc_params *p, 57 - const struct coproc_reg *r) 58 - { 59 - if (p->is_write) 60 - return write_to_read_only(vcpu, p); 61 - return read_zero(vcpu, p); 62 - } 63 - 64 - /* A15 TRM 4.3.48: R/O WI. 
*/ 65 - static bool access_l2ctlr(struct kvm_vcpu *vcpu, 66 - const struct coproc_params *p, 67 - const struct coproc_reg *r) 68 - { 69 - if (p->is_write) 70 - return ignore_write(vcpu, p); 71 - 72 - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; 73 - return true; 74 - } 75 - 76 - static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 77 - { 78 - u32 l2ctlr, ncores; 79 - 80 - asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); 81 - l2ctlr &= ~(3 << 24); 82 - ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; 83 - l2ctlr |= (ncores & 3) << 24; 84 - 85 - vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; 86 - } 87 - 88 - static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 89 - { 90 - u32 actlr; 91 - 92 - /* ACTLR contains SMP bit: make sure you create all cpus first! */ 93 - asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); 94 - /* Make the SMP bit consistent with the guest configuration */ 95 - if (atomic_read(&vcpu->kvm->online_vcpus) > 1) 96 - actlr |= 1U << 6; 97 - else 98 - actlr &= ~(1U << 6); 99 - 100 - vcpu->arch.cp15[c1_ACTLR] = actlr; 101 - } 102 - 103 - /* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */ 104 - static bool access_l2ectlr(struct kvm_vcpu *vcpu, 105 - const struct coproc_params *p, 106 - const struct coproc_reg *r) 107 - { 108 - if (p->is_write) 109 - return ignore_write(vcpu, p); 110 - 111 - *vcpu_reg(vcpu, p->Rt1) = 0; 112 - return true; 113 - } 114 25 115 26 /* 116 27 * A15-specific CP15 registers. ··· 32 121 * registers preceding 32-bit ones. 33 122 */ 34 123 static const struct coproc_reg a15_regs[] = { 35 - /* MPIDR: we use VMPIDR for guest access. */ 36 - { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, 37 - NULL, reset_mpidr, c0_MPIDR }, 38 - 39 124 /* SCTLR: swapped by interrupt.S. */ 40 125 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, 41 126 NULL, reset_val, c1_SCTLR, 0x00C50078 }, 42 - /* ACTLR: trapped by HCR.TAC bit. 
*/ 43 - { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, 44 - access_actlr, reset_actlr, c1_ACTLR }, 45 - /* CPACR: swapped by interrupt.S. */ 46 - { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, 47 - NULL, reset_val, c1_CPACR, 0x00000000 }, 48 - 49 - /* 50 - * L2CTLR access (guest wants to know #CPUs). 51 - */ 52 - { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, 53 - access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, 54 - { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, 55 - 56 - /* The Configuration Base Address Register. */ 57 - { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, 58 127 }; 59 128 60 129 static struct kvm_coproc_target_table a15_target_table = { ··· 45 154 46 155 static int __init coproc_a15_init(void) 47 156 { 48 - unsigned int i; 49 - 50 - for (i = 1; i < ARRAY_SIZE(a15_regs); i++) 51 - BUG_ON(cmp_reg(&a15_regs[i-1], 52 - &a15_regs[i]) >= 0); 53 - 54 157 kvm_register_target_coproc_table(&a15_target_table); 55 158 return 0; 56 159 }
+54
arch/arm/kvm/coproc_a7.c
··· 1 + /* 2 + * Copyright (C) 2012 - Virtual Open Systems and Columbia University 3 + * Copyright (C) 2013 - ARM Ltd 4 + * 5 + * Authors: Rusty Russell <rusty@rustcorp.au> 6 + * Christoffer Dall <c.dall@virtualopensystems.com> 7 + * Jonathan Austin <jonathan.austin@arm.com> 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of the GNU General Public License, version 2, as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, 14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 + * GNU General Public License for more details. 17 + * 18 + * You should have received a copy of the GNU General Public License 19 + * along with this program; if not, write to the Free Software 20 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 21 + */ 22 + #include <linux/kvm_host.h> 23 + #include <asm/kvm_coproc.h> 24 + #include <asm/kvm_emulate.h> 25 + #include <linux/init.h> 26 + 27 + #include "coproc.h" 28 + 29 + /* 30 + * Cortex-A7 specific CP15 registers. 31 + * CRn denotes the primary register number, but is copied to the CRm in the 32 + * user space API for 64-bit register access in line with the terminology used 33 + * in the ARM ARM. 34 + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit 35 + * registers preceding 32-bit ones. 36 + */ 37 + static const struct coproc_reg a7_regs[] = { 38 + /* SCTLR: swapped by interrupt.S. 
*/ 39 + { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, 40 + NULL, reset_val, c1_SCTLR, 0x00C50878 }, 41 + }; 42 + 43 + static struct kvm_coproc_target_table a7_target_table = { 44 + .target = KVM_ARM_TARGET_CORTEX_A7, 45 + .table = a7_regs, 46 + .num = ARRAY_SIZE(a7_regs), 47 + }; 48 + 49 + static int __init coproc_a7_init(void) 50 + { 51 + kvm_register_target_coproc_table(&a7_target_table); 52 + return 0; 53 + } 54 + late_initcall(coproc_a7_init);
+1 -1
arch/arm/kvm/emulate.c
··· 354 354 *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; 355 355 356 356 if (is_pabt) { 357 - /* Set DFAR and DFSR */ 357 + /* Set IFAR and IFSR */ 358 358 vcpu->arch.cp15[c6_IFAR] = addr; 359 359 is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); 360 360 /* Always give debug fault for now - should give guest a clue */
+23 -1
arch/arm/kvm/guest.c
··· 190 190 return -EINVAL; 191 191 192 192 switch (part_number) { 193 + case ARM_CPU_PART_CORTEX_A7: 194 + return KVM_ARM_TARGET_CORTEX_A7; 193 195 case ARM_CPU_PART_CORTEX_A15: 194 196 return KVM_ARM_TARGET_CORTEX_A15; 195 197 default: ··· 204 202 { 205 203 unsigned int i; 206 204 207 - /* We can only do a cortex A15 for now. */ 205 + /* We can only cope with guest==host and only on A15/A7 (for now). */ 208 206 if (init->target != kvm_target_cpu()) 209 207 return -EINVAL; 210 208 ··· 222 220 223 221 /* Now we know what it is, we can reset it. */ 224 222 return kvm_reset_vcpu(vcpu); 223 + } 224 + 225 + int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) 226 + { 227 + int target = kvm_target_cpu(); 228 + 229 + if (target < 0) 230 + return -ENODEV; 231 + 232 + memset(init, 0, sizeof(*init)); 233 + 234 + /* 235 + * For now, we don't return any features. 236 + * In future, we might use features to return target 237 + * specific features available for the preferred 238 + * target type. 239 + */ 240 + init->target = (__u32)target; 241 + 242 + return 0; 225 243 } 226 244 227 245 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+13 -7
arch/arm/kvm/handle_exit.c
··· 73 73 } 74 74 75 75 /** 76 - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest 76 + * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests 77 77 * @vcpu: the vcpu pointer 78 78 * @run: the kvm_run structure pointer 79 79 * 80 - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will 81 - * halt execution of world-switches and schedule other host processes until 82 - * there is an incoming IRQ or FIQ to the VM. 80 + * WFE: Yield the CPU and come back to this vcpu when the scheduler 81 + * decides to. 82 + * WFI: Simply call kvm_vcpu_block(), which will halt execution of 83 + * world-switches and schedule other host processes until there is an 84 + * incoming IRQ or FIQ to the VM. 83 85 */ 84 - static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) 86 + static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) 85 87 { 86 88 trace_kvm_wfi(*vcpu_pc(vcpu)); 87 - kvm_vcpu_block(vcpu); 89 + if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) 90 + kvm_vcpu_on_spin(vcpu); 91 + else 92 + kvm_vcpu_block(vcpu); 93 + 88 94 return 1; 89 95 } 90 96 91 97 static exit_handle_fn arm_exit_handlers[] = { 92 - [HSR_EC_WFI] = kvm_handle_wfi, 98 + [HSR_EC_WFI] = kvm_handle_wfx, 93 99 [HSR_EC_CP15_32] = kvm_handle_cp15_32, 94 100 [HSR_EC_CP15_64] = kvm_handle_cp15_64, 95 101 [HSR_EC_CP14_MR] = kvm_handle_cp14_access,
+75 -11
arch/arm/kvm/mmio.c
··· 23 23 24 24 #include "trace.h" 25 25 26 + static void mmio_write_buf(char *buf, unsigned int len, unsigned long data) 27 + { 28 + void *datap = NULL; 29 + union { 30 + u8 byte; 31 + u16 hword; 32 + u32 word; 33 + u64 dword; 34 + } tmp; 35 + 36 + switch (len) { 37 + case 1: 38 + tmp.byte = data; 39 + datap = &tmp.byte; 40 + break; 41 + case 2: 42 + tmp.hword = data; 43 + datap = &tmp.hword; 44 + break; 45 + case 4: 46 + tmp.word = data; 47 + datap = &tmp.word; 48 + break; 49 + case 8: 50 + tmp.dword = data; 51 + datap = &tmp.dword; 52 + break; 53 + } 54 + 55 + memcpy(buf, datap, len); 56 + } 57 + 58 + static unsigned long mmio_read_buf(char *buf, unsigned int len) 59 + { 60 + unsigned long data = 0; 61 + union { 62 + u16 hword; 63 + u32 word; 64 + u64 dword; 65 + } tmp; 66 + 67 + switch (len) { 68 + case 1: 69 + data = buf[0]; 70 + break; 71 + case 2: 72 + memcpy(&tmp.hword, buf, len); 73 + data = tmp.hword; 74 + break; 75 + case 4: 76 + memcpy(&tmp.word, buf, len); 77 + data = tmp.word; 78 + break; 79 + case 8: 80 + memcpy(&tmp.dword, buf, len); 81 + data = tmp.dword; 82 + break; 83 + } 84 + 85 + return data; 86 + } 87 + 26 88 /** 27 89 * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation 28 90 * @vcpu: The VCPU pointer ··· 95 33 */ 96 34 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) 97 35 { 98 - unsigned long *dest; 36 + unsigned long data; 99 37 unsigned int len; 100 38 int mask; 101 39 102 40 if (!run->mmio.is_write) { 103 - dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt); 104 - *dest = 0; 105 - 106 41 len = run->mmio.len; 107 42 if (len > sizeof(unsigned long)) 108 43 return -EINVAL; 109 44 110 - memcpy(dest, run->mmio.data, len); 111 - 112 - trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, 113 - *((u64 *)run->mmio.data)); 45 + data = mmio_read_buf(run->mmio.data, len); 114 46 115 47 if (vcpu->arch.mmio_decode.sign_extend && 116 48 len < sizeof(unsigned long)) { 117 49 mask = 1U << ((len * 8) - 1); 
118 - *dest = (*dest ^ mask) - mask; 50 + data = (data ^ mask) - mask; 119 51 } 52 + 53 + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, 54 + data); 55 + data = vcpu_data_host_to_guest(vcpu, data, len); 56 + *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data; 120 57 } 121 58 122 59 return 0; ··· 166 105 phys_addr_t fault_ipa) 167 106 { 168 107 struct kvm_exit_mmio mmio; 108 + unsigned long data; 169 109 unsigned long rt; 170 110 int ret; 171 111 ··· 187 125 } 188 126 189 127 rt = vcpu->arch.mmio_decode.rt; 128 + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); 129 + 190 130 trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : 191 131 KVM_TRACE_MMIO_READ_UNSATISFIED, 192 132 mmio.len, fault_ipa, 193 - (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0); 133 + (mmio.is_write) ? data : 0); 194 134 195 135 if (mmio.is_write) 196 - memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len); 136 + mmio_write_buf(mmio.data, mmio.len, data); 197 137 198 138 if (vgic_handle_mmio(vcpu, run, &mmio)) 199 139 return 1;
+185 -38
arch/arm/kvm/mmu.c
··· 19 19 #include <linux/mman.h> 20 20 #include <linux/kvm_host.h> 21 21 #include <linux/io.h> 22 + #include <linux/hugetlb.h> 22 23 #include <trace/events/kvm.h> 23 24 #include <asm/pgalloc.h> 24 25 #include <asm/cacheflush.h> ··· 41 40 static unsigned long hyp_idmap_start; 42 41 static unsigned long hyp_idmap_end; 43 42 static phys_addr_t hyp_idmap_vector; 43 + 44 + #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) 44 45 45 46 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 46 47 { ··· 96 93 97 94 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) 98 95 { 99 - pmd_t *pmd_table = pmd_offset(pud, 0); 100 - pud_clear(pud); 101 - kvm_tlb_flush_vmid_ipa(kvm, addr); 102 - pmd_free(NULL, pmd_table); 96 + if (pud_huge(*pud)) { 97 + pud_clear(pud); 98 + kvm_tlb_flush_vmid_ipa(kvm, addr); 99 + } else { 100 + pmd_t *pmd_table = pmd_offset(pud, 0); 101 + pud_clear(pud); 102 + kvm_tlb_flush_vmid_ipa(kvm, addr); 103 + pmd_free(NULL, pmd_table); 104 + } 103 105 put_page(virt_to_page(pud)); 104 106 } 105 107 106 108 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) 107 109 { 108 - pte_t *pte_table = pte_offset_kernel(pmd, 0); 109 - pmd_clear(pmd); 110 - kvm_tlb_flush_vmid_ipa(kvm, addr); 111 - pte_free_kernel(NULL, pte_table); 110 + if (kvm_pmd_huge(*pmd)) { 111 + pmd_clear(pmd); 112 + kvm_tlb_flush_vmid_ipa(kvm, addr); 113 + } else { 114 + pte_t *pte_table = pte_offset_kernel(pmd, 0); 115 + pmd_clear(pmd); 116 + kvm_tlb_flush_vmid_ipa(kvm, addr); 117 + pte_free_kernel(NULL, pte_table); 118 + } 112 119 put_page(virt_to_page(pmd)); 113 120 } 114 121 ··· 149 136 continue; 150 137 } 151 138 139 + if (pud_huge(*pud)) { 140 + /* 141 + * If we are dealing with a huge pud, just clear it and 142 + * move on. 
143 + */ 144 + clear_pud_entry(kvm, pud, addr); 145 + addr = pud_addr_end(addr, end); 146 + continue; 147 + } 148 + 152 149 pmd = pmd_offset(pud, addr); 153 150 if (pmd_none(*pmd)) { 154 151 addr = pmd_addr_end(addr, end); 155 152 continue; 156 153 } 157 154 158 - pte = pte_offset_kernel(pmd, addr); 159 - clear_pte_entry(kvm, pte, addr); 160 - next = addr + PAGE_SIZE; 155 + if (!kvm_pmd_huge(*pmd)) { 156 + pte = pte_offset_kernel(pmd, addr); 157 + clear_pte_entry(kvm, pte, addr); 158 + next = addr + PAGE_SIZE; 159 + } 161 160 162 - /* If we emptied the pte, walk back up the ladder */ 163 - if (page_empty(pte)) { 161 + /* 162 + * If the pmd entry is to be cleared, walk back up the ladder 163 + */ 164 + if (kvm_pmd_huge(*pmd) || page_empty(pte)) { 164 165 clear_pmd_entry(kvm, pmd, addr); 165 166 next = pmd_addr_end(addr, end); 166 167 if (page_empty(pmd) && !page_empty(pud)) { ··· 447 420 kvm->arch.pgd = NULL; 448 421 } 449 422 450 - 451 - static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 452 - phys_addr_t addr, const pte_t *new_pte, bool iomap) 423 + static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 424 + phys_addr_t addr) 453 425 { 454 426 pgd_t *pgd; 455 427 pud_t *pud; 456 428 pmd_t *pmd; 457 - pte_t *pte, old_pte; 458 429 459 - /* Create 2nd stage page table mapping - Level 1 */ 460 430 pgd = kvm->arch.pgd + pgd_index(addr); 461 431 pud = pud_offset(pgd, addr); 462 432 if (pud_none(*pud)) { 463 433 if (!cache) 464 - return 0; /* ignore calls from kvm_set_spte_hva */ 434 + return NULL; 465 435 pmd = mmu_memory_cache_alloc(cache); 466 436 pud_populate(NULL, pud, pmd); 467 437 get_page(virt_to_page(pud)); 468 438 } 469 439 470 - pmd = pmd_offset(pud, addr); 440 + return pmd_offset(pud, addr); 441 + } 471 442 472 - /* Create 2nd stage page table mapping - Level 2 */ 443 + static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache 444 + *cache, phys_addr_t addr, const pmd_t *new_pmd) 445 + 
{ 446 + pmd_t *pmd, old_pmd; 447 + 448 + pmd = stage2_get_pmd(kvm, cache, addr); 449 + VM_BUG_ON(!pmd); 450 + 451 + /* 452 + * Mapping in huge pages should only happen through a fault. If a 453 + * page is merged into a transparent huge page, the individual 454 + * subpages of that huge page should be unmapped through MMU 455 + * notifiers before we get here. 456 + * 457 + * Merging of CompoundPages is not supported; they should become 458 + * splitting first, unmapped, merged, and mapped back in on-demand. 459 + */ 460 + VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); 461 + 462 + old_pmd = *pmd; 463 + kvm_set_pmd(pmd, *new_pmd); 464 + if (pmd_present(old_pmd)) 465 + kvm_tlb_flush_vmid_ipa(kvm, addr); 466 + else 467 + get_page(virt_to_page(pmd)); 468 + return 0; 469 + } 470 + 471 + static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 472 + phys_addr_t addr, const pte_t *new_pte, bool iomap) 473 + { 474 + pmd_t *pmd; 475 + pte_t *pte, old_pte; 476 + 477 + /* Create stage-2 page table mapping - Level 1 */ 478 + pmd = stage2_get_pmd(kvm, cache, addr); 479 + if (!pmd) { 480 + /* 481 + * Ignore calls from kvm_set_spte_hva for unallocated 482 + * address ranges. 483 + */ 484 + return 0; 485 + } 486 + 487 + /* Create stage-2 page mappings - Level 2 */ 473 488 if (pmd_none(*pmd)) { 474 489 if (!cache) 475 490 return 0; /* ignore calls from kvm_set_spte_hva */ ··· 576 507 return ret; 577 508 } 578 509 510 + static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) 511 + { 512 + pfn_t pfn = *pfnp; 513 + gfn_t gfn = *ipap >> PAGE_SHIFT; 514 + 515 + if (PageTransCompound(pfn_to_page(pfn))) { 516 + unsigned long mask; 517 + /* 518 + * The address we faulted on is backed by a transparent huge 519 + * page. However, because we map the compound huge page and 520 + * not the individual tail page, we need to transfer the 521 + * refcount to the head page. 
We have to be careful that the 522 + * THP doesn't start to split while we are adjusting the 523 + * refcounts. 524 + * 525 + * We are sure this doesn't happen, because mmu_notifier_retry 526 + * was successful and we are holding the mmu_lock, so if this 527 + * THP is trying to split, it will be blocked in the mmu 528 + * notifier before touching any of the pages, specifically 529 + * before being able to call __split_huge_page_refcount(). 530 + * 531 + * We can therefore safely transfer the refcount from PG_tail 532 + * to PG_head and switch the pfn from a tail page to the head 533 + * page accordingly. 534 + */ 535 + mask = PTRS_PER_PMD - 1; 536 + VM_BUG_ON((gfn & mask) != (pfn & mask)); 537 + if (pfn & mask) { 538 + *ipap &= PMD_MASK; 539 + kvm_release_pfn_clean(pfn); 540 + pfn &= ~mask; 541 + kvm_get_pfn(pfn); 542 + *pfnp = pfn; 543 + } 544 + 545 + return true; 546 + } 547 + 548 + return false; 549 + } 550 + 579 551 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, 580 - gfn_t gfn, struct kvm_memory_slot *memslot, 552 + struct kvm_memory_slot *memslot, 581 553 unsigned long fault_status) 582 554 { 583 - pte_t new_pte; 584 - pfn_t pfn; 585 555 int ret; 586 - bool write_fault, writable; 556 + bool write_fault, writable, hugetlb = false, force_pte = false; 587 557 unsigned long mmu_seq; 558 + gfn_t gfn = fault_ipa >> PAGE_SHIFT; 559 + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); 560 + struct kvm *kvm = vcpu->kvm; 588 561 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; 562 + struct vm_area_struct *vma; 563 + pfn_t pfn; 589 564 590 565 write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); 591 566 if (fault_status == FSC_PERM && !write_fault) { 592 567 kvm_err("Unexpected L2 read permission error\n"); 593 568 return -EFAULT; 594 569 } 570 + 571 + /* Let's check if we will get back a huge page backed by hugetlbfs */ 572 + down_read(&current->mm->mmap_sem); 573 + vma = find_vma_intersection(current->mm, hva, hva + 1); 
574 + if (is_vm_hugetlb_page(vma)) { 575 + hugetlb = true; 576 + gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; 577 + } else { 578 + /* 579 + * Pages belonging to VMAs not aligned to the PMD mapping 580 + * granularity cannot be mapped using block descriptors even 581 + * if the pages belong to a THP for the process, because the 582 + * stage-2 block descriptor will cover more than a single THP 583 + * and we loose atomicity for unmapping, updates, and splits 584 + * of the THP or other pages in the stage-2 block range. 585 + */ 586 + if (vma->vm_start & ~PMD_MASK) 587 + force_pte = true; 588 + } 589 + up_read(&current->mm->mmap_sem); 595 590 596 591 /* We need minimum second+third level pages */ 597 592 ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); ··· 674 541 */ 675 542 smp_rmb(); 676 543 677 - pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); 544 + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); 678 545 if (is_error_pfn(pfn)) 679 546 return -EFAULT; 680 547 681 - new_pte = pfn_pte(pfn, PAGE_S2); 682 - coherent_icache_guest_page(vcpu->kvm, gfn); 683 - 684 - spin_lock(&vcpu->kvm->mmu_lock); 685 - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) 548 + spin_lock(&kvm->mmu_lock); 549 + if (mmu_notifier_retry(kvm, mmu_seq)) 686 550 goto out_unlock; 687 - if (writable) { 688 - kvm_set_s2pte_writable(&new_pte); 689 - kvm_set_pfn_dirty(pfn); 551 + if (!hugetlb && !force_pte) 552 + hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); 553 + 554 + if (hugetlb) { 555 + pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); 556 + new_pmd = pmd_mkhuge(new_pmd); 557 + if (writable) { 558 + kvm_set_s2pmd_writable(&new_pmd); 559 + kvm_set_pfn_dirty(pfn); 560 + } 561 + coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE); 562 + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); 563 + } else { 564 + pte_t new_pte = pfn_pte(pfn, PAGE_S2); 565 + if (writable) { 566 + kvm_set_s2pte_writable(&new_pte); 567 + kvm_set_pfn_dirty(pfn); 568 + } 569 + 
coherent_icache_guest_page(kvm, hva, PAGE_SIZE); 570 + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); 690 571 } 691 - stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); 572 + 692 573 693 574 out_unlock: 694 - spin_unlock(&vcpu->kvm->mmu_lock); 575 + spin_unlock(&kvm->mmu_lock); 695 576 kvm_release_pfn_clean(pfn); 696 - return 0; 577 + return ret; 697 578 } 698 579 699 580 /** ··· 776 629 777 630 memslot = gfn_to_memslot(vcpu->kvm, gfn); 778 631 779 - ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); 632 + ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); 780 633 if (ret == 0) 781 634 ret = 1; 782 635 out_unlock:
+17 -4
arch/arm/kvm/psci.c
··· 18 18 #include <linux/kvm_host.h> 19 19 #include <linux/wait.h> 20 20 21 + #include <asm/cputype.h> 21 22 #include <asm/kvm_emulate.h> 22 23 #include <asm/kvm_psci.h> 23 24 ··· 35 34 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) 36 35 { 37 36 struct kvm *kvm = source_vcpu->kvm; 38 - struct kvm_vcpu *vcpu; 37 + struct kvm_vcpu *vcpu = NULL, *tmp; 39 38 wait_queue_head_t *wq; 40 39 unsigned long cpu_id; 40 + unsigned long mpidr; 41 41 phys_addr_t target_pc; 42 + int i; 42 43 43 44 cpu_id = *vcpu_reg(source_vcpu, 1); 44 45 if (vcpu_mode_is_32bit(source_vcpu)) 45 46 cpu_id &= ~((u32) 0); 46 47 47 - if (cpu_id >= atomic_read(&kvm->online_vcpus)) 48 + kvm_for_each_vcpu(i, tmp, kvm) { 49 + mpidr = kvm_vcpu_get_mpidr(tmp); 50 + if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { 51 + vcpu = tmp; 52 + break; 53 + } 54 + } 55 + 56 + if (!vcpu) 48 57 return KVM_PSCI_RET_INVAL; 49 58 50 59 target_pc = *vcpu_reg(source_vcpu, 2); 51 - 52 - vcpu = kvm_get_vcpu(kvm, cpu_id); 53 60 54 61 wq = kvm_arch_vcpu_wq(vcpu); 55 62 if (!waitqueue_active(wq)) ··· 70 61 target_pc &= ~((phys_addr_t) 1); 71 62 vcpu_set_thumb(vcpu); 72 63 } 64 + 65 + /* Propagate caller endianness */ 66 + if (kvm_vcpu_is_be(source_vcpu)) 67 + kvm_vcpu_set_be(vcpu); 73 68 74 69 *vcpu_pc(vcpu) = target_pc; 75 70 vcpu->arch.pause = false;
+6 -9
arch/arm/kvm/reset.c
··· 30 30 #include <kvm/arm_arch_timer.h> 31 31 32 32 /****************************************************************************** 33 - * Cortex-A15 Reset Values 33 + * Cortex-A15 and Cortex-A7 Reset Values 34 34 */ 35 35 36 - static const int a15_max_cpu_idx = 3; 37 - 38 - static struct kvm_regs a15_regs_reset = { 36 + static struct kvm_regs cortexa_regs_reset = { 39 37 .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, 40 38 }; 41 39 42 - static const struct kvm_irq_level a15_vtimer_irq = { 40 + static const struct kvm_irq_level cortexa_vtimer_irq = { 43 41 { .irq = 27 }, 44 42 .level = 1, 45 43 }; ··· 60 62 const struct kvm_irq_level *cpu_vtimer_irq; 61 63 62 64 switch (vcpu->arch.target) { 65 + case KVM_ARM_TARGET_CORTEX_A7: 63 66 case KVM_ARM_TARGET_CORTEX_A15: 64 - if (vcpu->vcpu_id > a15_max_cpu_idx) 65 - return -EINVAL; 66 - reset_regs = &a15_regs_reset; 67 + reset_regs = &cortexa_regs_reset; 67 68 vcpu->arch.midr = read_cpuid_id(); 68 - cpu_vtimer_irq = &a15_vtimer_irq; 69 + cpu_vtimer_irq = &cortexa_vtimer_irq; 69 70 break; 70 71 default: 71 72 return -ENODEV;
+6 -2
arch/arm64/include/asm/kvm_arm.h
··· 63 63 * TAC: Trap ACTLR 64 64 * TSC: Trap SMC 65 65 * TSW: Trap cache operations by set/way 66 + * TWE: Trap WFE 66 67 * TWI: Trap WFI 67 68 * TIDCP: Trap L2CTLR/L2ECTLR 68 69 * BSU_IS: Upgrade barriers to the inner shareable domain ··· 73 72 * FMO: Override CPSR.F and enable signaling with VF 74 73 * SWIO: Turn set/way invalidates into set/way clean+invalidate 75 74 */ 76 - #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ 77 - HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ 75 + #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ 76 + HCR_BSU_IS | HCR_FB | HCR_TAC | \ 77 + HCR_AMO | HCR_IMO | HCR_FMO | \ 78 78 HCR_SWIO | HCR_TIDCP | HCR_RW) 79 79 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) 80 80 ··· 243 241 #define ESR_EL2_EC_BRK64 (0x3C) 244 242 245 243 #define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 244 + 245 + #define ESR_EL2_EC_WFI_ISS_WFE (1 << 0) 246 246 247 247 #endif /* __ARM64_KVM_ARM_H__ */
+61
arch/arm64/include/asm/kvm_emulate.h
··· 177 177 return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; 178 178 } 179 179 180 + static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) 181 + { 182 + return vcpu_sys_reg(vcpu, MPIDR_EL1); 183 + } 184 + 185 + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) 186 + { 187 + if (vcpu_mode_is_32bit(vcpu)) 188 + *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; 189 + else 190 + vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25); 191 + } 192 + 193 + static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) 194 + { 195 + if (vcpu_mode_is_32bit(vcpu)) 196 + return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); 197 + 198 + return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); 199 + } 200 + 201 + static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, 202 + unsigned long data, 203 + unsigned int len) 204 + { 205 + if (kvm_vcpu_is_be(vcpu)) { 206 + switch (len) { 207 + case 1: 208 + return data & 0xff; 209 + case 2: 210 + return be16_to_cpu(data & 0xffff); 211 + case 4: 212 + return be32_to_cpu(data & 0xffffffff); 213 + default: 214 + return be64_to_cpu(data); 215 + } 216 + } 217 + 218 + return data; /* Leave LE untouched */ 219 + } 220 + 221 + static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, 222 + unsigned long data, 223 + unsigned int len) 224 + { 225 + if (kvm_vcpu_is_be(vcpu)) { 226 + switch (len) { 227 + case 1: 228 + return data & 0xff; 229 + case 2: 230 + return cpu_to_be16(data & 0xffff); 231 + case 4: 232 + return cpu_to_be32(data & 0xffffffff); 233 + default: 234 + return cpu_to_be64(data); 235 + } 236 + } 237 + 238 + return data; /* Leave LE untouched */ 239 + } 240 + 180 241 #endif /* __ARM64_KVM_EMULATE_H__ */
+1 -5
arch/arm64/include/asm/kvm_host.h
··· 36 36 37 37 #define KVM_VCPU_MAX_FEATURES 2 38 38 39 - /* We don't currently support large pages. */ 40 - #define KVM_HPAGE_GFN_SHIFT(x) 0 41 - #define KVM_NR_PAGE_SIZES 1 42 - #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) 43 - 44 39 struct kvm_vcpu; 45 40 int kvm_target_cpu(void); 46 41 int kvm_reset_vcpu(struct kvm_vcpu *vcpu); ··· 146 151 struct kvm_vcpu_init; 147 152 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 148 153 const struct kvm_vcpu_init *init); 154 + int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); 149 155 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 150 156 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 151 157 struct kvm_one_reg;
+9 -3
arch/arm64/include/asm/kvm_mmu.h
··· 91 91 void kvm_clear_hyp_idmap(void); 92 92 93 93 #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) 94 + #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) 94 95 95 96 static inline bool kvm_is_write_fault(unsigned long esr) 96 97 { ··· 117 116 pte_val(*pte) |= PTE_S2_RDWR; 118 117 } 119 118 119 + static inline void kvm_set_s2pmd_writable(pmd_t *pmd) 120 + { 121 + pmd_val(*pmd) |= PMD_S2_RDWR; 122 + } 123 + 120 124 struct kvm; 121 125 122 - static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) 126 + static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, 127 + unsigned long size) 123 128 { 124 129 if (!icache_is_aliasing()) { /* PIPT */ 125 - unsigned long hva = gfn_to_hva(kvm, gfn); 126 - flush_icache_range(hva, hva + PAGE_SIZE); 130 + flush_icache_range(hva, hva + size); 127 131 } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ 128 132 /* any kind of VIPT cache */ 129 133 __flush_icache_all();
+2
arch/arm64/include/asm/pgtable-hwdef.h
··· 85 85 #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ 86 86 #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ 87 87 88 + #define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ 89 + 88 90 /* 89 91 * Memory Attribute override for Stage-2 (MemAttr[3:0]) 90 92 */
+1
arch/arm64/kvm/Kconfig
··· 21 21 select MMU_NOTIFIER 22 22 select PREEMPT_NOTIFIERS 23 23 select ANON_INODES 24 + select HAVE_KVM_CPU_RELAX_INTERCEPT 24 25 select KVM_MMIO 25 26 select KVM_ARM_HOST 26 27 select KVM_ARM_VGIC
+20
arch/arm64/kvm/guest.c
··· 248 248 return kvm_reset_vcpu(vcpu); 249 249 } 250 250 251 + int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) 252 + { 253 + int target = kvm_target_cpu(); 254 + 255 + if (target < 0) 256 + return -ENODEV; 257 + 258 + memset(init, 0, sizeof(*init)); 259 + 260 + /* 261 + * For now, we don't return any features. 262 + * In future, we might use features to return target 263 + * specific features available for the preferred 264 + * target type. 265 + */ 266 + init->target = (__u32)target; 267 + 268 + return 0; 269 + } 270 + 251 271 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 252 272 { 253 273 return -EINVAL;
+13 -5
arch/arm64/kvm/handle_exit.c
··· 47 47 } 48 48 49 49 /** 50 - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest 50 + * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event 51 + * instruction executed by a guest 52 + * 51 53 * @vcpu: the vcpu pointer 52 54 * 53 - * Simply call kvm_vcpu_block(), which will halt execution of 55 + * WFE: Yield the CPU and come back to this vcpu when the scheduler 56 + * decides to. 57 + * WFI: Simply call kvm_vcpu_block(), which will halt execution of 54 58 * world-switches and schedule other host processes until there is an 55 59 * incoming IRQ or FIQ to the VM. 56 60 */ 57 - static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) 61 + static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) 58 62 { 59 - kvm_vcpu_block(vcpu); 63 + if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE) 64 + kvm_vcpu_on_spin(vcpu); 65 + else 66 + kvm_vcpu_block(vcpu); 67 + 60 68 return 1; 61 69 } 62 70 63 71 static exit_handle_fn arm_exit_handlers[] = { 64 - [ESR_EL2_EC_WFI] = kvm_handle_wfi, 72 + [ESR_EL2_EC_WFI] = kvm_handle_wfx, 65 73 [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, 66 74 [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, 67 75 [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access,
+1 -5
arch/ia64/include/asm/kvm_host.h
··· 234 234 #define KVM_REQ_PTC_G 32 235 235 #define KVM_REQ_RESUME 33 236 236 237 - #define KVM_HPAGE_GFN_SHIFT(x) 0 238 - #define KVM_NR_PAGE_SIZES 1 239 - #define KVM_PAGES_PER_HPAGE(x) 1 240 - 241 237 struct kvm; 242 238 struct kvm_vcpu; 243 239 ··· 476 480 477 481 struct list_head assigned_dev_head; 478 482 struct iommu_domain *iommu_domain; 479 - int iommu_flags; 483 + bool iommu_noncoherent; 480 484 481 485 unsigned long irq_sources_bitmap; 482 486 unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+3 -2
arch/ia64/kvm/kvm-ia64.c
··· 1550 1550 return VM_FAULT_SIGBUS; 1551 1551 } 1552 1552 1553 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 1553 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 1554 1554 struct kvm_memory_slot *dont) 1555 1555 { 1556 1556 } 1557 1557 1558 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 1558 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1559 + unsigned long npages) 1559 1560 { 1560 1561 return 0; 1561 1562 }
-7
arch/mips/include/asm/kvm_host.h
··· 27 27 28 28 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 29 29 30 - /* Don't support huge pages */ 31 - #define KVM_HPAGE_GFN_SHIFT(x) 0 32 - 33 - /* We don't currently support large pages. */ 34 - #define KVM_NR_PAGE_SIZES 1 35 - #define KVM_PAGES_PER_HPAGE(x) 1 36 - 37 30 38 31 39 32 /* Special address that contains the comm page, used for reducing # of traps */
+3 -2
arch/mips/kvm/kvm_mips.c
··· 198 198 return -ENOIOCTLCMD; 199 199 } 200 200 201 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 201 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 202 202 struct kvm_memory_slot *dont) 203 203 { 204 204 } 205 205 206 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 206 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 207 + unsigned long npages) 207 208 { 208 209 return 0; 209 210 }
+4
arch/powerpc/include/asm/disassemble.h
··· 77 77 return inst & 0xffff; 78 78 } 79 79 80 + static inline unsigned int get_oc(u32 inst) 81 + { 82 + return (inst >> 11) & 0x7fff; 83 + } 80 84 #endif /* __ASM_PPC_DISASSEMBLE_H__ */
+20 -1
arch/powerpc/include/asm/exception-64s.h
··· 198 198 cmpwi r10,0; \ 199 199 bne do_kvm_##n 200 200 201 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 202 + /* 203 + * If hv is possible, interrupts come into to the hv version 204 + * of the kvmppc_interrupt code, which then jumps to the PR handler, 205 + * kvmppc_interrupt_pr, if the guest is a PR guest. 206 + */ 207 + #define kvmppc_interrupt kvmppc_interrupt_hv 208 + #else 209 + #define kvmppc_interrupt kvmppc_interrupt_pr 210 + #endif 211 + 201 212 #define __KVM_HANDLER(area, h, n) \ 202 213 do_kvm_##n: \ 203 214 BEGIN_FTR_SECTION_NESTED(947) \ 204 215 ld r10,area+EX_CFAR(r13); \ 205 216 std r10,HSTATE_CFAR(r13); \ 206 217 END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947); \ 218 + BEGIN_FTR_SECTION_NESTED(948) \ 219 + ld r10,area+EX_PPR(r13); \ 220 + std r10,HSTATE_PPR(r13); \ 221 + END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ 207 222 ld r10,area+EX_R10(r13); \ 208 223 stw r9,HSTATE_SCRATCH1(r13); \ 209 224 ld r9,area+EX_R9(r13); \ ··· 232 217 ld r10,area+EX_R10(r13); \ 233 218 beq 89f; \ 234 219 stw r9,HSTATE_SCRATCH1(r13); \ 220 + BEGIN_FTR_SECTION_NESTED(948) \ 221 + ld r9,area+EX_PPR(r13); \ 222 + std r9,HSTATE_PPR(r13); \ 223 + END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ 235 224 ld r9,area+EX_R9(r13); \ 236 225 std r12,HSTATE_SCRATCH0(r13); \ 237 226 li r12,n; \ ··· 255 236 #define KVM_HANDLER_SKIP(area, h, n) 256 237 #endif 257 238 258 - #ifdef CONFIG_KVM_BOOK3S_PR 239 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 259 240 #define KVMTEST_PR(n) __KVMTEST(n) 260 241 #define KVM_HANDLER_PR(area, h, n) __KVM_HANDLER(area, h, n) 261 242 #define KVM_HANDLER_PR_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n)
+4
arch/powerpc/include/asm/kvm_asm.h
··· 123 123 #define BOOK3S_HFLAG_SLB 0x2 124 124 #define BOOK3S_HFLAG_PAIRED_SINGLE 0x4 125 125 #define BOOK3S_HFLAG_NATIVE_PS 0x8 126 + #define BOOK3S_HFLAG_MULTI_PGSIZE 0x10 127 + #define BOOK3S_HFLAG_NEW_TLBIE 0x20 126 128 127 129 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ 128 130 #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ ··· 138 136 #define KVM_GUEST_MODE_NONE 0 139 137 #define KVM_GUEST_MODE_GUEST 1 140 138 #define KVM_GUEST_MODE_SKIP 2 139 + #define KVM_GUEST_MODE_GUEST_HV 3 140 + #define KVM_GUEST_MODE_HOST_HV 4 141 141 142 142 #define KVM_INST_FETCH_FAILED -1 143 143
+19 -213
arch/powerpc/include/asm/kvm_book3s.h
··· 58 58 struct hlist_node list_pte_long; 59 59 struct hlist_node list_vpte; 60 60 struct hlist_node list_vpte_long; 61 + #ifdef CONFIG_PPC_BOOK3S_64 62 + struct hlist_node list_vpte_64k; 63 + #endif 61 64 struct rcu_head rcu_head; 62 65 u64 host_vpn; 63 66 u64 pfn; 64 67 ulong slot; 65 68 struct kvmppc_pte pte; 69 + int pagesize; 66 70 }; 67 71 68 72 struct kvmppc_vcpu_book3s { 69 - struct kvm_vcpu vcpu; 70 - struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; 71 73 struct kvmppc_sid_map sid_map[SID_MAP_NUM]; 72 74 struct { 73 75 u64 esid; ··· 101 99 struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; 102 100 struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; 103 101 struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; 102 + #ifdef CONFIG_PPC_BOOK3S_64 103 + struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K]; 104 + #endif 104 105 int hpte_cache_count; 105 106 spinlock_t mmu_lock; 106 107 }; ··· 112 107 #define CONTEXT_GUEST 1 113 108 #define CONTEXT_GUEST_END 2 114 109 115 - #define VSID_REAL 0x0fffffffffc00000ULL 116 - #define VSID_BAT 0x0fffffffffb00000ULL 110 + #define VSID_REAL 0x07ffffffffc00000ULL 111 + #define VSID_BAT 0x07ffffffffb00000ULL 112 + #define VSID_64K 0x0800000000000000ULL 117 113 #define VSID_1T 0x1000000000000000ULL 118 114 #define VSID_REAL_DR 0x2000000000000000ULL 119 115 #define VSID_REAL_IR 0x4000000000000000ULL ··· 124 118 extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); 125 119 extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end); 126 120 extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); 127 - extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr); 128 121 extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); 129 122 extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); 130 123 extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); 131 - extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct 
kvmppc_pte *pte); 124 + extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, 125 + bool iswrite); 126 + extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); 132 127 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); 133 128 extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size); 134 129 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); ··· 141 134 142 135 extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); 143 136 extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); 137 + extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte); 144 138 extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu); 145 139 extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); 146 140 extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); ··· 159 151 bool upper, u32 val); 160 152 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); 161 153 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); 162 - extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); 154 + extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, 155 + bool *writable); 163 156 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 164 157 unsigned long *rmap, long pte_index, int realmode); 165 158 extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, ··· 181 172 unsigned long *hpret); 182 173 extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, 183 174 struct kvm_memory_slot *memslot, unsigned long *map); 175 + extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, 176 + unsigned long mask); 184 177 185 178 extern void kvmppc_entry_trampoline(void); 186 179 extern void kvmppc_hv_entry_trampoline(void); ··· 195 184 196 185 static inline struct kvmppc_vcpu_book3s 
*to_book3s(struct kvm_vcpu *vcpu) 197 186 { 198 - return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu); 187 + return vcpu->arch.book3s; 199 188 } 200 - 201 - extern void kvm_return_point(void); 202 189 203 190 /* Also add subarch specific defines */ 204 191 ··· 206 197 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 207 198 #include <asm/kvm_book3s_64.h> 208 199 #endif 209 - 210 - #ifdef CONFIG_KVM_BOOK3S_PR 211 - 212 - static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) 213 - { 214 - return to_book3s(vcpu)->hior; 215 - } 216 - 217 - static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, 218 - unsigned long pending_now, unsigned long old_pending) 219 - { 220 - if (pending_now) 221 - vcpu->arch.shared->int_pending = 1; 222 - else if (old_pending) 223 - vcpu->arch.shared->int_pending = 0; 224 - } 225 - 226 - static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 227 - { 228 - if ( num < 14 ) { 229 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 230 - svcpu->gpr[num] = val; 231 - svcpu_put(svcpu); 232 - to_book3s(vcpu)->shadow_vcpu->gpr[num] = val; 233 - } else 234 - vcpu->arch.gpr[num] = val; 235 - } 236 - 237 - static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) 238 - { 239 - if ( num < 14 ) { 240 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 241 - ulong r = svcpu->gpr[num]; 242 - svcpu_put(svcpu); 243 - return r; 244 - } else 245 - return vcpu->arch.gpr[num]; 246 - } 247 - 248 - static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 249 - { 250 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 251 - svcpu->cr = val; 252 - svcpu_put(svcpu); 253 - to_book3s(vcpu)->shadow_vcpu->cr = val; 254 - } 255 - 256 - static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 257 - { 258 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 259 - u32 r; 260 - r = svcpu->cr; 261 - svcpu_put(svcpu); 262 - return r; 263 - } 264 - 265 - static inline void 
kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 266 - { 267 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 268 - svcpu->xer = val; 269 - to_book3s(vcpu)->shadow_vcpu->xer = val; 270 - svcpu_put(svcpu); 271 - } 272 - 273 - static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 274 - { 275 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 276 - u32 r; 277 - r = svcpu->xer; 278 - svcpu_put(svcpu); 279 - return r; 280 - } 281 - 282 - static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) 283 - { 284 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 285 - svcpu->ctr = val; 286 - svcpu_put(svcpu); 287 - } 288 - 289 - static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) 290 - { 291 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 292 - ulong r; 293 - r = svcpu->ctr; 294 - svcpu_put(svcpu); 295 - return r; 296 - } 297 - 298 - static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) 299 - { 300 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 301 - svcpu->lr = val; 302 - svcpu_put(svcpu); 303 - } 304 - 305 - static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) 306 - { 307 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 308 - ulong r; 309 - r = svcpu->lr; 310 - svcpu_put(svcpu); 311 - return r; 312 - } 313 - 314 - static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) 315 - { 316 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 317 - svcpu->pc = val; 318 - svcpu_put(svcpu); 319 - } 320 - 321 - static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) 322 - { 323 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 324 - ulong r; 325 - r = svcpu->pc; 326 - svcpu_put(svcpu); 327 - return r; 328 - } 329 - 330 - static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) 331 - { 332 - ulong pc = kvmppc_get_pc(vcpu); 333 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 334 - u32 r; 335 - 336 - /* Load the instruction manually if it 
failed to do so in the 337 - * exit path */ 338 - if (svcpu->last_inst == KVM_INST_FETCH_FAILED) 339 - kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); 340 - 341 - r = svcpu->last_inst; 342 - svcpu_put(svcpu); 343 - return r; 344 - } 345 - 346 - /* 347 - * Like kvmppc_get_last_inst(), but for fetching a sc instruction. 348 - * Because the sc instruction sets SRR0 to point to the following 349 - * instruction, we have to fetch from pc - 4. 350 - */ 351 - static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) 352 - { 353 - ulong pc = kvmppc_get_pc(vcpu) - 4; 354 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 355 - u32 r; 356 - 357 - /* Load the instruction manually if it failed to do so in the 358 - * exit path */ 359 - if (svcpu->last_inst == KVM_INST_FETCH_FAILED) 360 - kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); 361 - 362 - r = svcpu->last_inst; 363 - svcpu_put(svcpu); 364 - return r; 365 - } 366 - 367 - static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) 368 - { 369 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 370 - ulong r; 371 - r = svcpu->fault_dar; 372 - svcpu_put(svcpu); 373 - return r; 374 - } 375 - 376 - static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) 377 - { 378 - ulong crit_raw = vcpu->arch.shared->critical; 379 - ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); 380 - bool crit; 381 - 382 - /* Truncate crit indicators in 32 bit mode */ 383 - if (!(vcpu->arch.shared->msr & MSR_SF)) { 384 - crit_raw &= 0xffffffff; 385 - crit_r1 &= 0xffffffff; 386 - } 387 - 388 - /* Critical section when crit == r1 */ 389 - crit = (crit_raw == crit_r1); 390 - /* ... 
and we're in supervisor mode */ 391 - crit = crit && !(vcpu->arch.shared->msr & MSR_PR); 392 - 393 - return crit; 394 - } 395 - #else /* CONFIG_KVM_BOOK3S_PR */ 396 - 397 - static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) 398 - { 399 - return 0; 400 - } 401 - 402 - static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, 403 - unsigned long pending_now, unsigned long old_pending) 404 - { 405 - } 406 200 407 201 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 408 202 { ··· 300 488 { 301 489 return vcpu->arch.fault_dar; 302 490 } 303 - 304 - static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) 305 - { 306 - return false; 307 - } 308 - #endif 309 491 310 492 /* Magic register values loaded into r3 and r4 before the 'sc' assembly 311 493 * instruction for the OSI hypercalls */
+1 -1
arch/powerpc/include/asm/kvm_book3s_32.h
··· 22 22 23 23 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) 24 24 { 25 - return to_book3s(vcpu)->shadow_vcpu; 25 + return vcpu->arch.shadow_vcpu; 26 26 } 27 27 28 28 static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
+4 -4
arch/powerpc/include/asm/kvm_book3s_64.h
··· 20 20 #ifndef __ASM_KVM_BOOK3S_64_H__ 21 21 #define __ASM_KVM_BOOK3S_64_H__ 22 22 23 - #ifdef CONFIG_KVM_BOOK3S_PR 23 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 24 24 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) 25 25 { 26 26 preempt_disable(); ··· 35 35 36 36 #define SPAPR_TCE_SHIFT 12 37 37 38 - #ifdef CONFIG_KVM_BOOK3S_64_HV 38 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 39 39 #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ 40 40 extern unsigned long kvm_rma_pages; 41 41 #endif ··· 278 278 (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); 279 279 } 280 280 281 - #ifdef CONFIG_KVM_BOOK3S_64_HV 281 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 282 282 /* 283 283 * Note modification of an HPTE; set the HPTE modified bit 284 284 * if anyone is interested. ··· 289 289 if (atomic_read(&kvm->arch.hpte_mod_interest)) 290 290 rev->guest_rpte |= HPTE_GR_MODIFIED; 291 291 } 292 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 292 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 293 293 294 294 #endif /* __ASM_KVM_BOOK3S_64_H__ */
+5 -4
arch/powerpc/include/asm/kvm_book3s_asm.h
··· 83 83 u8 restore_hid5; 84 84 u8 napping; 85 85 86 - #ifdef CONFIG_KVM_BOOK3S_64_HV 86 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 87 87 u8 hwthread_req; 88 88 u8 hwthread_state; 89 89 u8 host_ipi; ··· 101 101 #endif 102 102 #ifdef CONFIG_PPC_BOOK3S_64 103 103 u64 cfar; 104 + u64 ppr; 104 105 #endif 105 106 }; 106 107 ··· 109 108 ulong gpr[14]; 110 109 u32 cr; 111 110 u32 xer; 112 - 113 - u32 fault_dsisr; 114 - u32 last_inst; 115 111 ulong ctr; 116 112 ulong lr; 117 113 ulong pc; 114 + 118 115 ulong shadow_srr1; 119 116 ulong fault_dar; 117 + u32 fault_dsisr; 118 + u32 last_inst; 120 119 121 120 #ifdef CONFIG_PPC_BOOK3S_32 122 121 u32 sr[16]; /* Guest SRs */
+6 -1
arch/powerpc/include/asm/kvm_booke.h
··· 26 26 /* LPIDs we support with this build -- runtime limit may be lower */ 27 27 #define KVMPPC_NR_LPIDS 64 28 28 29 - #define KVMPPC_INST_EHPRIV 0x7c00021c 29 + #define KVMPPC_INST_EHPRIV 0x7c00021c 30 + #define EHPRIV_OC_SHIFT 11 31 + /* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */ 32 + #define EHPRIV_OC_DEBUG 1 33 + #define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \ 34 + (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT)) 30 35 31 36 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 32 37 {
+34 -23
arch/powerpc/include/asm/kvm_host.h
··· 63 63 64 64 #endif 65 65 66 - /* We don't currently support large pages. */ 67 - #define KVM_HPAGE_GFN_SHIFT(x) 0 68 - #define KVM_NR_PAGE_SIZES 1 69 - #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) 70 - 71 66 #define HPTEG_CACHE_NUM (1 << 15) 72 67 #define HPTEG_HASH_BITS_PTE 13 73 68 #define HPTEG_HASH_BITS_PTE_LONG 12 74 69 #define HPTEG_HASH_BITS_VPTE 13 75 70 #define HPTEG_HASH_BITS_VPTE_LONG 5 71 + #define HPTEG_HASH_BITS_VPTE_64K 11 76 72 #define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) 77 73 #define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG) 78 74 #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) 79 75 #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) 76 + #define HPTEG_HASH_NUM_VPTE_64K (1 << HPTEG_HASH_BITS_VPTE_64K) 80 77 81 78 /* Physical Address Mask - allowed range of real mode RAM access */ 82 79 #define KVM_PAM 0x0fffffffffffffffULL ··· 85 88 struct lppaca; 86 89 struct slb_shadow; 87 90 struct dtl_entry; 91 + 92 + struct kvmppc_vcpu_book3s; 93 + struct kvmppc_book3s_shadow_vcpu; 88 94 89 95 struct kvm_vm_stat { 90 96 u32 remote_tlb_flush; ··· 224 224 #define KVMPPC_GOT_PAGE 0x80 225 225 226 226 struct kvm_arch_memory_slot { 227 - #ifdef CONFIG_KVM_BOOK3S_64_HV 227 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 228 228 unsigned long *rmap; 229 229 unsigned long *slot_phys; 230 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 230 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 231 231 }; 232 232 233 233 struct kvm_arch { 234 234 unsigned int lpid; 235 - #ifdef CONFIG_KVM_BOOK3S_64_HV 235 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 236 236 unsigned long hpt_virt; 237 237 struct revmap_entry *revmap; 238 238 unsigned int host_lpid; ··· 256 256 cpumask_t need_tlb_flush; 257 257 struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; 258 258 int hpt_cma_alloc; 259 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 259 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 260 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 261 + struct mutex hpt_mutex; 262 + #endif 260 263 #ifdef 
CONFIG_PPC_BOOK3S_64 261 264 struct list_head spapr_tce_tables; 262 265 struct list_head rtas_tokens; ··· 270 267 #ifdef CONFIG_KVM_XICS 271 268 struct kvmppc_xics *xics; 272 269 #endif 270 + struct kvmppc_ops *kvm_ops; 273 271 }; 274 272 275 273 /* ··· 298 294 u64 stolen_tb; 299 295 u64 preempt_tb; 300 296 struct kvm_vcpu *runner; 297 + u64 tb_offset; /* guest timebase - host timebase */ 298 + ulong lpcr; 299 + u32 arch_compat; 300 + ulong pcr; 301 301 }; 302 302 303 303 #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) ··· 336 328 bool may_read : 1; 337 329 bool may_write : 1; 338 330 bool may_execute : 1; 331 + u8 page_size; /* MMU_PAGE_xxx */ 339 332 }; 340 333 341 334 struct kvmppc_mmu { ··· 349 340 /* book3s */ 350 341 void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value); 351 342 u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum); 352 - int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); 343 + int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, 344 + struct kvmppc_pte *pte, bool data, bool iswrite); 353 345 void (*reset_msr)(struct kvm_vcpu *vcpu); 354 346 void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); 355 347 int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); ··· 370 360 bool large : 1; /* PTEs are 16MB */ 371 361 bool tb : 1; /* 1TB segment */ 372 362 bool class : 1; 363 + u8 base_page_size; /* MMU_PAGE_xxx */ 373 364 }; 374 365 375 366 # ifdef CONFIG_PPC_FSL_BOOK3E ··· 388 377 #define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ 389 378 #define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ 390 379 391 - struct kvmppc_booke_debug_reg { 392 - u32 dbcr0; 393 - u32 dbcr1; 394 - u32 dbcr2; 395 - #ifdef CONFIG_KVM_E500MC 396 - u32 dbcr4; 397 - #endif 398 - u64 iac[KVMPPC_BOOKE_MAX_IAC]; 399 - u64 dac[KVMPPC_BOOKE_MAX_DAC]; 400 - }; 401 - 402 380 #define KVMPPC_IRQ_DEFAULT 0 403 381 #define KVMPPC_IRQ_MPIC 1 404 382 #define KVMPPC_IRQ_XICS 2 ··· 402 402 int slb_max; /* 1 + index 
of last valid entry in slb[] */ 403 403 int slb_nr; /* total number of entries in SLB */ 404 404 struct kvmppc_mmu mmu; 405 + struct kvmppc_vcpu_book3s *book3s; 406 + #endif 407 + #ifdef CONFIG_PPC_BOOK3S_32 408 + struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; 405 409 #endif 406 410 407 411 ulong gpr[32]; ··· 467 463 u32 ctrl; 468 464 ulong dabr; 469 465 ulong cfar; 466 + ulong ppr; 467 + ulong shadow_srr1; 470 468 #endif 471 469 u32 vrsave; /* also USPRG0 */ 472 470 u32 mmucr; ··· 504 498 505 499 u64 mmcr[3]; 506 500 u32 pmc[8]; 501 + u64 siar; 502 + u64 sdar; 507 503 508 504 #ifdef CONFIG_KVM_EXIT_TIMING 509 505 struct mutex exit_timing_lock; ··· 539 531 u32 eptcfg; 540 532 u32 epr; 541 533 u32 crit_save; 542 - struct kvmppc_booke_debug_reg dbg_reg; 534 + /* guest debug registers*/ 535 + struct debug_reg dbg_reg; 536 + /* hardware visible debug registers when in guest state */ 537 + struct debug_reg shadow_dbg_reg; 543 538 #endif 544 539 gpa_t paddr_accessed; 545 540 gva_t vaddr_accessed; ··· 593 582 struct kvmppc_icp *icp; /* XICS presentation controller */ 594 583 #endif 595 584 596 - #ifdef CONFIG_KVM_BOOK3S_64_HV 585 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 597 586 struct kvm_vcpu_arch_shared shregs; 598 587 599 588 unsigned long pgfault_addr;
+79 -28
arch/powerpc/include/asm/kvm_ppc.h
··· 106 106 struct kvm_interrupt *irq); 107 107 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); 108 108 extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); 109 - 110 - extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 111 - unsigned int op, int *advance); 112 - extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, 113 - ulong val); 114 - extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, 115 - ulong *val); 116 109 extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); 117 110 118 111 extern int kvmppc_booke_init(void); ··· 128 135 struct kvm_create_spapr_tce *args); 129 136 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 130 137 unsigned long ioba, unsigned long tce); 131 - extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, 132 - struct kvm_allocate_rma *rma); 133 138 extern struct kvm_rma_info *kvm_alloc_rma(void); 134 139 extern void kvm_release_rma(struct kvm_rma_info *ri); 135 140 extern struct page *kvm_alloc_hpt(unsigned long nr_pages); 136 141 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); 137 142 extern int kvmppc_core_init_vm(struct kvm *kvm); 138 143 extern void kvmppc_core_destroy_vm(struct kvm *kvm); 139 - extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 144 + extern void kvmppc_core_free_memslot(struct kvm *kvm, 145 + struct kvm_memory_slot *free, 140 146 struct kvm_memory_slot *dont); 141 - extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 147 + extern int kvmppc_core_create_memslot(struct kvm *kvm, 148 + struct kvm_memory_slot *slot, 142 149 unsigned long npages); 143 150 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, 144 151 struct kvm_memory_slot *memslot, ··· 169 176 u32 *priority); 170 177 extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); 171 178 extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); 179 + 180 + union kvmppc_one_reg { 181 + 
u32 wval; 182 + u64 dval; 183 + vector128 vval; 184 + u64 vsxval[2]; 185 + struct { 186 + u64 addr; 187 + u64 length; 188 + } vpaval; 189 + }; 190 + 191 + struct kvmppc_ops { 192 + struct module *owner; 193 + int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 194 + int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 195 + int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id, 196 + union kvmppc_one_reg *val); 197 + int (*set_one_reg)(struct kvm_vcpu *vcpu, u64 id, 198 + union kvmppc_one_reg *val); 199 + void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); 200 + void (*vcpu_put)(struct kvm_vcpu *vcpu); 201 + void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); 202 + int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); 203 + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id); 204 + void (*vcpu_free)(struct kvm_vcpu *vcpu); 205 + int (*check_requests)(struct kvm_vcpu *vcpu); 206 + int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log); 207 + void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot); 208 + int (*prepare_memory_region)(struct kvm *kvm, 209 + struct kvm_memory_slot *memslot, 210 + struct kvm_userspace_memory_region *mem); 211 + void (*commit_memory_region)(struct kvm *kvm, 212 + struct kvm_userspace_memory_region *mem, 213 + const struct kvm_memory_slot *old); 214 + int (*unmap_hva)(struct kvm *kvm, unsigned long hva); 215 + int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, 216 + unsigned long end); 217 + int (*age_hva)(struct kvm *kvm, unsigned long hva); 218 + int (*test_age_hva)(struct kvm *kvm, unsigned long hva); 219 + void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); 220 + void (*mmu_destroy)(struct kvm_vcpu *vcpu); 221 + void (*free_memslot)(struct kvm_memory_slot *free, 222 + struct kvm_memory_slot *dont); 223 + int (*create_memslot)(struct kvm_memory_slot *slot, 224 + unsigned long npages); 225 + int (*init_vm)(struct kvm *kvm); 226 + void 
(*destroy_vm)(struct kvm *kvm); 227 + int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info); 228 + int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu, 229 + unsigned int inst, int *advance); 230 + int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); 231 + int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); 232 + void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu); 233 + long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, 234 + unsigned long arg); 235 + 236 + }; 237 + 238 + extern struct kvmppc_ops *kvmppc_hv_ops; 239 + extern struct kvmppc_ops *kvmppc_pr_ops; 240 + 241 + static inline bool is_kvmppc_hv_enabled(struct kvm *kvm) 242 + { 243 + return kvm->arch.kvm_ops == kvmppc_hv_ops; 244 + } 172 245 173 246 /* 174 247 * Cuts out inst bits with ordering according to spec. ··· 269 210 return r; 270 211 } 271 212 272 - union kvmppc_one_reg { 273 - u32 wval; 274 - u64 dval; 275 - vector128 vval; 276 - u64 vsxval[2]; 277 - struct { 278 - u64 addr; 279 - u64 length; 280 - } vpaval; 281 - }; 282 - 283 213 #define one_reg_size(id) \ 284 214 (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) 285 215 ··· 293 245 __v; \ 294 246 }) 295 247 296 - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 248 + int kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 297 249 int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 298 250 299 - void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 251 + int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 300 252 int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 301 253 302 254 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); ··· 308 260 309 261 struct openpic; 310 262 311 - #ifdef CONFIG_KVM_BOOK3S_64_HV 263 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 312 264 extern void kvm_cma_reserve(void) __init; 313 265 
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) 314 266 { ··· 317 269 318 270 static inline u32 kvmppc_get_xics_latch(void) 319 271 { 320 - u32 xirr = get_paca()->kvm_hstate.saved_xirr; 272 + u32 xirr; 321 273 274 + xirr = get_paca()->kvm_hstate.saved_xirr; 322 275 get_paca()->kvm_hstate.saved_xirr = 0; 323 - 324 276 return xirr; 325 277 } 326 278 ··· 329 281 paca[cpu].kvm_hstate.host_ipi = host_ipi; 330 282 } 331 283 332 - extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); 284 + static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) 285 + { 286 + vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu); 287 + } 333 288 334 289 #else 335 290 static inline void __init kvm_cma_reserve(void)
+1 -1
arch/powerpc/include/asm/paca.h
··· 166 166 struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ 167 167 168 168 #ifdef CONFIG_KVM_BOOK3S_HANDLER 169 - #ifdef CONFIG_KVM_BOOK3S_PR 169 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 170 170 /* We use this to store guest state in */ 171 171 struct kvmppc_book3s_shadow_vcpu shadow_vcpu; 172 172 #endif
+2
arch/powerpc/include/asm/processor.h
··· 208 208 209 209 struct thread_struct { 210 210 unsigned long ksp; /* Kernel stack pointer */ 211 + 211 212 #ifdef CONFIG_PPC64 212 213 unsigned long ksp_vsid; 213 214 #endif ··· 222 221 void *pgdir; /* root of page-table tree */ 223 222 unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ 224 223 #endif 224 + /* Debug Registers */ 225 225 struct debug_reg debug; 226 226 struct thread_fp_state fp_state; 227 227 struct thread_fp_state *fp_save_area;
+1 -1
arch/powerpc/include/asm/pte-book3e.h
··· 40 40 #define _PAGE_U1 0x010000 41 41 #define _PAGE_U0 0x020000 42 42 #define _PAGE_ACCESSED 0x040000 43 - #define _PAGE_LENDIAN 0x080000 43 + #define _PAGE_ENDIAN 0x080000 44 44 #define _PAGE_GUARDED 0x100000 45 45 #define _PAGE_COHERENT 0x200000 /* M: enforce memory coherence */ 46 46 #define _PAGE_NO_CACHE 0x400000 /* I: cache inhibit */
+15
arch/powerpc/include/asm/reg.h
··· 248 248 #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ 249 249 #define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ 250 250 #define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ 251 + #define SPRN_TBU40 0x11E /* Timebase upper 40 bits (hyper, R/W) */ 251 252 #define SPRN_SPURR 0x134 /* Scaled PURR */ 252 253 #define SPRN_HSPRG0 0x130 /* Hypervisor Scratch 0 */ 253 254 #define SPRN_HSPRG1 0x131 /* Hypervisor Scratch 1 */ ··· 289 288 #define LPCR_ISL (1ul << (63-2)) 290 289 #define LPCR_VC_SH (63-2) 291 290 #define LPCR_DPFD_SH (63-11) 291 + #define LPCR_DPFD (7ul << LPCR_DPFD_SH) 292 292 #define LPCR_VRMASD (0x1ful << (63-16)) 293 293 #define LPCR_VRMA_L (1ul << (63-12)) 294 294 #define LPCR_VRMA_LP0 (1ul << (63-15)) ··· 306 304 #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ 307 305 #define LPCR_MER 0x00000800 /* Mediated External Exception */ 308 306 #define LPCR_MER_SH 11 307 + #define LPCR_TC 0x00000200 /* Translation control */ 309 308 #define LPCR_LPES 0x0000000c 310 309 #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ 311 310 #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ ··· 319 316 #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ 320 317 #define SPRN_HMER 0x150 /* Hardware m? error recovery */ 321 318 #define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ 319 + #define SPRN_PCR 0x152 /* Processor compatibility register */ 320 + #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. 
disable (bit NA since POWER8) */ 321 + #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ 322 + #define PCR_ARCH_205 0x2 /* Architecture 2.05 */ 322 323 #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ 323 324 #define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ 324 325 #define SPRN_TLBVPNR 0x155 /* P7 TLB control register */ ··· 432 425 #define HID4_RMLS2_SH (63 - 2) /* Real mode limit bottom 2 bits */ 433 426 #define HID4_LPID5_SH (63 - 6) /* partition ID bottom 4 bits */ 434 427 #define HID4_RMOR_SH (63 - 22) /* real mode offset (16 bits) */ 428 + #define HID4_RMOR (0xFFFFul << HID4_RMOR_SH) 435 429 #define HID4_LPES1 (1 << (63-57)) /* LPAR env. sel. bit 1 */ 436 430 #define HID4_RMLS0_SH (63 - 58) /* Real mode limit top bit */ 437 431 #define HID4_LPID1_SH 0 /* partition ID top 2 bits */ ··· 1114 1106 #define PVR_POWER8 0x004D 1115 1107 #define PVR_BE 0x0070 1116 1108 #define PVR_PA6T 0x0090 1109 + 1110 + /* "Logical" PVR values defined in PAPR, representing architecture levels */ 1111 + #define PVR_ARCH_204 0x0f000001 1112 + #define PVR_ARCH_205 0x0f000002 1113 + #define PVR_ARCH_206 0x0f000003 1114 + #define PVR_ARCH_206p 0x0f100003 1115 + #define PVR_ARCH_207 0x0f000004 1117 1116 1118 1117 /* Macros for setting and retrieving special purpose registers */ 1119 1118 #ifndef __ASSEMBLY__
+82 -4
arch/powerpc/include/uapi/asm/kvm.h
··· 27 27 #define __KVM_HAVE_PPC_SMT 28 28 #define __KVM_HAVE_IRQCHIP 29 29 #define __KVM_HAVE_IRQ_LINE 30 + #define __KVM_HAVE_GUEST_DEBUG 30 31 31 32 struct kvm_regs { 32 33 __u64 pc; ··· 270 269 __u64 fpr[32]; 271 270 }; 272 271 272 + /* 273 + * Defines for h/w breakpoint, watchpoint (read, write or both) and 274 + * software breakpoint. 275 + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status" 276 + * for KVM_DEBUG_EXIT. 277 + */ 278 + #define KVMPPC_DEBUG_NONE 0x0 279 + #define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) 280 + #define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) 281 + #define KVMPPC_DEBUG_WATCH_READ (1UL << 3) 273 282 struct kvm_debug_exit_arch { 283 + __u64 address; 284 + /* 285 + * exiting to userspace because of h/w breakpoint, watchpoint 286 + * (read, write or both) and software breakpoint. 287 + */ 288 + __u32 status; 289 + __u32 reserved; 274 290 }; 275 291 276 292 /* for KVM_SET_GUEST_DEBUG */ ··· 299 281 * Type denotes h/w breakpoint, read watchpoint, write 300 282 * watchpoint or watchpoint (both read and write). 
301 283 */ 302 - #define KVMPPC_DEBUG_NONE 0x0 303 - #define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) 304 - #define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) 305 - #define KVMPPC_DEBUG_WATCH_READ (1UL << 3) 306 284 __u32 type; 307 285 __u32 reserved; 308 286 } bp[16]; ··· 443 429 #define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10) 444 430 #define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11) 445 431 #define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12) 432 + #define KVM_REG_PPC_MMCR2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x13) 433 + #define KVM_REG_PPC_MMCRS (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x14) 434 + #define KVM_REG_PPC_SIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x15) 435 + #define KVM_REG_PPC_SDAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x16) 436 + #define KVM_REG_PPC_SIER (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x17) 446 437 447 438 #define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18) 448 439 #define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19) ··· 517 498 #define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99) 518 499 #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) 519 500 #define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) 501 + 502 + /* Timebase offset */ 503 + #define KVM_REG_PPC_TB_OFFSET (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9c) 504 + 505 + /* POWER8 registers */ 506 + #define KVM_REG_PPC_SPMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9d) 507 + #define KVM_REG_PPC_SPMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9e) 508 + #define KVM_REG_PPC_IAMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9f) 509 + #define KVM_REG_PPC_TFHAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa0) 510 + #define KVM_REG_PPC_TFIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa1) 511 + #define KVM_REG_PPC_TEXASR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa2) 512 + #define KVM_REG_PPC_FSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa3) 513 + #define KVM_REG_PPC_PSPB (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xa4) 514 + #define KVM_REG_PPC_EBBHR (KVM_REG_PPC | 
KVM_REG_SIZE_U64 | 0xa5) 515 + #define KVM_REG_PPC_EBBRR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa6) 516 + #define KVM_REG_PPC_BESCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa7) 517 + #define KVM_REG_PPC_TAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa8) 518 + #define KVM_REG_PPC_DPDES (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa9) 519 + #define KVM_REG_PPC_DAWR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa) 520 + #define KVM_REG_PPC_DAWRX (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab) 521 + #define KVM_REG_PPC_CIABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xac) 522 + #define KVM_REG_PPC_IC (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xad) 523 + #define KVM_REG_PPC_VTB (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xae) 524 + #define KVM_REG_PPC_CSIGR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaf) 525 + #define KVM_REG_PPC_TACR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb0) 526 + #define KVM_REG_PPC_TCSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1) 527 + #define KVM_REG_PPC_PID (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2) 528 + #define KVM_REG_PPC_ACOP (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3) 529 + 530 + #define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) 531 + #define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) 532 + #define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6) 533 + 534 + /* Architecture compatibility level */ 535 + #define KVM_REG_PPC_ARCH_COMPAT (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7) 536 + 537 + /* Transactional Memory checkpointed state: 538 + * This is all GPRs, all VSX regs and a subset of SPRs 539 + */ 540 + #define KVM_REG_PPC_TM (KVM_REG_PPC | 0x80000000) 541 + /* TM GPRs */ 542 + #define KVM_REG_PPC_TM_GPR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0) 543 + #define KVM_REG_PPC_TM_GPR(n) (KVM_REG_PPC_TM_GPR0 + (n)) 544 + #define KVM_REG_PPC_TM_GPR31 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x1f) 545 + /* TM VSX */ 546 + #define KVM_REG_PPC_TM_VSR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x20) 547 + #define KVM_REG_PPC_TM_VSR(n) (KVM_REG_PPC_TM_VSR0 + (n)) 548 + #define KVM_REG_PPC_TM_VSR63 (KVM_REG_PPC_TM 
| KVM_REG_SIZE_U128 | 0x5f) 549 + /* TM SPRS */ 550 + #define KVM_REG_PPC_TM_CR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x60) 551 + #define KVM_REG_PPC_TM_LR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x61) 552 + #define KVM_REG_PPC_TM_CTR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x62) 553 + #define KVM_REG_PPC_TM_FPSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x63) 554 + #define KVM_REG_PPC_TM_AMR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x64) 555 + #define KVM_REG_PPC_TM_PPR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x65) 556 + #define KVM_REG_PPC_TM_VRSAVE (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x66) 557 + #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) 558 + #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) 559 + #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) 520 560 521 561 /* PPC64 eXternal Interrupt Controller Specification */ 522 562 #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
+14 -7
arch/powerpc/kernel/asm-offsets.c
··· 439 439 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 440 440 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 441 441 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); 442 - #ifdef CONFIG_KVM_BOOK3S_64_HV 442 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 443 443 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); 444 444 DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); 445 445 DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); ··· 470 470 DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); 471 471 472 472 /* book3s */ 473 - #ifdef CONFIG_KVM_BOOK3S_64_HV 473 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 474 474 DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); 475 475 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); 476 476 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); ··· 502 502 DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); 503 503 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); 504 504 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); 505 + DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); 506 + DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); 505 507 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); 506 508 DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); 507 509 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); ··· 513 511 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); 514 512 DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); 515 513 DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); 514 + DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); 515 + DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); 516 516 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); 517 517 DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); 518 518 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); 519 519 DEFINE(VCORE_NAPPING_THREADS, 
offsetof(struct kvmppc_vcore, napping_threads)); 520 - DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - 521 - offsetof(struct kvmppc_vcpu_book3s, vcpu)); 520 + DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); 521 + DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); 522 + DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); 522 523 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); 523 524 DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); 524 525 DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); 525 526 526 527 #ifdef CONFIG_PPC_BOOK3S_64 527 - #ifdef CONFIG_KVM_BOOK3S_PR 528 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 529 + DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); 528 530 # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) 529 531 #else 530 532 # define SVCPU_FIELD(x, f) ··· 580 574 HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); 581 575 HSTATE_FIELD(HSTATE_NAPPING, napping); 582 576 583 - #ifdef CONFIG_KVM_BOOK3S_64_HV 577 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 584 578 HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); 585 579 HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); 586 580 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); ··· 596 590 HSTATE_FIELD(HSTATE_DABR, dabr); 597 591 HSTATE_FIELD(HSTATE_DECEXP, dec_expires); 598 592 DEFINE(IPI_PRIORITY, IPI_PRIORITY); 599 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 593 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 600 594 601 595 #ifdef CONFIG_PPC_BOOK3S_64 602 596 HSTATE_FIELD(HSTATE_CFAR, cfar); 597 + HSTATE_FIELD(HSTATE_PPR, ppr); 603 598 #endif /* CONFIG_PPC_BOOK3S_64 */ 604 599 605 600 #else /* CONFIG_PPC_BOOK3S */
+28 -2
arch/powerpc/kernel/exceptions-64s.S
··· 126 126 bgt cr1,. 127 127 GET_PACA(r13) 128 128 129 - #ifdef CONFIG_KVM_BOOK3S_64_HV 129 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 130 130 li r0,KVM_HWTHREAD_IN_KERNEL 131 131 stb r0,HSTATE_HWTHREAD_STATE(r13) 132 132 /* Order setting hwthread_state vs. testing hwthread_req */ ··· 425 425 mfspr r9,SPRN_DSISR 426 426 srdi r10,r10,60 427 427 rlwimi r10,r9,16,0x20 428 - #ifdef CONFIG_KVM_BOOK3S_PR 428 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 429 429 lbz r9,HSTATE_IN_GUEST(r13) 430 430 rlwimi r10,r9,8,0x300 431 431 #endif ··· 649 649 rfid 650 650 b . /* prevent spec. execution */ 651 651 #endif /* __DISABLED__ */ 652 + 653 + #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 654 + kvmppc_skip_interrupt: 655 + /* 656 + * Here all GPRs are unchanged from when the interrupt happened 657 + * except for r13, which is saved in SPRG_SCRATCH0. 658 + */ 659 + mfspr r13, SPRN_SRR0 660 + addi r13, r13, 4 661 + mtspr SPRN_SRR0, r13 662 + GET_SCRATCH0(r13) 663 + rfid 664 + b . 665 + 666 + kvmppc_skip_Hinterrupt: 667 + /* 668 + * Here all GPRs are unchanged from when the interrupt happened 669 + * except for r13, which is saved in SPRG_SCRATCH0. 670 + */ 671 + mfspr r13, SPRN_HSRR0 672 + addi r13, r13, 4 673 + mtspr SPRN_HSRR0, r13 674 + GET_SCRATCH0(r13) 675 + hrfid 676 + b . 677 + #endif 652 678 653 679 /* 654 680 * Code from here down to __end_handlers is invoked from the
+1 -1
arch/powerpc/kernel/idle_power7.S
··· 84 84 std r9,_MSR(r1) 85 85 std r1,PACAR1(r13) 86 86 87 - #ifdef CONFIG_KVM_BOOK3S_64_HV 87 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 88 88 /* Tell KVM we're napping */ 89 89 li r4,KVM_HWTHREAD_IN_NAP 90 90 stb r4,HSTATE_HWTHREAD_STATE(r13)
+1 -1
arch/powerpc/kernel/traps.c
··· 1529 1529 * back on or not. 1530 1530 */ 1531 1531 if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, 1532 - current->thread.debug.dbcr1)) 1532 + current->thread.debug.dbcr1)) 1533 1533 regs->msr |= MSR_DE; 1534 1534 else 1535 1535 /* Make sure the IDM flag is off */
+43 -15
arch/powerpc/kvm/44x.c
··· 31 31 #include "44x_tlb.h" 32 32 #include "booke.h" 33 33 34 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 34 + static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu) 35 35 { 36 36 kvmppc_booke_vcpu_load(vcpu, cpu); 37 37 kvmppc_44x_tlb_load(vcpu); 38 38 } 39 39 40 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 40 + static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu) 41 41 { 42 42 kvmppc_44x_tlb_put(vcpu); 43 43 kvmppc_booke_vcpu_put(vcpu); ··· 114 114 return 0; 115 115 } 116 116 117 - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 117 + static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu, 118 + struct kvm_sregs *sregs) 118 119 { 119 - kvmppc_get_sregs_ivor(vcpu, sregs); 120 + return kvmppc_get_sregs_ivor(vcpu, sregs); 120 121 } 121 122 122 - int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 123 + static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu, 124 + struct kvm_sregs *sregs) 123 125 { 124 126 return kvmppc_set_sregs_ivor(vcpu, sregs); 125 127 } 126 128 127 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, 128 - union kvmppc_one_reg *val) 129 + static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, 130 + union kvmppc_one_reg *val) 129 131 { 130 132 return -EINVAL; 131 133 } 132 134 133 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, 134 - union kvmppc_one_reg *val) 135 + static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, 136 + union kvmppc_one_reg *val) 135 137 { 136 138 return -EINVAL; 137 139 } 138 140 139 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 141 + static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm, 142 + unsigned int id) 140 143 { 141 144 struct kvmppc_vcpu_44x *vcpu_44x; 142 145 struct kvm_vcpu *vcpu; ··· 170 167 return ERR_PTR(err); 171 168 } 172 169 173 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 170 + static void 
kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu) 174 171 { 175 172 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 176 173 ··· 179 176 kmem_cache_free(kvm_vcpu_cache, vcpu_44x); 180 177 } 181 178 182 - int kvmppc_core_init_vm(struct kvm *kvm) 179 + static int kvmppc_core_init_vm_44x(struct kvm *kvm) 183 180 { 184 181 return 0; 185 182 } 186 183 187 - void kvmppc_core_destroy_vm(struct kvm *kvm) 184 + static void kvmppc_core_destroy_vm_44x(struct kvm *kvm) 188 185 { 189 186 } 187 + 188 + static struct kvmppc_ops kvm_ops_44x = { 189 + .get_sregs = kvmppc_core_get_sregs_44x, 190 + .set_sregs = kvmppc_core_set_sregs_44x, 191 + .get_one_reg = kvmppc_get_one_reg_44x, 192 + .set_one_reg = kvmppc_set_one_reg_44x, 193 + .vcpu_load = kvmppc_core_vcpu_load_44x, 194 + .vcpu_put = kvmppc_core_vcpu_put_44x, 195 + .vcpu_create = kvmppc_core_vcpu_create_44x, 196 + .vcpu_free = kvmppc_core_vcpu_free_44x, 197 + .mmu_destroy = kvmppc_mmu_destroy_44x, 198 + .init_vm = kvmppc_core_init_vm_44x, 199 + .destroy_vm = kvmppc_core_destroy_vm_44x, 200 + .emulate_op = kvmppc_core_emulate_op_44x, 201 + .emulate_mtspr = kvmppc_core_emulate_mtspr_44x, 202 + .emulate_mfspr = kvmppc_core_emulate_mfspr_44x, 203 + }; 190 204 191 205 static int __init kvmppc_44x_init(void) 192 206 { ··· 211 191 212 192 r = kvmppc_booke_init(); 213 193 if (r) 214 - return r; 194 + goto err_out; 215 195 216 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); 196 + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); 197 + if (r) 198 + goto err_out; 199 + kvm_ops_44x.owner = THIS_MODULE; 200 + kvmppc_pr_ops = &kvm_ops_44x; 201 + 202 + err_out: 203 + return r; 217 204 } 218 205 219 206 static void __exit kvmppc_44x_exit(void) 220 207 { 208 + kvmppc_pr_ops = NULL; 221 209 kvmppc_booke_exit(); 222 210 } 223 211
+4 -4
arch/powerpc/kvm/44x_emulate.c
··· 91 91 return EMULATE_DONE; 92 92 } 93 93 94 - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 95 - unsigned int inst, int *advance) 94 + int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, 95 + unsigned int inst, int *advance) 96 96 { 97 97 int emulated = EMULATE_DONE; 98 98 int dcrn = get_dcrn(inst); ··· 152 152 return emulated; 153 153 } 154 154 155 - int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 155 + int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 156 156 { 157 157 int emulated = EMULATE_DONE; 158 158 ··· 172 172 return emulated; 173 173 } 174 174 175 - int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 175 + int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 176 176 { 177 177 int emulated = EMULATE_DONE; 178 178
+1 -1
arch/powerpc/kvm/44x_tlb.c
··· 268 268 trace_kvm_stlb_inval(stlb_index); 269 269 } 270 270 271 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 271 + void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu) 272 272 { 273 273 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 274 274 int i;
+22 -6
arch/powerpc/kvm/Kconfig
··· 35 35 bool 36 36 select KVM_BOOK3S_HANDLER 37 37 38 - config KVM_BOOK3S_PR 38 + config KVM_BOOK3S_PR_POSSIBLE 39 39 bool 40 40 select KVM_MMIO 41 41 select MMU_NOTIFIER 42 + 43 + config KVM_BOOK3S_HV_POSSIBLE 44 + bool 42 45 43 46 config KVM_BOOK3S_32 44 47 tristate "KVM support for PowerPC book3s_32 processors" 45 48 depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT 46 49 select KVM 47 50 select KVM_BOOK3S_32_HANDLER 48 - select KVM_BOOK3S_PR 51 + select KVM_BOOK3S_PR_POSSIBLE 49 52 ---help--- 50 53 Support running unmodified book3s_32 guest kernels 51 54 in virtual machines on book3s_32 host processors. ··· 63 60 depends on PPC_BOOK3S_64 64 61 select KVM_BOOK3S_64_HANDLER 65 62 select KVM 63 + select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE 66 64 ---help--- 67 65 Support running unmodified book3s_64 and book3s_32 guest kernels 68 66 in virtual machines on book3s_64 host processors. ··· 74 70 If unsure, say N. 75 71 76 72 config KVM_BOOK3S_64_HV 77 - bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" 73 + tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" 78 74 depends on KVM_BOOK3S_64 75 + select KVM_BOOK3S_HV_POSSIBLE 79 76 select MMU_NOTIFIER 80 77 select CMA 81 78 ---help--- ··· 95 90 If unsure, say N. 96 91 97 92 config KVM_BOOK3S_64_PR 98 - def_bool y 99 - depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV 100 - select KVM_BOOK3S_PR 93 + tristate "KVM support without using hypervisor mode in host" 94 + depends on KVM_BOOK3S_64 95 + select KVM_BOOK3S_PR_POSSIBLE 96 + ---help--- 97 + Support running guest kernels in virtual machines on processors 98 + without using hypervisor mode in the host, by running the 99 + guest in user mode (problem state) and emulating all 100 + privileged instructions and registers. 
101 + 102 + This is not as fast as using hypervisor mode, but works on 103 + machines where hypervisor mode is not available or not usable, 104 + and can emulate processors that are different from the host 105 + processor, including emulating 32-bit processors on a 64-bit 106 + host. 101 107 102 108 config KVM_BOOKE_HV 103 109 bool
+22 -9
arch/powerpc/kvm/Makefile
··· 53 53 e500_emulate.o 54 54 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) 55 55 56 - kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ 57 - $(KVM)/coalesced_mmio.o \ 56 + kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ 57 + book3s_64_vio_hv.o 58 + 59 + kvm-pr-y := \ 58 60 fpu.o \ 59 61 book3s_paired_singles.o \ 60 62 book3s_pr.o \ 61 63 book3s_pr_papr.o \ 62 - book3s_64_vio_hv.o \ 63 64 book3s_emulate.o \ 64 65 book3s_interrupts.o \ 65 66 book3s_mmu_hpte.o \ 66 67 book3s_64_mmu_host.o \ 67 68 book3s_64_mmu.o \ 68 69 book3s_32_mmu.o 69 - kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ 70 - book3s_rmhandlers.o 71 70 72 - kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 71 + ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 72 + kvm-book3s_64-module-objs := \ 73 + $(KVM)/coalesced_mmio.o 74 + 75 + kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ 76 + book3s_rmhandlers.o 77 + endif 78 + 79 + kvm-hv-y += \ 73 80 book3s_hv.o \ 74 81 book3s_hv_interrupts.o \ 75 82 book3s_64_mmu_hv.o 83 + 76 84 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ 77 85 book3s_hv_rm_xics.o 78 - kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 86 + 87 + ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 88 + kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ 79 89 book3s_hv_rmhandlers.o \ 80 90 book3s_hv_rm_mmu.o \ 81 - book3s_64_vio_hv.o \ 82 91 book3s_hv_ras.o \ 83 92 book3s_hv_builtin.o \ 84 93 book3s_hv_cma.o \ 85 94 $(kvm-book3s_64-builtin-xics-objs-y) 95 + endif 86 96 87 97 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ 88 98 book3s_xics.o 89 99 90 - kvm-book3s_64-module-objs := \ 100 + kvm-book3s_64-module-objs += \ 91 101 $(KVM)/kvm_main.o \ 92 102 $(KVM)/eventfd.o \ 93 103 powerpc.o \ ··· 132 122 obj-$(CONFIG_KVM_E500MC) += kvm.o 133 123 obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o 134 124 obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o 125 + 126 + obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o 127 + obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o 135 128 136 129 
obj-y += $(kvm-book3s_64-builtin-objs-y)
+246 -11
arch/powerpc/kvm/book3s.c
··· 34 34 #include <linux/vmalloc.h> 35 35 #include <linux/highmem.h> 36 36 37 + #include "book3s.h" 37 38 #include "trace.h" 38 39 39 40 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU ··· 68 67 69 68 void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) 70 69 { 70 + } 71 + 72 + static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) 73 + { 74 + if (!is_kvmppc_hv_enabled(vcpu->kvm)) 75 + return to_book3s(vcpu)->hior; 76 + return 0; 77 + } 78 + 79 + static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, 80 + unsigned long pending_now, unsigned long old_pending) 81 + { 82 + if (is_kvmppc_hv_enabled(vcpu->kvm)) 83 + return; 84 + if (pending_now) 85 + vcpu->arch.shared->int_pending = 1; 86 + else if (old_pending) 87 + vcpu->arch.shared->int_pending = 0; 88 + } 89 + 90 + static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) 91 + { 92 + ulong crit_raw; 93 + ulong crit_r1; 94 + bool crit; 95 + 96 + if (is_kvmppc_hv_enabled(vcpu->kvm)) 97 + return false; 98 + 99 + crit_raw = vcpu->arch.shared->critical; 100 + crit_r1 = kvmppc_get_gpr(vcpu, 1); 101 + 102 + /* Truncate crit indicators in 32 bit mode */ 103 + if (!(vcpu->arch.shared->msr & MSR_SF)) { 104 + crit_raw &= 0xffffffff; 105 + crit_r1 &= 0xffffffff; 106 + } 107 + 108 + /* Critical section when crit == r1 */ 109 + crit = (crit_raw == crit_r1); 110 + /* ... 
and we're in supervisor mode */ 111 + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); 112 + 113 + return crit; 71 114 } 72 115 73 116 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) ··· 171 126 printk(KERN_INFO "Queueing interrupt %x\n", vec); 172 127 #endif 173 128 } 174 - 129 + EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio); 175 130 176 131 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) 177 132 { 178 133 /* might as well deliver this straight away */ 179 134 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags); 180 135 } 136 + EXPORT_SYMBOL_GPL(kvmppc_core_queue_program); 181 137 182 138 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 183 139 { 184 140 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 185 141 } 142 + EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec); 186 143 187 144 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) 188 145 { 189 146 return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 190 147 } 148 + EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec); 191 149 192 150 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) 193 151 { 194 152 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 195 153 } 154 + EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec); 196 155 197 156 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 198 157 struct kvm_interrupt *irq) ··· 334 285 335 286 return 0; 336 287 } 288 + EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); 337 289 338 - pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) 290 + pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, 291 + bool *writable) 339 292 { 340 293 ulong mp_pa = vcpu->arch.magic_page_pa; 341 294 ··· 353 302 354 303 pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; 355 304 get_page(pfn_to_page(pfn)); 305 + if (writable) 306 + *writable = true; 356 307 return pfn; 357 308 } 358 309 359 - return gfn_to_pfn(vcpu->kvm, gfn); 310 + return gfn_to_pfn_prot(vcpu->kvm, 
gfn, writing, writable); 360 311 } 312 + EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn); 361 313 362 314 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, 363 - struct kvmppc_pte *pte) 315 + bool iswrite, struct kvmppc_pte *pte) 364 316 { 365 317 int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); 366 318 int r; 367 319 368 320 if (relocated) { 369 - r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); 321 + r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite); 370 322 } else { 371 323 pte->eaddr = eaddr; 372 324 pte->raddr = eaddr & KVM_PAM; ··· 415 361 416 362 vcpu->stat.st++; 417 363 418 - if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 364 + if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte)) 419 365 return -ENOENT; 420 366 421 367 *eaddr = pte.raddr; ··· 428 374 429 375 return EMULATE_DONE; 430 376 } 377 + EXPORT_SYMBOL_GPL(kvmppc_st); 431 378 432 379 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 433 380 bool data) ··· 438 383 439 384 vcpu->stat.ld++; 440 385 441 - if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 386 + if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte)) 442 387 goto nopte; 443 388 444 389 *eaddr = pte.raddr; ··· 459 404 mmio: 460 405 return EMULATE_DO_MMIO; 461 406 } 407 + EXPORT_SYMBOL_GPL(kvmppc_ld); 462 408 463 409 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 464 410 { ··· 473 417 474 418 void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) 475 419 { 420 + } 421 + 422 + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 423 + struct kvm_sregs *sregs) 424 + { 425 + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); 426 + } 427 + 428 + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 429 + struct kvm_sregs *sregs) 430 + { 431 + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); 476 432 } 477 433 478 434 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ··· 563 495 if (size > sizeof(val)) 564 496 return -EINVAL; 565 497 566 - r = 
kvmppc_get_one_reg(vcpu, reg->id, &val); 567 - 498 + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); 568 499 if (r == -EINVAL) { 569 500 r = 0; 570 501 switch (reg->id) { ··· 594 527 break; 595 528 } 596 529 val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); 530 + break; 531 + case KVM_REG_PPC_VRSAVE: 532 + val = get_reg_val(reg->id, vcpu->arch.vrsave); 597 533 break; 598 534 #endif /* CONFIG_ALTIVEC */ 599 535 case KVM_REG_PPC_DEBUG_INST: { ··· 642 572 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) 643 573 return -EFAULT; 644 574 645 - r = kvmppc_set_one_reg(vcpu, reg->id, &val); 646 - 575 + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); 647 576 if (r == -EINVAL) { 648 577 r = 0; 649 578 switch (reg->id) { ··· 674 605 } 675 606 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); 676 607 break; 608 + case KVM_REG_PPC_VRSAVE: 609 + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { 610 + r = -ENXIO; 611 + break; 612 + } 613 + vcpu->arch.vrsave = set_reg_val(reg->id, val); 614 + break; 677 615 #endif /* CONFIG_ALTIVEC */ 678 616 #ifdef CONFIG_KVM_XICS 679 617 case KVM_REG_PPC_ICP_STATE: ··· 699 623 } 700 624 701 625 return r; 626 + } 627 + 628 + void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 629 + { 630 + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); 631 + } 632 + 633 + void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 634 + { 635 + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); 636 + } 637 + 638 + void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 639 + { 640 + vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr); 641 + } 642 + EXPORT_SYMBOL_GPL(kvmppc_set_msr); 643 + 644 + int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 645 + { 646 + return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu); 702 647 } 703 648 704 649 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, ··· 741 644 kvmppc_core_queue_dec(vcpu); 742 645 kvm_vcpu_kick(vcpu); 743 646 } 647 + 648 + struct kvm_vcpu 
*kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 649 + { 650 + return kvm->arch.kvm_ops->vcpu_create(kvm, id); 651 + } 652 + 653 + void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 654 + { 655 + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); 656 + } 657 + 658 + int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) 659 + { 660 + return vcpu->kvm->arch.kvm_ops->check_requests(vcpu); 661 + } 662 + 663 + int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) 664 + { 665 + return kvm->arch.kvm_ops->get_dirty_log(kvm, log); 666 + } 667 + 668 + void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 669 + struct kvm_memory_slot *dont) 670 + { 671 + kvm->arch.kvm_ops->free_memslot(free, dont); 672 + } 673 + 674 + int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 675 + unsigned long npages) 676 + { 677 + return kvm->arch.kvm_ops->create_memslot(slot, npages); 678 + } 679 + 680 + void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) 681 + { 682 + kvm->arch.kvm_ops->flush_memslot(kvm, memslot); 683 + } 684 + 685 + int kvmppc_core_prepare_memory_region(struct kvm *kvm, 686 + struct kvm_memory_slot *memslot, 687 + struct kvm_userspace_memory_region *mem) 688 + { 689 + return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); 690 + } 691 + 692 + void kvmppc_core_commit_memory_region(struct kvm *kvm, 693 + struct kvm_userspace_memory_region *mem, 694 + const struct kvm_memory_slot *old) 695 + { 696 + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); 697 + } 698 + 699 + int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 700 + { 701 + return kvm->arch.kvm_ops->unmap_hva(kvm, hva); 702 + } 703 + EXPORT_SYMBOL_GPL(kvm_unmap_hva); 704 + 705 + int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 706 + { 707 + return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); 708 + } 709 + 710 + int kvm_age_hva(struct kvm *kvm, unsigned long hva) 
711 + { 712 + return kvm->arch.kvm_ops->age_hva(kvm, hva); 713 + } 714 + 715 + int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 716 + { 717 + return kvm->arch.kvm_ops->test_age_hva(kvm, hva); 718 + } 719 + 720 + void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 721 + { 722 + kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte); 723 + } 724 + 725 + void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 726 + { 727 + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); 728 + } 729 + 730 + int kvmppc_core_init_vm(struct kvm *kvm) 731 + { 732 + 733 + #ifdef CONFIG_PPC64 734 + INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 735 + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); 736 + #endif 737 + 738 + return kvm->arch.kvm_ops->init_vm(kvm); 739 + } 740 + 741 + void kvmppc_core_destroy_vm(struct kvm *kvm) 742 + { 743 + kvm->arch.kvm_ops->destroy_vm(kvm); 744 + 745 + #ifdef CONFIG_PPC64 746 + kvmppc_rtas_tokens_free(kvm); 747 + WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); 748 + #endif 749 + } 750 + 751 + int kvmppc_core_check_processor_compat(void) 752 + { 753 + /* 754 + * We always return 0 for book3s. We check 755 + * for compatability while loading the HV 756 + * or PR module 757 + */ 758 + return 0; 759 + } 760 + 761 + static int kvmppc_book3s_init(void) 762 + { 763 + int r; 764 + 765 + r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 766 + if (r) 767 + return r; 768 + #ifdef CONFIG_KVM_BOOK3S_32 769 + r = kvmppc_book3s_init_pr(); 770 + #endif 771 + return r; 772 + 773 + } 774 + 775 + static void kvmppc_book3s_exit(void) 776 + { 777 + #ifdef CONFIG_KVM_BOOK3S_32 778 + kvmppc_book3s_exit_pr(); 779 + #endif 780 + kvm_exit(); 781 + } 782 + 783 + module_init(kvmppc_book3s_init); 784 + module_exit(kvmppc_book3s_exit);
+34
arch/powerpc/kvm/book3s.h
··· 1 + /* 2 + * Copyright IBM Corporation, 2013 3 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or 6 + * modify it under the terms of the GNU General Public License as 7 + * published by the Free Software Foundation; either version 2 of the 8 + * License or (at your optional) any later version of the license. 9 + * 10 + */ 11 + 12 + #ifndef __POWERPC_KVM_BOOK3S_H__ 13 + #define __POWERPC_KVM_BOOK3S_H__ 14 + 15 + extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, 16 + struct kvm_memory_slot *memslot); 17 + extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); 18 + extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, 19 + unsigned long end); 20 + extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); 21 + extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); 22 + extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); 23 + 24 + extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); 25 + extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 26 + unsigned int inst, int *advance); 27 + extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, 28 + int sprn, ulong spr_val); 29 + extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, 30 + int sprn, ulong *spr_val); 31 + extern int kvmppc_book3s_init_pr(void); 32 + extern void kvmppc_book3s_exit_pr(void); 33 + 34 + #endif
+41 -30
arch/powerpc/kvm/book3s_32_mmu.c
··· 84 84 } 85 85 86 86 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, 87 - struct kvmppc_pte *pte, bool data); 87 + struct kvmppc_pte *pte, bool data, 88 + bool iswrite); 88 89 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 89 90 u64 *vsid); 90 91 ··· 100 99 u64 vsid; 101 100 struct kvmppc_pte pte; 102 101 103 - if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) 102 + if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false)) 104 103 return pte.vpage; 105 104 106 105 kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); ··· 112 111 kvmppc_set_msr(vcpu, 0); 113 112 } 114 113 115 - static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s, 114 + static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu, 116 115 u32 sre, gva_t eaddr, 117 116 bool primary) 118 117 { 118 + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 119 119 u32 page, hash, pteg, htabmask; 120 120 hva_t r; 121 121 ··· 134 132 kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, 135 133 sr_vsid(sre)); 136 134 137 - r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); 135 + r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT); 138 136 if (kvm_is_error_hva(r)) 139 137 return r; 140 138 return r | (pteg & ~PAGE_MASK); ··· 147 145 } 148 146 149 147 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, 150 - struct kvmppc_pte *pte, bool data) 148 + struct kvmppc_pte *pte, bool data, 149 + bool iswrite) 151 150 { 152 151 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 153 152 struct kvmppc_bat *bat; ··· 189 186 printk(KERN_INFO "BAT is not readable!\n"); 190 187 continue; 191 188 } 192 - if (!pte->may_write) { 193 - /* let's treat r/o BATs as not-readable for now */ 189 + if (iswrite && !pte->may_write) { 194 190 dprintk_pte("BAT is read-only!\n"); 195 191 continue; 196 192 } ··· 203 201 204 202 static int 
kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, 205 203 struct kvmppc_pte *pte, bool data, 206 - bool primary) 204 + bool iswrite, bool primary) 207 205 { 208 - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 209 206 u32 sre; 210 207 hva_t ptegp; 211 208 u32 pteg[16]; ··· 219 218 220 219 pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); 221 220 222 - ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary); 221 + ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary); 223 222 if (kvm_is_error_hva(ptegp)) { 224 223 printk(KERN_INFO "KVM: Invalid PTEG!\n"); 225 224 goto no_page_found; ··· 259 258 break; 260 259 } 261 260 262 - if ( !pte->may_read ) 263 - continue; 264 - 265 261 dprintk_pte("MMU: Found PTE -> %x %x - %x\n", 266 262 pteg[i], pteg[i+1], pp); 267 263 found = 1; ··· 269 271 /* Update PTE C and A bits, so the guest's swapper knows we used the 270 272 page */ 271 273 if (found) { 272 - u32 oldpte = pteg[i+1]; 274 + u32 pte_r = pteg[i+1]; 275 + char __user *addr = (char __user *) &pteg[i+1]; 273 276 274 - if (pte->may_read) 275 - pteg[i+1] |= PTEG_FLAG_ACCESSED; 276 - if (pte->may_write) 277 - pteg[i+1] |= PTEG_FLAG_DIRTY; 278 - else 279 - dprintk_pte("KVM: Mapping read-only page!\n"); 280 - 281 - /* Write back into the PTEG */ 282 - if (pteg[i+1] != oldpte) 283 - copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); 284 - 277 + /* 278 + * Use single-byte writes to update the HPTE, to 279 + * conform to what real hardware does. 
280 + */ 281 + if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) { 282 + pte_r |= PTEG_FLAG_ACCESSED; 283 + put_user(pte_r >> 8, addr + 2); 284 + } 285 + if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) { 286 + pte_r |= PTEG_FLAG_DIRTY; 287 + put_user(pte_r, addr + 3); 288 + } 289 + if (!pte->may_read || (iswrite && !pte->may_write)) 290 + return -EPERM; 285 291 return 0; 286 292 } 287 293 ··· 304 302 } 305 303 306 304 static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 307 - struct kvmppc_pte *pte, bool data) 305 + struct kvmppc_pte *pte, bool data, 306 + bool iswrite) 308 307 { 309 308 int r; 310 309 ulong mp_ea = vcpu->arch.magic_page_ea; 311 310 312 311 pte->eaddr = eaddr; 312 + pte->page_size = MMU_PAGE_4K; 313 313 314 314 /* Magic page override */ 315 315 if (unlikely(mp_ea) && ··· 327 323 return 0; 328 324 } 329 325 330 - r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data); 326 + r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite); 331 327 if (r < 0) 332 - r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true); 328 + r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, 329 + data, iswrite, true); 333 330 if (r < 0) 334 - r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false); 331 + r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, 332 + data, iswrite, false); 335 333 336 334 return r; 337 335 } ··· 353 347 354 348 static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) 355 349 { 356 - kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); 350 + int i; 351 + struct kvm_vcpu *v; 352 + 353 + /* flush this VA on all cpus */ 354 + kvm_for_each_vcpu(i, v, vcpu->kvm) 355 + kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000); 357 356 } 358 357 359 358 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
+12 -4
arch/powerpc/kvm/book3s_32_mmu_host.c
··· 138 138 139 139 extern char etext[]; 140 140 141 - int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) 141 + int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, 142 + bool iswrite) 142 143 { 143 144 pfn_t hpaddr; 144 145 u64 vpn; ··· 153 152 bool evict = false; 154 153 struct hpte_cache *pte; 155 154 int r = 0; 155 + bool writable; 156 156 157 157 /* Get host physical address for gpa */ 158 - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 158 + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, 159 + iswrite, &writable); 159 160 if (is_error_noslot_pfn(hpaddr)) { 160 161 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", 161 162 orig_pte->eaddr); ··· 207 204 (primary ? 0 : PTE_SEC); 208 205 pteg1 = hpaddr | PTE_M | PTE_R | PTE_C; 209 206 210 - if (orig_pte->may_write) { 207 + if (orig_pte->may_write && writable) { 211 208 pteg1 |= PP_RWRW; 212 209 mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 213 210 } else { ··· 260 257 kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); 261 258 out: 262 259 return r; 260 + } 261 + 262 + void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) 263 + { 264 + kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL); 263 265 } 264 266 265 267 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) ··· 349 341 svcpu_put(svcpu); 350 342 } 351 343 352 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 344 + void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) 353 345 { 354 346 int i; 355 347
+150 -35
arch/powerpc/kvm/book3s_64_mmu.c
··· 107 107 return kvmppc_slb_calc_vpn(slb, eaddr); 108 108 } 109 109 110 + static int mmu_pagesize(int mmu_pg) 111 + { 112 + switch (mmu_pg) { 113 + case MMU_PAGE_64K: 114 + return 16; 115 + case MMU_PAGE_16M: 116 + return 24; 117 + } 118 + return 12; 119 + } 120 + 110 121 static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) 111 122 { 112 - return slbe->large ? 24 : 12; 123 + return mmu_pagesize(slbe->base_page_size); 113 124 } 114 125 115 126 static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) ··· 130 119 return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p); 131 120 } 132 121 133 - static hva_t kvmppc_mmu_book3s_64_get_pteg( 134 - struct kvmppc_vcpu_book3s *vcpu_book3s, 122 + static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu, 135 123 struct kvmppc_slb *slbe, gva_t eaddr, 136 124 bool second) 137 125 { 126 + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 138 127 u64 hash, pteg, htabsize; 139 128 u32 ssize; 140 129 hva_t r; ··· 159 148 160 149 /* When running a PAPR guest, SDR1 contains a HVA address instead 161 150 of a GPA */ 162 - if (vcpu_book3s->vcpu.arch.papr_enabled) 151 + if (vcpu->arch.papr_enabled) 163 152 r = pteg; 164 153 else 165 - r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); 154 + r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT); 166 155 167 156 if (kvm_is_error_hva(r)) 168 157 return r; ··· 177 166 avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); 178 167 avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p); 179 168 180 - if (p < 24) 181 - avpn >>= ((80 - p) - 56) - 8; 169 + if (p < 16) 170 + avpn >>= ((80 - p) - 56) - 8; /* 16 - p */ 182 171 else 183 - avpn <<= 8; 172 + avpn <<= p - 16; 184 173 185 174 return avpn; 186 175 } 187 176 188 - static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 189 - struct kvmppc_pte *gpte, bool data) 177 + /* 178 + * Return page size encoded in the second word of a HPTE, or 179 + * -1 for an invalid encoding for 
the base page size indicated by 180 + * the SLB entry. This doesn't handle mixed pagesize segments yet. 181 + */ 182 + static int decode_pagesize(struct kvmppc_slb *slbe, u64 r) 190 183 { 191 - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 184 + switch (slbe->base_page_size) { 185 + case MMU_PAGE_64K: 186 + if ((r & 0xf000) == 0x1000) 187 + return MMU_PAGE_64K; 188 + break; 189 + case MMU_PAGE_16M: 190 + if ((r & 0xff000) == 0) 191 + return MMU_PAGE_16M; 192 + break; 193 + } 194 + return -1; 195 + } 196 + 197 + static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 198 + struct kvmppc_pte *gpte, bool data, 199 + bool iswrite) 200 + { 192 201 struct kvmppc_slb *slbe; 193 202 hva_t ptegp; 194 203 u64 pteg[16]; ··· 220 189 u8 pp, key = 0; 221 190 bool found = false; 222 191 bool second = false; 192 + int pgsize; 223 193 ulong mp_ea = vcpu->arch.magic_page_ea; 224 194 225 195 /* Magic page override */ ··· 234 202 gpte->may_execute = true; 235 203 gpte->may_read = true; 236 204 gpte->may_write = true; 205 + gpte->page_size = MMU_PAGE_4K; 237 206 238 207 return 0; 239 208 } ··· 255 222 v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | 256 223 HPTE_V_SECONDARY; 257 224 225 + pgsize = slbe->large ? 
MMU_PAGE_16M : MMU_PAGE_4K; 226 + 227 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 228 + 258 229 do_second: 259 - ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); 230 + ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second); 260 231 if (kvm_is_error_hva(ptegp)) 261 232 goto no_page_found; 262 233 ··· 277 240 for (i=0; i<16; i+=2) { 278 241 /* Check all relevant fields of 1st dword */ 279 242 if ((pteg[i] & v_mask) == v_val) { 243 + /* If large page bit is set, check pgsize encoding */ 244 + if (slbe->large && 245 + (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { 246 + pgsize = decode_pagesize(slbe, pteg[i+1]); 247 + if (pgsize < 0) 248 + continue; 249 + } 280 250 found = true; 281 251 break; 282 252 } ··· 300 256 v = pteg[i]; 301 257 r = pteg[i+1]; 302 258 pp = (r & HPTE_R_PP) | key; 303 - eaddr_mask = 0xFFF; 259 + if (r & HPTE_R_PP0) 260 + pp |= 8; 304 261 305 262 gpte->eaddr = eaddr; 306 263 gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); 307 - if (slbe->large) 308 - eaddr_mask = 0xFFFFFF; 264 + 265 + eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1; 309 266 gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); 267 + gpte->page_size = pgsize; 310 268 gpte->may_execute = ((r & HPTE_R_N) ? false : true); 311 269 gpte->may_read = false; 312 270 gpte->may_write = false; ··· 323 277 case 3: 324 278 case 5: 325 279 case 7: 280 + case 10: 326 281 gpte->may_read = true; 327 282 break; 328 283 } ··· 334 287 335 288 /* Update PTE R and C bits, so the guest's swapper knows we used the 336 289 * page */ 337 - if (gpte->may_read) { 338 - /* Set the accessed flag */ 290 + if (gpte->may_read && !(r & HPTE_R_R)) { 291 + /* 292 + * Set the accessed flag. 293 + * We have to write this back with a single byte write 294 + * because another vcpu may be accessing this on 295 + * non-PAPR platforms such as mac99, and this is 296 + * what real hardware does. 
297 + */ 298 + char __user *addr = (char __user *) &pteg[i+1]; 339 299 r |= HPTE_R_R; 300 + put_user(r >> 8, addr + 6); 340 301 } 341 - if (data && gpte->may_write) { 342 - /* Set the dirty flag -- XXX even if not writing */ 302 + if (iswrite && gpte->may_write && !(r & HPTE_R_C)) { 303 + /* Set the dirty flag */ 304 + /* Use a single byte write */ 305 + char __user *addr = (char __user *) &pteg[i+1]; 343 306 r |= HPTE_R_C; 307 + put_user(r, addr + 7); 344 308 } 345 309 346 - /* Write back into the PTEG */ 347 - if (pteg[i+1] != r) { 348 - pteg[i+1] = r; 349 - copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); 350 - } 310 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 351 311 352 - if (!gpte->may_read) 312 + if (!gpte->may_read || (iswrite && !gpte->may_write)) 353 313 return -EPERM; 354 314 return 0; 355 315 356 316 no_page_found: 317 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 357 318 return -ENOENT; 358 319 359 320 no_seg_found: 360 - 361 321 dprintk("KVM MMU: Trigger segment fault\n"); 362 322 return -EINVAL; 363 323 } ··· 398 344 slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0; 399 345 slbe->nx = (rs & SLB_VSID_N) ? 1 : 0; 400 346 slbe->class = (rs & SLB_VSID_C) ? 
1 : 0; 347 + 348 + slbe->base_page_size = MMU_PAGE_4K; 349 + if (slbe->large) { 350 + if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) { 351 + switch (rs & SLB_VSID_LP) { 352 + case SLB_VSID_LP_00: 353 + slbe->base_page_size = MMU_PAGE_16M; 354 + break; 355 + case SLB_VSID_LP_01: 356 + slbe->base_page_size = MMU_PAGE_64K; 357 + break; 358 + } 359 + } else 360 + slbe->base_page_size = MMU_PAGE_16M; 361 + } 401 362 402 363 slbe->orige = rb & (ESID_MASK | SLB_ESID_V); 403 364 slbe->origv = rs; ··· 529 460 bool large) 530 461 { 531 462 u64 mask = 0xFFFFFFFFFULL; 463 + long i; 464 + struct kvm_vcpu *v; 532 465 533 466 dprintk("KVM MMU: tlbie(0x%lx)\n", va); 534 467 535 - if (large) 536 - mask = 0xFFFFFF000ULL; 537 - kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); 468 + /* 469 + * The tlbie instruction changed behaviour starting with 470 + * POWER6. POWER6 and later don't have the large page flag 471 + * in the instruction but in the RB value, along with bits 472 + * indicating page and segment sizes. 473 + */ 474 + if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) { 475 + /* POWER6 or later */ 476 + if (va & 1) { /* L bit */ 477 + if ((va & 0xf000) == 0x1000) 478 + mask = 0xFFFFFFFF0ULL; /* 64k page */ 479 + else 480 + mask = 0xFFFFFF000ULL; /* 16M page */ 481 + } 482 + } else { 483 + /* older processors, e.g. 
PPC970 */ 484 + if (large) 485 + mask = 0xFFFFFF000ULL; 486 + } 487 + /* flush this VA on all vcpus */ 488 + kvm_for_each_vcpu(i, v, vcpu->kvm) 489 + kvmppc_mmu_pte_vflush(v, va >> 12, mask); 538 490 } 491 + 492 + #ifdef CONFIG_PPC_64K_PAGES 493 + static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid) 494 + { 495 + ulong mp_ea = vcpu->arch.magic_page_ea; 496 + 497 + return mp_ea && !(vcpu->arch.shared->msr & MSR_PR) && 498 + (mp_ea >> SID_SHIFT) == esid; 499 + } 500 + #endif 539 501 540 502 static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 541 503 u64 *vsid) ··· 575 475 struct kvmppc_slb *slb; 576 476 u64 gvsid = esid; 577 477 ulong mp_ea = vcpu->arch.magic_page_ea; 478 + int pagesize = MMU_PAGE_64K; 578 479 579 480 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 580 481 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); 581 482 if (slb) { 582 483 gvsid = slb->vsid; 484 + pagesize = slb->base_page_size; 583 485 if (slb->tb) { 584 486 gvsid <<= SID_SHIFT_1T - SID_SHIFT; 585 487 gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1); ··· 592 490 593 491 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 594 492 case 0: 595 - *vsid = VSID_REAL | esid; 493 + gvsid = VSID_REAL | esid; 596 494 break; 597 495 case MSR_IR: 598 - *vsid = VSID_REAL_IR | gvsid; 496 + gvsid |= VSID_REAL_IR; 599 497 break; 600 498 case MSR_DR: 601 - *vsid = VSID_REAL_DR | gvsid; 499 + gvsid |= VSID_REAL_DR; 602 500 break; 603 501 case MSR_DR|MSR_IR: 604 502 if (!slb) 605 503 goto no_slb; 606 504 607 - *vsid = gvsid; 608 505 break; 609 506 default: 610 507 BUG(); 611 508 break; 612 509 } 613 510 614 - if (vcpu->arch.shared->msr & MSR_PR) 615 - *vsid |= VSID_PR; 511 + #ifdef CONFIG_PPC_64K_PAGES 512 + /* 513 + * Mark this as a 64k segment if the host is using 514 + * 64k pages, the host MMU supports 64k pages and 515 + * the guest segment page size is >= 64k, 516 + * but not if this segment contains the magic page. 
517 + */ 518 + if (pagesize >= MMU_PAGE_64K && 519 + mmu_psize_defs[MMU_PAGE_64K].shift && 520 + !segment_contains_magic_page(vcpu, esid)) 521 + gvsid |= VSID_64K; 522 + #endif 616 523 524 + if (vcpu->arch.shared->msr & MSR_PR) 525 + gvsid |= VSID_PR; 526 + 527 + *vsid = gvsid; 617 528 return 0; 618 529 619 530 no_slb:
+79 -27
arch/powerpc/kvm/book3s_64_mmu_host.c
··· 27 27 #include <asm/machdep.h> 28 28 #include <asm/mmu_context.h> 29 29 #include <asm/hw_irq.h> 30 - #include "trace.h" 30 + #include "trace_pr.h" 31 31 32 32 #define PTE_SIZE 12 33 33 34 34 void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 35 35 { 36 36 ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, 37 - MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M, 37 + pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M, 38 38 false); 39 39 } 40 40 ··· 78 78 return NULL; 79 79 } 80 80 81 - int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) 81 + int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, 82 + bool iswrite) 82 83 { 83 84 unsigned long vpn; 84 85 pfn_t hpaddr; ··· 91 90 int attempt = 0; 92 91 struct kvmppc_sid_map *map; 93 92 int r = 0; 93 + int hpsize = MMU_PAGE_4K; 94 + bool writable; 95 + unsigned long mmu_seq; 96 + struct kvm *kvm = vcpu->kvm; 97 + struct hpte_cache *cpte; 98 + unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT; 99 + unsigned long pfn; 100 + 101 + /* used to check for invalidations in progress */ 102 + mmu_seq = kvm->mmu_notifier_seq; 103 + smp_rmb(); 94 104 95 105 /* Get host physical address for gpa */ 96 - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 97 - if (is_error_noslot_pfn(hpaddr)) { 98 - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); 106 + pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable); 107 + if (is_error_noslot_pfn(pfn)) { 108 + printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn); 99 109 r = -EINVAL; 100 110 goto out; 101 111 } 102 - hpaddr <<= PAGE_SHIFT; 103 - hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); 112 + hpaddr = pfn << PAGE_SHIFT; 104 113 105 114 /* and write the mapping ea -> hpa into the pt */ 106 115 vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); ··· 128 117 goto out; 129 118 } 130 119 131 - vsid = map->host_vsid; 132 - vpn = hpt_vpn(orig_pte->eaddr, 
vsid, MMU_SEGSIZE_256M); 120 + vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M); 133 121 134 - if (!orig_pte->may_write) 135 - rflags |= HPTE_R_PP; 136 - else 137 - mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 122 + kvm_set_pfn_accessed(pfn); 123 + if (!orig_pte->may_write || !writable) 124 + rflags |= PP_RXRX; 125 + else { 126 + mark_page_dirty(vcpu->kvm, gfn); 127 + kvm_set_pfn_dirty(pfn); 128 + } 138 129 139 130 if (!orig_pte->may_execute) 140 131 rflags |= HPTE_R_N; 141 132 else 142 - kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT); 133 + kvmppc_mmu_flush_icache(pfn); 143 134 144 - hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M); 135 + /* 136 + * Use 64K pages if possible; otherwise, on 64K page kernels, 137 + * we need to transfer 4 more bits from guest real to host real addr. 138 + */ 139 + if (vsid & VSID_64K) 140 + hpsize = MMU_PAGE_64K; 141 + else 142 + hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); 143 + 144 + hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M); 145 + 146 + cpte = kvmppc_mmu_hpte_cache_next(vcpu); 147 + 148 + spin_lock(&kvm->mmu_lock); 149 + if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) { 150 + r = -EAGAIN; 151 + goto out_unlock; 152 + } 145 153 146 154 map_again: 147 155 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); ··· 169 139 if (attempt > 1) 170 140 if (ppc_md.hpte_remove(hpteg) < 0) { 171 141 r = -1; 172 - goto out; 142 + goto out_unlock; 173 143 } 174 144 175 145 ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, 176 - MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M); 146 + hpsize, hpsize, MMU_SEGSIZE_256M); 177 147 178 148 if (ret < 0) { 179 149 /* If we couldn't map a primary PTE, try a secondary */ ··· 182 152 attempt++; 183 153 goto map_again; 184 154 } else { 185 - struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); 186 - 187 155 trace_kvm_book3s_64_mmu_map(rflags, hpteg, 188 156 vpn, hpaddr, orig_pte); 189 157 ··· 192 164 hpteg = ((hash & htab_hash_mask) 
* HPTES_PER_GROUP); 193 165 } 194 166 195 - pte->slot = hpteg + (ret & 7); 196 - pte->host_vpn = vpn; 197 - pte->pte = *orig_pte; 198 - pte->pfn = hpaddr >> PAGE_SHIFT; 167 + cpte->slot = hpteg + (ret & 7); 168 + cpte->host_vpn = vpn; 169 + cpte->pte = *orig_pte; 170 + cpte->pfn = pfn; 171 + cpte->pagesize = hpsize; 199 172 200 - kvmppc_mmu_hpte_cache_map(vcpu, pte); 173 + kvmppc_mmu_hpte_cache_map(vcpu, cpte); 174 + cpte = NULL; 201 175 } 202 - kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); 176 + 177 + out_unlock: 178 + spin_unlock(&kvm->mmu_lock); 179 + kvm_release_pfn_clean(pfn); 180 + if (cpte) 181 + kvmppc_mmu_hpte_cache_free(cpte); 203 182 204 183 out: 205 184 return r; 185 + } 186 + 187 + void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) 188 + { 189 + u64 mask = 0xfffffffffULL; 190 + u64 vsid; 191 + 192 + vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid); 193 + if (vsid & VSID_64K) 194 + mask = 0xffffffff0ULL; 195 + kvmppc_mmu_pte_vflush(vcpu, pte->vpage, mask); 206 196 } 207 197 208 198 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) ··· 337 291 slb_vsid &= ~SLB_VSID_KP; 338 292 slb_esid |= slb_index; 339 293 294 + #ifdef CONFIG_PPC_64K_PAGES 295 + /* Set host segment base page size to 64K if possible */ 296 + if (gvsid & VSID_64K) 297 + slb_vsid |= mmu_psize_defs[MMU_PAGE_64K].sllp; 298 + #endif 299 + 340 300 svcpu->slb[slb_index].esid = slb_esid; 341 301 svcpu->slb[slb_index].vsid = slb_vsid; 342 302 ··· 378 326 svcpu_put(svcpu); 379 327 } 380 328 381 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 329 + void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) 382 330 { 383 331 kvmppc_mmu_hpte_destroy(vcpu); 384 332 __destroy_context(to_book3s(vcpu)->context_id[0]);
+10 -14
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 260 260 return 0; 261 261 } 262 262 263 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 264 - { 265 - } 266 - 267 263 static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) 268 264 { 269 265 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); ··· 447 451 } 448 452 449 453 static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 450 - struct kvmppc_pte *gpte, bool data) 454 + struct kvmppc_pte *gpte, bool data, bool iswrite) 451 455 { 452 456 struct kvm *kvm = vcpu->kvm; 453 457 struct kvmppc_slb *slbe; ··· 902 906 return 0; 903 907 } 904 908 905 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 909 + int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) 906 910 { 907 911 if (kvm->arch.using_mmu_notifiers) 908 912 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 909 913 return 0; 910 914 } 911 915 912 - int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 916 + int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) 913 917 { 914 918 if (kvm->arch.using_mmu_notifiers) 915 919 kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); 916 920 return 0; 917 921 } 918 922 919 - void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) 923 + void kvmppc_core_flush_memslot_hv(struct kvm *kvm, 924 + struct kvm_memory_slot *memslot) 920 925 { 921 926 unsigned long *rmapp; 922 927 unsigned long gfn; ··· 991 994 return ret; 992 995 } 993 996 994 - int kvm_age_hva(struct kvm *kvm, unsigned long hva) 997 + int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) 995 998 { 996 999 if (!kvm->arch.using_mmu_notifiers) 997 1000 return 0; ··· 1029 1032 return ret; 1030 1033 } 1031 1034 1032 - int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 1035 + int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) 1033 1036 { 1034 1037 if (!kvm->arch.using_mmu_notifiers) 1035 1038 return 0; 1036 1039 return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); 1037 1040 } 1038 1041 1039 - 
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 1042 + void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) 1040 1043 { 1041 1044 if (!kvm->arch.using_mmu_notifiers) 1042 1045 return; ··· 1509 1512 1510 1513 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1511 1514 (VRMA_VSID << SLB_VSID_SHIFT_1T); 1512 - lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; 1513 - lpcr |= senc << (LPCR_VRMASD_SH - 4); 1514 - kvm->arch.lpcr = lpcr; 1515 + lpcr = senc << (LPCR_VRMASD_SH - 4); 1516 + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); 1515 1517 rma_setup = 1; 1516 1518 } 1517 1519 ++i;
+1
arch/powerpc/kvm/book3s_64_vio_hv.c
··· 74 74 /* Didn't find the liobn, punt it to userspace */ 75 75 return H_TOO_HARD; 76 76 } 77 + EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
+7 -11
arch/powerpc/kvm/book3s_emulate.c
··· 86 86 return true; 87 87 } 88 88 89 - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 90 - unsigned int inst, int *advance) 89 + int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 90 + unsigned int inst, int *advance) 91 91 { 92 92 int emulated = EMULATE_DONE; 93 93 int rt = get_rt(inst); ··· 172 172 vcpu->arch.mmu.tlbie(vcpu, addr, large); 173 173 break; 174 174 } 175 - #ifdef CONFIG_KVM_BOOK3S_64_PR 175 + #ifdef CONFIG_PPC_BOOK3S_64 176 176 case OP_31_XOP_FAKE_SC1: 177 177 { 178 178 /* SC 1 papr hypercalls */ ··· 267 267 268 268 r = kvmppc_st(vcpu, &addr, 32, zeros, true); 269 269 if ((r == -ENOENT) || (r == -EPERM)) { 270 - struct kvmppc_book3s_shadow_vcpu *svcpu; 271 - 272 - svcpu = svcpu_get(vcpu); 273 270 *advance = 0; 274 271 vcpu->arch.shared->dar = vaddr; 275 - svcpu->fault_dar = vaddr; 272 + vcpu->arch.fault_dar = vaddr; 276 273 277 274 dsisr = DSISR_ISSTORE; 278 275 if (r == -ENOENT) ··· 278 281 dsisr |= DSISR_PROTFAULT; 279 282 280 283 vcpu->arch.shared->dsisr = dsisr; 281 - svcpu->fault_dsisr = dsisr; 282 - svcpu_put(svcpu); 284 + vcpu->arch.fault_dsisr = dsisr; 283 285 284 286 kvmppc_book3s_queue_irqprio(vcpu, 285 287 BOOK3S_INTERRUPT_DATA_STORAGE); ··· 345 349 return bat; 346 350 } 347 351 348 - int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 352 + int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 349 353 { 350 354 int emulated = EMULATE_DONE; 351 355 ··· 468 472 return emulated; 469 473 } 470 474 471 - int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 475 + int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 472 476 { 473 477 int emulated = EMULATE_DONE; 474 478
+3 -2
arch/powerpc/kvm/book3s_exports.c
··· 20 20 #include <linux/export.h> 21 21 #include <asm/kvm_book3s.h> 22 22 23 - #ifdef CONFIG_KVM_BOOK3S_64_HV 23 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 24 24 EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); 25 - #else 25 + #endif 26 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 26 27 EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); 27 28 EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); 28 29 #ifdef CONFIG_ALTIVEC
+311 -82
arch/powerpc/kvm/book3s_hv.c
··· 52 52 #include <linux/vmalloc.h> 53 53 #include <linux/highmem.h> 54 54 #include <linux/hugetlb.h> 55 + #include <linux/module.h> 56 + 57 + #include "book3s.h" 55 58 56 59 /* #define EXIT_DEBUG */ 57 60 /* #define EXIT_DEBUG_SIMPLE */ ··· 69 66 static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 70 67 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 71 68 72 - void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) 69 + static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) 73 70 { 74 71 int me; 75 72 int cpu = vcpu->cpu; ··· 128 125 * purely defensive; they should never fail.) 129 126 */ 130 127 131 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 128 + static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) 132 129 { 133 130 struct kvmppc_vcore *vc = vcpu->arch.vcore; 134 131 ··· 146 143 spin_unlock(&vcpu->arch.tbacct_lock); 147 144 } 148 145 149 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 146 + static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) 150 147 { 151 148 struct kvmppc_vcore *vc = vcpu->arch.vcore; 152 149 ··· 158 155 spin_unlock(&vcpu->arch.tbacct_lock); 159 156 } 160 157 161 - void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 158 + static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) 162 159 { 163 160 vcpu->arch.shregs.msr = msr; 164 161 kvmppc_end_cede(vcpu); 165 162 } 166 163 167 - void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 164 + void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) 168 165 { 169 166 vcpu->arch.pvr = pvr; 167 + } 168 + 169 + int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) 170 + { 171 + unsigned long pcr = 0; 172 + struct kvmppc_vcore *vc = vcpu->arch.vcore; 173 + 174 + if (arch_compat) { 175 + if (!cpu_has_feature(CPU_FTR_ARCH_206)) 176 + return -EINVAL; /* 970 has no compat mode support */ 177 + 178 + switch (arch_compat) { 179 + case PVR_ARCH_205: 180 + pcr = PCR_ARCH_205; 181 + break; 182 + case PVR_ARCH_206: 183 + case PVR_ARCH_206p: 184 
+ break; 185 + default: 186 + return -EINVAL; 187 + } 188 + } 189 + 190 + spin_lock(&vc->lock); 191 + vc->arch_compat = arch_compat; 192 + vc->pcr = pcr; 193 + spin_unlock(&vc->lock); 194 + 195 + return 0; 170 196 } 171 197 172 198 void kvmppc_dump_regs(struct kvm_vcpu *vcpu) ··· 227 195 pr_err(" ESID = %.16llx VSID = %.16llx\n", 228 196 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv); 229 197 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n", 230 - vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1, 198 + vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1, 231 199 vcpu->arch.last_inst); 232 200 } 233 201 ··· 521 489 memset(dt, 0, sizeof(struct dtl_entry)); 522 490 dt->dispatch_reason = 7; 523 491 dt->processor_id = vc->pcpu + vcpu->arch.ptid; 524 - dt->timebase = now; 492 + dt->timebase = now + vc->tb_offset; 525 493 dt->enqueue_to_dispatch_time = stolen; 526 494 dt->srr0 = kvmppc_get_pc(vcpu); 527 495 dt->srr1 = vcpu->arch.shregs.msr; ··· 570 538 } 571 539 break; 572 540 case H_CONFER: 541 + target = kvmppc_get_gpr(vcpu, 4); 542 + if (target == -1) 543 + break; 544 + tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); 545 + if (!tvcpu) { 546 + ret = H_PARAMETER; 547 + break; 548 + } 549 + kvm_vcpu_yield_to(tvcpu); 573 550 break; 574 551 case H_REGISTER_VPA: 575 552 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), ··· 617 576 return RESUME_GUEST; 618 577 } 619 578 620 - static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 621 - struct task_struct *tsk) 579 + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, 580 + struct task_struct *tsk) 622 581 { 623 582 int r = RESUME_HOST; 624 583 ··· 712 671 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", 713 672 vcpu->arch.trap, kvmppc_get_pc(vcpu), 714 673 vcpu->arch.shregs.msr); 674 + run->hw.hardware_exit_reason = vcpu->arch.trap; 715 675 r = RESUME_HOST; 716 - BUG(); 717 676 break; 718 677 } 719 678 720 679 return r; 721 680 } 722 681 723 - int 
kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 724 - struct kvm_sregs *sregs) 682 + static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu, 683 + struct kvm_sregs *sregs) 725 684 { 726 685 int i; 727 686 ··· 735 694 return 0; 736 695 } 737 696 738 - int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 739 - struct kvm_sregs *sregs) 697 + static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, 698 + struct kvm_sregs *sregs) 740 699 { 741 700 int i, j; 742 701 743 - kvmppc_set_pvr(vcpu, sregs->pvr); 702 + kvmppc_set_pvr_hv(vcpu, sregs->pvr); 744 703 745 704 j = 0; 746 705 for (i = 0; i < vcpu->arch.slb_nr; i++) { ··· 755 714 return 0; 756 715 } 757 716 758 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 717 + static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) 718 + { 719 + struct kvmppc_vcore *vc = vcpu->arch.vcore; 720 + u64 mask; 721 + 722 + spin_lock(&vc->lock); 723 + /* 724 + * Userspace can only modify DPFD (default prefetch depth), 725 + * ILE (interrupt little-endian) and TC (translation control). 726 + */ 727 + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; 728 + vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); 729 + spin_unlock(&vc->lock); 730 + } 731 + 732 + static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, 733 + union kvmppc_one_reg *val) 759 734 { 760 735 int r = 0; 761 736 long int i; ··· 805 748 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: 806 749 i = id - KVM_REG_PPC_PMC1; 807 750 *val = get_reg_val(id, vcpu->arch.pmc[i]); 751 + break; 752 + case KVM_REG_PPC_SIAR: 753 + *val = get_reg_val(id, vcpu->arch.siar); 754 + break; 755 + case KVM_REG_PPC_SDAR: 756 + *val = get_reg_val(id, vcpu->arch.sdar); 808 757 break; 809 758 #ifdef CONFIG_VSX 810 759 case KVM_REG_PPC_FPR0 ... 
KVM_REG_PPC_FPR31: ··· 850 787 val->vpaval.length = vcpu->arch.dtl.len; 851 788 spin_unlock(&vcpu->arch.vpa_update_lock); 852 789 break; 790 + case KVM_REG_PPC_TB_OFFSET: 791 + *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); 792 + break; 793 + case KVM_REG_PPC_LPCR: 794 + *val = get_reg_val(id, vcpu->arch.vcore->lpcr); 795 + break; 796 + case KVM_REG_PPC_PPR: 797 + *val = get_reg_val(id, vcpu->arch.ppr); 798 + break; 799 + case KVM_REG_PPC_ARCH_COMPAT: 800 + *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); 801 + break; 853 802 default: 854 803 r = -EINVAL; 855 804 break; ··· 870 795 return r; 871 796 } 872 797 873 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 798 + static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, 799 + union kvmppc_one_reg *val) 874 800 { 875 801 int r = 0; 876 802 long int i; ··· 908 832 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: 909 833 i = id - KVM_REG_PPC_PMC1; 910 834 vcpu->arch.pmc[i] = set_reg_val(id, *val); 835 + break; 836 + case KVM_REG_PPC_SIAR: 837 + vcpu->arch.siar = set_reg_val(id, *val); 838 + break; 839 + case KVM_REG_PPC_SDAR: 840 + vcpu->arch.sdar = set_reg_val(id, *val); 911 841 break; 912 842 #ifdef CONFIG_VSX 913 843 case KVM_REG_PPC_FPR0 ... 
KVM_REG_PPC_FPR31: ··· 962 880 len -= len % sizeof(struct dtl_entry); 963 881 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); 964 882 break; 883 + case KVM_REG_PPC_TB_OFFSET: 884 + /* round up to multiple of 2^24 */ 885 + vcpu->arch.vcore->tb_offset = 886 + ALIGN(set_reg_val(id, *val), 1UL << 24); 887 + break; 888 + case KVM_REG_PPC_LPCR: 889 + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val)); 890 + break; 891 + case KVM_REG_PPC_PPR: 892 + vcpu->arch.ppr = set_reg_val(id, *val); 893 + break; 894 + case KVM_REG_PPC_ARCH_COMPAT: 895 + r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); 896 + break; 965 897 default: 966 898 r = -EINVAL; 967 899 break; ··· 984 888 return r; 985 889 } 986 890 987 - int kvmppc_core_check_processor_compat(void) 988 - { 989 - if (cpu_has_feature(CPU_FTR_HVMODE)) 990 - return 0; 991 - return -EIO; 992 - } 993 - 994 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 891 + static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, 892 + unsigned int id) 995 893 { 996 894 struct kvm_vcpu *vcpu; 997 895 int err = -EINVAL; ··· 1009 919 vcpu->arch.mmcr[0] = MMCR0_FC; 1010 920 vcpu->arch.ctrl = CTRL_RUNLATCH; 1011 921 /* default to host PVR, since we can't spoof it */ 1012 - vcpu->arch.pvr = mfspr(SPRN_PVR); 1013 - kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 922 + kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR)); 1014 923 spin_lock_init(&vcpu->arch.vpa_update_lock); 1015 924 spin_lock_init(&vcpu->arch.tbacct_lock); 1016 925 vcpu->arch.busy_preempt = TB_NIL; ··· 1029 940 spin_lock_init(&vcore->lock); 1030 941 init_waitqueue_head(&vcore->wq); 1031 942 vcore->preempt_tb = TB_NIL; 943 + vcore->lpcr = kvm->arch.lpcr; 1032 944 } 1033 945 kvm->arch.vcores[core] = vcore; 1034 946 kvm->arch.online_vcores++; ··· 1062 972 vpa->dirty); 1063 973 } 1064 974 1065 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 975 + static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu) 1066 976 { 1067 977 spin_lock(&vcpu->arch.vpa_update_lock); 
1068 978 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); ··· 1071 981 spin_unlock(&vcpu->arch.vpa_update_lock); 1072 982 kvm_vcpu_uninit(vcpu); 1073 983 kmem_cache_free(kvm_vcpu_cache, vcpu); 984 + } 985 + 986 + static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu) 987 + { 988 + /* Indicate we want to get back into the guest */ 989 + return 1; 1074 990 } 1075 991 1076 992 static void kvmppc_set_timer(struct kvm_vcpu *vcpu) ··· 1360 1264 1361 1265 ret = RESUME_GUEST; 1362 1266 if (vcpu->arch.trap) 1363 - ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, 1364 - vcpu->arch.run_task); 1267 + ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, 1268 + vcpu->arch.run_task); 1365 1269 1366 1270 vcpu->arch.ret = ret; 1367 1271 vcpu->arch.trap = 0; ··· 1520 1424 return vcpu->arch.ret; 1521 1425 } 1522 1426 1523 - int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) 1427 + static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) 1524 1428 { 1525 1429 int r; 1526 1430 int srcu_idx; ··· 1642 1546 .release = kvm_rma_release, 1643 1547 }; 1644 1548 1645 - long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) 1549 + static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, 1550 + struct kvm_allocate_rma *ret) 1646 1551 { 1647 1552 long fd; 1648 1553 struct kvm_rma_info *ri; ··· 1689 1592 (*sps)++; 1690 1593 } 1691 1594 1692 - int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) 1595 + static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, 1596 + struct kvm_ppc_smmu_info *info) 1693 1597 { 1694 1598 struct kvm_ppc_one_seg_page_size *sps; 1695 1599 ··· 1711 1613 /* 1712 1614 * Get (and clear) the dirty memory log for a memory slot. 
1713 1615 */ 1714 - int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) 1616 + static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, 1617 + struct kvm_dirty_log *log) 1715 1618 { 1716 1619 struct kvm_memory_slot *memslot; 1717 1620 int r; ··· 1766 1667 } 1767 1668 } 1768 1669 1769 - void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 1770 - struct kvm_memory_slot *dont) 1670 + static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, 1671 + struct kvm_memory_slot *dont) 1771 1672 { 1772 1673 if (!dont || free->arch.rmap != dont->arch.rmap) { 1773 1674 vfree(free->arch.rmap); ··· 1780 1681 } 1781 1682 } 1782 1683 1783 - int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 1784 - unsigned long npages) 1684 + static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, 1685 + unsigned long npages) 1785 1686 { 1786 1687 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); 1787 1688 if (!slot->arch.rmap) ··· 1791 1692 return 0; 1792 1693 } 1793 1694 1794 - int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1795 - struct kvm_memory_slot *memslot, 1796 - struct kvm_userspace_memory_region *mem) 1695 + static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, 1696 + struct kvm_memory_slot *memslot, 1697 + struct kvm_userspace_memory_region *mem) 1797 1698 { 1798 1699 unsigned long *phys; 1799 1700 ··· 1809 1710 return 0; 1810 1711 } 1811 1712 1812 - void kvmppc_core_commit_memory_region(struct kvm *kvm, 1813 - struct kvm_userspace_memory_region *mem, 1814 - const struct kvm_memory_slot *old) 1713 + static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, 1714 + struct kvm_userspace_memory_region *mem, 1715 + const struct kvm_memory_slot *old) 1815 1716 { 1816 1717 unsigned long npages = mem->memory_size >> PAGE_SHIFT; 1817 1718 struct kvm_memory_slot *memslot; ··· 1828 1729 } 1829 1730 } 1830 1731 1732 + /* 1733 + * Update LPCR values in kvm->arch and in vcores. 
1734 + * Caller must hold kvm->lock. 1735 + */ 1736 + void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) 1737 + { 1738 + long int i; 1739 + u32 cores_done = 0; 1740 + 1741 + if ((kvm->arch.lpcr & mask) == lpcr) 1742 + return; 1743 + 1744 + kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr; 1745 + 1746 + for (i = 0; i < KVM_MAX_VCORES; ++i) { 1747 + struct kvmppc_vcore *vc = kvm->arch.vcores[i]; 1748 + if (!vc) 1749 + continue; 1750 + spin_lock(&vc->lock); 1751 + vc->lpcr = (vc->lpcr & ~mask) | lpcr; 1752 + spin_unlock(&vc->lock); 1753 + if (++cores_done >= kvm->arch.online_vcores) 1754 + break; 1755 + } 1756 + } 1757 + 1758 + static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) 1759 + { 1760 + return; 1761 + } 1762 + 1831 1763 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) 1832 1764 { 1833 1765 int err = 0; ··· 1867 1737 unsigned long hva; 1868 1738 struct kvm_memory_slot *memslot; 1869 1739 struct vm_area_struct *vma; 1870 - unsigned long lpcr, senc; 1740 + unsigned long lpcr = 0, senc; 1741 + unsigned long lpcr_mask = 0; 1871 1742 unsigned long psize, porder; 1872 1743 unsigned long rma_size; 1873 1744 unsigned long rmls; ··· 1933 1802 senc = slb_pgsize_encoding(psize); 1934 1803 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1935 1804 (VRMA_VSID << SLB_VSID_SHIFT_1T); 1936 - lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; 1937 - lpcr |= senc << (LPCR_VRMASD_SH - 4); 1938 - kvm->arch.lpcr = lpcr; 1805 + lpcr_mask = LPCR_VRMASD; 1806 + /* the -4 is to account for senc values starting at 0x10 */ 1807 + lpcr = senc << (LPCR_VRMASD_SH - 4); 1939 1808 1940 1809 /* Create HPTEs in the hash page table for the VRMA */ 1941 1810 kvmppc_map_vrma(vcpu, memslot, porder); ··· 1956 1825 kvm->arch.rma = ri; 1957 1826 1958 1827 /* Update LPCR and RMOR */ 1959 - lpcr = kvm->arch.lpcr; 1960 1828 if (cpu_has_feature(CPU_FTR_ARCH_201)) { 1961 1829 /* PPC970; insert RMLS value (split field) in HID4 */ 1962 - lpcr &= ~((1ul << HID4_RMLS0_SH) | 1963 - 
(3ul << HID4_RMLS2_SH)); 1964 - lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) | 1830 + lpcr_mask = (1ul << HID4_RMLS0_SH) | 1831 + (3ul << HID4_RMLS2_SH) | HID4_RMOR; 1832 + lpcr = ((rmls >> 2) << HID4_RMLS0_SH) | 1965 1833 ((rmls & 3) << HID4_RMLS2_SH); 1966 1834 /* RMOR is also in HID4 */ 1967 1835 lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) 1968 1836 << HID4_RMOR_SH; 1969 1837 } else { 1970 1838 /* POWER7 */ 1971 - lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); 1972 - lpcr |= rmls << LPCR_RMLS_SH; 1839 + lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS; 1840 + lpcr = rmls << LPCR_RMLS_SH; 1973 1841 kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; 1974 1842 } 1975 - kvm->arch.lpcr = lpcr; 1976 1843 pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", 1977 1844 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); 1978 1845 ··· 1989 1860 } 1990 1861 } 1991 1862 1863 + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); 1864 + 1992 1865 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ 1993 1866 smp_wmb(); 1994 1867 kvm->arch.rma_setup_done = 1; ··· 2006 1875 goto out_srcu; 2007 1876 } 2008 1877 2009 - int kvmppc_core_init_vm(struct kvm *kvm) 1878 + static int kvmppc_core_init_vm_hv(struct kvm *kvm) 2010 1879 { 2011 1880 unsigned long lpcr, lpid; 2012 1881 ··· 2023 1892 * make sure we flush on each core before running the new VM. 
2024 1893 */ 2025 1894 cpumask_setall(&kvm->arch.need_tlb_flush); 2026 - 2027 - INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 2028 - INIT_LIST_HEAD(&kvm->arch.rtas_tokens); 2029 1895 2030 1896 kvm->arch.rma = NULL; 2031 1897 ··· 2059 1931 return 0; 2060 1932 } 2061 1933 2062 - void kvmppc_core_destroy_vm(struct kvm *kvm) 1934 + static void kvmppc_free_vcores(struct kvm *kvm) 1935 + { 1936 + long int i; 1937 + 1938 + for (i = 0; i < KVM_MAX_VCORES; ++i) 1939 + kfree(kvm->arch.vcores[i]); 1940 + kvm->arch.online_vcores = 0; 1941 + } 1942 + 1943 + static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) 2063 1944 { 2064 1945 uninhibit_secondary_onlining(); 2065 1946 1947 + kvmppc_free_vcores(kvm); 2066 1948 if (kvm->arch.rma) { 2067 1949 kvm_release_rma(kvm->arch.rma); 2068 1950 kvm->arch.rma = NULL; 2069 1951 } 2070 1952 2071 - kvmppc_rtas_tokens_free(kvm); 2072 - 2073 1953 kvmppc_free_hpt(kvm); 2074 - WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); 2075 - } 2076 - 2077 - /* These are stubs for now */ 2078 - void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) 2079 - { 2080 1954 } 2081 1955 2082 1956 /* We don't need to emulate any privileged instructions or dcbz */ 2083 - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 2084 - unsigned int inst, int *advance) 1957 + static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, 1958 + unsigned int inst, int *advance) 2085 1959 { 2086 1960 return EMULATE_FAIL; 2087 1961 } 2088 1962 2089 - int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 1963 + static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn, 1964 + ulong spr_val) 2090 1965 { 2091 1966 return EMULATE_FAIL; 2092 1967 } 2093 1968 2094 - int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 1969 + static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn, 1970 + ulong *spr_val) 2095 1971 { 2096 1972 return 
EMULATE_FAIL; 2097 1973 } 2098 1974 2099 - static int kvmppc_book3s_hv_init(void) 1975 + static int kvmppc_core_check_processor_compat_hv(void) 2100 1976 { 2101 - int r; 1977 + if (!cpu_has_feature(CPU_FTR_HVMODE)) 1978 + return -EIO; 1979 + return 0; 1980 + } 2102 1981 2103 - r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 1982 + static long kvm_arch_vm_ioctl_hv(struct file *filp, 1983 + unsigned int ioctl, unsigned long arg) 1984 + { 1985 + struct kvm *kvm __maybe_unused = filp->private_data; 1986 + void __user *argp = (void __user *)arg; 1987 + long r; 2104 1988 2105 - if (r) 2106 - return r; 1989 + switch (ioctl) { 2107 1990 2108 - r = kvmppc_mmu_hv_init(); 1991 + case KVM_ALLOCATE_RMA: { 1992 + struct kvm_allocate_rma rma; 1993 + struct kvm *kvm = filp->private_data; 1994 + 1995 + r = kvm_vm_ioctl_allocate_rma(kvm, &rma); 1996 + if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) 1997 + r = -EFAULT; 1998 + break; 1999 + } 2000 + 2001 + case KVM_PPC_ALLOCATE_HTAB: { 2002 + u32 htab_order; 2003 + 2004 + r = -EFAULT; 2005 + if (get_user(htab_order, (u32 __user *)argp)) 2006 + break; 2007 + r = kvmppc_alloc_reset_hpt(kvm, &htab_order); 2008 + if (r) 2009 + break; 2010 + r = -EFAULT; 2011 + if (put_user(htab_order, (u32 __user *)argp)) 2012 + break; 2013 + r = 0; 2014 + break; 2015 + } 2016 + 2017 + case KVM_PPC_GET_HTAB_FD: { 2018 + struct kvm_get_htab_fd ghf; 2019 + 2020 + r = -EFAULT; 2021 + if (copy_from_user(&ghf, argp, sizeof(ghf))) 2022 + break; 2023 + r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); 2024 + break; 2025 + } 2026 + 2027 + default: 2028 + r = -ENOTTY; 2029 + } 2109 2030 2110 2031 return r; 2111 2032 } 2112 2033 2113 - static void kvmppc_book3s_hv_exit(void) 2034 + static struct kvmppc_ops kvm_ops_hv = { 2035 + .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, 2036 + .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, 2037 + .get_one_reg = kvmppc_get_one_reg_hv, 2038 + .set_one_reg = kvmppc_set_one_reg_hv, 2039 + .vcpu_load = 
kvmppc_core_vcpu_load_hv, 2040 + .vcpu_put = kvmppc_core_vcpu_put_hv, 2041 + .set_msr = kvmppc_set_msr_hv, 2042 + .vcpu_run = kvmppc_vcpu_run_hv, 2043 + .vcpu_create = kvmppc_core_vcpu_create_hv, 2044 + .vcpu_free = kvmppc_core_vcpu_free_hv, 2045 + .check_requests = kvmppc_core_check_requests_hv, 2046 + .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv, 2047 + .flush_memslot = kvmppc_core_flush_memslot_hv, 2048 + .prepare_memory_region = kvmppc_core_prepare_memory_region_hv, 2049 + .commit_memory_region = kvmppc_core_commit_memory_region_hv, 2050 + .unmap_hva = kvm_unmap_hva_hv, 2051 + .unmap_hva_range = kvm_unmap_hva_range_hv, 2052 + .age_hva = kvm_age_hva_hv, 2053 + .test_age_hva = kvm_test_age_hva_hv, 2054 + .set_spte_hva = kvm_set_spte_hva_hv, 2055 + .mmu_destroy = kvmppc_mmu_destroy_hv, 2056 + .free_memslot = kvmppc_core_free_memslot_hv, 2057 + .create_memslot = kvmppc_core_create_memslot_hv, 2058 + .init_vm = kvmppc_core_init_vm_hv, 2059 + .destroy_vm = kvmppc_core_destroy_vm_hv, 2060 + .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv, 2061 + .emulate_op = kvmppc_core_emulate_op_hv, 2062 + .emulate_mtspr = kvmppc_core_emulate_mtspr_hv, 2063 + .emulate_mfspr = kvmppc_core_emulate_mfspr_hv, 2064 + .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, 2065 + .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, 2066 + }; 2067 + 2068 + static int kvmppc_book3s_init_hv(void) 2114 2069 { 2115 - kvm_exit(); 2070 + int r; 2071 + /* 2072 + * FIXME!! Do we need to check on all cpus ? 
2073 + */ 2074 + r = kvmppc_core_check_processor_compat_hv(); 2075 + if (r < 0) 2076 + return r; 2077 + 2078 + kvm_ops_hv.owner = THIS_MODULE; 2079 + kvmppc_hv_ops = &kvm_ops_hv; 2080 + 2081 + r = kvmppc_mmu_hv_init(); 2082 + return r; 2116 2083 } 2117 2084 2118 - module_init(kvmppc_book3s_hv_init); 2119 - module_exit(kvmppc_book3s_hv_exit); 2085 + static void kvmppc_book3s_exit_hv(void) 2086 + { 2087 + kvmppc_hv_ops = NULL; 2088 + } 2089 + 2090 + module_init(kvmppc_book3s_init_hv); 2091 + module_exit(kvmppc_book3s_exit_hv); 2092 + MODULE_LICENSE("GPL");
-3
arch/powerpc/kvm/book3s_hv_interrupts.S
··· 158 158 * Interrupts are enabled again at this point. 159 159 */ 160 160 161 - .global kvmppc_handler_highmem 162 - kvmppc_handler_highmem: 163 - 164 161 /* 165 162 * Register usage at this point: 166 163 *
+356 -262
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 33 33 #error Need to fix lppaca and SLB shadow accesses in little endian mode 34 34 #endif 35 35 36 - /***************************************************************************** 37 - * * 38 - * Real Mode handlers that need to be in the linear mapping * 39 - * * 40 - ****************************************************************************/ 41 - 42 - .globl kvmppc_skip_interrupt 43 - kvmppc_skip_interrupt: 44 - mfspr r13,SPRN_SRR0 45 - addi r13,r13,4 46 - mtspr SPRN_SRR0,r13 47 - GET_SCRATCH0(r13) 48 - rfid 49 - b . 50 - 51 - .globl kvmppc_skip_Hinterrupt 52 - kvmppc_skip_Hinterrupt: 53 - mfspr r13,SPRN_HSRR0 54 - addi r13,r13,4 55 - mtspr SPRN_HSRR0,r13 56 - GET_SCRATCH0(r13) 57 - hrfid 58 - b . 59 - 60 36 /* 61 37 * Call kvmppc_hv_entry in real mode. 62 38 * Must be called with interrupts hard-disabled. ··· 42 66 * LR = return address to continue at after eventually re-enabling MMU 43 67 */ 44 68 _GLOBAL(kvmppc_hv_entry_trampoline) 69 + mflr r0 70 + std r0, PPC_LR_STKOFF(r1) 71 + stdu r1, -112(r1) 45 72 mfmsr r10 46 - LOAD_REG_ADDR(r5, kvmppc_hv_entry) 73 + LOAD_REG_ADDR(r5, kvmppc_call_hv_entry) 47 74 li r0,MSR_RI 48 75 andc r0,r10,r0 49 76 li r6,MSR_IR | MSR_DR ··· 56 77 mtsrr1 r6 57 78 RFI 58 79 59 - /****************************************************************************** 60 - * * 61 - * Entry code * 62 - * * 63 - *****************************************************************************/ 80 + kvmppc_call_hv_entry: 81 + bl kvmppc_hv_entry 82 + 83 + /* Back from guest - restore host state and return to caller */ 84 + 85 + /* Restore host DABR and DABRX */ 86 + ld r5,HSTATE_DABR(r13) 87 + li r6,7 88 + mtspr SPRN_DABR,r5 89 + mtspr SPRN_DABRX,r6 90 + 91 + /* Restore SPRG3 */ 92 + ld r3,PACA_SPRG3(r13) 93 + mtspr SPRN_SPRG3,r3 94 + 95 + /* 96 + * Reload DEC. HDEC interrupts were disabled when 97 + * we reloaded the host's LPCR value. 
98 + */ 99 + ld r3, HSTATE_DECEXP(r13) 100 + mftb r4 101 + subf r4, r4, r3 102 + mtspr SPRN_DEC, r4 103 + 104 + /* Reload the host's PMU registers */ 105 + ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ 106 + lbz r4, LPPACA_PMCINUSE(r3) 107 + cmpwi r4, 0 108 + beq 23f /* skip if not */ 109 + lwz r3, HSTATE_PMC(r13) 110 + lwz r4, HSTATE_PMC + 4(r13) 111 + lwz r5, HSTATE_PMC + 8(r13) 112 + lwz r6, HSTATE_PMC + 12(r13) 113 + lwz r8, HSTATE_PMC + 16(r13) 114 + lwz r9, HSTATE_PMC + 20(r13) 115 + BEGIN_FTR_SECTION 116 + lwz r10, HSTATE_PMC + 24(r13) 117 + lwz r11, HSTATE_PMC + 28(r13) 118 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 119 + mtspr SPRN_PMC1, r3 120 + mtspr SPRN_PMC2, r4 121 + mtspr SPRN_PMC3, r5 122 + mtspr SPRN_PMC4, r6 123 + mtspr SPRN_PMC5, r8 124 + mtspr SPRN_PMC6, r9 125 + BEGIN_FTR_SECTION 126 + mtspr SPRN_PMC7, r10 127 + mtspr SPRN_PMC8, r11 128 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 129 + ld r3, HSTATE_MMCR(r13) 130 + ld r4, HSTATE_MMCR + 8(r13) 131 + ld r5, HSTATE_MMCR + 16(r13) 132 + mtspr SPRN_MMCR1, r4 133 + mtspr SPRN_MMCRA, r5 134 + mtspr SPRN_MMCR0, r3 135 + isync 136 + 23: 137 + 138 + /* 139 + * For external and machine check interrupts, we need 140 + * to call the Linux handler to process the interrupt. 141 + * We do that by jumping to absolute address 0x500 for 142 + * external interrupts, or the machine_check_fwnmi label 143 + * for machine checks (since firmware might have patched 144 + * the vector area at 0x200). The [h]rfid at the end of the 145 + * handler will return to the book3s_hv_interrupts.S code. 146 + * For other interrupts we do the rfid to get back 147 + * to the book3s_hv_interrupts.S code here. 
148 + */ 149 + ld r8, 112+PPC_LR_STKOFF(r1) 150 + addi r1, r1, 112 151 + ld r7, HSTATE_HOST_MSR(r13) 152 + 153 + cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK 154 + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 155 + BEGIN_FTR_SECTION 156 + beq 11f 157 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 158 + 159 + /* RFI into the highmem handler, or branch to interrupt handler */ 160 + mfmsr r6 161 + li r0, MSR_RI 162 + andc r6, r6, r0 163 + mtmsrd r6, 1 /* Clear RI in MSR */ 164 + mtsrr0 r8 165 + mtsrr1 r7 166 + beqa 0x500 /* external interrupt (PPC970) */ 167 + beq cr1, 13f /* machine check */ 168 + RFI 169 + 170 + /* On POWER7, we have external interrupts set to use HSRR0/1 */ 171 + 11: mtspr SPRN_HSRR0, r8 172 + mtspr SPRN_HSRR1, r7 173 + ba 0x500 174 + 175 + 13: b machine_check_fwnmi 176 + 64 177 65 178 /* 66 179 * We come in here when wakened from nap mode on a secondary hw thread. ··· 208 137 cmpdi r4,0 209 138 /* if we have no vcpu to run, go back to sleep */ 210 139 beq kvm_no_guest 211 - b kvmppc_hv_entry 140 + b 30f 212 141 213 142 27: /* XXX should handle hypervisor maintenance interrupts etc. here */ 214 143 b kvm_no_guest ··· 217 146 29: /* External non-IPI interrupt to offline secondary thread? help?? */ 218 147 stw r8,HSTATE_SAVED_XIRR(r13) 219 148 b kvm_no_guest 149 + 150 + 30: bl kvmppc_hv_entry 151 + 152 + /* Back from the guest, go back to nap */ 153 + /* Clear our vcpu pointer so we don't come back in early */ 154 + li r0, 0 155 + std r0, HSTATE_KVM_VCPU(r13) 156 + lwsync 157 + /* Clear any pending IPI - we're an offline thread */ 158 + ld r5, HSTATE_XICS_PHYS(r13) 159 + li r7, XICS_XIRR 160 + lwzcix r3, r5, r7 /* ack any pending interrupt */ 161 + rlwinm. r0, r3, 0, 0xffffff /* any pending? 
*/ 162 + beq 37f 163 + sync 164 + li r0, 0xff 165 + li r6, XICS_MFRR 166 + stbcix r0, r5, r6 /* clear the IPI */ 167 + stwcix r3, r5, r7 /* EOI it */ 168 + 37: sync 169 + 170 + /* increment the nap count and then go to nap mode */ 171 + ld r4, HSTATE_KVM_VCORE(r13) 172 + addi r4, r4, VCORE_NAP_COUNT 173 + lwsync /* make previous updates visible */ 174 + 51: lwarx r3, 0, r4 175 + addi r3, r3, 1 176 + stwcx. r3, 0, r4 177 + bne 51b 178 + 179 + kvm_no_guest: 180 + li r0, KVM_HWTHREAD_IN_NAP 181 + stb r0, HSTATE_HWTHREAD_STATE(r13) 182 + li r3, LPCR_PECE0 183 + mfspr r4, SPRN_LPCR 184 + rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 185 + mtspr SPRN_LPCR, r4 186 + isync 187 + std r0, HSTATE_SCRATCH0(r13) 188 + ptesync 189 + ld r0, HSTATE_SCRATCH0(r13) 190 + 1: cmpd r0, r0 191 + bne 1b 192 + nap 193 + b . 194 + 195 + /****************************************************************************** 196 + * * 197 + * Entry code * 198 + * * 199 + *****************************************************************************/ 220 200 221 201 .global kvmppc_hv_entry 222 202 kvmppc_hv_entry: ··· 281 159 * all other volatile GPRS = free 282 160 */ 283 161 mflr r0 284 - std r0, HSTATE_VMHANDLER(r13) 162 + std r0, PPC_LR_STKOFF(r1) 163 + stdu r1, -112(r1) 285 164 286 165 /* Set partition DABR */ 287 166 /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ ··· 323 200 ld r3, VCPU_MMCR(r4) 324 201 ld r5, VCPU_MMCR + 8(r4) 325 202 ld r6, VCPU_MMCR + 16(r4) 203 + ld r7, VCPU_SIAR(r4) 204 + ld r8, VCPU_SDAR(r4) 326 205 mtspr SPRN_MMCR1, r5 327 206 mtspr SPRN_MMCRA, r6 207 + mtspr SPRN_SIAR, r7 208 + mtspr SPRN_SDAR, r8 328 209 mtspr SPRN_MMCR0, r3 329 210 isync 330 211 ··· 381 254 /* Save R1 in the PACA */ 382 255 std r1, HSTATE_HOST_R1(r13) 383 256 384 - /* Increment yield count if they have a VPA */ 385 - ld r3, VCPU_VPA(r4) 386 - cmpdi r3, 0 387 - beq 25f 388 - lwz r5, LPPACA_YIELDCOUNT(r3) 389 - addi r5, r5, 1 390 - stw r5, LPPACA_YIELDCOUNT(r3) 391 - li r6, 1 392 - stb 
r6, VCPU_VPA_DIRTY(r4) 393 - 25: 394 257 /* Load up DAR and DSISR */ 395 258 ld r5, VCPU_DAR(r4) 396 259 lwz r6, VCPU_DSISR(r4) 397 260 mtspr SPRN_DAR, r5 398 261 mtspr SPRN_DSISR, r6 262 + 263 + li r6, KVM_GUEST_MODE_HOST_HV 264 + stb r6, HSTATE_IN_GUEST(r13) 399 265 400 266 BEGIN_FTR_SECTION 401 267 /* Restore AMR and UAMOR, set AMOR to all 1s */ ··· 463 343 bdnz 28b 464 344 ptesync 465 345 466 - 22: li r0,1 346 + /* Add timebase offset onto timebase */ 347 + 22: ld r8,VCORE_TB_OFFSET(r5) 348 + cmpdi r8,0 349 + beq 37f 350 + mftb r6 /* current host timebase */ 351 + add r8,r8,r6 352 + mtspr SPRN_TBU40,r8 /* update upper 40 bits */ 353 + mftb r7 /* check if lower 24 bits overflowed */ 354 + clrldi r6,r6,40 355 + clrldi r7,r7,40 356 + cmpld r7,r6 357 + bge 37f 358 + addis r8,r8,0x100 /* if so, increment upper 40 bits */ 359 + mtspr SPRN_TBU40,r8 360 + 361 + /* Load guest PCR value to select appropriate compat mode */ 362 + 37: ld r7, VCORE_PCR(r5) 363 + cmpdi r7, 0 364 + beq 38f 365 + mtspr SPRN_PCR, r7 366 + 38: 367 + li r0,1 467 368 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ 468 369 b 10f 469 370 ··· 494 353 beq 20b 495 354 496 355 /* Set LPCR and RMOR. 
*/ 497 - 10: ld r8,KVM_LPCR(r9) 356 + 10: ld r8,VCORE_LPCR(r5) 498 357 mtspr SPRN_LPCR,r8 499 358 ld r8,KVM_RMOR(r9) 500 359 mtspr SPRN_RMOR,r8 501 360 isync 502 361 362 + /* Increment yield count if they have a VPA */ 363 + ld r3, VCPU_VPA(r4) 364 + cmpdi r3, 0 365 + beq 25f 366 + lwz r5, LPPACA_YIELDCOUNT(r3) 367 + addi r5, r5, 1 368 + stw r5, LPPACA_YIELDCOUNT(r3) 369 + li r6, 1 370 + stb r6, VCPU_VPA_DIRTY(r4) 371 + 25: 503 372 /* Check if HDEC expires soon */ 504 373 mfspr r3,SPRN_HDEC 505 374 cmpwi r3,10 ··· 556 405 bne 24b 557 406 isync 558 407 559 - ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */ 408 + ld r5,HSTATE_KVM_VCORE(r13) 409 + ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */ 560 410 li r0,0x18f 561 411 rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ 562 412 or r0,r7,r0 ··· 693 541 mtspr SPRN_HSRR1,r11 694 542 695 543 /* Activate guest mode, so faults get handled by KVM */ 696 - li r9, KVM_GUEST_MODE_GUEST 544 + li r9, KVM_GUEST_MODE_GUEST_HV 697 545 stb r9, HSTATE_IN_GUEST(r13) 698 546 699 547 /* Enter guest */ ··· 702 550 ld r5, VCPU_CFAR(r4) 703 551 mtspr SPRN_CFAR, r5 704 552 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 553 + BEGIN_FTR_SECTION 554 + ld r0, VCPU_PPR(r4) 555 + END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 705 556 706 557 ld r5, VCPU_LR(r4) 707 558 lwz r6, VCPU_CR(r4) 708 559 mtlr r5 709 560 mtcr r6 710 561 711 - ld r0, VCPU_GPR(R0)(r4) 712 562 ld r1, VCPU_GPR(R1)(r4) 713 563 ld r2, VCPU_GPR(R2)(r4) 714 564 ld r3, VCPU_GPR(R3)(r4) ··· 724 570 ld r12, VCPU_GPR(R12)(r4) 725 571 ld r13, VCPU_GPR(R13)(r4) 726 572 573 + BEGIN_FTR_SECTION 574 + mtspr SPRN_PPR, r0 575 + END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 576 + ld r0, VCPU_GPR(R0)(r4) 727 577 ld r4, VCPU_GPR(R4)(r4) 728 578 729 579 hrfid ··· 742 584 /* 743 585 * We come here from the first-level interrupt handlers. 
744 586 */ 745 - .globl kvmppc_interrupt 746 - kvmppc_interrupt: 587 + .globl kvmppc_interrupt_hv 588 + kvmppc_interrupt_hv: 747 589 /* 748 590 * Register contents: 749 591 * R12 = interrupt vector ··· 753 595 */ 754 596 /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */ 755 597 std r9, HSTATE_HOST_R2(r13) 598 + 599 + lbz r9, HSTATE_IN_GUEST(r13) 600 + cmpwi r9, KVM_GUEST_MODE_HOST_HV 601 + beq kvmppc_bad_host_intr 602 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 603 + cmpwi r9, KVM_GUEST_MODE_GUEST 604 + ld r9, HSTATE_HOST_R2(r13) 605 + beq kvmppc_interrupt_pr 606 + #endif 607 + /* We're now back in the host but in guest MMU context */ 608 + li r9, KVM_GUEST_MODE_HOST_HV 609 + stb r9, HSTATE_IN_GUEST(r13) 610 + 756 611 ld r9, HSTATE_KVM_VCPU(r13) 757 612 758 613 /* Save registers */ ··· 791 620 ld r3, HSTATE_CFAR(r13) 792 621 std r3, VCPU_CFAR(r9) 793 622 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 623 + BEGIN_FTR_SECTION 624 + ld r4, HSTATE_PPR(r13) 625 + std r4, VCPU_PPR(r9) 626 + END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 794 627 795 628 /* Restore R1/R2 so we can handle faults */ 796 629 ld r1, HSTATE_HOST_R1(r13) ··· 816 641 mflr r4 817 642 std r3, VCPU_GPR(R13)(r9) 818 643 std r4, VCPU_LR(r9) 819 - 820 - /* Unset guest mode */ 821 - li r0, KVM_GUEST_MODE_NONE 822 - stb r0, HSTATE_IN_GUEST(r13) 823 644 824 645 stw r12,VCPU_TRAP(r9) 825 646 ··· 867 696 * set, we know the host wants us out so let's do it now 868 697 */ 869 698 do_ext_interrupt: 870 - lbz r0, HSTATE_HOST_IPI(r13) 871 - cmpwi r0, 0 872 - bne ext_interrupt_to_host 873 - 874 - /* Now read the interrupt from the ICP */ 875 - ld r5, HSTATE_XICS_PHYS(r13) 876 - li r7, XICS_XIRR 877 - cmpdi r5, 0 878 - beq- ext_interrupt_to_host 879 - lwzcix r3, r5, r7 880 - rlwinm. r0, r3, 0, 0xffffff 881 - sync 882 - beq 3f /* if nothing pending in the ICP */ 883 - 884 - /* We found something in the ICP... 
885 - * 886 - * If it's not an IPI, stash it in the PACA and return to 887 - * the host, we don't (yet) handle directing real external 888 - * interrupts directly to the guest 889 - */ 890 - cmpwi r0, XICS_IPI 891 - bne ext_stash_for_host 892 - 893 - /* It's an IPI, clear the MFRR and EOI it */ 894 - li r0, 0xff 895 - li r6, XICS_MFRR 896 - stbcix r0, r5, r6 /* clear the IPI */ 897 - stwcix r3, r5, r7 /* EOI it */ 898 - sync 899 - 900 - /* We need to re-check host IPI now in case it got set in the 901 - * meantime. If it's clear, we bounce the interrupt to the 902 - * guest 903 - */ 904 - lbz r0, HSTATE_HOST_IPI(r13) 905 - cmpwi r0, 0 906 - bne- 1f 699 + bl kvmppc_read_intr 700 + cmpdi r3, 0 701 + bgt ext_interrupt_to_host 907 702 908 703 /* Allright, looks like an IPI for the guest, we need to set MER */ 909 - 3: 910 704 /* Check if any CPU is heading out to the host, if so head out too */ 911 705 ld r5, HSTATE_KVM_VCORE(r13) 912 706 lwz r0, VCORE_ENTRY_EXIT(r5) ··· 900 764 mtspr SPRN_LPCR, r8 901 765 b fast_guest_return 902 766 903 - /* We raced with the host, we need to resend that IPI, bummer */ 904 - 1: li r0, IPI_PRIORITY 905 - stbcix r0, r5, r6 /* set the IPI */ 906 - sync 907 - b ext_interrupt_to_host 908 - 909 - ext_stash_for_host: 910 - /* It's not an IPI and it's for the host, stash it in the PACA 911 - * before exit, it will be picked up by the host ICP driver 912 - */ 913 - stw r3, HSTATE_SAVED_XIRR(r13) 914 767 ext_interrupt_to_host: 915 768 916 769 guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 917 - /* Save DEC */ 918 - mfspr r5,SPRN_DEC 919 - mftb r6 920 - extsw r5,r5 921 - add r5,r5,r6 922 - std r5,VCPU_DEC_EXPIRES(r9) 923 - 924 770 /* Save more register state */ 925 771 mfdar r6 926 772 mfdsisr r7 ··· 1072 954 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 1073 955 mtspr SPRN_LPID,r7 1074 956 isync 1075 - li r0,0 957 + 958 + /* Subtract timebase offset from timebase */ 959 + ld r8,VCORE_TB_OFFSET(r5) 960 + cmpdi r8,0 961 + beq 
17f 962 + mftb r6 /* current host timebase */ 963 + subf r8,r8,r6 964 + mtspr SPRN_TBU40,r8 /* update upper 40 bits */ 965 + mftb r7 /* check if lower 24 bits overflowed */ 966 + clrldi r6,r6,40 967 + clrldi r7,r7,40 968 + cmpld r7,r6 969 + bge 17f 970 + addis r8,r8,0x100 /* if so, increment upper 40 bits */ 971 + mtspr SPRN_TBU40,r8 972 + 973 + /* Reset PCR */ 974 + 17: ld r0, VCORE_PCR(r5) 975 + cmpdi r0, 0 976 + beq 18f 977 + li r0, 0 978 + mtspr SPRN_PCR, r0 979 + 18: 980 + /* Signal secondary CPUs to continue */ 1076 981 stb r0,VCORE_IN_GUEST(r5) 1077 982 lis r8,0x7fff /* MAX_INT@h */ 1078 983 mtspr SPRN_HDEC,r8 ··· 1193 1052 1: addi r8,r8,16 1194 1053 .endr 1195 1054 1055 + /* Save DEC */ 1056 + mfspr r5,SPRN_DEC 1057 + mftb r6 1058 + extsw r5,r5 1059 + add r5,r5,r6 1060 + std r5,VCPU_DEC_EXPIRES(r9) 1061 + 1196 1062 /* Save and reset AMR and UAMOR before turning on the MMU */ 1197 1063 BEGIN_FTR_SECTION 1198 1064 mfspr r5,SPRN_AMR ··· 1209 1061 li r6,0 1210 1062 mtspr SPRN_AMR,r6 1211 1063 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1064 + 1065 + /* Unset guest mode */ 1066 + li r0, KVM_GUEST_MODE_NONE 1067 + stb r0, HSTATE_IN_GUEST(r13) 1212 1068 1213 1069 /* Switch DSCR back to host value */ 1214 1070 BEGIN_FTR_SECTION ··· 1286 1134 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ 1287 1135 b 22f 1288 1136 21: mfspr r5, SPRN_MMCR1 1137 + mfspr r7, SPRN_SIAR 1138 + mfspr r8, SPRN_SDAR 1289 1139 std r4, VCPU_MMCR(r9) 1290 1140 std r5, VCPU_MMCR + 8(r9) 1291 1141 std r6, VCPU_MMCR + 16(r9) 1142 + std r7, VCPU_SIAR(r9) 1143 + std r8, VCPU_SDAR(r9) 1292 1144 mfspr r3, SPRN_PMC1 1293 1145 mfspr r4, SPRN_PMC2 1294 1146 mfspr r5, SPRN_PMC3 ··· 1314 1158 stw r11, VCPU_PMC + 28(r9) 1315 1159 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1316 1160 22: 1161 + ld r0, 112+PPC_LR_STKOFF(r1) 1162 + addi r1, r1, 112 1163 + mtlr r0 1164 + blr 1165 + secondary_too_late: 1166 + ld r5,HSTATE_KVM_VCORE(r13) 1167 + HMT_LOW 1168 + 13: lbz r3,VCORE_IN_GUEST(r5) 1169 + cmpwi r3,0 
1170 + bne 13b 1171 + HMT_MEDIUM 1172 + li r0, KVM_GUEST_MODE_NONE 1173 + stb r0, HSTATE_IN_GUEST(r13) 1174 + ld r11,PACA_SLBSHADOWPTR(r13) 1317 1175 1318 - /* Secondary threads go off to take a nap on POWER7 */ 1319 - BEGIN_FTR_SECTION 1320 - lwz r0,VCPU_PTID(r9) 1321 - cmpwi r0,0 1322 - bne secondary_nap 1323 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1324 - 1325 - /* Restore host DABR and DABRX */ 1326 - ld r5,HSTATE_DABR(r13) 1327 - li r6,7 1328 - mtspr SPRN_DABR,r5 1329 - mtspr SPRN_DABRX,r6 1330 - 1331 - /* Restore SPRG3 */ 1332 - ld r3,PACA_SPRG3(r13) 1333 - mtspr SPRN_SPRG3,r3 1334 - 1335 - /* 1336 - * Reload DEC. HDEC interrupts were disabled when 1337 - * we reloaded the host's LPCR value. 1338 - */ 1339 - ld r3, HSTATE_DECEXP(r13) 1340 - mftb r4 1341 - subf r4, r4, r3 1342 - mtspr SPRN_DEC, r4 1343 - 1344 - /* Reload the host's PMU registers */ 1345 - ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ 1346 - lbz r4, LPPACA_PMCINUSE(r3) 1347 - cmpwi r4, 0 1348 - beq 23f /* skip if not */ 1349 - lwz r3, HSTATE_PMC(r13) 1350 - lwz r4, HSTATE_PMC + 4(r13) 1351 - lwz r5, HSTATE_PMC + 8(r13) 1352 - lwz r6, HSTATE_PMC + 12(r13) 1353 - lwz r8, HSTATE_PMC + 16(r13) 1354 - lwz r9, HSTATE_PMC + 20(r13) 1355 - BEGIN_FTR_SECTION 1356 - lwz r10, HSTATE_PMC + 24(r13) 1357 - lwz r11, HSTATE_PMC + 28(r13) 1358 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1359 - mtspr SPRN_PMC1, r3 1360 - mtspr SPRN_PMC2, r4 1361 - mtspr SPRN_PMC3, r5 1362 - mtspr SPRN_PMC4, r6 1363 - mtspr SPRN_PMC5, r8 1364 - mtspr SPRN_PMC6, r9 1365 - BEGIN_FTR_SECTION 1366 - mtspr SPRN_PMC7, r10 1367 - mtspr SPRN_PMC8, r11 1368 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1369 - ld r3, HSTATE_MMCR(r13) 1370 - ld r4, HSTATE_MMCR + 8(r13) 1371 - ld r5, HSTATE_MMCR + 16(r13) 1372 - mtspr SPRN_MMCR1, r4 1373 - mtspr SPRN_MMCRA, r5 1374 - mtspr SPRN_MMCR0, r3 1375 - isync 1376 - 23: 1377 - /* 1378 - * For external and machine check interrupts, we need 1379 - * to call the Linux handler to process the 
interrupt. 1380 - * We do that by jumping to absolute address 0x500 for 1381 - * external interrupts, or the machine_check_fwnmi label 1382 - * for machine checks (since firmware might have patched 1383 - * the vector area at 0x200). The [h]rfid at the end of the 1384 - * handler will return to the book3s_hv_interrupts.S code. 1385 - * For other interrupts we do the rfid to get back 1386 - * to the book3s_hv_interrupts.S code here. 1387 - */ 1388 - ld r8, HSTATE_VMHANDLER(r13) 1389 - ld r7, HSTATE_HOST_MSR(r13) 1390 - 1391 - cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK 1392 - cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 1393 - BEGIN_FTR_SECTION 1394 - beq 11f 1395 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1396 - 1397 - /* RFI into the highmem handler, or branch to interrupt handler */ 1398 - mfmsr r6 1399 - li r0, MSR_RI 1400 - andc r6, r6, r0 1401 - mtmsrd r6, 1 /* Clear RI in MSR */ 1402 - mtsrr0 r8 1403 - mtsrr1 r7 1404 - beqa 0x500 /* external interrupt (PPC970) */ 1405 - beq cr1, 13f /* machine check */ 1406 - RFI 1407 - 1408 - /* On POWER7, we have external interrupts set to use HSRR0/1 */ 1409 - 11: mtspr SPRN_HSRR0, r8 1410 - mtspr SPRN_HSRR1, r7 1411 - ba 0x500 1412 - 1413 - 13: b machine_check_fwnmi 1176 + .rept SLB_NUM_BOLTED 1177 + ld r5,SLBSHADOW_SAVEAREA(r11) 1178 + ld r6,SLBSHADOW_SAVEAREA+8(r11) 1179 + andis. r7,r5,SLB_ESID_V@h 1180 + beq 1f 1181 + slbmte r6,r5 1182 + 1: addi r11,r11,16 1183 + .endr 1184 + b 22b 1414 1185 1415 1186 /* 1416 1187 * Check whether an HDSI is an HPTE not found fault or something else. ··· 1416 1333 stw r8, VCPU_LAST_INST(r9) 1417 1334 1418 1335 /* Unset guest mode. 
*/ 1419 - li r0, KVM_GUEST_MODE_NONE 1336 + li r0, KVM_GUEST_MODE_HOST_HV 1420 1337 stb r0, HSTATE_IN_GUEST(r13) 1421 1338 b guest_exit_cont 1422 1339 ··· 1784 1701 rotldi r11, r11, 63 1785 1702 b fast_interrupt_c_return 1786 1703 1787 - secondary_too_late: 1788 - ld r5,HSTATE_KVM_VCORE(r13) 1789 - HMT_LOW 1790 - 13: lbz r3,VCORE_IN_GUEST(r5) 1791 - cmpwi r3,0 1792 - bne 13b 1793 - HMT_MEDIUM 1794 - ld r11,PACA_SLBSHADOWPTR(r13) 1704 + /* 1705 + * Determine what sort of external interrupt is pending (if any). 1706 + * Returns: 1707 + * 0 if no interrupt is pending 1708 + * 1 if an interrupt is pending that needs to be handled by the host 1709 + * -1 if there was a guest wakeup IPI (which has now been cleared) 1710 + */ 1711 + kvmppc_read_intr: 1712 + /* see if a host IPI is pending */ 1713 + li r3, 1 1714 + lbz r0, HSTATE_HOST_IPI(r13) 1715 + cmpwi r0, 0 1716 + bne 1f 1795 1717 1796 - .rept SLB_NUM_BOLTED 1797 - ld r5,SLBSHADOW_SAVEAREA(r11) 1798 - ld r6,SLBSHADOW_SAVEAREA+8(r11) 1799 - andis. r7,r5,SLB_ESID_V@h 1800 - beq 1f 1801 - slbmte r6,r5 1802 - 1: addi r11,r11,16 1803 - .endr 1804 - 1805 - secondary_nap: 1806 - /* Clear our vcpu pointer so we don't come back in early */ 1807 - li r0, 0 1808 - std r0, HSTATE_KVM_VCPU(r13) 1809 - lwsync 1810 - /* Clear any pending IPI - assume we're a secondary thread */ 1811 - ld r5, HSTATE_XICS_PHYS(r13) 1718 + /* Now read the interrupt from the ICP */ 1719 + ld r6, HSTATE_XICS_PHYS(r13) 1812 1720 li r7, XICS_XIRR 1813 - lwzcix r3, r5, r7 /* ack any pending interrupt */ 1814 - rlwinm. r0, r3, 0, 0xffffff /* any pending? */ 1815 - beq 37f 1721 + cmpdi r6, 0 1722 + beq- 1f 1723 + lwzcix r0, r6, r7 1724 + rlwinm. 
r3, r0, 0, 0xffffff 1816 1725 sync 1817 - li r0, 0xff 1818 - li r6, XICS_MFRR 1819 - stbcix r0, r5, r6 /* clear the IPI */ 1820 - stwcix r3, r5, r7 /* EOI it */ 1821 - 37: sync 1726 + beq 1f /* if nothing pending in the ICP */ 1822 1727 1823 - /* increment the nap count and then go to nap mode */ 1824 - ld r4, HSTATE_KVM_VCORE(r13) 1825 - addi r4, r4, VCORE_NAP_COUNT 1826 - lwsync /* make previous updates visible */ 1827 - 51: lwarx r3, 0, r4 1828 - addi r3, r3, 1 1829 - stwcx. r3, 0, r4 1830 - bne 51b 1728 + /* We found something in the ICP... 1729 + * 1730 + * If it's not an IPI, stash it in the PACA and return to 1731 + * the host, we don't (yet) handle directing real external 1732 + * interrupts directly to the guest 1733 + */ 1734 + cmpwi r3, XICS_IPI /* if there is, is it an IPI? */ 1735 + li r3, 1 1736 + bne 42f 1831 1737 1832 - kvm_no_guest: 1833 - li r0, KVM_HWTHREAD_IN_NAP 1834 - stb r0, HSTATE_HWTHREAD_STATE(r13) 1738 + /* It's an IPI, clear the MFRR and EOI it */ 1739 + li r3, 0xff 1740 + li r8, XICS_MFRR 1741 + stbcix r3, r6, r8 /* clear the IPI */ 1742 + stwcix r0, r6, r7 /* EOI it */ 1743 + sync 1835 1744 1836 - li r3, LPCR_PECE0 1837 - mfspr r4, SPRN_LPCR 1838 - rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 1839 - mtspr SPRN_LPCR, r4 1840 - isync 1841 - std r0, HSTATE_SCRATCH0(r13) 1842 - ptesync 1843 - ld r0, HSTATE_SCRATCH0(r13) 1844 - 1: cmpd r0, r0 1845 - bne 1b 1846 - nap 1847 - b . 1745 + /* We need to re-check host IPI now in case it got set in the 1746 + * meantime. 
If it's clear, we bounce the interrupt to the 1747 + * guest 1748 + */ 1749 + lbz r0, HSTATE_HOST_IPI(r13) 1750 + cmpwi r0, 0 1751 + bne- 43f 1752 + 1753 + /* OK, it's an IPI for us */ 1754 + li r3, -1 1755 + 1: blr 1756 + 1757 + 42: /* It's not an IPI and it's for the host, stash it in the PACA 1758 + * before exit, it will be picked up by the host ICP driver 1759 + */ 1760 + stw r0, HSTATE_SAVED_XIRR(r13) 1761 + b 1b 1762 + 1763 + 43: /* We raced with the host, we need to resend that IPI, bummer */ 1764 + li r0, IPI_PRIORITY 1765 + stbcix r0, r6, r8 /* set the IPI */ 1766 + sync 1767 + b 1b 1848 1768 1849 1769 /* 1850 1770 * Save away FP, VMX and VSX registers. ··· 1965 1879 lwz r7,VCPU_VRSAVE(r4) 1966 1880 mtspr SPRN_VRSAVE,r7 1967 1881 blr 1882 + 1883 + /* 1884 + * We come here if we get any exception or interrupt while we are 1885 + * executing host real mode code while in guest MMU context. 1886 + * For now just spin, but we should do something better. 1887 + */ 1888 + kvmppc_bad_host_intr: 1889 + b .
+26 -6
arch/powerpc/kvm/book3s_interrupts.S
··· 26 26 27 27 #if defined(CONFIG_PPC_BOOK3S_64) 28 28 #define FUNC(name) GLUE(.,name) 29 + #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU 30 + 29 31 #elif defined(CONFIG_PPC_BOOK3S_32) 30 32 #define FUNC(name) name 33 + #define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) 34 + 31 35 #endif /* CONFIG_PPC_BOOK3S_XX */ 32 36 33 37 #define VCPU_LOAD_NVGPRS(vcpu) \ ··· 91 87 VCPU_LOAD_NVGPRS(r4) 92 88 93 89 kvm_start_lightweight: 90 + /* Copy registers into shadow vcpu so we can access them in real mode */ 91 + GET_SHADOW_VCPU(r3) 92 + bl FUNC(kvmppc_copy_to_svcpu) 93 + nop 94 + REST_GPR(4, r1) 94 95 95 96 #ifdef CONFIG_PPC_BOOK3S_64 97 + /* Get the dcbz32 flag */ 96 98 PPC_LL r3, VCPU_HFLAGS(r4) 97 99 rldicl r3, r3, 0, 63 /* r3 &= 1 */ 98 100 stb r3, HSTATE_RESTORE_HID5(r13) ··· 121 111 * 122 112 */ 123 113 124 - .global kvmppc_handler_highmem 125 - kvmppc_handler_highmem: 126 - 127 114 /* 128 115 * Register usage at this point: 129 116 * ··· 132 125 * 133 126 */ 134 127 135 - /* R7 = vcpu */ 136 - PPC_LL r7, GPR4(r1) 128 + /* Transfer reg values from shadow vcpu back to vcpu struct */ 129 + /* On 64-bit, interrupts are still off at this point */ 130 + PPC_LL r3, GPR4(r1) /* vcpu pointer */ 131 + GET_SHADOW_VCPU(r4) 132 + bl FUNC(kvmppc_copy_from_svcpu) 133 + nop 137 134 138 135 #ifdef CONFIG_PPC_BOOK3S_64 136 + /* Re-enable interrupts */ 137 + ld r3, HSTATE_HOST_MSR(r13) 138 + ori r3, r3, MSR_EE 139 + MTMSR_EERI(r3) 140 + 139 141 /* 140 142 * Reload kernel SPRG3 value. 141 143 * No need to save guest value as usermode can't modify SPRG3. 
142 144 */ 143 145 ld r3, PACA_SPRG3(r13) 144 146 mtspr SPRN_SPRG3, r3 147 + 145 148 #endif /* CONFIG_PPC_BOOK3S_64 */ 149 + 150 + /* R7 = vcpu */ 151 + PPC_LL r7, GPR4(r1) 146 152 147 153 PPC_STL r14, VCPU_GPR(R14)(r7) 148 154 PPC_STL r15, VCPU_GPR(R15)(r7) ··· 181 161 182 162 /* Restore r3 (kvm_run) and r4 (vcpu) */ 183 163 REST_2GPRS(3, r1) 184 - bl FUNC(kvmppc_handle_exit) 164 + bl FUNC(kvmppc_handle_exit_pr) 185 165 186 166 /* If RESUME_GUEST, get back in the loop */ 187 167 cmpwi r3, RESUME_GUEST
+61 -5
arch/powerpc/kvm/book3s_mmu_hpte.c
··· 28 28 #include <asm/mmu_context.h> 29 29 #include <asm/hw_irq.h> 30 30 31 - #include "trace.h" 31 + #include "trace_pr.h" 32 32 33 33 #define PTE_SIZE 12 34 34 ··· 56 56 HPTEG_HASH_BITS_VPTE_LONG); 57 57 } 58 58 59 + #ifdef CONFIG_PPC_BOOK3S_64 60 + static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage) 61 + { 62 + return hash_64((vpage & 0xffffffff0ULL) >> 4, 63 + HPTEG_HASH_BITS_VPTE_64K); 64 + } 65 + #endif 66 + 59 67 void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 60 68 { 61 69 u64 index; ··· 90 82 index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); 91 83 hlist_add_head_rcu(&pte->list_vpte_long, 92 84 &vcpu3s->hpte_hash_vpte_long[index]); 85 + 86 + #ifdef CONFIG_PPC_BOOK3S_64 87 + /* Add to vPTE_64k list */ 88 + index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage); 89 + hlist_add_head_rcu(&pte->list_vpte_64k, 90 + &vcpu3s->hpte_hash_vpte_64k[index]); 91 + #endif 92 + 93 + vcpu3s->hpte_cache_count++; 93 94 94 95 spin_unlock(&vcpu3s->mmu_lock); 95 96 } ··· 130 113 hlist_del_init_rcu(&pte->list_pte_long); 131 114 hlist_del_init_rcu(&pte->list_vpte); 132 115 hlist_del_init_rcu(&pte->list_vpte_long); 116 + #ifdef CONFIG_PPC_BOOK3S_64 117 + hlist_del_init_rcu(&pte->list_vpte_64k); 118 + #endif 119 + vcpu3s->hpte_cache_count--; 133 120 134 121 spin_unlock(&vcpu3s->mmu_lock); 135 122 136 - vcpu3s->hpte_cache_count--; 137 123 call_rcu(&pte->rcu_head, free_pte_rcu); 138 124 } 139 125 ··· 239 219 rcu_read_unlock(); 240 220 } 241 221 222 + #ifdef CONFIG_PPC_BOOK3S_64 223 + /* Flush with mask 0xffffffff0 */ 224 + static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp) 225 + { 226 + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 227 + struct hlist_head *list; 228 + struct hpte_cache *pte; 229 + u64 vp_mask = 0xffffffff0ULL; 230 + 231 + list = &vcpu3s->hpte_hash_vpte_64k[ 232 + kvmppc_mmu_hash_vpte_64k(guest_vp)]; 233 + 234 + rcu_read_lock(); 235 + 236 + /* Check the list for matching entries and invalidate */ 237 + 
hlist_for_each_entry_rcu(pte, list, list_vpte_64k) 238 + if ((pte->pte.vpage & vp_mask) == guest_vp) 239 + invalidate_pte(vcpu, pte); 240 + 241 + rcu_read_unlock(); 242 + } 243 + #endif 244 + 242 245 /* Flush with mask 0xffffff000 */ 243 246 static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) 244 247 { ··· 292 249 case 0xfffffffffULL: 293 250 kvmppc_mmu_pte_vflush_short(vcpu, guest_vp); 294 251 break; 252 + #ifdef CONFIG_PPC_BOOK3S_64 253 + case 0xffffffff0ULL: 254 + kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp); 255 + break; 256 + #endif 295 257 case 0xffffff000ULL: 296 258 kvmppc_mmu_pte_vflush_long(vcpu, guest_vp); 297 259 break; ··· 333 285 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 334 286 struct hpte_cache *pte; 335 287 336 - pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); 337 - vcpu3s->hpte_cache_count++; 338 - 339 288 if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM) 340 289 kvmppc_mmu_pte_flush_all(vcpu); 341 290 291 + pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); 292 + 342 293 return pte; 294 + } 295 + 296 + void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte) 297 + { 298 + kmem_cache_free(hpte_cache, pte); 343 299 } 344 300 345 301 void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu) ··· 372 320 ARRAY_SIZE(vcpu3s->hpte_hash_vpte)); 373 321 kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long, 374 322 ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long)); 323 + #ifdef CONFIG_PPC_BOOK3S_64 324 + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k, 325 + ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k)); 326 + #endif 375 327 376 328 spin_lock_init(&vcpu3s->mmu_lock); 377 329
+364 -140
arch/powerpc/kvm/book3s_pr.c
··· 40 40 #include <linux/sched.h> 41 41 #include <linux/vmalloc.h> 42 42 #include <linux/highmem.h> 43 + #include <linux/module.h> 43 44 44 - #include "trace.h" 45 + #include "book3s.h" 46 + 47 + #define CREATE_TRACE_POINTS 48 + #include "trace_pr.h" 45 49 46 50 /* #define EXIT_DEBUG */ 47 51 /* #define DEBUG_EXT */ ··· 60 56 #define HW_PAGE_SIZE PAGE_SIZE 61 57 #endif 62 58 63 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 59 + static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) 64 60 { 65 61 #ifdef CONFIG_PPC_BOOK3S_64 66 62 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 67 63 memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb)); 68 - memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, 69 - sizeof(get_paca()->shadow_vcpu)); 70 64 svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; 71 65 svcpu_put(svcpu); 72 66 #endif 73 67 vcpu->cpu = smp_processor_id(); 74 68 #ifdef CONFIG_PPC_BOOK3S_32 75 - current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; 69 + current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; 76 70 #endif 77 71 } 78 72 79 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 73 + static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) 80 74 { 81 75 #ifdef CONFIG_PPC_BOOK3S_64 82 76 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 83 77 memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); 84 - memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 85 - sizeof(get_paca()->shadow_vcpu)); 86 78 to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; 87 79 svcpu_put(svcpu); 88 80 #endif ··· 87 87 vcpu->cpu = -1; 88 88 } 89 89 90 - int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) 90 + /* Copy data needed by real-mode code from vcpu to shadow vcpu */ 91 + void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, 92 + struct kvm_vcpu *vcpu) 93 + { 94 + svcpu->gpr[0] = vcpu->arch.gpr[0]; 95 + svcpu->gpr[1] = vcpu->arch.gpr[1]; 96 + 
svcpu->gpr[2] = vcpu->arch.gpr[2]; 97 + svcpu->gpr[3] = vcpu->arch.gpr[3]; 98 + svcpu->gpr[4] = vcpu->arch.gpr[4]; 99 + svcpu->gpr[5] = vcpu->arch.gpr[5]; 100 + svcpu->gpr[6] = vcpu->arch.gpr[6]; 101 + svcpu->gpr[7] = vcpu->arch.gpr[7]; 102 + svcpu->gpr[8] = vcpu->arch.gpr[8]; 103 + svcpu->gpr[9] = vcpu->arch.gpr[9]; 104 + svcpu->gpr[10] = vcpu->arch.gpr[10]; 105 + svcpu->gpr[11] = vcpu->arch.gpr[11]; 106 + svcpu->gpr[12] = vcpu->arch.gpr[12]; 107 + svcpu->gpr[13] = vcpu->arch.gpr[13]; 108 + svcpu->cr = vcpu->arch.cr; 109 + svcpu->xer = vcpu->arch.xer; 110 + svcpu->ctr = vcpu->arch.ctr; 111 + svcpu->lr = vcpu->arch.lr; 112 + svcpu->pc = vcpu->arch.pc; 113 + } 114 + 115 + /* Copy data touched by real-mode code from shadow vcpu back to vcpu */ 116 + void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, 117 + struct kvmppc_book3s_shadow_vcpu *svcpu) 118 + { 119 + vcpu->arch.gpr[0] = svcpu->gpr[0]; 120 + vcpu->arch.gpr[1] = svcpu->gpr[1]; 121 + vcpu->arch.gpr[2] = svcpu->gpr[2]; 122 + vcpu->arch.gpr[3] = svcpu->gpr[3]; 123 + vcpu->arch.gpr[4] = svcpu->gpr[4]; 124 + vcpu->arch.gpr[5] = svcpu->gpr[5]; 125 + vcpu->arch.gpr[6] = svcpu->gpr[6]; 126 + vcpu->arch.gpr[7] = svcpu->gpr[7]; 127 + vcpu->arch.gpr[8] = svcpu->gpr[8]; 128 + vcpu->arch.gpr[9] = svcpu->gpr[9]; 129 + vcpu->arch.gpr[10] = svcpu->gpr[10]; 130 + vcpu->arch.gpr[11] = svcpu->gpr[11]; 131 + vcpu->arch.gpr[12] = svcpu->gpr[12]; 132 + vcpu->arch.gpr[13] = svcpu->gpr[13]; 133 + vcpu->arch.cr = svcpu->cr; 134 + vcpu->arch.xer = svcpu->xer; 135 + vcpu->arch.ctr = svcpu->ctr; 136 + vcpu->arch.lr = svcpu->lr; 137 + vcpu->arch.pc = svcpu->pc; 138 + vcpu->arch.shadow_srr1 = svcpu->shadow_srr1; 139 + vcpu->arch.fault_dar = svcpu->fault_dar; 140 + vcpu->arch.fault_dsisr = svcpu->fault_dsisr; 141 + vcpu->arch.last_inst = svcpu->last_inst; 142 + } 143 + 144 + static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) 91 145 { 92 146 int r = 1; /* Indicate we want to get back into the guest */ 93 147 ··· 154 100 } 155 
101 156 102 /************* MMU Notifiers *************/ 103 + static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start, 104 + unsigned long end) 105 + { 106 + long i; 107 + struct kvm_vcpu *vcpu; 108 + struct kvm_memslots *slots; 109 + struct kvm_memory_slot *memslot; 157 110 158 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 111 + slots = kvm_memslots(kvm); 112 + kvm_for_each_memslot(memslot, slots) { 113 + unsigned long hva_start, hva_end; 114 + gfn_t gfn, gfn_end; 115 + 116 + hva_start = max(start, memslot->userspace_addr); 117 + hva_end = min(end, memslot->userspace_addr + 118 + (memslot->npages << PAGE_SHIFT)); 119 + if (hva_start >= hva_end) 120 + continue; 121 + /* 122 + * {gfn(page) | page intersects with [hva_start, hva_end)} = 123 + * {gfn, gfn+1, ..., gfn_end-1}. 124 + */ 125 + gfn = hva_to_gfn_memslot(hva_start, memslot); 126 + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 127 + kvm_for_each_vcpu(i, vcpu, kvm) 128 + kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT, 129 + gfn_end << PAGE_SHIFT); 130 + } 131 + } 132 + 133 + static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva) 159 134 { 160 135 trace_kvm_unmap_hva(hva); 161 136 162 - /* 163 - * Flush all shadow tlb entries everywhere. 
This is slow, but 164 - * we are 100% sure that we catch the to be unmapped page 165 - */ 166 - kvm_flush_remote_tlbs(kvm); 137 + do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); 167 138 168 139 return 0; 169 140 } 170 141 171 - int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 142 + static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, 143 + unsigned long end) 172 144 { 173 - /* kvm_unmap_hva flushes everything anyways */ 174 - kvm_unmap_hva(kvm, start); 145 + do_kvm_unmap_hva(kvm, start, end); 175 146 176 147 return 0; 177 148 } 178 149 179 - int kvm_age_hva(struct kvm *kvm, unsigned long hva) 150 + static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) 180 151 { 181 152 /* XXX could be more clever ;) */ 182 153 return 0; 183 154 } 184 155 185 - int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 156 + static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva) 186 157 { 187 158 /* XXX could be more clever ;) */ 188 159 return 0; 189 160 } 190 161 191 - void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 162 + static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte) 192 163 { 193 164 /* The page will get remapped properly on its next fault */ 194 - kvm_unmap_hva(kvm, hva); 165 + do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); 195 166 } 196 167 197 168 /*****************************************/ ··· 238 159 vcpu->arch.shadow_msr = smsr; 239 160 } 240 161 241 - void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 162 + static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) 242 163 { 243 164 ulong old_msr = vcpu->arch.shared->msr; 244 165 ··· 298 219 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 299 220 } 300 221 301 - void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 222 + void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) 302 223 { 303 224 u32 host_pvr; 304 225 ··· 334 255 really needs them in a VM on Cell and force disable them. 
*/ 335 256 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) 336 257 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); 258 + 259 + /* 260 + * If they're asking for POWER6 or later, set the flag 261 + * indicating that we can do multiple large page sizes 262 + * and 1TB segments. 263 + * Also set the flag that indicates that tlbie has the large 264 + * page bit in the RB operand instead of the instruction. 265 + */ 266 + switch (PVR_VER(pvr)) { 267 + case PVR_POWER6: 268 + case PVR_POWER7: 269 + case PVR_POWER7p: 270 + case PVR_POWER8: 271 + vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | 272 + BOOK3S_HFLAG_NEW_TLBIE; 273 + break; 274 + } 337 275 338 276 #ifdef CONFIG_PPC_BOOK3S_32 339 277 /* 32 bit Book3S always has 32 byte dcbz */ ··· 430 334 ulong eaddr, int vec) 431 335 { 432 336 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); 337 + bool iswrite = false; 433 338 int r = RESUME_GUEST; 434 339 int relocated; 435 340 int page_found = 0; ··· 441 344 u64 vsid; 442 345 443 346 relocated = data ? 
dr : ir; 347 + if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE)) 348 + iswrite = true; 444 349 445 350 /* Resolve real address if translation turned on */ 446 351 if (relocated) { 447 - page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); 352 + page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite); 448 353 } else { 449 354 pte.may_execute = true; 450 355 pte.may_read = true; ··· 454 355 pte.raddr = eaddr & KVM_PAM; 455 356 pte.eaddr = eaddr; 456 357 pte.vpage = eaddr >> 12; 358 + pte.page_size = MMU_PAGE_64K; 457 359 } 458 360 459 361 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { ··· 488 388 489 389 if (page_found == -ENOENT) { 490 390 /* Page not found in guest PTE entries */ 491 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 492 391 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 493 - vcpu->arch.shared->dsisr = svcpu->fault_dsisr; 392 + vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr; 494 393 vcpu->arch.shared->msr |= 495 - (svcpu->shadow_srr1 & 0x00000000f8000000ULL); 496 - svcpu_put(svcpu); 394 + vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL; 497 395 kvmppc_book3s_queue_irqprio(vcpu, vec); 498 396 } else if (page_found == -EPERM) { 499 397 /* Storage protection */ 500 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 501 398 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 502 - vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE; 399 + vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; 503 400 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; 504 401 vcpu->arch.shared->msr |= 505 - svcpu->shadow_srr1 & 0x00000000f8000000ULL; 506 - svcpu_put(svcpu); 402 + vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL; 507 403 kvmppc_book3s_queue_irqprio(vcpu, vec); 508 404 } else if (page_found == -EINVAL) { 509 405 /* Page not found in guest SLB */ ··· 507 411 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 508 412 } else if (!is_mmio && 509 413 kvmppc_visible_gfn(vcpu, pte.raddr >> 
PAGE_SHIFT)) { 414 + if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { 415 + /* 416 + * There is already a host HPTE there, presumably 417 + * a read-only one for a page the guest thinks 418 + * is writable, so get rid of it first. 419 + */ 420 + kvmppc_mmu_unmap_page(vcpu, &pte); 421 + } 510 422 /* The guest's PTE is not mapped yet. Map on the host */ 511 - kvmppc_mmu_map_page(vcpu, &pte); 423 + kvmppc_mmu_map_page(vcpu, &pte, iswrite); 512 424 if (data) 513 425 vcpu->stat.sp_storage++; 514 426 else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 515 - (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) 427 + (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) 516 428 kvmppc_patch_dcbz(vcpu, &pte); 517 429 } else { 518 430 /* MMIO */ ··· 723 619 724 620 if (lost_ext & MSR_FP) 725 621 kvmppc_load_up_fpu(); 622 + #ifdef CONFIG_ALTIVEC 726 623 if (lost_ext & MSR_VEC) 727 624 kvmppc_load_up_altivec(); 625 + #endif 728 626 current->thread.regs->msr |= lost_ext; 729 627 } 730 628 731 - int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 732 - unsigned int exit_nr) 629 + int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 630 + unsigned int exit_nr) 733 631 { 734 632 int r = RESUME_HOST; 735 633 int s; ··· 749 643 switch (exit_nr) { 750 644 case BOOK3S_INTERRUPT_INST_STORAGE: 751 645 { 752 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 753 - ulong shadow_srr1 = svcpu->shadow_srr1; 646 + ulong shadow_srr1 = vcpu->arch.shadow_srr1; 754 647 vcpu->stat.pf_instruc++; 755 648 756 649 #ifdef CONFIG_PPC_BOOK3S_32 757 650 /* We set segments as unused segments when invalidating them. So 758 651 * treat the respective fault as segment fault. 
*/ 759 - if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) { 760 - kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 761 - r = RESUME_GUEST; 652 + { 653 + struct kvmppc_book3s_shadow_vcpu *svcpu; 654 + u32 sr; 655 + 656 + svcpu = svcpu_get(vcpu); 657 + sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]; 762 658 svcpu_put(svcpu); 763 - break; 659 + if (sr == SR_INVALID) { 660 + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 661 + r = RESUME_GUEST; 662 + break; 663 + } 764 664 } 765 665 #endif 766 - svcpu_put(svcpu); 767 666 768 667 /* only care about PTEG not found errors, but leave NX alone */ 769 668 if (shadow_srr1 & 0x40000000) { 669 + int idx = srcu_read_lock(&vcpu->kvm->srcu); 770 670 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); 671 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 771 672 vcpu->stat.sp_instruc++; 772 673 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 773 674 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { ··· 795 682 case BOOK3S_INTERRUPT_DATA_STORAGE: 796 683 { 797 684 ulong dar = kvmppc_get_fault_dar(vcpu); 798 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 799 - u32 fault_dsisr = svcpu->fault_dsisr; 685 + u32 fault_dsisr = vcpu->arch.fault_dsisr; 800 686 vcpu->stat.pf_storage++; 801 687 802 688 #ifdef CONFIG_PPC_BOOK3S_32 803 689 /* We set segments as unused segments when invalidating them. So 804 690 * treat the respective fault as segment fault. 
*/ 805 - if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) { 806 - kvmppc_mmu_map_segment(vcpu, dar); 807 - r = RESUME_GUEST; 691 + { 692 + struct kvmppc_book3s_shadow_vcpu *svcpu; 693 + u32 sr; 694 + 695 + svcpu = svcpu_get(vcpu); 696 + sr = svcpu->sr[dar >> SID_SHIFT]; 808 697 svcpu_put(svcpu); 809 - break; 698 + if (sr == SR_INVALID) { 699 + kvmppc_mmu_map_segment(vcpu, dar); 700 + r = RESUME_GUEST; 701 + break; 702 + } 810 703 } 811 704 #endif 812 - svcpu_put(svcpu); 813 705 814 - /* The only case we need to handle is missing shadow PTEs */ 815 - if (fault_dsisr & DSISR_NOHPTE) { 706 + /* 707 + * We need to handle missing shadow PTEs, and 708 + * protection faults due to us mapping a page read-only 709 + * when the guest thinks it is writable. 710 + */ 711 + if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) { 712 + int idx = srcu_read_lock(&vcpu->kvm->srcu); 816 713 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); 714 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 817 715 } else { 818 716 vcpu->arch.shared->dar = dar; 819 717 vcpu->arch.shared->dsisr = fault_dsisr; ··· 867 743 case BOOK3S_INTERRUPT_H_EMUL_ASSIST: 868 744 { 869 745 enum emulation_result er; 870 - struct kvmppc_book3s_shadow_vcpu *svcpu; 871 746 ulong flags; 872 747 873 748 program_interrupt: 874 - svcpu = svcpu_get(vcpu); 875 - flags = svcpu->shadow_srr1 & 0x1f0000ull; 876 - svcpu_put(svcpu); 749 + flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; 877 750 878 751 if (vcpu->arch.shared->msr & MSR_PR) { 879 752 #ifdef EXIT_DEBUG ··· 919 798 ulong cmd = kvmppc_get_gpr(vcpu, 3); 920 799 int i; 921 800 922 - #ifdef CONFIG_KVM_BOOK3S_64_PR 801 + #ifdef CONFIG_PPC_BOOK3S_64 923 802 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { 924 803 r = RESUME_GUEST; 925 804 break; ··· 1002 881 break; 1003 882 default: 1004 883 { 1005 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 1006 - ulong shadow_srr1 = svcpu->shadow_srr1; 1007 - svcpu_put(svcpu); 884 + ulong shadow_srr1 = vcpu->arch.shadow_srr1; 1008 
885 /* Ugh - bork here! What did we get? */ 1009 886 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 1010 887 exit_nr, kvmppc_get_pc(vcpu), shadow_srr1); ··· 1039 920 return r; 1040 921 } 1041 922 1042 - int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 1043 - struct kvm_sregs *sregs) 923 + static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu, 924 + struct kvm_sregs *sregs) 1044 925 { 1045 926 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1046 927 int i; ··· 1066 947 return 0; 1067 948 } 1068 949 1069 - int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 1070 - struct kvm_sregs *sregs) 950 + static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu, 951 + struct kvm_sregs *sregs) 1071 952 { 1072 953 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1073 954 int i; 1074 955 1075 - kvmppc_set_pvr(vcpu, sregs->pvr); 956 + kvmppc_set_pvr_pr(vcpu, sregs->pvr); 1076 957 1077 958 vcpu3s->sdr1 = sregs->u.s.sdr1; 1078 959 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { ··· 1102 983 return 0; 1103 984 } 1104 985 1105 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 986 + static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, 987 + union kvmppc_one_reg *val) 1106 988 { 1107 989 int r = 0; 1108 990 ··· 1132 1012 return r; 1133 1013 } 1134 1014 1135 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 1015 + static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, 1016 + union kvmppc_one_reg *val) 1136 1017 { 1137 1018 int r = 0; 1138 1019 ··· 1163 1042 return r; 1164 1043 } 1165 1044 1166 - int kvmppc_core_check_processor_compat(void) 1167 - { 1168 - return 0; 1169 - } 1170 - 1171 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 1045 + static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, 1046 + unsigned int id) 1172 1047 { 1173 1048 struct kvmppc_vcpu_book3s *vcpu_book3s; 1174 1049 struct kvm_vcpu *vcpu; 
1175 1050 int err = -ENOMEM; 1176 1051 unsigned long p; 1177 1052 1178 - vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); 1179 - if (!vcpu_book3s) 1053 + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 1054 + if (!vcpu) 1180 1055 goto out; 1181 1056 1182 - vcpu_book3s->shadow_vcpu = 1183 - kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); 1184 - if (!vcpu_book3s->shadow_vcpu) 1057 + vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); 1058 + if (!vcpu_book3s) 1185 1059 goto free_vcpu; 1060 + vcpu->arch.book3s = vcpu_book3s; 1186 1061 1187 - vcpu = &vcpu_book3s->vcpu; 1062 + #ifdef CONFIG_KVM_BOOK3S_32 1063 + vcpu->arch.shadow_vcpu = 1064 + kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL); 1065 + if (!vcpu->arch.shadow_vcpu) 1066 + goto free_vcpu3s; 1067 + #endif 1068 + 1188 1069 err = kvm_vcpu_init(vcpu, kvm, id); 1189 1070 if (err) 1190 1071 goto free_shadow_vcpu; ··· 1199 1076 vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); 1200 1077 1201 1078 #ifdef CONFIG_PPC_BOOK3S_64 1202 - /* default to book3s_64 (970fx) */ 1079 + /* 1080 + * Default to the same as the host if we're on sufficiently 1081 + * recent machine that we have 1TB segments; 1082 + * otherwise default to PPC970FX. 
1083 + */ 1203 1084 vcpu->arch.pvr = 0x3C0301; 1085 + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) 1086 + vcpu->arch.pvr = mfspr(SPRN_PVR); 1204 1087 #else 1205 1088 /* default to book3s_32 (750) */ 1206 1089 vcpu->arch.pvr = 0x84202; 1207 1090 #endif 1208 - kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 1091 + kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); 1209 1092 vcpu->arch.slb_nr = 64; 1210 1093 1211 1094 vcpu->arch.shadow_msr = MSR_USER64; ··· 1225 1096 uninit_vcpu: 1226 1097 kvm_vcpu_uninit(vcpu); 1227 1098 free_shadow_vcpu: 1228 - kfree(vcpu_book3s->shadow_vcpu); 1229 - free_vcpu: 1099 + #ifdef CONFIG_KVM_BOOK3S_32 1100 + kfree(vcpu->arch.shadow_vcpu); 1101 + free_vcpu3s: 1102 + #endif 1230 1103 vfree(vcpu_book3s); 1104 + free_vcpu: 1105 + kmem_cache_free(kvm_vcpu_cache, vcpu); 1231 1106 out: 1232 1107 return ERR_PTR(err); 1233 1108 } 1234 1109 1235 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 1110 + static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) 1236 1111 { 1237 1112 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 1238 1113 1239 1114 free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); 1240 1115 kvm_vcpu_uninit(vcpu); 1241 - kfree(vcpu_book3s->shadow_vcpu); 1116 + #ifdef CONFIG_KVM_BOOK3S_32 1117 + kfree(vcpu->arch.shadow_vcpu); 1118 + #endif 1242 1119 vfree(vcpu_book3s); 1120 + kmem_cache_free(kvm_vcpu_cache, vcpu); 1243 1121 } 1244 1122 1245 - int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1123 + static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1246 1124 { 1247 1125 int ret; 1248 1126 struct thread_fp_state fp; ··· 1352 1216 /* 1353 1217 * Get (and clear) the dirty memory log for a memory slot. 
1354 1218 */ 1355 - int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 1356 - struct kvm_dirty_log *log) 1219 + static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, 1220 + struct kvm_dirty_log *log) 1357 1221 { 1358 1222 struct kvm_memory_slot *memslot; 1359 1223 struct kvm_vcpu *vcpu; ··· 1388 1252 return r; 1389 1253 } 1390 1254 1391 - #ifdef CONFIG_PPC64 1392 - int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) 1255 + static void kvmppc_core_flush_memslot_pr(struct kvm *kvm, 1256 + struct kvm_memory_slot *memslot) 1393 1257 { 1394 - info->flags = KVM_PPC_1T_SEGMENTS; 1258 + return; 1259 + } 1260 + 1261 + static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, 1262 + struct kvm_memory_slot *memslot, 1263 + struct kvm_userspace_memory_region *mem) 1264 + { 1265 + return 0; 1266 + } 1267 + 1268 + static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, 1269 + struct kvm_userspace_memory_region *mem, 1270 + const struct kvm_memory_slot *old) 1271 + { 1272 + return; 1273 + } 1274 + 1275 + static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free, 1276 + struct kvm_memory_slot *dont) 1277 + { 1278 + return; 1279 + } 1280 + 1281 + static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot, 1282 + unsigned long npages) 1283 + { 1284 + return 0; 1285 + } 1286 + 1287 + 1288 + #ifdef CONFIG_PPC64 1289 + static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, 1290 + struct kvm_ppc_smmu_info *info) 1291 + { 1292 + long int i; 1293 + struct kvm_vcpu *vcpu; 1294 + 1295 + info->flags = 0; 1395 1296 1396 1297 /* SLB is always 64 entries */ 1397 1298 info->slb_size = 64; ··· 1439 1266 info->sps[0].enc[0].page_shift = 12; 1440 1267 info->sps[0].enc[0].pte_enc = 0; 1441 1268 1269 + /* 1270 + * 64k large page size. 1271 + * We only want to put this in if the CPUs we're emulating 1272 + * support it, but unfortunately we don't have a vcpu easily 1273 + * to hand here to test. 
Just pick the first vcpu, and if 1274 + * that doesn't exist yet, report the minimum capability, 1275 + * i.e., no 64k pages. 1276 + * 1T segment support goes along with 64k pages. 1277 + */ 1278 + i = 1; 1279 + vcpu = kvm_get_vcpu(kvm, 0); 1280 + if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { 1281 + info->flags = KVM_PPC_1T_SEGMENTS; 1282 + info->sps[i].page_shift = 16; 1283 + info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01; 1284 + info->sps[i].enc[0].page_shift = 16; 1285 + info->sps[i].enc[0].pte_enc = 1; 1286 + ++i; 1287 + } 1288 + 1442 1289 /* Standard 16M large page size segment */ 1443 - info->sps[1].page_shift = 24; 1444 - info->sps[1].slb_enc = SLB_VSID_L; 1445 - info->sps[1].enc[0].page_shift = 24; 1446 - info->sps[1].enc[0].pte_enc = 0; 1290 + info->sps[i].page_shift = 24; 1291 + info->sps[i].slb_enc = SLB_VSID_L; 1292 + info->sps[i].enc[0].page_shift = 24; 1293 + info->sps[i].enc[0].pte_enc = 0; 1447 1294 1448 1295 return 0; 1296 + } 1297 + #else 1298 + static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, 1299 + struct kvm_ppc_smmu_info *info) 1300 + { 1301 + /* We should not get called */ 1302 + BUG(); 1449 1303 } 1450 1304 #endif /* CONFIG_PPC64 */ 1451 - 1452 - void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 1453 - struct kvm_memory_slot *dont) 1454 - { 1455 - } 1456 - 1457 - int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 1458 - unsigned long npages) 1459 - { 1460 - return 0; 1461 - } 1462 - 1463 - int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1464 - struct kvm_memory_slot *memslot, 1465 - struct kvm_userspace_memory_region *mem) 1466 - { 1467 - return 0; 1468 - } 1469 - 1470 - void kvmppc_core_commit_memory_region(struct kvm *kvm, 1471 - struct kvm_userspace_memory_region *mem, 1472 - const struct kvm_memory_slot *old) 1473 - { 1474 - } 1475 - 1476 - void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) 1477 - { 1478 - } 1479 1305 1480 1306 static unsigned int 
kvm_global_user_count = 0; 1481 1307 static DEFINE_SPINLOCK(kvm_global_user_count_lock); 1482 1308 1483 - int kvmppc_core_init_vm(struct kvm *kvm) 1309 + static int kvmppc_core_init_vm_pr(struct kvm *kvm) 1484 1310 { 1485 - #ifdef CONFIG_PPC64 1486 - INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1487 - INIT_LIST_HEAD(&kvm->arch.rtas_tokens); 1488 - #endif 1311 + mutex_init(&kvm->arch.hpt_mutex); 1489 1312 1490 1313 if (firmware_has_feature(FW_FEATURE_SET_MODE)) { 1491 1314 spin_lock(&kvm_global_user_count_lock); ··· 1492 1323 return 0; 1493 1324 } 1494 1325 1495 - void kvmppc_core_destroy_vm(struct kvm *kvm) 1326 + static void kvmppc_core_destroy_vm_pr(struct kvm *kvm) 1496 1327 { 1497 1328 #ifdef CONFIG_PPC64 1498 1329 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); ··· 1507 1338 } 1508 1339 } 1509 1340 1510 - static int kvmppc_book3s_init(void) 1341 + static int kvmppc_core_check_processor_compat_pr(void) 1342 + { 1343 + /* we are always compatible */ 1344 + return 0; 1345 + } 1346 + 1347 + static long kvm_arch_vm_ioctl_pr(struct file *filp, 1348 + unsigned int ioctl, unsigned long arg) 1349 + { 1350 + return -ENOTTY; 1351 + } 1352 + 1353 + static struct kvmppc_ops kvm_ops_pr = { 1354 + .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr, 1355 + .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr, 1356 + .get_one_reg = kvmppc_get_one_reg_pr, 1357 + .set_one_reg = kvmppc_set_one_reg_pr, 1358 + .vcpu_load = kvmppc_core_vcpu_load_pr, 1359 + .vcpu_put = kvmppc_core_vcpu_put_pr, 1360 + .set_msr = kvmppc_set_msr_pr, 1361 + .vcpu_run = kvmppc_vcpu_run_pr, 1362 + .vcpu_create = kvmppc_core_vcpu_create_pr, 1363 + .vcpu_free = kvmppc_core_vcpu_free_pr, 1364 + .check_requests = kvmppc_core_check_requests_pr, 1365 + .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr, 1366 + .flush_memslot = kvmppc_core_flush_memslot_pr, 1367 + .prepare_memory_region = kvmppc_core_prepare_memory_region_pr, 1368 + .commit_memory_region = kvmppc_core_commit_memory_region_pr, 1369 + .unmap_hva = 
kvm_unmap_hva_pr, 1370 + .unmap_hva_range = kvm_unmap_hva_range_pr, 1371 + .age_hva = kvm_age_hva_pr, 1372 + .test_age_hva = kvm_test_age_hva_pr, 1373 + .set_spte_hva = kvm_set_spte_hva_pr, 1374 + .mmu_destroy = kvmppc_mmu_destroy_pr, 1375 + .free_memslot = kvmppc_core_free_memslot_pr, 1376 + .create_memslot = kvmppc_core_create_memslot_pr, 1377 + .init_vm = kvmppc_core_init_vm_pr, 1378 + .destroy_vm = kvmppc_core_destroy_vm_pr, 1379 + .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr, 1380 + .emulate_op = kvmppc_core_emulate_op_pr, 1381 + .emulate_mtspr = kvmppc_core_emulate_mtspr_pr, 1382 + .emulate_mfspr = kvmppc_core_emulate_mfspr_pr, 1383 + .fast_vcpu_kick = kvm_vcpu_kick, 1384 + .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, 1385 + }; 1386 + 1387 + 1388 + int kvmppc_book3s_init_pr(void) 1511 1389 { 1512 1390 int r; 1513 1391 1514 - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, 1515 - THIS_MODULE); 1516 - 1517 - if (r) 1392 + r = kvmppc_core_check_processor_compat_pr(); 1393 + if (r < 0) 1518 1394 return r; 1519 1395 1520 - r = kvmppc_mmu_hpte_sysinit(); 1396 + kvm_ops_pr.owner = THIS_MODULE; 1397 + kvmppc_pr_ops = &kvm_ops_pr; 1521 1398 1399 + r = kvmppc_mmu_hpte_sysinit(); 1522 1400 return r; 1523 1401 } 1524 1402 1525 - static void kvmppc_book3s_exit(void) 1403 + void kvmppc_book3s_exit_pr(void) 1526 1404 { 1405 + kvmppc_pr_ops = NULL; 1527 1406 kvmppc_mmu_hpte_sysexit(); 1528 - kvm_exit(); 1529 1407 } 1530 1408 1531 - module_init(kvmppc_book3s_init); 1532 - module_exit(kvmppc_book3s_exit); 1409 + /* 1410 + * We only support separate modules for book3s 64 1411 + */ 1412 + #ifdef CONFIG_PPC_BOOK3S_64 1413 + 1414 + module_init(kvmppc_book3s_init_pr); 1415 + module_exit(kvmppc_book3s_exit_pr); 1416 + 1417 + MODULE_LICENSE("GPL"); 1418 + #endif
+37 -15
arch/powerpc/kvm/book3s_pr_papr.c
··· 21 21 #include <asm/kvm_ppc.h> 22 22 #include <asm/kvm_book3s.h> 23 23 24 + #define HPTE_SIZE 16 /* bytes per HPT entry */ 25 + 24 26 static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index) 25 27 { 26 28 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); ··· 42 40 long pte_index = kvmppc_get_gpr(vcpu, 5); 43 41 unsigned long pteg[2 * 8]; 44 42 unsigned long pteg_addr, i, *hpte; 43 + long int ret; 45 44 45 + i = pte_index & 7; 46 46 pte_index &= ~7UL; 47 47 pteg_addr = get_pteg_addr(vcpu, pte_index); 48 48 49 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 49 50 copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); 50 51 hpte = pteg; 51 52 53 + ret = H_PTEG_FULL; 52 54 if (likely((flags & H_EXACT) == 0)) { 53 - pte_index &= ~7UL; 54 55 for (i = 0; ; ++i) { 55 56 if (i == 8) 56 - return H_PTEG_FULL; 57 + goto done; 57 58 if ((*hpte & HPTE_V_VALID) == 0) 58 59 break; 59 60 hpte += 2; 60 61 } 61 62 } else { 62 - i = kvmppc_get_gpr(vcpu, 5) & 7UL; 63 63 hpte += i * 2; 64 + if (*hpte & HPTE_V_VALID) 65 + goto done; 64 66 } 65 67 66 68 hpte[0] = kvmppc_get_gpr(vcpu, 6); 67 69 hpte[1] = kvmppc_get_gpr(vcpu, 7); 68 - copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg)); 69 - kvmppc_set_gpr(vcpu, 3, H_SUCCESS); 70 + pteg_addr += i * HPTE_SIZE; 71 + copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE); 70 72 kvmppc_set_gpr(vcpu, 4, pte_index | i); 73 + ret = H_SUCCESS; 74 + 75 + done: 76 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 77 + kvmppc_set_gpr(vcpu, 3, ret); 71 78 72 79 return EMULATE_DONE; 73 80 } ··· 88 77 unsigned long avpn = kvmppc_get_gpr(vcpu, 6); 89 78 unsigned long v = 0, pteg, rb; 90 79 unsigned long pte[2]; 80 + long int ret; 91 81 92 82 pteg = get_pteg_addr(vcpu, pte_index); 83 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 93 84 copy_from_user(pte, (void __user *)pteg, sizeof(pte)); 94 85 86 + ret = H_NOT_FOUND; 95 87 if ((pte[0] & HPTE_V_VALID) == 0 || 96 88 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) || 97 - ((flags 
& H_ANDCOND) && (pte[0] & avpn) != 0)) { 98 - kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); 99 - return EMULATE_DONE; 100 - } 89 + ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) 90 + goto done; 101 91 102 92 copy_to_user((void __user *)pteg, &v, sizeof(v)); 103 93 104 94 rb = compute_tlbie_rb(pte[0], pte[1], pte_index); 105 95 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); 106 96 107 - kvmppc_set_gpr(vcpu, 3, H_SUCCESS); 97 + ret = H_SUCCESS; 108 98 kvmppc_set_gpr(vcpu, 4, pte[0]); 109 99 kvmppc_set_gpr(vcpu, 5, pte[1]); 100 + 101 + done: 102 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 103 + kvmppc_set_gpr(vcpu, 3, ret); 110 104 111 105 return EMULATE_DONE; 112 106 } ··· 140 124 int paramnr = 4; 141 125 int ret = H_SUCCESS; 142 126 127 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 143 128 for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { 144 129 unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i)); 145 130 unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1); ··· 189 172 } 190 173 kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh); 191 174 } 175 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 192 176 kvmppc_set_gpr(vcpu, 3, ret); 193 177 194 178 return EMULATE_DONE; ··· 202 184 unsigned long avpn = kvmppc_get_gpr(vcpu, 6); 203 185 unsigned long rb, pteg, r, v; 204 186 unsigned long pte[2]; 187 + long int ret; 205 188 206 189 pteg = get_pteg_addr(vcpu, pte_index); 190 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 207 191 copy_from_user(pte, (void __user *)pteg, sizeof(pte)); 208 192 193 + ret = H_NOT_FOUND; 209 194 if ((pte[0] & HPTE_V_VALID) == 0 || 210 - ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) { 211 - kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); 212 - return EMULATE_DONE; 213 - } 195 + ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) 196 + goto done; 214 197 215 198 v = pte[0]; 216 199 r = pte[1]; ··· 226 207 rb = compute_tlbie_rb(v, r, pte_index); 227 208 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? 
true : false); 228 209 copy_to_user((void __user *)pteg, pte, sizeof(pte)); 210 + ret = H_SUCCESS; 229 211 230 - kvmppc_set_gpr(vcpu, 3, H_SUCCESS); 212 + done: 213 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 214 + kvmppc_set_gpr(vcpu, 3, ret); 231 215 232 216 return EMULATE_DONE; 233 217 }
+5 -27
arch/powerpc/kvm/book3s_rmhandlers.S
··· 38 38 39 39 #define FUNC(name) GLUE(.,name) 40 40 41 - .globl kvmppc_skip_interrupt 42 - kvmppc_skip_interrupt: 43 - /* 44 - * Here all GPRs are unchanged from when the interrupt happened 45 - * except for r13, which is saved in SPRG_SCRATCH0. 46 - */ 47 - mfspr r13, SPRN_SRR0 48 - addi r13, r13, 4 49 - mtspr SPRN_SRR0, r13 50 - GET_SCRATCH0(r13) 51 - rfid 52 - b . 53 - 54 - .globl kvmppc_skip_Hinterrupt 55 - kvmppc_skip_Hinterrupt: 56 - /* 57 - * Here all GPRs are unchanged from when the interrupt happened 58 - * except for r13, which is saved in SPRG_SCRATCH0. 59 - */ 60 - mfspr r13, SPRN_HSRR0 61 - addi r13, r13, 4 62 - mtspr SPRN_HSRR0, r13 63 - GET_SCRATCH0(r13) 64 - hrfid 65 - b . 66 - 67 41 #elif defined(CONFIG_PPC_BOOK3S_32) 68 42 69 43 #define FUNC(name) name ··· 153 179 154 180 li r6, MSR_IR | MSR_DR 155 181 andc r6, r5, r6 /* Clear DR and IR in MSR value */ 182 + #ifdef CONFIG_PPC_BOOK3S_32 156 183 /* 157 184 * Set EE in HOST_MSR so that it's enabled when we get into our 158 - * C exit handler function 185 + * C exit handler function. On 64-bit we delay enabling 186 + * interrupts until we have finished transferring stuff 187 + * to or from the PACA. 159 188 */ 160 189 ori r5, r5, MSR_EE 190 + #endif 161 191 mtsrr0 r7 162 192 mtsrr1 r6 163 193 RFI
+1
arch/powerpc/kvm/book3s_rtas.c
··· 260 260 */ 261 261 return rc; 262 262 } 263 + EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall); 263 264 264 265 void kvmppc_rtas_tokens_free(struct kvm *kvm) 265 266 {
+2 -2
arch/powerpc/kvm/book3s_segment.S
··· 161 161 .global kvmppc_handler_trampoline_exit 162 162 kvmppc_handler_trampoline_exit: 163 163 164 - .global kvmppc_interrupt 165 - kvmppc_interrupt: 164 + .global kvmppc_interrupt_pr 165 + kvmppc_interrupt_pr: 166 166 167 167 /* Register usage at this point: 168 168 *
+4 -3
arch/powerpc/kvm/book3s_xics.c
··· 818 818 } 819 819 820 820 /* Check for real mode returning too hard */ 821 - if (xics->real_mode) 821 + if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm)) 822 822 return kvmppc_xics_rm_complete(vcpu, req); 823 823 824 824 switch (req) { ··· 840 840 841 841 return rc; 842 842 } 843 + EXPORT_SYMBOL_GPL(kvmppc_xics_hcall); 843 844 844 845 845 846 /* -- Initialisation code etc. -- */ ··· 1251 1250 1252 1251 xics_debugfs_init(xics); 1253 1252 1254 - #ifdef CONFIG_KVM_BOOK3S_64_HV 1253 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1255 1254 if (cpu_has_feature(CPU_FTR_ARCH_206)) { 1256 1255 /* Enable real mode support */ 1257 1256 xics->real_mode = ENABLE_REALMODE; 1258 1257 xics->real_mode_dbg = DEBUG_REALMODE; 1259 1258 } 1260 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 1259 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1261 1260 1262 1261 return 0; 1263 1262 }
+305 -44
arch/powerpc/kvm/booke.c
··· 40 40 41 41 #include "timing.h" 42 42 #include "booke.h" 43 - #include "trace.h" 43 + 44 + #define CREATE_TRACE_POINTS 45 + #include "trace_booke.h" 44 46 45 47 unsigned long kvmppc_booke_handlers; 46 48 ··· 135 133 #endif 136 134 } 137 135 136 + static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) 137 + { 138 + /* Synchronize guest's desire to get debug interrupts into shadow MSR */ 139 + #ifndef CONFIG_KVM_BOOKE_HV 140 + vcpu->arch.shadow_msr &= ~MSR_DE; 141 + vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; 142 + #endif 143 + 144 + /* Force enable debug interrupts when user space wants to debug */ 145 + if (vcpu->guest_debug) { 146 + #ifdef CONFIG_KVM_BOOKE_HV 147 + /* 148 + * Since there is no shadow MSR, sync MSR_DE into the guest 149 + * visible MSR. 150 + */ 151 + vcpu->arch.shared->msr |= MSR_DE; 152 + #else 153 + vcpu->arch.shadow_msr |= MSR_DE; 154 + vcpu->arch.shared->msr &= ~MSR_DE; 155 + #endif 156 + } 157 + } 158 + 138 159 /* 139 160 * Helper function for "full" MSR writes. No need to call this if only 140 161 * EE/CE/ME/DE/RI are changing. 
··· 175 150 kvmppc_mmu_msr_notify(vcpu, old_msr); 176 151 kvmppc_vcpu_sync_spe(vcpu); 177 152 kvmppc_vcpu_sync_fpu(vcpu); 153 + kvmppc_vcpu_sync_debug(vcpu); 178 154 } 179 155 180 156 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, ··· 681 655 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 682 656 { 683 657 int ret, s; 658 + struct thread_struct thread; 684 659 #ifdef CONFIG_PPC_FPU 685 660 struct thread_fp_state fp; 686 661 int fpexc_mode; ··· 722 695 kvmppc_load_guest_fp(vcpu); 723 696 #endif 724 697 698 + /* Switch to guest debug context */ 699 + thread.debug = vcpu->arch.shadow_dbg_reg; 700 + switch_booke_debug_regs(&thread); 701 + thread.debug = current->thread.debug; 702 + current->thread.debug = vcpu->arch.shadow_dbg_reg; 703 + 725 704 kvmppc_fix_ee_before_entry(); 726 705 727 706 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 728 707 729 708 /* No need for kvm_guest_exit. It's done in handle_exit. 730 709 We also get here with interrupts enabled. */ 710 + 711 + /* Switch back to user space debug context */ 712 + switch_booke_debug_regs(&thread); 713 + current->thread.debug = thread.debug; 731 714 732 715 #ifdef CONFIG_PPC_FPU 733 716 kvmppc_save_guest_fp(vcpu); ··· 792 755 default: 793 756 BUG(); 794 757 } 758 + } 759 + 760 + static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) 761 + { 762 + struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); 763 + u32 dbsr = vcpu->arch.dbsr; 764 + 765 + run->debug.arch.status = 0; 766 + run->debug.arch.address = vcpu->arch.pc; 767 + 768 + if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) { 769 + run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT; 770 + } else { 771 + if (dbsr & (DBSR_DAC1W | DBSR_DAC2W)) 772 + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE; 773 + else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R)) 774 + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ; 775 + if (dbsr & (DBSR_DAC1R | DBSR_DAC1W)) 776 + run->debug.arch.address = dbg_reg->dac1; 777 
+ else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W)) 778 + run->debug.arch.address = dbg_reg->dac2; 779 + } 780 + 781 + return RESUME_HOST; 795 782 } 796 783 797 784 static void kvmppc_fill_pt_regs(struct pt_regs *regs) ··· 877 816 break; 878 817 case BOOKE_INTERRUPT_CRITICAL: 879 818 unknown_exception(&regs); 819 + break; 820 + case BOOKE_INTERRUPT_DEBUG: 821 + /* Save DBSR before preemption is enabled */ 822 + vcpu->arch.dbsr = mfspr(SPRN_DBSR); 823 + kvmppc_clear_dbsr(); 880 824 break; 881 825 } 882 826 } ··· 1200 1134 } 1201 1135 1202 1136 case BOOKE_INTERRUPT_DEBUG: { 1203 - u32 dbsr; 1204 - 1205 - vcpu->arch.pc = mfspr(SPRN_CSRR0); 1206 - 1207 - /* clear IAC events in DBSR register */ 1208 - dbsr = mfspr(SPRN_DBSR); 1209 - dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; 1210 - mtspr(SPRN_DBSR, dbsr); 1211 - 1212 - run->exit_reason = KVM_EXIT_DEBUG; 1137 + r = kvmppc_handle_debug(run, vcpu); 1138 + if (r == RESUME_HOST) 1139 + run->exit_reason = KVM_EXIT_DEBUG; 1213 1140 kvmppc_account_exit(vcpu, DEBUG_EXITS); 1214 - r = RESUME_HOST; 1215 1141 break; 1216 1142 } 1217 1143 ··· 1254 1196 kvmppc_set_msr(vcpu, 0); 1255 1197 1256 1198 #ifndef CONFIG_KVM_BOOKE_HV 1257 - vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; 1199 + vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS; 1258 1200 vcpu->arch.shadow_pid = 1; 1259 1201 vcpu->arch.shared->msr = 0; 1260 1202 #endif ··· 1416 1358 return 0; 1417 1359 } 1418 1360 1419 - void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 1361 + int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 1420 1362 { 1421 1363 sregs->u.e.features |= KVM_SREGS_E_IVOR; 1422 1364 ··· 1436 1378 sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; 1437 1379 sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; 1438 1380 sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 1381 + return 0; 1439 1382 } 1440 1383 1441 1384 int kvmppc_set_sregs_ivor(struct 
kvm_vcpu *vcpu, struct kvm_sregs *sregs) ··· 1471 1412 1472 1413 get_sregs_base(vcpu, sregs); 1473 1414 get_sregs_arch206(vcpu, sregs); 1474 - kvmppc_core_get_sregs(vcpu, sregs); 1475 - return 0; 1415 + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); 1476 1416 } 1477 1417 1478 1418 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, ··· 1490 1432 if (ret < 0) 1491 1433 return ret; 1492 1434 1493 - return kvmppc_core_set_sregs(vcpu, sregs); 1435 + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); 1494 1436 } 1495 1437 1496 1438 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) ··· 1498 1440 int r = 0; 1499 1441 union kvmppc_one_reg val; 1500 1442 int size; 1501 - long int i; 1502 1443 1503 1444 size = one_reg_size(reg->id); 1504 1445 if (size > sizeof(val)) ··· 1505 1448 1506 1449 switch (reg->id) { 1507 1450 case KVM_REG_PPC_IAC1: 1508 - case KVM_REG_PPC_IAC2: 1509 - case KVM_REG_PPC_IAC3: 1510 - case KVM_REG_PPC_IAC4: 1511 - i = reg->id - KVM_REG_PPC_IAC1; 1512 - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]); 1451 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); 1513 1452 break; 1453 + case KVM_REG_PPC_IAC2: 1454 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); 1455 + break; 1456 + #if CONFIG_PPC_ADV_DEBUG_IACS > 2 1457 + case KVM_REG_PPC_IAC3: 1458 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); 1459 + break; 1460 + case KVM_REG_PPC_IAC4: 1461 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4); 1462 + break; 1463 + #endif 1514 1464 case KVM_REG_PPC_DAC1: 1465 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); 1466 + break; 1515 1467 case KVM_REG_PPC_DAC2: 1516 - i = reg->id - KVM_REG_PPC_DAC1; 1517 - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]); 1468 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); 1518 1469 break; 1519 1470 case KVM_REG_PPC_EPR: { 1520 1471 u32 epr = get_guest_epr(vcpu); ··· 1541 1476 val = get_reg_val(reg->id, vcpu->arch.tsr); 1542 1477 break; 1543 
1478 case KVM_REG_PPC_DEBUG_INST: 1544 - val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV); 1479 + val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); 1480 + break; 1481 + case KVM_REG_PPC_VRSAVE: 1482 + val = get_reg_val(reg->id, vcpu->arch.vrsave); 1545 1483 break; 1546 1484 default: 1547 - r = kvmppc_get_one_reg(vcpu, reg->id, &val); 1485 + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); 1548 1486 break; 1549 1487 } 1550 1488 ··· 1565 1497 int r = 0; 1566 1498 union kvmppc_one_reg val; 1567 1499 int size; 1568 - long int i; 1569 1500 1570 1501 size = one_reg_size(reg->id); 1571 1502 if (size > sizeof(val)) ··· 1575 1508 1576 1509 switch (reg->id) { 1577 1510 case KVM_REG_PPC_IAC1: 1578 - case KVM_REG_PPC_IAC2: 1579 - case KVM_REG_PPC_IAC3: 1580 - case KVM_REG_PPC_IAC4: 1581 - i = reg->id - KVM_REG_PPC_IAC1; 1582 - vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val); 1511 + vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); 1583 1512 break; 1513 + case KVM_REG_PPC_IAC2: 1514 + vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); 1515 + break; 1516 + #if CONFIG_PPC_ADV_DEBUG_IACS > 2 1517 + case KVM_REG_PPC_IAC3: 1518 + vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); 1519 + break; 1520 + case KVM_REG_PPC_IAC4: 1521 + vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val); 1522 + break; 1523 + #endif 1584 1524 case KVM_REG_PPC_DAC1: 1525 + vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); 1526 + break; 1585 1527 case KVM_REG_PPC_DAC2: 1586 - i = reg->id - KVM_REG_PPC_DAC1; 1587 - vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val); 1528 + vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); 1588 1529 break; 1589 1530 case KVM_REG_PPC_EPR: { 1590 1531 u32 new_epr = set_reg_val(reg->id, val); ··· 1626 1551 kvmppc_set_tcr(vcpu, tcr); 1627 1552 break; 1628 1553 } 1554 + case KVM_REG_PPC_VRSAVE: 1555 + vcpu->arch.vrsave = set_reg_val(reg->id, val); 1556 + break; 1629 1557 default: 1630 - r = kvmppc_set_one_reg(vcpu, reg->id, &val); 1558 + r = 
vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); 1631 1559 break; 1632 1560 } 1633 1561 1634 1562 return r; 1635 - } 1636 - 1637 - int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 1638 - struct kvm_guest_debug *dbg) 1639 - { 1640 - return -EINVAL; 1641 1563 } 1642 1564 1643 1565 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) ··· 1661 1589 return -ENOTSUPP; 1662 1590 } 1663 1591 1664 - void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 1592 + void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 1665 1593 struct kvm_memory_slot *dont) 1666 1594 { 1667 1595 } 1668 1596 1669 - int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 1597 + int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1670 1598 unsigned long npages) 1671 1599 { 1672 1600 return 0; ··· 1742 1670 kvmppc_set_tsr_bits(vcpu, TSR_DIS); 1743 1671 } 1744 1672 1673 + static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg, 1674 + uint64_t addr, int index) 1675 + { 1676 + switch (index) { 1677 + case 0: 1678 + dbg_reg->dbcr0 |= DBCR0_IAC1; 1679 + dbg_reg->iac1 = addr; 1680 + break; 1681 + case 1: 1682 + dbg_reg->dbcr0 |= DBCR0_IAC2; 1683 + dbg_reg->iac2 = addr; 1684 + break; 1685 + #if CONFIG_PPC_ADV_DEBUG_IACS > 2 1686 + case 2: 1687 + dbg_reg->dbcr0 |= DBCR0_IAC3; 1688 + dbg_reg->iac3 = addr; 1689 + break; 1690 + case 3: 1691 + dbg_reg->dbcr0 |= DBCR0_IAC4; 1692 + dbg_reg->iac4 = addr; 1693 + break; 1694 + #endif 1695 + default: 1696 + return -EINVAL; 1697 + } 1698 + 1699 + dbg_reg->dbcr0 |= DBCR0_IDM; 1700 + return 0; 1701 + } 1702 + 1703 + static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr, 1704 + int type, int index) 1705 + { 1706 + switch (index) { 1707 + case 0: 1708 + if (type & KVMPPC_DEBUG_WATCH_READ) 1709 + dbg_reg->dbcr0 |= DBCR0_DAC1R; 1710 + if (type & KVMPPC_DEBUG_WATCH_WRITE) 1711 + dbg_reg->dbcr0 |= DBCR0_DAC1W; 1712 + dbg_reg->dac1 = 
addr; 1713 + break; 1714 + case 1: 1715 + if (type & KVMPPC_DEBUG_WATCH_READ) 1716 + dbg_reg->dbcr0 |= DBCR0_DAC2R; 1717 + if (type & KVMPPC_DEBUG_WATCH_WRITE) 1718 + dbg_reg->dbcr0 |= DBCR0_DAC2W; 1719 + dbg_reg->dac2 = addr; 1720 + break; 1721 + default: 1722 + return -EINVAL; 1723 + } 1724 + 1725 + dbg_reg->dbcr0 |= DBCR0_IDM; 1726 + return 0; 1727 + } 1728 + void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set) 1729 + { 1730 + /* XXX: Add similar MSR protection for BookE-PR */ 1731 + #ifdef CONFIG_KVM_BOOKE_HV 1732 + BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP)); 1733 + if (set) { 1734 + if (prot_bitmap & MSR_UCLE) 1735 + vcpu->arch.shadow_msrp |= MSRP_UCLEP; 1736 + if (prot_bitmap & MSR_DE) 1737 + vcpu->arch.shadow_msrp |= MSRP_DEP; 1738 + if (prot_bitmap & MSR_PMM) 1739 + vcpu->arch.shadow_msrp |= MSRP_PMMP; 1740 + } else { 1741 + if (prot_bitmap & MSR_UCLE) 1742 + vcpu->arch.shadow_msrp &= ~MSRP_UCLEP; 1743 + if (prot_bitmap & MSR_DE) 1744 + vcpu->arch.shadow_msrp &= ~MSRP_DEP; 1745 + if (prot_bitmap & MSR_PMM) 1746 + vcpu->arch.shadow_msrp &= ~MSRP_PMMP; 1747 + } 1748 + #endif 1749 + } 1750 + 1751 + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 1752 + struct kvm_guest_debug *dbg) 1753 + { 1754 + struct debug_reg *dbg_reg; 1755 + int n, b = 0, w = 0; 1756 + 1757 + if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { 1758 + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; 1759 + vcpu->guest_debug = 0; 1760 + kvm_guest_protect_msr(vcpu, MSR_DE, false); 1761 + return 0; 1762 + } 1763 + 1764 + kvm_guest_protect_msr(vcpu, MSR_DE, true); 1765 + vcpu->guest_debug = dbg->control; 1766 + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; 1767 + /* Set DBCR0_EDM in guest visible DBCR0 register. 
*/ 1768 + vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; 1769 + 1770 + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 1771 + vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; 1772 + 1773 + /* Code below handles only HW breakpoints */ 1774 + dbg_reg = &(vcpu->arch.shadow_dbg_reg); 1775 + 1776 + #ifdef CONFIG_KVM_BOOKE_HV 1777 + /* 1778 + * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1 1779 + * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0 1780 + */ 1781 + dbg_reg->dbcr1 = 0; 1782 + dbg_reg->dbcr2 = 0; 1783 + #else 1784 + /* 1785 + * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1 1786 + * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR 1787 + * is set. 1788 + */ 1789 + dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US | 1790 + DBCR1_IAC4US; 1791 + dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; 1792 + #endif 1793 + 1794 + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 1795 + return 0; 1796 + 1797 + for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) { 1798 + uint64_t addr = dbg->arch.bp[n].addr; 1799 + uint32_t type = dbg->arch.bp[n].type; 1800 + 1801 + if (type == KVMPPC_DEBUG_NONE) 1802 + continue; 1803 + 1804 + if (type & !(KVMPPC_DEBUG_WATCH_READ | 1805 + KVMPPC_DEBUG_WATCH_WRITE | 1806 + KVMPPC_DEBUG_BREAKPOINT)) 1807 + return -EINVAL; 1808 + 1809 + if (type & KVMPPC_DEBUG_BREAKPOINT) { 1810 + /* Setting H/W breakpoint */ 1811 + if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++)) 1812 + return -EINVAL; 1813 + } else { 1814 + /* Setting H/W watchpoint */ 1815 + if (kvmppc_booke_add_watchpoint(dbg_reg, addr, 1816 + type, w++)) 1817 + return -EINVAL; 1818 + } 1819 + } 1820 + 1821 + return 0; 1822 + } 1823 + 1745 1824 void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1746 1825 { 1747 1826 vcpu->cpu = smp_processor_id(); ··· 1903 1680 { 1904 1681 current->thread.kvm_vcpu = NULL; 1905 1682 vcpu->cpu = -1; 1683 + 1684 + /* Clear pending debug event in DBSR */ 
1685 + kvmppc_clear_dbsr(); 1686 + } 1687 + 1688 + void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 1689 + { 1690 + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); 1691 + } 1692 + 1693 + int kvmppc_core_init_vm(struct kvm *kvm) 1694 + { 1695 + return kvm->arch.kvm_ops->init_vm(kvm); 1696 + } 1697 + 1698 + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 1699 + { 1700 + return kvm->arch.kvm_ops->vcpu_create(kvm, id); 1701 + } 1702 + 1703 + void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 1704 + { 1705 + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); 1706 + } 1707 + 1708 + void kvmppc_core_destroy_vm(struct kvm *kvm) 1709 + { 1710 + kvm->arch.kvm_ops->destroy_vm(kvm); 1711 + } 1712 + 1713 + void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1714 + { 1715 + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); 1716 + } 1717 + 1718 + void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 1719 + { 1720 + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); 1906 1721 } 1907 1722 1908 1723 int __init kvmppc_booke_init(void)
+29
arch/powerpc/kvm/booke.h
··· 99 99 100 100 void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); 101 101 102 + extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu); 103 + extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, 104 + unsigned int inst, int *advance); 105 + extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, 106 + ulong spr_val); 107 + extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, 108 + ulong *spr_val); 109 + extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); 110 + extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, 111 + struct kvm_vcpu *vcpu, 112 + unsigned int inst, int *advance); 113 + extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, 114 + ulong spr_val); 115 + extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, 116 + ulong *spr_val); 117 + extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); 118 + extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, 119 + struct kvm_vcpu *vcpu, 120 + unsigned int inst, int *advance); 121 + extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, 122 + ulong spr_val); 123 + extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, 124 + ulong *spr_val); 125 + 102 126 /* 103 127 * Load up guest vcpu FP state if it's needed. 104 128 * It also set the MSR_FP in thread so that host know ··· 152 128 if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP)) 153 129 giveup_fpu(current); 154 130 #endif 131 + } 132 + 133 + static inline void kvmppc_clear_dbsr(void) 134 + { 135 + mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); 155 136 } 156 137 #endif /* __KVM_BOOKE_H__ */
+44 -15
arch/powerpc/kvm/e500.c
··· 305 305 { 306 306 } 307 307 308 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 308 + static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu) 309 309 { 310 310 kvmppc_booke_vcpu_load(vcpu, cpu); 311 311 ··· 313 313 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); 314 314 } 315 315 316 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 316 + static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu) 317 317 { 318 318 #ifdef CONFIG_SPE 319 319 if (vcpu->arch.shadow_msr & MSR_SPE) ··· 367 367 return 0; 368 368 } 369 369 370 - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 370 + static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu, 371 + struct kvm_sregs *sregs) 371 372 { 372 373 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 373 374 ··· 389 388 390 389 kvmppc_get_sregs_ivor(vcpu, sregs); 391 390 kvmppc_get_sregs_e500_tlb(vcpu, sregs); 391 + return 0; 392 392 } 393 393 394 - int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 394 + static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu, 395 + struct kvm_sregs *sregs) 395 396 { 396 397 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 397 398 int ret; ··· 428 425 return kvmppc_set_sregs_ivor(vcpu, sregs); 429 426 } 430 427 431 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, 432 - union kvmppc_one_reg *val) 428 + static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, 429 + union kvmppc_one_reg *val) 433 430 { 434 431 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); 435 432 return r; 436 433 } 437 434 438 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, 439 - union kvmppc_one_reg *val) 435 + static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, 436 + union kvmppc_one_reg *val) 440 437 { 441 438 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); 442 439 return r; 443 440 } 444 441 445 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 442 + static struct 
kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, 443 + unsigned int id) 446 444 { 447 445 struct kvmppc_vcpu_e500 *vcpu_e500; 448 446 struct kvm_vcpu *vcpu; ··· 485 481 return ERR_PTR(err); 486 482 } 487 483 488 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 484 + static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu) 489 485 { 490 486 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 491 487 ··· 496 492 kmem_cache_free(kvm_vcpu_cache, vcpu_e500); 497 493 } 498 494 499 - int kvmppc_core_init_vm(struct kvm *kvm) 495 + static int kvmppc_core_init_vm_e500(struct kvm *kvm) 500 496 { 501 497 return 0; 502 498 } 503 499 504 - void kvmppc_core_destroy_vm(struct kvm *kvm) 500 + static void kvmppc_core_destroy_vm_e500(struct kvm *kvm) 505 501 { 506 502 } 503 + 504 + static struct kvmppc_ops kvm_ops_e500 = { 505 + .get_sregs = kvmppc_core_get_sregs_e500, 506 + .set_sregs = kvmppc_core_set_sregs_e500, 507 + .get_one_reg = kvmppc_get_one_reg_e500, 508 + .set_one_reg = kvmppc_set_one_reg_e500, 509 + .vcpu_load = kvmppc_core_vcpu_load_e500, 510 + .vcpu_put = kvmppc_core_vcpu_put_e500, 511 + .vcpu_create = kvmppc_core_vcpu_create_e500, 512 + .vcpu_free = kvmppc_core_vcpu_free_e500, 513 + .mmu_destroy = kvmppc_mmu_destroy_e500, 514 + .init_vm = kvmppc_core_init_vm_e500, 515 + .destroy_vm = kvmppc_core_destroy_vm_e500, 516 + .emulate_op = kvmppc_core_emulate_op_e500, 517 + .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, 518 + .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, 519 + }; 507 520 508 521 static int __init kvmppc_e500_init(void) 509 522 { ··· 533 512 534 513 r = kvmppc_core_check_processor_compat(); 535 514 if (r) 536 - return r; 515 + goto err_out; 537 516 538 517 r = kvmppc_booke_init(); 539 518 if (r) 540 - return r; 519 + goto err_out; 541 520 542 521 /* copy extra E500 exception handlers */ 543 522 ivor[0] = mfspr(SPRN_IVOR32); ··· 555 534 flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + 556 535 ivor[max_ivor] + handler_len); 
557 536 558 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 537 + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 538 + if (r) 539 + goto err_out; 540 + kvm_ops_e500.owner = THIS_MODULE; 541 + kvmppc_pr_ops = &kvm_ops_e500; 542 + 543 + err_out: 544 + return r; 559 545 } 560 546 561 547 static void __exit kvmppc_e500_exit(void) 562 548 { 549 + kvmppc_pr_ops = NULL; 563 550 kvmppc_booke_exit(); 564 551 } 565 552
+1 -1
arch/powerpc/kvm/e500.h
··· 117 117 #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) 118 118 #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) 119 119 #define MAS2_ATTRIB_MASK \ 120 - (MAS2_X0 | MAS2_X1) 120 + (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G) 121 121 #define MAS3_ATTRIB_MASK \ 122 122 (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ 123 123 | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+30 -4
arch/powerpc/kvm/e500_emulate.c
··· 26 26 #define XOP_TLBRE 946 27 27 #define XOP_TLBWE 978 28 28 #define XOP_TLBILX 18 29 + #define XOP_EHPRIV 270 29 30 30 31 #ifdef CONFIG_KVM_E500MC 31 32 static int dbell2prio(ulong param) ··· 83 82 } 84 83 #endif 85 84 86 - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 87 - unsigned int inst, int *advance) 85 + static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, 86 + unsigned int inst, int *advance) 87 + { 88 + int emulated = EMULATE_DONE; 89 + 90 + switch (get_oc(inst)) { 91 + case EHPRIV_OC_DEBUG: 92 + run->exit_reason = KVM_EXIT_DEBUG; 93 + run->debug.arch.address = vcpu->arch.pc; 94 + run->debug.arch.status = 0; 95 + kvmppc_account_exit(vcpu, DEBUG_EXITS); 96 + emulated = EMULATE_EXIT_USER; 97 + *advance = 0; 98 + break; 99 + default: 100 + emulated = EMULATE_FAIL; 101 + } 102 + return emulated; 103 + } 104 + 105 + int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, 106 + unsigned int inst, int *advance) 88 107 { 89 108 int emulated = EMULATE_DONE; 90 109 int ra = get_ra(inst); ··· 151 130 emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); 152 131 break; 153 132 133 + case XOP_EHPRIV: 134 + emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, 135 + advance); 136 + break; 137 + 154 138 default: 155 139 emulated = EMULATE_FAIL; 156 140 } ··· 172 146 return emulated; 173 147 } 174 148 175 - int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 149 + int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 176 150 { 177 151 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 178 152 int emulated = EMULATE_DONE; ··· 263 237 return emulated; 264 238 } 265 239 266 - int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 240 + int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 267 241 { 268 242 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 269 243 int emulated = EMULATE_DONE;
+2 -2
arch/powerpc/kvm/e500_mmu.c
··· 32 32 #include <asm/kvm_ppc.h> 33 33 34 34 #include "e500.h" 35 - #include "trace.h" 35 + #include "trace_booke.h" 36 36 #include "timing.h" 37 37 #include "e500_mmu_host.h" 38 38 ··· 536 536 return get_tlb_raddr(gtlbe) | (eaddr & pgmask); 537 537 } 538 538 539 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 539 + void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu) 540 540 { 541 541 } 542 542
+5 -1
arch/powerpc/kvm/e500_mmu_host.c
··· 32 32 #include <asm/kvm_ppc.h> 33 33 34 34 #include "e500.h" 35 - #include "trace.h" 36 35 #include "timing.h" 37 36 #include "e500_mmu_host.h" 37 + 38 + #include "trace_booke.h" 38 39 39 40 #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) 40 41 ··· 253 252 { 254 253 ref->pfn = pfn; 255 254 ref->flags |= E500_TLB_VALID; 255 + 256 + /* Mark the page accessed */ 257 + kvm_set_pfn_accessed(pfn); 256 258 257 259 if (tlbe_is_writable(gtlbe)) 258 260 kvm_set_pfn_dirty(pfn);
+43 -15
arch/powerpc/kvm/e500mc.c
··· 110 110 111 111 static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); 112 112 113 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 113 + static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) 114 114 { 115 115 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 116 116 ··· 147 147 kvmppc_load_guest_fp(vcpu); 148 148 } 149 149 150 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 150 + static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) 151 151 { 152 152 vcpu->arch.eplc = mfspr(SPRN_EPLC); 153 153 vcpu->arch.epsc = mfspr(SPRN_EPSC); ··· 204 204 return 0; 205 205 } 206 206 207 - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 207 + static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu, 208 + struct kvm_sregs *sregs) 208 209 { 209 210 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 210 211 ··· 225 224 sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]; 226 225 sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; 227 226 228 - kvmppc_get_sregs_ivor(vcpu, sregs); 227 + return kvmppc_get_sregs_ivor(vcpu, sregs); 229 228 } 230 229 231 - int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 230 + static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu, 231 + struct kvm_sregs *sregs) 232 232 { 233 233 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 234 234 int ret; ··· 262 260 return kvmppc_set_sregs_ivor(vcpu, sregs); 263 261 } 264 262 265 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, 266 - union kvmppc_one_reg *val) 263 + static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, 264 + union kvmppc_one_reg *val) 267 265 { 268 266 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); 269 267 return r; 270 268 } 271 269 272 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, 273 - union kvmppc_one_reg *val) 270 + static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, 271 + union kvmppc_one_reg 
*val) 274 272 { 275 273 int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); 276 274 return r; 277 275 } 278 276 279 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 277 + static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, 278 + unsigned int id) 280 279 { 281 280 struct kvmppc_vcpu_e500 *vcpu_e500; 282 281 struct kvm_vcpu *vcpu; ··· 318 315 return ERR_PTR(err); 319 316 } 320 317 321 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 318 + static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu) 322 319 { 323 320 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 324 321 ··· 328 325 kmem_cache_free(kvm_vcpu_cache, vcpu_e500); 329 326 } 330 327 331 - int kvmppc_core_init_vm(struct kvm *kvm) 328 + static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) 332 329 { 333 330 int lpid; 334 331 ··· 340 337 return 0; 341 338 } 342 339 343 - void kvmppc_core_destroy_vm(struct kvm *kvm) 340 + static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) 344 341 { 345 342 kvmppc_free_lpid(kvm->arch.lpid); 346 343 } 344 + 345 + static struct kvmppc_ops kvm_ops_e500mc = { 346 + .get_sregs = kvmppc_core_get_sregs_e500mc, 347 + .set_sregs = kvmppc_core_set_sregs_e500mc, 348 + .get_one_reg = kvmppc_get_one_reg_e500mc, 349 + .set_one_reg = kvmppc_set_one_reg_e500mc, 350 + .vcpu_load = kvmppc_core_vcpu_load_e500mc, 351 + .vcpu_put = kvmppc_core_vcpu_put_e500mc, 352 + .vcpu_create = kvmppc_core_vcpu_create_e500mc, 353 + .vcpu_free = kvmppc_core_vcpu_free_e500mc, 354 + .mmu_destroy = kvmppc_mmu_destroy_e500, 355 + .init_vm = kvmppc_core_init_vm_e500mc, 356 + .destroy_vm = kvmppc_core_destroy_vm_e500mc, 357 + .emulate_op = kvmppc_core_emulate_op_e500, 358 + .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, 359 + .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, 360 + }; 347 361 348 362 static int __init kvmppc_e500mc_init(void) 349 363 { ··· 368 348 369 349 r = kvmppc_booke_init(); 370 350 if (r) 371 - return r; 351 + goto err_out; 372 
352 373 353 kvmppc_init_lpid(64); 374 354 kvmppc_claim_lpid(0); /* host */ 375 355 376 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 356 + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 357 + if (r) 358 + goto err_out; 359 + kvm_ops_e500mc.owner = THIS_MODULE; 360 + kvmppc_pr_ops = &kvm_ops_e500mc; 361 + 362 + err_out: 363 + return r; 377 364 } 378 365 379 366 static void __exit kvmppc_e500mc_exit(void) 380 367 { 368 + kvmppc_pr_ops = NULL; 381 369 kvmppc_booke_exit(); 382 370 } 383 371
+7 -5
arch/powerpc/kvm/emulate.c
··· 130 130 case SPRN_PIR: break; 131 131 132 132 default: 133 - emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, 134 - spr_val); 133 + emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn, 134 + spr_val); 135 135 if (emulated == EMULATE_FAIL) 136 136 printk(KERN_INFO "mtspr: unknown spr " 137 137 "0x%x\n", sprn); ··· 191 191 spr_val = kvmppc_get_dec(vcpu, get_tb()); 192 192 break; 193 193 default: 194 - emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, 195 - &spr_val); 194 + emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn, 195 + &spr_val); 196 196 if (unlikely(emulated == EMULATE_FAIL)) { 197 197 printk(KERN_INFO "mfspr: unknown spr " 198 198 "0x%x\n", sprn); ··· 464 464 } 465 465 466 466 if (emulated == EMULATE_FAIL) { 467 - emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); 467 + emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst, 468 + &advance); 468 469 if (emulated == EMULATE_AGAIN) { 469 470 advance = 0; 470 471 } else if (emulated == EMULATE_FAIL) { ··· 484 483 485 484 return emulated; 486 485 } 486 + EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
+94 -75
arch/powerpc/kvm/powerpc.c
··· 26 26 #include <linux/fs.h> 27 27 #include <linux/slab.h> 28 28 #include <linux/file.h> 29 + #include <linux/module.h> 29 30 #include <asm/cputable.h> 30 31 #include <asm/uaccess.h> 31 32 #include <asm/kvm_ppc.h> ··· 40 39 #define CREATE_TRACE_POINTS 41 40 #include "trace.h" 42 41 42 + struct kvmppc_ops *kvmppc_hv_ops; 43 + EXPORT_SYMBOL_GPL(kvmppc_hv_ops); 44 + struct kvmppc_ops *kvmppc_pr_ops; 45 + EXPORT_SYMBOL_GPL(kvmppc_pr_ops); 46 + 47 + 43 48 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 44 49 { 45 50 return !!(v->arch.pending_exceptions) || ··· 57 50 return 1; 58 51 } 59 52 60 - #ifndef CONFIG_KVM_BOOK3S_64_HV 61 53 /* 62 54 * Common checks before entering the guest world. Call with interrupts 63 55 * disabled. ··· 131 125 132 126 return r; 133 127 } 134 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 128 + EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter); 135 129 136 130 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 137 131 { ··· 185 179 186 180 return r; 187 181 } 182 + EXPORT_SYMBOL_GPL(kvmppc_kvm_pv); 188 183 189 184 int kvmppc_sanity_check(struct kvm_vcpu *vcpu) 190 185 { ··· 199 192 if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled) 200 193 goto out; 201 194 202 - #ifdef CONFIG_KVM_BOOK3S_64_HV 203 195 /* HV KVM can only do PAPR mode for now */ 204 - if (!vcpu->arch.papr_enabled) 196 + if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm)) 205 197 goto out; 206 - #endif 207 198 208 199 #ifdef CONFIG_KVM_BOOKE_HV 209 200 if (!cpu_has_feature(CPU_FTR_EMB_HV)) ··· 214 209 vcpu->arch.sane = r; 215 210 return r ? 
0 : -EINVAL; 216 211 } 212 + EXPORT_SYMBOL_GPL(kvmppc_sanity_check); 217 213 218 214 int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) 219 215 { ··· 249 243 250 244 return r; 251 245 } 246 + EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); 252 247 253 248 int kvm_arch_hardware_enable(void *garbage) 254 249 { ··· 276 269 277 270 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 278 271 { 279 - if (type) 280 - return -EINVAL; 272 + struct kvmppc_ops *kvm_ops = NULL; 273 + /* 274 + * if we have both HV and PR enabled, default is HV 275 + */ 276 + if (type == 0) { 277 + if (kvmppc_hv_ops) 278 + kvm_ops = kvmppc_hv_ops; 279 + else 280 + kvm_ops = kvmppc_pr_ops; 281 + if (!kvm_ops) 282 + goto err_out; 283 + } else if (type == KVM_VM_PPC_HV) { 284 + if (!kvmppc_hv_ops) 285 + goto err_out; 286 + kvm_ops = kvmppc_hv_ops; 287 + } else if (type == KVM_VM_PPC_PR) { 288 + if (!kvmppc_pr_ops) 289 + goto err_out; 290 + kvm_ops = kvmppc_pr_ops; 291 + } else 292 + goto err_out; 281 293 294 + if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) 295 + return -ENOENT; 296 + 297 + kvm->arch.kvm_ops = kvm_ops; 282 298 return kvmppc_core_init_vm(kvm); 299 + err_out: 300 + return -EINVAL; 283 301 } 284 302 285 303 void kvm_arch_destroy_vm(struct kvm *kvm) ··· 324 292 kvmppc_core_destroy_vm(kvm); 325 293 326 294 mutex_unlock(&kvm->lock); 295 + 296 + /* drop the module reference */ 297 + module_put(kvm->arch.kvm_ops->owner); 327 298 } 328 299 329 300 void kvm_arch_sync_events(struct kvm *kvm) ··· 336 301 int kvm_dev_ioctl_check_extension(long ext) 337 302 { 338 303 int r; 304 + /* FIXME!! 305 + * Should some of this be vm ioctl ? is it possible now ? 306 + */ 307 + int hv_enabled = kvmppc_hv_ops ? 
1 : 0; 339 308 340 309 switch (ext) { 341 310 #ifdef CONFIG_BOOKE ··· 359 320 case KVM_CAP_DEVICE_CTRL: 360 321 r = 1; 361 322 break; 362 - #ifndef CONFIG_KVM_BOOK3S_64_HV 363 323 case KVM_CAP_PPC_PAIRED_SINGLES: 364 324 case KVM_CAP_PPC_OSI: 365 325 case KVM_CAP_PPC_GET_PVINFO: 366 326 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) 367 327 case KVM_CAP_SW_TLB: 368 328 #endif 369 - #ifdef CONFIG_KVM_MPIC 370 - case KVM_CAP_IRQ_MPIC: 371 - #endif 372 - r = 1; 329 + /* We support this only for PR */ 330 + r = !hv_enabled; 373 331 break; 332 + #ifdef CONFIG_KVM_MMIO 374 333 case KVM_CAP_COALESCED_MMIO: 375 334 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 376 335 break; 377 336 #endif 337 + #ifdef CONFIG_KVM_MPIC 338 + case KVM_CAP_IRQ_MPIC: 339 + r = 1; 340 + break; 341 + #endif 342 + 378 343 #ifdef CONFIG_PPC_BOOK3S_64 379 344 case KVM_CAP_SPAPR_TCE: 380 345 case KVM_CAP_PPC_ALLOC_HTAB: ··· 389 346 r = 1; 390 347 break; 391 348 #endif /* CONFIG_PPC_BOOK3S_64 */ 392 - #ifdef CONFIG_KVM_BOOK3S_64_HV 349 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 393 350 case KVM_CAP_PPC_SMT: 394 - r = threads_per_core; 351 + if (hv_enabled) 352 + r = threads_per_core; 353 + else 354 + r = 0; 395 355 break; 396 356 case KVM_CAP_PPC_RMA: 397 - r = 1; 357 + r = hv_enabled; 398 358 /* PPC970 requires an RMA */ 399 - if (cpu_has_feature(CPU_FTR_ARCH_201)) 359 + if (r && cpu_has_feature(CPU_FTR_ARCH_201)) 400 360 r = 2; 401 361 break; 402 362 #endif 403 363 case KVM_CAP_SYNC_MMU: 404 - #ifdef CONFIG_KVM_BOOK3S_64_HV 405 - r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; 364 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 365 + if (hv_enabled) 366 + r = cpu_has_feature(CPU_FTR_ARCH_206) ? 
1 : 0; 367 + else 368 + r = 0; 406 369 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) 407 370 r = 1; 408 371 #else 409 372 r = 0; 410 - break; 411 373 #endif 412 - #ifdef CONFIG_KVM_BOOK3S_64_HV 374 + break; 375 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 413 376 case KVM_CAP_PPC_HTAB_FD: 414 - r = 1; 377 + r = hv_enabled; 415 378 break; 416 379 #endif 417 - break; 418 380 case KVM_CAP_NR_VCPUS: 419 381 /* 420 382 * Recommending a number of CPUs is somewhat arbitrary; we ··· 427 379 * will have secondary threads "offline"), and for other KVM 428 380 * implementations just count online CPUs. 429 381 */ 430 - #ifdef CONFIG_KVM_BOOK3S_64_HV 431 - r = num_present_cpus(); 432 - #else 433 - r = num_online_cpus(); 434 - #endif 382 + if (hv_enabled) 383 + r = num_present_cpus(); 384 + else 385 + r = num_online_cpus(); 435 386 break; 436 387 case KVM_CAP_MAX_VCPUS: 437 388 r = KVM_MAX_VCPUS; ··· 454 407 return -EINVAL; 455 408 } 456 409 457 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 410 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 458 411 struct kvm_memory_slot *dont) 459 412 { 460 - kvmppc_core_free_memslot(free, dont); 413 + kvmppc_core_free_memslot(kvm, free, dont); 461 414 } 462 415 463 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 416 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 417 + unsigned long npages) 464 418 { 465 - return kvmppc_core_create_memslot(slot, npages); 419 + return kvmppc_core_create_memslot(kvm, slot, npages); 466 420 } 467 421 468 422 void kvm_arch_memslots_updated(struct kvm *kvm) ··· 707 659 708 660 return EMULATE_DO_MMIO; 709 661 } 662 + EXPORT_SYMBOL_GPL(kvmppc_handle_load); 710 663 711 664 /* Same as above, but sign extends */ 712 665 int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, ··· 769 720 770 721 return EMULATE_DO_MMIO; 771 722 } 723 + EXPORT_SYMBOL_GPL(kvmppc_handle_store); 772 724 773 725 int 
kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 774 726 { ··· 1074 1024 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); 1075 1025 goto out; 1076 1026 } 1077 - #endif /* CONFIG_PPC_BOOK3S_64 */ 1078 - 1079 - #ifdef CONFIG_KVM_BOOK3S_64_HV 1080 - case KVM_ALLOCATE_RMA: { 1081 - struct kvm_allocate_rma rma; 1082 - struct kvm *kvm = filp->private_data; 1083 - 1084 - r = kvm_vm_ioctl_allocate_rma(kvm, &rma); 1085 - if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) 1086 - r = -EFAULT; 1087 - break; 1088 - } 1089 - 1090 - case KVM_PPC_ALLOCATE_HTAB: { 1091 - u32 htab_order; 1092 - 1093 - r = -EFAULT; 1094 - if (get_user(htab_order, (u32 __user *)argp)) 1095 - break; 1096 - r = kvmppc_alloc_reset_hpt(kvm, &htab_order); 1097 - if (r) 1098 - break; 1099 - r = -EFAULT; 1100 - if (put_user(htab_order, (u32 __user *)argp)) 1101 - break; 1102 - r = 0; 1103 - break; 1104 - } 1105 - 1106 - case KVM_PPC_GET_HTAB_FD: { 1107 - struct kvm_get_htab_fd ghf; 1108 - 1109 - r = -EFAULT; 1110 - if (copy_from_user(&ghf, argp, sizeof(ghf))) 1111 - break; 1112 - r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); 1113 - break; 1114 - } 1115 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 1116 - 1117 - #ifdef CONFIG_PPC_BOOK3S_64 1118 1027 case KVM_PPC_GET_SMMU_INFO: { 1119 1028 struct kvm_ppc_smmu_info info; 1029 + struct kvm *kvm = filp->private_data; 1120 1030 1121 1031 memset(&info, 0, sizeof(info)); 1122 - r = kvm_vm_ioctl_get_smmu_info(kvm, &info); 1032 + r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info); 1123 1033 if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) 1124 1034 r = -EFAULT; 1125 1035 break; ··· 1090 1080 r = kvm_vm_ioctl_rtas_define_token(kvm, argp); 1091 1081 break; 1092 1082 } 1093 - #endif /* CONFIG_PPC_BOOK3S_64 */ 1083 + default: { 1084 + struct kvm *kvm = filp->private_data; 1085 + r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); 1086 + } 1087 + #else /* CONFIG_PPC_BOOK3S_64 */ 1094 1088 default: 1095 1089 r = -ENOTTY; 1090 + #endif 1096 1091 } 1097 - 
1098 1092 out: 1099 1093 return r; 1100 1094 } ··· 1120 1106 1121 1107 return lpid; 1122 1108 } 1109 + EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid); 1123 1110 1124 1111 void kvmppc_claim_lpid(long lpid) 1125 1112 { 1126 1113 set_bit(lpid, lpid_inuse); 1127 1114 } 1115 + EXPORT_SYMBOL_GPL(kvmppc_claim_lpid); 1128 1116 1129 1117 void kvmppc_free_lpid(long lpid) 1130 1118 { 1131 1119 clear_bit(lpid, lpid_inuse); 1132 1120 } 1121 + EXPORT_SYMBOL_GPL(kvmppc_free_lpid); 1133 1122 1134 1123 void kvmppc_init_lpid(unsigned long nr_lpids_param) 1135 1124 { 1136 1125 nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param); 1137 1126 memset(lpid_inuse, 0, sizeof(lpid_inuse)); 1138 1127 } 1128 + EXPORT_SYMBOL_GPL(kvmppc_init_lpid); 1139 1129 1140 1130 int kvm_arch_init(void *opaque) 1141 1131 { ··· 1148 1130 1149 1131 void kvm_arch_exit(void) 1150 1132 { 1133 + 1151 1134 }
-429
arch/powerpc/kvm/trace.h
··· 31 31 __entry->inst, __entry->pc, __entry->emulate) 32 32 ); 33 33 34 - #ifdef CONFIG_PPC_BOOK3S 35 - #define kvm_trace_symbol_exit \ 36 - {0x100, "SYSTEM_RESET"}, \ 37 - {0x200, "MACHINE_CHECK"}, \ 38 - {0x300, "DATA_STORAGE"}, \ 39 - {0x380, "DATA_SEGMENT"}, \ 40 - {0x400, "INST_STORAGE"}, \ 41 - {0x480, "INST_SEGMENT"}, \ 42 - {0x500, "EXTERNAL"}, \ 43 - {0x501, "EXTERNAL_LEVEL"}, \ 44 - {0x502, "EXTERNAL_HV"}, \ 45 - {0x600, "ALIGNMENT"}, \ 46 - {0x700, "PROGRAM"}, \ 47 - {0x800, "FP_UNAVAIL"}, \ 48 - {0x900, "DECREMENTER"}, \ 49 - {0x980, "HV_DECREMENTER"}, \ 50 - {0xc00, "SYSCALL"}, \ 51 - {0xd00, "TRACE"}, \ 52 - {0xe00, "H_DATA_STORAGE"}, \ 53 - {0xe20, "H_INST_STORAGE"}, \ 54 - {0xe40, "H_EMUL_ASSIST"}, \ 55 - {0xf00, "PERFMON"}, \ 56 - {0xf20, "ALTIVEC"}, \ 57 - {0xf40, "VSX"} 58 - #else 59 - #define kvm_trace_symbol_exit \ 60 - {0, "CRITICAL"}, \ 61 - {1, "MACHINE_CHECK"}, \ 62 - {2, "DATA_STORAGE"}, \ 63 - {3, "INST_STORAGE"}, \ 64 - {4, "EXTERNAL"}, \ 65 - {5, "ALIGNMENT"}, \ 66 - {6, "PROGRAM"}, \ 67 - {7, "FP_UNAVAIL"}, \ 68 - {8, "SYSCALL"}, \ 69 - {9, "AP_UNAVAIL"}, \ 70 - {10, "DECREMENTER"}, \ 71 - {11, "FIT"}, \ 72 - {12, "WATCHDOG"}, \ 73 - {13, "DTLB_MISS"}, \ 74 - {14, "ITLB_MISS"}, \ 75 - {15, "DEBUG"}, \ 76 - {32, "SPE_UNAVAIL"}, \ 77 - {33, "SPE_FP_DATA"}, \ 78 - {34, "SPE_FP_ROUND"}, \ 79 - {35, "PERFORMANCE_MONITOR"}, \ 80 - {36, "DOORBELL"}, \ 81 - {37, "DOORBELL_CRITICAL"}, \ 82 - {38, "GUEST_DBELL"}, \ 83 - {39, "GUEST_DBELL_CRIT"}, \ 84 - {40, "HV_SYSCALL"}, \ 85 - {41, "HV_PRIV"} 86 - #endif 87 - 88 - TRACE_EVENT(kvm_exit, 89 - TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), 90 - TP_ARGS(exit_nr, vcpu), 91 - 92 - TP_STRUCT__entry( 93 - __field( unsigned int, exit_nr ) 94 - __field( unsigned long, pc ) 95 - __field( unsigned long, msr ) 96 - __field( unsigned long, dar ) 97 - #ifdef CONFIG_KVM_BOOK3S_PR 98 - __field( unsigned long, srr1 ) 99 - #endif 100 - __field( unsigned long, last_inst ) 101 - ), 102 - 103 - 
TP_fast_assign( 104 - #ifdef CONFIG_KVM_BOOK3S_PR 105 - struct kvmppc_book3s_shadow_vcpu *svcpu; 106 - #endif 107 - __entry->exit_nr = exit_nr; 108 - __entry->pc = kvmppc_get_pc(vcpu); 109 - __entry->dar = kvmppc_get_fault_dar(vcpu); 110 - __entry->msr = vcpu->arch.shared->msr; 111 - #ifdef CONFIG_KVM_BOOK3S_PR 112 - svcpu = svcpu_get(vcpu); 113 - __entry->srr1 = svcpu->shadow_srr1; 114 - svcpu_put(svcpu); 115 - #endif 116 - __entry->last_inst = vcpu->arch.last_inst; 117 - ), 118 - 119 - TP_printk("exit=%s" 120 - " | pc=0x%lx" 121 - " | msr=0x%lx" 122 - " | dar=0x%lx" 123 - #ifdef CONFIG_KVM_BOOK3S_PR 124 - " | srr1=0x%lx" 125 - #endif 126 - " | last_inst=0x%lx" 127 - , 128 - __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), 129 - __entry->pc, 130 - __entry->msr, 131 - __entry->dar, 132 - #ifdef CONFIG_KVM_BOOK3S_PR 133 - __entry->srr1, 134 - #endif 135 - __entry->last_inst 136 - ) 137 - ); 138 - 139 - TRACE_EVENT(kvm_unmap_hva, 140 - TP_PROTO(unsigned long hva), 141 - TP_ARGS(hva), 142 - 143 - TP_STRUCT__entry( 144 - __field( unsigned long, hva ) 145 - ), 146 - 147 - TP_fast_assign( 148 - __entry->hva = hva; 149 - ), 150 - 151 - TP_printk("unmap hva 0x%lx\n", __entry->hva) 152 - ); 153 - 154 34 TRACE_EVENT(kvm_stlb_inval, 155 35 TP_PROTO(unsigned int stlb_index), 156 36 TP_ARGS(stlb_index), ··· 115 235 TP_printk("vcpu=%x requests=%x", 116 236 __entry->cpu_nr, __entry->requests) 117 237 ); 118 - 119 - 120 - /************************************************************************* 121 - * Book3S trace points * 122 - *************************************************************************/ 123 - 124 - #ifdef CONFIG_KVM_BOOK3S_PR 125 - 126 - TRACE_EVENT(kvm_book3s_reenter, 127 - TP_PROTO(int r, struct kvm_vcpu *vcpu), 128 - TP_ARGS(r, vcpu), 129 - 130 - TP_STRUCT__entry( 131 - __field( unsigned int, r ) 132 - __field( unsigned long, pc ) 133 - ), 134 - 135 - TP_fast_assign( 136 - __entry->r = r; 137 - __entry->pc = kvmppc_get_pc(vcpu); 138 - ), 139 - 140 - 
TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) 141 - ); 142 - 143 - #ifdef CONFIG_PPC_BOOK3S_64 144 - 145 - TRACE_EVENT(kvm_book3s_64_mmu_map, 146 - TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, 147 - struct kvmppc_pte *orig_pte), 148 - TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), 149 - 150 - TP_STRUCT__entry( 151 - __field( unsigned char, flag_w ) 152 - __field( unsigned char, flag_x ) 153 - __field( unsigned long, eaddr ) 154 - __field( unsigned long, hpteg ) 155 - __field( unsigned long, va ) 156 - __field( unsigned long long, vpage ) 157 - __field( unsigned long, hpaddr ) 158 - ), 159 - 160 - TP_fast_assign( 161 - __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; 162 - __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; 163 - __entry->eaddr = orig_pte->eaddr; 164 - __entry->hpteg = hpteg; 165 - __entry->va = va; 166 - __entry->vpage = orig_pte->vpage; 167 - __entry->hpaddr = hpaddr; 168 - ), 169 - 170 - TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", 171 - __entry->flag_w, __entry->flag_x, __entry->eaddr, 172 - __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) 173 - ); 174 - 175 - #endif /* CONFIG_PPC_BOOK3S_64 */ 176 - 177 - TRACE_EVENT(kvm_book3s_mmu_map, 178 - TP_PROTO(struct hpte_cache *pte), 179 - TP_ARGS(pte), 180 - 181 - TP_STRUCT__entry( 182 - __field( u64, host_vpn ) 183 - __field( u64, pfn ) 184 - __field( ulong, eaddr ) 185 - __field( u64, vpage ) 186 - __field( ulong, raddr ) 187 - __field( int, flags ) 188 - ), 189 - 190 - TP_fast_assign( 191 - __entry->host_vpn = pte->host_vpn; 192 - __entry->pfn = pte->pfn; 193 - __entry->eaddr = pte->pte.eaddr; 194 - __entry->vpage = pte->pte.vpage; 195 - __entry->raddr = pte->pte.raddr; 196 - __entry->flags = (pte->pte.may_read ? 0x4 : 0) | 197 - (pte->pte.may_write ? 0x2 : 0) | 198 - (pte->pte.may_execute ? 
0x1 : 0); 199 - ), 200 - 201 - TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 202 - __entry->host_vpn, __entry->pfn, __entry->eaddr, 203 - __entry->vpage, __entry->raddr, __entry->flags) 204 - ); 205 - 206 - TRACE_EVENT(kvm_book3s_mmu_invalidate, 207 - TP_PROTO(struct hpte_cache *pte), 208 - TP_ARGS(pte), 209 - 210 - TP_STRUCT__entry( 211 - __field( u64, host_vpn ) 212 - __field( u64, pfn ) 213 - __field( ulong, eaddr ) 214 - __field( u64, vpage ) 215 - __field( ulong, raddr ) 216 - __field( int, flags ) 217 - ), 218 - 219 - TP_fast_assign( 220 - __entry->host_vpn = pte->host_vpn; 221 - __entry->pfn = pte->pfn; 222 - __entry->eaddr = pte->pte.eaddr; 223 - __entry->vpage = pte->pte.vpage; 224 - __entry->raddr = pte->pte.raddr; 225 - __entry->flags = (pte->pte.may_read ? 0x4 : 0) | 226 - (pte->pte.may_write ? 0x2 : 0) | 227 - (pte->pte.may_execute ? 0x1 : 0); 228 - ), 229 - 230 - TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 231 - __entry->host_vpn, __entry->pfn, __entry->eaddr, 232 - __entry->vpage, __entry->raddr, __entry->flags) 233 - ); 234 - 235 - TRACE_EVENT(kvm_book3s_mmu_flush, 236 - TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, 237 - unsigned long long p2), 238 - TP_ARGS(type, vcpu, p1, p2), 239 - 240 - TP_STRUCT__entry( 241 - __field( int, count ) 242 - __field( unsigned long long, p1 ) 243 - __field( unsigned long long, p2 ) 244 - __field( const char *, type ) 245 - ), 246 - 247 - TP_fast_assign( 248 - __entry->count = to_book3s(vcpu)->hpte_cache_count; 249 - __entry->p1 = p1; 250 - __entry->p2 = p2; 251 - __entry->type = type; 252 - ), 253 - 254 - TP_printk("Flush %d %sPTEs: %llx - %llx", 255 - __entry->count, __entry->type, __entry->p1, __entry->p2) 256 - ); 257 - 258 - TRACE_EVENT(kvm_book3s_slb_found, 259 - TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), 260 - TP_ARGS(gvsid, hvsid), 261 - 262 - TP_STRUCT__entry( 263 - __field( unsigned long long, gvsid ) 264 - __field( 
unsigned long long, hvsid ) 265 - ), 266 - 267 - TP_fast_assign( 268 - __entry->gvsid = gvsid; 269 - __entry->hvsid = hvsid; 270 - ), 271 - 272 - TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) 273 - ); 274 - 275 - TRACE_EVENT(kvm_book3s_slb_fail, 276 - TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), 277 - TP_ARGS(sid_map_mask, gvsid), 278 - 279 - TP_STRUCT__entry( 280 - __field( unsigned short, sid_map_mask ) 281 - __field( unsigned long long, gvsid ) 282 - ), 283 - 284 - TP_fast_assign( 285 - __entry->sid_map_mask = sid_map_mask; 286 - __entry->gvsid = gvsid; 287 - ), 288 - 289 - TP_printk("%x/%x: %llx", __entry->sid_map_mask, 290 - SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) 291 - ); 292 - 293 - TRACE_EVENT(kvm_book3s_slb_map, 294 - TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, 295 - unsigned long long hvsid), 296 - TP_ARGS(sid_map_mask, gvsid, hvsid), 297 - 298 - TP_STRUCT__entry( 299 - __field( unsigned short, sid_map_mask ) 300 - __field( unsigned long long, guest_vsid ) 301 - __field( unsigned long long, host_vsid ) 302 - ), 303 - 304 - TP_fast_assign( 305 - __entry->sid_map_mask = sid_map_mask; 306 - __entry->guest_vsid = gvsid; 307 - __entry->host_vsid = hvsid; 308 - ), 309 - 310 - TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, 311 - __entry->guest_vsid, __entry->host_vsid) 312 - ); 313 - 314 - TRACE_EVENT(kvm_book3s_slbmte, 315 - TP_PROTO(u64 slb_vsid, u64 slb_esid), 316 - TP_ARGS(slb_vsid, slb_esid), 317 - 318 - TP_STRUCT__entry( 319 - __field( u64, slb_vsid ) 320 - __field( u64, slb_esid ) 321 - ), 322 - 323 - TP_fast_assign( 324 - __entry->slb_vsid = slb_vsid; 325 - __entry->slb_esid = slb_esid; 326 - ), 327 - 328 - TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) 329 - ); 330 - 331 - #endif /* CONFIG_PPC_BOOK3S */ 332 - 333 - 334 - /************************************************************************* 335 - * Book3E trace points * 336 - 
*************************************************************************/ 337 - 338 - #ifdef CONFIG_BOOKE 339 - 340 - TRACE_EVENT(kvm_booke206_stlb_write, 341 - TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), 342 - TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), 343 - 344 - TP_STRUCT__entry( 345 - __field( __u32, mas0 ) 346 - __field( __u32, mas8 ) 347 - __field( __u32, mas1 ) 348 - __field( __u64, mas2 ) 349 - __field( __u64, mas7_3 ) 350 - ), 351 - 352 - TP_fast_assign( 353 - __entry->mas0 = mas0; 354 - __entry->mas8 = mas8; 355 - __entry->mas1 = mas1; 356 - __entry->mas2 = mas2; 357 - __entry->mas7_3 = mas7_3; 358 - ), 359 - 360 - TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", 361 - __entry->mas0, __entry->mas8, __entry->mas1, 362 - __entry->mas2, __entry->mas7_3) 363 - ); 364 - 365 - TRACE_EVENT(kvm_booke206_gtlb_write, 366 - TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), 367 - TP_ARGS(mas0, mas1, mas2, mas7_3), 368 - 369 - TP_STRUCT__entry( 370 - __field( __u32, mas0 ) 371 - __field( __u32, mas1 ) 372 - __field( __u64, mas2 ) 373 - __field( __u64, mas7_3 ) 374 - ), 375 - 376 - TP_fast_assign( 377 - __entry->mas0 = mas0; 378 - __entry->mas1 = mas1; 379 - __entry->mas2 = mas2; 380 - __entry->mas7_3 = mas7_3; 381 - ), 382 - 383 - TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", 384 - __entry->mas0, __entry->mas1, 385 - __entry->mas2, __entry->mas7_3) 386 - ); 387 - 388 - TRACE_EVENT(kvm_booke206_ref_release, 389 - TP_PROTO(__u64 pfn, __u32 flags), 390 - TP_ARGS(pfn, flags), 391 - 392 - TP_STRUCT__entry( 393 - __field( __u64, pfn ) 394 - __field( __u32, flags ) 395 - ), 396 - 397 - TP_fast_assign( 398 - __entry->pfn = pfn; 399 - __entry->flags = flags; 400 - ), 401 - 402 - TP_printk("pfn=%llx flags=%x", 403 - __entry->pfn, __entry->flags) 404 - ); 405 - 406 - TRACE_EVENT(kvm_booke_queue_irqprio, 407 - TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), 408 - TP_ARGS(vcpu, priority), 409 - 410 - TP_STRUCT__entry( 411 
- __field( __u32, cpu_nr ) 412 - __field( __u32, priority ) 413 - __field( unsigned long, pending ) 414 - ), 415 - 416 - TP_fast_assign( 417 - __entry->cpu_nr = vcpu->vcpu_id; 418 - __entry->priority = priority; 419 - __entry->pending = vcpu->arch.pending_exceptions; 420 - ), 421 - 422 - TP_printk("vcpu=%x prio=%x pending=%lx", 423 - __entry->cpu_nr, __entry->priority, __entry->pending) 424 - ); 425 - 426 - #endif 427 238 428 239 #endif /* _TRACE_KVM_H */ 429 240
+177
arch/powerpc/kvm/trace_booke.h
··· 1 + #if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ) 2 + #define _TRACE_KVM_BOOKE_H 3 + 4 + #include <linux/tracepoint.h> 5 + 6 + #undef TRACE_SYSTEM 7 + #define TRACE_SYSTEM kvm_booke 8 + #define TRACE_INCLUDE_PATH . 9 + #define TRACE_INCLUDE_FILE trace_booke 10 + 11 + #define kvm_trace_symbol_exit \ 12 + {0, "CRITICAL"}, \ 13 + {1, "MACHINE_CHECK"}, \ 14 + {2, "DATA_STORAGE"}, \ 15 + {3, "INST_STORAGE"}, \ 16 + {4, "EXTERNAL"}, \ 17 + {5, "ALIGNMENT"}, \ 18 + {6, "PROGRAM"}, \ 19 + {7, "FP_UNAVAIL"}, \ 20 + {8, "SYSCALL"}, \ 21 + {9, "AP_UNAVAIL"}, \ 22 + {10, "DECREMENTER"}, \ 23 + {11, "FIT"}, \ 24 + {12, "WATCHDOG"}, \ 25 + {13, "DTLB_MISS"}, \ 26 + {14, "ITLB_MISS"}, \ 27 + {15, "DEBUG"}, \ 28 + {32, "SPE_UNAVAIL"}, \ 29 + {33, "SPE_FP_DATA"}, \ 30 + {34, "SPE_FP_ROUND"}, \ 31 + {35, "PERFORMANCE_MONITOR"}, \ 32 + {36, "DOORBELL"}, \ 33 + {37, "DOORBELL_CRITICAL"}, \ 34 + {38, "GUEST_DBELL"}, \ 35 + {39, "GUEST_DBELL_CRIT"}, \ 36 + {40, "HV_SYSCALL"}, \ 37 + {41, "HV_PRIV"} 38 + 39 + TRACE_EVENT(kvm_exit, 40 + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), 41 + TP_ARGS(exit_nr, vcpu), 42 + 43 + TP_STRUCT__entry( 44 + __field( unsigned int, exit_nr ) 45 + __field( unsigned long, pc ) 46 + __field( unsigned long, msr ) 47 + __field( unsigned long, dar ) 48 + __field( unsigned long, last_inst ) 49 + ), 50 + 51 + TP_fast_assign( 52 + __entry->exit_nr = exit_nr; 53 + __entry->pc = kvmppc_get_pc(vcpu); 54 + __entry->dar = kvmppc_get_fault_dar(vcpu); 55 + __entry->msr = vcpu->arch.shared->msr; 56 + __entry->last_inst = vcpu->arch.last_inst; 57 + ), 58 + 59 + TP_printk("exit=%s" 60 + " | pc=0x%lx" 61 + " | msr=0x%lx" 62 + " | dar=0x%lx" 63 + " | last_inst=0x%lx" 64 + , 65 + __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), 66 + __entry->pc, 67 + __entry->msr, 68 + __entry->dar, 69 + __entry->last_inst 70 + ) 71 + ); 72 + 73 + TRACE_EVENT(kvm_unmap_hva, 74 + TP_PROTO(unsigned long hva), 75 + TP_ARGS(hva), 76 + 77 + 
TP_STRUCT__entry( 78 + __field( unsigned long, hva ) 79 + ), 80 + 81 + TP_fast_assign( 82 + __entry->hva = hva; 83 + ), 84 + 85 + TP_printk("unmap hva 0x%lx\n", __entry->hva) 86 + ); 87 + 88 + TRACE_EVENT(kvm_booke206_stlb_write, 89 + TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), 90 + TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), 91 + 92 + TP_STRUCT__entry( 93 + __field( __u32, mas0 ) 94 + __field( __u32, mas8 ) 95 + __field( __u32, mas1 ) 96 + __field( __u64, mas2 ) 97 + __field( __u64, mas7_3 ) 98 + ), 99 + 100 + TP_fast_assign( 101 + __entry->mas0 = mas0; 102 + __entry->mas8 = mas8; 103 + __entry->mas1 = mas1; 104 + __entry->mas2 = mas2; 105 + __entry->mas7_3 = mas7_3; 106 + ), 107 + 108 + TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", 109 + __entry->mas0, __entry->mas8, __entry->mas1, 110 + __entry->mas2, __entry->mas7_3) 111 + ); 112 + 113 + TRACE_EVENT(kvm_booke206_gtlb_write, 114 + TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), 115 + TP_ARGS(mas0, mas1, mas2, mas7_3), 116 + 117 + TP_STRUCT__entry( 118 + __field( __u32, mas0 ) 119 + __field( __u32, mas1 ) 120 + __field( __u64, mas2 ) 121 + __field( __u64, mas7_3 ) 122 + ), 123 + 124 + TP_fast_assign( 125 + __entry->mas0 = mas0; 126 + __entry->mas1 = mas1; 127 + __entry->mas2 = mas2; 128 + __entry->mas7_3 = mas7_3; 129 + ), 130 + 131 + TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", 132 + __entry->mas0, __entry->mas1, 133 + __entry->mas2, __entry->mas7_3) 134 + ); 135 + 136 + TRACE_EVENT(kvm_booke206_ref_release, 137 + TP_PROTO(__u64 pfn, __u32 flags), 138 + TP_ARGS(pfn, flags), 139 + 140 + TP_STRUCT__entry( 141 + __field( __u64, pfn ) 142 + __field( __u32, flags ) 143 + ), 144 + 145 + TP_fast_assign( 146 + __entry->pfn = pfn; 147 + __entry->flags = flags; 148 + ), 149 + 150 + TP_printk("pfn=%llx flags=%x", 151 + __entry->pfn, __entry->flags) 152 + ); 153 + 154 + TRACE_EVENT(kvm_booke_queue_irqprio, 155 + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), 
156 + TP_ARGS(vcpu, priority), 157 + 158 + TP_STRUCT__entry( 159 + __field( __u32, cpu_nr ) 160 + __field( __u32, priority ) 161 + __field( unsigned long, pending ) 162 + ), 163 + 164 + TP_fast_assign( 165 + __entry->cpu_nr = vcpu->vcpu_id; 166 + __entry->priority = priority; 167 + __entry->pending = vcpu->arch.pending_exceptions; 168 + ), 169 + 170 + TP_printk("vcpu=%x prio=%x pending=%lx", 171 + __entry->cpu_nr, __entry->priority, __entry->pending) 172 + ); 173 + 174 + #endif 175 + 176 + /* This part must be outside protection */ 177 + #include <trace/define_trace.h>
+297
arch/powerpc/kvm/trace_pr.h
··· 1 + 2 + #if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ) 3 + #define _TRACE_KVM_PR_H 4 + 5 + #include <linux/tracepoint.h> 6 + 7 + #undef TRACE_SYSTEM 8 + #define TRACE_SYSTEM kvm_pr 9 + #define TRACE_INCLUDE_PATH . 10 + #define TRACE_INCLUDE_FILE trace_pr 11 + 12 + #define kvm_trace_symbol_exit \ 13 + {0x100, "SYSTEM_RESET"}, \ 14 + {0x200, "MACHINE_CHECK"}, \ 15 + {0x300, "DATA_STORAGE"}, \ 16 + {0x380, "DATA_SEGMENT"}, \ 17 + {0x400, "INST_STORAGE"}, \ 18 + {0x480, "INST_SEGMENT"}, \ 19 + {0x500, "EXTERNAL"}, \ 20 + {0x501, "EXTERNAL_LEVEL"}, \ 21 + {0x502, "EXTERNAL_HV"}, \ 22 + {0x600, "ALIGNMENT"}, \ 23 + {0x700, "PROGRAM"}, \ 24 + {0x800, "FP_UNAVAIL"}, \ 25 + {0x900, "DECREMENTER"}, \ 26 + {0x980, "HV_DECREMENTER"}, \ 27 + {0xc00, "SYSCALL"}, \ 28 + {0xd00, "TRACE"}, \ 29 + {0xe00, "H_DATA_STORAGE"}, \ 30 + {0xe20, "H_INST_STORAGE"}, \ 31 + {0xe40, "H_EMUL_ASSIST"}, \ 32 + {0xf00, "PERFMON"}, \ 33 + {0xf20, "ALTIVEC"}, \ 34 + {0xf40, "VSX"} 35 + 36 + TRACE_EVENT(kvm_book3s_reenter, 37 + TP_PROTO(int r, struct kvm_vcpu *vcpu), 38 + TP_ARGS(r, vcpu), 39 + 40 + TP_STRUCT__entry( 41 + __field( unsigned int, r ) 42 + __field( unsigned long, pc ) 43 + ), 44 + 45 + TP_fast_assign( 46 + __entry->r = r; 47 + __entry->pc = kvmppc_get_pc(vcpu); 48 + ), 49 + 50 + TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) 51 + ); 52 + 53 + #ifdef CONFIG_PPC_BOOK3S_64 54 + 55 + TRACE_EVENT(kvm_book3s_64_mmu_map, 56 + TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, 57 + struct kvmppc_pte *orig_pte), 58 + TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), 59 + 60 + TP_STRUCT__entry( 61 + __field( unsigned char, flag_w ) 62 + __field( unsigned char, flag_x ) 63 + __field( unsigned long, eaddr ) 64 + __field( unsigned long, hpteg ) 65 + __field( unsigned long, va ) 66 + __field( unsigned long long, vpage ) 67 + __field( unsigned long, hpaddr ) 68 + ), 69 + 70 + TP_fast_assign( 71 + __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? 
'-' : 'w'; 72 + __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; 73 + __entry->eaddr = orig_pte->eaddr; 74 + __entry->hpteg = hpteg; 75 + __entry->va = va; 76 + __entry->vpage = orig_pte->vpage; 77 + __entry->hpaddr = hpaddr; 78 + ), 79 + 80 + TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", 81 + __entry->flag_w, __entry->flag_x, __entry->eaddr, 82 + __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) 83 + ); 84 + 85 + #endif /* CONFIG_PPC_BOOK3S_64 */ 86 + 87 + TRACE_EVENT(kvm_book3s_mmu_map, 88 + TP_PROTO(struct hpte_cache *pte), 89 + TP_ARGS(pte), 90 + 91 + TP_STRUCT__entry( 92 + __field( u64, host_vpn ) 93 + __field( u64, pfn ) 94 + __field( ulong, eaddr ) 95 + __field( u64, vpage ) 96 + __field( ulong, raddr ) 97 + __field( int, flags ) 98 + ), 99 + 100 + TP_fast_assign( 101 + __entry->host_vpn = pte->host_vpn; 102 + __entry->pfn = pte->pfn; 103 + __entry->eaddr = pte->pte.eaddr; 104 + __entry->vpage = pte->pte.vpage; 105 + __entry->raddr = pte->pte.raddr; 106 + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | 107 + (pte->pte.may_write ? 0x2 : 0) | 108 + (pte->pte.may_execute ? 0x1 : 0); 109 + ), 110 + 111 + TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 112 + __entry->host_vpn, __entry->pfn, __entry->eaddr, 113 + __entry->vpage, __entry->raddr, __entry->flags) 114 + ); 115 + 116 + TRACE_EVENT(kvm_book3s_mmu_invalidate, 117 + TP_PROTO(struct hpte_cache *pte), 118 + TP_ARGS(pte), 119 + 120 + TP_STRUCT__entry( 121 + __field( u64, host_vpn ) 122 + __field( u64, pfn ) 123 + __field( ulong, eaddr ) 124 + __field( u64, vpage ) 125 + __field( ulong, raddr ) 126 + __field( int, flags ) 127 + ), 128 + 129 + TP_fast_assign( 130 + __entry->host_vpn = pte->host_vpn; 131 + __entry->pfn = pte->pfn; 132 + __entry->eaddr = pte->pte.eaddr; 133 + __entry->vpage = pte->pte.vpage; 134 + __entry->raddr = pte->pte.raddr; 135 + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | 136 + (pte->pte.may_write ? 
0x2 : 0) | 137 + (pte->pte.may_execute ? 0x1 : 0); 138 + ), 139 + 140 + TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 141 + __entry->host_vpn, __entry->pfn, __entry->eaddr, 142 + __entry->vpage, __entry->raddr, __entry->flags) 143 + ); 144 + 145 + TRACE_EVENT(kvm_book3s_mmu_flush, 146 + TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, 147 + unsigned long long p2), 148 + TP_ARGS(type, vcpu, p1, p2), 149 + 150 + TP_STRUCT__entry( 151 + __field( int, count ) 152 + __field( unsigned long long, p1 ) 153 + __field( unsigned long long, p2 ) 154 + __field( const char *, type ) 155 + ), 156 + 157 + TP_fast_assign( 158 + __entry->count = to_book3s(vcpu)->hpte_cache_count; 159 + __entry->p1 = p1; 160 + __entry->p2 = p2; 161 + __entry->type = type; 162 + ), 163 + 164 + TP_printk("Flush %d %sPTEs: %llx - %llx", 165 + __entry->count, __entry->type, __entry->p1, __entry->p2) 166 + ); 167 + 168 + TRACE_EVENT(kvm_book3s_slb_found, 169 + TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), 170 + TP_ARGS(gvsid, hvsid), 171 + 172 + TP_STRUCT__entry( 173 + __field( unsigned long long, gvsid ) 174 + __field( unsigned long long, hvsid ) 175 + ), 176 + 177 + TP_fast_assign( 178 + __entry->gvsid = gvsid; 179 + __entry->hvsid = hvsid; 180 + ), 181 + 182 + TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) 183 + ); 184 + 185 + TRACE_EVENT(kvm_book3s_slb_fail, 186 + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), 187 + TP_ARGS(sid_map_mask, gvsid), 188 + 189 + TP_STRUCT__entry( 190 + __field( unsigned short, sid_map_mask ) 191 + __field( unsigned long long, gvsid ) 192 + ), 193 + 194 + TP_fast_assign( 195 + __entry->sid_map_mask = sid_map_mask; 196 + __entry->gvsid = gvsid; 197 + ), 198 + 199 + TP_printk("%x/%x: %llx", __entry->sid_map_mask, 200 + SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) 201 + ); 202 + 203 + TRACE_EVENT(kvm_book3s_slb_map, 204 + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, 205 + unsigned 
long long hvsid), 206 + TP_ARGS(sid_map_mask, gvsid, hvsid), 207 + 208 + TP_STRUCT__entry( 209 + __field( unsigned short, sid_map_mask ) 210 + __field( unsigned long long, guest_vsid ) 211 + __field( unsigned long long, host_vsid ) 212 + ), 213 + 214 + TP_fast_assign( 215 + __entry->sid_map_mask = sid_map_mask; 216 + __entry->guest_vsid = gvsid; 217 + __entry->host_vsid = hvsid; 218 + ), 219 + 220 + TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, 221 + __entry->guest_vsid, __entry->host_vsid) 222 + ); 223 + 224 + TRACE_EVENT(kvm_book3s_slbmte, 225 + TP_PROTO(u64 slb_vsid, u64 slb_esid), 226 + TP_ARGS(slb_vsid, slb_esid), 227 + 228 + TP_STRUCT__entry( 229 + __field( u64, slb_vsid ) 230 + __field( u64, slb_esid ) 231 + ), 232 + 233 + TP_fast_assign( 234 + __entry->slb_vsid = slb_vsid; 235 + __entry->slb_esid = slb_esid; 236 + ), 237 + 238 + TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) 239 + ); 240 + 241 + TRACE_EVENT(kvm_exit, 242 + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), 243 + TP_ARGS(exit_nr, vcpu), 244 + 245 + TP_STRUCT__entry( 246 + __field( unsigned int, exit_nr ) 247 + __field( unsigned long, pc ) 248 + __field( unsigned long, msr ) 249 + __field( unsigned long, dar ) 250 + __field( unsigned long, srr1 ) 251 + __field( unsigned long, last_inst ) 252 + ), 253 + 254 + TP_fast_assign( 255 + __entry->exit_nr = exit_nr; 256 + __entry->pc = kvmppc_get_pc(vcpu); 257 + __entry->dar = kvmppc_get_fault_dar(vcpu); 258 + __entry->msr = vcpu->arch.shared->msr; 259 + __entry->srr1 = vcpu->arch.shadow_srr1; 260 + __entry->last_inst = vcpu->arch.last_inst; 261 + ), 262 + 263 + TP_printk("exit=%s" 264 + " | pc=0x%lx" 265 + " | msr=0x%lx" 266 + " | dar=0x%lx" 267 + " | srr1=0x%lx" 268 + " | last_inst=0x%lx" 269 + , 270 + __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), 271 + __entry->pc, 272 + __entry->msr, 273 + __entry->dar, 274 + __entry->srr1, 275 + __entry->last_inst 276 + ) 277 + ); 278 + 279 + TRACE_EVENT(kvm_unmap_hva, 280 + 
TP_PROTO(unsigned long hva), 281 + TP_ARGS(hva), 282 + 283 + TP_STRUCT__entry( 284 + __field( unsigned long, hva ) 285 + ), 286 + 287 + TP_fast_assign( 288 + __entry->hva = hva; 289 + ), 290 + 291 + TP_printk("unmap hva 0x%lx\n", __entry->hva) 292 + ); 293 + 294 + #endif /* _TRACE_KVM_H */ 295 + 296 + /* This part must be outside protection */ 297 + #include <trace/define_trace.h>
-8
arch/s390/include/asm/kvm_host.h
··· 38 38 struct sca_entry cpu[64]; 39 39 } __attribute__((packed)); 40 40 41 - #define KVM_NR_PAGE_SIZES 2 42 - #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8) 43 - #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) 44 - #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) 45 - #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 46 - #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) 47 - 48 41 #define CPUSTAT_STOPPED 0x80000000 49 42 #define CPUSTAT_WAIT 0x10000000 50 43 #define CPUSTAT_ECALL_PEND 0x08000000 ··· 213 220 /* for local_interrupt.action_flags */ 214 221 #define ACTION_STORE_ON_STOP (1<<0) 215 222 #define ACTION_STOP_ON_STOP (1<<1) 216 - #define ACTION_RELOADVCPU_ON_STOP (1<<2) 217 223 218 224 struct kvm_s390_local_interrupt { 219 225 spinlock_t lock;
+1 -3
arch/s390/kvm/diag.c
··· 107 107 108 108 static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) 109 109 { 110 - int ret, idx; 110 + int ret; 111 111 112 112 /* No virtio-ccw notification? Get out quickly. */ 113 113 if (!vcpu->kvm->arch.css_support || 114 114 (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) 115 115 return -EOPNOTSUPP; 116 116 117 - idx = srcu_read_lock(&vcpu->kvm->srcu); 118 117 /* 119 118 * The layout is as follows: 120 119 * - gpr 2 contains the subchannel id (passed as addr) ··· 124 125 vcpu->run->s.regs.gprs[2], 125 126 8, &vcpu->run->s.regs.gprs[3], 126 127 vcpu->run->s.regs.gprs[4]); 127 - srcu_read_unlock(&vcpu->kvm->srcu, idx); 128 128 129 129 /* 130 130 * Return cookie in gpr 2, but don't overwrite the register if the
+14 -7
arch/s390/kvm/gaccess.h
··· 18 18 #include <asm/uaccess.h> 19 19 #include "kvm-s390.h" 20 20 21 + /* Convert real to absolute address by applying the prefix of the CPU */ 22 + static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, 23 + unsigned long gaddr) 24 + { 25 + unsigned long prefix = vcpu->arch.sie_block->prefix; 26 + if (gaddr < 2 * PAGE_SIZE) 27 + gaddr += prefix; 28 + else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE) 29 + gaddr -= prefix; 30 + return gaddr; 31 + } 32 + 21 33 static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, 22 34 void __user *gptr, 23 35 int prefixing) 24 36 { 25 - unsigned long prefix = vcpu->arch.sie_block->prefix; 26 37 unsigned long gaddr = (unsigned long) gptr; 27 38 unsigned long uaddr; 28 39 29 - if (prefixing) { 30 - if (gaddr < 2 * PAGE_SIZE) 31 - gaddr += prefix; 32 - else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) 33 - gaddr -= prefix; 34 - } 40 + if (prefixing) 41 + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); 35 42 uaddr = gmap_fault(gaddr, vcpu->arch.gmap); 36 43 if (IS_ERR_VALUE(uaddr)) 37 44 uaddr = -EFAULT;
-6
arch/s390/kvm/intercept.c
··· 62 62 63 63 trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); 64 64 65 - if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) { 66 - vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP; 67 - rc = SIE_INTERCEPT_RERUNVCPU; 68 - vcpu->run->exit_reason = KVM_EXIT_INTR; 69 - } 70 - 71 65 if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { 72 66 atomic_set_mask(CPUSTAT_STOPPED, 73 67 &vcpu->arch.sie_block->cpuflags);
+3
arch/s390/kvm/interrupt.c
··· 436 436 hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); 437 437 VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); 438 438 no_timer: 439 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 439 440 spin_lock(&vcpu->arch.local_int.float_int->lock); 440 441 spin_lock_bh(&vcpu->arch.local_int.lock); 441 442 add_wait_queue(&vcpu->wq, &wait); ··· 456 455 remove_wait_queue(&vcpu->wq, &wait); 457 456 spin_unlock_bh(&vcpu->arch.local_int.lock); 458 457 spin_unlock(&vcpu->arch.local_int.float_int->lock); 458 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 459 + 459 460 hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); 460 461 return 0; 461 462 }
+63 -33
arch/s390/kvm/kvm-s390.c
··· 695 695 return 0; 696 696 } 697 697 698 - static int __vcpu_run(struct kvm_vcpu *vcpu) 698 + static int vcpu_pre_run(struct kvm_vcpu *vcpu) 699 699 { 700 - int rc; 700 + int rc, cpuflags; 701 701 702 702 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); 703 703 ··· 715 715 return rc; 716 716 717 717 vcpu->arch.sie_block->icptcode = 0; 718 - VCPU_EVENT(vcpu, 6, "entering sie flags %x", 719 - atomic_read(&vcpu->arch.sie_block->cpuflags)); 720 - trace_kvm_s390_sie_enter(vcpu, 721 - atomic_read(&vcpu->arch.sie_block->cpuflags)); 718 + cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 719 + VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 720 + trace_kvm_s390_sie_enter(vcpu, cpuflags); 722 721 723 - /* 724 - * As PF_VCPU will be used in fault handler, between guest_enter 725 - * and guest_exit should be no uaccess. 726 - */ 727 - preempt_disable(); 728 - kvm_guest_enter(); 729 - preempt_enable(); 730 - rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); 731 - kvm_guest_exit(); 722 + return 0; 723 + } 724 + 725 + static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 726 + { 727 + int rc; 732 728 733 729 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 734 730 vcpu->arch.sie_block->icptcode); 735 731 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 736 732 737 - if (rc > 0) 733 + if (exit_reason >= 0) { 738 734 rc = 0; 739 - if (rc < 0) { 735 + } else { 740 736 if (kvm_is_ucontrol(vcpu->kvm)) { 741 737 rc = SIE_INTERCEPT_UCONTROL; 742 738 } else { ··· 743 747 } 744 748 745 749 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); 750 + 751 + if (rc == 0) { 752 + if (kvm_is_ucontrol(vcpu->kvm)) 753 + rc = -EOPNOTSUPP; 754 + else 755 + rc = kvm_handle_sie_intercept(vcpu); 756 + } 757 + 758 + return rc; 759 + } 760 + 761 + static int __vcpu_run(struct kvm_vcpu *vcpu) 762 + { 763 + int rc, exit_reason; 764 + 765 + /* 766 + * We try to hold kvm->srcu during most of vcpu_run (except when run- 767 + 
* ning the guest), so that memslots (and other stuff) are protected 768 + */ 769 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 770 + 771 + do { 772 + rc = vcpu_pre_run(vcpu); 773 + if (rc) 774 + break; 775 + 776 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 777 + /* 778 + * As PF_VCPU will be used in fault handler, between 779 + * guest_enter and guest_exit should be no uaccess. 780 + */ 781 + preempt_disable(); 782 + kvm_guest_enter(); 783 + preempt_enable(); 784 + exit_reason = sie64a(vcpu->arch.sie_block, 785 + vcpu->run->s.regs.gprs); 786 + kvm_guest_exit(); 787 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 788 + 789 + rc = vcpu_post_run(vcpu, exit_reason); 790 + } while (!signal_pending(current) && !rc); 791 + 792 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 746 793 return rc; 747 794 } 748 795 ··· 794 755 int rc; 795 756 sigset_t sigsaved; 796 757 797 - rerun_vcpu: 798 758 if (vcpu->sigset_active) 799 759 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 800 760 ··· 826 788 } 827 789 828 790 might_fault(); 829 - 830 - do { 831 - rc = __vcpu_run(vcpu); 832 - if (rc) 833 - break; 834 - if (kvm_is_ucontrol(vcpu->kvm)) 835 - rc = -EOPNOTSUPP; 836 - else 837 - rc = kvm_handle_sie_intercept(vcpu); 838 - } while (!signal_pending(current) && !rc); 839 - 840 - if (rc == SIE_INTERCEPT_RERUNVCPU) 841 - goto rerun_vcpu; 791 + rc = __vcpu_run(vcpu); 842 792 843 793 if (signal_pending(current) && !rc) { 844 794 kvm_run->exit_reason = KVM_EXIT_INTR; ··· 984 958 { 985 959 struct kvm_vcpu *vcpu = filp->private_data; 986 960 void __user *argp = (void __user *)arg; 961 + int idx; 987 962 long r; 988 963 989 964 switch (ioctl) { ··· 998 971 break; 999 972 } 1000 973 case KVM_S390_STORE_STATUS: 974 + idx = srcu_read_lock(&vcpu->kvm->srcu); 1001 975 r = kvm_s390_vcpu_store_status(vcpu, arg); 976 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 1002 977 break; 1003 978 case KVM_S390_SET_INITIAL_PSW: { 1004 979 psw_t psw; ··· 1096 1067 return 
VM_FAULT_SIGBUS; 1097 1068 } 1098 1069 1099 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 1070 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 1100 1071 struct kvm_memory_slot *dont) 1101 1072 { 1102 1073 } 1103 1074 1104 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 1075 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1076 + unsigned long npages) 1105 1077 { 1106 1078 return 0; 1107 1079 }
+5 -4
arch/s390/kvm/kvm-s390.h
··· 28 28 extern unsigned long *vfacilities; 29 29 30 30 /* negativ values are error codes, positive values for internal conditions */ 31 - #define SIE_INTERCEPT_RERUNVCPU (1<<0) 32 - #define SIE_INTERCEPT_UCONTROL (1<<1) 31 + #define SIE_INTERCEPT_UCONTROL (1<<0) 33 32 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); 34 33 35 34 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ ··· 90 91 91 92 static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) 92 93 { 93 - *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; 94 - *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; 94 + if (r1) 95 + *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; 96 + if (r2) 97 + *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; 95 98 } 96 99 97 100 static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
+61
arch/s390/kvm/priv.c
··· 30 30 #include "kvm-s390.h" 31 31 #include "trace.h" 32 32 33 + /* Handle SCK (SET CLOCK) interception */ 34 + static int handle_set_clock(struct kvm_vcpu *vcpu) 35 + { 36 + struct kvm_vcpu *cpup; 37 + s64 hostclk, val; 38 + u64 op2; 39 + int i; 40 + 41 + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 42 + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 43 + 44 + op2 = kvm_s390_get_base_disp_s(vcpu); 45 + if (op2 & 7) /* Operand must be on a doubleword boundary */ 46 + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 47 + if (get_guest(vcpu, val, (u64 __user *) op2)) 48 + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 49 + 50 + if (store_tod_clock(&hostclk)) { 51 + kvm_s390_set_psw_cc(vcpu, 3); 52 + return 0; 53 + } 54 + val = (val - hostclk) & ~0x3fUL; 55 + 56 + mutex_lock(&vcpu->kvm->lock); 57 + kvm_for_each_vcpu(i, cpup, vcpu->kvm) 58 + cpup->arch.sie_block->epoch = val; 59 + mutex_unlock(&vcpu->kvm->lock); 60 + 61 + kvm_s390_set_psw_cc(vcpu, 0); 62 + return 0; 63 + } 64 + 33 65 static int handle_set_prefix(struct kvm_vcpu *vcpu) 34 66 { 35 67 u64 operand2; ··· 157 125 vcpu->arch.sie_block->gpsw.addr = 158 126 __rewind_psw(vcpu->arch.sie_block->gpsw, 4); 159 127 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); 128 + return 0; 129 + } 130 + 131 + static int handle_test_block(struct kvm_vcpu *vcpu) 132 + { 133 + unsigned long hva; 134 + gpa_t addr; 135 + int reg2; 136 + 137 + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 138 + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 139 + 140 + kvm_s390_get_regs_rre(vcpu, NULL, &reg2); 141 + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; 142 + addr = kvm_s390_real_to_abs(vcpu, addr); 143 + 144 + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); 145 + if (kvm_is_error_hva(hva)) 146 + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 147 + /* 148 + * We don't expect errors on modern systems, and do not care 149 + * about storage keys 
(yet), so let's just clear the page. 150 + */ 151 + if (clear_user((void __user *)hva, PAGE_SIZE) != 0) 152 + return -EFAULT; 153 + kvm_s390_set_psw_cc(vcpu, 0); 154 + vcpu->run->s.regs.gprs[0] = 0; 160 155 return 0; 161 156 } 162 157 ··· 497 438 498 439 static const intercept_handler_t b2_handlers[256] = { 499 440 [0x02] = handle_stidp, 441 + [0x04] = handle_set_clock, 500 442 [0x10] = handle_set_prefix, 501 443 [0x11] = handle_store_prefix, 502 444 [0x12] = handle_store_cpu_address, 503 445 [0x29] = handle_skey, 504 446 [0x2a] = handle_skey, 505 447 [0x2b] = handle_skey, 448 + [0x2c] = handle_test_block, 506 449 [0x30] = handle_io_inst, 507 450 [0x31] = handle_io_inst, 508 451 [0x32] = handle_io_inst,
+7 -3
arch/x86/include/asm/kvm_emulate.h
··· 274 274 275 275 bool guest_mode; /* guest running a nested guest */ 276 276 bool perm_ok; /* do not check permissions if true */ 277 - bool only_vendor_specific_insn; 277 + bool ud; /* inject an #UD if host doesn't support insn */ 278 278 279 279 bool have_exception; 280 280 struct x86_exception exception; 281 281 282 - /* decode cache */ 283 - u8 twobyte; 282 + /* 283 + * decode cache 284 + */ 285 + 286 + /* current opcode length in bytes */ 287 + u8 opcode_len; 284 288 u8 b; 285 289 u8 intercept; 286 290 u8 lock_prefix;
+15 -8
arch/x86/include/asm/kvm_host.h
··· 79 79 #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 80 80 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) 81 81 82 + static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) 83 + { 84 + /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ 85 + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - 86 + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); 87 + } 88 + 82 89 #define SELECTOR_TI_MASK (1 << 2) 83 90 #define SELECTOR_RPL_MASK 0x03 84 91 ··· 260 253 * mode. 261 254 */ 262 255 struct kvm_mmu { 263 - void (*new_cr3)(struct kvm_vcpu *vcpu); 264 256 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); 265 257 unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); 266 258 u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); ··· 267 261 bool prefault); 268 262 void (*inject_page_fault)(struct kvm_vcpu *vcpu, 269 263 struct x86_exception *fault); 270 - void (*free)(struct kvm_vcpu *vcpu); 271 264 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, 272 265 struct x86_exception *exception); 273 266 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); ··· 394 389 395 390 struct fpu guest_fpu; 396 391 u64 xcr0; 392 + u64 guest_supported_xcr0; 393 + u32 guest_xstate_size; 397 394 398 395 struct kvm_pio_request pio; 399 396 void *pio_data; ··· 564 557 565 558 struct list_head assigned_dev_head; 566 559 struct iommu_domain *iommu_domain; 567 - int iommu_flags; 560 + bool iommu_noncoherent; 561 + #define __KVM_HAVE_ARCH_NONCOHERENT_DMA 562 + atomic_t noncoherent_dma_count; 568 563 struct kvm_pic *vpic; 569 564 struct kvm_ioapic *vioapic; 570 565 struct kvm_pit *vpit; ··· 789 780 790 781 void kvm_mmu_destroy(struct kvm_vcpu *vcpu); 791 782 int kvm_mmu_create(struct kvm_vcpu *vcpu); 792 - int kvm_mmu_setup(struct kvm_vcpu *vcpu); 783 + void kvm_mmu_setup(struct kvm_vcpu *vcpu); 793 784 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 794 785 u64 dirty_mask, u64 nx_mask, u64 x_mask); 795 786 796 - int 
kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 787 + void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 797 788 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 798 789 void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, 799 790 struct kvm_memory_slot *slot, ··· 931 922 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, 932 923 void *insn, int insn_len); 933 924 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); 925 + void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); 934 926 935 927 void kvm_enable_tdp(void); 936 928 void kvm_disable_tdp(void); 937 - 938 - int complete_pio(struct kvm_vcpu *vcpu); 939 - bool kvm_check_iopl(struct kvm_vcpu *vcpu); 940 929 941 930 static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) 942 931 {
+2
arch/x86/include/asm/pvclock.h
··· 14 14 struct timespec *ts); 15 15 void pvclock_resume(void); 16 16 17 + void pvclock_touch_watchdogs(void); 18 + 17 19 /* 18 20 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 19 21 * yielding a 64-bit result.
+3 -3
arch/x86/include/uapi/asm/kvm.h
··· 211 211 __u32 padding[3]; 212 212 }; 213 213 214 - #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 215 - #define KVM_CPUID_FLAG_STATEFUL_FUNC 2 216 - #define KVM_CPUID_FLAG_STATE_READ_NEXT 4 214 + #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) 215 + #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) 216 + #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) 217 217 218 218 /* for KVM_SET_CPUID2 */ 219 219 struct kvm_cpuid2 {
+1
arch/x86/include/uapi/asm/msr-index.h
··· 536 536 537 537 /* MSR_IA32_VMX_MISC bits */ 538 538 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) 539 + #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F 539 540 /* AMD-V MSRs */ 540 541 541 542 #define MSR_VM_CR 0xc0010114
+1
arch/x86/kernel/kvmclock.c
··· 139 139 src = &hv_clock[cpu].pvti; 140 140 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { 141 141 src->flags &= ~PVCLOCK_GUEST_STOPPED; 142 + pvclock_touch_watchdogs(); 142 143 ret = true; 143 144 } 144 145
+13
arch/x86/kernel/pvclock.c
··· 43 43 return pv_tsc_khz; 44 44 } 45 45 46 + void pvclock_touch_watchdogs(void) 47 + { 48 + touch_softlockup_watchdog_sync(); 49 + clocksource_touch_watchdog(); 50 + rcu_cpu_stall_reset(); 51 + reset_hung_task_detector(); 52 + } 53 + 46 54 static atomic64_t last_value = ATOMIC64_INIT(0); 47 55 48 56 void pvclock_resume(void) ··· 81 73 do { 82 74 version = __pvclock_read_cycles(src, &ret, &flags); 83 75 } while ((src->version & 1) || version != src->version); 76 + 77 + if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { 78 + src->flags &= ~PVCLOCK_GUEST_STOPPED; 79 + pvclock_touch_watchdogs(); 80 + } 84 81 85 82 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && 86 83 (flags & PVCLOCK_TSC_STABLE_BIT))
+1
arch/x86/kvm/Kconfig
··· 38 38 select PERF_EVENTS 39 39 select HAVE_KVM_MSI 40 40 select HAVE_KVM_CPU_RELAX_INTERCEPT 41 + select KVM_VFIO 41 42 ---help--- 42 43 Support hosting fully virtualized guest machines using hardware 43 44 virtualization extensions. You will need a fairly recent
+1 -1
arch/x86/kvm/Makefile
··· 9 9 10 10 kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ 11 11 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ 12 - $(KVM)/eventfd.o $(KVM)/irqchip.o 12 + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o 13 13 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o 14 14 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o 15 15
+106 -9
arch/x86/kvm/cpuid.c
··· 23 23 #include "mmu.h" 24 24 #include "trace.h" 25 25 26 + static u32 xstate_required_size(u64 xstate_bv) 27 + { 28 + int feature_bit = 0; 29 + u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; 30 + 31 + xstate_bv &= ~XSTATE_FPSSE; 32 + while (xstate_bv) { 33 + if (xstate_bv & 0x1) { 34 + u32 eax, ebx, ecx, edx; 35 + cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); 36 + ret = max(ret, eax + ebx); 37 + } 38 + 39 + xstate_bv >>= 1; 40 + feature_bit++; 41 + } 42 + 43 + return ret; 44 + } 45 + 26 46 void kvm_update_cpuid(struct kvm_vcpu *vcpu) 27 47 { 28 48 struct kvm_cpuid_entry2 *best; ··· 64 44 apic->lapic_timer.timer_mode_mask = 3 << 17; 65 45 else 66 46 apic->lapic_timer.timer_mode_mask = 1 << 17; 47 + } 48 + 49 + best = kvm_find_cpuid_entry(vcpu, 0xD, 0); 50 + if (!best) { 51 + vcpu->arch.guest_supported_xcr0 = 0; 52 + vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; 53 + } else { 54 + vcpu->arch.guest_supported_xcr0 = 55 + (best->eax | ((u64)best->edx << 32)) & 56 + host_xcr0 & KVM_SUPPORTED_XCR0; 57 + vcpu->arch.guest_xstate_size = 58 + xstate_required_size(vcpu->arch.guest_supported_xcr0); 67 59 } 68 60 69 61 kvm_pmu_cpuid_update(vcpu); ··· 214 182 { 215 183 u64 mask = ((u64)1 << bit); 216 184 217 - return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; 185 + return mask & KVM_SUPPORTED_XCR0 & host_xcr0; 218 186 } 219 187 220 188 #define F(x) bit(X86_FEATURE_##x) 221 189 222 - static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 223 - u32 index, int *nent, int maxnent) 190 + static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, 191 + u32 func, u32 index, int *nent, int maxnent) 192 + { 193 + switch (func) { 194 + case 0: 195 + entry->eax = 1; /* only one leaf currently */ 196 + ++*nent; 197 + break; 198 + case 1: 199 + entry->ecx = F(MOVBE); 200 + ++*nent; 201 + break; 202 + default: 203 + break; 204 + } 205 + 206 + entry->function = func; 207 + entry->index = index; 208 + 209 + return 0; 210 + } 
211 + 212 + static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 213 + u32 index, int *nent, int maxnent) 224 214 { 225 215 int r; 226 216 unsigned f_nx = is_efer_nx() ? F(NX) : 0; ··· 437 383 case 0xd: { 438 384 int idx, i; 439 385 386 + entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0; 387 + entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32; 440 388 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 441 389 for (idx = 1, i = 1; idx < 64; ++idx) { 442 390 if (*nent >= maxnent) ··· 537 481 return r; 538 482 } 539 483 484 + static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, 485 + u32 idx, int *nent, int maxnent, unsigned int type) 486 + { 487 + if (type == KVM_GET_EMULATED_CPUID) 488 + return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); 489 + 490 + return __do_cpuid_ent(entry, func, idx, nent, maxnent); 491 + } 492 + 540 493 #undef F 541 494 542 495 struct kvm_cpuid_param { ··· 560 495 return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; 561 496 } 562 497 563 - int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 564 - struct kvm_cpuid_entry2 __user *entries) 498 + static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries, 499 + __u32 num_entries, unsigned int ioctl_type) 500 + { 501 + int i; 502 + __u32 pad[3]; 503 + 504 + if (ioctl_type != KVM_GET_EMULATED_CPUID) 505 + return false; 506 + 507 + /* 508 + * We want to make sure that ->padding is being passed clean from 509 + * userspace in case we want to use it for something in the future. 510 + * 511 + * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we 512 + * have to give ourselves satisfied only with the emulated side. /me 513 + * sheds a tear. 
514 + */ 515 + for (i = 0; i < num_entries; i++) { 516 + if (copy_from_user(pad, entries[i].padding, sizeof(pad))) 517 + return true; 518 + 519 + if (pad[0] || pad[1] || pad[2]) 520 + return true; 521 + } 522 + return false; 523 + } 524 + 525 + int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, 526 + struct kvm_cpuid_entry2 __user *entries, 527 + unsigned int type) 565 528 { 566 529 struct kvm_cpuid_entry2 *cpuid_entries; 567 530 int limit, nent = 0, r = -E2BIG, i; ··· 606 513 goto out; 607 514 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 608 515 cpuid->nent = KVM_MAX_CPUID_ENTRIES; 516 + 517 + if (sanity_check_entries(entries, cpuid->nent, type)) 518 + return -EINVAL; 519 + 609 520 r = -ENOMEM; 610 - cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 521 + cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 611 522 if (!cpuid_entries) 612 523 goto out; 613 524 ··· 623 526 continue; 624 527 625 528 r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, 626 - &nent, cpuid->nent); 529 + &nent, cpuid->nent, type); 627 530 628 531 if (r) 629 532 goto out_free; ··· 634 537 limit = cpuid_entries[nent - 1].eax; 635 538 for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) 636 539 r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, 637 - &nent, cpuid->nent); 540 + &nent, cpuid->nent, type); 638 541 639 542 if (r) 640 543 goto out_free; ··· 758 661 *edx = best->edx; 759 662 } else 760 663 *eax = *ebx = *ecx = *edx = 0; 664 + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx); 761 665 } 762 666 EXPORT_SYMBOL_GPL(kvm_cpuid); 763 667 ··· 774 676 kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); 775 677 kvm_register_write(vcpu, VCPU_REGS_RDX, edx); 776 678 kvm_x86_ops->skip_emulated_instruction(vcpu); 777 - trace_kvm_cpuid(function, eax, ebx, ecx, edx); 778 679 } 779 680 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
+3 -2
arch/x86/kvm/cpuid.h
··· 6 6 void kvm_update_cpuid(struct kvm_vcpu *vcpu); 7 7 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 8 8 u32 function, u32 index); 9 - int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 10 - struct kvm_cpuid_entry2 __user *entries); 9 + int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, 10 + struct kvm_cpuid_entry2 __user *entries, 11 + unsigned int type); 11 12 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, 12 13 struct kvm_cpuid *cpuid, 13 14 struct kvm_cpuid_entry __user *entries);
+107 -23
arch/x86/kvm/emulate.c
··· 130 130 #define Mov (1<<20) 131 131 /* Misc flags */ 132 132 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ 133 - #define VendorSpecific (1<<22) /* Vendor specific instruction */ 133 + #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */ 134 134 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ 135 135 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ 136 136 #define Undefined (1<<25) /* No Such Instruction */ ··· 785 785 * @highbyte_regs specifies whether to decode AH,CH,DH,BH. 786 786 */ 787 787 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, 788 - int highbyte_regs) 788 + int byteop) 789 789 { 790 790 void *p; 791 + int highbyte_regs = (ctxt->rex_prefix == 0) && byteop; 791 792 792 793 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) 793 794 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; ··· 1025 1024 struct operand *op) 1026 1025 { 1027 1026 unsigned reg = ctxt->modrm_reg; 1028 - int highbyte_regs = ctxt->rex_prefix == 0; 1029 1027 1030 1028 if (!(ctxt->d & ModRM)) 1031 1029 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3); ··· 1045 1045 } 1046 1046 1047 1047 op->type = OP_REG; 1048 - if (ctxt->d & ByteOp) { 1049 - op->addr.reg = decode_register(ctxt, reg, highbyte_regs); 1050 - op->bytes = 1; 1051 - } else { 1052 - op->addr.reg = decode_register(ctxt, reg, 0); 1053 - op->bytes = ctxt->op_bytes; 1054 - } 1048 + op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 1049 + op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp); 1050 + 1055 1051 fetch_register_operand(op); 1056 1052 op->orig_val = op->val; 1057 1053 } ··· 1078 1082 ctxt->modrm_seg = VCPU_SREG_DS; 1079 1083 1080 1084 if (ctxt->modrm_mod == 3) { 1081 - int highbyte_regs = ctxt->rex_prefix == 0; 1082 - 1083 1085 op->type = OP_REG; 1084 1086 op->bytes = (ctxt->d & ByteOp) ? 
1 : ctxt->op_bytes; 1085 1087 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1086 - highbyte_regs && (ctxt->d & ByteOp)); 1088 + ctxt->d & ByteOp); 1087 1089 if (ctxt->d & Sse) { 1088 1090 op->type = OP_XMM; 1089 1091 op->bytes = 16; ··· 2955 2961 return X86EMUL_CONTINUE; 2956 2962 } 2957 2963 2964 + #define FFL(x) bit(X86_FEATURE_##x) 2965 + 2966 + static int em_movbe(struct x86_emulate_ctxt *ctxt) 2967 + { 2968 + u32 ebx, ecx, edx, eax = 1; 2969 + u16 tmp; 2970 + 2971 + /* 2972 + * Check MOVBE is set in the guest-visible CPUID leaf. 2973 + */ 2974 + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); 2975 + if (!(ecx & FFL(MOVBE))) 2976 + return emulate_ud(ctxt); 2977 + 2978 + switch (ctxt->op_bytes) { 2979 + case 2: 2980 + /* 2981 + * From MOVBE definition: "...When the operand size is 16 bits, 2982 + * the upper word of the destination register remains unchanged 2983 + * ..." 2984 + * 2985 + * Both casting ->valptr and ->val to u16 breaks strict aliasing 2986 + * rules so we have to do the operation almost per hand. 
2987 + */ 2988 + tmp = (u16)ctxt->src.val; 2989 + ctxt->dst.val &= ~0xffffUL; 2990 + ctxt->dst.val |= (unsigned long)swab16(tmp); 2991 + break; 2992 + case 4: 2993 + ctxt->dst.val = swab32((u32)ctxt->src.val); 2994 + break; 2995 + case 8: 2996 + ctxt->dst.val = swab64(ctxt->src.val); 2997 + break; 2998 + default: 2999 + return X86EMUL_PROPAGATE_FAULT; 3000 + } 3001 + return X86EMUL_CONTINUE; 3002 + } 3003 + 2958 3004 static int em_cr_write(struct x86_emulate_ctxt *ctxt) 2959 3005 { 2960 3006 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) ··· 3290 3256 return X86EMUL_CONTINUE; 3291 3257 } 3292 3258 3259 + static int em_sahf(struct x86_emulate_ctxt *ctxt) 3260 + { 3261 + u32 flags; 3262 + 3263 + flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF; 3264 + flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8; 3265 + 3266 + ctxt->eflags &= ~0xffUL; 3267 + ctxt->eflags |= flags | X86_EFLAGS_FIXED; 3268 + return X86EMUL_CONTINUE; 3269 + } 3270 + 3293 3271 static int em_lahf(struct x86_emulate_ctxt *ctxt) 3294 3272 { 3295 3273 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL; ··· 3548 3502 3549 3503 static const struct opcode group7_rm3[] = { 3550 3504 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), 3551 - II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), 3505 + II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall), 3552 3506 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), 3553 3507 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), 3554 3508 DIP(SrcNone | Prot | Priv, stgi, check_svme), ··· 3633 3587 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), 3634 3588 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), 3635 3589 }, { 3636 - I(SrcNone | Priv | VendorSpecific, em_vmcall), 3590 + I(SrcNone | Priv | EmulateOnUD, em_vmcall), 3637 3591 EXT(0, group7_rm1), 3638 3592 N, EXT(0, group7_rm3), 3639 3593 II(SrcNone | DstMem | Mov, em_smsw, smsw), N, ··· 3796 3750 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), 3797 3751 I(SrcImmFAddr | No64, em_call_far), 
N, 3798 3752 II(ImplicitOps | Stack, em_pushf, pushf), 3799 - II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf), 3753 + II(ImplicitOps | Stack, em_popf, popf), 3754 + I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf), 3800 3755 /* 0xA0 - 0xA7 */ 3801 3756 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 3802 3757 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), ··· 3857 3810 static const struct opcode twobyte_table[256] = { 3858 3811 /* 0x00 - 0x0F */ 3859 3812 G(0, group6), GD(0, &group7), N, N, 3860 - N, I(ImplicitOps | VendorSpecific, em_syscall), 3813 + N, I(ImplicitOps | EmulateOnUD, em_syscall), 3861 3814 II(ImplicitOps | Priv, em_clts, clts), N, 3862 3815 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, 3863 3816 N, D(ImplicitOps | ModRM), N, N, ··· 3877 3830 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), 3878 3831 II(ImplicitOps | Priv, em_rdmsr, rdmsr), 3879 3832 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), 3880 - I(ImplicitOps | VendorSpecific, em_sysenter), 3881 - I(ImplicitOps | Priv | VendorSpecific, em_sysexit), 3833 + I(ImplicitOps | EmulateOnUD, em_sysenter), 3834 + I(ImplicitOps | Priv | EmulateOnUD, em_sysexit), 3882 3835 N, N, 3883 3836 N, N, N, N, N, N, N, N, 3884 3837 /* 0x40 - 0x4F */ ··· 3937 3890 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3938 3891 /* 0xF0 - 0xFF */ 3939 3892 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N 3893 + }; 3894 + 3895 + static const struct gprefix three_byte_0f_38_f0 = { 3896 + I(DstReg | SrcMem | Mov, em_movbe), N, N, N 3897 + }; 3898 + 3899 + static const struct gprefix three_byte_0f_38_f1 = { 3900 + I(DstMem | SrcReg | Mov, em_movbe), N, N, N 3901 + }; 3902 + 3903 + /* 3904 + * Insns below are selected by the prefix which indexed by the third opcode 3905 + * byte. 
3906 + */ 3907 + static const struct opcode opcode_map_0f_38[256] = { 3908 + /* 0x00 - 0x7f */ 3909 + X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), 3910 + /* 0x80 - 0xef */ 3911 + X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), 3912 + /* 0xf0 - 0xf1 */ 3913 + GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0), 3914 + GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1), 3915 + /* 0xf2 - 0xff */ 3916 + N, N, X4(N), X8(N) 3940 3917 }; 3941 3918 3942 3919 #undef D ··· 4111 4040 case OpMem8: 4112 4041 ctxt->memop.bytes = 1; 4113 4042 if (ctxt->memop.type == OP_REG) { 4114 - ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); 4043 + ctxt->memop.addr.reg = decode_register(ctxt, 4044 + ctxt->modrm_rm, true); 4115 4045 fetch_register_operand(&ctxt->memop); 4116 4046 } 4117 4047 goto mem_common; ··· 4198 4126 ctxt->_eip = ctxt->eip; 4199 4127 ctxt->fetch.start = ctxt->_eip; 4200 4128 ctxt->fetch.end = ctxt->fetch.start + insn_len; 4129 + ctxt->opcode_len = 1; 4201 4130 if (insn_len > 0) 4202 4131 memcpy(ctxt->fetch.data, insn, insn_len); 4203 4132 ··· 4281 4208 opcode = opcode_table[ctxt->b]; 4282 4209 /* Two-byte opcode? 
*/ 4283 4210 if (ctxt->b == 0x0f) { 4284 - ctxt->twobyte = 1; 4211 + ctxt->opcode_len = 2; 4285 4212 ctxt->b = insn_fetch(u8, ctxt); 4286 4213 opcode = twobyte_table[ctxt->b]; 4214 + 4215 + /* 0F_38 opcode map */ 4216 + if (ctxt->b == 0x38) { 4217 + ctxt->opcode_len = 3; 4218 + ctxt->b = insn_fetch(u8, ctxt); 4219 + opcode = opcode_map_0f_38[ctxt->b]; 4220 + } 4287 4221 } 4288 4222 ctxt->d = opcode.flags; 4289 4223 ··· 4347 4267 if (ctxt->d == 0 || (ctxt->d & NotImpl)) 4348 4268 return EMULATION_FAILED; 4349 4269 4350 - if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) 4270 + if (!(ctxt->d & EmulateOnUD) && ctxt->ud) 4351 4271 return EMULATION_FAILED; 4352 4272 4353 4273 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) ··· 4620 4540 goto writeback; 4621 4541 } 4622 4542 4623 - if (ctxt->twobyte) 4543 + if (ctxt->opcode_len == 2) 4624 4544 goto twobyte_insn; 4545 + else if (ctxt->opcode_len == 3) 4546 + goto threebyte_insn; 4625 4547 4626 4548 switch (ctxt->b) { 4627 4549 case 0x63: /* movsxd */ ··· 4807 4725 default: 4808 4726 goto cannot_emulate; 4809 4727 } 4728 + 4729 + threebyte_insn: 4810 4730 4811 4731 if (rc != X86EMUL_CONTINUE) 4812 4732 goto done;
+34 -81
arch/x86/kvm/mmu.c
··· 2570 2570 kvm_release_pfn_clean(pfn); 2571 2571 } 2572 2572 2573 - static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) 2574 - { 2575 - mmu_free_roots(vcpu); 2576 - } 2577 - 2578 2573 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, 2579 2574 bool no_dirty_log) 2580 2575 { ··· 3419 3424 return 0; 3420 3425 } 3421 3426 3422 - static void nonpaging_free(struct kvm_vcpu *vcpu) 3427 + static void nonpaging_init_context(struct kvm_vcpu *vcpu, 3428 + struct kvm_mmu *context) 3423 3429 { 3424 - mmu_free_roots(vcpu); 3425 - } 3426 - 3427 - static int nonpaging_init_context(struct kvm_vcpu *vcpu, 3428 - struct kvm_mmu *context) 3429 - { 3430 - context->new_cr3 = nonpaging_new_cr3; 3431 3430 context->page_fault = nonpaging_page_fault; 3432 3431 context->gva_to_gpa = nonpaging_gva_to_gpa; 3433 - context->free = nonpaging_free; 3434 3432 context->sync_page = nonpaging_sync_page; 3435 3433 context->invlpg = nonpaging_invlpg; 3436 3434 context->update_pte = nonpaging_update_pte; ··· 3432 3444 context->root_hpa = INVALID_PAGE; 3433 3445 context->direct_map = true; 3434 3446 context->nx = false; 3435 - return 0; 3436 3447 } 3437 3448 3438 3449 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) ··· 3441 3454 } 3442 3455 EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); 3443 3456 3444 - static void paging_new_cr3(struct kvm_vcpu *vcpu) 3457 + void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) 3445 3458 { 3446 - pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu)); 3447 3459 mmu_free_roots(vcpu); 3448 3460 } 3449 3461 ··· 3455 3469 struct x86_exception *fault) 3456 3470 { 3457 3471 vcpu->arch.mmu.inject_page_fault(vcpu, fault); 3458 - } 3459 - 3460 - static void paging_free(struct kvm_vcpu *vcpu) 3461 - { 3462 - nonpaging_free(vcpu); 3463 3472 } 3464 3473 3465 3474 static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, ··· 3646 3665 mmu->last_pte_bitmap = map; 3647 3666 } 3648 3667 3649 - static int paging64_init_context_common(struct kvm_vcpu *vcpu, 3650 - struct 
kvm_mmu *context, 3651 - int level) 3668 + static void paging64_init_context_common(struct kvm_vcpu *vcpu, 3669 + struct kvm_mmu *context, 3670 + int level) 3652 3671 { 3653 3672 context->nx = is_nx(vcpu); 3654 3673 context->root_level = level; ··· 3658 3677 update_last_pte_bitmap(vcpu, context); 3659 3678 3660 3679 ASSERT(is_pae(vcpu)); 3661 - context->new_cr3 = paging_new_cr3; 3662 3680 context->page_fault = paging64_page_fault; 3663 3681 context->gva_to_gpa = paging64_gva_to_gpa; 3664 3682 context->sync_page = paging64_sync_page; 3665 3683 context->invlpg = paging64_invlpg; 3666 3684 context->update_pte = paging64_update_pte; 3667 - context->free = paging_free; 3668 3685 context->shadow_root_level = level; 3669 3686 context->root_hpa = INVALID_PAGE; 3670 3687 context->direct_map = false; 3671 - return 0; 3672 3688 } 3673 3689 3674 - static int paging64_init_context(struct kvm_vcpu *vcpu, 3675 - struct kvm_mmu *context) 3690 + static void paging64_init_context(struct kvm_vcpu *vcpu, 3691 + struct kvm_mmu *context) 3676 3692 { 3677 - return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); 3693 + paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); 3678 3694 } 3679 3695 3680 - static int paging32_init_context(struct kvm_vcpu *vcpu, 3681 - struct kvm_mmu *context) 3696 + static void paging32_init_context(struct kvm_vcpu *vcpu, 3697 + struct kvm_mmu *context) 3682 3698 { 3683 3699 context->nx = false; 3684 3700 context->root_level = PT32_ROOT_LEVEL; ··· 3684 3706 update_permission_bitmask(vcpu, context, false); 3685 3707 update_last_pte_bitmap(vcpu, context); 3686 3708 3687 - context->new_cr3 = paging_new_cr3; 3688 3709 context->page_fault = paging32_page_fault; 3689 3710 context->gva_to_gpa = paging32_gva_to_gpa; 3690 - context->free = paging_free; 3691 3711 context->sync_page = paging32_sync_page; 3692 3712 context->invlpg = paging32_invlpg; 3693 3713 context->update_pte = paging32_update_pte; 3694 3714 context->shadow_root_level = 
PT32E_ROOT_LEVEL; 3695 3715 context->root_hpa = INVALID_PAGE; 3696 3716 context->direct_map = false; 3697 - return 0; 3698 3717 } 3699 3718 3700 - static int paging32E_init_context(struct kvm_vcpu *vcpu, 3701 - struct kvm_mmu *context) 3719 + static void paging32E_init_context(struct kvm_vcpu *vcpu, 3720 + struct kvm_mmu *context) 3702 3721 { 3703 - return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); 3722 + paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); 3704 3723 } 3705 3724 3706 - static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) 3725 + static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) 3707 3726 { 3708 3727 struct kvm_mmu *context = vcpu->arch.walk_mmu; 3709 3728 3710 3729 context->base_role.word = 0; 3711 - context->new_cr3 = nonpaging_new_cr3; 3712 3730 context->page_fault = tdp_page_fault; 3713 - context->free = nonpaging_free; 3714 3731 context->sync_page = nonpaging_sync_page; 3715 3732 context->invlpg = nonpaging_invlpg; 3716 3733 context->update_pte = nonpaging_update_pte; ··· 3740 3767 3741 3768 update_permission_bitmask(vcpu, context, false); 3742 3769 update_last_pte_bitmap(vcpu, context); 3743 - 3744 - return 0; 3745 3770 } 3746 3771 3747 - int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) 3772 + void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) 3748 3773 { 3749 - int r; 3750 3774 bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); 3751 3775 ASSERT(vcpu); 3752 3776 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 3753 3777 3754 3778 if (!is_paging(vcpu)) 3755 - r = nonpaging_init_context(vcpu, context); 3779 + nonpaging_init_context(vcpu, context); 3756 3780 else if (is_long_mode(vcpu)) 3757 - r = paging64_init_context(vcpu, context); 3781 + paging64_init_context(vcpu, context); 3758 3782 else if (is_pae(vcpu)) 3759 - r = paging32E_init_context(vcpu, context); 3783 + paging32E_init_context(vcpu, context); 3760 3784 else 3761 - r = paging32_init_context(vcpu, context); 3785 + 
paging32_init_context(vcpu, context); 3762 3786 3763 3787 vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); 3764 3788 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); 3765 3789 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); 3766 3790 vcpu->arch.mmu.base_role.smep_andnot_wp 3767 3791 = smep && !is_write_protection(vcpu); 3768 - 3769 - return r; 3770 3792 } 3771 3793 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); 3772 3794 3773 - int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 3795 + void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 3774 3796 bool execonly) 3775 3797 { 3776 3798 ASSERT(vcpu); ··· 3774 3806 context->shadow_root_level = kvm_x86_ops->get_tdp_level(); 3775 3807 3776 3808 context->nx = true; 3777 - context->new_cr3 = paging_new_cr3; 3778 3809 context->page_fault = ept_page_fault; 3779 3810 context->gva_to_gpa = ept_gva_to_gpa; 3780 3811 context->sync_page = ept_sync_page; 3781 3812 context->invlpg = ept_invlpg; 3782 3813 context->update_pte = ept_update_pte; 3783 - context->free = paging_free; 3784 3814 context->root_level = context->shadow_root_level; 3785 3815 context->root_hpa = INVALID_PAGE; 3786 3816 context->direct_map = false; 3787 3817 3788 3818 update_permission_bitmask(vcpu, context, true); 3789 3819 reset_rsvds_bits_mask_ept(vcpu, context, execonly); 3790 - 3791 - return 0; 3792 3820 } 3793 3821 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); 3794 3822 3795 - static int init_kvm_softmmu(struct kvm_vcpu *vcpu) 3823 + static void init_kvm_softmmu(struct kvm_vcpu *vcpu) 3796 3824 { 3797 - int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); 3798 - 3825 + kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); 3799 3826 vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; 3800 3827 vcpu->arch.walk_mmu->get_cr3 = get_cr3; 3801 3828 vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read; 3802 3829 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; 3803 - 3804 - return r; 3805 3830 } 3806 3831 
3807 - static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) 3832 + static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) 3808 3833 { 3809 3834 struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; 3810 3835 ··· 3834 3873 3835 3874 update_permission_bitmask(vcpu, g_context, false); 3836 3875 update_last_pte_bitmap(vcpu, g_context); 3837 - 3838 - return 0; 3839 3876 } 3840 3877 3841 - static int init_kvm_mmu(struct kvm_vcpu *vcpu) 3878 + static void init_kvm_mmu(struct kvm_vcpu *vcpu) 3842 3879 { 3843 3880 if (mmu_is_nested(vcpu)) 3844 3881 return init_kvm_nested_mmu(vcpu); ··· 3846 3887 return init_kvm_softmmu(vcpu); 3847 3888 } 3848 3889 3849 - static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) 3890 + void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) 3850 3891 { 3851 3892 ASSERT(vcpu); 3852 - if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) 3853 - /* mmu.free() should set root_hpa = INVALID_PAGE */ 3854 - vcpu->arch.mmu.free(vcpu); 3855 - } 3856 3893 3857 - int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) 3858 - { 3859 - destroy_kvm_mmu(vcpu); 3860 - return init_kvm_mmu(vcpu); 3894 + kvm_mmu_unload(vcpu); 3895 + init_kvm_mmu(vcpu); 3861 3896 } 3862 3897 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); 3863 3898 ··· 3876 3923 void kvm_mmu_unload(struct kvm_vcpu *vcpu) 3877 3924 { 3878 3925 mmu_free_roots(vcpu); 3926 + WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); 3879 3927 } 3880 3928 EXPORT_SYMBOL_GPL(kvm_mmu_unload); 3881 3929 ··· 4235 4281 return alloc_mmu_pages(vcpu); 4236 4282 } 4237 4283 4238 - int kvm_mmu_setup(struct kvm_vcpu *vcpu) 4284 + void kvm_mmu_setup(struct kvm_vcpu *vcpu) 4239 4285 { 4240 4286 ASSERT(vcpu); 4241 4287 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 4242 4288 4243 - return init_kvm_mmu(vcpu); 4289 + init_kvm_mmu(vcpu); 4244 4290 } 4245 4291 4246 4292 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) ··· 4382 4428 int nr_to_scan = sc->nr_to_scan; 4383 4429 unsigned long freed = 0; 4384 4430 4385 - raw_spin_lock(&kvm_lock); 4431 + 
spin_lock(&kvm_lock); 4386 4432 4387 4433 list_for_each_entry(kvm, &vm_list, vm_list) { 4388 4434 int idx; ··· 4432 4478 break; 4433 4479 } 4434 4480 4435 - raw_spin_unlock(&kvm_lock); 4481 + spin_unlock(&kvm_lock); 4436 4482 return freed; 4437 - 4438 4483 } 4439 4484 4440 4485 static unsigned long ··· 4527 4574 { 4528 4575 ASSERT(vcpu); 4529 4576 4530 - destroy_kvm_mmu(vcpu); 4577 + kvm_mmu_unload(vcpu); 4531 4578 free_mmu_pages(vcpu); 4532 4579 mmu_free_memory_caches(vcpu); 4533 4580 }
+2 -2
arch/x86/kvm/mmu.h
··· 70 70 }; 71 71 72 72 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); 73 - int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); 74 - int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 73 + void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); 74 + void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 75 75 bool execonly); 76 76 77 77 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
+2 -6
arch/x86/kvm/svm.c
··· 1959 1959 nested_svm_vmexit(svm); 1960 1960 } 1961 1961 1962 - static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) 1962 + static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) 1963 1963 { 1964 - int r; 1965 - 1966 - r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); 1964 + kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); 1967 1965 1968 1966 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; 1969 1967 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; ··· 1969 1971 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; 1970 1972 vcpu->arch.mmu.shadow_root_level = get_npt_level(); 1971 1973 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 1972 - 1973 - return r; 1974 1974 } 1975 1975 1976 1976 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
+113 -45
arch/x86/kvm/vmx.c
··· 1498 1498 break; 1499 1499 1500 1500 if (i == NR_AUTOLOAD_MSRS) { 1501 - printk_once(KERN_WARNING"Not enough mst switch entries. " 1501 + printk_once(KERN_WARNING "Not enough msr switch entries. " 1502 1502 "Can't add msr %x\n", msr); 1503 1503 return; 1504 1504 } else if (i == m->nr) { ··· 1898 1898 /* 1899 1899 * KVM wants to inject page-faults which it got to the guest. This function 1900 1900 * checks whether in a nested guest, we need to inject them to L1 or L2. 1901 - * This function assumes it is called with the exit reason in vmcs02 being 1902 - * a #PF exception (this is the only case in which KVM injects a #PF when L2 1903 - * is running). 1904 1901 */ 1905 - static int nested_pf_handled(struct kvm_vcpu *vcpu) 1902 + static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) 1906 1903 { 1907 1904 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 1908 1905 1909 - /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 1910 - if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR))) 1906 + if (!(vmcs12->exception_bitmap & (1u << nr))) 1911 1907 return 0; 1912 1908 1913 1909 nested_vmx_vmexit(vcpu); ··· 1917 1921 struct vcpu_vmx *vmx = to_vmx(vcpu); 1918 1922 u32 intr_info = nr | INTR_INFO_VALID_MASK; 1919 1923 1920 - if (nr == PF_VECTOR && is_guest_mode(vcpu) && 1921 - !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) 1924 + if (!reinject && is_guest_mode(vcpu) && 1925 + nested_vmx_check_exception(vcpu, nr)) 1922 1926 return; 1923 1927 1924 1928 if (has_error_code) { ··· 2200 2204 #ifdef CONFIG_X86_64 2201 2205 VM_EXIT_HOST_ADDR_SPACE_SIZE | 2202 2206 #endif 2203 - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; 2207 + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | 2208 + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 2209 + if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) || 2210 + !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { 2211 + nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 2212 + 
nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER; 2213 + } 2204 2214 nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | 2205 - VM_EXIT_LOAD_IA32_EFER); 2215 + VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER); 2206 2216 2207 2217 /* entry controls */ 2208 2218 rdmsr(MSR_IA32_VMX_ENTRY_CTLS, ··· 2228 2226 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); 2229 2227 nested_vmx_procbased_ctls_low = 0; 2230 2228 nested_vmx_procbased_ctls_high &= 2231 - CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING | 2229 + CPU_BASED_VIRTUAL_INTR_PENDING | 2230 + CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | 2232 2231 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | 2233 2232 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | 2234 2233 CPU_BASED_CR3_STORE_EXITING | ··· 2255 2252 nested_vmx_secondary_ctls_low = 0; 2256 2253 nested_vmx_secondary_ctls_high &= 2257 2254 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2255 + SECONDARY_EXEC_UNRESTRICTED_GUEST | 2258 2256 SECONDARY_EXEC_WBINVD_EXITING; 2259 2257 2260 2258 if (enable_ept) { 2261 2259 /* nested EPT: emulate EPT also to L1 */ 2262 2260 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; 2263 2261 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | 2264 - VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; 2262 + VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | 2263 + VMX_EPT_INVEPT_BIT; 2265 2264 nested_vmx_ept_caps &= vmx_capability.ept; 2266 2265 /* 2267 2266 * Since invept is completely emulated we support both global ··· 3385 3380 if (enable_ept) { 3386 3381 eptp = construct_eptp(cr3); 3387 3382 vmcs_write64(EPT_POINTER, eptp); 3388 - guest_cr3 = is_paging(vcpu) ? 
kvm_read_cr3(vcpu) : 3389 - vcpu->kvm->arch.ept_identity_map_addr; 3383 + if (is_paging(vcpu) || is_guest_mode(vcpu)) 3384 + guest_cr3 = kvm_read_cr3(vcpu); 3385 + else 3386 + guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr; 3390 3387 ept_load_pdptrs(vcpu); 3391 3388 } 3392 3389 ··· 4886 4879 hypercall[2] = 0xc1; 4887 4880 } 4888 4881 4882 + static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val) 4883 + { 4884 + unsigned long always_on = VMXON_CR0_ALWAYSON; 4885 + 4886 + if (nested_vmx_secondary_ctls_high & 4887 + SECONDARY_EXEC_UNRESTRICTED_GUEST && 4888 + nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) 4889 + always_on &= ~(X86_CR0_PE | X86_CR0_PG); 4890 + return (val & always_on) == always_on; 4891 + } 4892 + 4889 4893 /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ 4890 4894 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) 4891 4895 { ··· 4915 4897 val = (val & ~vmcs12->cr0_guest_host_mask) | 4916 4898 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); 4917 4899 4918 - /* TODO: will have to take unrestricted guest mode into 4919 - * account */ 4920 - if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) 4900 + if (!nested_cr0_valid(vmcs12, val)) 4921 4901 return 1; 4922 4902 4923 4903 if (kvm_set_cr0(vcpu, val)) ··· 6643 6627 return 0; 6644 6628 else if (is_page_fault(intr_info)) 6645 6629 return enable_ept; 6630 + else if (is_no_device(intr_info) && 6631 + !(nested_read_cr0(vmcs12) & X86_CR0_TS)) 6632 + return 0; 6646 6633 return vmcs12->exception_bitmap & 6647 6634 (1u << (intr_info & INTR_INFO_VECTOR_MASK)); 6648 6635 case EXIT_REASON_EXTERNAL_INTERRUPT: ··· 6741 6722 *info2 = vmcs_read32(VM_EXIT_INTR_INFO); 6742 6723 } 6743 6724 6725 + static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) 6726 + { 6727 + u64 delta_tsc_l1; 6728 + u32 preempt_val_l1, preempt_val_l2, preempt_scale; 6729 + 6730 + if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control & 6731 + PIN_BASED_VMX_PREEMPTION_TIMER)) 
6732 + return; 6733 + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & 6734 + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; 6735 + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); 6736 + delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc()) 6737 + - vcpu->arch.last_guest_tsc; 6738 + preempt_val_l1 = delta_tsc_l1 >> preempt_scale; 6739 + if (preempt_val_l2 <= preempt_val_l1) 6740 + preempt_val_l2 = 0; 6741 + else 6742 + preempt_val_l2 -= preempt_val_l1; 6743 + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); 6744 + } 6745 + 6744 6746 /* 6745 6747 * The guest has exited. See if we can fix it or if we need userspace 6746 6748 * assistance. ··· 6775 6735 /* If guest state is invalid, start emulating */ 6776 6736 if (vmx->emulation_required) 6777 6737 return handle_invalid_guest_state(vcpu); 6778 - 6779 - /* 6780 - * the KVM_REQ_EVENT optimization bit is only on for one entry, and if 6781 - * we did not inject a still-pending event to L1 now because of 6782 - * nested_run_pending, we need to re-enable this bit. 
6783 - */ 6784 - if (vmx->nested.nested_run_pending) 6785 - kvm_make_request(KVM_REQ_EVENT, vcpu); 6786 - 6787 - if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH || 6788 - exit_reason == EXIT_REASON_VMRESUME)) 6789 - vmx->nested.nested_run_pending = 1; 6790 - else 6791 - vmx->nested.nested_run_pending = 0; 6792 6738 6793 6739 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 6794 6740 nested_vmx_vmexit(vcpu); ··· 7087 7061 case INTR_TYPE_HARD_EXCEPTION: 7088 7062 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 7089 7063 u32 err = vmcs_read32(error_code_field); 7090 - kvm_queue_exception_e(vcpu, vector, err); 7064 + kvm_requeue_exception_e(vcpu, vector, err); 7091 7065 } else 7092 - kvm_queue_exception(vcpu, vector); 7066 + kvm_requeue_exception(vcpu, vector); 7093 7067 break; 7094 7068 case INTR_TYPE_SOFT_INTR: 7095 7069 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); ··· 7172 7146 atomic_switch_perf_msrs(vmx); 7173 7147 debugctlmsr = get_debugctlmsr(); 7174 7148 7149 + if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) 7150 + nested_adjust_preemption_timer(vcpu); 7175 7151 vmx->__launched = vmx->loaded_vmcs->launched; 7176 7152 asm( 7177 7153 /* Store host registers */ ··· 7312 7284 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 7313 7285 trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); 7314 7286 7287 + /* 7288 + * the KVM_REQ_EVENT optimization bit is only on for one entry, and if 7289 + * we did not inject a still-pending event to L1 now because of 7290 + * nested_run_pending, we need to re-enable this bit. 
7291 + */ 7292 + if (vmx->nested.nested_run_pending) 7293 + kvm_make_request(KVM_REQ_EVENT, vcpu); 7294 + 7295 + vmx->nested.nested_run_pending = 0; 7296 + 7315 7297 vmx_complete_atomic_exit(vmx); 7316 7298 vmx_recover_nmi_blocking(vmx); 7317 7299 vmx_complete_interrupts(vmx); ··· 7448 7410 */ 7449 7411 if (is_mmio) 7450 7412 ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; 7451 - else if (vcpu->kvm->arch.iommu_domain && 7452 - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) 7413 + else if (kvm_arch_has_noncoherent_dma(vcpu->kvm)) 7453 7414 ret = kvm_get_guest_memory_type(vcpu, gfn) << 7454 7415 VMX_EPT_MT_EPTE_SHIFT; 7455 7416 else ··· 7538 7501 return get_vmcs12(vcpu)->ept_pointer; 7539 7502 } 7540 7503 7541 - static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 7504 + static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 7542 7505 { 7543 - int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, 7506 + kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, 7544 7507 nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); 7545 7508 7546 7509 vcpu->arch.mmu.set_cr3 = vmx_set_cr3; ··· 7548 7511 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; 7549 7512 7550 7513 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 7551 - 7552 - return r; 7553 7514 } 7554 7515 7555 7516 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) 7556 7517 { 7557 7518 vcpu->arch.walk_mmu = &vcpu->arch.mmu; 7519 + } 7520 + 7521 + static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, 7522 + struct x86_exception *fault) 7523 + { 7524 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 7525 + 7526 + WARN_ON(!is_guest_mode(vcpu)); 7527 + 7528 + /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. 
*/ 7529 + if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) 7530 + nested_vmx_vmexit(vcpu); 7531 + else 7532 + kvm_inject_page_fault(vcpu, fault); 7558 7533 } 7559 7534 7560 7535 /* ··· 7582 7533 { 7583 7534 struct vcpu_vmx *vmx = to_vmx(vcpu); 7584 7535 u32 exec_control; 7536 + u32 exit_control; 7585 7537 7586 7538 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); 7587 7539 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); ··· 7756 7706 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER 7757 7707 * bits are further modified by vmx_set_efer() below. 7758 7708 */ 7759 - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); 7709 + exit_control = vmcs_config.vmexit_ctrl; 7710 + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) 7711 + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 7712 + vmcs_write32(VM_EXIT_CONTROLS, exit_control); 7760 7713 7761 7714 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7762 7715 * emulated by vmx_set_efer(), below. 
··· 7825 7772 /* shadow page tables on either EPT or shadow page tables */ 7826 7773 kvm_set_cr3(vcpu, vmcs12->guest_cr3); 7827 7774 kvm_mmu_reset_context(vcpu); 7775 + 7776 + if (!enable_ept) 7777 + vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; 7828 7778 7829 7779 /* 7830 7780 * L1 may access the L2's PDPTR, so save them to construct vmcs12 ··· 7932 7876 return 1; 7933 7877 } 7934 7878 7935 - if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || 7879 + if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) || 7936 7880 ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { 7937 7881 nested_vmx_entry_failure(vcpu, vmcs12, 7938 7882 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); ··· 7993 7937 return -ENOMEM; 7994 7938 7995 7939 enter_guest_mode(vcpu); 7940 + 7941 + vmx->nested.nested_run_pending = 1; 7996 7942 7997 7943 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 7998 7944 ··· 8063 8005 u32 idt_vectoring; 8064 8006 unsigned int nr; 8065 8007 8066 - if (vcpu->arch.exception.pending) { 8008 + if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) { 8067 8009 nr = vcpu->arch.exception.nr; 8068 8010 idt_vectoring = nr | VECTORING_INFO_VALID_MASK; 8069 8011 ··· 8081 8023 } 8082 8024 8083 8025 vmcs12->idt_vectoring_info_field = idt_vectoring; 8084 - } else if (vcpu->arch.nmi_pending) { 8026 + } else if (vcpu->arch.nmi_injected) { 8085 8027 vmcs12->idt_vectoring_info_field = 8086 8028 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; 8087 8029 } else if (vcpu->arch.interrupt.pending) { ··· 8163 8105 vmcs12->guest_pending_dbg_exceptions = 8164 8106 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 8165 8107 8108 + if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && 8109 + (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) 8110 + vmcs12->vmx_preemption_timer_value = 8111 + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); 8112 + 8166 8113 /* 8167 8114 * In some cases 
(usually, nested EPT), L2 is allowed to change its 8168 8115 * own CR3 without exiting. If it has changed it, we must keep it. ··· 8193 8130 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 8194 8131 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) 8195 8132 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); 8133 + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) 8134 + vmcs12->guest_ia32_efer = vcpu->arch.efer; 8196 8135 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); 8197 8136 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); 8198 8137 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); ··· 8266 8201 * fpu_active (which may have changed). 8267 8202 * Note that vmx_set_cr0 refers to efer set above. 8268 8203 */ 8269 - kvm_set_cr0(vcpu, vmcs12->host_cr0); 8204 + vmx_set_cr0(vcpu, vmcs12->host_cr0); 8270 8205 /* 8271 8206 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need 8272 8207 * to apply the same changes to L1's vmcs. We just set cr0 correctly, ··· 8288 8223 8289 8224 kvm_set_cr3(vcpu, vmcs12->host_cr3); 8290 8225 kvm_mmu_reset_context(vcpu); 8226 + 8227 + if (!enable_ept) 8228 + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; 8291 8229 8292 8230 if (enable_vpid) { 8293 8231 /*
+77 -31
arch/x86/kvm/x86.c
··· 577 577 int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) 578 578 { 579 579 u64 xcr0; 580 + u64 valid_bits; 580 581 581 582 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ 582 583 if (index != XCR_XFEATURE_ENABLED_MASK) ··· 587 586 return 1; 588 587 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) 589 588 return 1; 590 - if (xcr0 & ~host_xcr0) 589 + 590 + /* 591 + * Do not allow the guest to set bits that we do not support 592 + * saving. However, xcr0 bit 0 is always set, even if the 593 + * emulated CPU does not support XSAVE (see fx_init). 594 + */ 595 + valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP; 596 + if (xcr0 & ~valid_bits) 591 597 return 1; 598 + 592 599 kvm_put_guest_xcr0(vcpu); 593 600 vcpu->arch.xcr0 = xcr0; 594 601 return 0; ··· 693 684 694 685 vcpu->arch.cr3 = cr3; 695 686 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); 696 - vcpu->arch.mmu.new_cr3(vcpu); 687 + kvm_mmu_new_cr3(vcpu); 697 688 return 0; 698 689 } 699 690 EXPORT_SYMBOL_GPL(kvm_set_cr3); ··· 2573 2564 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 2574 2565 case KVM_CAP_SET_TSS_ADDR: 2575 2566 case KVM_CAP_EXT_CPUID: 2567 + case KVM_CAP_EXT_EMUL_CPUID: 2576 2568 case KVM_CAP_CLOCKSOURCE: 2577 2569 case KVM_CAP_PIT: 2578 2570 case KVM_CAP_NOP_IO_DELAY: ··· 2683 2673 r = 0; 2684 2674 break; 2685 2675 } 2686 - case KVM_GET_SUPPORTED_CPUID: { 2676 + case KVM_GET_SUPPORTED_CPUID: 2677 + case KVM_GET_EMULATED_CPUID: { 2687 2678 struct kvm_cpuid2 __user *cpuid_arg = argp; 2688 2679 struct kvm_cpuid2 cpuid; 2689 2680 2690 2681 r = -EFAULT; 2691 2682 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2692 2683 goto out; 2693 - r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, 2694 - cpuid_arg->entries); 2684 + 2685 + r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, 2686 + ioctl); 2695 2687 if (r) 2696 2688 goto out; 2697 2689 ··· 2727 2715 2728 2716 static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) 2729 2717 { 2730 - return 
vcpu->kvm->arch.iommu_domain && 2731 - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); 2718 + return kvm_arch_has_noncoherent_dma(vcpu->kvm); 2732 2719 } 2733 2720 2734 2721 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ··· 2995 2984 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, 2996 2985 struct kvm_xsave *guest_xsave) 2997 2986 { 2998 - if (cpu_has_xsave) 2987 + if (cpu_has_xsave) { 2999 2988 memcpy(guest_xsave->region, 3000 2989 &vcpu->arch.guest_fpu.state->xsave, 3001 - xstate_size); 3002 - else { 2990 + vcpu->arch.guest_xstate_size); 2991 + *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &= 2992 + vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE; 2993 + } else { 3003 2994 memcpy(guest_xsave->region, 3004 2995 &vcpu->arch.guest_fpu.state->fxsave, 3005 2996 sizeof(struct i387_fxsave_struct)); ··· 3016 3003 u64 xstate_bv = 3017 3004 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; 3018 3005 3019 - if (cpu_has_xsave) 3006 + if (cpu_has_xsave) { 3007 + /* 3008 + * Here we allow setting states that are not present in 3009 + * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility 3010 + * with old userspace. 
3011 + */ 3012 + if (xstate_bv & ~KVM_SUPPORTED_XCR0) 3013 + return -EINVAL; 3014 + if (xstate_bv & ~host_xcr0) 3015 + return -EINVAL; 3020 3016 memcpy(&vcpu->arch.guest_fpu.state->xsave, 3021 - guest_xsave->region, xstate_size); 3022 - else { 3017 + guest_xsave->region, vcpu->arch.guest_xstate_size); 3018 + } else { 3023 3019 if (xstate_bv & ~XSTATE_FPSSE) 3024 3020 return -EINVAL; 3025 3021 memcpy(&vcpu->arch.guest_fpu.state->fxsave, ··· 3064 3042 3065 3043 for (i = 0; i < guest_xcrs->nr_xcrs; i++) 3066 3044 /* Only support XCR0 currently */ 3067 - if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { 3045 + if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) { 3068 3046 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, 3069 - guest_xcrs->xcrs[0].value); 3047 + guest_xcrs->xcrs[i].value); 3070 3048 break; 3071 3049 } 3072 3050 if (r) ··· 4797 4775 4798 4776 static void init_decode_cache(struct x86_emulate_ctxt *ctxt) 4799 4777 { 4800 - memset(&ctxt->twobyte, 0, 4801 - (void *)&ctxt->_regs - (void *)&ctxt->twobyte); 4778 + memset(&ctxt->opcode_len, 0, 4779 + (void *)&ctxt->_regs - (void *)&ctxt->opcode_len); 4802 4780 4803 4781 ctxt->fetch.start = 0; 4804 4782 ctxt->fetch.end = 0; ··· 5116 5094 ctxt->have_exception = false; 5117 5095 ctxt->perm_ok = false; 5118 5096 5119 - ctxt->only_vendor_specific_insn 5120 - = emulation_type & EMULTYPE_TRAP_UD; 5097 + ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; 5121 5098 5122 5099 r = x86_decode_insn(ctxt, insn, insn_len); 5123 5100 ··· 5284 5263 5285 5264 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); 5286 5265 5287 - raw_spin_lock(&kvm_lock); 5266 + spin_lock(&kvm_lock); 5288 5267 list_for_each_entry(kvm, &vm_list, vm_list) { 5289 5268 kvm_for_each_vcpu(i, vcpu, kvm) { 5290 5269 if (vcpu->cpu != freq->cpu) ··· 5294 5273 send_ipi = 1; 5295 5274 } 5296 5275 } 5297 - raw_spin_unlock(&kvm_lock); 5276 + spin_unlock(&kvm_lock); 5298 5277 5299 5278 if (freq->old < freq->new && send_ipi) { 5300 5279 /* 
··· 5447 5426 struct kvm_vcpu *vcpu; 5448 5427 int i; 5449 5428 5450 - raw_spin_lock(&kvm_lock); 5429 + spin_lock(&kvm_lock); 5451 5430 list_for_each_entry(kvm, &vm_list, vm_list) 5452 5431 kvm_for_each_vcpu(i, vcpu, kvm) 5453 5432 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); 5454 5433 atomic_set(&kvm_guest_has_master_clock, 0); 5455 - raw_spin_unlock(&kvm_lock); 5434 + spin_unlock(&kvm_lock); 5456 5435 } 5457 5436 5458 5437 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); ··· 5966 5945 5967 5946 vcpu->mode = IN_GUEST_MODE; 5968 5947 5948 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 5949 + 5969 5950 /* We should set ->mode before check ->requests, 5970 5951 * see the comment in make_all_cpus_request. 5971 5952 */ 5972 - smp_mb(); 5953 + smp_mb__after_srcu_read_unlock(); 5973 5954 5974 5955 local_irq_disable(); 5975 5956 ··· 5981 5958 smp_wmb(); 5982 5959 local_irq_enable(); 5983 5960 preempt_enable(); 5961 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 5984 5962 r = 1; 5985 5963 goto cancel_injection; 5986 5964 } 5987 - 5988 - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 5989 5965 5990 5966 if (req_immediate_exit) 5991 5967 smp_send_reschedule(vcpu->cpu); ··· 6710 6688 if (r) 6711 6689 return r; 6712 6690 kvm_vcpu_reset(vcpu); 6713 - r = kvm_mmu_setup(vcpu); 6691 + kvm_mmu_setup(vcpu); 6714 6692 vcpu_put(vcpu); 6715 6693 6716 6694 return r; ··· 6962 6940 6963 6941 vcpu->arch.ia32_tsc_adjust_msr = 0x0; 6964 6942 vcpu->arch.pv_time_enabled = false; 6943 + 6944 + vcpu->arch.guest_supported_xcr0 = 0; 6945 + vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; 6946 + 6965 6947 kvm_async_pf_hash_reset(vcpu); 6966 6948 kvm_pmu_init(vcpu); 6967 6949 ··· 7007 6981 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 7008 6982 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); 7009 6983 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 6984 + atomic_set(&kvm->arch.noncoherent_dma_count, 0); 7010 6985 7011 6986 /* Reserve bit 0 of 
irq_sources_bitmap for userspace irq source */ 7012 6987 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); ··· 7092 7065 kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); 7093 7066 } 7094 7067 7095 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 7068 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 7096 7069 struct kvm_memory_slot *dont) 7097 7070 { 7098 7071 int i; ··· 7113 7086 } 7114 7087 } 7115 7088 7116 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 7089 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 7090 + unsigned long npages) 7117 7091 { 7118 7092 int i; 7119 7093 ··· 7311 7283 int r; 7312 7284 7313 7285 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || 7314 - is_error_page(work->page)) 7286 + work->wakeup_all) 7315 7287 return; 7316 7288 7317 7289 r = kvm_mmu_reload(vcpu); ··· 7421 7393 struct x86_exception fault; 7422 7394 7423 7395 trace_kvm_async_pf_ready(work->arch.token, work->gva); 7424 - if (is_error_page(work->page)) 7396 + if (work->wakeup_all) 7425 7397 work->arch.token = ~0; /* broadcast wakeup */ 7426 7398 else 7427 7399 kvm_del_async_pf_gfn(vcpu, work->arch.gfn); ··· 7447 7419 return !kvm_event_needs_reinjection(vcpu) && 7448 7420 kvm_x86_ops->interrupt_allowed(vcpu); 7449 7421 } 7422 + 7423 + void kvm_arch_register_noncoherent_dma(struct kvm *kvm) 7424 + { 7425 + atomic_inc(&kvm->arch.noncoherent_dma_count); 7426 + } 7427 + EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); 7428 + 7429 + void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) 7430 + { 7431 + atomic_dec(&kvm->arch.noncoherent_dma_count); 7432 + } 7433 + EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); 7434 + 7435 + bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) 7436 + { 7437 + return atomic_read(&kvm->arch.noncoherent_dma_count); 7438 + } 7439 + EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); 7450 7440 7451 7441 
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 7452 7442 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
+1
arch/x86/kvm/x86.h
··· 122 122 gva_t addr, void *val, unsigned int bytes, 123 123 struct x86_exception *exception); 124 124 125 + #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) 125 126 extern u64 host_xcr0; 126 127 127 128 extern struct static_key kvm_no_apic_vcpu;
+26 -16
include/linux/kvm_host.h
··· 142 142 struct kvm_vcpu; 143 143 extern struct kmem_cache *kvm_vcpu_cache; 144 144 145 - extern raw_spinlock_t kvm_lock; 145 + extern spinlock_t kvm_lock; 146 146 extern struct list_head vm_list; 147 147 148 148 struct kvm_io_range { ··· 189 189 gva_t gva; 190 190 unsigned long addr; 191 191 struct kvm_arch_async_pf arch; 192 - struct page *page; 193 - bool done; 192 + bool wakeup_all; 194 193 }; 195 194 196 195 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); ··· 507 508 struct kvm_userspace_memory_region *mem); 508 509 int __kvm_set_memory_region(struct kvm *kvm, 509 510 struct kvm_userspace_memory_region *mem); 510 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 511 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 511 512 struct kvm_memory_slot *dont); 512 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); 513 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 514 + unsigned long npages); 513 515 void kvm_arch_memslots_updated(struct kvm *kvm); 514 516 int kvm_arch_prepare_memory_region(struct kvm *kvm, 515 517 struct kvm_memory_slot *memslot, ··· 671 671 } 672 672 #endif 673 673 674 + #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA 675 + void kvm_arch_register_noncoherent_dma(struct kvm *kvm); 676 + void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); 677 + bool kvm_arch_has_noncoherent_dma(struct kvm *kvm); 678 + #else 679 + static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm) 680 + { 681 + } 682 + 683 + static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) 684 + { 685 + } 686 + 687 + static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) 688 + { 689 + return false; 690 + } 691 + #endif 692 + 674 693 static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) 675 694 { 676 695 #ifdef __KVM_HAVE_ARCH_WQP ··· 766 747 int kvm_request_irq_source_id(struct kvm *kvm); 767 748 void 
kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 768 749 769 - /* For vcpu->arch.iommu_flags */ 770 - #define KVM_IOMMU_CACHE_COHERENCY 0x1 771 - 772 750 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 773 751 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); 774 752 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); ··· 805 789 806 790 /* KVM does not hold any references to rcu protected data when it 807 791 * switches CPU into a guest mode. In fact switching to a guest mode 808 - * is very similar to exiting to userspase from rcu point of view. In 792 + * is very similar to exiting to userspace from rcu point of view. In 809 793 * addition CPU may stay in a guest mode for quite a long time (up to 810 794 * one time slice). Lets treat guest mode as quiescent state, just like 811 795 * we do with user-mode execution. ··· 856 840 static inline int memslot_id(struct kvm *kvm, gfn_t gfn) 857 841 { 858 842 return gfn_to_memslot(kvm, gfn)->id; 859 - } 860 - 861 - static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) 862 - { 863 - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ 864 - return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - 865 - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); 866 843 } 867 844 868 845 static inline gfn_t ··· 1075 1066 1076 1067 extern struct kvm_device_ops kvm_mpic_ops; 1077 1068 extern struct kvm_device_ops kvm_xics_ops; 1069 + extern struct kvm_device_ops kvm_vfio_ops; 1078 1070 1079 1071 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1080 1072
+8
include/linux/sched.h
··· 286 286 } 287 287 #endif 288 288 289 + #ifdef CONFIG_DETECT_HUNG_TASK 290 + void reset_hung_task_detector(void); 291 + #else 292 + static inline void reset_hung_task_detector(void) 293 + { 294 + } 295 + #endif 296 + 289 297 /* Attach to any functions which should be ignored in wchan output. */ 290 298 #define __sched __attribute__((__section__(".sched.text"))) 291 299
+14
include/linux/srcu.h
··· 237 237 __srcu_read_unlock(sp, idx); 238 238 } 239 239 240 + /** 241 + * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock 242 + * 243 + * Converts the preceding srcu_read_unlock into a two-way memory barrier. 244 + * 245 + * Call this after srcu_read_unlock, to guarantee that all memory operations 246 + * that occur after smp_mb__after_srcu_read_unlock will appear to happen after 247 + * the preceding srcu_read_unlock. 248 + */ 249 + static inline void smp_mb__after_srcu_read_unlock(void) 250 + { 251 + /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ 252 + } 253 + 240 254 #endif
+4 -6
include/trace/events/kvm.h
··· 296 296 297 297 TRACE_EVENT( 298 298 kvm_async_pf_completed, 299 - TP_PROTO(unsigned long address, struct page *page, u64 gva), 300 - TP_ARGS(address, page, gva), 299 + TP_PROTO(unsigned long address, u64 gva), 300 + TP_ARGS(address, gva), 301 301 302 302 TP_STRUCT__entry( 303 303 __field(unsigned long, address) 304 - __field(pfn_t, pfn) 305 304 __field(u64, gva) 306 305 ), 307 306 308 307 TP_fast_assign( 309 308 __entry->address = address; 310 - __entry->pfn = page ? page_to_pfn(page) : 0; 311 309 __entry->gva = gva; 312 310 ), 313 311 314 - TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva, 315 - __entry->address, __entry->pfn) 312 + TP_printk("gva %#llx address %#lx", __entry->gva, 313 + __entry->address) 316 314 ); 317 315 318 316 #endif
+11
include/uapi/linux/kvm.h
··· 518 518 /* machine type bits, to be used as argument to KVM_CREATE_VM */ 519 519 #define KVM_VM_S390_UCONTROL 1 520 520 521 + /* on ppc, 0 indicate default, 1 should force HV and 2 PR */ 522 + #define KVM_VM_PPC_HV 1 523 + #define KVM_VM_PPC_PR 2 524 + 521 525 #define KVM_S390_SIE_PAGE_OFFSET 1 522 526 523 527 /* ··· 545 541 #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 546 542 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 547 543 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 544 + #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) 548 545 549 546 /* 550 547 * Extension capability list. ··· 673 668 #define KVM_CAP_IRQ_XICS 92 674 669 #define KVM_CAP_ARM_EL1_32BIT 93 675 670 #define KVM_CAP_SPAPR_MULTITCE 94 671 + #define KVM_CAP_EXT_EMUL_CPUID 95 676 672 677 673 #ifdef KVM_CAP_IRQ_ROUTING 678 674 ··· 849 843 #define KVM_DEV_TYPE_FSL_MPIC_20 1 850 844 #define KVM_DEV_TYPE_FSL_MPIC_42 2 851 845 #define KVM_DEV_TYPE_XICS 3 846 + #define KVM_DEV_TYPE_VFIO 4 847 + #define KVM_DEV_VFIO_GROUP 1 848 + #define KVM_DEV_VFIO_GROUP_ADD 1 849 + #define KVM_DEV_VFIO_GROUP_DEL 2 852 850 853 851 /* 854 852 * ioctls for VM fds ··· 1022 1012 /* VM is being stopped by host */ 1023 1013 #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) 1024 1014 #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) 1015 + #define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) 1025 1016 #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) 1026 1017 1027 1018 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
+11
kernel/hung_task.c
··· 207 207 return ret; 208 208 } 209 209 210 + static atomic_t reset_hung_task = ATOMIC_INIT(0); 211 + 212 + void reset_hung_task_detector(void) 213 + { 214 + atomic_set(&reset_hung_task, 1); 215 + } 216 + EXPORT_SYMBOL_GPL(reset_hung_task_detector); 217 + 210 218 /* 211 219 * kthread which checks for tasks stuck in D state 212 220 */ ··· 227 219 228 220 while (schedule_timeout_interruptible(timeout_jiffies(timeout))) 229 221 timeout = sysctl_hung_task_timeout_secs; 222 + 223 + if (atomic_xchg(&reset_hung_task, 0)) 224 + continue; 230 225 231 226 check_hung_uninterruptible_tasks(timeout); 232 227 }
+3
virt/kvm/Kconfig
··· 27 27 28 28 config HAVE_KVM_CPU_RELAX_INTERCEPT 29 29 bool 30 + 31 + config KVM_VFIO 32 + bool
+6 -16
virt/kvm/async_pf.c
··· 56 56 57 57 static void async_pf_execute(struct work_struct *work) 58 58 { 59 - struct page *page = NULL; 60 59 struct kvm_async_pf *apf = 61 60 container_of(work, struct kvm_async_pf, work); 62 61 struct mm_struct *mm = apf->mm; ··· 67 68 68 69 use_mm(mm); 69 70 down_read(&mm->mmap_sem); 70 - get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); 71 + get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); 71 72 up_read(&mm->mmap_sem); 72 73 unuse_mm(mm); 73 74 74 75 spin_lock(&vcpu->async_pf.lock); 75 76 list_add_tail(&apf->link, &vcpu->async_pf.done); 76 - apf->page = page; 77 - apf->done = true; 78 77 spin_unlock(&vcpu->async_pf.lock); 79 78 80 79 /* ··· 80 83 * this point 81 84 */ 82 85 83 - trace_kvm_async_pf_completed(addr, page, gva); 86 + trace_kvm_async_pf_completed(addr, gva); 84 87 85 88 if (waitqueue_active(&vcpu->wq)) 86 89 wake_up_interruptible(&vcpu->wq); ··· 96 99 struct kvm_async_pf *work = 97 100 list_entry(vcpu->async_pf.queue.next, 98 101 typeof(*work), queue); 99 - cancel_work_sync(&work->work); 100 102 list_del(&work->queue); 101 - if (!work->done) { /* work was canceled */ 103 + if (cancel_work_sync(&work->work)) { 102 104 mmdrop(work->mm); 103 105 kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ 104 106 kmem_cache_free(async_pf_cache, work); ··· 110 114 list_entry(vcpu->async_pf.done.next, 111 115 typeof(*work), link); 112 116 list_del(&work->link); 113 - if (!is_error_page(work->page)) 114 - kvm_release_page_clean(work->page); 115 117 kmem_cache_free(async_pf_cache, work); 116 118 } 117 119 spin_unlock(&vcpu->async_pf.lock); ··· 129 135 list_del(&work->link); 130 136 spin_unlock(&vcpu->async_pf.lock); 131 137 132 - if (work->page) 133 - kvm_arch_async_page_ready(vcpu, work); 138 + kvm_arch_async_page_ready(vcpu, work); 134 139 kvm_arch_async_page_present(vcpu, work); 135 140 136 141 list_del(&work->queue); 137 142 vcpu->async_pf.queued--; 138 - if (!is_error_page(work->page)) 139 - kvm_release_page_clean(work->page); 140 143 
kmem_cache_free(async_pf_cache, work); 141 144 } 142 145 } ··· 156 165 if (!work) 157 166 return 0; 158 167 159 - work->page = NULL; 160 - work->done = false; 168 + work->wakeup_all = false; 161 169 work->vcpu = vcpu; 162 170 work->gva = gva; 163 171 work->addr = gfn_to_hva(vcpu->kvm, gfn); ··· 196 206 if (!work) 197 207 return -ENOMEM; 198 208 199 - work->page = KVM_ERR_PTR_BAD_PAGE; 209 + work->wakeup_all = true; 200 210 INIT_LIST_HEAD(&work->queue); /* for list_del to work */ 201 211 202 212 spin_lock(&vcpu->async_pf.lock);
+20 -18
virt/kvm/iommu.c
··· 79 79 flags = IOMMU_READ; 80 80 if (!(slot->flags & KVM_MEM_READONLY)) 81 81 flags |= IOMMU_WRITE; 82 - if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) 82 + if (!kvm->arch.iommu_noncoherent) 83 83 flags |= IOMMU_CACHE; 84 84 85 85 ··· 101 101 102 102 /* Make sure gfn is aligned to the page size we want to map */ 103 103 while ((gfn << PAGE_SHIFT) & (page_size - 1)) 104 + page_size >>= 1; 105 + 106 + /* Make sure hva is aligned to the page size we want to map */ 107 + while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) 104 108 page_size >>= 1; 105 109 106 110 /* ··· 144 140 struct kvm_memslots *slots; 145 141 struct kvm_memory_slot *memslot; 146 142 143 + if (kvm->arch.iommu_noncoherent) 144 + kvm_arch_register_noncoherent_dma(kvm); 145 + 147 146 idx = srcu_read_lock(&kvm->srcu); 148 147 slots = kvm_memslots(kvm); 149 148 ··· 165 158 { 166 159 struct pci_dev *pdev = NULL; 167 160 struct iommu_domain *domain = kvm->arch.iommu_domain; 168 - int r, last_flags; 161 + int r; 162 + bool noncoherent; 169 163 170 164 /* check if iommu exists and in use */ 171 165 if (!domain) ··· 182 174 return r; 183 175 } 184 176 185 - last_flags = kvm->arch.iommu_flags; 186 - if (iommu_domain_has_cap(kvm->arch.iommu_domain, 187 - IOMMU_CAP_CACHE_COHERENCY)) 188 - kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY; 177 + noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain, 178 + IOMMU_CAP_CACHE_COHERENCY); 189 179 190 180 /* Check if need to update IOMMU page table for guest memory */ 191 - if ((last_flags ^ kvm->arch.iommu_flags) == 192 - KVM_IOMMU_CACHE_COHERENCY) { 181 + if (noncoherent != kvm->arch.iommu_noncoherent) { 193 182 kvm_iommu_unmap_memslots(kvm); 183 + kvm->arch.iommu_noncoherent = noncoherent; 194 184 r = kvm_iommu_map_memslots(kvm); 195 185 if (r) 196 186 goto out_unmap; ··· 196 190 197 191 pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; 198 192 199 - printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", 200 - assigned_dev->host_segnr, 201 - 
assigned_dev->host_busnr, 202 - PCI_SLOT(assigned_dev->host_devfn), 203 - PCI_FUNC(assigned_dev->host_devfn)); 193 + dev_info(&pdev->dev, "kvm assign device\n"); 204 194 205 195 return 0; 206 196 out_unmap: ··· 222 220 223 221 pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; 224 222 225 - printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", 226 - assigned_dev->host_segnr, 227 - assigned_dev->host_busnr, 228 - PCI_SLOT(assigned_dev->host_devfn), 229 - PCI_FUNC(assigned_dev->host_devfn)); 223 + dev_info(&pdev->dev, "kvm deassign device\n"); 230 224 231 225 return 0; 232 226 } ··· 334 336 335 337 srcu_read_unlock(&kvm->srcu, idx); 336 338 339 + if (kvm->arch.iommu_noncoherent) 340 + kvm_arch_unregister_noncoherent_dma(kvm); 341 + 337 342 return 0; 338 343 } 339 344 ··· 351 350 mutex_lock(&kvm->slots_lock); 352 351 kvm_iommu_unmap_memslots(kvm); 353 352 kvm->arch.iommu_domain = NULL; 353 + kvm->arch.iommu_noncoherent = false; 354 354 mutex_unlock(&kvm->slots_lock); 355 355 356 356 iommu_domain_free(domain);
+55 -79
virt/kvm/kvm_main.c
··· 70 70 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock 71 71 */ 72 72 73 - DEFINE_RAW_SPINLOCK(kvm_lock); 73 + DEFINE_SPINLOCK(kvm_lock); 74 + static DEFINE_RAW_SPINLOCK(kvm_count_lock); 74 75 LIST_HEAD(vm_list); 75 76 76 77 static cpumask_var_t cpus_hardware_enabled; ··· 187 186 ++kvm->stat.remote_tlb_flush; 188 187 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); 189 188 } 189 + EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); 190 190 191 191 void kvm_reload_remote_mmus(struct kvm *kvm) 192 192 { ··· 492 490 if (r) 493 491 goto out_err; 494 492 495 - raw_spin_lock(&kvm_lock); 493 + spin_lock(&kvm_lock); 496 494 list_add(&kvm->vm_list, &vm_list); 497 - raw_spin_unlock(&kvm_lock); 495 + spin_unlock(&kvm_lock); 498 496 499 497 return kvm; 500 498 ··· 542 540 /* 543 541 * Free any memory in @free but not in @dont. 544 542 */ 545 - static void kvm_free_physmem_slot(struct kvm_memory_slot *free, 543 + static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, 546 544 struct kvm_memory_slot *dont) 547 545 { 548 546 if (!dont || free->dirty_bitmap != dont->dirty_bitmap) 549 547 kvm_destroy_dirty_bitmap(free); 550 548 551 - kvm_arch_free_memslot(free, dont); 549 + kvm_arch_free_memslot(kvm, free, dont); 552 550 553 551 free->npages = 0; 554 552 } ··· 559 557 struct kvm_memory_slot *memslot; 560 558 561 559 kvm_for_each_memslot(memslot, slots) 562 - kvm_free_physmem_slot(memslot, NULL); 560 + kvm_free_physmem_slot(kvm, memslot, NULL); 563 561 564 562 kfree(kvm->memslots); 565 563 } ··· 583 581 struct mm_struct *mm = kvm->mm; 584 582 585 583 kvm_arch_sync_events(kvm); 586 - raw_spin_lock(&kvm_lock); 584 + spin_lock(&kvm_lock); 587 585 list_del(&kvm->vm_list); 588 - raw_spin_unlock(&kvm_lock); 586 + spin_unlock(&kvm_lock); 589 587 kvm_free_irq_routing(kvm); 590 588 for (i = 0; i < KVM_NR_BUSES; i++) 591 589 kvm_io_bus_destroy(kvm->buses[i]); ··· 823 821 if (change == KVM_MR_CREATE) { 824 822 new.userspace_addr = mem->userspace_addr; 825 823 826 - if 
(kvm_arch_create_memslot(&new, npages)) 824 + if (kvm_arch_create_memslot(kvm, &new, npages)) 827 825 goto out_free; 828 826 } 829 827 ··· 874 872 goto out_free; 875 873 } 876 874 875 + /* actual memory is freed via old in kvm_free_physmem_slot below */ 876 + if (change == KVM_MR_DELETE) { 877 + new.dirty_bitmap = NULL; 878 + memset(&new.arch, 0, sizeof(new.arch)); 879 + } 880 + 881 + old_memslots = install_new_memslots(kvm, slots, &new); 882 + 883 + kvm_arch_commit_memory_region(kvm, mem, &old, change); 884 + 885 + kvm_free_physmem_slot(kvm, &old, &new); 886 + kfree(old_memslots); 887 + 877 888 /* 878 889 * IOMMU mapping: New slots need to be mapped. Old slots need to be 879 890 * un-mapped and re-mapped if their base changes. Since base change ··· 898 883 */ 899 884 if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { 900 885 r = kvm_iommu_map_pages(kvm, &new); 901 - if (r) 902 - goto out_slots; 886 + return r; 903 887 } 904 - 905 - /* actual memory is freed via old in kvm_free_physmem_slot below */ 906 - if (change == KVM_MR_DELETE) { 907 - new.dirty_bitmap = NULL; 908 - memset(&new.arch, 0, sizeof(new.arch)); 909 - } 910 - 911 - old_memslots = install_new_memslots(kvm, slots, &new); 912 - 913 - kvm_arch_commit_memory_region(kvm, mem, &old, change); 914 - 915 - kvm_free_physmem_slot(&old, &new); 916 - kfree(old_memslots); 917 888 918 889 return 0; 919 890 920 891 out_slots: 921 892 kfree(slots); 922 893 out_free: 923 - kvm_free_physmem_slot(&new, &old); 894 + kvm_free_physmem_slot(kvm, &new, &old); 924 895 out: 925 896 return r; 926 897 } ··· 965 964 out: 966 965 return r; 967 966 } 967 + EXPORT_SYMBOL_GPL(kvm_get_dirty_log); 968 968 969 969 bool kvm_largepages_enabled(void) 970 970 { ··· 1656 1654 memslot = gfn_to_memslot(kvm, gfn); 1657 1655 mark_page_dirty_in_slot(kvm, memslot, gfn); 1658 1656 } 1657 + EXPORT_SYMBOL_GPL(mark_page_dirty); 1659 1658 1660 1659 /* 1661 1660 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 
··· 1682 1679 1683 1680 finish_wait(&vcpu->wq, &wait); 1684 1681 } 1682 + EXPORT_SYMBOL_GPL(kvm_vcpu_block); 1685 1683 1686 1684 #ifndef CONFIG_S390 1687 1685 /* ··· 2275 2271 ops = &kvm_xics_ops; 2276 2272 break; 2277 2273 #endif 2274 + #ifdef CONFIG_KVM_VFIO 2275 + case KVM_DEV_TYPE_VFIO: 2276 + ops = &kvm_vfio_ops; 2277 + break; 2278 + #endif 2278 2279 default: 2279 2280 return -ENODEV; 2280 2281 } ··· 2528 2519 } 2529 2520 #endif 2530 2521 2531 - static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2532 - { 2533 - struct page *page[1]; 2534 - unsigned long addr; 2535 - int npages; 2536 - gfn_t gfn = vmf->pgoff; 2537 - struct kvm *kvm = vma->vm_file->private_data; 2538 - 2539 - addr = gfn_to_hva(kvm, gfn); 2540 - if (kvm_is_error_hva(addr)) 2541 - return VM_FAULT_SIGBUS; 2542 - 2543 - npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, 2544 - NULL); 2545 - if (unlikely(npages != 1)) 2546 - return VM_FAULT_SIGBUS; 2547 - 2548 - vmf->page = page[0]; 2549 - return 0; 2550 - } 2551 - 2552 - static const struct vm_operations_struct kvm_vm_vm_ops = { 2553 - .fault = kvm_vm_fault, 2554 - }; 2555 - 2556 - static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) 2557 - { 2558 - vma->vm_ops = &kvm_vm_vm_ops; 2559 - return 0; 2560 - } 2561 - 2562 2522 static struct file_operations kvm_vm_fops = { 2563 2523 .release = kvm_vm_release, 2564 2524 .unlocked_ioctl = kvm_vm_ioctl, 2565 2525 #ifdef CONFIG_COMPAT 2566 2526 .compat_ioctl = kvm_vm_compat_ioctl, 2567 2527 #endif 2568 - .mmap = kvm_vm_mmap, 2569 2528 .llseek = noop_llseek, 2570 2529 }; 2571 2530 ··· 2660 2683 } 2661 2684 } 2662 2685 2663 - static void hardware_enable(void *junk) 2686 + static void hardware_enable(void) 2664 2687 { 2665 - raw_spin_lock(&kvm_lock); 2666 - hardware_enable_nolock(junk); 2667 - raw_spin_unlock(&kvm_lock); 2688 + raw_spin_lock(&kvm_count_lock); 2689 + if (kvm_usage_count) 2690 + hardware_enable_nolock(NULL); 2691 + 
raw_spin_unlock(&kvm_count_lock); 2668 2692 } 2669 2693 2670 2694 static void hardware_disable_nolock(void *junk) ··· 2678 2700 kvm_arch_hardware_disable(NULL); 2679 2701 } 2680 2702 2681 - static void hardware_disable(void *junk) 2703 + static void hardware_disable(void) 2682 2704 { 2683 - raw_spin_lock(&kvm_lock); 2684 - hardware_disable_nolock(junk); 2685 - raw_spin_unlock(&kvm_lock); 2705 + raw_spin_lock(&kvm_count_lock); 2706 + if (kvm_usage_count) 2707 + hardware_disable_nolock(NULL); 2708 + raw_spin_unlock(&kvm_count_lock); 2686 2709 } 2687 2710 2688 2711 static void hardware_disable_all_nolock(void) ··· 2697 2718 2698 2719 static void hardware_disable_all(void) 2699 2720 { 2700 - raw_spin_lock(&kvm_lock); 2721 + raw_spin_lock(&kvm_count_lock); 2701 2722 hardware_disable_all_nolock(); 2702 - raw_spin_unlock(&kvm_lock); 2723 + raw_spin_unlock(&kvm_count_lock); 2703 2724 } 2704 2725 2705 2726 static int hardware_enable_all(void) 2706 2727 { 2707 2728 int r = 0; 2708 2729 2709 - raw_spin_lock(&kvm_lock); 2730 + raw_spin_lock(&kvm_count_lock); 2710 2731 2711 2732 kvm_usage_count++; 2712 2733 if (kvm_usage_count == 1) { ··· 2719 2740 } 2720 2741 } 2721 2742 2722 - raw_spin_unlock(&kvm_lock); 2743 + raw_spin_unlock(&kvm_count_lock); 2723 2744 2724 2745 return r; 2725 2746 } ··· 2729 2750 { 2730 2751 int cpu = (long)v; 2731 2752 2732 - if (!kvm_usage_count) 2733 - return NOTIFY_OK; 2734 - 2735 2753 val &= ~CPU_TASKS_FROZEN; 2736 2754 switch (val) { 2737 2755 case CPU_DYING: 2738 2756 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 2739 2757 cpu); 2740 - hardware_disable(NULL); 2758 + hardware_disable(); 2741 2759 break; 2742 2760 case CPU_STARTING: 2743 2761 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 2744 2762 cpu); 2745 - hardware_enable(NULL); 2763 + hardware_enable(); 2746 2764 break; 2747 2765 } 2748 2766 return NOTIFY_OK; ··· 3032 3056 struct kvm *kvm; 3033 3057 3034 3058 *val = 0; 3035 - raw_spin_lock(&kvm_lock); 3059 + 
spin_lock(&kvm_lock); 3036 3060 list_for_each_entry(kvm, &vm_list, vm_list) 3037 3061 *val += *(u32 *)((void *)kvm + offset); 3038 - raw_spin_unlock(&kvm_lock); 3062 + spin_unlock(&kvm_lock); 3039 3063 return 0; 3040 3064 } 3041 3065 ··· 3049 3073 int i; 3050 3074 3051 3075 *val = 0; 3052 - raw_spin_lock(&kvm_lock); 3076 + spin_lock(&kvm_lock); 3053 3077 list_for_each_entry(kvm, &vm_list, vm_list) 3054 3078 kvm_for_each_vcpu(i, vcpu, kvm) 3055 3079 *val += *(u32 *)((void *)vcpu + offset); 3056 3080 3057 - raw_spin_unlock(&kvm_lock); 3081 + spin_unlock(&kvm_lock); 3058 3082 return 0; 3059 3083 } 3060 3084 ··· 3109 3133 static void kvm_resume(void) 3110 3134 { 3111 3135 if (kvm_usage_count) { 3112 - WARN_ON(raw_spin_is_locked(&kvm_lock)); 3136 + WARN_ON(raw_spin_is_locked(&kvm_count_lock)); 3113 3137 hardware_enable_nolock(NULL); 3114 3138 } 3115 3139 }
+264
virt/kvm/vfio.c
··· 1 + /* 2 + * VFIO-KVM bridge pseudo device 3 + * 4 + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. 5 + * Author: Alex Williamson <alex.williamson@redhat.com> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + */ 11 + 12 + #include <linux/errno.h> 13 + #include <linux/file.h> 14 + #include <linux/kvm_host.h> 15 + #include <linux/list.h> 16 + #include <linux/module.h> 17 + #include <linux/mutex.h> 18 + #include <linux/slab.h> 19 + #include <linux/uaccess.h> 20 + #include <linux/vfio.h> 21 + 22 + struct kvm_vfio_group { 23 + struct list_head node; 24 + struct vfio_group *vfio_group; 25 + }; 26 + 27 + struct kvm_vfio { 28 + struct list_head group_list; 29 + struct mutex lock; 30 + bool noncoherent; 31 + }; 32 + 33 + static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) 34 + { 35 + struct vfio_group *vfio_group; 36 + struct vfio_group *(*fn)(struct file *); 37 + 38 + fn = symbol_get(vfio_group_get_external_user); 39 + if (!fn) 40 + return ERR_PTR(-EINVAL); 41 + 42 + vfio_group = fn(filep); 43 + 44 + symbol_put(vfio_group_get_external_user); 45 + 46 + return vfio_group; 47 + } 48 + 49 + static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) 50 + { 51 + void (*fn)(struct vfio_group *); 52 + 53 + fn = symbol_get(vfio_group_put_external_user); 54 + if (!fn) 55 + return; 56 + 57 + fn(vfio_group); 58 + 59 + symbol_put(vfio_group_put_external_user); 60 + } 61 + 62 + /* 63 + * Groups can use the same or different IOMMU domains. If the same then 64 + * adding a new group may change the coherency of groups we've previously 65 + * been told about. We don't want to care about any of that so we retest 66 + * each group and bail as soon as we find one that's noncoherent. This 67 + * means we only ever [un]register_noncoherent_dma once for the whole device. 
68 + */ 69 + static void kvm_vfio_update_coherency(struct kvm_device *dev) 70 + { 71 + struct kvm_vfio *kv = dev->private; 72 + bool noncoherent = false; 73 + struct kvm_vfio_group *kvg; 74 + 75 + mutex_lock(&kv->lock); 76 + 77 + list_for_each_entry(kvg, &kv->group_list, node) { 78 + /* 79 + * TODO: We need an interface to check the coherency of 80 + * the IOMMU domain this group is using. For now, assume 81 + * it's always noncoherent. 82 + */ 83 + noncoherent = true; 84 + break; 85 + } 86 + 87 + if (noncoherent != kv->noncoherent) { 88 + kv->noncoherent = noncoherent; 89 + 90 + if (kv->noncoherent) 91 + kvm_arch_register_noncoherent_dma(dev->kvm); 92 + else 93 + kvm_arch_unregister_noncoherent_dma(dev->kvm); 94 + } 95 + 96 + mutex_unlock(&kv->lock); 97 + } 98 + 99 + static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) 100 + { 101 + struct kvm_vfio *kv = dev->private; 102 + struct vfio_group *vfio_group; 103 + struct kvm_vfio_group *kvg; 104 + void __user *argp = (void __user *)arg; 105 + struct fd f; 106 + int32_t fd; 107 + int ret; 108 + 109 + switch (attr) { 110 + case KVM_DEV_VFIO_GROUP_ADD: 111 + if (get_user(fd, (int32_t __user *)argp)) 112 + return -EFAULT; 113 + 114 + f = fdget(fd); 115 + if (!f.file) 116 + return -EBADF; 117 + 118 + vfio_group = kvm_vfio_group_get_external_user(f.file); 119 + fdput(f); 120 + 121 + if (IS_ERR(vfio_group)) 122 + return PTR_ERR(vfio_group); 123 + 124 + mutex_lock(&kv->lock); 125 + 126 + list_for_each_entry(kvg, &kv->group_list, node) { 127 + if (kvg->vfio_group == vfio_group) { 128 + mutex_unlock(&kv->lock); 129 + kvm_vfio_group_put_external_user(vfio_group); 130 + return -EEXIST; 131 + } 132 + } 133 + 134 + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); 135 + if (!kvg) { 136 + mutex_unlock(&kv->lock); 137 + kvm_vfio_group_put_external_user(vfio_group); 138 + return -ENOMEM; 139 + } 140 + 141 + list_add_tail(&kvg->node, &kv->group_list); 142 + kvg->vfio_group = vfio_group; 143 + 144 + mutex_unlock(&kv->lock); 
145 + 146 + kvm_vfio_update_coherency(dev); 147 + 148 + return 0; 149 + 150 + case KVM_DEV_VFIO_GROUP_DEL: 151 + if (get_user(fd, (int32_t __user *)argp)) 152 + return -EFAULT; 153 + 154 + f = fdget(fd); 155 + if (!f.file) 156 + return -EBADF; 157 + 158 + vfio_group = kvm_vfio_group_get_external_user(f.file); 159 + fdput(f); 160 + 161 + if (IS_ERR(vfio_group)) 162 + return PTR_ERR(vfio_group); 163 + 164 + ret = -ENOENT; 165 + 166 + mutex_lock(&kv->lock); 167 + 168 + list_for_each_entry(kvg, &kv->group_list, node) { 169 + if (kvg->vfio_group != vfio_group) 170 + continue; 171 + 172 + list_del(&kvg->node); 173 + kvm_vfio_group_put_external_user(kvg->vfio_group); 174 + kfree(kvg); 175 + ret = 0; 176 + break; 177 + } 178 + 179 + mutex_unlock(&kv->lock); 180 + 181 + kvm_vfio_group_put_external_user(vfio_group); 182 + 183 + kvm_vfio_update_coherency(dev); 184 + 185 + return ret; 186 + } 187 + 188 + return -ENXIO; 189 + } 190 + 191 + static int kvm_vfio_set_attr(struct kvm_device *dev, 192 + struct kvm_device_attr *attr) 193 + { 194 + switch (attr->group) { 195 + case KVM_DEV_VFIO_GROUP: 196 + return kvm_vfio_set_group(dev, attr->attr, attr->addr); 197 + } 198 + 199 + return -ENXIO; 200 + } 201 + 202 + static int kvm_vfio_has_attr(struct kvm_device *dev, 203 + struct kvm_device_attr *attr) 204 + { 205 + switch (attr->group) { 206 + case KVM_DEV_VFIO_GROUP: 207 + switch (attr->attr) { 208 + case KVM_DEV_VFIO_GROUP_ADD: 209 + case KVM_DEV_VFIO_GROUP_DEL: 210 + return 0; 211 + } 212 + 213 + break; 214 + } 215 + 216 + return -ENXIO; 217 + } 218 + 219 + static void kvm_vfio_destroy(struct kvm_device *dev) 220 + { 221 + struct kvm_vfio *kv = dev->private; 222 + struct kvm_vfio_group *kvg, *tmp; 223 + 224 + list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { 225 + kvm_vfio_group_put_external_user(kvg->vfio_group); 226 + list_del(&kvg->node); 227 + kfree(kvg); 228 + } 229 + 230 + kvm_vfio_update_coherency(dev); 231 + 232 + kfree(kv); 233 + kfree(dev); /* alloc by 
kvm_ioctl_create_device, free by .destroy */ 234 + } 235 + 236 + static int kvm_vfio_create(struct kvm_device *dev, u32 type) 237 + { 238 + struct kvm_device *tmp; 239 + struct kvm_vfio *kv; 240 + 241 + /* Only one VFIO "device" per VM */ 242 + list_for_each_entry(tmp, &dev->kvm->devices, vm_node) 243 + if (tmp->ops == &kvm_vfio_ops) 244 + return -EBUSY; 245 + 246 + kv = kzalloc(sizeof(*kv), GFP_KERNEL); 247 + if (!kv) 248 + return -ENOMEM; 249 + 250 + INIT_LIST_HEAD(&kv->group_list); 251 + mutex_init(&kv->lock); 252 + 253 + dev->private = kv; 254 + 255 + return 0; 256 + } 257 + 258 + struct kvm_device_ops kvm_vfio_ops = { 259 + .name = "kvm-vfio", 260 + .create = kvm_vfio_create, 261 + .destroy = kvm_vfio_destroy, 262 + .set_attr = kvm_vfio_set_attr, 263 + .has_attr = kvm_vfio_has_attr, 264 + };