Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM changes from Paolo Bonzini:
"Here are the 3.13 KVM changes. There was a lot of work on the PPC
side: that the HV and emulation flavors can now coexist in a single
kernel is probably the most interesting change from a user point of view.

On the x86 side there are nested virtualization improvements and a few
bugfixes.

ARM got transparent huge page support, improved overcommit, and
support for big endian guests.

Finally, there is a new interface to connect KVM with VFIO. This
helps with devices that use NoSnoop PCI transactions, letting the
driver in the guest execute WBINVD instructions. This includes some
nVidia cards on Windows, which fail to start without these patches and
the corresponding userspace changes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (146 commits)
kvm, vmx: Fix lazy FPU on nested guest
arm/arm64: KVM: PSCI: propagate caller endianness to the incoming vcpu
arm/arm64: KVM: MMIO support for BE guest
kvm, cpuid: Fix sparse warning
kvm: Delete prototype for non-existent function kvm_check_iopl
kvm: Delete prototype for non-existent function complete_pio
hung_task: add method to reset detector
pvclock: detect watchdog reset at pvclock read
kvm: optimize out smp_mb after srcu_read_unlock
srcu: API for barrier after srcu read unlock
KVM: remove vm mmap method
KVM: IOMMU: hva align mapping page size
KVM: x86: trace cpuid emulation when called from emulator
KVM: emulator: cleanup decode_register_operand() a bit
KVM: emulator: check rex prefix inside decode_register()
KVM: x86: fix emulation of "movzbl %bpl, %eax"
kvm_host: typo fix
KVM: x86: emulate SAHF instruction
MAINTAINERS: add tree for kvm.git
Documentation/kvm: add a 00-INDEX file
...

+5182 -2251
+24
Documentation/virtual/kvm/00-INDEX
··· 1 + 00-INDEX 2 + - this file. 3 + api.txt 4 + - KVM userspace API. 5 + cpuid.txt 6 + - KVM-specific cpuid leaves (x86). 7 + devices/ 8 + - KVM_CAP_DEVICE_CTRL userspace API. 9 + hypercalls.txt 10 + - KVM hypercalls. 11 + locking.txt 12 + - notes on KVM locks. 13 + mmu.txt 14 + - the x86 kvm shadow mmu. 15 + msr.txt 16 + - KVM-specific MSRs (x86). 17 + nested-vmx.txt 18 + - notes on nested virtualization for Intel x86 processors. 19 + ppc-pv.txt 20 + - the paravirtualization interface on PowerPC. 21 + review-checklist.txt 22 + - review checklist for KVM patches. 23 + timekeeping.txt 24 + - timekeeping virtualization for x86-based architectures.
+145 -7
Documentation/virtual/kvm/api.txt
··· 1122 1122 struct kvm_cpuid_entry2 entries[0]; 1123 1123 }; 1124 1124 1125 - #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 1126 - #define KVM_CPUID_FLAG_STATEFUL_FUNC 2 1127 - #define KVM_CPUID_FLAG_STATE_READ_NEXT 4 1125 + #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) 1126 + #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) 1127 + #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) 1128 1128 1129 1129 struct kvm_cpuid_entry2 { 1130 1130 __u32 function; ··· 1810 1810 PPC | KVM_REG_PPC_TLB3PS | 32 1811 1811 PPC | KVM_REG_PPC_EPTCFG | 32 1812 1812 PPC | KVM_REG_PPC_ICP_STATE | 64 1813 + PPC | KVM_REG_PPC_TB_OFFSET | 64 1814 + PPC | KVM_REG_PPC_SPMC1 | 32 1815 + PPC | KVM_REG_PPC_SPMC2 | 32 1816 + PPC | KVM_REG_PPC_IAMR | 64 1817 + PPC | KVM_REG_PPC_TFHAR | 64 1818 + PPC | KVM_REG_PPC_TFIAR | 64 1819 + PPC | KVM_REG_PPC_TEXASR | 64 1820 + PPC | KVM_REG_PPC_FSCR | 64 1821 + PPC | KVM_REG_PPC_PSPB | 32 1822 + PPC | KVM_REG_PPC_EBBHR | 64 1823 + PPC | KVM_REG_PPC_EBBRR | 64 1824 + PPC | KVM_REG_PPC_BESCR | 64 1825 + PPC | KVM_REG_PPC_TAR | 64 1826 + PPC | KVM_REG_PPC_DPDES | 64 1827 + PPC | KVM_REG_PPC_DAWR | 64 1828 + PPC | KVM_REG_PPC_DAWRX | 64 1829 + PPC | KVM_REG_PPC_CIABR | 64 1830 + PPC | KVM_REG_PPC_IC | 64 1831 + PPC | KVM_REG_PPC_VTB | 64 1832 + PPC | KVM_REG_PPC_CSIGR | 64 1833 + PPC | KVM_REG_PPC_TACR | 64 1834 + PPC | KVM_REG_PPC_TCSCR | 64 1835 + PPC | KVM_REG_PPC_PID | 64 1836 + PPC | KVM_REG_PPC_ACOP | 64 1837 + PPC | KVM_REG_PPC_VRSAVE | 32 1838 + PPC | KVM_REG_PPC_LPCR | 64 1839 + PPC | KVM_REG_PPC_PPR | 64 1840 + PPC | KVM_REG_PPC_ARCH_COMPAT 32 1841 + PPC | KVM_REG_PPC_TM_GPR0 | 64 1842 + ... 1843 + PPC | KVM_REG_PPC_TM_GPR31 | 64 1844 + PPC | KVM_REG_PPC_TM_VSR0 | 128 1845 + ... 
1846 + PPC | KVM_REG_PPC_TM_VSR63 | 128 1847 + PPC | KVM_REG_PPC_TM_CR | 64 1848 + PPC | KVM_REG_PPC_TM_LR | 64 1849 + PPC | KVM_REG_PPC_TM_CTR | 64 1850 + PPC | KVM_REG_PPC_TM_FPSCR | 64 1851 + PPC | KVM_REG_PPC_TM_AMR | 64 1852 + PPC | KVM_REG_PPC_TM_PPR | 64 1853 + PPC | KVM_REG_PPC_TM_VRSAVE | 64 1854 + PPC | KVM_REG_PPC_TM_VSCR | 32 1855 + PPC | KVM_REG_PPC_TM_DSCR | 64 1856 + PPC | KVM_REG_PPC_TM_TAR | 64 1813 1857 1814 1858 ARM registers are mapped using the lower 32 bits. The upper 16 of that 1815 1859 is the register group type, or coprocessor number: ··· 2348 2304 Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). 2349 2305 2350 2306 2351 - 4.83 KVM_GET_REG_LIST 2307 + 4.83 KVM_ARM_PREFERRED_TARGET 2308 + 2309 + Capability: basic 2310 + Architectures: arm, arm64 2311 + Type: vm ioctl 2312 + Parameters: struct struct kvm_vcpu_init (out) 2313 + Returns: 0 on success; -1 on error 2314 + Errors: 2315 + ENODEV: no preferred target available for the host 2316 + 2317 + This queries KVM for preferred CPU target type which can be emulated 2318 + by KVM on underlying host. 2319 + 2320 + The ioctl returns struct kvm_vcpu_init instance containing information 2321 + about preferred CPU target type and recommended features for it. The 2322 + kvm_vcpu_init->features bitmap returned will have feature bits set if 2323 + the preferred target recommends setting these features, but this is 2324 + not mandatory. 2325 + 2326 + The information returned by this ioctl can be used to prepare an instance 2327 + of struct kvm_vcpu_init for KVM_ARM_VCPU_INIT ioctl which will result in 2328 + in VCPU matching underlying host. 2329 + 2330 + 2331 + 4.84 KVM_GET_REG_LIST 2352 2332 2353 2333 Capability: basic 2354 2334 Architectures: arm, arm64 ··· 2391 2323 This ioctl returns the guest registers that are supported for the 2392 2324 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 
2393 2325 2394 - 2395 - 4.84 KVM_ARM_SET_DEVICE_ADDR 2326 + 4.85 KVM_ARM_SET_DEVICE_ADDR 2396 2327 2397 2328 Capability: KVM_CAP_ARM_SET_DEVICE_ADDR 2398 2329 Architectures: arm, arm64 ··· 2429 2362 KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the 2430 2363 base addresses will return -EEXIST. 2431 2364 2432 - 4.85 KVM_PPC_RTAS_DEFINE_TOKEN 2365 + 4.86 KVM_PPC_RTAS_DEFINE_TOKEN 2433 2366 2434 2367 Capability: KVM_CAP_PPC_RTAS 2435 2368 Architectures: ppc ··· 2726 2659 for general purpose registers) 2727 2660 2728 2661 }; 2662 + 2663 + 2664 + 4.81 KVM_GET_EMULATED_CPUID 2665 + 2666 + Capability: KVM_CAP_EXT_EMUL_CPUID 2667 + Architectures: x86 2668 + Type: system ioctl 2669 + Parameters: struct kvm_cpuid2 (in/out) 2670 + Returns: 0 on success, -1 on error 2671 + 2672 + struct kvm_cpuid2 { 2673 + __u32 nent; 2674 + __u32 flags; 2675 + struct kvm_cpuid_entry2 entries[0]; 2676 + }; 2677 + 2678 + The member 'flags' is used for passing flags from userspace. 2679 + 2680 + #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) 2681 + #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) 2682 + #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) 2683 + 2684 + struct kvm_cpuid_entry2 { 2685 + __u32 function; 2686 + __u32 index; 2687 + __u32 flags; 2688 + __u32 eax; 2689 + __u32 ebx; 2690 + __u32 ecx; 2691 + __u32 edx; 2692 + __u32 padding[3]; 2693 + }; 2694 + 2695 + This ioctl returns x86 cpuid features which are emulated by 2696 + kvm.Userspace can use the information returned by this ioctl to query 2697 + which features are emulated by kvm instead of being present natively. 2698 + 2699 + Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2 2700 + structure with the 'nent' field indicating the number of entries in 2701 + the variable-size array 'entries'. If the number of entries is too low 2702 + to describe the cpu capabilities, an error (E2BIG) is returned. If the 2703 + number is too high, the 'nent' field is adjusted and an error (ENOMEM) 2704 + is returned. 
If the number is just right, the 'nent' field is adjusted 2705 + to the number of valid entries in the 'entries' array, which is then 2706 + filled. 2707 + 2708 + The entries returned are the set CPUID bits of the respective features 2709 + which kvm emulates, as returned by the CPUID instruction, with unknown 2710 + or unsupported feature bits cleared. 2711 + 2712 + Features like x2apic, for example, may not be present in the host cpu 2713 + but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be 2714 + emulated efficiently and thus not included here. 2715 + 2716 + The fields in each entry are defined as follows: 2717 + 2718 + function: the eax value used to obtain the entry 2719 + index: the ecx value used to obtain the entry (for entries that are 2720 + affected by ecx) 2721 + flags: an OR of zero or more of the following: 2722 + KVM_CPUID_FLAG_SIGNIFCANT_INDEX: 2723 + if the index field is valid 2724 + KVM_CPUID_FLAG_STATEFUL_FUNC: 2725 + if cpuid for this function returns different values for successive 2726 + invocations; there will be several entries with the same function, 2727 + all with this flag set 2728 + KVM_CPUID_FLAG_STATE_READ_NEXT: 2729 + for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is 2730 + the first entry to be read by a cpu 2731 + eax, ebx, ecx, edx: the values returned by the cpuid instruction for 2732 + this function/index combination 2729 2733 2730 2734 2731 2735 6. Capabilities that can be enabled
+7
Documentation/virtual/kvm/cpuid.txt
··· 43 43 KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by 44 44 || || writing to msr 0x4b564d02 45 45 ------------------------------------------------------------------------------ 46 + KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by 47 + || || writing to msr 0x4b564d03. 48 + ------------------------------------------------------------------------------ 49 + KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt 50 + || || handler can be enabled by writing 51 + || || to msr 0x4b564d04. 52 + ------------------------------------------------------------------------------ 46 53 KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit 47 54 || || before enabling paravirtualized 48 55 || || spinlock support.
+22
Documentation/virtual/kvm/devices/vfio.txt
··· 1 + VFIO virtual device 2 + =================== 3 + 4 + Device types supported: 5 + KVM_DEV_TYPE_VFIO 6 + 7 + Only one VFIO instance may be created per VM. The created device 8 + tracks VFIO groups in use by the VM and features of those groups 9 + important to the correctness and acceleration of the VM. As groups 10 + are enabled and disabled for use by the VM, KVM should be updated 11 + about their presence. When registered with KVM, a reference to the 12 + VFIO-group is held by KVM. 13 + 14 + Groups: 15 + KVM_DEV_VFIO_GROUP 16 + 17 + KVM_DEV_VFIO_GROUP attributes: 18 + KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking 19 + KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking 20 + 21 + For each, kvm_device_attr.addr points to an int32_t file descriptor 22 + for the VFIO group.
+17 -2
Documentation/virtual/kvm/locking.txt
··· 132 132 ------------ 133 133 134 134 Name: kvm_lock 135 - Type: raw_spinlock 135 + Type: spinlock_t 136 136 Arch: any 137 137 Protects: - vm_list 138 - - hardware virtualization enable/disable 138 + 139 + Name: kvm_count_lock 140 + Type: raw_spinlock_t 141 + Arch: any 142 + Protects: - hardware virtualization enable/disable 139 143 Comment: 'raw' because hardware enabling/disabling must be atomic /wrt 140 144 migration. 141 145 ··· 155 151 Arch: any 156 152 Protects: -shadow page/shadow tlb entry 157 153 Comment: it is a spinlock since it is used in mmu notifier. 154 + 155 + Name: kvm->srcu 156 + Type: srcu lock 157 + Arch: any 158 + Protects: - kvm->memslots 159 + - kvm->buses 160 + Comment: The srcu read lock must be held while accessing memslots (e.g. 161 + when using gfn_to_* functions) and while accessing in-kernel 162 + MMIO/PIO address->device structure mapping (kvm->buses). 163 + The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu 164 + if it is needed by multiple functions.
+2 -1
MAINTAINERS
··· 4871 4871 M: Gleb Natapov <gleb@redhat.com> 4872 4872 M: Paolo Bonzini <pbonzini@redhat.com> 4873 4873 L: kvm@vger.kernel.org 4874 - W: http://linux-kvm.org 4874 + W: http://www.linux-kvm.org 4875 + T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git 4875 4876 S: Supported 4876 4877 F: Documentation/*/kvm*.txt 4877 4878 F: Documentation/virtual/kvm/
+6 -3
arch/arm/include/asm/kvm_arm.h
··· 57 57 * TSC: Trap SMC 58 58 * TSW: Trap cache operations by set/way 59 59 * TWI: Trap WFI 60 + * TWE: Trap WFE 60 61 * TIDCP: Trap L2CTLR/L2ECTLR 61 62 * BSU_IS: Upgrade barriers to the inner shareable domain 62 63 * FB: Force broadcast of all maintainance operations ··· 68 67 */ 69 68 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ 70 69 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ 71 - HCR_SWIO | HCR_TIDCP) 70 + HCR_TWE | HCR_SWIO | HCR_TIDCP) 72 71 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) 73 72 74 73 /* System Control Register (SCTLR) bits */ ··· 96 95 #define TTBCR_IRGN1 (3 << 24) 97 96 #define TTBCR_EPD1 (1 << 23) 98 97 #define TTBCR_A1 (1 << 22) 99 - #define TTBCR_T1SZ (3 << 16) 98 + #define TTBCR_T1SZ (7 << 16) 100 99 #define TTBCR_SH0 (3 << 12) 101 100 #define TTBCR_ORGN0 (3 << 10) 102 101 #define TTBCR_IRGN0 (3 << 8) 103 102 #define TTBCR_EPD0 (1 << 7) 104 - #define TTBCR_T0SZ 3 103 + #define TTBCR_T0SZ (7 << 0) 105 104 #define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0) 106 105 107 106 /* Hyp System Trap Register */ ··· 208 207 #define HSR_EC_IABT_HYP (0x21) 209 208 #define HSR_EC_DABT (0x24) 210 209 #define HSR_EC_DABT_HYP (0x25) 210 + 211 + #define HSR_WFI_IS_WFE (1U << 0) 211 212 212 213 #define HSR_HVC_IMM_MASK ((1UL << 16) - 1) 213 214
+1 -1
arch/arm/include/asm/kvm_asm.h
··· 39 39 #define c6_IFAR 17 /* Instruction Fault Address Register */ 40 40 #define c7_PAR 18 /* Physical Address Register */ 41 41 #define c7_PAR_high 19 /* PAR top 32 bits */ 42 - #define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */ 42 + #define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */ 43 43 #define c10_PRRR 21 /* Primary Region Remap Register */ 44 44 #define c10_NMRR 22 /* Normal Memory Remap Register */ 45 45 #define c12_VBAR 23 /* Vector Base Address Register */
+51
arch/arm/include/asm/kvm_emulate.h
··· 157 157 return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; 158 158 } 159 159 160 + static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) 161 + { 162 + return vcpu->arch.cp15[c0_MPIDR]; 163 + } 164 + 165 + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) 166 + { 167 + *vcpu_cpsr(vcpu) |= PSR_E_BIT; 168 + } 169 + 170 + static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) 171 + { 172 + return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT); 173 + } 174 + 175 + static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, 176 + unsigned long data, 177 + unsigned int len) 178 + { 179 + if (kvm_vcpu_is_be(vcpu)) { 180 + switch (len) { 181 + case 1: 182 + return data & 0xff; 183 + case 2: 184 + return be16_to_cpu(data & 0xffff); 185 + default: 186 + return be32_to_cpu(data); 187 + } 188 + } 189 + 190 + return data; /* Leave LE untouched */ 191 + } 192 + 193 + static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, 194 + unsigned long data, 195 + unsigned int len) 196 + { 197 + if (kvm_vcpu_is_be(vcpu)) { 198 + switch (len) { 199 + case 1: 200 + return data & 0xff; 201 + case 2: 202 + return cpu_to_be16(data & 0xffff); 203 + default: 204 + return cpu_to_be32(data); 205 + } 206 + } 207 + 208 + return data; /* Leave LE untouched */ 209 + } 210 + 160 211 #endif /* __ARM_KVM_EMULATE_H__ */
+1 -5
arch/arm/include/asm/kvm_host.h
··· 38 38 39 39 #define KVM_VCPU_MAX_FEATURES 1 40 40 41 - /* We don't currently support large pages. */ 42 - #define KVM_HPAGE_GFN_SHIFT(x) 0 43 - #define KVM_NR_PAGE_SIZES 1 44 - #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) 45 - 46 41 #include <kvm/arm_vgic.h> 47 42 48 43 struct kvm_vcpu; ··· 149 154 struct kvm_vcpu_init; 150 155 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 151 156 const struct kvm_vcpu_init *init); 157 + int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); 152 158 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 153 159 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 154 160 struct kvm_one_reg;
+14 -3
arch/arm/include/asm/kvm_mmu.h
··· 62 62 int kvm_mmu_init(void); 63 63 void kvm_clear_hyp_idmap(void); 64 64 65 + static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) 66 + { 67 + *pmd = new_pmd; 68 + flush_pmd_entry(pmd); 69 + } 70 + 65 71 static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) 66 72 { 67 73 *pte = new_pte; ··· 109 103 pte_val(*pte) |= L_PTE_S2_RDWR; 110 104 } 111 105 106 + static inline void kvm_set_s2pmd_writable(pmd_t *pmd) 107 + { 108 + pmd_val(*pmd) |= L_PMD_S2_RDWR; 109 + } 110 + 112 111 struct kvm; 113 112 114 - static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) 113 + static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, 114 + unsigned long size) 115 115 { 116 116 /* 117 117 * If we are going to insert an instruction page and the icache is ··· 132 120 * need any kind of flushing (DDI 0406C.b - Page B3-1392). 133 121 */ 134 122 if (icache_is_pipt()) { 135 - unsigned long hva = gfn_to_hva(kvm, gfn); 136 - __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); 123 + __cpuc_coherent_user_range(hva, hva + size); 137 124 } else if (!icache_is_vivt_asid_tagged()) { 138 125 /* any kind of VIPT cache */ 139 126 __flush_icache_all();
+2
arch/arm/include/asm/pgtable-3level.h
··· 126 126 #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ 127 127 #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ 128 128 129 + #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ 130 + 129 131 /* 130 132 * Hyp-mode PL2 PTE definitions for LPAE. 131 133 */
+2 -1
arch/arm/include/uapi/asm/kvm.h
··· 63 63 64 64 /* Supported Processor Types */ 65 65 #define KVM_ARM_TARGET_CORTEX_A15 0 66 - #define KVM_ARM_NUM_TARGETS 1 66 + #define KVM_ARM_TARGET_CORTEX_A7 1 67 + #define KVM_ARM_NUM_TARGETS 2 67 68 68 69 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ 69 70 #define KVM_ARM_DEVICE_TYPE_SHIFT 0
+1
arch/arm/kvm/Kconfig
··· 20 20 bool "Kernel-based Virtual Machine (KVM) support" 21 21 select PREEMPT_NOTIFIERS 22 22 select ANON_INODES 23 + select HAVE_KVM_CPU_RELAX_INTERCEPT 23 24 select KVM_MMIO 24 25 select KVM_ARM_HOST 25 26 depends on ARM_VIRT_EXT && ARM_LPAE
+1 -1
arch/arm/kvm/Makefile
··· 19 19 20 20 obj-y += kvm-arm.o init.o interrupts.o 21 21 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o 22 - obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o 22 + obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o 23 23 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o 24 24 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+16 -2
arch/arm/kvm/arm.c
··· 152 152 return VM_FAULT_SIGBUS; 153 153 } 154 154 155 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 155 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 156 156 struct kvm_memory_slot *dont) 157 157 { 158 158 } 159 159 160 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 160 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 161 + unsigned long npages) 161 162 { 162 163 return 0; 163 164 } ··· 797 796 if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) 798 797 return -EFAULT; 799 798 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); 799 + } 800 + case KVM_ARM_PREFERRED_TARGET: { 801 + int err; 802 + struct kvm_vcpu_init init; 803 + 804 + err = kvm_vcpu_preferred_target(&init); 805 + if (err) 806 + return err; 807 + 808 + if (copy_to_user(argp, &init, sizeof(init))) 809 + return -EFAULT; 810 + 811 + return 0; 800 812 } 801 813 default: 802 814 return -EINVAL;
+120
arch/arm/kvm/coproc.c
··· 71 71 return 1; 72 72 } 73 73 74 + static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 75 + { 76 + /* 77 + * Compute guest MPIDR. We build a virtual cluster out of the 78 + * vcpu_id, but we read the 'U' bit from the underlying 79 + * hardware directly. 80 + */ 81 + vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) | 82 + ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) | 83 + (vcpu->vcpu_id & 3)); 84 + } 85 + 86 + /* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */ 87 + static bool access_actlr(struct kvm_vcpu *vcpu, 88 + const struct coproc_params *p, 89 + const struct coproc_reg *r) 90 + { 91 + if (p->is_write) 92 + return ignore_write(vcpu, p); 93 + 94 + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; 95 + return true; 96 + } 97 + 98 + /* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */ 99 + static bool access_cbar(struct kvm_vcpu *vcpu, 100 + const struct coproc_params *p, 101 + const struct coproc_reg *r) 102 + { 103 + if (p->is_write) 104 + return write_to_read_only(vcpu, p); 105 + return read_zero(vcpu, p); 106 + } 107 + 108 + /* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */ 109 + static bool access_l2ctlr(struct kvm_vcpu *vcpu, 110 + const struct coproc_params *p, 111 + const struct coproc_reg *r) 112 + { 113 + if (p->is_write) 114 + return ignore_write(vcpu, p); 115 + 116 + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; 117 + return true; 118 + } 119 + 120 + static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 121 + { 122 + u32 l2ctlr, ncores; 123 + 124 + asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); 125 + l2ctlr &= ~(3 << 24); 126 + ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; 127 + /* How many cores in the current cluster and the next ones */ 128 + ncores -= (vcpu->vcpu_id & ~3); 129 + /* Cap it to the maximum number of cores in a single cluster */ 130 + ncores = min(ncores, 3U); 131 + l2ctlr |= (ncores & 3) << 24; 132 + 133 + vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; 134 
+ } 135 + 136 + static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 137 + { 138 + u32 actlr; 139 + 140 + /* ACTLR contains SMP bit: make sure you create all cpus first! */ 141 + asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); 142 + /* Make the SMP bit consistent with the guest configuration */ 143 + if (atomic_read(&vcpu->kvm->online_vcpus) > 1) 144 + actlr |= 1U << 6; 145 + else 146 + actlr &= ~(1U << 6); 147 + 148 + vcpu->arch.cp15[c1_ACTLR] = actlr; 149 + } 150 + 151 + /* 152 + * TRM entries: A7:4.3.50, A15:4.3.49 153 + * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). 154 + */ 155 + static bool access_l2ectlr(struct kvm_vcpu *vcpu, 156 + const struct coproc_params *p, 157 + const struct coproc_reg *r) 158 + { 159 + if (p->is_write) 160 + return ignore_write(vcpu, p); 161 + 162 + *vcpu_reg(vcpu, p->Rt1) = 0; 163 + return true; 164 + } 165 + 74 166 /* See note at ARM ARM B1.14.4 */ 75 167 static bool access_dcsw(struct kvm_vcpu *vcpu, 76 168 const struct coproc_params *p, ··· 245 153 * registers preceding 32-bit ones. 246 154 */ 247 155 static const struct coproc_reg cp15_regs[] = { 156 + /* MPIDR: we use VMPIDR for guest access. */ 157 + { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, 158 + NULL, reset_mpidr, c0_MPIDR }, 159 + 248 160 /* CSSELR: swapped by interrupt.S. */ 249 161 { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32, 250 162 NULL, reset_unknown, c0_CSSELR }, 163 + 164 + /* ACTLR: trapped by HCR.TAC bit. */ 165 + { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, 166 + access_actlr, reset_actlr, c1_ACTLR }, 167 + 168 + /* CPACR: swapped by interrupt.S. */ 169 + { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, 170 + NULL, reset_val, c1_CPACR, 0x00000000 }, 251 171 252 172 /* TTBR0/TTBR1: swapped by interrupt.S. 
*/ 253 173 { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 }, ··· 298 194 { CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw}, 299 195 { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw}, 300 196 { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw}, 197 + /* 198 + * L2CTLR access (guest wants to know #CPUs). 199 + */ 200 + { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, 201 + access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, 202 + { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, 203 + 301 204 /* 302 205 * Dummy performance monitor implementation. 303 206 */ ··· 345 234 /* CNTKCTL: swapped by interrupt.S. */ 346 235 { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, 347 236 NULL, reset_val, c14_CNTKCTL, 0x00000000 }, 237 + 238 + /* The Configuration Base Address Register. */ 239 + { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, 348 240 }; 349 241 350 242 /* Target specific emulation tables */ ··· 355 241 356 242 void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table) 357 243 { 244 + unsigned int i; 245 + 246 + for (i = 1; i < table->num; i++) 247 + BUG_ON(cmp_reg(&table->table[i-1], 248 + &table->table[i]) >= 0); 249 + 358 250 target_tables[table->target] = table; 359 251 } 360 252
+1 -116
arch/arm/kvm/coproc_a15.c
··· 17 17 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 18 */ 19 19 #include <linux/kvm_host.h> 20 - #include <asm/cputype.h> 21 - #include <asm/kvm_arm.h> 22 - #include <asm/kvm_host.h> 23 - #include <asm/kvm_emulate.h> 24 20 #include <asm/kvm_coproc.h> 21 + #include <asm/kvm_emulate.h> 25 22 #include <linux/init.h> 26 23 27 - static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 28 - { 29 - /* 30 - * Compute guest MPIDR: 31 - * (Even if we present only one VCPU to the guest on an SMP 32 - * host we don't set the U bit in the MPIDR, or vice versa, as 33 - * revealing the underlying hardware properties is likely to 34 - * be the best choice). 35 - */ 36 - vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK) 37 - | (vcpu->vcpu_id & MPIDR_LEVEL_MASK); 38 - } 39 - 40 24 #include "coproc.h" 41 - 42 - /* A15 TRM 4.3.28: RO WI */ 43 - static bool access_actlr(struct kvm_vcpu *vcpu, 44 - const struct coproc_params *p, 45 - const struct coproc_reg *r) 46 - { 47 - if (p->is_write) 48 - return ignore_write(vcpu, p); 49 - 50 - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR]; 51 - return true; 52 - } 53 - 54 - /* A15 TRM 4.3.60: R/O. */ 55 - static bool access_cbar(struct kvm_vcpu *vcpu, 56 - const struct coproc_params *p, 57 - const struct coproc_reg *r) 58 - { 59 - if (p->is_write) 60 - return write_to_read_only(vcpu, p); 61 - return read_zero(vcpu, p); 62 - } 63 - 64 - /* A15 TRM 4.3.48: R/O WI. 
*/ 65 - static bool access_l2ctlr(struct kvm_vcpu *vcpu, 66 - const struct coproc_params *p, 67 - const struct coproc_reg *r) 68 - { 69 - if (p->is_write) 70 - return ignore_write(vcpu, p); 71 - 72 - *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR]; 73 - return true; 74 - } 75 - 76 - static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 77 - { 78 - u32 l2ctlr, ncores; 79 - 80 - asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); 81 - l2ctlr &= ~(3 << 24); 82 - ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1; 83 - l2ctlr |= (ncores & 3) << 24; 84 - 85 - vcpu->arch.cp15[c9_L2CTLR] = l2ctlr; 86 - } 87 - 88 - static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) 89 - { 90 - u32 actlr; 91 - 92 - /* ACTLR contains SMP bit: make sure you create all cpus first! */ 93 - asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); 94 - /* Make the SMP bit consistent with the guest configuration */ 95 - if (atomic_read(&vcpu->kvm->online_vcpus) > 1) 96 - actlr |= 1U << 6; 97 - else 98 - actlr &= ~(1U << 6); 99 - 100 - vcpu->arch.cp15[c1_ACTLR] = actlr; 101 - } 102 - 103 - /* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */ 104 - static bool access_l2ectlr(struct kvm_vcpu *vcpu, 105 - const struct coproc_params *p, 106 - const struct coproc_reg *r) 107 - { 108 - if (p->is_write) 109 - return ignore_write(vcpu, p); 110 - 111 - *vcpu_reg(vcpu, p->Rt1) = 0; 112 - return true; 113 - } 114 25 115 26 /* 116 27 * A15-specific CP15 registers. ··· 32 121 * registers preceding 32-bit ones. 33 122 */ 34 123 static const struct coproc_reg a15_regs[] = { 35 - /* MPIDR: we use VMPIDR for guest access. */ 36 - { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32, 37 - NULL, reset_mpidr, c0_MPIDR }, 38 - 39 124 /* SCTLR: swapped by interrupt.S. */ 40 125 { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, 41 126 NULL, reset_val, c1_SCTLR, 0x00C50078 }, 42 - /* ACTLR: trapped by HCR.TAC bit. 
*/ 43 - { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, 44 - access_actlr, reset_actlr, c1_ACTLR }, 45 - /* CPACR: swapped by interrupt.S. */ 46 - { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32, 47 - NULL, reset_val, c1_CPACR, 0x00000000 }, 48 - 49 - /* 50 - * L2CTLR access (guest wants to know #CPUs). 51 - */ 52 - { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, 53 - access_l2ctlr, reset_l2ctlr, c9_L2CTLR }, 54 - { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr}, 55 - 56 - /* The Configuration Base Address Register. */ 57 - { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, 58 127 }; 59 128 60 129 static struct kvm_coproc_target_table a15_target_table = { ··· 45 154 46 155 static int __init coproc_a15_init(void) 47 156 { 48 - unsigned int i; 49 - 50 - for (i = 1; i < ARRAY_SIZE(a15_regs); i++) 51 - BUG_ON(cmp_reg(&a15_regs[i-1], 52 - &a15_regs[i]) >= 0); 53 - 54 157 kvm_register_target_coproc_table(&a15_target_table); 55 158 return 0; 56 159 }
+54
arch/arm/kvm/coproc_a7.c
··· 1 + /* 2 + * Copyright (C) 2012 - Virtual Open Systems and Columbia University 3 + * Copyright (C) 2013 - ARM Ltd 4 + * 5 + * Authors: Rusty Russell <rusty@rustcorp.au> 6 + * Christoffer Dall <c.dall@virtualopensystems.com> 7 + * Jonathan Austin <jonathan.austin@arm.com> 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of the GNU General Public License, version 2, as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, 14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 + * GNU General Public License for more details. 17 + * 18 + * You should have received a copy of the GNU General Public License 19 + * along with this program; if not, write to the Free Software 20 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 21 + */ 22 + #include <linux/kvm_host.h> 23 + #include <asm/kvm_coproc.h> 24 + #include <asm/kvm_emulate.h> 25 + #include <linux/init.h> 26 + 27 + #include "coproc.h" 28 + 29 + /* 30 + * Cortex-A7 specific CP15 registers. 31 + * CRn denotes the primary register number, but is copied to the CRm in the 32 + * user space API for 64-bit register access in line with the terminology used 33 + * in the ARM ARM. 34 + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit 35 + * registers preceding 32-bit ones. 36 + */ 37 + static const struct coproc_reg a7_regs[] = { 38 + /* SCTLR: swapped by interrupt.S. 
*/ 39 + { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, 40 + NULL, reset_val, c1_SCTLR, 0x00C50878 }, 41 + }; 42 + 43 + static struct kvm_coproc_target_table a7_target_table = { 44 + .target = KVM_ARM_TARGET_CORTEX_A7, 45 + .table = a7_regs, 46 + .num = ARRAY_SIZE(a7_regs), 47 + }; 48 + 49 + static int __init coproc_a7_init(void) 50 + { 51 + kvm_register_target_coproc_table(&a7_target_table); 52 + return 0; 53 + } 54 + late_initcall(coproc_a7_init);
+1 -1
arch/arm/kvm/emulate.c
··· 354 354 *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; 355 355 356 356 if (is_pabt) { 357 - /* Set DFAR and DFSR */ 357 + /* Set IFAR and IFSR */ 358 358 vcpu->arch.cp15[c6_IFAR] = addr; 359 359 is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31); 360 360 /* Always give debug fault for now - should give guest a clue */
+23 -1
arch/arm/kvm/guest.c
··· 190 190 return -EINVAL; 191 191 192 192 switch (part_number) { 193 + case ARM_CPU_PART_CORTEX_A7: 194 + return KVM_ARM_TARGET_CORTEX_A7; 193 195 case ARM_CPU_PART_CORTEX_A15: 194 196 return KVM_ARM_TARGET_CORTEX_A15; 195 197 default: ··· 204 202 { 205 203 unsigned int i; 206 204 207 - /* We can only do a cortex A15 for now. */ 205 + /* We can only cope with guest==host and only on A15/A7 (for now). */ 208 206 if (init->target != kvm_target_cpu()) 209 207 return -EINVAL; 210 208 ··· 222 220 223 221 /* Now we know what it is, we can reset it. */ 224 222 return kvm_reset_vcpu(vcpu); 223 + } 224 + 225 + int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) 226 + { 227 + int target = kvm_target_cpu(); 228 + 229 + if (target < 0) 230 + return -ENODEV; 231 + 232 + memset(init, 0, sizeof(*init)); 233 + 234 + /* 235 + * For now, we don't return any features. 236 + * In future, we might use features to return target 237 + * specific features available for the preferred 238 + * target type. 239 + */ 240 + init->target = (__u32)target; 241 + 242 + return 0; 225 243 } 226 244 227 245 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+13 -7
arch/arm/kvm/handle_exit.c
··· 73 73 } 74 74 75 75 /** 76 - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest 76 + * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests 77 77 * @vcpu: the vcpu pointer 78 78 * @run: the kvm_run structure pointer 79 79 * 80 - * Simply sets the wait_for_interrupts flag on the vcpu structure, which will 81 - * halt execution of world-switches and schedule other host processes until 82 - * there is an incoming IRQ or FIQ to the VM. 80 + * WFE: Yield the CPU and come back to this vcpu when the scheduler 81 + * decides to. 82 + * WFI: Simply call kvm_vcpu_block(), which will halt execution of 83 + * world-switches and schedule other host processes until there is an 84 + * incoming IRQ or FIQ to the VM. 83 85 */ 84 - static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) 86 + static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) 85 87 { 86 88 trace_kvm_wfi(*vcpu_pc(vcpu)); 87 - kvm_vcpu_block(vcpu); 89 + if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) 90 + kvm_vcpu_on_spin(vcpu); 91 + else 92 + kvm_vcpu_block(vcpu); 93 + 88 94 return 1; 89 95 } 90 96 91 97 static exit_handle_fn arm_exit_handlers[] = { 92 - [HSR_EC_WFI] = kvm_handle_wfi, 98 + [HSR_EC_WFI] = kvm_handle_wfx, 93 99 [HSR_EC_CP15_32] = kvm_handle_cp15_32, 94 100 [HSR_EC_CP15_64] = kvm_handle_cp15_64, 95 101 [HSR_EC_CP14_MR] = kvm_handle_cp14_access,
+75 -11
arch/arm/kvm/mmio.c
··· 23 23 24 24 #include "trace.h" 25 25 26 + static void mmio_write_buf(char *buf, unsigned int len, unsigned long data) 27 + { 28 + void *datap = NULL; 29 + union { 30 + u8 byte; 31 + u16 hword; 32 + u32 word; 33 + u64 dword; 34 + } tmp; 35 + 36 + switch (len) { 37 + case 1: 38 + tmp.byte = data; 39 + datap = &tmp.byte; 40 + break; 41 + case 2: 42 + tmp.hword = data; 43 + datap = &tmp.hword; 44 + break; 45 + case 4: 46 + tmp.word = data; 47 + datap = &tmp.word; 48 + break; 49 + case 8: 50 + tmp.dword = data; 51 + datap = &tmp.dword; 52 + break; 53 + } 54 + 55 + memcpy(buf, datap, len); 56 + } 57 + 58 + static unsigned long mmio_read_buf(char *buf, unsigned int len) 59 + { 60 + unsigned long data = 0; 61 + union { 62 + u16 hword; 63 + u32 word; 64 + u64 dword; 65 + } tmp; 66 + 67 + switch (len) { 68 + case 1: 69 + data = buf[0]; 70 + break; 71 + case 2: 72 + memcpy(&tmp.hword, buf, len); 73 + data = tmp.hword; 74 + break; 75 + case 4: 76 + memcpy(&tmp.word, buf, len); 77 + data = tmp.word; 78 + break; 79 + case 8: 80 + memcpy(&tmp.dword, buf, len); 81 + data = tmp.dword; 82 + break; 83 + } 84 + 85 + return data; 86 + } 87 + 26 88 /** 27 89 * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation 28 90 * @vcpu: The VCPU pointer ··· 95 33 */ 96 34 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) 97 35 { 98 - unsigned long *dest; 36 + unsigned long data; 99 37 unsigned int len; 100 38 int mask; 101 39 102 40 if (!run->mmio.is_write) { 103 - dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt); 104 - *dest = 0; 105 - 106 41 len = run->mmio.len; 107 42 if (len > sizeof(unsigned long)) 108 43 return -EINVAL; 109 44 110 - memcpy(dest, run->mmio.data, len); 111 - 112 - trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, 113 - *((u64 *)run->mmio.data)); 45 + data = mmio_read_buf(run->mmio.data, len); 114 46 115 47 if (vcpu->arch.mmio_decode.sign_extend && 116 48 len < sizeof(unsigned long)) { 117 49 mask = 1U << ((len * 8) - 1); 
118 - *dest = (*dest ^ mask) - mask; 50 + data = (data ^ mask) - mask; 119 51 } 52 + 53 + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, 54 + data); 55 + data = vcpu_data_host_to_guest(vcpu, data, len); 56 + *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data; 120 57 } 121 58 122 59 return 0; ··· 166 105 phys_addr_t fault_ipa) 167 106 { 168 107 struct kvm_exit_mmio mmio; 108 + unsigned long data; 169 109 unsigned long rt; 170 110 int ret; 171 111 ··· 187 125 } 188 126 189 127 rt = vcpu->arch.mmio_decode.rt; 128 + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len); 129 + 190 130 trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE : 191 131 KVM_TRACE_MMIO_READ_UNSATISFIED, 192 132 mmio.len, fault_ipa, 193 - (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0); 133 + (mmio.is_write) ? data : 0); 194 134 195 135 if (mmio.is_write) 196 - memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len); 136 + mmio_write_buf(mmio.data, mmio.len, data); 197 137 198 138 if (vgic_handle_mmio(vcpu, run, &mmio)) 199 139 return 1;
+185 -38
arch/arm/kvm/mmu.c
··· 19 19 #include <linux/mman.h> 20 20 #include <linux/kvm_host.h> 21 21 #include <linux/io.h> 22 + #include <linux/hugetlb.h> 22 23 #include <trace/events/kvm.h> 23 24 #include <asm/pgalloc.h> 24 25 #include <asm/cacheflush.h> ··· 41 40 static unsigned long hyp_idmap_start; 42 41 static unsigned long hyp_idmap_end; 43 42 static phys_addr_t hyp_idmap_vector; 43 + 44 + #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) 44 45 45 46 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 46 47 { ··· 96 93 97 94 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) 98 95 { 99 - pmd_t *pmd_table = pmd_offset(pud, 0); 100 - pud_clear(pud); 101 - kvm_tlb_flush_vmid_ipa(kvm, addr); 102 - pmd_free(NULL, pmd_table); 96 + if (pud_huge(*pud)) { 97 + pud_clear(pud); 98 + kvm_tlb_flush_vmid_ipa(kvm, addr); 99 + } else { 100 + pmd_t *pmd_table = pmd_offset(pud, 0); 101 + pud_clear(pud); 102 + kvm_tlb_flush_vmid_ipa(kvm, addr); 103 + pmd_free(NULL, pmd_table); 104 + } 103 105 put_page(virt_to_page(pud)); 104 106 } 105 107 106 108 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) 107 109 { 108 - pte_t *pte_table = pte_offset_kernel(pmd, 0); 109 - pmd_clear(pmd); 110 - kvm_tlb_flush_vmid_ipa(kvm, addr); 111 - pte_free_kernel(NULL, pte_table); 110 + if (kvm_pmd_huge(*pmd)) { 111 + pmd_clear(pmd); 112 + kvm_tlb_flush_vmid_ipa(kvm, addr); 113 + } else { 114 + pte_t *pte_table = pte_offset_kernel(pmd, 0); 115 + pmd_clear(pmd); 116 + kvm_tlb_flush_vmid_ipa(kvm, addr); 117 + pte_free_kernel(NULL, pte_table); 118 + } 112 119 put_page(virt_to_page(pmd)); 113 120 } 114 121 ··· 149 136 continue; 150 137 } 151 138 139 + if (pud_huge(*pud)) { 140 + /* 141 + * If we are dealing with a huge pud, just clear it and 142 + * move on. 
143 + */ 144 + clear_pud_entry(kvm, pud, addr); 145 + addr = pud_addr_end(addr, end); 146 + continue; 147 + } 148 + 152 149 pmd = pmd_offset(pud, addr); 153 150 if (pmd_none(*pmd)) { 154 151 addr = pmd_addr_end(addr, end); 155 152 continue; 156 153 } 157 154 158 - pte = pte_offset_kernel(pmd, addr); 159 - clear_pte_entry(kvm, pte, addr); 160 - next = addr + PAGE_SIZE; 155 + if (!kvm_pmd_huge(*pmd)) { 156 + pte = pte_offset_kernel(pmd, addr); 157 + clear_pte_entry(kvm, pte, addr); 158 + next = addr + PAGE_SIZE; 159 + } 161 160 162 - /* If we emptied the pte, walk back up the ladder */ 163 - if (page_empty(pte)) { 161 + /* 162 + * If the pmd entry is to be cleared, walk back up the ladder 163 + */ 164 + if (kvm_pmd_huge(*pmd) || page_empty(pte)) { 164 165 clear_pmd_entry(kvm, pmd, addr); 165 166 next = pmd_addr_end(addr, end); 166 167 if (page_empty(pmd) && !page_empty(pud)) { ··· 447 420 kvm->arch.pgd = NULL; 448 421 } 449 422 450 - 451 - static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 452 - phys_addr_t addr, const pte_t *new_pte, bool iomap) 423 + static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 424 + phys_addr_t addr) 453 425 { 454 426 pgd_t *pgd; 455 427 pud_t *pud; 456 428 pmd_t *pmd; 457 - pte_t *pte, old_pte; 458 429 459 - /* Create 2nd stage page table mapping - Level 1 */ 460 430 pgd = kvm->arch.pgd + pgd_index(addr); 461 431 pud = pud_offset(pgd, addr); 462 432 if (pud_none(*pud)) { 463 433 if (!cache) 464 - return 0; /* ignore calls from kvm_set_spte_hva */ 434 + return NULL; 465 435 pmd = mmu_memory_cache_alloc(cache); 466 436 pud_populate(NULL, pud, pmd); 467 437 get_page(virt_to_page(pud)); 468 438 } 469 439 470 - pmd = pmd_offset(pud, addr); 440 + return pmd_offset(pud, addr); 441 + } 471 442 472 - /* Create 2nd stage page table mapping - Level 2 */ 443 + static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache 444 + *cache, phys_addr_t addr, const pmd_t *new_pmd) 445 + 
{ 446 + pmd_t *pmd, old_pmd; 447 + 448 + pmd = stage2_get_pmd(kvm, cache, addr); 449 + VM_BUG_ON(!pmd); 450 + 451 + /* 452 + * Mapping in huge pages should only happen through a fault. If a 453 + * page is merged into a transparent huge page, the individual 454 + * subpages of that huge page should be unmapped through MMU 455 + * notifiers before we get here. 456 + * 457 + * Merging of CompoundPages is not supported; they should become 458 + * splitting first, unmapped, merged, and mapped back in on-demand. 459 + */ 460 + VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); 461 + 462 + old_pmd = *pmd; 463 + kvm_set_pmd(pmd, *new_pmd); 464 + if (pmd_present(old_pmd)) 465 + kvm_tlb_flush_vmid_ipa(kvm, addr); 466 + else 467 + get_page(virt_to_page(pmd)); 468 + return 0; 469 + } 470 + 471 + static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 472 + phys_addr_t addr, const pte_t *new_pte, bool iomap) 473 + { 474 + pmd_t *pmd; 475 + pte_t *pte, old_pte; 476 + 477 + /* Create stage-2 page table mapping - Level 1 */ 478 + pmd = stage2_get_pmd(kvm, cache, addr); 479 + if (!pmd) { 480 + /* 481 + * Ignore calls from kvm_set_spte_hva for unallocated 482 + * address ranges. 483 + */ 484 + return 0; 485 + } 486 + 487 + /* Create stage-2 page mappings - Level 2 */ 473 488 if (pmd_none(*pmd)) { 474 489 if (!cache) 475 490 return 0; /* ignore calls from kvm_set_spte_hva */ ··· 576 507 return ret; 577 508 } 578 509 510 + static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) 511 + { 512 + pfn_t pfn = *pfnp; 513 + gfn_t gfn = *ipap >> PAGE_SHIFT; 514 + 515 + if (PageTransCompound(pfn_to_page(pfn))) { 516 + unsigned long mask; 517 + /* 518 + * The address we faulted on is backed by a transparent huge 519 + * page. However, because we map the compound huge page and 520 + * not the individual tail page, we need to transfer the 521 + * refcount to the head page. 
We have to be careful that the 522 + * THP doesn't start to split while we are adjusting the 523 + * refcounts. 524 + * 525 + * We are sure this doesn't happen, because mmu_notifier_retry 526 + * was successful and we are holding the mmu_lock, so if this 527 + * THP is trying to split, it will be blocked in the mmu 528 + * notifier before touching any of the pages, specifically 529 + * before being able to call __split_huge_page_refcount(). 530 + * 531 + * We can therefore safely transfer the refcount from PG_tail 532 + * to PG_head and switch the pfn from a tail page to the head 533 + * page accordingly. 534 + */ 535 + mask = PTRS_PER_PMD - 1; 536 + VM_BUG_ON((gfn & mask) != (pfn & mask)); 537 + if (pfn & mask) { 538 + *ipap &= PMD_MASK; 539 + kvm_release_pfn_clean(pfn); 540 + pfn &= ~mask; 541 + kvm_get_pfn(pfn); 542 + *pfnp = pfn; 543 + } 544 + 545 + return true; 546 + } 547 + 548 + return false; 549 + } 550 + 579 551 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, 580 - gfn_t gfn, struct kvm_memory_slot *memslot, 552 + struct kvm_memory_slot *memslot, 581 553 unsigned long fault_status) 582 554 { 583 - pte_t new_pte; 584 - pfn_t pfn; 585 555 int ret; 586 - bool write_fault, writable; 556 + bool write_fault, writable, hugetlb = false, force_pte = false; 587 557 unsigned long mmu_seq; 558 + gfn_t gfn = fault_ipa >> PAGE_SHIFT; 559 + unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); 560 + struct kvm *kvm = vcpu->kvm; 588 561 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; 562 + struct vm_area_struct *vma; 563 + pfn_t pfn; 589 564 590 565 write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); 591 566 if (fault_status == FSC_PERM && !write_fault) { 592 567 kvm_err("Unexpected L2 read permission error\n"); 593 568 return -EFAULT; 594 569 } 570 + 571 + /* Let's check if we will get back a huge page backed by hugetlbfs */ 572 + down_read(&current->mm->mmap_sem); 573 + vma = find_vma_intersection(current->mm, hva, hva + 1); 
574 + if (is_vm_hugetlb_page(vma)) { 575 + hugetlb = true; 576 + gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; 577 + } else { 578 + /* 579 + * Pages belonging to VMAs not aligned to the PMD mapping 580 + * granularity cannot be mapped using block descriptors even 581 + * if the pages belong to a THP for the process, because the 582 + * stage-2 block descriptor will cover more than a single THP 583 + * and we loose atomicity for unmapping, updates, and splits 584 + * of the THP or other pages in the stage-2 block range. 585 + */ 586 + if (vma->vm_start & ~PMD_MASK) 587 + force_pte = true; 588 + } 589 + up_read(&current->mm->mmap_sem); 595 590 596 591 /* We need minimum second+third level pages */ 597 592 ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); ··· 674 541 */ 675 542 smp_rmb(); 676 543 677 - pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable); 544 + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); 678 545 if (is_error_pfn(pfn)) 679 546 return -EFAULT; 680 547 681 - new_pte = pfn_pte(pfn, PAGE_S2); 682 - coherent_icache_guest_page(vcpu->kvm, gfn); 683 - 684 - spin_lock(&vcpu->kvm->mmu_lock); 685 - if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) 548 + spin_lock(&kvm->mmu_lock); 549 + if (mmu_notifier_retry(kvm, mmu_seq)) 686 550 goto out_unlock; 687 - if (writable) { 688 - kvm_set_s2pte_writable(&new_pte); 689 - kvm_set_pfn_dirty(pfn); 551 + if (!hugetlb && !force_pte) 552 + hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); 553 + 554 + if (hugetlb) { 555 + pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2); 556 + new_pmd = pmd_mkhuge(new_pmd); 557 + if (writable) { 558 + kvm_set_s2pmd_writable(&new_pmd); 559 + kvm_set_pfn_dirty(pfn); 560 + } 561 + coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE); 562 + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); 563 + } else { 564 + pte_t new_pte = pfn_pte(pfn, PAGE_S2); 565 + if (writable) { 566 + kvm_set_s2pte_writable(&new_pte); 567 + kvm_set_pfn_dirty(pfn); 568 + } 569 + 
coherent_icache_guest_page(kvm, hva, PAGE_SIZE); 570 + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false); 690 571 } 691 - stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); 572 + 692 573 693 574 out_unlock: 694 - spin_unlock(&vcpu->kvm->mmu_lock); 575 + spin_unlock(&kvm->mmu_lock); 695 576 kvm_release_pfn_clean(pfn); 696 - return 0; 577 + return ret; 697 578 } 698 579 699 580 /** ··· 776 629 777 630 memslot = gfn_to_memslot(vcpu->kvm, gfn); 778 631 779 - ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status); 632 + ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); 780 633 if (ret == 0) 781 634 ret = 1; 782 635 out_unlock:
+17 -4
arch/arm/kvm/psci.c
··· 18 18 #include <linux/kvm_host.h> 19 19 #include <linux/wait.h> 20 20 21 + #include <asm/cputype.h> 21 22 #include <asm/kvm_emulate.h> 22 23 #include <asm/kvm_psci.h> 23 24 ··· 35 34 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) 36 35 { 37 36 struct kvm *kvm = source_vcpu->kvm; 38 - struct kvm_vcpu *vcpu; 37 + struct kvm_vcpu *vcpu = NULL, *tmp; 39 38 wait_queue_head_t *wq; 40 39 unsigned long cpu_id; 40 + unsigned long mpidr; 41 41 phys_addr_t target_pc; 42 + int i; 42 43 43 44 cpu_id = *vcpu_reg(source_vcpu, 1); 44 45 if (vcpu_mode_is_32bit(source_vcpu)) 45 46 cpu_id &= ~((u32) 0); 46 47 47 - if (cpu_id >= atomic_read(&kvm->online_vcpus)) 48 + kvm_for_each_vcpu(i, tmp, kvm) { 49 + mpidr = kvm_vcpu_get_mpidr(tmp); 50 + if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) { 51 + vcpu = tmp; 52 + break; 53 + } 54 + } 55 + 56 + if (!vcpu) 48 57 return KVM_PSCI_RET_INVAL; 49 58 50 59 target_pc = *vcpu_reg(source_vcpu, 2); 51 - 52 - vcpu = kvm_get_vcpu(kvm, cpu_id); 53 60 54 61 wq = kvm_arch_vcpu_wq(vcpu); 55 62 if (!waitqueue_active(wq)) ··· 70 61 target_pc &= ~((phys_addr_t) 1); 71 62 vcpu_set_thumb(vcpu); 72 63 } 64 + 65 + /* Propagate caller endianness */ 66 + if (kvm_vcpu_is_be(source_vcpu)) 67 + kvm_vcpu_set_be(vcpu); 73 68 74 69 *vcpu_pc(vcpu) = target_pc; 75 70 vcpu->arch.pause = false;
+6 -9
arch/arm/kvm/reset.c
··· 30 30 #include <kvm/arm_arch_timer.h> 31 31 32 32 /****************************************************************************** 33 - * Cortex-A15 Reset Values 33 + * Cortex-A15 and Cortex-A7 Reset Values 34 34 */ 35 35 36 - static const int a15_max_cpu_idx = 3; 37 - 38 - static struct kvm_regs a15_regs_reset = { 36 + static struct kvm_regs cortexa_regs_reset = { 39 37 .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, 40 38 }; 41 39 42 - static const struct kvm_irq_level a15_vtimer_irq = { 40 + static const struct kvm_irq_level cortexa_vtimer_irq = { 43 41 { .irq = 27 }, 44 42 .level = 1, 45 43 }; ··· 60 62 const struct kvm_irq_level *cpu_vtimer_irq; 61 63 62 64 switch (vcpu->arch.target) { 65 + case KVM_ARM_TARGET_CORTEX_A7: 63 66 case KVM_ARM_TARGET_CORTEX_A15: 64 - if (vcpu->vcpu_id > a15_max_cpu_idx) 65 - return -EINVAL; 66 - reset_regs = &a15_regs_reset; 67 + reset_regs = &cortexa_regs_reset; 67 68 vcpu->arch.midr = read_cpuid_id(); 68 - cpu_vtimer_irq = &a15_vtimer_irq; 69 + cpu_vtimer_irq = &cortexa_vtimer_irq; 69 70 break; 70 71 default: 71 72 return -ENODEV;
+6 -2
arch/arm64/include/asm/kvm_arm.h
··· 63 63 * TAC: Trap ACTLR 64 64 * TSC: Trap SMC 65 65 * TSW: Trap cache operations by set/way 66 + * TWE: Trap WFE 66 67 * TWI: Trap WFI 67 68 * TIDCP: Trap L2CTLR/L2ECTLR 68 69 * BSU_IS: Upgrade barriers to the inner shareable domain ··· 73 72 * FMO: Override CPSR.F and enable signaling with VF 74 73 * SWIO: Turn set/way invalidates into set/way clean+invalidate 75 74 */ 76 - #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ 77 - HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ 75 + #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ 76 + HCR_BSU_IS | HCR_FB | HCR_TAC | \ 77 + HCR_AMO | HCR_IMO | HCR_FMO | \ 78 78 HCR_SWIO | HCR_TIDCP | HCR_RW) 79 79 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) 80 80 ··· 243 241 #define ESR_EL2_EC_BRK64 (0x3C) 244 242 245 243 #define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 244 + 245 + #define ESR_EL2_EC_WFI_ISS_WFE (1 << 0) 246 246 247 247 #endif /* __ARM64_KVM_ARM_H__ */
+61
arch/arm64/include/asm/kvm_emulate.h
··· 177 177 return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; 178 178 } 179 179 180 + static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) 181 + { 182 + return vcpu_sys_reg(vcpu, MPIDR_EL1); 183 + } 184 + 185 + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) 186 + { 187 + if (vcpu_mode_is_32bit(vcpu)) 188 + *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; 189 + else 190 + vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25); 191 + } 192 + 193 + static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) 194 + { 195 + if (vcpu_mode_is_32bit(vcpu)) 196 + return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); 197 + 198 + return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); 199 + } 200 + 201 + static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, 202 + unsigned long data, 203 + unsigned int len) 204 + { 205 + if (kvm_vcpu_is_be(vcpu)) { 206 + switch (len) { 207 + case 1: 208 + return data & 0xff; 209 + case 2: 210 + return be16_to_cpu(data & 0xffff); 211 + case 4: 212 + return be32_to_cpu(data & 0xffffffff); 213 + default: 214 + return be64_to_cpu(data); 215 + } 216 + } 217 + 218 + return data; /* Leave LE untouched */ 219 + } 220 + 221 + static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, 222 + unsigned long data, 223 + unsigned int len) 224 + { 225 + if (kvm_vcpu_is_be(vcpu)) { 226 + switch (len) { 227 + case 1: 228 + return data & 0xff; 229 + case 2: 230 + return cpu_to_be16(data & 0xffff); 231 + case 4: 232 + return cpu_to_be32(data & 0xffffffff); 233 + default: 234 + return cpu_to_be64(data); 235 + } 236 + } 237 + 238 + return data; /* Leave LE untouched */ 239 + } 240 + 180 241 #endif /* __ARM64_KVM_EMULATE_H__ */
+1 -5
arch/arm64/include/asm/kvm_host.h
··· 36 36 37 37 #define KVM_VCPU_MAX_FEATURES 2 38 38 39 - /* We don't currently support large pages. */ 40 - #define KVM_HPAGE_GFN_SHIFT(x) 0 41 - #define KVM_NR_PAGE_SIZES 1 42 - #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) 43 - 44 39 struct kvm_vcpu; 45 40 int kvm_target_cpu(void); 46 41 int kvm_reset_vcpu(struct kvm_vcpu *vcpu); ··· 146 151 struct kvm_vcpu_init; 147 152 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 148 153 const struct kvm_vcpu_init *init); 154 + int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); 149 155 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); 150 156 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); 151 157 struct kvm_one_reg;
+9 -3
arch/arm64/include/asm/kvm_mmu.h
··· 91 91 void kvm_clear_hyp_idmap(void); 92 92 93 93 #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) 94 + #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) 94 95 95 96 static inline bool kvm_is_write_fault(unsigned long esr) 96 97 { ··· 117 116 pte_val(*pte) |= PTE_S2_RDWR; 118 117 } 119 118 119 + static inline void kvm_set_s2pmd_writable(pmd_t *pmd) 120 + { 121 + pmd_val(*pmd) |= PMD_S2_RDWR; 122 + } 123 + 120 124 struct kvm; 121 125 122 - static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) 126 + static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, 127 + unsigned long size) 123 128 { 124 129 if (!icache_is_aliasing()) { /* PIPT */ 125 - unsigned long hva = gfn_to_hva(kvm, gfn); 126 - flush_icache_range(hva, hva + PAGE_SIZE); 130 + flush_icache_range(hva, hva + size); 127 131 } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ 128 132 /* any kind of VIPT cache */ 129 133 __flush_icache_all();
+2
arch/arm64/include/asm/pgtable-hwdef.h
··· 85 85 #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ 86 86 #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ 87 87 88 + #define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ 89 + 88 90 /* 89 91 * Memory Attribute override for Stage-2 (MemAttr[3:0]) 90 92 */
+1
arch/arm64/kvm/Kconfig
··· 21 21 select MMU_NOTIFIER 22 22 select PREEMPT_NOTIFIERS 23 23 select ANON_INODES 24 + select HAVE_KVM_CPU_RELAX_INTERCEPT 24 25 select KVM_MMIO 25 26 select KVM_ARM_HOST 26 27 select KVM_ARM_VGIC
+20
arch/arm64/kvm/guest.c
··· 248 248 return kvm_reset_vcpu(vcpu); 249 249 } 250 250 251 + int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) 252 + { 253 + int target = kvm_target_cpu(); 254 + 255 + if (target < 0) 256 + return -ENODEV; 257 + 258 + memset(init, 0, sizeof(*init)); 259 + 260 + /* 261 + * For now, we don't return any features. 262 + * In future, we might use features to return target 263 + * specific features available for the preferred 264 + * target type. 265 + */ 266 + init->target = (__u32)target; 267 + 268 + return 0; 269 + } 270 + 251 271 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 252 272 { 253 273 return -EINVAL;
+13 -5
arch/arm64/kvm/handle_exit.c
··· 47 47 } 48 48 49 49 /** 50 - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest 50 + * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event 51 + * instruction executed by a guest 52 + * 51 53 * @vcpu: the vcpu pointer 52 54 * 53 - * Simply call kvm_vcpu_block(), which will halt execution of 55 + * WFE: Yield the CPU and come back to this vcpu when the scheduler 56 + * decides to. 57 + * WFI: Simply call kvm_vcpu_block(), which will halt execution of 54 58 * world-switches and schedule other host processes until there is an 55 59 * incoming IRQ or FIQ to the VM. 56 60 */ 57 - static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) 61 + static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) 58 62 { 59 - kvm_vcpu_block(vcpu); 63 + if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE) 64 + kvm_vcpu_on_spin(vcpu); 65 + else 66 + kvm_vcpu_block(vcpu); 67 + 60 68 return 1; 61 69 } 62 70 63 71 static exit_handle_fn arm_exit_handlers[] = { 64 - [ESR_EL2_EC_WFI] = kvm_handle_wfi, 72 + [ESR_EL2_EC_WFI] = kvm_handle_wfx, 65 73 [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, 66 74 [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, 67 75 [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access,
+1 -5
arch/ia64/include/asm/kvm_host.h
··· 234 234 #define KVM_REQ_PTC_G 32 235 235 #define KVM_REQ_RESUME 33 236 236 237 - #define KVM_HPAGE_GFN_SHIFT(x) 0 238 - #define KVM_NR_PAGE_SIZES 1 239 - #define KVM_PAGES_PER_HPAGE(x) 1 240 - 241 237 struct kvm; 242 238 struct kvm_vcpu; 243 239 ··· 476 480 477 481 struct list_head assigned_dev_head; 478 482 struct iommu_domain *iommu_domain; 479 - int iommu_flags; 483 + bool iommu_noncoherent; 480 484 481 485 unsigned long irq_sources_bitmap; 482 486 unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+3 -2
arch/ia64/kvm/kvm-ia64.c
··· 1550 1550 return VM_FAULT_SIGBUS; 1551 1551 } 1552 1552 1553 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 1553 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 1554 1554 struct kvm_memory_slot *dont) 1555 1555 { 1556 1556 } 1557 1557 1558 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 1558 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1559 + unsigned long npages) 1559 1560 { 1560 1561 return 0; 1561 1562 }
-7
arch/mips/include/asm/kvm_host.h
··· 27 27 28 28 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 29 29 30 - /* Don't support huge pages */ 31 - #define KVM_HPAGE_GFN_SHIFT(x) 0 32 - 33 - /* We don't currently support large pages. */ 34 - #define KVM_NR_PAGE_SIZES 1 35 - #define KVM_PAGES_PER_HPAGE(x) 1 36 - 37 30 38 31 39 32 /* Special address that contains the comm page, used for reducing # of traps */
+3 -2
arch/mips/kvm/kvm_mips.c
··· 198 198 return -ENOIOCTLCMD; 199 199 } 200 200 201 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 201 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 202 202 struct kvm_memory_slot *dont) 203 203 { 204 204 } 205 205 206 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 206 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 207 + unsigned long npages) 207 208 { 208 209 return 0; 209 210 }
+4
arch/powerpc/include/asm/disassemble.h
··· 77 77 return inst & 0xffff; 78 78 } 79 79 80 + static inline unsigned int get_oc(u32 inst) 81 + { 82 + return (inst >> 11) & 0x7fff; 83 + } 80 84 #endif /* __ASM_PPC_DISASSEMBLE_H__ */
+20 -1
arch/powerpc/include/asm/exception-64s.h
··· 198 198 cmpwi r10,0; \ 199 199 bne do_kvm_##n 200 200 201 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 202 + /* 203 + * If hv is possible, interrupts come into to the hv version 204 + * of the kvmppc_interrupt code, which then jumps to the PR handler, 205 + * kvmppc_interrupt_pr, if the guest is a PR guest. 206 + */ 207 + #define kvmppc_interrupt kvmppc_interrupt_hv 208 + #else 209 + #define kvmppc_interrupt kvmppc_interrupt_pr 210 + #endif 211 + 201 212 #define __KVM_HANDLER(area, h, n) \ 202 213 do_kvm_##n: \ 203 214 BEGIN_FTR_SECTION_NESTED(947) \ 204 215 ld r10,area+EX_CFAR(r13); \ 205 216 std r10,HSTATE_CFAR(r13); \ 206 217 END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947); \ 218 + BEGIN_FTR_SECTION_NESTED(948) \ 219 + ld r10,area+EX_PPR(r13); \ 220 + std r10,HSTATE_PPR(r13); \ 221 + END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ 207 222 ld r10,area+EX_R10(r13); \ 208 223 stw r9,HSTATE_SCRATCH1(r13); \ 209 224 ld r9,area+EX_R9(r13); \ ··· 232 217 ld r10,area+EX_R10(r13); \ 233 218 beq 89f; \ 234 219 stw r9,HSTATE_SCRATCH1(r13); \ 220 + BEGIN_FTR_SECTION_NESTED(948) \ 221 + ld r9,area+EX_PPR(r13); \ 222 + std r9,HSTATE_PPR(r13); \ 223 + END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ 235 224 ld r9,area+EX_R9(r13); \ 236 225 std r12,HSTATE_SCRATCH0(r13); \ 237 226 li r12,n; \ ··· 255 236 #define KVM_HANDLER_SKIP(area, h, n) 256 237 #endif 257 238 258 - #ifdef CONFIG_KVM_BOOK3S_PR 239 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 259 240 #define KVMTEST_PR(n) __KVMTEST(n) 260 241 #define KVM_HANDLER_PR(area, h, n) __KVM_HANDLER(area, h, n) 261 242 #define KVM_HANDLER_PR_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n)
+4
arch/powerpc/include/asm/kvm_asm.h
··· 123 123 #define BOOK3S_HFLAG_SLB 0x2 124 124 #define BOOK3S_HFLAG_PAIRED_SINGLE 0x4 125 125 #define BOOK3S_HFLAG_NATIVE_PS 0x8 126 + #define BOOK3S_HFLAG_MULTI_PGSIZE 0x10 127 + #define BOOK3S_HFLAG_NEW_TLBIE 0x20 126 128 127 129 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ 128 130 #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ ··· 138 136 #define KVM_GUEST_MODE_NONE 0 139 137 #define KVM_GUEST_MODE_GUEST 1 140 138 #define KVM_GUEST_MODE_SKIP 2 139 + #define KVM_GUEST_MODE_GUEST_HV 3 140 + #define KVM_GUEST_MODE_HOST_HV 4 141 141 142 142 #define KVM_INST_FETCH_FAILED -1 143 143
+19 -213
arch/powerpc/include/asm/kvm_book3s.h
··· 58 58 struct hlist_node list_pte_long; 59 59 struct hlist_node list_vpte; 60 60 struct hlist_node list_vpte_long; 61 + #ifdef CONFIG_PPC_BOOK3S_64 62 + struct hlist_node list_vpte_64k; 63 + #endif 61 64 struct rcu_head rcu_head; 62 65 u64 host_vpn; 63 66 u64 pfn; 64 67 ulong slot; 65 68 struct kvmppc_pte pte; 69 + int pagesize; 66 70 }; 67 71 68 72 struct kvmppc_vcpu_book3s { 69 - struct kvm_vcpu vcpu; 70 - struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; 71 73 struct kvmppc_sid_map sid_map[SID_MAP_NUM]; 72 74 struct { 73 75 u64 esid; ··· 101 99 struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; 102 100 struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; 103 101 struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; 102 + #ifdef CONFIG_PPC_BOOK3S_64 103 + struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K]; 104 + #endif 104 105 int hpte_cache_count; 105 106 spinlock_t mmu_lock; 106 107 }; ··· 112 107 #define CONTEXT_GUEST 1 113 108 #define CONTEXT_GUEST_END 2 114 109 115 - #define VSID_REAL 0x0fffffffffc00000ULL 116 - #define VSID_BAT 0x0fffffffffb00000ULL 110 + #define VSID_REAL 0x07ffffffffc00000ULL 111 + #define VSID_BAT 0x07ffffffffb00000ULL 112 + #define VSID_64K 0x0800000000000000ULL 117 113 #define VSID_1T 0x1000000000000000ULL 118 114 #define VSID_REAL_DR 0x2000000000000000ULL 119 115 #define VSID_REAL_IR 0x4000000000000000ULL ··· 124 118 extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); 125 119 extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end); 126 120 extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); 127 - extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr); 128 121 extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); 129 122 extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); 130 123 extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); 131 - extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct 
kvmppc_pte *pte); 124 + extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, 125 + bool iswrite); 126 + extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); 132 127 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); 133 128 extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size); 134 129 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); ··· 141 134 142 135 extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); 143 136 extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); 137 + extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte); 144 138 extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu); 145 139 extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu); 146 140 extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte); ··· 159 151 bool upper, u32 val); 160 152 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); 161 153 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); 162 - extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); 154 + extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, 155 + bool *writable); 163 156 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 164 157 unsigned long *rmap, long pte_index, int realmode); 165 158 extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, ··· 181 172 unsigned long *hpret); 182 173 extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, 183 174 struct kvm_memory_slot *memslot, unsigned long *map); 175 + extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, 176 + unsigned long mask); 184 177 185 178 extern void kvmppc_entry_trampoline(void); 186 179 extern void kvmppc_hv_entry_trampoline(void); ··· 195 184 196 185 static inline struct kvmppc_vcpu_book3s 
*to_book3s(struct kvm_vcpu *vcpu) 197 186 { 198 - return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu); 187 + return vcpu->arch.book3s; 199 188 } 200 - 201 - extern void kvm_return_point(void); 202 189 203 190 /* Also add subarch specific defines */ 204 191 ··· 206 197 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 207 198 #include <asm/kvm_book3s_64.h> 208 199 #endif 209 - 210 - #ifdef CONFIG_KVM_BOOK3S_PR 211 - 212 - static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) 213 - { 214 - return to_book3s(vcpu)->hior; 215 - } 216 - 217 - static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, 218 - unsigned long pending_now, unsigned long old_pending) 219 - { 220 - if (pending_now) 221 - vcpu->arch.shared->int_pending = 1; 222 - else if (old_pending) 223 - vcpu->arch.shared->int_pending = 0; 224 - } 225 - 226 - static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 227 - { 228 - if ( num < 14 ) { 229 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 230 - svcpu->gpr[num] = val; 231 - svcpu_put(svcpu); 232 - to_book3s(vcpu)->shadow_vcpu->gpr[num] = val; 233 - } else 234 - vcpu->arch.gpr[num] = val; 235 - } 236 - 237 - static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) 238 - { 239 - if ( num < 14 ) { 240 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 241 - ulong r = svcpu->gpr[num]; 242 - svcpu_put(svcpu); 243 - return r; 244 - } else 245 - return vcpu->arch.gpr[num]; 246 - } 247 - 248 - static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 249 - { 250 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 251 - svcpu->cr = val; 252 - svcpu_put(svcpu); 253 - to_book3s(vcpu)->shadow_vcpu->cr = val; 254 - } 255 - 256 - static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 257 - { 258 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 259 - u32 r; 260 - r = svcpu->cr; 261 - svcpu_put(svcpu); 262 - return r; 263 - } 264 - 265 - static inline void 
kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 266 - { 267 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 268 - svcpu->xer = val; 269 - to_book3s(vcpu)->shadow_vcpu->xer = val; 270 - svcpu_put(svcpu); 271 - } 272 - 273 - static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 274 - { 275 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 276 - u32 r; 277 - r = svcpu->xer; 278 - svcpu_put(svcpu); 279 - return r; 280 - } 281 - 282 - static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) 283 - { 284 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 285 - svcpu->ctr = val; 286 - svcpu_put(svcpu); 287 - } 288 - 289 - static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) 290 - { 291 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 292 - ulong r; 293 - r = svcpu->ctr; 294 - svcpu_put(svcpu); 295 - return r; 296 - } 297 - 298 - static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) 299 - { 300 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 301 - svcpu->lr = val; 302 - svcpu_put(svcpu); 303 - } 304 - 305 - static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) 306 - { 307 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 308 - ulong r; 309 - r = svcpu->lr; 310 - svcpu_put(svcpu); 311 - return r; 312 - } 313 - 314 - static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) 315 - { 316 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 317 - svcpu->pc = val; 318 - svcpu_put(svcpu); 319 - } 320 - 321 - static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) 322 - { 323 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 324 - ulong r; 325 - r = svcpu->pc; 326 - svcpu_put(svcpu); 327 - return r; 328 - } 329 - 330 - static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) 331 - { 332 - ulong pc = kvmppc_get_pc(vcpu); 333 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 334 - u32 r; 335 - 336 - /* Load the instruction manually if it 
failed to do so in the 337 - * exit path */ 338 - if (svcpu->last_inst == KVM_INST_FETCH_FAILED) 339 - kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); 340 - 341 - r = svcpu->last_inst; 342 - svcpu_put(svcpu); 343 - return r; 344 - } 345 - 346 - /* 347 - * Like kvmppc_get_last_inst(), but for fetching a sc instruction. 348 - * Because the sc instruction sets SRR0 to point to the following 349 - * instruction, we have to fetch from pc - 4. 350 - */ 351 - static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) 352 - { 353 - ulong pc = kvmppc_get_pc(vcpu) - 4; 354 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 355 - u32 r; 356 - 357 - /* Load the instruction manually if it failed to do so in the 358 - * exit path */ 359 - if (svcpu->last_inst == KVM_INST_FETCH_FAILED) 360 - kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); 361 - 362 - r = svcpu->last_inst; 363 - svcpu_put(svcpu); 364 - return r; 365 - } 366 - 367 - static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) 368 - { 369 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 370 - ulong r; 371 - r = svcpu->fault_dar; 372 - svcpu_put(svcpu); 373 - return r; 374 - } 375 - 376 - static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) 377 - { 378 - ulong crit_raw = vcpu->arch.shared->critical; 379 - ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); 380 - bool crit; 381 - 382 - /* Truncate crit indicators in 32 bit mode */ 383 - if (!(vcpu->arch.shared->msr & MSR_SF)) { 384 - crit_raw &= 0xffffffff; 385 - crit_r1 &= 0xffffffff; 386 - } 387 - 388 - /* Critical section when crit == r1 */ 389 - crit = (crit_raw == crit_r1); 390 - /* ... 
and we're in supervisor mode */ 391 - crit = crit && !(vcpu->arch.shared->msr & MSR_PR); 392 - 393 - return crit; 394 - } 395 - #else /* CONFIG_KVM_BOOK3S_PR */ 396 - 397 - static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) 398 - { 399 - return 0; 400 - } 401 - 402 - static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, 403 - unsigned long pending_now, unsigned long old_pending) 404 - { 405 - } 406 200 407 201 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 408 202 { ··· 300 488 { 301 489 return vcpu->arch.fault_dar; 302 490 } 303 - 304 - static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) 305 - { 306 - return false; 307 - } 308 - #endif 309 491 310 492 /* Magic register values loaded into r3 and r4 before the 'sc' assembly 311 493 * instruction for the OSI hypercalls */
+1 -1
arch/powerpc/include/asm/kvm_book3s_32.h
··· 22 22 23 23 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) 24 24 { 25 - return to_book3s(vcpu)->shadow_vcpu; 25 + return vcpu->arch.shadow_vcpu; 26 26 } 27 27 28 28 static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
+4 -4
arch/powerpc/include/asm/kvm_book3s_64.h
··· 20 20 #ifndef __ASM_KVM_BOOK3S_64_H__ 21 21 #define __ASM_KVM_BOOK3S_64_H__ 22 22 23 - #ifdef CONFIG_KVM_BOOK3S_PR 23 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 24 24 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) 25 25 { 26 26 preempt_disable(); ··· 35 35 36 36 #define SPAPR_TCE_SHIFT 12 37 37 38 - #ifdef CONFIG_KVM_BOOK3S_64_HV 38 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 39 39 #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ 40 40 extern unsigned long kvm_rma_pages; 41 41 #endif ··· 278 278 (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); 279 279 } 280 280 281 - #ifdef CONFIG_KVM_BOOK3S_64_HV 281 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 282 282 /* 283 283 * Note modification of an HPTE; set the HPTE modified bit 284 284 * if anyone is interested. ··· 289 289 if (atomic_read(&kvm->arch.hpte_mod_interest)) 290 290 rev->guest_rpte |= HPTE_GR_MODIFIED; 291 291 } 292 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 292 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 293 293 294 294 #endif /* __ASM_KVM_BOOK3S_64_H__ */
+5 -4
arch/powerpc/include/asm/kvm_book3s_asm.h
··· 83 83 u8 restore_hid5; 84 84 u8 napping; 85 85 86 - #ifdef CONFIG_KVM_BOOK3S_64_HV 86 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 87 87 u8 hwthread_req; 88 88 u8 hwthread_state; 89 89 u8 host_ipi; ··· 101 101 #endif 102 102 #ifdef CONFIG_PPC_BOOK3S_64 103 103 u64 cfar; 104 + u64 ppr; 104 105 #endif 105 106 }; 106 107 ··· 109 108 ulong gpr[14]; 110 109 u32 cr; 111 110 u32 xer; 112 - 113 - u32 fault_dsisr; 114 - u32 last_inst; 115 111 ulong ctr; 116 112 ulong lr; 117 113 ulong pc; 114 + 118 115 ulong shadow_srr1; 119 116 ulong fault_dar; 117 + u32 fault_dsisr; 118 + u32 last_inst; 120 119 121 120 #ifdef CONFIG_PPC_BOOK3S_32 122 121 u32 sr[16]; /* Guest SRs */
+6 -1
arch/powerpc/include/asm/kvm_booke.h
··· 26 26 /* LPIDs we support with this build -- runtime limit may be lower */ 27 27 #define KVMPPC_NR_LPIDS 64 28 28 29 - #define KVMPPC_INST_EHPRIV 0x7c00021c 29 + #define KVMPPC_INST_EHPRIV 0x7c00021c 30 + #define EHPRIV_OC_SHIFT 11 31 + /* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */ 32 + #define EHPRIV_OC_DEBUG 1 33 + #define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \ 34 + (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT)) 30 35 31 36 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 32 37 {
+34 -23
arch/powerpc/include/asm/kvm_host.h
··· 63 63 64 64 #endif 65 65 66 - /* We don't currently support large pages. */ 67 - #define KVM_HPAGE_GFN_SHIFT(x) 0 68 - #define KVM_NR_PAGE_SIZES 1 69 - #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) 70 - 71 66 #define HPTEG_CACHE_NUM (1 << 15) 72 67 #define HPTEG_HASH_BITS_PTE 13 73 68 #define HPTEG_HASH_BITS_PTE_LONG 12 74 69 #define HPTEG_HASH_BITS_VPTE 13 75 70 #define HPTEG_HASH_BITS_VPTE_LONG 5 71 + #define HPTEG_HASH_BITS_VPTE_64K 11 76 72 #define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) 77 73 #define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG) 78 74 #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) 79 75 #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) 76 + #define HPTEG_HASH_NUM_VPTE_64K (1 << HPTEG_HASH_BITS_VPTE_64K) 80 77 81 78 /* Physical Address Mask - allowed range of real mode RAM access */ 82 79 #define KVM_PAM 0x0fffffffffffffffULL ··· 85 88 struct lppaca; 86 89 struct slb_shadow; 87 90 struct dtl_entry; 91 + 92 + struct kvmppc_vcpu_book3s; 93 + struct kvmppc_book3s_shadow_vcpu; 88 94 89 95 struct kvm_vm_stat { 90 96 u32 remote_tlb_flush; ··· 224 224 #define KVMPPC_GOT_PAGE 0x80 225 225 226 226 struct kvm_arch_memory_slot { 227 - #ifdef CONFIG_KVM_BOOK3S_64_HV 227 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 228 228 unsigned long *rmap; 229 229 unsigned long *slot_phys; 230 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 230 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 231 231 }; 232 232 233 233 struct kvm_arch { 234 234 unsigned int lpid; 235 - #ifdef CONFIG_KVM_BOOK3S_64_HV 235 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 236 236 unsigned long hpt_virt; 237 237 struct revmap_entry *revmap; 238 238 unsigned int host_lpid; ··· 256 256 cpumask_t need_tlb_flush; 257 257 struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; 258 258 int hpt_cma_alloc; 259 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 259 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 260 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 261 + struct mutex hpt_mutex; 262 + #endif 260 263 #ifdef 
CONFIG_PPC_BOOK3S_64 261 264 struct list_head spapr_tce_tables; 262 265 struct list_head rtas_tokens; ··· 270 267 #ifdef CONFIG_KVM_XICS 271 268 struct kvmppc_xics *xics; 272 269 #endif 270 + struct kvmppc_ops *kvm_ops; 273 271 }; 274 272 275 273 /* ··· 298 294 u64 stolen_tb; 299 295 u64 preempt_tb; 300 296 struct kvm_vcpu *runner; 297 + u64 tb_offset; /* guest timebase - host timebase */ 298 + ulong lpcr; 299 + u32 arch_compat; 300 + ulong pcr; 301 301 }; 302 302 303 303 #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) ··· 336 328 bool may_read : 1; 337 329 bool may_write : 1; 338 330 bool may_execute : 1; 331 + u8 page_size; /* MMU_PAGE_xxx */ 339 332 }; 340 333 341 334 struct kvmppc_mmu { ··· 349 340 /* book3s */ 350 341 void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value); 351 342 u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum); 352 - int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); 343 + int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, 344 + struct kvmppc_pte *pte, bool data, bool iswrite); 353 345 void (*reset_msr)(struct kvm_vcpu *vcpu); 354 346 void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); 355 347 int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); ··· 370 360 bool large : 1; /* PTEs are 16MB */ 371 361 bool tb : 1; /* 1TB segment */ 372 362 bool class : 1; 363 + u8 base_page_size; /* MMU_PAGE_xxx */ 373 364 }; 374 365 375 366 # ifdef CONFIG_PPC_FSL_BOOK3E ··· 388 377 #define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ 389 378 #define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ 390 379 391 - struct kvmppc_booke_debug_reg { 392 - u32 dbcr0; 393 - u32 dbcr1; 394 - u32 dbcr2; 395 - #ifdef CONFIG_KVM_E500MC 396 - u32 dbcr4; 397 - #endif 398 - u64 iac[KVMPPC_BOOKE_MAX_IAC]; 399 - u64 dac[KVMPPC_BOOKE_MAX_DAC]; 400 - }; 401 - 402 380 #define KVMPPC_IRQ_DEFAULT 0 403 381 #define KVMPPC_IRQ_MPIC 1 404 382 #define KVMPPC_IRQ_XICS 2 ··· 402 402 int slb_max; /* 1 + index 
of last valid entry in slb[] */ 403 403 int slb_nr; /* total number of entries in SLB */ 404 404 struct kvmppc_mmu mmu; 405 + struct kvmppc_vcpu_book3s *book3s; 406 + #endif 407 + #ifdef CONFIG_PPC_BOOK3S_32 408 + struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; 405 409 #endif 406 410 407 411 ulong gpr[32]; ··· 467 463 u32 ctrl; 468 464 ulong dabr; 469 465 ulong cfar; 466 + ulong ppr; 467 + ulong shadow_srr1; 470 468 #endif 471 469 u32 vrsave; /* also USPRG0 */ 472 470 u32 mmucr; ··· 504 498 505 499 u64 mmcr[3]; 506 500 u32 pmc[8]; 501 + u64 siar; 502 + u64 sdar; 507 503 508 504 #ifdef CONFIG_KVM_EXIT_TIMING 509 505 struct mutex exit_timing_lock; ··· 539 531 u32 eptcfg; 540 532 u32 epr; 541 533 u32 crit_save; 542 - struct kvmppc_booke_debug_reg dbg_reg; 534 + /* guest debug registers*/ 535 + struct debug_reg dbg_reg; 536 + /* hardware visible debug registers when in guest state */ 537 + struct debug_reg shadow_dbg_reg; 543 538 #endif 544 539 gpa_t paddr_accessed; 545 540 gva_t vaddr_accessed; ··· 593 582 struct kvmppc_icp *icp; /* XICS presentation controller */ 594 583 #endif 595 584 596 - #ifdef CONFIG_KVM_BOOK3S_64_HV 585 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 597 586 struct kvm_vcpu_arch_shared shregs; 598 587 599 588 unsigned long pgfault_addr;
+79 -28
arch/powerpc/include/asm/kvm_ppc.h
··· 106 106 struct kvm_interrupt *irq); 107 107 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); 108 108 extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); 109 - 110 - extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 111 - unsigned int op, int *advance); 112 - extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, 113 - ulong val); 114 - extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, 115 - ulong *val); 116 109 extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); 117 110 118 111 extern int kvmppc_booke_init(void); ··· 128 135 struct kvm_create_spapr_tce *args); 129 136 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 130 137 unsigned long ioba, unsigned long tce); 131 - extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, 132 - struct kvm_allocate_rma *rma); 133 138 extern struct kvm_rma_info *kvm_alloc_rma(void); 134 139 extern void kvm_release_rma(struct kvm_rma_info *ri); 135 140 extern struct page *kvm_alloc_hpt(unsigned long nr_pages); 136 141 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); 137 142 extern int kvmppc_core_init_vm(struct kvm *kvm); 138 143 extern void kvmppc_core_destroy_vm(struct kvm *kvm); 139 - extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 144 + extern void kvmppc_core_free_memslot(struct kvm *kvm, 145 + struct kvm_memory_slot *free, 140 146 struct kvm_memory_slot *dont); 141 - extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 147 + extern int kvmppc_core_create_memslot(struct kvm *kvm, 148 + struct kvm_memory_slot *slot, 142 149 unsigned long npages); 143 150 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, 144 151 struct kvm_memory_slot *memslot, ··· 169 176 u32 *priority); 170 177 extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); 171 178 extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); 179 + 180 + union kvmppc_one_reg { 181 + 
u32 wval; 182 + u64 dval; 183 + vector128 vval; 184 + u64 vsxval[2]; 185 + struct { 186 + u64 addr; 187 + u64 length; 188 + } vpaval; 189 + }; 190 + 191 + struct kvmppc_ops { 192 + struct module *owner; 193 + int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 194 + int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 195 + int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id, 196 + union kvmppc_one_reg *val); 197 + int (*set_one_reg)(struct kvm_vcpu *vcpu, u64 id, 198 + union kvmppc_one_reg *val); 199 + void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); 200 + void (*vcpu_put)(struct kvm_vcpu *vcpu); 201 + void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr); 202 + int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); 203 + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id); 204 + void (*vcpu_free)(struct kvm_vcpu *vcpu); 205 + int (*check_requests)(struct kvm_vcpu *vcpu); 206 + int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log); 207 + void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot); 208 + int (*prepare_memory_region)(struct kvm *kvm, 209 + struct kvm_memory_slot *memslot, 210 + struct kvm_userspace_memory_region *mem); 211 + void (*commit_memory_region)(struct kvm *kvm, 212 + struct kvm_userspace_memory_region *mem, 213 + const struct kvm_memory_slot *old); 214 + int (*unmap_hva)(struct kvm *kvm, unsigned long hva); 215 + int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, 216 + unsigned long end); 217 + int (*age_hva)(struct kvm *kvm, unsigned long hva); 218 + int (*test_age_hva)(struct kvm *kvm, unsigned long hva); 219 + void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); 220 + void (*mmu_destroy)(struct kvm_vcpu *vcpu); 221 + void (*free_memslot)(struct kvm_memory_slot *free, 222 + struct kvm_memory_slot *dont); 223 + int (*create_memslot)(struct kvm_memory_slot *slot, 224 + unsigned long npages); 225 + int (*init_vm)(struct kvm *kvm); 226 + void 
(*destroy_vm)(struct kvm *kvm); 227 + int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info); 228 + int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu, 229 + unsigned int inst, int *advance); 230 + int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); 231 + int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); 232 + void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu); 233 + long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, 234 + unsigned long arg); 235 + 236 + }; 237 + 238 + extern struct kvmppc_ops *kvmppc_hv_ops; 239 + extern struct kvmppc_ops *kvmppc_pr_ops; 240 + 241 + static inline bool is_kvmppc_hv_enabled(struct kvm *kvm) 242 + { 243 + return kvm->arch.kvm_ops == kvmppc_hv_ops; 244 + } 172 245 173 246 /* 174 247 * Cuts out inst bits with ordering according to spec. ··· 269 210 return r; 270 211 } 271 212 272 - union kvmppc_one_reg { 273 - u32 wval; 274 - u64 dval; 275 - vector128 vval; 276 - u64 vsxval[2]; 277 - struct { 278 - u64 addr; 279 - u64 length; 280 - } vpaval; 281 - }; 282 - 283 213 #define one_reg_size(id) \ 284 214 (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) 285 215 ··· 293 245 __v; \ 294 246 }) 295 247 296 - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 248 + int kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 297 249 int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 298 250 299 - void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 251 + int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 300 252 int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 301 253 302 254 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); ··· 308 260 309 261 struct openpic; 310 262 311 - #ifdef CONFIG_KVM_BOOK3S_64_HV 263 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 312 264 extern void kvm_cma_reserve(void) __init; 313 265 
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) 314 266 { ··· 317 269 318 270 static inline u32 kvmppc_get_xics_latch(void) 319 271 { 320 - u32 xirr = get_paca()->kvm_hstate.saved_xirr; 272 + u32 xirr; 321 273 274 + xirr = get_paca()->kvm_hstate.saved_xirr; 322 275 get_paca()->kvm_hstate.saved_xirr = 0; 323 - 324 276 return xirr; 325 277 } 326 278 ··· 329 281 paca[cpu].kvm_hstate.host_ipi = host_ipi; 330 282 } 331 283 332 - extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); 284 + static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) 285 + { 286 + vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu); 287 + } 333 288 334 289 #else 335 290 static inline void __init kvm_cma_reserve(void)
+1 -1
arch/powerpc/include/asm/paca.h
··· 166 166 struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ 167 167 168 168 #ifdef CONFIG_KVM_BOOK3S_HANDLER 169 - #ifdef CONFIG_KVM_BOOK3S_PR 169 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 170 170 /* We use this to store guest state in */ 171 171 struct kvmppc_book3s_shadow_vcpu shadow_vcpu; 172 172 #endif
+2
arch/powerpc/include/asm/processor.h
··· 208 208 209 209 struct thread_struct { 210 210 unsigned long ksp; /* Kernel stack pointer */ 211 + 211 212 #ifdef CONFIG_PPC64 212 213 unsigned long ksp_vsid; 213 214 #endif ··· 222 221 void *pgdir; /* root of page-table tree */ 223 222 unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ 224 223 #endif 224 + /* Debug Registers */ 225 225 struct debug_reg debug; 226 226 struct thread_fp_state fp_state; 227 227 struct thread_fp_state *fp_save_area;
+1 -1
arch/powerpc/include/asm/pte-book3e.h
··· 40 40 #define _PAGE_U1 0x010000 41 41 #define _PAGE_U0 0x020000 42 42 #define _PAGE_ACCESSED 0x040000 43 - #define _PAGE_LENDIAN 0x080000 43 + #define _PAGE_ENDIAN 0x080000 44 44 #define _PAGE_GUARDED 0x100000 45 45 #define _PAGE_COHERENT 0x200000 /* M: enforce memory coherence */ 46 46 #define _PAGE_NO_CACHE 0x400000 /* I: cache inhibit */
+15
arch/powerpc/include/asm/reg.h
··· 248 248 #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ 249 249 #define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ 250 250 #define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ 251 + #define SPRN_TBU40 0x11E /* Timebase upper 40 bits (hyper, R/W) */ 251 252 #define SPRN_SPURR 0x134 /* Scaled PURR */ 252 253 #define SPRN_HSPRG0 0x130 /* Hypervisor Scratch 0 */ 253 254 #define SPRN_HSPRG1 0x131 /* Hypervisor Scratch 1 */ ··· 289 288 #define LPCR_ISL (1ul << (63-2)) 290 289 #define LPCR_VC_SH (63-2) 291 290 #define LPCR_DPFD_SH (63-11) 291 + #define LPCR_DPFD (7ul << LPCR_DPFD_SH) 292 292 #define LPCR_VRMASD (0x1ful << (63-16)) 293 293 #define LPCR_VRMA_L (1ul << (63-12)) 294 294 #define LPCR_VRMA_LP0 (1ul << (63-15)) ··· 306 304 #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ 307 305 #define LPCR_MER 0x00000800 /* Mediated External Exception */ 308 306 #define LPCR_MER_SH 11 307 + #define LPCR_TC 0x00000200 /* Translation control */ 309 308 #define LPCR_LPES 0x0000000c 310 309 #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ 311 310 #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ ··· 319 316 #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ 320 317 #define SPRN_HMER 0x150 /* Hardware m? error recovery */ 321 318 #define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ 319 + #define SPRN_PCR 0x152 /* Processor compatibility register */ 320 + #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. 
disable (bit NA since POWER8) */ 321 + #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ 322 + #define PCR_ARCH_205 0x2 /* Architecture 2.05 */ 322 323 #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ 323 324 #define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ 324 325 #define SPRN_TLBVPNR 0x155 /* P7 TLB control register */ ··· 432 425 #define HID4_RMLS2_SH (63 - 2) /* Real mode limit bottom 2 bits */ 433 426 #define HID4_LPID5_SH (63 - 6) /* partition ID bottom 4 bits */ 434 427 #define HID4_RMOR_SH (63 - 22) /* real mode offset (16 bits) */ 428 + #define HID4_RMOR (0xFFFFul << HID4_RMOR_SH) 435 429 #define HID4_LPES1 (1 << (63-57)) /* LPAR env. sel. bit 1 */ 436 430 #define HID4_RMLS0_SH (63 - 58) /* Real mode limit top bit */ 437 431 #define HID4_LPID1_SH 0 /* partition ID top 2 bits */ ··· 1114 1106 #define PVR_POWER8 0x004D 1115 1107 #define PVR_BE 0x0070 1116 1108 #define PVR_PA6T 0x0090 1109 + 1110 + /* "Logical" PVR values defined in PAPR, representing architecture levels */ 1111 + #define PVR_ARCH_204 0x0f000001 1112 + #define PVR_ARCH_205 0x0f000002 1113 + #define PVR_ARCH_206 0x0f000003 1114 + #define PVR_ARCH_206p 0x0f100003 1115 + #define PVR_ARCH_207 0x0f000004 1117 1116 1118 1117 /* Macros for setting and retrieving special purpose registers */ 1119 1118 #ifndef __ASSEMBLY__
+82 -4
arch/powerpc/include/uapi/asm/kvm.h
··· 27 27 #define __KVM_HAVE_PPC_SMT 28 28 #define __KVM_HAVE_IRQCHIP 29 29 #define __KVM_HAVE_IRQ_LINE 30 + #define __KVM_HAVE_GUEST_DEBUG 30 31 31 32 struct kvm_regs { 32 33 __u64 pc; ··· 270 269 __u64 fpr[32]; 271 270 }; 272 271 272 + /* 273 + * Defines for h/w breakpoint, watchpoint (read, write or both) and 274 + * software breakpoint. 275 + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status" 276 + * for KVM_DEBUG_EXIT. 277 + */ 278 + #define KVMPPC_DEBUG_NONE 0x0 279 + #define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) 280 + #define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) 281 + #define KVMPPC_DEBUG_WATCH_READ (1UL << 3) 273 282 struct kvm_debug_exit_arch { 283 + __u64 address; 284 + /* 285 + * exiting to userspace because of h/w breakpoint, watchpoint 286 + * (read, write or both) and software breakpoint. 287 + */ 288 + __u32 status; 289 + __u32 reserved; 274 290 }; 275 291 276 292 /* for KVM_SET_GUEST_DEBUG */ ··· 299 281 * Type denotes h/w breakpoint, read watchpoint, write 300 282 * watchpoint or watchpoint (both read and write). 
301 283 */ 302 - #define KVMPPC_DEBUG_NONE 0x0 303 - #define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) 304 - #define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) 305 - #define KVMPPC_DEBUG_WATCH_READ (1UL << 3) 306 284 __u32 type; 307 285 __u32 reserved; 308 286 } bp[16]; ··· 443 429 #define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10) 444 430 #define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11) 445 431 #define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12) 432 + #define KVM_REG_PPC_MMCR2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x13) 433 + #define KVM_REG_PPC_MMCRS (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x14) 434 + #define KVM_REG_PPC_SIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x15) 435 + #define KVM_REG_PPC_SDAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x16) 436 + #define KVM_REG_PPC_SIER (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x17) 446 437 447 438 #define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18) 448 439 #define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19) ··· 517 498 #define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99) 518 499 #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) 519 500 #define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) 501 + 502 + /* Timebase offset */ 503 + #define KVM_REG_PPC_TB_OFFSET (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9c) 504 + 505 + /* POWER8 registers */ 506 + #define KVM_REG_PPC_SPMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9d) 507 + #define KVM_REG_PPC_SPMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9e) 508 + #define KVM_REG_PPC_IAMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9f) 509 + #define KVM_REG_PPC_TFHAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa0) 510 + #define KVM_REG_PPC_TFIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa1) 511 + #define KVM_REG_PPC_TEXASR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa2) 512 + #define KVM_REG_PPC_FSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa3) 513 + #define KVM_REG_PPC_PSPB (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xa4) 514 + #define KVM_REG_PPC_EBBHR (KVM_REG_PPC | 
KVM_REG_SIZE_U64 | 0xa5) 515 + #define KVM_REG_PPC_EBBRR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa6) 516 + #define KVM_REG_PPC_BESCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa7) 517 + #define KVM_REG_PPC_TAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa8) 518 + #define KVM_REG_PPC_DPDES (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa9) 519 + #define KVM_REG_PPC_DAWR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa) 520 + #define KVM_REG_PPC_DAWRX (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab) 521 + #define KVM_REG_PPC_CIABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xac) 522 + #define KVM_REG_PPC_IC (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xad) 523 + #define KVM_REG_PPC_VTB (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xae) 524 + #define KVM_REG_PPC_CSIGR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaf) 525 + #define KVM_REG_PPC_TACR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb0) 526 + #define KVM_REG_PPC_TCSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1) 527 + #define KVM_REG_PPC_PID (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2) 528 + #define KVM_REG_PPC_ACOP (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3) 529 + 530 + #define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) 531 + #define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) 532 + #define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6) 533 + 534 + /* Architecture compatibility level */ 535 + #define KVM_REG_PPC_ARCH_COMPAT (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7) 536 + 537 + /* Transactional Memory checkpointed state: 538 + * This is all GPRs, all VSX regs and a subset of SPRs 539 + */ 540 + #define KVM_REG_PPC_TM (KVM_REG_PPC | 0x80000000) 541 + /* TM GPRs */ 542 + #define KVM_REG_PPC_TM_GPR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0) 543 + #define KVM_REG_PPC_TM_GPR(n) (KVM_REG_PPC_TM_GPR0 + (n)) 544 + #define KVM_REG_PPC_TM_GPR31 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x1f) 545 + /* TM VSX */ 546 + #define KVM_REG_PPC_TM_VSR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x20) 547 + #define KVM_REG_PPC_TM_VSR(n) (KVM_REG_PPC_TM_VSR0 + (n)) 548 + #define KVM_REG_PPC_TM_VSR63 (KVM_REG_PPC_TM 
| KVM_REG_SIZE_U128 | 0x5f) 549 + /* TM SPRS */ 550 + #define KVM_REG_PPC_TM_CR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x60) 551 + #define KVM_REG_PPC_TM_LR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x61) 552 + #define KVM_REG_PPC_TM_CTR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x62) 553 + #define KVM_REG_PPC_TM_FPSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x63) 554 + #define KVM_REG_PPC_TM_AMR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x64) 555 + #define KVM_REG_PPC_TM_PPR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x65) 556 + #define KVM_REG_PPC_TM_VRSAVE (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x66) 557 + #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) 558 + #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) 559 + #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) 520 560 521 561 /* PPC64 eXternal Interrupt Controller Specification */ 522 562 #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
+14 -7
arch/powerpc/kernel/asm-offsets.c
··· 439 439 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 440 440 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 441 441 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); 442 - #ifdef CONFIG_KVM_BOOK3S_64_HV 442 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 443 443 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); 444 444 DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); 445 445 DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); ··· 470 470 DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); 471 471 472 472 /* book3s */ 473 - #ifdef CONFIG_KVM_BOOK3S_64_HV 473 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 474 474 DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); 475 475 DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); 476 476 DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); ··· 502 502 DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); 503 503 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); 504 504 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); 505 + DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); 506 + DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); 505 507 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); 506 508 DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); 507 509 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); ··· 513 511 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); 514 512 DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); 515 513 DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); 514 + DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); 515 + DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); 516 516 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); 517 517 DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); 518 518 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); 519 519 DEFINE(VCORE_NAPPING_THREADS, 
offsetof(struct kvmppc_vcore, napping_threads)); 520 - DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - 521 - offsetof(struct kvmppc_vcpu_book3s, vcpu)); 520 + DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); 521 + DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); 522 + DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); 522 523 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); 523 524 DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); 524 525 DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); 525 526 526 527 #ifdef CONFIG_PPC_BOOK3S_64 527 - #ifdef CONFIG_KVM_BOOK3S_PR 528 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 529 + DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); 528 530 # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) 529 531 #else 530 532 # define SVCPU_FIELD(x, f) ··· 580 574 HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); 581 575 HSTATE_FIELD(HSTATE_NAPPING, napping); 582 576 583 - #ifdef CONFIG_KVM_BOOK3S_64_HV 577 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 584 578 HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); 585 579 HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); 586 580 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); ··· 596 590 HSTATE_FIELD(HSTATE_DABR, dabr); 597 591 HSTATE_FIELD(HSTATE_DECEXP, dec_expires); 598 592 DEFINE(IPI_PRIORITY, IPI_PRIORITY); 599 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 593 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 600 594 601 595 #ifdef CONFIG_PPC_BOOK3S_64 602 596 HSTATE_FIELD(HSTATE_CFAR, cfar); 597 + HSTATE_FIELD(HSTATE_PPR, ppr); 603 598 #endif /* CONFIG_PPC_BOOK3S_64 */ 604 599 605 600 #else /* CONFIG_PPC_BOOK3S */
+28 -2
arch/powerpc/kernel/exceptions-64s.S
··· 126 126 bgt cr1,. 127 127 GET_PACA(r13) 128 128 129 - #ifdef CONFIG_KVM_BOOK3S_64_HV 129 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 130 130 li r0,KVM_HWTHREAD_IN_KERNEL 131 131 stb r0,HSTATE_HWTHREAD_STATE(r13) 132 132 /* Order setting hwthread_state vs. testing hwthread_req */ ··· 425 425 mfspr r9,SPRN_DSISR 426 426 srdi r10,r10,60 427 427 rlwimi r10,r9,16,0x20 428 - #ifdef CONFIG_KVM_BOOK3S_PR 428 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 429 429 lbz r9,HSTATE_IN_GUEST(r13) 430 430 rlwimi r10,r9,8,0x300 431 431 #endif ··· 649 649 rfid 650 650 b . /* prevent spec. execution */ 651 651 #endif /* __DISABLED__ */ 652 + 653 + #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 654 + kvmppc_skip_interrupt: 655 + /* 656 + * Here all GPRs are unchanged from when the interrupt happened 657 + * except for r13, which is saved in SPRG_SCRATCH0. 658 + */ 659 + mfspr r13, SPRN_SRR0 660 + addi r13, r13, 4 661 + mtspr SPRN_SRR0, r13 662 + GET_SCRATCH0(r13) 663 + rfid 664 + b . 665 + 666 + kvmppc_skip_Hinterrupt: 667 + /* 668 + * Here all GPRs are unchanged from when the interrupt happened 669 + * except for r13, which is saved in SPRG_SCRATCH0. 670 + */ 671 + mfspr r13, SPRN_HSRR0 672 + addi r13, r13, 4 673 + mtspr SPRN_HSRR0, r13 674 + GET_SCRATCH0(r13) 675 + hrfid 676 + b . 677 + #endif 652 678 653 679 /* 654 680 * Code from here down to __end_handlers is invoked from the
+1 -1
arch/powerpc/kernel/idle_power7.S
··· 84 84 std r9,_MSR(r1) 85 85 std r1,PACAR1(r13) 86 86 87 - #ifdef CONFIG_KVM_BOOK3S_64_HV 87 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 88 88 /* Tell KVM we're napping */ 89 89 li r4,KVM_HWTHREAD_IN_NAP 90 90 stb r4,HSTATE_HWTHREAD_STATE(r13)
+1 -1
arch/powerpc/kernel/traps.c
··· 1529 1529 * back on or not. 1530 1530 */ 1531 1531 if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, 1532 - current->thread.debug.dbcr1)) 1532 + current->thread.debug.dbcr1)) 1533 1533 regs->msr |= MSR_DE; 1534 1534 else 1535 1535 /* Make sure the IDM flag is off */
+43 -15
arch/powerpc/kvm/44x.c
··· 31 31 #include "44x_tlb.h" 32 32 #include "booke.h" 33 33 34 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 34 + static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu) 35 35 { 36 36 kvmppc_booke_vcpu_load(vcpu, cpu); 37 37 kvmppc_44x_tlb_load(vcpu); 38 38 } 39 39 40 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 40 + static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu) 41 41 { 42 42 kvmppc_44x_tlb_put(vcpu); 43 43 kvmppc_booke_vcpu_put(vcpu); ··· 114 114 return 0; 115 115 } 116 116 117 - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 117 + static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu, 118 + struct kvm_sregs *sregs) 118 119 { 119 - kvmppc_get_sregs_ivor(vcpu, sregs); 120 + return kvmppc_get_sregs_ivor(vcpu, sregs); 120 121 } 121 122 122 - int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 123 + static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu, 124 + struct kvm_sregs *sregs) 123 125 { 124 126 return kvmppc_set_sregs_ivor(vcpu, sregs); 125 127 } 126 128 127 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, 128 - union kvmppc_one_reg *val) 129 + static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, 130 + union kvmppc_one_reg *val) 129 131 { 130 132 return -EINVAL; 131 133 } 132 134 133 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, 134 - union kvmppc_one_reg *val) 135 + static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, 136 + union kvmppc_one_reg *val) 135 137 { 136 138 return -EINVAL; 137 139 } 138 140 139 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 141 + static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm, 142 + unsigned int id) 140 143 { 141 144 struct kvmppc_vcpu_44x *vcpu_44x; 142 145 struct kvm_vcpu *vcpu; ··· 170 167 return ERR_PTR(err); 171 168 } 172 169 173 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 170 + static void 
kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu) 174 171 { 175 172 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 176 173 ··· 179 176 kmem_cache_free(kvm_vcpu_cache, vcpu_44x); 180 177 } 181 178 182 - int kvmppc_core_init_vm(struct kvm *kvm) 179 + static int kvmppc_core_init_vm_44x(struct kvm *kvm) 183 180 { 184 181 return 0; 185 182 } 186 183 187 - void kvmppc_core_destroy_vm(struct kvm *kvm) 184 + static void kvmppc_core_destroy_vm_44x(struct kvm *kvm) 188 185 { 189 186 } 187 + 188 + static struct kvmppc_ops kvm_ops_44x = { 189 + .get_sregs = kvmppc_core_get_sregs_44x, 190 + .set_sregs = kvmppc_core_set_sregs_44x, 191 + .get_one_reg = kvmppc_get_one_reg_44x, 192 + .set_one_reg = kvmppc_set_one_reg_44x, 193 + .vcpu_load = kvmppc_core_vcpu_load_44x, 194 + .vcpu_put = kvmppc_core_vcpu_put_44x, 195 + .vcpu_create = kvmppc_core_vcpu_create_44x, 196 + .vcpu_free = kvmppc_core_vcpu_free_44x, 197 + .mmu_destroy = kvmppc_mmu_destroy_44x, 198 + .init_vm = kvmppc_core_init_vm_44x, 199 + .destroy_vm = kvmppc_core_destroy_vm_44x, 200 + .emulate_op = kvmppc_core_emulate_op_44x, 201 + .emulate_mtspr = kvmppc_core_emulate_mtspr_44x, 202 + .emulate_mfspr = kvmppc_core_emulate_mfspr_44x, 203 + }; 190 204 191 205 static int __init kvmppc_44x_init(void) 192 206 { ··· 211 191 212 192 r = kvmppc_booke_init(); 213 193 if (r) 214 - return r; 194 + goto err_out; 215 195 216 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); 196 + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); 197 + if (r) 198 + goto err_out; 199 + kvm_ops_44x.owner = THIS_MODULE; 200 + kvmppc_pr_ops = &kvm_ops_44x; 201 + 202 + err_out: 203 + return r; 217 204 } 218 205 219 206 static void __exit kvmppc_44x_exit(void) 220 207 { 208 + kvmppc_pr_ops = NULL; 221 209 kvmppc_booke_exit(); 222 210 } 223 211
+4 -4
arch/powerpc/kvm/44x_emulate.c
··· 91 91 return EMULATE_DONE; 92 92 } 93 93 94 - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 95 - unsigned int inst, int *advance) 94 + int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, 95 + unsigned int inst, int *advance) 96 96 { 97 97 int emulated = EMULATE_DONE; 98 98 int dcrn = get_dcrn(inst); ··· 152 152 return emulated; 153 153 } 154 154 155 - int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 155 + int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 156 156 { 157 157 int emulated = EMULATE_DONE; 158 158 ··· 172 172 return emulated; 173 173 } 174 174 175 - int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 175 + int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 176 176 { 177 177 int emulated = EMULATE_DONE; 178 178
+1 -1
arch/powerpc/kvm/44x_tlb.c
··· 268 268 trace_kvm_stlb_inval(stlb_index); 269 269 } 270 270 271 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 271 + void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu) 272 272 { 273 273 struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 274 274 int i;
+22 -6
arch/powerpc/kvm/Kconfig
··· 35 35 bool 36 36 select KVM_BOOK3S_HANDLER 37 37 38 - config KVM_BOOK3S_PR 38 + config KVM_BOOK3S_PR_POSSIBLE 39 39 bool 40 40 select KVM_MMIO 41 41 select MMU_NOTIFIER 42 + 43 + config KVM_BOOK3S_HV_POSSIBLE 44 + bool 42 45 43 46 config KVM_BOOK3S_32 44 47 tristate "KVM support for PowerPC book3s_32 processors" 45 48 depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT 46 49 select KVM 47 50 select KVM_BOOK3S_32_HANDLER 48 - select KVM_BOOK3S_PR 51 + select KVM_BOOK3S_PR_POSSIBLE 49 52 ---help--- 50 53 Support running unmodified book3s_32 guest kernels 51 54 in virtual machines on book3s_32 host processors. ··· 63 60 depends on PPC_BOOK3S_64 64 61 select KVM_BOOK3S_64_HANDLER 65 62 select KVM 63 + select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE 66 64 ---help--- 67 65 Support running unmodified book3s_64 and book3s_32 guest kernels 68 66 in virtual machines on book3s_64 host processors. ··· 74 70 If unsure, say N. 75 71 76 72 config KVM_BOOK3S_64_HV 77 - bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" 73 + tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" 78 74 depends on KVM_BOOK3S_64 75 + select KVM_BOOK3S_HV_POSSIBLE 79 76 select MMU_NOTIFIER 80 77 select CMA 81 78 ---help--- ··· 95 90 If unsure, say N. 96 91 97 92 config KVM_BOOK3S_64_PR 98 - def_bool y 99 - depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV 100 - select KVM_BOOK3S_PR 93 + tristate "KVM support without using hypervisor mode in host" 94 + depends on KVM_BOOK3S_64 95 + select KVM_BOOK3S_PR_POSSIBLE 96 + ---help--- 97 + Support running guest kernels in virtual machines on processors 98 + without using hypervisor mode in the host, by running the 99 + guest in user mode (problem state) and emulating all 100 + privileged instructions and registers. 
101 + 102 + This is not as fast as using hypervisor mode, but works on 103 + machines where hypervisor mode is not available or not usable, 104 + and can emulate processors that are different from the host 105 + processor, including emulating 32-bit processors on a 64-bit 106 + host. 101 107 102 108 config KVM_BOOKE_HV 103 109 bool
+22 -9
arch/powerpc/kvm/Makefile
··· 53 53 e500_emulate.o 54 54 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) 55 55 56 - kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ 57 - $(KVM)/coalesced_mmio.o \ 56 + kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ 57 + book3s_64_vio_hv.o 58 + 59 + kvm-pr-y := \ 58 60 fpu.o \ 59 61 book3s_paired_singles.o \ 60 62 book3s_pr.o \ 61 63 book3s_pr_papr.o \ 62 - book3s_64_vio_hv.o \ 63 64 book3s_emulate.o \ 64 65 book3s_interrupts.o \ 65 66 book3s_mmu_hpte.o \ 66 67 book3s_64_mmu_host.o \ 67 68 book3s_64_mmu.o \ 68 69 book3s_32_mmu.o 69 - kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ 70 - book3s_rmhandlers.o 71 70 72 - kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 71 + ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 72 + kvm-book3s_64-module-objs := \ 73 + $(KVM)/coalesced_mmio.o 74 + 75 + kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ 76 + book3s_rmhandlers.o 77 + endif 78 + 79 + kvm-hv-y += \ 73 80 book3s_hv.o \ 74 81 book3s_hv_interrupts.o \ 75 82 book3s_64_mmu_hv.o 83 + 76 84 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ 77 85 book3s_hv_rm_xics.o 78 - kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ 86 + 87 + ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 88 + kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ 79 89 book3s_hv_rmhandlers.o \ 80 90 book3s_hv_rm_mmu.o \ 81 - book3s_64_vio_hv.o \ 82 91 book3s_hv_ras.o \ 83 92 book3s_hv_builtin.o \ 84 93 book3s_hv_cma.o \ 85 94 $(kvm-book3s_64-builtin-xics-objs-y) 95 + endif 86 96 87 97 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ 88 98 book3s_xics.o 89 99 90 - kvm-book3s_64-module-objs := \ 100 + kvm-book3s_64-module-objs += \ 91 101 $(KVM)/kvm_main.o \ 92 102 $(KVM)/eventfd.o \ 93 103 powerpc.o \ ··· 132 122 obj-$(CONFIG_KVM_E500MC) += kvm.o 133 123 obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o 134 124 obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o 125 + 126 + obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o 127 + obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o 135 128 136 129 
obj-y += $(kvm-book3s_64-builtin-objs-y)
+246 -11
arch/powerpc/kvm/book3s.c
··· 34 34 #include <linux/vmalloc.h> 35 35 #include <linux/highmem.h> 36 36 37 + #include "book3s.h" 37 38 #include "trace.h" 38 39 39 40 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU ··· 68 67 69 68 void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) 70 69 { 70 + } 71 + 72 + static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) 73 + { 74 + if (!is_kvmppc_hv_enabled(vcpu->kvm)) 75 + return to_book3s(vcpu)->hior; 76 + return 0; 77 + } 78 + 79 + static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, 80 + unsigned long pending_now, unsigned long old_pending) 81 + { 82 + if (is_kvmppc_hv_enabled(vcpu->kvm)) 83 + return; 84 + if (pending_now) 85 + vcpu->arch.shared->int_pending = 1; 86 + else if (old_pending) 87 + vcpu->arch.shared->int_pending = 0; 88 + } 89 + 90 + static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) 91 + { 92 + ulong crit_raw; 93 + ulong crit_r1; 94 + bool crit; 95 + 96 + if (is_kvmppc_hv_enabled(vcpu->kvm)) 97 + return false; 98 + 99 + crit_raw = vcpu->arch.shared->critical; 100 + crit_r1 = kvmppc_get_gpr(vcpu, 1); 101 + 102 + /* Truncate crit indicators in 32 bit mode */ 103 + if (!(vcpu->arch.shared->msr & MSR_SF)) { 104 + crit_raw &= 0xffffffff; 105 + crit_r1 &= 0xffffffff; 106 + } 107 + 108 + /* Critical section when crit == r1 */ 109 + crit = (crit_raw == crit_r1); 110 + /* ... 
and we're in supervisor mode */ 111 + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); 112 + 113 + return crit; 71 114 } 72 115 73 116 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) ··· 171 126 printk(KERN_INFO "Queueing interrupt %x\n", vec); 172 127 #endif 173 128 } 174 - 129 + EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio); 175 130 176 131 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) 177 132 { 178 133 /* might as well deliver this straight away */ 179 134 kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags); 180 135 } 136 + EXPORT_SYMBOL_GPL(kvmppc_core_queue_program); 181 137 182 138 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 183 139 { 184 140 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 185 141 } 142 + EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec); 186 143 187 144 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) 188 145 { 189 146 return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 190 147 } 148 + EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec); 191 149 192 150 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) 193 151 { 194 152 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 195 153 } 154 + EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec); 196 155 197 156 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 198 157 struct kvm_interrupt *irq) ··· 334 285 335 286 return 0; 336 287 } 288 + EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); 337 289 338 - pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) 290 + pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, 291 + bool *writable) 339 292 { 340 293 ulong mp_pa = vcpu->arch.magic_page_pa; 341 294 ··· 353 302 354 303 pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; 355 304 get_page(pfn_to_page(pfn)); 305 + if (writable) 306 + *writable = true; 356 307 return pfn; 357 308 } 358 309 359 - return gfn_to_pfn(vcpu->kvm, gfn); 310 + return gfn_to_pfn_prot(vcpu->kvm, 
gfn, writing, writable); 360 311 } 312 + EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn); 361 313 362 314 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, 363 - struct kvmppc_pte *pte) 315 + bool iswrite, struct kvmppc_pte *pte) 364 316 { 365 317 int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); 366 318 int r; 367 319 368 320 if (relocated) { 369 - r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); 321 + r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite); 370 322 } else { 371 323 pte->eaddr = eaddr; 372 324 pte->raddr = eaddr & KVM_PAM; ··· 415 361 416 362 vcpu->stat.st++; 417 363 418 - if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 364 + if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte)) 419 365 return -ENOENT; 420 366 421 367 *eaddr = pte.raddr; ··· 428 374 429 375 return EMULATE_DONE; 430 376 } 377 + EXPORT_SYMBOL_GPL(kvmppc_st); 431 378 432 379 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, 433 380 bool data) ··· 438 383 439 384 vcpu->stat.ld++; 440 385 441 - if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) 386 + if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte)) 442 387 goto nopte; 443 388 444 389 *eaddr = pte.raddr; ··· 459 404 mmio: 460 405 return EMULATE_DO_MMIO; 461 406 } 407 + EXPORT_SYMBOL_GPL(kvmppc_ld); 462 408 463 409 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 464 410 { ··· 473 417 474 418 void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) 475 419 { 420 + } 421 + 422 + int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 423 + struct kvm_sregs *sregs) 424 + { 425 + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); 426 + } 427 + 428 + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 429 + struct kvm_sregs *sregs) 430 + { 431 + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); 476 432 } 477 433 478 434 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ··· 563 495 if (size > sizeof(val)) 564 496 return -EINVAL; 565 497 566 - r = 
kvmppc_get_one_reg(vcpu, reg->id, &val); 567 - 498 + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); 568 499 if (r == -EINVAL) { 569 500 r = 0; 570 501 switch (reg->id) { ··· 594 527 break; 595 528 } 596 529 val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); 530 + break; 531 + case KVM_REG_PPC_VRSAVE: 532 + val = get_reg_val(reg->id, vcpu->arch.vrsave); 597 533 break; 598 534 #endif /* CONFIG_ALTIVEC */ 599 535 case KVM_REG_PPC_DEBUG_INST: { ··· 642 572 if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) 643 573 return -EFAULT; 644 574 645 - r = kvmppc_set_one_reg(vcpu, reg->id, &val); 646 - 575 + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); 647 576 if (r == -EINVAL) { 648 577 r = 0; 649 578 switch (reg->id) { ··· 674 605 } 675 606 vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); 676 607 break; 608 + case KVM_REG_PPC_VRSAVE: 609 + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { 610 + r = -ENXIO; 611 + break; 612 + } 613 + vcpu->arch.vrsave = set_reg_val(reg->id, val); 614 + break; 677 615 #endif /* CONFIG_ALTIVEC */ 678 616 #ifdef CONFIG_KVM_XICS 679 617 case KVM_REG_PPC_ICP_STATE: ··· 699 623 } 700 624 701 625 return r; 626 + } 627 + 628 + void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 629 + { 630 + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); 631 + } 632 + 633 + void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 634 + { 635 + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); 636 + } 637 + 638 + void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 639 + { 640 + vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr); 641 + } 642 + EXPORT_SYMBOL_GPL(kvmppc_set_msr); 643 + 644 + int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 645 + { 646 + return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu); 702 647 } 703 648 704 649 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, ··· 741 644 kvmppc_core_queue_dec(vcpu); 742 645 kvm_vcpu_kick(vcpu); 743 646 } 647 + 648 + struct kvm_vcpu 
*kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 649 + { 650 + return kvm->arch.kvm_ops->vcpu_create(kvm, id); 651 + } 652 + 653 + void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 654 + { 655 + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); 656 + } 657 + 658 + int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) 659 + { 660 + return vcpu->kvm->arch.kvm_ops->check_requests(vcpu); 661 + } 662 + 663 + int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) 664 + { 665 + return kvm->arch.kvm_ops->get_dirty_log(kvm, log); 666 + } 667 + 668 + void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 669 + struct kvm_memory_slot *dont) 670 + { 671 + kvm->arch.kvm_ops->free_memslot(free, dont); 672 + } 673 + 674 + int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 675 + unsigned long npages) 676 + { 677 + return kvm->arch.kvm_ops->create_memslot(slot, npages); 678 + } 679 + 680 + void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) 681 + { 682 + kvm->arch.kvm_ops->flush_memslot(kvm, memslot); 683 + } 684 + 685 + int kvmppc_core_prepare_memory_region(struct kvm *kvm, 686 + struct kvm_memory_slot *memslot, 687 + struct kvm_userspace_memory_region *mem) 688 + { 689 + return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); 690 + } 691 + 692 + void kvmppc_core_commit_memory_region(struct kvm *kvm, 693 + struct kvm_userspace_memory_region *mem, 694 + const struct kvm_memory_slot *old) 695 + { 696 + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); 697 + } 698 + 699 + int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 700 + { 701 + return kvm->arch.kvm_ops->unmap_hva(kvm, hva); 702 + } 703 + EXPORT_SYMBOL_GPL(kvm_unmap_hva); 704 + 705 + int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 706 + { 707 + return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); 708 + } 709 + 710 + int kvm_age_hva(struct kvm *kvm, unsigned long hva) 
711 + { 712 + return kvm->arch.kvm_ops->age_hva(kvm, hva); 713 + } 714 + 715 + int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 716 + { 717 + return kvm->arch.kvm_ops->test_age_hva(kvm, hva); 718 + } 719 + 720 + void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 721 + { 722 + kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte); 723 + } 724 + 725 + void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 726 + { 727 + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); 728 + } 729 + 730 + int kvmppc_core_init_vm(struct kvm *kvm) 731 + { 732 + 733 + #ifdef CONFIG_PPC64 734 + INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 735 + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); 736 + #endif 737 + 738 + return kvm->arch.kvm_ops->init_vm(kvm); 739 + } 740 + 741 + void kvmppc_core_destroy_vm(struct kvm *kvm) 742 + { 743 + kvm->arch.kvm_ops->destroy_vm(kvm); 744 + 745 + #ifdef CONFIG_PPC64 746 + kvmppc_rtas_tokens_free(kvm); 747 + WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); 748 + #endif 749 + } 750 + 751 + int kvmppc_core_check_processor_compat(void) 752 + { 753 + /* 754 + * We always return 0 for book3s. We check 755 + * for compatability while loading the HV 756 + * or PR module 757 + */ 758 + return 0; 759 + } 760 + 761 + static int kvmppc_book3s_init(void) 762 + { 763 + int r; 764 + 765 + r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 766 + if (r) 767 + return r; 768 + #ifdef CONFIG_KVM_BOOK3S_32 769 + r = kvmppc_book3s_init_pr(); 770 + #endif 771 + return r; 772 + 773 + } 774 + 775 + static void kvmppc_book3s_exit(void) 776 + { 777 + #ifdef CONFIG_KVM_BOOK3S_32 778 + kvmppc_book3s_exit_pr(); 779 + #endif 780 + kvm_exit(); 781 + } 782 + 783 + module_init(kvmppc_book3s_init); 784 + module_exit(kvmppc_book3s_exit);
+34
arch/powerpc/kvm/book3s.h
··· 1 + /* 2 + * Copyright IBM Corporation, 2013 3 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or 6 + * modify it under the terms of the GNU General Public License as 7 + * published by the Free Software Foundation; either version 2 of the 8 + * License or (at your optional) any later version of the license. 9 + * 10 + */ 11 + 12 + #ifndef __POWERPC_KVM_BOOK3S_H__ 13 + #define __POWERPC_KVM_BOOK3S_H__ 14 + 15 + extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, 16 + struct kvm_memory_slot *memslot); 17 + extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); 18 + extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, 19 + unsigned long end); 20 + extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); 21 + extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); 22 + extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); 23 + 24 + extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu); 25 + extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 26 + unsigned int inst, int *advance); 27 + extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, 28 + int sprn, ulong spr_val); 29 + extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, 30 + int sprn, ulong *spr_val); 31 + extern int kvmppc_book3s_init_pr(void); 32 + extern void kvmppc_book3s_exit_pr(void); 33 + 34 + #endif
+41 -30
arch/powerpc/kvm/book3s_32_mmu.c
··· 84 84 } 85 85 86 86 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, 87 - struct kvmppc_pte *pte, bool data); 87 + struct kvmppc_pte *pte, bool data, 88 + bool iswrite); 88 89 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 89 90 u64 *vsid); 90 91 ··· 100 99 u64 vsid; 101 100 struct kvmppc_pte pte; 102 101 103 - if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) 102 + if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false)) 104 103 return pte.vpage; 105 104 106 105 kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); ··· 112 111 kvmppc_set_msr(vcpu, 0); 113 112 } 114 113 115 - static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s, 114 + static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu, 116 115 u32 sre, gva_t eaddr, 117 116 bool primary) 118 117 { 118 + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 119 119 u32 page, hash, pteg, htabmask; 120 120 hva_t r; 121 121 ··· 134 132 kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, 135 133 sr_vsid(sre)); 136 134 137 - r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); 135 + r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT); 138 136 if (kvm_is_error_hva(r)) 139 137 return r; 140 138 return r | (pteg & ~PAGE_MASK); ··· 147 145 } 148 146 149 147 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, 150 - struct kvmppc_pte *pte, bool data) 148 + struct kvmppc_pte *pte, bool data, 149 + bool iswrite) 151 150 { 152 151 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 153 152 struct kvmppc_bat *bat; ··· 189 186 printk(KERN_INFO "BAT is not readable!\n"); 190 187 continue; 191 188 } 192 - if (!pte->may_write) { 193 - /* let's treat r/o BATs as not-readable for now */ 189 + if (iswrite && !pte->may_write) { 194 190 dprintk_pte("BAT is read-only!\n"); 195 191 continue; 196 192 } ··· 203 201 204 202 static int 
kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, 205 203 struct kvmppc_pte *pte, bool data, 206 - bool primary) 204 + bool iswrite, bool primary) 207 205 { 208 - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 209 206 u32 sre; 210 207 hva_t ptegp; 211 208 u32 pteg[16]; ··· 219 218 220 219 pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); 221 220 222 - ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary); 221 + ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary); 223 222 if (kvm_is_error_hva(ptegp)) { 224 223 printk(KERN_INFO "KVM: Invalid PTEG!\n"); 225 224 goto no_page_found; ··· 259 258 break; 260 259 } 261 260 262 - if ( !pte->may_read ) 263 - continue; 264 - 265 261 dprintk_pte("MMU: Found PTE -> %x %x - %x\n", 266 262 pteg[i], pteg[i+1], pp); 267 263 found = 1; ··· 269 271 /* Update PTE C and A bits, so the guest's swapper knows we used the 270 272 page */ 271 273 if (found) { 272 - u32 oldpte = pteg[i+1]; 274 + u32 pte_r = pteg[i+1]; 275 + char __user *addr = (char __user *) &pteg[i+1]; 273 276 274 - if (pte->may_read) 275 - pteg[i+1] |= PTEG_FLAG_ACCESSED; 276 - if (pte->may_write) 277 - pteg[i+1] |= PTEG_FLAG_DIRTY; 278 - else 279 - dprintk_pte("KVM: Mapping read-only page!\n"); 280 - 281 - /* Write back into the PTEG */ 282 - if (pteg[i+1] != oldpte) 283 - copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); 284 - 277 + /* 278 + * Use single-byte writes to update the HPTE, to 279 + * conform to what real hardware does. 
280 + */ 281 + if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) { 282 + pte_r |= PTEG_FLAG_ACCESSED; 283 + put_user(pte_r >> 8, addr + 2); 284 + } 285 + if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) { 286 + pte_r |= PTEG_FLAG_DIRTY; 287 + put_user(pte_r, addr + 3); 288 + } 289 + if (!pte->may_read || (iswrite && !pte->may_write)) 290 + return -EPERM; 285 291 return 0; 286 292 } 287 293 ··· 304 302 } 305 303 306 304 static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 307 - struct kvmppc_pte *pte, bool data) 305 + struct kvmppc_pte *pte, bool data, 306 + bool iswrite) 308 307 { 309 308 int r; 310 309 ulong mp_ea = vcpu->arch.magic_page_ea; 311 310 312 311 pte->eaddr = eaddr; 312 + pte->page_size = MMU_PAGE_4K; 313 313 314 314 /* Magic page override */ 315 315 if (unlikely(mp_ea) && ··· 327 323 return 0; 328 324 } 329 325 330 - r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data); 326 + r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite); 331 327 if (r < 0) 332 - r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true); 328 + r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, 329 + data, iswrite, true); 333 330 if (r < 0) 334 - r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false); 331 + r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, 332 + data, iswrite, false); 335 333 336 334 return r; 337 335 } ··· 353 347 354 348 static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) 355 349 { 356 - kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); 350 + int i; 351 + struct kvm_vcpu *v; 352 + 353 + /* flush this VA on all cpus */ 354 + kvm_for_each_vcpu(i, v, vcpu->kvm) 355 + kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000); 357 356 } 358 357 359 358 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
+12 -4
arch/powerpc/kvm/book3s_32_mmu_host.c
··· 138 138 139 139 extern char etext[]; 140 140 141 - int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) 141 + int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, 142 + bool iswrite) 142 143 { 143 144 pfn_t hpaddr; 144 145 u64 vpn; ··· 153 152 bool evict = false; 154 153 struct hpte_cache *pte; 155 154 int r = 0; 155 + bool writable; 156 156 157 157 /* Get host physical address for gpa */ 158 - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 158 + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, 159 + iswrite, &writable); 159 160 if (is_error_noslot_pfn(hpaddr)) { 160 161 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", 161 162 orig_pte->eaddr); ··· 207 204 (primary ? 0 : PTE_SEC); 208 205 pteg1 = hpaddr | PTE_M | PTE_R | PTE_C; 209 206 210 - if (orig_pte->may_write) { 207 + if (orig_pte->may_write && writable) { 211 208 pteg1 |= PP_RWRW; 212 209 mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 213 210 } else { ··· 260 257 kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); 261 258 out: 262 259 return r; 260 + } 261 + 262 + void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) 263 + { 264 + kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL); 263 265 } 264 266 265 267 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) ··· 349 341 svcpu_put(svcpu); 350 342 } 351 343 352 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 344 + void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) 353 345 { 354 346 int i; 355 347
+150 -35
arch/powerpc/kvm/book3s_64_mmu.c
··· 107 107 return kvmppc_slb_calc_vpn(slb, eaddr); 108 108 } 109 109 110 + static int mmu_pagesize(int mmu_pg) 111 + { 112 + switch (mmu_pg) { 113 + case MMU_PAGE_64K: 114 + return 16; 115 + case MMU_PAGE_16M: 116 + return 24; 117 + } 118 + return 12; 119 + } 120 + 110 121 static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe) 111 122 { 112 - return slbe->large ? 24 : 12; 123 + return mmu_pagesize(slbe->base_page_size); 113 124 } 114 125 115 126 static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr) ··· 130 119 return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p); 131 120 } 132 121 133 - static hva_t kvmppc_mmu_book3s_64_get_pteg( 134 - struct kvmppc_vcpu_book3s *vcpu_book3s, 122 + static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu, 135 123 struct kvmppc_slb *slbe, gva_t eaddr, 136 124 bool second) 137 125 { 126 + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 138 127 u64 hash, pteg, htabsize; 139 128 u32 ssize; 140 129 hva_t r; ··· 159 148 160 149 /* When running a PAPR guest, SDR1 contains a HVA address instead 161 150 of a GPA */ 162 - if (vcpu_book3s->vcpu.arch.papr_enabled) 151 + if (vcpu->arch.papr_enabled) 163 152 r = pteg; 164 153 else 165 - r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); 154 + r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT); 166 155 167 156 if (kvm_is_error_hva(r)) 168 157 return r; ··· 177 166 avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr); 178 167 avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p); 179 168 180 - if (p < 24) 181 - avpn >>= ((80 - p) - 56) - 8; 169 + if (p < 16) 170 + avpn >>= ((80 - p) - 56) - 8; /* 16 - p */ 182 171 else 183 - avpn <<= 8; 172 + avpn <<= p - 16; 184 173 185 174 return avpn; 186 175 } 187 176 188 - static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 189 - struct kvmppc_pte *gpte, bool data) 177 + /* 178 + * Return page size encoded in the second word of a HPTE, or 179 + * -1 for an invalid encoding for 
the base page size indicated by 180 + * the SLB entry. This doesn't handle mixed pagesize segments yet. 181 + */ 182 + static int decode_pagesize(struct kvmppc_slb *slbe, u64 r) 190 183 { 191 - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 184 + switch (slbe->base_page_size) { 185 + case MMU_PAGE_64K: 186 + if ((r & 0xf000) == 0x1000) 187 + return MMU_PAGE_64K; 188 + break; 189 + case MMU_PAGE_16M: 190 + if ((r & 0xff000) == 0) 191 + return MMU_PAGE_16M; 192 + break; 193 + } 194 + return -1; 195 + } 196 + 197 + static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 198 + struct kvmppc_pte *gpte, bool data, 199 + bool iswrite) 200 + { 192 201 struct kvmppc_slb *slbe; 193 202 hva_t ptegp; 194 203 u64 pteg[16]; ··· 220 189 u8 pp, key = 0; 221 190 bool found = false; 222 191 bool second = false; 192 + int pgsize; 223 193 ulong mp_ea = vcpu->arch.magic_page_ea; 224 194 225 195 /* Magic page override */ ··· 234 202 gpte->may_execute = true; 235 203 gpte->may_read = true; 236 204 gpte->may_write = true; 205 + gpte->page_size = MMU_PAGE_4K; 237 206 238 207 return 0; 239 208 } ··· 255 222 v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID | 256 223 HPTE_V_SECONDARY; 257 224 225 + pgsize = slbe->large ? 
MMU_PAGE_16M : MMU_PAGE_4K; 226 + 227 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 228 + 258 229 do_second: 259 - ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second); 230 + ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second); 260 231 if (kvm_is_error_hva(ptegp)) 261 232 goto no_page_found; 262 233 ··· 277 240 for (i=0; i<16; i+=2) { 278 241 /* Check all relevant fields of 1st dword */ 279 242 if ((pteg[i] & v_mask) == v_val) { 243 + /* If large page bit is set, check pgsize encoding */ 244 + if (slbe->large && 245 + (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { 246 + pgsize = decode_pagesize(slbe, pteg[i+1]); 247 + if (pgsize < 0) 248 + continue; 249 + } 280 250 found = true; 281 251 break; 282 252 } ··· 300 256 v = pteg[i]; 301 257 r = pteg[i+1]; 302 258 pp = (r & HPTE_R_PP) | key; 303 - eaddr_mask = 0xFFF; 259 + if (r & HPTE_R_PP0) 260 + pp |= 8; 304 261 305 262 gpte->eaddr = eaddr; 306 263 gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); 307 - if (slbe->large) 308 - eaddr_mask = 0xFFFFFF; 264 + 265 + eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1; 309 266 gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask); 267 + gpte->page_size = pgsize; 310 268 gpte->may_execute = ((r & HPTE_R_N) ? false : true); 311 269 gpte->may_read = false; 312 270 gpte->may_write = false; ··· 323 277 case 3: 324 278 case 5: 325 279 case 7: 280 + case 10: 326 281 gpte->may_read = true; 327 282 break; 328 283 } ··· 334 287 335 288 /* Update PTE R and C bits, so the guest's swapper knows we used the 336 289 * page */ 337 - if (gpte->may_read) { 338 - /* Set the accessed flag */ 290 + if (gpte->may_read && !(r & HPTE_R_R)) { 291 + /* 292 + * Set the accessed flag. 293 + * We have to write this back with a single byte write 294 + * because another vcpu may be accessing this on 295 + * non-PAPR platforms such as mac99, and this is 296 + * what real hardware does. 
297 + */ 298 + char __user *addr = (char __user *) &pteg[i+1]; 339 299 r |= HPTE_R_R; 300 + put_user(r >> 8, addr + 6); 340 301 } 341 - if (data && gpte->may_write) { 342 - /* Set the dirty flag -- XXX even if not writing */ 302 + if (iswrite && gpte->may_write && !(r & HPTE_R_C)) { 303 + /* Set the dirty flag */ 304 + /* Use a single byte write */ 305 + char __user *addr = (char __user *) &pteg[i+1]; 343 306 r |= HPTE_R_C; 307 + put_user(r, addr + 7); 344 308 } 345 309 346 - /* Write back into the PTEG */ 347 - if (pteg[i+1] != r) { 348 - pteg[i+1] = r; 349 - copy_to_user((void __user *)ptegp, pteg, sizeof(pteg)); 350 - } 310 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 351 311 352 - if (!gpte->may_read) 312 + if (!gpte->may_read || (iswrite && !gpte->may_write)) 353 313 return -EPERM; 354 314 return 0; 355 315 356 316 no_page_found: 317 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 357 318 return -ENOENT; 358 319 359 320 no_seg_found: 360 - 361 321 dprintk("KVM MMU: Trigger segment fault\n"); 362 322 return -EINVAL; 363 323 } ··· 398 344 slbe->Kp = (rs & SLB_VSID_KP) ? 1 : 0; 399 345 slbe->nx = (rs & SLB_VSID_N) ? 1 : 0; 400 346 slbe->class = (rs & SLB_VSID_C) ? 
1 : 0; 347 + 348 + slbe->base_page_size = MMU_PAGE_4K; 349 + if (slbe->large) { 350 + if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) { 351 + switch (rs & SLB_VSID_LP) { 352 + case SLB_VSID_LP_00: 353 + slbe->base_page_size = MMU_PAGE_16M; 354 + break; 355 + case SLB_VSID_LP_01: 356 + slbe->base_page_size = MMU_PAGE_64K; 357 + break; 358 + } 359 + } else 360 + slbe->base_page_size = MMU_PAGE_16M; 361 + } 401 362 402 363 slbe->orige = rb & (ESID_MASK | SLB_ESID_V); 403 364 slbe->origv = rs; ··· 529 460 bool large) 530 461 { 531 462 u64 mask = 0xFFFFFFFFFULL; 463 + long i; 464 + struct kvm_vcpu *v; 532 465 533 466 dprintk("KVM MMU: tlbie(0x%lx)\n", va); 534 467 535 - if (large) 536 - mask = 0xFFFFFF000ULL; 537 - kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); 468 + /* 469 + * The tlbie instruction changed behaviour starting with 470 + * POWER6. POWER6 and later don't have the large page flag 471 + * in the instruction but in the RB value, along with bits 472 + * indicating page and segment sizes. 473 + */ 474 + if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) { 475 + /* POWER6 or later */ 476 + if (va & 1) { /* L bit */ 477 + if ((va & 0xf000) == 0x1000) 478 + mask = 0xFFFFFFFF0ULL; /* 64k page */ 479 + else 480 + mask = 0xFFFFFF000ULL; /* 16M page */ 481 + } 482 + } else { 483 + /* older processors, e.g. 
PPC970 */ 484 + if (large) 485 + mask = 0xFFFFFF000ULL; 486 + } 487 + /* flush this VA on all vcpus */ 488 + kvm_for_each_vcpu(i, v, vcpu->kvm) 489 + kvmppc_mmu_pte_vflush(v, va >> 12, mask); 538 490 } 491 + 492 + #ifdef CONFIG_PPC_64K_PAGES 493 + static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid) 494 + { 495 + ulong mp_ea = vcpu->arch.magic_page_ea; 496 + 497 + return mp_ea && !(vcpu->arch.shared->msr & MSR_PR) && 498 + (mp_ea >> SID_SHIFT) == esid; 499 + } 500 + #endif 539 501 540 502 static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, 541 503 u64 *vsid) ··· 575 475 struct kvmppc_slb *slb; 576 476 u64 gvsid = esid; 577 477 ulong mp_ea = vcpu->arch.magic_page_ea; 478 + int pagesize = MMU_PAGE_64K; 578 479 579 480 if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 580 481 slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea); 581 482 if (slb) { 582 483 gvsid = slb->vsid; 484 + pagesize = slb->base_page_size; 583 485 if (slb->tb) { 584 486 gvsid <<= SID_SHIFT_1T - SID_SHIFT; 585 487 gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1); ··· 592 490 593 491 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { 594 492 case 0: 595 - *vsid = VSID_REAL | esid; 493 + gvsid = VSID_REAL | esid; 596 494 break; 597 495 case MSR_IR: 598 - *vsid = VSID_REAL_IR | gvsid; 496 + gvsid |= VSID_REAL_IR; 599 497 break; 600 498 case MSR_DR: 601 - *vsid = VSID_REAL_DR | gvsid; 499 + gvsid |= VSID_REAL_DR; 602 500 break; 603 501 case MSR_DR|MSR_IR: 604 502 if (!slb) 605 503 goto no_slb; 606 504 607 - *vsid = gvsid; 608 505 break; 609 506 default: 610 507 BUG(); 611 508 break; 612 509 } 613 510 614 - if (vcpu->arch.shared->msr & MSR_PR) 615 - *vsid |= VSID_PR; 511 + #ifdef CONFIG_PPC_64K_PAGES 512 + /* 513 + * Mark this as a 64k segment if the host is using 514 + * 64k pages, the host MMU supports 64k pages and 515 + * the guest segment page size is >= 64k, 516 + * but not if this segment contains the magic page. 
517 + */ 518 + if (pagesize >= MMU_PAGE_64K && 519 + mmu_psize_defs[MMU_PAGE_64K].shift && 520 + !segment_contains_magic_page(vcpu, esid)) 521 + gvsid |= VSID_64K; 522 + #endif 616 523 524 + if (vcpu->arch.shared->msr & MSR_PR) 525 + gvsid |= VSID_PR; 526 + 527 + *vsid = gvsid; 617 528 return 0; 618 529 619 530 no_slb:
+79 -27
arch/powerpc/kvm/book3s_64_mmu_host.c
··· 27 27 #include <asm/machdep.h> 28 28 #include <asm/mmu_context.h> 29 29 #include <asm/hw_irq.h> 30 - #include "trace.h" 30 + #include "trace_pr.h" 31 31 32 32 #define PTE_SIZE 12 33 33 34 34 void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 35 35 { 36 36 ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, 37 - MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M, 37 + pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M, 38 38 false); 39 39 } 40 40 ··· 78 78 return NULL; 79 79 } 80 80 81 - int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) 81 + int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, 82 + bool iswrite) 82 83 { 83 84 unsigned long vpn; 84 85 pfn_t hpaddr; ··· 91 90 int attempt = 0; 92 91 struct kvmppc_sid_map *map; 93 92 int r = 0; 93 + int hpsize = MMU_PAGE_4K; 94 + bool writable; 95 + unsigned long mmu_seq; 96 + struct kvm *kvm = vcpu->kvm; 97 + struct hpte_cache *cpte; 98 + unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT; 99 + unsigned long pfn; 100 + 101 + /* used to check for invalidations in progress */ 102 + mmu_seq = kvm->mmu_notifier_seq; 103 + smp_rmb(); 94 104 95 105 /* Get host physical address for gpa */ 96 - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); 97 - if (is_error_noslot_pfn(hpaddr)) { 98 - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); 106 + pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable); 107 + if (is_error_noslot_pfn(pfn)) { 108 + printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn); 99 109 r = -EINVAL; 100 110 goto out; 101 111 } 102 - hpaddr <<= PAGE_SHIFT; 103 - hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); 112 + hpaddr = pfn << PAGE_SHIFT; 104 113 105 114 /* and write the mapping ea -> hpa into the pt */ 106 115 vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); ··· 128 117 goto out; 129 118 } 130 119 131 - vsid = map->host_vsid; 132 - vpn = hpt_vpn(orig_pte->eaddr, 
vsid, MMU_SEGSIZE_256M); 120 + vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M); 133 121 134 - if (!orig_pte->may_write) 135 - rflags |= HPTE_R_PP; 136 - else 137 - mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 122 + kvm_set_pfn_accessed(pfn); 123 + if (!orig_pte->may_write || !writable) 124 + rflags |= PP_RXRX; 125 + else { 126 + mark_page_dirty(vcpu->kvm, gfn); 127 + kvm_set_pfn_dirty(pfn); 128 + } 138 129 139 130 if (!orig_pte->may_execute) 140 131 rflags |= HPTE_R_N; 141 132 else 142 - kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT); 133 + kvmppc_mmu_flush_icache(pfn); 143 134 144 - hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M); 135 + /* 136 + * Use 64K pages if possible; otherwise, on 64K page kernels, 137 + * we need to transfer 4 more bits from guest real to host real addr. 138 + */ 139 + if (vsid & VSID_64K) 140 + hpsize = MMU_PAGE_64K; 141 + else 142 + hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); 143 + 144 + hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M); 145 + 146 + cpte = kvmppc_mmu_hpte_cache_next(vcpu); 147 + 148 + spin_lock(&kvm->mmu_lock); 149 + if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) { 150 + r = -EAGAIN; 151 + goto out_unlock; 152 + } 145 153 146 154 map_again: 147 155 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); ··· 169 139 if (attempt > 1) 170 140 if (ppc_md.hpte_remove(hpteg) < 0) { 171 141 r = -1; 172 - goto out; 142 + goto out_unlock; 173 143 } 174 144 175 145 ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, 176 - MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M); 146 + hpsize, hpsize, MMU_SEGSIZE_256M); 177 147 178 148 if (ret < 0) { 179 149 /* If we couldn't map a primary PTE, try a secondary */ ··· 182 152 attempt++; 183 153 goto map_again; 184 154 } else { 185 - struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); 186 - 187 155 trace_kvm_book3s_64_mmu_map(rflags, hpteg, 188 156 vpn, hpaddr, orig_pte); 189 157 ··· 192 164 hpteg = ((hash & htab_hash_mask) 
* HPTES_PER_GROUP); 193 165 } 194 166 195 - pte->slot = hpteg + (ret & 7); 196 - pte->host_vpn = vpn; 197 - pte->pte = *orig_pte; 198 - pte->pfn = hpaddr >> PAGE_SHIFT; 167 + cpte->slot = hpteg + (ret & 7); 168 + cpte->host_vpn = vpn; 169 + cpte->pte = *orig_pte; 170 + cpte->pfn = pfn; 171 + cpte->pagesize = hpsize; 199 172 200 - kvmppc_mmu_hpte_cache_map(vcpu, pte); 173 + kvmppc_mmu_hpte_cache_map(vcpu, cpte); 174 + cpte = NULL; 201 175 } 202 - kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); 176 + 177 + out_unlock: 178 + spin_unlock(&kvm->mmu_lock); 179 + kvm_release_pfn_clean(pfn); 180 + if (cpte) 181 + kvmppc_mmu_hpte_cache_free(cpte); 203 182 204 183 out: 205 184 return r; 185 + } 186 + 187 + void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) 188 + { 189 + u64 mask = 0xfffffffffULL; 190 + u64 vsid; 191 + 192 + vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid); 193 + if (vsid & VSID_64K) 194 + mask = 0xffffffff0ULL; 195 + kvmppc_mmu_pte_vflush(vcpu, pte->vpage, mask); 206 196 } 207 197 208 198 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) ··· 337 291 slb_vsid &= ~SLB_VSID_KP; 338 292 slb_esid |= slb_index; 339 293 294 + #ifdef CONFIG_PPC_64K_PAGES 295 + /* Set host segment base page size to 64K if possible */ 296 + if (gvsid & VSID_64K) 297 + slb_vsid |= mmu_psize_defs[MMU_PAGE_64K].sllp; 298 + #endif 299 + 340 300 svcpu->slb[slb_index].esid = slb_esid; 341 301 svcpu->slb[slb_index].vsid = slb_vsid; 342 302 ··· 378 326 svcpu_put(svcpu); 379 327 } 380 328 381 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 329 + void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) 382 330 { 383 331 kvmppc_mmu_hpte_destroy(vcpu); 384 332 __destroy_context(to_book3s(vcpu)->context_id[0]);
+10 -14
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 260 260 return 0; 261 261 } 262 262 263 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 264 - { 265 - } 266 - 267 263 static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) 268 264 { 269 265 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); ··· 447 451 } 448 452 449 453 static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 450 - struct kvmppc_pte *gpte, bool data) 454 + struct kvmppc_pte *gpte, bool data, bool iswrite) 451 455 { 452 456 struct kvm *kvm = vcpu->kvm; 453 457 struct kvmppc_slb *slbe; ··· 902 906 return 0; 903 907 } 904 908 905 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 909 + int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) 906 910 { 907 911 if (kvm->arch.using_mmu_notifiers) 908 912 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 909 913 return 0; 910 914 } 911 915 912 - int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 916 + int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) 913 917 { 914 918 if (kvm->arch.using_mmu_notifiers) 915 919 kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); 916 920 return 0; 917 921 } 918 922 919 - void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) 923 + void kvmppc_core_flush_memslot_hv(struct kvm *kvm, 924 + struct kvm_memory_slot *memslot) 920 925 { 921 926 unsigned long *rmapp; 922 927 unsigned long gfn; ··· 991 994 return ret; 992 995 } 993 996 994 - int kvm_age_hva(struct kvm *kvm, unsigned long hva) 997 + int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) 995 998 { 996 999 if (!kvm->arch.using_mmu_notifiers) 997 1000 return 0; ··· 1029 1032 return ret; 1030 1033 } 1031 1034 1032 - int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 1035 + int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) 1033 1036 { 1034 1037 if (!kvm->arch.using_mmu_notifiers) 1035 1038 return 0; 1036 1039 return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); 1037 1040 } 1038 1041 1039 - 
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 1042 + void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) 1040 1043 { 1041 1044 if (!kvm->arch.using_mmu_notifiers) 1042 1045 return; ··· 1509 1512 1510 1513 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1511 1514 (VRMA_VSID << SLB_VSID_SHIFT_1T); 1512 - lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; 1513 - lpcr |= senc << (LPCR_VRMASD_SH - 4); 1514 - kvm->arch.lpcr = lpcr; 1515 + lpcr = senc << (LPCR_VRMASD_SH - 4); 1516 + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); 1515 1517 rma_setup = 1; 1516 1518 } 1517 1519 ++i;
+1
arch/powerpc/kvm/book3s_64_vio_hv.c
··· 74 74 /* Didn't find the liobn, punt it to userspace */ 75 75 return H_TOO_HARD; 76 76 } 77 + EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
+7 -11
arch/powerpc/kvm/book3s_emulate.c
··· 86 86 return true; 87 87 } 88 88 89 - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 90 - unsigned int inst, int *advance) 89 + int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 90 + unsigned int inst, int *advance) 91 91 { 92 92 int emulated = EMULATE_DONE; 93 93 int rt = get_rt(inst); ··· 172 172 vcpu->arch.mmu.tlbie(vcpu, addr, large); 173 173 break; 174 174 } 175 - #ifdef CONFIG_KVM_BOOK3S_64_PR 175 + #ifdef CONFIG_PPC_BOOK3S_64 176 176 case OP_31_XOP_FAKE_SC1: 177 177 { 178 178 /* SC 1 papr hypercalls */ ··· 267 267 268 268 r = kvmppc_st(vcpu, &addr, 32, zeros, true); 269 269 if ((r == -ENOENT) || (r == -EPERM)) { 270 - struct kvmppc_book3s_shadow_vcpu *svcpu; 271 - 272 - svcpu = svcpu_get(vcpu); 273 270 *advance = 0; 274 271 vcpu->arch.shared->dar = vaddr; 275 - svcpu->fault_dar = vaddr; 272 + vcpu->arch.fault_dar = vaddr; 276 273 277 274 dsisr = DSISR_ISSTORE; 278 275 if (r == -ENOENT) ··· 278 281 dsisr |= DSISR_PROTFAULT; 279 282 280 283 vcpu->arch.shared->dsisr = dsisr; 281 - svcpu->fault_dsisr = dsisr; 282 - svcpu_put(svcpu); 284 + vcpu->arch.fault_dsisr = dsisr; 283 285 284 286 kvmppc_book3s_queue_irqprio(vcpu, 285 287 BOOK3S_INTERRUPT_DATA_STORAGE); ··· 345 349 return bat; 346 350 } 347 351 348 - int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 352 + int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 349 353 { 350 354 int emulated = EMULATE_DONE; 351 355 ··· 468 472 return emulated; 469 473 } 470 474 471 - int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 475 + int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 472 476 { 473 477 int emulated = EMULATE_DONE; 474 478
+3 -2
arch/powerpc/kvm/book3s_exports.c
··· 20 20 #include <linux/export.h> 21 21 #include <asm/kvm_book3s.h> 22 22 23 - #ifdef CONFIG_KVM_BOOK3S_64_HV 23 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 24 24 EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); 25 - #else 25 + #endif 26 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 26 27 EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); 27 28 EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); 28 29 #ifdef CONFIG_ALTIVEC
+311 -82
arch/powerpc/kvm/book3s_hv.c
··· 52 52 #include <linux/vmalloc.h> 53 53 #include <linux/highmem.h> 54 54 #include <linux/hugetlb.h> 55 + #include <linux/module.h> 56 + 57 + #include "book3s.h" 55 58 56 59 /* #define EXIT_DEBUG */ 57 60 /* #define EXIT_DEBUG_SIMPLE */ ··· 69 66 static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 70 67 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 71 68 72 - void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) 69 + static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) 73 70 { 74 71 int me; 75 72 int cpu = vcpu->cpu; ··· 128 125 * purely defensive; they should never fail.) 129 126 */ 130 127 131 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 128 + static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) 132 129 { 133 130 struct kvmppc_vcore *vc = vcpu->arch.vcore; 134 131 ··· 146 143 spin_unlock(&vcpu->arch.tbacct_lock); 147 144 } 148 145 149 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 146 + static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) 150 147 { 151 148 struct kvmppc_vcore *vc = vcpu->arch.vcore; 152 149 ··· 158 155 spin_unlock(&vcpu->arch.tbacct_lock); 159 156 } 160 157 161 - void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 158 + static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) 162 159 { 163 160 vcpu->arch.shregs.msr = msr; 164 161 kvmppc_end_cede(vcpu); 165 162 } 166 163 167 - void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 164 + void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) 168 165 { 169 166 vcpu->arch.pvr = pvr; 167 + } 168 + 169 + int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) 170 + { 171 + unsigned long pcr = 0; 172 + struct kvmppc_vcore *vc = vcpu->arch.vcore; 173 + 174 + if (arch_compat) { 175 + if (!cpu_has_feature(CPU_FTR_ARCH_206)) 176 + return -EINVAL; /* 970 has no compat mode support */ 177 + 178 + switch (arch_compat) { 179 + case PVR_ARCH_205: 180 + pcr = PCR_ARCH_205; 181 + break; 182 + case PVR_ARCH_206: 183 + case PVR_ARCH_206p: 184 
+ break; 185 + default: 186 + return -EINVAL; 187 + } 188 + } 189 + 190 + spin_lock(&vc->lock); 191 + vc->arch_compat = arch_compat; 192 + vc->pcr = pcr; 193 + spin_unlock(&vc->lock); 194 + 195 + return 0; 170 196 } 171 197 172 198 void kvmppc_dump_regs(struct kvm_vcpu *vcpu) ··· 227 195 pr_err(" ESID = %.16llx VSID = %.16llx\n", 228 196 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv); 229 197 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n", 230 - vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1, 198 + vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1, 231 199 vcpu->arch.last_inst); 232 200 } 233 201 ··· 521 489 memset(dt, 0, sizeof(struct dtl_entry)); 522 490 dt->dispatch_reason = 7; 523 491 dt->processor_id = vc->pcpu + vcpu->arch.ptid; 524 - dt->timebase = now; 492 + dt->timebase = now + vc->tb_offset; 525 493 dt->enqueue_to_dispatch_time = stolen; 526 494 dt->srr0 = kvmppc_get_pc(vcpu); 527 495 dt->srr1 = vcpu->arch.shregs.msr; ··· 570 538 } 571 539 break; 572 540 case H_CONFER: 541 + target = kvmppc_get_gpr(vcpu, 4); 542 + if (target == -1) 543 + break; 544 + tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); 545 + if (!tvcpu) { 546 + ret = H_PARAMETER; 547 + break; 548 + } 549 + kvm_vcpu_yield_to(tvcpu); 573 550 break; 574 551 case H_REGISTER_VPA: 575 552 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), ··· 617 576 return RESUME_GUEST; 618 577 } 619 578 620 - static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 621 - struct task_struct *tsk) 579 + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, 580 + struct task_struct *tsk) 622 581 { 623 582 int r = RESUME_HOST; 624 583 ··· 712 671 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", 713 672 vcpu->arch.trap, kvmppc_get_pc(vcpu), 714 673 vcpu->arch.shregs.msr); 674 + run->hw.hardware_exit_reason = vcpu->arch.trap; 715 675 r = RESUME_HOST; 716 - BUG(); 717 676 break; 718 677 } 719 678 720 679 return r; 721 680 } 722 681 723 - int 
kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 724 - struct kvm_sregs *sregs) 682 + static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu, 683 + struct kvm_sregs *sregs) 725 684 { 726 685 int i; 727 686 ··· 735 694 return 0; 736 695 } 737 696 738 - int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 739 - struct kvm_sregs *sregs) 697 + static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, 698 + struct kvm_sregs *sregs) 740 699 { 741 700 int i, j; 742 701 743 - kvmppc_set_pvr(vcpu, sregs->pvr); 702 + kvmppc_set_pvr_hv(vcpu, sregs->pvr); 744 703 745 704 j = 0; 746 705 for (i = 0; i < vcpu->arch.slb_nr; i++) { ··· 755 714 return 0; 756 715 } 757 716 758 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 717 + static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) 718 + { 719 + struct kvmppc_vcore *vc = vcpu->arch.vcore; 720 + u64 mask; 721 + 722 + spin_lock(&vc->lock); 723 + /* 724 + * Userspace can only modify DPFD (default prefetch depth), 725 + * ILE (interrupt little-endian) and TC (translation control). 726 + */ 727 + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; 728 + vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); 729 + spin_unlock(&vc->lock); 730 + } 731 + 732 + static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, 733 + union kvmppc_one_reg *val) 759 734 { 760 735 int r = 0; 761 736 long int i; ··· 805 748 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: 806 749 i = id - KVM_REG_PPC_PMC1; 807 750 *val = get_reg_val(id, vcpu->arch.pmc[i]); 751 + break; 752 + case KVM_REG_PPC_SIAR: 753 + *val = get_reg_val(id, vcpu->arch.siar); 754 + break; 755 + case KVM_REG_PPC_SDAR: 756 + *val = get_reg_val(id, vcpu->arch.sdar); 808 757 break; 809 758 #ifdef CONFIG_VSX 810 759 case KVM_REG_PPC_FPR0 ... 
KVM_REG_PPC_FPR31: ··· 850 787 val->vpaval.length = vcpu->arch.dtl.len; 851 788 spin_unlock(&vcpu->arch.vpa_update_lock); 852 789 break; 790 + case KVM_REG_PPC_TB_OFFSET: 791 + *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); 792 + break; 793 + case KVM_REG_PPC_LPCR: 794 + *val = get_reg_val(id, vcpu->arch.vcore->lpcr); 795 + break; 796 + case KVM_REG_PPC_PPR: 797 + *val = get_reg_val(id, vcpu->arch.ppr); 798 + break; 799 + case KVM_REG_PPC_ARCH_COMPAT: 800 + *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); 801 + break; 853 802 default: 854 803 r = -EINVAL; 855 804 break; ··· 870 795 return r; 871 796 } 872 797 873 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 798 + static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, 799 + union kvmppc_one_reg *val) 874 800 { 875 801 int r = 0; 876 802 long int i; ··· 908 832 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: 909 833 i = id - KVM_REG_PPC_PMC1; 910 834 vcpu->arch.pmc[i] = set_reg_val(id, *val); 835 + break; 836 + case KVM_REG_PPC_SIAR: 837 + vcpu->arch.siar = set_reg_val(id, *val); 838 + break; 839 + case KVM_REG_PPC_SDAR: 840 + vcpu->arch.sdar = set_reg_val(id, *val); 911 841 break; 912 842 #ifdef CONFIG_VSX 913 843 case KVM_REG_PPC_FPR0 ... 
KVM_REG_PPC_FPR31: ··· 962 880 len -= len % sizeof(struct dtl_entry); 963 881 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); 964 882 break; 883 + case KVM_REG_PPC_TB_OFFSET: 884 + /* round up to multiple of 2^24 */ 885 + vcpu->arch.vcore->tb_offset = 886 + ALIGN(set_reg_val(id, *val), 1UL << 24); 887 + break; 888 + case KVM_REG_PPC_LPCR: 889 + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val)); 890 + break; 891 + case KVM_REG_PPC_PPR: 892 + vcpu->arch.ppr = set_reg_val(id, *val); 893 + break; 894 + case KVM_REG_PPC_ARCH_COMPAT: 895 + r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); 896 + break; 965 897 default: 966 898 r = -EINVAL; 967 899 break; ··· 984 888 return r; 985 889 } 986 890 987 - int kvmppc_core_check_processor_compat(void) 988 - { 989 - if (cpu_has_feature(CPU_FTR_HVMODE)) 990 - return 0; 991 - return -EIO; 992 - } 993 - 994 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 891 + static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, 892 + unsigned int id) 995 893 { 996 894 struct kvm_vcpu *vcpu; 997 895 int err = -EINVAL; ··· 1009 919 vcpu->arch.mmcr[0] = MMCR0_FC; 1010 920 vcpu->arch.ctrl = CTRL_RUNLATCH; 1011 921 /* default to host PVR, since we can't spoof it */ 1012 - vcpu->arch.pvr = mfspr(SPRN_PVR); 1013 - kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 922 + kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR)); 1014 923 spin_lock_init(&vcpu->arch.vpa_update_lock); 1015 924 spin_lock_init(&vcpu->arch.tbacct_lock); 1016 925 vcpu->arch.busy_preempt = TB_NIL; ··· 1029 940 spin_lock_init(&vcore->lock); 1030 941 init_waitqueue_head(&vcore->wq); 1031 942 vcore->preempt_tb = TB_NIL; 943 + vcore->lpcr = kvm->arch.lpcr; 1032 944 } 1033 945 kvm->arch.vcores[core] = vcore; 1034 946 kvm->arch.online_vcores++; ··· 1062 972 vpa->dirty); 1063 973 } 1064 974 1065 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 975 + static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu) 1066 976 { 1067 977 spin_lock(&vcpu->arch.vpa_update_lock); 
1068 978 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); ··· 1071 981 spin_unlock(&vcpu->arch.vpa_update_lock); 1072 982 kvm_vcpu_uninit(vcpu); 1073 983 kmem_cache_free(kvm_vcpu_cache, vcpu); 984 + } 985 + 986 + static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu) 987 + { 988 + /* Indicate we want to get back into the guest */ 989 + return 1; 1074 990 } 1075 991 1076 992 static void kvmppc_set_timer(struct kvm_vcpu *vcpu) ··· 1360 1264 1361 1265 ret = RESUME_GUEST; 1362 1266 if (vcpu->arch.trap) 1363 - ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu, 1364 - vcpu->arch.run_task); 1267 + ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, 1268 + vcpu->arch.run_task); 1365 1269 1366 1270 vcpu->arch.ret = ret; 1367 1271 vcpu->arch.trap = 0; ··· 1520 1424 return vcpu->arch.ret; 1521 1425 } 1522 1426 1523 - int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) 1427 + static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) 1524 1428 { 1525 1429 int r; 1526 1430 int srcu_idx; ··· 1642 1546 .release = kvm_rma_release, 1643 1547 }; 1644 1548 1645 - long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) 1549 + static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, 1550 + struct kvm_allocate_rma *ret) 1646 1551 { 1647 1552 long fd; 1648 1553 struct kvm_rma_info *ri; ··· 1689 1592 (*sps)++; 1690 1593 } 1691 1594 1692 - int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) 1595 + static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, 1596 + struct kvm_ppc_smmu_info *info) 1693 1597 { 1694 1598 struct kvm_ppc_one_seg_page_size *sps; 1695 1599 ··· 1711 1613 /* 1712 1614 * Get (and clear) the dirty memory log for a memory slot. 
1713 1615 */ 1714 - int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) 1616 + static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, 1617 + struct kvm_dirty_log *log) 1715 1618 { 1716 1619 struct kvm_memory_slot *memslot; 1717 1620 int r; ··· 1766 1667 } 1767 1668 } 1768 1669 1769 - void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 1770 - struct kvm_memory_slot *dont) 1670 + static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, 1671 + struct kvm_memory_slot *dont) 1771 1672 { 1772 1673 if (!dont || free->arch.rmap != dont->arch.rmap) { 1773 1674 vfree(free->arch.rmap); ··· 1780 1681 } 1781 1682 } 1782 1683 1783 - int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 1784 - unsigned long npages) 1684 + static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, 1685 + unsigned long npages) 1785 1686 { 1786 1687 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); 1787 1688 if (!slot->arch.rmap) ··· 1791 1692 return 0; 1792 1693 } 1793 1694 1794 - int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1795 - struct kvm_memory_slot *memslot, 1796 - struct kvm_userspace_memory_region *mem) 1695 + static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, 1696 + struct kvm_memory_slot *memslot, 1697 + struct kvm_userspace_memory_region *mem) 1797 1698 { 1798 1699 unsigned long *phys; 1799 1700 ··· 1809 1710 return 0; 1810 1711 } 1811 1712 1812 - void kvmppc_core_commit_memory_region(struct kvm *kvm, 1813 - struct kvm_userspace_memory_region *mem, 1814 - const struct kvm_memory_slot *old) 1713 + static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, 1714 + struct kvm_userspace_memory_region *mem, 1715 + const struct kvm_memory_slot *old) 1815 1716 { 1816 1717 unsigned long npages = mem->memory_size >> PAGE_SHIFT; 1817 1718 struct kvm_memory_slot *memslot; ··· 1828 1729 } 1829 1730 } 1830 1731 1732 + /* 1733 + * Update LPCR values in kvm->arch and in vcores. 
1734 + * Caller must hold kvm->lock. 1735 + */ 1736 + void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) 1737 + { 1738 + long int i; 1739 + u32 cores_done = 0; 1740 + 1741 + if ((kvm->arch.lpcr & mask) == lpcr) 1742 + return; 1743 + 1744 + kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr; 1745 + 1746 + for (i = 0; i < KVM_MAX_VCORES; ++i) { 1747 + struct kvmppc_vcore *vc = kvm->arch.vcores[i]; 1748 + if (!vc) 1749 + continue; 1750 + spin_lock(&vc->lock); 1751 + vc->lpcr = (vc->lpcr & ~mask) | lpcr; 1752 + spin_unlock(&vc->lock); 1753 + if (++cores_done >= kvm->arch.online_vcores) 1754 + break; 1755 + } 1756 + } 1757 + 1758 + static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) 1759 + { 1760 + return; 1761 + } 1762 + 1831 1763 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) 1832 1764 { 1833 1765 int err = 0; ··· 1867 1737 unsigned long hva; 1868 1738 struct kvm_memory_slot *memslot; 1869 1739 struct vm_area_struct *vma; 1870 - unsigned long lpcr, senc; 1740 + unsigned long lpcr = 0, senc; 1741 + unsigned long lpcr_mask = 0; 1871 1742 unsigned long psize, porder; 1872 1743 unsigned long rma_size; 1873 1744 unsigned long rmls; ··· 1933 1802 senc = slb_pgsize_encoding(psize); 1934 1803 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | 1935 1804 (VRMA_VSID << SLB_VSID_SHIFT_1T); 1936 - lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; 1937 - lpcr |= senc << (LPCR_VRMASD_SH - 4); 1938 - kvm->arch.lpcr = lpcr; 1805 + lpcr_mask = LPCR_VRMASD; 1806 + /* the -4 is to account for senc values starting at 0x10 */ 1807 + lpcr = senc << (LPCR_VRMASD_SH - 4); 1939 1808 1940 1809 /* Create HPTEs in the hash page table for the VRMA */ 1941 1810 kvmppc_map_vrma(vcpu, memslot, porder); ··· 1956 1825 kvm->arch.rma = ri; 1957 1826 1958 1827 /* Update LPCR and RMOR */ 1959 - lpcr = kvm->arch.lpcr; 1960 1828 if (cpu_has_feature(CPU_FTR_ARCH_201)) { 1961 1829 /* PPC970; insert RMLS value (split field) in HID4 */ 1962 - lpcr &= ~((1ul << HID4_RMLS0_SH) | 1963 - 
(3ul << HID4_RMLS2_SH)); 1964 - lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) | 1830 + lpcr_mask = (1ul << HID4_RMLS0_SH) | 1831 + (3ul << HID4_RMLS2_SH) | HID4_RMOR; 1832 + lpcr = ((rmls >> 2) << HID4_RMLS0_SH) | 1965 1833 ((rmls & 3) << HID4_RMLS2_SH); 1966 1834 /* RMOR is also in HID4 */ 1967 1835 lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) 1968 1836 << HID4_RMOR_SH; 1969 1837 } else { 1970 1838 /* POWER7 */ 1971 - lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); 1972 - lpcr |= rmls << LPCR_RMLS_SH; 1839 + lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS; 1840 + lpcr = rmls << LPCR_RMLS_SH; 1973 1841 kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; 1974 1842 } 1975 - kvm->arch.lpcr = lpcr; 1976 1843 pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", 1977 1844 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); 1978 1845 ··· 1989 1860 } 1990 1861 } 1991 1862 1863 + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); 1864 + 1992 1865 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ 1993 1866 smp_wmb(); 1994 1867 kvm->arch.rma_setup_done = 1; ··· 2006 1875 goto out_srcu; 2007 1876 } 2008 1877 2009 - int kvmppc_core_init_vm(struct kvm *kvm) 1878 + static int kvmppc_core_init_vm_hv(struct kvm *kvm) 2010 1879 { 2011 1880 unsigned long lpcr, lpid; 2012 1881 ··· 2023 1892 * make sure we flush on each core before running the new VM. 
2024 1893 */ 2025 1894 cpumask_setall(&kvm->arch.need_tlb_flush); 2026 - 2027 - INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 2028 - INIT_LIST_HEAD(&kvm->arch.rtas_tokens); 2029 1895 2030 1896 kvm->arch.rma = NULL; 2031 1897 ··· 2059 1931 return 0; 2060 1932 } 2061 1933 2062 - void kvmppc_core_destroy_vm(struct kvm *kvm) 1934 + static void kvmppc_free_vcores(struct kvm *kvm) 1935 + { 1936 + long int i; 1937 + 1938 + for (i = 0; i < KVM_MAX_VCORES; ++i) 1939 + kfree(kvm->arch.vcores[i]); 1940 + kvm->arch.online_vcores = 0; 1941 + } 1942 + 1943 + static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) 2063 1944 { 2064 1945 uninhibit_secondary_onlining(); 2065 1946 1947 + kvmppc_free_vcores(kvm); 2066 1948 if (kvm->arch.rma) { 2067 1949 kvm_release_rma(kvm->arch.rma); 2068 1950 kvm->arch.rma = NULL; 2069 1951 } 2070 1952 2071 - kvmppc_rtas_tokens_free(kvm); 2072 - 2073 1953 kvmppc_free_hpt(kvm); 2074 - WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); 2075 - } 2076 - 2077 - /* These are stubs for now */ 2078 - void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) 2079 - { 2080 1954 } 2081 1955 2082 1956 /* We don't need to emulate any privileged instructions or dcbz */ 2083 - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 2084 - unsigned int inst, int *advance) 1957 + static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, 1958 + unsigned int inst, int *advance) 2085 1959 { 2086 1960 return EMULATE_FAIL; 2087 1961 } 2088 1962 2089 - int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 1963 + static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn, 1964 + ulong spr_val) 2090 1965 { 2091 1966 return EMULATE_FAIL; 2092 1967 } 2093 1968 2094 - int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 1969 + static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn, 1970 + ulong *spr_val) 2095 1971 { 2096 1972 return 
EMULATE_FAIL; 2097 1973 } 2098 1974 2099 - static int kvmppc_book3s_hv_init(void) 1975 + static int kvmppc_core_check_processor_compat_hv(void) 2100 1976 { 2101 - int r; 1977 + if (!cpu_has_feature(CPU_FTR_HVMODE)) 1978 + return -EIO; 1979 + return 0; 1980 + } 2102 1981 2103 - r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 1982 + static long kvm_arch_vm_ioctl_hv(struct file *filp, 1983 + unsigned int ioctl, unsigned long arg) 1984 + { 1985 + struct kvm *kvm __maybe_unused = filp->private_data; 1986 + void __user *argp = (void __user *)arg; 1987 + long r; 2104 1988 2105 - if (r) 2106 - return r; 1989 + switch (ioctl) { 2107 1990 2108 - r = kvmppc_mmu_hv_init(); 1991 + case KVM_ALLOCATE_RMA: { 1992 + struct kvm_allocate_rma rma; 1993 + struct kvm *kvm = filp->private_data; 1994 + 1995 + r = kvm_vm_ioctl_allocate_rma(kvm, &rma); 1996 + if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) 1997 + r = -EFAULT; 1998 + break; 1999 + } 2000 + 2001 + case KVM_PPC_ALLOCATE_HTAB: { 2002 + u32 htab_order; 2003 + 2004 + r = -EFAULT; 2005 + if (get_user(htab_order, (u32 __user *)argp)) 2006 + break; 2007 + r = kvmppc_alloc_reset_hpt(kvm, &htab_order); 2008 + if (r) 2009 + break; 2010 + r = -EFAULT; 2011 + if (put_user(htab_order, (u32 __user *)argp)) 2012 + break; 2013 + r = 0; 2014 + break; 2015 + } 2016 + 2017 + case KVM_PPC_GET_HTAB_FD: { 2018 + struct kvm_get_htab_fd ghf; 2019 + 2020 + r = -EFAULT; 2021 + if (copy_from_user(&ghf, argp, sizeof(ghf))) 2022 + break; 2023 + r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); 2024 + break; 2025 + } 2026 + 2027 + default: 2028 + r = -ENOTTY; 2029 + } 2109 2030 2110 2031 return r; 2111 2032 } 2112 2033 2113 - static void kvmppc_book3s_hv_exit(void) 2034 + static struct kvmppc_ops kvm_ops_hv = { 2035 + .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, 2036 + .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, 2037 + .get_one_reg = kvmppc_get_one_reg_hv, 2038 + .set_one_reg = kvmppc_set_one_reg_hv, 2039 + .vcpu_load = 
kvmppc_core_vcpu_load_hv, 2040 + .vcpu_put = kvmppc_core_vcpu_put_hv, 2041 + .set_msr = kvmppc_set_msr_hv, 2042 + .vcpu_run = kvmppc_vcpu_run_hv, 2043 + .vcpu_create = kvmppc_core_vcpu_create_hv, 2044 + .vcpu_free = kvmppc_core_vcpu_free_hv, 2045 + .check_requests = kvmppc_core_check_requests_hv, 2046 + .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv, 2047 + .flush_memslot = kvmppc_core_flush_memslot_hv, 2048 + .prepare_memory_region = kvmppc_core_prepare_memory_region_hv, 2049 + .commit_memory_region = kvmppc_core_commit_memory_region_hv, 2050 + .unmap_hva = kvm_unmap_hva_hv, 2051 + .unmap_hva_range = kvm_unmap_hva_range_hv, 2052 + .age_hva = kvm_age_hva_hv, 2053 + .test_age_hva = kvm_test_age_hva_hv, 2054 + .set_spte_hva = kvm_set_spte_hva_hv, 2055 + .mmu_destroy = kvmppc_mmu_destroy_hv, 2056 + .free_memslot = kvmppc_core_free_memslot_hv, 2057 + .create_memslot = kvmppc_core_create_memslot_hv, 2058 + .init_vm = kvmppc_core_init_vm_hv, 2059 + .destroy_vm = kvmppc_core_destroy_vm_hv, 2060 + .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv, 2061 + .emulate_op = kvmppc_core_emulate_op_hv, 2062 + .emulate_mtspr = kvmppc_core_emulate_mtspr_hv, 2063 + .emulate_mfspr = kvmppc_core_emulate_mfspr_hv, 2064 + .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, 2065 + .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, 2066 + }; 2067 + 2068 + static int kvmppc_book3s_init_hv(void) 2114 2069 { 2115 - kvm_exit(); 2070 + int r; 2071 + /* 2072 + * FIXME!! Do we need to check on all cpus ? 
2073 + */ 2074 + r = kvmppc_core_check_processor_compat_hv(); 2075 + if (r < 0) 2076 + return r; 2077 + 2078 + kvm_ops_hv.owner = THIS_MODULE; 2079 + kvmppc_hv_ops = &kvm_ops_hv; 2080 + 2081 + r = kvmppc_mmu_hv_init(); 2082 + return r; 2116 2083 } 2117 2084 2118 - module_init(kvmppc_book3s_hv_init); 2119 - module_exit(kvmppc_book3s_hv_exit); 2085 + static void kvmppc_book3s_exit_hv(void) 2086 + { 2087 + kvmppc_hv_ops = NULL; 2088 + } 2089 + 2090 + module_init(kvmppc_book3s_init_hv); 2091 + module_exit(kvmppc_book3s_exit_hv); 2092 + MODULE_LICENSE("GPL");
-3
arch/powerpc/kvm/book3s_hv_interrupts.S
··· 158 158 * Interrupts are enabled again at this point. 159 159 */ 160 160 161 - .global kvmppc_handler_highmem 162 - kvmppc_handler_highmem: 163 - 164 161 /* 165 162 * Register usage at this point: 166 163 *
+356 -262
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 33 33 #error Need to fix lppaca and SLB shadow accesses in little endian mode 34 34 #endif 35 35 36 - /***************************************************************************** 37 - * * 38 - * Real Mode handlers that need to be in the linear mapping * 39 - * * 40 - ****************************************************************************/ 41 - 42 - .globl kvmppc_skip_interrupt 43 - kvmppc_skip_interrupt: 44 - mfspr r13,SPRN_SRR0 45 - addi r13,r13,4 46 - mtspr SPRN_SRR0,r13 47 - GET_SCRATCH0(r13) 48 - rfid 49 - b . 50 - 51 - .globl kvmppc_skip_Hinterrupt 52 - kvmppc_skip_Hinterrupt: 53 - mfspr r13,SPRN_HSRR0 54 - addi r13,r13,4 55 - mtspr SPRN_HSRR0,r13 56 - GET_SCRATCH0(r13) 57 - hrfid 58 - b . 59 - 60 36 /* 61 37 * Call kvmppc_hv_entry in real mode. 62 38 * Must be called with interrupts hard-disabled. ··· 42 66 * LR = return address to continue at after eventually re-enabling MMU 43 67 */ 44 68 _GLOBAL(kvmppc_hv_entry_trampoline) 69 + mflr r0 70 + std r0, PPC_LR_STKOFF(r1) 71 + stdu r1, -112(r1) 45 72 mfmsr r10 46 - LOAD_REG_ADDR(r5, kvmppc_hv_entry) 73 + LOAD_REG_ADDR(r5, kvmppc_call_hv_entry) 47 74 li r0,MSR_RI 48 75 andc r0,r10,r0 49 76 li r6,MSR_IR | MSR_DR ··· 56 77 mtsrr1 r6 57 78 RFI 58 79 59 - /****************************************************************************** 60 - * * 61 - * Entry code * 62 - * * 63 - *****************************************************************************/ 80 + kvmppc_call_hv_entry: 81 + bl kvmppc_hv_entry 82 + 83 + /* Back from guest - restore host state and return to caller */ 84 + 85 + /* Restore host DABR and DABRX */ 86 + ld r5,HSTATE_DABR(r13) 87 + li r6,7 88 + mtspr SPRN_DABR,r5 89 + mtspr SPRN_DABRX,r6 90 + 91 + /* Restore SPRG3 */ 92 + ld r3,PACA_SPRG3(r13) 93 + mtspr SPRN_SPRG3,r3 94 + 95 + /* 96 + * Reload DEC. HDEC interrupts were disabled when 97 + * we reloaded the host's LPCR value. 
98 + */ 99 + ld r3, HSTATE_DECEXP(r13) 100 + mftb r4 101 + subf r4, r4, r3 102 + mtspr SPRN_DEC, r4 103 + 104 + /* Reload the host's PMU registers */ 105 + ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ 106 + lbz r4, LPPACA_PMCINUSE(r3) 107 + cmpwi r4, 0 108 + beq 23f /* skip if not */ 109 + lwz r3, HSTATE_PMC(r13) 110 + lwz r4, HSTATE_PMC + 4(r13) 111 + lwz r5, HSTATE_PMC + 8(r13) 112 + lwz r6, HSTATE_PMC + 12(r13) 113 + lwz r8, HSTATE_PMC + 16(r13) 114 + lwz r9, HSTATE_PMC + 20(r13) 115 + BEGIN_FTR_SECTION 116 + lwz r10, HSTATE_PMC + 24(r13) 117 + lwz r11, HSTATE_PMC + 28(r13) 118 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 119 + mtspr SPRN_PMC1, r3 120 + mtspr SPRN_PMC2, r4 121 + mtspr SPRN_PMC3, r5 122 + mtspr SPRN_PMC4, r6 123 + mtspr SPRN_PMC5, r8 124 + mtspr SPRN_PMC6, r9 125 + BEGIN_FTR_SECTION 126 + mtspr SPRN_PMC7, r10 127 + mtspr SPRN_PMC8, r11 128 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 129 + ld r3, HSTATE_MMCR(r13) 130 + ld r4, HSTATE_MMCR + 8(r13) 131 + ld r5, HSTATE_MMCR + 16(r13) 132 + mtspr SPRN_MMCR1, r4 133 + mtspr SPRN_MMCRA, r5 134 + mtspr SPRN_MMCR0, r3 135 + isync 136 + 23: 137 + 138 + /* 139 + * For external and machine check interrupts, we need 140 + * to call the Linux handler to process the interrupt. 141 + * We do that by jumping to absolute address 0x500 for 142 + * external interrupts, or the machine_check_fwnmi label 143 + * for machine checks (since firmware might have patched 144 + * the vector area at 0x200). The [h]rfid at the end of the 145 + * handler will return to the book3s_hv_interrupts.S code. 146 + * For other interrupts we do the rfid to get back 147 + * to the book3s_hv_interrupts.S code here. 
148 + */ 149 + ld r8, 112+PPC_LR_STKOFF(r1) 150 + addi r1, r1, 112 151 + ld r7, HSTATE_HOST_MSR(r13) 152 + 153 + cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK 154 + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 155 + BEGIN_FTR_SECTION 156 + beq 11f 157 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 158 + 159 + /* RFI into the highmem handler, or branch to interrupt handler */ 160 + mfmsr r6 161 + li r0, MSR_RI 162 + andc r6, r6, r0 163 + mtmsrd r6, 1 /* Clear RI in MSR */ 164 + mtsrr0 r8 165 + mtsrr1 r7 166 + beqa 0x500 /* external interrupt (PPC970) */ 167 + beq cr1, 13f /* machine check */ 168 + RFI 169 + 170 + /* On POWER7, we have external interrupts set to use HSRR0/1 */ 171 + 11: mtspr SPRN_HSRR0, r8 172 + mtspr SPRN_HSRR1, r7 173 + ba 0x500 174 + 175 + 13: b machine_check_fwnmi 176 + 64 177 65 178 /* 66 179 * We come in here when wakened from nap mode on a secondary hw thread. ··· 208 137 cmpdi r4,0 209 138 /* if we have no vcpu to run, go back to sleep */ 210 139 beq kvm_no_guest 211 - b kvmppc_hv_entry 140 + b 30f 212 141 213 142 27: /* XXX should handle hypervisor maintenance interrupts etc. here */ 214 143 b kvm_no_guest ··· 217 146 29: /* External non-IPI interrupt to offline secondary thread? help?? */ 218 147 stw r8,HSTATE_SAVED_XIRR(r13) 219 148 b kvm_no_guest 149 + 150 + 30: bl kvmppc_hv_entry 151 + 152 + /* Back from the guest, go back to nap */ 153 + /* Clear our vcpu pointer so we don't come back in early */ 154 + li r0, 0 155 + std r0, HSTATE_KVM_VCPU(r13) 156 + lwsync 157 + /* Clear any pending IPI - we're an offline thread */ 158 + ld r5, HSTATE_XICS_PHYS(r13) 159 + li r7, XICS_XIRR 160 + lwzcix r3, r5, r7 /* ack any pending interrupt */ 161 + rlwinm. r0, r3, 0, 0xffffff /* any pending? 
*/ 162 + beq 37f 163 + sync 164 + li r0, 0xff 165 + li r6, XICS_MFRR 166 + stbcix r0, r5, r6 /* clear the IPI */ 167 + stwcix r3, r5, r7 /* EOI it */ 168 + 37: sync 169 + 170 + /* increment the nap count and then go to nap mode */ 171 + ld r4, HSTATE_KVM_VCORE(r13) 172 + addi r4, r4, VCORE_NAP_COUNT 173 + lwsync /* make previous updates visible */ 174 + 51: lwarx r3, 0, r4 175 + addi r3, r3, 1 176 + stwcx. r3, 0, r4 177 + bne 51b 178 + 179 + kvm_no_guest: 180 + li r0, KVM_HWTHREAD_IN_NAP 181 + stb r0, HSTATE_HWTHREAD_STATE(r13) 182 + li r3, LPCR_PECE0 183 + mfspr r4, SPRN_LPCR 184 + rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 185 + mtspr SPRN_LPCR, r4 186 + isync 187 + std r0, HSTATE_SCRATCH0(r13) 188 + ptesync 189 + ld r0, HSTATE_SCRATCH0(r13) 190 + 1: cmpd r0, r0 191 + bne 1b 192 + nap 193 + b . 194 + 195 + /****************************************************************************** 196 + * * 197 + * Entry code * 198 + * * 199 + *****************************************************************************/ 220 200 221 201 .global kvmppc_hv_entry 222 202 kvmppc_hv_entry: ··· 281 159 * all other volatile GPRS = free 282 160 */ 283 161 mflr r0 284 - std r0, HSTATE_VMHANDLER(r13) 162 + std r0, PPC_LR_STKOFF(r1) 163 + stdu r1, -112(r1) 285 164 286 165 /* Set partition DABR */ 287 166 /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ ··· 323 200 ld r3, VCPU_MMCR(r4) 324 201 ld r5, VCPU_MMCR + 8(r4) 325 202 ld r6, VCPU_MMCR + 16(r4) 203 + ld r7, VCPU_SIAR(r4) 204 + ld r8, VCPU_SDAR(r4) 326 205 mtspr SPRN_MMCR1, r5 327 206 mtspr SPRN_MMCRA, r6 207 + mtspr SPRN_SIAR, r7 208 + mtspr SPRN_SDAR, r8 328 209 mtspr SPRN_MMCR0, r3 329 210 isync 330 211 ··· 381 254 /* Save R1 in the PACA */ 382 255 std r1, HSTATE_HOST_R1(r13) 383 256 384 - /* Increment yield count if they have a VPA */ 385 - ld r3, VCPU_VPA(r4) 386 - cmpdi r3, 0 387 - beq 25f 388 - lwz r5, LPPACA_YIELDCOUNT(r3) 389 - addi r5, r5, 1 390 - stw r5, LPPACA_YIELDCOUNT(r3) 391 - li r6, 1 392 - stb 
r6, VCPU_VPA_DIRTY(r4) 393 - 25: 394 257 /* Load up DAR and DSISR */ 395 258 ld r5, VCPU_DAR(r4) 396 259 lwz r6, VCPU_DSISR(r4) 397 260 mtspr SPRN_DAR, r5 398 261 mtspr SPRN_DSISR, r6 262 + 263 + li r6, KVM_GUEST_MODE_HOST_HV 264 + stb r6, HSTATE_IN_GUEST(r13) 399 265 400 266 BEGIN_FTR_SECTION 401 267 /* Restore AMR and UAMOR, set AMOR to all 1s */ ··· 463 343 bdnz 28b 464 344 ptesync 465 345 466 - 22: li r0,1 346 + /* Add timebase offset onto timebase */ 347 + 22: ld r8,VCORE_TB_OFFSET(r5) 348 + cmpdi r8,0 349 + beq 37f 350 + mftb r6 /* current host timebase */ 351 + add r8,r8,r6 352 + mtspr SPRN_TBU40,r8 /* update upper 40 bits */ 353 + mftb r7 /* check if lower 24 bits overflowed */ 354 + clrldi r6,r6,40 355 + clrldi r7,r7,40 356 + cmpld r7,r6 357 + bge 37f 358 + addis r8,r8,0x100 /* if so, increment upper 40 bits */ 359 + mtspr SPRN_TBU40,r8 360 + 361 + /* Load guest PCR value to select appropriate compat mode */ 362 + 37: ld r7, VCORE_PCR(r5) 363 + cmpdi r7, 0 364 + beq 38f 365 + mtspr SPRN_PCR, r7 366 + 38: 367 + li r0,1 467 368 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ 468 369 b 10f 469 370 ··· 494 353 beq 20b 495 354 496 355 /* Set LPCR and RMOR. 
*/ 497 - 10: ld r8,KVM_LPCR(r9) 356 + 10: ld r8,VCORE_LPCR(r5) 498 357 mtspr SPRN_LPCR,r8 499 358 ld r8,KVM_RMOR(r9) 500 359 mtspr SPRN_RMOR,r8 501 360 isync 502 361 362 + /* Increment yield count if they have a VPA */ 363 + ld r3, VCPU_VPA(r4) 364 + cmpdi r3, 0 365 + beq 25f 366 + lwz r5, LPPACA_YIELDCOUNT(r3) 367 + addi r5, r5, 1 368 + stw r5, LPPACA_YIELDCOUNT(r3) 369 + li r6, 1 370 + stb r6, VCPU_VPA_DIRTY(r4) 371 + 25: 503 372 /* Check if HDEC expires soon */ 504 373 mfspr r3,SPRN_HDEC 505 374 cmpwi r3,10 ··· 556 405 bne 24b 557 406 isync 558 407 559 - ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */ 408 + ld r5,HSTATE_KVM_VCORE(r13) 409 + ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */ 560 410 li r0,0x18f 561 411 rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ 562 412 or r0,r7,r0 ··· 693 541 mtspr SPRN_HSRR1,r11 694 542 695 543 /* Activate guest mode, so faults get handled by KVM */ 696 - li r9, KVM_GUEST_MODE_GUEST 544 + li r9, KVM_GUEST_MODE_GUEST_HV 697 545 stb r9, HSTATE_IN_GUEST(r13) 698 546 699 547 /* Enter guest */ ··· 702 550 ld r5, VCPU_CFAR(r4) 703 551 mtspr SPRN_CFAR, r5 704 552 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 553 + BEGIN_FTR_SECTION 554 + ld r0, VCPU_PPR(r4) 555 + END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 705 556 706 557 ld r5, VCPU_LR(r4) 707 558 lwz r6, VCPU_CR(r4) 708 559 mtlr r5 709 560 mtcr r6 710 561 711 - ld r0, VCPU_GPR(R0)(r4) 712 562 ld r1, VCPU_GPR(R1)(r4) 713 563 ld r2, VCPU_GPR(R2)(r4) 714 564 ld r3, VCPU_GPR(R3)(r4) ··· 724 570 ld r12, VCPU_GPR(R12)(r4) 725 571 ld r13, VCPU_GPR(R13)(r4) 726 572 573 + BEGIN_FTR_SECTION 574 + mtspr SPRN_PPR, r0 575 + END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 576 + ld r0, VCPU_GPR(R0)(r4) 727 577 ld r4, VCPU_GPR(R4)(r4) 728 578 729 579 hrfid ··· 742 584 /* 743 585 * We come here from the first-level interrupt handlers. 
744 586 */ 745 - .globl kvmppc_interrupt 746 - kvmppc_interrupt: 587 + .globl kvmppc_interrupt_hv 588 + kvmppc_interrupt_hv: 747 589 /* 748 590 * Register contents: 749 591 * R12 = interrupt vector ··· 753 595 */ 754 596 /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */ 755 597 std r9, HSTATE_HOST_R2(r13) 598 + 599 + lbz r9, HSTATE_IN_GUEST(r13) 600 + cmpwi r9, KVM_GUEST_MODE_HOST_HV 601 + beq kvmppc_bad_host_intr 602 + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 603 + cmpwi r9, KVM_GUEST_MODE_GUEST 604 + ld r9, HSTATE_HOST_R2(r13) 605 + beq kvmppc_interrupt_pr 606 + #endif 607 + /* We're now back in the host but in guest MMU context */ 608 + li r9, KVM_GUEST_MODE_HOST_HV 609 + stb r9, HSTATE_IN_GUEST(r13) 610 + 756 611 ld r9, HSTATE_KVM_VCPU(r13) 757 612 758 613 /* Save registers */ ··· 791 620 ld r3, HSTATE_CFAR(r13) 792 621 std r3, VCPU_CFAR(r9) 793 622 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 623 + BEGIN_FTR_SECTION 624 + ld r4, HSTATE_PPR(r13) 625 + std r4, VCPU_PPR(r9) 626 + END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 794 627 795 628 /* Restore R1/R2 so we can handle faults */ 796 629 ld r1, HSTATE_HOST_R1(r13) ··· 816 641 mflr r4 817 642 std r3, VCPU_GPR(R13)(r9) 818 643 std r4, VCPU_LR(r9) 819 - 820 - /* Unset guest mode */ 821 - li r0, KVM_GUEST_MODE_NONE 822 - stb r0, HSTATE_IN_GUEST(r13) 823 644 824 645 stw r12,VCPU_TRAP(r9) 825 646 ··· 867 696 * set, we know the host wants us out so let's do it now 868 697 */ 869 698 do_ext_interrupt: 870 - lbz r0, HSTATE_HOST_IPI(r13) 871 - cmpwi r0, 0 872 - bne ext_interrupt_to_host 873 - 874 - /* Now read the interrupt from the ICP */ 875 - ld r5, HSTATE_XICS_PHYS(r13) 876 - li r7, XICS_XIRR 877 - cmpdi r5, 0 878 - beq- ext_interrupt_to_host 879 - lwzcix r3, r5, r7 880 - rlwinm. r0, r3, 0, 0xffffff 881 - sync 882 - beq 3f /* if nothing pending in the ICP */ 883 - 884 - /* We found something in the ICP... 
885 - * 886 - * If it's not an IPI, stash it in the PACA and return to 887 - * the host, we don't (yet) handle directing real external 888 - * interrupts directly to the guest 889 - */ 890 - cmpwi r0, XICS_IPI 891 - bne ext_stash_for_host 892 - 893 - /* It's an IPI, clear the MFRR and EOI it */ 894 - li r0, 0xff 895 - li r6, XICS_MFRR 896 - stbcix r0, r5, r6 /* clear the IPI */ 897 - stwcix r3, r5, r7 /* EOI it */ 898 - sync 899 - 900 - /* We need to re-check host IPI now in case it got set in the 901 - * meantime. If it's clear, we bounce the interrupt to the 902 - * guest 903 - */ 904 - lbz r0, HSTATE_HOST_IPI(r13) 905 - cmpwi r0, 0 906 - bne- 1f 699 + bl kvmppc_read_intr 700 + cmpdi r3, 0 701 + bgt ext_interrupt_to_host 907 702 908 703 /* Allright, looks like an IPI for the guest, we need to set MER */ 909 - 3: 910 704 /* Check if any CPU is heading out to the host, if so head out too */ 911 705 ld r5, HSTATE_KVM_VCORE(r13) 912 706 lwz r0, VCORE_ENTRY_EXIT(r5) ··· 900 764 mtspr SPRN_LPCR, r8 901 765 b fast_guest_return 902 766 903 - /* We raced with the host, we need to resend that IPI, bummer */ 904 - 1: li r0, IPI_PRIORITY 905 - stbcix r0, r5, r6 /* set the IPI */ 906 - sync 907 - b ext_interrupt_to_host 908 - 909 - ext_stash_for_host: 910 - /* It's not an IPI and it's for the host, stash it in the PACA 911 - * before exit, it will be picked up by the host ICP driver 912 - */ 913 - stw r3, HSTATE_SAVED_XIRR(r13) 914 767 ext_interrupt_to_host: 915 768 916 769 guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 917 - /* Save DEC */ 918 - mfspr r5,SPRN_DEC 919 - mftb r6 920 - extsw r5,r5 921 - add r5,r5,r6 922 - std r5,VCPU_DEC_EXPIRES(r9) 923 - 924 770 /* Save more register state */ 925 771 mfdar r6 926 772 mfdsisr r7 ··· 1072 954 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 1073 955 mtspr SPRN_LPID,r7 1074 956 isync 1075 - li r0,0 957 + 958 + /* Subtract timebase offset from timebase */ 959 + ld r8,VCORE_TB_OFFSET(r5) 960 + cmpdi r8,0 961 + beq 
17f 962 + mftb r6 /* current host timebase */ 963 + subf r8,r8,r6 964 + mtspr SPRN_TBU40,r8 /* update upper 40 bits */ 965 + mftb r7 /* check if lower 24 bits overflowed */ 966 + clrldi r6,r6,40 967 + clrldi r7,r7,40 968 + cmpld r7,r6 969 + bge 17f 970 + addis r8,r8,0x100 /* if so, increment upper 40 bits */ 971 + mtspr SPRN_TBU40,r8 972 + 973 + /* Reset PCR */ 974 + 17: ld r0, VCORE_PCR(r5) 975 + cmpdi r0, 0 976 + beq 18f 977 + li r0, 0 978 + mtspr SPRN_PCR, r0 979 + 18: 980 + /* Signal secondary CPUs to continue */ 1076 981 stb r0,VCORE_IN_GUEST(r5) 1077 982 lis r8,0x7fff /* MAX_INT@h */ 1078 983 mtspr SPRN_HDEC,r8 ··· 1193 1052 1: addi r8,r8,16 1194 1053 .endr 1195 1054 1055 + /* Save DEC */ 1056 + mfspr r5,SPRN_DEC 1057 + mftb r6 1058 + extsw r5,r5 1059 + add r5,r5,r6 1060 + std r5,VCPU_DEC_EXPIRES(r9) 1061 + 1196 1062 /* Save and reset AMR and UAMOR before turning on the MMU */ 1197 1063 BEGIN_FTR_SECTION 1198 1064 mfspr r5,SPRN_AMR ··· 1209 1061 li r6,0 1210 1062 mtspr SPRN_AMR,r6 1211 1063 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1064 + 1065 + /* Unset guest mode */ 1066 + li r0, KVM_GUEST_MODE_NONE 1067 + stb r0, HSTATE_IN_GUEST(r13) 1212 1068 1213 1069 /* Switch DSCR back to host value */ 1214 1070 BEGIN_FTR_SECTION ··· 1286 1134 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ 1287 1135 b 22f 1288 1136 21: mfspr r5, SPRN_MMCR1 1137 + mfspr r7, SPRN_SIAR 1138 + mfspr r8, SPRN_SDAR 1289 1139 std r4, VCPU_MMCR(r9) 1290 1140 std r5, VCPU_MMCR + 8(r9) 1291 1141 std r6, VCPU_MMCR + 16(r9) 1142 + std r7, VCPU_SIAR(r9) 1143 + std r8, VCPU_SDAR(r9) 1292 1144 mfspr r3, SPRN_PMC1 1293 1145 mfspr r4, SPRN_PMC2 1294 1146 mfspr r5, SPRN_PMC3 ··· 1314 1158 stw r11, VCPU_PMC + 28(r9) 1315 1159 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1316 1160 22: 1161 + ld r0, 112+PPC_LR_STKOFF(r1) 1162 + addi r1, r1, 112 1163 + mtlr r0 1164 + blr 1165 + secondary_too_late: 1166 + ld r5,HSTATE_KVM_VCORE(r13) 1167 + HMT_LOW 1168 + 13: lbz r3,VCORE_IN_GUEST(r5) 1169 + cmpwi r3,0 
1170 + bne 13b 1171 + HMT_MEDIUM 1172 + li r0, KVM_GUEST_MODE_NONE 1173 + stb r0, HSTATE_IN_GUEST(r13) 1174 + ld r11,PACA_SLBSHADOWPTR(r13) 1317 1175 1318 - /* Secondary threads go off to take a nap on POWER7 */ 1319 - BEGIN_FTR_SECTION 1320 - lwz r0,VCPU_PTID(r9) 1321 - cmpwi r0,0 1322 - bne secondary_nap 1323 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1324 - 1325 - /* Restore host DABR and DABRX */ 1326 - ld r5,HSTATE_DABR(r13) 1327 - li r6,7 1328 - mtspr SPRN_DABR,r5 1329 - mtspr SPRN_DABRX,r6 1330 - 1331 - /* Restore SPRG3 */ 1332 - ld r3,PACA_SPRG3(r13) 1333 - mtspr SPRN_SPRG3,r3 1334 - 1335 - /* 1336 - * Reload DEC. HDEC interrupts were disabled when 1337 - * we reloaded the host's LPCR value. 1338 - */ 1339 - ld r3, HSTATE_DECEXP(r13) 1340 - mftb r4 1341 - subf r4, r4, r3 1342 - mtspr SPRN_DEC, r4 1343 - 1344 - /* Reload the host's PMU registers */ 1345 - ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ 1346 - lbz r4, LPPACA_PMCINUSE(r3) 1347 - cmpwi r4, 0 1348 - beq 23f /* skip if not */ 1349 - lwz r3, HSTATE_PMC(r13) 1350 - lwz r4, HSTATE_PMC + 4(r13) 1351 - lwz r5, HSTATE_PMC + 8(r13) 1352 - lwz r6, HSTATE_PMC + 12(r13) 1353 - lwz r8, HSTATE_PMC + 16(r13) 1354 - lwz r9, HSTATE_PMC + 20(r13) 1355 - BEGIN_FTR_SECTION 1356 - lwz r10, HSTATE_PMC + 24(r13) 1357 - lwz r11, HSTATE_PMC + 28(r13) 1358 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1359 - mtspr SPRN_PMC1, r3 1360 - mtspr SPRN_PMC2, r4 1361 - mtspr SPRN_PMC3, r5 1362 - mtspr SPRN_PMC4, r6 1363 - mtspr SPRN_PMC5, r8 1364 - mtspr SPRN_PMC6, r9 1365 - BEGIN_FTR_SECTION 1366 - mtspr SPRN_PMC7, r10 1367 - mtspr SPRN_PMC8, r11 1368 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1369 - ld r3, HSTATE_MMCR(r13) 1370 - ld r4, HSTATE_MMCR + 8(r13) 1371 - ld r5, HSTATE_MMCR + 16(r13) 1372 - mtspr SPRN_MMCR1, r4 1373 - mtspr SPRN_MMCRA, r5 1374 - mtspr SPRN_MMCR0, r3 1375 - isync 1376 - 23: 1377 - /* 1378 - * For external and machine check interrupts, we need 1379 - * to call the Linux handler to process the 
interrupt. 1380 - * We do that by jumping to absolute address 0x500 for 1381 - * external interrupts, or the machine_check_fwnmi label 1382 - * for machine checks (since firmware might have patched 1383 - * the vector area at 0x200). The [h]rfid at the end of the 1384 - * handler will return to the book3s_hv_interrupts.S code. 1385 - * For other interrupts we do the rfid to get back 1386 - * to the book3s_hv_interrupts.S code here. 1387 - */ 1388 - ld r8, HSTATE_VMHANDLER(r13) 1389 - ld r7, HSTATE_HOST_MSR(r13) 1390 - 1391 - cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK 1392 - cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 1393 - BEGIN_FTR_SECTION 1394 - beq 11f 1395 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 1396 - 1397 - /* RFI into the highmem handler, or branch to interrupt handler */ 1398 - mfmsr r6 1399 - li r0, MSR_RI 1400 - andc r6, r6, r0 1401 - mtmsrd r6, 1 /* Clear RI in MSR */ 1402 - mtsrr0 r8 1403 - mtsrr1 r7 1404 - beqa 0x500 /* external interrupt (PPC970) */ 1405 - beq cr1, 13f /* machine check */ 1406 - RFI 1407 - 1408 - /* On POWER7, we have external interrupts set to use HSRR0/1 */ 1409 - 11: mtspr SPRN_HSRR0, r8 1410 - mtspr SPRN_HSRR1, r7 1411 - ba 0x500 1412 - 1413 - 13: b machine_check_fwnmi 1176 + .rept SLB_NUM_BOLTED 1177 + ld r5,SLBSHADOW_SAVEAREA(r11) 1178 + ld r6,SLBSHADOW_SAVEAREA+8(r11) 1179 + andis. r7,r5,SLB_ESID_V@h 1180 + beq 1f 1181 + slbmte r6,r5 1182 + 1: addi r11,r11,16 1183 + .endr 1184 + b 22b 1414 1185 1415 1186 /* 1416 1187 * Check whether an HDSI is an HPTE not found fault or something else. ··· 1416 1333 stw r8, VCPU_LAST_INST(r9) 1417 1334 1418 1335 /* Unset guest mode. 
*/ 1419 - li r0, KVM_GUEST_MODE_NONE 1336 + li r0, KVM_GUEST_MODE_HOST_HV 1420 1337 stb r0, HSTATE_IN_GUEST(r13) 1421 1338 b guest_exit_cont 1422 1339 ··· 1784 1701 rotldi r11, r11, 63 1785 1702 b fast_interrupt_c_return 1786 1703 1787 - secondary_too_late: 1788 - ld r5,HSTATE_KVM_VCORE(r13) 1789 - HMT_LOW 1790 - 13: lbz r3,VCORE_IN_GUEST(r5) 1791 - cmpwi r3,0 1792 - bne 13b 1793 - HMT_MEDIUM 1794 - ld r11,PACA_SLBSHADOWPTR(r13) 1704 + /* 1705 + * Determine what sort of external interrupt is pending (if any). 1706 + * Returns: 1707 + * 0 if no interrupt is pending 1708 + * 1 if an interrupt is pending that needs to be handled by the host 1709 + * -1 if there was a guest wakeup IPI (which has now been cleared) 1710 + */ 1711 + kvmppc_read_intr: 1712 + /* see if a host IPI is pending */ 1713 + li r3, 1 1714 + lbz r0, HSTATE_HOST_IPI(r13) 1715 + cmpwi r0, 0 1716 + bne 1f 1795 1717 1796 - .rept SLB_NUM_BOLTED 1797 - ld r5,SLBSHADOW_SAVEAREA(r11) 1798 - ld r6,SLBSHADOW_SAVEAREA+8(r11) 1799 - andis. r7,r5,SLB_ESID_V@h 1800 - beq 1f 1801 - slbmte r6,r5 1802 - 1: addi r11,r11,16 1803 - .endr 1804 - 1805 - secondary_nap: 1806 - /* Clear our vcpu pointer so we don't come back in early */ 1807 - li r0, 0 1808 - std r0, HSTATE_KVM_VCPU(r13) 1809 - lwsync 1810 - /* Clear any pending IPI - assume we're a secondary thread */ 1811 - ld r5, HSTATE_XICS_PHYS(r13) 1718 + /* Now read the interrupt from the ICP */ 1719 + ld r6, HSTATE_XICS_PHYS(r13) 1812 1720 li r7, XICS_XIRR 1813 - lwzcix r3, r5, r7 /* ack any pending interrupt */ 1814 - rlwinm. r0, r3, 0, 0xffffff /* any pending? */ 1815 - beq 37f 1721 + cmpdi r6, 0 1722 + beq- 1f 1723 + lwzcix r0, r6, r7 1724 + rlwinm. 
r3, r0, 0, 0xffffff 1816 1725 sync 1817 - li r0, 0xff 1818 - li r6, XICS_MFRR 1819 - stbcix r0, r5, r6 /* clear the IPI */ 1820 - stwcix r3, r5, r7 /* EOI it */ 1821 - 37: sync 1726 + beq 1f /* if nothing pending in the ICP */ 1822 1727 1823 - /* increment the nap count and then go to nap mode */ 1824 - ld r4, HSTATE_KVM_VCORE(r13) 1825 - addi r4, r4, VCORE_NAP_COUNT 1826 - lwsync /* make previous updates visible */ 1827 - 51: lwarx r3, 0, r4 1828 - addi r3, r3, 1 1829 - stwcx. r3, 0, r4 1830 - bne 51b 1728 + /* We found something in the ICP... 1729 + * 1730 + * If it's not an IPI, stash it in the PACA and return to 1731 + * the host, we don't (yet) handle directing real external 1732 + * interrupts directly to the guest 1733 + */ 1734 + cmpwi r3, XICS_IPI /* if there is, is it an IPI? */ 1735 + li r3, 1 1736 + bne 42f 1831 1737 1832 - kvm_no_guest: 1833 - li r0, KVM_HWTHREAD_IN_NAP 1834 - stb r0, HSTATE_HWTHREAD_STATE(r13) 1738 + /* It's an IPI, clear the MFRR and EOI it */ 1739 + li r3, 0xff 1740 + li r8, XICS_MFRR 1741 + stbcix r3, r6, r8 /* clear the IPI */ 1742 + stwcix r0, r6, r7 /* EOI it */ 1743 + sync 1835 1744 1836 - li r3, LPCR_PECE0 1837 - mfspr r4, SPRN_LPCR 1838 - rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 1839 - mtspr SPRN_LPCR, r4 1840 - isync 1841 - std r0, HSTATE_SCRATCH0(r13) 1842 - ptesync 1843 - ld r0, HSTATE_SCRATCH0(r13) 1844 - 1: cmpd r0, r0 1845 - bne 1b 1846 - nap 1847 - b . 1745 + /* We need to re-check host IPI now in case it got set in the 1746 + * meantime. 
If it's clear, we bounce the interrupt to the 1747 + * guest 1748 + */ 1749 + lbz r0, HSTATE_HOST_IPI(r13) 1750 + cmpwi r0, 0 1751 + bne- 43f 1752 + 1753 + /* OK, it's an IPI for us */ 1754 + li r3, -1 1755 + 1: blr 1756 + 1757 + 42: /* It's not an IPI and it's for the host, stash it in the PACA 1758 + * before exit, it will be picked up by the host ICP driver 1759 + */ 1760 + stw r0, HSTATE_SAVED_XIRR(r13) 1761 + b 1b 1762 + 1763 + 43: /* We raced with the host, we need to resend that IPI, bummer */ 1764 + li r0, IPI_PRIORITY 1765 + stbcix r0, r6, r8 /* set the IPI */ 1766 + sync 1767 + b 1b 1848 1768 1849 1769 /* 1850 1770 * Save away FP, VMX and VSX registers. ··· 1965 1879 lwz r7,VCPU_VRSAVE(r4) 1966 1880 mtspr SPRN_VRSAVE,r7 1967 1881 blr 1882 + 1883 + /* 1884 + * We come here if we get any exception or interrupt while we are 1885 + * executing host real mode code while in guest MMU context. 1886 + * For now just spin, but we should do something better. 1887 + */ 1888 + kvmppc_bad_host_intr: 1889 + b .
+26 -6
arch/powerpc/kvm/book3s_interrupts.S
··· 26 26 27 27 #if defined(CONFIG_PPC_BOOK3S_64) 28 28 #define FUNC(name) GLUE(.,name) 29 + #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU 30 + 29 31 #elif defined(CONFIG_PPC_BOOK3S_32) 30 32 #define FUNC(name) name 33 + #define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) 34 + 31 35 #endif /* CONFIG_PPC_BOOK3S_XX */ 32 36 33 37 #define VCPU_LOAD_NVGPRS(vcpu) \ ··· 91 87 VCPU_LOAD_NVGPRS(r4) 92 88 93 89 kvm_start_lightweight: 90 + /* Copy registers into shadow vcpu so we can access them in real mode */ 91 + GET_SHADOW_VCPU(r3) 92 + bl FUNC(kvmppc_copy_to_svcpu) 93 + nop 94 + REST_GPR(4, r1) 94 95 95 96 #ifdef CONFIG_PPC_BOOK3S_64 97 + /* Get the dcbz32 flag */ 96 98 PPC_LL r3, VCPU_HFLAGS(r4) 97 99 rldicl r3, r3, 0, 63 /* r3 &= 1 */ 98 100 stb r3, HSTATE_RESTORE_HID5(r13) ··· 121 111 * 122 112 */ 123 113 124 - .global kvmppc_handler_highmem 125 - kvmppc_handler_highmem: 126 - 127 114 /* 128 115 * Register usage at this point: 129 116 * ··· 132 125 * 133 126 */ 134 127 135 - /* R7 = vcpu */ 136 - PPC_LL r7, GPR4(r1) 128 + /* Transfer reg values from shadow vcpu back to vcpu struct */ 129 + /* On 64-bit, interrupts are still off at this point */ 130 + PPC_LL r3, GPR4(r1) /* vcpu pointer */ 131 + GET_SHADOW_VCPU(r4) 132 + bl FUNC(kvmppc_copy_from_svcpu) 133 + nop 137 134 138 135 #ifdef CONFIG_PPC_BOOK3S_64 136 + /* Re-enable interrupts */ 137 + ld r3, HSTATE_HOST_MSR(r13) 138 + ori r3, r3, MSR_EE 139 + MTMSR_EERI(r3) 140 + 139 141 /* 140 142 * Reload kernel SPRG3 value. 141 143 * No need to save guest value as usermode can't modify SPRG3. 
142 144 */ 143 145 ld r3, PACA_SPRG3(r13) 144 146 mtspr SPRN_SPRG3, r3 147 + 145 148 #endif /* CONFIG_PPC_BOOK3S_64 */ 149 + 150 + /* R7 = vcpu */ 151 + PPC_LL r7, GPR4(r1) 146 152 147 153 PPC_STL r14, VCPU_GPR(R14)(r7) 148 154 PPC_STL r15, VCPU_GPR(R15)(r7) ··· 181 161 182 162 /* Restore r3 (kvm_run) and r4 (vcpu) */ 183 163 REST_2GPRS(3, r1) 184 - bl FUNC(kvmppc_handle_exit) 164 + bl FUNC(kvmppc_handle_exit_pr) 185 165 186 166 /* If RESUME_GUEST, get back in the loop */ 187 167 cmpwi r3, RESUME_GUEST
+61 -5
arch/powerpc/kvm/book3s_mmu_hpte.c
··· 28 28 #include <asm/mmu_context.h> 29 29 #include <asm/hw_irq.h> 30 30 31 - #include "trace.h" 31 + #include "trace_pr.h" 32 32 33 33 #define PTE_SIZE 12 34 34 ··· 56 56 HPTEG_HASH_BITS_VPTE_LONG); 57 57 } 58 58 59 + #ifdef CONFIG_PPC_BOOK3S_64 60 + static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage) 61 + { 62 + return hash_64((vpage & 0xffffffff0ULL) >> 4, 63 + HPTEG_HASH_BITS_VPTE_64K); 64 + } 65 + #endif 66 + 59 67 void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) 60 68 { 61 69 u64 index; ··· 90 82 index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); 91 83 hlist_add_head_rcu(&pte->list_vpte_long, 92 84 &vcpu3s->hpte_hash_vpte_long[index]); 85 + 86 + #ifdef CONFIG_PPC_BOOK3S_64 87 + /* Add to vPTE_64k list */ 88 + index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage); 89 + hlist_add_head_rcu(&pte->list_vpte_64k, 90 + &vcpu3s->hpte_hash_vpte_64k[index]); 91 + #endif 92 + 93 + vcpu3s->hpte_cache_count++; 93 94 94 95 spin_unlock(&vcpu3s->mmu_lock); 95 96 } ··· 130 113 hlist_del_init_rcu(&pte->list_pte_long); 131 114 hlist_del_init_rcu(&pte->list_vpte); 132 115 hlist_del_init_rcu(&pte->list_vpte_long); 116 + #ifdef CONFIG_PPC_BOOK3S_64 117 + hlist_del_init_rcu(&pte->list_vpte_64k); 118 + #endif 119 + vcpu3s->hpte_cache_count--; 133 120 134 121 spin_unlock(&vcpu3s->mmu_lock); 135 122 136 - vcpu3s->hpte_cache_count--; 137 123 call_rcu(&pte->rcu_head, free_pte_rcu); 138 124 } 139 125 ··· 239 219 rcu_read_unlock(); 240 220 } 241 221 222 + #ifdef CONFIG_PPC_BOOK3S_64 223 + /* Flush with mask 0xffffffff0 */ 224 + static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp) 225 + { 226 + struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 227 + struct hlist_head *list; 228 + struct hpte_cache *pte; 229 + u64 vp_mask = 0xffffffff0ULL; 230 + 231 + list = &vcpu3s->hpte_hash_vpte_64k[ 232 + kvmppc_mmu_hash_vpte_64k(guest_vp)]; 233 + 234 + rcu_read_lock(); 235 + 236 + /* Check the list for matching entries and invalidate */ 237 + 
hlist_for_each_entry_rcu(pte, list, list_vpte_64k) 238 + if ((pte->pte.vpage & vp_mask) == guest_vp) 239 + invalidate_pte(vcpu, pte); 240 + 241 + rcu_read_unlock(); 242 + } 243 + #endif 244 + 242 245 /* Flush with mask 0xffffff000 */ 243 246 static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) 244 247 { ··· 292 249 case 0xfffffffffULL: 293 250 kvmppc_mmu_pte_vflush_short(vcpu, guest_vp); 294 251 break; 252 + #ifdef CONFIG_PPC_BOOK3S_64 253 + case 0xffffffff0ULL: 254 + kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp); 255 + break; 256 + #endif 295 257 case 0xffffff000ULL: 296 258 kvmppc_mmu_pte_vflush_long(vcpu, guest_vp); 297 259 break; ··· 333 285 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 334 286 struct hpte_cache *pte; 335 287 336 - pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); 337 - vcpu3s->hpte_cache_count++; 338 - 339 288 if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM) 340 289 kvmppc_mmu_pte_flush_all(vcpu); 341 290 291 + pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL); 292 + 342 293 return pte; 294 + } 295 + 296 + void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte) 297 + { 298 + kmem_cache_free(hpte_cache, pte); 343 299 } 344 300 345 301 void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu) ··· 372 320 ARRAY_SIZE(vcpu3s->hpte_hash_vpte)); 373 321 kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long, 374 322 ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long)); 323 + #ifdef CONFIG_PPC_BOOK3S_64 324 + kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k, 325 + ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k)); 326 + #endif 375 327 376 328 spin_lock_init(&vcpu3s->mmu_lock); 377 329
+364 -140
arch/powerpc/kvm/book3s_pr.c
··· 40 40 #include <linux/sched.h> 41 41 #include <linux/vmalloc.h> 42 42 #include <linux/highmem.h> 43 + #include <linux/module.h> 43 44 44 - #include "trace.h" 45 + #include "book3s.h" 46 + 47 + #define CREATE_TRACE_POINTS 48 + #include "trace_pr.h" 45 49 46 50 /* #define EXIT_DEBUG */ 47 51 /* #define DEBUG_EXT */ ··· 60 56 #define HW_PAGE_SIZE PAGE_SIZE 61 57 #endif 62 58 63 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 59 + static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) 64 60 { 65 61 #ifdef CONFIG_PPC_BOOK3S_64 66 62 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 67 63 memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb)); 68 - memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, 69 - sizeof(get_paca()->shadow_vcpu)); 70 64 svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; 71 65 svcpu_put(svcpu); 72 66 #endif 73 67 vcpu->cpu = smp_processor_id(); 74 68 #ifdef CONFIG_PPC_BOOK3S_32 75 - current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; 69 + current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; 76 70 #endif 77 71 } 78 72 79 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 73 + static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) 80 74 { 81 75 #ifdef CONFIG_PPC_BOOK3S_64 82 76 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 83 77 memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); 84 - memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 85 - sizeof(get_paca()->shadow_vcpu)); 86 78 to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; 87 79 svcpu_put(svcpu); 88 80 #endif ··· 87 87 vcpu->cpu = -1; 88 88 } 89 89 90 - int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) 90 + /* Copy data needed by real-mode code from vcpu to shadow vcpu */ 91 + void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, 92 + struct kvm_vcpu *vcpu) 93 + { 94 + svcpu->gpr[0] = vcpu->arch.gpr[0]; 95 + svcpu->gpr[1] = vcpu->arch.gpr[1]; 96 + 
svcpu->gpr[2] = vcpu->arch.gpr[2]; 97 + svcpu->gpr[3] = vcpu->arch.gpr[3]; 98 + svcpu->gpr[4] = vcpu->arch.gpr[4]; 99 + svcpu->gpr[5] = vcpu->arch.gpr[5]; 100 + svcpu->gpr[6] = vcpu->arch.gpr[6]; 101 + svcpu->gpr[7] = vcpu->arch.gpr[7]; 102 + svcpu->gpr[8] = vcpu->arch.gpr[8]; 103 + svcpu->gpr[9] = vcpu->arch.gpr[9]; 104 + svcpu->gpr[10] = vcpu->arch.gpr[10]; 105 + svcpu->gpr[11] = vcpu->arch.gpr[11]; 106 + svcpu->gpr[12] = vcpu->arch.gpr[12]; 107 + svcpu->gpr[13] = vcpu->arch.gpr[13]; 108 + svcpu->cr = vcpu->arch.cr; 109 + svcpu->xer = vcpu->arch.xer; 110 + svcpu->ctr = vcpu->arch.ctr; 111 + svcpu->lr = vcpu->arch.lr; 112 + svcpu->pc = vcpu->arch.pc; 113 + } 114 + 115 + /* Copy data touched by real-mode code from shadow vcpu back to vcpu */ 116 + void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, 117 + struct kvmppc_book3s_shadow_vcpu *svcpu) 118 + { 119 + vcpu->arch.gpr[0] = svcpu->gpr[0]; 120 + vcpu->arch.gpr[1] = svcpu->gpr[1]; 121 + vcpu->arch.gpr[2] = svcpu->gpr[2]; 122 + vcpu->arch.gpr[3] = svcpu->gpr[3]; 123 + vcpu->arch.gpr[4] = svcpu->gpr[4]; 124 + vcpu->arch.gpr[5] = svcpu->gpr[5]; 125 + vcpu->arch.gpr[6] = svcpu->gpr[6]; 126 + vcpu->arch.gpr[7] = svcpu->gpr[7]; 127 + vcpu->arch.gpr[8] = svcpu->gpr[8]; 128 + vcpu->arch.gpr[9] = svcpu->gpr[9]; 129 + vcpu->arch.gpr[10] = svcpu->gpr[10]; 130 + vcpu->arch.gpr[11] = svcpu->gpr[11]; 131 + vcpu->arch.gpr[12] = svcpu->gpr[12]; 132 + vcpu->arch.gpr[13] = svcpu->gpr[13]; 133 + vcpu->arch.cr = svcpu->cr; 134 + vcpu->arch.xer = svcpu->xer; 135 + vcpu->arch.ctr = svcpu->ctr; 136 + vcpu->arch.lr = svcpu->lr; 137 + vcpu->arch.pc = svcpu->pc; 138 + vcpu->arch.shadow_srr1 = svcpu->shadow_srr1; 139 + vcpu->arch.fault_dar = svcpu->fault_dar; 140 + vcpu->arch.fault_dsisr = svcpu->fault_dsisr; 141 + vcpu->arch.last_inst = svcpu->last_inst; 142 + } 143 + 144 + static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) 91 145 { 92 146 int r = 1; /* Indicate we want to get back into the guest */ 93 147 ··· 154 100 } 155 
101 156 102 /************* MMU Notifiers *************/ 103 + static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start, 104 + unsigned long end) 105 + { 106 + long i; 107 + struct kvm_vcpu *vcpu; 108 + struct kvm_memslots *slots; 109 + struct kvm_memory_slot *memslot; 157 110 158 - int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 111 + slots = kvm_memslots(kvm); 112 + kvm_for_each_memslot(memslot, slots) { 113 + unsigned long hva_start, hva_end; 114 + gfn_t gfn, gfn_end; 115 + 116 + hva_start = max(start, memslot->userspace_addr); 117 + hva_end = min(end, memslot->userspace_addr + 118 + (memslot->npages << PAGE_SHIFT)); 119 + if (hva_start >= hva_end) 120 + continue; 121 + /* 122 + * {gfn(page) | page intersects with [hva_start, hva_end)} = 123 + * {gfn, gfn+1, ..., gfn_end-1}. 124 + */ 125 + gfn = hva_to_gfn_memslot(hva_start, memslot); 126 + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 127 + kvm_for_each_vcpu(i, vcpu, kvm) 128 + kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT, 129 + gfn_end << PAGE_SHIFT); 130 + } 131 + } 132 + 133 + static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva) 159 134 { 160 135 trace_kvm_unmap_hva(hva); 161 136 162 - /* 163 - * Flush all shadow tlb entries everywhere. 
This is slow, but 164 - * we are 100% sure that we catch the to be unmapped page 165 - */ 166 - kvm_flush_remote_tlbs(kvm); 137 + do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); 167 138 168 139 return 0; 169 140 } 170 141 171 - int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) 142 + static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, 143 + unsigned long end) 172 144 { 173 - /* kvm_unmap_hva flushes everything anyways */ 174 - kvm_unmap_hva(kvm, start); 145 + do_kvm_unmap_hva(kvm, start, end); 175 146 176 147 return 0; 177 148 } 178 149 179 - int kvm_age_hva(struct kvm *kvm, unsigned long hva) 150 + static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) 180 151 { 181 152 /* XXX could be more clever ;) */ 182 153 return 0; 183 154 } 184 155 185 - int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 156 + static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva) 186 157 { 187 158 /* XXX could be more clever ;) */ 188 159 return 0; 189 160 } 190 161 191 - void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 162 + static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte) 192 163 { 193 164 /* The page will get remapped properly on its next fault */ 194 - kvm_unmap_hva(kvm, hva); 165 + do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); 195 166 } 196 167 197 168 /*****************************************/ ··· 238 159 vcpu->arch.shadow_msr = smsr; 239 160 } 240 161 241 - void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 162 + static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) 242 163 { 243 164 ulong old_msr = vcpu->arch.shared->msr; 244 165 ··· 298 219 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 299 220 } 300 221 301 - void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 222 + void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) 302 223 { 303 224 u32 host_pvr; 304 225 ··· 334 255 really needs them in a VM on Cell and force disable them. 
*/ 335 256 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) 336 257 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); 258 + 259 + /* 260 + * If they're asking for POWER6 or later, set the flag 261 + * indicating that we can do multiple large page sizes 262 + * and 1TB segments. 263 + * Also set the flag that indicates that tlbie has the large 264 + * page bit in the RB operand instead of the instruction. 265 + */ 266 + switch (PVR_VER(pvr)) { 267 + case PVR_POWER6: 268 + case PVR_POWER7: 269 + case PVR_POWER7p: 270 + case PVR_POWER8: 271 + vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | 272 + BOOK3S_HFLAG_NEW_TLBIE; 273 + break; 274 + } 337 275 338 276 #ifdef CONFIG_PPC_BOOK3S_32 339 277 /* 32 bit Book3S always has 32 byte dcbz */ ··· 430 334 ulong eaddr, int vec) 431 335 { 432 336 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); 337 + bool iswrite = false; 433 338 int r = RESUME_GUEST; 434 339 int relocated; 435 340 int page_found = 0; ··· 441 344 u64 vsid; 442 345 443 346 relocated = data ? 
dr : ir; 347 + if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE)) 348 + iswrite = true; 444 349 445 350 /* Resolve real address if translation turned on */ 446 351 if (relocated) { 447 - page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); 352 + page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite); 448 353 } else { 449 354 pte.may_execute = true; 450 355 pte.may_read = true; ··· 454 355 pte.raddr = eaddr & KVM_PAM; 455 356 pte.eaddr = eaddr; 456 357 pte.vpage = eaddr >> 12; 358 + pte.page_size = MMU_PAGE_64K; 457 359 } 458 360 459 361 switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { ··· 488 388 489 389 if (page_found == -ENOENT) { 490 390 /* Page not found in guest PTE entries */ 491 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 492 391 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 493 - vcpu->arch.shared->dsisr = svcpu->fault_dsisr; 392 + vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr; 494 393 vcpu->arch.shared->msr |= 495 - (svcpu->shadow_srr1 & 0x00000000f8000000ULL); 496 - svcpu_put(svcpu); 394 + vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL; 497 395 kvmppc_book3s_queue_irqprio(vcpu, vec); 498 396 } else if (page_found == -EPERM) { 499 397 /* Storage protection */ 500 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 501 398 vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); 502 - vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE; 399 + vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; 503 400 vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; 504 401 vcpu->arch.shared->msr |= 505 - svcpu->shadow_srr1 & 0x00000000f8000000ULL; 506 - svcpu_put(svcpu); 402 + vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL; 507 403 kvmppc_book3s_queue_irqprio(vcpu, vec); 508 404 } else if (page_found == -EINVAL) { 509 405 /* Page not found in guest SLB */ ··· 507 411 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 508 412 } else if (!is_mmio && 509 413 kvmppc_visible_gfn(vcpu, pte.raddr >> 
PAGE_SHIFT)) { 414 + if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { 415 + /* 416 + * There is already a host HPTE there, presumably 417 + * a read-only one for a page the guest thinks 418 + * is writable, so get rid of it first. 419 + */ 420 + kvmppc_mmu_unmap_page(vcpu, &pte); 421 + } 510 422 /* The guest's PTE is not mapped yet. Map on the host */ 511 - kvmppc_mmu_map_page(vcpu, &pte); 423 + kvmppc_mmu_map_page(vcpu, &pte, iswrite); 512 424 if (data) 513 425 vcpu->stat.sp_storage++; 514 426 else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 515 - (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) 427 + (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) 516 428 kvmppc_patch_dcbz(vcpu, &pte); 517 429 } else { 518 430 /* MMIO */ ··· 723 619 724 620 if (lost_ext & MSR_FP) 725 621 kvmppc_load_up_fpu(); 622 + #ifdef CONFIG_ALTIVEC 726 623 if (lost_ext & MSR_VEC) 727 624 kvmppc_load_up_altivec(); 625 + #endif 728 626 current->thread.regs->msr |= lost_ext; 729 627 } 730 628 731 - int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 732 - unsigned int exit_nr) 629 + int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 630 + unsigned int exit_nr) 733 631 { 734 632 int r = RESUME_HOST; 735 633 int s; ··· 749 643 switch (exit_nr) { 750 644 case BOOK3S_INTERRUPT_INST_STORAGE: 751 645 { 752 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 753 - ulong shadow_srr1 = svcpu->shadow_srr1; 646 + ulong shadow_srr1 = vcpu->arch.shadow_srr1; 754 647 vcpu->stat.pf_instruc++; 755 648 756 649 #ifdef CONFIG_PPC_BOOK3S_32 757 650 /* We set segments as unused segments when invalidating them. So 758 651 * treat the respective fault as segment fault. 
*/ 759 - if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) { 760 - kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 761 - r = RESUME_GUEST; 652 + { 653 + struct kvmppc_book3s_shadow_vcpu *svcpu; 654 + u32 sr; 655 + 656 + svcpu = svcpu_get(vcpu); 657 + sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]; 762 658 svcpu_put(svcpu); 763 - break; 659 + if (sr == SR_INVALID) { 660 + kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 661 + r = RESUME_GUEST; 662 + break; 663 + } 764 664 } 765 665 #endif 766 - svcpu_put(svcpu); 767 666 768 667 /* only care about PTEG not found errors, but leave NX alone */ 769 668 if (shadow_srr1 & 0x40000000) { 669 + int idx = srcu_read_lock(&vcpu->kvm->srcu); 770 670 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); 671 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 771 672 vcpu->stat.sp_instruc++; 772 673 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 773 674 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { ··· 795 682 case BOOK3S_INTERRUPT_DATA_STORAGE: 796 683 { 797 684 ulong dar = kvmppc_get_fault_dar(vcpu); 798 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 799 - u32 fault_dsisr = svcpu->fault_dsisr; 685 + u32 fault_dsisr = vcpu->arch.fault_dsisr; 800 686 vcpu->stat.pf_storage++; 801 687 802 688 #ifdef CONFIG_PPC_BOOK3S_32 803 689 /* We set segments as unused segments when invalidating them. So 804 690 * treat the respective fault as segment fault. 
*/ 805 - if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) { 806 - kvmppc_mmu_map_segment(vcpu, dar); 807 - r = RESUME_GUEST; 691 + { 692 + struct kvmppc_book3s_shadow_vcpu *svcpu; 693 + u32 sr; 694 + 695 + svcpu = svcpu_get(vcpu); 696 + sr = svcpu->sr[dar >> SID_SHIFT]; 808 697 svcpu_put(svcpu); 809 - break; 698 + if (sr == SR_INVALID) { 699 + kvmppc_mmu_map_segment(vcpu, dar); 700 + r = RESUME_GUEST; 701 + break; 702 + } 810 703 } 811 704 #endif 812 - svcpu_put(svcpu); 813 705 814 - /* The only case we need to handle is missing shadow PTEs */ 815 - if (fault_dsisr & DSISR_NOHPTE) { 706 + /* 707 + * We need to handle missing shadow PTEs, and 708 + * protection faults due to us mapping a page read-only 709 + * when the guest thinks it is writable. 710 + */ 711 + if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) { 712 + int idx = srcu_read_lock(&vcpu->kvm->srcu); 816 713 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); 714 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 817 715 } else { 818 716 vcpu->arch.shared->dar = dar; 819 717 vcpu->arch.shared->dsisr = fault_dsisr; ··· 867 743 case BOOK3S_INTERRUPT_H_EMUL_ASSIST: 868 744 { 869 745 enum emulation_result er; 870 - struct kvmppc_book3s_shadow_vcpu *svcpu; 871 746 ulong flags; 872 747 873 748 program_interrupt: 874 - svcpu = svcpu_get(vcpu); 875 - flags = svcpu->shadow_srr1 & 0x1f0000ull; 876 - svcpu_put(svcpu); 749 + flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; 877 750 878 751 if (vcpu->arch.shared->msr & MSR_PR) { 879 752 #ifdef EXIT_DEBUG ··· 919 798 ulong cmd = kvmppc_get_gpr(vcpu, 3); 920 799 int i; 921 800 922 - #ifdef CONFIG_KVM_BOOK3S_64_PR 801 + #ifdef CONFIG_PPC_BOOK3S_64 923 802 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { 924 803 r = RESUME_GUEST; 925 804 break; ··· 1002 881 break; 1003 882 default: 1004 883 { 1005 - struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 1006 - ulong shadow_srr1 = svcpu->shadow_srr1; 1007 - svcpu_put(svcpu); 884 + ulong shadow_srr1 = vcpu->arch.shadow_srr1; 1008 
885 /* Ugh - bork here! What did we get? */ 1009 886 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 1010 887 exit_nr, kvmppc_get_pc(vcpu), shadow_srr1); ··· 1039 920 return r; 1040 921 } 1041 922 1042 - int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 1043 - struct kvm_sregs *sregs) 923 + static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu, 924 + struct kvm_sregs *sregs) 1044 925 { 1045 926 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1046 927 int i; ··· 1066 947 return 0; 1067 948 } 1068 949 1069 - int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 1070 - struct kvm_sregs *sregs) 950 + static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu, 951 + struct kvm_sregs *sregs) 1071 952 { 1072 953 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1073 954 int i; 1074 955 1075 - kvmppc_set_pvr(vcpu, sregs->pvr); 956 + kvmppc_set_pvr_pr(vcpu, sregs->pvr); 1076 957 1077 958 vcpu3s->sdr1 = sregs->u.s.sdr1; 1078 959 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { ··· 1102 983 return 0; 1103 984 } 1104 985 1105 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 986 + static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, 987 + union kvmppc_one_reg *val) 1106 988 { 1107 989 int r = 0; 1108 990 ··· 1132 1012 return r; 1133 1013 } 1134 1014 1135 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) 1015 + static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, 1016 + union kvmppc_one_reg *val) 1136 1017 { 1137 1018 int r = 0; 1138 1019 ··· 1163 1042 return r; 1164 1043 } 1165 1044 1166 - int kvmppc_core_check_processor_compat(void) 1167 - { 1168 - return 0; 1169 - } 1170 - 1171 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 1045 + static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, 1046 + unsigned int id) 1172 1047 { 1173 1048 struct kvmppc_vcpu_book3s *vcpu_book3s; 1174 1049 struct kvm_vcpu *vcpu; 
1175 1050 int err = -ENOMEM; 1176 1051 unsigned long p; 1177 1052 1178 - vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); 1179 - if (!vcpu_book3s) 1053 + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 1054 + if (!vcpu) 1180 1055 goto out; 1181 1056 1182 - vcpu_book3s->shadow_vcpu = 1183 - kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); 1184 - if (!vcpu_book3s->shadow_vcpu) 1057 + vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); 1058 + if (!vcpu_book3s) 1185 1059 goto free_vcpu; 1060 + vcpu->arch.book3s = vcpu_book3s; 1186 1061 1187 - vcpu = &vcpu_book3s->vcpu; 1062 + #ifdef CONFIG_KVM_BOOK3S_32 1063 + vcpu->arch.shadow_vcpu = 1064 + kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL); 1065 + if (!vcpu->arch.shadow_vcpu) 1066 + goto free_vcpu3s; 1067 + #endif 1068 + 1188 1069 err = kvm_vcpu_init(vcpu, kvm, id); 1189 1070 if (err) 1190 1071 goto free_shadow_vcpu; ··· 1199 1076 vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); 1200 1077 1201 1078 #ifdef CONFIG_PPC_BOOK3S_64 1202 - /* default to book3s_64 (970fx) */ 1079 + /* 1080 + * Default to the same as the host if we're on sufficiently 1081 + * recent machine that we have 1TB segments; 1082 + * otherwise default to PPC970FX. 
1083 + */ 1203 1084 vcpu->arch.pvr = 0x3C0301; 1085 + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) 1086 + vcpu->arch.pvr = mfspr(SPRN_PVR); 1204 1087 #else 1205 1088 /* default to book3s_32 (750) */ 1206 1089 vcpu->arch.pvr = 0x84202; 1207 1090 #endif 1208 - kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 1091 + kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); 1209 1092 vcpu->arch.slb_nr = 64; 1210 1093 1211 1094 vcpu->arch.shadow_msr = MSR_USER64; ··· 1225 1096 uninit_vcpu: 1226 1097 kvm_vcpu_uninit(vcpu); 1227 1098 free_shadow_vcpu: 1228 - kfree(vcpu_book3s->shadow_vcpu); 1229 - free_vcpu: 1099 + #ifdef CONFIG_KVM_BOOK3S_32 1100 + kfree(vcpu->arch.shadow_vcpu); 1101 + free_vcpu3s: 1102 + #endif 1230 1103 vfree(vcpu_book3s); 1104 + free_vcpu: 1105 + kmem_cache_free(kvm_vcpu_cache, vcpu); 1231 1106 out: 1232 1107 return ERR_PTR(err); 1233 1108 } 1234 1109 1235 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 1110 + static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) 1236 1111 { 1237 1112 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 1238 1113 1239 1114 free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); 1240 1115 kvm_vcpu_uninit(vcpu); 1241 - kfree(vcpu_book3s->shadow_vcpu); 1116 + #ifdef CONFIG_KVM_BOOK3S_32 1117 + kfree(vcpu->arch.shadow_vcpu); 1118 + #endif 1242 1119 vfree(vcpu_book3s); 1120 + kmem_cache_free(kvm_vcpu_cache, vcpu); 1243 1121 } 1244 1122 1245 - int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1123 + static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1246 1124 { 1247 1125 int ret; 1248 1126 struct thread_fp_state fp; ··· 1352 1216 /* 1353 1217 * Get (and clear) the dirty memory log for a memory slot. 
1354 1218 */ 1355 - int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 1356 - struct kvm_dirty_log *log) 1219 + static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, 1220 + struct kvm_dirty_log *log) 1357 1221 { 1358 1222 struct kvm_memory_slot *memslot; 1359 1223 struct kvm_vcpu *vcpu; ··· 1388 1252 return r; 1389 1253 } 1390 1254 1391 - #ifdef CONFIG_PPC64 1392 - int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) 1255 + static void kvmppc_core_flush_memslot_pr(struct kvm *kvm, 1256 + struct kvm_memory_slot *memslot) 1393 1257 { 1394 - info->flags = KVM_PPC_1T_SEGMENTS; 1258 + return; 1259 + } 1260 + 1261 + static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, 1262 + struct kvm_memory_slot *memslot, 1263 + struct kvm_userspace_memory_region *mem) 1264 + { 1265 + return 0; 1266 + } 1267 + 1268 + static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, 1269 + struct kvm_userspace_memory_region *mem, 1270 + const struct kvm_memory_slot *old) 1271 + { 1272 + return; 1273 + } 1274 + 1275 + static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free, 1276 + struct kvm_memory_slot *dont) 1277 + { 1278 + return; 1279 + } 1280 + 1281 + static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot, 1282 + unsigned long npages) 1283 + { 1284 + return 0; 1285 + } 1286 + 1287 + 1288 + #ifdef CONFIG_PPC64 1289 + static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, 1290 + struct kvm_ppc_smmu_info *info) 1291 + { 1292 + long int i; 1293 + struct kvm_vcpu *vcpu; 1294 + 1295 + info->flags = 0; 1395 1296 1396 1297 /* SLB is always 64 entries */ 1397 1298 info->slb_size = 64; ··· 1439 1266 info->sps[0].enc[0].page_shift = 12; 1440 1267 info->sps[0].enc[0].pte_enc = 0; 1441 1268 1269 + /* 1270 + * 64k large page size. 1271 + * We only want to put this in if the CPUs we're emulating 1272 + * support it, but unfortunately we don't have a vcpu easily 1273 + * to hand here to test. 
Just pick the first vcpu, and if 1274 + * that doesn't exist yet, report the minimum capability, 1275 + * i.e., no 64k pages. 1276 + * 1T segment support goes along with 64k pages. 1277 + */ 1278 + i = 1; 1279 + vcpu = kvm_get_vcpu(kvm, 0); 1280 + if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { 1281 + info->flags = KVM_PPC_1T_SEGMENTS; 1282 + info->sps[i].page_shift = 16; 1283 + info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01; 1284 + info->sps[i].enc[0].page_shift = 16; 1285 + info->sps[i].enc[0].pte_enc = 1; 1286 + ++i; 1287 + } 1288 + 1442 1289 /* Standard 16M large page size segment */ 1443 - info->sps[1].page_shift = 24; 1444 - info->sps[1].slb_enc = SLB_VSID_L; 1445 - info->sps[1].enc[0].page_shift = 24; 1446 - info->sps[1].enc[0].pte_enc = 0; 1290 + info->sps[i].page_shift = 24; 1291 + info->sps[i].slb_enc = SLB_VSID_L; 1292 + info->sps[i].enc[0].page_shift = 24; 1293 + info->sps[i].enc[0].pte_enc = 0; 1447 1294 1448 1295 return 0; 1296 + } 1297 + #else 1298 + static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, 1299 + struct kvm_ppc_smmu_info *info) 1300 + { 1301 + /* We should not get called */ 1302 + BUG(); 1449 1303 } 1450 1304 #endif /* CONFIG_PPC64 */ 1451 - 1452 - void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 1453 - struct kvm_memory_slot *dont) 1454 - { 1455 - } 1456 - 1457 - int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 1458 - unsigned long npages) 1459 - { 1460 - return 0; 1461 - } 1462 - 1463 - int kvmppc_core_prepare_memory_region(struct kvm *kvm, 1464 - struct kvm_memory_slot *memslot, 1465 - struct kvm_userspace_memory_region *mem) 1466 - { 1467 - return 0; 1468 - } 1469 - 1470 - void kvmppc_core_commit_memory_region(struct kvm *kvm, 1471 - struct kvm_userspace_memory_region *mem, 1472 - const struct kvm_memory_slot *old) 1473 - { 1474 - } 1475 - 1476 - void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) 1477 - { 1478 - } 1479 1305 1480 1306 static unsigned int 
kvm_global_user_count = 0; 1481 1307 static DEFINE_SPINLOCK(kvm_global_user_count_lock); 1482 1308 1483 - int kvmppc_core_init_vm(struct kvm *kvm) 1309 + static int kvmppc_core_init_vm_pr(struct kvm *kvm) 1484 1310 { 1485 - #ifdef CONFIG_PPC64 1486 - INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); 1487 - INIT_LIST_HEAD(&kvm->arch.rtas_tokens); 1488 - #endif 1311 + mutex_init(&kvm->arch.hpt_mutex); 1489 1312 1490 1313 if (firmware_has_feature(FW_FEATURE_SET_MODE)) { 1491 1314 spin_lock(&kvm_global_user_count_lock); ··· 1492 1323 return 0; 1493 1324 } 1494 1325 1495 - void kvmppc_core_destroy_vm(struct kvm *kvm) 1326 + static void kvmppc_core_destroy_vm_pr(struct kvm *kvm) 1496 1327 { 1497 1328 #ifdef CONFIG_PPC64 1498 1329 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); ··· 1507 1338 } 1508 1339 } 1509 1340 1510 - static int kvmppc_book3s_init(void) 1341 + static int kvmppc_core_check_processor_compat_pr(void) 1342 + { 1343 + /* we are always compatible */ 1344 + return 0; 1345 + } 1346 + 1347 + static long kvm_arch_vm_ioctl_pr(struct file *filp, 1348 + unsigned int ioctl, unsigned long arg) 1349 + { 1350 + return -ENOTTY; 1351 + } 1352 + 1353 + static struct kvmppc_ops kvm_ops_pr = { 1354 + .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr, 1355 + .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr, 1356 + .get_one_reg = kvmppc_get_one_reg_pr, 1357 + .set_one_reg = kvmppc_set_one_reg_pr, 1358 + .vcpu_load = kvmppc_core_vcpu_load_pr, 1359 + .vcpu_put = kvmppc_core_vcpu_put_pr, 1360 + .set_msr = kvmppc_set_msr_pr, 1361 + .vcpu_run = kvmppc_vcpu_run_pr, 1362 + .vcpu_create = kvmppc_core_vcpu_create_pr, 1363 + .vcpu_free = kvmppc_core_vcpu_free_pr, 1364 + .check_requests = kvmppc_core_check_requests_pr, 1365 + .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr, 1366 + .flush_memslot = kvmppc_core_flush_memslot_pr, 1367 + .prepare_memory_region = kvmppc_core_prepare_memory_region_pr, 1368 + .commit_memory_region = kvmppc_core_commit_memory_region_pr, 1369 + .unmap_hva = 
kvm_unmap_hva_pr, 1370 + .unmap_hva_range = kvm_unmap_hva_range_pr, 1371 + .age_hva = kvm_age_hva_pr, 1372 + .test_age_hva = kvm_test_age_hva_pr, 1373 + .set_spte_hva = kvm_set_spte_hva_pr, 1374 + .mmu_destroy = kvmppc_mmu_destroy_pr, 1375 + .free_memslot = kvmppc_core_free_memslot_pr, 1376 + .create_memslot = kvmppc_core_create_memslot_pr, 1377 + .init_vm = kvmppc_core_init_vm_pr, 1378 + .destroy_vm = kvmppc_core_destroy_vm_pr, 1379 + .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr, 1380 + .emulate_op = kvmppc_core_emulate_op_pr, 1381 + .emulate_mtspr = kvmppc_core_emulate_mtspr_pr, 1382 + .emulate_mfspr = kvmppc_core_emulate_mfspr_pr, 1383 + .fast_vcpu_kick = kvm_vcpu_kick, 1384 + .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, 1385 + }; 1386 + 1387 + 1388 + int kvmppc_book3s_init_pr(void) 1511 1389 { 1512 1390 int r; 1513 1391 1514 - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, 1515 - THIS_MODULE); 1516 - 1517 - if (r) 1392 + r = kvmppc_core_check_processor_compat_pr(); 1393 + if (r < 0) 1518 1394 return r; 1519 1395 1520 - r = kvmppc_mmu_hpte_sysinit(); 1396 + kvm_ops_pr.owner = THIS_MODULE; 1397 + kvmppc_pr_ops = &kvm_ops_pr; 1521 1398 1399 + r = kvmppc_mmu_hpte_sysinit(); 1522 1400 return r; 1523 1401 } 1524 1402 1525 - static void kvmppc_book3s_exit(void) 1403 + void kvmppc_book3s_exit_pr(void) 1526 1404 { 1405 + kvmppc_pr_ops = NULL; 1527 1406 kvmppc_mmu_hpte_sysexit(); 1528 - kvm_exit(); 1529 1407 } 1530 1408 1531 - module_init(kvmppc_book3s_init); 1532 - module_exit(kvmppc_book3s_exit); 1409 + /* 1410 + * We only support separate modules for book3s 64 1411 + */ 1412 + #ifdef CONFIG_PPC_BOOK3S_64 1413 + 1414 + module_init(kvmppc_book3s_init_pr); 1415 + module_exit(kvmppc_book3s_exit_pr); 1416 + 1417 + MODULE_LICENSE("GPL"); 1418 + #endif
+37 -15
arch/powerpc/kvm/book3s_pr_papr.c
··· 21 21 #include <asm/kvm_ppc.h> 22 22 #include <asm/kvm_book3s.h> 23 23 24 + #define HPTE_SIZE 16 /* bytes per HPT entry */ 25 + 24 26 static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index) 25 27 { 26 28 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); ··· 42 40 long pte_index = kvmppc_get_gpr(vcpu, 5); 43 41 unsigned long pteg[2 * 8]; 44 42 unsigned long pteg_addr, i, *hpte; 43 + long int ret; 45 44 45 + i = pte_index & 7; 46 46 pte_index &= ~7UL; 47 47 pteg_addr = get_pteg_addr(vcpu, pte_index); 48 48 49 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 49 50 copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); 50 51 hpte = pteg; 51 52 53 + ret = H_PTEG_FULL; 52 54 if (likely((flags & H_EXACT) == 0)) { 53 - pte_index &= ~7UL; 54 55 for (i = 0; ; ++i) { 55 56 if (i == 8) 56 - return H_PTEG_FULL; 57 + goto done; 57 58 if ((*hpte & HPTE_V_VALID) == 0) 58 59 break; 59 60 hpte += 2; 60 61 } 61 62 } else { 62 - i = kvmppc_get_gpr(vcpu, 5) & 7UL; 63 63 hpte += i * 2; 64 + if (*hpte & HPTE_V_VALID) 65 + goto done; 64 66 } 65 67 66 68 hpte[0] = kvmppc_get_gpr(vcpu, 6); 67 69 hpte[1] = kvmppc_get_gpr(vcpu, 7); 68 - copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg)); 69 - kvmppc_set_gpr(vcpu, 3, H_SUCCESS); 70 + pteg_addr += i * HPTE_SIZE; 71 + copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE); 70 72 kvmppc_set_gpr(vcpu, 4, pte_index | i); 73 + ret = H_SUCCESS; 74 + 75 + done: 76 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 77 + kvmppc_set_gpr(vcpu, 3, ret); 71 78 72 79 return EMULATE_DONE; 73 80 } ··· 88 77 unsigned long avpn = kvmppc_get_gpr(vcpu, 6); 89 78 unsigned long v = 0, pteg, rb; 90 79 unsigned long pte[2]; 80 + long int ret; 91 81 92 82 pteg = get_pteg_addr(vcpu, pte_index); 83 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 93 84 copy_from_user(pte, (void __user *)pteg, sizeof(pte)); 94 85 86 + ret = H_NOT_FOUND; 95 87 if ((pte[0] & HPTE_V_VALID) == 0 || 96 88 ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) || 97 - ((flags 
& H_ANDCOND) && (pte[0] & avpn) != 0)) { 98 - kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); 99 - return EMULATE_DONE; 100 - } 89 + ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) 90 + goto done; 101 91 102 92 copy_to_user((void __user *)pteg, &v, sizeof(v)); 103 93 104 94 rb = compute_tlbie_rb(pte[0], pte[1], pte_index); 105 95 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); 106 96 107 - kvmppc_set_gpr(vcpu, 3, H_SUCCESS); 97 + ret = H_SUCCESS; 108 98 kvmppc_set_gpr(vcpu, 4, pte[0]); 109 99 kvmppc_set_gpr(vcpu, 5, pte[1]); 100 + 101 + done: 102 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 103 + kvmppc_set_gpr(vcpu, 3, ret); 110 104 111 105 return EMULATE_DONE; 112 106 } ··· 140 124 int paramnr = 4; 141 125 int ret = H_SUCCESS; 142 126 127 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 143 128 for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { 144 129 unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i)); 145 130 unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1); ··· 189 172 } 190 173 kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh); 191 174 } 175 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 192 176 kvmppc_set_gpr(vcpu, 3, ret); 193 177 194 178 return EMULATE_DONE; ··· 202 184 unsigned long avpn = kvmppc_get_gpr(vcpu, 6); 203 185 unsigned long rb, pteg, r, v; 204 186 unsigned long pte[2]; 187 + long int ret; 205 188 206 189 pteg = get_pteg_addr(vcpu, pte_index); 190 + mutex_lock(&vcpu->kvm->arch.hpt_mutex); 207 191 copy_from_user(pte, (void __user *)pteg, sizeof(pte)); 208 192 193 + ret = H_NOT_FOUND; 209 194 if ((pte[0] & HPTE_V_VALID) == 0 || 210 - ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) { 211 - kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND); 212 - return EMULATE_DONE; 213 - } 195 + ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) 196 + goto done; 214 197 215 198 v = pte[0]; 216 199 r = pte[1]; ··· 226 207 rb = compute_tlbie_rb(v, r, pte_index); 227 208 vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? 
true : false); 228 209 copy_to_user((void __user *)pteg, pte, sizeof(pte)); 210 + ret = H_SUCCESS; 229 211 230 - kvmppc_set_gpr(vcpu, 3, H_SUCCESS); 212 + done: 213 + mutex_unlock(&vcpu->kvm->arch.hpt_mutex); 214 + kvmppc_set_gpr(vcpu, 3, ret); 231 215 232 216 return EMULATE_DONE; 233 217 }
+5 -27
arch/powerpc/kvm/book3s_rmhandlers.S
··· 38 38 39 39 #define FUNC(name) GLUE(.,name) 40 40 41 - .globl kvmppc_skip_interrupt 42 - kvmppc_skip_interrupt: 43 - /* 44 - * Here all GPRs are unchanged from when the interrupt happened 45 - * except for r13, which is saved in SPRG_SCRATCH0. 46 - */ 47 - mfspr r13, SPRN_SRR0 48 - addi r13, r13, 4 49 - mtspr SPRN_SRR0, r13 50 - GET_SCRATCH0(r13) 51 - rfid 52 - b . 53 - 54 - .globl kvmppc_skip_Hinterrupt 55 - kvmppc_skip_Hinterrupt: 56 - /* 57 - * Here all GPRs are unchanged from when the interrupt happened 58 - * except for r13, which is saved in SPRG_SCRATCH0. 59 - */ 60 - mfspr r13, SPRN_HSRR0 61 - addi r13, r13, 4 62 - mtspr SPRN_HSRR0, r13 63 - GET_SCRATCH0(r13) 64 - hrfid 65 - b . 66 - 67 41 #elif defined(CONFIG_PPC_BOOK3S_32) 68 42 69 43 #define FUNC(name) name ··· 153 179 154 180 li r6, MSR_IR | MSR_DR 155 181 andc r6, r5, r6 /* Clear DR and IR in MSR value */ 182 + #ifdef CONFIG_PPC_BOOK3S_32 156 183 /* 157 184 * Set EE in HOST_MSR so that it's enabled when we get into our 158 - * C exit handler function 185 + * C exit handler function. On 64-bit we delay enabling 186 + * interrupts until we have finished transferring stuff 187 + * to or from the PACA. 159 188 */ 160 189 ori r5, r5, MSR_EE 190 + #endif 161 191 mtsrr0 r7 162 192 mtsrr1 r6 163 193 RFI
+1
arch/powerpc/kvm/book3s_rtas.c
··· 260 260 */ 261 261 return rc; 262 262 } 263 + EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall); 263 264 264 265 void kvmppc_rtas_tokens_free(struct kvm *kvm) 265 266 {
+2 -2
arch/powerpc/kvm/book3s_segment.S
··· 161 161 .global kvmppc_handler_trampoline_exit 162 162 kvmppc_handler_trampoline_exit: 163 163 164 - .global kvmppc_interrupt 165 - kvmppc_interrupt: 164 + .global kvmppc_interrupt_pr 165 + kvmppc_interrupt_pr: 166 166 167 167 /* Register usage at this point: 168 168 *
+4 -3
arch/powerpc/kvm/book3s_xics.c
··· 818 818 } 819 819 820 820 /* Check for real mode returning too hard */ 821 - if (xics->real_mode) 821 + if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm)) 822 822 return kvmppc_xics_rm_complete(vcpu, req); 823 823 824 824 switch (req) { ··· 840 840 841 841 return rc; 842 842 } 843 + EXPORT_SYMBOL_GPL(kvmppc_xics_hcall); 843 844 844 845 845 846 /* -- Initialisation code etc. -- */ ··· 1251 1250 1252 1251 xics_debugfs_init(xics); 1253 1252 1254 - #ifdef CONFIG_KVM_BOOK3S_64_HV 1253 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1255 1254 if (cpu_has_feature(CPU_FTR_ARCH_206)) { 1256 1255 /* Enable real mode support */ 1257 1256 xics->real_mode = ENABLE_REALMODE; 1258 1257 xics->real_mode_dbg = DEBUG_REALMODE; 1259 1258 } 1260 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 1259 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1261 1260 1262 1261 return 0; 1263 1262 }
+305 -44
arch/powerpc/kvm/booke.c
··· 40 40 41 41 #include "timing.h" 42 42 #include "booke.h" 43 - #include "trace.h" 43 + 44 + #define CREATE_TRACE_POINTS 45 + #include "trace_booke.h" 44 46 45 47 unsigned long kvmppc_booke_handlers; 46 48 ··· 135 133 #endif 136 134 } 137 135 136 + static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) 137 + { 138 + /* Synchronize guest's desire to get debug interrupts into shadow MSR */ 139 + #ifndef CONFIG_KVM_BOOKE_HV 140 + vcpu->arch.shadow_msr &= ~MSR_DE; 141 + vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; 142 + #endif 143 + 144 + /* Force enable debug interrupts when user space wants to debug */ 145 + if (vcpu->guest_debug) { 146 + #ifdef CONFIG_KVM_BOOKE_HV 147 + /* 148 + * Since there is no shadow MSR, sync MSR_DE into the guest 149 + * visible MSR. 150 + */ 151 + vcpu->arch.shared->msr |= MSR_DE; 152 + #else 153 + vcpu->arch.shadow_msr |= MSR_DE; 154 + vcpu->arch.shared->msr &= ~MSR_DE; 155 + #endif 156 + } 157 + } 158 + 138 159 /* 139 160 * Helper function for "full" MSR writes. No need to call this if only 140 161 * EE/CE/ME/DE/RI are changing. 
··· 175 150 kvmppc_mmu_msr_notify(vcpu, old_msr); 176 151 kvmppc_vcpu_sync_spe(vcpu); 177 152 kvmppc_vcpu_sync_fpu(vcpu); 153 + kvmppc_vcpu_sync_debug(vcpu); 178 154 } 179 155 180 156 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, ··· 681 655 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 682 656 { 683 657 int ret, s; 658 + struct thread_struct thread; 684 659 #ifdef CONFIG_PPC_FPU 685 660 struct thread_fp_state fp; 686 661 int fpexc_mode; ··· 722 695 kvmppc_load_guest_fp(vcpu); 723 696 #endif 724 697 698 + /* Switch to guest debug context */ 699 + thread.debug = vcpu->arch.shadow_dbg_reg; 700 + switch_booke_debug_regs(&thread); 701 + thread.debug = current->thread.debug; 702 + current->thread.debug = vcpu->arch.shadow_dbg_reg; 703 + 725 704 kvmppc_fix_ee_before_entry(); 726 705 727 706 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 728 707 729 708 /* No need for kvm_guest_exit. It's done in handle_exit. 730 709 We also get here with interrupts enabled. */ 710 + 711 + /* Switch back to user space debug context */ 712 + switch_booke_debug_regs(&thread); 713 + current->thread.debug = thread.debug; 731 714 732 715 #ifdef CONFIG_PPC_FPU 733 716 kvmppc_save_guest_fp(vcpu); ··· 792 755 default: 793 756 BUG(); 794 757 } 758 + } 759 + 760 + static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) 761 + { 762 + struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); 763 + u32 dbsr = vcpu->arch.dbsr; 764 + 765 + run->debug.arch.status = 0; 766 + run->debug.arch.address = vcpu->arch.pc; 767 + 768 + if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) { 769 + run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT; 770 + } else { 771 + if (dbsr & (DBSR_DAC1W | DBSR_DAC2W)) 772 + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE; 773 + else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R)) 774 + run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ; 775 + if (dbsr & (DBSR_DAC1R | DBSR_DAC1W)) 776 + run->debug.arch.address = dbg_reg->dac1; 777 
+ else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W)) 778 + run->debug.arch.address = dbg_reg->dac2; 779 + } 780 + 781 + return RESUME_HOST; 795 782 } 796 783 797 784 static void kvmppc_fill_pt_regs(struct pt_regs *regs) ··· 877 816 break; 878 817 case BOOKE_INTERRUPT_CRITICAL: 879 818 unknown_exception(&regs); 819 + break; 820 + case BOOKE_INTERRUPT_DEBUG: 821 + /* Save DBSR before preemption is enabled */ 822 + vcpu->arch.dbsr = mfspr(SPRN_DBSR); 823 + kvmppc_clear_dbsr(); 880 824 break; 881 825 } 882 826 } ··· 1200 1134 } 1201 1135 1202 1136 case BOOKE_INTERRUPT_DEBUG: { 1203 - u32 dbsr; 1204 - 1205 - vcpu->arch.pc = mfspr(SPRN_CSRR0); 1206 - 1207 - /* clear IAC events in DBSR register */ 1208 - dbsr = mfspr(SPRN_DBSR); 1209 - dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; 1210 - mtspr(SPRN_DBSR, dbsr); 1211 - 1212 - run->exit_reason = KVM_EXIT_DEBUG; 1137 + r = kvmppc_handle_debug(run, vcpu); 1138 + if (r == RESUME_HOST) 1139 + run->exit_reason = KVM_EXIT_DEBUG; 1213 1140 kvmppc_account_exit(vcpu, DEBUG_EXITS); 1214 - r = RESUME_HOST; 1215 1141 break; 1216 1142 } 1217 1143 ··· 1254 1196 kvmppc_set_msr(vcpu, 0); 1255 1197 1256 1198 #ifndef CONFIG_KVM_BOOKE_HV 1257 - vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; 1199 + vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS; 1258 1200 vcpu->arch.shadow_pid = 1; 1259 1201 vcpu->arch.shared->msr = 0; 1260 1202 #endif ··· 1416 1358 return 0; 1417 1359 } 1418 1360 1419 - void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 1361 + int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 1420 1362 { 1421 1363 sregs->u.e.features |= KVM_SREGS_E_IVOR; 1422 1364 ··· 1436 1378 sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; 1437 1379 sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; 1438 1380 sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 1381 + return 0; 1439 1382 } 1440 1383 1441 1384 int kvmppc_set_sregs_ivor(struct 
kvm_vcpu *vcpu, struct kvm_sregs *sregs) ··· 1471 1412 1472 1413 get_sregs_base(vcpu, sregs); 1473 1414 get_sregs_arch206(vcpu, sregs); 1474 - kvmppc_core_get_sregs(vcpu, sregs); 1475 - return 0; 1415 + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); 1476 1416 } 1477 1417 1478 1418 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, ··· 1490 1432 if (ret < 0) 1491 1433 return ret; 1492 1434 1493 - return kvmppc_core_set_sregs(vcpu, sregs); 1435 + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); 1494 1436 } 1495 1437 1496 1438 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) ··· 1498 1440 int r = 0; 1499 1441 union kvmppc_one_reg val; 1500 1442 int size; 1501 - long int i; 1502 1443 1503 1444 size = one_reg_size(reg->id); 1504 1445 if (size > sizeof(val)) ··· 1505 1448 1506 1449 switch (reg->id) { 1507 1450 case KVM_REG_PPC_IAC1: 1508 - case KVM_REG_PPC_IAC2: 1509 - case KVM_REG_PPC_IAC3: 1510 - case KVM_REG_PPC_IAC4: 1511 - i = reg->id - KVM_REG_PPC_IAC1; 1512 - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]); 1451 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); 1513 1452 break; 1453 + case KVM_REG_PPC_IAC2: 1454 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); 1455 + break; 1456 + #if CONFIG_PPC_ADV_DEBUG_IACS > 2 1457 + case KVM_REG_PPC_IAC3: 1458 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); 1459 + break; 1460 + case KVM_REG_PPC_IAC4: 1461 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4); 1462 + break; 1463 + #endif 1514 1464 case KVM_REG_PPC_DAC1: 1465 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); 1466 + break; 1515 1467 case KVM_REG_PPC_DAC2: 1516 - i = reg->id - KVM_REG_PPC_DAC1; 1517 - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]); 1468 + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); 1518 1469 break; 1519 1470 case KVM_REG_PPC_EPR: { 1520 1471 u32 epr = get_guest_epr(vcpu); ··· 1541 1476 val = get_reg_val(reg->id, vcpu->arch.tsr); 1542 1477 break; 1543 
1478 case KVM_REG_PPC_DEBUG_INST: 1544 - val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV); 1479 + val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); 1480 + break; 1481 + case KVM_REG_PPC_VRSAVE: 1482 + val = get_reg_val(reg->id, vcpu->arch.vrsave); 1545 1483 break; 1546 1484 default: 1547 - r = kvmppc_get_one_reg(vcpu, reg->id, &val); 1485 + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); 1548 1486 break; 1549 1487 } 1550 1488 ··· 1565 1497 int r = 0; 1566 1498 union kvmppc_one_reg val; 1567 1499 int size; 1568 - long int i; 1569 1500 1570 1501 size = one_reg_size(reg->id); 1571 1502 if (size > sizeof(val)) ··· 1575 1508 1576 1509 switch (reg->id) { 1577 1510 case KVM_REG_PPC_IAC1: 1578 - case KVM_REG_PPC_IAC2: 1579 - case KVM_REG_PPC_IAC3: 1580 - case KVM_REG_PPC_IAC4: 1581 - i = reg->id - KVM_REG_PPC_IAC1; 1582 - vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val); 1511 + vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); 1583 1512 break; 1513 + case KVM_REG_PPC_IAC2: 1514 + vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); 1515 + break; 1516 + #if CONFIG_PPC_ADV_DEBUG_IACS > 2 1517 + case KVM_REG_PPC_IAC3: 1518 + vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); 1519 + break; 1520 + case KVM_REG_PPC_IAC4: 1521 + vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val); 1522 + break; 1523 + #endif 1584 1524 case KVM_REG_PPC_DAC1: 1525 + vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); 1526 + break; 1585 1527 case KVM_REG_PPC_DAC2: 1586 - i = reg->id - KVM_REG_PPC_DAC1; 1587 - vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val); 1528 + vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); 1588 1529 break; 1589 1530 case KVM_REG_PPC_EPR: { 1590 1531 u32 new_epr = set_reg_val(reg->id, val); ··· 1626 1551 kvmppc_set_tcr(vcpu, tcr); 1627 1552 break; 1628 1553 } 1554 + case KVM_REG_PPC_VRSAVE: 1555 + vcpu->arch.vrsave = set_reg_val(reg->id, val); 1556 + break; 1629 1557 default: 1630 - r = kvmppc_set_one_reg(vcpu, reg->id, &val); 1558 + r = 
vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); 1631 1559 break; 1632 1560 } 1633 1561 1634 1562 return r; 1635 - } 1636 - 1637 - int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 1638 - struct kvm_guest_debug *dbg) 1639 - { 1640 - return -EINVAL; 1641 1563 } 1642 1564 1643 1565 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) ··· 1661 1589 return -ENOTSUPP; 1662 1590 } 1663 1591 1664 - void kvmppc_core_free_memslot(struct kvm_memory_slot *free, 1592 + void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 1665 1593 struct kvm_memory_slot *dont) 1666 1594 { 1667 1595 } 1668 1596 1669 - int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, 1597 + int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1670 1598 unsigned long npages) 1671 1599 { 1672 1600 return 0; ··· 1742 1670 kvmppc_set_tsr_bits(vcpu, TSR_DIS); 1743 1671 } 1744 1672 1673 + static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg, 1674 + uint64_t addr, int index) 1675 + { 1676 + switch (index) { 1677 + case 0: 1678 + dbg_reg->dbcr0 |= DBCR0_IAC1; 1679 + dbg_reg->iac1 = addr; 1680 + break; 1681 + case 1: 1682 + dbg_reg->dbcr0 |= DBCR0_IAC2; 1683 + dbg_reg->iac2 = addr; 1684 + break; 1685 + #if CONFIG_PPC_ADV_DEBUG_IACS > 2 1686 + case 2: 1687 + dbg_reg->dbcr0 |= DBCR0_IAC3; 1688 + dbg_reg->iac3 = addr; 1689 + break; 1690 + case 3: 1691 + dbg_reg->dbcr0 |= DBCR0_IAC4; 1692 + dbg_reg->iac4 = addr; 1693 + break; 1694 + #endif 1695 + default: 1696 + return -EINVAL; 1697 + } 1698 + 1699 + dbg_reg->dbcr0 |= DBCR0_IDM; 1700 + return 0; 1701 + } 1702 + 1703 + static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr, 1704 + int type, int index) 1705 + { 1706 + switch (index) { 1707 + case 0: 1708 + if (type & KVMPPC_DEBUG_WATCH_READ) 1709 + dbg_reg->dbcr0 |= DBCR0_DAC1R; 1710 + if (type & KVMPPC_DEBUG_WATCH_WRITE) 1711 + dbg_reg->dbcr0 |= DBCR0_DAC1W; 1712 + dbg_reg->dac1 = 
addr; 1713 + break; 1714 + case 1: 1715 + if (type & KVMPPC_DEBUG_WATCH_READ) 1716 + dbg_reg->dbcr0 |= DBCR0_DAC2R; 1717 + if (type & KVMPPC_DEBUG_WATCH_WRITE) 1718 + dbg_reg->dbcr0 |= DBCR0_DAC2W; 1719 + dbg_reg->dac2 = addr; 1720 + break; 1721 + default: 1722 + return -EINVAL; 1723 + } 1724 + 1725 + dbg_reg->dbcr0 |= DBCR0_IDM; 1726 + return 0; 1727 + } 1728 + void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set) 1729 + { 1730 + /* XXX: Add similar MSR protection for BookE-PR */ 1731 + #ifdef CONFIG_KVM_BOOKE_HV 1732 + BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP)); 1733 + if (set) { 1734 + if (prot_bitmap & MSR_UCLE) 1735 + vcpu->arch.shadow_msrp |= MSRP_UCLEP; 1736 + if (prot_bitmap & MSR_DE) 1737 + vcpu->arch.shadow_msrp |= MSRP_DEP; 1738 + if (prot_bitmap & MSR_PMM) 1739 + vcpu->arch.shadow_msrp |= MSRP_PMMP; 1740 + } else { 1741 + if (prot_bitmap & MSR_UCLE) 1742 + vcpu->arch.shadow_msrp &= ~MSRP_UCLEP; 1743 + if (prot_bitmap & MSR_DE) 1744 + vcpu->arch.shadow_msrp &= ~MSRP_DEP; 1745 + if (prot_bitmap & MSR_PMM) 1746 + vcpu->arch.shadow_msrp &= ~MSRP_PMMP; 1747 + } 1748 + #endif 1749 + } 1750 + 1751 + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 1752 + struct kvm_guest_debug *dbg) 1753 + { 1754 + struct debug_reg *dbg_reg; 1755 + int n, b = 0, w = 0; 1756 + 1757 + if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { 1758 + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; 1759 + vcpu->guest_debug = 0; 1760 + kvm_guest_protect_msr(vcpu, MSR_DE, false); 1761 + return 0; 1762 + } 1763 + 1764 + kvm_guest_protect_msr(vcpu, MSR_DE, true); 1765 + vcpu->guest_debug = dbg->control; 1766 + vcpu->arch.shadow_dbg_reg.dbcr0 = 0; 1767 + /* Set DBCR0_EDM in guest visible DBCR0 register. 
*/ 1768 + vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; 1769 + 1770 + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 1771 + vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; 1772 + 1773 + /* Code below handles only HW breakpoints */ 1774 + dbg_reg = &(vcpu->arch.shadow_dbg_reg); 1775 + 1776 + #ifdef CONFIG_KVM_BOOKE_HV 1777 + /* 1778 + * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1 1779 + * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0 1780 + */ 1781 + dbg_reg->dbcr1 = 0; 1782 + dbg_reg->dbcr2 = 0; 1783 + #else 1784 + /* 1785 + * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1 1786 + * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR 1787 + * is set. 1788 + */ 1789 + dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US | 1790 + DBCR1_IAC4US; 1791 + dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; 1792 + #endif 1793 + 1794 + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 1795 + return 0; 1796 + 1797 + for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) { 1798 + uint64_t addr = dbg->arch.bp[n].addr; 1799 + uint32_t type = dbg->arch.bp[n].type; 1800 + 1801 + if (type == KVMPPC_DEBUG_NONE) 1802 + continue; 1803 + 1804 + if (type & !(KVMPPC_DEBUG_WATCH_READ | 1805 + KVMPPC_DEBUG_WATCH_WRITE | 1806 + KVMPPC_DEBUG_BREAKPOINT)) 1807 + return -EINVAL; 1808 + 1809 + if (type & KVMPPC_DEBUG_BREAKPOINT) { 1810 + /* Setting H/W breakpoint */ 1811 + if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++)) 1812 + return -EINVAL; 1813 + } else { 1814 + /* Setting H/W watchpoint */ 1815 + if (kvmppc_booke_add_watchpoint(dbg_reg, addr, 1816 + type, w++)) 1817 + return -EINVAL; 1818 + } 1819 + } 1820 + 1821 + return 0; 1822 + } 1823 + 1745 1824 void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1746 1825 { 1747 1826 vcpu->cpu = smp_processor_id(); ··· 1903 1680 { 1904 1681 current->thread.kvm_vcpu = NULL; 1905 1682 vcpu->cpu = -1; 1683 + 1684 + /* Clear pending debug event in DBSR */ 
1685 + kvmppc_clear_dbsr(); 1686 + } 1687 + 1688 + void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 1689 + { 1690 + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); 1691 + } 1692 + 1693 + int kvmppc_core_init_vm(struct kvm *kvm) 1694 + { 1695 + return kvm->arch.kvm_ops->init_vm(kvm); 1696 + } 1697 + 1698 + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 1699 + { 1700 + return kvm->arch.kvm_ops->vcpu_create(kvm, id); 1701 + } 1702 + 1703 + void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 1704 + { 1705 + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); 1706 + } 1707 + 1708 + void kvmppc_core_destroy_vm(struct kvm *kvm) 1709 + { 1710 + kvm->arch.kvm_ops->destroy_vm(kvm); 1711 + } 1712 + 1713 + void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1714 + { 1715 + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); 1716 + } 1717 + 1718 + void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 1719 + { 1720 + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); 1906 1721 } 1907 1722 1908 1723 int __init kvmppc_booke_init(void)
+29
arch/powerpc/kvm/booke.h
··· 99 99 100 100 void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); 101 101 102 + extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu); 103 + extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, 104 + unsigned int inst, int *advance); 105 + extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, 106 + ulong spr_val); 107 + extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, 108 + ulong *spr_val); 109 + extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); 110 + extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, 111 + struct kvm_vcpu *vcpu, 112 + unsigned int inst, int *advance); 113 + extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, 114 + ulong spr_val); 115 + extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, 116 + ulong *spr_val); 117 + extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); 118 + extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, 119 + struct kvm_vcpu *vcpu, 120 + unsigned int inst, int *advance); 121 + extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, 122 + ulong spr_val); 123 + extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, 124 + ulong *spr_val); 125 + 102 126 /* 103 127 * Load up guest vcpu FP state if it's needed. 104 128 * It also set the MSR_FP in thread so that host know ··· 152 128 if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP)) 153 129 giveup_fpu(current); 154 130 #endif 131 + } 132 + 133 + static inline void kvmppc_clear_dbsr(void) 134 + { 135 + mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); 155 136 } 156 137 #endif /* __KVM_BOOKE_H__ */
+44 -15
arch/powerpc/kvm/e500.c
··· 305 305 { 306 306 } 307 307 308 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 308 + static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu) 309 309 { 310 310 kvmppc_booke_vcpu_load(vcpu, cpu); 311 311 ··· 313 313 kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); 314 314 } 315 315 316 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 316 + static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu) 317 317 { 318 318 #ifdef CONFIG_SPE 319 319 if (vcpu->arch.shadow_msr & MSR_SPE) ··· 367 367 return 0; 368 368 } 369 369 370 - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 370 + static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu, 371 + struct kvm_sregs *sregs) 371 372 { 372 373 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 373 374 ··· 389 388 390 389 kvmppc_get_sregs_ivor(vcpu, sregs); 391 390 kvmppc_get_sregs_e500_tlb(vcpu, sregs); 391 + return 0; 392 392 } 393 393 394 - int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 394 + static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu, 395 + struct kvm_sregs *sregs) 395 396 { 396 397 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 397 398 int ret; ··· 428 425 return kvmppc_set_sregs_ivor(vcpu, sregs); 429 426 } 430 427 431 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, 432 - union kvmppc_one_reg *val) 428 + static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, 429 + union kvmppc_one_reg *val) 433 430 { 434 431 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); 435 432 return r; 436 433 } 437 434 438 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, 439 - union kvmppc_one_reg *val) 435 + static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id, 436 + union kvmppc_one_reg *val) 440 437 { 441 438 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); 442 439 return r; 443 440 } 444 441 445 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 442 + static struct 
kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, 443 + unsigned int id) 446 444 { 447 445 struct kvmppc_vcpu_e500 *vcpu_e500; 448 446 struct kvm_vcpu *vcpu; ··· 485 481 return ERR_PTR(err); 486 482 } 487 483 488 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 484 + static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu) 489 485 { 490 486 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 491 487 ··· 496 492 kmem_cache_free(kvm_vcpu_cache, vcpu_e500); 497 493 } 498 494 499 - int kvmppc_core_init_vm(struct kvm *kvm) 495 + static int kvmppc_core_init_vm_e500(struct kvm *kvm) 500 496 { 501 497 return 0; 502 498 } 503 499 504 - void kvmppc_core_destroy_vm(struct kvm *kvm) 500 + static void kvmppc_core_destroy_vm_e500(struct kvm *kvm) 505 501 { 506 502 } 503 + 504 + static struct kvmppc_ops kvm_ops_e500 = { 505 + .get_sregs = kvmppc_core_get_sregs_e500, 506 + .set_sregs = kvmppc_core_set_sregs_e500, 507 + .get_one_reg = kvmppc_get_one_reg_e500, 508 + .set_one_reg = kvmppc_set_one_reg_e500, 509 + .vcpu_load = kvmppc_core_vcpu_load_e500, 510 + .vcpu_put = kvmppc_core_vcpu_put_e500, 511 + .vcpu_create = kvmppc_core_vcpu_create_e500, 512 + .vcpu_free = kvmppc_core_vcpu_free_e500, 513 + .mmu_destroy = kvmppc_mmu_destroy_e500, 514 + .init_vm = kvmppc_core_init_vm_e500, 515 + .destroy_vm = kvmppc_core_destroy_vm_e500, 516 + .emulate_op = kvmppc_core_emulate_op_e500, 517 + .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, 518 + .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, 519 + }; 507 520 508 521 static int __init kvmppc_e500_init(void) 509 522 { ··· 533 512 534 513 r = kvmppc_core_check_processor_compat(); 535 514 if (r) 536 - return r; 515 + goto err_out; 537 516 538 517 r = kvmppc_booke_init(); 539 518 if (r) 540 - return r; 519 + goto err_out; 541 520 542 521 /* copy extra E500 exception handlers */ 543 522 ivor[0] = mfspr(SPRN_IVOR32); ··· 555 534 flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + 556 535 ivor[max_ivor] + handler_len); 
557 536 558 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 537 + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 538 + if (r) 539 + goto err_out; 540 + kvm_ops_e500.owner = THIS_MODULE; 541 + kvmppc_pr_ops = &kvm_ops_e500; 542 + 543 + err_out: 544 + return r; 559 545 } 560 546 561 547 static void __exit kvmppc_e500_exit(void) 562 548 { 549 + kvmppc_pr_ops = NULL; 563 550 kvmppc_booke_exit(); 564 551 } 565 552
+1 -1
arch/powerpc/kvm/e500.h
··· 117 117 #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) 118 118 #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) 119 119 #define MAS2_ATTRIB_MASK \ 120 - (MAS2_X0 | MAS2_X1) 120 + (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G) 121 121 #define MAS3_ATTRIB_MASK \ 122 122 (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ 123 123 | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+30 -4
arch/powerpc/kvm/e500_emulate.c
··· 26 26 #define XOP_TLBRE 946 27 27 #define XOP_TLBWE 978 28 28 #define XOP_TLBILX 18 29 + #define XOP_EHPRIV 270 29 30 30 31 #ifdef CONFIG_KVM_E500MC 31 32 static int dbell2prio(ulong param) ··· 83 82 } 84 83 #endif 85 84 86 - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 87 - unsigned int inst, int *advance) 85 + static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, 86 + unsigned int inst, int *advance) 87 + { 88 + int emulated = EMULATE_DONE; 89 + 90 + switch (get_oc(inst)) { 91 + case EHPRIV_OC_DEBUG: 92 + run->exit_reason = KVM_EXIT_DEBUG; 93 + run->debug.arch.address = vcpu->arch.pc; 94 + run->debug.arch.status = 0; 95 + kvmppc_account_exit(vcpu, DEBUG_EXITS); 96 + emulated = EMULATE_EXIT_USER; 97 + *advance = 0; 98 + break; 99 + default: 100 + emulated = EMULATE_FAIL; 101 + } 102 + return emulated; 103 + } 104 + 105 + int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, 106 + unsigned int inst, int *advance) 88 107 { 89 108 int emulated = EMULATE_DONE; 90 109 int ra = get_ra(inst); ··· 151 130 emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); 152 131 break; 153 132 133 + case XOP_EHPRIV: 134 + emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, 135 + advance); 136 + break; 137 + 154 138 default: 155 139 emulated = EMULATE_FAIL; 156 140 } ··· 172 146 return emulated; 173 147 } 174 148 175 - int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 149 + int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) 176 150 { 177 151 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 178 152 int emulated = EMULATE_DONE; ··· 263 237 return emulated; 264 238 } 265 239 266 - int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 240 + int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) 267 241 { 268 242 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 269 243 int emulated = EMULATE_DONE;
+2 -2
arch/powerpc/kvm/e500_mmu.c
··· 32 32 #include <asm/kvm_ppc.h> 33 33 34 34 #include "e500.h" 35 - #include "trace.h" 35 + #include "trace_booke.h" 36 36 #include "timing.h" 37 37 #include "e500_mmu_host.h" 38 38 ··· 536 536 return get_tlb_raddr(gtlbe) | (eaddr & pgmask); 537 537 } 538 538 539 - void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 539 + void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu) 540 540 { 541 541 } 542 542
+5 -1
arch/powerpc/kvm/e500_mmu_host.c
··· 32 32 #include <asm/kvm_ppc.h> 33 33 34 34 #include "e500.h" 35 - #include "trace.h" 36 35 #include "timing.h" 37 36 #include "e500_mmu_host.h" 37 + 38 + #include "trace_booke.h" 38 39 39 40 #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) 40 41 ··· 253 252 { 254 253 ref->pfn = pfn; 255 254 ref->flags |= E500_TLB_VALID; 255 + 256 + /* Mark the page accessed */ 257 + kvm_set_pfn_accessed(pfn); 256 258 257 259 if (tlbe_is_writable(gtlbe)) 258 260 kvm_set_pfn_dirty(pfn);
+43 -15
arch/powerpc/kvm/e500mc.c
··· 110 110 111 111 static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); 112 112 113 - void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 113 + static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) 114 114 { 115 115 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 116 116 ··· 147 147 kvmppc_load_guest_fp(vcpu); 148 148 } 149 149 150 - void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 150 + static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) 151 151 { 152 152 vcpu->arch.eplc = mfspr(SPRN_EPLC); 153 153 vcpu->arch.epsc = mfspr(SPRN_EPSC); ··· 204 204 return 0; 205 205 } 206 206 207 - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 207 + static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu, 208 + struct kvm_sregs *sregs) 208 209 { 209 210 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 210 211 ··· 225 224 sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]; 226 225 sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; 227 226 228 - kvmppc_get_sregs_ivor(vcpu, sregs); 227 + return kvmppc_get_sregs_ivor(vcpu, sregs); 229 228 } 230 229 231 - int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 230 + static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu, 231 + struct kvm_sregs *sregs) 232 232 { 233 233 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 234 234 int ret; ··· 262 260 return kvmppc_set_sregs_ivor(vcpu, sregs); 263 261 } 264 262 265 - int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, 266 - union kvmppc_one_reg *val) 263 + static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, 264 + union kvmppc_one_reg *val) 267 265 { 268 266 int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); 269 267 return r; 270 268 } 271 269 272 - int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, 273 - union kvmppc_one_reg *val) 270 + static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, 271 + union kvmppc_one_reg 
*val) 274 272 { 275 273 int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); 276 274 return r; 277 275 } 278 276 279 - struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 277 + static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, 278 + unsigned int id) 280 279 { 281 280 struct kvmppc_vcpu_e500 *vcpu_e500; 282 281 struct kvm_vcpu *vcpu; ··· 318 315 return ERR_PTR(err); 319 316 } 320 317 321 - void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 318 + static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu) 322 319 { 323 320 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 324 321 ··· 328 325 kmem_cache_free(kvm_vcpu_cache, vcpu_e500); 329 326 } 330 327 331 - int kvmppc_core_init_vm(struct kvm *kvm) 328 + static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) 332 329 { 333 330 int lpid; 334 331 ··· 340 337 return 0; 341 338 } 342 339 343 - void kvmppc_core_destroy_vm(struct kvm *kvm) 340 + static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) 344 341 { 345 342 kvmppc_free_lpid(kvm->arch.lpid); 346 343 } 344 + 345 + static struct kvmppc_ops kvm_ops_e500mc = { 346 + .get_sregs = kvmppc_core_get_sregs_e500mc, 347 + .set_sregs = kvmppc_core_set_sregs_e500mc, 348 + .get_one_reg = kvmppc_get_one_reg_e500mc, 349 + .set_one_reg = kvmppc_set_one_reg_e500mc, 350 + .vcpu_load = kvmppc_core_vcpu_load_e500mc, 351 + .vcpu_put = kvmppc_core_vcpu_put_e500mc, 352 + .vcpu_create = kvmppc_core_vcpu_create_e500mc, 353 + .vcpu_free = kvmppc_core_vcpu_free_e500mc, 354 + .mmu_destroy = kvmppc_mmu_destroy_e500, 355 + .init_vm = kvmppc_core_init_vm_e500mc, 356 + .destroy_vm = kvmppc_core_destroy_vm_e500mc, 357 + .emulate_op = kvmppc_core_emulate_op_e500, 358 + .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, 359 + .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, 360 + }; 347 361 348 362 static int __init kvmppc_e500mc_init(void) 349 363 { ··· 368 348 369 349 r = kvmppc_booke_init(); 370 350 if (r) 371 - return r; 351 + goto err_out; 372 
352 373 353 kvmppc_init_lpid(64); 374 354 kvmppc_claim_lpid(0); /* host */ 375 355 376 - return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 356 + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); 357 + if (r) 358 + goto err_out; 359 + kvm_ops_e500mc.owner = THIS_MODULE; 360 + kvmppc_pr_ops = &kvm_ops_e500mc; 361 + 362 + err_out: 363 + return r; 377 364 } 378 365 379 366 static void __exit kvmppc_e500mc_exit(void) 380 367 { 368 + kvmppc_pr_ops = NULL; 381 369 kvmppc_booke_exit(); 382 370 } 383 371
+7 -5
arch/powerpc/kvm/emulate.c
··· 130 130 case SPRN_PIR: break; 131 131 132 132 default: 133 - emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, 134 - spr_val); 133 + emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn, 134 + spr_val); 135 135 if (emulated == EMULATE_FAIL) 136 136 printk(KERN_INFO "mtspr: unknown spr " 137 137 "0x%x\n", sprn); ··· 191 191 spr_val = kvmppc_get_dec(vcpu, get_tb()); 192 192 break; 193 193 default: 194 - emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, 195 - &spr_val); 194 + emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn, 195 + &spr_val); 196 196 if (unlikely(emulated == EMULATE_FAIL)) { 197 197 printk(KERN_INFO "mfspr: unknown spr " 198 198 "0x%x\n", sprn); ··· 464 464 } 465 465 466 466 if (emulated == EMULATE_FAIL) { 467 - emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); 467 + emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst, 468 + &advance); 468 469 if (emulated == EMULATE_AGAIN) { 469 470 advance = 0; 470 471 } else if (emulated == EMULATE_FAIL) { ··· 484 483 485 484 return emulated; 486 485 } 486 + EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
+94 -75
arch/powerpc/kvm/powerpc.c
··· 26 26 #include <linux/fs.h> 27 27 #include <linux/slab.h> 28 28 #include <linux/file.h> 29 + #include <linux/module.h> 29 30 #include <asm/cputable.h> 30 31 #include <asm/uaccess.h> 31 32 #include <asm/kvm_ppc.h> ··· 40 39 #define CREATE_TRACE_POINTS 41 40 #include "trace.h" 42 41 42 + struct kvmppc_ops *kvmppc_hv_ops; 43 + EXPORT_SYMBOL_GPL(kvmppc_hv_ops); 44 + struct kvmppc_ops *kvmppc_pr_ops; 45 + EXPORT_SYMBOL_GPL(kvmppc_pr_ops); 46 + 47 + 43 48 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 44 49 { 45 50 return !!(v->arch.pending_exceptions) || ··· 57 50 return 1; 58 51 } 59 52 60 - #ifndef CONFIG_KVM_BOOK3S_64_HV 61 53 /* 62 54 * Common checks before entering the guest world. Call with interrupts 63 55 * disabled. ··· 131 125 132 126 return r; 133 127 } 134 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 128 + EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter); 135 129 136 130 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) 137 131 { ··· 185 179 186 180 return r; 187 181 } 182 + EXPORT_SYMBOL_GPL(kvmppc_kvm_pv); 188 183 189 184 int kvmppc_sanity_check(struct kvm_vcpu *vcpu) 190 185 { ··· 199 192 if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled) 200 193 goto out; 201 194 202 - #ifdef CONFIG_KVM_BOOK3S_64_HV 203 195 /* HV KVM can only do PAPR mode for now */ 204 - if (!vcpu->arch.papr_enabled) 196 + if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm)) 205 197 goto out; 206 - #endif 207 198 208 199 #ifdef CONFIG_KVM_BOOKE_HV 209 200 if (!cpu_has_feature(CPU_FTR_EMB_HV)) ··· 214 209 vcpu->arch.sane = r; 215 210 return r ? 
0 : -EINVAL; 216 211 } 212 + EXPORT_SYMBOL_GPL(kvmppc_sanity_check); 217 213 218 214 int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) 219 215 { ··· 249 243 250 244 return r; 251 245 } 246 + EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); 252 247 253 248 int kvm_arch_hardware_enable(void *garbage) 254 249 { ··· 276 269 277 270 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 278 271 { 279 - if (type) 280 - return -EINVAL; 272 + struct kvmppc_ops *kvm_ops = NULL; 273 + /* 274 + * if we have both HV and PR enabled, default is HV 275 + */ 276 + if (type == 0) { 277 + if (kvmppc_hv_ops) 278 + kvm_ops = kvmppc_hv_ops; 279 + else 280 + kvm_ops = kvmppc_pr_ops; 281 + if (!kvm_ops) 282 + goto err_out; 283 + } else if (type == KVM_VM_PPC_HV) { 284 + if (!kvmppc_hv_ops) 285 + goto err_out; 286 + kvm_ops = kvmppc_hv_ops; 287 + } else if (type == KVM_VM_PPC_PR) { 288 + if (!kvmppc_pr_ops) 289 + goto err_out; 290 + kvm_ops = kvmppc_pr_ops; 291 + } else 292 + goto err_out; 281 293 294 + if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) 295 + return -ENOENT; 296 + 297 + kvm->arch.kvm_ops = kvm_ops; 282 298 return kvmppc_core_init_vm(kvm); 299 + err_out: 300 + return -EINVAL; 283 301 } 284 302 285 303 void kvm_arch_destroy_vm(struct kvm *kvm) ··· 324 292 kvmppc_core_destroy_vm(kvm); 325 293 326 294 mutex_unlock(&kvm->lock); 295 + 296 + /* drop the module reference */ 297 + module_put(kvm->arch.kvm_ops->owner); 327 298 } 328 299 329 300 void kvm_arch_sync_events(struct kvm *kvm) ··· 336 301 int kvm_dev_ioctl_check_extension(long ext) 337 302 { 338 303 int r; 304 + /* FIXME!! 305 + * Should some of this be vm ioctl ? is it possible now ? 306 + */ 307 + int hv_enabled = kvmppc_hv_ops ? 
1 : 0; 339 308 340 309 switch (ext) { 341 310 #ifdef CONFIG_BOOKE ··· 359 320 case KVM_CAP_DEVICE_CTRL: 360 321 r = 1; 361 322 break; 362 - #ifndef CONFIG_KVM_BOOK3S_64_HV 363 323 case KVM_CAP_PPC_PAIRED_SINGLES: 364 324 case KVM_CAP_PPC_OSI: 365 325 case KVM_CAP_PPC_GET_PVINFO: 366 326 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) 367 327 case KVM_CAP_SW_TLB: 368 328 #endif 369 - #ifdef CONFIG_KVM_MPIC 370 - case KVM_CAP_IRQ_MPIC: 371 - #endif 372 - r = 1; 329 + /* We support this only for PR */ 330 + r = !hv_enabled; 373 331 break; 332 + #ifdef CONFIG_KVM_MMIO 374 333 case KVM_CAP_COALESCED_MMIO: 375 334 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 376 335 break; 377 336 #endif 337 + #ifdef CONFIG_KVM_MPIC 338 + case KVM_CAP_IRQ_MPIC: 339 + r = 1; 340 + break; 341 + #endif 342 + 378 343 #ifdef CONFIG_PPC_BOOK3S_64 379 344 case KVM_CAP_SPAPR_TCE: 380 345 case KVM_CAP_PPC_ALLOC_HTAB: ··· 389 346 r = 1; 390 347 break; 391 348 #endif /* CONFIG_PPC_BOOK3S_64 */ 392 - #ifdef CONFIG_KVM_BOOK3S_64_HV 349 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 393 350 case KVM_CAP_PPC_SMT: 394 - r = threads_per_core; 351 + if (hv_enabled) 352 + r = threads_per_core; 353 + else 354 + r = 0; 395 355 break; 396 356 case KVM_CAP_PPC_RMA: 397 - r = 1; 357 + r = hv_enabled; 398 358 /* PPC970 requires an RMA */ 399 - if (cpu_has_feature(CPU_FTR_ARCH_201)) 359 + if (r && cpu_has_feature(CPU_FTR_ARCH_201)) 400 360 r = 2; 401 361 break; 402 362 #endif 403 363 case KVM_CAP_SYNC_MMU: 404 - #ifdef CONFIG_KVM_BOOK3S_64_HV 405 - r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; 364 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 365 + if (hv_enabled) 366 + r = cpu_has_feature(CPU_FTR_ARCH_206) ? 
1 : 0; 367 + else 368 + r = 0; 406 369 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) 407 370 r = 1; 408 371 #else 409 372 r = 0; 410 - break; 411 373 #endif 412 - #ifdef CONFIG_KVM_BOOK3S_64_HV 374 + break; 375 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 413 376 case KVM_CAP_PPC_HTAB_FD: 414 - r = 1; 377 + r = hv_enabled; 415 378 break; 416 379 #endif 417 - break; 418 380 case KVM_CAP_NR_VCPUS: 419 381 /* 420 382 * Recommending a number of CPUs is somewhat arbitrary; we ··· 427 379 * will have secondary threads "offline"), and for other KVM 428 380 * implementations just count online CPUs. 429 381 */ 430 - #ifdef CONFIG_KVM_BOOK3S_64_HV 431 - r = num_present_cpus(); 432 - #else 433 - r = num_online_cpus(); 434 - #endif 382 + if (hv_enabled) 383 + r = num_present_cpus(); 384 + else 385 + r = num_online_cpus(); 435 386 break; 436 387 case KVM_CAP_MAX_VCPUS: 437 388 r = KVM_MAX_VCPUS; ··· 454 407 return -EINVAL; 455 408 } 456 409 457 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 410 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 458 411 struct kvm_memory_slot *dont) 459 412 { 460 - kvmppc_core_free_memslot(free, dont); 413 + kvmppc_core_free_memslot(kvm, free, dont); 461 414 } 462 415 463 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 416 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 417 + unsigned long npages) 464 418 { 465 - return kvmppc_core_create_memslot(slot, npages); 419 + return kvmppc_core_create_memslot(kvm, slot, npages); 466 420 } 467 421 468 422 void kvm_arch_memslots_updated(struct kvm *kvm) ··· 707 659 708 660 return EMULATE_DO_MMIO; 709 661 } 662 + EXPORT_SYMBOL_GPL(kvmppc_handle_load); 710 663 711 664 /* Same as above, but sign extends */ 712 665 int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, ··· 769 720 770 721 return EMULATE_DO_MMIO; 771 722 } 723 + EXPORT_SYMBOL_GPL(kvmppc_handle_store); 772 724 773 725 int 
kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 774 726 { ··· 1074 1024 r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); 1075 1025 goto out; 1076 1026 } 1077 - #endif /* CONFIG_PPC_BOOK3S_64 */ 1078 - 1079 - #ifdef CONFIG_KVM_BOOK3S_64_HV 1080 - case KVM_ALLOCATE_RMA: { 1081 - struct kvm_allocate_rma rma; 1082 - struct kvm *kvm = filp->private_data; 1083 - 1084 - r = kvm_vm_ioctl_allocate_rma(kvm, &rma); 1085 - if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) 1086 - r = -EFAULT; 1087 - break; 1088 - } 1089 - 1090 - case KVM_PPC_ALLOCATE_HTAB: { 1091 - u32 htab_order; 1092 - 1093 - r = -EFAULT; 1094 - if (get_user(htab_order, (u32 __user *)argp)) 1095 - break; 1096 - r = kvmppc_alloc_reset_hpt(kvm, &htab_order); 1097 - if (r) 1098 - break; 1099 - r = -EFAULT; 1100 - if (put_user(htab_order, (u32 __user *)argp)) 1101 - break; 1102 - r = 0; 1103 - break; 1104 - } 1105 - 1106 - case KVM_PPC_GET_HTAB_FD: { 1107 - struct kvm_get_htab_fd ghf; 1108 - 1109 - r = -EFAULT; 1110 - if (copy_from_user(&ghf, argp, sizeof(ghf))) 1111 - break; 1112 - r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); 1113 - break; 1114 - } 1115 - #endif /* CONFIG_KVM_BOOK3S_64_HV */ 1116 - 1117 - #ifdef CONFIG_PPC_BOOK3S_64 1118 1027 case KVM_PPC_GET_SMMU_INFO: { 1119 1028 struct kvm_ppc_smmu_info info; 1029 + struct kvm *kvm = filp->private_data; 1120 1030 1121 1031 memset(&info, 0, sizeof(info)); 1122 - r = kvm_vm_ioctl_get_smmu_info(kvm, &info); 1032 + r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info); 1123 1033 if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) 1124 1034 r = -EFAULT; 1125 1035 break; ··· 1090 1080 r = kvm_vm_ioctl_rtas_define_token(kvm, argp); 1091 1081 break; 1092 1082 } 1093 - #endif /* CONFIG_PPC_BOOK3S_64 */ 1083 + default: { 1084 + struct kvm *kvm = filp->private_data; 1085 + r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); 1086 + } 1087 + #else /* CONFIG_PPC_BOOK3S_64 */ 1094 1088 default: 1095 1089 r = -ENOTTY; 1090 + #endif 1096 1091 } 1097 - 
1098 1092 out: 1099 1093 return r; 1100 1094 } ··· 1120 1106 1121 1107 return lpid; 1122 1108 } 1109 + EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid); 1123 1110 1124 1111 void kvmppc_claim_lpid(long lpid) 1125 1112 { 1126 1113 set_bit(lpid, lpid_inuse); 1127 1114 } 1115 + EXPORT_SYMBOL_GPL(kvmppc_claim_lpid); 1128 1116 1129 1117 void kvmppc_free_lpid(long lpid) 1130 1118 { 1131 1119 clear_bit(lpid, lpid_inuse); 1132 1120 } 1121 + EXPORT_SYMBOL_GPL(kvmppc_free_lpid); 1133 1122 1134 1123 void kvmppc_init_lpid(unsigned long nr_lpids_param) 1135 1124 { 1136 1125 nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param); 1137 1126 memset(lpid_inuse, 0, sizeof(lpid_inuse)); 1138 1127 } 1128 + EXPORT_SYMBOL_GPL(kvmppc_init_lpid); 1139 1129 1140 1130 int kvm_arch_init(void *opaque) 1141 1131 { ··· 1148 1130 1149 1131 void kvm_arch_exit(void) 1150 1132 { 1133 + 1151 1134 }
-429
arch/powerpc/kvm/trace.h
··· 31 31 __entry->inst, __entry->pc, __entry->emulate) 32 32 ); 33 33 34 - #ifdef CONFIG_PPC_BOOK3S 35 - #define kvm_trace_symbol_exit \ 36 - {0x100, "SYSTEM_RESET"}, \ 37 - {0x200, "MACHINE_CHECK"}, \ 38 - {0x300, "DATA_STORAGE"}, \ 39 - {0x380, "DATA_SEGMENT"}, \ 40 - {0x400, "INST_STORAGE"}, \ 41 - {0x480, "INST_SEGMENT"}, \ 42 - {0x500, "EXTERNAL"}, \ 43 - {0x501, "EXTERNAL_LEVEL"}, \ 44 - {0x502, "EXTERNAL_HV"}, \ 45 - {0x600, "ALIGNMENT"}, \ 46 - {0x700, "PROGRAM"}, \ 47 - {0x800, "FP_UNAVAIL"}, \ 48 - {0x900, "DECREMENTER"}, \ 49 - {0x980, "HV_DECREMENTER"}, \ 50 - {0xc00, "SYSCALL"}, \ 51 - {0xd00, "TRACE"}, \ 52 - {0xe00, "H_DATA_STORAGE"}, \ 53 - {0xe20, "H_INST_STORAGE"}, \ 54 - {0xe40, "H_EMUL_ASSIST"}, \ 55 - {0xf00, "PERFMON"}, \ 56 - {0xf20, "ALTIVEC"}, \ 57 - {0xf40, "VSX"} 58 - #else 59 - #define kvm_trace_symbol_exit \ 60 - {0, "CRITICAL"}, \ 61 - {1, "MACHINE_CHECK"}, \ 62 - {2, "DATA_STORAGE"}, \ 63 - {3, "INST_STORAGE"}, \ 64 - {4, "EXTERNAL"}, \ 65 - {5, "ALIGNMENT"}, \ 66 - {6, "PROGRAM"}, \ 67 - {7, "FP_UNAVAIL"}, \ 68 - {8, "SYSCALL"}, \ 69 - {9, "AP_UNAVAIL"}, \ 70 - {10, "DECREMENTER"}, \ 71 - {11, "FIT"}, \ 72 - {12, "WATCHDOG"}, \ 73 - {13, "DTLB_MISS"}, \ 74 - {14, "ITLB_MISS"}, \ 75 - {15, "DEBUG"}, \ 76 - {32, "SPE_UNAVAIL"}, \ 77 - {33, "SPE_FP_DATA"}, \ 78 - {34, "SPE_FP_ROUND"}, \ 79 - {35, "PERFORMANCE_MONITOR"}, \ 80 - {36, "DOORBELL"}, \ 81 - {37, "DOORBELL_CRITICAL"}, \ 82 - {38, "GUEST_DBELL"}, \ 83 - {39, "GUEST_DBELL_CRIT"}, \ 84 - {40, "HV_SYSCALL"}, \ 85 - {41, "HV_PRIV"} 86 - #endif 87 - 88 - TRACE_EVENT(kvm_exit, 89 - TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), 90 - TP_ARGS(exit_nr, vcpu), 91 - 92 - TP_STRUCT__entry( 93 - __field( unsigned int, exit_nr ) 94 - __field( unsigned long, pc ) 95 - __field( unsigned long, msr ) 96 - __field( unsigned long, dar ) 97 - #ifdef CONFIG_KVM_BOOK3S_PR 98 - __field( unsigned long, srr1 ) 99 - #endif 100 - __field( unsigned long, last_inst ) 101 - ), 102 - 103 - 
TP_fast_assign( 104 - #ifdef CONFIG_KVM_BOOK3S_PR 105 - struct kvmppc_book3s_shadow_vcpu *svcpu; 106 - #endif 107 - __entry->exit_nr = exit_nr; 108 - __entry->pc = kvmppc_get_pc(vcpu); 109 - __entry->dar = kvmppc_get_fault_dar(vcpu); 110 - __entry->msr = vcpu->arch.shared->msr; 111 - #ifdef CONFIG_KVM_BOOK3S_PR 112 - svcpu = svcpu_get(vcpu); 113 - __entry->srr1 = svcpu->shadow_srr1; 114 - svcpu_put(svcpu); 115 - #endif 116 - __entry->last_inst = vcpu->arch.last_inst; 117 - ), 118 - 119 - TP_printk("exit=%s" 120 - " | pc=0x%lx" 121 - " | msr=0x%lx" 122 - " | dar=0x%lx" 123 - #ifdef CONFIG_KVM_BOOK3S_PR 124 - " | srr1=0x%lx" 125 - #endif 126 - " | last_inst=0x%lx" 127 - , 128 - __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), 129 - __entry->pc, 130 - __entry->msr, 131 - __entry->dar, 132 - #ifdef CONFIG_KVM_BOOK3S_PR 133 - __entry->srr1, 134 - #endif 135 - __entry->last_inst 136 - ) 137 - ); 138 - 139 - TRACE_EVENT(kvm_unmap_hva, 140 - TP_PROTO(unsigned long hva), 141 - TP_ARGS(hva), 142 - 143 - TP_STRUCT__entry( 144 - __field( unsigned long, hva ) 145 - ), 146 - 147 - TP_fast_assign( 148 - __entry->hva = hva; 149 - ), 150 - 151 - TP_printk("unmap hva 0x%lx\n", __entry->hva) 152 - ); 153 - 154 34 TRACE_EVENT(kvm_stlb_inval, 155 35 TP_PROTO(unsigned int stlb_index), 156 36 TP_ARGS(stlb_index), ··· 115 235 TP_printk("vcpu=%x requests=%x", 116 236 __entry->cpu_nr, __entry->requests) 117 237 ); 118 - 119 - 120 - /************************************************************************* 121 - * Book3S trace points * 122 - *************************************************************************/ 123 - 124 - #ifdef CONFIG_KVM_BOOK3S_PR 125 - 126 - TRACE_EVENT(kvm_book3s_reenter, 127 - TP_PROTO(int r, struct kvm_vcpu *vcpu), 128 - TP_ARGS(r, vcpu), 129 - 130 - TP_STRUCT__entry( 131 - __field( unsigned int, r ) 132 - __field( unsigned long, pc ) 133 - ), 134 - 135 - TP_fast_assign( 136 - __entry->r = r; 137 - __entry->pc = kvmppc_get_pc(vcpu); 138 - ), 139 - 140 - 
TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) 141 - ); 142 - 143 - #ifdef CONFIG_PPC_BOOK3S_64 144 - 145 - TRACE_EVENT(kvm_book3s_64_mmu_map, 146 - TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, 147 - struct kvmppc_pte *orig_pte), 148 - TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), 149 - 150 - TP_STRUCT__entry( 151 - __field( unsigned char, flag_w ) 152 - __field( unsigned char, flag_x ) 153 - __field( unsigned long, eaddr ) 154 - __field( unsigned long, hpteg ) 155 - __field( unsigned long, va ) 156 - __field( unsigned long long, vpage ) 157 - __field( unsigned long, hpaddr ) 158 - ), 159 - 160 - TP_fast_assign( 161 - __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; 162 - __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; 163 - __entry->eaddr = orig_pte->eaddr; 164 - __entry->hpteg = hpteg; 165 - __entry->va = va; 166 - __entry->vpage = orig_pte->vpage; 167 - __entry->hpaddr = hpaddr; 168 - ), 169 - 170 - TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", 171 - __entry->flag_w, __entry->flag_x, __entry->eaddr, 172 - __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) 173 - ); 174 - 175 - #endif /* CONFIG_PPC_BOOK3S_64 */ 176 - 177 - TRACE_EVENT(kvm_book3s_mmu_map, 178 - TP_PROTO(struct hpte_cache *pte), 179 - TP_ARGS(pte), 180 - 181 - TP_STRUCT__entry( 182 - __field( u64, host_vpn ) 183 - __field( u64, pfn ) 184 - __field( ulong, eaddr ) 185 - __field( u64, vpage ) 186 - __field( ulong, raddr ) 187 - __field( int, flags ) 188 - ), 189 - 190 - TP_fast_assign( 191 - __entry->host_vpn = pte->host_vpn; 192 - __entry->pfn = pte->pfn; 193 - __entry->eaddr = pte->pte.eaddr; 194 - __entry->vpage = pte->pte.vpage; 195 - __entry->raddr = pte->pte.raddr; 196 - __entry->flags = (pte->pte.may_read ? 0x4 : 0) | 197 - (pte->pte.may_write ? 0x2 : 0) | 198 - (pte->pte.may_execute ? 
0x1 : 0); 199 - ), 200 - 201 - TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 202 - __entry->host_vpn, __entry->pfn, __entry->eaddr, 203 - __entry->vpage, __entry->raddr, __entry->flags) 204 - ); 205 - 206 - TRACE_EVENT(kvm_book3s_mmu_invalidate, 207 - TP_PROTO(struct hpte_cache *pte), 208 - TP_ARGS(pte), 209 - 210 - TP_STRUCT__entry( 211 - __field( u64, host_vpn ) 212 - __field( u64, pfn ) 213 - __field( ulong, eaddr ) 214 - __field( u64, vpage ) 215 - __field( ulong, raddr ) 216 - __field( int, flags ) 217 - ), 218 - 219 - TP_fast_assign( 220 - __entry->host_vpn = pte->host_vpn; 221 - __entry->pfn = pte->pfn; 222 - __entry->eaddr = pte->pte.eaddr; 223 - __entry->vpage = pte->pte.vpage; 224 - __entry->raddr = pte->pte.raddr; 225 - __entry->flags = (pte->pte.may_read ? 0x4 : 0) | 226 - (pte->pte.may_write ? 0x2 : 0) | 227 - (pte->pte.may_execute ? 0x1 : 0); 228 - ), 229 - 230 - TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 231 - __entry->host_vpn, __entry->pfn, __entry->eaddr, 232 - __entry->vpage, __entry->raddr, __entry->flags) 233 - ); 234 - 235 - TRACE_EVENT(kvm_book3s_mmu_flush, 236 - TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, 237 - unsigned long long p2), 238 - TP_ARGS(type, vcpu, p1, p2), 239 - 240 - TP_STRUCT__entry( 241 - __field( int, count ) 242 - __field( unsigned long long, p1 ) 243 - __field( unsigned long long, p2 ) 244 - __field( const char *, type ) 245 - ), 246 - 247 - TP_fast_assign( 248 - __entry->count = to_book3s(vcpu)->hpte_cache_count; 249 - __entry->p1 = p1; 250 - __entry->p2 = p2; 251 - __entry->type = type; 252 - ), 253 - 254 - TP_printk("Flush %d %sPTEs: %llx - %llx", 255 - __entry->count, __entry->type, __entry->p1, __entry->p2) 256 - ); 257 - 258 - TRACE_EVENT(kvm_book3s_slb_found, 259 - TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), 260 - TP_ARGS(gvsid, hvsid), 261 - 262 - TP_STRUCT__entry( 263 - __field( unsigned long long, gvsid ) 264 - __field( 
unsigned long long, hvsid ) 265 - ), 266 - 267 - TP_fast_assign( 268 - __entry->gvsid = gvsid; 269 - __entry->hvsid = hvsid; 270 - ), 271 - 272 - TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) 273 - ); 274 - 275 - TRACE_EVENT(kvm_book3s_slb_fail, 276 - TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), 277 - TP_ARGS(sid_map_mask, gvsid), 278 - 279 - TP_STRUCT__entry( 280 - __field( unsigned short, sid_map_mask ) 281 - __field( unsigned long long, gvsid ) 282 - ), 283 - 284 - TP_fast_assign( 285 - __entry->sid_map_mask = sid_map_mask; 286 - __entry->gvsid = gvsid; 287 - ), 288 - 289 - TP_printk("%x/%x: %llx", __entry->sid_map_mask, 290 - SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) 291 - ); 292 - 293 - TRACE_EVENT(kvm_book3s_slb_map, 294 - TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, 295 - unsigned long long hvsid), 296 - TP_ARGS(sid_map_mask, gvsid, hvsid), 297 - 298 - TP_STRUCT__entry( 299 - __field( unsigned short, sid_map_mask ) 300 - __field( unsigned long long, guest_vsid ) 301 - __field( unsigned long long, host_vsid ) 302 - ), 303 - 304 - TP_fast_assign( 305 - __entry->sid_map_mask = sid_map_mask; 306 - __entry->guest_vsid = gvsid; 307 - __entry->host_vsid = hvsid; 308 - ), 309 - 310 - TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, 311 - __entry->guest_vsid, __entry->host_vsid) 312 - ); 313 - 314 - TRACE_EVENT(kvm_book3s_slbmte, 315 - TP_PROTO(u64 slb_vsid, u64 slb_esid), 316 - TP_ARGS(slb_vsid, slb_esid), 317 - 318 - TP_STRUCT__entry( 319 - __field( u64, slb_vsid ) 320 - __field( u64, slb_esid ) 321 - ), 322 - 323 - TP_fast_assign( 324 - __entry->slb_vsid = slb_vsid; 325 - __entry->slb_esid = slb_esid; 326 - ), 327 - 328 - TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) 329 - ); 330 - 331 - #endif /* CONFIG_PPC_BOOK3S */ 332 - 333 - 334 - /************************************************************************* 335 - * Book3E trace points * 336 - 
*************************************************************************/ 337 - 338 - #ifdef CONFIG_BOOKE 339 - 340 - TRACE_EVENT(kvm_booke206_stlb_write, 341 - TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), 342 - TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), 343 - 344 - TP_STRUCT__entry( 345 - __field( __u32, mas0 ) 346 - __field( __u32, mas8 ) 347 - __field( __u32, mas1 ) 348 - __field( __u64, mas2 ) 349 - __field( __u64, mas7_3 ) 350 - ), 351 - 352 - TP_fast_assign( 353 - __entry->mas0 = mas0; 354 - __entry->mas8 = mas8; 355 - __entry->mas1 = mas1; 356 - __entry->mas2 = mas2; 357 - __entry->mas7_3 = mas7_3; 358 - ), 359 - 360 - TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", 361 - __entry->mas0, __entry->mas8, __entry->mas1, 362 - __entry->mas2, __entry->mas7_3) 363 - ); 364 - 365 - TRACE_EVENT(kvm_booke206_gtlb_write, 366 - TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), 367 - TP_ARGS(mas0, mas1, mas2, mas7_3), 368 - 369 - TP_STRUCT__entry( 370 - __field( __u32, mas0 ) 371 - __field( __u32, mas1 ) 372 - __field( __u64, mas2 ) 373 - __field( __u64, mas7_3 ) 374 - ), 375 - 376 - TP_fast_assign( 377 - __entry->mas0 = mas0; 378 - __entry->mas1 = mas1; 379 - __entry->mas2 = mas2; 380 - __entry->mas7_3 = mas7_3; 381 - ), 382 - 383 - TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", 384 - __entry->mas0, __entry->mas1, 385 - __entry->mas2, __entry->mas7_3) 386 - ); 387 - 388 - TRACE_EVENT(kvm_booke206_ref_release, 389 - TP_PROTO(__u64 pfn, __u32 flags), 390 - TP_ARGS(pfn, flags), 391 - 392 - TP_STRUCT__entry( 393 - __field( __u64, pfn ) 394 - __field( __u32, flags ) 395 - ), 396 - 397 - TP_fast_assign( 398 - __entry->pfn = pfn; 399 - __entry->flags = flags; 400 - ), 401 - 402 - TP_printk("pfn=%llx flags=%x", 403 - __entry->pfn, __entry->flags) 404 - ); 405 - 406 - TRACE_EVENT(kvm_booke_queue_irqprio, 407 - TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), 408 - TP_ARGS(vcpu, priority), 409 - 410 - TP_STRUCT__entry( 411 
- __field( __u32, cpu_nr ) 412 - __field( __u32, priority ) 413 - __field( unsigned long, pending ) 414 - ), 415 - 416 - TP_fast_assign( 417 - __entry->cpu_nr = vcpu->vcpu_id; 418 - __entry->priority = priority; 419 - __entry->pending = vcpu->arch.pending_exceptions; 420 - ), 421 - 422 - TP_printk("vcpu=%x prio=%x pending=%lx", 423 - __entry->cpu_nr, __entry->priority, __entry->pending) 424 - ); 425 - 426 - #endif 427 238 428 239 #endif /* _TRACE_KVM_H */ 429 240
+177
arch/powerpc/kvm/trace_booke.h
··· 1 + #if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ) 2 + #define _TRACE_KVM_BOOKE_H 3 + 4 + #include <linux/tracepoint.h> 5 + 6 + #undef TRACE_SYSTEM 7 + #define TRACE_SYSTEM kvm_booke 8 + #define TRACE_INCLUDE_PATH . 9 + #define TRACE_INCLUDE_FILE trace_booke 10 + 11 + #define kvm_trace_symbol_exit \ 12 + {0, "CRITICAL"}, \ 13 + {1, "MACHINE_CHECK"}, \ 14 + {2, "DATA_STORAGE"}, \ 15 + {3, "INST_STORAGE"}, \ 16 + {4, "EXTERNAL"}, \ 17 + {5, "ALIGNMENT"}, \ 18 + {6, "PROGRAM"}, \ 19 + {7, "FP_UNAVAIL"}, \ 20 + {8, "SYSCALL"}, \ 21 + {9, "AP_UNAVAIL"}, \ 22 + {10, "DECREMENTER"}, \ 23 + {11, "FIT"}, \ 24 + {12, "WATCHDOG"}, \ 25 + {13, "DTLB_MISS"}, \ 26 + {14, "ITLB_MISS"}, \ 27 + {15, "DEBUG"}, \ 28 + {32, "SPE_UNAVAIL"}, \ 29 + {33, "SPE_FP_DATA"}, \ 30 + {34, "SPE_FP_ROUND"}, \ 31 + {35, "PERFORMANCE_MONITOR"}, \ 32 + {36, "DOORBELL"}, \ 33 + {37, "DOORBELL_CRITICAL"}, \ 34 + {38, "GUEST_DBELL"}, \ 35 + {39, "GUEST_DBELL_CRIT"}, \ 36 + {40, "HV_SYSCALL"}, \ 37 + {41, "HV_PRIV"} 38 + 39 + TRACE_EVENT(kvm_exit, 40 + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), 41 + TP_ARGS(exit_nr, vcpu), 42 + 43 + TP_STRUCT__entry( 44 + __field( unsigned int, exit_nr ) 45 + __field( unsigned long, pc ) 46 + __field( unsigned long, msr ) 47 + __field( unsigned long, dar ) 48 + __field( unsigned long, last_inst ) 49 + ), 50 + 51 + TP_fast_assign( 52 + __entry->exit_nr = exit_nr; 53 + __entry->pc = kvmppc_get_pc(vcpu); 54 + __entry->dar = kvmppc_get_fault_dar(vcpu); 55 + __entry->msr = vcpu->arch.shared->msr; 56 + __entry->last_inst = vcpu->arch.last_inst; 57 + ), 58 + 59 + TP_printk("exit=%s" 60 + " | pc=0x%lx" 61 + " | msr=0x%lx" 62 + " | dar=0x%lx" 63 + " | last_inst=0x%lx" 64 + , 65 + __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), 66 + __entry->pc, 67 + __entry->msr, 68 + __entry->dar, 69 + __entry->last_inst 70 + ) 71 + ); 72 + 73 + TRACE_EVENT(kvm_unmap_hva, 74 + TP_PROTO(unsigned long hva), 75 + TP_ARGS(hva), 76 + 77 + 
TP_STRUCT__entry( 78 + __field( unsigned long, hva ) 79 + ), 80 + 81 + TP_fast_assign( 82 + __entry->hva = hva; 83 + ), 84 + 85 + TP_printk("unmap hva 0x%lx\n", __entry->hva) 86 + ); 87 + 88 + TRACE_EVENT(kvm_booke206_stlb_write, 89 + TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), 90 + TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), 91 + 92 + TP_STRUCT__entry( 93 + __field( __u32, mas0 ) 94 + __field( __u32, mas8 ) 95 + __field( __u32, mas1 ) 96 + __field( __u64, mas2 ) 97 + __field( __u64, mas7_3 ) 98 + ), 99 + 100 + TP_fast_assign( 101 + __entry->mas0 = mas0; 102 + __entry->mas8 = mas8; 103 + __entry->mas1 = mas1; 104 + __entry->mas2 = mas2; 105 + __entry->mas7_3 = mas7_3; 106 + ), 107 + 108 + TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", 109 + __entry->mas0, __entry->mas8, __entry->mas1, 110 + __entry->mas2, __entry->mas7_3) 111 + ); 112 + 113 + TRACE_EVENT(kvm_booke206_gtlb_write, 114 + TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), 115 + TP_ARGS(mas0, mas1, mas2, mas7_3), 116 + 117 + TP_STRUCT__entry( 118 + __field( __u32, mas0 ) 119 + __field( __u32, mas1 ) 120 + __field( __u64, mas2 ) 121 + __field( __u64, mas7_3 ) 122 + ), 123 + 124 + TP_fast_assign( 125 + __entry->mas0 = mas0; 126 + __entry->mas1 = mas1; 127 + __entry->mas2 = mas2; 128 + __entry->mas7_3 = mas7_3; 129 + ), 130 + 131 + TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", 132 + __entry->mas0, __entry->mas1, 133 + __entry->mas2, __entry->mas7_3) 134 + ); 135 + 136 + TRACE_EVENT(kvm_booke206_ref_release, 137 + TP_PROTO(__u64 pfn, __u32 flags), 138 + TP_ARGS(pfn, flags), 139 + 140 + TP_STRUCT__entry( 141 + __field( __u64, pfn ) 142 + __field( __u32, flags ) 143 + ), 144 + 145 + TP_fast_assign( 146 + __entry->pfn = pfn; 147 + __entry->flags = flags; 148 + ), 149 + 150 + TP_printk("pfn=%llx flags=%x", 151 + __entry->pfn, __entry->flags) 152 + ); 153 + 154 + TRACE_EVENT(kvm_booke_queue_irqprio, 155 + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), 
156 + TP_ARGS(vcpu, priority), 157 + 158 + TP_STRUCT__entry( 159 + __field( __u32, cpu_nr ) 160 + __field( __u32, priority ) 161 + __field( unsigned long, pending ) 162 + ), 163 + 164 + TP_fast_assign( 165 + __entry->cpu_nr = vcpu->vcpu_id; 166 + __entry->priority = priority; 167 + __entry->pending = vcpu->arch.pending_exceptions; 168 + ), 169 + 170 + TP_printk("vcpu=%x prio=%x pending=%lx", 171 + __entry->cpu_nr, __entry->priority, __entry->pending) 172 + ); 173 + 174 + #endif 175 + 176 + /* This part must be outside protection */ 177 + #include <trace/define_trace.h>
+297
arch/powerpc/kvm/trace_pr.h
··· 1 + 2 + #if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ) 3 + #define _TRACE_KVM_PR_H 4 + 5 + #include <linux/tracepoint.h> 6 + 7 + #undef TRACE_SYSTEM 8 + #define TRACE_SYSTEM kvm_pr 9 + #define TRACE_INCLUDE_PATH . 10 + #define TRACE_INCLUDE_FILE trace_pr 11 + 12 + #define kvm_trace_symbol_exit \ 13 + {0x100, "SYSTEM_RESET"}, \ 14 + {0x200, "MACHINE_CHECK"}, \ 15 + {0x300, "DATA_STORAGE"}, \ 16 + {0x380, "DATA_SEGMENT"}, \ 17 + {0x400, "INST_STORAGE"}, \ 18 + {0x480, "INST_SEGMENT"}, \ 19 + {0x500, "EXTERNAL"}, \ 20 + {0x501, "EXTERNAL_LEVEL"}, \ 21 + {0x502, "EXTERNAL_HV"}, \ 22 + {0x600, "ALIGNMENT"}, \ 23 + {0x700, "PROGRAM"}, \ 24 + {0x800, "FP_UNAVAIL"}, \ 25 + {0x900, "DECREMENTER"}, \ 26 + {0x980, "HV_DECREMENTER"}, \ 27 + {0xc00, "SYSCALL"}, \ 28 + {0xd00, "TRACE"}, \ 29 + {0xe00, "H_DATA_STORAGE"}, \ 30 + {0xe20, "H_INST_STORAGE"}, \ 31 + {0xe40, "H_EMUL_ASSIST"}, \ 32 + {0xf00, "PERFMON"}, \ 33 + {0xf20, "ALTIVEC"}, \ 34 + {0xf40, "VSX"} 35 + 36 + TRACE_EVENT(kvm_book3s_reenter, 37 + TP_PROTO(int r, struct kvm_vcpu *vcpu), 38 + TP_ARGS(r, vcpu), 39 + 40 + TP_STRUCT__entry( 41 + __field( unsigned int, r ) 42 + __field( unsigned long, pc ) 43 + ), 44 + 45 + TP_fast_assign( 46 + __entry->r = r; 47 + __entry->pc = kvmppc_get_pc(vcpu); 48 + ), 49 + 50 + TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) 51 + ); 52 + 53 + #ifdef CONFIG_PPC_BOOK3S_64 54 + 55 + TRACE_EVENT(kvm_book3s_64_mmu_map, 56 + TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, 57 + struct kvmppc_pte *orig_pte), 58 + TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), 59 + 60 + TP_STRUCT__entry( 61 + __field( unsigned char, flag_w ) 62 + __field( unsigned char, flag_x ) 63 + __field( unsigned long, eaddr ) 64 + __field( unsigned long, hpteg ) 65 + __field( unsigned long, va ) 66 + __field( unsigned long long, vpage ) 67 + __field( unsigned long, hpaddr ) 68 + ), 69 + 70 + TP_fast_assign( 71 + __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? 
'-' : 'w'; 72 + __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; 73 + __entry->eaddr = orig_pte->eaddr; 74 + __entry->hpteg = hpteg; 75 + __entry->va = va; 76 + __entry->vpage = orig_pte->vpage; 77 + __entry->hpaddr = hpaddr; 78 + ), 79 + 80 + TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", 81 + __entry->flag_w, __entry->flag_x, __entry->eaddr, 82 + __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) 83 + ); 84 + 85 + #endif /* CONFIG_PPC_BOOK3S_64 */ 86 + 87 + TRACE_EVENT(kvm_book3s_mmu_map, 88 + TP_PROTO(struct hpte_cache *pte), 89 + TP_ARGS(pte), 90 + 91 + TP_STRUCT__entry( 92 + __field( u64, host_vpn ) 93 + __field( u64, pfn ) 94 + __field( ulong, eaddr ) 95 + __field( u64, vpage ) 96 + __field( ulong, raddr ) 97 + __field( int, flags ) 98 + ), 99 + 100 + TP_fast_assign( 101 + __entry->host_vpn = pte->host_vpn; 102 + __entry->pfn = pte->pfn; 103 + __entry->eaddr = pte->pte.eaddr; 104 + __entry->vpage = pte->pte.vpage; 105 + __entry->raddr = pte->pte.raddr; 106 + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | 107 + (pte->pte.may_write ? 0x2 : 0) | 108 + (pte->pte.may_execute ? 0x1 : 0); 109 + ), 110 + 111 + TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 112 + __entry->host_vpn, __entry->pfn, __entry->eaddr, 113 + __entry->vpage, __entry->raddr, __entry->flags) 114 + ); 115 + 116 + TRACE_EVENT(kvm_book3s_mmu_invalidate, 117 + TP_PROTO(struct hpte_cache *pte), 118 + TP_ARGS(pte), 119 + 120 + TP_STRUCT__entry( 121 + __field( u64, host_vpn ) 122 + __field( u64, pfn ) 123 + __field( ulong, eaddr ) 124 + __field( u64, vpage ) 125 + __field( ulong, raddr ) 126 + __field( int, flags ) 127 + ), 128 + 129 + TP_fast_assign( 130 + __entry->host_vpn = pte->host_vpn; 131 + __entry->pfn = pte->pfn; 132 + __entry->eaddr = pte->pte.eaddr; 133 + __entry->vpage = pte->pte.vpage; 134 + __entry->raddr = pte->pte.raddr; 135 + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | 136 + (pte->pte.may_write ? 
0x2 : 0) | 137 + (pte->pte.may_execute ? 0x1 : 0); 138 + ), 139 + 140 + TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", 141 + __entry->host_vpn, __entry->pfn, __entry->eaddr, 142 + __entry->vpage, __entry->raddr, __entry->flags) 143 + ); 144 + 145 + TRACE_EVENT(kvm_book3s_mmu_flush, 146 + TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, 147 + unsigned long long p2), 148 + TP_ARGS(type, vcpu, p1, p2), 149 + 150 + TP_STRUCT__entry( 151 + __field( int, count ) 152 + __field( unsigned long long, p1 ) 153 + __field( unsigned long long, p2 ) 154 + __field( const char *, type ) 155 + ), 156 + 157 + TP_fast_assign( 158 + __entry->count = to_book3s(vcpu)->hpte_cache_count; 159 + __entry->p1 = p1; 160 + __entry->p2 = p2; 161 + __entry->type = type; 162 + ), 163 + 164 + TP_printk("Flush %d %sPTEs: %llx - %llx", 165 + __entry->count, __entry->type, __entry->p1, __entry->p2) 166 + ); 167 + 168 + TRACE_EVENT(kvm_book3s_slb_found, 169 + TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), 170 + TP_ARGS(gvsid, hvsid), 171 + 172 + TP_STRUCT__entry( 173 + __field( unsigned long long, gvsid ) 174 + __field( unsigned long long, hvsid ) 175 + ), 176 + 177 + TP_fast_assign( 178 + __entry->gvsid = gvsid; 179 + __entry->hvsid = hvsid; 180 + ), 181 + 182 + TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) 183 + ); 184 + 185 + TRACE_EVENT(kvm_book3s_slb_fail, 186 + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), 187 + TP_ARGS(sid_map_mask, gvsid), 188 + 189 + TP_STRUCT__entry( 190 + __field( unsigned short, sid_map_mask ) 191 + __field( unsigned long long, gvsid ) 192 + ), 193 + 194 + TP_fast_assign( 195 + __entry->sid_map_mask = sid_map_mask; 196 + __entry->gvsid = gvsid; 197 + ), 198 + 199 + TP_printk("%x/%x: %llx", __entry->sid_map_mask, 200 + SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) 201 + ); 202 + 203 + TRACE_EVENT(kvm_book3s_slb_map, 204 + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, 205 + unsigned 
long long hvsid), 206 + TP_ARGS(sid_map_mask, gvsid, hvsid), 207 + 208 + TP_STRUCT__entry( 209 + __field( unsigned short, sid_map_mask ) 210 + __field( unsigned long long, guest_vsid ) 211 + __field( unsigned long long, host_vsid ) 212 + ), 213 + 214 + TP_fast_assign( 215 + __entry->sid_map_mask = sid_map_mask; 216 + __entry->guest_vsid = gvsid; 217 + __entry->host_vsid = hvsid; 218 + ), 219 + 220 + TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, 221 + __entry->guest_vsid, __entry->host_vsid) 222 + ); 223 + 224 + TRACE_EVENT(kvm_book3s_slbmte, 225 + TP_PROTO(u64 slb_vsid, u64 slb_esid), 226 + TP_ARGS(slb_vsid, slb_esid), 227 + 228 + TP_STRUCT__entry( 229 + __field( u64, slb_vsid ) 230 + __field( u64, slb_esid ) 231 + ), 232 + 233 + TP_fast_assign( 234 + __entry->slb_vsid = slb_vsid; 235 + __entry->slb_esid = slb_esid; 236 + ), 237 + 238 + TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) 239 + ); 240 + 241 + TRACE_EVENT(kvm_exit, 242 + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), 243 + TP_ARGS(exit_nr, vcpu), 244 + 245 + TP_STRUCT__entry( 246 + __field( unsigned int, exit_nr ) 247 + __field( unsigned long, pc ) 248 + __field( unsigned long, msr ) 249 + __field( unsigned long, dar ) 250 + __field( unsigned long, srr1 ) 251 + __field( unsigned long, last_inst ) 252 + ), 253 + 254 + TP_fast_assign( 255 + __entry->exit_nr = exit_nr; 256 + __entry->pc = kvmppc_get_pc(vcpu); 257 + __entry->dar = kvmppc_get_fault_dar(vcpu); 258 + __entry->msr = vcpu->arch.shared->msr; 259 + __entry->srr1 = vcpu->arch.shadow_srr1; 260 + __entry->last_inst = vcpu->arch.last_inst; 261 + ), 262 + 263 + TP_printk("exit=%s" 264 + " | pc=0x%lx" 265 + " | msr=0x%lx" 266 + " | dar=0x%lx" 267 + " | srr1=0x%lx" 268 + " | last_inst=0x%lx" 269 + , 270 + __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), 271 + __entry->pc, 272 + __entry->msr, 273 + __entry->dar, 274 + __entry->srr1, 275 + __entry->last_inst 276 + ) 277 + ); 278 + 279 + TRACE_EVENT(kvm_unmap_hva, 280 + 
TP_PROTO(unsigned long hva), 281 + TP_ARGS(hva), 282 + 283 + TP_STRUCT__entry( 284 + __field( unsigned long, hva ) 285 + ), 286 + 287 + TP_fast_assign( 288 + __entry->hva = hva; 289 + ), 290 + 291 + TP_printk("unmap hva 0x%lx\n", __entry->hva) 292 + ); 293 + 294 + #endif /* _TRACE_KVM_H */ 295 + 296 + /* This part must be outside protection */ 297 + #include <trace/define_trace.h>
-8
arch/s390/include/asm/kvm_host.h
··· 38 38 struct sca_entry cpu[64]; 39 39 } __attribute__((packed)); 40 40 41 - #define KVM_NR_PAGE_SIZES 2 42 - #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8) 43 - #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) 44 - #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) 45 - #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 46 - #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) 47 - 48 41 #define CPUSTAT_STOPPED 0x80000000 49 42 #define CPUSTAT_WAIT 0x10000000 50 43 #define CPUSTAT_ECALL_PEND 0x08000000 ··· 213 220 /* for local_interrupt.action_flags */ 214 221 #define ACTION_STORE_ON_STOP (1<<0) 215 222 #define ACTION_STOP_ON_STOP (1<<1) 216 - #define ACTION_RELOADVCPU_ON_STOP (1<<2) 217 223 218 224 struct kvm_s390_local_interrupt { 219 225 spinlock_t lock;
+1 -3
arch/s390/kvm/diag.c
··· 107 107 108 108 static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) 109 109 { 110 - int ret, idx; 110 + int ret; 111 111 112 112 /* No virtio-ccw notification? Get out quickly. */ 113 113 if (!vcpu->kvm->arch.css_support || 114 114 (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) 115 115 return -EOPNOTSUPP; 116 116 117 - idx = srcu_read_lock(&vcpu->kvm->srcu); 118 117 /* 119 118 * The layout is as follows: 120 119 * - gpr 2 contains the subchannel id (passed as addr) ··· 124 125 vcpu->run->s.regs.gprs[2], 125 126 8, &vcpu->run->s.regs.gprs[3], 126 127 vcpu->run->s.regs.gprs[4]); 127 - srcu_read_unlock(&vcpu->kvm->srcu, idx); 128 128 129 129 /* 130 130 * Return cookie in gpr 2, but don't overwrite the register if the
+14 -7
arch/s390/kvm/gaccess.h
··· 18 18 #include <asm/uaccess.h> 19 19 #include "kvm-s390.h" 20 20 21 + /* Convert real to absolute address by applying the prefix of the CPU */ 22 + static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, 23 + unsigned long gaddr) 24 + { 25 + unsigned long prefix = vcpu->arch.sie_block->prefix; 26 + if (gaddr < 2 * PAGE_SIZE) 27 + gaddr += prefix; 28 + else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE) 29 + gaddr -= prefix; 30 + return gaddr; 31 + } 32 + 21 33 static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, 22 34 void __user *gptr, 23 35 int prefixing) 24 36 { 25 - unsigned long prefix = vcpu->arch.sie_block->prefix; 26 37 unsigned long gaddr = (unsigned long) gptr; 27 38 unsigned long uaddr; 28 39 29 - if (prefixing) { 30 - if (gaddr < 2 * PAGE_SIZE) 31 - gaddr += prefix; 32 - else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) 33 - gaddr -= prefix; 34 - } 40 + if (prefixing) 41 + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); 35 42 uaddr = gmap_fault(gaddr, vcpu->arch.gmap); 36 43 if (IS_ERR_VALUE(uaddr)) 37 44 uaddr = -EFAULT;
-6
arch/s390/kvm/intercept.c
··· 62 62 63 63 trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); 64 64 65 - if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) { 66 - vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP; 67 - rc = SIE_INTERCEPT_RERUNVCPU; 68 - vcpu->run->exit_reason = KVM_EXIT_INTR; 69 - } 70 - 71 65 if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { 72 66 atomic_set_mask(CPUSTAT_STOPPED, 73 67 &vcpu->arch.sie_block->cpuflags);
+3
arch/s390/kvm/interrupt.c
··· 436 436 hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); 437 437 VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); 438 438 no_timer: 439 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 439 440 spin_lock(&vcpu->arch.local_int.float_int->lock); 440 441 spin_lock_bh(&vcpu->arch.local_int.lock); 441 442 add_wait_queue(&vcpu->wq, &wait); ··· 456 455 remove_wait_queue(&vcpu->wq, &wait); 457 456 spin_unlock_bh(&vcpu->arch.local_int.lock); 458 457 spin_unlock(&vcpu->arch.local_int.float_int->lock); 458 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 459 + 459 460 hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); 460 461 return 0; 461 462 }
+63 -33
arch/s390/kvm/kvm-s390.c
··· 695 695 return 0; 696 696 } 697 697 698 - static int __vcpu_run(struct kvm_vcpu *vcpu) 698 + static int vcpu_pre_run(struct kvm_vcpu *vcpu) 699 699 { 700 - int rc; 700 + int rc, cpuflags; 701 701 702 702 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); 703 703 ··· 715 715 return rc; 716 716 717 717 vcpu->arch.sie_block->icptcode = 0; 718 - VCPU_EVENT(vcpu, 6, "entering sie flags %x", 719 - atomic_read(&vcpu->arch.sie_block->cpuflags)); 720 - trace_kvm_s390_sie_enter(vcpu, 721 - atomic_read(&vcpu->arch.sie_block->cpuflags)); 718 + cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 719 + VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 720 + trace_kvm_s390_sie_enter(vcpu, cpuflags); 722 721 723 - /* 724 - * As PF_VCPU will be used in fault handler, between guest_enter 725 - * and guest_exit should be no uaccess. 726 - */ 727 - preempt_disable(); 728 - kvm_guest_enter(); 729 - preempt_enable(); 730 - rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); 731 - kvm_guest_exit(); 722 + return 0; 723 + } 724 + 725 + static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 726 + { 727 + int rc; 732 728 733 729 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 734 730 vcpu->arch.sie_block->icptcode); 735 731 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 736 732 737 - if (rc > 0) 733 + if (exit_reason >= 0) { 738 734 rc = 0; 739 - if (rc < 0) { 735 + } else { 740 736 if (kvm_is_ucontrol(vcpu->kvm)) { 741 737 rc = SIE_INTERCEPT_UCONTROL; 742 738 } else { ··· 743 747 } 744 748 745 749 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); 750 + 751 + if (rc == 0) { 752 + if (kvm_is_ucontrol(vcpu->kvm)) 753 + rc = -EOPNOTSUPP; 754 + else 755 + rc = kvm_handle_sie_intercept(vcpu); 756 + } 757 + 758 + return rc; 759 + } 760 + 761 + static int __vcpu_run(struct kvm_vcpu *vcpu) 762 + { 763 + int rc, exit_reason; 764 + 765 + /* 766 + * We try to hold kvm->srcu during most of vcpu_run (except when run- 767 + 
* ning the guest), so that memslots (and other stuff) are protected 768 + */ 769 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 770 + 771 + do { 772 + rc = vcpu_pre_run(vcpu); 773 + if (rc) 774 + break; 775 + 776 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 777 + /* 778 + * As PF_VCPU will be used in fault handler, between 779 + * guest_enter and guest_exit should be no uaccess. 780 + */ 781 + preempt_disable(); 782 + kvm_guest_enter(); 783 + preempt_enable(); 784 + exit_reason = sie64a(vcpu->arch.sie_block, 785 + vcpu->run->s.regs.gprs); 786 + kvm_guest_exit(); 787 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 788 + 789 + rc = vcpu_post_run(vcpu, exit_reason); 790 + } while (!signal_pending(current) && !rc); 791 + 792 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 746 793 return rc; 747 794 } 748 795 ··· 794 755 int rc; 795 756 sigset_t sigsaved; 796 757 797 - rerun_vcpu: 798 758 if (vcpu->sigset_active) 799 759 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 800 760 ··· 826 788 } 827 789 828 790 might_fault(); 829 - 830 - do { 831 - rc = __vcpu_run(vcpu); 832 - if (rc) 833 - break; 834 - if (kvm_is_ucontrol(vcpu->kvm)) 835 - rc = -EOPNOTSUPP; 836 - else 837 - rc = kvm_handle_sie_intercept(vcpu); 838 - } while (!signal_pending(current) && !rc); 839 - 840 - if (rc == SIE_INTERCEPT_RERUNVCPU) 841 - goto rerun_vcpu; 791 + rc = __vcpu_run(vcpu); 842 792 843 793 if (signal_pending(current) && !rc) { 844 794 kvm_run->exit_reason = KVM_EXIT_INTR; ··· 984 958 { 985 959 struct kvm_vcpu *vcpu = filp->private_data; 986 960 void __user *argp = (void __user *)arg; 961 + int idx; 987 962 long r; 988 963 989 964 switch (ioctl) { ··· 998 971 break; 999 972 } 1000 973 case KVM_S390_STORE_STATUS: 974 + idx = srcu_read_lock(&vcpu->kvm->srcu); 1001 975 r = kvm_s390_vcpu_store_status(vcpu, arg); 976 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 1002 977 break; 1003 978 case KVM_S390_SET_INITIAL_PSW: { 1004 979 psw_t psw; ··· 1096 1067 return 
VM_FAULT_SIGBUS; 1097 1068 } 1098 1069 1099 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 1070 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 1100 1071 struct kvm_memory_slot *dont) 1101 1072 { 1102 1073 } 1103 1074 1104 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 1075 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 1076 + unsigned long npages) 1105 1077 { 1106 1078 return 0; 1107 1079 }
+5 -4
arch/s390/kvm/kvm-s390.h
··· 28 28 extern unsigned long *vfacilities; 29 29 30 30 /* negativ values are error codes, positive values for internal conditions */ 31 - #define SIE_INTERCEPT_RERUNVCPU (1<<0) 32 - #define SIE_INTERCEPT_UCONTROL (1<<1) 31 + #define SIE_INTERCEPT_UCONTROL (1<<0) 33 32 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); 34 33 35 34 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ ··· 90 91 91 92 static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) 92 93 { 93 - *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; 94 - *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; 94 + if (r1) 95 + *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20; 96 + if (r2) 97 + *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; 95 98 } 96 99 97 100 static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
+61
arch/s390/kvm/priv.c
··· 30 30 #include "kvm-s390.h" 31 31 #include "trace.h" 32 32 33 + /* Handle SCK (SET CLOCK) interception */ 34 + static int handle_set_clock(struct kvm_vcpu *vcpu) 35 + { 36 + struct kvm_vcpu *cpup; 37 + s64 hostclk, val; 38 + u64 op2; 39 + int i; 40 + 41 + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 42 + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 43 + 44 + op2 = kvm_s390_get_base_disp_s(vcpu); 45 + if (op2 & 7) /* Operand must be on a doubleword boundary */ 46 + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 47 + if (get_guest(vcpu, val, (u64 __user *) op2)) 48 + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 49 + 50 + if (store_tod_clock(&hostclk)) { 51 + kvm_s390_set_psw_cc(vcpu, 3); 52 + return 0; 53 + } 54 + val = (val - hostclk) & ~0x3fUL; 55 + 56 + mutex_lock(&vcpu->kvm->lock); 57 + kvm_for_each_vcpu(i, cpup, vcpu->kvm) 58 + cpup->arch.sie_block->epoch = val; 59 + mutex_unlock(&vcpu->kvm->lock); 60 + 61 + kvm_s390_set_psw_cc(vcpu, 0); 62 + return 0; 63 + } 64 + 33 65 static int handle_set_prefix(struct kvm_vcpu *vcpu) 34 66 { 35 67 u64 operand2; ··· 157 125 vcpu->arch.sie_block->gpsw.addr = 158 126 __rewind_psw(vcpu->arch.sie_block->gpsw, 4); 159 127 VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); 128 + return 0; 129 + } 130 + 131 + static int handle_test_block(struct kvm_vcpu *vcpu) 132 + { 133 + unsigned long hva; 134 + gpa_t addr; 135 + int reg2; 136 + 137 + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 138 + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 139 + 140 + kvm_s390_get_regs_rre(vcpu, NULL, &reg2); 141 + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; 142 + addr = kvm_s390_real_to_abs(vcpu, addr); 143 + 144 + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); 145 + if (kvm_is_error_hva(hva)) 146 + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 147 + /* 148 + * We don't expect errors on modern systems, and do not care 149 + * about storage keys 
(yet), so let's just clear the page. 150 + */ 151 + if (clear_user((void __user *)hva, PAGE_SIZE) != 0) 152 + return -EFAULT; 153 + kvm_s390_set_psw_cc(vcpu, 0); 154 + vcpu->run->s.regs.gprs[0] = 0; 160 155 return 0; 161 156 } 162 157 ··· 497 438 498 439 static const intercept_handler_t b2_handlers[256] = { 499 440 [0x02] = handle_stidp, 441 + [0x04] = handle_set_clock, 500 442 [0x10] = handle_set_prefix, 501 443 [0x11] = handle_store_prefix, 502 444 [0x12] = handle_store_cpu_address, 503 445 [0x29] = handle_skey, 504 446 [0x2a] = handle_skey, 505 447 [0x2b] = handle_skey, 448 + [0x2c] = handle_test_block, 506 449 [0x30] = handle_io_inst, 507 450 [0x31] = handle_io_inst, 508 451 [0x32] = handle_io_inst,
+7 -3
arch/x86/include/asm/kvm_emulate.h
··· 274 274 275 275 bool guest_mode; /* guest running a nested guest */ 276 276 bool perm_ok; /* do not check permissions if true */ 277 - bool only_vendor_specific_insn; 277 + bool ud; /* inject an #UD if host doesn't support insn */ 278 278 279 279 bool have_exception; 280 280 struct x86_exception exception; 281 281 282 - /* decode cache */ 283 - u8 twobyte; 282 + /* 283 + * decode cache 284 + */ 285 + 286 + /* current opcode length in bytes */ 287 + u8 opcode_len; 284 288 u8 b; 285 289 u8 intercept; 286 290 u8 lock_prefix;
+15 -8
arch/x86/include/asm/kvm_host.h
··· 79 79 #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) 80 80 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) 81 81 82 + static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) 83 + { 84 + /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ 85 + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - 86 + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); 87 + } 88 + 82 89 #define SELECTOR_TI_MASK (1 << 2) 83 90 #define SELECTOR_RPL_MASK 0x03 84 91 ··· 260 253 * mode. 261 254 */ 262 255 struct kvm_mmu { 263 - void (*new_cr3)(struct kvm_vcpu *vcpu); 264 256 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); 265 257 unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); 266 258 u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); ··· 267 261 bool prefault); 268 262 void (*inject_page_fault)(struct kvm_vcpu *vcpu, 269 263 struct x86_exception *fault); 270 - void (*free)(struct kvm_vcpu *vcpu); 271 264 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, 272 265 struct x86_exception *exception); 273 266 gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); ··· 394 389 395 390 struct fpu guest_fpu; 396 391 u64 xcr0; 392 + u64 guest_supported_xcr0; 393 + u32 guest_xstate_size; 397 394 398 395 struct kvm_pio_request pio; 399 396 void *pio_data; ··· 564 557 565 558 struct list_head assigned_dev_head; 566 559 struct iommu_domain *iommu_domain; 567 - int iommu_flags; 560 + bool iommu_noncoherent; 561 + #define __KVM_HAVE_ARCH_NONCOHERENT_DMA 562 + atomic_t noncoherent_dma_count; 568 563 struct kvm_pic *vpic; 569 564 struct kvm_ioapic *vioapic; 570 565 struct kvm_pit *vpit; ··· 789 780 790 781 void kvm_mmu_destroy(struct kvm_vcpu *vcpu); 791 782 int kvm_mmu_create(struct kvm_vcpu *vcpu); 792 - int kvm_mmu_setup(struct kvm_vcpu *vcpu); 783 + void kvm_mmu_setup(struct kvm_vcpu *vcpu); 793 784 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 794 785 u64 dirty_mask, u64 nx_mask, u64 x_mask); 795 786 796 - int 
kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 787 + void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 797 788 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 798 789 void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, 799 790 struct kvm_memory_slot *slot, ··· 931 922 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, 932 923 void *insn, int insn_len); 933 924 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); 925 + void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); 934 926 935 927 void kvm_enable_tdp(void); 936 928 void kvm_disable_tdp(void); 937 - 938 - int complete_pio(struct kvm_vcpu *vcpu); 939 - bool kvm_check_iopl(struct kvm_vcpu *vcpu); 940 929 941 930 static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) 942 931 {
+2
arch/x86/include/asm/pvclock.h
··· 14 14 struct timespec *ts); 15 15 void pvclock_resume(void); 16 16 17 + void pvclock_touch_watchdogs(void); 18 + 17 19 /* 18 20 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 19 21 * yielding a 64-bit result.
+3 -3
arch/x86/include/uapi/asm/kvm.h
··· 211 211 __u32 padding[3]; 212 212 }; 213 213 214 - #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 215 - #define KVM_CPUID_FLAG_STATEFUL_FUNC 2 216 - #define KVM_CPUID_FLAG_STATE_READ_NEXT 4 214 + #define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) 215 + #define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) 216 + #define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) 217 217 218 218 /* for KVM_SET_CPUID2 */ 219 219 struct kvm_cpuid2 {
+1
arch/x86/include/uapi/asm/msr-index.h
··· 536 536 537 537 /* MSR_IA32_VMX_MISC bits */ 538 538 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) 539 + #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F 539 540 /* AMD-V MSRs */ 540 541 541 542 #define MSR_VM_CR 0xc0010114
+1
arch/x86/kernel/kvmclock.c
··· 139 139 src = &hv_clock[cpu].pvti; 140 140 if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { 141 141 src->flags &= ~PVCLOCK_GUEST_STOPPED; 142 + pvclock_touch_watchdogs(); 142 143 ret = true; 143 144 } 144 145
+13
arch/x86/kernel/pvclock.c
··· 43 43 return pv_tsc_khz; 44 44 } 45 45 46 + void pvclock_touch_watchdogs(void) 47 + { 48 + touch_softlockup_watchdog_sync(); 49 + clocksource_touch_watchdog(); 50 + rcu_cpu_stall_reset(); 51 + reset_hung_task_detector(); 52 + } 53 + 46 54 static atomic64_t last_value = ATOMIC64_INIT(0); 47 55 48 56 void pvclock_resume(void) ··· 81 73 do { 82 74 version = __pvclock_read_cycles(src, &ret, &flags); 83 75 } while ((src->version & 1) || version != src->version); 76 + 77 + if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { 78 + src->flags &= ~PVCLOCK_GUEST_STOPPED; 79 + pvclock_touch_watchdogs(); 80 + } 84 81 85 82 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && 86 83 (flags & PVCLOCK_TSC_STABLE_BIT))
+1
arch/x86/kvm/Kconfig
··· 38 38 select PERF_EVENTS 39 39 select HAVE_KVM_MSI 40 40 select HAVE_KVM_CPU_RELAX_INTERCEPT 41 + select KVM_VFIO 41 42 ---help--- 42 43 Support hosting fully virtualized guest machines using hardware 43 44 virtualization extensions. You will need a fairly recent
+1 -1
arch/x86/kvm/Makefile
··· 9 9 10 10 kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ 11 11 $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ 12 - $(KVM)/eventfd.o $(KVM)/irqchip.o 12 + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o 13 13 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o 14 14 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o 15 15
+106 -9
arch/x86/kvm/cpuid.c
··· 23 23 #include "mmu.h" 24 24 #include "trace.h" 25 25 26 + static u32 xstate_required_size(u64 xstate_bv) 27 + { 28 + int feature_bit = 0; 29 + u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; 30 + 31 + xstate_bv &= ~XSTATE_FPSSE; 32 + while (xstate_bv) { 33 + if (xstate_bv & 0x1) { 34 + u32 eax, ebx, ecx, edx; 35 + cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); 36 + ret = max(ret, eax + ebx); 37 + } 38 + 39 + xstate_bv >>= 1; 40 + feature_bit++; 41 + } 42 + 43 + return ret; 44 + } 45 + 26 46 void kvm_update_cpuid(struct kvm_vcpu *vcpu) 27 47 { 28 48 struct kvm_cpuid_entry2 *best; ··· 64 44 apic->lapic_timer.timer_mode_mask = 3 << 17; 65 45 else 66 46 apic->lapic_timer.timer_mode_mask = 1 << 17; 47 + } 48 + 49 + best = kvm_find_cpuid_entry(vcpu, 0xD, 0); 50 + if (!best) { 51 + vcpu->arch.guest_supported_xcr0 = 0; 52 + vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; 53 + } else { 54 + vcpu->arch.guest_supported_xcr0 = 55 + (best->eax | ((u64)best->edx << 32)) & 56 + host_xcr0 & KVM_SUPPORTED_XCR0; 57 + vcpu->arch.guest_xstate_size = 58 + xstate_required_size(vcpu->arch.guest_supported_xcr0); 67 59 } 68 60 69 61 kvm_pmu_cpuid_update(vcpu); ··· 214 182 { 215 183 u64 mask = ((u64)1 << bit); 216 184 217 - return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; 185 + return mask & KVM_SUPPORTED_XCR0 & host_xcr0; 218 186 } 219 187 220 188 #define F(x) bit(X86_FEATURE_##x) 221 189 222 - static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 223 - u32 index, int *nent, int maxnent) 190 + static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, 191 + u32 func, u32 index, int *nent, int maxnent) 192 + { 193 + switch (func) { 194 + case 0: 195 + entry->eax = 1; /* only one leaf currently */ 196 + ++*nent; 197 + break; 198 + case 1: 199 + entry->ecx = F(MOVBE); 200 + ++*nent; 201 + break; 202 + default: 203 + break; 204 + } 205 + 206 + entry->function = func; 207 + entry->index = index; 208 + 209 + return 0; 210 + } 
211 + 212 + static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 213 + u32 index, int *nent, int maxnent) 224 214 { 225 215 int r; 226 216 unsigned f_nx = is_efer_nx() ? F(NX) : 0; ··· 437 383 case 0xd: { 438 384 int idx, i; 439 385 386 + entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0; 387 + entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32; 440 388 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 441 389 for (idx = 1, i = 1; idx < 64; ++idx) { 442 390 if (*nent >= maxnent) ··· 537 481 return r; 538 482 } 539 483 484 + static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, 485 + u32 idx, int *nent, int maxnent, unsigned int type) 486 + { 487 + if (type == KVM_GET_EMULATED_CPUID) 488 + return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); 489 + 490 + return __do_cpuid_ent(entry, func, idx, nent, maxnent); 491 + } 492 + 540 493 #undef F 541 494 542 495 struct kvm_cpuid_param { ··· 560 495 return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; 561 496 } 562 497 563 - int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 564 - struct kvm_cpuid_entry2 __user *entries) 498 + static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries, 499 + __u32 num_entries, unsigned int ioctl_type) 500 + { 501 + int i; 502 + __u32 pad[3]; 503 + 504 + if (ioctl_type != KVM_GET_EMULATED_CPUID) 505 + return false; 506 + 507 + /* 508 + * We want to make sure that ->padding is being passed clean from 509 + * userspace in case we want to use it for something in the future. 510 + * 511 + * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we 512 + * have to give ourselves satisfied only with the emulated side. /me 513 + * sheds a tear. 
514 + */ 515 + for (i = 0; i < num_entries; i++) { 516 + if (copy_from_user(pad, entries[i].padding, sizeof(pad))) 517 + return true; 518 + 519 + if (pad[0] || pad[1] || pad[2]) 520 + return true; 521 + } 522 + return false; 523 + } 524 + 525 + int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, 526 + struct kvm_cpuid_entry2 __user *entries, 527 + unsigned int type) 565 528 { 566 529 struct kvm_cpuid_entry2 *cpuid_entries; 567 530 int limit, nent = 0, r = -E2BIG, i; ··· 606 513 goto out; 607 514 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 608 515 cpuid->nent = KVM_MAX_CPUID_ENTRIES; 516 + 517 + if (sanity_check_entries(entries, cpuid->nent, type)) 518 + return -EINVAL; 519 + 609 520 r = -ENOMEM; 610 - cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 521 + cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 611 522 if (!cpuid_entries) 612 523 goto out; 613 524 ··· 623 526 continue; 624 527 625 528 r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, 626 - &nent, cpuid->nent); 529 + &nent, cpuid->nent, type); 627 530 628 531 if (r) 629 532 goto out_free; ··· 634 537 limit = cpuid_entries[nent - 1].eax; 635 538 for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) 636 539 r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, 637 - &nent, cpuid->nent); 540 + &nent, cpuid->nent, type); 638 541 639 542 if (r) 640 543 goto out_free; ··· 758 661 *edx = best->edx; 759 662 } else 760 663 *eax = *ebx = *ecx = *edx = 0; 664 + trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx); 761 665 } 762 666 EXPORT_SYMBOL_GPL(kvm_cpuid); 763 667 ··· 774 676 kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); 775 677 kvm_register_write(vcpu, VCPU_REGS_RDX, edx); 776 678 kvm_x86_ops->skip_emulated_instruction(vcpu); 777 - trace_kvm_cpuid(function, eax, ebx, ecx, edx); 778 679 } 779 680 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
+3 -2
arch/x86/kvm/cpuid.h
··· 6 6 void kvm_update_cpuid(struct kvm_vcpu *vcpu); 7 7 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 8 8 u32 function, u32 index); 9 - int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 10 - struct kvm_cpuid_entry2 __user *entries); 9 + int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, 10 + struct kvm_cpuid_entry2 __user *entries, 11 + unsigned int type); 11 12 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, 12 13 struct kvm_cpuid *cpuid, 13 14 struct kvm_cpuid_entry __user *entries);
+107 -23
arch/x86/kvm/emulate.c
··· 130 130 #define Mov (1<<20) 131 131 /* Misc flags */ 132 132 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ 133 - #define VendorSpecific (1<<22) /* Vendor specific instruction */ 133 + #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */ 134 134 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ 135 135 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ 136 136 #define Undefined (1<<25) /* No Such Instruction */ ··· 785 785 * @highbyte_regs specifies whether to decode AH,CH,DH,BH. 786 786 */ 787 787 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, 788 - int highbyte_regs) 788 + int byteop) 789 789 { 790 790 void *p; 791 + int highbyte_regs = (ctxt->rex_prefix == 0) && byteop; 791 792 792 793 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) 793 794 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; ··· 1025 1024 struct operand *op) 1026 1025 { 1027 1026 unsigned reg = ctxt->modrm_reg; 1028 - int highbyte_regs = ctxt->rex_prefix == 0; 1029 1027 1030 1028 if (!(ctxt->d & ModRM)) 1031 1029 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3); ··· 1045 1045 } 1046 1046 1047 1047 op->type = OP_REG; 1048 - if (ctxt->d & ByteOp) { 1049 - op->addr.reg = decode_register(ctxt, reg, highbyte_regs); 1050 - op->bytes = 1; 1051 - } else { 1052 - op->addr.reg = decode_register(ctxt, reg, 0); 1053 - op->bytes = ctxt->op_bytes; 1054 - } 1048 + op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; 1049 + op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp); 1050 + 1055 1051 fetch_register_operand(op); 1056 1052 op->orig_val = op->val; 1057 1053 } ··· 1078 1082 ctxt->modrm_seg = VCPU_SREG_DS; 1079 1083 1080 1084 if (ctxt->modrm_mod == 3) { 1081 - int highbyte_regs = ctxt->rex_prefix == 0; 1082 - 1083 1085 op->type = OP_REG; 1084 1086 op->bytes = (ctxt->d & ByteOp) ? 
1 : ctxt->op_bytes; 1085 1087 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1086 - highbyte_regs && (ctxt->d & ByteOp)); 1088 + ctxt->d & ByteOp); 1087 1089 if (ctxt->d & Sse) { 1088 1090 op->type = OP_XMM; 1089 1091 op->bytes = 16; ··· 2955 2961 return X86EMUL_CONTINUE; 2956 2962 } 2957 2963 2964 + #define FFL(x) bit(X86_FEATURE_##x) 2965 + 2966 + static int em_movbe(struct x86_emulate_ctxt *ctxt) 2967 + { 2968 + u32 ebx, ecx, edx, eax = 1; 2969 + u16 tmp; 2970 + 2971 + /* 2972 + * Check MOVBE is set in the guest-visible CPUID leaf. 2973 + */ 2974 + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); 2975 + if (!(ecx & FFL(MOVBE))) 2976 + return emulate_ud(ctxt); 2977 + 2978 + switch (ctxt->op_bytes) { 2979 + case 2: 2980 + /* 2981 + * From MOVBE definition: "...When the operand size is 16 bits, 2982 + * the upper word of the destination register remains unchanged 2983 + * ..." 2984 + * 2985 + * Both casting ->valptr and ->val to u16 breaks strict aliasing 2986 + * rules so we have to do the operation almost per hand. 
2987 + */ 2988 + tmp = (u16)ctxt->src.val; 2989 + ctxt->dst.val &= ~0xffffUL; 2990 + ctxt->dst.val |= (unsigned long)swab16(tmp); 2991 + break; 2992 + case 4: 2993 + ctxt->dst.val = swab32((u32)ctxt->src.val); 2994 + break; 2995 + case 8: 2996 + ctxt->dst.val = swab64(ctxt->src.val); 2997 + break; 2998 + default: 2999 + return X86EMUL_PROPAGATE_FAULT; 3000 + } 3001 + return X86EMUL_CONTINUE; 3002 + } 3003 + 2958 3004 static int em_cr_write(struct x86_emulate_ctxt *ctxt) 2959 3005 { 2960 3006 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) ··· 3290 3256 return X86EMUL_CONTINUE; 3291 3257 } 3292 3258 3259 + static int em_sahf(struct x86_emulate_ctxt *ctxt) 3260 + { 3261 + u32 flags; 3262 + 3263 + flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF; 3264 + flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8; 3265 + 3266 + ctxt->eflags &= ~0xffUL; 3267 + ctxt->eflags |= flags | X86_EFLAGS_FIXED; 3268 + return X86EMUL_CONTINUE; 3269 + } 3270 + 3293 3271 static int em_lahf(struct x86_emulate_ctxt *ctxt) 3294 3272 { 3295 3273 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL; ··· 3548 3502 3549 3503 static const struct opcode group7_rm3[] = { 3550 3504 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), 3551 - II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), 3505 + II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall), 3552 3506 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), 3553 3507 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), 3554 3508 DIP(SrcNone | Prot | Priv, stgi, check_svme), ··· 3633 3587 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), 3634 3588 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), 3635 3589 }, { 3636 - I(SrcNone | Priv | VendorSpecific, em_vmcall), 3590 + I(SrcNone | Priv | EmulateOnUD, em_vmcall), 3637 3591 EXT(0, group7_rm1), 3638 3592 N, EXT(0, group7_rm3), 3639 3593 II(SrcNone | DstMem | Mov, em_smsw, smsw), N, ··· 3796 3750 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), 3797 3751 I(SrcImmFAddr | No64, em_call_far), 
N, 3798 3752 II(ImplicitOps | Stack, em_pushf, pushf), 3799 - II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf), 3753 + II(ImplicitOps | Stack, em_popf, popf), 3754 + I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf), 3800 3755 /* 0xA0 - 0xA7 */ 3801 3756 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 3802 3757 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), ··· 3857 3810 static const struct opcode twobyte_table[256] = { 3858 3811 /* 0x00 - 0x0F */ 3859 3812 G(0, group6), GD(0, &group7), N, N, 3860 - N, I(ImplicitOps | VendorSpecific, em_syscall), 3813 + N, I(ImplicitOps | EmulateOnUD, em_syscall), 3861 3814 II(ImplicitOps | Priv, em_clts, clts), N, 3862 3815 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, 3863 3816 N, D(ImplicitOps | ModRM), N, N, ··· 3877 3830 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), 3878 3831 II(ImplicitOps | Priv, em_rdmsr, rdmsr), 3879 3832 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc), 3880 - I(ImplicitOps | VendorSpecific, em_sysenter), 3881 - I(ImplicitOps | Priv | VendorSpecific, em_sysexit), 3833 + I(ImplicitOps | EmulateOnUD, em_sysenter), 3834 + I(ImplicitOps | Priv | EmulateOnUD, em_sysexit), 3882 3835 N, N, 3883 3836 N, N, N, N, N, N, N, N, 3884 3837 /* 0x40 - 0x4F */ ··· 3937 3890 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3938 3891 /* 0xF0 - 0xFF */ 3939 3892 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N 3893 + }; 3894 + 3895 + static const struct gprefix three_byte_0f_38_f0 = { 3896 + I(DstReg | SrcMem | Mov, em_movbe), N, N, N 3897 + }; 3898 + 3899 + static const struct gprefix three_byte_0f_38_f1 = { 3900 + I(DstMem | SrcReg | Mov, em_movbe), N, N, N 3901 + }; 3902 + 3903 + /* 3904 + * Insns below are selected by the prefix which indexed by the third opcode 3905 + * byte. 
3906 + */ 3907 + static const struct opcode opcode_map_0f_38[256] = { 3908 + /* 0x00 - 0x7f */ 3909 + X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), 3910 + /* 0x80 - 0xef */ 3911 + X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), 3912 + /* 0xf0 - 0xf1 */ 3913 + GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0), 3914 + GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1), 3915 + /* 0xf2 - 0xff */ 3916 + N, N, X4(N), X8(N) 3940 3917 }; 3941 3918 3942 3919 #undef D ··· 4111 4040 case OpMem8: 4112 4041 ctxt->memop.bytes = 1; 4113 4042 if (ctxt->memop.type == OP_REG) { 4114 - ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); 4043 + ctxt->memop.addr.reg = decode_register(ctxt, 4044 + ctxt->modrm_rm, true); 4115 4045 fetch_register_operand(&ctxt->memop); 4116 4046 } 4117 4047 goto mem_common; ··· 4198 4126 ctxt->_eip = ctxt->eip; 4199 4127 ctxt->fetch.start = ctxt->_eip; 4200 4128 ctxt->fetch.end = ctxt->fetch.start + insn_len; 4129 + ctxt->opcode_len = 1; 4201 4130 if (insn_len > 0) 4202 4131 memcpy(ctxt->fetch.data, insn, insn_len); 4203 4132 ··· 4281 4208 opcode = opcode_table[ctxt->b]; 4282 4209 /* Two-byte opcode? 
*/ 4283 4210 if (ctxt->b == 0x0f) { 4284 - ctxt->twobyte = 1; 4211 + ctxt->opcode_len = 2; 4285 4212 ctxt->b = insn_fetch(u8, ctxt); 4286 4213 opcode = twobyte_table[ctxt->b]; 4214 + 4215 + /* 0F_38 opcode map */ 4216 + if (ctxt->b == 0x38) { 4217 + ctxt->opcode_len = 3; 4218 + ctxt->b = insn_fetch(u8, ctxt); 4219 + opcode = opcode_map_0f_38[ctxt->b]; 4220 + } 4287 4221 } 4288 4222 ctxt->d = opcode.flags; 4289 4223 ··· 4347 4267 if (ctxt->d == 0 || (ctxt->d & NotImpl)) 4348 4268 return EMULATION_FAILED; 4349 4269 4350 - if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) 4270 + if (!(ctxt->d & EmulateOnUD) && ctxt->ud) 4351 4271 return EMULATION_FAILED; 4352 4272 4353 4273 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) ··· 4620 4540 goto writeback; 4621 4541 } 4622 4542 4623 - if (ctxt->twobyte) 4543 + if (ctxt->opcode_len == 2) 4624 4544 goto twobyte_insn; 4545 + else if (ctxt->opcode_len == 3) 4546 + goto threebyte_insn; 4625 4547 4626 4548 switch (ctxt->b) { 4627 4549 case 0x63: /* movsxd */ ··· 4807 4725 default: 4808 4726 goto cannot_emulate; 4809 4727 } 4728 + 4729 + threebyte_insn: 4810 4730 4811 4731 if (rc != X86EMUL_CONTINUE) 4812 4732 goto done;
+34 -81
arch/x86/kvm/mmu.c
··· 2570 2570 kvm_release_pfn_clean(pfn); 2571 2571 } 2572 2572 2573 - static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) 2574 - { 2575 - mmu_free_roots(vcpu); 2576 - } 2577 - 2578 2573 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, 2579 2574 bool no_dirty_log) 2580 2575 { ··· 3419 3424 return 0; 3420 3425 } 3421 3426 3422 - static void nonpaging_free(struct kvm_vcpu *vcpu) 3427 + static void nonpaging_init_context(struct kvm_vcpu *vcpu, 3428 + struct kvm_mmu *context) 3423 3429 { 3424 - mmu_free_roots(vcpu); 3425 - } 3426 - 3427 - static int nonpaging_init_context(struct kvm_vcpu *vcpu, 3428 - struct kvm_mmu *context) 3429 - { 3430 - context->new_cr3 = nonpaging_new_cr3; 3431 3430 context->page_fault = nonpaging_page_fault; 3432 3431 context->gva_to_gpa = nonpaging_gva_to_gpa; 3433 - context->free = nonpaging_free; 3434 3432 context->sync_page = nonpaging_sync_page; 3435 3433 context->invlpg = nonpaging_invlpg; 3436 3434 context->update_pte = nonpaging_update_pte; ··· 3432 3444 context->root_hpa = INVALID_PAGE; 3433 3445 context->direct_map = true; 3434 3446 context->nx = false; 3435 - return 0; 3436 3447 } 3437 3448 3438 3449 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) ··· 3441 3454 } 3442 3455 EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); 3443 3456 3444 - static void paging_new_cr3(struct kvm_vcpu *vcpu) 3457 + void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) 3445 3458 { 3446 - pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu)); 3447 3459 mmu_free_roots(vcpu); 3448 3460 } 3449 3461 ··· 3455 3469 struct x86_exception *fault) 3456 3470 { 3457 3471 vcpu->arch.mmu.inject_page_fault(vcpu, fault); 3458 - } 3459 - 3460 - static void paging_free(struct kvm_vcpu *vcpu) 3461 - { 3462 - nonpaging_free(vcpu); 3463 3472 } 3464 3473 3465 3474 static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn, ··· 3646 3665 mmu->last_pte_bitmap = map; 3647 3666 } 3648 3667 3649 - static int paging64_init_context_common(struct kvm_vcpu *vcpu, 3650 - struct 
kvm_mmu *context, 3651 - int level) 3668 + static void paging64_init_context_common(struct kvm_vcpu *vcpu, 3669 + struct kvm_mmu *context, 3670 + int level) 3652 3671 { 3653 3672 context->nx = is_nx(vcpu); 3654 3673 context->root_level = level; ··· 3658 3677 update_last_pte_bitmap(vcpu, context); 3659 3678 3660 3679 ASSERT(is_pae(vcpu)); 3661 - context->new_cr3 = paging_new_cr3; 3662 3680 context->page_fault = paging64_page_fault; 3663 3681 context->gva_to_gpa = paging64_gva_to_gpa; 3664 3682 context->sync_page = paging64_sync_page; 3665 3683 context->invlpg = paging64_invlpg; 3666 3684 context->update_pte = paging64_update_pte; 3667 - context->free = paging_free; 3668 3685 context->shadow_root_level = level; 3669 3686 context->root_hpa = INVALID_PAGE; 3670 3687 context->direct_map = false; 3671 - return 0; 3672 3688 } 3673 3689 3674 - static int paging64_init_context(struct kvm_vcpu *vcpu, 3675 - struct kvm_mmu *context) 3690 + static void paging64_init_context(struct kvm_vcpu *vcpu, 3691 + struct kvm_mmu *context) 3676 3692 { 3677 - return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); 3693 + paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); 3678 3694 } 3679 3695 3680 - static int paging32_init_context(struct kvm_vcpu *vcpu, 3681 - struct kvm_mmu *context) 3696 + static void paging32_init_context(struct kvm_vcpu *vcpu, 3697 + struct kvm_mmu *context) 3682 3698 { 3683 3699 context->nx = false; 3684 3700 context->root_level = PT32_ROOT_LEVEL; ··· 3684 3706 update_permission_bitmask(vcpu, context, false); 3685 3707 update_last_pte_bitmap(vcpu, context); 3686 3708 3687 - context->new_cr3 = paging_new_cr3; 3688 3709 context->page_fault = paging32_page_fault; 3689 3710 context->gva_to_gpa = paging32_gva_to_gpa; 3690 - context->free = paging_free; 3691 3711 context->sync_page = paging32_sync_page; 3692 3712 context->invlpg = paging32_invlpg; 3693 3713 context->update_pte = paging32_update_pte; 3694 3714 context->shadow_root_level = 
PT32E_ROOT_LEVEL; 3695 3715 context->root_hpa = INVALID_PAGE; 3696 3716 context->direct_map = false; 3697 - return 0; 3698 3717 } 3699 3718 3700 - static int paging32E_init_context(struct kvm_vcpu *vcpu, 3701 - struct kvm_mmu *context) 3719 + static void paging32E_init_context(struct kvm_vcpu *vcpu, 3720 + struct kvm_mmu *context) 3702 3721 { 3703 - return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); 3722 + paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); 3704 3723 } 3705 3724 3706 - static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) 3725 + static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) 3707 3726 { 3708 3727 struct kvm_mmu *context = vcpu->arch.walk_mmu; 3709 3728 3710 3729 context->base_role.word = 0; 3711 - context->new_cr3 = nonpaging_new_cr3; 3712 3730 context->page_fault = tdp_page_fault; 3713 - context->free = nonpaging_free; 3714 3731 context->sync_page = nonpaging_sync_page; 3715 3732 context->invlpg = nonpaging_invlpg; 3716 3733 context->update_pte = nonpaging_update_pte; ··· 3740 3767 3741 3768 update_permission_bitmask(vcpu, context, false); 3742 3769 update_last_pte_bitmap(vcpu, context); 3743 - 3744 - return 0; 3745 3770 } 3746 3771 3747 - int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) 3772 + void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) 3748 3773 { 3749 - int r; 3750 3774 bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); 3751 3775 ASSERT(vcpu); 3752 3776 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 3753 3777 3754 3778 if (!is_paging(vcpu)) 3755 - r = nonpaging_init_context(vcpu, context); 3779 + nonpaging_init_context(vcpu, context); 3756 3780 else if (is_long_mode(vcpu)) 3757 - r = paging64_init_context(vcpu, context); 3781 + paging64_init_context(vcpu, context); 3758 3782 else if (is_pae(vcpu)) 3759 - r = paging32E_init_context(vcpu, context); 3783 + paging32E_init_context(vcpu, context); 3760 3784 else 3761 - r = paging32_init_context(vcpu, context); 3785 + 
paging32_init_context(vcpu, context); 3762 3786 3763 3787 vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); 3764 3788 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); 3765 3789 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); 3766 3790 vcpu->arch.mmu.base_role.smep_andnot_wp 3767 3791 = smep && !is_write_protection(vcpu); 3768 - 3769 - return r; 3770 3792 } 3771 3793 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); 3772 3794 3773 - int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 3795 + void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 3774 3796 bool execonly) 3775 3797 { 3776 3798 ASSERT(vcpu); ··· 3774 3806 context->shadow_root_level = kvm_x86_ops->get_tdp_level(); 3775 3807 3776 3808 context->nx = true; 3777 - context->new_cr3 = paging_new_cr3; 3778 3809 context->page_fault = ept_page_fault; 3779 3810 context->gva_to_gpa = ept_gva_to_gpa; 3780 3811 context->sync_page = ept_sync_page; 3781 3812 context->invlpg = ept_invlpg; 3782 3813 context->update_pte = ept_update_pte; 3783 - context->free = paging_free; 3784 3814 context->root_level = context->shadow_root_level; 3785 3815 context->root_hpa = INVALID_PAGE; 3786 3816 context->direct_map = false; 3787 3817 3788 3818 update_permission_bitmask(vcpu, context, true); 3789 3819 reset_rsvds_bits_mask_ept(vcpu, context, execonly); 3790 - 3791 - return 0; 3792 3820 } 3793 3821 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); 3794 3822 3795 - static int init_kvm_softmmu(struct kvm_vcpu *vcpu) 3823 + static void init_kvm_softmmu(struct kvm_vcpu *vcpu) 3796 3824 { 3797 - int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); 3798 - 3825 + kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu); 3799 3826 vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; 3800 3827 vcpu->arch.walk_mmu->get_cr3 = get_cr3; 3801 3828 vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read; 3802 3829 vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; 3803 - 3804 - return r; 3805 3830 } 3806 3831 
3807 - static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) 3832 + static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) 3808 3833 { 3809 3834 struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; 3810 3835 ··· 3834 3873 3835 3874 update_permission_bitmask(vcpu, g_context, false); 3836 3875 update_last_pte_bitmap(vcpu, g_context); 3837 - 3838 - return 0; 3839 3876 } 3840 3877 3841 - static int init_kvm_mmu(struct kvm_vcpu *vcpu) 3878 + static void init_kvm_mmu(struct kvm_vcpu *vcpu) 3842 3879 { 3843 3880 if (mmu_is_nested(vcpu)) 3844 3881 return init_kvm_nested_mmu(vcpu); ··· 3846 3887 return init_kvm_softmmu(vcpu); 3847 3888 } 3848 3889 3849 - static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) 3890 + void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) 3850 3891 { 3851 3892 ASSERT(vcpu); 3852 - if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) 3853 - /* mmu.free() should set root_hpa = INVALID_PAGE */ 3854 - vcpu->arch.mmu.free(vcpu); 3855 - } 3856 3893 3857 - int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) 3858 - { 3859 - destroy_kvm_mmu(vcpu); 3860 - return init_kvm_mmu(vcpu); 3894 + kvm_mmu_unload(vcpu); 3895 + init_kvm_mmu(vcpu); 3861 3896 } 3862 3897 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); 3863 3898 ··· 3876 3923 void kvm_mmu_unload(struct kvm_vcpu *vcpu) 3877 3924 { 3878 3925 mmu_free_roots(vcpu); 3926 + WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); 3879 3927 } 3880 3928 EXPORT_SYMBOL_GPL(kvm_mmu_unload); 3881 3929 ··· 4235 4281 return alloc_mmu_pages(vcpu); 4236 4282 } 4237 4283 4238 - int kvm_mmu_setup(struct kvm_vcpu *vcpu) 4284 + void kvm_mmu_setup(struct kvm_vcpu *vcpu) 4239 4285 { 4240 4286 ASSERT(vcpu); 4241 4287 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 4242 4288 4243 - return init_kvm_mmu(vcpu); 4289 + init_kvm_mmu(vcpu); 4244 4290 } 4245 4291 4246 4292 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) ··· 4382 4428 int nr_to_scan = sc->nr_to_scan; 4383 4429 unsigned long freed = 0; 4384 4430 4385 - raw_spin_lock(&kvm_lock); 4431 + 
spin_lock(&kvm_lock); 4386 4432 4387 4433 list_for_each_entry(kvm, &vm_list, vm_list) { 4388 4434 int idx; ··· 4432 4478 break; 4433 4479 } 4434 4480 4435 - raw_spin_unlock(&kvm_lock); 4481 + spin_unlock(&kvm_lock); 4436 4482 return freed; 4437 - 4438 4483 } 4439 4484 4440 4485 static unsigned long ··· 4527 4574 { 4528 4575 ASSERT(vcpu); 4529 4576 4530 - destroy_kvm_mmu(vcpu); 4577 + kvm_mmu_unload(vcpu); 4531 4578 free_mmu_pages(vcpu); 4532 4579 mmu_free_memory_caches(vcpu); 4533 4580 }
+2 -2
arch/x86/kvm/mmu.h
··· 70 70 }; 71 71 72 72 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); 73 - int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); 74 - int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 73 + void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); 74 + void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context, 75 75 bool execonly); 76 76 77 77 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
+2 -6
arch/x86/kvm/svm.c
··· 1959 1959 nested_svm_vmexit(svm); 1960 1960 } 1961 1961 1962 - static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) 1962 + static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) 1963 1963 { 1964 - int r; 1965 - 1966 - r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); 1964 + kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu); 1967 1965 1968 1966 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; 1969 1967 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; ··· 1969 1971 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; 1970 1972 vcpu->arch.mmu.shadow_root_level = get_npt_level(); 1971 1973 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 1972 - 1973 - return r; 1974 1974 } 1975 1975 1976 1976 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
+113 -45
arch/x86/kvm/vmx.c
··· 1498 1498 break; 1499 1499 1500 1500 if (i == NR_AUTOLOAD_MSRS) { 1501 - printk_once(KERN_WARNING"Not enough mst switch entries. " 1501 + printk_once(KERN_WARNING "Not enough msr switch entries. " 1502 1502 "Can't add msr %x\n", msr); 1503 1503 return; 1504 1504 } else if (i == m->nr) { ··· 1898 1898 /* 1899 1899 * KVM wants to inject page-faults which it got to the guest. This function 1900 1900 * checks whether in a nested guest, we need to inject them to L1 or L2. 1901 - * This function assumes it is called with the exit reason in vmcs02 being 1902 - * a #PF exception (this is the only case in which KVM injects a #PF when L2 1903 - * is running). 1904 1901 */ 1905 - static int nested_pf_handled(struct kvm_vcpu *vcpu) 1902 + static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) 1906 1903 { 1907 1904 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 1908 1905 1909 - /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ 1910 - if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR))) 1906 + if (!(vmcs12->exception_bitmap & (1u << nr))) 1911 1907 return 0; 1912 1908 1913 1909 nested_vmx_vmexit(vcpu); ··· 1917 1921 struct vcpu_vmx *vmx = to_vmx(vcpu); 1918 1922 u32 intr_info = nr | INTR_INFO_VALID_MASK; 1919 1923 1920 - if (nr == PF_VECTOR && is_guest_mode(vcpu) && 1921 - !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) 1924 + if (!reinject && is_guest_mode(vcpu) && 1925 + nested_vmx_check_exception(vcpu, nr)) 1922 1926 return; 1923 1927 1924 1928 if (has_error_code) { ··· 2200 2204 #ifdef CONFIG_X86_64 2201 2205 VM_EXIT_HOST_ADDR_SPACE_SIZE | 2202 2206 #endif 2203 - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; 2207 + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | 2208 + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 2209 + if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) || 2210 + !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { 2211 + nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 2212 + 
nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER; 2213 + } 2204 2214 nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | 2205 - VM_EXIT_LOAD_IA32_EFER); 2215 + VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER); 2206 2216 2207 2217 /* entry controls */ 2208 2218 rdmsr(MSR_IA32_VMX_ENTRY_CTLS, ··· 2228 2226 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); 2229 2227 nested_vmx_procbased_ctls_low = 0; 2230 2228 nested_vmx_procbased_ctls_high &= 2231 - CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING | 2229 + CPU_BASED_VIRTUAL_INTR_PENDING | 2230 + CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | 2232 2231 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | 2233 2232 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | 2234 2233 CPU_BASED_CR3_STORE_EXITING | ··· 2255 2252 nested_vmx_secondary_ctls_low = 0; 2256 2253 nested_vmx_secondary_ctls_high &= 2257 2254 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 2255 + SECONDARY_EXEC_UNRESTRICTED_GUEST | 2258 2256 SECONDARY_EXEC_WBINVD_EXITING; 2259 2257 2260 2258 if (enable_ept) { 2261 2259 /* nested EPT: emulate EPT also to L1 */ 2262 2260 nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; 2263 2261 nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | 2264 - VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; 2262 + VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | 2263 + VMX_EPT_INVEPT_BIT; 2265 2264 nested_vmx_ept_caps &= vmx_capability.ept; 2266 2265 /* 2267 2266 * Since invept is completely emulated we support both global ··· 3385 3380 if (enable_ept) { 3386 3381 eptp = construct_eptp(cr3); 3387 3382 vmcs_write64(EPT_POINTER, eptp); 3388 - guest_cr3 = is_paging(vcpu) ? 
kvm_read_cr3(vcpu) : 3389 - vcpu->kvm->arch.ept_identity_map_addr; 3383 + if (is_paging(vcpu) || is_guest_mode(vcpu)) 3384 + guest_cr3 = kvm_read_cr3(vcpu); 3385 + else 3386 + guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr; 3390 3387 ept_load_pdptrs(vcpu); 3391 3388 } 3392 3389 ··· 4886 4879 hypercall[2] = 0xc1; 4887 4880 } 4888 4881 4882 + static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val) 4883 + { 4884 + unsigned long always_on = VMXON_CR0_ALWAYSON; 4885 + 4886 + if (nested_vmx_secondary_ctls_high & 4887 + SECONDARY_EXEC_UNRESTRICTED_GUEST && 4888 + nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) 4889 + always_on &= ~(X86_CR0_PE | X86_CR0_PG); 4890 + return (val & always_on) == always_on; 4891 + } 4892 + 4889 4893 /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ 4890 4894 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) 4891 4895 { ··· 4915 4897 val = (val & ~vmcs12->cr0_guest_host_mask) | 4916 4898 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); 4917 4899 4918 - /* TODO: will have to take unrestricted guest mode into 4919 - * account */ 4920 - if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) 4900 + if (!nested_cr0_valid(vmcs12, val)) 4921 4901 return 1; 4922 4902 4923 4903 if (kvm_set_cr0(vcpu, val)) ··· 6643 6627 return 0; 6644 6628 else if (is_page_fault(intr_info)) 6645 6629 return enable_ept; 6630 + else if (is_no_device(intr_info) && 6631 + !(nested_read_cr0(vmcs12) & X86_CR0_TS)) 6632 + return 0; 6646 6633 return vmcs12->exception_bitmap & 6647 6634 (1u << (intr_info & INTR_INFO_VECTOR_MASK)); 6648 6635 case EXIT_REASON_EXTERNAL_INTERRUPT: ··· 6741 6722 *info2 = vmcs_read32(VM_EXIT_INTR_INFO); 6742 6723 } 6743 6724 6725 + static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) 6726 + { 6727 + u64 delta_tsc_l1; 6728 + u32 preempt_val_l1, preempt_val_l2, preempt_scale; 6729 + 6730 + if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control & 6731 + PIN_BASED_VMX_PREEMPTION_TIMER)) 
6732 + return; 6733 + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & 6734 + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; 6735 + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); 6736 + delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc()) 6737 + - vcpu->arch.last_guest_tsc; 6738 + preempt_val_l1 = delta_tsc_l1 >> preempt_scale; 6739 + if (preempt_val_l2 <= preempt_val_l1) 6740 + preempt_val_l2 = 0; 6741 + else 6742 + preempt_val_l2 -= preempt_val_l1; 6743 + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); 6744 + } 6745 + 6744 6746 /* 6745 6747 * The guest has exited. See if we can fix it or if we need userspace 6746 6748 * assistance. ··· 6775 6735 /* If guest state is invalid, start emulating */ 6776 6736 if (vmx->emulation_required) 6777 6737 return handle_invalid_guest_state(vcpu); 6778 - 6779 - /* 6780 - * the KVM_REQ_EVENT optimization bit is only on for one entry, and if 6781 - * we did not inject a still-pending event to L1 now because of 6782 - * nested_run_pending, we need to re-enable this bit. 
6783 - */ 6784 - if (vmx->nested.nested_run_pending) 6785 - kvm_make_request(KVM_REQ_EVENT, vcpu); 6786 - 6787 - if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH || 6788 - exit_reason == EXIT_REASON_VMRESUME)) 6789 - vmx->nested.nested_run_pending = 1; 6790 - else 6791 - vmx->nested.nested_run_pending = 0; 6792 6738 6793 6739 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { 6794 6740 nested_vmx_vmexit(vcpu); ··· 7087 7061 case INTR_TYPE_HARD_EXCEPTION: 7088 7062 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { 7089 7063 u32 err = vmcs_read32(error_code_field); 7090 - kvm_queue_exception_e(vcpu, vector, err); 7064 + kvm_requeue_exception_e(vcpu, vector, err); 7091 7065 } else 7092 - kvm_queue_exception(vcpu, vector); 7066 + kvm_requeue_exception(vcpu, vector); 7093 7067 break; 7094 7068 case INTR_TYPE_SOFT_INTR: 7095 7069 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); ··· 7172 7146 atomic_switch_perf_msrs(vmx); 7173 7147 debugctlmsr = get_debugctlmsr(); 7174 7148 7149 + if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) 7150 + nested_adjust_preemption_timer(vcpu); 7175 7151 vmx->__launched = vmx->loaded_vmcs->launched; 7176 7152 asm( 7177 7153 /* Store host registers */ ··· 7312 7284 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 7313 7285 trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); 7314 7286 7287 + /* 7288 + * the KVM_REQ_EVENT optimization bit is only on for one entry, and if 7289 + * we did not inject a still-pending event to L1 now because of 7290 + * nested_run_pending, we need to re-enable this bit. 
7291 + */ 7292 + if (vmx->nested.nested_run_pending) 7293 + kvm_make_request(KVM_REQ_EVENT, vcpu); 7294 + 7295 + vmx->nested.nested_run_pending = 0; 7296 + 7315 7297 vmx_complete_atomic_exit(vmx); 7316 7298 vmx_recover_nmi_blocking(vmx); 7317 7299 vmx_complete_interrupts(vmx); ··· 7448 7410 */ 7449 7411 if (is_mmio) 7450 7412 ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; 7451 - else if (vcpu->kvm->arch.iommu_domain && 7452 - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) 7413 + else if (kvm_arch_has_noncoherent_dma(vcpu->kvm)) 7453 7414 ret = kvm_get_guest_memory_type(vcpu, gfn) << 7454 7415 VMX_EPT_MT_EPTE_SHIFT; 7455 7416 else ··· 7538 7501 return get_vmcs12(vcpu)->ept_pointer; 7539 7502 } 7540 7503 7541 - static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 7504 + static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) 7542 7505 { 7543 - int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, 7506 + kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu, 7544 7507 nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT); 7545 7508 7546 7509 vcpu->arch.mmu.set_cr3 = vmx_set_cr3; ··· 7548 7511 vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; 7549 7512 7550 7513 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 7551 - 7552 - return r; 7553 7514 } 7554 7515 7555 7516 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) 7556 7517 { 7557 7518 vcpu->arch.walk_mmu = &vcpu->arch.mmu; 7519 + } 7520 + 7521 + static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, 7522 + struct x86_exception *fault) 7523 + { 7524 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 7525 + 7526 + WARN_ON(!is_guest_mode(vcpu)); 7527 + 7528 + /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. 
*/ 7529 + if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) 7530 + nested_vmx_vmexit(vcpu); 7531 + else 7532 + kvm_inject_page_fault(vcpu, fault); 7558 7533 } 7559 7534 7560 7535 /* ··· 7582 7533 { 7583 7534 struct vcpu_vmx *vmx = to_vmx(vcpu); 7584 7535 u32 exec_control; 7536 + u32 exit_control; 7585 7537 7586 7538 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); 7587 7539 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); ··· 7756 7706 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER 7757 7707 * bits are further modified by vmx_set_efer() below. 7758 7708 */ 7759 - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); 7709 + exit_control = vmcs_config.vmexit_ctrl; 7710 + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) 7711 + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 7712 + vmcs_write32(VM_EXIT_CONTROLS, exit_control); 7760 7713 7761 7714 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7762 7715 * emulated by vmx_set_efer(), below. 
··· 7825 7772 /* shadow page tables on either EPT or shadow page tables */ 7826 7773 kvm_set_cr3(vcpu, vmcs12->guest_cr3); 7827 7774 kvm_mmu_reset_context(vcpu); 7775 + 7776 + if (!enable_ept) 7777 + vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; 7828 7778 7829 7779 /* 7830 7780 * L1 may access the L2's PDPTR, so save them to construct vmcs12 ··· 7932 7876 return 1; 7933 7877 } 7934 7878 7935 - if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || 7879 + if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) || 7936 7880 ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { 7937 7881 nested_vmx_entry_failure(vcpu, vmcs12, 7938 7882 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); ··· 7993 7937 return -ENOMEM; 7994 7938 7995 7939 enter_guest_mode(vcpu); 7940 + 7941 + vmx->nested.nested_run_pending = 1; 7996 7942 7997 7943 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 7998 7944 ··· 8063 8005 u32 idt_vectoring; 8064 8006 unsigned int nr; 8065 8007 8066 - if (vcpu->arch.exception.pending) { 8008 + if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) { 8067 8009 nr = vcpu->arch.exception.nr; 8068 8010 idt_vectoring = nr | VECTORING_INFO_VALID_MASK; 8069 8011 ··· 8081 8023 } 8082 8024 8083 8025 vmcs12->idt_vectoring_info_field = idt_vectoring; 8084 - } else if (vcpu->arch.nmi_pending) { 8026 + } else if (vcpu->arch.nmi_injected) { 8085 8027 vmcs12->idt_vectoring_info_field = 8086 8028 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; 8087 8029 } else if (vcpu->arch.interrupt.pending) { ··· 8163 8105 vmcs12->guest_pending_dbg_exceptions = 8164 8106 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); 8165 8107 8108 + if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && 8109 + (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) 8110 + vmcs12->vmx_preemption_timer_value = 8111 + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); 8112 + 8166 8113 /* 8167 8114 * In some cases 
(usually, nested EPT), L2 is allowed to change its 8168 8115 * own CR3 without exiting. If it has changed it, we must keep it. ··· 8193 8130 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 8194 8131 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) 8195 8132 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); 8133 + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) 8134 + vmcs12->guest_ia32_efer = vcpu->arch.efer; 8196 8135 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); 8197 8136 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); 8198 8137 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); ··· 8266 8201 * fpu_active (which may have changed). 8267 8202 * Note that vmx_set_cr0 refers to efer set above. 8268 8203 */ 8269 - kvm_set_cr0(vcpu, vmcs12->host_cr0); 8204 + vmx_set_cr0(vcpu, vmcs12->host_cr0); 8270 8205 /* 8271 8206 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need 8272 8207 * to apply the same changes to L1's vmcs. We just set cr0 correctly, ··· 8288 8223 8289 8224 kvm_set_cr3(vcpu, vmcs12->host_cr3); 8290 8225 kvm_mmu_reset_context(vcpu); 8226 + 8227 + if (!enable_ept) 8228 + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; 8291 8229 8292 8230 if (enable_vpid) { 8293 8231 /*
+77 -31
arch/x86/kvm/x86.c
··· 577 577 int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) 578 578 { 579 579 u64 xcr0; 580 + u64 valid_bits; 580 581 581 582 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ 582 583 if (index != XCR_XFEATURE_ENABLED_MASK) ··· 587 586 return 1; 588 587 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) 589 588 return 1; 590 - if (xcr0 & ~host_xcr0) 589 + 590 + /* 591 + * Do not allow the guest to set bits that we do not support 592 + * saving. However, xcr0 bit 0 is always set, even if the 593 + * emulated CPU does not support XSAVE (see fx_init). 594 + */ 595 + valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP; 596 + if (xcr0 & ~valid_bits) 591 597 return 1; 598 + 592 599 kvm_put_guest_xcr0(vcpu); 593 600 vcpu->arch.xcr0 = xcr0; 594 601 return 0; ··· 693 684 694 685 vcpu->arch.cr3 = cr3; 695 686 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); 696 - vcpu->arch.mmu.new_cr3(vcpu); 687 + kvm_mmu_new_cr3(vcpu); 697 688 return 0; 698 689 } 699 690 EXPORT_SYMBOL_GPL(kvm_set_cr3); ··· 2573 2564 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 2574 2565 case KVM_CAP_SET_TSS_ADDR: 2575 2566 case KVM_CAP_EXT_CPUID: 2567 + case KVM_CAP_EXT_EMUL_CPUID: 2576 2568 case KVM_CAP_CLOCKSOURCE: 2577 2569 case KVM_CAP_PIT: 2578 2570 case KVM_CAP_NOP_IO_DELAY: ··· 2683 2673 r = 0; 2684 2674 break; 2685 2675 } 2686 - case KVM_GET_SUPPORTED_CPUID: { 2676 + case KVM_GET_SUPPORTED_CPUID: 2677 + case KVM_GET_EMULATED_CPUID: { 2687 2678 struct kvm_cpuid2 __user *cpuid_arg = argp; 2688 2679 struct kvm_cpuid2 cpuid; 2689 2680 2690 2681 r = -EFAULT; 2691 2682 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2692 2683 goto out; 2693 - r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, 2694 - cpuid_arg->entries); 2684 + 2685 + r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, 2686 + ioctl); 2695 2687 if (r) 2696 2688 goto out; 2697 2689 ··· 2727 2715 2728 2716 static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) 2729 2717 { 2730 - return 
vcpu->kvm->arch.iommu_domain && 2731 - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); 2718 + return kvm_arch_has_noncoherent_dma(vcpu->kvm); 2732 2719 } 2733 2720 2734 2721 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ··· 2995 2984 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, 2996 2985 struct kvm_xsave *guest_xsave) 2997 2986 { 2998 - if (cpu_has_xsave) 2987 + if (cpu_has_xsave) { 2999 2988 memcpy(guest_xsave->region, 3000 2989 &vcpu->arch.guest_fpu.state->xsave, 3001 - xstate_size); 3002 - else { 2990 + vcpu->arch.guest_xstate_size); 2991 + *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &= 2992 + vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE; 2993 + } else { 3003 2994 memcpy(guest_xsave->region, 3004 2995 &vcpu->arch.guest_fpu.state->fxsave, 3005 2996 sizeof(struct i387_fxsave_struct)); ··· 3016 3003 u64 xstate_bv = 3017 3004 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; 3018 3005 3019 - if (cpu_has_xsave) 3006 + if (cpu_has_xsave) { 3007 + /* 3008 + * Here we allow setting states that are not present in 3009 + * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility 3010 + * with old userspace. 
3011 + */ 3012 + if (xstate_bv & ~KVM_SUPPORTED_XCR0) 3013 + return -EINVAL; 3014 + if (xstate_bv & ~host_xcr0) 3015 + return -EINVAL; 3020 3016 memcpy(&vcpu->arch.guest_fpu.state->xsave, 3021 - guest_xsave->region, xstate_size); 3022 - else { 3017 + guest_xsave->region, vcpu->arch.guest_xstate_size); 3018 + } else { 3023 3019 if (xstate_bv & ~XSTATE_FPSSE) 3024 3020 return -EINVAL; 3025 3021 memcpy(&vcpu->arch.guest_fpu.state->fxsave, ··· 3064 3042 3065 3043 for (i = 0; i < guest_xcrs->nr_xcrs; i++) 3066 3044 /* Only support XCR0 currently */ 3067 - if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { 3045 + if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) { 3068 3046 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, 3069 - guest_xcrs->xcrs[0].value); 3047 + guest_xcrs->xcrs[i].value); 3070 3048 break; 3071 3049 } 3072 3050 if (r) ··· 4797 4775 4798 4776 static void init_decode_cache(struct x86_emulate_ctxt *ctxt) 4799 4777 { 4800 - memset(&ctxt->twobyte, 0, 4801 - (void *)&ctxt->_regs - (void *)&ctxt->twobyte); 4778 + memset(&ctxt->opcode_len, 0, 4779 + (void *)&ctxt->_regs - (void *)&ctxt->opcode_len); 4802 4780 4803 4781 ctxt->fetch.start = 0; 4804 4782 ctxt->fetch.end = 0; ··· 5116 5094 ctxt->have_exception = false; 5117 5095 ctxt->perm_ok = false; 5118 5096 5119 - ctxt->only_vendor_specific_insn 5120 - = emulation_type & EMULTYPE_TRAP_UD; 5097 + ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; 5121 5098 5122 5099 r = x86_decode_insn(ctxt, insn, insn_len); 5123 5100 ··· 5284 5263 5285 5264 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); 5286 5265 5287 - raw_spin_lock(&kvm_lock); 5266 + spin_lock(&kvm_lock); 5288 5267 list_for_each_entry(kvm, &vm_list, vm_list) { 5289 5268 kvm_for_each_vcpu(i, vcpu, kvm) { 5290 5269 if (vcpu->cpu != freq->cpu) ··· 5294 5273 send_ipi = 1; 5295 5274 } 5296 5275 } 5297 - raw_spin_unlock(&kvm_lock); 5276 + spin_unlock(&kvm_lock); 5298 5277 5299 5278 if (freq->old < freq->new && send_ipi) { 5300 5279 /* 
··· 5447 5426 struct kvm_vcpu *vcpu; 5448 5427 int i; 5449 5428 5450 - raw_spin_lock(&kvm_lock); 5429 + spin_lock(&kvm_lock); 5451 5430 list_for_each_entry(kvm, &vm_list, vm_list) 5452 5431 kvm_for_each_vcpu(i, vcpu, kvm) 5453 5432 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); 5454 5433 atomic_set(&kvm_guest_has_master_clock, 0); 5455 - raw_spin_unlock(&kvm_lock); 5434 + spin_unlock(&kvm_lock); 5456 5435 } 5457 5436 5458 5437 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); ··· 5966 5945 5967 5946 vcpu->mode = IN_GUEST_MODE; 5968 5947 5948 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 5949 + 5969 5950 /* We should set ->mode before check ->requests, 5970 5951 * see the comment in make_all_cpus_request. 5971 5952 */ 5972 - smp_mb(); 5953 + smp_mb__after_srcu_read_unlock(); 5973 5954 5974 5955 local_irq_disable(); 5975 5956 ··· 5981 5958 smp_wmb(); 5982 5959 local_irq_enable(); 5983 5960 preempt_enable(); 5961 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 5984 5962 r = 1; 5985 5963 goto cancel_injection; 5986 5964 } 5987 - 5988 - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 5989 5965 5990 5966 if (req_immediate_exit) 5991 5967 smp_send_reschedule(vcpu->cpu); ··· 6710 6688 if (r) 6711 6689 return r; 6712 6690 kvm_vcpu_reset(vcpu); 6713 - r = kvm_mmu_setup(vcpu); 6691 + kvm_mmu_setup(vcpu); 6714 6692 vcpu_put(vcpu); 6715 6693 6716 6694 return r; ··· 6962 6940 6963 6941 vcpu->arch.ia32_tsc_adjust_msr = 0x0; 6964 6942 vcpu->arch.pv_time_enabled = false; 6943 + 6944 + vcpu->arch.guest_supported_xcr0 = 0; 6945 + vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; 6946 + 6965 6947 kvm_async_pf_hash_reset(vcpu); 6966 6948 kvm_pmu_init(vcpu); 6967 6949 ··· 7007 6981 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 7008 6982 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); 7009 6983 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 6984 + atomic_set(&kvm->arch.noncoherent_dma_count, 0); 7010 6985 7011 6986 /* Reserve bit 0 of 
irq_sources_bitmap for userspace irq source */ 7012 6987 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); ··· 7092 7065 kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); 7093 7066 } 7094 7067 7095 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 7068 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 7096 7069 struct kvm_memory_slot *dont) 7097 7070 { 7098 7071 int i; ··· 7113 7086 } 7114 7087 } 7115 7088 7116 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) 7089 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 7090 + unsigned long npages) 7117 7091 { 7118 7092 int i; 7119 7093 ··· 7311 7283 int r; 7312 7284 7313 7285 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || 7314 - is_error_page(work->page)) 7286 + work->wakeup_all) 7315 7287 return; 7316 7288 7317 7289 r = kvm_mmu_reload(vcpu); ··· 7421 7393 struct x86_exception fault; 7422 7394 7423 7395 trace_kvm_async_pf_ready(work->arch.token, work->gva); 7424 - if (is_error_page(work->page)) 7396 + if (work->wakeup_all) 7425 7397 work->arch.token = ~0; /* broadcast wakeup */ 7426 7398 else 7427 7399 kvm_del_async_pf_gfn(vcpu, work->arch.gfn); ··· 7447 7419 return !kvm_event_needs_reinjection(vcpu) && 7448 7420 kvm_x86_ops->interrupt_allowed(vcpu); 7449 7421 } 7422 + 7423 + void kvm_arch_register_noncoherent_dma(struct kvm *kvm) 7424 + { 7425 + atomic_inc(&kvm->arch.noncoherent_dma_count); 7426 + } 7427 + EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); 7428 + 7429 + void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) 7430 + { 7431 + atomic_dec(&kvm->arch.noncoherent_dma_count); 7432 + } 7433 + EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); 7434 + 7435 + bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) 7436 + { 7437 + return atomic_read(&kvm->arch.noncoherent_dma_count); 7438 + } 7439 + EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); 7450 7440 7451 7441 
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 7452 7442 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
+1
arch/x86/kvm/x86.h
··· 122 122 gva_t addr, void *val, unsigned int bytes, 123 123 struct x86_exception *exception); 124 124 125 + #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) 125 126 extern u64 host_xcr0; 126 127 127 128 extern struct static_key kvm_no_apic_vcpu;
+26 -16
include/linux/kvm_host.h
··· 142 142 struct kvm_vcpu; 143 143 extern struct kmem_cache *kvm_vcpu_cache; 144 144 145 - extern raw_spinlock_t kvm_lock; 145 + extern spinlock_t kvm_lock; 146 146 extern struct list_head vm_list; 147 147 148 148 struct kvm_io_range { ··· 189 189 gva_t gva; 190 190 unsigned long addr; 191 191 struct kvm_arch_async_pf arch; 192 - struct page *page; 193 - bool done; 192 + bool wakeup_all; 194 193 }; 195 194 196 195 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); ··· 507 508 struct kvm_userspace_memory_region *mem); 508 509 int __kvm_set_memory_region(struct kvm *kvm, 509 510 struct kvm_userspace_memory_region *mem); 510 - void kvm_arch_free_memslot(struct kvm_memory_slot *free, 511 + void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, 511 512 struct kvm_memory_slot *dont); 512 - int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); 513 + int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, 514 + unsigned long npages); 513 515 void kvm_arch_memslots_updated(struct kvm *kvm); 514 516 int kvm_arch_prepare_memory_region(struct kvm *kvm, 515 517 struct kvm_memory_slot *memslot, ··· 671 671 } 672 672 #endif 673 673 674 + #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA 675 + void kvm_arch_register_noncoherent_dma(struct kvm *kvm); 676 + void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); 677 + bool kvm_arch_has_noncoherent_dma(struct kvm *kvm); 678 + #else 679 + static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm) 680 + { 681 + } 682 + 683 + static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) 684 + { 685 + } 686 + 687 + static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) 688 + { 689 + return false; 690 + } 691 + #endif 692 + 674 693 static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) 675 694 { 676 695 #ifdef __KVM_HAVE_ARCH_WQP ··· 766 747 int kvm_request_irq_source_id(struct kvm *kvm); 767 748 void 
kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 768 749 769 - /* For vcpu->arch.iommu_flags */ 770 - #define KVM_IOMMU_CACHE_COHERENCY 0x1 771 - 772 750 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 773 751 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); 774 752 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); ··· 805 789 806 790 /* KVM does not hold any references to rcu protected data when it 807 791 * switches CPU into a guest mode. In fact switching to a guest mode 808 - * is very similar to exiting to userspase from rcu point of view. In 792 + * is very similar to exiting to userspace from rcu point of view. In 809 793 * addition CPU may stay in a guest mode for quite a long time (up to 810 794 * one time slice). Lets treat guest mode as quiescent state, just like 811 795 * we do with user-mode execution. ··· 856 840 static inline int memslot_id(struct kvm *kvm, gfn_t gfn) 857 841 { 858 842 return gfn_to_memslot(kvm, gfn)->id; 859 - } 860 - 861 - static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) 862 - { 863 - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ 864 - return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - 865 - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); 866 843 } 867 844 868 845 static inline gfn_t ··· 1075 1066 1076 1067 extern struct kvm_device_ops kvm_mpic_ops; 1077 1068 extern struct kvm_device_ops kvm_xics_ops; 1069 + extern struct kvm_device_ops kvm_vfio_ops; 1078 1070 1079 1071 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1080 1072
+8
include/linux/sched.h
··· 286 286 } 287 287 #endif 288 288 289 + #ifdef CONFIG_DETECT_HUNG_TASK 290 + void reset_hung_task_detector(void); 291 + #else 292 + static inline void reset_hung_task_detector(void) 293 + { 294 + } 295 + #endif 296 + 289 297 /* Attach to any functions which should be ignored in wchan output. */ 290 298 #define __sched __attribute__((__section__(".sched.text"))) 291 299
+14
include/linux/srcu.h
··· 237 237 __srcu_read_unlock(sp, idx); 238 238 } 239 239 240 + /** 241 + * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock 242 + * 243 + * Converts the preceding srcu_read_unlock into a two-way memory barrier. 244 + * 245 + * Call this after srcu_read_unlock, to guarantee that all memory operations 246 + * that occur after smp_mb__after_srcu_read_unlock will appear to happen after 247 + * the preceding srcu_read_unlock. 248 + */ 249 + static inline void smp_mb__after_srcu_read_unlock(void) 250 + { 251 + /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ 252 + } 253 + 240 254 #endif
+4 -6
include/trace/events/kvm.h
··· 296 296 297 297 TRACE_EVENT( 298 298 kvm_async_pf_completed, 299 - TP_PROTO(unsigned long address, struct page *page, u64 gva), 300 - TP_ARGS(address, page, gva), 299 + TP_PROTO(unsigned long address, u64 gva), 300 + TP_ARGS(address, gva), 301 301 302 302 TP_STRUCT__entry( 303 303 __field(unsigned long, address) 304 - __field(pfn_t, pfn) 305 304 __field(u64, gva) 306 305 ), 307 306 308 307 TP_fast_assign( 309 308 __entry->address = address; 310 - __entry->pfn = page ? page_to_pfn(page) : 0; 311 309 __entry->gva = gva; 312 310 ), 313 311 314 - TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva, 315 - __entry->address, __entry->pfn) 312 + TP_printk("gva %#llx address %#lx", __entry->gva, 313 + __entry->address) 316 314 ); 317 315 318 316 #endif
+11
include/uapi/linux/kvm.h
··· 518 518 /* machine type bits, to be used as argument to KVM_CREATE_VM */ 519 519 #define KVM_VM_S390_UCONTROL 1 520 520 521 + /* on ppc, 0 indicate default, 1 should force HV and 2 PR */ 522 + #define KVM_VM_PPC_HV 1 523 + #define KVM_VM_PPC_PR 2 524 + 521 525 #define KVM_S390_SIE_PAGE_OFFSET 1 522 526 523 527 /* ··· 545 541 #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 546 542 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 547 543 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 544 + #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) 548 545 549 546 /* 550 547 * Extension capability list. ··· 673 668 #define KVM_CAP_IRQ_XICS 92 674 669 #define KVM_CAP_ARM_EL1_32BIT 93 675 670 #define KVM_CAP_SPAPR_MULTITCE 94 671 + #define KVM_CAP_EXT_EMUL_CPUID 95 676 672 677 673 #ifdef KVM_CAP_IRQ_ROUTING 678 674 ··· 849 843 #define KVM_DEV_TYPE_FSL_MPIC_20 1 850 844 #define KVM_DEV_TYPE_FSL_MPIC_42 2 851 845 #define KVM_DEV_TYPE_XICS 3 846 + #define KVM_DEV_TYPE_VFIO 4 847 + #define KVM_DEV_VFIO_GROUP 1 848 + #define KVM_DEV_VFIO_GROUP_ADD 1 849 + #define KVM_DEV_VFIO_GROUP_DEL 2 852 850 853 851 /* 854 852 * ioctls for VM fds ··· 1022 1012 /* VM is being stopped by host */ 1023 1013 #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) 1024 1014 #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) 1015 + #define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) 1025 1016 #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) 1026 1017 1027 1018 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
+11
kernel/hung_task.c
··· 207 207 return ret; 208 208 } 209 209 210 + static atomic_t reset_hung_task = ATOMIC_INIT(0); 211 + 212 + void reset_hung_task_detector(void) 213 + { 214 + atomic_set(&reset_hung_task, 1); 215 + } 216 + EXPORT_SYMBOL_GPL(reset_hung_task_detector); 217 + 210 218 /* 211 219 * kthread which checks for tasks stuck in D state 212 220 */ ··· 227 219 228 220 while (schedule_timeout_interruptible(timeout_jiffies(timeout))) 229 221 timeout = sysctl_hung_task_timeout_secs; 222 + 223 + if (atomic_xchg(&reset_hung_task, 0)) 224 + continue; 230 225 231 226 check_hung_uninterruptible_tasks(timeout); 232 227 }
+3
virt/kvm/Kconfig
··· 27 27 28 28 config HAVE_KVM_CPU_RELAX_INTERCEPT 29 29 bool 30 + 31 + config KVM_VFIO 32 + bool
+6 -16
virt/kvm/async_pf.c
··· 56 56 57 57 static void async_pf_execute(struct work_struct *work) 58 58 { 59 - struct page *page = NULL; 60 59 struct kvm_async_pf *apf = 61 60 container_of(work, struct kvm_async_pf, work); 62 61 struct mm_struct *mm = apf->mm; ··· 67 68 68 69 use_mm(mm); 69 70 down_read(&mm->mmap_sem); 70 - get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); 71 + get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); 71 72 up_read(&mm->mmap_sem); 72 73 unuse_mm(mm); 73 74 74 75 spin_lock(&vcpu->async_pf.lock); 75 76 list_add_tail(&apf->link, &vcpu->async_pf.done); 76 - apf->page = page; 77 - apf->done = true; 78 77 spin_unlock(&vcpu->async_pf.lock); 79 78 80 79 /* ··· 80 83 * this point 81 84 */ 82 85 83 - trace_kvm_async_pf_completed(addr, page, gva); 86 + trace_kvm_async_pf_completed(addr, gva); 84 87 85 88 if (waitqueue_active(&vcpu->wq)) 86 89 wake_up_interruptible(&vcpu->wq); ··· 96 99 struct kvm_async_pf *work = 97 100 list_entry(vcpu->async_pf.queue.next, 98 101 typeof(*work), queue); 99 - cancel_work_sync(&work->work); 100 102 list_del(&work->queue); 101 - if (!work->done) { /* work was canceled */ 103 + if (cancel_work_sync(&work->work)) { 102 104 mmdrop(work->mm); 103 105 kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ 104 106 kmem_cache_free(async_pf_cache, work); ··· 110 114 list_entry(vcpu->async_pf.done.next, 111 115 typeof(*work), link); 112 116 list_del(&work->link); 113 - if (!is_error_page(work->page)) 114 - kvm_release_page_clean(work->page); 115 117 kmem_cache_free(async_pf_cache, work); 116 118 } 117 119 spin_unlock(&vcpu->async_pf.lock); ··· 129 135 list_del(&work->link); 130 136 spin_unlock(&vcpu->async_pf.lock); 131 137 132 - if (work->page) 133 - kvm_arch_async_page_ready(vcpu, work); 138 + kvm_arch_async_page_ready(vcpu, work); 134 139 kvm_arch_async_page_present(vcpu, work); 135 140 136 141 list_del(&work->queue); 137 142 vcpu->async_pf.queued--; 138 - if (!is_error_page(work->page)) 139 - kvm_release_page_clean(work->page); 140 143 
kmem_cache_free(async_pf_cache, work); 141 144 } 142 145 } ··· 156 165 if (!work) 157 166 return 0; 158 167 159 - work->page = NULL; 160 - work->done = false; 168 + work->wakeup_all = false; 161 169 work->vcpu = vcpu; 162 170 work->gva = gva; 163 171 work->addr = gfn_to_hva(vcpu->kvm, gfn); ··· 196 206 if (!work) 197 207 return -ENOMEM; 198 208 199 - work->page = KVM_ERR_PTR_BAD_PAGE; 209 + work->wakeup_all = true; 200 210 INIT_LIST_HEAD(&work->queue); /* for list_del to work */ 201 211 202 212 spin_lock(&vcpu->async_pf.lock);
+20 -18
virt/kvm/iommu.c
··· 79 79 flags = IOMMU_READ; 80 80 if (!(slot->flags & KVM_MEM_READONLY)) 81 81 flags |= IOMMU_WRITE; 82 - if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) 82 + if (!kvm->arch.iommu_noncoherent) 83 83 flags |= IOMMU_CACHE; 84 84 85 85 ··· 101 101 102 102 /* Make sure gfn is aligned to the page size we want to map */ 103 103 while ((gfn << PAGE_SHIFT) & (page_size - 1)) 104 + page_size >>= 1; 105 + 106 + /* Make sure hva is aligned to the page size we want to map */ 107 + while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) 104 108 page_size >>= 1; 105 109 106 110 /* ··· 144 140 struct kvm_memslots *slots; 145 141 struct kvm_memory_slot *memslot; 146 142 143 + if (kvm->arch.iommu_noncoherent) 144 + kvm_arch_register_noncoherent_dma(kvm); 145 + 147 146 idx = srcu_read_lock(&kvm->srcu); 148 147 slots = kvm_memslots(kvm); 149 148 ··· 165 158 { 166 159 struct pci_dev *pdev = NULL; 167 160 struct iommu_domain *domain = kvm->arch.iommu_domain; 168 - int r, last_flags; 161 + int r; 162 + bool noncoherent; 169 163 170 164 /* check if iommu exists and in use */ 171 165 if (!domain) ··· 182 174 return r; 183 175 } 184 176 185 - last_flags = kvm->arch.iommu_flags; 186 - if (iommu_domain_has_cap(kvm->arch.iommu_domain, 187 - IOMMU_CAP_CACHE_COHERENCY)) 188 - kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY; 177 + noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain, 178 + IOMMU_CAP_CACHE_COHERENCY); 189 179 190 180 /* Check if need to update IOMMU page table for guest memory */ 191 - if ((last_flags ^ kvm->arch.iommu_flags) == 192 - KVM_IOMMU_CACHE_COHERENCY) { 181 + if (noncoherent != kvm->arch.iommu_noncoherent) { 193 182 kvm_iommu_unmap_memslots(kvm); 183 + kvm->arch.iommu_noncoherent = noncoherent; 194 184 r = kvm_iommu_map_memslots(kvm); 195 185 if (r) 196 186 goto out_unmap; ··· 196 190 197 191 pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; 198 192 199 - printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", 200 - assigned_dev->host_segnr, 201 - 
assigned_dev->host_busnr, 202 - PCI_SLOT(assigned_dev->host_devfn), 203 - PCI_FUNC(assigned_dev->host_devfn)); 193 + dev_info(&pdev->dev, "kvm assign device\n"); 204 194 205 195 return 0; 206 196 out_unmap: ··· 222 220 223 221 pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; 224 222 225 - printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", 226 - assigned_dev->host_segnr, 227 - assigned_dev->host_busnr, 228 - PCI_SLOT(assigned_dev->host_devfn), 229 - PCI_FUNC(assigned_dev->host_devfn)); 223 + dev_info(&pdev->dev, "kvm deassign device\n"); 230 224 231 225 return 0; 232 226 } ··· 334 336 335 337 srcu_read_unlock(&kvm->srcu, idx); 336 338 339 + if (kvm->arch.iommu_noncoherent) 340 + kvm_arch_unregister_noncoherent_dma(kvm); 341 + 337 342 return 0; 338 343 } 339 344 ··· 351 350 mutex_lock(&kvm->slots_lock); 352 351 kvm_iommu_unmap_memslots(kvm); 353 352 kvm->arch.iommu_domain = NULL; 353 + kvm->arch.iommu_noncoherent = false; 354 354 mutex_unlock(&kvm->slots_lock); 355 355 356 356 iommu_domain_free(domain);
+55 -79
virt/kvm/kvm_main.c
··· 70 70 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock 71 71 */ 72 72 73 - DEFINE_RAW_SPINLOCK(kvm_lock); 73 + DEFINE_SPINLOCK(kvm_lock); 74 + static DEFINE_RAW_SPINLOCK(kvm_count_lock); 74 75 LIST_HEAD(vm_list); 75 76 76 77 static cpumask_var_t cpus_hardware_enabled; ··· 187 186 ++kvm->stat.remote_tlb_flush; 188 187 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); 189 188 } 189 + EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); 190 190 191 191 void kvm_reload_remote_mmus(struct kvm *kvm) 192 192 { ··· 492 490 if (r) 493 491 goto out_err; 494 492 495 - raw_spin_lock(&kvm_lock); 493 + spin_lock(&kvm_lock); 496 494 list_add(&kvm->vm_list, &vm_list); 497 - raw_spin_unlock(&kvm_lock); 495 + spin_unlock(&kvm_lock); 498 496 499 497 return kvm; 500 498 ··· 542 540 /* 543 541 * Free any memory in @free but not in @dont. 544 542 */ 545 - static void kvm_free_physmem_slot(struct kvm_memory_slot *free, 543 + static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, 546 544 struct kvm_memory_slot *dont) 547 545 { 548 546 if (!dont || free->dirty_bitmap != dont->dirty_bitmap) 549 547 kvm_destroy_dirty_bitmap(free); 550 548 551 - kvm_arch_free_memslot(free, dont); 549 + kvm_arch_free_memslot(kvm, free, dont); 552 550 553 551 free->npages = 0; 554 552 } ··· 559 557 struct kvm_memory_slot *memslot; 560 558 561 559 kvm_for_each_memslot(memslot, slots) 562 - kvm_free_physmem_slot(memslot, NULL); 560 + kvm_free_physmem_slot(kvm, memslot, NULL); 563 561 564 562 kfree(kvm->memslots); 565 563 } ··· 583 581 struct mm_struct *mm = kvm->mm; 584 582 585 583 kvm_arch_sync_events(kvm); 586 - raw_spin_lock(&kvm_lock); 584 + spin_lock(&kvm_lock); 587 585 list_del(&kvm->vm_list); 588 - raw_spin_unlock(&kvm_lock); 586 + spin_unlock(&kvm_lock); 589 587 kvm_free_irq_routing(kvm); 590 588 for (i = 0; i < KVM_NR_BUSES; i++) 591 589 kvm_io_bus_destroy(kvm->buses[i]); ··· 823 821 if (change == KVM_MR_CREATE) { 824 822 new.userspace_addr = mem->userspace_addr; 825 823 826 - if 
(kvm_arch_create_memslot(&new, npages)) 824 + if (kvm_arch_create_memslot(kvm, &new, npages)) 827 825 goto out_free; 828 826 } 829 827 ··· 874 872 goto out_free; 875 873 } 876 874 875 + /* actual memory is freed via old in kvm_free_physmem_slot below */ 876 + if (change == KVM_MR_DELETE) { 877 + new.dirty_bitmap = NULL; 878 + memset(&new.arch, 0, sizeof(new.arch)); 879 + } 880 + 881 + old_memslots = install_new_memslots(kvm, slots, &new); 882 + 883 + kvm_arch_commit_memory_region(kvm, mem, &old, change); 884 + 885 + kvm_free_physmem_slot(kvm, &old, &new); 886 + kfree(old_memslots); 887 + 877 888 /* 878 889 * IOMMU mapping: New slots need to be mapped. Old slots need to be 879 890 * un-mapped and re-mapped if their base changes. Since base change ··· 898 883 */ 899 884 if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { 900 885 r = kvm_iommu_map_pages(kvm, &new); 901 - if (r) 902 - goto out_slots; 886 + return r; 903 887 } 904 - 905 - /* actual memory is freed via old in kvm_free_physmem_slot below */ 906 - if (change == KVM_MR_DELETE) { 907 - new.dirty_bitmap = NULL; 908 - memset(&new.arch, 0, sizeof(new.arch)); 909 - } 910 - 911 - old_memslots = install_new_memslots(kvm, slots, &new); 912 - 913 - kvm_arch_commit_memory_region(kvm, mem, &old, change); 914 - 915 - kvm_free_physmem_slot(&old, &new); 916 - kfree(old_memslots); 917 888 918 889 return 0; 919 890 920 891 out_slots: 921 892 kfree(slots); 922 893 out_free: 923 - kvm_free_physmem_slot(&new, &old); 894 + kvm_free_physmem_slot(kvm, &new, &old); 924 895 out: 925 896 return r; 926 897 } ··· 965 964 out: 966 965 return r; 967 966 } 967 + EXPORT_SYMBOL_GPL(kvm_get_dirty_log); 968 968 969 969 bool kvm_largepages_enabled(void) 970 970 { ··· 1656 1654 memslot = gfn_to_memslot(kvm, gfn); 1657 1655 mark_page_dirty_in_slot(kvm, memslot, gfn); 1658 1656 } 1657 + EXPORT_SYMBOL_GPL(mark_page_dirty); 1659 1658 1660 1659 /* 1661 1660 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 
··· 1682 1679 1683 1680 finish_wait(&vcpu->wq, &wait); 1684 1681 } 1682 + EXPORT_SYMBOL_GPL(kvm_vcpu_block); 1685 1683 1686 1684 #ifndef CONFIG_S390 1687 1685 /* ··· 2275 2271 ops = &kvm_xics_ops; 2276 2272 break; 2277 2273 #endif 2274 + #ifdef CONFIG_KVM_VFIO 2275 + case KVM_DEV_TYPE_VFIO: 2276 + ops = &kvm_vfio_ops; 2277 + break; 2278 + #endif 2278 2279 default: 2279 2280 return -ENODEV; 2280 2281 } ··· 2528 2519 } 2529 2520 #endif 2530 2521 2531 - static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2532 - { 2533 - struct page *page[1]; 2534 - unsigned long addr; 2535 - int npages; 2536 - gfn_t gfn = vmf->pgoff; 2537 - struct kvm *kvm = vma->vm_file->private_data; 2538 - 2539 - addr = gfn_to_hva(kvm, gfn); 2540 - if (kvm_is_error_hva(addr)) 2541 - return VM_FAULT_SIGBUS; 2542 - 2543 - npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, 2544 - NULL); 2545 - if (unlikely(npages != 1)) 2546 - return VM_FAULT_SIGBUS; 2547 - 2548 - vmf->page = page[0]; 2549 - return 0; 2550 - } 2551 - 2552 - static const struct vm_operations_struct kvm_vm_vm_ops = { 2553 - .fault = kvm_vm_fault, 2554 - }; 2555 - 2556 - static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) 2557 - { 2558 - vma->vm_ops = &kvm_vm_vm_ops; 2559 - return 0; 2560 - } 2561 - 2562 2522 static struct file_operations kvm_vm_fops = { 2563 2523 .release = kvm_vm_release, 2564 2524 .unlocked_ioctl = kvm_vm_ioctl, 2565 2525 #ifdef CONFIG_COMPAT 2566 2526 .compat_ioctl = kvm_vm_compat_ioctl, 2567 2527 #endif 2568 - .mmap = kvm_vm_mmap, 2569 2528 .llseek = noop_llseek, 2570 2529 }; 2571 2530 ··· 2660 2683 } 2661 2684 } 2662 2685 2663 - static void hardware_enable(void *junk) 2686 + static void hardware_enable(void) 2664 2687 { 2665 - raw_spin_lock(&kvm_lock); 2666 - hardware_enable_nolock(junk); 2667 - raw_spin_unlock(&kvm_lock); 2688 + raw_spin_lock(&kvm_count_lock); 2689 + if (kvm_usage_count) 2690 + hardware_enable_nolock(NULL); 2691 + 
raw_spin_unlock(&kvm_count_lock); 2668 2692 } 2669 2693 2670 2694 static void hardware_disable_nolock(void *junk) ··· 2678 2700 kvm_arch_hardware_disable(NULL); 2679 2701 } 2680 2702 2681 - static void hardware_disable(void *junk) 2703 + static void hardware_disable(void) 2682 2704 { 2683 - raw_spin_lock(&kvm_lock); 2684 - hardware_disable_nolock(junk); 2685 - raw_spin_unlock(&kvm_lock); 2705 + raw_spin_lock(&kvm_count_lock); 2706 + if (kvm_usage_count) 2707 + hardware_disable_nolock(NULL); 2708 + raw_spin_unlock(&kvm_count_lock); 2686 2709 } 2687 2710 2688 2711 static void hardware_disable_all_nolock(void) ··· 2697 2718 2698 2719 static void hardware_disable_all(void) 2699 2720 { 2700 - raw_spin_lock(&kvm_lock); 2721 + raw_spin_lock(&kvm_count_lock); 2701 2722 hardware_disable_all_nolock(); 2702 - raw_spin_unlock(&kvm_lock); 2723 + raw_spin_unlock(&kvm_count_lock); 2703 2724 } 2704 2725 2705 2726 static int hardware_enable_all(void) 2706 2727 { 2707 2728 int r = 0; 2708 2729 2709 - raw_spin_lock(&kvm_lock); 2730 + raw_spin_lock(&kvm_count_lock); 2710 2731 2711 2732 kvm_usage_count++; 2712 2733 if (kvm_usage_count == 1) { ··· 2719 2740 } 2720 2741 } 2721 2742 2722 - raw_spin_unlock(&kvm_lock); 2743 + raw_spin_unlock(&kvm_count_lock); 2723 2744 2724 2745 return r; 2725 2746 } ··· 2729 2750 { 2730 2751 int cpu = (long)v; 2731 2752 2732 - if (!kvm_usage_count) 2733 - return NOTIFY_OK; 2734 - 2735 2753 val &= ~CPU_TASKS_FROZEN; 2736 2754 switch (val) { 2737 2755 case CPU_DYING: 2738 2756 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 2739 2757 cpu); 2740 - hardware_disable(NULL); 2758 + hardware_disable(); 2741 2759 break; 2742 2760 case CPU_STARTING: 2743 2761 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 2744 2762 cpu); 2745 - hardware_enable(NULL); 2763 + hardware_enable(); 2746 2764 break; 2747 2765 } 2748 2766 return NOTIFY_OK; ··· 3032 3056 struct kvm *kvm; 3033 3057 3034 3058 *val = 0; 3035 - raw_spin_lock(&kvm_lock); 3059 + 
spin_lock(&kvm_lock); 3036 3060 list_for_each_entry(kvm, &vm_list, vm_list) 3037 3061 *val += *(u32 *)((void *)kvm + offset); 3038 - raw_spin_unlock(&kvm_lock); 3062 + spin_unlock(&kvm_lock); 3039 3063 return 0; 3040 3064 } 3041 3065 ··· 3049 3073 int i; 3050 3074 3051 3075 *val = 0; 3052 - raw_spin_lock(&kvm_lock); 3076 + spin_lock(&kvm_lock); 3053 3077 list_for_each_entry(kvm, &vm_list, vm_list) 3054 3078 kvm_for_each_vcpu(i, vcpu, kvm) 3055 3079 *val += *(u32 *)((void *)vcpu + offset); 3056 3080 3057 - raw_spin_unlock(&kvm_lock); 3081 + spin_unlock(&kvm_lock); 3058 3082 return 0; 3059 3083 } 3060 3084 ··· 3109 3133 static void kvm_resume(void) 3110 3134 { 3111 3135 if (kvm_usage_count) { 3112 - WARN_ON(raw_spin_is_locked(&kvm_lock)); 3136 + WARN_ON(raw_spin_is_locked(&kvm_count_lock)); 3113 3137 hardware_enable_nolock(NULL); 3114 3138 } 3115 3139 }
+264
virt/kvm/vfio.c
··· 1 + /* 2 + * VFIO-KVM bridge pseudo device 3 + * 4 + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. 5 + * Author: Alex Williamson <alex.williamson@redhat.com> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + */ 11 + 12 + #include <linux/errno.h> 13 + #include <linux/file.h> 14 + #include <linux/kvm_host.h> 15 + #include <linux/list.h> 16 + #include <linux/module.h> 17 + #include <linux/mutex.h> 18 + #include <linux/slab.h> 19 + #include <linux/uaccess.h> 20 + #include <linux/vfio.h> 21 + 22 + struct kvm_vfio_group { 23 + struct list_head node; 24 + struct vfio_group *vfio_group; 25 + }; 26 + 27 + struct kvm_vfio { 28 + struct list_head group_list; 29 + struct mutex lock; 30 + bool noncoherent; 31 + }; 32 + 33 + static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) 34 + { 35 + struct vfio_group *vfio_group; 36 + struct vfio_group *(*fn)(struct file *); 37 + 38 + fn = symbol_get(vfio_group_get_external_user); 39 + if (!fn) 40 + return ERR_PTR(-EINVAL); 41 + 42 + vfio_group = fn(filep); 43 + 44 + symbol_put(vfio_group_get_external_user); 45 + 46 + return vfio_group; 47 + } 48 + 49 + static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) 50 + { 51 + void (*fn)(struct vfio_group *); 52 + 53 + fn = symbol_get(vfio_group_put_external_user); 54 + if (!fn) 55 + return; 56 + 57 + fn(vfio_group); 58 + 59 + symbol_put(vfio_group_put_external_user); 60 + } 61 + 62 + /* 63 + * Groups can use the same or different IOMMU domains. If the same then 64 + * adding a new group may change the coherency of groups we've previously 65 + * been told about. We don't want to care about any of that so we retest 66 + * each group and bail as soon as we find one that's noncoherent. This 67 + * means we only ever [un]register_noncoherent_dma once for the whole device. 
68 + */ 69 + static void kvm_vfio_update_coherency(struct kvm_device *dev) 70 + { 71 + struct kvm_vfio *kv = dev->private; 72 + bool noncoherent = false; 73 + struct kvm_vfio_group *kvg; 74 + 75 + mutex_lock(&kv->lock); 76 + 77 + list_for_each_entry(kvg, &kv->group_list, node) { 78 + /* 79 + * TODO: We need an interface to check the coherency of 80 + * the IOMMU domain this group is using. For now, assume 81 + * it's always noncoherent. 82 + */ 83 + noncoherent = true; 84 + break; 85 + } 86 + 87 + if (noncoherent != kv->noncoherent) { 88 + kv->noncoherent = noncoherent; 89 + 90 + if (kv->noncoherent) 91 + kvm_arch_register_noncoherent_dma(dev->kvm); 92 + else 93 + kvm_arch_unregister_noncoherent_dma(dev->kvm); 94 + } 95 + 96 + mutex_unlock(&kv->lock); 97 + } 98 + 99 + static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) 100 + { 101 + struct kvm_vfio *kv = dev->private; 102 + struct vfio_group *vfio_group; 103 + struct kvm_vfio_group *kvg; 104 + void __user *argp = (void __user *)arg; 105 + struct fd f; 106 + int32_t fd; 107 + int ret; 108 + 109 + switch (attr) { 110 + case KVM_DEV_VFIO_GROUP_ADD: 111 + if (get_user(fd, (int32_t __user *)argp)) 112 + return -EFAULT; 113 + 114 + f = fdget(fd); 115 + if (!f.file) 116 + return -EBADF; 117 + 118 + vfio_group = kvm_vfio_group_get_external_user(f.file); 119 + fdput(f); 120 + 121 + if (IS_ERR(vfio_group)) 122 + return PTR_ERR(vfio_group); 123 + 124 + mutex_lock(&kv->lock); 125 + 126 + list_for_each_entry(kvg, &kv->group_list, node) { 127 + if (kvg->vfio_group == vfio_group) { 128 + mutex_unlock(&kv->lock); 129 + kvm_vfio_group_put_external_user(vfio_group); 130 + return -EEXIST; 131 + } 132 + } 133 + 134 + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); 135 + if (!kvg) { 136 + mutex_unlock(&kv->lock); 137 + kvm_vfio_group_put_external_user(vfio_group); 138 + return -ENOMEM; 139 + } 140 + 141 + list_add_tail(&kvg->node, &kv->group_list); 142 + kvg->vfio_group = vfio_group; 143 + 144 + mutex_unlock(&kv->lock); 
145 + 146 + kvm_vfio_update_coherency(dev); 147 + 148 + return 0; 149 + 150 + case KVM_DEV_VFIO_GROUP_DEL: 151 + if (get_user(fd, (int32_t __user *)argp)) 152 + return -EFAULT; 153 + 154 + f = fdget(fd); 155 + if (!f.file) 156 + return -EBADF; 157 + 158 + vfio_group = kvm_vfio_group_get_external_user(f.file); 159 + fdput(f); 160 + 161 + if (IS_ERR(vfio_group)) 162 + return PTR_ERR(vfio_group); 163 + 164 + ret = -ENOENT; 165 + 166 + mutex_lock(&kv->lock); 167 + 168 + list_for_each_entry(kvg, &kv->group_list, node) { 169 + if (kvg->vfio_group != vfio_group) 170 + continue; 171 + 172 + list_del(&kvg->node); 173 + kvm_vfio_group_put_external_user(kvg->vfio_group); 174 + kfree(kvg); 175 + ret = 0; 176 + break; 177 + } 178 + 179 + mutex_unlock(&kv->lock); 180 + 181 + kvm_vfio_group_put_external_user(vfio_group); 182 + 183 + kvm_vfio_update_coherency(dev); 184 + 185 + return ret; 186 + } 187 + 188 + return -ENXIO; 189 + } 190 + 191 + static int kvm_vfio_set_attr(struct kvm_device *dev, 192 + struct kvm_device_attr *attr) 193 + { 194 + switch (attr->group) { 195 + case KVM_DEV_VFIO_GROUP: 196 + return kvm_vfio_set_group(dev, attr->attr, attr->addr); 197 + } 198 + 199 + return -ENXIO; 200 + } 201 + 202 + static int kvm_vfio_has_attr(struct kvm_device *dev, 203 + struct kvm_device_attr *attr) 204 + { 205 + switch (attr->group) { 206 + case KVM_DEV_VFIO_GROUP: 207 + switch (attr->attr) { 208 + case KVM_DEV_VFIO_GROUP_ADD: 209 + case KVM_DEV_VFIO_GROUP_DEL: 210 + return 0; 211 + } 212 + 213 + break; 214 + } 215 + 216 + return -ENXIO; 217 + } 218 + 219 + static void kvm_vfio_destroy(struct kvm_device *dev) 220 + { 221 + struct kvm_vfio *kv = dev->private; 222 + struct kvm_vfio_group *kvg, *tmp; 223 + 224 + list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { 225 + kvm_vfio_group_put_external_user(kvg->vfio_group); 226 + list_del(&kvg->node); 227 + kfree(kvg); 228 + } 229 + 230 + kvm_vfio_update_coherency(dev); 231 + 232 + kfree(kv); 233 + kfree(dev); /* alloc by 
kvm_ioctl_create_device, free by .destroy */ 234 + } 235 + 236 + static int kvm_vfio_create(struct kvm_device *dev, u32 type) 237 + { 238 + struct kvm_device *tmp; 239 + struct kvm_vfio *kv; 240 + 241 + /* Only one VFIO "device" per VM */ 242 + list_for_each_entry(tmp, &dev->kvm->devices, vm_node) 243 + if (tmp->ops == &kvm_vfio_ops) 244 + return -EBUSY; 245 + 246 + kv = kzalloc(sizeof(*kv), GFP_KERNEL); 247 + if (!kv) 248 + return -ENOMEM; 249 + 250 + INIT_LIST_HEAD(&kv->group_list); 251 + mutex_init(&kv->lock); 252 + 253 + dev->private = kv; 254 + 255 + return 0; 256 + } 257 + 258 + struct kvm_device_ops kvm_vfio_ops = { 259 + .name = "kvm-vfio", 260 + .create = kvm_vfio_create, 261 + .destroy = kvm_vfio_destroy, 262 + .set_attr = kvm_vfio_set_attr, 263 + .has_attr = kvm_vfio_has_attr, 264 + };