Merge branch 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (145 commits)
KVM: x86: Add KVM_CAP_X86_ROBUST_SINGLESTEP
KVM: VMX: Update instruction length on intercepted BP
KVM: Fix emulate_sys[call, enter, exit]()'s fault handling
KVM: Fix segment descriptor loading
KVM: Fix load_guest_segment_descriptor() to inject page fault
KVM: x86 emulator: Forbid modifying CS segment register by mov instruction
KVM: Convert kvm->requests_lock to raw_spinlock_t
KVM: Convert i8254/i8259 locks to raw_spinlocks
KVM: x86 emulator: disallow opcode 82 in 64-bit mode
KVM: x86 emulator: code style cleanup
KVM: Plan obsolescence of kernel allocated slots, paravirt mmu
KVM: x86 emulator: Add LOCK prefix validity checking
KVM: x86 emulator: Check CPL level during privilege instruction emulation
KVM: x86 emulator: Fix popf emulation
KVM: x86 emulator: Check IOPL level during io instruction emulation
KVM: x86 emulator: fix memory access during x86 emulation
KVM: x86 emulator: Add Virtual-8086 mode of emulation
KVM: x86 emulator: Add group9 instruction decoding
KVM: x86 emulator: Add group8 instruction decoding
KVM: do not store wqh in irqfd
...

Trivial conflicts in Documentation/feature-removal-schedule.txt

+3772 -1663
+32
Documentation/feature-removal-schedule.txt
···
556 556 NCCI TTY device nodes. User space (pppdcapiplugin) works without
557 557 noticing the difference.
558 558 Who: Jan Kiszka <jan.kiszka@web.de>
559 +
560 + ----------------------------
561 +
562 + What: KVM memory aliases support
563 + When: July 2010
564 + Why: Memory aliasing support is used for speeding up guest vga access
565 + through the vga windows.
566 +
567 + Modern userspace no longer uses this feature, so it's just bitrotted
568 + code and can be removed with no impact.
569 + Who: Avi Kivity <avi@redhat.com>
570 +
571 + ----------------------------
572 +
573 + What: KVM kernel-allocated memory slots
574 + When: July 2010
575 + Why: Since 2.6.25, kvm supports user-allocated memory slots, which are
576 + much more flexible than kernel-allocated slots. All current userspace
577 + supports the newer interface and this code can be removed with no
578 + impact.
579 + Who: Avi Kivity <avi@redhat.com>
580 +
581 + ----------------------------
582 +
583 + What: KVM paravirt mmu host support
584 + When: January 2011
585 + Why: The paravirt mmu host support is slower than non-paravirt mmu, both
586 + on newer and older hardware. It is already not exposed to the guest,
587 + and kept only for live migration purposes.
588 + Who: Avi Kivity <avi@redhat.com>
589 +
590 + ----------------------------
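The two KVM entries above point at the user-allocated slot interface that replaces kernel-allocated slots. As an illustrative sketch only (not part of this merge), a userspace VMM registers such a slot through the KVM_SET_USER_MEMORY_REGION vm ioctl; the vm fd is assumed to come from the KVM_CREATE_VM flow described in Documentation/kvm/api.txt below.

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/* Back the guest physical range [guest_phys, guest_phys + size) with
 * anonymous host memory and register it as a user-allocated slot. */
static int set_user_memslot(int vm_fd, __u32 slot, __u64 guest_phys, __u64 size)
{
	struct kvm_userspace_memory_region region;
	void *host = mmap(NULL, size, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (host == MAP_FAILED)
		return -1;

	memset(&region, 0, sizeof(region));
	region.slot = slot;
	region.guest_phys_addr = guest_phys;
	region.memory_size = size;
	region.userspace_addr = (__u64)(unsigned long)host;

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}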
+6 -6
Documentation/kvm/api.txt
···
23 23 Only run vcpu ioctls from the same thread that was used to create the
24 24 vcpu.
25 25
26 - 2. File descritpors
26 + 2. File descriptors
27 27
28 28 The kvm API is centered around file descriptors. An initial
29 29 open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
30 30 can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this
31 - handle will create a VM file descripror which can be used to issue VM
31 + handle will create a VM file descriptor which can be used to issue VM
32 32 ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
33 33 and return a file descriptor pointing to it. Finally, ioctls on a vcpu
34 34 fd can be used to control the vcpu, including the important task of
···
643 643 Parameters: struct kvm_clock_data (in)
644 644 Returns: 0 on success, -1 on error
645 645
646 - Sets the current timestamp of kvmclock to the valued specific in its parameter.
646 + Sets the current timestamp of kvmclock to the value specified in its parameter.
647 647 In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
648 648 such as migration.
649 649
···
795 795 __u64 data_offset; /* relative to kvm_run start */
796 796 } io;
797 797
798 - If exit_reason is KVM_EXIT_IO_IN or KVM_EXIT_IO_OUT, then the vcpu has
798 + If exit_reason is KVM_EXIT_IO, then the vcpu has
799 799 executed a port I/O instruction which could not be satisfied by kvm.
800 800 data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
801 801 where kvm expects application code to place the data for the next
802 - KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a patcked array.
802 + KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
803 803
804 804 struct {
805 805 struct kvm_debug_exit_arch arch;
···
815 815 __u8 is_write;
816 816 } mmio;
817 817
818 - If exit_reason is KVM_EXIT_MMIO or KVM_EXIT_IO_OUT, then the vcpu has
818 + If exit_reason is KVM_EXIT_MMIO, then the vcpu has
819 819 executed a memory-mapped I/O instruction which could not be satisfied
820 820 by kvm. The 'data' member contains the written data if 'is_write' is
821 821 true, and should be filled by application code otherwise.
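The file-descriptor flow and the KVM_EXIT_IO convention documented above can be exercised from userspace roughly as follows. This is a hedged sketch rather than anything from the patch: error handling and guest memory/register setup are omitted, and KVM_GET_VCPU_MMAP_SIZE (a system ioctl not shown in this excerpt) is assumed for sizing the kvm_run mapping.

#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

int main(void)
{
	/* system fd -> VM fd -> vcpu fd, as described in section 2 above */
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

	/* the shared kvm_run area is mmap()ed from the vcpu fd */
	int mmap_size = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, 0);
	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu, 0);

	/* guest memory, register and code setup omitted for brevity */

	if (ioctl(vcpu, KVM_RUN, 0) == 0 && run->exit_reason == KVM_EXIT_IO) {
		/* port I/O data is a packed array at run + run->io.data_offset */
		void *data = (char *)run + run->io.data_offset;
		printf("io exit: port 0x%x, %u byte(s) at %p\n",
		       run->io.port, (unsigned)run->io.size, data);
	}
	return 0;
}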
+1 -1
MAINTAINERS
···
3173 3173 F: arch/x86/kvm/svm.c
3174 3174
3175 3175 KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
3176 - M: Hollis Blanchard <hollisb@us.ibm.com>
3176 + M: Alexander Graf <agraf@suse.de>
3177 3177 L: kvm-ppc@vger.kernel.org
3178 3178 W: http://kvm.qumranet.com
3179 3179 S: Supported
+1
arch/ia64/kvm/Kconfig
···
26 26 select ANON_INODES
27 27 select HAVE_KVM_IRQCHIP
28 28 select KVM_APIC_ARCHITECTURE
29 + select KVM_MMIO
29 30 ---help---
30 31 Support hosting fully virtualized guest machines using hardware
31 32 virtualization extensions. You will need a fairly recent
+30 -20
arch/ia64/kvm/kvm-ia64.c
··· 241 241 return 0; 242 242 mmio: 243 243 if (p->dir) 244 - r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr, 244 + r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr, 245 245 p->size, &p->data); 246 246 else 247 - r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr, 247 + r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr, 248 248 p->size, &p->data); 249 249 if (r) 250 250 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); ··· 636 636 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 637 637 { 638 638 union context *host_ctx, *guest_ctx; 639 - int r; 639 + int r, idx; 640 640 641 - /* 642 - * down_read() may sleep and return with interrupts enabled 643 - */ 644 - down_read(&vcpu->kvm->slots_lock); 641 + idx = srcu_read_lock(&vcpu->kvm->srcu); 645 642 646 643 again: 647 644 if (signal_pending(current)) { ··· 660 663 if (r < 0) 661 664 goto vcpu_run_fail; 662 665 663 - up_read(&vcpu->kvm->slots_lock); 666 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 664 667 kvm_guest_enter(); 665 668 666 669 /* ··· 684 687 kvm_guest_exit(); 685 688 preempt_enable(); 686 689 687 - down_read(&vcpu->kvm->slots_lock); 690 + idx = srcu_read_lock(&vcpu->kvm->srcu); 688 691 689 692 r = kvm_handle_exit(kvm_run, vcpu); 690 693 ··· 694 697 } 695 698 696 699 out: 697 - up_read(&vcpu->kvm->slots_lock); 700 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 698 701 if (r > 0) { 699 702 kvm_resched(vcpu); 700 - down_read(&vcpu->kvm->slots_lock); 703 + idx = srcu_read_lock(&vcpu->kvm->srcu); 701 704 goto again; 702 705 } 703 706 ··· 968 971 goto out; 969 972 r = kvm_setup_default_irq_routing(kvm); 970 973 if (r) { 971 - kfree(kvm->arch.vioapic); 974 + kvm_ioapic_destroy(kvm); 972 975 goto out; 973 976 } 974 977 break; ··· 1374 1377 1375 1378 static void kvm_release_vm_pages(struct kvm *kvm) 1376 1379 { 1380 + struct kvm_memslots *slots; 1377 1381 struct kvm_memory_slot *memslot; 1378 1382 int i, j; 1379 1383 unsigned long base_gfn; 1380 1384 1381 - for (i = 0; i < kvm->nmemslots; i++) { 1382 - memslot = &kvm->memslots[i]; 1385 + slots = rcu_dereference(kvm->memslots); 1386 + for (i = 0; i < slots->nmemslots; i++) { 1387 + memslot = &slots->memslots[i]; 1383 1388 base_gfn = memslot->base_gfn; 1384 1389 1385 1390 for (j = 0; j < memslot->npages; j++) { ··· 1404 1405 kfree(kvm->arch.vioapic); 1405 1406 kvm_release_vm_pages(kvm); 1406 1407 kvm_free_physmem(kvm); 1408 + cleanup_srcu_struct(&kvm->srcu); 1407 1409 free_kvm(kvm); 1408 1410 } 1409 1411 ··· 1576 1576 return r; 1577 1577 } 1578 1578 1579 - int kvm_arch_set_memory_region(struct kvm *kvm, 1580 - struct kvm_userspace_memory_region *mem, 1579 + int kvm_arch_prepare_memory_region(struct kvm *kvm, 1580 + struct kvm_memory_slot *memslot, 1581 1581 struct kvm_memory_slot old, 1582 + struct kvm_userspace_memory_region *mem, 1582 1583 int user_alloc) 1583 1584 { 1584 1585 unsigned long i; 1585 1586 unsigned long pfn; 1586 - int npages = mem->memory_size >> PAGE_SHIFT; 1587 - struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; 1587 + int npages = memslot->npages; 1588 1588 unsigned long base_gfn = memslot->base_gfn; 1589 1589 1590 1590 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) ··· 1606 1606 } 1607 1607 1608 1608 return 0; 1609 + } 1610 + 1611 + void kvm_arch_commit_memory_region(struct kvm *kvm, 1612 + struct kvm_userspace_memory_region *mem, 1613 + struct kvm_memory_slot old, 1614 + int user_alloc) 1615 + { 1616 + return; 1609 1617 } 1610 1618 1611 1619 void kvm_arch_flush_shadow(struct kvm *kvm) ··· 1810 1802 if 
(log->slot >= KVM_MEMORY_SLOTS) 1811 1803 goto out; 1812 1804 1813 - memslot = &kvm->memslots[log->slot]; 1805 + memslot = &kvm->memslots->memslots[log->slot]; 1814 1806 r = -ENOENT; 1815 1807 if (!memslot->dirty_bitmap) 1816 1808 goto out; ··· 1835 1827 struct kvm_memory_slot *memslot; 1836 1828 int is_dirty = 0; 1837 1829 1830 + mutex_lock(&kvm->slots_lock); 1838 1831 spin_lock(&kvm->arch.dirty_log_lock); 1839 1832 1840 1833 r = kvm_ia64_sync_dirty_log(kvm, log); ··· 1849 1840 /* If nothing is dirty, don't bother messing with page tables. */ 1850 1841 if (is_dirty) { 1851 1842 kvm_flush_remote_tlbs(kvm); 1852 - memslot = &kvm->memslots[log->slot]; 1843 + memslot = &kvm->memslots->memslots[log->slot]; 1853 1844 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 1854 1845 memset(memslot->dirty_bitmap, 0, n); 1855 1846 } 1856 1847 r = 0; 1857 1848 out: 1849 + mutex_unlock(&kvm->slots_lock); 1858 1850 spin_unlock(&kvm->arch.dirty_log_lock); 1859 1851 return r; 1860 1852 }
+13 -15
arch/ia64/kvm/kvm_fw.c
··· 75 75 struct exit_ctl_data *p; 76 76 77 77 p = kvm_get_exit_data(vcpu); 78 - if (p && p->exit_reason == EXIT_REASON_PAL_CALL) { 78 + if (p->exit_reason == EXIT_REASON_PAL_CALL) { 79 79 p->u.pal_data.ret = result; 80 80 return ; 81 81 } ··· 87 87 struct exit_ctl_data *p; 88 88 89 89 p = kvm_get_exit_data(vcpu); 90 - if (p && p->exit_reason == EXIT_REASON_SAL_CALL) { 90 + if (p->exit_reason == EXIT_REASON_SAL_CALL) { 91 91 p->u.sal_data.ret = result; 92 92 return ; 93 93 } ··· 322 322 struct exit_ctl_data *p; 323 323 324 324 p = kvm_get_exit_data(vcpu); 325 - if (p && (p->exit_reason == EXIT_REASON_PAL_CALL)) 325 + if (p->exit_reason == EXIT_REASON_PAL_CALL) 326 326 index = p->u.pal_data.gr28; 327 327 328 328 return index; ··· 646 646 647 647 p = kvm_get_exit_data(vcpu); 648 648 649 - if (p) { 650 - if (p->exit_reason == EXIT_REASON_SAL_CALL) { 651 - *in0 = p->u.sal_data.in0; 652 - *in1 = p->u.sal_data.in1; 653 - *in2 = p->u.sal_data.in2; 654 - *in3 = p->u.sal_data.in3; 655 - *in4 = p->u.sal_data.in4; 656 - *in5 = p->u.sal_data.in5; 657 - *in6 = p->u.sal_data.in6; 658 - *in7 = p->u.sal_data.in7; 659 - return ; 660 - } 649 + if (p->exit_reason == EXIT_REASON_SAL_CALL) { 650 + *in0 = p->u.sal_data.in0; 651 + *in1 = p->u.sal_data.in1; 652 + *in2 = p->u.sal_data.in2; 653 + *in3 = p->u.sal_data.in3; 654 + *in4 = p->u.sal_data.in4; 655 + *in5 = p->u.sal_data.in5; 656 + *in6 = p->u.sal_data.in6; 657 + *in7 = p->u.sal_data.in7; 658 + return ; 661 659 } 662 660 *in0 = 0; 663 661 }
+2 -2
arch/ia64/kvm/mmio.c
···
316 316 return;
317 317 } else {
318 318 inst_type = -1;
319 - panic_vm(vcpu, "Unsupported MMIO access instruction! \
320 - Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
319 + panic_vm(vcpu, "Unsupported MMIO access instruction! "
320 + "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
321 321 bundle.i64[0], bundle.i64[1]);
322 322 }
323 323
+2 -2
arch/ia64/kvm/vcpu.c
···
1639 1639 * Otherwise panic
1640 1640 */
1641 1641 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
1642 - panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
1643 - & vpsr.is=0\n");
1642 + panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
1643 + "& vpsr.is=0\n");
1644 1644
1645 1645 /*
1646 1646 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
+6
arch/powerpc/include/asm/kvm_asm.h
···
97 97 #define RESUME_HOST RESUME_FLAG_HOST
98 98 #define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV)
99 99
100 + #define KVM_GUEST_MODE_NONE 0
101 + #define KVM_GUEST_MODE_GUEST 1
102 + #define KVM_GUEST_MODE_SKIP 2
103 +
104 + #define KVM_INST_FETCH_FAILED -1
105 +
100 106 #endif /* __POWERPC_KVM_ASM_H__ */
+9 -2
arch/powerpc/include/asm/kvm_book3s.h
··· 22 22 23 23 #include <linux/types.h> 24 24 #include <linux/kvm_host.h> 25 - #include <asm/kvm_ppc.h> 25 + #include <asm/kvm_book3s_64_asm.h> 26 26 27 27 struct kvmppc_slb { 28 28 u64 esid; ··· 33 33 bool Ks; 34 34 bool Kp; 35 35 bool nx; 36 - bool large; 36 + bool large; /* PTEs are 16MB */ 37 + bool tb; /* 1TB segment */ 37 38 bool class; 38 39 }; 39 40 ··· 70 69 71 70 struct kvmppc_vcpu_book3s { 72 71 struct kvm_vcpu vcpu; 72 + struct kvmppc_book3s_shadow_vcpu shadow_vcpu; 73 73 struct kvmppc_sid_map sid_map[SID_MAP_NUM]; 74 74 struct kvmppc_slb slb[64]; 75 75 struct { ··· 91 89 u64 vsid_next; 92 90 u64 vsid_max; 93 91 int context_id; 92 + ulong prog_flags; /* flags to inject when giving a 700 trap */ 94 93 }; 95 94 96 95 #define CONTEXT_HOST 0 ··· 122 119 123 120 extern u32 kvmppc_trampoline_lowmem; 124 121 extern u32 kvmppc_trampoline_enter; 122 + extern void kvmppc_rmcall(ulong srr0, ulong srr1); 123 + extern void kvmppc_load_up_fpu(void); 124 + extern void kvmppc_load_up_altivec(void); 125 + extern void kvmppc_load_up_vsx(void); 125 126 126 127 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) 127 128 {
+18
arch/powerpc/include/asm/kvm_book3s_64_asm.h
···
20 20 #ifndef __ASM_KVM_BOOK3S_ASM_H__
21 21 #define __ASM_KVM_BOOK3S_ASM_H__
22 22
23 + #ifdef __ASSEMBLY__
24 +
23 25 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
24 26
25 27 #include <asm/kvm_asm.h>
···
56 54 .endm
57 55
58 56 #endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
57 +
58 + #else /*__ASSEMBLY__ */
59 +
60 + struct kvmppc_book3s_shadow_vcpu {
61 + ulong gpr[14];
62 + u32 cr;
63 + u32 xer;
64 + ulong host_r1;
65 + ulong host_r2;
66 + ulong handler;
67 + ulong scratch0;
68 + ulong scratch1;
69 + ulong vmhandler;
70 + };
71 +
72 + #endif /*__ASSEMBLY__ */
59 73
60 74 #endif /* __ASM_KVM_BOOK3S_ASM_H__ */
+3
arch/powerpc/include/asm/kvm_e500.h
···
52 52 u32 mas5;
53 53 u32 mas6;
54 54 u32 mas7;
55 + u32 l1csr0;
55 56 u32 l1csr1;
56 57 u32 hid0;
57 58 u32 hid1;
59 + u32 tlb0cfg;
60 + u32 tlb1cfg;
58 61
59 62 struct kvm_vcpu vcpu;
60 63 };
+21 -2
arch/powerpc/include/asm/kvm_host.h
··· 167 167 ulong trampoline_lowmem; 168 168 ulong trampoline_enter; 169 169 ulong highmem_handler; 170 + ulong rmcall; 170 171 ulong host_paca_phys; 171 172 struct kvmppc_mmu mmu; 172 173 #endif 173 174 174 - u64 fpr[32]; 175 175 ulong gpr[32]; 176 176 177 + u64 fpr[32]; 178 + u32 fpscr; 179 + 180 + #ifdef CONFIG_ALTIVEC 181 + vector128 vr[32]; 182 + vector128 vscr; 183 + #endif 184 + 185 + #ifdef CONFIG_VSX 186 + u64 vsr[32]; 187 + #endif 188 + 177 189 ulong pc; 178 - u32 cr; 179 190 ulong ctr; 180 191 ulong lr; 192 + 193 + #ifdef CONFIG_BOOKE 181 194 ulong xer; 195 + u32 cr; 196 + #endif 182 197 183 198 ulong msr; 184 199 #ifdef CONFIG_PPC64 185 200 ulong shadow_msr; 201 + ulong shadow_srr1; 186 202 ulong hflags; 203 + ulong guest_owned_ext; 187 204 #endif 188 205 u32 mmucr; 189 206 ulong sprg0; ··· 259 242 #endif 260 243 ulong fault_dear; 261 244 ulong fault_esr; 245 + ulong queued_dear; 246 + ulong queued_esr; 262 247 gpa_t paddr_accessed; 263 248 264 249 u8 io_gpr; /* GPR used as IO source/target */
+82 -1
arch/powerpc/include/asm/kvm_ppc.h
··· 28 28 #include <linux/types.h> 29 29 #include <linux/kvm_types.h> 30 30 #include <linux/kvm_host.h> 31 + #ifdef CONFIG_PPC_BOOK3S 32 + #include <asm/kvm_book3s.h> 33 + #endif 31 34 32 35 enum emulation_result { 33 36 EMULATE_DONE, /* no further processing */ ··· 83 80 84 81 extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); 85 82 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); 86 - extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); 83 + extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); 87 84 extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); 85 + extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); 88 86 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 89 87 struct kvm_interrupt *irq); 90 88 ··· 98 94 extern void kvmppc_booke_exit(void); 99 95 100 96 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); 97 + 98 + #ifdef CONFIG_PPC_BOOK3S 99 + 100 + /* We assume we're always acting on the current vcpu */ 101 + 102 + static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 103 + { 104 + if ( num < 14 ) { 105 + get_paca()->shadow_vcpu.gpr[num] = val; 106 + to_book3s(vcpu)->shadow_vcpu.gpr[num] = val; 107 + } else 108 + vcpu->arch.gpr[num] = val; 109 + } 110 + 111 + static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) 112 + { 113 + if ( num < 14 ) 114 + return get_paca()->shadow_vcpu.gpr[num]; 115 + else 116 + return vcpu->arch.gpr[num]; 117 + } 118 + 119 + static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 120 + { 121 + get_paca()->shadow_vcpu.cr = val; 122 + to_book3s(vcpu)->shadow_vcpu.cr = val; 123 + } 124 + 125 + static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 126 + { 127 + return get_paca()->shadow_vcpu.cr; 128 + } 129 + 130 + static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 131 + { 132 + get_paca()->shadow_vcpu.xer = val; 133 + to_book3s(vcpu)->shadow_vcpu.xer = val; 134 + } 135 + 136 + static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 137 + { 138 + return get_paca()->shadow_vcpu.xer; 139 + } 140 + 141 + #else 142 + 143 + static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 144 + { 145 + vcpu->arch.gpr[num] = val; 146 + } 147 + 148 + static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) 149 + { 150 + return vcpu->arch.gpr[num]; 151 + } 152 + 153 + static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 154 + { 155 + vcpu->arch.cr = val; 156 + } 157 + 158 + static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 159 + { 160 + return vcpu->arch.cr; 161 + } 162 + 163 + static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 164 + { 165 + vcpu->arch.xer = val; 166 + } 167 + 168 + static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 169 + { 170 + return vcpu->arch.xer; 171 + } 172 + 173 + #endif 101 174 102 175 #endif /* __POWERPC_KVM_PPC_H__ */
+5
arch/powerpc/include/asm/paca.h
···
19 19 #include <asm/mmu.h>
20 20 #include <asm/page.h>
21 21 #include <asm/exception-64e.h>
22 + #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
23 + #include <asm/kvm_book3s_64_asm.h>
24 + #endif
22 25
23 26 register struct paca_struct *local_paca asm("r13");
24 27
···
138 135 u64 esid;
139 136 u64 vsid;
140 137 } kvm_slb[64]; /* guest SLB */
138 + /* We use this to store guest state in */
139 + struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
141 140 u8 kvm_slb_max; /* highest used guest slb entry */
142 141 u8 kvm_in_guest; /* are we inside the guest? */
143 142 #endif
+4
arch/powerpc/include/asm/reg.h
···
426 426 #define SRR1_WAKEMT 0x00280000 /* mtctrl */
427 427 #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */
428 428 #define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */
429 + #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
430 + #define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
431 + #define SRR1_PROGTRAP 0x00020000 /* Trap */
432 + #define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
429 433 #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
430 434 #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
431 435
+30 -3
arch/powerpc/kernel/asm-offsets.c
··· 194 194 DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); 195 195 DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); 196 196 DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); 197 + DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr)); 198 + DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer)); 199 + DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0])); 200 + DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1])); 201 + DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2])); 202 + DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3])); 203 + DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4])); 204 + DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5])); 205 + DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6])); 206 + DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7])); 207 + DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8])); 208 + DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9])); 209 + DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10])); 210 + DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11])); 211 + DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12])); 212 + DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13])); 213 + DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1)); 214 + DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2)); 215 + DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct, 216 + shadow_vcpu.vmhandler)); 217 + DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct, 218 + shadow_vcpu.scratch0)); 219 + DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct, 220 + shadow_vcpu.scratch1)); 197 221 #endif 198 222 #endif /* CONFIG_PPC64 */ 199 223 ··· 413 389 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 414 390 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 415 391 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 416 - DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 417 - DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); 418 392 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); 419 393 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); 420 394 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); ··· 433 411 DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); 434 412 DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); 435 413 DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); 414 + DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); 436 415 DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); 437 416 DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); 438 417 DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); 418 + DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); 439 419 DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); 440 - #endif 420 + #else 421 + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 422 + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); 423 + #endif /* CONFIG_PPC64 */ 441 424 #endif 442 425 #ifdef CONFIG_44x 443 426 DEFINE(PGD_T_LOG2, PGD_T_LOG2);
+1
arch/powerpc/kernel/ppc_ksyms.c
···
107 107 #endif /* CONFIG_ALTIVEC */
108 108 #ifdef CONFIG_VSX
109 109 EXPORT_SYMBOL(giveup_vsx);
110 + EXPORT_SYMBOL_GPL(__giveup_vsx);
110 111 #endif /* CONFIG_VSX */
111 112 #ifdef CONFIG_SPE
112 113 EXPORT_SYMBOL(giveup_spe);
+13 -12
arch/powerpc/kvm/44x_emulate.c
··· 65 65 */ 66 66 switch (dcrn) { 67 67 case DCRN_CPR0_CONFIG_ADDR: 68 - vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; 68 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); 69 69 break; 70 70 case DCRN_CPR0_CONFIG_DATA: 71 71 local_irq_disable(); 72 72 mtdcr(DCRN_CPR0_CONFIG_ADDR, 73 73 vcpu->arch.cpr0_cfgaddr); 74 - vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); 74 + kvmppc_set_gpr(vcpu, rt, 75 + mfdcr(DCRN_CPR0_CONFIG_DATA)); 75 76 local_irq_enable(); 76 77 break; 77 78 default: ··· 94 93 /* emulate some access in kernel */ 95 94 switch (dcrn) { 96 95 case DCRN_CPR0_CONFIG_ADDR: 97 - vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; 96 + vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); 98 97 break; 99 98 default: 100 99 run->dcr.dcrn = dcrn; 101 - run->dcr.data = vcpu->arch.gpr[rs]; 100 + run->dcr.data = kvmppc_get_gpr(vcpu, rs); 102 101 run->dcr.is_write = 1; 103 102 vcpu->arch.dcr_needed = 1; 104 103 kvmppc_account_exit(vcpu, DCR_EXITS); ··· 147 146 148 147 switch (sprn) { 149 148 case SPRN_PID: 150 - kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; 149 + kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break; 151 150 case SPRN_MMUCR: 152 - vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; 151 + vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break; 153 152 case SPRN_CCR0: 154 - vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; 153 + vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break; 155 154 case SPRN_CCR1: 156 - vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; 155 + vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break; 157 156 default: 158 157 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); 159 158 } ··· 168 167 169 168 switch (sprn) { 170 169 case SPRN_PID: 171 - vcpu->arch.gpr[rt] = vcpu->arch.pid; break; 170 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break; 172 171 case SPRN_MMUCR: 173 - vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; 172 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break; 174 173 case SPRN_CCR0: 175 - vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; 174 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break; 176 175 case SPRN_CCR1: 177 - vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; 176 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break; 178 177 default: 179 178 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); 180 179 }
+11 -9
arch/powerpc/kvm/44x_tlb.c
··· 439 439 struct kvmppc_44x_tlbe *tlbe; 440 440 unsigned int gtlb_index; 441 441 442 - gtlb_index = vcpu->arch.gpr[ra]; 442 + gtlb_index = kvmppc_get_gpr(vcpu, ra); 443 443 if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { 444 444 printk("%s: index %d\n", __func__, gtlb_index); 445 445 kvmppc_dump_vcpu(vcpu); ··· 455 455 switch (ws) { 456 456 case PPC44x_TLB_PAGEID: 457 457 tlbe->tid = get_mmucr_stid(vcpu); 458 - tlbe->word0 = vcpu->arch.gpr[rs]; 458 + tlbe->word0 = kvmppc_get_gpr(vcpu, rs); 459 459 break; 460 460 461 461 case PPC44x_TLB_XLAT: 462 - tlbe->word1 = vcpu->arch.gpr[rs]; 462 + tlbe->word1 = kvmppc_get_gpr(vcpu, rs); 463 463 break; 464 464 465 465 case PPC44x_TLB_ATTRIB: 466 - tlbe->word2 = vcpu->arch.gpr[rs]; 466 + tlbe->word2 = kvmppc_get_gpr(vcpu, rs); 467 467 break; 468 468 469 469 default: ··· 500 500 unsigned int as = get_mmucr_sts(vcpu); 501 501 unsigned int pid = get_mmucr_stid(vcpu); 502 502 503 - ea = vcpu->arch.gpr[rb]; 503 + ea = kvmppc_get_gpr(vcpu, rb); 504 504 if (ra) 505 - ea += vcpu->arch.gpr[ra]; 505 + ea += kvmppc_get_gpr(vcpu, ra); 506 506 507 507 gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); 508 508 if (rc) { 509 + u32 cr = kvmppc_get_cr(vcpu); 510 + 509 511 if (gtlb_index < 0) 510 - vcpu->arch.cr &= ~0x20000000; 512 + kvmppc_set_cr(vcpu, cr & ~0x20000000); 511 513 else 512 - vcpu->arch.cr |= 0x20000000; 514 + kvmppc_set_cr(vcpu, cr | 0x20000000); 513 515 } 514 - vcpu->arch.gpr[rt] = gtlb_index; 516 + kvmppc_set_gpr(vcpu, rt, gtlb_index); 515 517 516 518 kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); 517 519 return EMULATE_DONE;
+1
arch/powerpc/kvm/Kconfig
···
20 20 bool
21 21 select PREEMPT_NOTIFIERS
22 22 select ANON_INODES
23 + select KVM_MMIO
23 24
24 25 config KVM_BOOK3S_64_HANDLER
25 26 bool
+262 -47
arch/powerpc/kvm/book3s.c
··· 33 33 34 34 /* #define EXIT_DEBUG */ 35 35 /* #define EXIT_DEBUG_SIMPLE */ 36 + /* #define DEBUG_EXT */ 36 37 37 - /* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0. 38 - * When set, we retrigger a DEC interrupt after that if DEC <= 0. 39 - * PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */ 40 - 41 - /* #define AGGRESSIVE_DEC */ 38 + static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); 42 39 43 40 struct kvm_stats_debugfs_item debugfs_entries[] = { 44 41 { "exits", VCPU_STAT(sum_exits) }, ··· 69 72 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 70 73 { 71 74 memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); 75 + memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu, 76 + sizeof(get_paca()->shadow_vcpu)); 72 77 get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; 73 78 } 74 79 75 80 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 76 81 { 77 82 memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); 83 + memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 84 + sizeof(get_paca()->shadow_vcpu)); 78 85 to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; 86 + 87 + kvmppc_giveup_ext(vcpu, MSR_FP); 88 + kvmppc_giveup_ext(vcpu, MSR_VEC); 89 + kvmppc_giveup_ext(vcpu, MSR_VSX); 79 90 } 80 91 81 - #if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG) 92 + #if defined(EXIT_DEBUG) 82 93 static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) 83 94 { 84 95 u64 jd = mftb() - vcpu->arch.dec_jiffies; 85 96 return vcpu->arch.dec - jd; 86 97 } 87 98 #endif 99 + 100 + static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) 101 + { 102 + vcpu->arch.shadow_msr = vcpu->arch.msr; 103 + /* Guest MSR values */ 104 + vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | 105 + MSR_BE | MSR_DE; 106 + /* Process MSR values */ 107 + vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | 108 + MSR_EE; 109 + /* External providers the guest reserved */ 110 + vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext); 111 + /* 64-bit Process MSR values */ 112 + #ifdef CONFIG_PPC_BOOK3S_64 113 + vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV; 114 + #endif 115 + } 88 116 89 117 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 90 118 { ··· 118 96 #ifdef EXIT_DEBUG 119 97 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); 120 98 #endif 99 + 121 100 msr &= to_book3s(vcpu)->msr_mask; 122 101 vcpu->arch.msr = msr; 123 - vcpu->arch.shadow_msr = msr | MSR_USER32; 124 - vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 | 125 - MSR_USER64 | MSR_SE | MSR_BE | MSR_DE | 126 - MSR_FE1); 102 + kvmppc_recalc_shadow_msr(vcpu); 127 103 128 104 if (msr & (MSR_WE|MSR_POW)) { 129 105 if (!vcpu->arch.pending_exceptions) { ··· 145 125 vcpu->arch.mmu.reset_msr(vcpu); 146 126 } 147 127 148 - void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) 128 + static int kvmppc_book3s_vec2irqprio(unsigned int vec) 149 129 { 150 130 unsigned int prio; 151 131 152 - vcpu->stat.queue_intr++; 153 132 switch (vec) { 154 133 case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break; 155 134 case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break; ··· 168 149 default: prio = BOOK3S_IRQPRIO_MAX; break; 169 150 } 170 151 171 - set_bit(prio, &vcpu->arch.pending_exceptions); 152 + return prio; 153 + } 154 + 155 + static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, 156 + unsigned int vec) 157 + { 158 + 
clear_bit(kvmppc_book3s_vec2irqprio(vec), 159 + &vcpu->arch.pending_exceptions); 160 + } 161 + 162 + void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) 163 + { 164 + vcpu->stat.queue_intr++; 165 + 166 + set_bit(kvmppc_book3s_vec2irqprio(vec), 167 + &vcpu->arch.pending_exceptions); 172 168 #ifdef EXIT_DEBUG 173 169 printk(KERN_INFO "Queueing interrupt %x\n", vec); 174 170 #endif 175 171 } 176 172 177 173 178 - void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) 174 + void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) 179 175 { 176 + to_book3s(vcpu)->prog_flags = flags; 180 177 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); 181 178 } 182 179 ··· 206 171 return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); 207 172 } 208 173 174 + void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) 175 + { 176 + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 177 + } 178 + 209 179 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 210 180 struct kvm_interrupt *irq) 211 181 { ··· 221 181 { 222 182 int deliver = 1; 223 183 int vec = 0; 184 + ulong flags = 0ULL; 224 185 225 186 switch (priority) { 226 187 case BOOK3S_IRQPRIO_DECREMENTER: ··· 255 214 break; 256 215 case BOOK3S_IRQPRIO_PROGRAM: 257 216 vec = BOOK3S_INTERRUPT_PROGRAM; 217 + flags = to_book3s(vcpu)->prog_flags; 258 218 break; 259 219 case BOOK3S_IRQPRIO_VSX: 260 220 vec = BOOK3S_INTERRUPT_VSX; ··· 286 244 #endif 287 245 288 246 if (deliver) 289 - kvmppc_inject_interrupt(vcpu, vec, 0ULL); 247 + kvmppc_inject_interrupt(vcpu, vec, flags); 290 248 291 249 return deliver; 292 250 } ··· 296 254 unsigned long *pending = &vcpu->arch.pending_exceptions; 297 255 unsigned int priority; 298 256 299 - /* XXX be more clever here - no need to mftb() on every entry */ 300 - /* Issue DEC again if it's still active */ 301 - #ifdef AGGRESSIVE_DEC 302 - if (vcpu->arch.msr & MSR_EE) 303 - if (kvmppc_get_dec(vcpu) & 0x80000000) 304 - kvmppc_core_queue_dec(vcpu); 305 - #endif 306 - 307 257 #ifdef EXIT_DEBUG 308 258 if (vcpu->arch.pending_exceptions) 309 259 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); 310 260 #endif 311 261 priority = __ffs(*pending); 312 262 while (priority <= (sizeof(unsigned int) * 8)) { 313 - if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) { 263 + if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && 264 + (priority != BOOK3S_IRQPRIO_DECREMENTER)) { 265 + /* DEC interrupts get cleared by mtdec */ 314 266 clear_bit(priority, &vcpu->arch.pending_exceptions); 315 267 break; 316 268 } ··· 539 503 /* Page not found in guest PTE entries */ 540 504 vcpu->arch.dear = vcpu->arch.fault_dear; 541 505 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; 542 - vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); 506 + vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); 543 507 kvmppc_book3s_queue_irqprio(vcpu, vec); 544 508 } else if (page_found == -EPERM) { 545 509 /* Storage protection */ 546 510 vcpu->arch.dear = vcpu->arch.fault_dear; 547 511 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; 548 512 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; 549 - vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); 513 + vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); 550 514 kvmppc_book3s_queue_irqprio(vcpu, vec); 551 515 } else if (page_found == -EINVAL) { 552 516 /* Page not found in guest SLB */ ··· 568 532 r = kvmppc_emulate_mmio(run, vcpu); 569 533 if ( r 
== RESUME_HOST_NV ) 570 534 r = RESUME_HOST; 571 - if ( r == RESUME_GUEST_NV ) 572 - r = RESUME_GUEST; 573 535 } 574 536 575 537 return r; 538 + } 539 + 540 + static inline int get_fpr_index(int i) 541 + { 542 + #ifdef CONFIG_VSX 543 + i *= 2; 544 + #endif 545 + return i; 546 + } 547 + 548 + /* Give up external provider (FPU, Altivec, VSX) */ 549 + static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) 550 + { 551 + struct thread_struct *t = &current->thread; 552 + u64 *vcpu_fpr = vcpu->arch.fpr; 553 + u64 *vcpu_vsx = vcpu->arch.vsr; 554 + u64 *thread_fpr = (u64*)t->fpr; 555 + int i; 556 + 557 + if (!(vcpu->arch.guest_owned_ext & msr)) 558 + return; 559 + 560 + #ifdef DEBUG_EXT 561 + printk(KERN_INFO "Giving up ext 0x%lx\n", msr); 562 + #endif 563 + 564 + switch (msr) { 565 + case MSR_FP: 566 + giveup_fpu(current); 567 + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 568 + vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; 569 + 570 + vcpu->arch.fpscr = t->fpscr.val; 571 + break; 572 + case MSR_VEC: 573 + #ifdef CONFIG_ALTIVEC 574 + giveup_altivec(current); 575 + memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); 576 + vcpu->arch.vscr = t->vscr; 577 + #endif 578 + break; 579 + case MSR_VSX: 580 + #ifdef CONFIG_VSX 581 + __giveup_vsx(current); 582 + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) 583 + vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; 584 + #endif 585 + break; 586 + default: 587 + BUG(); 588 + } 589 + 590 + vcpu->arch.guest_owned_ext &= ~msr; 591 + current->thread.regs->msr &= ~msr; 592 + kvmppc_recalc_shadow_msr(vcpu); 593 + } 594 + 595 + /* Handle external providers (FPU, Altivec, VSX) */ 596 + static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 597 + ulong msr) 598 + { 599 + struct thread_struct *t = &current->thread; 600 + u64 *vcpu_fpr = vcpu->arch.fpr; 601 + u64 *vcpu_vsx = vcpu->arch.vsr; 602 + u64 *thread_fpr = (u64*)t->fpr; 603 + int i; 604 + 605 + if (!(vcpu->arch.msr & msr)) { 606 + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 607 + return RESUME_GUEST; 608 + } 609 + 610 + #ifdef DEBUG_EXT 611 + printk(KERN_INFO "Loading up ext 0x%lx\n", msr); 612 + #endif 613 + 614 + current->thread.regs->msr |= msr; 615 + 616 + switch (msr) { 617 + case MSR_FP: 618 + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 619 + thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; 620 + 621 + t->fpscr.val = vcpu->arch.fpscr; 622 + t->fpexc_mode = 0; 623 + kvmppc_load_up_fpu(); 624 + break; 625 + case MSR_VEC: 626 + #ifdef CONFIG_ALTIVEC 627 + memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); 628 + t->vscr = vcpu->arch.vscr; 629 + t->vrsave = -1; 630 + kvmppc_load_up_altivec(); 631 + #endif 632 + break; 633 + case MSR_VSX: 634 + #ifdef CONFIG_VSX 635 + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) 636 + thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; 637 + kvmppc_load_up_vsx(); 638 + #endif 639 + break; 640 + default: 641 + BUG(); 642 + } 643 + 644 + vcpu->arch.guest_owned_ext |= msr; 645 + 646 + kvmppc_recalc_shadow_msr(vcpu); 647 + 648 + return RESUME_GUEST; 576 649 } 577 650 578 651 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, ··· 708 563 case BOOK3S_INTERRUPT_INST_STORAGE: 709 564 vcpu->stat.pf_instruc++; 710 565 /* only care about PTEG not found errors, but leave NX alone */ 711 - if (vcpu->arch.shadow_msr & 0x40000000) { 566 + if (vcpu->arch.shadow_srr1 & 0x40000000) { 712 567 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); 713 568 vcpu->stat.sp_instruc++; 714 569 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && ··· 720 575 
*/ 721 576 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 722 577 } else { 723 - vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000); 578 + vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; 724 579 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 725 580 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 726 581 r = RESUME_GUEST; ··· 766 621 case BOOK3S_INTERRUPT_PROGRAM: 767 622 { 768 623 enum emulation_result er; 624 + ulong flags; 625 + 626 + flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; 769 627 770 628 if (vcpu->arch.msr & MSR_PR) { 771 629 #ifdef EXIT_DEBUG ··· 776 628 #endif 777 629 if ((vcpu->arch.last_inst & 0xff0007ff) != 778 630 (INS_DCBZ & 0xfffffff7)) { 779 - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 631 + kvmppc_core_queue_program(vcpu, flags); 780 632 r = RESUME_GUEST; 781 633 break; 782 634 } ··· 786 638 er = kvmppc_emulate_instruction(run, vcpu); 787 639 switch (er) { 788 640 case EMULATE_DONE: 789 - r = RESUME_GUEST; 641 + r = RESUME_GUEST_NV; 790 642 break; 791 643 case EMULATE_FAIL: 792 644 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 793 645 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 794 - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 646 + kvmppc_core_queue_program(vcpu, flags); 795 647 r = RESUME_GUEST; 796 648 break; 797 649 default: ··· 801 653 } 802 654 case BOOK3S_INTERRUPT_SYSCALL: 803 655 #ifdef EXIT_DEBUG 804 - printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]); 656 + printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); 805 657 #endif 806 658 vcpu->stat.syscall_exits++; 807 659 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 808 660 r = RESUME_GUEST; 809 661 break; 810 - case BOOK3S_INTERRUPT_MACHINE_CHECK: 811 662 case BOOK3S_INTERRUPT_FP_UNAVAIL: 812 - case BOOK3S_INTERRUPT_TRACE: 663 + r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP); 664 + break; 813 665 case BOOK3S_INTERRUPT_ALTIVEC: 666 + r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC); 667 + break; 814 668 case BOOK3S_INTERRUPT_VSX: 669 + r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX); 670 + break; 671 + case BOOK3S_INTERRUPT_MACHINE_CHECK: 672 + case BOOK3S_INTERRUPT_TRACE: 815 673 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 816 674 r = RESUME_GUEST; 817 675 break; 818 676 default: 819 677 /* Ugh - bork here! What did we get? 
*/ 820 - printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr); 678 + printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 679 + exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1); 821 680 r = RESUME_HOST; 822 681 BUG(); 823 682 break; ··· 867 712 int i; 868 713 869 714 regs->pc = vcpu->arch.pc; 870 - regs->cr = vcpu->arch.cr; 715 + regs->cr = kvmppc_get_cr(vcpu); 871 716 regs->ctr = vcpu->arch.ctr; 872 717 regs->lr = vcpu->arch.lr; 873 - regs->xer = vcpu->arch.xer; 718 + regs->xer = kvmppc_get_xer(vcpu); 874 719 regs->msr = vcpu->arch.msr; 875 720 regs->srr0 = vcpu->arch.srr0; 876 721 regs->srr1 = vcpu->arch.srr1; ··· 884 729 regs->sprg7 = vcpu->arch.sprg6; 885 730 886 731 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 887 - regs->gpr[i] = vcpu->arch.gpr[i]; 732 + regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 888 733 889 734 return 0; 890 735 } ··· 894 739 int i; 895 740 896 741 vcpu->arch.pc = regs->pc; 897 - vcpu->arch.cr = regs->cr; 742 + kvmppc_set_cr(vcpu, regs->cr); 898 743 vcpu->arch.ctr = regs->ctr; 899 744 vcpu->arch.lr = regs->lr; 900 - vcpu->arch.xer = regs->xer; 745 + kvmppc_set_xer(vcpu, regs->xer); 901 746 kvmppc_set_msr(vcpu, regs->msr); 902 747 vcpu->arch.srr0 = regs->srr0; 903 748 vcpu->arch.srr1 = regs->srr1; ··· 909 754 vcpu->arch.sprg6 = regs->sprg5; 910 755 vcpu->arch.sprg7 = regs->sprg6; 911 756 912 - for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) 913 - vcpu->arch.gpr[i] = regs->gpr[i]; 757 + for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 758 + kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 914 759 915 760 return 0; 916 761 } ··· 1005 850 int is_dirty = 0; 1006 851 int r, n; 1007 852 1008 - down_write(&kvm->slots_lock); 853 + mutex_lock(&kvm->slots_lock); 1009 854 1010 855 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1011 856 if (r) ··· 1013 858 1014 859 /* If nothing is dirty, don't bother messing with page tables. 
*/ 1015 860 if (is_dirty) { 1016 - memslot = &kvm->memslots[log->slot]; 861 + memslot = &kvm->memslots->memslots[log->slot]; 1017 862 1018 863 ga = memslot->base_gfn << PAGE_SHIFT; 1019 864 ga_end = ga + (memslot->npages << PAGE_SHIFT); ··· 1027 872 1028 873 r = 0; 1029 874 out: 1030 - up_write(&kvm->slots_lock); 875 + mutex_unlock(&kvm->slots_lock); 1031 876 return r; 1032 877 } 1033 878 ··· 1065 910 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; 1066 911 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; 1067 912 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; 913 + vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; 1068 914 1069 915 vcpu->arch.shadow_msr = MSR_USER64; 1070 916 ··· 1099 943 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1100 944 { 1101 945 int ret; 946 + struct thread_struct ext_bkp; 947 + bool save_vec = current->thread.used_vr; 948 + bool save_vsx = current->thread.used_vsr; 949 + ulong ext_msr; 1102 950 1103 951 /* No need to go into the guest when all we do is going out */ 1104 952 if (signal_pending(current)) { ··· 1110 950 return -EINTR; 1111 951 } 1112 952 953 + /* Save FPU state in stack */ 954 + if (current->thread.regs->msr & MSR_FP) 955 + giveup_fpu(current); 956 + memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr)); 957 + ext_bkp.fpscr = current->thread.fpscr; 958 + ext_bkp.fpexc_mode = current->thread.fpexc_mode; 959 + 960 + #ifdef CONFIG_ALTIVEC 961 + /* Save Altivec state in stack */ 962 + if (save_vec) { 963 + if (current->thread.regs->msr & MSR_VEC) 964 + giveup_altivec(current); 965 + memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr)); 966 + ext_bkp.vscr = current->thread.vscr; 967 + ext_bkp.vrsave = current->thread.vrsave; 968 + } 969 + ext_bkp.used_vr = current->thread.used_vr; 970 + #endif 971 + 972 + #ifdef CONFIG_VSX 973 + /* Save VSX state in stack */ 974 + if (save_vsx && (current->thread.regs->msr & MSR_VSX)) 975 + __giveup_vsx(current); 976 + ext_bkp.used_vsr = current->thread.used_vsr; 977 + #endif 978 + 979 + /* Remember the MSR with disabled extensions */ 980 + ext_msr = current->thread.regs->msr; 981 + 1113 982 /* XXX we get called with irq disabled - change that! */ 1114 983 local_irq_enable(); 1115 984 1116 985 ret = __kvmppc_vcpu_entry(kvm_run, vcpu); 1117 986 1118 987 local_irq_disable(); 988 + 989 + current->thread.regs->msr = ext_msr; 990 + 991 + /* Make sure we save the guest FPU/Altivec/VSX state */ 992 + kvmppc_giveup_ext(vcpu, MSR_FP); 993 + kvmppc_giveup_ext(vcpu, MSR_VEC); 994 + kvmppc_giveup_ext(vcpu, MSR_VSX); 995 + 996 + /* Restore FPU state from stack */ 997 + memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr)); 998 + current->thread.fpscr = ext_bkp.fpscr; 999 + current->thread.fpexc_mode = ext_bkp.fpexc_mode; 1000 + 1001 + #ifdef CONFIG_ALTIVEC 1002 + /* Restore Altivec state from stack */ 1003 + if (save_vec && current->thread.used_vr) { 1004 + memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr)); 1005 + current->thread.vscr = ext_bkp.vscr; 1006 + current->thread.vrsave= ext_bkp.vrsave; 1007 + } 1008 + current->thread.used_vr = ext_bkp.used_vr; 1009 + #endif 1010 + 1011 + #ifdef CONFIG_VSX 1012 + current->thread.used_vsr = ext_bkp.used_vsr; 1013 + #endif 1119 1014 1120 1015 return ret; 1121 1016 }
+40 -37
arch/powerpc/kvm/book3s_64_emulate.c
··· 65 65 case 31: 66 66 switch (get_xop(inst)) { 67 67 case OP_31_XOP_MFMSR: 68 - vcpu->arch.gpr[get_rt(inst)] = vcpu->arch.msr; 68 + kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr); 69 69 break; 70 70 case OP_31_XOP_MTMSRD: 71 71 { 72 - ulong rs = vcpu->arch.gpr[get_rs(inst)]; 72 + ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst)); 73 73 if (inst & 0x10000) { 74 74 vcpu->arch.msr &= ~(MSR_RI | MSR_EE); 75 75 vcpu->arch.msr |= rs & (MSR_RI | MSR_EE); ··· 78 78 break; 79 79 } 80 80 case OP_31_XOP_MTMSR: 81 - kvmppc_set_msr(vcpu, vcpu->arch.gpr[get_rs(inst)]); 81 + kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); 82 82 break; 83 83 case OP_31_XOP_MFSRIN: 84 84 { 85 85 int srnum; 86 86 87 - srnum = (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf; 87 + srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf; 88 88 if (vcpu->arch.mmu.mfsrin) { 89 89 u32 sr; 90 90 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); 91 - vcpu->arch.gpr[get_rt(inst)] = sr; 91 + kvmppc_set_gpr(vcpu, get_rt(inst), sr); 92 92 } 93 93 break; 94 94 } 95 95 case OP_31_XOP_MTSRIN: 96 96 vcpu->arch.mmu.mtsrin(vcpu, 97 - (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf, 98 - vcpu->arch.gpr[get_rs(inst)]); 97 + (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, 98 + kvmppc_get_gpr(vcpu, get_rs(inst))); 99 99 break; 100 100 case OP_31_XOP_TLBIE: 101 101 case OP_31_XOP_TLBIEL: 102 102 { 103 103 bool large = (inst & 0x00200000) ? true : false; 104 - ulong addr = vcpu->arch.gpr[get_rb(inst)]; 104 + ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst)); 105 105 vcpu->arch.mmu.tlbie(vcpu, addr, large); 106 106 break; 107 107 } ··· 111 111 if (!vcpu->arch.mmu.slbmte) 112 112 return EMULATE_FAIL; 113 113 114 - vcpu->arch.mmu.slbmte(vcpu, vcpu->arch.gpr[get_rs(inst)], 115 - vcpu->arch.gpr[get_rb(inst)]); 114 + vcpu->arch.mmu.slbmte(vcpu, 115 + kvmppc_get_gpr(vcpu, get_rs(inst)), 116 + kvmppc_get_gpr(vcpu, get_rb(inst))); 116 117 break; 117 118 case OP_31_XOP_SLBIE: 118 119 if (!vcpu->arch.mmu.slbie) 119 120 return EMULATE_FAIL; 120 121 121 - vcpu->arch.mmu.slbie(vcpu, vcpu->arch.gpr[get_rb(inst)]); 122 + vcpu->arch.mmu.slbie(vcpu, 123 + kvmppc_get_gpr(vcpu, get_rb(inst))); 122 124 break; 123 125 case OP_31_XOP_SLBIA: 124 126 if (!vcpu->arch.mmu.slbia) ··· 134 132 } else { 135 133 ulong t, rb; 136 134 137 - rb = vcpu->arch.gpr[get_rb(inst)]; 135 + rb = kvmppc_get_gpr(vcpu, get_rb(inst)); 138 136 t = vcpu->arch.mmu.slbmfee(vcpu, rb); 139 - vcpu->arch.gpr[get_rt(inst)] = t; 137 + kvmppc_set_gpr(vcpu, get_rt(inst), t); 140 138 } 141 139 break; 142 140 case OP_31_XOP_SLBMFEV: ··· 145 143 } else { 146 144 ulong t, rb; 147 145 148 - rb = vcpu->arch.gpr[get_rb(inst)]; 146 + rb = kvmppc_get_gpr(vcpu, get_rb(inst)); 149 147 t = vcpu->arch.mmu.slbmfev(vcpu, rb); 150 - vcpu->arch.gpr[get_rt(inst)] = t; 148 + kvmppc_set_gpr(vcpu, get_rt(inst), t); 151 149 } 152 150 break; 153 151 case OP_31_XOP_DCBZ: 154 152 { 155 - ulong rb = vcpu->arch.gpr[get_rb(inst)]; 153 + ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); 156 154 ulong ra = 0; 157 155 ulong addr; 158 156 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; 159 157 160 158 if (get_ra(inst)) 161 - ra = vcpu->arch.gpr[get_ra(inst)]; 159 + ra = kvmppc_get_gpr(vcpu, get_ra(inst)); 162 160 163 161 addr = (ra + rb) & ~31ULL; 164 162 if (!(vcpu->arch.msr & MSR_SF)) ··· 235 233 int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 236 234 { 237 235 int emulated = EMULATE_DONE; 236 + ulong spr_val = kvmppc_get_gpr(vcpu, rs); 238 237 239 238 switch (sprn) { 240 239 case SPRN_SDR1: 241 - to_book3s(vcpu)->sdr1 = 
vcpu->arch.gpr[rs]; 240 + to_book3s(vcpu)->sdr1 = spr_val; 242 241 break; 243 242 case SPRN_DSISR: 244 - to_book3s(vcpu)->dsisr = vcpu->arch.gpr[rs]; 243 + to_book3s(vcpu)->dsisr = spr_val; 245 244 break; 246 245 case SPRN_DAR: 247 - vcpu->arch.dear = vcpu->arch.gpr[rs]; 246 + vcpu->arch.dear = spr_val; 248 247 break; 249 248 case SPRN_HIOR: 250 - to_book3s(vcpu)->hior = vcpu->arch.gpr[rs]; 249 + to_book3s(vcpu)->hior = spr_val; 251 250 break; 252 251 case SPRN_IBAT0U ... SPRN_IBAT3L: 253 252 case SPRN_IBAT4U ... SPRN_IBAT7L: 254 253 case SPRN_DBAT0U ... SPRN_DBAT3L: 255 254 case SPRN_DBAT4U ... SPRN_DBAT7L: 256 - kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]); 255 + kvmppc_write_bat(vcpu, sprn, (u32)spr_val); 257 256 /* BAT writes happen so rarely that we're ok to flush 258 257 * everything here */ 259 258 kvmppc_mmu_pte_flush(vcpu, 0, 0); 260 259 break; 261 260 case SPRN_HID0: 262 - to_book3s(vcpu)->hid[0] = vcpu->arch.gpr[rs]; 261 + to_book3s(vcpu)->hid[0] = spr_val; 263 262 break; 264 263 case SPRN_HID1: 265 - to_book3s(vcpu)->hid[1] = vcpu->arch.gpr[rs]; 264 + to_book3s(vcpu)->hid[1] = spr_val; 266 265 break; 267 266 case SPRN_HID2: 268 - to_book3s(vcpu)->hid[2] = vcpu->arch.gpr[rs]; 267 + to_book3s(vcpu)->hid[2] = spr_val; 269 268 break; 270 269 case SPRN_HID4: 271 - to_book3s(vcpu)->hid[4] = vcpu->arch.gpr[rs]; 270 + to_book3s(vcpu)->hid[4] = spr_val; 272 271 break; 273 272 case SPRN_HID5: 274 - to_book3s(vcpu)->hid[5] = vcpu->arch.gpr[rs]; 273 + to_book3s(vcpu)->hid[5] = spr_val; 275 274 /* guest HID5 set can change is_dcbz32 */ 276 275 if (vcpu->arch.mmu.is_dcbz32(vcpu) && 277 276 (mfmsr() & MSR_HV)) ··· 302 299 303 300 switch (sprn) { 304 301 case SPRN_SDR1: 305 - vcpu->arch.gpr[rt] = to_book3s(vcpu)->sdr1; 302 + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); 306 303 break; 307 304 case SPRN_DSISR: 308 - vcpu->arch.gpr[rt] = to_book3s(vcpu)->dsisr; 305 + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr); 309 306 break; 310 307 case SPRN_DAR: 311 - vcpu->arch.gpr[rt] = vcpu->arch.dear; 308 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); 312 309 break; 313 310 case SPRN_HIOR: 314 - vcpu->arch.gpr[rt] = to_book3s(vcpu)->hior; 311 + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior); 315 312 break; 316 313 case SPRN_HID0: 317 - vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[0]; 314 + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]); 318 315 break; 319 316 case SPRN_HID1: 320 - vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[1]; 317 + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); 321 318 break; 322 319 case SPRN_HID2: 323 - vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[2]; 320 + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); 324 321 break; 325 322 case SPRN_HID4: 326 - vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[4]; 323 + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); 327 324 break; 328 325 case SPRN_HID5: 329 - vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[5]; 326 + kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); 330 327 break; 331 328 case SPRN_THRM1: 332 329 case SPRN_THRM2: 333 330 case SPRN_THRM3: 334 331 case SPRN_CTRLF: 335 332 case SPRN_CTRLT: 336 - vcpu->arch.gpr[rt] = 0; 333 + kvmppc_set_gpr(vcpu, rt, 0); 337 334 break; 338 335 default: 339 336 printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
+8
arch/powerpc/kvm/book3s_64_exports.c
···
22 22
23 23 EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
24 24 EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
25 + EXPORT_SYMBOL_GPL(kvmppc_rmcall);
26 + EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
27 + #ifdef CONFIG_ALTIVEC
28 + EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
29 + #endif
30 + #ifdef CONFIG_VSX
31 + EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
32 + #endif
+130 -204
arch/powerpc/kvm/book3s_64_interrupts.S
··· 28 28 #define ULONG_SIZE 8 29 29 #define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) 30 30 31 - .macro mfpaca tmp_reg, src_reg, offset, vcpu_reg 32 - ld \tmp_reg, (PACA_EXMC+\offset)(r13) 33 - std \tmp_reg, VCPU_GPR(\src_reg)(\vcpu_reg) 34 - .endm 35 - 36 31 .macro DISABLE_INTERRUPTS 37 32 mfmsr r0 38 33 rldicl r0,r0,48,1 39 34 rotldi r0,r0,16 40 35 mtmsrd r0,1 41 36 .endm 37 + 38 + #define VCPU_LOAD_NVGPRS(vcpu) \ 39 + ld r14, VCPU_GPR(r14)(vcpu); \ 40 + ld r15, VCPU_GPR(r15)(vcpu); \ 41 + ld r16, VCPU_GPR(r16)(vcpu); \ 42 + ld r17, VCPU_GPR(r17)(vcpu); \ 43 + ld r18, VCPU_GPR(r18)(vcpu); \ 44 + ld r19, VCPU_GPR(r19)(vcpu); \ 45 + ld r20, VCPU_GPR(r20)(vcpu); \ 46 + ld r21, VCPU_GPR(r21)(vcpu); \ 47 + ld r22, VCPU_GPR(r22)(vcpu); \ 48 + ld r23, VCPU_GPR(r23)(vcpu); \ 49 + ld r24, VCPU_GPR(r24)(vcpu); \ 50 + ld r25, VCPU_GPR(r25)(vcpu); \ 51 + ld r26, VCPU_GPR(r26)(vcpu); \ 52 + ld r27, VCPU_GPR(r27)(vcpu); \ 53 + ld r28, VCPU_GPR(r28)(vcpu); \ 54 + ld r29, VCPU_GPR(r29)(vcpu); \ 55 + ld r30, VCPU_GPR(r30)(vcpu); \ 56 + ld r31, VCPU_GPR(r31)(vcpu); \ 42 57 43 58 /***************************************************************************** 44 59 * * ··· 82 67 SAVE_NVGPRS(r1) 83 68 84 69 /* Save LR */ 85 - mflr r14 86 - std r14, _LINK(r1) 87 - 88 - /* XXX optimize non-volatile loading away */ 89 - kvm_start_lightweight: 90 - 91 - DISABLE_INTERRUPTS 92 - 93 - /* Save R1/R2 in the PACA */ 94 - std r1, PACAR1(r13) 95 - std r2, (PACA_EXMC+EX_SRR0)(r13) 96 - ld r3, VCPU_HIGHMEM_HANDLER(r4) 97 - std r3, PACASAVEDMSR(r13) 70 + std r0, _LINK(r1) 98 71 99 72 /* Load non-volatile guest state from the vcpu */ 100 - ld r14, VCPU_GPR(r14)(r4) 101 - ld r15, VCPU_GPR(r15)(r4) 102 - ld r16, VCPU_GPR(r16)(r4) 103 - ld r17, VCPU_GPR(r17)(r4) 104 - ld r18, VCPU_GPR(r18)(r4) 105 - ld r19, VCPU_GPR(r19)(r4) 106 - ld r20, VCPU_GPR(r20)(r4) 107 - ld r21, VCPU_GPR(r21)(r4) 108 - ld r22, VCPU_GPR(r22)(r4) 109 - ld r23, VCPU_GPR(r23)(r4) 110 - ld r24, VCPU_GPR(r24)(r4) 111 - ld r25, VCPU_GPR(r25)(r4) 112 - ld r26, VCPU_GPR(r26)(r4) 113 - ld r27, VCPU_GPR(r27)(r4) 114 - ld r28, VCPU_GPR(r28)(r4) 115 - ld r29, VCPU_GPR(r29)(r4) 116 - ld r30, VCPU_GPR(r30)(r4) 117 - ld r31, VCPU_GPR(r31)(r4) 73 + VCPU_LOAD_NVGPRS(r4) 74 + 75 + /* Save R1/R2 in the PACA */ 76 + std r1, PACA_KVM_HOST_R1(r13) 77 + std r2, PACA_KVM_HOST_R2(r13) 78 + 79 + /* XXX swap in/out on load? */ 80 + ld r3, VCPU_HIGHMEM_HANDLER(r4) 81 + std r3, PACA_KVM_VMHANDLER(r13) 82 + 83 + kvm_start_lightweight: 118 84 119 85 ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */ 120 86 ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ 121 87 122 - ld r3, VCPU_TRAMPOLINE_ENTER(r4) 123 - mtsrr0 r3 124 - 125 - LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 126 - mtsrr1 r3 127 - 128 - /* Load guest state in the respective registers */ 129 - lwz r3, VCPU_CR(r4) /* r3 = vcpu->arch.cr */ 130 - stw r3, (PACA_EXMC + EX_CCR)(r13) 131 - 132 - ld r3, VCPU_CTR(r4) /* r3 = vcpu->arch.ctr */ 133 - mtctr r3 /* CTR = r3 */ 88 + /* Load some guest state in the respective registers */ 89 + ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */ 90 + /* will be swapped in by rmcall */ 134 91 135 92 ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */ 136 93 mtlr r3 /* LR = r3 */ 137 94 138 - ld r3, VCPU_XER(r4) /* r3 = vcpu->arch.xer */ 139 - std r3, (PACA_EXMC + EX_R3)(r13) 95 + DISABLE_INTERRUPTS 140 96 141 97 /* Some guests may need to have dcbz set to 32 byte length. 
142 98 * ··· 127 141 mtspr SPRN_HID5,r3 128 142 129 143 no_dcbz32_on: 130 - /* Load guest GPRs */ 131 144 132 - ld r3, VCPU_GPR(r9)(r4) 133 - std r3, (PACA_EXMC + EX_R9)(r13) 134 - ld r3, VCPU_GPR(r10)(r4) 135 - std r3, (PACA_EXMC + EX_R10)(r13) 136 - ld r3, VCPU_GPR(r11)(r4) 137 - std r3, (PACA_EXMC + EX_R11)(r13) 138 - ld r3, VCPU_GPR(r12)(r4) 139 - std r3, (PACA_EXMC + EX_R12)(r13) 140 - ld r3, VCPU_GPR(r13)(r4) 141 - std r3, (PACA_EXMC + EX_R13)(r13) 145 + ld r6, VCPU_RMCALL(r4) 146 + mtctr r6 142 147 143 - ld r0, VCPU_GPR(r0)(r4) 144 - ld r1, VCPU_GPR(r1)(r4) 145 - ld r2, VCPU_GPR(r2)(r4) 146 - ld r3, VCPU_GPR(r3)(r4) 147 - ld r5, VCPU_GPR(r5)(r4) 148 - ld r6, VCPU_GPR(r6)(r4) 149 - ld r7, VCPU_GPR(r7)(r4) 150 - ld r8, VCPU_GPR(r8)(r4) 151 - ld r4, VCPU_GPR(r4)(r4) 152 - 153 - /* This sets the Magic value for the trampoline */ 154 - 155 - li r11, 1 156 - stb r11, PACA_KVM_IN_GUEST(r13) 148 + ld r3, VCPU_TRAMPOLINE_ENTER(r4) 149 + LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 157 150 158 151 /* Jump to SLB patching handlder and into our guest */ 159 - RFI 152 + bctr 160 153 161 154 /* 162 155 * This is the handler in module memory. It gets jumped at from the ··· 149 184 /* 150 185 * Register usage at this point: 151 186 * 152 - * R00 = guest R13 153 - * R01 = host R1 154 - * R02 = host R2 155 - * R10 = guest PC 156 - * R11 = guest MSR 157 - * R12 = exit handler id 158 - * R13 = PACA 159 - * PACA.exmc.R9 = guest R1 160 - * PACA.exmc.R10 = guest R10 161 - * PACA.exmc.R11 = guest R11 162 - * PACA.exmc.R12 = guest R12 163 - * PACA.exmc.R13 = guest R2 164 - * PACA.exmc.DAR = guest DAR 165 - * PACA.exmc.DSISR = guest DSISR 166 - * PACA.exmc.LR = guest instruction 167 - * PACA.exmc.CCR = guest CR 168 - * PACA.exmc.SRR0 = guest R0 187 + * R0 = guest last inst 188 + * R1 = host R1 189 + * R2 = host R2 190 + * R3 = guest PC 191 + * R4 = guest MSR 192 + * R5 = guest DAR 193 + * R6 = guest DSISR 194 + * R13 = PACA 195 + * PACA.KVM.* = guest * 169 196 * 170 197 */ 171 198 172 - std r3, (PACA_EXMC+EX_R3)(r13) 173 - 174 - /* save the exit id in R3 */ 175 - mr r3, r12 176 - 177 - /* R12 = vcpu */ 178 - ld r12, GPR4(r1) 199 + /* R7 = vcpu */ 200 + ld r7, GPR4(r1) 179 201 180 202 /* Now save the guest state */ 181 203 182 - std r0, VCPU_GPR(r13)(r12) 183 - std r4, VCPU_GPR(r4)(r12) 184 - std r5, VCPU_GPR(r5)(r12) 185 - std r6, VCPU_GPR(r6)(r12) 186 - std r7, VCPU_GPR(r7)(r12) 187 - std r8, VCPU_GPR(r8)(r12) 188 - std r9, VCPU_GPR(r9)(r12) 204 + stw r0, VCPU_LAST_INST(r7) 189 205 190 - /* get registers from PACA */ 191 - mfpaca r5, r0, EX_SRR0, r12 192 - mfpaca r5, r3, EX_R3, r12 193 - mfpaca r5, r1, EX_R9, r12 194 - mfpaca r5, r10, EX_R10, r12 195 - mfpaca r5, r11, EX_R11, r12 196 - mfpaca r5, r12, EX_R12, r12 197 - mfpaca r5, r2, EX_R13, r12 206 + std r3, VCPU_PC(r7) 207 + std r4, VCPU_SHADOW_SRR1(r7) 208 + std r5, VCPU_FAULT_DEAR(r7) 209 + std r6, VCPU_FAULT_DSISR(r7) 198 210 199 - lwz r5, (PACA_EXMC+EX_LR)(r13) 200 - stw r5, VCPU_LAST_INST(r12) 201 - 202 - lwz r5, (PACA_EXMC+EX_CCR)(r13) 203 - stw r5, VCPU_CR(r12) 204 - 205 - ld r5, VCPU_HFLAGS(r12) 211 + ld r5, VCPU_HFLAGS(r7) 206 212 rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ 207 213 beq no_dcbz32_off 208 214 215 + li r4, 0 209 216 mfspr r5,SPRN_HID5 210 - rldimi r5,r5,6,56 217 + rldimi r5,r4,6,56 211 218 mtspr SPRN_HID5,r5 212 219 213 220 no_dcbz32_off: 214 221 215 - /* XXX maybe skip on lightweight? 
*/ 216 - std r14, VCPU_GPR(r14)(r12) 217 - std r15, VCPU_GPR(r15)(r12) 218 - std r16, VCPU_GPR(r16)(r12) 219 - std r17, VCPU_GPR(r17)(r12) 220 - std r18, VCPU_GPR(r18)(r12) 221 - std r19, VCPU_GPR(r19)(r12) 222 - std r20, VCPU_GPR(r20)(r12) 223 - std r21, VCPU_GPR(r21)(r12) 224 - std r22, VCPU_GPR(r22)(r12) 225 - std r23, VCPU_GPR(r23)(r12) 226 - std r24, VCPU_GPR(r24)(r12) 227 - std r25, VCPU_GPR(r25)(r12) 228 - std r26, VCPU_GPR(r26)(r12) 229 - std r27, VCPU_GPR(r27)(r12) 230 - std r28, VCPU_GPR(r28)(r12) 231 - std r29, VCPU_GPR(r29)(r12) 232 - std r30, VCPU_GPR(r30)(r12) 233 - std r31, VCPU_GPR(r31)(r12) 222 + std r14, VCPU_GPR(r14)(r7) 223 + std r15, VCPU_GPR(r15)(r7) 224 + std r16, VCPU_GPR(r16)(r7) 225 + std r17, VCPU_GPR(r17)(r7) 226 + std r18, VCPU_GPR(r18)(r7) 227 + std r19, VCPU_GPR(r19)(r7) 228 + std r20, VCPU_GPR(r20)(r7) 229 + std r21, VCPU_GPR(r21)(r7) 230 + std r22, VCPU_GPR(r22)(r7) 231 + std r23, VCPU_GPR(r23)(r7) 232 + std r24, VCPU_GPR(r24)(r7) 233 + std r25, VCPU_GPR(r25)(r7) 234 + std r26, VCPU_GPR(r26)(r7) 235 + std r27, VCPU_GPR(r27)(r7) 236 + std r28, VCPU_GPR(r28)(r7) 237 + std r29, VCPU_GPR(r29)(r7) 238 + std r30, VCPU_GPR(r30)(r7) 239 + std r31, VCPU_GPR(r31)(r7) 234 240 235 - /* Restore non-volatile host registers (r14 - r31) */ 236 - REST_NVGPRS(r1) 237 - 238 - /* Save guest PC (R10) */ 239 - std r10, VCPU_PC(r12) 240 - 241 - /* Save guest msr (R11) */ 242 - std r11, VCPU_SHADOW_MSR(r12) 243 - 244 - /* Save guest CTR (in R12) */ 241 + /* Save guest CTR */ 245 242 mfctr r5 246 - std r5, VCPU_CTR(r12) 243 + std r5, VCPU_CTR(r7) 247 244 248 245 /* Save guest LR */ 249 246 mflr r5 250 - std r5, VCPU_LR(r12) 251 - 252 - /* Save guest XER */ 253 - mfxer r5 254 - std r5, VCPU_XER(r12) 255 - 256 - /* Save guest DAR */ 257 - ld r5, (PACA_EXMC+EX_DAR)(r13) 258 - std r5, VCPU_FAULT_DEAR(r12) 259 - 260 - /* Save guest DSISR */ 261 - lwz r5, (PACA_EXMC+EX_DSISR)(r13) 262 - std r5, VCPU_FAULT_DSISR(r12) 247 + std r5, VCPU_LR(r7) 263 248 264 249 /* Restore host msr -> SRR1 */ 265 - ld r7, VCPU_HOST_MSR(r12) 266 - mtsrr1 r7 267 - 268 - /* Restore host IP -> SRR0 */ 269 - ld r6, VCPU_HOST_RETIP(r12) 270 - mtsrr0 r6 250 + ld r6, VCPU_HOST_MSR(r7) 271 251 272 252 /* 273 253 * For some interrupts, we need to call the real Linux ··· 224 314 * r3 = address of interrupt handler (exit reason) 225 315 */ 226 316 227 - cmpwi r3, BOOK3S_INTERRUPT_EXTERNAL 317 + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 228 318 beq call_linux_handler 229 - cmpwi r3, BOOK3S_INTERRUPT_DECREMENTER 319 + cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER 230 320 beq call_linux_handler 231 321 232 - /* Back to Interruptable Mode! (goto kvm_return_point) */ 233 - RFI 322 + /* Back to EE=1 */ 323 + mtmsr r6 324 + b kvm_return_point 234 325 235 326 call_linux_handler: 236 327 ··· 244 333 * interrupt handler! 245 334 * 246 335 * R3 still contains the exit code, 247 - * R6 VCPU_HOST_RETIP and 248 - * R7 VCPU_HOST_MSR 336 + * R5 VCPU_HOST_RETIP and 337 + * R6 VCPU_HOST_MSR 249 338 */ 250 339 251 - mtlr r3 340 + /* Restore host IP -> SRR0 */ 341 + ld r5, VCPU_HOST_RETIP(r7) 252 342 253 - ld r5, VCPU_TRAMPOLINE_LOWMEM(r12) 254 - mtsrr0 r5 255 - LOAD_REG_IMMEDIATE(r5, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 256 - mtsrr1 r5 343 + /* XXX Better move to a safe function? 344 + * What if we get an HTAB flush in between mtsrr0 and mtsrr1? 
*/ 345 + 346 + mtlr r12 347 + 348 + ld r4, VCPU_TRAMPOLINE_LOWMEM(r7) 349 + mtsrr0 r4 350 + LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 351 + mtsrr1 r3 257 352 258 353 RFI 259 354 ··· 268 351 269 352 /* Jump back to lightweight entry if we're supposed to */ 270 353 /* go back into the guest */ 271 - mr r5, r3 354 + 355 + /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ 356 + mr r5, r12 357 + 272 358 /* Restore r3 (kvm_run) and r4 (vcpu) */ 273 359 REST_2GPRS(3, r1) 274 360 bl KVMPPC_HANDLE_EXIT 275 361 276 - #if 0 /* XXX get lightweight exits back */ 362 + /* If RESUME_GUEST, get back in the loop */ 277 363 cmpwi r3, RESUME_GUEST 278 - bne kvm_exit_heavyweight 364 + beq kvm_loop_lightweight 279 365 280 - /* put VCPU and KVM_RUN back into place and roll again! */ 281 - REST_2GPRS(3, r1) 282 - b kvm_start_lightweight 366 + cmpwi r3, RESUME_GUEST_NV 367 + beq kvm_loop_heavyweight 283 368 284 - kvm_exit_heavyweight: 285 - /* Restore non-volatile host registers */ 286 - ld r14, _LINK(r1) 287 - mtlr r14 288 - REST_NVGPRS(r1) 369 + kvm_exit_loop: 289 370 290 - addi r1, r1, SWITCH_FRAME_SIZE 291 - #else 292 371 ld r4, _LINK(r1) 293 372 mtlr r4 294 373 295 - cmpwi r3, RESUME_GUEST 296 - bne kvm_exit_heavyweight 374 + /* Restore non-volatile host registers (r14 - r31) */ 375 + REST_NVGPRS(r1) 297 376 377 + addi r1, r1, SWITCH_FRAME_SIZE 378 + blr 379 + 380 + kvm_loop_heavyweight: 381 + 382 + ld r4, _LINK(r1) 383 + std r4, (16 + SWITCH_FRAME_SIZE)(r1) 384 + 385 + /* Load vcpu and cpu_run */ 298 386 REST_2GPRS(3, r1) 299 387 300 - addi r1, r1, SWITCH_FRAME_SIZE 388 + /* Load non-volatile guest state from the vcpu */ 389 + VCPU_LOAD_NVGPRS(r4) 301 390 302 - b kvm_start_entry 391 + /* Jump back into the beginning of this function */ 392 + b kvm_start_lightweight 303 393 304 - kvm_exit_heavyweight: 394 + kvm_loop_lightweight: 305 395 306 - addi r1, r1, SWITCH_FRAME_SIZE 307 - #endif 396 + /* We'll need the vcpu pointer */ 397 + REST_GPR(4, r1) 308 398 309 - blr 399 + /* Jump back into the beginning of this function */ 400 + b kvm_start_lightweight 401 +
+6 -4
arch/powerpc/kvm/book3s_64_mmu.c
··· 54 54 if (!vcpu_book3s->slb[i].valid) 55 55 continue; 56 56 57 - if (vcpu_book3s->slb[i].large) 57 + if (vcpu_book3s->slb[i].tb) 58 58 cmp_esid = esid_1t; 59 59 60 60 if (vcpu_book3s->slb[i].esid == cmp_esid) ··· 65 65 eaddr, esid, esid_1t); 66 66 for (i = 0; i < vcpu_book3s->slb_nr; i++) { 67 67 if (vcpu_book3s->slb[i].vsid) 68 - dprintk(" %d: %c%c %llx %llx\n", i, 68 + dprintk(" %d: %c%c%c %llx %llx\n", i, 69 69 vcpu_book3s->slb[i].valid ? 'v' : ' ', 70 70 vcpu_book3s->slb[i].large ? 'l' : ' ', 71 + vcpu_book3s->slb[i].tb ? 't' : ' ', 71 72 vcpu_book3s->slb[i].esid, 72 73 vcpu_book3s->slb[i].vsid); 73 74 } ··· 85 84 if (!slb) 86 85 return 0; 87 86 88 - if (slb->large) 87 + if (slb->tb) 89 88 return (((u64)eaddr >> 12) & 0xfffffff) | 90 89 (((u64)slb->vsid) << 28); 91 90 ··· 310 309 slbe = &vcpu_book3s->slb[slb_nr]; 311 310 312 311 slbe->large = (rs & SLB_VSID_L) ? 1 : 0; 313 - slbe->esid = slbe->large ? esid_1t : esid; 312 + slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; 313 + slbe->esid = slbe->tb ? esid_1t : esid; 314 314 slbe->vsid = rs >> 12; 315 315 slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; 316 316 slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;
+96 -23
arch/powerpc/kvm/book3s_64_rmhandlers.S
··· 45 45 * To distinguish, we check a magic byte in the PACA 46 46 */ 47 47 mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */ 48 - std r12, (PACA_EXMC + EX_R12)(r13) 48 + std r12, PACA_KVM_SCRATCH0(r13) 49 49 mfcr r12 50 - stw r12, (PACA_EXMC + EX_CCR)(r13) 50 + stw r12, PACA_KVM_SCRATCH1(r13) 51 51 lbz r12, PACA_KVM_IN_GUEST(r13) 52 - cmpwi r12, 0 52 + cmpwi r12, KVM_GUEST_MODE_NONE 53 53 bne ..kvmppc_handler_hasmagic_\intno 54 54 /* No KVM guest? Then jump back to the Linux handler! */ 55 - lwz r12, (PACA_EXMC + EX_CCR)(r13) 55 + lwz r12, PACA_KVM_SCRATCH1(r13) 56 56 mtcr r12 57 - ld r12, (PACA_EXMC + EX_R12)(r13) 57 + ld r12, PACA_KVM_SCRATCH0(r13) 58 58 mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ 59 59 b kvmppc_resume_\intno /* Get back original handler */ 60 60 61 61 /* Now we know we're handling a KVM guest */ 62 62 ..kvmppc_handler_hasmagic_\intno: 63 - /* Unset guest state */ 64 - li r12, 0 65 - stb r12, PACA_KVM_IN_GUEST(r13) 66 63 67 - std r1, (PACA_EXMC+EX_R9)(r13) 68 - std r10, (PACA_EXMC+EX_R10)(r13) 69 - std r11, (PACA_EXMC+EX_R11)(r13) 70 - std r2, (PACA_EXMC+EX_R13)(r13) 71 - 72 - mfsrr0 r10 73 - mfsrr1 r11 74 - 75 - /* Restore R1/R2 so we can handle faults */ 76 - ld r1, PACAR1(r13) 77 - ld r2, (PACA_EXMC+EX_SRR0)(r13) 64 + /* Should we just skip the faulting instruction? */ 65 + cmpwi r12, KVM_GUEST_MODE_SKIP 66 + beq kvmppc_handler_skip_ins 78 67 79 68 /* Let's store which interrupt we're handling */ 80 69 li r12, \intno ··· 91 102 INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX 92 103 93 104 /* 105 + * Bring us back to the faulting code, but skip the 106 + * faulting instruction. 107 + * 108 + * This is a generic exit path from the interrupt 109 + * trampolines above. 110 + * 111 + * Input Registers: 112 + * 113 + * R12 = free 114 + * R13 = PACA 115 + * PACA.KVM.SCRATCH0 = guest R12 116 + * PACA.KVM.SCRATCH1 = guest CR 117 + * SPRG_SCRATCH0 = guest R13 118 + * 119 + */ 120 + kvmppc_handler_skip_ins: 121 + 122 + /* Patch the IP to the next instruction */ 123 + mfsrr0 r12 124 + addi r12, r12, 4 125 + mtsrr0 r12 126 + 127 + /* Clean up all state */ 128 + lwz r12, PACA_KVM_SCRATCH1(r13) 129 + mtcr r12 130 + ld r12, PACA_KVM_SCRATCH0(r13) 131 + mfspr r13, SPRN_SPRG_SCRATCH0 132 + 133 + /* And get back into the code */ 134 + RFI 135 + 136 + /* 94 137 * This trampoline brings us back to a real mode handler 95 138 * 96 139 * Input Registers: 97 140 * 98 - * R6 = SRR0 99 - * R7 = SRR1 141 + * R5 = SRR0 142 + * R6 = SRR1 100 143 * LR = real-mode IP 101 144 * 102 145 */ 103 146 .global kvmppc_handler_lowmem_trampoline 104 147 kvmppc_handler_lowmem_trampoline: 105 148 106 - mtsrr0 r6 107 - mtsrr1 r7 149 + mtsrr0 r5 150 + mtsrr1 r6 108 151 blr 109 152 kvmppc_handler_lowmem_trampoline_end: 153 + 154 + /* 155 + * Call a function in real mode 156 + * 157 + * Input Registers: 158 + * 159 + * R3 = function 160 + * R4 = MSR 161 + * R5 = CTR 162 + * 163 + */ 164 + _GLOBAL(kvmppc_rmcall) 165 + mtmsr r4 /* Disable relocation, so mtsrr 166 + doesn't get interrupted */ 167 + mtctr r5 168 + mtsrr0 r3 169 + mtsrr1 r4 170 + RFI 171 + 172 + /* 173 + * Activate current's external feature (FPU/Altivec/VSX) 174 + */ 175 + #define define_load_up(what) \ 176 + \ 177 + _GLOBAL(kvmppc_load_up_ ## what); \ 178 + subi r1, r1, INT_FRAME_SIZE; \ 179 + mflr r3; \ 180 + std r3, _LINK(r1); \ 181 + mfmsr r4; \ 182 + std r31, GPR3(r1); \ 183 + mr r31, r4; \ 184 + li r5, MSR_DR; \ 185 + oris r5, r5, MSR_EE@h; \ 186 + andc r4, r4, r5; \ 187 + mtmsr r4; \ 188 + \ 189 + bl .load_up_ ## what; \ 190 + \ 191 + mtmsr r31; 
\ 192 + ld r3, _LINK(r1); \ 193 + ld r31, GPR3(r1); \ 194 + addi r1, r1, INT_FRAME_SIZE; \ 195 + mtlr r3; \ 196 + blr 197 + 198 + define_load_up(fpu) 199 + #ifdef CONFIG_ALTIVEC 200 + define_load_up(altivec) 201 + #endif 202 + #ifdef CONFIG_VSX 203 + define_load_up(vsx) 204 + #endif 110 205 111 206 .global kvmppc_trampoline_lowmem 112 207 kvmppc_trampoline_lowmem:
+99 -59
arch/powerpc/kvm/book3s_64_slb.S
··· 31 31 #define REBOLT_SLB_ENTRY(num) \ 32 32 ld r10, SHADOW_SLB_ESID(num)(r11); \ 33 33 cmpdi r10, 0; \ 34 - beq slb_exit_skip_1; \ 34 + beq slb_exit_skip_ ## num; \ 35 35 oris r10, r10, SLB_ESID_V@h; \ 36 36 ld r9, SHADOW_SLB_VSID(num)(r11); \ 37 37 slbmte r9, r10; \ ··· 51 51 * 52 52 * MSR = ~IR|DR 53 53 * R13 = PACA 54 + * R1 = host R1 55 + * R2 = host R2 54 56 * R9 = guest IP 55 57 * R10 = guest MSR 56 - * R11 = free 57 - * R12 = free 58 - * PACA[PACA_EXMC + EX_R9] = guest R9 59 - * PACA[PACA_EXMC + EX_R10] = guest R10 60 - * PACA[PACA_EXMC + EX_R11] = guest R11 61 - * PACA[PACA_EXMC + EX_R12] = guest R12 62 - * PACA[PACA_EXMC + EX_R13] = guest R13 63 - * PACA[PACA_EXMC + EX_CCR] = guest CR 64 - * PACA[PACA_EXMC + EX_R3] = guest XER 58 + * all other GPRS = free 59 + * PACA[KVM_CR] = guest CR 60 + * PACA[KVM_XER] = guest XER 65 61 */ 66 62 67 63 mtsrr0 r9 68 64 mtsrr1 r10 69 65 70 - mtspr SPRN_SPRG_SCRATCH0, r0 66 + /* Activate guest mode, so faults get handled by KVM */ 67 + li r11, KVM_GUEST_MODE_GUEST 68 + stb r11, PACA_KVM_IN_GUEST(r13) 71 69 72 70 /* Remove LPAR shadow entries */ 73 71 ··· 129 131 130 132 /* Enter guest */ 131 133 132 - mfspr r0, SPRN_SPRG_SCRATCH0 134 + ld r0, (PACA_KVM_R0)(r13) 135 + ld r1, (PACA_KVM_R1)(r13) 136 + ld r2, (PACA_KVM_R2)(r13) 137 + ld r3, (PACA_KVM_R3)(r13) 138 + ld r4, (PACA_KVM_R4)(r13) 139 + ld r5, (PACA_KVM_R5)(r13) 140 + ld r6, (PACA_KVM_R6)(r13) 141 + ld r7, (PACA_KVM_R7)(r13) 142 + ld r8, (PACA_KVM_R8)(r13) 143 + ld r9, (PACA_KVM_R9)(r13) 144 + ld r10, (PACA_KVM_R10)(r13) 145 + ld r12, (PACA_KVM_R12)(r13) 133 146 134 - ld r9, (PACA_EXMC+EX_R9)(r13) 135 - ld r10, (PACA_EXMC+EX_R10)(r13) 136 - ld r12, (PACA_EXMC+EX_R12)(r13) 137 - 138 - lwz r11, (PACA_EXMC+EX_CCR)(r13) 147 + lwz r11, (PACA_KVM_CR)(r13) 139 148 mtcr r11 140 149 141 - ld r11, (PACA_EXMC+EX_R3)(r13) 150 + ld r11, (PACA_KVM_XER)(r13) 142 151 mtxer r11 143 152 144 - ld r11, (PACA_EXMC+EX_R11)(r13) 145 - ld r13, (PACA_EXMC+EX_R13)(r13) 153 + ld r11, (PACA_KVM_R11)(r13) 154 + ld r13, (PACA_KVM_R13)(r13) 146 155 147 156 RFI 148 157 kvmppc_handler_trampoline_enter_end: ··· 167 162 168 163 /* Register usage at this point: 169 164 * 170 - * SPRG_SCRATCH0 = guest R13 171 - * R01 = host R1 172 - * R02 = host R2 173 - * R10 = guest PC 174 - * R11 = guest MSR 175 - * R12 = exit handler id 176 - * R13 = PACA 177 - * PACA.exmc.CCR = guest CR 178 - * PACA.exmc.R9 = guest R1 179 - * PACA.exmc.R10 = guest R10 180 - * PACA.exmc.R11 = guest R11 181 - * PACA.exmc.R12 = guest R12 182 - * PACA.exmc.R13 = guest R2 165 + * SPRG_SCRATCH0 = guest R13 166 + * R12 = exit handler id 167 + * R13 = PACA 168 + * PACA.KVM.SCRATCH0 = guest R12 169 + * PACA.KVM.SCRATCH1 = guest CR 183 170 * 184 171 */ 185 172 186 173 /* Save registers */ 187 174 188 - std r0, (PACA_EXMC+EX_SRR0)(r13) 189 - std r9, (PACA_EXMC+EX_R3)(r13) 190 - std r10, (PACA_EXMC+EX_LR)(r13) 191 - std r11, (PACA_EXMC+EX_DAR)(r13) 175 + std r0, PACA_KVM_R0(r13) 176 + std r1, PACA_KVM_R1(r13) 177 + std r2, PACA_KVM_R2(r13) 178 + std r3, PACA_KVM_R3(r13) 179 + std r4, PACA_KVM_R4(r13) 180 + std r5, PACA_KVM_R5(r13) 181 + std r6, PACA_KVM_R6(r13) 182 + std r7, PACA_KVM_R7(r13) 183 + std r8, PACA_KVM_R8(r13) 184 + std r9, PACA_KVM_R9(r13) 185 + std r10, PACA_KVM_R10(r13) 186 + std r11, PACA_KVM_R11(r13) 187 + 188 + /* Restore R1/R2 so we can handle faults */ 189 + ld r1, PACA_KVM_HOST_R1(r13) 190 + ld r2, PACA_KVM_HOST_R2(r13) 191 + 192 + /* Save guest PC and MSR in GPRs */ 193 + mfsrr0 r3 194 + mfsrr1 r4 195 + 196 + /* Get scratch'ed off registers 
*/ 197 + mfspr r9, SPRN_SPRG_SCRATCH0 198 + std r9, PACA_KVM_R13(r13) 199 + 200 + ld r8, PACA_KVM_SCRATCH0(r13) 201 + std r8, PACA_KVM_R12(r13) 202 + 203 + lwz r7, PACA_KVM_SCRATCH1(r13) 204 + stw r7, PACA_KVM_CR(r13) 205 + 206 + /* Save more register state */ 207 + 208 + mfxer r6 209 + stw r6, PACA_KVM_XER(r13) 210 + 211 + mfdar r5 212 + mfdsisr r6 192 213 193 214 /* 194 215 * In order for us to easily get the last instruction, ··· 233 202 234 203 ld_last_inst: 235 204 /* Save off the guest instruction we're at */ 205 + 206 + /* Set guest mode to 'jump over instruction' so if lwz faults 207 + * we'll just continue at the next IP. */ 208 + li r9, KVM_GUEST_MODE_SKIP 209 + stb r9, PACA_KVM_IN_GUEST(r13) 210 + 236 211 /* 1) enable paging for data */ 237 212 mfmsr r9 238 213 ori r11, r9, MSR_DR /* Enable paging for data */ 239 214 mtmsr r11 240 215 /* 2) fetch the instruction */ 241 - lwz r0, 0(r10) 216 + li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */ 217 + lwz r0, 0(r3) 242 218 /* 3) disable paging again */ 243 219 mtmsr r9 244 220 245 221 no_ld_last_inst: 222 + 223 + /* Unset guest mode */ 224 + li r9, KVM_GUEST_MODE_NONE 225 + stb r9, PACA_KVM_IN_GUEST(r13) 246 226 247 227 /* Restore bolted entries from the shadow and fix it along the way */ 248 228 ··· 275 233 276 234 slb_do_exit: 277 235 278 - /* Restore registers */ 279 - 280 - ld r11, (PACA_EXMC+EX_DAR)(r13) 281 - ld r10, (PACA_EXMC+EX_LR)(r13) 282 - ld r9, (PACA_EXMC+EX_R3)(r13) 283 - 284 - /* Save last inst */ 285 - stw r0, (PACA_EXMC+EX_LR)(r13) 286 - 287 - /* Save DAR and DSISR before going to paged mode */ 288 - mfdar r0 289 - std r0, (PACA_EXMC+EX_DAR)(r13) 290 - mfdsisr r0 291 - stw r0, (PACA_EXMC+EX_DSISR)(r13) 236 + /* Register usage at this point: 237 + * 238 + * R0 = guest last inst 239 + * R1 = host R1 240 + * R2 = host R2 241 + * R3 = guest PC 242 + * R4 = guest MSR 243 + * R5 = guest DAR 244 + * R6 = guest DSISR 245 + * R12 = exit handler id 246 + * R13 = PACA 247 + * PACA.KVM.* = guest * 248 + * 249 + */ 292 250 293 251 /* RFI into the highmem handler */ 294 - mfmsr r0 295 - ori r0, r0, MSR_IR|MSR_DR|MSR_RI /* Enable paging */ 296 - mtsrr1 r0 297 - ld r0, PACASAVEDMSR(r13) /* Highmem handler address */ 298 - mtsrr0 r0 299 - 300 - mfspr r0, SPRN_SPRG_SCRATCH0 252 + mfmsr r7 253 + ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */ 254 + mtsrr1 r7 255 + ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */ 256 + mtsrr0 r8 301 257 302 258 RFI 303 259 kvmppc_handler_trampoline_exit_end:
+61 -26
arch/powerpc/kvm/booke.c
··· 69 69 70 70 for (i = 0; i < 32; i += 4) { 71 71 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, 72 - vcpu->arch.gpr[i], 73 - vcpu->arch.gpr[i+1], 74 - vcpu->arch.gpr[i+2], 75 - vcpu->arch.gpr[i+3]); 72 + kvmppc_get_gpr(vcpu, i), 73 + kvmppc_get_gpr(vcpu, i+1), 74 + kvmppc_get_gpr(vcpu, i+2), 75 + kvmppc_get_gpr(vcpu, i+3)); 76 76 } 77 77 } 78 78 ··· 82 82 set_bit(priority, &vcpu->arch.pending_exceptions); 83 83 } 84 84 85 - void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) 85 + static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, 86 + ulong dear_flags, ulong esr_flags) 86 87 { 88 + vcpu->arch.queued_dear = dear_flags; 89 + vcpu->arch.queued_esr = esr_flags; 90 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); 91 + } 92 + 93 + static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, 94 + ulong dear_flags, ulong esr_flags) 95 + { 96 + vcpu->arch.queued_dear = dear_flags; 97 + vcpu->arch.queued_esr = esr_flags; 98 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); 99 + } 100 + 101 + static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, 102 + ulong esr_flags) 103 + { 104 + vcpu->arch.queued_esr = esr_flags; 105 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); 106 + } 107 + 108 + void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) 109 + { 110 + vcpu->arch.queued_esr = esr_flags; 87 111 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); 88 112 } 89 113 ··· 119 95 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) 120 96 { 121 97 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 98 + } 99 + 100 + void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) 101 + { 102 + clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 122 103 } 123 104 124 105 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, ··· 138 109 { 139 110 int allowed = 0; 140 111 ulong msr_mask; 112 + bool update_esr = false, update_dear = false; 141 113 142 114 switch (priority) { 143 - case BOOKE_IRQPRIO_PROGRAM: 144 115 case BOOKE_IRQPRIO_DTLB_MISS: 116 + case BOOKE_IRQPRIO_DATA_STORAGE: 117 + update_dear = true; 118 + /* fall through */ 119 + case BOOKE_IRQPRIO_INST_STORAGE: 120 + case BOOKE_IRQPRIO_PROGRAM: 121 + update_esr = true; 122 + /* fall through */ 145 123 case BOOKE_IRQPRIO_ITLB_MISS: 146 124 case BOOKE_IRQPRIO_SYSCALL: 147 - case BOOKE_IRQPRIO_DATA_STORAGE: 148 - case BOOKE_IRQPRIO_INST_STORAGE: 149 125 case BOOKE_IRQPRIO_FP_UNAVAIL: 150 126 case BOOKE_IRQPRIO_SPE_UNAVAIL: 151 127 case BOOKE_IRQPRIO_SPE_FP_DATA: ··· 185 151 vcpu->arch.srr0 = vcpu->arch.pc; 186 152 vcpu->arch.srr1 = vcpu->arch.msr; 187 153 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; 154 + if (update_esr == true) 155 + vcpu->arch.esr = vcpu->arch.queued_esr; 156 + if (update_dear == true) 157 + vcpu->arch.dear = vcpu->arch.queued_dear; 188 158 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); 189 159 190 160 clear_bit(priority, &vcpu->arch.pending_exceptions); ··· 261 223 if (vcpu->arch.msr & MSR_PR) { 262 224 /* Program traps generated by user-level software must be handled 263 225 * by the guest kernel. 
*/ 264 - vcpu->arch.esr = vcpu->arch.fault_esr; 265 - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); 226 + kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); 266 227 r = RESUME_GUEST; 267 228 kvmppc_account_exit(vcpu, USR_PR_INST); 268 229 break; ··· 317 280 break; 318 281 319 282 case BOOKE_INTERRUPT_DATA_STORAGE: 320 - vcpu->arch.dear = vcpu->arch.fault_dear; 321 - vcpu->arch.esr = vcpu->arch.fault_esr; 322 - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); 283 + kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear, 284 + vcpu->arch.fault_esr); 323 285 kvmppc_account_exit(vcpu, DSI_EXITS); 324 286 r = RESUME_GUEST; 325 287 break; 326 288 327 289 case BOOKE_INTERRUPT_INST_STORAGE: 328 - vcpu->arch.esr = vcpu->arch.fault_esr; 329 - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); 290 + kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr); 330 291 kvmppc_account_exit(vcpu, ISI_EXITS); 331 292 r = RESUME_GUEST; 332 293 break; ··· 345 310 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); 346 311 if (gtlb_index < 0) { 347 312 /* The guest didn't have a mapping for it. */ 348 - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); 349 - vcpu->arch.dear = vcpu->arch.fault_dear; 350 - vcpu->arch.esr = vcpu->arch.fault_esr; 313 + kvmppc_core_queue_dtlb_miss(vcpu, 314 + vcpu->arch.fault_dear, 315 + vcpu->arch.fault_esr); 351 316 kvmppc_mmu_dtlb_miss(vcpu); 352 317 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); 353 318 r = RESUME_GUEST; ··· 461 426 { 462 427 vcpu->arch.pc = 0; 463 428 vcpu->arch.msr = 0; 464 - vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ 429 + kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ 465 430 466 431 vcpu->arch.shadow_pid = 1; 467 432 ··· 479 444 int i; 480 445 481 446 regs->pc = vcpu->arch.pc; 482 - regs->cr = vcpu->arch.cr; 447 + regs->cr = kvmppc_get_cr(vcpu); 483 448 regs->ctr = vcpu->arch.ctr; 484 449 regs->lr = vcpu->arch.lr; 485 - regs->xer = vcpu->arch.xer; 450 + regs->xer = kvmppc_get_xer(vcpu); 486 451 regs->msr = vcpu->arch.msr; 487 452 regs->srr0 = vcpu->arch.srr0; 488 453 regs->srr1 = vcpu->arch.srr1; ··· 496 461 regs->sprg7 = vcpu->arch.sprg6; 497 462 498 463 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 499 - regs->gpr[i] = vcpu->arch.gpr[i]; 464 + regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 500 465 501 466 return 0; 502 467 } ··· 506 471 int i; 507 472 508 473 vcpu->arch.pc = regs->pc; 509 - vcpu->arch.cr = regs->cr; 474 + kvmppc_set_cr(vcpu, regs->cr); 510 475 vcpu->arch.ctr = regs->ctr; 511 476 vcpu->arch.lr = regs->lr; 512 - vcpu->arch.xer = regs->xer; 477 + kvmppc_set_xer(vcpu, regs->xer); 513 478 kvmppc_set_msr(vcpu, regs->msr); 514 479 vcpu->arch.srr0 = regs->srr0; 515 480 vcpu->arch.srr1 = regs->srr1; ··· 521 486 vcpu->arch.sprg6 = regs->sprg5; 522 487 vcpu->arch.sprg7 = regs->sprg6; 523 488 524 - for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) 525 - vcpu->arch.gpr[i] = regs->gpr[i]; 489 + for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 490 + kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 526 491 527 492 return 0; 528 493 }
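
The booke.c hunks above funnel every general-purpose register access through kvmppc_get_gpr()/kvmppc_set_gpr() instead of poking vcpu->arch.gpr[] directly. As a rough sketch (an assumption, since the real definitions live in a PowerPC KVM header that is not part of this diff), such accessors can be plain inline wrappers around the existing array:

    #include <linux/kvm_host.h>

    /* Hypothetical minimal accessors; the diff only shows their call sites. */
    static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
    {
            vcpu->arch.gpr[num] = val;      /* one choke point for GPR writes */
    }

    static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
    {
            return vcpu->arch.gpr[num];     /* and one for GPR reads */
    }

Centralizing the accesses this way lets a later change relocate or lazily synchronize the register file without touching every emulation site again.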
+54 -53
arch/powerpc/kvm/booke_emulate.c
··· 62 62 63 63 case OP_31_XOP_MFMSR: 64 64 rt = get_rt(inst); 65 - vcpu->arch.gpr[rt] = vcpu->arch.msr; 65 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr); 66 66 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); 67 67 break; 68 68 69 69 case OP_31_XOP_MTMSR: 70 70 rs = get_rs(inst); 71 71 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); 72 - kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); 72 + kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); 73 73 break; 74 74 75 75 case OP_31_XOP_WRTEE: 76 76 rs = get_rs(inst); 77 77 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) 78 - | (vcpu->arch.gpr[rs] & MSR_EE); 78 + | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); 79 79 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); 80 80 break; 81 81 ··· 101 101 int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 102 102 { 103 103 int emulated = EMULATE_DONE; 104 + ulong spr_val = kvmppc_get_gpr(vcpu, rs); 104 105 105 106 switch (sprn) { 106 107 case SPRN_DEAR: 107 - vcpu->arch.dear = vcpu->arch.gpr[rs]; break; 108 + vcpu->arch.dear = spr_val; break; 108 109 case SPRN_ESR: 109 - vcpu->arch.esr = vcpu->arch.gpr[rs]; break; 110 + vcpu->arch.esr = spr_val; break; 110 111 case SPRN_DBCR0: 111 - vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; 112 + vcpu->arch.dbcr0 = spr_val; break; 112 113 case SPRN_DBCR1: 113 - vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; 114 + vcpu->arch.dbcr1 = spr_val; break; 114 115 case SPRN_DBSR: 115 - vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break; 116 + vcpu->arch.dbsr &= ~spr_val; break; 116 117 case SPRN_TSR: 117 - vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; 118 + vcpu->arch.tsr &= ~spr_val; break; 118 119 case SPRN_TCR: 119 - vcpu->arch.tcr = vcpu->arch.gpr[rs]; 120 + vcpu->arch.tcr = spr_val; 120 121 kvmppc_emulate_dec(vcpu); 121 122 break; 122 123 ··· 125 124 * loaded into the real SPRGs when resuming the 126 125 * guest. 
*/ 127 126 case SPRN_SPRG4: 128 - vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; 127 + vcpu->arch.sprg4 = spr_val; break; 129 128 case SPRN_SPRG5: 130 - vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; 129 + vcpu->arch.sprg5 = spr_val; break; 131 130 case SPRN_SPRG6: 132 - vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; 131 + vcpu->arch.sprg6 = spr_val; break; 133 132 case SPRN_SPRG7: 134 - vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; 133 + vcpu->arch.sprg7 = spr_val; break; 135 134 136 135 case SPRN_IVPR: 137 - vcpu->arch.ivpr = vcpu->arch.gpr[rs]; 136 + vcpu->arch.ivpr = spr_val; 138 137 break; 139 138 case SPRN_IVOR0: 140 - vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; 139 + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val; 141 140 break; 142 141 case SPRN_IVOR1: 143 - vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; 142 + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val; 144 143 break; 145 144 case SPRN_IVOR2: 146 - vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; 145 + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val; 147 146 break; 148 147 case SPRN_IVOR3: 149 - vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; 148 + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val; 150 149 break; 151 150 case SPRN_IVOR4: 152 - vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; 151 + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val; 153 152 break; 154 153 case SPRN_IVOR5: 155 - vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; 154 + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val; 156 155 break; 157 156 case SPRN_IVOR6: 158 - vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; 157 + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val; 159 158 break; 160 159 case SPRN_IVOR7: 161 - vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; 160 + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val; 162 161 break; 163 162 case SPRN_IVOR8: 164 - vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; 163 + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val; 165 164 break; 166 165 case SPRN_IVOR9: 167 - vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; 166 + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val; 168 167 break; 169 168 case SPRN_IVOR10: 170 - vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; 169 + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val; 171 170 break; 172 171 case SPRN_IVOR11: 173 - vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; 172 + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val; 174 173 break; 175 174 case SPRN_IVOR12: 176 - vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; 175 + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val; 177 176 break; 178 177 case SPRN_IVOR13: 179 - vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; 178 + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val; 180 179 break; 181 180 case SPRN_IVOR14: 182 - vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; 181 + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val; 183 182 break; 184 183 case SPRN_IVOR15: 185 - vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; 184 + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val; 186 185 break; 187 186 188 187 default: ··· 198 197 199 198 switch (sprn) { 200 199 case SPRN_IVPR: 201 - vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; 200 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break; 202 201 case SPRN_DEAR: 203 - vcpu->arch.gpr[rt] = vcpu->arch.dear; break; 202 + 
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break; 204 203 case SPRN_ESR: 205 - vcpu->arch.gpr[rt] = vcpu->arch.esr; break; 204 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break; 206 205 case SPRN_DBCR0: 207 - vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; 206 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; 208 207 case SPRN_DBCR1: 209 - vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; 208 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; 210 209 case SPRN_DBSR: 211 - vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break; 210 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; 212 211 213 212 case SPRN_IVOR0: 214 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; 213 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); 215 214 break; 216 215 case SPRN_IVOR1: 217 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; 216 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]); 218 217 break; 219 218 case SPRN_IVOR2: 220 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; 219 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]); 221 220 break; 222 221 case SPRN_IVOR3: 223 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; 222 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]); 224 223 break; 225 224 case SPRN_IVOR4: 226 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; 225 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]); 227 226 break; 228 227 case SPRN_IVOR5: 229 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; 228 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]); 230 229 break; 231 230 case SPRN_IVOR6: 232 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; 231 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]); 233 232 break; 234 233 case SPRN_IVOR7: 235 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; 234 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]); 236 235 break; 237 236 case SPRN_IVOR8: 238 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; 237 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]); 239 238 break; 240 239 case SPRN_IVOR9: 241 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; 240 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]); 242 241 break; 243 242 case SPRN_IVOR10: 244 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; 243 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]); 245 244 break; 246 245 case SPRN_IVOR11: 247 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; 246 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]); 248 247 break; 249 248 case SPRN_IVOR12: 250 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; 249 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]); 251 250 break; 252 251 case SPRN_IVOR13: 253 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; 252 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]); 254 253 break; 255 254 case SPRN_IVOR14: 256 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; 255 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]); 257 256 break; 258 257 case SPRN_IVOR15: 259 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 258 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]); 260 259 break; 261 260 262 261 default:
+6
arch/powerpc/kvm/e500.c
··· 60 60 61 61 kvmppc_e500_tlb_setup(vcpu_e500); 62 62 63 + /* Registers init */ 64 + vcpu->arch.pvr = mfspr(SPRN_PVR); 65 + 66 + /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ 67 + vcpu->vcpu_id = 0; 68 + 63 69 return 0; 64 70 } 65 71
+46 -47
arch/powerpc/kvm/e500_emulate.c
··· 74 74 { 75 75 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 76 76 int emulated = EMULATE_DONE; 77 + ulong spr_val = kvmppc_get_gpr(vcpu, rs); 77 78 78 79 switch (sprn) { 79 80 case SPRN_PID: 80 81 vcpu_e500->pid[0] = vcpu->arch.shadow_pid = 81 - vcpu->arch.pid = vcpu->arch.gpr[rs]; 82 + vcpu->arch.pid = spr_val; 82 83 break; 83 84 case SPRN_PID1: 84 - vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break; 85 + vcpu_e500->pid[1] = spr_val; break; 85 86 case SPRN_PID2: 86 - vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break; 87 + vcpu_e500->pid[2] = spr_val; break; 87 88 case SPRN_MAS0: 88 - vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break; 89 + vcpu_e500->mas0 = spr_val; break; 89 90 case SPRN_MAS1: 90 - vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break; 91 + vcpu_e500->mas1 = spr_val; break; 91 92 case SPRN_MAS2: 92 - vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break; 93 + vcpu_e500->mas2 = spr_val; break; 93 94 case SPRN_MAS3: 94 - vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break; 95 + vcpu_e500->mas3 = spr_val; break; 95 96 case SPRN_MAS4: 96 - vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break; 97 + vcpu_e500->mas4 = spr_val; break; 97 98 case SPRN_MAS6: 98 - vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break; 99 + vcpu_e500->mas6 = spr_val; break; 99 100 case SPRN_MAS7: 100 - vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break; 101 + vcpu_e500->mas7 = spr_val; break; 102 + case SPRN_L1CSR0: 103 + vcpu_e500->l1csr0 = spr_val; 104 + vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); 105 + break; 101 106 case SPRN_L1CSR1: 102 - vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break; 107 + vcpu_e500->l1csr1 = spr_val; break; 103 108 case SPRN_HID0: 104 - vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break; 109 + vcpu_e500->hid0 = spr_val; break; 105 110 case SPRN_HID1: 106 - vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break; 111 + vcpu_e500->hid1 = spr_val; break; 107 112 108 113 case SPRN_MMUCSR0: 109 114 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, 110 - vcpu->arch.gpr[rs]); 115 + spr_val); 111 116 break; 112 117 113 118 /* extra exceptions */ 114 119 case SPRN_IVOR32: 115 - vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs]; 120 + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; 116 121 break; 117 122 case SPRN_IVOR33: 118 - vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs]; 123 + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val; 119 124 break; 120 125 case SPRN_IVOR34: 121 - vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs]; 126 + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; 122 127 break; 123 128 case SPRN_IVOR35: 124 - vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs]; 129 + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; 125 130 break; 126 131 127 132 default: ··· 143 138 144 139 switch (sprn) { 145 140 case SPRN_PID: 146 - vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break; 141 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break; 147 142 case SPRN_PID1: 148 - vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break; 143 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break; 149 144 case SPRN_PID2: 150 - vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break; 145 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; 151 146 case SPRN_MAS0: 152 - vcpu->arch.gpr[rt] = vcpu_e500->mas0; break; 147 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break; 153 148 case SPRN_MAS1: 154 - vcpu->arch.gpr[rt] = vcpu_e500->mas1; break; 149 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break; 155 150 case SPRN_MAS2: 156 - vcpu->arch.gpr[rt] = vcpu_e500->mas2; break; 151 + kvmppc_set_gpr(vcpu, 
rt, vcpu_e500->mas2); break; 157 152 case SPRN_MAS3: 158 - vcpu->arch.gpr[rt] = vcpu_e500->mas3; break; 153 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break; 159 154 case SPRN_MAS4: 160 - vcpu->arch.gpr[rt] = vcpu_e500->mas4; break; 155 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break; 161 156 case SPRN_MAS6: 162 - vcpu->arch.gpr[rt] = vcpu_e500->mas6; break; 157 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break; 163 158 case SPRN_MAS7: 164 - vcpu->arch.gpr[rt] = vcpu_e500->mas7; break; 159 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break; 165 160 166 161 case SPRN_TLB0CFG: 167 - vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG); 168 - vcpu->arch.gpr[rt] &= ~0xfffUL; 169 - vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0]; 170 - break; 171 - 162 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; 172 163 case SPRN_TLB1CFG: 173 - vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG); 174 - vcpu->arch.gpr[rt] &= ~0xfffUL; 175 - vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1]; 176 - break; 177 - 164 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break; 165 + case SPRN_L1CSR0: 166 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break; 178 167 case SPRN_L1CSR1: 179 - vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break; 168 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break; 180 169 case SPRN_HID0: 181 - vcpu->arch.gpr[rt] = vcpu_e500->hid0; break; 170 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; 182 171 case SPRN_HID1: 183 - vcpu->arch.gpr[rt] = vcpu_e500->hid1; break; 172 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; 184 173 185 174 case SPRN_MMUCSR0: 186 - vcpu->arch.gpr[rt] = 0; break; 175 + kvmppc_set_gpr(vcpu, rt, 0); break; 187 176 188 177 case SPRN_MMUCFG: 189 - vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break; 178 + kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break; 190 179 191 180 /* extra exceptions */ 192 181 case SPRN_IVOR32: 193 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; 182 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]); 194 183 break; 195 184 case SPRN_IVOR33: 196 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; 185 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]); 197 186 break; 198 187 case SPRN_IVOR34: 199 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; 188 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]); 200 189 break; 201 190 case SPRN_IVOR35: 202 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; 191 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]); 203 192 break; 204 193 default: 205 194 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+8 -2
arch/powerpc/kvm/e500_tlb.c
··· 417 417 int esel, tlbsel; 418 418 gva_t ea; 419 419 420 - ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb]; 420 + ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb); 421 421 422 422 ia = (ea >> 2) & 0x1; 423 423 ··· 470 470 struct tlbe *gtlbe = NULL; 471 471 gva_t ea; 472 472 473 - ea = vcpu->arch.gpr[rb]; 473 + ea = kvmppc_get_gpr(vcpu, rb); 474 474 475 475 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 476 476 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); ··· 727 727 kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL); 728 728 if (vcpu_e500->shadow_pages[1] == NULL) 729 729 goto err_out_page0; 730 + 731 + /* Init TLB configuration register */ 732 + vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; 733 + vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0]; 734 + vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; 735 + vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1]; 730 736 731 737 return 0; 732 738
+67 -51
arch/powerpc/kvm/emulate.c
··· 83 83 84 84 pr_debug("mtDEC: %x\n", vcpu->arch.dec); 85 85 #ifdef CONFIG_PPC64 86 + /* mtdec lowers the interrupt line when positive. */ 87 + kvmppc_core_dequeue_dec(vcpu); 88 + 86 89 /* POWER4+ triggers a dec interrupt if the value is < 0 */ 87 90 if (vcpu->arch.dec & 0x80000000) { 88 91 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); ··· 143 140 144 141 pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); 145 142 143 + /* Try again next time */ 144 + if (inst == KVM_INST_FETCH_FAILED) 145 + return EMULATE_DONE; 146 + 146 147 switch (get_op(inst)) { 147 148 case OP_TRAP: 148 149 #ifdef CONFIG_PPC64 149 150 case OP_TRAP_64: 151 + kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); 150 152 #else 151 - vcpu->arch.esr |= ESR_PTR; 153 + kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR); 152 154 #endif 153 - kvmppc_core_queue_program(vcpu); 154 155 advance = 0; 155 156 break; 156 157 ··· 174 167 case OP_31_XOP_STWX: 175 168 rs = get_rs(inst); 176 169 emulated = kvmppc_handle_store(run, vcpu, 177 - vcpu->arch.gpr[rs], 170 + kvmppc_get_gpr(vcpu, rs), 178 171 4, 1); 179 172 break; 180 173 181 174 case OP_31_XOP_STBX: 182 175 rs = get_rs(inst); 183 176 emulated = kvmppc_handle_store(run, vcpu, 184 - vcpu->arch.gpr[rs], 177 + kvmppc_get_gpr(vcpu, rs), 185 178 1, 1); 186 179 break; 187 180 ··· 190 183 ra = get_ra(inst); 191 184 rb = get_rb(inst); 192 185 193 - ea = vcpu->arch.gpr[rb]; 186 + ea = kvmppc_get_gpr(vcpu, rb); 194 187 if (ra) 195 - ea += vcpu->arch.gpr[ra]; 188 + ea += kvmppc_get_gpr(vcpu, ra); 196 189 197 190 emulated = kvmppc_handle_store(run, vcpu, 198 - vcpu->arch.gpr[rs], 191 + kvmppc_get_gpr(vcpu, rs), 199 192 1, 1); 200 - vcpu->arch.gpr[rs] = ea; 193 + kvmppc_set_gpr(vcpu, rs, ea); 201 194 break; 202 195 203 196 case OP_31_XOP_LHZX: ··· 210 203 ra = get_ra(inst); 211 204 rb = get_rb(inst); 212 205 213 - ea = vcpu->arch.gpr[rb]; 206 + ea = kvmppc_get_gpr(vcpu, rb); 214 207 if (ra) 215 - ea += vcpu->arch.gpr[ra]; 208 + ea += kvmppc_get_gpr(vcpu, ra); 216 209 217 210 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 218 - vcpu->arch.gpr[ra] = ea; 211 + kvmppc_set_gpr(vcpu, ra, ea); 219 212 break; 220 213 221 214 case OP_31_XOP_MFSPR: ··· 224 217 225 218 switch (sprn) { 226 219 case SPRN_SRR0: 227 - vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; 220 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break; 228 221 case SPRN_SRR1: 229 - vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; 222 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break; 230 223 case SPRN_PVR: 231 - vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; 224 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break; 232 225 case SPRN_PIR: 233 - vcpu->arch.gpr[rt] = vcpu->vcpu_id; break; 226 + kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break; 234 227 case SPRN_MSSSR0: 235 - vcpu->arch.gpr[rt] = 0; break; 228 + kvmppc_set_gpr(vcpu, rt, 0); break; 236 229 237 230 /* Note: mftb and TBRL/TBWL are user-accessible, so 238 231 * the guest can always access the real TB anyways. 239 232 * In fact, we probably will never see these traps. 
*/ 240 233 case SPRN_TBWL: 241 - vcpu->arch.gpr[rt] = get_tb() >> 32; break; 234 + kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break; 242 235 case SPRN_TBWU: 243 - vcpu->arch.gpr[rt] = get_tb(); break; 236 + kvmppc_set_gpr(vcpu, rt, get_tb()); break; 244 237 245 238 case SPRN_SPRG0: 246 - vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break; 239 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break; 247 240 case SPRN_SPRG1: 248 - vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break; 241 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break; 249 242 case SPRN_SPRG2: 250 - vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break; 243 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break; 251 244 case SPRN_SPRG3: 252 - vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break; 245 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break; 253 246 /* Note: SPRG4-7 are user-readable, so we don't get 254 247 * a trap. */ 255 248 256 249 case SPRN_DEC: 257 250 { 258 251 u64 jd = get_tb() - vcpu->arch.dec_jiffies; 259 - vcpu->arch.gpr[rt] = vcpu->arch.dec - jd; 260 - pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, vcpu->arch.gpr[rt]); 252 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); 253 + pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", 254 + vcpu->arch.dec, jd, 255 + kvmppc_get_gpr(vcpu, rt)); 261 256 break; 262 257 } 263 258 default: 264 259 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); 265 260 if (emulated == EMULATE_FAIL) { 266 261 printk("mfspr: unknown spr %x\n", sprn); 267 - vcpu->arch.gpr[rt] = 0; 262 + kvmppc_set_gpr(vcpu, rt, 0); 268 263 } 269 264 break; 270 265 } ··· 278 269 rb = get_rb(inst); 279 270 280 271 emulated = kvmppc_handle_store(run, vcpu, 281 - vcpu->arch.gpr[rs], 272 + kvmppc_get_gpr(vcpu, rs), 282 273 2, 1); 283 274 break; 284 275 ··· 287 278 ra = get_ra(inst); 288 279 rb = get_rb(inst); 289 280 290 - ea = vcpu->arch.gpr[rb]; 281 + ea = kvmppc_get_gpr(vcpu, rb); 291 282 if (ra) 292 - ea += vcpu->arch.gpr[ra]; 283 + ea += kvmppc_get_gpr(vcpu, ra); 293 284 294 285 emulated = kvmppc_handle_store(run, vcpu, 295 - vcpu->arch.gpr[rs], 286 + kvmppc_get_gpr(vcpu, rs), 296 287 2, 1); 297 - vcpu->arch.gpr[ra] = ea; 288 + kvmppc_set_gpr(vcpu, ra, ea); 298 289 break; 299 290 300 291 case OP_31_XOP_MTSPR: ··· 302 293 rs = get_rs(inst); 303 294 switch (sprn) { 304 295 case SPRN_SRR0: 305 - vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; 296 + vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break; 306 297 case SPRN_SRR1: 307 - vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; 298 + vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break; 308 299 309 300 /* XXX We need to context-switch the timebase for 310 301 * watchdog and FIT. 
*/ ··· 314 305 case SPRN_MSSSR0: break; 315 306 316 307 case SPRN_DEC: 317 - vcpu->arch.dec = vcpu->arch.gpr[rs]; 308 + vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs); 318 309 kvmppc_emulate_dec(vcpu); 319 310 break; 320 311 321 312 case SPRN_SPRG0: 322 - vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; 313 + vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break; 323 314 case SPRN_SPRG1: 324 - vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break; 315 + vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break; 325 316 case SPRN_SPRG2: 326 - vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break; 317 + vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break; 327 318 case SPRN_SPRG3: 328 - vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; 319 + vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break; 329 320 330 321 default: 331 322 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); ··· 357 348 rb = get_rb(inst); 358 349 359 350 emulated = kvmppc_handle_store(run, vcpu, 360 - vcpu->arch.gpr[rs], 351 + kvmppc_get_gpr(vcpu, rs), 361 352 4, 0); 362 353 break; 363 354 ··· 372 363 rb = get_rb(inst); 373 364 374 365 emulated = kvmppc_handle_store(run, vcpu, 375 - vcpu->arch.gpr[rs], 366 + kvmppc_get_gpr(vcpu, rs), 376 367 2, 0); 377 368 break; 378 369 ··· 391 382 ra = get_ra(inst); 392 383 rt = get_rt(inst); 393 384 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 394 - vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 385 + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 395 386 break; 396 387 397 388 case OP_LBZ: ··· 403 394 ra = get_ra(inst); 404 395 rt = get_rt(inst); 405 396 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 406 - vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 397 + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 407 398 break; 408 399 409 400 case OP_STW: 410 401 rs = get_rs(inst); 411 - emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 402 + emulated = kvmppc_handle_store(run, vcpu, 403 + kvmppc_get_gpr(vcpu, rs), 412 404 4, 1); 413 405 break; 414 406 415 407 case OP_STWU: 416 408 ra = get_ra(inst); 417 409 rs = get_rs(inst); 418 - emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 410 + emulated = kvmppc_handle_store(run, vcpu, 411 + kvmppc_get_gpr(vcpu, rs), 419 412 4, 1); 420 - vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 413 + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 421 414 break; 422 415 423 416 case OP_STB: 424 417 rs = get_rs(inst); 425 - emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 418 + emulated = kvmppc_handle_store(run, vcpu, 419 + kvmppc_get_gpr(vcpu, rs), 426 420 1, 1); 427 421 break; 428 422 429 423 case OP_STBU: 430 424 ra = get_ra(inst); 431 425 rs = get_rs(inst); 432 - emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 426 + emulated = kvmppc_handle_store(run, vcpu, 427 + kvmppc_get_gpr(vcpu, rs), 433 428 1, 1); 434 - vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 429 + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 435 430 break; 436 431 437 432 case OP_LHZ: ··· 447 434 ra = get_ra(inst); 448 435 rt = get_rt(inst); 449 436 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 450 - vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 437 + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 451 438 break; 452 439 453 440 case OP_STH: 454 441 rs = get_rs(inst); 455 - emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 442 + emulated = kvmppc_handle_store(run, vcpu, 443 + kvmppc_get_gpr(vcpu, rs), 456 444 2, 1); 457 445 break; 458 446 459 447 case OP_STHU: 460 448 ra = get_ra(inst); 461 449 rs = get_rs(inst); 462 - 
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 450 + emulated = kvmppc_handle_store(run, vcpu, 451 + kvmppc_get_gpr(vcpu, rs), 463 452 2, 1); 464 - vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 453 + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); 465 454 break; 466 455 467 456 default: ··· 476 461 advance = 0; 477 462 printk(KERN_ERR "Couldn't emulate instruction 0x%08x " 478 463 "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); 464 + kvmppc_core_queue_program(vcpu, 0); 479 465 } 480 466 } 481 467
+26 -14
arch/powerpc/kvm/powerpc.c
··· 137 137 { 138 138 kvmppc_free_vcpus(kvm); 139 139 kvm_free_physmem(kvm); 140 + cleanup_srcu_struct(&kvm->srcu); 140 141 kfree(kvm); 141 142 } 142 143 ··· 166 165 return -EINVAL; 167 166 } 168 167 169 - int kvm_arch_set_memory_region(struct kvm *kvm, 170 - struct kvm_userspace_memory_region *mem, 171 - struct kvm_memory_slot old, 172 - int user_alloc) 168 + int kvm_arch_prepare_memory_region(struct kvm *kvm, 169 + struct kvm_memory_slot *memslot, 170 + struct kvm_memory_slot old, 171 + struct kvm_userspace_memory_region *mem, 172 + int user_alloc) 173 173 { 174 174 return 0; 175 175 } 176 + 177 + void kvm_arch_commit_memory_region(struct kvm *kvm, 178 + struct kvm_userspace_memory_region *mem, 179 + struct kvm_memory_slot old, 180 + int user_alloc) 181 + { 182 + return; 183 + } 184 + 176 185 177 186 void kvm_arch_flush_shadow(struct kvm *kvm) 178 187 { ··· 271 260 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 272 261 struct kvm_run *run) 273 262 { 274 - ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 275 - *gpr = run->dcr.data; 263 + kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); 276 264 } 277 265 278 266 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 279 267 struct kvm_run *run) 280 268 { 281 - ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 269 + ulong gpr; 282 270 283 - if (run->mmio.len > sizeof(*gpr)) { 271 + if (run->mmio.len > sizeof(gpr)) { 284 272 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); 285 273 return; 286 274 } 287 275 288 276 if (vcpu->arch.mmio_is_bigendian) { 289 277 switch (run->mmio.len) { 290 - case 4: *gpr = *(u32 *)run->mmio.data; break; 291 - case 2: *gpr = *(u16 *)run->mmio.data; break; 292 - case 1: *gpr = *(u8 *)run->mmio.data; break; 278 + case 4: gpr = *(u32 *)run->mmio.data; break; 279 + case 2: gpr = *(u16 *)run->mmio.data; break; 280 + case 1: gpr = *(u8 *)run->mmio.data; break; 293 281 } 294 282 } else { 295 283 /* Convert BE data from userland back to LE. */ 296 284 switch (run->mmio.len) { 297 - case 4: *gpr = ld_le32((u32 *)run->mmio.data); break; 298 - case 2: *gpr = ld_le16((u16 *)run->mmio.data); break; 299 - case 1: *gpr = *(u8 *)run->mmio.data; break; 285 + case 4: gpr = ld_le32((u32 *)run->mmio.data); break; 286 + case 2: gpr = ld_le16((u16 *)run->mmio.data); break; 287 + case 1: gpr = *(u8 *)run->mmio.data; break; 300 288 } 301 289 } 290 + 291 + kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); 302 292 } 303 293 304 294 int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
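
The powerpc.c hunk above replaces the single kvm_arch_set_memory_region() hook with a prepare/commit pair. A hedged sketch of how a generic slot-update path might sequence the two hooks (the kvm_arch_* prototypes are taken from the diff; every other name here is illustrative only):

    #include <linux/kvm_host.h>

    static int example_update_memslot(struct kvm *kvm,
                                      struct kvm_userspace_memory_region *mem,
                                      struct kvm_memory_slot *new,
                                      struct kvm_memory_slot old,
                                      int user_alloc)
    {
            int r;

            /* Phase 1: validate and allocate; nothing is visible to vcpus yet. */
            r = kvm_arch_prepare_memory_region(kvm, new, old, mem, user_alloc);
            if (r)
                    return r;

            /* ... publish the new memslots array here (e.g. via RCU) ... */

            /* Phase 2: react to the now-visible slot (flushes, vcpu resync). */
            kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
            return 0;
    }

The split lets architecture code fail early without having to unwind a half-published slot, which is also what the s390 change further down relies on.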
+17 -9
arch/s390/kvm/kvm-s390.c
··· 242 242 kvm_free_physmem(kvm); 243 243 free_page((unsigned long)(kvm->arch.sca)); 244 244 debug_unregister(kvm->arch.dbf); 245 + cleanup_srcu_struct(&kvm->srcu); 245 246 kfree(kvm); 246 247 } 247 248 ··· 691 690 } 692 691 693 692 /* Section: memory related */ 694 - int kvm_arch_set_memory_region(struct kvm *kvm, 695 - struct kvm_userspace_memory_region *mem, 696 - struct kvm_memory_slot old, 697 - int user_alloc) 693 + int kvm_arch_prepare_memory_region(struct kvm *kvm, 694 + struct kvm_memory_slot *memslot, 695 + struct kvm_memory_slot old, 696 + struct kvm_userspace_memory_region *mem, 697 + int user_alloc) 698 698 { 699 - int i; 700 - struct kvm_vcpu *vcpu; 701 - 702 699 /* A few sanity checks. We can have exactly one memory slot which has 703 700 to start at guest virtual zero and which has to be located at a 704 701 page boundary in userland and which has to end at a page boundary. ··· 719 720 if (!user_alloc) 720 721 return -EINVAL; 721 722 723 + return 0; 724 + } 725 + 726 + void kvm_arch_commit_memory_region(struct kvm *kvm, 727 + struct kvm_userspace_memory_region *mem, 728 + struct kvm_memory_slot old, 729 + int user_alloc) 730 + { 731 + int i; 732 + struct kvm_vcpu *vcpu; 733 + 722 734 /* request update of sie control block for all available vcpus */ 723 735 kvm_for_each_vcpu(i, vcpu, kvm) { 724 736 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 725 737 continue; 726 738 kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP); 727 739 } 728 - 729 - return 0; 730 740 } 731 741 732 742 void kvm_arch_flush_shadow(struct kvm *kvm)
+7 -3
arch/s390/kvm/kvm-s390.h
··· 67 67 68 68 static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) 69 69 { 70 + int idx; 70 71 struct kvm_memory_slot *mem; 72 + struct kvm_memslots *memslots; 71 73 72 - down_read(&vcpu->kvm->slots_lock); 73 - mem = &vcpu->kvm->memslots[0]; 74 + idx = srcu_read_lock(&vcpu->kvm->srcu); 75 + memslots = rcu_dereference(vcpu->kvm->memslots); 76 + 77 + mem = &memslots->memslots[0]; 74 78 75 79 vcpu->arch.sie_block->gmsor = mem->userspace_addr; 76 80 vcpu->arch.sie_block->gmslm = ··· 82 78 (mem->npages << PAGE_SHIFT) + 83 79 VIRTIODESCSPACE - 1ul; 84 80 85 - up_read(&vcpu->kvm->slots_lock); 81 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 86 82 } 87 83 88 84 /* implemented in priv.c */
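
The kvm-s390.h hunk switches kvm_s390_vcpu_set_mem() from taking slots_lock to the SRCU read-side protocol. A minimal sketch of the same idiom, using a hypothetical helper name (only kvm->srcu and kvm->memslots come from the diff):

    #include <linux/kvm_host.h>
    #include <linux/srcu.h>

    static unsigned long example_slot0_base(struct kvm *kvm)
    {
            struct kvm_memslots *slots;
            unsigned long base;
            int idx;

            idx = srcu_read_lock(&kvm->srcu);          /* enter read-side section */
            slots = rcu_dereference(kvm->memslots);    /* fetch published array   */
            base = slots->memslots[0].userspace_addr;
            srcu_read_unlock(&kvm->srcu, idx);         /* let updaters proceed    */

            return base;
    }

Readers never block slot updates; an updater publishes a new memslots array and waits out an SRCU grace period before freeing the old one.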
+1
arch/x86/include/asm/Kbuild
··· 11 11 header-y += ucontext.h 12 12 header-y += processor-flags.h 13 13 header-y += hw_breakpoint.h 14 + header-y += hyperv.h 14 15 15 16 unifdef-y += e820.h 16 17 unifdef-y += ist.h
+186
arch/x86/include/asm/hyperv.h
··· 1 + #ifndef _ASM_X86_KVM_HYPERV_H 2 + #define _ASM_X86_KVM_HYPERV_H 3 + 4 + #include <linux/types.h> 5 + 6 + /* 7 + * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent 8 + * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). 9 + */ 10 + #define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 11 + #define HYPERV_CPUID_INTERFACE 0x40000001 12 + #define HYPERV_CPUID_VERSION 0x40000002 13 + #define HYPERV_CPUID_FEATURES 0x40000003 14 + #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 15 + #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 16 + 17 + /* 18 + * Feature identification. EAX indicates which features are available 19 + * to the partition based upon the current partition privileges. 20 + */ 21 + 22 + /* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ 23 + #define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0) 24 + /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ 25 + #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) 26 + /* 27 + * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM 28 + * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available 29 + */ 30 + #define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2) 31 + /* 32 + * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through 33 + * HV_X64_MSR_STIMER3_COUNT) available 34 + */ 35 + #define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) 36 + /* 37 + * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) 38 + * are available 39 + */ 40 + #define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4) 41 + /* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/ 42 + #define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5) 43 + /* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/ 44 + #define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6) 45 + /* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/ 46 + #define HV_X64_MSR_RESET_AVAILABLE (1 << 7) 47 + /* 48 + * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, 49 + * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, 50 + * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available 51 + */ 52 + #define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) 53 + 54 + /* 55 + * Feature identification: EBX indicates which flags were specified at 56 + * partition creation. The format is the same as the partition creation 57 + * flag structure defined in section Partition Creation Flags. 58 + */ 59 + #define HV_X64_CREATE_PARTITIONS (1 << 0) 60 + #define HV_X64_ACCESS_PARTITION_ID (1 << 1) 61 + #define HV_X64_ACCESS_MEMORY_POOL (1 << 2) 62 + #define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3) 63 + #define HV_X64_POST_MESSAGES (1 << 4) 64 + #define HV_X64_SIGNAL_EVENTS (1 << 5) 65 + #define HV_X64_CREATE_PORT (1 << 6) 66 + #define HV_X64_CONNECT_PORT (1 << 7) 67 + #define HV_X64_ACCESS_STATS (1 << 8) 68 + #define HV_X64_DEBUGGING (1 << 11) 69 + #define HV_X64_CPU_POWER_MANAGEMENT (1 << 12) 70 + #define HV_X64_CONFIGURE_PROFILER (1 << 13) 71 + 72 + /* 73 + * Feature identification. EDX indicates which miscellaneous features 74 + * are available to the partition. 
75 + */ 76 + /* The MWAIT instruction is available (per section MONITOR / MWAIT) */ 77 + #define HV_X64_MWAIT_AVAILABLE (1 << 0) 78 + /* Guest debugging support is available */ 79 + #define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1) 80 + /* Performance Monitor support is available*/ 81 + #define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2) 82 + /* Support for physical CPU dynamic partitioning events is available*/ 83 + #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3) 84 + /* 85 + * Support for passing hypercall input parameter block via XMM 86 + * registers is available 87 + */ 88 + #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) 89 + /* Support for a virtual guest idle state is available */ 90 + #define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) 91 + 92 + /* 93 + * Implementation recommendations. Indicates which behaviors the hypervisor 94 + * recommends the OS implement for optimal performance. 95 + */ 96 + /* 97 + * Recommend using hypercall for address space switches rather 98 + * than MOV to CR3 instruction 99 + */ 100 + #define HV_X64_MWAIT_RECOMMENDED (1 << 0) 101 + /* Recommend using hypercall for local TLB flushes rather 102 + * than INVLPG or MOV to CR3 instructions */ 103 + #define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1) 104 + /* 105 + * Recommend using hypercall for remote TLB flushes rather 106 + * than inter-processor interrupts 107 + */ 108 + #define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2) 109 + /* 110 + * Recommend using MSRs for accessing APIC registers 111 + * EOI, ICR and TPR rather than their memory-mapped counterparts 112 + */ 113 + #define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3) 114 + /* Recommend using the hypervisor-provided MSR to initiate a system RESET */ 115 + #define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4) 116 + /* 117 + * Recommend using relaxed timing for this partition. If used, 118 + * the VM should disable any watchdog timeouts that rely on the 119 + * timely delivery of external interrupts 120 + */ 121 + #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) 122 + 123 + /* MSR used to identify the guest OS. */ 124 + #define HV_X64_MSR_GUEST_OS_ID 0x40000000 125 + 126 + /* MSR used to setup pages used to communicate with the hypervisor. */ 127 + #define HV_X64_MSR_HYPERCALL 0x40000001 128 + 129 + /* MSR used to provide vcpu index */ 130 + #define HV_X64_MSR_VP_INDEX 0x40000002 131 + 132 + /* Define the virtual APIC registers */ 133 + #define HV_X64_MSR_EOI 0x40000070 134 + #define HV_X64_MSR_ICR 0x40000071 135 + #define HV_X64_MSR_TPR 0x40000072 136 + #define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073 137 + 138 + /* Define synthetic interrupt controller model specific registers. 
*/ 139 + #define HV_X64_MSR_SCONTROL 0x40000080 140 + #define HV_X64_MSR_SVERSION 0x40000081 141 + #define HV_X64_MSR_SIEFP 0x40000082 142 + #define HV_X64_MSR_SIMP 0x40000083 143 + #define HV_X64_MSR_EOM 0x40000084 144 + #define HV_X64_MSR_SINT0 0x40000090 145 + #define HV_X64_MSR_SINT1 0x40000091 146 + #define HV_X64_MSR_SINT2 0x40000092 147 + #define HV_X64_MSR_SINT3 0x40000093 148 + #define HV_X64_MSR_SINT4 0x40000094 149 + #define HV_X64_MSR_SINT5 0x40000095 150 + #define HV_X64_MSR_SINT6 0x40000096 151 + #define HV_X64_MSR_SINT7 0x40000097 152 + #define HV_X64_MSR_SINT8 0x40000098 153 + #define HV_X64_MSR_SINT9 0x40000099 154 + #define HV_X64_MSR_SINT10 0x4000009A 155 + #define HV_X64_MSR_SINT11 0x4000009B 156 + #define HV_X64_MSR_SINT12 0x4000009C 157 + #define HV_X64_MSR_SINT13 0x4000009D 158 + #define HV_X64_MSR_SINT14 0x4000009E 159 + #define HV_X64_MSR_SINT15 0x4000009F 160 + 161 + 162 + #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 163 + #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 164 + #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ 165 + (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) 166 + 167 + /* Declare the various hypercall operations. */ 168 + #define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008 169 + 170 + #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 171 + #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 172 + #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ 173 + (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) 174 + 175 + #define HV_PROCESSOR_POWER_STATE_C0 0 176 + #define HV_PROCESSOR_POWER_STATE_C1 1 177 + #define HV_PROCESSOR_POWER_STATE_C2 2 178 + #define HV_PROCESSOR_POWER_STATE_C3 3 179 + 180 + /* hypercall status code */ 181 + #define HV_STATUS_SUCCESS 0 182 + #define HV_STATUS_INVALID_HYPERCALL_CODE 2 183 + #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 184 + #define HV_STATUS_INVALID_ALIGNMENT 4 185 + 186 + #endif
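The leaves and bits above are what a guest consults to discover these enlightenments. A rough consumer-side sketch, assuming the hypervisor-present bit (CPUID.1:ECX[31]) has already been checked and using the kernel's cpuid() helper; the function below is illustrative, not part of the patch:

	#include <asm/processor.h>	/* cpuid() */
	#include <asm/hyperv.h>

	/* Illustrative: does the hypervisor advertise the hypercall MSRs? */
	static bool hv_hypercall_msrs_advertised(void)
	{
		unsigned int eax, ebx, ecx, edx;

		cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &ebx, &ecx, &edx);
		if (eax < HYPERV_CPUID_FEATURES)	/* feature leaf not implemented */
			return false;

		cpuid(HYPERV_CPUID_FEATURES, &eax, &ebx, &ecx, &edx);
		return eax & HV_X64_MSR_HYPERCALL_AVAILABLE;	/* EAX carries the feature bits */
	}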
+14 -3
arch/x86/include/asm/kvm_emulate.h
··· 54 54 struct x86_emulate_ops { 55 55 /* 56 56 * read_std: Read bytes of standard (non-emulated/special) memory. 57 - * Used for instruction fetch, stack operations, and others. 57 + * Used for descriptor reading. 58 58 * @addr: [IN ] Linear address from which to read. 59 59 * @val: [OUT] Value read from memory, zero-extended to 'u_long'. 60 60 * @bytes: [IN ] Number of bytes to read from memory. 61 61 */ 62 62 int (*read_std)(unsigned long addr, void *val, 63 - unsigned int bytes, struct kvm_vcpu *vcpu); 63 + unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); 64 + 65 + /* 66 + * fetch: Read bytes of standard (non-emulated/special) memory. 67 + * Used for instruction fetch. 68 + * @addr: [IN ] Linear address from which to read. 69 + * @val: [OUT] Value read from memory, zero-extended to 'u_long'. 70 + * @bytes: [IN ] Number of bytes to read from memory. 71 + */ 72 + int (*fetch)(unsigned long addr, void *val, 73 + unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); 64 74 65 75 /* 66 76 * read_emulated: Read bytes from emulated/special memory area. ··· 84 74 struct kvm_vcpu *vcpu); 85 75 86 76 /* 87 - * write_emulated: Read bytes from emulated/special memory area. 77 + * write_emulated: Write bytes to emulated/special memory area. 88 78 * @addr: [IN ] Linear address to which to write. 89 79 * @val: [IN ] Value to write to memory (low-order bytes used as 90 80 * required). ··· 178 168 179 169 /* Execution mode, passed to the emulator. */ 180 170 #define X86EMUL_MODE_REAL 0 /* Real mode. */ 171 + #define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */ 181 172 #define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ 182 173 #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ 183 174 #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
+36 -24
arch/x86/include/asm/kvm_host.h
··· 25 25 #include <asm/mtrr.h> 26 26 #include <asm/msr-index.h> 27 27 28 - #define KVM_MAX_VCPUS 16 28 + #define KVM_MAX_VCPUS 64 29 29 #define KVM_MEMORY_SLOTS 32 30 30 /* memory slots that does not exposed to userspace */ 31 31 #define KVM_PRIVATE_MEM_SLOTS 4 ··· 37 37 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) 38 38 #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 39 39 0xFFFFFF0000000000ULL) 40 - 41 - #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ 42 - (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) 43 - #define KVM_GUEST_CR0_MASK \ 44 - (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) 45 - #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ 46 - (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP) 47 - #define KVM_VM_CR0_ALWAYS_ON \ 48 - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) 49 - #define KVM_GUEST_CR4_MASK \ 50 - (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE) 51 - #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) 52 - #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) 53 40 54 41 #define INVALID_PAGE (~(hpa_t)0) 55 42 #define UNMAPPED_GVA (~(gpa_t)0) ··· 243 256 void (*new_cr3)(struct kvm_vcpu *vcpu); 244 257 int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); 245 258 void (*free)(struct kvm_vcpu *vcpu); 246 - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); 259 + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, 260 + u32 *error); 247 261 void (*prefetch_page)(struct kvm_vcpu *vcpu, 248 262 struct kvm_mmu_page *page); 249 263 int (*sync_page)(struct kvm_vcpu *vcpu, ··· 270 282 u32 regs_dirty; 271 283 272 284 unsigned long cr0; 285 + unsigned long cr0_guest_owned_bits; 273 286 unsigned long cr2; 274 287 unsigned long cr3; 275 288 unsigned long cr4; 289 + unsigned long cr4_guest_owned_bits; 276 290 unsigned long cr8; 277 291 u32 hflags; 278 292 u64 pdptrs[4]; /* pae */ 279 - u64 shadow_efer; 293 + u64 efer; 280 294 u64 apic_base; 281 295 struct kvm_lapic *apic; /* kernel irqchip context */ 282 296 int32_t apic_arb_prio; ··· 364 374 /* used for guest single stepping over the given code position */ 365 375 u16 singlestep_cs; 366 376 unsigned long singlestep_rip; 377 + /* fields used by HYPER-V emulation */ 378 + u64 hv_vapic; 367 379 }; 368 380 369 381 struct kvm_mem_alias { 370 382 gfn_t base_gfn; 371 383 unsigned long npages; 372 384 gfn_t target_gfn; 385 + #define KVM_ALIAS_INVALID 1UL 386 + unsigned long flags; 373 387 }; 374 388 375 - struct kvm_arch{ 376 - int naliases; 389 + #define KVM_ARCH_HAS_UNALIAS_INSTANTIATION 390 + 391 + struct kvm_mem_aliases { 377 392 struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; 393 + int naliases; 394 + }; 395 + 396 + struct kvm_arch { 397 + struct kvm_mem_aliases *aliases; 378 398 379 399 unsigned int n_free_mmu_pages; 380 400 unsigned int n_requested_mmu_pages; ··· 416 416 s64 kvmclock_offset; 417 417 418 418 struct kvm_xen_hvm_config xen_hvm_config; 419 + 420 + /* fields used by HYPER-V emulation */ 421 + u64 hv_guest_os_id; 422 + u64 hv_hypercall; 419 423 }; 420 424 421 425 struct kvm_vm_stat { ··· 475 471 int (*hardware_setup)(void); /* __init */ 476 472 void (*hardware_unsetup)(void); /* __exit */ 477 473 bool (*cpu_has_accelerated_tpr)(void); 474 + void (*cpuid_update)(struct kvm_vcpu *vcpu); 478 475 479 476 /* Create, but do not attach this VCPU */ 480 477 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); ··· 497 492 void 
(*set_segment)(struct kvm_vcpu *vcpu, 498 493 struct kvm_segment *var, int seg); 499 494 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); 495 + void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); 500 496 void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); 501 497 void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); 502 498 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); ··· 507 501 void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 508 502 void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 509 503 void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 510 - unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); 511 - void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, 512 - int *exception); 504 + int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); 505 + int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); 513 506 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 514 507 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 515 508 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 509 + void (*fpu_activate)(struct kvm_vcpu *vcpu); 510 + void (*fpu_deactivate)(struct kvm_vcpu *vcpu); 516 511 517 512 void (*tlb_flush)(struct kvm_vcpu *vcpu); 518 513 ··· 538 531 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 539 532 int (*get_tdp_level)(void); 540 533 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 541 - bool (*gb_page_enable)(void); 534 + int (*get_lpage_level)(void); 535 + bool (*rdtscp_supported)(void); 542 536 543 537 const struct trace_print_flags *exit_reasons_str; 544 538 }; ··· 614 606 unsigned long value); 615 607 616 608 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 617 - int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 618 - int type_bits, int seg); 609 + int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); 619 610 620 611 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); 621 612 ··· 660 653 int kvm_mmu_load(struct kvm_vcpu *vcpu); 661 654 void kvm_mmu_unload(struct kvm_vcpu *vcpu); 662 655 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 656 + gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); 657 + gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); 658 + gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); 659 + gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); 663 660 664 661 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 665 662 ··· 677 666 678 667 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 679 668 int complete_pio(struct kvm_vcpu *vcpu); 669 + bool kvm_check_iopl(struct kvm_vcpu *vcpu); 680 670 681 671 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); 682 672
+1
arch/x86/include/asm/kvm_para.h
··· 2 2 #define _ASM_X86_KVM_PARA_H 3 3 4 4 #include <linux/types.h> 5 + #include <asm/hyperv.h> 5 6 6 7 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It 7 8 * should be used to determine that a VM is running under KVM.
+1 -1
arch/x86/include/asm/svm.h
··· 313 313 314 314 #define SVM_EXIT_ERR -1 315 315 316 - #define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */ 316 + #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) 317 317 318 318 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" 319 319 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
+4 -1
arch/x86/include/asm/vmx.h
··· 53 53 */ 54 54 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 55 55 #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 56 + #define SECONDARY_EXEC_RDTSCP 0x00000008 56 57 #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 57 58 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 58 59 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 ··· 252 251 #define EXIT_REASON_MSR_READ 31 253 252 #define EXIT_REASON_MSR_WRITE 32 254 253 #define EXIT_REASON_MWAIT_INSTRUCTION 36 254 + #define EXIT_REASON_MONITOR_INSTRUCTION 39 255 255 #define EXIT_REASON_PAUSE_INSTRUCTION 40 256 256 #define EXIT_REASON_MCE_DURING_VMENTRY 41 257 257 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 ··· 364 362 #define VMX_EPTP_UC_BIT (1ull << 8) 365 363 #define VMX_EPTP_WB_BIT (1ull << 14) 366 364 #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) 365 + #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) 367 366 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) 368 367 #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) 369 368 #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) ··· 377 374 #define VMX_EPT_READABLE_MASK 0x1ull 378 375 #define VMX_EPT_WRITABLE_MASK 0x2ull 379 376 #define VMX_EPT_EXECUTABLE_MASK 0x4ull 380 - #define VMX_EPT_IGMT_BIT (1ull << 6) 377 + #define VMX_EPT_IPAT_BIT (1ull << 6) 381 378 382 379 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 383 380
+2 -1
arch/x86/kernel/vsyscall_64.c
··· 301 301 register_sysctl_table(kernel_root_table2); 302 302 #endif 303 303 on_each_cpu(cpu_vsyscall_init, NULL, 1); 304 - hotcpu_notifier(cpu_vsyscall_notifier, 0); 304 + /* notifier priority > KVM */ 305 + hotcpu_notifier(cpu_vsyscall_notifier, 30); 305 306 return 0; 306 307 } 307 308
+1
arch/x86/kvm/Kconfig
··· 29 29 select HAVE_KVM_EVENTFD 30 30 select KVM_APIC_ARCHITECTURE 31 31 select USER_RETURN_NOTIFIER 32 + select KVM_MMIO 32 33 ---help--- 33 34 Support hosting fully virtualized guest machines using hardware 34 35 virtualization extensions. You will need a fairly recent
+300 -140
arch/x86/kvm/emulate.c
··· 32 32 #include <linux/module.h> 33 33 #include <asm/kvm_emulate.h> 34 34 35 - #include "mmu.h" /* for is_long_mode() */ 35 + #include "x86.h" 36 36 37 37 /* 38 38 * Opcode effective-address decode tables. ··· 76 76 #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 77 77 #define GroupMask 0xff /* Group number stored in bits 0:7 */ 78 78 /* Misc flags */ 79 + #define Lock (1<<26) /* lock prefix is allowed for the instruction */ 80 + #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ 79 81 #define No64 (1<<28) 80 82 /* Source 2 operand type */ 81 83 #define Src2None (0<<29) ··· 90 88 enum { 91 89 Group1_80, Group1_81, Group1_82, Group1_83, 92 90 Group1A, Group3_Byte, Group3, Group4, Group5, Group7, 91 + Group8, Group9, 93 92 }; 94 93 95 94 static u32 opcode_table[256] = { 96 95 /* 0x00 - 0x07 */ 97 - ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 96 + ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 98 97 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 99 98 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 100 99 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, 101 100 /* 0x08 - 0x0F */ 102 - ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 101 + ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 103 102 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 104 103 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 105 104 ImplicitOps | Stack | No64, 0, 106 105 /* 0x10 - 0x17 */ 107 - ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 106 + ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 108 107 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 109 108 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 110 109 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, 111 110 /* 0x18 - 0x1F */ 112 - ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 111 + ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 113 112 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 114 113 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 115 114 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, 116 115 /* 0x20 - 0x27 */ 117 - ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 116 + ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 118 117 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 119 118 DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, 120 119 /* 0x28 - 0x2F */ 121 - ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 120 + ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 122 121 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 123 122 0, 0, 0, 0, 124 123 /* 0x30 - 0x37 */ 125 - ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 124 + ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 126 125 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 127 126 0, 0, 0, 0, 128 127 /* 0x38 - 0x3F */ ··· 159 156 Group | Group1_80, Group | Group1_81, 160 157 Group | Group1_82, Group | Group1_83, 161 158 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 162 - ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 159 + ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 163 160 /* 0x88 - 0x8F */ 164 161 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, 165 162 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, ··· 213 210 SrcNone | ByteOp | 
ImplicitOps, SrcNone | ImplicitOps, 214 211 /* 0xF0 - 0xF7 */ 215 212 0, 0, 0, 0, 216 - ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, 213 + ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, 217 214 /* 0xF8 - 0xFF */ 218 215 ImplicitOps, 0, ImplicitOps, ImplicitOps, 219 216 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, ··· 221 218 222 219 static u32 twobyte_table[256] = { 223 220 /* 0x00 - 0x0F */ 224 - 0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 225 - ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 221 + 0, Group | GroupDual | Group7, 0, 0, 222 + 0, ImplicitOps, ImplicitOps | Priv, 0, 223 + ImplicitOps | Priv, ImplicitOps | Priv, 0, 0, 224 + 0, ImplicitOps | ModRM, 0, 0, 226 225 /* 0x10 - 0x1F */ 227 226 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 228 227 /* 0x20 - 0x2F */ 229 - ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0, 228 + ModRM | ImplicitOps | Priv, ModRM | Priv, 229 + ModRM | ImplicitOps | Priv, ModRM | Priv, 230 + 0, 0, 0, 0, 230 231 0, 0, 0, 0, 0, 0, 0, 0, 231 232 /* 0x30 - 0x3F */ 232 - ImplicitOps, 0, ImplicitOps, 0, 233 - ImplicitOps, ImplicitOps, 0, 0, 233 + ImplicitOps | Priv, 0, ImplicitOps | Priv, 0, 234 + ImplicitOps, ImplicitOps | Priv, 0, 0, 234 235 0, 0, 0, 0, 0, 0, 0, 0, 235 236 /* 0x40 - 0x47 */ 236 237 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, ··· 264 257 DstMem | SrcReg | Src2CL | ModRM, 0, 0, 265 258 /* 0xA8 - 0xAF */ 266 259 ImplicitOps | Stack, ImplicitOps | Stack, 267 - 0, DstMem | SrcReg | ModRM | BitOp, 260 + 0, DstMem | SrcReg | ModRM | BitOp | Lock, 268 261 DstMem | SrcReg | Src2ImmByte | ModRM, 269 262 DstMem | SrcReg | Src2CL | ModRM, 270 263 ModRM, 0, 271 264 /* 0xB0 - 0xB7 */ 272 - ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, 273 - DstMem | SrcReg | ModRM | BitOp, 265 + ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, 266 + 0, DstMem | SrcReg | ModRM | BitOp | Lock, 274 267 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, 275 268 DstReg | SrcMem16 | ModRM | Mov, 276 269 /* 0xB8 - 0xBF */ 277 - 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp, 270 + 0, 0, 271 + Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock, 278 272 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, 279 273 DstReg | SrcMem16 | ModRM | Mov, 280 274 /* 0xC0 - 0xCF */ 281 - 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM, 275 + 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 276 + 0, 0, 0, Group | GroupDual | Group9, 282 277 0, 0, 0, 0, 0, 0, 0, 0, 283 278 /* 0xD0 - 0xDF */ 284 279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ··· 292 283 293 284 static u32 group_table[] = { 294 285 [Group1_80*8] = 295 - ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 296 - ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 297 - ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 298 - ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 286 + ByteOp | DstMem | SrcImm | ModRM | Lock, 287 + ByteOp | DstMem | SrcImm | ModRM | Lock, 288 + ByteOp | DstMem | SrcImm | ModRM | Lock, 289 + ByteOp | DstMem | SrcImm | ModRM | Lock, 290 + ByteOp | DstMem | SrcImm | ModRM | Lock, 291 + ByteOp | DstMem | SrcImm | ModRM | Lock, 292 + ByteOp | DstMem | SrcImm | ModRM | Lock, 293 + ByteOp | DstMem | SrcImm | ModRM, 299 294 [Group1_81*8] = 300 - DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 301 - DstMem | SrcImm | ModRM, DstMem | 
SrcImm | ModRM, 302 - DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 303 - DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 295 + DstMem | SrcImm | ModRM | Lock, 296 + DstMem | SrcImm | ModRM | Lock, 297 + DstMem | SrcImm | ModRM | Lock, 298 + DstMem | SrcImm | ModRM | Lock, 299 + DstMem | SrcImm | ModRM | Lock, 300 + DstMem | SrcImm | ModRM | Lock, 301 + DstMem | SrcImm | ModRM | Lock, 302 + DstMem | SrcImm | ModRM, 304 303 [Group1_82*8] = 305 - ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 306 - ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 307 - ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 308 - ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 304 + ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, 305 + ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, 306 + ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, 307 + ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, 308 + ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, 309 + ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, 310 + ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, 311 + ByteOp | DstMem | SrcImm | ModRM | No64, 309 312 [Group1_83*8] = 310 - DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 311 - DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 312 - DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 313 - DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 313 + DstMem | SrcImmByte | ModRM | Lock, 314 + DstMem | SrcImmByte | ModRM | Lock, 315 + DstMem | SrcImmByte | ModRM | Lock, 316 + DstMem | SrcImmByte | ModRM | Lock, 317 + DstMem | SrcImmByte | ModRM | Lock, 318 + DstMem | SrcImmByte | ModRM | Lock, 319 + DstMem | SrcImmByte | ModRM | Lock, 320 + DstMem | SrcImmByte | ModRM, 314 321 [Group1A*8] = 315 322 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, 316 323 [Group3_Byte*8] = ··· 345 320 SrcMem | ModRM | Stack, 0, 346 321 SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, 347 322 [Group7*8] = 348 - 0, 0, ModRM | SrcMem, ModRM | SrcMem, 323 + 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, 349 324 SrcNone | ModRM | DstMem | Mov, 0, 350 - SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, 325 + SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv, 326 + [Group8*8] = 327 + 0, 0, 0, 0, 328 + DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, 329 + DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, 330 + [Group9*8] = 331 + 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, 351 332 }; 352 333 353 334 static u32 group2_table[] = { 354 335 [Group7*8] = 355 - SrcNone | ModRM, 0, 0, SrcNone | ModRM, 336 + SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, 356 337 SrcNone | ModRM | DstMem | Mov, 0, 357 338 SrcMem16 | ModRM | Mov, 0, 339 + [Group9*8] = 340 + 0, 0, 0, 0, 0, 0, 0, 0, 358 341 }; 359 342 360 343 /* EFLAGS bit definitions. 
*/ 344 + #define EFLG_ID (1<<21) 345 + #define EFLG_VIP (1<<20) 346 + #define EFLG_VIF (1<<19) 347 + #define EFLG_AC (1<<18) 361 348 #define EFLG_VM (1<<17) 362 349 #define EFLG_RF (1<<16) 350 + #define EFLG_IOPL (3<<12) 351 + #define EFLG_NT (1<<14) 363 352 #define EFLG_OF (1<<11) 364 353 #define EFLG_DF (1<<10) 365 354 #define EFLG_IF (1<<9) 355 + #define EFLG_TF (1<<8) 366 356 #define EFLG_SF (1<<7) 367 357 #define EFLG_ZF (1<<6) 368 358 #define EFLG_AF (1<<4) ··· 646 606 647 607 if (linear < fc->start || linear >= fc->end) { 648 608 size = min(15UL, PAGE_SIZE - offset_in_page(linear)); 649 - rc = ops->read_std(linear, fc->data, size, ctxt->vcpu); 609 + rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); 650 610 if (rc) 651 611 return rc; 652 612 fc->start = linear; ··· 701 661 op_bytes = 3; 702 662 *address = 0; 703 663 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, 704 - ctxt->vcpu); 664 + ctxt->vcpu, NULL); 705 665 if (rc) 706 666 return rc; 707 667 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, 708 - ctxt->vcpu); 668 + ctxt->vcpu, NULL); 709 669 return rc; 710 670 } 711 671 ··· 929 889 930 890 switch (mode) { 931 891 case X86EMUL_MODE_REAL: 892 + case X86EMUL_MODE_VM86: 932 893 case X86EMUL_MODE_PROT16: 933 894 def_op_bytes = def_ad_bytes = 2; 934 895 break; ··· 1016 975 } 1017 976 1018 977 if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { 1019 - kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");; 978 + kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); 1020 979 return -1; 1021 980 } 1022 981 ··· 1237 1196 rc = ops->read_emulated(register_address(c, ss_base(ctxt), 1238 1197 c->regs[VCPU_REGS_RSP]), 1239 1198 dest, len, ctxt->vcpu); 1240 - if (rc != 0) 1199 + if (rc != X86EMUL_CONTINUE) 1241 1200 return rc; 1242 1201 1243 1202 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); 1203 + return rc; 1204 + } 1205 + 1206 + static int emulate_popf(struct x86_emulate_ctxt *ctxt, 1207 + struct x86_emulate_ops *ops, 1208 + void *dest, int len) 1209 + { 1210 + int rc; 1211 + unsigned long val, change_mask; 1212 + int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 1213 + int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); 1214 + 1215 + rc = emulate_pop(ctxt, ops, &val, len); 1216 + if (rc != X86EMUL_CONTINUE) 1217 + return rc; 1218 + 1219 + change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF 1220 + | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID; 1221 + 1222 + switch(ctxt->mode) { 1223 + case X86EMUL_MODE_PROT64: 1224 + case X86EMUL_MODE_PROT32: 1225 + case X86EMUL_MODE_PROT16: 1226 + if (cpl == 0) 1227 + change_mask |= EFLG_IOPL; 1228 + if (cpl <= iopl) 1229 + change_mask |= EFLG_IF; 1230 + break; 1231 + case X86EMUL_MODE_VM86: 1232 + if (iopl < 3) { 1233 + kvm_inject_gp(ctxt->vcpu, 0); 1234 + return X86EMUL_PROPAGATE_FAULT; 1235 + } 1236 + change_mask |= EFLG_IF; 1237 + break; 1238 + default: /* real mode */ 1239 + change_mask |= (EFLG_IOPL | EFLG_IF); 1240 + break; 1241 + } 1242 + 1243 + *(unsigned long *)dest = 1244 + (ctxt->eflags & ~change_mask) | (val & change_mask); 1245 + 1244 1246 return rc; 1245 1247 } 1246 1248 ··· 1309 1225 if (rc != 0) 1310 1226 return rc; 1311 1227 1312 - rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg); 1228 + rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); 1313 1229 return rc; 1314 1230 } 1315 1231 ··· 1454 1370 int rc; 1455 1371 1456 1372 rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); 1457 - if 
(rc != 0) 1373 + if (rc != X86EMUL_CONTINUE) 1458 1374 return rc; 1459 1375 1460 1376 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || ··· 1469 1385 (u32) c->regs[VCPU_REGS_RBX]; 1470 1386 1471 1387 rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); 1472 - if (rc != 0) 1388 + if (rc != X86EMUL_CONTINUE) 1473 1389 return rc; 1474 1390 ctxt->eflags |= EFLG_ZF; 1475 1391 } ··· 1491 1407 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); 1492 1408 if (rc) 1493 1409 return rc; 1494 - rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS); 1410 + rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); 1495 1411 return rc; 1496 1412 } 1497 1413 ··· 1535 1451 &c->dst.val, 1536 1452 c->dst.bytes, 1537 1453 ctxt->vcpu); 1538 - if (rc != 0) 1454 + if (rc != X86EMUL_CONTINUE) 1539 1455 return rc; 1540 1456 break; 1541 1457 case OP_NONE: ··· 1598 1514 u64 msr_data; 1599 1515 1600 1516 /* syscall is not available in real mode */ 1601 - if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL 1602 - || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) 1603 - return -1; 1517 + if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) 1518 + return X86EMUL_UNHANDLEABLE; 1604 1519 1605 1520 setup_syscalls_segments(ctxt, &cs, &ss); 1606 1521 ··· 1636 1553 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); 1637 1554 } 1638 1555 1639 - return 0; 1556 + return X86EMUL_CONTINUE; 1640 1557 } 1641 1558 1642 1559 static int ··· 1646 1563 struct kvm_segment cs, ss; 1647 1564 u64 msr_data; 1648 1565 1649 - /* inject #UD if LOCK prefix is used */ 1650 - if (c->lock_prefix) 1651 - return -1; 1652 - 1653 - /* inject #GP if in real mode or paging is disabled */ 1654 - if (ctxt->mode == X86EMUL_MODE_REAL || 1655 - !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) { 1566 + /* inject #GP if in real mode */ 1567 + if (ctxt->mode == X86EMUL_MODE_REAL) { 1656 1568 kvm_inject_gp(ctxt->vcpu, 0); 1657 - return -1; 1569 + return X86EMUL_UNHANDLEABLE; 1658 1570 } 1659 1571 1660 1572 /* XXX sysenter/sysexit have not been tested in 64bit mode. 1661 1573 * Therefore, we inject an #UD. 
1662 1574 */ 1663 1575 if (ctxt->mode == X86EMUL_MODE_PROT64) 1664 - return -1; 1576 + return X86EMUL_UNHANDLEABLE; 1665 1577 1666 1578 setup_syscalls_segments(ctxt, &cs, &ss); 1667 1579 ··· 1665 1587 case X86EMUL_MODE_PROT32: 1666 1588 if ((msr_data & 0xfffc) == 0x0) { 1667 1589 kvm_inject_gp(ctxt->vcpu, 0); 1668 - return -1; 1590 + return X86EMUL_PROPAGATE_FAULT; 1669 1591 } 1670 1592 break; 1671 1593 case X86EMUL_MODE_PROT64: 1672 1594 if (msr_data == 0x0) { 1673 1595 kvm_inject_gp(ctxt->vcpu, 0); 1674 - return -1; 1596 + return X86EMUL_PROPAGATE_FAULT; 1675 1597 } 1676 1598 break; 1677 1599 } ··· 1696 1618 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); 1697 1619 c->regs[VCPU_REGS_RSP] = msr_data; 1698 1620 1699 - return 0; 1621 + return X86EMUL_CONTINUE; 1700 1622 } 1701 1623 1702 1624 static int ··· 1707 1629 u64 msr_data; 1708 1630 int usermode; 1709 1631 1710 - /* inject #UD if LOCK prefix is used */ 1711 - if (c->lock_prefix) 1712 - return -1; 1713 - 1714 - /* inject #GP if in real mode or paging is disabled */ 1715 - if (ctxt->mode == X86EMUL_MODE_REAL 1716 - || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) { 1632 + /* inject #GP if in real mode or Virtual 8086 mode */ 1633 + if (ctxt->mode == X86EMUL_MODE_REAL || 1634 + ctxt->mode == X86EMUL_MODE_VM86) { 1717 1635 kvm_inject_gp(ctxt->vcpu, 0); 1718 - return -1; 1719 - } 1720 - 1721 - /* sysexit must be called from CPL 0 */ 1722 - if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) { 1723 - kvm_inject_gp(ctxt->vcpu, 0); 1724 - return -1; 1636 + return X86EMUL_UNHANDLEABLE; 1725 1637 } 1726 1638 1727 1639 setup_syscalls_segments(ctxt, &cs, &ss); ··· 1729 1661 cs.selector = (u16)(msr_data + 16); 1730 1662 if ((msr_data & 0xfffc) == 0x0) { 1731 1663 kvm_inject_gp(ctxt->vcpu, 0); 1732 - return -1; 1664 + return X86EMUL_PROPAGATE_FAULT; 1733 1665 } 1734 1666 ss.selector = (u16)(msr_data + 24); 1735 1667 break; ··· 1737 1669 cs.selector = (u16)(msr_data + 32); 1738 1670 if (msr_data == 0x0) { 1739 1671 kvm_inject_gp(ctxt->vcpu, 0); 1740 - return -1; 1672 + return X86EMUL_PROPAGATE_FAULT; 1741 1673 } 1742 1674 ss.selector = cs.selector + 8; 1743 1675 cs.db = 0; ··· 1753 1685 c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; 1754 1686 c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; 1755 1687 1756 - return 0; 1688 + return X86EMUL_CONTINUE; 1689 + } 1690 + 1691 + static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) 1692 + { 1693 + int iopl; 1694 + if (ctxt->mode == X86EMUL_MODE_REAL) 1695 + return false; 1696 + if (ctxt->mode == X86EMUL_MODE_VM86) 1697 + return true; 1698 + iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 1699 + return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; 1700 + } 1701 + 1702 + static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, 1703 + struct x86_emulate_ops *ops, 1704 + u16 port, u16 len) 1705 + { 1706 + struct kvm_segment tr_seg; 1707 + int r; 1708 + u16 io_bitmap_ptr; 1709 + u8 perm, bit_idx = port & 0x7; 1710 + unsigned mask = (1 << len) - 1; 1711 + 1712 + kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR); 1713 + if (tr_seg.unusable) 1714 + return false; 1715 + if (tr_seg.limit < 103) 1716 + return false; 1717 + r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, 1718 + NULL); 1719 + if (r != X86EMUL_CONTINUE) 1720 + return false; 1721 + if (io_bitmap_ptr + port/8 > tr_seg.limit) 1722 + return false; 1723 + r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1, 1724 + ctxt->vcpu, NULL); 1725 + if (r != X86EMUL_CONTINUE) 1726 + return 
false; 1727 + if ((perm >> bit_idx) & mask) 1728 + return false; 1729 + return true; 1730 + } 1731 + 1732 + static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, 1733 + struct x86_emulate_ops *ops, 1734 + u16 port, u16 len) 1735 + { 1736 + if (emulator_bad_iopl(ctxt)) 1737 + if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) 1738 + return false; 1739 + return true; 1757 1740 } 1758 1741 1759 1742 int ··· 1827 1708 1828 1709 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 1829 1710 saved_eip = c->eip; 1711 + 1712 + /* LOCK prefix is allowed only with some instructions */ 1713 + if (c->lock_prefix && !(c->d & Lock)) { 1714 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 1715 + goto done; 1716 + } 1717 + 1718 + /* Privileged instruction can be executed only in CPL=0 */ 1719 + if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { 1720 + kvm_inject_gp(ctxt->vcpu, 0); 1721 + goto done; 1722 + } 1830 1723 1831 1724 if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) 1832 1725 memop = c->modrm_ea; ··· 1880 1749 &c->src.val, 1881 1750 c->src.bytes, 1882 1751 ctxt->vcpu); 1883 - if (rc != 0) 1752 + if (rc != X86EMUL_CONTINUE) 1884 1753 goto done; 1885 1754 c->src.orig_val = c->src.val; 1886 1755 } ··· 1899 1768 c->dst.ptr = (void *)c->dst.ptr + 1900 1769 (c->src.val & mask) / 8; 1901 1770 } 1902 - if (!(c->d & Mov) && 1903 - /* optimisation - avoid slow emulated read */ 1904 - ((rc = ops->read_emulated((unsigned long)c->dst.ptr, 1905 - &c->dst.val, 1906 - c->dst.bytes, ctxt->vcpu)) != 0)) 1907 - goto done; 1771 + if (!(c->d & Mov)) { 1772 + /* optimisation - avoid slow emulated read */ 1773 + rc = ops->read_emulated((unsigned long)c->dst.ptr, 1774 + &c->dst.val, 1775 + c->dst.bytes, 1776 + ctxt->vcpu); 1777 + if (rc != X86EMUL_CONTINUE) 1778 + goto done; 1779 + } 1908 1780 } 1909 1781 c->dst.orig_val = c->dst.val; 1910 1782 ··· 2010 1876 break; 2011 1877 case 0x6c: /* insb */ 2012 1878 case 0x6d: /* insw/insd */ 2013 - if (kvm_emulate_pio_string(ctxt->vcpu, 1879 + if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 1880 + (c->d & ByteOp) ? 1 : c->op_bytes)) { 1881 + kvm_inject_gp(ctxt->vcpu, 0); 1882 + goto done; 1883 + } 1884 + if (kvm_emulate_pio_string(ctxt->vcpu, 2014 1885 1, 2015 1886 (c->d & ByteOp) ? 1 : c->op_bytes, 2016 1887 c->rep_prefix ? ··· 2031 1892 return 0; 2032 1893 case 0x6e: /* outsb */ 2033 1894 case 0x6f: /* outsw/outsd */ 1895 + if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 1896 + (c->d & ByteOp) ? 1 : c->op_bytes)) { 1897 + kvm_inject_gp(ctxt->vcpu, 0); 1898 + goto done; 1899 + } 2034 1900 if (kvm_emulate_pio_string(ctxt->vcpu, 2035 1901 0, 2036 1902 (c->d & ByteOp) ? 1 : c->op_bytes, ··· 2122 1978 break; 2123 1979 case 0x8e: { /* mov seg, r/m16 */ 2124 1980 uint16_t sel; 2125 - int type_bits; 2126 - int err; 2127 1981 2128 1982 sel = c->src.val; 1983 + 1984 + if (c->modrm_reg == VCPU_SREG_CS || 1985 + c->modrm_reg > VCPU_SREG_GS) { 1986 + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 1987 + goto done; 1988 + } 1989 + 2129 1990 if (c->modrm_reg == VCPU_SREG_SS) 2130 1991 toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); 2131 1992 2132 - if (c->modrm_reg <= 5) { 2133 - type_bits = (c->modrm_reg == 1) ? 
9 : 1; 2134 - err = kvm_load_segment_descriptor(ctxt->vcpu, sel, 2135 - type_bits, c->modrm_reg); 2136 - } else { 2137 - printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n", 2138 - c->modrm); 2139 - goto cannot_emulate; 2140 - } 2141 - 2142 - if (err < 0) 2143 - goto cannot_emulate; 1993 + rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); 2144 1994 2145 1995 c->dst.type = OP_NONE; /* Disable writeback. */ 2146 1996 break; ··· 2163 2025 c->dst.type = OP_REG; 2164 2026 c->dst.ptr = (unsigned long *) &ctxt->eflags; 2165 2027 c->dst.bytes = c->op_bytes; 2166 - goto pop_instruction; 2028 + rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); 2029 + if (rc != X86EMUL_CONTINUE) 2030 + goto done; 2031 + break; 2167 2032 case 0xa0 ... 0xa1: /* mov */ 2168 2033 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 2169 2034 c->dst.val = c->src.val; ··· 2180 2039 c->dst.ptr = (unsigned long *)register_address(c, 2181 2040 es_base(ctxt), 2182 2041 c->regs[VCPU_REGS_RDI]); 2183 - if ((rc = ops->read_emulated(register_address(c, 2184 - seg_override_base(ctxt, c), 2185 - c->regs[VCPU_REGS_RSI]), 2042 + rc = ops->read_emulated(register_address(c, 2043 + seg_override_base(ctxt, c), 2044 + c->regs[VCPU_REGS_RSI]), 2186 2045 &c->dst.val, 2187 - c->dst.bytes, ctxt->vcpu)) != 0) 2046 + c->dst.bytes, ctxt->vcpu); 2047 + if (rc != X86EMUL_CONTINUE) 2188 2048 goto done; 2189 2049 register_address_increment(c, &c->regs[VCPU_REGS_RSI], 2190 2050 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes ··· 2200 2058 c->src.ptr = (unsigned long *)register_address(c, 2201 2059 seg_override_base(ctxt, c), 2202 2060 c->regs[VCPU_REGS_RSI]); 2203 - if ((rc = ops->read_emulated((unsigned long)c->src.ptr, 2204 - &c->src.val, 2205 - c->src.bytes, 2206 - ctxt->vcpu)) != 0) 2061 + rc = ops->read_emulated((unsigned long)c->src.ptr, 2062 + &c->src.val, 2063 + c->src.bytes, 2064 + ctxt->vcpu); 2065 + if (rc != X86EMUL_CONTINUE) 2207 2066 goto done; 2208 2067 2209 2068 c->dst.type = OP_NONE; /* Disable writeback. */ ··· 2212 2069 c->dst.ptr = (unsigned long *)register_address(c, 2213 2070 es_base(ctxt), 2214 2071 c->regs[VCPU_REGS_RDI]); 2215 - if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, 2216 - &c->dst.val, 2217 - c->dst.bytes, 2218 - ctxt->vcpu)) != 0) 2072 + rc = ops->read_emulated((unsigned long)c->dst.ptr, 2073 + &c->dst.val, 2074 + c->dst.bytes, 2075 + ctxt->vcpu); 2076 + if (rc != X86EMUL_CONTINUE) 2219 2077 goto done; 2220 2078 2221 2079 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); ··· 2246 2102 c->dst.type = OP_REG; 2247 2103 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 2248 2104 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 2249 - if ((rc = ops->read_emulated(register_address(c, 2250 - seg_override_base(ctxt, c), 2251 - c->regs[VCPU_REGS_RSI]), 2252 - &c->dst.val, 2253 - c->dst.bytes, 2254 - ctxt->vcpu)) != 0) 2105 + rc = ops->read_emulated(register_address(c, 2106 + seg_override_base(ctxt, c), 2107 + c->regs[VCPU_REGS_RSI]), 2108 + &c->dst.val, 2109 + c->dst.bytes, 2110 + ctxt->vcpu); 2111 + if (rc != X86EMUL_CONTINUE) 2255 2112 goto done; 2256 2113 register_address_increment(c, &c->regs[VCPU_REGS_RSI], 2257 2114 (ctxt->eflags & EFLG_DF) ? 
-c->dst.bytes ··· 2308 2163 case 0xe9: /* jmp rel */ 2309 2164 goto jmp; 2310 2165 case 0xea: /* jmp far */ 2311 - if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9, 2312 - VCPU_SREG_CS) < 0) { 2313 - DPRINTF("jmp far: Failed to load CS descriptor\n"); 2314 - goto cannot_emulate; 2315 - } 2166 + if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 2167 + VCPU_SREG_CS)) 2168 + goto done; 2316 2169 2317 2170 c->eip = c->src.val; 2318 2171 break; ··· 2328 2185 case 0xef: /* out (e/r)ax,dx */ 2329 2186 port = c->regs[VCPU_REGS_RDX]; 2330 2187 io_dir_in = 0; 2331 - do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, 2188 + do_io: 2189 + if (!emulator_io_permited(ctxt, ops, port, 2190 + (c->d & ByteOp) ? 1 : c->op_bytes)) { 2191 + kvm_inject_gp(ctxt->vcpu, 0); 2192 + goto done; 2193 + } 2194 + if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, 2332 2195 (c->d & ByteOp) ? 1 : c->op_bytes, 2333 2196 port) != 0) { 2334 2197 c->eip = saved_eip; ··· 2359 2210 c->dst.type = OP_NONE; /* Disable writeback. */ 2360 2211 break; 2361 2212 case 0xfa: /* cli */ 2362 - ctxt->eflags &= ~X86_EFLAGS_IF; 2363 - c->dst.type = OP_NONE; /* Disable writeback. */ 2213 + if (emulator_bad_iopl(ctxt)) 2214 + kvm_inject_gp(ctxt->vcpu, 0); 2215 + else { 2216 + ctxt->eflags &= ~X86_EFLAGS_IF; 2217 + c->dst.type = OP_NONE; /* Disable writeback. */ 2218 + } 2364 2219 break; 2365 2220 case 0xfb: /* sti */ 2366 - toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); 2367 - ctxt->eflags |= X86_EFLAGS_IF; 2368 - c->dst.type = OP_NONE; /* Disable writeback. */ 2221 + if (emulator_bad_iopl(ctxt)) 2222 + kvm_inject_gp(ctxt->vcpu, 0); 2223 + else { 2224 + toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); 2225 + ctxt->eflags |= X86_EFLAGS_IF; 2226 + c->dst.type = OP_NONE; /* Disable writeback. */ 2227 + } 2369 2228 break; 2370 2229 case 0xfc: /* cld */ 2371 2230 ctxt->eflags &= ~EFLG_DF; ··· 2476 2319 } 2477 2320 break; 2478 2321 case 0x05: /* syscall */ 2479 - if (emulate_syscall(ctxt) == -1) 2480 - goto cannot_emulate; 2322 + rc = emulate_syscall(ctxt); 2323 + if (rc != X86EMUL_CONTINUE) 2324 + goto done; 2481 2325 else 2482 2326 goto writeback; 2483 2327 break; ··· 2549 2391 c->dst.type = OP_NONE; 2550 2392 break; 2551 2393 case 0x34: /* sysenter */ 2552 - if (emulate_sysenter(ctxt) == -1) 2553 - goto cannot_emulate; 2394 + rc = emulate_sysenter(ctxt); 2395 + if (rc != X86EMUL_CONTINUE) 2396 + goto done; 2554 2397 else 2555 2398 goto writeback; 2556 2399 break; 2557 2400 case 0x35: /* sysexit */ 2558 - if (emulate_sysexit(ctxt) == -1) 2559 - goto cannot_emulate; 2401 + rc = emulate_sysexit(ctxt); 2402 + if (rc != X86EMUL_CONTINUE) 2403 + goto done; 2560 2404 else 2561 2405 goto writeback; 2562 2406 break;
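The port I/O checks added above implement the architectural rule: the access is allowed outright when CPL <= IOPL (and always in real mode); otherwise, including always in virtual-8086 mode, the TSS I/O permission bitmap decides. A condensed restatement of that decision, folding emulator_bad_iopl() into its only caller; error handling and the byte-granular bitmap read are as in the patch:

	/* Condensed from emulator_io_permited()/emulator_bad_iopl() above. */
	static bool io_allowed(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops, u16 port, u16 len)
	{
		int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;

		if (ctxt->mode == X86EMUL_MODE_REAL)
			return true;			/* no I/O protection in real mode */
		if (ctxt->mode != X86EMUL_MODE_VM86 &&
		    kvm_x86_ops->get_cpl(ctxt->vcpu) <= iopl)
			return true;			/* CPL <= IOPL: bitmap not consulted */

		/*
		 * The bitmap offset sits at byte 102 of the TSS (so the limit must be
		 * at least 103); port N is covered by bit N%8 of byte bitmap + N/8,
		 * and all 'len' bits starting there must be clear.
		 */
		return emulator_io_port_access_allowed(ctxt, ops, port, len);
	}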
+12 -11
arch/x86/kvm/i8254.c
··· 242 242 { 243 243 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, 244 244 irq_ack_notifier); 245 - spin_lock(&ps->inject_lock); 245 + raw_spin_lock(&ps->inject_lock); 246 246 if (atomic_dec_return(&ps->pit_timer.pending) < 0) 247 247 atomic_inc(&ps->pit_timer.pending); 248 248 ps->irq_ack = 1; 249 - spin_unlock(&ps->inject_lock); 249 + raw_spin_unlock(&ps->inject_lock); 250 250 } 251 251 252 252 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) ··· 605 605 .write = speaker_ioport_write, 606 606 }; 607 607 608 - /* Caller must have writers lock on slots_lock */ 608 + /* Caller must hold slots_lock */ 609 609 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) 610 610 { 611 611 struct kvm_pit *pit; ··· 624 624 625 625 mutex_init(&pit->pit_state.lock); 626 626 mutex_lock(&pit->pit_state.lock); 627 - spin_lock_init(&pit->pit_state.inject_lock); 627 + raw_spin_lock_init(&pit->pit_state.inject_lock); 628 628 629 629 kvm->arch.vpit = pit; 630 630 pit->kvm = kvm; ··· 645 645 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); 646 646 647 647 kvm_iodevice_init(&pit->dev, &pit_dev_ops); 648 - ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); 648 + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev); 649 649 if (ret < 0) 650 650 goto fail; 651 651 652 652 if (flags & KVM_PIT_SPEAKER_DUMMY) { 653 653 kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); 654 - ret = __kvm_io_bus_register_dev(&kvm->pio_bus, 654 + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 655 655 &pit->speaker_dev); 656 656 if (ret < 0) 657 657 goto fail_unregister; ··· 660 660 return pit; 661 661 662 662 fail_unregister: 663 - __kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev); 663 + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); 664 664 665 665 fail: 666 - if (pit->irq_source_id >= 0) 667 - kvm_free_irq_source_id(kvm, pit->irq_source_id); 666 + kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); 667 + kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); 668 + kvm_free_irq_source_id(kvm, pit->irq_source_id); 668 669 669 670 kfree(pit); 670 671 return NULL; ··· 724 723 /* Try to inject pending interrupts when 725 724 * last one has been acked. 726 725 */ 727 - spin_lock(&ps->inject_lock); 726 + raw_spin_lock(&ps->inject_lock); 728 727 if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { 729 728 ps->irq_ack = 0; 730 729 inject = 1; 731 730 } 732 - spin_unlock(&ps->inject_lock); 731 + raw_spin_unlock(&ps->inject_lock); 733 732 if (inject) 734 733 __inject_pit_timer_intr(kvm); 735 734 }
+1 -1
arch/x86/kvm/i8254.h
··· 27 27 u32 speaker_data_on; 28 28 struct mutex lock; 29 29 struct kvm_pit *pit; 30 - spinlock_t inject_lock; 30 + raw_spinlock_t inject_lock; 31 31 unsigned long irq_ack; 32 32 struct kvm_irq_ack_notifier irq_ack_notifier; 33 33 };
+30 -16
arch/x86/kvm/i8259.c
··· 44 44 * Other interrupt may be delivered to PIC while lock is dropped but 45 45 * it should be safe since PIC state is already updated at this stage. 46 46 */ 47 - spin_unlock(&s->pics_state->lock); 47 + raw_spin_unlock(&s->pics_state->lock); 48 48 kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); 49 - spin_lock(&s->pics_state->lock); 49 + raw_spin_lock(&s->pics_state->lock); 50 50 } 51 51 52 52 void kvm_pic_clear_isr_ack(struct kvm *kvm) 53 53 { 54 54 struct kvm_pic *s = pic_irqchip(kvm); 55 - spin_lock(&s->lock); 55 + 56 + raw_spin_lock(&s->lock); 56 57 s->pics[0].isr_ack = 0xff; 57 58 s->pics[1].isr_ack = 0xff; 58 - spin_unlock(&s->lock); 59 + raw_spin_unlock(&s->lock); 59 60 } 60 61 61 62 /* ··· 157 156 158 157 void kvm_pic_update_irq(struct kvm_pic *s) 159 158 { 160 - spin_lock(&s->lock); 159 + raw_spin_lock(&s->lock); 161 160 pic_update_irq(s); 162 - spin_unlock(&s->lock); 161 + raw_spin_unlock(&s->lock); 163 162 } 164 163 165 164 int kvm_pic_set_irq(void *opaque, int irq, int level) ··· 167 166 struct kvm_pic *s = opaque; 168 167 int ret = -1; 169 168 170 - spin_lock(&s->lock); 169 + raw_spin_lock(&s->lock); 171 170 if (irq >= 0 && irq < PIC_NUM_PINS) { 172 171 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 173 172 pic_update_irq(s); 174 173 trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, 175 174 s->pics[irq >> 3].imr, ret == 0); 176 175 } 177 - spin_unlock(&s->lock); 176 + raw_spin_unlock(&s->lock); 178 177 179 178 return ret; 180 179 } ··· 204 203 int irq, irq2, intno; 205 204 struct kvm_pic *s = pic_irqchip(kvm); 206 205 207 - spin_lock(&s->lock); 206 + raw_spin_lock(&s->lock); 208 207 irq = pic_get_irq(&s->pics[0]); 209 208 if (irq >= 0) { 210 209 pic_intack(&s->pics[0], irq); ··· 229 228 intno = s->pics[0].irq_base + irq; 230 229 } 231 230 pic_update_irq(s); 232 - spin_unlock(&s->lock); 231 + raw_spin_unlock(&s->lock); 233 232 234 233 return intno; 235 234 } ··· 443 442 printk(KERN_ERR "PIC: non byte write\n"); 444 443 return 0; 445 444 } 446 - spin_lock(&s->lock); 445 + raw_spin_lock(&s->lock); 447 446 switch (addr) { 448 447 case 0x20: 449 448 case 0x21: ··· 456 455 elcr_ioport_write(&s->pics[addr & 1], addr, data); 457 456 break; 458 457 } 459 - spin_unlock(&s->lock); 458 + raw_spin_unlock(&s->lock); 460 459 return 0; 461 460 } 462 461 ··· 473 472 printk(KERN_ERR "PIC: non byte read\n"); 474 473 return 0; 475 474 } 476 - spin_lock(&s->lock); 475 + raw_spin_lock(&s->lock); 477 476 switch (addr) { 478 477 case 0x20: 479 478 case 0x21: ··· 487 486 break; 488 487 } 489 488 *(unsigned char *)val = data; 490 - spin_unlock(&s->lock); 489 + raw_spin_unlock(&s->lock); 491 490 return 0; 492 491 } 493 492 ··· 521 520 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); 522 521 if (!s) 523 522 return NULL; 524 - spin_lock_init(&s->lock); 523 + raw_spin_lock_init(&s->lock); 525 524 s->kvm = kvm; 526 525 s->pics[0].elcr_mask = 0xf8; 527 526 s->pics[1].elcr_mask = 0xde; ··· 534 533 * Initialize PIO device 535 534 */ 536 535 kvm_iodevice_init(&s->dev, &picdev_ops); 537 - ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev); 536 + mutex_lock(&kvm->slots_lock); 537 + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); 538 + mutex_unlock(&kvm->slots_lock); 538 539 if (ret < 0) { 539 540 kfree(s); 540 541 return NULL; 541 542 } 542 543 543 544 return s; 545 + } 546 + 547 + void kvm_destroy_pic(struct kvm *kvm) 548 + { 549 + struct kvm_pic *vpic = kvm->arch.vpic; 550 + 551 + if (vpic) { 552 + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, 
&vpic->dev); 553 + kvm->arch.vpic = NULL; 554 + kfree(vpic); 555 + } 544 556 }
+2 -1
arch/x86/kvm/irq.h
··· 62 62 }; 63 63 64 64 struct kvm_pic { 65 - spinlock_t lock; 65 + raw_spinlock_t lock; 66 66 unsigned pending_acks; 67 67 struct kvm *kvm; 68 68 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ ··· 75 75 }; 76 76 77 77 struct kvm_pic *kvm_create_pic(struct kvm *kvm); 78 + void kvm_destroy_pic(struct kvm *kvm); 78 79 int kvm_pic_read_irq(struct kvm *kvm); 79 80 void kvm_pic_update_irq(struct kvm_pic *s); 80 81 void kvm_pic_clear_isr_ack(struct kvm *kvm);
+31
arch/x86/kvm/kvm_cache_regs.h
··· 1 1 #ifndef ASM_KVM_CACHE_REGS_H 2 2 #define ASM_KVM_CACHE_REGS_H 3 3 4 + #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS 5 + #define KVM_POSSIBLE_CR4_GUEST_BITS \ 6 + (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ 7 + | X86_CR4_OSXMMEXCPT | X86_CR4_PGE) 8 + 4 9 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, 5 10 enum kvm_reg reg) 6 11 { ··· 41 36 kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); 42 37 43 38 return vcpu->arch.pdptrs[index]; 39 + } 40 + 41 + static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) 42 + { 43 + ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; 44 + if (tmask & vcpu->arch.cr0_guest_owned_bits) 45 + kvm_x86_ops->decache_cr0_guest_bits(vcpu); 46 + return vcpu->arch.cr0 & mask; 47 + } 48 + 49 + static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) 50 + { 51 + return kvm_read_cr0_bits(vcpu, ~0UL); 52 + } 53 + 54 + static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) 55 + { 56 + ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; 57 + if (tmask & vcpu->arch.cr4_guest_owned_bits) 58 + kvm_x86_ops->decache_cr4_guest_bits(vcpu); 59 + return vcpu->arch.cr4 & mask; 60 + } 61 + 62 + static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) 63 + { 64 + return kvm_read_cr4_bits(vcpu, ~0UL); 44 65 } 45 66 46 67 #endif
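With CR0.TS and several CR4 bits now allowed to be guest-owned, control-register bits are read through the accessors above so a stale cached value is refreshed from hardware only when the requested bit can actually be guest-owned. A short usage sketch (the predicate names are illustrative; the mmu.c hunk further down shows the real is_write_protection() following the same pattern):

	#include "kvm_cache_regs.h"

	static bool guest_paging_enabled(struct kvm_vcpu *vcpu)
	{
		/*
		 * CR0.PG is never guest-owned (only CR0.TS can be), so this is a
		 * plain cached read; asking for X86_CR0_TS instead could trigger a
		 * decache_cr0_guest_bits() call first.
		 */
		return kvm_read_cr0_bits(vcpu, X86_CR0_PG) != 0;
	}

	static bool guest_global_pages_enabled(struct kvm_vcpu *vcpu)
	{
		/* CR4.PGE is in KVM_POSSIBLE_CR4_GUEST_BITS, so this may decache. */
		return kvm_read_cr4_bits(vcpu, X86_CR4_PGE) != 0;
	}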
+31
arch/x86/kvm/lapic.c
··· 1246 1246 1247 1247 return 0; 1248 1248 } 1249 + 1250 + int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) 1251 + { 1252 + struct kvm_lapic *apic = vcpu->arch.apic; 1253 + 1254 + if (!irqchip_in_kernel(vcpu->kvm)) 1255 + return 1; 1256 + 1257 + /* if this is ICR write vector before command */ 1258 + if (reg == APIC_ICR) 1259 + apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 1260 + return apic_reg_write(apic, reg, (u32)data); 1261 + } 1262 + 1263 + int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) 1264 + { 1265 + struct kvm_lapic *apic = vcpu->arch.apic; 1266 + u32 low, high = 0; 1267 + 1268 + if (!irqchip_in_kernel(vcpu->kvm)) 1269 + return 1; 1270 + 1271 + if (apic_reg_read(apic, reg, 4, &low)) 1272 + return 1; 1273 + if (reg == APIC_ICR) 1274 + apic_reg_read(apic, APIC_ICR2, 4, &high); 1275 + 1276 + *data = (((u64)high) << 32) | low; 1277 + 1278 + return 0; 1279 + }
+8
arch/x86/kvm/lapic.h
··· 48 48 49 49 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); 50 50 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); 51 + 52 + int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); 53 + int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); 54 + 55 + static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) 56 + { 57 + return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; 58 + } 51 59 #endif
+64 -73
arch/x86/kvm/mmu.c
··· 18 18 */ 19 19 20 20 #include "mmu.h" 21 + #include "x86.h" 21 22 #include "kvm_cache_regs.h" 22 23 23 24 #include <linux/kvm_host.h> ··· 30 29 #include <linux/swap.h> 31 30 #include <linux/hugetlb.h> 32 31 #include <linux/compiler.h> 32 + #include <linux/srcu.h> 33 33 34 34 #include <asm/page.h> 35 35 #include <asm/cmpxchg.h> ··· 138 136 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ 139 137 | PT64_NX_MASK) 140 138 141 - #define PFERR_PRESENT_MASK (1U << 0) 142 - #define PFERR_WRITE_MASK (1U << 1) 143 - #define PFERR_USER_MASK (1U << 2) 144 - #define PFERR_RSVD_MASK (1U << 3) 145 - #define PFERR_FETCH_MASK (1U << 4) 146 - 147 - #define PT_PDPE_LEVEL 3 148 - #define PT_DIRECTORY_LEVEL 2 149 - #define PT_PAGE_TABLE_LEVEL 1 150 - 151 139 #define RMAP_EXT 4 152 140 153 141 #define ACC_EXEC_MASK 1 ··· 145 153 #define ACC_USER_MASK PT_USER_MASK 146 154 #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) 147 155 156 + #include <trace/events/kvm.h> 157 + 158 + #undef TRACE_INCLUDE_FILE 148 159 #define CREATE_TRACE_POINTS 149 160 #include "mmutrace.h" 150 161 ··· 224 229 225 230 static int is_write_protection(struct kvm_vcpu *vcpu) 226 231 { 227 - return vcpu->arch.cr0 & X86_CR0_WP; 232 + return kvm_read_cr0_bits(vcpu, X86_CR0_WP); 228 233 } 229 234 230 235 static int is_cpuid_PSE36(void) ··· 234 239 235 240 static int is_nx(struct kvm_vcpu *vcpu) 236 241 { 237 - return vcpu->arch.shadow_efer & EFER_NX; 242 + return vcpu->arch.efer & EFER_NX; 238 243 } 239 244 240 245 static int is_shadow_present_pte(u64 pte) ··· 248 253 return pte & PT_PAGE_SIZE_MASK; 249 254 } 250 255 251 - static int is_writeble_pte(unsigned long pte) 256 + static int is_writable_pte(unsigned long pte) 252 257 { 253 258 return pte & PT_WRITABLE_MASK; 254 259 } ··· 465 470 466 471 static int host_mapping_level(struct kvm *kvm, gfn_t gfn) 467 472 { 468 - unsigned long page_size = PAGE_SIZE; 469 - struct vm_area_struct *vma; 470 - unsigned long addr; 473 + unsigned long page_size; 471 474 int i, ret = 0; 472 475 473 - addr = gfn_to_hva(kvm, gfn); 474 - if (kvm_is_error_hva(addr)) 475 - return PT_PAGE_TABLE_LEVEL; 476 - 477 - down_read(&current->mm->mmap_sem); 478 - vma = find_vma(current->mm, addr); 479 - if (!vma) 480 - goto out; 481 - 482 - page_size = vma_kernel_pagesize(vma); 483 - 484 - out: 485 - up_read(&current->mm->mmap_sem); 476 + page_size = kvm_host_page_size(kvm, gfn); 486 477 487 478 for (i = PT_PAGE_TABLE_LEVEL; 488 479 i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { ··· 484 503 static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) 485 504 { 486 505 struct kvm_memory_slot *slot; 487 - int host_level; 488 - int level = PT_PAGE_TABLE_LEVEL; 506 + int host_level, level, max_level; 489 507 490 508 slot = gfn_to_memslot(vcpu->kvm, large_gfn); 491 509 if (slot && slot->dirty_bitmap) ··· 495 515 if (host_level == PT_PAGE_TABLE_LEVEL) 496 516 return host_level; 497 517 498 - for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) 518 + max_level = kvm_x86_ops->get_lpage_level() < host_level ? 
519 + kvm_x86_ops->get_lpage_level() : host_level; 520 + 521 + for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) 499 522 if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) 500 523 break; 501 524 ··· 616 633 pfn = spte_to_pfn(*spte); 617 634 if (*spte & shadow_accessed_mask) 618 635 kvm_set_pfn_accessed(pfn); 619 - if (is_writeble_pte(*spte)) 636 + if (is_writable_pte(*spte)) 620 637 kvm_set_pfn_dirty(pfn); 621 638 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); 622 639 if (!*rmapp) { ··· 645 662 prev_desc = desc; 646 663 desc = desc->more; 647 664 } 665 + pr_err("rmap_remove: %p %llx many->many\n", spte, *spte); 648 666 BUG(); 649 667 } 650 668 } ··· 692 708 BUG_ON(!spte); 693 709 BUG_ON(!(*spte & PT_PRESENT_MASK)); 694 710 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 695 - if (is_writeble_pte(*spte)) { 711 + if (is_writable_pte(*spte)) { 696 712 __set_spte(spte, *spte & ~PT_WRITABLE_MASK); 697 713 write_protected = 1; 698 714 } ··· 716 732 BUG_ON(!(*spte & PT_PRESENT_MASK)); 717 733 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); 718 734 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); 719 - if (is_writeble_pte(*spte)) { 735 + if (is_writable_pte(*spte)) { 720 736 rmap_remove(kvm, spte); 721 737 --kvm->stat.lpages; 722 738 __set_spte(spte, shadow_trap_nonpresent_pte); ··· 771 787 772 788 new_spte &= ~PT_WRITABLE_MASK; 773 789 new_spte &= ~SPTE_HOST_WRITEABLE; 774 - if (is_writeble_pte(*spte)) 790 + if (is_writable_pte(*spte)) 775 791 kvm_set_pfn_dirty(spte_to_pfn(*spte)); 776 792 __set_spte(spte, new_spte); 777 793 spte = rmap_next(kvm, rmapp, spte); ··· 789 805 unsigned long data)) 790 806 { 791 807 int i, j; 808 + int ret; 792 809 int retval = 0; 810 + struct kvm_memslots *slots; 793 811 794 - /* 795 - * If mmap_sem isn't taken, we can look the memslots with only 796 - * the mmu_lock by skipping over the slots with userspace_addr == 0. 797 - */ 798 - for (i = 0; i < kvm->nmemslots; i++) { 799 - struct kvm_memory_slot *memslot = &kvm->memslots[i]; 812 + slots = rcu_dereference(kvm->memslots); 813 + 814 + for (i = 0; i < slots->nmemslots; i++) { 815 + struct kvm_memory_slot *memslot = &slots->memslots[i]; 800 816 unsigned long start = memslot->userspace_addr; 801 817 unsigned long end; 802 - 803 - /* mmu_lock protects userspace_addr */ 804 - if (!start) 805 - continue; 806 818 807 819 end = start + (memslot->npages << PAGE_SHIFT); 808 820 if (hva >= start && hva < end) { 809 821 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; 810 822 811 - retval |= handler(kvm, &memslot->rmap[gfn_offset], 812 - data); 823 + ret = handler(kvm, &memslot->rmap[gfn_offset], data); 813 824 814 825 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { 815 826 int idx = gfn_offset; 816 827 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); 817 - retval |= handler(kvm, 828 + ret |= handler(kvm, 818 829 &memslot->lpage_info[j][idx].rmap_pde, 819 830 data); 820 831 } 832 + trace_kvm_age_page(hva, memslot, ret); 833 + retval |= ret; 821 834 } 822 835 } 823 836 ··· 837 856 u64 *spte; 838 857 int young = 0; 839 858 840 - /* always return old for EPT */ 859 + /* 860 + * Emulate the accessed bit for EPT, by checking if this page has 861 + * an EPT mapping, and clearing it if it does. On the next access, 862 + * a new EPT mapping will be established. 863 + * This has some overhead, but not as much as the cost of swapping 864 + * out actively used pages or breaking up actively used hugepages. 
865 + */ 841 866 if (!shadow_accessed_mask) 842 - return 0; 867 + return kvm_unmap_rmapp(kvm, rmapp, data); 843 868 844 869 spte = rmap_next(kvm, rmapp, NULL); 845 870 while (spte) { ··· 1602 1615 1603 1616 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) 1604 1617 { 1605 - int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); 1618 + int slot = memslot_id(kvm, gfn); 1606 1619 struct kvm_mmu_page *sp = page_header(__pa(pte)); 1607 1620 1608 1621 __set_bit(slot, sp->slot_bitmap); ··· 1626 1639 { 1627 1640 struct page *page; 1628 1641 1629 - gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); 1642 + gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); 1630 1643 1631 1644 if (gpa == UNMAPPED_GVA) 1632 1645 return NULL; ··· 1839 1852 * is responsibility of mmu_get_page / kvm_sync_page. 1840 1853 * Same reasoning can be applied to dirty page accounting. 1841 1854 */ 1842 - if (!can_unsync && is_writeble_pte(*sptep)) 1855 + if (!can_unsync && is_writable_pte(*sptep)) 1843 1856 goto set_pte; 1844 1857 1845 1858 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { ··· 1847 1860 __func__, gfn); 1848 1861 ret = 1; 1849 1862 pte_access &= ~ACC_WRITE_MASK; 1850 - if (is_writeble_pte(spte)) 1863 + if (is_writable_pte(spte)) 1851 1864 spte &= ~PT_WRITABLE_MASK; 1852 1865 } 1853 1866 } ··· 1868 1881 bool reset_host_protection) 1869 1882 { 1870 1883 int was_rmapped = 0; 1871 - int was_writeble = is_writeble_pte(*sptep); 1884 + int was_writable = is_writable_pte(*sptep); 1872 1885 int rmap_count; 1873 1886 1874 1887 pgprintk("%s: spte %llx access %x write_fault %d" ··· 1919 1932 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 1920 1933 rmap_recycle(vcpu, sptep, gfn); 1921 1934 } else { 1922 - if (was_writeble) 1935 + if (was_writable) 1923 1936 kvm_release_pfn_dirty(pfn); 1924 1937 else 1925 1938 kvm_release_pfn_clean(pfn); ··· 2149 2162 spin_unlock(&vcpu->kvm->mmu_lock); 2150 2163 } 2151 2164 2152 - static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) 2165 + static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, 2166 + u32 access, u32 *error) 2153 2167 { 2168 + if (error) 2169 + *error = 0; 2154 2170 return vaddr; 2155 2171 } 2156 2172 ··· 2737 2747 if (tdp_enabled) 2738 2748 return 0; 2739 2749 2740 - gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); 2750 + gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); 2741 2751 2742 2752 spin_lock(&vcpu->kvm->mmu_lock); 2743 2753 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); ··· 2837 2847 */ 2838 2848 page = alloc_page(GFP_KERNEL | __GFP_DMA32); 2839 2849 if (!page) 2840 - goto error_1; 2850 + return -ENOMEM; 2851 + 2841 2852 vcpu->arch.mmu.pae_root = page_address(page); 2842 2853 for (i = 0; i < 4; ++i) 2843 2854 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; 2844 2855 2845 2856 return 0; 2846 - 2847 - error_1: 2848 - free_mmu_pages(vcpu); 2849 - return -ENOMEM; 2850 2857 } 2851 2858 2852 2859 int kvm_mmu_create(struct kvm_vcpu *vcpu) ··· 2923 2936 spin_lock(&kvm_lock); 2924 2937 2925 2938 list_for_each_entry(kvm, &vm_list, vm_list) { 2926 - int npages; 2939 + int npages, idx; 2927 2940 2928 - if (!down_read_trylock(&kvm->slots_lock)) 2929 - continue; 2941 + idx = srcu_read_lock(&kvm->srcu); 2930 2942 spin_lock(&kvm->mmu_lock); 2931 2943 npages = kvm->arch.n_alloc_mmu_pages - 2932 2944 kvm->arch.n_free_mmu_pages; ··· 2938 2952 nr_to_scan--; 2939 2953 2940 2954 spin_unlock(&kvm->mmu_lock); 2941 - up_read(&kvm->slots_lock); 2955 + srcu_read_unlock(&kvm->srcu, idx); 2942 2956 } 2943 2957 if (kvm_freed) 2944 2958 
list_move_tail(&kvm_freed->vm_list, &vm_list); ··· 3005 3019 int i; 3006 3020 unsigned int nr_mmu_pages; 3007 3021 unsigned int nr_pages = 0; 3022 + struct kvm_memslots *slots; 3008 3023 3009 - for (i = 0; i < kvm->nmemslots; i++) 3010 - nr_pages += kvm->memslots[i].npages; 3024 + slots = rcu_dereference(kvm->memslots); 3025 + for (i = 0; i < slots->nmemslots; i++) 3026 + nr_pages += slots->memslots[i].npages; 3011 3027 3012 3028 nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; 3013 3029 nr_mmu_pages = max(nr_mmu_pages, ··· 3234 3246 if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) 3235 3247 audit_mappings_page(vcpu, ent, va, level - 1); 3236 3248 else { 3237 - gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); 3249 + gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL); 3238 3250 gfn_t gfn = gpa >> PAGE_SHIFT; 3239 3251 pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); 3240 3252 hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; ··· 3279 3291 static int count_rmaps(struct kvm_vcpu *vcpu) 3280 3292 { 3281 3293 int nmaps = 0; 3282 - int i, j, k; 3294 + int i, j, k, idx; 3283 3295 3296 + idx = srcu_read_lock(&kvm->srcu); 3297 + slots = rcu_dereference(kvm->memslots); 3284 3298 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 3285 - struct kvm_memory_slot *m = &vcpu->kvm->memslots[i]; 3299 + struct kvm_memory_slot *m = &slots->memslots[i]; 3286 3300 struct kvm_rmap_desc *d; 3287 3301 3288 3302 for (j = 0; j < m->npages; ++j) { ··· 3307 3317 } 3308 3318 } 3309 3319 } 3320 + srcu_read_unlock(&kvm->srcu, idx); 3310 3321 return nmaps; 3311 3322 } 3312 3323
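Note: the mmu.c hunks above drop the old slots_lock/userspace_addr tricks and instead read an RCU-published struct kvm_memslots inside an SRCU read-side section. A minimal sketch of the read-side pattern, mirroring kvm_mmu_calculate_mmu_pages() above (count_guest_pages() is an illustrative name, not from the patch):

    static unsigned long count_guest_pages(struct kvm *kvm)
    {
            struct kvm_memslots *slots;
            unsigned long pages = 0;
            int i, idx;

            idx = srcu_read_lock(&kvm->srcu);       /* pin the current slot array */
            slots = rcu_dereference(kvm->memslots);
            for (i = 0; i < slots->nmemslots; i++)
                    pages += slots->memslots[i].npages;
            srcu_read_unlock(&kvm->srcu, idx);      /* old arrays may now be freed */

            return pages;
    }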
+11 -24
arch/x86/kvm/mmu.h
··· 2 2 #define __KVM_X86_MMU_H 3 3 4 4 #include <linux/kvm_host.h> 5 + #include "kvm_cache_regs.h" 5 6 6 7 #define PT64_PT_BITS 9 7 8 #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) ··· 38 37 #define PT32_ROOT_LEVEL 2 39 38 #define PT32E_ROOT_LEVEL 3 40 39 40 + #define PT_PDPE_LEVEL 3 41 + #define PT_DIRECTORY_LEVEL 2 42 + #define PT_PAGE_TABLE_LEVEL 1 43 + 44 + #define PFERR_PRESENT_MASK (1U << 0) 45 + #define PFERR_WRITE_MASK (1U << 1) 46 + #define PFERR_USER_MASK (1U << 2) 47 + #define PFERR_RSVD_MASK (1U << 3) 48 + #define PFERR_FETCH_MASK (1U << 4) 49 + 41 50 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); 42 51 43 52 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) ··· 62 51 return 0; 63 52 64 53 return kvm_mmu_load(vcpu); 65 - } 66 - 67 - static inline int is_long_mode(struct kvm_vcpu *vcpu) 68 - { 69 - #ifdef CONFIG_X86_64 70 - return vcpu->arch.shadow_efer & EFER_LMA; 71 - #else 72 - return 0; 73 - #endif 74 - } 75 - 76 - static inline int is_pae(struct kvm_vcpu *vcpu) 77 - { 78 - return vcpu->arch.cr4 & X86_CR4_PAE; 79 - } 80 - 81 - static inline int is_pse(struct kvm_vcpu *vcpu) 82 - { 83 - return vcpu->arch.cr4 & X86_CR4_PSE; 84 - } 85 - 86 - static inline int is_paging(struct kvm_vcpu *vcpu) 87 - { 88 - return vcpu->arch.cr0 & X86_CR0_PG; 89 54 } 90 55 91 56 static inline int is_present_gpte(unsigned long pte)
+9 -4
arch/x86/kvm/paging_tmpl.h
··· 162 162 if (rsvd_fault) 163 163 goto access_error; 164 164 165 - if (write_fault && !is_writeble_pte(pte)) 165 + if (write_fault && !is_writable_pte(pte)) 166 166 if (user_fault || is_write_protection(vcpu)) 167 167 goto access_error; 168 168 ··· 490 490 spin_unlock(&vcpu->kvm->mmu_lock); 491 491 } 492 492 493 - static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) 493 + static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, 494 + u32 *error) 494 495 { 495 496 struct guest_walker walker; 496 497 gpa_t gpa = UNMAPPED_GVA; 497 498 int r; 498 499 499 - r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0); 500 + r = FNAME(walk_addr)(&walker, vcpu, vaddr, 501 + !!(access & PFERR_WRITE_MASK), 502 + !!(access & PFERR_USER_MASK), 503 + !!(access & PFERR_FETCH_MASK)); 500 504 501 505 if (r) { 502 506 gpa = gfn_to_gpa(walker.gfn); 503 507 gpa |= vaddr & ~PAGE_MASK; 504 - } 508 + } else if (error) 509 + *error = walker.error_code; 505 510 506 511 return gpa; 507 512 }
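Note: gva_to_gpa() now takes a PFERR_* access mask (the constants were moved into mmu.h above) and reports the walker's error code, so callers can request write/user/fetch checks and forward a precise page fault to the guest. A hedged sketch of a write translation along the lines of the kvm_mmu_gva_to_gpa_*() helpers referenced from mmu.c (the wrapper name is illustrative):

    static gpa_t example_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
                                          u32 *error)
    {
            u32 access = PFERR_WRITE_MASK;          /* check writability */

            if (kvm_x86_ops->get_cpl(vcpu) == 3)
                    access |= PFERR_USER_MASK;      /* apply user-mode checks */

            return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
    }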
+142 -101
arch/x86/kvm/svm.c
··· 231 231 efer &= ~EFER_LME; 232 232 233 233 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; 234 - vcpu->arch.shadow_efer = efer; 234 + vcpu->arch.efer = efer; 235 235 } 236 236 237 237 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, ··· 540 540 struct vmcb_control_area *control = &svm->vmcb->control; 541 541 struct vmcb_save_area *save = &svm->vmcb->save; 542 542 543 + svm->vcpu.fpu_active = 1; 544 + 543 545 control->intercept_cr_read = INTERCEPT_CR0_MASK | 544 546 INTERCEPT_CR3_MASK | 545 547 INTERCEPT_CR4_MASK; ··· 554 552 control->intercept_dr_read = INTERCEPT_DR0_MASK | 555 553 INTERCEPT_DR1_MASK | 556 554 INTERCEPT_DR2_MASK | 557 - INTERCEPT_DR3_MASK; 555 + INTERCEPT_DR3_MASK | 556 + INTERCEPT_DR4_MASK | 557 + INTERCEPT_DR5_MASK | 558 + INTERCEPT_DR6_MASK | 559 + INTERCEPT_DR7_MASK; 558 560 559 561 control->intercept_dr_write = INTERCEPT_DR0_MASK | 560 562 INTERCEPT_DR1_MASK | 561 563 INTERCEPT_DR2_MASK | 562 564 INTERCEPT_DR3_MASK | 565 + INTERCEPT_DR4_MASK | 563 566 INTERCEPT_DR5_MASK | 567 + INTERCEPT_DR6_MASK | 564 568 INTERCEPT_DR7_MASK; 565 569 566 570 control->intercept_exceptions = (1 << PF_VECTOR) | ··· 577 569 control->intercept = (1ULL << INTERCEPT_INTR) | 578 570 (1ULL << INTERCEPT_NMI) | 579 571 (1ULL << INTERCEPT_SMI) | 572 + (1ULL << INTERCEPT_SELECTIVE_CR0) | 580 573 (1ULL << INTERCEPT_CPUID) | 581 574 (1ULL << INTERCEPT_INVD) | 582 575 (1ULL << INTERCEPT_HLT) | ··· 650 641 control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | 651 642 (1ULL << INTERCEPT_INVLPG)); 652 643 control->intercept_exceptions &= ~(1 << PF_VECTOR); 653 - control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| 654 - INTERCEPT_CR3_MASK); 655 - control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| 656 - INTERCEPT_CR3_MASK); 644 + control->intercept_cr_read &= ~INTERCEPT_CR3_MASK; 645 + control->intercept_cr_write &= ~INTERCEPT_CR3_MASK; 657 646 save->g_pat = 0x0007040600070406ULL; 658 647 save->cr3 = 0; 659 648 save->cr4 = 0; ··· 737 730 init_vmcb(svm); 738 731 739 732 fx_init(&svm->vcpu); 740 - svm->vcpu.fpu_active = 1; 741 733 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 742 734 if (kvm_vcpu_is_bsp(&svm->vcpu)) 743 735 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; ··· 771 765 if (unlikely(cpu != vcpu->cpu)) { 772 766 u64 delta; 773 767 774 - /* 775 - * Make sure that the guest sees a monotonically 776 - * increasing TSC. 777 - */ 778 - delta = vcpu->arch.host_tsc - native_read_tsc(); 779 - svm->vmcb->control.tsc_offset += delta; 780 - if (is_nested(svm)) 781 - svm->nested.hsave->control.tsc_offset += delta; 768 + if (check_tsc_unstable()) { 769 + /* 770 + * Make sure that the guest sees a monotonically 771 + * increasing TSC. 
772 + */ 773 + delta = vcpu->arch.host_tsc - native_read_tsc(); 774 + svm->vmcb->control.tsc_offset += delta; 775 + if (is_nested(svm)) 776 + svm->nested.hsave->control.tsc_offset += delta; 777 + } 782 778 vcpu->cpu = cpu; 783 779 kvm_migrate_timers(vcpu); 784 780 svm->asid_generation = 0; ··· 962 954 svm->vmcb->save.gdtr.base = dt->base ; 963 955 } 964 956 957 + static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) 958 + { 959 + } 960 + 965 961 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 966 962 { 963 + } 964 + 965 + static void update_cr0_intercept(struct vcpu_svm *svm) 966 + { 967 + ulong gcr0 = svm->vcpu.arch.cr0; 968 + u64 *hcr0 = &svm->vmcb->save.cr0; 969 + 970 + if (!svm->vcpu.fpu_active) 971 + *hcr0 |= SVM_CR0_SELECTIVE_MASK; 972 + else 973 + *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) 974 + | (gcr0 & SVM_CR0_SELECTIVE_MASK); 975 + 976 + 977 + if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { 978 + svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; 979 + svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; 980 + } else { 981 + svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; 982 + svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; 983 + } 967 984 } 968 985 969 986 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ··· 996 963 struct vcpu_svm *svm = to_svm(vcpu); 997 964 998 965 #ifdef CONFIG_X86_64 999 - if (vcpu->arch.shadow_efer & EFER_LME) { 966 + if (vcpu->arch.efer & EFER_LME) { 1000 967 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 1001 - vcpu->arch.shadow_efer |= EFER_LMA; 968 + vcpu->arch.efer |= EFER_LMA; 1002 969 svm->vmcb->save.efer |= EFER_LMA | EFER_LME; 1003 970 } 1004 971 1005 972 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { 1006 - vcpu->arch.shadow_efer &= ~EFER_LMA; 973 + vcpu->arch.efer &= ~EFER_LMA; 1007 974 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); 1008 975 } 1009 976 } 1010 977 #endif 1011 - if (npt_enabled) 1012 - goto set; 1013 - 1014 - if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { 1015 - svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 1016 - vcpu->fpu_active = 1; 1017 - } 1018 - 1019 978 vcpu->arch.cr0 = cr0; 1020 - cr0 |= X86_CR0_PG | X86_CR0_WP; 1021 - if (!vcpu->fpu_active) { 1022 - svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); 979 + 980 + if (!npt_enabled) 981 + cr0 |= X86_CR0_PG | X86_CR0_WP; 982 + 983 + if (!vcpu->fpu_active) 1023 984 cr0 |= X86_CR0_TS; 1024 - } 1025 - set: 1026 985 /* 1027 986 * re-enable caching here because the QEMU bios 1028 987 * does not do it - this results in some delay at ··· 1022 997 */ 1023 998 cr0 &= ~(X86_CR0_CD | X86_CR0_NW); 1024 999 svm->vmcb->save.cr0 = cr0; 1000 + update_cr0_intercept(svm); 1025 1001 } 1026 1002 1027 1003 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ··· 1128 1102 svm->vmcb->control.asid = sd->next_asid++; 1129 1103 } 1130 1104 1131 - static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) 1105 + static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) 1132 1106 { 1133 1107 struct vcpu_svm *svm = to_svm(vcpu); 1134 - unsigned long val; 1135 1108 1136 1109 switch (dr) { 1137 1110 case 0 ... 
3: 1138 - val = vcpu->arch.db[dr]; 1111 + *dest = vcpu->arch.db[dr]; 1139 1112 break; 1113 + case 4: 1114 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 1115 + return EMULATE_FAIL; /* will re-inject UD */ 1116 + /* fall through */ 1140 1117 case 6: 1141 1118 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 1142 - val = vcpu->arch.dr6; 1119 + *dest = vcpu->arch.dr6; 1143 1120 else 1144 - val = svm->vmcb->save.dr6; 1121 + *dest = svm->vmcb->save.dr6; 1145 1122 break; 1123 + case 5: 1124 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 1125 + return EMULATE_FAIL; /* will re-inject UD */ 1126 + /* fall through */ 1146 1127 case 7: 1147 1128 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 1148 - val = vcpu->arch.dr7; 1129 + *dest = vcpu->arch.dr7; 1149 1130 else 1150 - val = svm->vmcb->save.dr7; 1131 + *dest = svm->vmcb->save.dr7; 1151 1132 break; 1152 - default: 1153 - val = 0; 1154 1133 } 1155 1134 1156 - return val; 1135 + return EMULATE_DONE; 1157 1136 } 1158 1137 1159 - static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, 1160 - int *exception) 1138 + static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) 1161 1139 { 1162 1140 struct vcpu_svm *svm = to_svm(vcpu); 1163 - 1164 - *exception = 0; 1165 1141 1166 1142 switch (dr) { 1167 1143 case 0 ... 3: 1168 1144 vcpu->arch.db[dr] = value; 1169 1145 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 1170 1146 vcpu->arch.eff_db[dr] = value; 1171 - return; 1172 - case 4 ... 5: 1173 - if (vcpu->arch.cr4 & X86_CR4_DE) 1174 - *exception = UD_VECTOR; 1175 - return; 1147 + break; 1148 + case 4: 1149 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 1150 + return EMULATE_FAIL; /* will re-inject UD */ 1151 + /* fall through */ 1176 1152 case 6: 1177 - if (value & 0xffffffff00000000ULL) { 1178 - *exception = GP_VECTOR; 1179 - return; 1180 - } 1181 1153 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; 1182 - return; 1154 + break; 1155 + case 5: 1156 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) 1157 + return EMULATE_FAIL; /* will re-inject UD */ 1158 + /* fall through */ 1183 1159 case 7: 1184 - if (value & 0xffffffff00000000ULL) { 1185 - *exception = GP_VECTOR; 1186 - return; 1187 - } 1188 1160 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; 1189 1161 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 1190 1162 svm->vmcb->save.dr7 = vcpu->arch.dr7; 1191 1163 vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); 1192 1164 } 1193 - return; 1194 - default: 1195 - /* FIXME: Possible case? 
*/ 1196 - printk(KERN_DEBUG "%s: unexpected dr %u\n", 1197 - __func__, dr); 1198 - *exception = UD_VECTOR; 1199 - return; 1165 + break; 1200 1166 } 1167 + 1168 + return EMULATE_DONE; 1201 1169 } 1202 1170 1203 1171 static int pf_interception(struct vcpu_svm *svm) ··· 1259 1239 return 1; 1260 1240 } 1261 1241 1242 + static void svm_fpu_activate(struct kvm_vcpu *vcpu) 1243 + { 1244 + struct vcpu_svm *svm = to_svm(vcpu); 1245 + svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 1246 + svm->vcpu.fpu_active = 1; 1247 + update_cr0_intercept(svm); 1248 + } 1249 + 1262 1250 static int nm_interception(struct vcpu_svm *svm) 1263 1251 { 1264 - svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 1265 - if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) 1266 - svm->vmcb->save.cr0 &= ~X86_CR0_TS; 1267 - svm->vcpu.fpu_active = 1; 1268 - 1252 + svm_fpu_activate(&svm->vcpu); 1269 1253 return 1; 1270 1254 } 1271 1255 ··· 1361 1337 1362 1338 static int nested_svm_check_permissions(struct vcpu_svm *svm) 1363 1339 { 1364 - if (!(svm->vcpu.arch.shadow_efer & EFER_SVME) 1340 + if (!(svm->vcpu.arch.efer & EFER_SVME) 1365 1341 || !is_paging(&svm->vcpu)) { 1366 1342 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 1367 1343 return 1; ··· 1764 1740 hsave->save.ds = vmcb->save.ds; 1765 1741 hsave->save.gdtr = vmcb->save.gdtr; 1766 1742 hsave->save.idtr = vmcb->save.idtr; 1767 - hsave->save.efer = svm->vcpu.arch.shadow_efer; 1768 - hsave->save.cr0 = svm->vcpu.arch.cr0; 1743 + hsave->save.efer = svm->vcpu.arch.efer; 1744 + hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); 1769 1745 hsave->save.cr4 = svm->vcpu.arch.cr4; 1770 1746 hsave->save.rflags = vmcb->save.rflags; 1771 1747 hsave->save.rip = svm->next_rip; ··· 2177 2153 u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 2178 2154 u64 data; 2179 2155 2180 - if (svm_get_msr(&svm->vcpu, ecx, &data)) 2156 + if (svm_get_msr(&svm->vcpu, ecx, &data)) { 2157 + trace_kvm_msr_read_ex(ecx); 2181 2158 kvm_inject_gp(&svm->vcpu, 0); 2182 - else { 2159 + } else { 2183 2160 trace_kvm_msr_read(ecx, data); 2184 2161 2185 2162 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; ··· 2272 2247 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) 2273 2248 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); 2274 2249 2275 - trace_kvm_msr_write(ecx, data); 2276 2250 2277 2251 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 2278 - if (svm_set_msr(&svm->vcpu, ecx, data)) 2252 + if (svm_set_msr(&svm->vcpu, ecx, data)) { 2253 + trace_kvm_msr_write_ex(ecx, data); 2279 2254 kvm_inject_gp(&svm->vcpu, 0); 2280 - else 2255 + } else { 2256 + trace_kvm_msr_write(ecx, data); 2281 2257 skip_emulated_instruction(&svm->vcpu); 2258 + } 2282 2259 return 1; 2283 2260 } 2284 2261 ··· 2324 2297 [SVM_EXIT_READ_CR3] = emulate_on_interception, 2325 2298 [SVM_EXIT_READ_CR4] = emulate_on_interception, 2326 2299 [SVM_EXIT_READ_CR8] = emulate_on_interception, 2327 - /* for now: */ 2300 + [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, 2328 2301 [SVM_EXIT_WRITE_CR0] = emulate_on_interception, 2329 2302 [SVM_EXIT_WRITE_CR3] = emulate_on_interception, 2330 2303 [SVM_EXIT_WRITE_CR4] = emulate_on_interception, ··· 2333 2306 [SVM_EXIT_READ_DR1] = emulate_on_interception, 2334 2307 [SVM_EXIT_READ_DR2] = emulate_on_interception, 2335 2308 [SVM_EXIT_READ_DR3] = emulate_on_interception, 2309 + [SVM_EXIT_READ_DR4] = emulate_on_interception, 2310 + [SVM_EXIT_READ_DR5] = emulate_on_interception, 2311 + [SVM_EXIT_READ_DR6] = emulate_on_interception, 2312 + [SVM_EXIT_READ_DR7] = emulate_on_interception, 2336 2313 
[SVM_EXIT_WRITE_DR0] = emulate_on_interception, 2337 2314 [SVM_EXIT_WRITE_DR1] = emulate_on_interception, 2338 2315 [SVM_EXIT_WRITE_DR2] = emulate_on_interception, 2339 2316 [SVM_EXIT_WRITE_DR3] = emulate_on_interception, 2317 + [SVM_EXIT_WRITE_DR4] = emulate_on_interception, 2340 2318 [SVM_EXIT_WRITE_DR5] = emulate_on_interception, 2319 + [SVM_EXIT_WRITE_DR6] = emulate_on_interception, 2341 2320 [SVM_EXIT_WRITE_DR7] = emulate_on_interception, 2342 2321 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, 2343 2322 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, ··· 2416 2383 2417 2384 svm_complete_interrupts(svm); 2418 2385 2419 - if (npt_enabled) { 2420 - int mmu_reload = 0; 2421 - if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { 2422 - svm_set_cr0(vcpu, svm->vmcb->save.cr0); 2423 - mmu_reload = 1; 2424 - } 2386 + if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) 2425 2387 vcpu->arch.cr0 = svm->vmcb->save.cr0; 2388 + if (npt_enabled) 2426 2389 vcpu->arch.cr3 = svm->vmcb->save.cr3; 2427 - if (mmu_reload) { 2428 - kvm_mmu_reset_context(vcpu); 2429 - kvm_mmu_load(vcpu); 2430 - } 2431 - } 2432 - 2433 2390 2434 2391 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 2435 2392 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; ··· 2821 2798 2822 2799 svm->vmcb->save.cr3 = root; 2823 2800 force_new_asid(vcpu); 2824 - 2825 - if (vcpu->fpu_active) { 2826 - svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); 2827 - svm->vmcb->save.cr0 |= X86_CR0_TS; 2828 - vcpu->fpu_active = 0; 2829 - } 2830 2801 } 2831 2802 2832 2803 static int is_disabled(void) ··· 2867 2850 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) 2868 2851 { 2869 2852 return 0; 2853 + } 2854 + 2855 + static void svm_cpuid_update(struct kvm_vcpu *vcpu) 2856 + { 2870 2857 } 2871 2858 2872 2859 static const struct trace_print_flags svm_exit_reasons_str[] = { ··· 2926 2905 { -1, NULL } 2927 2906 }; 2928 2907 2929 - static bool svm_gb_page_enable(void) 2908 + static int svm_get_lpage_level(void) 2930 2909 { 2931 - return true; 2910 + return PT_PDPE_LEVEL; 2911 + } 2912 + 2913 + static bool svm_rdtscp_supported(void) 2914 + { 2915 + return false; 2916 + } 2917 + 2918 + static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) 2919 + { 2920 + struct vcpu_svm *svm = to_svm(vcpu); 2921 + 2922 + update_cr0_intercept(svm); 2923 + svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; 2932 2924 } 2933 2925 2934 2926 static struct kvm_x86_ops svm_x86_ops = { ··· 2970 2936 .set_segment = svm_set_segment, 2971 2937 .get_cpl = svm_get_cpl, 2972 2938 .get_cs_db_l_bits = kvm_get_cs_db_l_bits, 2939 + .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, 2973 2940 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, 2974 2941 .set_cr0 = svm_set_cr0, 2975 2942 .set_cr3 = svm_set_cr3, ··· 2985 2950 .cache_reg = svm_cache_reg, 2986 2951 .get_rflags = svm_get_rflags, 2987 2952 .set_rflags = svm_set_rflags, 2953 + .fpu_activate = svm_fpu_activate, 2954 + .fpu_deactivate = svm_fpu_deactivate, 2988 2955 2989 2956 .tlb_flush = svm_flush_tlb, 2990 2957 ··· 3012 2975 .get_mt_mask = svm_get_mt_mask, 3013 2976 3014 2977 .exit_reasons_str = svm_exit_reasons_str, 3015 - .gb_page_enable = svm_gb_page_enable, 2978 + .get_lpage_level = svm_get_lpage_level, 2979 + 2980 + .cpuid_update = svm_cpuid_update, 2981 + 2982 + .rdtscp_supported = svm_rdtscp_supported, 3016 2983 }; 3017 2984 3018 2985 static int __init svm_init(void)
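Note: the DR read/write intercepts above now cover all of DR0-DR7, and svm_get_dr()/svm_set_dr() return EMULATE_DONE or EMULATE_FAIL instead of passing an exception number back, with EMULATE_FAIL meaning the caller re-injects #UD for a DR4/DR5 access while CR4.DE is set. A sketch of the aliasing rule behind the fall-throughs (example_resolve_dr_alias() is illustrative only):

    static int example_resolve_dr_alias(struct kvm_vcpu *vcpu, int dr,
                                        int *real_dr)
    {
            if (dr != 4 && dr != 5) {
                    *real_dr = dr;                  /* no aliasing involved */
                    return EMULATE_DONE;
            }
            if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
                    return EMULATE_FAIL;            /* caller re-injects #UD */
            *real_dr = dr + 2;                      /* DR4 -> DR6, DR5 -> DR7 */
            return EMULATE_DONE;
    }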
+48 -11
arch/x86/kvm/trace.h
··· 56 56 ); 57 57 58 58 /* 59 + * Tracepoint for hypercall. 60 + */ 61 + TRACE_EVENT(kvm_hv_hypercall, 62 + TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx, 63 + __u64 ingpa, __u64 outgpa), 64 + TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), 65 + 66 + TP_STRUCT__entry( 67 + __field( __u16, code ) 68 + __field( bool, fast ) 69 + __field( __u16, rep_cnt ) 70 + __field( __u16, rep_idx ) 71 + __field( __u64, ingpa ) 72 + __field( __u64, outgpa ) 73 + ), 74 + 75 + TP_fast_assign( 76 + __entry->code = code; 77 + __entry->fast = fast; 78 + __entry->rep_cnt = rep_cnt; 79 + __entry->rep_idx = rep_idx; 80 + __entry->ingpa = ingpa; 81 + __entry->outgpa = outgpa; 82 + ), 83 + 84 + TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", 85 + __entry->code, __entry->fast ? "fast" : "slow", 86 + __entry->rep_cnt, __entry->rep_idx, __entry->ingpa, 87 + __entry->outgpa) 88 + ); 89 + 90 + /* 59 91 * Tracepoint for PIO. 60 92 */ 61 93 TRACE_EVENT(kvm_pio, ··· 246 214 * Tracepoint for guest MSR access. 247 215 */ 248 216 TRACE_EVENT(kvm_msr, 249 - TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data), 250 - TP_ARGS(rw, ecx, data), 217 + TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception), 218 + TP_ARGS(write, ecx, data, exception), 251 219 252 220 TP_STRUCT__entry( 253 - __field( unsigned int, rw ) 254 - __field( unsigned int, ecx ) 255 - __field( unsigned long, data ) 221 + __field( unsigned, write ) 222 + __field( u32, ecx ) 223 + __field( u64, data ) 224 + __field( u8, exception ) 256 225 ), 257 226 258 227 TP_fast_assign( 259 - __entry->rw = rw; 228 + __entry->write = write; 260 229 __entry->ecx = ecx; 261 230 __entry->data = data; 231 + __entry->exception = exception; 262 232 ), 263 233 264 - TP_printk("msr_%s %x = 0x%lx", 265 - __entry->rw ? "write" : "read", 266 - __entry->ecx, __entry->data) 234 + TP_printk("msr_%s %x = 0x%llx%s", 235 + __entry->write ? "write" : "read", 236 + __entry->ecx, __entry->data, 237 + __entry->exception ? " (#GP)" : "") 267 238 ); 268 239 269 - #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data) 270 - #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data) 240 + #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false) 241 + #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false) 242 + #define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true) 243 + #define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true) 271 244 272 245 /* 273 246 * Tracepoint for guest CR access.
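Note: trace_kvm_hv_hypercall() expects the hypercall input already decoded; the Hyper-V hypercall handler added to x86.c by this merge is what feeds it. A hedged sketch of that decode, following the Hyper-V hypercall input layout (the helper name and the exact bit positions are taken from the spec, not quoted from the patch):

    static void example_trace_hv_hypercall(u64 param, u64 ingpa, u64 outgpa)
    {
            u16 code, rep_cnt, rep_idx;
            bool fast;

            code    = param & 0xffff;               /* call code */
            fast    = (param >> 16) & 0x1;          /* register-based (fast) call? */
            rep_cnt = (param >> 32) & 0xfff;        /* rep count */
            rep_idx = (param >> 48) & 0xfff;        /* rep start index */

            trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
    }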
+274 -122
arch/x86/kvm/vmx.c
··· 61 61 static int __read_mostly emulate_invalid_guest_state = 0; 62 62 module_param(emulate_invalid_guest_state, bool, S_IRUGO); 63 63 64 + #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ 65 + (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) 66 + #define KVM_GUEST_CR0_MASK \ 67 + (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) 68 + #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ 69 + (X86_CR0_WP | X86_CR0_NE) 70 + #define KVM_VM_CR0_ALWAYS_ON \ 71 + (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) 72 + #define KVM_CR4_GUEST_OWNED_BITS \ 73 + (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ 74 + | X86_CR4_OSXMMEXCPT) 75 + 76 + #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) 77 + #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) 78 + 64 79 /* 65 80 * These 2 parameters are used to config the controls for Pause-Loop Exiting: 66 81 * ple_gap: upper bound on the amount of time between two successive ··· 151 136 ktime_t entry_time; 152 137 s64 vnmi_blocked_time; 153 138 u32 exit_reason; 139 + 140 + bool rdtscp_enabled; 154 141 }; 155 142 156 143 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) ··· 227 210 #ifdef CONFIG_X86_64 228 211 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, 229 212 #endif 230 - MSR_EFER, MSR_K6_STAR, 213 + MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR, 231 214 }; 232 215 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) 233 216 ··· 318 301 return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); 319 302 } 320 303 304 + static inline bool cpu_has_vmx_ept_1g_page(void) 305 + { 306 + return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); 307 + } 308 + 321 309 static inline int cpu_has_vmx_invept_individual_addr(void) 322 310 { 323 311 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); ··· 358 336 359 337 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 360 338 { 361 - return flexpriority_enabled && 362 - (cpu_has_vmx_virtualize_apic_accesses()) && 363 - (irqchip_in_kernel(kvm)); 339 + return flexpriority_enabled && irqchip_in_kernel(kvm); 364 340 } 365 341 366 342 static inline int cpu_has_vmx_vpid(void) 367 343 { 368 344 return vmcs_config.cpu_based_2nd_exec_ctrl & 369 345 SECONDARY_EXEC_ENABLE_VPID; 346 + } 347 + 348 + static inline int cpu_has_vmx_rdtscp(void) 349 + { 350 + return vmcs_config.cpu_based_2nd_exec_ctrl & 351 + SECONDARY_EXEC_RDTSCP; 370 352 } 371 353 372 354 static inline int cpu_has_virtual_nmis(void) ··· 577 551 { 578 552 u32 eb; 579 553 580 - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); 581 - if (!vcpu->fpu_active) 582 - eb |= 1u << NM_VECTOR; 583 - /* 584 - * Unconditionally intercept #DB so we can maintain dr6 without 585 - * reading it every exit. 
586 - */ 587 - eb |= 1u << DB_VECTOR; 588 - if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { 589 - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 590 - eb |= 1u << BP_VECTOR; 591 - } 554 + eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | 555 + (1u << NM_VECTOR) | (1u << DB_VECTOR); 556 + if ((vcpu->guest_debug & 557 + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == 558 + (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) 559 + eb |= 1u << BP_VECTOR; 592 560 if (to_vmx(vcpu)->rmode.vm86_active) 593 561 eb = ~0; 594 562 if (enable_ept) 595 563 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ 564 + if (vcpu->fpu_active) 565 + eb &= ~(1u << NM_VECTOR); 596 566 vmcs_write32(EXCEPTION_BITMAP, eb); 597 567 } 598 568 ··· 611 589 u64 guest_efer; 612 590 u64 ignore_bits; 613 591 614 - guest_efer = vmx->vcpu.arch.shadow_efer; 592 + guest_efer = vmx->vcpu.arch.efer; 615 593 616 594 /* 617 595 * NX is emulated; LMA and LME handled by hardware; SCE meaninless ··· 789 767 790 768 static void vmx_fpu_activate(struct kvm_vcpu *vcpu) 791 769 { 770 + ulong cr0; 771 + 792 772 if (vcpu->fpu_active) 793 773 return; 794 774 vcpu->fpu_active = 1; 795 - vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); 796 - if (vcpu->arch.cr0 & X86_CR0_TS) 797 - vmcs_set_bits(GUEST_CR0, X86_CR0_TS); 775 + cr0 = vmcs_readl(GUEST_CR0); 776 + cr0 &= ~(X86_CR0_TS | X86_CR0_MP); 777 + cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP); 778 + vmcs_writel(GUEST_CR0, cr0); 798 779 update_exception_bitmap(vcpu); 780 + vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; 781 + vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); 799 782 } 783 + 784 + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); 800 785 801 786 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) 802 787 { 803 - if (!vcpu->fpu_active) 804 - return; 805 - vcpu->fpu_active = 0; 806 - vmcs_set_bits(GUEST_CR0, X86_CR0_TS); 788 + vmx_decache_cr0_guest_bits(vcpu); 789 + vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); 807 790 update_exception_bitmap(vcpu); 791 + vcpu->arch.cr0_guest_owned_bits = 0; 792 + vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); 793 + vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); 808 794 } 809 795 810 796 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) ··· 908 878 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); 909 879 } 910 880 881 + static bool vmx_rdtscp_supported(void) 882 + { 883 + return cpu_has_vmx_rdtscp(); 884 + } 885 + 911 886 /* 912 887 * Swap MSR entry in host/guest MSR entry array. 913 888 */ ··· 948 913 index = __find_msr_index(vmx, MSR_CSTAR); 949 914 if (index >= 0) 950 915 move_msr_up(vmx, index, save_nmsrs++); 916 + index = __find_msr_index(vmx, MSR_TSC_AUX); 917 + if (index >= 0 && vmx->rdtscp_enabled) 918 + move_msr_up(vmx, index, save_nmsrs++); 951 919 /* 952 920 * MSR_K6_STAR is only needed on long mode guests, and only 953 921 * if efer.sce is enabled. 
954 922 */ 955 923 index = __find_msr_index(vmx, MSR_K6_STAR); 956 - if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE)) 924 + if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) 957 925 move_msr_up(vmx, index, save_nmsrs++); 958 926 } 959 927 #endif ··· 1040 1002 case MSR_IA32_SYSENTER_ESP: 1041 1003 data = vmcs_readl(GUEST_SYSENTER_ESP); 1042 1004 break; 1005 + case MSR_TSC_AUX: 1006 + if (!to_vmx(vcpu)->rdtscp_enabled) 1007 + return 1; 1008 + /* Otherwise falls through */ 1043 1009 default: 1044 1010 vmx_load_host_state(to_vmx(vcpu)); 1045 1011 msr = find_msr_entry(to_vmx(vcpu), msr_index); ··· 1107 1065 vcpu->arch.pat = data; 1108 1066 break; 1109 1067 } 1110 - /* Otherwise falls through to kvm_set_msr_common */ 1068 + ret = kvm_set_msr_common(vcpu, msr_index, data); 1069 + break; 1070 + case MSR_TSC_AUX: 1071 + if (!vmx->rdtscp_enabled) 1072 + return 1; 1073 + /* Check reserved bit, higher 32 bits should be zero */ 1074 + if ((data >> 32) != 0) 1075 + return 1; 1076 + /* Otherwise falls through */ 1111 1077 default: 1112 1078 msr = find_msr_entry(vmx, msr_index); 1113 1079 if (msr) { ··· 1274 1224 CPU_BASED_USE_IO_BITMAPS | 1275 1225 CPU_BASED_MOV_DR_EXITING | 1276 1226 CPU_BASED_USE_TSC_OFFSETING | 1227 + CPU_BASED_MWAIT_EXITING | 1228 + CPU_BASED_MONITOR_EXITING | 1277 1229 CPU_BASED_INVLPG_EXITING; 1278 1230 opt = CPU_BASED_TPR_SHADOW | 1279 1231 CPU_BASED_USE_MSR_BITMAPS | ··· 1295 1243 SECONDARY_EXEC_ENABLE_VPID | 1296 1244 SECONDARY_EXEC_ENABLE_EPT | 1297 1245 SECONDARY_EXEC_UNRESTRICTED_GUEST | 1298 - SECONDARY_EXEC_PAUSE_LOOP_EXITING; 1246 + SECONDARY_EXEC_PAUSE_LOOP_EXITING | 1247 + SECONDARY_EXEC_RDTSCP; 1299 1248 if (adjust_vmx_controls(min2, opt2, 1300 1249 MSR_IA32_VMX_PROCBASED_CTLS2, 1301 1250 &_cpu_based_2nd_exec_control) < 0) ··· 1510 1457 static gva_t rmode_tss_base(struct kvm *kvm) 1511 1458 { 1512 1459 if (!kvm->arch.tss_addr) { 1513 - gfn_t base_gfn = kvm->memslots[0].base_gfn + 1514 - kvm->memslots[0].npages - 3; 1460 + struct kvm_memslots *slots; 1461 + gfn_t base_gfn; 1462 + 1463 + slots = rcu_dereference(kvm->memslots); 1464 + base_gfn = kvm->memslots->memslots[0].base_gfn + 1465 + kvm->memslots->memslots[0].npages - 3; 1515 1466 return base_gfn << PAGE_SHIFT; 1516 1467 } 1517 1468 return kvm->arch.tss_addr; ··· 1601 1544 * of this msr depends on is_long_mode(). 
1602 1545 */ 1603 1546 vmx_load_host_state(to_vmx(vcpu)); 1604 - vcpu->arch.shadow_efer = efer; 1605 - if (!msr) 1606 - return; 1547 + vcpu->arch.efer = efer; 1607 1548 if (efer & EFER_LMA) { 1608 1549 vmcs_write32(VM_ENTRY_CONTROLS, 1609 1550 vmcs_read32(VM_ENTRY_CONTROLS) | ··· 1631 1576 (guest_tr_ar & ~AR_TYPE_MASK) 1632 1577 | AR_TYPE_BUSY_64_TSS); 1633 1578 } 1634 - vcpu->arch.shadow_efer |= EFER_LMA; 1635 - vmx_set_efer(vcpu, vcpu->arch.shadow_efer); 1579 + vcpu->arch.efer |= EFER_LMA; 1580 + vmx_set_efer(vcpu, vcpu->arch.efer); 1636 1581 } 1637 1582 1638 1583 static void exit_lmode(struct kvm_vcpu *vcpu) 1639 1584 { 1640 - vcpu->arch.shadow_efer &= ~EFER_LMA; 1585 + vcpu->arch.efer &= ~EFER_LMA; 1641 1586 1642 1587 vmcs_write32(VM_ENTRY_CONTROLS, 1643 1588 vmcs_read32(VM_ENTRY_CONTROLS) ··· 1653 1598 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); 1654 1599 } 1655 1600 1601 + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) 1602 + { 1603 + ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; 1604 + 1605 + vcpu->arch.cr0 &= ~cr0_guest_owned_bits; 1606 + vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; 1607 + } 1608 + 1656 1609 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1657 1610 { 1658 - vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; 1659 - vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; 1611 + ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; 1612 + 1613 + vcpu->arch.cr4 &= ~cr4_guest_owned_bits; 1614 + vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; 1660 1615 } 1661 1616 1662 1617 static void ept_load_pdptrs(struct kvm_vcpu *vcpu) ··· 1711 1646 (CPU_BASED_CR3_LOAD_EXITING | 1712 1647 CPU_BASED_CR3_STORE_EXITING)); 1713 1648 vcpu->arch.cr0 = cr0; 1714 - vmx_set_cr4(vcpu, vcpu->arch.cr4); 1649 + vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); 1715 1650 } else if (!is_paging(vcpu)) { 1716 1651 /* From nonpaging to paging */ 1717 1652 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, ··· 1719 1654 ~(CPU_BASED_CR3_LOAD_EXITING | 1720 1655 CPU_BASED_CR3_STORE_EXITING)); 1721 1656 vcpu->arch.cr0 = cr0; 1722 - vmx_set_cr4(vcpu, vcpu->arch.cr4); 1657 + vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); 1723 1658 } 1724 1659 1725 1660 if (!(cr0 & X86_CR0_WP)) 1726 1661 *hw_cr0 &= ~X86_CR0_WP; 1727 - } 1728 - 1729 - static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, 1730 - struct kvm_vcpu *vcpu) 1731 - { 1732 - if (!is_paging(vcpu)) { 1733 - *hw_cr4 &= ~X86_CR4_PAE; 1734 - *hw_cr4 |= X86_CR4_PSE; 1735 - } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) 1736 - *hw_cr4 &= ~X86_CR4_PAE; 1737 1662 } 1738 1663 1739 1664 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) ··· 1737 1682 else 1738 1683 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; 1739 1684 1740 - vmx_fpu_deactivate(vcpu); 1741 - 1742 1685 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) 1743 1686 enter_pmode(vcpu); 1744 1687 ··· 1744 1691 enter_rmode(vcpu); 1745 1692 1746 1693 #ifdef CONFIG_X86_64 1747 - if (vcpu->arch.shadow_efer & EFER_LME) { 1694 + if (vcpu->arch.efer & EFER_LME) { 1748 1695 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) 1749 1696 enter_lmode(vcpu); 1750 1697 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) ··· 1755 1702 if (enable_ept) 1756 1703 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); 1757 1704 1705 + if (!vcpu->fpu_active) 1706 + hw_cr0 |= X86_CR0_TS | X86_CR0_MP; 1707 + 1758 1708 vmcs_writel(CR0_READ_SHADOW, cr0); 1759 1709 vmcs_writel(GUEST_CR0, hw_cr0); 1760 1710 vcpu->arch.cr0 = cr0; 1761 - 1762 - if 
(!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) 1763 - vmx_fpu_activate(vcpu); 1764 1711 } 1765 1712 1766 1713 static u64 construct_eptp(unsigned long root_hpa) ··· 1791 1738 1792 1739 vmx_flush_tlb(vcpu); 1793 1740 vmcs_writel(GUEST_CR3, guest_cr3); 1794 - if (vcpu->arch.cr0 & X86_CR0_PE) 1795 - vmx_fpu_deactivate(vcpu); 1796 1741 } 1797 1742 1798 1743 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ··· 1799 1748 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); 1800 1749 1801 1750 vcpu->arch.cr4 = cr4; 1802 - if (enable_ept) 1803 - ept_update_paging_mode_cr4(&hw_cr4, vcpu); 1751 + if (enable_ept) { 1752 + if (!is_paging(vcpu)) { 1753 + hw_cr4 &= ~X86_CR4_PAE; 1754 + hw_cr4 |= X86_CR4_PSE; 1755 + } else if (!(cr4 & X86_CR4_PAE)) { 1756 + hw_cr4 &= ~X86_CR4_PAE; 1757 + } 1758 + } 1804 1759 1805 1760 vmcs_writel(CR4_READ_SHADOW, cr4); 1806 1761 vmcs_writel(GUEST_CR4, hw_cr4); ··· 1844 1787 1845 1788 static int vmx_get_cpl(struct kvm_vcpu *vcpu) 1846 1789 { 1847 - if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ 1790 + if (!is_protmode(vcpu)) 1848 1791 return 0; 1849 1792 1850 1793 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ ··· 2099 2042 static bool guest_state_valid(struct kvm_vcpu *vcpu) 2100 2043 { 2101 2044 /* real mode guest state checks */ 2102 - if (!(vcpu->arch.cr0 & X86_CR0_PE)) { 2045 + if (!is_protmode(vcpu)) { 2103 2046 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) 2104 2047 return false; 2105 2048 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) ··· 2232 2175 struct kvm_userspace_memory_region kvm_userspace_mem; 2233 2176 int r = 0; 2234 2177 2235 - down_write(&kvm->slots_lock); 2178 + mutex_lock(&kvm->slots_lock); 2236 2179 if (kvm->arch.apic_access_page) 2237 2180 goto out; 2238 2181 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; ··· 2245 2188 2246 2189 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); 2247 2190 out: 2248 - up_write(&kvm->slots_lock); 2191 + mutex_unlock(&kvm->slots_lock); 2249 2192 return r; 2250 2193 } 2251 2194 ··· 2254 2197 struct kvm_userspace_memory_region kvm_userspace_mem; 2255 2198 int r = 0; 2256 2199 2257 - down_write(&kvm->slots_lock); 2200 + mutex_lock(&kvm->slots_lock); 2258 2201 if (kvm->arch.ept_identity_pagetable) 2259 2202 goto out; 2260 2203 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; ··· 2269 2212 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, 2270 2213 kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); 2271 2214 out: 2272 - up_write(&kvm->slots_lock); 2215 + mutex_unlock(&kvm->slots_lock); 2273 2216 return r; 2274 2217 } 2275 2218 ··· 2441 2384 for (i = 0; i < NR_VMX_MSR; ++i) { 2442 2385 u32 index = vmx_msr_index[i]; 2443 2386 u32 data_low, data_high; 2444 - u64 data; 2445 2387 int j = vmx->nmsrs; 2446 2388 2447 2389 if (rdmsr_safe(index, &data_low, &data_high) < 0) 2448 2390 continue; 2449 2391 if (wrmsr_safe(index, data_low, data_high) < 0) 2450 2392 continue; 2451 - data = data_low | ((u64)data_high << 32); 2452 2393 vmx->guest_msrs[j].index = i; 2453 2394 vmx->guest_msrs[j].data = 0; 2454 2395 vmx->guest_msrs[j].mask = -1ull; ··· 2459 2404 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); 2460 2405 2461 2406 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 2462 - vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); 2407 + vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; 2408 + if (enable_ept) 2409 + vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; 2410 + vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); 2463 
2411 2464 2412 tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; 2465 2413 rdtscll(tsc_this); ··· 2487 2429 { 2488 2430 struct vcpu_vmx *vmx = to_vmx(vcpu); 2489 2431 u64 msr; 2490 - int ret; 2432 + int ret, idx; 2491 2433 2492 2434 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); 2493 - down_read(&vcpu->kvm->slots_lock); 2435 + idx = srcu_read_lock(&vcpu->kvm->srcu); 2494 2436 if (!init_rmode(vmx->vcpu.kvm)) { 2495 2437 ret = -ENOMEM; 2496 2438 goto out; ··· 2584 2526 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 2585 2527 2586 2528 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 2587 - vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ 2529 + vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ 2588 2530 vmx_set_cr4(&vmx->vcpu, 0); 2589 2531 vmx_set_efer(&vmx->vcpu, 0); 2590 2532 vmx_fpu_activate(&vmx->vcpu); ··· 2598 2540 vmx->emulation_required = 0; 2599 2541 2600 2542 out: 2601 - up_read(&vcpu->kvm->slots_lock); 2543 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 2602 2544 return ret; 2603 2545 } 2604 2546 ··· 2775 2717 kvm_queue_exception(vcpu, vec); 2776 2718 return 1; 2777 2719 case BP_VECTOR: 2720 + /* 2721 + * Update instruction length as we may reinject the exception 2722 + * from user space while in guest debugging mode. 2723 + */ 2724 + to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = 2725 + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 2778 2726 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 2779 2727 return 0; 2780 2728 /* fall through */ ··· 2903 2839 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); 2904 2840 /* fall through */ 2905 2841 case BP_VECTOR: 2842 + /* 2843 + * Update instruction length as we may reinject #BP from 2844 + * user space while in guest debugging mode. Reading it for 2845 + * #DB as well causes no harm, it is not used in that case. 
2846 + */ 2847 + vmx->vcpu.arch.event_exit_inst_len = 2848 + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 2906 2849 kvm_run->exit_reason = KVM_EXIT_DEBUG; 2907 2850 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; 2908 2851 kvm_run->debug.arch.exception = ex_no; ··· 3011 2940 }; 3012 2941 break; 3013 2942 case 2: /* clts */ 3014 - vmx_fpu_deactivate(vcpu); 3015 - vcpu->arch.cr0 &= ~X86_CR0_TS; 3016 - vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); 3017 - vmx_fpu_activate(vcpu); 2943 + vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); 2944 + trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); 3018 2945 skip_emulated_instruction(vcpu); 2946 + vmx_fpu_activate(vcpu); 3019 2947 return 1; 3020 2948 case 1: /*mov from cr*/ 3021 2949 switch (cr) { ··· 3032 2962 } 3033 2963 break; 3034 2964 case 3: /* lmsw */ 3035 - kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); 2965 + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; 2966 + trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); 2967 + kvm_lmsw(vcpu, val); 3036 2968 3037 2969 skip_emulated_instruction(vcpu); 3038 2970 return 1; ··· 3047 2975 return 0; 3048 2976 } 3049 2977 2978 + static int check_dr_alias(struct kvm_vcpu *vcpu) 2979 + { 2980 + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { 2981 + kvm_queue_exception(vcpu, UD_VECTOR); 2982 + return -1; 2983 + } 2984 + return 0; 2985 + } 2986 + 3050 2987 static int handle_dr(struct kvm_vcpu *vcpu) 3051 2988 { 3052 2989 unsigned long exit_qualification; 3053 2990 unsigned long val; 3054 2991 int dr, reg; 3055 2992 2993 + /* Do not handle if the CPL > 0, will trigger GP on re-entry */ 3056 2994 if (!kvm_require_cpl(vcpu, 0)) 3057 2995 return 1; 3058 2996 dr = vmcs_readl(GUEST_DR7); ··· 3098 3016 case 0 ... 3: 3099 3017 val = vcpu->arch.db[dr]; 3100 3018 break; 3019 + case 4: 3020 + if (check_dr_alias(vcpu) < 0) 3021 + return 1; 3022 + /* fall through */ 3101 3023 case 6: 3102 3024 val = vcpu->arch.dr6; 3103 3025 break; 3104 - case 7: 3026 + case 5: 3027 + if (check_dr_alias(vcpu) < 0) 3028 + return 1; 3029 + /* fall through */ 3030 + default: /* 7 */ 3105 3031 val = vcpu->arch.dr7; 3106 3032 break; 3107 - default: 3108 - val = 0; 3109 3033 } 3110 3034 kvm_register_write(vcpu, reg, val); 3111 3035 } else { ··· 3122 3034 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 3123 3035 vcpu->arch.eff_db[dr] = val; 3124 3036 break; 3125 - case 4 ... 
5: 3126 - if (vcpu->arch.cr4 & X86_CR4_DE) 3127 - kvm_queue_exception(vcpu, UD_VECTOR); 3128 - break; 3037 + case 4: 3038 + if (check_dr_alias(vcpu) < 0) 3039 + return 1; 3040 + /* fall through */ 3129 3041 case 6: 3130 3042 if (val & 0xffffffff00000000ULL) { 3131 - kvm_queue_exception(vcpu, GP_VECTOR); 3132 - break; 3043 + kvm_inject_gp(vcpu, 0); 3044 + return 1; 3133 3045 } 3134 3046 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 3135 3047 break; 3136 - case 7: 3048 + case 5: 3049 + if (check_dr_alias(vcpu) < 0) 3050 + return 1; 3051 + /* fall through */ 3052 + default: /* 7 */ 3137 3053 if (val & 0xffffffff00000000ULL) { 3138 - kvm_queue_exception(vcpu, GP_VECTOR); 3139 - break; 3054 + kvm_inject_gp(vcpu, 0); 3055 + return 1; 3140 3056 } 3141 3057 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 3142 3058 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { ··· 3167 3075 u64 data; 3168 3076 3169 3077 if (vmx_get_msr(vcpu, ecx, &data)) { 3078 + trace_kvm_msr_read_ex(ecx); 3170 3079 kvm_inject_gp(vcpu, 0); 3171 3080 return 1; 3172 3081 } ··· 3187 3094 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) 3188 3095 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); 3189 3096 3190 - trace_kvm_msr_write(ecx, data); 3191 - 3192 3097 if (vmx_set_msr(vcpu, ecx, data) != 0) { 3098 + trace_kvm_msr_write_ex(ecx, data); 3193 3099 kvm_inject_gp(vcpu, 0); 3194 3100 return 1; 3195 3101 } 3196 3102 3103 + trace_kvm_msr_write(ecx, data); 3197 3104 skip_emulated_instruction(vcpu); 3198 3105 return 1; 3199 3106 } ··· 3478 3385 } 3479 3386 3480 3387 if (err != EMULATE_DONE) { 3481 - kvm_report_emulation_failure(vcpu, "emulation failure"); 3482 3388 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 3483 3389 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 3484 3390 vcpu->run->internal.ndata = 0; ··· 3505 3413 skip_emulated_instruction(vcpu); 3506 3414 kvm_vcpu_on_spin(vcpu); 3507 3415 3416 + return 1; 3417 + } 3418 + 3419 + static int handle_invalid_op(struct kvm_vcpu *vcpu) 3420 + { 3421 + kvm_queue_exception(vcpu, UD_VECTOR); 3508 3422 return 1; 3509 3423 } 3510 3424 ··· 3551 3453 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, 3552 3454 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, 3553 3455 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, 3456 + [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, 3457 + [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, 3554 3458 }; 3555 3459 3556 3460 static const int kvm_vmx_max_exit_handlers = ··· 3786 3686 */ 3787 3687 vmcs_writel(HOST_CR0, read_cr0()); 3788 3688 3789 - if (vcpu->arch.switch_db_regs) 3790 - set_debugreg(vcpu->arch.dr6, 6); 3791 - 3792 3689 asm( 3793 3690 /* Store host registers */ 3794 3691 "push %%"R"dx; push %%"R"bp;" ··· 3885 3788 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) 3886 3789 | (1 << VCPU_EXREG_PDPTR)); 3887 3790 vcpu->arch.regs_dirty = 0; 3888 - 3889 - if (vcpu->arch.switch_db_regs) 3890 - get_debugreg(vcpu->arch.dr6, 6); 3891 3791 3892 3792 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 3893 3793 if (vmx->rmode.irq.pending) ··· 4014 3920 * b. VT-d with snooping control feature: snooping control feature of 4015 3921 * VT-d engine can guarantee the cache correctness. Just set it 4016 3922 * to WB to keep consistent with host. So the same as item 3. 4017 - * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep 3923 + * 3. 
EPT without VT-d: always map as WB and set IPAT=1 to keep 4018 3924 * consistent with host MTRR 4019 3925 */ 4020 3926 if (is_mmio) ··· 4025 3931 VMX_EPT_MT_EPTE_SHIFT; 4026 3932 else 4027 3933 ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) 4028 - | VMX_EPT_IGMT_BIT; 3934 + | VMX_EPT_IPAT_BIT; 4029 3935 4030 3936 return ret; 4031 3937 } 4032 3938 3939 + #define _ER(x) { EXIT_REASON_##x, #x } 3940 + 4033 3941 static const struct trace_print_flags vmx_exit_reasons_str[] = { 4034 - { EXIT_REASON_EXCEPTION_NMI, "exception" }, 4035 - { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" }, 4036 - { EXIT_REASON_TRIPLE_FAULT, "triple_fault" }, 4037 - { EXIT_REASON_NMI_WINDOW, "nmi_window" }, 4038 - { EXIT_REASON_IO_INSTRUCTION, "io_instruction" }, 4039 - { EXIT_REASON_CR_ACCESS, "cr_access" }, 4040 - { EXIT_REASON_DR_ACCESS, "dr_access" }, 4041 - { EXIT_REASON_CPUID, "cpuid" }, 4042 - { EXIT_REASON_MSR_READ, "rdmsr" }, 4043 - { EXIT_REASON_MSR_WRITE, "wrmsr" }, 4044 - { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" }, 4045 - { EXIT_REASON_HLT, "halt" }, 4046 - { EXIT_REASON_INVLPG, "invlpg" }, 4047 - { EXIT_REASON_VMCALL, "hypercall" }, 4048 - { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" }, 4049 - { EXIT_REASON_APIC_ACCESS, "apic_access" }, 4050 - { EXIT_REASON_WBINVD, "wbinvd" }, 4051 - { EXIT_REASON_TASK_SWITCH, "task_switch" }, 4052 - { EXIT_REASON_EPT_VIOLATION, "ept_violation" }, 3942 + _ER(EXCEPTION_NMI), 3943 + _ER(EXTERNAL_INTERRUPT), 3944 + _ER(TRIPLE_FAULT), 3945 + _ER(PENDING_INTERRUPT), 3946 + _ER(NMI_WINDOW), 3947 + _ER(TASK_SWITCH), 3948 + _ER(CPUID), 3949 + _ER(HLT), 3950 + _ER(INVLPG), 3951 + _ER(RDPMC), 3952 + _ER(RDTSC), 3953 + _ER(VMCALL), 3954 + _ER(VMCLEAR), 3955 + _ER(VMLAUNCH), 3956 + _ER(VMPTRLD), 3957 + _ER(VMPTRST), 3958 + _ER(VMREAD), 3959 + _ER(VMRESUME), 3960 + _ER(VMWRITE), 3961 + _ER(VMOFF), 3962 + _ER(VMON), 3963 + _ER(CR_ACCESS), 3964 + _ER(DR_ACCESS), 3965 + _ER(IO_INSTRUCTION), 3966 + _ER(MSR_READ), 3967 + _ER(MSR_WRITE), 3968 + _ER(MWAIT_INSTRUCTION), 3969 + _ER(MONITOR_INSTRUCTION), 3970 + _ER(PAUSE_INSTRUCTION), 3971 + _ER(MCE_DURING_VMENTRY), 3972 + _ER(TPR_BELOW_THRESHOLD), 3973 + _ER(APIC_ACCESS), 3974 + _ER(EPT_VIOLATION), 3975 + _ER(EPT_MISCONFIG), 3976 + _ER(WBINVD), 4053 3977 { -1, NULL } 4054 3978 }; 4055 3979 4056 - static bool vmx_gb_page_enable(void) 3980 + #undef _ER 3981 + 3982 + static int vmx_get_lpage_level(void) 4057 3983 { 4058 - return false; 3984 + if (enable_ept && !cpu_has_vmx_ept_1g_page()) 3985 + return PT_DIRECTORY_LEVEL; 3986 + else 3987 + /* For shadow and EPT supported 1GB page */ 3988 + return PT_PDPE_LEVEL; 3989 + } 3990 + 3991 + static inline u32 bit(int bitno) 3992 + { 3993 + return 1 << (bitno & 31); 3994 + } 3995 + 3996 + static void vmx_cpuid_update(struct kvm_vcpu *vcpu) 3997 + { 3998 + struct kvm_cpuid_entry2 *best; 3999 + struct vcpu_vmx *vmx = to_vmx(vcpu); 4000 + u32 exec_control; 4001 + 4002 + vmx->rdtscp_enabled = false; 4003 + if (vmx_rdtscp_supported()) { 4004 + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); 4005 + if (exec_control & SECONDARY_EXEC_RDTSCP) { 4006 + best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 4007 + if (best && (best->edx & bit(X86_FEATURE_RDTSCP))) 4008 + vmx->rdtscp_enabled = true; 4009 + else { 4010 + exec_control &= ~SECONDARY_EXEC_RDTSCP; 4011 + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, 4012 + exec_control); 4013 + } 4014 + } 4015 + } 4059 4016 } 4060 4017 4061 4018 static struct kvm_x86_ops vmx_x86_ops = { ··· 4135 3990 .set_segment = vmx_set_segment, 4136 3991 .get_cpl = 
vmx_get_cpl, 4137 3992 .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 3993 + .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, 4138 3994 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, 4139 3995 .set_cr0 = vmx_set_cr0, 4140 3996 .set_cr3 = vmx_set_cr3, ··· 4148 4002 .cache_reg = vmx_cache_reg, 4149 4003 .get_rflags = vmx_get_rflags, 4150 4004 .set_rflags = vmx_set_rflags, 4005 + .fpu_activate = vmx_fpu_activate, 4006 + .fpu_deactivate = vmx_fpu_deactivate, 4151 4007 4152 4008 .tlb_flush = vmx_flush_tlb, 4153 4009 ··· 4175 4027 .get_mt_mask = vmx_get_mt_mask, 4176 4028 4177 4029 .exit_reasons_str = vmx_exit_reasons_str, 4178 - .gb_page_enable = vmx_gb_page_enable, 4030 + .get_lpage_level = vmx_get_lpage_level, 4031 + 4032 + .cpuid_update = vmx_cpuid_update, 4033 + 4034 + .rdtscp_supported = vmx_rdtscp_supported, 4179 4035 }; 4180 4036 4181 4037 static int __init vmx_init(void)
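Note: with vcpu->arch.cr0_guest_owned_bits / cr4_guest_owned_bits and the new decache_cr0_guest_bits hook, guest-owned control-register bits stay in hardware state until something asks for them; that is what the kvm_read_cr0(), kvm_read_cr0_bits() and kvm_read_cr4_bits() calls throughout this merge rely on. A hedged sketch of such an accessor (the real helpers live in kvm_cache_regs.h and may differ in detail):

    static inline unsigned long example_read_cr0_bits(struct kvm_vcpu *vcpu,
                                                      unsigned long mask)
    {
            /* Guest-owned bits are only current in the VMCS/VMCB. */
            if (mask & vcpu->arch.cr0_guest_owned_bits)
                    kvm_x86_ops->decache_cr0_guest_bits(vcpu);

            return vcpu->arch.cr0 & mask;
    }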
+831 -267
arch/x86/kvm/x86.c
··· 38 38 #include <linux/intel-iommu.h> 39 39 #include <linux/cpufreq.h> 40 40 #include <linux/user-return-notifier.h> 41 + #include <linux/srcu.h> 41 42 #include <trace/events/kvm.h> 42 43 #undef TRACE_INCLUDE_FILE 43 44 #define CREATE_TRACE_POINTS ··· 94 93 95 94 struct kvm_shared_msrs_global { 96 95 int nr; 97 - struct kvm_shared_msr { 98 - u32 msr; 99 - u64 value; 100 - } msrs[KVM_NR_SHARED_MSRS]; 96 + u32 msrs[KVM_NR_SHARED_MSRS]; 101 97 }; 102 98 103 99 struct kvm_shared_msrs { 104 100 struct user_return_notifier urn; 105 101 bool registered; 106 - u64 current_value[KVM_NR_SHARED_MSRS]; 102 + struct kvm_shared_msr_values { 103 + u64 host; 104 + u64 curr; 105 + } values[KVM_NR_SHARED_MSRS]; 107 106 }; 108 107 109 108 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; ··· 148 147 static void kvm_on_user_return(struct user_return_notifier *urn) 149 148 { 150 149 unsigned slot; 151 - struct kvm_shared_msr *global; 152 150 struct kvm_shared_msrs *locals 153 151 = container_of(urn, struct kvm_shared_msrs, urn); 152 + struct kvm_shared_msr_values *values; 154 153 155 154 for (slot = 0; slot < shared_msrs_global.nr; ++slot) { 156 - global = &shared_msrs_global.msrs[slot]; 157 - if (global->value != locals->current_value[slot]) { 158 - wrmsrl(global->msr, global->value); 159 - locals->current_value[slot] = global->value; 155 + values = &locals->values[slot]; 156 + if (values->host != values->curr) { 157 + wrmsrl(shared_msrs_global.msrs[slot], values->host); 158 + values->curr = values->host; 160 159 } 161 160 } 162 161 locals->registered = false; 163 162 user_return_notifier_unregister(urn); 164 163 } 165 164 166 - void kvm_define_shared_msr(unsigned slot, u32 msr) 165 + static void shared_msr_update(unsigned slot, u32 msr) 167 166 { 168 - int cpu; 167 + struct kvm_shared_msrs *smsr; 169 168 u64 value; 170 169 170 + smsr = &__get_cpu_var(shared_msrs); 171 + /* only read, and nobody should modify it at this time, 172 + * so don't need lock */ 173 + if (slot >= shared_msrs_global.nr) { 174 + printk(KERN_ERR "kvm: invalid MSR slot!"); 175 + return; 176 + } 177 + rdmsrl_safe(msr, &value); 178 + smsr->values[slot].host = value; 179 + smsr->values[slot].curr = value; 180 + } 181 + 182 + void kvm_define_shared_msr(unsigned slot, u32 msr) 183 + { 171 184 if (slot >= shared_msrs_global.nr) 172 185 shared_msrs_global.nr = slot + 1; 173 - shared_msrs_global.msrs[slot].msr = msr; 174 - rdmsrl_safe(msr, &value); 175 - shared_msrs_global.msrs[slot].value = value; 176 - for_each_online_cpu(cpu) 177 - per_cpu(shared_msrs, cpu).current_value[slot] = value; 186 + shared_msrs_global.msrs[slot] = msr; 187 + /* we need ensured the shared_msr_global have been updated */ 188 + smp_wmb(); 178 189 } 179 190 EXPORT_SYMBOL_GPL(kvm_define_shared_msr); 180 191 181 192 static void kvm_shared_msr_cpu_online(void) 182 193 { 183 194 unsigned i; 184 - struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs); 185 195 186 196 for (i = 0; i < shared_msrs_global.nr; ++i) 187 - locals->current_value[i] = shared_msrs_global.msrs[i].value; 197 + shared_msr_update(i, shared_msrs_global.msrs[i]); 188 198 } 189 199 190 200 void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 191 201 { 192 202 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 193 203 194 - if (((value ^ smsr->current_value[slot]) & mask) == 0) 204 + if (((value ^ smsr->values[slot].curr) & mask) == 0) 195 205 return; 196 - smsr->current_value[slot] = value; 197 - wrmsrl(shared_msrs_global.msrs[slot].msr, value); 206 + 
smsr->values[slot].curr = value; 207 + wrmsrl(shared_msrs_global.msrs[slot], value); 198 208 if (!smsr->registered) { 199 209 smsr->urn.on_user_return = kvm_on_user_return; 200 210 user_return_notifier_register(&smsr->urn); ··· 269 257 } 270 258 EXPORT_SYMBOL_GPL(kvm_set_apic_base); 271 259 260 + #define EXCPT_BENIGN 0 261 + #define EXCPT_CONTRIBUTORY 1 262 + #define EXCPT_PF 2 263 + 264 + static int exception_class(int vector) 265 + { 266 + switch (vector) { 267 + case PF_VECTOR: 268 + return EXCPT_PF; 269 + case DE_VECTOR: 270 + case TS_VECTOR: 271 + case NP_VECTOR: 272 + case SS_VECTOR: 273 + case GP_VECTOR: 274 + return EXCPT_CONTRIBUTORY; 275 + default: 276 + break; 277 + } 278 + return EXCPT_BENIGN; 279 + } 280 + 281 + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, 282 + unsigned nr, bool has_error, u32 error_code) 283 + { 284 + u32 prev_nr; 285 + int class1, class2; 286 + 287 + if (!vcpu->arch.exception.pending) { 288 + queue: 289 + vcpu->arch.exception.pending = true; 290 + vcpu->arch.exception.has_error_code = has_error; 291 + vcpu->arch.exception.nr = nr; 292 + vcpu->arch.exception.error_code = error_code; 293 + return; 294 + } 295 + 296 + /* to check exception */ 297 + prev_nr = vcpu->arch.exception.nr; 298 + if (prev_nr == DF_VECTOR) { 299 + /* triple fault -> shutdown */ 300 + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 301 + return; 302 + } 303 + class1 = exception_class(prev_nr); 304 + class2 = exception_class(nr); 305 + if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) 306 + || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { 307 + /* generate double fault per SDM Table 5-5 */ 308 + vcpu->arch.exception.pending = true; 309 + vcpu->arch.exception.has_error_code = true; 310 + vcpu->arch.exception.nr = DF_VECTOR; 311 + vcpu->arch.exception.error_code = 0; 312 + } else 313 + /* replace previous exception with a new one in a hope 314 + that instruction re-execution will regenerate lost 315 + exception */ 316 + goto queue; 317 + } 318 + 272 319 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 273 320 { 274 - WARN_ON(vcpu->arch.exception.pending); 275 - vcpu->arch.exception.pending = true; 276 - vcpu->arch.exception.has_error_code = false; 277 - vcpu->arch.exception.nr = nr; 321 + kvm_multiple_exception(vcpu, nr, false, 0); 278 322 } 279 323 EXPORT_SYMBOL_GPL(kvm_queue_exception); 280 324 ··· 338 270 u32 error_code) 339 271 { 340 272 ++vcpu->stat.pf_guest; 341 - 342 - if (vcpu->arch.exception.pending) { 343 - switch(vcpu->arch.exception.nr) { 344 - case DF_VECTOR: 345 - /* triple fault -> shutdown */ 346 - set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 347 - return; 348 - case PF_VECTOR: 349 - vcpu->arch.exception.nr = DF_VECTOR; 350 - vcpu->arch.exception.error_code = 0; 351 - return; 352 - default: 353 - /* replace previous exception with a new one in a hope 354 - that instruction re-execution will regenerate lost 355 - exception */ 356 - vcpu->arch.exception.pending = false; 357 - break; 358 - } 359 - } 360 273 vcpu->arch.cr2 = addr; 361 274 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 362 275 } ··· 350 301 351 302 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 352 303 { 353 - WARN_ON(vcpu->arch.exception.pending); 354 - vcpu->arch.exception.pending = true; 355 - vcpu->arch.exception.has_error_code = true; 356 - vcpu->arch.exception.nr = nr; 357 - vcpu->arch.exception.error_code = error_code; 304 + kvm_multiple_exception(vcpu, nr, true, error_code); 358 305 } 359 306 
EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 360 307 ··· 428 383 429 384 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 430 385 { 431 - if (cr0 & CR0_RESERVED_BITS) { 386 + cr0 |= X86_CR0_ET; 387 + 388 + #ifdef CONFIG_X86_64 389 + if (cr0 & 0xffffffff00000000UL) { 432 390 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", 433 - cr0, vcpu->arch.cr0); 391 + cr0, kvm_read_cr0(vcpu)); 434 392 kvm_inject_gp(vcpu, 0); 435 393 return; 436 394 } 395 + #endif 396 + 397 + cr0 &= ~CR0_RESERVED_BITS; 437 398 438 399 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 439 400 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); ··· 456 405 457 406 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 458 407 #ifdef CONFIG_X86_64 459 - if ((vcpu->arch.shadow_efer & EFER_LME)) { 408 + if ((vcpu->arch.efer & EFER_LME)) { 460 409 int cs_db, cs_l; 461 410 462 411 if (!is_pae(vcpu)) { ··· 494 443 495 444 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 496 445 { 497 - kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); 446 + kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); 498 447 } 499 448 EXPORT_SYMBOL_GPL(kvm_lmsw); 500 449 501 450 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 502 451 { 503 - unsigned long old_cr4 = vcpu->arch.cr4; 452 + unsigned long old_cr4 = kvm_read_cr4(vcpu); 504 453 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 505 454 506 455 if (cr4 & CR4_RESERVED_BITS) { ··· 626 575 * kvm-specific. Those are put in the beginning of the list. 627 576 */ 628 577 629 - #define KVM_SAVE_MSRS_BEGIN 2 578 + #define KVM_SAVE_MSRS_BEGIN 5 630 579 static u32 msrs_to_save[] = { 631 580 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 581 + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 582 + HV_X64_MSR_APIC_ASSIST_PAGE, 632 583 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 633 584 MSR_K6_STAR, 634 585 #ifdef CONFIG_X86_64 ··· 655 602 } 656 603 657 604 if (is_paging(vcpu) 658 - && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { 605 + && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { 659 606 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); 660 607 kvm_inject_gp(vcpu, 0); 661 608 return; ··· 686 633 kvm_x86_ops->set_efer(vcpu, efer); 687 634 688 635 efer &= ~EFER_LMA; 689 - efer |= vcpu->arch.shadow_efer & EFER_LMA; 636 + efer |= vcpu->arch.efer & EFER_LMA; 690 637 691 - vcpu->arch.shadow_efer = efer; 638 + vcpu->arch.efer = efer; 692 639 693 640 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 694 641 kvm_mmu_reset_context(vcpu); ··· 1010 957 return r; 1011 958 } 1012 959 960 + static bool kvm_hv_hypercall_enabled(struct kvm *kvm) 961 + { 962 + return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; 963 + } 964 + 965 + static bool kvm_hv_msr_partition_wide(u32 msr) 966 + { 967 + bool r = false; 968 + switch (msr) { 969 + case HV_X64_MSR_GUEST_OS_ID: 970 + case HV_X64_MSR_HYPERCALL: 971 + r = true; 972 + break; 973 + } 974 + 975 + return r; 976 + } 977 + 978 + static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) 979 + { 980 + struct kvm *kvm = vcpu->kvm; 981 + 982 + switch (msr) { 983 + case HV_X64_MSR_GUEST_OS_ID: 984 + kvm->arch.hv_guest_os_id = data; 985 + /* setting guest os id to zero disables hypercall page */ 986 + if (!kvm->arch.hv_guest_os_id) 987 + kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; 988 + break; 989 + case HV_X64_MSR_HYPERCALL: { 990 + u64 gfn; 991 + unsigned long addr; 992 + u8 instructions[4]; 993 + 994 + /* if 
guest os id is not set hypercall should remain disabled */ 995 + if (!kvm->arch.hv_guest_os_id) 996 + break; 997 + if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { 998 + kvm->arch.hv_hypercall = data; 999 + break; 1000 + } 1001 + gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; 1002 + addr = gfn_to_hva(kvm, gfn); 1003 + if (kvm_is_error_hva(addr)) 1004 + return 1; 1005 + kvm_x86_ops->patch_hypercall(vcpu, instructions); 1006 + ((unsigned char *)instructions)[3] = 0xc3; /* ret */ 1007 + if (copy_to_user((void __user *)addr, instructions, 4)) 1008 + return 1; 1009 + kvm->arch.hv_hypercall = data; 1010 + break; 1011 + } 1012 + default: 1013 + pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " 1014 + "data 0x%llx\n", msr, data); 1015 + return 1; 1016 + } 1017 + return 0; 1018 + } 1019 + 1020 + static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1021 + { 1022 + switch (msr) { 1023 + case HV_X64_MSR_APIC_ASSIST_PAGE: { 1024 + unsigned long addr; 1025 + 1026 + if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { 1027 + vcpu->arch.hv_vapic = data; 1028 + break; 1029 + } 1030 + addr = gfn_to_hva(vcpu->kvm, data >> 1031 + HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); 1032 + if (kvm_is_error_hva(addr)) 1033 + return 1; 1034 + if (clear_user((void __user *)addr, PAGE_SIZE)) 1035 + return 1; 1036 + vcpu->arch.hv_vapic = data; 1037 + break; 1038 + } 1039 + case HV_X64_MSR_EOI: 1040 + return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); 1041 + case HV_X64_MSR_ICR: 1042 + return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); 1043 + case HV_X64_MSR_TPR: 1044 + return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); 1045 + default: 1046 + pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " 1047 + "data 0x%llx\n", msr, data); 1048 + return 1; 1049 + } 1050 + 1051 + return 0; 1052 + } 1053 + 1013 1054 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1014 1055 { 1015 1056 switch (msr) { ··· 1218 1071 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1219 1072 "0x%x data 0x%llx\n", msr, data); 1220 1073 break; 1074 + case HV_X64_MSR_GUEST_OS_ID ... 
HV_X64_MSR_SINT15: 1075 + if (kvm_hv_msr_partition_wide(msr)) { 1076 + int r; 1077 + mutex_lock(&vcpu->kvm->lock); 1078 + r = set_msr_hyperv_pw(vcpu, msr, data); 1079 + mutex_unlock(&vcpu->kvm->lock); 1080 + return r; 1081 + } else 1082 + return set_msr_hyperv(vcpu, msr, data); 1083 + break; 1221 1084 default: 1222 1085 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) 1223 1086 return xen_hvm_config(vcpu, data); ··· 1327 1170 return 0; 1328 1171 } 1329 1172 1173 + static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1174 + { 1175 + u64 data = 0; 1176 + struct kvm *kvm = vcpu->kvm; 1177 + 1178 + switch (msr) { 1179 + case HV_X64_MSR_GUEST_OS_ID: 1180 + data = kvm->arch.hv_guest_os_id; 1181 + break; 1182 + case HV_X64_MSR_HYPERCALL: 1183 + data = kvm->arch.hv_hypercall; 1184 + break; 1185 + default: 1186 + pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1187 + return 1; 1188 + } 1189 + 1190 + *pdata = data; 1191 + return 0; 1192 + } 1193 + 1194 + static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1195 + { 1196 + u64 data = 0; 1197 + 1198 + switch (msr) { 1199 + case HV_X64_MSR_VP_INDEX: { 1200 + int r; 1201 + struct kvm_vcpu *v; 1202 + kvm_for_each_vcpu(r, v, vcpu->kvm) 1203 + if (v == vcpu) 1204 + data = r; 1205 + break; 1206 + } 1207 + case HV_X64_MSR_EOI: 1208 + return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); 1209 + case HV_X64_MSR_ICR: 1210 + return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); 1211 + case HV_X64_MSR_TPR: 1212 + return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); 1213 + default: 1214 + pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1215 + return 1; 1216 + } 1217 + *pdata = data; 1218 + return 0; 1219 + } 1220 + 1330 1221 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1331 1222 { 1332 1223 u64 data; ··· 1426 1221 data |= (((uint64_t)4ULL) << 40); 1427 1222 break; 1428 1223 case MSR_EFER: 1429 - data = vcpu->arch.shadow_efer; 1224 + data = vcpu->arch.efer; 1430 1225 break; 1431 1226 case MSR_KVM_WALL_CLOCK: 1432 1227 data = vcpu->kvm->arch.wall_clock; ··· 1441 1236 case MSR_IA32_MCG_STATUS: 1442 1237 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 1443 1238 return get_msr_mce(vcpu, msr, pdata); 1239 + case HV_X64_MSR_GUEST_OS_ID ... 
HV_X64_MSR_SINT15: 1240 + if (kvm_hv_msr_partition_wide(msr)) { 1241 + int r; 1242 + mutex_lock(&vcpu->kvm->lock); 1243 + r = get_msr_hyperv_pw(vcpu, msr, pdata); 1244 + mutex_unlock(&vcpu->kvm->lock); 1245 + return r; 1246 + } else 1247 + return get_msr_hyperv(vcpu, msr, pdata); 1248 + break; 1444 1249 default: 1445 1250 if (!ignore_msrs) { 1446 1251 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); ··· 1476 1261 int (*do_msr)(struct kvm_vcpu *vcpu, 1477 1262 unsigned index, u64 *data)) 1478 1263 { 1479 - int i; 1264 + int i, idx; 1480 1265 1481 1266 vcpu_load(vcpu); 1482 1267 1483 - down_read(&vcpu->kvm->slots_lock); 1268 + idx = srcu_read_lock(&vcpu->kvm->srcu); 1484 1269 for (i = 0; i < msrs->nmsrs; ++i) 1485 1270 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1486 1271 break; 1487 - up_read(&vcpu->kvm->slots_lock); 1272 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 1488 1273 1489 1274 vcpu_put(vcpu); 1490 1275 ··· 1566 1351 case KVM_CAP_XEN_HVM: 1567 1352 case KVM_CAP_ADJUST_CLOCK: 1568 1353 case KVM_CAP_VCPU_EVENTS: 1354 + case KVM_CAP_HYPERV: 1355 + case KVM_CAP_HYPERV_VAPIC: 1356 + case KVM_CAP_HYPERV_SPIN: 1357 + case KVM_CAP_PCI_SEGMENT: 1358 + case KVM_CAP_X86_ROBUST_SINGLESTEP: 1569 1359 r = 1; 1570 1360 break; 1571 1361 case KVM_CAP_COALESCED_MMIO: ··· 1684 1464 1685 1465 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1686 1466 { 1687 - kvm_x86_ops->vcpu_put(vcpu); 1688 1467 kvm_put_guest_fpu(vcpu); 1468 + kvm_x86_ops->vcpu_put(vcpu); 1689 1469 } 1690 1470 1691 1471 static int is_efer_nx(void) ··· 1750 1530 cpuid_fix_nx_cap(vcpu); 1751 1531 r = 0; 1752 1532 kvm_apic_set_version(vcpu); 1533 + kvm_x86_ops->cpuid_update(vcpu); 1753 1534 1754 1535 out_free: 1755 1536 vfree(cpuid_entries); ··· 1773 1552 goto out; 1774 1553 vcpu->arch.cpuid_nent = cpuid->nent; 1775 1554 kvm_apic_set_version(vcpu); 1555 + kvm_x86_ops->cpuid_update(vcpu); 1776 1556 return 0; 1777 1557 1778 1558 out: ··· 1816 1594 u32 index, int *nent, int maxnent) 1817 1595 { 1818 1596 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 1819 - unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0; 1820 1597 #ifdef CONFIG_X86_64 1598 + unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) 1599 + ? F(GBPAGES) : 0; 1821 1600 unsigned f_lm = F(LM); 1822 1601 #else 1602 + unsigned f_gbpages = 0; 1823 1603 unsigned f_lm = 0; 1824 1604 #endif 1605 + unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? 
F(RDTSCP) : 0; 1825 1606 1826 1607 /* cpuid 1.edx */ 1827 1608 const u32 kvm_supported_word0_x86_features = ··· 1844 1619 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1845 1620 F(PAT) | F(PSE36) | 0 /* Reserved */ | 1846 1621 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | 1847 - F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ | 1622 + F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | 1848 1623 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1849 1624 /* cpuid 1.ecx */ 1850 1625 const u32 kvm_supported_word4_x86_features = ··· 2091 1866 return 0; 2092 1867 if (mce->status & MCI_STATUS_UC) { 2093 1868 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || 2094 - !(vcpu->arch.cr4 & X86_CR4_MCE)) { 1869 + !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { 2095 1870 printk(KERN_DEBUG "kvm: set_mce: " 2096 1871 "injects mce exception while " 2097 1872 "previous one is in progress!\n"); ··· 2385 2160 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) 2386 2161 return -EINVAL; 2387 2162 2388 - down_write(&kvm->slots_lock); 2163 + mutex_lock(&kvm->slots_lock); 2389 2164 spin_lock(&kvm->mmu_lock); 2390 2165 2391 2166 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 2392 2167 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 2393 2168 2394 2169 spin_unlock(&kvm->mmu_lock); 2395 - up_write(&kvm->slots_lock); 2170 + mutex_unlock(&kvm->slots_lock); 2396 2171 return 0; 2397 2172 } 2398 2173 ··· 2401 2176 return kvm->arch.n_alloc_mmu_pages; 2402 2177 } 2403 2178 2179 + gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) 2180 + { 2181 + int i; 2182 + struct kvm_mem_alias *alias; 2183 + struct kvm_mem_aliases *aliases; 2184 + 2185 + aliases = rcu_dereference(kvm->arch.aliases); 2186 + 2187 + for (i = 0; i < aliases->naliases; ++i) { 2188 + alias = &aliases->aliases[i]; 2189 + if (alias->flags & KVM_ALIAS_INVALID) 2190 + continue; 2191 + if (gfn >= alias->base_gfn 2192 + && gfn < alias->base_gfn + alias->npages) 2193 + return alias->target_gfn + gfn - alias->base_gfn; 2194 + } 2195 + return gfn; 2196 + } 2197 + 2404 2198 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 2405 2199 { 2406 2200 int i; 2407 2201 struct kvm_mem_alias *alias; 2202 + struct kvm_mem_aliases *aliases; 2408 2203 2409 - for (i = 0; i < kvm->arch.naliases; ++i) { 2410 - alias = &kvm->arch.aliases[i]; 2204 + aliases = rcu_dereference(kvm->arch.aliases); 2205 + 2206 + for (i = 0; i < aliases->naliases; ++i) { 2207 + alias = &aliases->aliases[i]; 2411 2208 if (gfn >= alias->base_gfn 2412 2209 && gfn < alias->base_gfn + alias->npages) 2413 2210 return alias->target_gfn + gfn - alias->base_gfn; ··· 2447 2200 { 2448 2201 int r, n; 2449 2202 struct kvm_mem_alias *p; 2203 + struct kvm_mem_aliases *aliases, *old_aliases; 2450 2204 2451 2205 r = -EINVAL; 2452 2206 /* General sanity checks */ ··· 2464 2216 < alias->target_phys_addr) 2465 2217 goto out; 2466 2218 2467 - down_write(&kvm->slots_lock); 2468 - spin_lock(&kvm->mmu_lock); 2219 + r = -ENOMEM; 2220 + aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 2221 + if (!aliases) 2222 + goto out; 2469 2223 2470 - p = &kvm->arch.aliases[alias->slot]; 2224 + mutex_lock(&kvm->slots_lock); 2225 + 2226 + /* invalidate any gfn reference in case of deletion/shrinking */ 2227 + memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); 2228 + aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; 2229 + old_aliases = kvm->arch.aliases; 2230 + rcu_assign_pointer(kvm->arch.aliases, aliases); 2231 + synchronize_srcu_expedited(&kvm->srcu); 2232 + kvm_mmu_zap_all(kvm); 2233 + kfree(old_aliases); 2234 + 
2235 + r = -ENOMEM; 2236 + aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 2237 + if (!aliases) 2238 + goto out_unlock; 2239 + 2240 + memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); 2241 + 2242 + p = &aliases->aliases[alias->slot]; 2471 2243 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 2472 2244 p->npages = alias->memory_size >> PAGE_SHIFT; 2473 2245 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; 2246 + p->flags &= ~(KVM_ALIAS_INVALID); 2474 2247 2475 2248 for (n = KVM_ALIAS_SLOTS; n > 0; --n) 2476 - if (kvm->arch.aliases[n - 1].npages) 2249 + if (aliases->aliases[n - 1].npages) 2477 2250 break; 2478 - kvm->arch.naliases = n; 2251 + aliases->naliases = n; 2479 2252 2480 - spin_unlock(&kvm->mmu_lock); 2481 - kvm_mmu_zap_all(kvm); 2253 + old_aliases = kvm->arch.aliases; 2254 + rcu_assign_pointer(kvm->arch.aliases, aliases); 2255 + synchronize_srcu_expedited(&kvm->srcu); 2256 + kfree(old_aliases); 2257 + r = 0; 2482 2258 2483 - up_write(&kvm->slots_lock); 2484 - 2485 - return 0; 2486 - 2259 + out_unlock: 2260 + mutex_unlock(&kvm->slots_lock); 2487 2261 out: 2488 2262 return r; 2489 2263 } ··· 2543 2273 r = 0; 2544 2274 switch (chip->chip_id) { 2545 2275 case KVM_IRQCHIP_PIC_MASTER: 2546 - spin_lock(&pic_irqchip(kvm)->lock); 2276 + raw_spin_lock(&pic_irqchip(kvm)->lock); 2547 2277 memcpy(&pic_irqchip(kvm)->pics[0], 2548 2278 &chip->chip.pic, 2549 2279 sizeof(struct kvm_pic_state)); 2550 - spin_unlock(&pic_irqchip(kvm)->lock); 2280 + raw_spin_unlock(&pic_irqchip(kvm)->lock); 2551 2281 break; 2552 2282 case KVM_IRQCHIP_PIC_SLAVE: 2553 - spin_lock(&pic_irqchip(kvm)->lock); 2283 + raw_spin_lock(&pic_irqchip(kvm)->lock); 2554 2284 memcpy(&pic_irqchip(kvm)->pics[1], 2555 2285 &chip->chip.pic, 2556 2286 sizeof(struct kvm_pic_state)); 2557 - spin_unlock(&pic_irqchip(kvm)->lock); 2287 + raw_spin_unlock(&pic_irqchip(kvm)->lock); 2558 2288 break; 2559 2289 case KVM_IRQCHIP_IOAPIC: 2560 2290 r = kvm_set_ioapic(kvm, &chip->chip.ioapic); ··· 2634 2364 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2635 2365 struct kvm_dirty_log *log) 2636 2366 { 2637 - int r; 2638 - int n; 2367 + int r, n, i; 2639 2368 struct kvm_memory_slot *memslot; 2640 - int is_dirty = 0; 2369 + unsigned long is_dirty = 0; 2370 + unsigned long *dirty_bitmap = NULL; 2641 2371 2642 - down_write(&kvm->slots_lock); 2372 + mutex_lock(&kvm->slots_lock); 2643 2373 2644 - r = kvm_get_dirty_log(kvm, log, &is_dirty); 2645 - if (r) 2374 + r = -EINVAL; 2375 + if (log->slot >= KVM_MEMORY_SLOTS) 2646 2376 goto out; 2377 + 2378 + memslot = &kvm->memslots->memslots[log->slot]; 2379 + r = -ENOENT; 2380 + if (!memslot->dirty_bitmap) 2381 + goto out; 2382 + 2383 + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2384 + 2385 + r = -ENOMEM; 2386 + dirty_bitmap = vmalloc(n); 2387 + if (!dirty_bitmap) 2388 + goto out; 2389 + memset(dirty_bitmap, 0, n); 2390 + 2391 + for (i = 0; !is_dirty && i < n/sizeof(long); i++) 2392 + is_dirty = memslot->dirty_bitmap[i]; 2647 2393 2648 2394 /* If nothing is dirty, don't bother messing with page tables. 
*/ 2649 2395 if (is_dirty) { 2396 + struct kvm_memslots *slots, *old_slots; 2397 + 2650 2398 spin_lock(&kvm->mmu_lock); 2651 2399 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2652 2400 spin_unlock(&kvm->mmu_lock); 2653 - memslot = &kvm->memslots[log->slot]; 2654 - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2655 - memset(memslot->dirty_bitmap, 0, n); 2401 + 2402 + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 2403 + if (!slots) 2404 + goto out_free; 2405 + 2406 + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 2407 + slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; 2408 + 2409 + old_slots = kvm->memslots; 2410 + rcu_assign_pointer(kvm->memslots, slots); 2411 + synchronize_srcu_expedited(&kvm->srcu); 2412 + dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; 2413 + kfree(old_slots); 2656 2414 } 2415 + 2657 2416 r = 0; 2417 + if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) 2418 + r = -EFAULT; 2419 + out_free: 2420 + vfree(dirty_bitmap); 2658 2421 out: 2659 - up_write(&kvm->slots_lock); 2422 + mutex_unlock(&kvm->slots_lock); 2660 2423 return r; 2661 2424 } 2662 2425 ··· 2772 2469 if (vpic) { 2773 2470 r = kvm_ioapic_init(kvm); 2774 2471 if (r) { 2472 + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, 2473 + &vpic->dev); 2775 2474 kfree(vpic); 2776 2475 goto create_irqchip_unlock; 2777 2476 } ··· 2785 2480 r = kvm_setup_default_irq_routing(kvm); 2786 2481 if (r) { 2787 2482 mutex_lock(&kvm->irq_lock); 2788 - kfree(kvm->arch.vpic); 2789 - kfree(kvm->arch.vioapic); 2790 - kvm->arch.vpic = NULL; 2791 - kvm->arch.vioapic = NULL; 2483 + kvm_ioapic_destroy(kvm); 2484 + kvm_destroy_pic(kvm); 2792 2485 mutex_unlock(&kvm->irq_lock); 2793 2486 } 2794 2487 create_irqchip_unlock: ··· 2802 2499 sizeof(struct kvm_pit_config))) 2803 2500 goto out; 2804 2501 create_pit: 2805 - down_write(&kvm->slots_lock); 2502 + mutex_lock(&kvm->slots_lock); 2806 2503 r = -EEXIST; 2807 2504 if (kvm->arch.vpit) 2808 2505 goto create_pit_unlock; ··· 2811 2508 if (kvm->arch.vpit) 2812 2509 r = 0; 2813 2510 create_pit_unlock: 2814 - up_write(&kvm->slots_lock); 2511 + mutex_unlock(&kvm->slots_lock); 2815 2512 break; 2816 2513 case KVM_IRQ_LINE_STATUS: 2817 2514 case KVM_IRQ_LINE: { ··· 3028 2725 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) 3029 2726 return 0; 3030 2727 3031 - return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); 2728 + return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3032 2729 } 3033 2730 3034 2731 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) ··· 3037 2734 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) 3038 2735 return 0; 3039 2736 3040 - return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); 2737 + return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3041 2738 } 3042 2739 3043 - static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 3044 - struct kvm_vcpu *vcpu) 2740 + gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 2741 + { 2742 + u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 2743 + return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); 2744 + } 2745 + 2746 + gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 2747 + { 2748 + u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? 
PFERR_USER_MASK : 0; 2749 + access |= PFERR_FETCH_MASK; 2750 + return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); 2751 + } 2752 + 2753 + gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 2754 + { 2755 + u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 2756 + access |= PFERR_WRITE_MASK; 2757 + return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); 2758 + } 2759 + 2760 + /* uses this to access any guest's mapped memory without checking CPL */ 2761 + gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 2762 + { 2763 + return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error); 2764 + } 2765 + 2766 + static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, 2767 + struct kvm_vcpu *vcpu, u32 access, 2768 + u32 *error) 3045 2769 { 3046 2770 void *data = val; 3047 2771 int r = X86EMUL_CONTINUE; 3048 2772 3049 2773 while (bytes) { 3050 - gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 2774 + gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error); 3051 2775 unsigned offset = addr & (PAGE_SIZE-1); 3052 2776 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); 3053 2777 int ret; ··· 3097 2767 return r; 3098 2768 } 3099 2769 2770 + /* used for instruction fetching */ 2771 + static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, 2772 + struct kvm_vcpu *vcpu, u32 *error) 2773 + { 2774 + u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 2775 + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 2776 + access | PFERR_FETCH_MASK, error); 2777 + } 2778 + 2779 + static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 2780 + struct kvm_vcpu *vcpu, u32 *error) 2781 + { 2782 + u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? 
PFERR_USER_MASK : 0; 2783 + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, 2784 + error); 2785 + } 2786 + 2787 + static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, 2788 + struct kvm_vcpu *vcpu, u32 *error) 2789 + { 2790 + return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); 2791 + } 2792 + 3100 2793 static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3101 - struct kvm_vcpu *vcpu) 2794 + struct kvm_vcpu *vcpu, u32 *error) 3102 2795 { 3103 2796 void *data = val; 3104 2797 int r = X86EMUL_CONTINUE; 3105 2798 3106 2799 while (bytes) { 3107 - gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 2800 + gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); 3108 2801 unsigned offset = addr & (PAGE_SIZE-1); 3109 2802 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3110 2803 int ret; ··· 3157 2804 struct kvm_vcpu *vcpu) 3158 2805 { 3159 2806 gpa_t gpa; 2807 + u32 error_code; 3160 2808 3161 2809 if (vcpu->mmio_read_completed) { 3162 2810 memcpy(val, vcpu->mmio_data, bytes); ··· 3167 2813 return X86EMUL_CONTINUE; 3168 2814 } 3169 2815 3170 - gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 2816 + gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); 2817 + 2818 + if (gpa == UNMAPPED_GVA) { 2819 + kvm_inject_page_fault(vcpu, addr, error_code); 2820 + return X86EMUL_PROPAGATE_FAULT; 2821 + } 3171 2822 3172 2823 /* For APIC access vmexit */ 3173 2824 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3174 2825 goto mmio; 3175 2826 3176 - if (kvm_read_guest_virt(addr, val, bytes, vcpu) 2827 + if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) 3177 2828 == X86EMUL_CONTINUE) 3178 2829 return X86EMUL_CONTINUE; 3179 - if (gpa == UNMAPPED_GVA) 3180 - return X86EMUL_PROPAGATE_FAULT; 3181 2830 3182 2831 mmio: 3183 2832 /* ··· 3219 2862 struct kvm_vcpu *vcpu) 3220 2863 { 3221 2864 gpa_t gpa; 2865 + u32 error_code; 3222 2866 3223 - gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 2867 + gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); 3224 2868 3225 2869 if (gpa == UNMAPPED_GVA) { 3226 - kvm_inject_page_fault(vcpu, addr, 2); 2870 + kvm_inject_page_fault(vcpu, addr, error_code); 3227 2871 return X86EMUL_PROPAGATE_FAULT; 3228 2872 } 3229 2873 ··· 3288 2930 char *kaddr; 3289 2931 u64 val; 3290 2932 3291 - gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 2933 + gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); 3292 2934 3293 2935 if (gpa == UNMAPPED_GVA || 3294 2936 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) ··· 3325 2967 3326 2968 int emulate_clts(struct kvm_vcpu *vcpu) 3327 2969 { 3328 - kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); 2970 + kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); 2971 + kvm_x86_ops->fpu_activate(vcpu); 3329 2972 return X86EMUL_CONTINUE; 3330 2973 } 3331 2974 3332 2975 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3333 2976 { 3334 - struct kvm_vcpu *vcpu = ctxt->vcpu; 3335 - 3336 - switch (dr) { 3337 - case 0 ... 3: 3338 - *dest = kvm_x86_ops->get_dr(vcpu, dr); 3339 - return X86EMUL_CONTINUE; 3340 - default: 3341 - pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr); 3342 - return X86EMUL_UNHANDLEABLE; 3343 - } 2977 + return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); 3344 2978 } 3345 2979 3346 2980 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3347 2981 { 3348 2982 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? 
~0ULL : ~0U; 3349 - int exception; 3350 2983 3351 - kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); 3352 - if (exception) { 3353 - /* FIXME: better handling */ 3354 - return X86EMUL_UNHANDLEABLE; 3355 - } 3356 - return X86EMUL_CONTINUE; 2984 + return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); 3357 2985 } 3358 2986 3359 2987 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) ··· 3353 3009 3354 3010 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 3355 3011 3356 - kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); 3012 + kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); 3357 3013 3358 3014 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 3359 3015 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); ··· 3361 3017 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3362 3018 3363 3019 static struct x86_emulate_ops emulate_ops = { 3364 - .read_std = kvm_read_guest_virt, 3020 + .read_std = kvm_read_guest_virt_system, 3021 + .fetch = kvm_fetch_guest_virt, 3365 3022 .read_emulated = emulator_read_emulated, 3366 3023 .write_emulated = emulator_write_emulated, 3367 3024 .cmpxchg_emulated = emulator_cmpxchg_emulated, ··· 3405 3060 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3406 3061 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 3407 3062 vcpu->arch.emulate_ctxt.mode = 3063 + (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 3408 3064 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3409 - ? X86EMUL_MODE_REAL : cs_l 3065 + ? X86EMUL_MODE_VM86 : cs_l 3410 3066 ? X86EMUL_MODE_PROT64 : cs_db 3411 3067 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3412 3068 ··· 3499 3153 gva_t q = vcpu->arch.pio.guest_gva; 3500 3154 unsigned bytes; 3501 3155 int ret; 3156 + u32 error_code; 3502 3157 3503 3158 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; 3504 3159 if (vcpu->arch.pio.in) 3505 - ret = kvm_write_guest_virt(q, p, bytes, vcpu); 3160 + ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); 3506 3161 else 3507 - ret = kvm_read_guest_virt(q, p, bytes, vcpu); 3162 + ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); 3163 + 3164 + if (ret == X86EMUL_PROPAGATE_FAULT) 3165 + kvm_inject_page_fault(vcpu, q, error_code); 3166 + 3508 3167 return ret; 3509 3168 } 3510 3169 ··· 3530 3179 if (io->in) { 3531 3180 r = pio_copy_data(vcpu); 3532 3181 if (r) 3533 - return r; 3182 + goto out; 3534 3183 } 3535 3184 3536 3185 delta = 1; ··· 3557 3206 kvm_register_write(vcpu, VCPU_REGS_RSI, val); 3558 3207 } 3559 3208 } 3560 - 3209 + out: 3561 3210 io->count -= io->cur_count; 3562 3211 io->cur_count = 0; 3563 3212 ··· 3570 3219 int r; 3571 3220 3572 3221 if (vcpu->arch.pio.in) 3573 - r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3222 + r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, 3574 3223 vcpu->arch.pio.size, pd); 3575 3224 else 3576 - r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3577 - vcpu->arch.pio.size, pd); 3225 + r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, 3226 + vcpu->arch.pio.port, vcpu->arch.pio.size, 3227 + pd); 3578 3228 return r; 3579 3229 } 3580 3230 ··· 3586 3234 int i, r = 0; 3587 3235 3588 3236 for (i = 0; i < io->cur_count; i++) { 3589 - if (kvm_io_bus_write(&vcpu->kvm->pio_bus, 3237 + if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, 3590 3238 io->port, io->size, pd)) { 3591 3239 r = -EOPNOTSUPP; 3592 3240 break; ··· 3600 3248 { 3601 3249 unsigned long val; 3602 3250 3251 + trace_kvm_pio(!in, port, size, 1); 3252 + 3603 3253 
vcpu->run->exit_reason = KVM_EXIT_IO; 3604 3254 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3605 3255 vcpu->run->io.size = vcpu->arch.pio.size = size; ··· 3613 3259 vcpu->arch.pio.down = 0; 3614 3260 vcpu->arch.pio.rep = 0; 3615 3261 3616 - trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, 3617 - size, 1); 3618 - 3619 - val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3620 - memcpy(vcpu->arch.pio_data, &val, 4); 3262 + if (!vcpu->arch.pio.in) { 3263 + val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3264 + memcpy(vcpu->arch.pio_data, &val, 4); 3265 + } 3621 3266 3622 3267 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3623 3268 complete_pio(vcpu); ··· 3633 3280 unsigned now, in_page; 3634 3281 int ret = 0; 3635 3282 3283 + trace_kvm_pio(!in, port, size, count); 3284 + 3636 3285 vcpu->run->exit_reason = KVM_EXIT_IO; 3637 3286 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3638 3287 vcpu->run->io.size = vcpu->arch.pio.size = size; ··· 3645 3290 vcpu->arch.pio.string = 1; 3646 3291 vcpu->arch.pio.down = down; 3647 3292 vcpu->arch.pio.rep = rep; 3648 - 3649 - trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, 3650 - size, count); 3651 3293 3652 3294 if (!count) { 3653 3295 kvm_x86_ops->skip_emulated_instruction(vcpu); ··· 3677 3325 if (!vcpu->arch.pio.in) { 3678 3326 /* string PIO write */ 3679 3327 ret = pio_copy_data(vcpu); 3680 - if (ret == X86EMUL_PROPAGATE_FAULT) { 3681 - kvm_inject_gp(vcpu, 0); 3328 + if (ret == X86EMUL_PROPAGATE_FAULT) 3682 3329 return 1; 3683 - } 3684 3330 if (ret == 0 && !pio_string_write(vcpu)) { 3685 3331 complete_pio(vcpu); 3686 3332 if (vcpu->arch.pio.count == 0) ··· 3837 3487 return a0 | ((gpa_t)a1 << 32); 3838 3488 } 3839 3489 3490 + int kvm_hv_hypercall(struct kvm_vcpu *vcpu) 3491 + { 3492 + u64 param, ingpa, outgpa, ret; 3493 + uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; 3494 + bool fast, longmode; 3495 + int cs_db, cs_l; 3496 + 3497 + /* 3498 + * hypercall generates UD from non zero cpl and real mode 3499 + * per HYPER-V spec 3500 + */ 3501 + if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { 3502 + kvm_queue_exception(vcpu, UD_VECTOR); 3503 + return 0; 3504 + } 3505 + 3506 + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3507 + longmode = is_long_mode(vcpu) && cs_l == 1; 3508 + 3509 + if (!longmode) { 3510 + param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | 3511 + (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); 3512 + ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | 3513 + (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); 3514 + outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | 3515 + (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); 3516 + } 3517 + #ifdef CONFIG_X86_64 3518 + else { 3519 + param = kvm_register_read(vcpu, VCPU_REGS_RCX); 3520 + ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); 3521 + outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); 3522 + } 3523 + #endif 3524 + 3525 + code = param & 0xffff; 3526 + fast = (param >> 16) & 0x1; 3527 + rep_cnt = (param >> 32) & 0xfff; 3528 + rep_idx = (param >> 48) & 0xfff; 3529 + 3530 + trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); 3531 + 3532 + switch (code) { 3533 + case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: 3534 + kvm_vcpu_on_spin(vcpu); 3535 + break; 3536 + default: 3537 + res = HV_STATUS_INVALID_HYPERCALL_CODE; 3538 + break; 3539 + } 3540 + 3541 + ret = res | (((u64)rep_done & 0xfff) << 32); 3542 + if (longmode) { 3543 + 
kvm_register_write(vcpu, VCPU_REGS_RAX, ret); 3544 + } else { 3545 + kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); 3546 + kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); 3547 + } 3548 + 3549 + return 1; 3550 + } 3551 + 3840 3552 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 3841 3553 { 3842 3554 unsigned long nr, a0, a1, a2, a3, ret; 3843 3555 int r = 1; 3556 + 3557 + if (kvm_hv_hypercall_enabled(vcpu->kvm)) 3558 + return kvm_hv_hypercall(vcpu); 3844 3559 3845 3560 nr = kvm_register_read(vcpu, VCPU_REGS_RAX); 3846 3561 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); ··· 3949 3534 int kvm_fix_hypercall(struct kvm_vcpu *vcpu) 3950 3535 { 3951 3536 char instruction[3]; 3952 - int ret = 0; 3953 3537 unsigned long rip = kvm_rip_read(vcpu); 3954 - 3955 3538 3956 3539 /* 3957 3540 * Blow out the MMU to ensure that no other VCPU has an active mapping ··· 3959 3546 kvm_mmu_zap_all(vcpu->kvm); 3960 3547 3961 3548 kvm_x86_ops->patch_hypercall(vcpu, instruction); 3962 - if (emulator_write_emulated(rip, instruction, 3, vcpu) 3963 - != X86EMUL_CONTINUE) 3964 - ret = -EFAULT; 3965 3549 3966 - return ret; 3550 + return emulator_write_emulated(rip, instruction, 3, vcpu); 3967 3551 } 3968 3552 3969 3553 static u64 mk_cr_64(u64 curr_cr, u32 new_val) ··· 3993 3583 { 3994 3584 unsigned long value; 3995 3585 3996 - kvm_x86_ops->decache_cr4_guest_bits(vcpu); 3997 3586 switch (cr) { 3998 3587 case 0: 3999 - value = vcpu->arch.cr0; 3588 + value = kvm_read_cr0(vcpu); 4000 3589 break; 4001 3590 case 2: 4002 3591 value = vcpu->arch.cr2; ··· 4004 3595 value = vcpu->arch.cr3; 4005 3596 break; 4006 3597 case 4: 4007 - value = vcpu->arch.cr4; 3598 + value = kvm_read_cr4(vcpu); 4008 3599 break; 4009 3600 case 8: 4010 3601 value = kvm_get_cr8(vcpu); ··· 4022 3613 { 4023 3614 switch (cr) { 4024 3615 case 0: 4025 - kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 3616 + kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); 4026 3617 *rflags = kvm_get_rflags(vcpu); 4027 3618 break; 4028 3619 case 2: ··· 4032 3623 kvm_set_cr3(vcpu, val); 4033 3624 break; 4034 3625 case 4: 4035 - kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); 3626 + kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); 4036 3627 break; 4037 3628 case 8: 4038 3629 kvm_set_cr8(vcpu, val & 0xfUL); ··· 4099 3690 } 4100 3691 return best; 4101 3692 } 3693 + EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); 4102 3694 4103 3695 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) 4104 3696 { ··· 4183 3773 static void vapic_exit(struct kvm_vcpu *vcpu) 4184 3774 { 4185 3775 struct kvm_lapic *apic = vcpu->arch.apic; 3776 + int idx; 4186 3777 4187 3778 if (!apic || !apic->vapic_addr) 4188 3779 return; 4189 3780 4190 - down_read(&vcpu->kvm->slots_lock); 3781 + idx = srcu_read_lock(&vcpu->kvm->srcu); 4191 3782 kvm_release_page_dirty(apic->vapic_page); 4192 3783 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 4193 - up_read(&vcpu->kvm->slots_lock); 3784 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 4194 3785 } 4195 3786 4196 3787 static void update_cr8_intercept(struct kvm_vcpu *vcpu) ··· 4287 3876 r = 0; 4288 3877 goto out; 4289 3878 } 3879 + if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { 3880 + vcpu->fpu_active = 0; 3881 + kvm_x86_ops->fpu_deactivate(vcpu); 3882 + } 4290 3883 } 4291 3884 4292 3885 preempt_disable(); 4293 3886 4294 3887 kvm_x86_ops->prepare_guest_switch(vcpu); 4295 - kvm_load_guest_fpu(vcpu); 3888 + if (vcpu->fpu_active) 3889 + kvm_load_guest_fpu(vcpu); 4296 3890 4297 3891 local_irq_disable(); 4298 3892 ··· 4325 3909 
kvm_lapic_sync_to_vapic(vcpu); 4326 3910 } 4327 3911 4328 - up_read(&vcpu->kvm->slots_lock); 3912 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4329 3913 4330 3914 kvm_guest_enter(); 4331 3915 ··· 4367 3951 4368 3952 preempt_enable(); 4369 3953 4370 - down_read(&vcpu->kvm->slots_lock); 3954 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4371 3955 4372 3956 /* 4373 3957 * Profile KVM exit RIPs: ··· 4389 3973 static int __vcpu_run(struct kvm_vcpu *vcpu) 4390 3974 { 4391 3975 int r; 3976 + struct kvm *kvm = vcpu->kvm; 4392 3977 4393 3978 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { 4394 3979 pr_debug("vcpu %d received sipi with vector # %x\n", ··· 4401 3984 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4402 3985 } 4403 3986 4404 - down_read(&vcpu->kvm->slots_lock); 3987 + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4405 3988 vapic_enter(vcpu); 4406 3989 4407 3990 r = 1; ··· 4409 3992 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 4410 3993 r = vcpu_enter_guest(vcpu); 4411 3994 else { 4412 - up_read(&vcpu->kvm->slots_lock); 3995 + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4413 3996 kvm_vcpu_block(vcpu); 4414 - down_read(&vcpu->kvm->slots_lock); 3997 + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4415 3998 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 4416 3999 { 4417 4000 switch(vcpu->arch.mp_state) { ··· 4446 4029 ++vcpu->stat.signal_exits; 4447 4030 } 4448 4031 if (need_resched()) { 4449 - up_read(&vcpu->kvm->slots_lock); 4032 + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4450 4033 kvm_resched(vcpu); 4451 - down_read(&vcpu->kvm->slots_lock); 4034 + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4452 4035 } 4453 4036 } 4454 4037 4455 - up_read(&vcpu->kvm->slots_lock); 4038 + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4456 4039 post_kvm_run_save(vcpu); 4457 4040 4458 4041 vapic_exit(vcpu); ··· 4491 4074 vcpu->mmio_read_completed = 1; 4492 4075 vcpu->mmio_needed = 0; 4493 4076 4494 - down_read(&vcpu->kvm->slots_lock); 4077 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4495 4078 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, 4496 4079 EMULTYPE_NO_DECODE); 4497 - up_read(&vcpu->kvm->slots_lock); 4080 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4498 4081 if (r == EMULATE_DO_MMIO) { 4499 4082 /* 4500 4083 * Read-modify-write. Back to userspace. 
··· 4621 4204 sregs->gdt.limit = dt.limit; 4622 4205 sregs->gdt.base = dt.base; 4623 4206 4624 - kvm_x86_ops->decache_cr4_guest_bits(vcpu); 4625 - sregs->cr0 = vcpu->arch.cr0; 4207 + sregs->cr0 = kvm_read_cr0(vcpu); 4626 4208 sregs->cr2 = vcpu->arch.cr2; 4627 4209 sregs->cr3 = vcpu->arch.cr3; 4628 - sregs->cr4 = vcpu->arch.cr4; 4210 + sregs->cr4 = kvm_read_cr4(vcpu); 4629 4211 sregs->cr8 = kvm_get_cr8(vcpu); 4630 - sregs->efer = vcpu->arch.shadow_efer; 4212 + sregs->efer = vcpu->arch.efer; 4631 4213 sregs->apic_base = kvm_get_apic_base(vcpu); 4632 4214 4633 4215 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); ··· 4714 4298 { 4715 4299 struct descriptor_table dtable; 4716 4300 u16 index = selector >> 3; 4301 + int ret; 4302 + u32 err; 4303 + gva_t addr; 4717 4304 4718 4305 get_segment_descriptor_dtable(vcpu, selector, &dtable); 4719 4306 4720 4307 if (dtable.limit < index * 8 + 7) { 4721 4308 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 4722 - return 1; 4309 + return X86EMUL_PROPAGATE_FAULT; 4723 4310 } 4724 - return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4311 + addr = dtable.base + index * 8; 4312 + ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), 4313 + vcpu, &err); 4314 + if (ret == X86EMUL_PROPAGATE_FAULT) 4315 + kvm_inject_page_fault(vcpu, addr, err); 4316 + 4317 + return ret; 4725 4318 } 4726 4319 4727 4320 /* allowed just for 8 bytes segments */ ··· 4744 4319 4745 4320 if (dtable.limit < index * 8 + 7) 4746 4321 return 1; 4747 - return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4322 + return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); 4748 4323 } 4749 4324 4750 - static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, 4325 + static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, 4326 + struct desc_struct *seg_desc) 4327 + { 4328 + u32 base_addr = get_desc_base(seg_desc); 4329 + 4330 + return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); 4331 + } 4332 + 4333 + static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, 4751 4334 struct desc_struct *seg_desc) 4752 4335 { 4753 4336 u32 base_addr = get_desc_base(seg_desc); 4754 4337 4755 - return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); 4338 + return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); 4756 4339 } 4757 4340 4758 4341 static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) ··· 4769 4336 4770 4337 kvm_get_segment(vcpu, &kvm_seg, seg); 4771 4338 return kvm_seg.selector; 4772 - } 4773 - 4774 - static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, 4775 - u16 selector, 4776 - struct kvm_segment *kvm_seg) 4777 - { 4778 - struct desc_struct seg_desc; 4779 - 4780 - if (load_guest_segment_descriptor(vcpu, selector, &seg_desc)) 4781 - return 1; 4782 - seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg); 4783 - return 0; 4784 4339 } 4785 4340 4786 4341 static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) ··· 4788 4367 .unusable = 0, 4789 4368 }; 4790 4369 kvm_x86_ops->set_segment(vcpu, &segvar, seg); 4791 - return 0; 4370 + return X86EMUL_CONTINUE; 4792 4371 } 4793 4372 4794 4373 static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) ··· 4798 4377 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); 4799 4378 } 4800 4379 4801 - int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4802 - int type_bits, int seg) 4380 + int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) 4803 
4381 { 4804 4382 struct kvm_segment kvm_seg; 4383 + struct desc_struct seg_desc; 4384 + u8 dpl, rpl, cpl; 4385 + unsigned err_vec = GP_VECTOR; 4386 + u32 err_code = 0; 4387 + bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ 4388 + int ret; 4805 4389 4806 - if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE)) 4390 + if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) 4807 4391 return kvm_load_realmode_segment(vcpu, selector, seg); 4808 - if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) 4809 - return 1; 4810 - kvm_seg.type |= type_bits; 4811 4392 4812 - if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && 4813 - seg != VCPU_SREG_LDTR) 4814 - if (!kvm_seg.s) 4815 - kvm_seg.unusable = 1; 4393 + /* NULL selector is not valid for TR, CS and SS */ 4394 + if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) 4395 + && null_selector) 4396 + goto exception; 4816 4397 4398 + /* TR should be in GDT only */ 4399 + if (seg == VCPU_SREG_TR && (selector & (1 << 2))) 4400 + goto exception; 4401 + 4402 + ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); 4403 + if (ret) 4404 + return ret; 4405 + 4406 + seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); 4407 + 4408 + if (null_selector) { /* for NULL selector skip all following checks */ 4409 + kvm_seg.unusable = 1; 4410 + goto load; 4411 + } 4412 + 4413 + err_code = selector & 0xfffc; 4414 + err_vec = GP_VECTOR; 4415 + 4416 + /* can't load system descriptor into segment selecor */ 4417 + if (seg <= VCPU_SREG_GS && !kvm_seg.s) 4418 + goto exception; 4419 + 4420 + if (!kvm_seg.present) { 4421 + err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; 4422 + goto exception; 4423 + } 4424 + 4425 + rpl = selector & 3; 4426 + dpl = kvm_seg.dpl; 4427 + cpl = kvm_x86_ops->get_cpl(vcpu); 4428 + 4429 + switch (seg) { 4430 + case VCPU_SREG_SS: 4431 + /* 4432 + * segment is not a writable data segment or segment 4433 + * selector's RPL != CPL or segment selector's RPL != CPL 4434 + */ 4435 + if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) 4436 + goto exception; 4437 + break; 4438 + case VCPU_SREG_CS: 4439 + if (!(kvm_seg.type & 8)) 4440 + goto exception; 4441 + 4442 + if (kvm_seg.type & 4) { 4443 + /* conforming */ 4444 + if (dpl > cpl) 4445 + goto exception; 4446 + } else { 4447 + /* nonconforming */ 4448 + if (rpl > cpl || dpl != cpl) 4449 + goto exception; 4450 + } 4451 + /* CS(RPL) <- CPL */ 4452 + selector = (selector & 0xfffc) | cpl; 4453 + break; 4454 + case VCPU_SREG_TR: 4455 + if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) 4456 + goto exception; 4457 + break; 4458 + case VCPU_SREG_LDTR: 4459 + if (kvm_seg.s || kvm_seg.type != 2) 4460 + goto exception; 4461 + break; 4462 + default: /* DS, ES, FS, or GS */ 4463 + /* 4464 + * segment is not a data or readable code segment or 4465 + * ((segment is a data or nonconforming code segment) 4466 + * and (both RPL and CPL > DPL)) 4467 + */ 4468 + if ((kvm_seg.type & 0xa) == 0x8 || 4469 + (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) 4470 + goto exception; 4471 + break; 4472 + } 4473 + 4474 + if (!kvm_seg.unusable && kvm_seg.s) { 4475 + /* mark segment as accessed */ 4476 + kvm_seg.type |= 1; 4477 + seg_desc.type |= 1; 4478 + save_guest_segment_descriptor(vcpu, selector, &seg_desc); 4479 + } 4480 + load: 4817 4481 kvm_set_segment(vcpu, &kvm_seg, seg); 4818 - return 0; 4482 + return X86EMUL_CONTINUE; 4483 + exception: 4484 + kvm_queue_exception_e(vcpu, err_vec, err_code); 4485 + return 
X86EMUL_PROPAGATE_FAULT; 4819 4486 } 4820 4487 4821 4488 static void save_state_to_tss32(struct kvm_vcpu *vcpu, ··· 4929 4420 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); 4930 4421 } 4931 4422 4423 + static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) 4424 + { 4425 + struct kvm_segment kvm_seg; 4426 + kvm_get_segment(vcpu, &kvm_seg, seg); 4427 + kvm_seg.selector = sel; 4428 + kvm_set_segment(vcpu, &kvm_seg, seg); 4429 + } 4430 + 4932 4431 static int load_state_from_tss32(struct kvm_vcpu *vcpu, 4933 4432 struct tss_segment_32 *tss) 4934 4433 { ··· 4954 4437 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); 4955 4438 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); 4956 4439 4957 - if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) 4440 + /* 4441 + * SDM says that segment selectors are loaded before segment 4442 + * descriptors 4443 + */ 4444 + kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); 4445 + kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); 4446 + kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); 4447 + kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); 4448 + kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); 4449 + kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); 4450 + kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); 4451 + 4452 + /* 4453 + * Now load segment descriptors. If fault happenes at this stage 4454 + * it is handled in a context of new task 4455 + */ 4456 + if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) 4958 4457 return 1; 4959 4458 4960 - if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 4459 + if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) 4961 4460 return 1; 4962 4461 4963 - if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 4462 + if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) 4964 4463 return 1; 4965 4464 4966 - if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 4465 + if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) 4967 4466 return 1; 4968 4467 4969 - if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 4468 + if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) 4970 4469 return 1; 4971 4470 4972 - if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) 4471 + if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) 4973 4472 return 1; 4974 4473 4975 - if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) 4474 + if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) 4976 4475 return 1; 4977 4476 return 0; 4978 4477 } ··· 5028 4495 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); 5029 4496 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); 5030 4497 5031 - if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) 4498 + /* 4499 + * SDM says that segment selectors are loaded before segment 4500 + * descriptors 4501 + */ 4502 + kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); 4503 + kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); 4504 + kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); 4505 + kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); 4506 + kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); 4507 + 4508 + /* 4509 + * Now load segment descriptors. 
If fault happenes at this stage 4510 + * it is handled in a context of new task 4511 + */ 4512 + if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) 5032 4513 return 1; 5033 4514 5034 - if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 4515 + if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) 5035 4516 return 1; 5036 4517 5037 - if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 4518 + if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) 5038 4519 return 1; 5039 4520 5040 - if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 4521 + if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) 5041 4522 return 1; 5042 4523 5043 - if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 4524 + if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) 5044 4525 return 1; 5045 4526 return 0; 5046 4527 } ··· 5076 4529 sizeof tss_segment_16)) 5077 4530 goto out; 5078 4531 5079 - if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 4532 + if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), 5080 4533 &tss_segment_16, sizeof tss_segment_16)) 5081 4534 goto out; 5082 4535 ··· 5084 4537 tss_segment_16.prev_task_link = old_tss_sel; 5085 4538 5086 4539 if (kvm_write_guest(vcpu->kvm, 5087 - get_tss_base_addr(vcpu, nseg_desc), 4540 + get_tss_base_addr_write(vcpu, nseg_desc), 5088 4541 &tss_segment_16.prev_task_link, 5089 4542 sizeof tss_segment_16.prev_task_link)) 5090 4543 goto out; ··· 5115 4568 sizeof tss_segment_32)) 5116 4569 goto out; 5117 4570 5118 - if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 4571 + if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), 5119 4572 &tss_segment_32, sizeof tss_segment_32)) 5120 4573 goto out; 5121 4574 ··· 5123 4576 tss_segment_32.prev_task_link = old_tss_sel; 5124 4577 5125 4578 if (kvm_write_guest(vcpu->kvm, 5126 - get_tss_base_addr(vcpu, nseg_desc), 4579 + get_tss_base_addr_write(vcpu, nseg_desc), 5127 4580 &tss_segment_32.prev_task_link, 5128 4581 sizeof tss_segment_32.prev_task_link)) 5129 4582 goto out; ··· 5146 4599 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); 5147 4600 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5148 4601 5149 - old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); 4602 + old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); 5150 4603 5151 4604 /* FIXME: Handle errors. Failure to read either TSS or their 5152 4605 * descriptors should generate a pagefault. 
··· 5205 4658 &nseg_desc); 5206 4659 } 5207 4660 5208 - kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); 4661 + kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); 5209 4662 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); 5210 4663 tr_seg.type = 11; 5211 4664 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); ··· 5236 4689 5237 4690 kvm_set_cr8(vcpu, sregs->cr8); 5238 4691 5239 - mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; 4692 + mmu_reset_needed |= vcpu->arch.efer != sregs->efer; 5240 4693 kvm_x86_ops->set_efer(vcpu, sregs->efer); 5241 4694 kvm_set_apic_base(vcpu, sregs->apic_base); 5242 4695 5243 - kvm_x86_ops->decache_cr4_guest_bits(vcpu); 5244 - 5245 - mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0; 4696 + mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; 5246 4697 kvm_x86_ops->set_cr0(vcpu, sregs->cr0); 5247 4698 vcpu->arch.cr0 = sregs->cr0; 5248 4699 5249 - mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; 4700 + mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; 5250 4701 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5251 4702 if (!is_long_mode(vcpu) && is_pae(vcpu)) { 5252 4703 load_pdptrs(vcpu, vcpu->arch.cr3); ··· 5279 4734 /* Older userspace won't unhalt the vcpu on reset. */ 5280 4735 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && 5281 4736 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && 5282 - !(vcpu->arch.cr0 & X86_CR0_PE)) 4737 + !is_protmode(vcpu)) 5283 4738 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5284 4739 5285 4740 vcpu_put(vcpu); ··· 5377 4832 { 5378 4833 unsigned long vaddr = tr->linear_address; 5379 4834 gpa_t gpa; 4835 + int idx; 5380 4836 5381 4837 vcpu_load(vcpu); 5382 - down_read(&vcpu->kvm->slots_lock); 5383 - gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); 5384 - up_read(&vcpu->kvm->slots_lock); 4838 + idx = srcu_read_lock(&vcpu->kvm->srcu); 4839 + gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); 4840 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 5385 4841 tr->physical_address = gpa; 5386 4842 tr->valid = gpa != UNMAPPED_GVA; 5387 4843 tr->writeable = 1; ··· 5463 4917 5464 4918 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5465 4919 { 5466 - if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) 4920 + if (vcpu->guest_fpu_loaded) 5467 4921 return; 5468 4922 5469 4923 vcpu->guest_fpu_loaded = 1; 5470 4924 kvm_fx_save(&vcpu->arch.host_fx_image); 5471 4925 kvm_fx_restore(&vcpu->arch.guest_fx_image); 4926 + trace_kvm_fpu(1); 5472 4927 } 5473 - EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); 5474 4928 5475 4929 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5476 4930 { ··· 5481 4935 kvm_fx_save(&vcpu->arch.guest_fx_image); 5482 4936 kvm_fx_restore(&vcpu->arch.host_fx_image); 5483 4937 ++vcpu->stat.fpu_reload; 4938 + set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); 4939 + trace_kvm_fpu(0); 5484 4940 } 5485 - EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); 5486 4941 5487 4942 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 5488 4943 { ··· 5635 5088 5636 5089 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 5637 5090 { 5091 + int idx; 5092 + 5638 5093 kfree(vcpu->arch.mce_banks); 5639 5094 kvm_free_lapic(vcpu); 5640 - down_read(&vcpu->kvm->slots_lock); 5095 + idx = srcu_read_lock(&vcpu->kvm->srcu); 5641 5096 kvm_mmu_destroy(vcpu); 5642 - up_read(&vcpu->kvm->slots_lock); 5097 + srcu_read_unlock(&vcpu->kvm->srcu, idx); 5643 5098 free_page((unsigned long)vcpu->arch.pio_data); 5644 5099 } 5645 5100 ··· 5651 5102 5652 5103 if (!kvm) 5653 5104 return ERR_PTR(-ENOMEM); 5105 + 5106 + kvm->arch.aliases = kzalloc(sizeof(struct 
kvm_mem_aliases), GFP_KERNEL); 5107 + if (!kvm->arch.aliases) { 5108 + kfree(kvm); 5109 + return ERR_PTR(-ENOMEM); 5110 + } 5654 5111 5655 5112 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5656 5113 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); ··· 5714 5159 put_page(kvm->arch.apic_access_page); 5715 5160 if (kvm->arch.ept_identity_pagetable) 5716 5161 put_page(kvm->arch.ept_identity_pagetable); 5162 + cleanup_srcu_struct(&kvm->srcu); 5163 + kfree(kvm->arch.aliases); 5717 5164 kfree(kvm); 5718 5165 } 5719 5166 5720 - int kvm_arch_set_memory_region(struct kvm *kvm, 5721 - struct kvm_userspace_memory_region *mem, 5167 + int kvm_arch_prepare_memory_region(struct kvm *kvm, 5168 + struct kvm_memory_slot *memslot, 5722 5169 struct kvm_memory_slot old, 5170 + struct kvm_userspace_memory_region *mem, 5723 5171 int user_alloc) 5724 5172 { 5725 - int npages = mem->memory_size >> PAGE_SHIFT; 5726 - struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; 5173 + int npages = memslot->npages; 5727 5174 5728 5175 /*To keep backward compatibility with older userspace, 5729 5176 *x86 needs to hanlde !user_alloc case. ··· 5745 5188 if (IS_ERR((void *)userspace_addr)) 5746 5189 return PTR_ERR((void *)userspace_addr); 5747 5190 5748 - /* set userspace_addr atomically for kvm_hva_to_rmapp */ 5749 - spin_lock(&kvm->mmu_lock); 5750 5191 memslot->userspace_addr = userspace_addr; 5751 - spin_unlock(&kvm->mmu_lock); 5752 - } else { 5753 - if (!old.user_alloc && old.rmap) { 5754 - int ret; 5755 - 5756 - down_write(&current->mm->mmap_sem); 5757 - ret = do_munmap(current->mm, old.userspace_addr, 5758 - old.npages * PAGE_SIZE); 5759 - up_write(&current->mm->mmap_sem); 5760 - if (ret < 0) 5761 - printk(KERN_WARNING 5762 - "kvm_vm_ioctl_set_memory_region: " 5763 - "failed to munmap memory\n"); 5764 - } 5765 5192 } 5193 + } 5194 + 5195 + 5196 + return 0; 5197 + } 5198 + 5199 + void kvm_arch_commit_memory_region(struct kvm *kvm, 5200 + struct kvm_userspace_memory_region *mem, 5201 + struct kvm_memory_slot old, 5202 + int user_alloc) 5203 + { 5204 + 5205 + int npages = mem->memory_size >> PAGE_SHIFT; 5206 + 5207 + if (!user_alloc && !old.user_alloc && old.rmap && !npages) { 5208 + int ret; 5209 + 5210 + down_write(&current->mm->mmap_sem); 5211 + ret = do_munmap(current->mm, old.userspace_addr, 5212 + old.npages * PAGE_SIZE); 5213 + up_write(&current->mm->mmap_sem); 5214 + if (ret < 0) 5215 + printk(KERN_WARNING 5216 + "kvm_vm_ioctl_set_memory_region: " 5217 + "failed to munmap memory\n"); 5766 5218 } 5767 5219 5768 5220 spin_lock(&kvm->mmu_lock); ··· 5782 5216 5783 5217 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 5784 5218 spin_unlock(&kvm->mmu_lock); 5785 - 5786 - return 0; 5787 5219 } 5788 5220 5789 5221 void kvm_arch_flush_shadow(struct kvm *kvm)
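kvm_put_guest_fpu() above no longer touches hardware state on its own; it only queues KVM_REQ_DEACTIVATE_FPU. A minimal sketch of how the guest-entry path is expected to consume that request; the fpu_deactivate callback and the fpu_active bookkeeping are assumptions drawn from the surrounding x86 code, not shown in this hunk:

/* Sketch only: consuming the request queued by kvm_put_guest_fpu().
 * kvm_x86_ops->fpu_deactivate is assumed to be the vendor hook that
 * re-arms the FPU trap so a lazy reload can happen later. */
static void sketch_check_fpu_request(struct kvm_vcpu *vcpu)
{
	if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
		vcpu->fpu_active = 0;			/* stop eager loading */
		kvm_x86_ops->fpu_deactivate(vcpu);	/* assumed callback */
	}
}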
+30
arch/x86/kvm/x86.h
··· 2 2 #define ARCH_X86_KVM_X86_H
3 3
4 4 #include <linux/kvm_host.h>
5 + #include "kvm_cache_regs.h"
5 6
6 7 static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
7 8 {
··· 35 34
36 35 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
37 36 u32 function, u32 index);
37 +
38 + static inline bool is_protmode(struct kvm_vcpu *vcpu)
39 + {
40 + return kvm_read_cr0_bits(vcpu, X86_CR0_PE);
41 + }
42 +
43 + static inline int is_long_mode(struct kvm_vcpu *vcpu)
44 + {
45 + #ifdef CONFIG_X86_64
46 + return vcpu->arch.efer & EFER_LMA;
47 + #else
48 + return 0;
49 + #endif
50 + }
51 +
52 + static inline int is_pae(struct kvm_vcpu *vcpu)
53 + {
54 + return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
55 + }
56 +
57 + static inline int is_pse(struct kvm_vcpu *vcpu)
58 + {
59 + return kvm_read_cr4_bits(vcpu, X86_CR4_PSE);
60 + }
61 +
62 + static inline int is_paging(struct kvm_vcpu *vcpu)
63 + {
64 + return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
65 + }
38 66
39 67 #endif
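The helpers added above funnel every mode check through the cached-register accessors, so callers no longer read CR0/CR4 out of vcpu->arch directly. A hypothetical illustration of how a caller might combine them; the enum and function below are not part of the patch:

/* Hypothetical user of the new predicates; shows intent only. */
enum sketch_paging_mode { PM_NONE, PM_32BIT, PM_PAE, PM_LONG };

static inline enum sketch_paging_mode sketch_paging_mode(struct kvm_vcpu *vcpu)
{
	if (!is_paging(vcpu))
		return PM_NONE;			/* paging disabled */
	if (is_long_mode(vcpu))
		return PM_LONG;			/* EFER.LMA set */
	return is_pae(vcpu) ? PM_PAE : PM_32BIT;
}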
+8 -2
include/linux/kvm.h
··· 103 103 104 104 /* for kvm_memory_region::flags */ 105 105 #define KVM_MEM_LOG_DIRTY_PAGES 1UL 106 - 106 + #define KVM_MEMSLOT_INVALID (1UL << 1) 107 107 108 108 /* for KVM_IRQ_LINE */ 109 109 struct kvm_irq_level { ··· 497 497 #endif 498 498 #define KVM_CAP_S390_PSW 42 499 499 #define KVM_CAP_PPC_SEGSTATE 43 500 + #define KVM_CAP_HYPERV 44 501 + #define KVM_CAP_HYPERV_VAPIC 45 502 + #define KVM_CAP_HYPERV_SPIN 46 503 + #define KVM_CAP_PCI_SEGMENT 47 504 + #define KVM_CAP_X86_ROBUST_SINGLESTEP 51 500 505 501 506 #ifdef KVM_CAP_IRQ_ROUTING 502 507 ··· 696 691 __u32 busnr; 697 692 __u32 devfn; 698 693 __u32 flags; 694 + __u32 segnr; 699 695 union { 700 - __u32 reserved[12]; 696 + __u32 reserved[11]; 701 697 }; 702 698 }; 703 699
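The new capability numbers are visible to userspace through the usual KVM_CHECK_EXTENSION system ioctl. A small stand-alone probe, with fallback defines in case the installed <linux/kvm.h> predates this merge (the fallback values are simply the ones from the hunk above):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#ifndef KVM_CAP_HYPERV
#define KVM_CAP_HYPERV 44
#endif
#ifndef KVM_CAP_X86_ROBUST_SINGLESTEP
#define KVM_CAP_X86_ROBUST_SINGLESTEP 51
#endif

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0) {
		perror("/dev/kvm");
		return 1;
	}
	/* KVM_CHECK_EXTENSION returns 0 when a capability is absent. */
	printf("HYPERV: %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_HYPERV));
	printf("X86_ROBUST_SINGLESTEP: %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_X86_ROBUST_SINGLESTEP));
	return 0;
}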
+44 -27
include/linux/kvm_host.h
··· 38 38 #define KVM_REQ_MMU_SYNC 7 39 39 #define KVM_REQ_KVMCLOCK_UPDATE 8 40 40 #define KVM_REQ_KICK 9 41 + #define KVM_REQ_DEACTIVATE_FPU 10 41 42 42 43 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 43 44 ··· 58 57 struct kvm_io_device *devs[NR_IOBUS_DEVS]; 59 58 }; 60 59 61 - void kvm_io_bus_init(struct kvm_io_bus *bus); 62 - void kvm_io_bus_destroy(struct kvm_io_bus *bus); 63 - int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len, 64 - const void *val); 65 - int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, 60 + enum kvm_bus { 61 + KVM_MMIO_BUS, 62 + KVM_PIO_BUS, 63 + KVM_NR_BUSES 64 + }; 65 + 66 + int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 67 + int len, const void *val); 68 + int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, 66 69 void *val); 67 - int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, 68 - struct kvm_io_device *dev); 69 - int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, 70 + int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, 70 71 struct kvm_io_device *dev); 71 - void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, 72 - struct kvm_io_device *dev); 73 - void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus, 74 - struct kvm_io_device *dev); 72 + int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 73 + struct kvm_io_device *dev); 75 74 76 75 struct kvm_vcpu { 77 76 struct kvm *kvm; ··· 84 83 struct kvm_run *run; 85 84 unsigned long requests; 86 85 unsigned long guest_debug; 86 + int srcu_idx; 87 + 87 88 int fpu_active; 88 89 int guest_fpu_loaded; 89 90 wait_queue_head_t wq; ··· 153 150 154 151 #endif 155 152 156 - struct kvm { 157 - spinlock_t mmu_lock; 158 - spinlock_t requests_lock; 159 - struct rw_semaphore slots_lock; 160 - struct mm_struct *mm; /* userspace tied to this vm */ 153 + struct kvm_memslots { 161 154 int nmemslots; 162 155 struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + 163 156 KVM_PRIVATE_MEM_SLOTS]; 157 + }; 158 + 159 + struct kvm { 160 + spinlock_t mmu_lock; 161 + raw_spinlock_t requests_lock; 162 + struct mutex slots_lock; 163 + struct mm_struct *mm; /* userspace tied to this vm */ 164 + struct kvm_memslots *memslots; 165 + struct srcu_struct srcu; 164 166 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 165 167 u32 bsp_vcpu_id; 166 168 struct kvm_vcpu *bsp_vcpu; ··· 174 166 atomic_t online_vcpus; 175 167 struct list_head vm_list; 176 168 struct mutex lock; 177 - struct kvm_io_bus mmio_bus; 178 - struct kvm_io_bus pio_bus; 169 + struct kvm_io_bus *buses[KVM_NR_BUSES]; 179 170 #ifdef CONFIG_HAVE_KVM_EVENTFD 180 171 struct { 181 172 spinlock_t lock; ··· 256 249 int __kvm_set_memory_region(struct kvm *kvm, 257 250 struct kvm_userspace_memory_region *mem, 258 251 int user_alloc); 259 - int kvm_arch_set_memory_region(struct kvm *kvm, 252 + int kvm_arch_prepare_memory_region(struct kvm *kvm, 253 + struct kvm_memory_slot *memslot, 254 + struct kvm_memory_slot old, 255 + struct kvm_userspace_memory_region *mem, 256 + int user_alloc); 257 + void kvm_arch_commit_memory_region(struct kvm *kvm, 260 258 struct kvm_userspace_memory_region *mem, 261 259 struct kvm_memory_slot old, 262 260 int user_alloc); 263 261 void kvm_disable_largepages(void); 264 262 void kvm_arch_flush_shadow(struct kvm *kvm); 265 263 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); 264 + gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn); 265 + 266 266 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); 267 267 unsigned long gfn_to_hva(struct 
kvm *kvm, gfn_t gfn); 268 268 void kvm_release_page_clean(struct page *page); ··· 278 264 void kvm_set_page_accessed(struct page *page); 279 265 280 266 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); 267 + pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 268 + struct kvm_memory_slot *slot, gfn_t gfn); 269 + int memslot_id(struct kvm *kvm, gfn_t gfn); 281 270 void kvm_release_pfn_dirty(pfn_t); 282 271 void kvm_release_pfn_clean(pfn_t pfn); 283 272 void kvm_set_pfn_dirty(pfn_t pfn); ··· 300 283 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); 301 284 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); 302 285 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); 286 + unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); 303 287 void mark_page_dirty(struct kvm *kvm, gfn_t gfn); 304 288 305 289 void kvm_vcpu_block(struct kvm_vcpu *vcpu); ··· 401 383 struct work_struct interrupt_work; 402 384 struct list_head list; 403 385 int assigned_dev_id; 386 + int host_segnr; 404 387 int host_busnr; 405 388 int host_devfn; 406 389 unsigned int entries_nr; ··· 448 429 #define KVM_IOMMU_CACHE_COHERENCY 0x1 449 430 450 431 #ifdef CONFIG_IOMMU_API 451 - int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, 452 - unsigned long npages); 432 + int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); 453 433 int kvm_iommu_map_guest(struct kvm *kvm); 454 434 int kvm_iommu_unmap_guest(struct kvm *kvm); 455 435 int kvm_assign_device(struct kvm *kvm, ··· 498 480 current->flags &= ~PF_VCPU; 499 481 } 500 482 501 - static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) 502 - { 503 - return slot - kvm->memslots; 504 - } 505 - 506 483 static inline gpa_t gfn_to_gpa(gfn_t gfn) 507 484 { 508 485 return (gpa_t)gfn << PAGE_SHIFT; ··· 543 530 return 1; 544 531 return 0; 545 532 } 533 + #endif 534 + 535 + #ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION 536 + #define unalias_gfn_instantiation unalias_gfn 546 537 #endif 547 538 548 539 #ifdef CONFIG_HAVE_KVM_IRQCHIP
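With kvm->buses[] indexed by the new enum kvm_bus, an in-kernel device is registered once under kvm->slots_lock and afterwards dispatched to by kvm_io_bus_write()/read() under SRCU. A rough sketch of a device sitting on KVM_MMIO_BUS; the device and its base address are hypothetical, and the kvm_io_device_ops layout is assumed to match virt/kvm/iodev.h:

#define SKETCH_MMIO_BASE 0xfed00000ULL	/* arbitrary guest-physical address */

struct sketch_dev {
	struct kvm_io_device dev;
	u32 reg;
};

static int sketch_mmio_write(struct kvm_io_device *this, gpa_t addr,
			     int len, const void *val)
{
	struct sketch_dev *s = container_of(this, struct sketch_dev, dev);

	if (addr != SKETCH_MMIO_BASE || len != 4)
		return -EOPNOTSUPP;	/* not ours; let the bus keep looking */
	s->reg = *(const u32 *)val;
	return 0;
}

static const struct kvm_io_device_ops sketch_mmio_ops = {
	.write = sketch_mmio_write,
};

static int sketch_register(struct kvm *kvm, struct sketch_dev *s)
{
	int r;

	kvm_iodevice_init(&s->dev, &sketch_mmio_ops);
	mutex_lock(&kvm->slots_lock);	/* registration now wants slots_lock */
	r = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &s->dev);
	mutex_unlock(&kvm->slots_lock);
	return r;
}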
+41
include/trace/events/kvm.h
··· 145 145 __entry->len, __entry->gpa, __entry->val)
146 146 );
147 147
148 + #define kvm_fpu_load_symbol \
149 + {0, "unload"}, \
150 + {1, "load"}
151 +
152 + TRACE_EVENT(kvm_fpu,
153 + TP_PROTO(int load),
154 + TP_ARGS(load),
155 +
156 + TP_STRUCT__entry(
157 + __field( u32, load )
158 + ),
159 +
160 + TP_fast_assign(
161 + __entry->load = load;
162 + ),
163 +
164 + TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol))
165 + );
166 +
167 + TRACE_EVENT(kvm_age_page,
168 + TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref),
169 + TP_ARGS(hva, slot, ref),
170 +
171 + TP_STRUCT__entry(
172 + __field( u64, hva )
173 + __field( u64, gfn )
174 + __field( u8, referenced )
175 + ),
176 +
177 + TP_fast_assign(
178 + __entry->hva = hva;
179 + __entry->gfn =
180 + slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
181 + __entry->referenced = ref;
182 + ),
183 +
184 + TP_printk("hva %llx gfn %llx %s",
185 + __entry->hva, __entry->gfn,
186 + __entry->referenced ? "YOUNG" : "OLD")
187 + );
188 +
148 189 #endif /* _TRACE_KVM_MAIN_H */
149 190
150 191 /* This part must be outside protection */
+3
virt/kvm/Kconfig
··· 12 12
13 13 config KVM_APIC_ARCHITECTURE
14 14 bool
15 +
16 + config KVM_MMIO
17 + bool
+7 -5
virt/kvm/assigned-dev.c
··· 504 504 static int kvm_vm_ioctl_assign_device(struct kvm *kvm, 505 505 struct kvm_assigned_pci_dev *assigned_dev) 506 506 { 507 - int r = 0; 507 + int r = 0, idx; 508 508 struct kvm_assigned_dev_kernel *match; 509 509 struct pci_dev *dev; 510 510 511 511 mutex_lock(&kvm->lock); 512 - down_read(&kvm->slots_lock); 512 + idx = srcu_read_lock(&kvm->srcu); 513 513 514 514 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, 515 515 assigned_dev->assigned_dev_id); ··· 526 526 r = -ENOMEM; 527 527 goto out; 528 528 } 529 - dev = pci_get_bus_and_slot(assigned_dev->busnr, 529 + dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, 530 + assigned_dev->busnr, 530 531 assigned_dev->devfn); 531 532 if (!dev) { 532 533 printk(KERN_INFO "%s: host device not found\n", __func__); ··· 549 548 pci_reset_function(dev); 550 549 551 550 match->assigned_dev_id = assigned_dev->assigned_dev_id; 551 + match->host_segnr = assigned_dev->segnr; 552 552 match->host_busnr = assigned_dev->busnr; 553 553 match->host_devfn = assigned_dev->devfn; 554 554 match->flags = assigned_dev->flags; ··· 575 573 } 576 574 577 575 out: 578 - up_read(&kvm->slots_lock); 576 + srcu_read_unlock(&kvm->srcu, idx); 579 577 mutex_unlock(&kvm->lock); 580 578 return r; 581 579 out_list_del: ··· 587 585 pci_dev_put(dev); 588 586 out_free: 589 587 kfree(match); 590 - up_read(&kvm->slots_lock); 588 + srcu_read_unlock(&kvm->srcu, idx); 591 589 mutex_unlock(&kvm->lock); 592 590 return r; 593 591 }
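With segnr plumbed through to pci_get_domain_bus_and_slot(), devices behind a non-zero PCI segment can now be assigned. A minimal userspace sketch of the corresponding ioctl; the device address 0001:02:03.4 and the handle value are arbitrary, and a header new enough to carry the segnr field is assumed:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: assign PCI device 0001:02:03.4 to the VM behind vm_fd. */
static int sketch_assign_device(int vm_fd)
{
	struct kvm_assigned_pci_dev dev = {
		.assigned_dev_id = 1,			/* caller-chosen handle */
		.segnr           = 0x0001,		/* PCI domain (segment) */
		.busnr           = 0x02,
		.devfn           = (0x03 << 3) | 0x4,	/* slot 3, function 4 */
		.flags           = KVM_DEV_ASSIGN_ENABLE_IOMMU,
	};

	return ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
}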
+33 -10
virt/kvm/coalesced_mmio.c
··· 92 92 int kvm_coalesced_mmio_init(struct kvm *kvm) 93 93 { 94 94 struct kvm_coalesced_mmio_dev *dev; 95 + struct page *page; 95 96 int ret; 96 97 98 + ret = -ENOMEM; 99 + page = alloc_page(GFP_KERNEL | __GFP_ZERO); 100 + if (!page) 101 + goto out_err; 102 + kvm->coalesced_mmio_ring = page_address(page); 103 + 104 + ret = -ENOMEM; 97 105 dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); 98 106 if (!dev) 99 - return -ENOMEM; 107 + goto out_free_page; 100 108 spin_lock_init(&dev->lock); 101 109 kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); 102 110 dev->kvm = kvm; 103 111 kvm->coalesced_mmio_dev = dev; 104 112 105 - ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); 113 + mutex_lock(&kvm->slots_lock); 114 + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); 115 + mutex_unlock(&kvm->slots_lock); 106 116 if (ret < 0) 107 - kfree(dev); 117 + goto out_free_dev; 108 118 119 + return ret; 120 + 121 + out_free_dev: 122 + kfree(dev); 123 + out_free_page: 124 + __free_page(page); 125 + out_err: 109 126 return ret; 110 127 } 111 128 129 + void kvm_coalesced_mmio_free(struct kvm *kvm) 130 + { 131 + if (kvm->coalesced_mmio_ring) 132 + free_page((unsigned long)kvm->coalesced_mmio_ring); 133 + } 134 + 112 135 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, 113 - struct kvm_coalesced_mmio_zone *zone) 136 + struct kvm_coalesced_mmio_zone *zone) 114 137 { 115 138 struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; 116 139 117 140 if (dev == NULL) 118 141 return -EINVAL; 119 142 120 - down_write(&kvm->slots_lock); 143 + mutex_lock(&kvm->slots_lock); 121 144 if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { 122 - up_write(&kvm->slots_lock); 145 + mutex_unlock(&kvm->slots_lock); 123 146 return -ENOBUFS; 124 147 } 125 148 126 149 dev->zone[dev->nb_zones] = *zone; 127 150 dev->nb_zones++; 128 151 129 - up_write(&kvm->slots_lock); 152 + mutex_unlock(&kvm->slots_lock); 130 153 return 0; 131 154 } 132 155 ··· 163 140 if (dev == NULL) 164 141 return -EINVAL; 165 142 166 - down_write(&kvm->slots_lock); 143 + mutex_lock(&kvm->slots_lock); 167 144 168 145 i = dev->nb_zones; 169 - while(i) { 146 + while (i) { 170 147 z = &dev->zone[i - 1]; 171 148 172 149 /* unregister all zones ··· 181 158 i--; 182 159 } 183 160 184 - up_write(&kvm->slots_lock); 161 + mutex_unlock(&kvm->slots_lock); 185 162 186 163 return 0; 187 164 }
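From userspace the coalesced-MMIO interface is unchanged by the rework above; only the ring-page allocation moved into this file. For reference, a minimal sketch that registers a 4 KiB coalescing window (the guest-physical address is arbitrary, and the capability is probed on the /dev/kvm fd first since the code can now be compiled out via CONFIG_KVM_MMIO):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: guest stores into [gpa, gpa+4K) are queued in the ring page
 * set up by kvm_coalesced_mmio_init() instead of exiting to userspace. */
static int sketch_coalesce_window(int kvm_fd, int vm_fd, __u64 gpa)
{
	struct kvm_coalesced_mmio_zone zone = {
		.addr = gpa,
		.size = 4096,
	};

	/* returns the ring's page offset in the vcpu mmap when supported */
	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO) <= 0)
		return -1;

	return ioctl(vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
}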
+15
virt/kvm/coalesced_mmio.h
··· 1 + #ifndef __KVM_COALESCED_MMIO_H__
2 + #define __KVM_COALESCED_MMIO_H__
3 +
1 4 /*
2 5 * KVM coalesced MMIO
3 6 *
··· 9 6 * Author: Laurent Vivier <Laurent.Vivier@bull.net>
10 7 *
11 8 */
9 +
10 + #ifdef CONFIG_KVM_MMIO
12 11
13 12 #define KVM_COALESCED_MMIO_ZONE_MAX 100
14 13
··· 23 18 };
24 19
25 20 int kvm_coalesced_mmio_init(struct kvm *kvm);
21 + void kvm_coalesced_mmio_free(struct kvm *kvm);
26 22 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
27 23 struct kvm_coalesced_mmio_zone *zone);
28 24 int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
29 25 struct kvm_coalesced_mmio_zone *zone);
26 +
27 + #else
28 +
29 + static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
30 + static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { }
31 +
32 + #endif
33 +
34 + #endif
+9 -12
virt/kvm/eventfd.c
··· 47 47 int gsi; 48 48 struct list_head list; 49 49 poll_table pt; 50 - wait_queue_head_t *wqh; 51 50 wait_queue_t wait; 52 51 struct work_struct inject; 53 52 struct work_struct shutdown; ··· 158 159 poll_table *pt) 159 160 { 160 161 struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); 161 - 162 - irqfd->wqh = wqh; 163 162 add_wait_queue(wqh, &irqfd->wait); 164 163 } 165 164 ··· 460 463 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 461 464 { 462 465 int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; 463 - struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; 466 + enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; 464 467 struct _ioeventfd *p; 465 468 struct eventfd_ctx *eventfd; 466 469 int ret; ··· 505 508 else 506 509 p->wildcard = true; 507 510 508 - down_write(&kvm->slots_lock); 511 + mutex_lock(&kvm->slots_lock); 509 512 510 513 /* Verify that there isnt a match already */ 511 514 if (ioeventfd_check_collision(kvm, p)) { ··· 515 518 516 519 kvm_iodevice_init(&p->dev, &ioeventfd_ops); 517 520 518 - ret = __kvm_io_bus_register_dev(bus, &p->dev); 521 + ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); 519 522 if (ret < 0) 520 523 goto unlock_fail; 521 524 522 525 list_add_tail(&p->list, &kvm->ioeventfds); 523 526 524 - up_write(&kvm->slots_lock); 527 + mutex_unlock(&kvm->slots_lock); 525 528 526 529 return 0; 527 530 528 531 unlock_fail: 529 - up_write(&kvm->slots_lock); 532 + mutex_unlock(&kvm->slots_lock); 530 533 531 534 fail: 532 535 kfree(p); ··· 539 542 kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 540 543 { 541 544 int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; 542 - struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; 545 + enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; 543 546 struct _ioeventfd *p, *tmp; 544 547 struct eventfd_ctx *eventfd; 545 548 int ret = -ENOENT; ··· 548 551 if (IS_ERR(eventfd)) 549 552 return PTR_ERR(eventfd); 550 553 551 - down_write(&kvm->slots_lock); 554 + mutex_lock(&kvm->slots_lock); 552 555 553 556 list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { 554 557 bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); ··· 562 565 if (!p->wildcard && p->datamatch != args->datamatch) 563 566 continue; 564 567 565 - __kvm_io_bus_unregister_dev(bus, &p->dev); 568 + kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); 566 569 ioeventfd_release(p); 567 570 ret = 0; 568 571 break; 569 572 } 570 573 571 - up_write(&kvm->slots_lock); 574 + mutex_unlock(&kvm->slots_lock); 572 575 573 576 eventfd_ctx_put(eventfd); 574 577
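The ioeventfd path now registers its devices through the common kvm_io_bus code under the slots_lock mutex; the userspace interface is untouched. A minimal sketch wiring a 4-byte PIO doorbell to an eventfd (port 0x510 and the datamatch value are arbitrary):

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: a guest outl of 0x1 to port 0x510 signals the eventfd instead
 * of forcing a userspace exit; the kernel side is the _ioeventfd device
 * that the hunk above places on KVM_PIO_BUS. */
static int sketch_wire_doorbell(int vm_fd)
{
	int efd = eventfd(0, 0);
	struct kvm_ioeventfd io = {
		.datamatch = 0x1,
		.addr      = 0x510,
		.len       = 4,
		.fd        = efd,
		.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH |
			     KVM_IOEVENTFD_FLAG_PIO,
	};

	if (efd < 0 || ioctl(vm_fd, KVM_IOEVENTFD, &io) < 0)
		return -1;

	return efd;	/* read/poll this fd to consume doorbell kicks */
}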
+36 -2
virt/kvm/ioapic.c
··· 100 100 return injected; 101 101 } 102 102 103 + static void update_handled_vectors(struct kvm_ioapic *ioapic) 104 + { 105 + DECLARE_BITMAP(handled_vectors, 256); 106 + int i; 107 + 108 + memset(handled_vectors, 0, sizeof(handled_vectors)); 109 + for (i = 0; i < IOAPIC_NUM_PINS; ++i) 110 + __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); 111 + memcpy(ioapic->handled_vectors, handled_vectors, 112 + sizeof(handled_vectors)); 113 + smp_wmb(); 114 + } 115 + 103 116 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) 104 117 { 105 118 unsigned index; ··· 147 134 e->bits |= (u32) val; 148 135 e->fields.remote_irr = 0; 149 136 } 137 + update_handled_vectors(ioapic); 150 138 mask_after = e->fields.mask; 151 139 if (mask_before != mask_after) 152 140 kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); ··· 255 241 { 256 242 struct kvm_ioapic *ioapic = kvm->arch.vioapic; 257 243 244 + smp_rmb(); 245 + if (!test_bit(vector, ioapic->handled_vectors)) 246 + return; 258 247 mutex_lock(&ioapic->lock); 259 248 __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); 260 249 mutex_unlock(&ioapic->lock); ··· 369 352 ioapic->ioregsel = 0; 370 353 ioapic->irr = 0; 371 354 ioapic->id = 0; 355 + update_handled_vectors(ioapic); 372 356 } 373 357 374 358 static const struct kvm_io_device_ops ioapic_mmio_ops = { ··· 390 372 kvm_ioapic_reset(ioapic); 391 373 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); 392 374 ioapic->kvm = kvm; 393 - ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); 394 - if (ret < 0) 375 + mutex_lock(&kvm->slots_lock); 376 + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); 377 + mutex_unlock(&kvm->slots_lock); 378 + if (ret < 0) { 379 + kvm->arch.vioapic = NULL; 395 380 kfree(ioapic); 381 + } 396 382 397 383 return ret; 384 + } 385 + 386 + void kvm_ioapic_destroy(struct kvm *kvm) 387 + { 388 + struct kvm_ioapic *ioapic = kvm->arch.vioapic; 389 + 390 + if (ioapic) { 391 + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); 392 + kvm->arch.vioapic = NULL; 393 + kfree(ioapic); 394 + } 398 395 } 399 396 400 397 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) ··· 432 399 433 400 mutex_lock(&ioapic->lock); 434 401 memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); 402 + update_handled_vectors(ioapic); 435 403 mutex_unlock(&ioapic->lock); 436 404 return 0; 437 405 }
+2
virt/kvm/ioapic.h
··· 46 46 struct kvm *kvm;
47 47 void (*ack_notifier)(void *opaque, int irq);
48 48 struct mutex lock;
49 + DECLARE_BITMAP(handled_vectors, 256);
49 50 };
50 51
51 52 #ifdef DEBUG
··· 72 71 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
73 72 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
74 73 int kvm_ioapic_init(struct kvm *kvm);
75 + void kvm_ioapic_destroy(struct kvm *kvm);
75 76 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
76 77 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
77 78 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+22 -14
virt/kvm/iommu.c
··· 32 32 static void kvm_iommu_put_pages(struct kvm *kvm, 33 33 gfn_t base_gfn, unsigned long npages); 34 34 35 - int kvm_iommu_map_pages(struct kvm *kvm, 36 - gfn_t base_gfn, unsigned long npages) 35 + int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) 37 36 { 38 - gfn_t gfn = base_gfn; 37 + gfn_t gfn = slot->base_gfn; 38 + unsigned long npages = slot->npages; 39 39 pfn_t pfn; 40 40 int i, r = 0; 41 41 struct iommu_domain *domain = kvm->arch.iommu_domain; ··· 54 54 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) 55 55 continue; 56 56 57 - pfn = gfn_to_pfn(kvm, gfn); 57 + pfn = gfn_to_pfn_memslot(kvm, slot, gfn); 58 58 r = iommu_map_range(domain, 59 59 gfn_to_gpa(gfn), 60 60 pfn_to_hpa(pfn), ··· 69 69 return 0; 70 70 71 71 unmap_pages: 72 - kvm_iommu_put_pages(kvm, base_gfn, i); 72 + kvm_iommu_put_pages(kvm, slot->base_gfn, i); 73 73 return r; 74 74 } 75 75 76 76 static int kvm_iommu_map_memslots(struct kvm *kvm) 77 77 { 78 78 int i, r = 0; 79 + struct kvm_memslots *slots; 79 80 80 - for (i = 0; i < kvm->nmemslots; i++) { 81 - r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, 82 - kvm->memslots[i].npages); 81 + slots = rcu_dereference(kvm->memslots); 82 + 83 + for (i = 0; i < slots->nmemslots; i++) { 84 + r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); 83 85 if (r) 84 86 break; 85 87 } ··· 106 104 107 105 r = iommu_attach_device(domain, &pdev->dev); 108 106 if (r) { 109 - printk(KERN_ERR "assign device %x:%x.%x failed", 107 + printk(KERN_ERR "assign device %x:%x:%x.%x failed", 108 + pci_domain_nr(pdev->bus), 110 109 pdev->bus->number, 111 110 PCI_SLOT(pdev->devfn), 112 111 PCI_FUNC(pdev->devfn)); ··· 128 125 goto out_unmap; 129 126 } 130 127 131 - printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", 128 + printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", 129 + assigned_dev->host_segnr, 132 130 assigned_dev->host_busnr, 133 131 PCI_SLOT(assigned_dev->host_devfn), 134 132 PCI_FUNC(assigned_dev->host_devfn)); ··· 156 152 157 153 iommu_detach_device(domain, &pdev->dev); 158 154 159 - printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", 155 + printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", 156 + assigned_dev->host_segnr, 160 157 assigned_dev->host_busnr, 161 158 PCI_SLOT(assigned_dev->host_devfn), 162 159 PCI_FUNC(assigned_dev->host_devfn)); ··· 215 210 static int kvm_iommu_unmap_memslots(struct kvm *kvm) 216 211 { 217 212 int i; 213 + struct kvm_memslots *slots; 218 214 219 - for (i = 0; i < kvm->nmemslots; i++) { 220 - kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, 221 - kvm->memslots[i].npages); 215 + slots = rcu_dereference(kvm->memslots); 216 + 217 + for (i = 0; i < slots->nmemslots; i++) { 218 + kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, 219 + slots->memslots[i].npages); 222 220 } 223 221 224 222 return 0;
+258 -136
virt/kvm/kvm_main.c
··· 44 44 #include <linux/bitops.h> 45 45 #include <linux/spinlock.h> 46 46 #include <linux/compat.h> 47 + #include <linux/srcu.h> 48 + #include <linux/hugetlb.h> 47 49 48 50 #include <asm/processor.h> 49 51 #include <asm/io.h> ··· 53 51 #include <asm/pgtable.h> 54 52 #include <asm-generic/bitops/le.h> 55 53 56 - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 57 54 #include "coalesced_mmio.h" 58 - #endif 59 55 60 56 #define CREATE_TRACE_POINTS 61 57 #include <trace/events/kvm.h> ··· 85 85 unsigned long arg); 86 86 static int hardware_enable_all(void); 87 87 static void hardware_disable_all(void); 88 + 89 + static void kvm_io_bus_destroy(struct kvm_io_bus *bus); 88 90 89 91 static bool kvm_rebooting; 90 92 ··· 138 136 139 137 zalloc_cpumask_var(&cpus, GFP_ATOMIC); 140 138 141 - spin_lock(&kvm->requests_lock); 139 + raw_spin_lock(&kvm->requests_lock); 142 140 me = smp_processor_id(); 143 141 kvm_for_each_vcpu(i, vcpu, kvm) { 144 142 if (test_and_set_bit(req, &vcpu->requests)) ··· 153 151 smp_call_function_many(cpus, ack_flush, NULL, 1); 154 152 else 155 153 called = false; 156 - spin_unlock(&kvm->requests_lock); 154 + raw_spin_unlock(&kvm->requests_lock); 157 155 free_cpumask_var(cpus); 158 156 return called; 159 157 } ··· 217 215 unsigned long address) 218 216 { 219 217 struct kvm *kvm = mmu_notifier_to_kvm(mn); 220 - int need_tlb_flush; 218 + int need_tlb_flush, idx; 221 219 222 220 /* 223 221 * When ->invalidate_page runs, the linux pte has been zapped ··· 237 235 * pte after kvm_unmap_hva returned, without noticing the page 238 236 * is going to be freed. 239 237 */ 238 + idx = srcu_read_lock(&kvm->srcu); 240 239 spin_lock(&kvm->mmu_lock); 241 240 kvm->mmu_notifier_seq++; 242 241 need_tlb_flush = kvm_unmap_hva(kvm, address); 243 242 spin_unlock(&kvm->mmu_lock); 243 + srcu_read_unlock(&kvm->srcu, idx); 244 244 245 245 /* we've to flush the tlb before the pages can be freed */ 246 246 if (need_tlb_flush) ··· 256 252 pte_t pte) 257 253 { 258 254 struct kvm *kvm = mmu_notifier_to_kvm(mn); 255 + int idx; 259 256 257 + idx = srcu_read_lock(&kvm->srcu); 260 258 spin_lock(&kvm->mmu_lock); 261 259 kvm->mmu_notifier_seq++; 262 260 kvm_set_spte_hva(kvm, address, pte); 263 261 spin_unlock(&kvm->mmu_lock); 262 + srcu_read_unlock(&kvm->srcu, idx); 264 263 } 265 264 266 265 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, ··· 272 265 unsigned long end) 273 266 { 274 267 struct kvm *kvm = mmu_notifier_to_kvm(mn); 275 - int need_tlb_flush = 0; 268 + int need_tlb_flush = 0, idx; 276 269 270 + idx = srcu_read_lock(&kvm->srcu); 277 271 spin_lock(&kvm->mmu_lock); 278 272 /* 279 273 * The count increase must become visible at unlock time as no ··· 285 277 for (; start < end; start += PAGE_SIZE) 286 278 need_tlb_flush |= kvm_unmap_hva(kvm, start); 287 279 spin_unlock(&kvm->mmu_lock); 280 + srcu_read_unlock(&kvm->srcu, idx); 288 281 289 282 /* we've to flush the tlb before the pages can be freed */ 290 283 if (need_tlb_flush) ··· 323 314 unsigned long address) 324 315 { 325 316 struct kvm *kvm = mmu_notifier_to_kvm(mn); 326 - int young; 317 + int young, idx; 327 318 319 + idx = srcu_read_lock(&kvm->srcu); 328 320 spin_lock(&kvm->mmu_lock); 329 321 young = kvm_age_hva(kvm, address); 330 322 spin_unlock(&kvm->mmu_lock); 323 + srcu_read_unlock(&kvm->srcu, idx); 331 324 332 325 if (young) 333 326 kvm_flush_remote_tlbs(kvm); ··· 352 341 .change_pte = kvm_mmu_notifier_change_pte, 353 342 .release = kvm_mmu_notifier_release, 354 343 }; 344 + 345 + static int kvm_init_mmu_notifier(struct kvm *kvm) 
346 + { 347 + kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; 348 + return mmu_notifier_register(&kvm->mmu_notifier, current->mm); 349 + } 350 + 351 + #else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */ 352 + 353 + static int kvm_init_mmu_notifier(struct kvm *kvm) 354 + { 355 + return 0; 356 + } 357 + 355 358 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ 356 359 357 360 static struct kvm *kvm_create_vm(void) 358 361 { 359 - int r = 0; 362 + int r = 0, i; 360 363 struct kvm *kvm = kvm_arch_create_vm(); 361 - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 362 - struct page *page; 363 - #endif 364 364 365 365 if (IS_ERR(kvm)) 366 366 goto out; ··· 385 363 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); 386 364 #endif 387 365 388 - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 389 - page = alloc_page(GFP_KERNEL | __GFP_ZERO); 390 - if (!page) { 391 - r = -ENOMEM; 366 + r = -ENOMEM; 367 + kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 368 + if (!kvm->memslots) 392 369 goto out_err; 393 - } 394 - kvm->coalesced_mmio_ring = 395 - (struct kvm_coalesced_mmio_ring *)page_address(page); 396 - #endif 397 - 398 - #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 399 - { 400 - kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; 401 - r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); 402 - if (r) { 403 - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 404 - put_page(page); 405 - #endif 370 + if (init_srcu_struct(&kvm->srcu)) 371 + goto out_err; 372 + for (i = 0; i < KVM_NR_BUSES; i++) { 373 + kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), 374 + GFP_KERNEL); 375 + if (!kvm->buses[i]) { 376 + cleanup_srcu_struct(&kvm->srcu); 406 377 goto out_err; 407 378 } 408 379 } 409 - #endif 380 + 381 + r = kvm_init_mmu_notifier(kvm); 382 + if (r) { 383 + cleanup_srcu_struct(&kvm->srcu); 384 + goto out_err; 385 + } 410 386 411 387 kvm->mm = current->mm; 412 388 atomic_inc(&kvm->mm->mm_count); 413 389 spin_lock_init(&kvm->mmu_lock); 414 - spin_lock_init(&kvm->requests_lock); 415 - kvm_io_bus_init(&kvm->pio_bus); 390 + raw_spin_lock_init(&kvm->requests_lock); 416 391 kvm_eventfd_init(kvm); 417 392 mutex_init(&kvm->lock); 418 393 mutex_init(&kvm->irq_lock); 419 - kvm_io_bus_init(&kvm->mmio_bus); 420 - init_rwsem(&kvm->slots_lock); 394 + mutex_init(&kvm->slots_lock); 421 395 atomic_set(&kvm->users_count, 1); 422 396 spin_lock(&kvm_lock); 423 397 list_add(&kvm->vm_list, &vm_list); ··· 424 406 out: 425 407 return kvm; 426 408 427 - #if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \ 428 - (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) 429 409 out_err: 430 410 hardware_disable_all(); 431 - #endif 432 411 out_err_nodisable: 412 + for (i = 0; i < KVM_NR_BUSES; i++) 413 + kfree(kvm->buses[i]); 414 + kfree(kvm->memslots); 433 415 kfree(kvm); 434 416 return ERR_PTR(r); 435 417 } ··· 464 446 void kvm_free_physmem(struct kvm *kvm) 465 447 { 466 448 int i; 449 + struct kvm_memslots *slots = kvm->memslots; 467 450 468 - for (i = 0; i < kvm->nmemslots; ++i) 469 - kvm_free_physmem_slot(&kvm->memslots[i], NULL); 451 + for (i = 0; i < slots->nmemslots; ++i) 452 + kvm_free_physmem_slot(&slots->memslots[i], NULL); 453 + 454 + kfree(kvm->memslots); 470 455 } 471 456 472 457 static void kvm_destroy_vm(struct kvm *kvm) 473 458 { 459 + int i; 474 460 struct mm_struct *mm = kvm->mm; 475 461 476 462 kvm_arch_sync_events(kvm); ··· 482 460 list_del(&kvm->vm_list); 483 461 spin_unlock(&kvm_lock); 484 462 kvm_free_irq_routing(kvm); 485 - kvm_io_bus_destroy(&kvm->pio_bus); 486 - 
kvm_io_bus_destroy(&kvm->mmio_bus); 487 - #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 488 - if (kvm->coalesced_mmio_ring != NULL) 489 - free_page((unsigned long)kvm->coalesced_mmio_ring); 490 - #endif 463 + for (i = 0; i < KVM_NR_BUSES; i++) 464 + kvm_io_bus_destroy(kvm->buses[i]); 465 + kvm_coalesced_mmio_free(kvm); 491 466 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 492 467 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); 493 468 #else ··· 531 512 struct kvm_userspace_memory_region *mem, 532 513 int user_alloc) 533 514 { 534 - int r; 515 + int r, flush_shadow = 0; 535 516 gfn_t base_gfn; 536 517 unsigned long npages; 537 518 unsigned long i; 538 519 struct kvm_memory_slot *memslot; 539 520 struct kvm_memory_slot old, new; 521 + struct kvm_memslots *slots, *old_memslots; 540 522 541 523 r = -EINVAL; 542 524 /* General sanity checks */ ··· 552 532 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) 553 533 goto out; 554 534 555 - memslot = &kvm->memslots[mem->slot]; 535 + memslot = &kvm->memslots->memslots[mem->slot]; 556 536 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 557 537 npages = mem->memory_size >> PAGE_SHIFT; 558 538 ··· 573 553 /* Check for overlaps */ 574 554 r = -EEXIST; 575 555 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 576 - struct kvm_memory_slot *s = &kvm->memslots[i]; 556 + struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; 577 557 578 558 if (s == memslot || !s->npages) 579 559 continue; ··· 599 579 memset(new.rmap, 0, npages * sizeof(*new.rmap)); 600 580 601 581 new.user_alloc = user_alloc; 602 - /* 603 - * hva_to_rmmap() serialzies with the mmu_lock and to be 604 - * safe it has to ignore memslots with !user_alloc && 605 - * !userspace_addr. 606 - */ 607 - if (user_alloc) 608 - new.userspace_addr = mem->userspace_addr; 609 - else 610 - new.userspace_addr = 0; 582 + new.userspace_addr = mem->userspace_addr; 611 583 } 612 584 if (!npages) 613 585 goto skip_lpage; ··· 654 642 if (!new.dirty_bitmap) 655 643 goto out_free; 656 644 memset(new.dirty_bitmap, 0, dirty_bytes); 645 + /* destroy any largepage mappings for dirty tracking */ 657 646 if (old.npages) 658 - kvm_arch_flush_shadow(kvm); 647 + flush_shadow = 1; 659 648 } 660 649 #else /* not defined CONFIG_S390 */ 661 650 new.user_alloc = user_alloc; ··· 664 651 new.userspace_addr = mem->userspace_addr; 665 652 #endif /* not defined CONFIG_S390 */ 666 653 667 - if (!npages) 654 + if (!npages) { 655 + r = -ENOMEM; 656 + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 657 + if (!slots) 658 + goto out_free; 659 + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 660 + if (mem->slot >= slots->nmemslots) 661 + slots->nmemslots = mem->slot + 1; 662 + slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; 663 + 664 + old_memslots = kvm->memslots; 665 + rcu_assign_pointer(kvm->memslots, slots); 666 + synchronize_srcu_expedited(&kvm->srcu); 667 + /* From this point no new shadow pages pointing to a deleted 668 + * memslot will be created. 
669 + * 670 + * validation of sp->gfn happens in: 671 + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) 672 + * - kvm_is_visible_gfn (mmu_check_roots) 673 + */ 668 674 kvm_arch_flush_shadow(kvm); 669 - 670 - spin_lock(&kvm->mmu_lock); 671 - if (mem->slot >= kvm->nmemslots) 672 - kvm->nmemslots = mem->slot + 1; 673 - 674 - *memslot = new; 675 - spin_unlock(&kvm->mmu_lock); 676 - 677 - r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); 678 - if (r) { 679 - spin_lock(&kvm->mmu_lock); 680 - *memslot = old; 681 - spin_unlock(&kvm->mmu_lock); 682 - goto out_free; 675 + kfree(old_memslots); 683 676 } 684 677 685 - kvm_free_physmem_slot(&old, npages ? &new : NULL); 686 - /* Slot deletion case: we have to update the current slot */ 687 - spin_lock(&kvm->mmu_lock); 688 - if (!npages) 689 - *memslot = old; 690 - spin_unlock(&kvm->mmu_lock); 678 + r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); 679 + if (r) 680 + goto out_free; 681 + 691 682 #ifdef CONFIG_DMAR 692 683 /* map the pages in iommu page table */ 693 - r = kvm_iommu_map_pages(kvm, base_gfn, npages); 694 - if (r) 695 - goto out; 684 + if (npages) { 685 + r = kvm_iommu_map_pages(kvm, &new); 686 + if (r) 687 + goto out_free; 688 + } 696 689 #endif 690 + 691 + r = -ENOMEM; 692 + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 693 + if (!slots) 694 + goto out_free; 695 + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 696 + if (mem->slot >= slots->nmemslots) 697 + slots->nmemslots = mem->slot + 1; 698 + 699 + /* actual memory is freed via old in kvm_free_physmem_slot below */ 700 + if (!npages) { 701 + new.rmap = NULL; 702 + new.dirty_bitmap = NULL; 703 + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) 704 + new.lpage_info[i] = NULL; 705 + } 706 + 707 + slots->memslots[mem->slot] = new; 708 + old_memslots = kvm->memslots; 709 + rcu_assign_pointer(kvm->memslots, slots); 710 + synchronize_srcu_expedited(&kvm->srcu); 711 + 712 + kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); 713 + 714 + kvm_free_physmem_slot(&old, &new); 715 + kfree(old_memslots); 716 + 717 + if (flush_shadow) 718 + kvm_arch_flush_shadow(kvm); 719 + 697 720 return 0; 698 721 699 722 out_free: ··· 746 697 { 747 698 int r; 748 699 749 - down_write(&kvm->slots_lock); 700 + mutex_lock(&kvm->slots_lock); 750 701 r = __kvm_set_memory_region(kvm, mem, user_alloc); 751 - up_write(&kvm->slots_lock); 702 + mutex_unlock(&kvm->slots_lock); 752 703 return r; 753 704 } 754 705 EXPORT_SYMBOL_GPL(kvm_set_memory_region); ··· 775 726 if (log->slot >= KVM_MEMORY_SLOTS) 776 727 goto out; 777 728 778 - memslot = &kvm->memslots[log->slot]; 729 + memslot = &kvm->memslots->memslots[log->slot]; 779 730 r = -ENOENT; 780 731 if (!memslot->dirty_bitmap) 781 732 goto out; ··· 829 780 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) 830 781 { 831 782 int i; 783 + struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 832 784 833 - for (i = 0; i < kvm->nmemslots; ++i) { 834 - struct kvm_memory_slot *memslot = &kvm->memslots[i]; 785 + for (i = 0; i < slots->nmemslots; ++i) { 786 + struct kvm_memory_slot *memslot = &slots->memslots[i]; 835 787 836 788 if (gfn >= memslot->base_gfn 837 789 && gfn < memslot->base_gfn + memslot->npages) ··· 851 801 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 852 802 { 853 803 int i; 804 + struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 854 805 855 - gfn = unalias_gfn(kvm, gfn); 806 + gfn = unalias_gfn_instantiation(kvm, gfn); 856 807 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 857 - 
struct kvm_memory_slot *memslot = &kvm->memslots[i]; 808 + struct kvm_memory_slot *memslot = &slots->memslots[i]; 809 + 810 + if (memslot->flags & KVM_MEMSLOT_INVALID) 811 + continue; 858 812 859 813 if (gfn >= memslot->base_gfn 860 814 && gfn < memslot->base_gfn + memslot->npages) ··· 868 814 } 869 815 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); 870 816 817 + unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) 818 + { 819 + struct vm_area_struct *vma; 820 + unsigned long addr, size; 821 + 822 + size = PAGE_SIZE; 823 + 824 + addr = gfn_to_hva(kvm, gfn); 825 + if (kvm_is_error_hva(addr)) 826 + return PAGE_SIZE; 827 + 828 + down_read(&current->mm->mmap_sem); 829 + vma = find_vma(current->mm, addr); 830 + if (!vma) 831 + goto out; 832 + 833 + size = vma_kernel_pagesize(vma); 834 + 835 + out: 836 + up_read(&current->mm->mmap_sem); 837 + 838 + return size; 839 + } 840 + 841 + int memslot_id(struct kvm *kvm, gfn_t gfn) 842 + { 843 + int i; 844 + struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 845 + struct kvm_memory_slot *memslot = NULL; 846 + 847 + gfn = unalias_gfn(kvm, gfn); 848 + for (i = 0; i < slots->nmemslots; ++i) { 849 + memslot = &slots->memslots[i]; 850 + 851 + if (gfn >= memslot->base_gfn 852 + && gfn < memslot->base_gfn + memslot->npages) 853 + break; 854 + } 855 + 856 + return memslot - slots->memslots; 857 + } 858 + 871 859 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) 872 860 { 873 861 struct kvm_memory_slot *slot; 874 862 875 - gfn = unalias_gfn(kvm, gfn); 863 + gfn = unalias_gfn_instantiation(kvm, gfn); 876 864 slot = gfn_to_memslot_unaliased(kvm, gfn); 877 - if (!slot) 865 + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) 878 866 return bad_hva(); 879 867 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 880 868 } 881 869 EXPORT_SYMBOL_GPL(gfn_to_hva); 882 870 883 - pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) 871 + static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) 884 872 { 885 873 struct page *page[1]; 886 - unsigned long addr; 887 874 int npages; 888 875 pfn_t pfn; 889 876 890 877 might_sleep(); 891 - 892 - addr = gfn_to_hva(kvm, gfn); 893 - if (kvm_is_error_hva(addr)) { 894 - get_page(bad_page); 895 - return page_to_pfn(bad_page); 896 - } 897 878 898 879 npages = get_user_pages_fast(addr, 1, 1, page); 899 880 ··· 954 865 return pfn; 955 866 } 956 867 868 + pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) 869 + { 870 + unsigned long addr; 871 + 872 + addr = gfn_to_hva(kvm, gfn); 873 + if (kvm_is_error_hva(addr)) { 874 + get_page(bad_page); 875 + return page_to_pfn(bad_page); 876 + } 877 + 878 + return hva_to_pfn(kvm, addr); 879 + } 957 880 EXPORT_SYMBOL_GPL(gfn_to_pfn); 881 + 882 + static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) 883 + { 884 + return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 885 + } 886 + 887 + pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 888 + struct kvm_memory_slot *slot, gfn_t gfn) 889 + { 890 + unsigned long addr = gfn_to_hva_memslot(slot, gfn); 891 + return hva_to_pfn(kvm, addr); 892 + } 958 893 959 894 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) 960 895 { ··· 1967 1854 .priority = 0, 1968 1855 }; 1969 1856 1970 - void kvm_io_bus_init(struct kvm_io_bus *bus) 1971 - { 1972 - memset(bus, 0, sizeof(*bus)); 1973 - } 1974 - 1975 - void kvm_io_bus_destroy(struct kvm_io_bus *bus) 1857 + static void kvm_io_bus_destroy(struct kvm_io_bus *bus) 1976 1858 { 1977 1859 int i; 1978 1860 ··· 1976 1868 1977 1869 kvm_iodevice_destructor(pos); 1978 1870 } 1871 + 
kfree(bus); 1979 1872 } 1980 1873 1981 1874 /* kvm_io_bus_write - called under kvm->slots_lock */ 1982 - int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, 1875 + int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 1983 1876 int len, const void *val) 1984 1877 { 1985 1878 int i; 1879 + struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); 1986 1880 for (i = 0; i < bus->dev_count; i++) 1987 1881 if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) 1988 1882 return 0; ··· 1992 1882 } 1993 1883 1994 1884 /* kvm_io_bus_read - called under kvm->slots_lock */ 1995 - int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val) 1885 + int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 1886 + int len, void *val) 1996 1887 { 1997 1888 int i; 1889 + struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); 1890 + 1998 1891 for (i = 0; i < bus->dev_count; i++) 1999 1892 if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) 2000 1893 return 0; 2001 1894 return -EOPNOTSUPP; 2002 1895 } 2003 1896 2004 - int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, 2005 - struct kvm_io_device *dev) 1897 + /* Caller must hold slots_lock. */ 1898 + int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, 1899 + struct kvm_io_device *dev) 2006 1900 { 2007 - int ret; 1901 + struct kvm_io_bus *new_bus, *bus; 2008 1902 2009 - down_write(&kvm->slots_lock); 2010 - ret = __kvm_io_bus_register_dev(bus, dev); 2011 - up_write(&kvm->slots_lock); 2012 - 2013 - return ret; 2014 - } 2015 - 2016 - /* An unlocked version. Caller must have write lock on slots_lock. */ 2017 - int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, 2018 - struct kvm_io_device *dev) 2019 - { 1903 + bus = kvm->buses[bus_idx]; 2020 1904 if (bus->dev_count > NR_IOBUS_DEVS-1) 2021 1905 return -ENOSPC; 2022 1906 2023 - bus->devs[bus->dev_count++] = dev; 1907 + new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); 1908 + if (!new_bus) 1909 + return -ENOMEM; 1910 + memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); 1911 + new_bus->devs[new_bus->dev_count++] = dev; 1912 + rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 1913 + synchronize_srcu_expedited(&kvm->srcu); 1914 + kfree(bus); 2024 1915 2025 1916 return 0; 2026 1917 } 2027 1918 2028 - void kvm_io_bus_unregister_dev(struct kvm *kvm, 2029 - struct kvm_io_bus *bus, 2030 - struct kvm_io_device *dev) 1919 + /* Caller must hold slots_lock. */ 1920 + int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 1921 + struct kvm_io_device *dev) 2031 1922 { 2032 - down_write(&kvm->slots_lock); 2033 - __kvm_io_bus_unregister_dev(bus, dev); 2034 - up_write(&kvm->slots_lock); 2035 - } 1923 + int i, r; 1924 + struct kvm_io_bus *new_bus, *bus; 2036 1925 2037 - /* An unlocked version. Caller must have write lock on slots_lock. 
*/ 2038 - void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, 2039 - struct kvm_io_device *dev) 2040 - { 2041 - int i; 1926 + new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); 1927 + if (!new_bus) 1928 + return -ENOMEM; 2042 1929 2043 - for (i = 0; i < bus->dev_count; i++) 2044 - if (bus->devs[i] == dev) { 2045 - bus->devs[i] = bus->devs[--bus->dev_count]; 1930 + bus = kvm->buses[bus_idx]; 1931 + memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); 1932 + 1933 + r = -ENOENT; 1934 + for (i = 0; i < new_bus->dev_count; i++) 1935 + if (new_bus->devs[i] == dev) { 1936 + r = 0; 1937 + new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; 2046 1938 break; 2047 1939 } 1940 + 1941 + if (r) { 1942 + kfree(new_bus); 1943 + return r; 1944 + } 1945 + 1946 + rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 1947 + synchronize_srcu_expedited(&kvm->srcu); 1948 + kfree(bus); 1949 + return r; 2048 1950 } 2049 1951 2050 1952 static struct notifier_block kvm_cpu_notifier = {
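The pattern that recurs throughout this file, and the reason slots_lock can shrink to an ordinary mutex, is copy/update/publish: readers walk kvm->memslots or kvm->buses[] inside srcu_read_lock()/srcu_read_unlock(), while writers build a complete replacement, publish it with rcu_assign_pointer(), wait out existing readers with synchronize_srcu_expedited(), and only then free the old copy. A condensed writer-side sketch modelled on kvm_io_bus_register_dev() above; error paths are trimmed and the caller is assumed to hold kvm->slots_lock:

/* Condensed sketch of the SRCU copy/update/publish step used for the
 * I/O buses; the memslot update in __kvm_set_memory_region() follows
 * the same shape with kvm->memslots instead of kvm->buses[]. */
static int sketch_publish_new_bus(struct kvm *kvm, enum kvm_bus bus_idx,
				  struct kvm_io_device *dev)
{
	struct kvm_io_bus *old = kvm->buses[bus_idx];
	struct kvm_io_bus *new = kzalloc(sizeof(*new), GFP_KERNEL);

	if (!new)
		return -ENOMEM;

	*new = *old;					/* copy ...       */
	new->devs[new->dev_count++] = dev;		/* ... update ... */
	rcu_assign_pointer(kvm->buses[bus_idx], new);	/* ... publish    */
	synchronize_srcu_expedited(&kvm->srcu);		/* drain old readers */
	kfree(old);					/* nobody can see it now */
	return 0;
}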