Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'kvm-updates/2.6.40' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.40' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (131 commits)
KVM: MMU: Use ptep_user for cmpxchg_gpte()
KVM: Fix kvm mmu_notifier initialization order
KVM: Add documentation for KVM_CAP_NR_VCPUS
KVM: make guest mode entry to be rcu quiescent state
KVM: x86 emulator: Make jmp far emulation into a separate function
KVM: x86 emulator: Rename emulate_grpX() to em_grpX()
KVM: x86 emulator: Remove unused arg from emulate_pop()
KVM: x86 emulator: Remove unused arg from writeback()
KVM: x86 emulator: Remove unused arg from read_descriptor()
KVM: x86 emulator: Remove unused arg from seg_override()
KVM: Validate userspace_addr of memslot when registered
KVM: MMU: Clean up gpte reading with copy_from_user()
KVM: PPC: booke: add sregs support
KVM: PPC: booke: save/restore VRSAVE (a.k.a. USPRG0)
KVM: PPC: use ticks, not usecs, for exit timing
KVM: PPC: fix exit accounting for SPRs, tlbwe, tlbsx
KVM: PPC: e500: emulate SVR
KVM: VMX: Cache vmcs segment fields
KVM: x86 emulator: consolidate segment accessors
KVM: VMX: Avoid reading %rip unnecessarily when handling exceptions
...

+3052 -1091
+31 -3
Documentation/virtual/kvm/api.txt
··· 175 175 Returns: vcpu fd on success, -1 on error 176 176 177 177 This API adds a vcpu to a virtual machine. The vcpu id is a small integer 178 - in the range [0, max_vcpus). 178 + in the range [0, max_vcpus). You can use KVM_CAP_NR_VCPUS of the 179 + KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time. 180 + If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 181 + cpus max. 179 182 180 183 4.8 KVM_GET_DIRTY_LOG (vm ioctl) 181 184 ··· 264 261 4.13 KVM_GET_SREGS 265 262 266 263 Capability: basic 267 - Architectures: x86 264 + Architectures: x86, ppc 268 265 Type: vcpu ioctl 269 266 Parameters: struct kvm_sregs (out) 270 267 Returns: 0 on success, -1 on error ··· 282 279 __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; 283 280 }; 284 281 282 + /* ppc -- see arch/powerpc/include/asm/kvm.h */ 283 + 285 284 interrupt_bitmap is a bitmap of pending external interrupts. At most 286 285 one bit may be set. This interrupt has been acknowledged by the APIC 287 286 but not yet injected into the cpu core. ··· 291 286 4.14 KVM_SET_SREGS 292 287 293 288 Capability: basic 294 - Architectures: x86 289 + Architectures: x86, ppc 295 290 Type: vcpu ioctl 296 291 Parameters: struct kvm_sregs (in) 297 292 Returns: 0 on success, -1 on error ··· 1267 1262 __u16 entry; /* The index of entry in the MSI-X table */ 1268 1263 __u16 padding[3]; 1269 1264 }; 1265 + 1266 + 4.54 KVM_SET_TSC_KHZ 1267 + 1268 + Capability: KVM_CAP_TSC_CONTROL 1269 + Architectures: x86 1270 + Type: vcpu ioctl 1271 + Parameters: virtual tsc_khz 1272 + Returns: 0 on success, -1 on error 1273 + 1274 + Specifies the tsc frequency for the virtual machine. The unit of the 1275 + frequency is KHz. 
1276 + 1277 + 4.55 KVM_GET_TSC_KHZ 1278 + 1279 + Capability: KVM_CAP_GET_TSC_KHZ 1280 + Architectures: x86 1281 + Type: vcpu ioctl 1282 + Parameters: none 1283 + Returns: virtual tsc-khz on success, negative value on error 1284 + 1285 + Returns the tsc frequency of the guest. The unit of the return value is 1286 + KHz. If the host has unstable tsc this ioctl returns -EIO instead as an 1287 + error. 1270 1288 1271 1289 5. The kvm_run structure 1272 1290
+13 -13
arch/ia64/kvm/vti.h
··· 83 83 union vac { 84 84 unsigned long value; 85 85 struct { 86 - int a_int:1; 87 - int a_from_int_cr:1; 88 - int a_to_int_cr:1; 89 - int a_from_psr:1; 90 - int a_from_cpuid:1; 91 - int a_cover:1; 92 - int a_bsw:1; 86 + unsigned int a_int:1; 87 + unsigned int a_from_int_cr:1; 88 + unsigned int a_to_int_cr:1; 89 + unsigned int a_from_psr:1; 90 + unsigned int a_from_cpuid:1; 91 + unsigned int a_cover:1; 92 + unsigned int a_bsw:1; 93 93 long reserved:57; 94 94 }; 95 95 }; ··· 97 97 union vdc { 98 98 unsigned long value; 99 99 struct { 100 - int d_vmsw:1; 101 - int d_extint:1; 102 - int d_ibr_dbr:1; 103 - int d_pmc:1; 104 - int d_to_pmd:1; 105 - int d_itm:1; 100 + unsigned int d_vmsw:1; 101 + unsigned int d_extint:1; 102 + unsigned int d_ibr_dbr:1; 103 + unsigned int d_pmc:1; 104 + unsigned int d_to_pmd:1; 105 + unsigned int d_itm:1; 106 106 long reserved:58; 107 107 }; 108 108 };
+184
arch/powerpc/include/asm/kvm.h
··· 45 45 __u64 gpr[32]; 46 46 }; 47 47 48 + #define KVM_SREGS_E_IMPL_NONE 0 49 + #define KVM_SREGS_E_IMPL_FSL 1 50 + 51 + #define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ 52 + 53 + /* 54 + * Feature bits indicate which sections of the sregs struct are valid, 55 + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers 56 + * corresponding to unset feature bits will not be modified. This allows 57 + * restoring a checkpoint made without that feature, while keeping the 58 + * default values of the new registers. 59 + * 60 + * KVM_SREGS_E_BASE contains: 61 + * CSRR0/1 (refers to SRR2/3 on 40x) 62 + * ESR 63 + * DEAR 64 + * MCSR 65 + * TSR 66 + * TCR 67 + * DEC 68 + * TB 69 + * VRSAVE (USPRG0) 70 + */ 71 + #define KVM_SREGS_E_BASE (1 << 0) 72 + 73 + /* 74 + * KVM_SREGS_E_ARCH206 contains: 75 + * 76 + * PIR 77 + * MCSRR0/1 78 + * DECAR 79 + * IVPR 80 + */ 81 + #define KVM_SREGS_E_ARCH206 (1 << 1) 82 + 83 + /* 84 + * Contains EPCR, plus the upper half of 64-bit registers 85 + * that are 32-bit on 32-bit implementations. 86 + */ 87 + #define KVM_SREGS_E_64 (1 << 2) 88 + 89 + #define KVM_SREGS_E_SPRG8 (1 << 3) 90 + #define KVM_SREGS_E_MCIVPR (1 << 4) 91 + 92 + /* 93 + * IVORs are used -- contains IVOR0-15, plus additional IVORs 94 + * in combination with an appropriate feature bit. 95 + */ 96 + #define KVM_SREGS_E_IVOR (1 << 5) 97 + 98 + /* 99 + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. 100 + * Also TLBnPS if MMUCFG[MAVN] = 1. 
101 + */ 102 + #define KVM_SREGS_E_ARCH206_MMU (1 << 6) 103 + 104 + /* DBSR, DBCR, IAC, DAC, DVC */ 105 + #define KVM_SREGS_E_DEBUG (1 << 7) 106 + 107 + /* Enhanced debug -- DSRR0/1, SPRG9 */ 108 + #define KVM_SREGS_E_ED (1 << 8) 109 + 110 + /* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ 111 + #define KVM_SREGS_E_SPE (1 << 9) 112 + 113 + /* External Proxy (EXP) -- EPR */ 114 + #define KVM_SREGS_EXP (1 << 10) 115 + 116 + /* External PID (E.PD) -- EPSC/EPLC */ 117 + #define KVM_SREGS_E_PD (1 << 11) 118 + 119 + /* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ 120 + #define KVM_SREGS_E_PC (1 << 12) 121 + 122 + /* Page table (E.PT) -- EPTCFG */ 123 + #define KVM_SREGS_E_PT (1 << 13) 124 + 125 + /* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ 126 + #define KVM_SREGS_E_PM (1 << 14) 127 + 128 + /* 129 + * Special updates: 130 + * 131 + * Some registers may change even while a vcpu is not running. 132 + * To avoid losing these changes, by default these registers are 133 + * not updated by KVM_SET_SREGS. To force an update, set the bit 134 + * in u.e.update_special corresponding to the register to be updated. 135 + * 136 + * The update_special field is zero on return from KVM_GET_SREGS. 137 + * 138 + * When restoring a checkpoint, the caller can set update_special 139 + * to 0xffffffff to ensure that everything is restored, even new features 140 + * that the caller doesn't know about. 141 + */ 142 + #define KVM_SREGS_E_UPDATE_MCSR (1 << 0) 143 + #define KVM_SREGS_E_UPDATE_TSR (1 << 1) 144 + #define KVM_SREGS_E_UPDATE_DEC (1 << 2) 145 + #define KVM_SREGS_E_UPDATE_DBSR (1 << 3) 146 + 147 + /* 148 + * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a 149 + * previous KVM_GET_SREGS. 150 + * 151 + * Unless otherwise indicated, setting any register with KVM_SET_SREGS 152 + * directly sets its value. It does not trigger any special semantics such 153 + * as write-one-to-clear.
Calling KVM_SET_SREGS on an unmodified struct 154 + * just received from KVM_GET_SREGS is always a no-op. 155 + */ 48 156 struct kvm_sregs { 49 157 __u32 pvr; 50 158 union { ··· 170 62 __u64 dbat[8]; 171 63 } ppc32; 172 64 } s; 65 + struct { 66 + union { 67 + struct { /* KVM_SREGS_E_IMPL_FSL */ 68 + __u32 features; /* KVM_SREGS_E_FSL_ */ 69 + __u32 svr; 70 + __u64 mcar; 71 + __u32 hid0; 72 + 73 + /* KVM_SREGS_E_FSL_PIDn */ 74 + __u32 pid1, pid2; 75 + } fsl; 76 + __u8 pad[256]; 77 + } impl; 78 + 79 + __u32 features; /* KVM_SREGS_E_ */ 80 + __u32 impl_id; /* KVM_SREGS_E_IMPL_ */ 81 + __u32 update_special; /* KVM_SREGS_E_UPDATE_ */ 82 + __u32 pir; /* read-only */ 83 + __u64 sprg8; 84 + __u64 sprg9; /* E.ED */ 85 + __u64 csrr0; 86 + __u64 dsrr0; /* E.ED */ 87 + __u64 mcsrr0; 88 + __u32 csrr1; 89 + __u32 dsrr1; /* E.ED */ 90 + __u32 mcsrr1; 91 + __u32 esr; 92 + __u64 dear; 93 + __u64 ivpr; 94 + __u64 mcivpr; 95 + __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */ 96 + 97 + __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */ 98 + __u32 tcr; 99 + __u32 decar; 100 + __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */ 101 + 102 + /* 103 + * Userspace can read TB directly, but the 104 + * value reported here is consistent with "dec". 105 + * 106 + * Read-only. 107 + */ 108 + __u64 tb; 109 + 110 + __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ 111 + __u32 dbcr[3]; 112 + __u32 iac[4]; 113 + __u32 dac[2]; 114 + __u32 dvc[2]; 115 + __u8 num_iac; /* read-only */ 116 + __u8 num_dac; /* read-only */ 117 + __u8 num_dvc; /* read-only */ 118 + __u8 pad; 119 + 120 + __u32 epr; /* EXP */ 121 + __u32 vrsave; /* a.k.a. 
USPRG0 */ 122 + __u32 epcr; /* KVM_SREGS_E_64 */ 123 + 124 + __u32 mas0; 125 + __u32 mas1; 126 + __u64 mas2; 127 + __u64 mas7_3; 128 + __u32 mas4; 129 + __u32 mas6; 130 + 131 + __u32 ivor_low[16]; /* IVOR0-15 */ 132 + __u32 ivor_high[18]; /* IVOR32+, plus room to expand */ 133 + 134 + __u32 mmucfg; /* read-only */ 135 + __u32 eptcfg; /* E.PT, read-only */ 136 + __u32 tlbcfg[4];/* read-only */ 137 + __u32 tlbps[4]; /* read-only */ 138 + 139 + __u32 eplc, epsc; /* E.PD */ 140 + } e; 173 141 __u8 pad[1020]; 174 142 } u; 175 143 };
-1
arch/powerpc/include/asm/kvm_44x.h
··· 61 61 return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); 62 62 } 63 63 64 - void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); 65 64 void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); 66 65 void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); 67 66
+2
arch/powerpc/include/asm/kvm_e500.h
··· 43 43 44 44 u32 host_pid[E500_PID_NUM]; 45 45 u32 pid[E500_PID_NUM]; 46 + u32 svr; 46 47 47 48 u32 mas0; 48 49 u32 mas1; ··· 59 58 u32 hid1; 60 59 u32 tlb0cfg; 61 60 u32 tlb1cfg; 61 + u64 mcar; 62 62 63 63 struct kvm_vcpu vcpu; 64 64 };
+5
arch/powerpc/include/asm/kvm_host.h
··· 223 223 ulong hflags; 224 224 ulong guest_owned_ext; 225 225 #endif 226 + u32 vrsave; /* also USPRG0 */ 226 227 u32 mmucr; 227 228 ulong sprg4; 228 229 ulong sprg5; ··· 233 232 ulong csrr1; 234 233 ulong dsrr0; 235 234 ulong dsrr1; 235 + ulong mcsrr0; 236 + ulong mcsrr1; 237 + ulong mcsr; 236 238 ulong esr; 237 239 u32 dec; 238 240 u32 decar; ··· 259 255 u32 dbsr; 260 256 261 257 #ifdef CONFIG_KVM_EXIT_TIMING 258 + struct mutex exit_timing_lock; 262 259 struct kvmppc_exit_timing timing_exit; 263 260 struct kvmppc_exit_timing timing_last_enter; 264 261 u32 last_exit_type;
+9
arch/powerpc/include/asm/kvm_ppc.h
··· 61 61 struct kvm_vcpu *vcpu); 62 62 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); 63 63 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); 64 + extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); 64 65 65 66 /* Core-specific hooks */ 66 67 ··· 142 141 143 142 return r; 144 143 } 144 + 145 + void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 146 + int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 147 + 148 + void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 149 + int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); 150 + 151 + void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); 145 152 146 153 #endif /* __POWERPC_KVM_PPC_H__ */
+1
arch/powerpc/kernel/asm-offsets.c
··· 396 396 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 397 397 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 398 398 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 399 + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); 399 400 DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); 400 401 DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); 401 402 DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
+10
arch/powerpc/kvm/44x.c
··· 107 107 return 0; 108 108 } 109 109 110 + void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 111 + { 112 + kvmppc_get_sregs_ivor(vcpu, sregs); 113 + } 114 + 115 + int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 116 + { 117 + return kvmppc_set_sregs_ivor(vcpu, sregs); 118 + } 119 + 110 120 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 111 121 { 112 122 struct kvmppc_vcpu_44x *vcpu_44x;
-2
arch/powerpc/kvm/44x_emulate.c
··· 158 158 emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); 159 159 } 160 160 161 - kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); 162 161 return emulated; 163 162 } 164 163 ··· 178 179 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); 179 180 } 180 181 181 - kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); 182 182 return emulated; 183 183 } 184 184
+152 -2
arch/powerpc/kvm/booke.c
··· 569 569 kvmppc_set_msr(vcpu, regs->msr); 570 570 vcpu->arch.shared->srr0 = regs->srr0; 571 571 vcpu->arch.shared->srr1 = regs->srr1; 572 + kvmppc_set_pid(vcpu, regs->pid); 572 573 vcpu->arch.shared->sprg0 = regs->sprg0; 573 574 vcpu->arch.shared->sprg1 = regs->sprg1; 574 575 vcpu->arch.shared->sprg2 = regs->sprg2; ··· 585 584 return 0; 586 585 } 587 586 587 + static void get_sregs_base(struct kvm_vcpu *vcpu, 588 + struct kvm_sregs *sregs) 589 + { 590 + u64 tb = get_tb(); 591 + 592 + sregs->u.e.features |= KVM_SREGS_E_BASE; 593 + 594 + sregs->u.e.csrr0 = vcpu->arch.csrr0; 595 + sregs->u.e.csrr1 = vcpu->arch.csrr1; 596 + sregs->u.e.mcsr = vcpu->arch.mcsr; 597 + sregs->u.e.esr = vcpu->arch.esr; 598 + sregs->u.e.dear = vcpu->arch.shared->dar; 599 + sregs->u.e.tsr = vcpu->arch.tsr; 600 + sregs->u.e.tcr = vcpu->arch.tcr; 601 + sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); 602 + sregs->u.e.tb = tb; 603 + sregs->u.e.vrsave = vcpu->arch.vrsave; 604 + } 605 + 606 + static int set_sregs_base(struct kvm_vcpu *vcpu, 607 + struct kvm_sregs *sregs) 608 + { 609 + if (!(sregs->u.e.features & KVM_SREGS_E_BASE)) 610 + return 0; 611 + 612 + vcpu->arch.csrr0 = sregs->u.e.csrr0; 613 + vcpu->arch.csrr1 = sregs->u.e.csrr1; 614 + vcpu->arch.mcsr = sregs->u.e.mcsr; 615 + vcpu->arch.esr = sregs->u.e.esr; 616 + vcpu->arch.shared->dar = sregs->u.e.dear; 617 + vcpu->arch.vrsave = sregs->u.e.vrsave; 618 + vcpu->arch.tcr = sregs->u.e.tcr; 619 + 620 + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) 621 + vcpu->arch.dec = sregs->u.e.dec; 622 + 623 + kvmppc_emulate_dec(vcpu); 624 + 625 + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { 626 + /* 627 + * FIXME: existing KVM timer handling is incomplete. 628 + * TSR cannot be read by the guest, and its value in 629 + * vcpu->arch is always zero. For now, just handle 630 + * the case where the caller is trying to inject a 631 + * decrementer interrupt. 
632 + */ 633 + 634 + if ((sregs->u.e.tsr & TSR_DIS) && 635 + (vcpu->arch.tcr & TCR_DIE)) 636 + kvmppc_core_queue_dec(vcpu); 637 + } 638 + 639 + return 0; 640 + } 641 + 642 + static void get_sregs_arch206(struct kvm_vcpu *vcpu, 643 + struct kvm_sregs *sregs) 644 + { 645 + sregs->u.e.features |= KVM_SREGS_E_ARCH206; 646 + 647 + sregs->u.e.pir = 0; 648 + sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; 649 + sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; 650 + sregs->u.e.decar = vcpu->arch.decar; 651 + sregs->u.e.ivpr = vcpu->arch.ivpr; 652 + } 653 + 654 + static int set_sregs_arch206(struct kvm_vcpu *vcpu, 655 + struct kvm_sregs *sregs) 656 + { 657 + if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) 658 + return 0; 659 + 660 + if (sregs->u.e.pir != 0) 661 + return -EINVAL; 662 + 663 + vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; 664 + vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1; 665 + vcpu->arch.decar = sregs->u.e.decar; 666 + vcpu->arch.ivpr = sregs->u.e.ivpr; 667 + 668 + return 0; 669 + } 670 + 671 + void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 672 + { 673 + sregs->u.e.features |= KVM_SREGS_E_IVOR; 674 + 675 + sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; 676 + sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; 677 + sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; 678 + sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; 679 + sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; 680 + sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; 681 + sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; 682 + sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; 683 + sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; 684 + sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; 685 + sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; 686 + sregs->u.e.ivor_low[11] = 
vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; 687 + sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; 688 + sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; 689 + sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; 690 + sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 691 + } 692 + 693 + int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 694 + { 695 + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) 696 + return 0; 697 + 698 + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0]; 699 + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1]; 700 + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2]; 701 + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3]; 702 + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4]; 703 + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5]; 704 + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6]; 705 + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7]; 706 + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8]; 707 + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9]; 708 + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10]; 709 + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11]; 710 + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12]; 711 + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13]; 712 + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14]; 713 + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15]; 714 + 715 + return 0; 716 + } 717 + 588 718 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 589 719 struct kvm_sregs *sregs) 590 720 { 591 - return -ENOTSUPP; 721 + sregs->pvr = vcpu->arch.pvr; 722 + 723 + get_sregs_base(vcpu, sregs); 724 + get_sregs_arch206(vcpu, sregs); 725 + kvmppc_core_get_sregs(vcpu, sregs); 
726 + return 0; 592 727 } 593 728 594 729 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 595 730 struct kvm_sregs *sregs) 596 731 { 597 - return -ENOTSUPP; 732 + int ret; 733 + 734 + if (vcpu->arch.pvr != sregs->pvr) 735 + return -EINVAL; 736 + 737 + ret = set_sregs_base(vcpu, sregs); 738 + if (ret < 0) 739 + return ret; 740 + 741 + ret = set_sregs_arch206(vcpu, sregs); 742 + if (ret < 0) 743 + return ret; 744 + 745 + return kvmppc_core_set_sregs(vcpu, sregs); 598 746 } 599 747 600 748 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-1
arch/powerpc/kvm/booke_interrupts.S
··· 380 380 * because host interrupt handlers would get confused. */ 381 381 lwz r1, VCPU_GPR(r1)(r4) 382 382 383 - /* XXX handle USPRG0 */ 384 383 /* Host interrupt handlers may have clobbered these guest-readable 385 384 * SPRGs, so we need to reload them here with the guest's values. */ 386 385 lwz r3, VCPU_SPRG4(r4)
+76
arch/powerpc/kvm/e500.c
··· 63 63 64 64 /* Registers init */ 65 65 vcpu->arch.pvr = mfspr(SPRN_PVR); 66 + vcpu_e500->svr = mfspr(SPRN_SVR); 66 67 67 68 /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ 68 69 vcpu->vcpu_id = 0; ··· 95 94 tr->valid = 1; 96 95 97 96 return 0; 97 + } 98 + 99 + void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 100 + { 101 + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 102 + 103 + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE | 104 + KVM_SREGS_E_PM; 105 + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; 106 + 107 + sregs->u.e.impl.fsl.features = 0; 108 + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; 109 + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; 110 + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; 111 + 112 + sregs->u.e.mas0 = vcpu_e500->mas0; 113 + sregs->u.e.mas1 = vcpu_e500->mas1; 114 + sregs->u.e.mas2 = vcpu_e500->mas2; 115 + sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; 116 + sregs->u.e.mas4 = vcpu_e500->mas4; 117 + sregs->u.e.mas6 = vcpu_e500->mas6; 118 + 119 + sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); 120 + sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; 121 + sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; 122 + sregs->u.e.tlbcfg[2] = 0; 123 + sregs->u.e.tlbcfg[3] = 0; 124 + 125 + sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; 126 + sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; 127 + sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; 128 + sregs->u.e.ivor_high[3] = 129 + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; 130 + 131 + kvmppc_get_sregs_ivor(vcpu, sregs); 132 + } 133 + 134 + int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) 135 + { 136 + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 137 + 138 + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { 139 + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; 140 + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; 141 + vcpu_e500->mcar = 
sregs->u.e.impl.fsl.mcar; 142 + } 143 + 144 + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { 145 + vcpu_e500->mas0 = sregs->u.e.mas0; 146 + vcpu_e500->mas1 = sregs->u.e.mas1; 147 + vcpu_e500->mas2 = sregs->u.e.mas2; 148 + vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; 149 + vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; 150 + vcpu_e500->mas4 = sregs->u.e.mas4; 151 + vcpu_e500->mas6 = sregs->u.e.mas6; 152 + } 153 + 154 + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) 155 + return 0; 156 + 157 + if (sregs->u.e.features & KVM_SREGS_E_SPE) { 158 + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = 159 + sregs->u.e.ivor_high[0]; 160 + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = 161 + sregs->u.e.ivor_high[1]; 162 + vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = 163 + sregs->u.e.ivor_high[2]; 164 + } 165 + 166 + if (sregs->u.e.features & KVM_SREGS_E_PM) { 167 + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = 168 + sregs->u.e.ivor_high[3]; 169 + } 170 + 171 + return kvmppc_set_sregs_ivor(vcpu, sregs); 98 172 } 99 173 100 174 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+4 -3
arch/powerpc/kvm/e500_emulate.c
··· 1 1 /* 2 - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. 2 + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. 3 3 * 4 4 * Author: Yu Liu, <yu.liu@freescale.com> 5 5 * ··· 78 78 79 79 switch (sprn) { 80 80 case SPRN_PID: 81 - vcpu_e500->pid[0] = vcpu->arch.shadow_pid = 82 - vcpu->arch.pid = spr_val; 81 + kvmppc_set_pid(vcpu, spr_val); 83 82 break; 84 83 case SPRN_PID1: 85 84 vcpu_e500->pid[1] = spr_val; break; ··· 174 175 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; 175 176 case SPRN_HID1: 176 177 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; 178 + case SPRN_SVR: 179 + kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; 177 180 178 181 case SPRN_MMUCSR0: 179 182 kvmppc_set_gpr(vcpu, rt, 0); break;
+12 -1
arch/powerpc/kvm/e500_tlb.c
··· 1 1 /* 2 - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. 2 + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. 3 3 * 4 4 * Author: Yu Liu, yu.liu@freescale.com 5 5 * ··· 24 24 #include "../mm/mmu_decl.h" 25 25 #include "e500_tlb.h" 26 26 #include "trace.h" 27 + #include "timing.h" 27 28 28 29 #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) 29 30 ··· 507 506 vcpu_e500->mas7 = 0; 508 507 } 509 508 509 + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); 510 510 return EMULATE_DONE; 511 511 } 512 512 ··· 573 571 write_host_tlbe(vcpu_e500, stlbsel, sesel); 574 572 } 575 573 574 + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); 576 575 return EMULATE_DONE; 577 576 } 578 577 ··· 673 670 } 674 671 675 672 return -1; 673 + } 674 + 675 + void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) 676 + { 677 + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 678 + 679 + vcpu_e500->pid[0] = vcpu->arch.shadow_pid = 680 + vcpu->arch.pid = pid; 676 681 } 677 682 678 683 void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+10 -5
arch/powerpc/kvm/emulate.c
··· 114 114 } 115 115 } 116 116 117 + u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) 118 + { 119 + u64 jd = tb - vcpu->arch.dec_jiffies; 120 + return vcpu->arch.dec - jd; 121 + } 122 + 117 123 /* XXX to do: 118 124 * lhax 119 125 * lhaux ··· 285 279 286 280 case SPRN_DEC: 287 281 { 288 - u64 jd = get_tb() - vcpu->arch.dec_jiffies; 289 - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); 290 - pr_debug("mfDEC: %x - %llx = %lx\n", 291 - vcpu->arch.dec, jd, 292 - kvmppc_get_gpr(vcpu, rt)); 282 + kvmppc_set_gpr(vcpu, rt, 283 + kvmppc_get_dec(vcpu, get_tb())); 293 284 break; 294 285 } 295 286 default: ··· 297 294 } 298 295 break; 299 296 } 297 + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); 300 298 break; 301 299 302 300 case OP_31_XOP_STHX: ··· 367 363 printk("mtspr: unknown spr %x\n", sprn); 368 364 break; 369 365 } 366 + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); 370 367 break; 371 368 372 369 case OP_31_XOP_DCBI:
+21
arch/powerpc/kvm/powerpc.c
··· 175 175 int r; 176 176 177 177 switch (ext) { 178 + #ifdef CONFIG_BOOKE 179 + case KVM_CAP_PPC_BOOKE_SREGS: 180 + #else 178 181 case KVM_CAP_PPC_SEGSTATE: 182 + #endif 179 183 case KVM_CAP_PPC_PAIRED_SINGLES: 180 184 case KVM_CAP_PPC_UNSET_IRQ: 181 185 case KVM_CAP_PPC_IRQ_LEVEL: ··· 288 284 tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); 289 285 vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; 290 286 287 + #ifdef CONFIG_KVM_EXIT_TIMING 288 + mutex_init(&vcpu->arch.exit_timing_lock); 289 + #endif 290 + 291 291 return 0; 292 292 } 293 293 ··· 302 294 303 295 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 304 296 { 297 + #ifdef CONFIG_BOOKE 298 + /* 299 + * vrsave (formerly usprg0) isn't used by Linux, but may 300 + * be used by the guest. 301 + * 302 + * On non-booke this is associated with Altivec and 303 + * is handled by code in book3s.c. 304 + */ 305 + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); 306 + #endif 305 307 kvmppc_core_vcpu_load(vcpu, cpu); 306 308 } 307 309 308 310 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 309 311 { 310 312 kvmppc_core_vcpu_put(vcpu); 313 + #ifdef CONFIG_BOOKE 314 + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); 315 + #endif 311 316 } 312 317 313 318 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+24 -7
arch/powerpc/kvm/timing.c
··· 34 34 { 35 35 int i; 36 36 37 - /* pause guest execution to avoid concurrent updates */ 38 - mutex_lock(&vcpu->mutex); 37 + /* Take a lock to avoid concurrent updates */ 38 + mutex_lock(&vcpu->arch.exit_timing_lock); 39 39 40 40 vcpu->arch.last_exit_type = 0xDEAD; 41 41 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { ··· 49 49 vcpu->arch.timing_exit.tv64 = 0; 50 50 vcpu->arch.timing_last_enter.tv64 = 0; 51 51 52 - mutex_unlock(&vcpu->mutex); 52 + mutex_unlock(&vcpu->arch.exit_timing_lock); 53 53 } 54 54 55 55 static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) ··· 64 64 vcpu->arch.timing_count_type[type]); 65 65 return; 66 66 } 67 + 68 + mutex_lock(&vcpu->arch.exit_timing_lock); 67 69 68 70 vcpu->arch.timing_count_type[type]++; 69 71 ··· 95 93 vcpu->arch.timing_min_duration[type] = duration; 96 94 if (unlikely(duration > vcpu->arch.timing_max_duration[type])) 97 95 vcpu->arch.timing_max_duration[type] = duration; 96 + 97 + mutex_unlock(&vcpu->arch.exit_timing_lock); 98 98 } 99 99 100 100 void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) ··· 151 147 { 152 148 struct kvm_vcpu *vcpu = m->private; 153 149 int i; 150 + u64 min, max, sum, sum_quad; 154 151 155 152 seq_printf(m, "%s", "type count min max sum sum_squared\n"); 156 153 154 + 157 155 for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { 156 + 157 + min = vcpu->arch.timing_min_duration[i]; 158 + do_div(min, tb_ticks_per_usec); 159 + max = vcpu->arch.timing_max_duration[i]; 160 + do_div(max, tb_ticks_per_usec); 161 + sum = vcpu->arch.timing_sum_duration[i]; 162 + do_div(sum, tb_ticks_per_usec); 163 + sum_quad = vcpu->arch.timing_sum_quad_duration[i]; 164 + do_div(sum_quad, tb_ticks_per_usec); 165 + 158 166 seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", 159 167 kvm_exit_names[i], 160 168 vcpu->arch.timing_count_type[i], 161 - vcpu->arch.timing_min_duration[i], 162 - vcpu->arch.timing_max_duration[i], 163 - vcpu->arch.timing_sum_duration[i], 164 - 
vcpu->arch.timing_sum_quad_duration[i]); 169 + min, 170 + max, 171 + sum, 172 + sum_quad); 173 + 165 174 } 166 175 return 0; 167 176 }
+143 -44
arch/x86/include/asm/kvm_emulate.h
··· 14 14 #include <asm/desc_defs.h> 15 15 16 16 struct x86_emulate_ctxt; 17 + enum x86_intercept; 18 + enum x86_intercept_stage; 17 19 18 20 struct x86_exception { 19 21 u8 vector; ··· 23 21 u16 error_code; 24 22 bool nested_page_fault; 25 23 u64 address; /* cr2 or nested page fault gpa */ 24 + }; 25 + 26 + /* 27 + * This struct is used to carry enough information from the instruction 28 + * decoder to main KVM so that a decision can be made whether the 29 + * instruction needs to be intercepted or not. 30 + */ 31 + struct x86_instruction_info { 32 + u8 intercept; /* which intercept */ 33 + u8 rep_prefix; /* rep prefix? */ 34 + u8 modrm_mod; /* mod part of modrm */ 35 + u8 modrm_reg; /* index of register used */ 36 + u8 modrm_rm; /* rm part of modrm */ 37 + u64 src_val; /* value of source operand */ 38 + u8 src_bytes; /* size of source operand */ 39 + u8 dst_bytes; /* size of destination operand */ 40 + u8 ad_bytes; /* size of src/dst address */ 41 + u64 next_rip; /* rip following the instruction */ 26 42 }; 27 43 28 44 /* ··· 82 62 #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ 83 63 #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ 84 64 #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ 65 + #define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ 85 66 86 67 struct x86_emulate_ops { 87 68 /* ··· 92 71 * @val: [OUT] Value read from memory, zero-extended to 'u_long'. 93 72 * @bytes: [IN ] Number of bytes to read from memory. 94 73 */ 95 - int (*read_std)(unsigned long addr, void *val, 96 - unsigned int bytes, struct kvm_vcpu *vcpu, 74 + int (*read_std)(struct x86_emulate_ctxt *ctxt, 75 + unsigned long addr, void *val, 76 + unsigned int bytes, 97 77 struct x86_exception *fault); 98 78 99 79 /* ··· 104 82 * @val: [OUT] Value write to memory, zero-extended to 'u_long'. 105 83 * @bytes: [IN ] Number of bytes to write to memory. 
106 84 */ 107 - int (*write_std)(unsigned long addr, void *val, 108 - unsigned int bytes, struct kvm_vcpu *vcpu, 85 + int (*write_std)(struct x86_emulate_ctxt *ctxt, 86 + unsigned long addr, void *val, unsigned int bytes, 109 87 struct x86_exception *fault); 110 88 /* 111 89 * fetch: Read bytes of standard (non-emulated/special) memory. ··· 114 92 * @val: [OUT] Value read from memory, zero-extended to 'u_long'. 115 93 * @bytes: [IN ] Number of bytes to read from memory. 116 94 */ 117 - int (*fetch)(unsigned long addr, void *val, 118 - unsigned int bytes, struct kvm_vcpu *vcpu, 95 + int (*fetch)(struct x86_emulate_ctxt *ctxt, 96 + unsigned long addr, void *val, unsigned int bytes, 119 97 struct x86_exception *fault); 120 98 121 99 /* ··· 124 102 * @val: [OUT] Value read from memory, zero-extended to 'u_long'. 125 103 * @bytes: [IN ] Number of bytes to read from memory. 126 104 */ 127 - int (*read_emulated)(unsigned long addr, 128 - void *val, 129 - unsigned int bytes, 130 - struct x86_exception *fault, 131 - struct kvm_vcpu *vcpu); 105 + int (*read_emulated)(struct x86_emulate_ctxt *ctxt, 106 + unsigned long addr, void *val, unsigned int bytes, 107 + struct x86_exception *fault); 132 108 133 109 /* 134 110 * write_emulated: Write bytes to emulated/special memory area. ··· 135 115 * required). 136 116 * @bytes: [IN ] Number of bytes to write to memory. 137 117 */ 138 - int (*write_emulated)(unsigned long addr, 139 - const void *val, 118 + int (*write_emulated)(struct x86_emulate_ctxt *ctxt, 119 + unsigned long addr, const void *val, 140 120 unsigned int bytes, 141 - struct x86_exception *fault, 142 - struct kvm_vcpu *vcpu); 121 + struct x86_exception *fault); 143 122 144 123 /* 145 124 * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an ··· 148 129 * @new: [IN ] Value to write to @addr. 149 130 * @bytes: [IN ] Number of bytes to access using CMPXCHG. 
150 131 */ 151 - int (*cmpxchg_emulated)(unsigned long addr, 132 + int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, 133 + unsigned long addr, 152 134 const void *old, 153 135 const void *new, 154 136 unsigned int bytes, 155 - struct x86_exception *fault, 156 - struct kvm_vcpu *vcpu); 137 + struct x86_exception *fault); 138 + void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); 157 139 158 - int (*pio_in_emulated)(int size, unsigned short port, void *val, 159 - unsigned int count, struct kvm_vcpu *vcpu); 140 + int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, 141 + int size, unsigned short port, void *val, 142 + unsigned int count); 160 143 161 - int (*pio_out_emulated)(int size, unsigned short port, const void *val, 162 - unsigned int count, struct kvm_vcpu *vcpu); 144 + int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, 145 + int size, unsigned short port, const void *val, 146 + unsigned int count); 163 147 164 - bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, 165 - int seg, struct kvm_vcpu *vcpu); 166 - void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, 167 - int seg, struct kvm_vcpu *vcpu); 168 - u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); 169 - void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); 170 - unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); 171 - void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); 172 - void (*get_idt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); 173 - ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); 174 - int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); 175 - int (*cpl)(struct kvm_vcpu *vcpu); 176 - int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); 177 - int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); 178 - int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 179 - int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); 148 + bool 
(*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, 149 + struct desc_struct *desc, u32 *base3, int seg); 150 + void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, 151 + struct desc_struct *desc, u32 base3, int seg); 152 + unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, 153 + int seg); 154 + void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); 155 + void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); 156 + void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); 157 + void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); 158 + ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); 159 + int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); 160 + int (*cpl)(struct x86_emulate_ctxt *ctxt); 161 + int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); 162 + int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); 163 + int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); 164 + int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); 165 + void (*halt)(struct x86_emulate_ctxt *ctxt); 166 + void (*wbinvd)(struct x86_emulate_ctxt *ctxt); 167 + int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); 168 + void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ 169 + void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ 170 + int (*intercept)(struct x86_emulate_ctxt *ctxt, 171 + struct x86_instruction_info *info, 172 + enum x86_intercept_stage stage); 180 173 }; 174 + 175 + typedef u32 __attribute__((vector_size(16))) sse128_t; 181 176 182 177 /* Type, address-of, and value of an instruction's operand. 
*/ 183 178 struct operand { 184 - enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; 179 + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; 185 180 unsigned int bytes; 186 181 union { 187 182 unsigned long orig_val; ··· 207 174 ulong ea; 208 175 unsigned seg; 209 176 } mem; 177 + unsigned xmm; 210 178 } addr; 211 179 union { 212 180 unsigned long val; 213 181 u64 val64; 214 182 char valptr[sizeof(unsigned long) + 2]; 183 + sse128_t vec_val; 215 184 }; 216 185 }; 217 186 ··· 232 197 struct decode_cache { 233 198 u8 twobyte; 234 199 u8 b; 200 + u8 intercept; 235 201 u8 lock_prefix; 236 202 u8 rep_prefix; 237 203 u8 op_bytes; ··· 245 209 u8 seg_override; 246 210 unsigned int d; 247 211 int (*execute)(struct x86_emulate_ctxt *ctxt); 212 + int (*check_perm)(struct x86_emulate_ctxt *ctxt); 248 213 unsigned long regs[NR_VCPU_REGS]; 249 214 unsigned long eip; 250 215 /* modrm */ ··· 264 227 struct x86_emulate_ops *ops; 265 228 266 229 /* Register state before/after emulation. */ 267 - struct kvm_vcpu *vcpu; 268 - 269 230 unsigned long eflags; 270 231 unsigned long eip; /* eip before instruction emulation */ 271 232 /* Emulated execution mode, represented by an X86EMUL_MODE value. */ 272 233 int mode; 273 - u32 cs_base; 274 234 275 235 /* interruptibility state, as a result of execution of STI or MOV SS */ 276 236 int interruptibility; 277 237 238 + bool guest_mode; /* guest running a nested guest */ 278 239 bool perm_ok; /* do not check permissions if true */ 279 240 bool only_vendor_specific_insn; 280 241 ··· 284 249 }; 285 250 286 251 /* Repeat String Operation Prefix */ 287 - #define REPE_PREFIX 1 288 - #define REPNE_PREFIX 2 252 + #define REPE_PREFIX 0xf3 253 + #define REPNE_PREFIX 0xf2 289 254 290 255 /* Execution mode, passed to the emulator. */ 291 256 #define X86EMUL_MODE_REAL 0 /* Real mode. */ ··· 293 258 #define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ 294 259 #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. 
*/ 295 260 #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ 261 + 262 + /* any protected mode */ 263 + #define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ 264 + X86EMUL_MODE_PROT64) 265 + 266 + enum x86_intercept_stage { 267 + X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ 268 + X86_ICPT_PRE_EXCEPT, 269 + X86_ICPT_POST_EXCEPT, 270 + X86_ICPT_POST_MEMACCESS, 271 + }; 272 + 273 + enum x86_intercept { 274 + x86_intercept_none, 275 + x86_intercept_cr_read, 276 + x86_intercept_cr_write, 277 + x86_intercept_clts, 278 + x86_intercept_lmsw, 279 + x86_intercept_smsw, 280 + x86_intercept_dr_read, 281 + x86_intercept_dr_write, 282 + x86_intercept_lidt, 283 + x86_intercept_sidt, 284 + x86_intercept_lgdt, 285 + x86_intercept_sgdt, 286 + x86_intercept_lldt, 287 + x86_intercept_sldt, 288 + x86_intercept_ltr, 289 + x86_intercept_str, 290 + x86_intercept_rdtsc, 291 + x86_intercept_rdpmc, 292 + x86_intercept_pushf, 293 + x86_intercept_popf, 294 + x86_intercept_cpuid, 295 + x86_intercept_rsm, 296 + x86_intercept_iret, 297 + x86_intercept_intn, 298 + x86_intercept_invd, 299 + x86_intercept_pause, 300 + x86_intercept_hlt, 301 + x86_intercept_invlpg, 302 + x86_intercept_invlpga, 303 + x86_intercept_vmrun, 304 + x86_intercept_vmload, 305 + x86_intercept_vmsave, 306 + x86_intercept_vmmcall, 307 + x86_intercept_stgi, 308 + x86_intercept_clgi, 309 + x86_intercept_skinit, 310 + x86_intercept_rdtscp, 311 + x86_intercept_icebp, 312 + x86_intercept_wbinvd, 313 + x86_intercept_monitor, 314 + x86_intercept_mwait, 315 + x86_intercept_rdmsr, 316 + x86_intercept_wrmsr, 317 + x86_intercept_in, 318 + x86_intercept_ins, 319 + x86_intercept_out, 320 + x86_intercept_outs, 321 + 322 + nr_x86_intercepts 323 + }; 296 324 297 325 /* Host execution mode. 
*/ 298 326 #if defined(CONFIG_X86_32) ··· 368 270 #define EMULATION_FAILED -1 369 271 #define EMULATION_OK 0 370 272 #define EMULATION_RESTART 1 273 + #define EMULATION_INTERCEPTED 2 371 274 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); 372 275 int emulator_task_switch(struct x86_emulate_ctxt *ctxt, 373 276 u16 tss_selector, int reason,
+42 -13
arch/x86/include/asm/kvm_host.h
··· 30 30 #define KVM_MEMORY_SLOTS 32 31 31 /* memory slots that does not exposed to userspace */ 32 32 #define KVM_PRIVATE_MEM_SLOTS 4 33 + #define KVM_MMIO_SIZE 16 33 34 34 35 #define KVM_PIO_PAGE_OFFSET 1 35 36 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 37 + 38 + #define CR0_RESERVED_BITS \ 39 + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ 40 + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ 41 + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) 36 42 37 43 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) 38 44 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) 39 45 #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 40 46 0xFFFFFF0000000000ULL) 47 + #define CR4_RESERVED_BITS \ 48 + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 49 + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 50 + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ 51 + | X86_CR4_OSXSAVE \ 52 + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 53 + 54 + #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 55 + 56 + 41 57 42 58 #define INVALID_PAGE (~(hpa_t)0) 43 59 #define VALID_PAGE(x) ((x) != INVALID_PAGE) ··· 134 118 enum kvm_reg_ex { 135 119 VCPU_EXREG_PDPTR = NR_VCPU_REGS, 136 120 VCPU_EXREG_CR3, 121 + VCPU_EXREG_RFLAGS, 122 + VCPU_EXREG_CPL, 123 + VCPU_EXREG_SEGMENTS, 137 124 }; 138 125 139 126 enum { ··· 275 256 struct kvm_mmu_page *sp); 276 257 void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); 277 258 void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 278 - u64 *spte, const void *pte, unsigned long mmu_seq); 259 + u64 *spte, const void *pte); 279 260 hpa_t root_hpa; 280 261 int root_level; 281 262 int shadow_root_level; ··· 359 340 struct fpu guest_fpu; 360 341 u64 xcr0; 361 342 362 - gva_t mmio_fault_cr2; 363 343 struct kvm_pio_request pio; 364 344 void *pio_data; 365 345 ··· 385 367 /* emulate context */ 386 368 387 369 struct x86_emulate_ctxt emulate_ctxt; 370 + bool 
emulate_regs_need_sync_to_vcpu; 371 + bool emulate_regs_need_sync_from_vcpu; 388 372 389 373 gpa_t time; 390 374 struct pvclock_vcpu_time_info hv_clock; 391 375 unsigned int hw_tsc_khz; 392 376 unsigned int time_offset; 393 377 struct page *time_page; 394 - u64 last_host_tsc; 395 378 u64 last_guest_tsc; 396 379 u64 last_kernel_ns; 397 380 u64 last_tsc_nsec; 398 381 u64 last_tsc_write; 382 + u32 virtual_tsc_khz; 399 383 bool tsc_catchup; 384 + u32 tsc_catchup_mult; 385 + s8 tsc_catchup_shift; 400 386 401 387 bool nmi_pending; 402 388 bool nmi_injected; ··· 470 448 u64 last_tsc_nsec; 471 449 u64 last_tsc_offset; 472 450 u64 last_tsc_write; 473 - u32 virtual_tsc_khz; 474 - u32 virtual_tsc_mult; 475 - s8 virtual_tsc_shift; 476 451 477 452 struct kvm_xen_hvm_config xen_hvm_config; 478 453 ··· 520 501 u32 irq_injections; 521 502 u32 nmi_injections; 522 503 }; 504 + 505 + struct x86_instruction_info; 523 506 524 507 struct kvm_x86_ops { 525 508 int (*cpu_has_kvm_support)(void); /* __init */ ··· 607 586 608 587 bool (*has_wbinvd_exit)(void); 609 588 589 + void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); 610 590 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); 611 591 592 + u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); 593 + 612 594 void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); 595 + 596 + int (*check_intercept)(struct kvm_vcpu *vcpu, 597 + struct x86_instruction_info *info, 598 + enum x86_intercept_stage stage); 599 + 613 600 const struct trace_print_flags *exit_reasons_str; 614 601 }; 615 602 ··· 656 627 657 628 extern bool tdp_enabled; 658 629 630 + /* control of guest tsc rate supported? 
*/ 631 + extern bool kvm_has_tsc_control; 632 + /* minimum supported tsc_khz for guests */ 633 + extern u32 kvm_min_guest_tsc_khz; 634 + /* maximum supported tsc_khz for guests */ 635 + extern u32 kvm_max_guest_tsc_khz; 636 + 659 637 enum emulation_result { 660 638 EMULATE_DONE, /* no further processing */ 661 639 EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ ··· 681 645 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); 682 646 } 683 647 684 - void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 685 - void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 686 - 687 648 void kvm_enable_efer_bits(u64); 688 649 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); 689 650 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); ··· 690 657 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); 691 658 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); 692 659 int kvm_emulate_halt(struct kvm_vcpu *vcpu); 693 - int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); 694 - int emulate_clts(struct kvm_vcpu *vcpu); 695 660 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); 696 661 697 662 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); ··· 751 720 struct x86_exception *exception); 752 721 753 722 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 754 - 755 - int kvm_fix_hypercall(struct kvm_vcpu *vcpu); 756 723 757 724 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, 758 725 void *insn, int insn_len);
+1
arch/x86/include/asm/msr-index.h
··· 118 118 complete list. */ 119 119 120 120 #define MSR_AMD64_PATCH_LEVEL 0x0000008b 121 + #define MSR_AMD64_TSC_RATIO 0xc0000104 121 122 #define MSR_AMD64_NB_CFG 0xc001001f 122 123 #define MSR_AMD64_PATCH_LOADER 0xc0010020 123 124 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
+1189 -565
arch/x86/kvm/emulate.c
··· 73 73 #define MemAbs (1<<11) /* Memory operand is absolute displacement */ 74 74 #define String (1<<12) /* String instruction (rep capable) */ 75 75 #define Stack (1<<13) /* Stack instruction (push/pop) */ 76 + #define GroupMask (7<<14) /* Opcode uses one of the group mechanisms */ 76 77 #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ 77 - #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 78 + #define GroupDual (2<<14) /* Alternate decoding of mod == 3 */ 79 + #define Prefix (3<<14) /* Instruction varies with 66/f2/f3 prefix */ 80 + #define RMExt (4<<14) /* Opcode extension in ModRM r/m if mod == 3 */ 81 + #define Sse (1<<17) /* SSE Vector instruction */ 78 82 /* Misc flags */ 83 + #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */ 79 84 #define VendorSpecific (1<<22) /* Vendor specific instruction */ 80 85 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ 81 86 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ ··· 107 102 108 103 struct opcode { 109 104 u32 flags; 105 + u8 intercept; 110 106 union { 111 107 int (*execute)(struct x86_emulate_ctxt *ctxt); 112 108 struct opcode *group; 113 109 struct group_dual *gdual; 110 + struct gprefix *gprefix; 114 111 } u; 112 + int (*check_perm)(struct x86_emulate_ctxt *ctxt); 115 113 }; 116 114 117 115 struct group_dual { 118 116 struct opcode mod012[8]; 119 117 struct opcode mod3[8]; 118 + }; 119 + 120 + struct gprefix { 121 + struct opcode pfx_no; 122 + struct opcode pfx_66; 123 + struct opcode pfx_f2; 124 + struct opcode pfx_f3; 120 125 }; 121 126 122 127 /* EFLAGS bit definitions. 
*/ ··· 263 248 "w", "r", _LO32, "r", "", "r") 264 249 265 250 /* Instruction has three operands and one operand is stored in ECX register */ 266 - #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ 267 - do { \ 268 - unsigned long _tmp; \ 269 - _type _clv = (_cl).val; \ 270 - _type _srcv = (_src).val; \ 271 - _type _dstv = (_dst).val; \ 272 - \ 273 - __asm__ __volatile__ ( \ 274 - _PRE_EFLAGS("0", "5", "2") \ 275 - _op _suffix " %4,%1 \n" \ 276 - _POST_EFLAGS("0", "5", "2") \ 277 - : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ 278 - : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ 279 - ); \ 280 - \ 281 - (_cl).val = (unsigned long) _clv; \ 282 - (_src).val = (unsigned long) _srcv; \ 283 - (_dst).val = (unsigned long) _dstv; \ 251 + #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ 252 + do { \ 253 + unsigned long _tmp; \ 254 + _type _clv = (_cl).val; \ 255 + _type _srcv = (_src).val; \ 256 + _type _dstv = (_dst).val; \ 257 + \ 258 + __asm__ __volatile__ ( \ 259 + _PRE_EFLAGS("0", "5", "2") \ 260 + _op _suffix " %4,%1 \n" \ 261 + _POST_EFLAGS("0", "5", "2") \ 262 + : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ 263 + : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ 264 + ); \ 265 + \ 266 + (_cl).val = (unsigned long) _clv; \ 267 + (_src).val = (unsigned long) _srcv; \ 268 + (_dst).val = (unsigned long) _dstv; \ 284 269 } while (0) 285 270 286 - #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ 287 - do { \ 288 - switch ((_dst).bytes) { \ 289 - case 2: \ 290 - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 291 - "w", unsigned short); \ 292 - break; \ 293 - case 4: \ 294 - __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 295 - "l", unsigned int); \ 296 - break; \ 297 - case 8: \ 298 - ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 299 - "q", unsigned long)); \ 300 - break; \ 301 - } \ 271 + #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ 272 + do { \ 273 + switch ((_dst).bytes) { \ 274 + case 2: 
\ 275 + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 276 + "w", unsigned short); \ 277 + break; \ 278 + case 4: \ 279 + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 280 + "l", unsigned int); \ 281 + break; \ 282 + case 8: \ 283 + ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 284 + "q", unsigned long)); \ 285 + break; \ 286 + } \ 302 287 } while (0) 303 288 304 289 #define __emulate_1op(_op, _dst, _eflags, _suffix) \ ··· 361 346 } while (0) 362 347 363 348 /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ 364 - #define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ 365 - do { \ 366 - switch((_src).bytes) { \ 367 - case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \ 368 - case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "w"); break; \ 369 - case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \ 370 - case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \ 349 + #define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags) \ 350 + do { \ 351 + switch((_src).bytes) { \ 352 + case 1: \ 353 + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ 354 + _eflags, "b"); \ 355 + break; \ 356 + case 2: \ 357 + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ 358 + _eflags, "w"); \ 359 + break; \ 360 + case 4: \ 361 + __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ 362 + _eflags, "l"); \ 363 + break; \ 364 + case 8: \ 365 + ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \ 366 + _eflags, "q")); \ 367 + break; \ 371 368 } \ 372 369 } while (0) 373 370 ··· 415 388 (_type)_x; \ 416 389 }) 417 390 418 - #define insn_fetch_arr(_arr, _size, _eip) \ 391 + #define insn_fetch_arr(_arr, _size, _eip) \ 419 392 ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ 420 393 if (rc != X86EMUL_CONTINUE) \ 421 394 goto done; \ 422 395 (_eip) += (_size); \ 423 396 }) 397 + 398 + static int emulator_check_intercept(struct x86_emulate_ctxt 
*ctxt, 399 + enum x86_intercept intercept, 400 + enum x86_intercept_stage stage) 401 + { 402 + struct x86_instruction_info info = { 403 + .intercept = intercept, 404 + .rep_prefix = ctxt->decode.rep_prefix, 405 + .modrm_mod = ctxt->decode.modrm_mod, 406 + .modrm_reg = ctxt->decode.modrm_reg, 407 + .modrm_rm = ctxt->decode.modrm_rm, 408 + .src_val = ctxt->decode.src.val64, 409 + .src_bytes = ctxt->decode.src.bytes, 410 + .dst_bytes = ctxt->decode.dst.bytes, 411 + .ad_bytes = ctxt->decode.ad_bytes, 412 + .next_rip = ctxt->eip, 413 + }; 414 + 415 + return ctxt->ops->intercept(ctxt, &info, stage); 416 + } 424 417 425 418 static inline unsigned long ad_mask(struct decode_cache *c) 426 419 { ··· 477 430 register_address_increment(c, &c->eip, rel); 478 431 } 479 432 433 + static u32 desc_limit_scaled(struct desc_struct *desc) 434 + { 435 + u32 limit = get_desc_limit(desc); 436 + 437 + return desc->g ? (limit << 12) | 0xfff : limit; 438 + } 439 + 480 440 static void set_seg_override(struct decode_cache *c, int seg) 481 441 { 482 442 c->has_seg_override = true; ··· 496 442 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) 497 443 return 0; 498 444 499 - return ops->get_cached_segment_base(seg, ctxt->vcpu); 445 + return ops->get_cached_segment_base(ctxt, seg); 500 446 } 501 447 502 448 static unsigned seg_override(struct x86_emulate_ctxt *ctxt, 503 - struct x86_emulate_ops *ops, 504 449 struct decode_cache *c) 505 450 { 506 451 if (!c->has_seg_override) 507 452 return 0; 508 453 509 454 return c->seg_override; 510 - } 511 - 512 - static ulong linear(struct x86_emulate_ctxt *ctxt, 513 - struct segmented_address addr) 514 - { 515 - struct decode_cache *c = &ctxt->decode; 516 - ulong la; 517 - 518 - la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; 519 - if (c->ad_bytes != 8) 520 - la &= (u32)-1; 521 - return la; 522 455 } 523 456 524 457 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, ··· 517 476 return X86EMUL_PROPAGATE_FAULT; 518 477 } 519 
478 479 + static int emulate_db(struct x86_emulate_ctxt *ctxt) 480 + { 481 + return emulate_exception(ctxt, DB_VECTOR, 0, false); 482 + } 483 + 520 484 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err) 521 485 { 522 486 return emulate_exception(ctxt, GP_VECTOR, err, true); 487 + } 488 + 489 + static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err) 490 + { 491 + return emulate_exception(ctxt, SS_VECTOR, err, true); 523 492 } 524 493 525 494 static int emulate_ud(struct x86_emulate_ctxt *ctxt) ··· 547 496 return emulate_exception(ctxt, DE_VECTOR, 0, false); 548 497 } 549 498 499 + static int emulate_nm(struct x86_emulate_ctxt *ctxt) 500 + { 501 + return emulate_exception(ctxt, NM_VECTOR, 0, false); 502 + } 503 + 504 + static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) 505 + { 506 + u16 selector; 507 + struct desc_struct desc; 508 + 509 + ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg); 510 + return selector; 511 + } 512 + 513 + static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, 514 + unsigned seg) 515 + { 516 + u16 dummy; 517 + u32 base3; 518 + struct desc_struct desc; 519 + 520 + ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg); 521 + ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); 522 + } 523 + 524 + static int __linearize(struct x86_emulate_ctxt *ctxt, 525 + struct segmented_address addr, 526 + unsigned size, bool write, bool fetch, 527 + ulong *linear) 528 + { 529 + struct decode_cache *c = &ctxt->decode; 530 + struct desc_struct desc; 531 + bool usable; 532 + ulong la; 533 + u32 lim; 534 + u16 sel; 535 + unsigned cpl, rpl; 536 + 537 + la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; 538 + switch (ctxt->mode) { 539 + case X86EMUL_MODE_REAL: 540 + break; 541 + case X86EMUL_MODE_PROT64: 542 + if (((signed long)la << 16) >> 16 != la) 543 + return emulate_gp(ctxt, 0); 544 + break; 545 + default: 546 + usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, 547 + 
addr.seg); 548 + if (!usable) 549 + goto bad; 550 + /* code segment or read-only data segment */ 551 + if (((desc.type & 8) || !(desc.type & 2)) && write) 552 + goto bad; 553 + /* unreadable code segment */ 554 + if (!fetch && (desc.type & 8) && !(desc.type & 2)) 555 + goto bad; 556 + lim = desc_limit_scaled(&desc); 557 + if ((desc.type & 8) || !(desc.type & 4)) { 558 + /* expand-up segment */ 559 + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) 560 + goto bad; 561 + } else { 562 + /* exapand-down segment */ 563 + if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) 564 + goto bad; 565 + lim = desc.d ? 0xffffffff : 0xffff; 566 + if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) 567 + goto bad; 568 + } 569 + cpl = ctxt->ops->cpl(ctxt); 570 + rpl = sel & 3; 571 + cpl = max(cpl, rpl); 572 + if (!(desc.type & 8)) { 573 + /* data segment */ 574 + if (cpl > desc.dpl) 575 + goto bad; 576 + } else if ((desc.type & 8) && !(desc.type & 4)) { 577 + /* nonconforming code segment */ 578 + if (cpl != desc.dpl) 579 + goto bad; 580 + } else if ((desc.type & 8) && (desc.type & 4)) { 581 + /* conforming code segment */ 582 + if (cpl < desc.dpl) 583 + goto bad; 584 + } 585 + break; 586 + } 587 + if (fetch ? 
ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8) 588 + la &= (u32)-1; 589 + *linear = la; 590 + return X86EMUL_CONTINUE; 591 + bad: 592 + if (addr.seg == VCPU_SREG_SS) 593 + return emulate_ss(ctxt, addr.seg); 594 + else 595 + return emulate_gp(ctxt, addr.seg); 596 + } 597 + 598 + static int linearize(struct x86_emulate_ctxt *ctxt, 599 + struct segmented_address addr, 600 + unsigned size, bool write, 601 + ulong *linear) 602 + { 603 + return __linearize(ctxt, addr, size, write, false, linear); 604 + } 605 + 606 + 607 + static int segmented_read_std(struct x86_emulate_ctxt *ctxt, 608 + struct segmented_address addr, 609 + void *data, 610 + unsigned size) 611 + { 612 + int rc; 613 + ulong linear; 614 + 615 + rc = linearize(ctxt, addr, size, false, &linear); 616 + if (rc != X86EMUL_CONTINUE) 617 + return rc; 618 + return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); 619 + } 620 + 550 621 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, 551 622 struct x86_emulate_ops *ops, 552 623 unsigned long eip, u8 *dest) ··· 678 505 int size, cur_size; 679 506 680 507 if (eip == fc->end) { 508 + unsigned long linear; 509 + struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip}; 681 510 cur_size = fc->end - fc->start; 682 511 size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); 683 - rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, 684 - size, ctxt->vcpu, &ctxt->exception); 512 + rc = __linearize(ctxt, addr, size, false, true, &linear); 513 + if (rc != X86EMUL_CONTINUE) 514 + return rc; 515 + rc = ops->fetch(ctxt, linear, fc->data + cur_size, 516 + size, &ctxt->exception); 685 517 if (rc != X86EMUL_CONTINUE) 686 518 return rc; 687 519 fc->end += size; ··· 729 551 } 730 552 731 553 static int read_descriptor(struct x86_emulate_ctxt *ctxt, 732 - struct x86_emulate_ops *ops, 733 554 struct segmented_address addr, 734 555 u16 *size, unsigned long *address, int op_bytes) 735 556 { ··· 737 560 if (op_bytes == 2) 738 561 op_bytes 
= 3; 739 562 *address = 0; 740 - rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2, 741 - ctxt->vcpu, &ctxt->exception); 563 + rc = segmented_read_std(ctxt, addr, size, 2); 742 564 if (rc != X86EMUL_CONTINUE) 743 565 return rc; 744 566 addr.ea += 2; 745 - rc = ops->read_std(linear(ctxt, addr), address, op_bytes, 746 - ctxt->vcpu, &ctxt->exception); 567 + rc = segmented_read_std(ctxt, addr, address, op_bytes); 747 568 return rc; 748 569 } 749 570 ··· 798 623 } 799 624 } 800 625 801 - static void decode_register_operand(struct operand *op, 626 + static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) 627 + { 628 + ctxt->ops->get_fpu(ctxt); 629 + switch (reg) { 630 + case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; 631 + case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; 632 + case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; 633 + case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; 634 + case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; 635 + case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; 636 + case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; 637 + case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; 638 + #ifdef CONFIG_X86_64 639 + case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; 640 + case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; 641 + case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; 642 + case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; 643 + case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; 644 + case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; 645 + case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; 646 + case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; 647 + #endif 648 + default: BUG(); 649 + } 650 + ctxt->ops->put_fpu(ctxt); 651 + } 652 + 653 + static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, 654 + int reg) 655 + { 656 + ctxt->ops->get_fpu(ctxt); 657 + switch (reg) { 658 + case 0: asm("movdqu %0, 
%%xmm0" : : "m"(*data)); break; 659 + case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; 660 + case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; 661 + case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; 662 + case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; 663 + case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; 664 + case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; 665 + case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; 666 + #ifdef CONFIG_X86_64 667 + case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; 668 + case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; 669 + case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; 670 + case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; 671 + case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; 672 + case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; 673 + case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; 674 + case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; 675 + #endif 676 + default: BUG(); 677 + } 678 + ctxt->ops->put_fpu(ctxt); 679 + } 680 + 681 + static void decode_register_operand(struct x86_emulate_ctxt *ctxt, 682 + struct operand *op, 802 683 struct decode_cache *c, 803 684 int inhibit_bytereg) 804 685 { ··· 863 632 864 633 if (!(c->d & ModRM)) 865 634 reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); 635 + 636 + if (c->d & Sse) { 637 + op->type = OP_XMM; 638 + op->bytes = 16; 639 + op->addr.xmm = reg; 640 + read_sse_reg(ctxt, &op->vec_val, reg); 641 + return; 642 + } 643 + 866 644 op->type = OP_REG; 867 645 if ((c->d & ByteOp) && !inhibit_bytereg) { 868 646 op->addr.reg = decode_register(reg, c->regs, highbyte_regs); ··· 911 671 op->bytes = (c->d & ByteOp) ? 
1 : c->op_bytes; 912 672 op->addr.reg = decode_register(c->modrm_rm, 913 673 c->regs, c->d & ByteOp); 674 + if (c->d & Sse) { 675 + op->type = OP_XMM; 676 + op->bytes = 16; 677 + op->addr.xmm = c->modrm_rm; 678 + read_sse_reg(ctxt, &op->vec_val, c->modrm_rm); 679 + return rc; 680 + } 914 681 fetch_register_operand(op); 915 682 return rc; 916 683 } ··· 1066 819 if (mc->pos < mc->end) 1067 820 goto read_cached; 1068 821 1069 - rc = ops->read_emulated(addr, mc->data + mc->end, n, 1070 - &ctxt->exception, ctxt->vcpu); 822 + rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, 823 + &ctxt->exception); 1071 824 if (rc != X86EMUL_CONTINUE) 1072 825 return rc; 1073 826 mc->end += n; ··· 1079 832 addr += n; 1080 833 } 1081 834 return X86EMUL_CONTINUE; 835 + } 836 + 837 + static int segmented_read(struct x86_emulate_ctxt *ctxt, 838 + struct segmented_address addr, 839 + void *data, 840 + unsigned size) 841 + { 842 + int rc; 843 + ulong linear; 844 + 845 + rc = linearize(ctxt, addr, size, false, &linear); 846 + if (rc != X86EMUL_CONTINUE) 847 + return rc; 848 + return read_emulated(ctxt, ctxt->ops, linear, data, size); 849 + } 850 + 851 + static int segmented_write(struct x86_emulate_ctxt *ctxt, 852 + struct segmented_address addr, 853 + const void *data, 854 + unsigned size) 855 + { 856 + int rc; 857 + ulong linear; 858 + 859 + rc = linearize(ctxt, addr, size, true, &linear); 860 + if (rc != X86EMUL_CONTINUE) 861 + return rc; 862 + return ctxt->ops->write_emulated(ctxt, linear, data, size, 863 + &ctxt->exception); 864 + } 865 + 866 + static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, 867 + struct segmented_address addr, 868 + const void *orig_data, const void *data, 869 + unsigned size) 870 + { 871 + int rc; 872 + ulong linear; 873 + 874 + rc = linearize(ctxt, addr, size, true, &linear); 875 + if (rc != X86EMUL_CONTINUE) 876 + return rc; 877 + return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data, 878 + size, &ctxt->exception); 1082 879 } 1083 880 
1084 881 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, ··· 1145 854 if (n == 0) 1146 855 n = 1; 1147 856 rc->pos = rc->end = 0; 1148 - if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) 857 + if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) 1149 858 return 0; 1150 859 rc->end = n * size; 1151 860 } ··· 1155 864 return 1; 1156 865 } 1157 866 1158 - static u32 desc_limit_scaled(struct desc_struct *desc) 1159 - { 1160 - u32 limit = get_desc_limit(desc); 1161 - 1162 - return desc->g ? (limit << 12) | 0xfff : limit; 1163 - } 1164 - 1165 867 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, 1166 868 struct x86_emulate_ops *ops, 1167 869 u16 selector, struct desc_ptr *dt) 1168 870 { 1169 871 if (selector & 1 << 2) { 1170 872 struct desc_struct desc; 873 + u16 sel; 874 + 1171 875 memset (dt, 0, sizeof *dt); 1172 - if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, 1173 - ctxt->vcpu)) 876 + if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) 1174 877 return; 1175 878 1176 879 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? 
*/ 1177 880 dt->address = get_desc_base(&desc); 1178 881 } else 1179 - ops->get_gdt(dt, ctxt->vcpu); 882 + ops->get_gdt(ctxt, dt); 1180 883 } 1181 884 1182 885 /* allowed just for 8 bytes segments */ ··· 1188 903 if (dt.size < index * 8 + 7) 1189 904 return emulate_gp(ctxt, selector & 0xfffc); 1190 905 addr = dt.address + index * 8; 1191 - ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, 1192 - &ctxt->exception); 906 + ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); 1193 907 1194 908 return ret; 1195 909 } ··· 1209 925 return emulate_gp(ctxt, selector & 0xfffc); 1210 926 1211 927 addr = dt.address + index * 8; 1212 - ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, 1213 - &ctxt->exception); 928 + ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); 1214 929 1215 930 return ret; 1216 931 } ··· 1269 986 1270 987 rpl = selector & 3; 1271 988 dpl = seg_desc.dpl; 1272 - cpl = ops->cpl(ctxt->vcpu); 989 + cpl = ops->cpl(ctxt); 1273 990 1274 991 switch (seg) { 1275 992 case VCPU_SREG_SS: ··· 1325 1042 return ret; 1326 1043 } 1327 1044 load: 1328 - ops->set_segment_selector(selector, seg, ctxt->vcpu); 1329 - ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); 1045 + ops->set_segment(ctxt, selector, &seg_desc, 0, seg); 1330 1046 return X86EMUL_CONTINUE; 1331 1047 exception: 1332 1048 emulate_exception(ctxt, err_vec, err_code, true); ··· 1351 1069 } 1352 1070 } 1353 1071 1354 - static inline int writeback(struct x86_emulate_ctxt *ctxt, 1355 - struct x86_emulate_ops *ops) 1072 + static int writeback(struct x86_emulate_ctxt *ctxt) 1356 1073 { 1357 1074 int rc; 1358 1075 struct decode_cache *c = &ctxt->decode; ··· 1362 1081 break; 1363 1082 case OP_MEM: 1364 1083 if (c->lock_prefix) 1365 - rc = ops->cmpxchg_emulated( 1366 - linear(ctxt, c->dst.addr.mem), 1367 - &c->dst.orig_val, 1368 - &c->dst.val, 1369 - c->dst.bytes, 1370 - &ctxt->exception, 1371 - ctxt->vcpu); 1084 + rc = segmented_cmpxchg(ctxt, 1085 + 
c->dst.addr.mem, 1086 + &c->dst.orig_val, 1087 + &c->dst.val, 1088 + c->dst.bytes); 1372 1089 else 1373 - rc = ops->write_emulated( 1374 - linear(ctxt, c->dst.addr.mem), 1375 - &c->dst.val, 1376 - c->dst.bytes, 1377 - &ctxt->exception, 1378 - ctxt->vcpu); 1090 + rc = segmented_write(ctxt, 1091 + c->dst.addr.mem, 1092 + &c->dst.val, 1093 + c->dst.bytes); 1379 1094 if (rc != X86EMUL_CONTINUE) 1380 1095 return rc; 1096 + break; 1097 + case OP_XMM: 1098 + write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm); 1381 1099 break; 1382 1100 case OP_NONE: 1383 1101 /* no writeback */ ··· 1387 1107 return X86EMUL_CONTINUE; 1388 1108 } 1389 1109 1390 - static inline void emulate_push(struct x86_emulate_ctxt *ctxt, 1391 - struct x86_emulate_ops *ops) 1110 + static int em_push(struct x86_emulate_ctxt *ctxt) 1392 1111 { 1393 1112 struct decode_cache *c = &ctxt->decode; 1113 + struct segmented_address addr; 1394 1114 1395 - c->dst.type = OP_MEM; 1396 - c->dst.bytes = c->op_bytes; 1397 - c->dst.val = c->src.val; 1398 1115 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); 1399 - c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]); 1400 - c->dst.addr.mem.seg = VCPU_SREG_SS; 1116 + addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); 1117 + addr.seg = VCPU_SREG_SS; 1118 + 1119 + /* Disable writeback. 
*/ 1120 + c->dst.type = OP_NONE; 1121 + return segmented_write(ctxt, addr, &c->src.val, c->op_bytes); 1401 1122 } 1402 1123 1403 1124 static int emulate_pop(struct x86_emulate_ctxt *ctxt, 1404 - struct x86_emulate_ops *ops, 1405 1125 void *dest, int len) 1406 1126 { 1407 1127 struct decode_cache *c = &ctxt->decode; ··· 1410 1130 1411 1131 addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); 1412 1132 addr.seg = VCPU_SREG_SS; 1413 - rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len); 1133 + rc = segmented_read(ctxt, addr, dest, len); 1414 1134 if (rc != X86EMUL_CONTINUE) 1415 1135 return rc; 1416 1136 1417 1137 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); 1418 1138 return rc; 1139 + } 1140 + 1141 + static int em_pop(struct x86_emulate_ctxt *ctxt) 1142 + { 1143 + struct decode_cache *c = &ctxt->decode; 1144 + 1145 + return emulate_pop(ctxt, &c->dst.val, c->op_bytes); 1419 1146 } 1420 1147 1421 1148 static int emulate_popf(struct x86_emulate_ctxt *ctxt, ··· 1432 1145 int rc; 1433 1146 unsigned long val, change_mask; 1434 1147 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 1435 - int cpl = ops->cpl(ctxt->vcpu); 1148 + int cpl = ops->cpl(ctxt); 1436 1149 1437 - rc = emulate_pop(ctxt, ops, &val, len); 1150 + rc = emulate_pop(ctxt, &val, len); 1438 1151 if (rc != X86EMUL_CONTINUE) 1439 1152 return rc; 1440 1153 ··· 1466 1179 return rc; 1467 1180 } 1468 1181 1469 - static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, 1470 - struct x86_emulate_ops *ops, int seg) 1182 + static int em_popf(struct x86_emulate_ctxt *ctxt) 1471 1183 { 1472 1184 struct decode_cache *c = &ctxt->decode; 1473 1185 1474 - c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); 1186 + c->dst.type = OP_REG; 1187 + c->dst.addr.reg = &ctxt->eflags; 1188 + c->dst.bytes = c->op_bytes; 1189 + return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); 1190 + } 1475 1191 1476 - emulate_push(ctxt, ops); 1192 + static int emulate_push_sreg(struct 
x86_emulate_ctxt *ctxt, 1193 + struct x86_emulate_ops *ops, int seg) 1194 + { 1195 + struct decode_cache *c = &ctxt->decode; 1196 + 1197 + c->src.val = get_segment_selector(ctxt, seg); 1198 + 1199 + return em_push(ctxt); 1477 1200 } 1478 1201 1479 1202 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, ··· 1493 1196 unsigned long selector; 1494 1197 int rc; 1495 1198 1496 - rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); 1199 + rc = emulate_pop(ctxt, &selector, c->op_bytes); 1497 1200 if (rc != X86EMUL_CONTINUE) 1498 1201 return rc; 1499 1202 ··· 1501 1204 return rc; 1502 1205 } 1503 1206 1504 - static int emulate_pusha(struct x86_emulate_ctxt *ctxt, 1505 - struct x86_emulate_ops *ops) 1207 + static int em_pusha(struct x86_emulate_ctxt *ctxt) 1506 1208 { 1507 1209 struct decode_cache *c = &ctxt->decode; 1508 1210 unsigned long old_esp = c->regs[VCPU_REGS_RSP]; ··· 1512 1216 (reg == VCPU_REGS_RSP) ? 1513 1217 (c->src.val = old_esp) : (c->src.val = c->regs[reg]); 1514 1218 1515 - emulate_push(ctxt, ops); 1516 - 1517 - rc = writeback(ctxt, ops); 1219 + rc = em_push(ctxt); 1518 1220 if (rc != X86EMUL_CONTINUE) 1519 1221 return rc; 1520 1222 1521 1223 ++reg; 1522 1224 } 1523 1225 1524 - /* Disable writeback. 
*/ 1525 - c->dst.type = OP_NONE; 1526 - 1527 1226 return rc; 1528 1227 } 1529 1228 1530 - static int emulate_popa(struct x86_emulate_ctxt *ctxt, 1531 - struct x86_emulate_ops *ops) 1229 + static int em_pushf(struct x86_emulate_ctxt *ctxt) 1230 + { 1231 + struct decode_cache *c = &ctxt->decode; 1232 + 1233 + c->src.val = (unsigned long)ctxt->eflags; 1234 + return em_push(ctxt); 1235 + } 1236 + 1237 + static int em_popa(struct x86_emulate_ctxt *ctxt) 1532 1238 { 1533 1239 struct decode_cache *c = &ctxt->decode; 1534 1240 int rc = X86EMUL_CONTINUE; ··· 1543 1245 --reg; 1544 1246 } 1545 1247 1546 - rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); 1248 + rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes); 1547 1249 if (rc != X86EMUL_CONTINUE) 1548 1250 break; 1549 1251 --reg; ··· 1563 1265 1564 1266 /* TODO: Add limit checks */ 1565 1267 c->src.val = ctxt->eflags; 1566 - emulate_push(ctxt, ops); 1567 - rc = writeback(ctxt, ops); 1268 + rc = em_push(ctxt); 1568 1269 if (rc != X86EMUL_CONTINUE) 1569 1270 return rc; 1570 1271 1571 1272 ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); 1572 1273 1573 - c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 1574 - emulate_push(ctxt, ops); 1575 - rc = writeback(ctxt, ops); 1274 + c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); 1275 + rc = em_push(ctxt); 1576 1276 if (rc != X86EMUL_CONTINUE) 1577 1277 return rc; 1578 1278 1579 1279 c->src.val = c->eip; 1580 - emulate_push(ctxt, ops); 1581 - rc = writeback(ctxt, ops); 1280 + rc = em_push(ctxt); 1582 1281 if (rc != X86EMUL_CONTINUE) 1583 1282 return rc; 1584 1283 1585 - c->dst.type = OP_NONE; 1586 - 1587 - ops->get_idt(&dt, ctxt->vcpu); 1284 + ops->get_idt(ctxt, &dt); 1588 1285 1589 1286 eip_addr = dt.address + (irq << 2); 1590 1287 cs_addr = dt.address + (irq << 2) + 2; 1591 1288 1592 - rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception); 1289 + rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); 1593 1290 if (rc != 
X86EMUL_CONTINUE) 1594 1291 return rc; 1595 1292 1596 - rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception); 1293 + rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); 1597 1294 if (rc != X86EMUL_CONTINUE) 1598 1295 return rc; 1599 1296 ··· 1632 1339 1633 1340 /* TODO: Add stack limit check */ 1634 1341 1635 - rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes); 1342 + rc = emulate_pop(ctxt, &temp_eip, c->op_bytes); 1636 1343 1637 1344 if (rc != X86EMUL_CONTINUE) 1638 1345 return rc; ··· 1640 1347 if (temp_eip & ~0xffff) 1641 1348 return emulate_gp(ctxt, 0); 1642 1349 1643 - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); 1350 + rc = emulate_pop(ctxt, &cs, c->op_bytes); 1644 1351 1645 1352 if (rc != X86EMUL_CONTINUE) 1646 1353 return rc; 1647 1354 1648 - rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes); 1355 + rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes); 1649 1356 1650 1357 if (rc != X86EMUL_CONTINUE) 1651 1358 return rc; ··· 1687 1394 } 1688 1395 } 1689 1396 1690 - static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, 1691 - struct x86_emulate_ops *ops) 1397 + static int em_jmp_far(struct x86_emulate_ctxt *ctxt) 1398 + { 1399 + struct decode_cache *c = &ctxt->decode; 1400 + int rc; 1401 + unsigned short sel; 1402 + 1403 + memcpy(&sel, c->src.valptr + c->op_bytes, 2); 1404 + 1405 + rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS); 1406 + if (rc != X86EMUL_CONTINUE) 1407 + return rc; 1408 + 1409 + c->eip = 0; 1410 + memcpy(&c->eip, c->src.valptr, c->op_bytes); 1411 + return X86EMUL_CONTINUE; 1412 + } 1413 + 1414 + static int em_grp1a(struct x86_emulate_ctxt *ctxt) 1692 1415 { 1693 1416 struct decode_cache *c = &ctxt->decode; 1694 1417 1695 - return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); 1418 + return emulate_pop(ctxt, &c->dst.val, c->dst.bytes); 1696 1419 } 1697 1420 1698 - static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) 1421 + static int em_grp2(struct x86_emulate_ctxt *ctxt) 
1699 1422 { 1700 1423 struct decode_cache *c = &ctxt->decode; 1701 1424 switch (c->modrm_reg) { ··· 1738 1429 emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); 1739 1430 break; 1740 1431 } 1432 + return X86EMUL_CONTINUE; 1741 1433 } 1742 1434 1743 - static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, 1744 - struct x86_emulate_ops *ops) 1435 + static int em_grp3(struct x86_emulate_ctxt *ctxt) 1745 1436 { 1746 1437 struct decode_cache *c = &ctxt->decode; 1747 1438 unsigned long *rax = &c->regs[VCPU_REGS_RAX]; ··· 1780 1471 return X86EMUL_CONTINUE; 1781 1472 } 1782 1473 1783 - static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, 1784 - struct x86_emulate_ops *ops) 1474 + static int em_grp45(struct x86_emulate_ctxt *ctxt) 1785 1475 { 1786 1476 struct decode_cache *c = &ctxt->decode; 1477 + int rc = X86EMUL_CONTINUE; 1787 1478 1788 1479 switch (c->modrm_reg) { 1789 1480 case 0: /* inc */ ··· 1797 1488 old_eip = c->eip; 1798 1489 c->eip = c->src.val; 1799 1490 c->src.val = old_eip; 1800 - emulate_push(ctxt, ops); 1491 + rc = em_push(ctxt); 1801 1492 break; 1802 1493 } 1803 1494 case 4: /* jmp abs */ 1804 1495 c->eip = c->src.val; 1805 1496 break; 1497 + case 5: /* jmp far */ 1498 + rc = em_jmp_far(ctxt); 1499 + break; 1806 1500 case 6: /* push */ 1807 - emulate_push(ctxt, ops); 1501 + rc = em_push(ctxt); 1808 1502 break; 1809 1503 } 1810 - return X86EMUL_CONTINUE; 1504 + return rc; 1811 1505 } 1812 1506 1813 - static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, 1814 - struct x86_emulate_ops *ops) 1507 + static int em_grp9(struct x86_emulate_ctxt *ctxt) 1815 1508 { 1816 1509 struct decode_cache *c = &ctxt->decode; 1817 1510 u64 old = c->dst.orig_val64; ··· 1839 1528 int rc; 1840 1529 unsigned long cs; 1841 1530 1842 - rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); 1531 + rc = emulate_pop(ctxt, &c->eip, c->op_bytes); 1843 1532 if (rc != X86EMUL_CONTINUE) 1844 1533 return rc; 1845 1534 if (c->op_bytes == 4) 1846 1535 c->eip = 
(u32)c->eip; 1847 - rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); 1536 + rc = emulate_pop(ctxt, &cs, c->op_bytes); 1848 1537 if (rc != X86EMUL_CONTINUE) 1849 1538 return rc; 1850 1539 rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); ··· 1873 1562 struct x86_emulate_ops *ops, struct desc_struct *cs, 1874 1563 struct desc_struct *ss) 1875 1564 { 1565 + u16 selector; 1566 + 1876 1567 memset(cs, 0, sizeof(struct desc_struct)); 1877 - ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); 1568 + ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); 1878 1569 memset(ss, 0, sizeof(struct desc_struct)); 1879 1570 1880 1571 cs->l = 0; /* will be adjusted later */ ··· 1906 1593 struct desc_struct cs, ss; 1907 1594 u64 msr_data; 1908 1595 u16 cs_sel, ss_sel; 1596 + u64 efer = 0; 1909 1597 1910 1598 /* syscall is not available in real mode */ 1911 1599 if (ctxt->mode == X86EMUL_MODE_REAL || 1912 1600 ctxt->mode == X86EMUL_MODE_VM86) 1913 1601 return emulate_ud(ctxt); 1914 1602 1603 + ops->get_msr(ctxt, MSR_EFER, &efer); 1915 1604 setup_syscalls_segments(ctxt, ops, &cs, &ss); 1916 1605 1917 - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); 1606 + ops->get_msr(ctxt, MSR_STAR, &msr_data); 1918 1607 msr_data >>= 32; 1919 1608 cs_sel = (u16)(msr_data & 0xfffc); 1920 1609 ss_sel = (u16)(msr_data + 8); 1921 1610 1922 - if (is_long_mode(ctxt->vcpu)) { 1611 + if (efer & EFER_LMA) { 1923 1612 cs.d = 0; 1924 1613 cs.l = 1; 1925 1614 } 1926 - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); 1927 - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); 1928 - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); 1929 - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); 1615 + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); 1616 + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); 1930 1617 1931 1618 c->regs[VCPU_REGS_RCX] = c->eip; 1932 - if (is_long_mode(ctxt->vcpu)) { 1619 + if (efer & EFER_LMA) { 1933 1620 #ifdef 
CONFIG_X86_64 1934 1621 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; 1935 1622 1936 - ops->get_msr(ctxt->vcpu, 1623 + ops->get_msr(ctxt, 1937 1624 ctxt->mode == X86EMUL_MODE_PROT64 ? 1938 1625 MSR_LSTAR : MSR_CSTAR, &msr_data); 1939 1626 c->eip = msr_data; 1940 1627 1941 - ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); 1628 + ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); 1942 1629 ctxt->eflags &= ~(msr_data | EFLG_RF); 1943 1630 #endif 1944 1631 } else { 1945 1632 /* legacy mode */ 1946 - ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); 1633 + ops->get_msr(ctxt, MSR_STAR, &msr_data); 1947 1634 c->eip = (u32)msr_data; 1948 1635 1949 1636 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); ··· 1959 1646 struct desc_struct cs, ss; 1960 1647 u64 msr_data; 1961 1648 u16 cs_sel, ss_sel; 1649 + u64 efer = 0; 1962 1650 1651 + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); 1963 1652 /* inject #GP if in real mode */ 1964 1653 if (ctxt->mode == X86EMUL_MODE_REAL) 1965 1654 return emulate_gp(ctxt, 0); ··· 1974 1659 1975 1660 setup_syscalls_segments(ctxt, ops, &cs, &ss); 1976 1661 1977 - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); 1662 + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); 1978 1663 switch (ctxt->mode) { 1979 1664 case X86EMUL_MODE_PROT32: 1980 1665 if ((msr_data & 0xfffc) == 0x0) ··· 1991 1676 cs_sel &= ~SELECTOR_RPL_MASK; 1992 1677 ss_sel = cs_sel + 8; 1993 1678 ss_sel &= ~SELECTOR_RPL_MASK; 1994 - if (ctxt->mode == X86EMUL_MODE_PROT64 1995 - || is_long_mode(ctxt->vcpu)) { 1679 + if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { 1996 1680 cs.d = 0; 1997 1681 cs.l = 1; 1998 1682 } 1999 1683 2000 - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); 2001 - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); 2002 - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); 2003 - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); 1684 + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); 1685 + 
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); 2004 1686 2005 - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); 1687 + ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); 2006 1688 c->eip = msr_data; 2007 1689 2008 - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); 1690 + ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); 2009 1691 c->regs[VCPU_REGS_RSP] = msr_data; 2010 1692 2011 1693 return X86EMUL_CONTINUE; ··· 2031 1719 2032 1720 cs.dpl = 3; 2033 1721 ss.dpl = 3; 2034 - ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); 1722 + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); 2035 1723 switch (usermode) { 2036 1724 case X86EMUL_MODE_PROT32: 2037 1725 cs_sel = (u16)(msr_data + 16); ··· 2051 1739 cs_sel |= SELECTOR_RPL_MASK; 2052 1740 ss_sel |= SELECTOR_RPL_MASK; 2053 1741 2054 - ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); 2055 - ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); 2056 - ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); 2057 - ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); 1742 + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); 1743 + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); 2058 1744 2059 1745 c->eip = c->regs[VCPU_REGS_RDX]; 2060 1746 c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; ··· 2069 1759 if (ctxt->mode == X86EMUL_MODE_VM86) 2070 1760 return true; 2071 1761 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 2072 - return ops->cpl(ctxt->vcpu) > iopl; 1762 + return ops->cpl(ctxt) > iopl; 2073 1763 } 2074 1764 2075 1765 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, ··· 2079 1769 struct desc_struct tr_seg; 2080 1770 u32 base3; 2081 1771 int r; 2082 - u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; 1772 + u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7; 2083 1773 unsigned mask = (1 << len) - 1; 2084 1774 unsigned long base; 2085 1775 2086 - ops->get_cached_descriptor(&tr_seg, &base3, 
VCPU_SREG_TR, ctxt->vcpu); 1776 + ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); 2087 1777 if (!tr_seg.p) 2088 1778 return false; 2089 1779 if (desc_limit_scaled(&tr_seg) < 103) ··· 2092 1782 #ifdef CONFIG_X86_64 2093 1783 base |= ((u64)base3) << 32; 2094 1784 #endif 2095 - r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); 1785 + r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); 2096 1786 if (r != X86EMUL_CONTINUE) 2097 1787 return false; 2098 1788 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) 2099 1789 return false; 2100 - r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, 2101 - NULL); 1790 + r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); 2102 1791 if (r != X86EMUL_CONTINUE) 2103 1792 return false; 2104 1793 if ((perm >> bit_idx) & mask) ··· 2138 1829 tss->si = c->regs[VCPU_REGS_RSI]; 2139 1830 tss->di = c->regs[VCPU_REGS_RDI]; 2140 1831 2141 - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); 2142 - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 2143 - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); 2144 - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); 2145 - tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); 1832 + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); 1833 + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); 1834 + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); 1835 + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); 1836 + tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR); 2146 1837 } 2147 1838 2148 1839 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, ··· 2167 1858 * SDM says that segment selectors are loaded before segment 2168 1859 * descriptors 2169 1860 */ 2170 - ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); 2171 - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); 2172 - ops->set_segment_selector(tss->cs, 
VCPU_SREG_CS, ctxt->vcpu); 2173 - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); 2174 - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); 1861 + set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR); 1862 + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); 1863 + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); 1864 + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); 1865 + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); 2175 1866 2176 1867 /* 2177 1868 * Now load segment descriptors. If fault happenes at this stage ··· 2205 1896 int ret; 2206 1897 u32 new_tss_base = get_desc_base(new_desc); 2207 1898 2208 - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 1899 + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, 2209 1900 &ctxt->exception); 2210 1901 if (ret != X86EMUL_CONTINUE) 2211 1902 /* FIXME: need to provide precise fault address */ ··· 2213 1904 2214 1905 save_state_to_tss16(ctxt, ops, &tss_seg); 2215 1906 2216 - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 1907 + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, 2217 1908 &ctxt->exception); 2218 1909 if (ret != X86EMUL_CONTINUE) 2219 1910 /* FIXME: need to provide precise fault address */ 2220 1911 return ret; 2221 1912 2222 - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 1913 + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, 2223 1914 &ctxt->exception); 2224 1915 if (ret != X86EMUL_CONTINUE) 2225 1916 /* FIXME: need to provide precise fault address */ ··· 2228 1919 if (old_tss_sel != 0xffff) { 2229 1920 tss_seg.prev_task_link = old_tss_sel; 2230 1921 2231 - ret = ops->write_std(new_tss_base, 1922 + ret = ops->write_std(ctxt, new_tss_base, 2232 1923 &tss_seg.prev_task_link, 2233 1924 sizeof tss_seg.prev_task_link, 2234 - ctxt->vcpu, &ctxt->exception); 1925 + &ctxt->exception); 2235 1926 if (ret != X86EMUL_CONTINUE) 2236 1927 /* FIXME: need to 
provide precise fault address */ 2237 1928 return ret; ··· 2246 1937 { 2247 1938 struct decode_cache *c = &ctxt->decode; 2248 1939 2249 - tss->cr3 = ops->get_cr(3, ctxt->vcpu); 1940 + tss->cr3 = ops->get_cr(ctxt, 3); 2250 1941 tss->eip = c->eip; 2251 1942 tss->eflags = ctxt->eflags; 2252 1943 tss->eax = c->regs[VCPU_REGS_RAX]; ··· 2258 1949 tss->esi = c->regs[VCPU_REGS_RSI]; 2259 1950 tss->edi = c->regs[VCPU_REGS_RDI]; 2260 1951 2261 - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); 2262 - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 2263 - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); 2264 - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); 2265 - tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); 2266 - tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); 2267 - tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); 1952 + tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); 1953 + tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); 1954 + tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS); 1955 + tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS); 1956 + tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS); 1957 + tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS); 1958 + tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR); 2268 1959 } 2269 1960 2270 1961 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, ··· 2274 1965 struct decode_cache *c = &ctxt->decode; 2275 1966 int ret; 2276 1967 2277 - if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) 1968 + if (ops->set_cr(ctxt, 3, tss->cr3)) 2278 1969 return emulate_gp(ctxt, 0); 2279 1970 c->eip = tss->eip; 2280 1971 ctxt->eflags = tss->eflags | 2; ··· 2291 1982 * SDM says that segment selectors are loaded before segment 2292 1983 * descriptors 2293 1984 */ 2294 - ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); 2295 - ops->set_segment_selector(tss->es, VCPU_SREG_ES, 
ctxt->vcpu); 2296 - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); 2297 - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); 2298 - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); 2299 - ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); 2300 - ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); 1985 + set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); 1986 + set_segment_selector(ctxt, tss->es, VCPU_SREG_ES); 1987 + set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS); 1988 + set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS); 1989 + set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); 1990 + set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS); 1991 + set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); 2301 1992 2302 1993 /* 2303 1994 * Now load segment descriptors. If fault happenes at this stage ··· 2337 2028 int ret; 2338 2029 u32 new_tss_base = get_desc_base(new_desc); 2339 2030 2340 - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2031 + ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, 2341 2032 &ctxt->exception); 2342 2033 if (ret != X86EMUL_CONTINUE) 2343 2034 /* FIXME: need to provide precise fault address */ ··· 2345 2036 2346 2037 save_state_to_tss32(ctxt, ops, &tss_seg); 2347 2038 2348 - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2039 + ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, 2349 2040 &ctxt->exception); 2350 2041 if (ret != X86EMUL_CONTINUE) 2351 2042 /* FIXME: need to provide precise fault address */ 2352 2043 return ret; 2353 2044 2354 - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, 2045 + ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, 2355 2046 &ctxt->exception); 2356 2047 if (ret != X86EMUL_CONTINUE) 2357 2048 /* FIXME: need to provide precise fault address */ ··· 2360 2051 if (old_tss_sel != 0xffff) { 2361 2052 tss_seg.prev_task_link = 
old_tss_sel; 2362 2053 2363 - ret = ops->write_std(new_tss_base, 2054 + ret = ops->write_std(ctxt, new_tss_base, 2364 2055 &tss_seg.prev_task_link, 2365 2056 sizeof tss_seg.prev_task_link, 2366 - ctxt->vcpu, &ctxt->exception); 2057 + &ctxt->exception); 2367 2058 if (ret != X86EMUL_CONTINUE) 2368 2059 /* FIXME: need to provide precise fault address */ 2369 2060 return ret; ··· 2379 2070 { 2380 2071 struct desc_struct curr_tss_desc, next_tss_desc; 2381 2072 int ret; 2382 - u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); 2073 + u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); 2383 2074 ulong old_tss_base = 2384 - ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); 2075 + ops->get_cached_segment_base(ctxt, VCPU_SREG_TR); 2385 2076 u32 desc_limit; 2386 2077 2387 2078 /* FIXME: old_tss_base == ~0 ? */ ··· 2397 2088 2398 2089 if (reason != TASK_SWITCH_IRET) { 2399 2090 if ((tss_selector & 3) > next_tss_desc.dpl || 2400 - ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) 2091 + ops->cpl(ctxt) > next_tss_desc.dpl) 2401 2092 return emulate_gp(ctxt, 0); 2402 2093 } 2403 2094 ··· 2441 2132 &next_tss_desc); 2442 2133 } 2443 2134 2444 - ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); 2445 - ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); 2446 - ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); 2135 + ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); 2136 + ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); 2447 2137 2448 2138 if (has_error_code) { 2449 2139 struct decode_cache *c = &ctxt->decode; ··· 2450 2142 c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 
4 : 2; 2451 2143 c->lock_prefix = 0; 2452 2144 c->src.val = (unsigned long) error_code; 2453 - emulate_push(ctxt, ops); 2145 + ret = em_push(ctxt); 2454 2146 } 2455 2147 2456 2148 return ret; ··· 2470 2162 rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, 2471 2163 has_error_code, error_code); 2472 2164 2473 - if (rc == X86EMUL_CONTINUE) { 2474 - rc = writeback(ctxt, ops); 2475 - if (rc == X86EMUL_CONTINUE) 2476 - ctxt->eip = c->eip; 2477 - } 2165 + if (rc == X86EMUL_CONTINUE) 2166 + ctxt->eip = c->eip; 2478 2167 2479 - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; 2168 + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; 2480 2169 } 2481 2170 2482 2171 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, ··· 2485 2180 register_address_increment(c, &c->regs[reg], df * op->bytes); 2486 2181 op->addr.mem.ea = register_address(c, c->regs[reg]); 2487 2182 op->addr.mem.seg = seg; 2488 - } 2489 - 2490 - static int em_push(struct x86_emulate_ctxt *ctxt) 2491 - { 2492 - emulate_push(ctxt, ctxt->ops); 2493 - return X86EMUL_CONTINUE; 2494 2183 } 2495 2184 2496 2185 static int em_das(struct x86_emulate_ctxt *ctxt) ··· 2533 2234 ulong old_eip; 2534 2235 int rc; 2535 2236 2536 - old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); 2237 + old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); 2537 2238 old_eip = c->eip; 2538 2239 2539 2240 memcpy(&sel, c->src.valptr + c->op_bytes, 2); ··· 2544 2245 memcpy(&c->eip, c->src.valptr, c->op_bytes); 2545 2246 2546 2247 c->src.val = old_cs; 2547 - emulate_push(ctxt, ctxt->ops); 2548 - rc = writeback(ctxt, ctxt->ops); 2248 + rc = em_push(ctxt); 2549 2249 if (rc != X86EMUL_CONTINUE) 2550 2250 return rc; 2551 2251 2552 2252 c->src.val = old_eip; 2553 - emulate_push(ctxt, ctxt->ops); 2554 - rc = writeback(ctxt, ctxt->ops); 2555 - if (rc != X86EMUL_CONTINUE) 2556 - return rc; 2557 - 2558 - c->dst.type = OP_NONE; 2559 - 2560 - return X86EMUL_CONTINUE; 2253 + return em_push(ctxt); 
2561 2254 } 2562 2255 2563 2256 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) ··· 2560 2269 c->dst.type = OP_REG; 2561 2270 c->dst.addr.reg = &c->eip; 2562 2271 c->dst.bytes = c->op_bytes; 2563 - rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); 2272 + rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes); 2564 2273 if (rc != X86EMUL_CONTINUE) 2565 2274 return rc; 2566 2275 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); 2276 + return X86EMUL_CONTINUE; 2277 + } 2278 + 2279 + static int em_add(struct x86_emulate_ctxt *ctxt) 2280 + { 2281 + struct decode_cache *c = &ctxt->decode; 2282 + 2283 + emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); 2284 + return X86EMUL_CONTINUE; 2285 + } 2286 + 2287 + static int em_or(struct x86_emulate_ctxt *ctxt) 2288 + { 2289 + struct decode_cache *c = &ctxt->decode; 2290 + 2291 + emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); 2292 + return X86EMUL_CONTINUE; 2293 + } 2294 + 2295 + static int em_adc(struct x86_emulate_ctxt *ctxt) 2296 + { 2297 + struct decode_cache *c = &ctxt->decode; 2298 + 2299 + emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); 2300 + return X86EMUL_CONTINUE; 2301 + } 2302 + 2303 + static int em_sbb(struct x86_emulate_ctxt *ctxt) 2304 + { 2305 + struct decode_cache *c = &ctxt->decode; 2306 + 2307 + emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); 2308 + return X86EMUL_CONTINUE; 2309 + } 2310 + 2311 + static int em_and(struct x86_emulate_ctxt *ctxt) 2312 + { 2313 + struct decode_cache *c = &ctxt->decode; 2314 + 2315 + emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); 2316 + return X86EMUL_CONTINUE; 2317 + } 2318 + 2319 + static int em_sub(struct x86_emulate_ctxt *ctxt) 2320 + { 2321 + struct decode_cache *c = &ctxt->decode; 2322 + 2323 + emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); 2324 + return X86EMUL_CONTINUE; 2325 + } 2326 + 2327 + static int em_xor(struct x86_emulate_ctxt *ctxt) 2328 + { 2329 + struct decode_cache *c = &ctxt->decode; 2330 
+ 2331 + emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); 2332 + return X86EMUL_CONTINUE; 2333 + } 2334 + 2335 + static int em_cmp(struct x86_emulate_ctxt *ctxt) 2336 + { 2337 + struct decode_cache *c = &ctxt->decode; 2338 + 2339 + emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); 2340 + /* Disable writeback. */ 2341 + c->dst.type = OP_NONE; 2567 2342 return X86EMUL_CONTINUE; 2568 2343 } 2569 2344 ··· 2663 2306 2664 2307 static int em_rdtsc(struct x86_emulate_ctxt *ctxt) 2665 2308 { 2666 - unsigned cpl = ctxt->ops->cpl(ctxt->vcpu); 2667 2309 struct decode_cache *c = &ctxt->decode; 2668 2310 u64 tsc = 0; 2669 2311 2670 - if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) 2671 - return emulate_gp(ctxt, 0); 2672 - ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc); 2312 + ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); 2673 2313 c->regs[VCPU_REGS_RAX] = (u32)tsc; 2674 2314 c->regs[VCPU_REGS_RDX] = tsc >> 32; 2675 2315 return X86EMUL_CONTINUE; ··· 2679 2325 return X86EMUL_CONTINUE; 2680 2326 } 2681 2327 2328 + static int em_movdqu(struct x86_emulate_ctxt *ctxt) 2329 + { 2330 + struct decode_cache *c = &ctxt->decode; 2331 + memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes); 2332 + return X86EMUL_CONTINUE; 2333 + } 2334 + 2335 + static int em_invlpg(struct x86_emulate_ctxt *ctxt) 2336 + { 2337 + struct decode_cache *c = &ctxt->decode; 2338 + int rc; 2339 + ulong linear; 2340 + 2341 + rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); 2342 + if (rc == X86EMUL_CONTINUE) 2343 + ctxt->ops->invlpg(ctxt, linear); 2344 + /* Disable writeback. 
*/ 2345 + c->dst.type = OP_NONE; 2346 + return X86EMUL_CONTINUE; 2347 + } 2348 + 2349 + static int em_clts(struct x86_emulate_ctxt *ctxt) 2350 + { 2351 + ulong cr0; 2352 + 2353 + cr0 = ctxt->ops->get_cr(ctxt, 0); 2354 + cr0 &= ~X86_CR0_TS; 2355 + ctxt->ops->set_cr(ctxt, 0, cr0); 2356 + return X86EMUL_CONTINUE; 2357 + } 2358 + 2359 + static int em_vmcall(struct x86_emulate_ctxt *ctxt) 2360 + { 2361 + struct decode_cache *c = &ctxt->decode; 2362 + int rc; 2363 + 2364 + if (c->modrm_mod != 3 || c->modrm_rm != 1) 2365 + return X86EMUL_UNHANDLEABLE; 2366 + 2367 + rc = ctxt->ops->fix_hypercall(ctxt); 2368 + if (rc != X86EMUL_CONTINUE) 2369 + return rc; 2370 + 2371 + /* Let the processor re-execute the fixed hypercall */ 2372 + c->eip = ctxt->eip; 2373 + /* Disable writeback. */ 2374 + c->dst.type = OP_NONE; 2375 + return X86EMUL_CONTINUE; 2376 + } 2377 + 2378 + static int em_lgdt(struct x86_emulate_ctxt *ctxt) 2379 + { 2380 + struct decode_cache *c = &ctxt->decode; 2381 + struct desc_ptr desc_ptr; 2382 + int rc; 2383 + 2384 + rc = read_descriptor(ctxt, c->src.addr.mem, 2385 + &desc_ptr.size, &desc_ptr.address, 2386 + c->op_bytes); 2387 + if (rc != X86EMUL_CONTINUE) 2388 + return rc; 2389 + ctxt->ops->set_gdt(ctxt, &desc_ptr); 2390 + /* Disable writeback. */ 2391 + c->dst.type = OP_NONE; 2392 + return X86EMUL_CONTINUE; 2393 + } 2394 + 2395 + static int em_vmmcall(struct x86_emulate_ctxt *ctxt) 2396 + { 2397 + struct decode_cache *c = &ctxt->decode; 2398 + int rc; 2399 + 2400 + rc = ctxt->ops->fix_hypercall(ctxt); 2401 + 2402 + /* Disable writeback. 
*/ 2403 + c->dst.type = OP_NONE; 2404 + return rc; 2405 + } 2406 + 2407 + static int em_lidt(struct x86_emulate_ctxt *ctxt) 2408 + { 2409 + struct decode_cache *c = &ctxt->decode; 2410 + struct desc_ptr desc_ptr; 2411 + int rc; 2412 + 2413 + rc = read_descriptor(ctxt, c->src.addr.mem, 2414 + &desc_ptr.size, &desc_ptr.address, 2415 + c->op_bytes); 2416 + if (rc != X86EMUL_CONTINUE) 2417 + return rc; 2418 + ctxt->ops->set_idt(ctxt, &desc_ptr); 2419 + /* Disable writeback. */ 2420 + c->dst.type = OP_NONE; 2421 + return X86EMUL_CONTINUE; 2422 + } 2423 + 2424 + static int em_smsw(struct x86_emulate_ctxt *ctxt) 2425 + { 2426 + struct decode_cache *c = &ctxt->decode; 2427 + 2428 + c->dst.bytes = 2; 2429 + c->dst.val = ctxt->ops->get_cr(ctxt, 0); 2430 + return X86EMUL_CONTINUE; 2431 + } 2432 + 2433 + static int em_lmsw(struct x86_emulate_ctxt *ctxt) 2434 + { 2435 + struct decode_cache *c = &ctxt->decode; 2436 + ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) 2437 + | (c->src.val & 0x0f)); 2438 + c->dst.type = OP_NONE; 2439 + return X86EMUL_CONTINUE; 2440 + } 2441 + 2442 + static bool valid_cr(int nr) 2443 + { 2444 + switch (nr) { 2445 + case 0: 2446 + case 2 ... 
4: 2447 + case 8: 2448 + return true; 2449 + default: 2450 + return false; 2451 + } 2452 + } 2453 + 2454 + static int check_cr_read(struct x86_emulate_ctxt *ctxt) 2455 + { 2456 + struct decode_cache *c = &ctxt->decode; 2457 + 2458 + if (!valid_cr(c->modrm_reg)) 2459 + return emulate_ud(ctxt); 2460 + 2461 + return X86EMUL_CONTINUE; 2462 + } 2463 + 2464 + static int check_cr_write(struct x86_emulate_ctxt *ctxt) 2465 + { 2466 + struct decode_cache *c = &ctxt->decode; 2467 + u64 new_val = c->src.val64; 2468 + int cr = c->modrm_reg; 2469 + u64 efer = 0; 2470 + 2471 + static u64 cr_reserved_bits[] = { 2472 + 0xffffffff00000000ULL, 2473 + 0, 0, 0, /* CR3 checked later */ 2474 + CR4_RESERVED_BITS, 2475 + 0, 0, 0, 2476 + CR8_RESERVED_BITS, 2477 + }; 2478 + 2479 + if (!valid_cr(cr)) 2480 + return emulate_ud(ctxt); 2481 + 2482 + if (new_val & cr_reserved_bits[cr]) 2483 + return emulate_gp(ctxt, 0); 2484 + 2485 + switch (cr) { 2486 + case 0: { 2487 + u64 cr4; 2488 + if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) || 2489 + ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD))) 2490 + return emulate_gp(ctxt, 0); 2491 + 2492 + cr4 = ctxt->ops->get_cr(ctxt, 4); 2493 + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); 2494 + 2495 + if ((new_val & X86_CR0_PG) && (efer & EFER_LME) && 2496 + !(cr4 & X86_CR4_PAE)) 2497 + return emulate_gp(ctxt, 0); 2498 + 2499 + break; 2500 + } 2501 + case 3: { 2502 + u64 rsvd = 0; 2503 + 2504 + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); 2505 + if (efer & EFER_LMA) 2506 + rsvd = CR3_L_MODE_RESERVED_BITS; 2507 + else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE) 2508 + rsvd = CR3_PAE_RESERVED_BITS; 2509 + else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG) 2510 + rsvd = CR3_NONPAE_RESERVED_BITS; 2511 + 2512 + if (new_val & rsvd) 2513 + return emulate_gp(ctxt, 0); 2514 + 2515 + break; 2516 + } 2517 + case 4: { 2518 + u64 cr4; 2519 + 2520 + cr4 = ctxt->ops->get_cr(ctxt, 4); 2521 + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); 2522 + 2523 + if ((efer & 
EFER_LMA) && !(new_val & X86_CR4_PAE)) 2524 + return emulate_gp(ctxt, 0); 2525 + 2526 + break; 2527 + } 2528 + } 2529 + 2530 + return X86EMUL_CONTINUE; 2531 + } 2532 + 2533 + static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) 2534 + { 2535 + unsigned long dr7; 2536 + 2537 + ctxt->ops->get_dr(ctxt, 7, &dr7); 2538 + 2539 + /* Check if DR7.Global_Enable is set */ 2540 + return dr7 & (1 << 13); 2541 + } 2542 + 2543 + static int check_dr_read(struct x86_emulate_ctxt *ctxt) 2544 + { 2545 + struct decode_cache *c = &ctxt->decode; 2546 + int dr = c->modrm_reg; 2547 + u64 cr4; 2548 + 2549 + if (dr > 7) 2550 + return emulate_ud(ctxt); 2551 + 2552 + cr4 = ctxt->ops->get_cr(ctxt, 4); 2553 + if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5)) 2554 + return emulate_ud(ctxt); 2555 + 2556 + if (check_dr7_gd(ctxt)) 2557 + return emulate_db(ctxt); 2558 + 2559 + return X86EMUL_CONTINUE; 2560 + } 2561 + 2562 + static int check_dr_write(struct x86_emulate_ctxt *ctxt) 2563 + { 2564 + struct decode_cache *c = &ctxt->decode; 2565 + u64 new_val = c->src.val64; 2566 + int dr = c->modrm_reg; 2567 + 2568 + if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) 2569 + return emulate_gp(ctxt, 0); 2570 + 2571 + return check_dr_read(ctxt); 2572 + } 2573 + 2574 + static int check_svme(struct x86_emulate_ctxt *ctxt) 2575 + { 2576 + u64 efer; 2577 + 2578 + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); 2579 + 2580 + if (!(efer & EFER_SVME)) 2581 + return emulate_ud(ctxt); 2582 + 2583 + return X86EMUL_CONTINUE; 2584 + } 2585 + 2586 + static int check_svme_pa(struct x86_emulate_ctxt *ctxt) 2587 + { 2588 + u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; 2589 + 2590 + /* Valid physical address? 
*/ 2591 + if (rax & 0xffff000000000000ULL) 2592 + return emulate_gp(ctxt, 0); 2593 + 2594 + return check_svme(ctxt); 2595 + } 2596 + 2597 + static int check_rdtsc(struct x86_emulate_ctxt *ctxt) 2598 + { 2599 + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); 2600 + 2601 + if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt)) 2602 + return emulate_ud(ctxt); 2603 + 2604 + return X86EMUL_CONTINUE; 2605 + } 2606 + 2607 + static int check_rdpmc(struct x86_emulate_ctxt *ctxt) 2608 + { 2609 + u64 cr4 = ctxt->ops->get_cr(ctxt, 4); 2610 + u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX]; 2611 + 2612 + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || 2613 + (rcx > 3)) 2614 + return emulate_gp(ctxt, 0); 2615 + 2616 + return X86EMUL_CONTINUE; 2617 + } 2618 + 2619 + static int check_perm_in(struct x86_emulate_ctxt *ctxt) 2620 + { 2621 + struct decode_cache *c = &ctxt->decode; 2622 + 2623 + c->dst.bytes = min(c->dst.bytes, 4u); 2624 + if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) 2625 + return emulate_gp(ctxt, 0); 2626 + 2627 + return X86EMUL_CONTINUE; 2628 + } 2629 + 2630 + static int check_perm_out(struct x86_emulate_ctxt *ctxt) 2631 + { 2632 + struct decode_cache *c = &ctxt->decode; 2633 + 2634 + c->src.bytes = min(c->src.bytes, 4u); 2635 + if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) 2636 + return emulate_gp(ctxt, 0); 2637 + 2638 + return X86EMUL_CONTINUE; 2639 + } 2640 + 2682 2641 #define D(_y) { .flags = (_y) } 2642 + #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } 2643 + #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ 2644 + .check_perm = (_p) } 2683 2645 #define N D(0) 2646 + #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } 2684 2647 #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } 2685 - #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) } 2648 + #define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } 2686 2649 #define I(_f, 
_e) { .flags = (_f), .u.execute = (_e) } 2650 + #define II(_f, _e, _i) \ 2651 + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } 2652 + #define IIP(_f, _e, _i, _p) \ 2653 + { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \ 2654 + .check_perm = (_p) } 2655 + #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) } 2687 2656 2688 2657 #define D2bv(_f) D((_f) | ByteOp), D(_f) 2658 + #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) 2689 2659 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) 2690 2660 2691 - #define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM), \ 2692 - D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock), \ 2693 - D2bv(((_f) & ~Lock) | DstAcc | SrcImm) 2661 + #define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ 2662 + I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ 2663 + I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) 2694 2664 2665 + static struct opcode group7_rm1[] = { 2666 + DI(SrcNone | ModRM | Priv, monitor), 2667 + DI(SrcNone | ModRM | Priv, mwait), 2668 + N, N, N, N, N, N, 2669 + }; 2670 + 2671 + static struct opcode group7_rm3[] = { 2672 + DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), 2673 + II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), 2674 + DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), 2675 + DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), 2676 + DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), 2677 + DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), 2678 + DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), 2679 + DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), 2680 + }; 2681 + 2682 + static struct opcode group7_rm7[] = { 2683 + N, 2684 + DIP(SrcNone | ModRM, rdtscp, check_rdtsc), 2685 + N, N, N, N, N, N, 2686 + }; 2695 2687 2696 2688 static struct opcode group1[] = { 2697 - X7(D(Lock)), N 2689 + I(Lock, em_add), 2690 + I(Lock, em_or), 2691 + I(Lock, em_adc), 2692 + I(Lock, 
em_sbb), 2693 + I(Lock, em_and), 2694 + I(Lock, em_sub), 2695 + I(Lock, em_xor), 2696 + I(0, em_cmp), 2698 2697 }; 2699 2698 2700 2699 static struct opcode group1A[] = { ··· 3073 2366 D(SrcMem | ModRM | Stack), N, 3074 2367 }; 3075 2368 2369 + static struct opcode group6[] = { 2370 + DI(ModRM | Prot, sldt), 2371 + DI(ModRM | Prot, str), 2372 + DI(ModRM | Prot | Priv, lldt), 2373 + DI(ModRM | Prot | Priv, ltr), 2374 + N, N, N, N, 2375 + }; 2376 + 3076 2377 static struct group_dual group7 = { { 3077 - N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv), 3078 - D(SrcNone | ModRM | DstMem | Mov), N, 3079 - D(SrcMem16 | ModRM | Mov | Priv), 3080 - D(SrcMem | ModRM | ByteOp | Priv | NoAccess), 2378 + DI(ModRM | Mov | DstMem | Priv, sgdt), 2379 + DI(ModRM | Mov | DstMem | Priv, sidt), 2380 + II(ModRM | SrcMem | Priv, em_lgdt, lgdt), 2381 + II(ModRM | SrcMem | Priv, em_lidt, lidt), 2382 + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, 2383 + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), 2384 + II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), 3081 2385 }, { 3082 - D(SrcNone | ModRM | Priv | VendorSpecific), N, 3083 - N, D(SrcNone | ModRM | Priv | VendorSpecific), 3084 - D(SrcNone | ModRM | DstMem | Mov), N, 3085 - D(SrcMem16 | ModRM | Mov | Priv), N, 2386 + I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), 2387 + EXT(0, group7_rm1), 2388 + N, EXT(0, group7_rm3), 2389 + II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, 2390 + II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), 3086 2391 } }; 3087 2392 3088 2393 static struct opcode group8[] = { ··· 3113 2394 I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)), 3114 2395 }; 3115 2396 2397 + static struct gprefix pfx_0f_6f_0f_7f = { 2398 + N, N, N, I(Sse, em_movdqu), 2399 + }; 2400 + 3116 2401 static struct opcode opcode_table[256] = { 3117 2402 /* 0x00 - 0x07 */ 3118 - D6ALU(Lock), 2403 + I6ALU(Lock, em_add), 3119 2404 D(ImplicitOps | Stack | No64), 
D(ImplicitOps | Stack | No64), 3120 2405 /* 0x08 - 0x0F */ 3121 - D6ALU(Lock), 2406 + I6ALU(Lock, em_or), 3122 2407 D(ImplicitOps | Stack | No64), N, 3123 2408 /* 0x10 - 0x17 */ 3124 - D6ALU(Lock), 2409 + I6ALU(Lock, em_adc), 3125 2410 D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), 3126 2411 /* 0x18 - 0x1F */ 3127 - D6ALU(Lock), 2412 + I6ALU(Lock, em_sbb), 3128 2413 D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), 3129 2414 /* 0x20 - 0x27 */ 3130 - D6ALU(Lock), N, N, 2415 + I6ALU(Lock, em_and), N, N, 3131 2416 /* 0x28 - 0x2F */ 3132 - D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das), 2417 + I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), 3133 2418 /* 0x30 - 0x37 */ 3134 - D6ALU(Lock), N, N, 2419 + I6ALU(Lock, em_xor), N, N, 3135 2420 /* 0x38 - 0x3F */ 3136 - D6ALU(0), N, N, 2421 + I6ALU(0, em_cmp), N, N, 3137 2422 /* 0x40 - 0x4F */ 3138 2423 X16(D(DstReg)), 3139 2424 /* 0x50 - 0x57 */ 3140 2425 X8(I(SrcReg | Stack, em_push)), 3141 2426 /* 0x58 - 0x5F */ 3142 - X8(D(DstReg | Stack)), 2427 + X8(I(DstReg | Stack, em_pop)), 3143 2428 /* 0x60 - 0x67 */ 3144 - D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64), 2429 + I(ImplicitOps | Stack | No64, em_pusha), 2430 + I(ImplicitOps | Stack | No64, em_popa), 3145 2431 N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ , 3146 2432 N, N, N, N, 3147 2433 /* 0x68 - 0x6F */ ··· 3154 2430 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), 3155 2431 I(SrcImmByte | Mov | Stack, em_push), 3156 2432 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), 3157 - D2bv(DstDI | Mov | String), /* insb, insw/insd */ 3158 - D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */ 2433 + D2bvIP(DstDI | Mov | String, ins, check_perm_in), /* insb, insw/insd */ 2434 + D2bvIP(SrcSI | ImplicitOps | String, outs, check_perm_out), /* outsb, outsw/outsd */ 3159 2435 /* 0x70 - 0x7F */ 3160 2436 X16(D(SrcImmByte)), 3161 2437 /* 0x80 - 0x87 */ ··· 3170 2446 D(DstMem | SrcNone | ModRM | Mov), 
D(ModRM | SrcMem | NoAccess | DstReg), 3171 2447 D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), 3172 2448 /* 0x90 - 0x97 */ 3173 - X8(D(SrcAcc | DstReg)), 2449 + DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), 3174 2450 /* 0x98 - 0x9F */ 3175 2451 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd), 3176 2452 I(SrcImmFAddr | No64, em_call_far), N, 3177 - D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N, 2453 + II(ImplicitOps | Stack, em_pushf, pushf), 2454 + II(ImplicitOps | Stack, em_popf, popf), N, N, 3178 2455 /* 0xA0 - 0xA7 */ 3179 2456 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), 3180 2457 I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov), 3181 2458 I2bv(SrcSI | DstDI | Mov | String, em_mov), 3182 - D2bv(SrcSI | DstDI | String), 2459 + I2bv(SrcSI | DstDI | String, em_cmp), 3183 2460 /* 0xA8 - 0xAF */ 3184 2461 D2bv(DstAcc | SrcImm), 3185 2462 I2bv(SrcAcc | DstDI | Mov | String, em_mov), 3186 2463 I2bv(SrcSI | DstAcc | Mov | String, em_mov), 3187 - D2bv(SrcAcc | DstDI | String), 2464 + I2bv(SrcAcc | DstDI | String, em_cmp), 3188 2465 /* 0xB0 - 0xB7 */ 3189 2466 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), 3190 2467 /* 0xB8 - 0xBF */ ··· 3198 2473 G(ByteOp, group11), G(0, group11), 3199 2474 /* 0xC8 - 0xCF */ 3200 2475 N, N, N, D(ImplicitOps | Stack), 3201 - D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps), 2476 + D(ImplicitOps), DI(SrcImmByte, intn), 2477 + D(ImplicitOps | No64), DI(ImplicitOps, iret), 3202 2478 /* 0xD0 - 0xD7 */ 3203 2479 D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), 3204 2480 N, N, N, N, ··· 3207 2481 N, N, N, N, N, N, N, N, 3208 2482 /* 0xE0 - 0xE7 */ 3209 2483 X4(D(SrcImmByte)), 3210 - D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte), 2484 + D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), 2485 + D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), 3211 2486 /* 0xE8 - 0xEF */ 3212 2487 D(SrcImm | Stack), D(SrcImm | ImplicitOps), 3213 2488 D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), 3214 - 
D2bv(SrcNone | DstAcc), D2bv(SrcAcc | ImplicitOps), 2489 + D2bvIP(SrcNone | DstAcc, in, check_perm_in), 2490 + D2bvIP(SrcAcc | ImplicitOps, out, check_perm_out), 3215 2491 /* 0xF0 - 0xF7 */ 3216 - N, N, N, N, 3217 - D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3), 2492 + N, DI(ImplicitOps, icebp), N, N, 2493 + DI(ImplicitOps | Priv, hlt), D(ImplicitOps), 2494 + G(ByteOp, group3), G(0, group3), 3218 2495 /* 0xF8 - 0xFF */ 3219 2496 D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), 3220 2497 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), ··· 3225 2496 3226 2497 static struct opcode twobyte_table[256] = { 3227 2498 /* 0x00 - 0x0F */ 3228 - N, GD(0, &group7), N, N, 3229 - N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, 3230 - D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, 2499 + G(0, group6), GD(0, &group7), N, N, 2500 + N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, 2501 + DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, 3231 2502 N, D(ImplicitOps | ModRM), N, N, 3232 2503 /* 0x10 - 0x1F */ 3233 2504 N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, 3234 2505 /* 0x20 - 0x2F */ 3235 - D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264), 3236 - D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264), 2506 + DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), 2507 + DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), 2508 + DIP(ModRM | SrcMem | Priv | Op3264, cr_write, check_cr_write), 2509 + DIP(ModRM | SrcMem | Priv | Op3264, dr_write, check_dr_write), 3237 2510 N, N, N, N, 3238 2511 N, N, N, N, N, N, N, N, 3239 2512 /* 0x30 - 0x3F */ 3240 - D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), 3241 - D(ImplicitOps | Priv), N, 2513 + DI(ImplicitOps | Priv, wrmsr), 2514 + IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), 2515 + DI(ImplicitOps | Priv, rdmsr), 2516 + DIP(ImplicitOps | Priv, rdpmc, 
check_rdpmc), 3242 2517 D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), 3243 2518 N, N, 3244 2519 N, N, N, N, N, N, N, N, ··· 3251 2518 /* 0x50 - 0x5F */ 3252 2519 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 3253 2520 /* 0x60 - 0x6F */ 3254 - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 2521 + N, N, N, N, 2522 + N, N, N, N, 2523 + N, N, N, N, 2524 + N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f), 3255 2525 /* 0x70 - 0x7F */ 3256 - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 2526 + N, N, N, N, 2527 + N, N, N, N, 2528 + N, N, N, N, 2529 + N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f), 3257 2530 /* 0x80 - 0x8F */ 3258 2531 X16(D(SrcImm)), 3259 2532 /* 0x90 - 0x9F */ 3260 2533 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), 3261 2534 /* 0xA0 - 0xA7 */ 3262 2535 D(ImplicitOps | Stack), D(ImplicitOps | Stack), 3263 - N, D(DstMem | SrcReg | ModRM | BitOp), 2536 + DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp), 3264 2537 D(DstMem | SrcReg | Src2ImmByte | ModRM), 3265 2538 D(DstMem | SrcReg | Src2CL | ModRM), N, N, 3266 2539 /* 0xA8 - 0xAF */ 3267 2540 D(ImplicitOps | Stack), D(ImplicitOps | Stack), 3268 - N, D(DstMem | SrcReg | ModRM | BitOp | Lock), 2541 + DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock), 3269 2542 D(DstMem | SrcReg | Src2ImmByte | ModRM), 3270 2543 D(DstMem | SrcReg | Src2CL | ModRM), 3271 2544 D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), ··· 3303 2564 #undef G 3304 2565 #undef GD 3305 2566 #undef I 2567 + #undef GP 2568 + #undef EXT 3306 2569 3307 2570 #undef D2bv 2571 + #undef D2bvIP 3308 2572 #undef I2bv 3309 - #undef D6ALU 2573 + #undef I6ALU 3310 2574 3311 2575 static unsigned imm_size(struct decode_cache *c) 3312 2576 { ··· 3367 2625 struct decode_cache *c = &ctxt->decode; 3368 2626 int rc = X86EMUL_CONTINUE; 3369 2627 int mode = ctxt->mode; 3370 - int def_op_bytes, def_ad_bytes, dual, goffset; 3371 - struct opcode opcode, *g_mod012, *g_mod3; 2628 + int 
def_op_bytes, def_ad_bytes, goffset, simd_prefix; 2629 + bool op_prefix = false; 2630 + struct opcode opcode; 3372 2631 struct operand memop = { .type = OP_NONE }; 3373 2632 3374 2633 c->eip = ctxt->eip; ··· 3377 2634 c->fetch.end = c->fetch.start + insn_len; 3378 2635 if (insn_len > 0) 3379 2636 memcpy(c->fetch.data, insn, insn_len); 3380 - ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); 3381 2637 3382 2638 switch (mode) { 3383 2639 case X86EMUL_MODE_REAL: ··· 3404 2662 for (;;) { 3405 2663 switch (c->b = insn_fetch(u8, 1, c->eip)) { 3406 2664 case 0x66: /* operand-size override */ 2665 + op_prefix = true; 3407 2666 /* switch between 2/4 bytes */ 3408 2667 c->op_bytes = def_op_bytes ^ 6; 3409 2668 break; ··· 3435 2692 c->lock_prefix = 1; 3436 2693 break; 3437 2694 case 0xf2: /* REPNE/REPNZ */ 3438 - c->rep_prefix = REPNE_PREFIX; 3439 - break; 3440 2695 case 0xf3: /* REP/REPE/REPZ */ 3441 - c->rep_prefix = REPE_PREFIX; 2696 + c->rep_prefix = c->b; 3442 2697 break; 3443 2698 default: 3444 2699 goto done_prefixes; ··· 3463 2722 } 3464 2723 c->d = opcode.flags; 3465 2724 3466 - if (c->d & Group) { 3467 - dual = c->d & GroupDual; 3468 - c->modrm = insn_fetch(u8, 1, c->eip); 3469 - --c->eip; 2725 + while (c->d & GroupMask) { 2726 + switch (c->d & GroupMask) { 2727 + case Group: 2728 + c->modrm = insn_fetch(u8, 1, c->eip); 2729 + --c->eip; 2730 + goffset = (c->modrm >> 3) & 7; 2731 + opcode = opcode.u.group[goffset]; 2732 + break; 2733 + case GroupDual: 2734 + c->modrm = insn_fetch(u8, 1, c->eip); 2735 + --c->eip; 2736 + goffset = (c->modrm >> 3) & 7; 2737 + if ((c->modrm >> 6) == 3) 2738 + opcode = opcode.u.gdual->mod3[goffset]; 2739 + else 2740 + opcode = opcode.u.gdual->mod012[goffset]; 2741 + break; 2742 + case RMExt: 2743 + goffset = c->modrm & 7; 2744 + opcode = opcode.u.group[goffset]; 2745 + break; 2746 + case Prefix: 2747 + if (c->rep_prefix && op_prefix) 2748 + return X86EMUL_UNHANDLEABLE; 2749 + simd_prefix = op_prefix ? 
0x66 : c->rep_prefix; 2750 + switch (simd_prefix) { 2751 + case 0x00: opcode = opcode.u.gprefix->pfx_no; break; 2752 + case 0x66: opcode = opcode.u.gprefix->pfx_66; break; 2753 + case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break; 2754 + case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; 2755 + } 2756 + break; 2757 + default: 2758 + return X86EMUL_UNHANDLEABLE; 2759 + } 3470 2760 3471 - if (c->d & GroupDual) { 3472 - g_mod012 = opcode.u.gdual->mod012; 3473 - g_mod3 = opcode.u.gdual->mod3; 3474 - } else 3475 - g_mod012 = g_mod3 = opcode.u.group; 3476 - 3477 - c->d &= ~(Group | GroupDual); 3478 - 3479 - goffset = (c->modrm >> 3) & 7; 3480 - 3481 - if ((c->modrm >> 6) == 3) 3482 - opcode = g_mod3[goffset]; 3483 - else 3484 - opcode = g_mod012[goffset]; 2761 + c->d &= ~GroupMask; 3485 2762 c->d |= opcode.flags; 3486 2763 } 3487 2764 3488 2765 c->execute = opcode.u.execute; 2766 + c->check_perm = opcode.check_perm; 2767 + c->intercept = opcode.intercept; 3489 2768 3490 2769 /* Unrecognised? */ 3491 2770 if (c->d == 0 || (c->d & Undefined)) ··· 3524 2763 c->op_bytes = 4; 3525 2764 } 3526 2765 2766 + if (c->d & Sse) 2767 + c->op_bytes = 16; 2768 + 3527 2769 /* ModRM and SIB bytes. */ 3528 2770 if (c->d & ModRM) { 3529 2771 rc = decode_modrm(ctxt, ops, &memop); ··· 3540 2776 if (!c->has_seg_override) 3541 2777 set_seg_override(c, VCPU_SREG_DS); 3542 2778 3543 - memop.addr.mem.seg = seg_override(ctxt, ops, c); 2779 + memop.addr.mem.seg = seg_override(ctxt, c); 3544 2780 3545 2781 if (memop.type == OP_MEM && c->ad_bytes != 8) 3546 2782 memop.addr.mem.ea = (u32)memop.addr.mem.ea; ··· 3556 2792 case SrcNone: 3557 2793 break; 3558 2794 case SrcReg: 3559 - decode_register_operand(&c->src, c, 0); 2795 + decode_register_operand(ctxt, &c->src, c, 0); 3560 2796 break; 3561 2797 case SrcMem16: 3562 2798 memop.bytes = 2; ··· 3600 2836 c->src.bytes = (c->d & ByteOp) ? 
1 : c->op_bytes; 3601 2837 c->src.addr.mem.ea = 3602 2838 register_address(c, c->regs[VCPU_REGS_RSI]); 3603 - c->src.addr.mem.seg = seg_override(ctxt, ops, c), 2839 + c->src.addr.mem.seg = seg_override(ctxt, c); 3604 2840 c->src.val = 0; 3605 2841 break; 3606 2842 case SrcImmFAddr: ··· 3647 2883 /* Decode and fetch the destination operand: register or memory. */ 3648 2884 switch (c->d & DstMask) { 3649 2885 case DstReg: 3650 - decode_register_operand(&c->dst, c, 2886 + decode_register_operand(ctxt, &c->dst, c, 3651 2887 c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); 3652 2888 break; 3653 2889 case DstImmUByte: ··· 3690 2926 } 3691 2927 3692 2928 done: 3693 - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; 2929 + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; 3694 2930 } 3695 2931 3696 2932 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) ··· 3743 2979 goto done; 3744 2980 } 3745 2981 2982 + if ((c->d & Sse) 2983 + && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) 2984 + || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { 2985 + rc = emulate_ud(ctxt); 2986 + goto done; 2987 + } 2988 + 2989 + if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { 2990 + rc = emulate_nm(ctxt); 2991 + goto done; 2992 + } 2993 + 2994 + if (unlikely(ctxt->guest_mode) && c->intercept) { 2995 + rc = emulator_check_intercept(ctxt, c->intercept, 2996 + X86_ICPT_PRE_EXCEPT); 2997 + if (rc != X86EMUL_CONTINUE) 2998 + goto done; 2999 + } 3000 + 3746 3001 /* Privileged instruction can be executed only in CPL=0 */ 3747 - if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { 3002 + if ((c->d & Priv) && ops->cpl(ctxt)) { 3748 3003 rc = emulate_gp(ctxt, 0); 3749 3004 goto done; 3005 + } 3006 + 3007 + /* Instruction can only be executed in protected mode */ 3008 + if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { 3009 + rc = emulate_ud(ctxt); 3010 + goto done; 3011 + } 3012 + 3013 + /* Do instruction specific permission checks */ 3014 + if (c->check_perm) { 3015 + rc = 
c->check_perm(ctxt); 3016 + if (rc != X86EMUL_CONTINUE) 3017 + goto done; 3018 + } 3019 + 3020 + if (unlikely(ctxt->guest_mode) && c->intercept) { 3021 + rc = emulator_check_intercept(ctxt, c->intercept, 3022 + X86_ICPT_POST_EXCEPT); 3023 + if (rc != X86EMUL_CONTINUE) 3024 + goto done; 3750 3025 } 3751 3026 3752 3027 if (c->rep_prefix && (c->d & String)) { ··· 3797 2994 } 3798 2995 3799 2996 if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { 3800 - rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem), 3801 - c->src.valptr, c->src.bytes); 2997 + rc = segmented_read(ctxt, c->src.addr.mem, 2998 + c->src.valptr, c->src.bytes); 3802 2999 if (rc != X86EMUL_CONTINUE) 3803 3000 goto done; 3804 3001 c->src.orig_val64 = c->src.val64; 3805 3002 } 3806 3003 3807 3004 if (c->src2.type == OP_MEM) { 3808 - rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem), 3809 - &c->src2.val, c->src2.bytes); 3005 + rc = segmented_read(ctxt, c->src2.addr.mem, 3006 + &c->src2.val, c->src2.bytes); 3810 3007 if (rc != X86EMUL_CONTINUE) 3811 3008 goto done; 3812 3009 } ··· 3817 3014 3818 3015 if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { 3819 3016 /* optimisation - avoid slow emulated read if Mov */ 3820 - rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem), 3017 + rc = segmented_read(ctxt, c->dst.addr.mem, 3821 3018 &c->dst.val, c->dst.bytes); 3822 3019 if (rc != X86EMUL_CONTINUE) 3823 3020 goto done; ··· 3825 3022 c->dst.orig_val = c->dst.val; 3826 3023 3827 3024 special_insn: 3025 + 3026 + if (unlikely(ctxt->guest_mode) && c->intercept) { 3027 + rc = emulator_check_intercept(ctxt, c->intercept, 3028 + X86_ICPT_POST_MEMACCESS); 3029 + if (rc != X86EMUL_CONTINUE) 3030 + goto done; 3031 + } 3828 3032 3829 3033 if (c->execute) { 3830 3034 rc = c->execute(ctxt); ··· 3844 3034 goto twobyte_insn; 3845 3035 3846 3036 switch (c->b) { 3847 - case 0x00 ... 
0x05: 3848 - add: /* add */ 3849 - emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); 3850 - break; 3851 3037 case 0x06: /* push es */ 3852 - emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); 3038 + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); 3853 3039 break; 3854 3040 case 0x07: /* pop es */ 3855 3041 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); 3856 3042 break; 3857 - case 0x08 ... 0x0d: 3858 - or: /* or */ 3859 - emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); 3860 - break; 3861 3043 case 0x0e: /* push cs */ 3862 - emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); 3863 - break; 3864 - case 0x10 ... 0x15: 3865 - adc: /* adc */ 3866 - emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); 3044 + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); 3867 3045 break; 3868 3046 case 0x16: /* push ss */ 3869 - emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); 3047 + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); 3870 3048 break; 3871 3049 case 0x17: /* pop ss */ 3872 3050 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); 3873 3051 break; 3874 - case 0x18 ... 0x1d: 3875 - sbb: /* sbb */ 3876 - emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); 3877 - break; 3878 3052 case 0x1e: /* push ds */ 3879 - emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); 3053 + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); 3880 3054 break; 3881 3055 case 0x1f: /* pop ds */ 3882 3056 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); 3883 - break; 3884 - case 0x20 ... 0x25: 3885 - and: /* and */ 3886 - emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); 3887 - break; 3888 - case 0x28 ... 0x2d: 3889 - sub: /* sub */ 3890 - emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); 3891 - break; 3892 - case 0x30 ... 0x35: 3893 - xor: /* xor */ 3894 - emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); 3895 - break; 3896 - case 0x38 ... 0x3d: 3897 - cmp: /* cmp */ 3898 - emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); 3899 3057 break; 3900 3058 case 0x40 ... 
0x47: /* inc r16/r32 */ 3901 3059 emulate_1op("inc", c->dst, ctxt->eflags); 3902 3060 break; 3903 3061 case 0x48 ... 0x4f: /* dec r16/r32 */ 3904 3062 emulate_1op("dec", c->dst, ctxt->eflags); 3905 - break; 3906 - case 0x58 ... 0x5f: /* pop reg */ 3907 - pop_instruction: 3908 - rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); 3909 - break; 3910 - case 0x60: /* pusha */ 3911 - rc = emulate_pusha(ctxt, ops); 3912 - break; 3913 - case 0x61: /* popa */ 3914 - rc = emulate_popa(ctxt, ops); 3915 3063 break; 3916 3064 case 0x63: /* movsxd */ 3917 3065 if (ctxt->mode != X86EMUL_MODE_PROT64) ··· 3888 3120 case 0x70 ... 0x7f: /* jcc (short) */ 3889 3121 if (test_cc(c->b, ctxt->eflags)) 3890 3122 jmp_rel(c, c->src.val); 3891 - break; 3892 - case 0x80 ... 0x83: /* Grp1 */ 3893 - switch (c->modrm_reg) { 3894 - case 0: 3895 - goto add; 3896 - case 1: 3897 - goto or; 3898 - case 2: 3899 - goto adc; 3900 - case 3: 3901 - goto sbb; 3902 - case 4: 3903 - goto and; 3904 - case 5: 3905 - goto sub; 3906 - case 6: 3907 - goto xor; 3908 - case 7: 3909 - goto cmp; 3910 - } 3911 3123 break; 3912 3124 case 0x84 ... 0x85: 3913 3125 test: ··· 3910 3162 rc = emulate_ud(ctxt); 3911 3163 goto done; 3912 3164 } 3913 - c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); 3165 + c->dst.val = get_segment_selector(ctxt, c->modrm_reg); 3914 3166 break; 3915 3167 case 0x8d: /* lea r16/r32, m */ 3916 3168 c->dst.val = c->src.addr.mem.ea; ··· 3935 3187 break; 3936 3188 } 3937 3189 case 0x8f: /* pop (sole member of Grp1a) */ 3938 - rc = emulate_grp1a(ctxt, ops); 3190 + rc = em_grp1a(ctxt); 3939 3191 break; 3940 3192 case 0x90 ... 
0x97: /* nop / xchg reg, rax */ 3941 3193 if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) ··· 3948 3200 case 8: c->dst.val = (s32)c->dst.val; break; 3949 3201 } 3950 3202 break; 3951 - case 0x9c: /* pushf */ 3952 - c->src.val = (unsigned long) ctxt->eflags; 3953 - emulate_push(ctxt, ops); 3954 - break; 3955 - case 0x9d: /* popf */ 3956 - c->dst.type = OP_REG; 3957 - c->dst.addr.reg = &ctxt->eflags; 3958 - c->dst.bytes = c->op_bytes; 3959 - rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); 3960 - break; 3961 - case 0xa6 ... 0xa7: /* cmps */ 3962 - c->dst.type = OP_NONE; /* Disable writeback. */ 3963 - goto cmp; 3964 3203 case 0xa8 ... 0xa9: /* test ax, imm */ 3965 3204 goto test; 3966 - case 0xae ... 0xaf: /* scas */ 3967 - goto cmp; 3968 3205 case 0xc0 ... 0xc1: 3969 - emulate_grp2(ctxt); 3206 + rc = em_grp2(ctxt); 3970 3207 break; 3971 3208 case 0xc3: /* ret */ 3972 3209 c->dst.type = OP_REG; 3973 3210 c->dst.addr.reg = &c->eip; 3974 3211 c->dst.bytes = c->op_bytes; 3975 - goto pop_instruction; 3212 + rc = em_pop(ctxt); 3213 + break; 3976 3214 case 0xc4: /* les */ 3977 3215 rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); 3978 3216 break; ··· 3986 3252 rc = emulate_iret(ctxt, ops); 3987 3253 break; 3988 3254 case 0xd0 ... 0xd1: /* Grp2 */ 3989 - emulate_grp2(ctxt); 3255 + rc = em_grp2(ctxt); 3990 3256 break; 3991 3257 case 0xd2 ... 0xd3: /* Grp2 */ 3992 3258 c->src.val = c->regs[VCPU_REGS_RCX]; 3993 - emulate_grp2(ctxt); 3259 + rc = em_grp2(ctxt); 3994 3260 break; 3995 3261 case 0xe0 ... 
0xe2: /* loop/loopz/loopnz */ 3996 3262 register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); ··· 4012 3278 long int rel = c->src.val; 4013 3279 c->src.val = (unsigned long) c->eip; 4014 3280 jmp_rel(c, rel); 4015 - emulate_push(ctxt, ops); 3281 + rc = em_push(ctxt); 4016 3282 break; 4017 3283 } 4018 3284 case 0xe9: /* jmp rel */ 4019 3285 goto jmp; 4020 - case 0xea: { /* jmp far */ 4021 - unsigned short sel; 4022 - jump_far: 4023 - memcpy(&sel, c->src.valptr + c->op_bytes, 2); 4024 - 4025 - if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) 4026 - goto done; 4027 - 4028 - c->eip = 0; 4029 - memcpy(&c->eip, c->src.valptr, c->op_bytes); 3286 + case 0xea: /* jmp far */ 3287 + rc = em_jmp_far(ctxt); 4030 3288 break; 4031 - } 4032 3289 case 0xeb: 4033 3290 jmp: /* jmp rel short */ 4034 3291 jmp_rel(c, c->src.val); ··· 4029 3304 case 0xed: /* in (e/r)ax,dx */ 4030 3305 c->src.val = c->regs[VCPU_REGS_RDX]; 4031 3306 do_io_in: 4032 - c->dst.bytes = min(c->dst.bytes, 4u); 4033 - if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { 4034 - rc = emulate_gp(ctxt, 0); 4035 - goto done; 4036 - } 4037 3307 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, 4038 3308 &c->dst.val)) 4039 3309 goto done; /* IO is needed */ ··· 4037 3317 case 0xef: /* out dx,(e/r)ax */ 4038 3318 c->dst.val = c->regs[VCPU_REGS_RDX]; 4039 3319 do_io_out: 4040 - c->src.bytes = min(c->src.bytes, 4u); 4041 - if (!emulator_io_permited(ctxt, ops, c->dst.val, 4042 - c->src.bytes)) { 4043 - rc = emulate_gp(ctxt, 0); 4044 - goto done; 4045 - } 4046 - ops->pio_out_emulated(c->src.bytes, c->dst.val, 4047 - &c->src.val, 1, ctxt->vcpu); 3320 + ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, 3321 + &c->src.val, 1); 4048 3322 c->dst.type = OP_NONE; /* Disable writeback. 
*/ 4049 3323 break; 4050 3324 case 0xf4: /* hlt */ 4051 - ctxt->vcpu->arch.halt_request = 1; 3325 + ctxt->ops->halt(ctxt); 4052 3326 break; 4053 3327 case 0xf5: /* cmc */ 4054 3328 /* complement carry flag from eflags reg */ 4055 3329 ctxt->eflags ^= EFLG_CF; 4056 3330 break; 4057 3331 case 0xf6 ... 0xf7: /* Grp3 */ 4058 - rc = emulate_grp3(ctxt, ops); 3332 + rc = em_grp3(ctxt); 4059 3333 break; 4060 3334 case 0xf8: /* clc */ 4061 3335 ctxt->eflags &= ~EFLG_CF; ··· 4080 3366 ctxt->eflags |= EFLG_DF; 4081 3367 break; 4082 3368 case 0xfe: /* Grp4 */ 4083 - grp45: 4084 - rc = emulate_grp45(ctxt, ops); 3369 + rc = em_grp45(ctxt); 4085 3370 break; 4086 3371 case 0xff: /* Grp5 */ 4087 - if (c->modrm_reg == 5) 4088 - goto jump_far; 4089 - goto grp45; 3372 + rc = em_grp45(ctxt); 3373 + break; 4090 3374 default: 4091 3375 goto cannot_emulate; 4092 3376 } ··· 4093 3381 goto done; 4094 3382 4095 3383 writeback: 4096 - rc = writeback(ctxt, ops); 3384 + rc = writeback(ctxt); 4097 3385 if (rc != X86EMUL_CONTINUE) 4098 3386 goto done; 4099 3387 ··· 4104 3392 c->dst.type = saved_dst_type; 4105 3393 4106 3394 if ((c->d & SrcMask) == SrcSI) 4107 - string_addr_inc(ctxt, seg_override(ctxt, ops, c), 3395 + string_addr_inc(ctxt, seg_override(ctxt, c), 4108 3396 VCPU_REGS_RSI, &c->src); 4109 3397 4110 3398 if ((c->d & DstMask) == DstDI) ··· 4139 3427 done: 4140 3428 if (rc == X86EMUL_PROPAGATE_FAULT) 4141 3429 ctxt->have_exception = true; 3430 + if (rc == X86EMUL_INTERCEPTED) 3431 + return EMULATION_INTERCEPTED; 3432 + 4142 3433 return (rc == X86EMUL_UNHANDLEABLE) ? 
EMULATION_FAILED : EMULATION_OK; 4143 3434 4144 3435 twobyte_insn: 4145 3436 switch (c->b) { 4146 - case 0x01: /* lgdt, lidt, lmsw */ 4147 - switch (c->modrm_reg) { 4148 - u16 size; 4149 - unsigned long address; 4150 - 4151 - case 0: /* vmcall */ 4152 - if (c->modrm_mod != 3 || c->modrm_rm != 1) 4153 - goto cannot_emulate; 4154 - 4155 - rc = kvm_fix_hypercall(ctxt->vcpu); 4156 - if (rc != X86EMUL_CONTINUE) 4157 - goto done; 4158 - 4159 - /* Let the processor re-execute the fixed hypercall */ 4160 - c->eip = ctxt->eip; 4161 - /* Disable writeback. */ 4162 - c->dst.type = OP_NONE; 4163 - break; 4164 - case 2: /* lgdt */ 4165 - rc = read_descriptor(ctxt, ops, c->src.addr.mem, 4166 - &size, &address, c->op_bytes); 4167 - if (rc != X86EMUL_CONTINUE) 4168 - goto done; 4169 - realmode_lgdt(ctxt->vcpu, size, address); 4170 - /* Disable writeback. */ 4171 - c->dst.type = OP_NONE; 4172 - break; 4173 - case 3: /* lidt/vmmcall */ 4174 - if (c->modrm_mod == 3) { 4175 - switch (c->modrm_rm) { 4176 - case 1: 4177 - rc = kvm_fix_hypercall(ctxt->vcpu); 4178 - break; 4179 - default: 4180 - goto cannot_emulate; 4181 - } 4182 - } else { 4183 - rc = read_descriptor(ctxt, ops, c->src.addr.mem, 4184 - &size, &address, 4185 - c->op_bytes); 4186 - if (rc != X86EMUL_CONTINUE) 4187 - goto done; 4188 - realmode_lidt(ctxt->vcpu, size, address); 4189 - } 4190 - /* Disable writeback. */ 4191 - c->dst.type = OP_NONE; 4192 - break; 4193 - case 4: /* smsw */ 4194 - c->dst.bytes = 2; 4195 - c->dst.val = ops->get_cr(0, ctxt->vcpu); 4196 - break; 4197 - case 6: /* lmsw */ 4198 - ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) | 4199 - (c->src.val & 0x0f), ctxt->vcpu); 4200 - c->dst.type = OP_NONE; 4201 - break; 4202 - case 5: /* not defined */ 4203 - emulate_ud(ctxt); 4204 - rc = X86EMUL_PROPAGATE_FAULT; 4205 - goto done; 4206 - case 7: /* invlpg*/ 4207 - emulate_invlpg(ctxt->vcpu, 4208 - linear(ctxt, c->src.addr.mem)); 4209 - /* Disable writeback. 
*/ 4210 - c->dst.type = OP_NONE; 4211 - break; 4212 - default: 4213 - goto cannot_emulate; 4214 - } 4215 - break; 4216 3437 case 0x05: /* syscall */ 4217 3438 rc = emulate_syscall(ctxt, ops); 4218 3439 break; 4219 3440 case 0x06: 4220 - emulate_clts(ctxt->vcpu); 3441 + rc = em_clts(ctxt); 4221 3442 break; 4222 3443 case 0x09: /* wbinvd */ 4223 - kvm_emulate_wbinvd(ctxt->vcpu); 3444 + (ctxt->ops->wbinvd)(ctxt); 4224 3445 break; 4225 3446 case 0x08: /* invd */ 4226 3447 case 0x0d: /* GrpP (prefetch) */ 4227 3448 case 0x18: /* Grp16 (prefetch/nop) */ 4228 3449 break; 4229 3450 case 0x20: /* mov cr, reg */ 4230 - switch (c->modrm_reg) { 4231 - case 1: 4232 - case 5 ... 7: 4233 - case 9 ... 15: 4234 - emulate_ud(ctxt); 4235 - rc = X86EMUL_PROPAGATE_FAULT; 4236 - goto done; 4237 - } 4238 - c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu); 3451 + c->dst.val = ops->get_cr(ctxt, c->modrm_reg); 4239 3452 break; 4240 3453 case 0x21: /* mov from dr to reg */ 4241 - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 4242 - (c->modrm_reg == 4 || c->modrm_reg == 5)) { 4243 - emulate_ud(ctxt); 4244 - rc = X86EMUL_PROPAGATE_FAULT; 4245 - goto done; 4246 - } 4247 - ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu); 3454 + ops->get_dr(ctxt, c->modrm_reg, &c->dst.val); 4248 3455 break; 4249 3456 case 0x22: /* mov reg, cr */ 4250 - if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) { 3457 + if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) { 4251 3458 emulate_gp(ctxt, 0); 4252 3459 rc = X86EMUL_PROPAGATE_FAULT; 4253 3460 goto done; ··· 4174 3543 c->dst.type = OP_NONE; 4175 3544 break; 4176 3545 case 0x23: /* mov from reg to dr */ 4177 - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && 4178 - (c->modrm_reg == 4 || c->modrm_reg == 5)) { 4179 - emulate_ud(ctxt); 4180 - rc = X86EMUL_PROPAGATE_FAULT; 4181 - goto done; 4182 - } 4183 - 4184 - if (ops->set_dr(c->modrm_reg, c->src.val & 3546 + if (ops->set_dr(ctxt, c->modrm_reg, c->src.val & 4185 3547 ((ctxt->mode == X86EMUL_MODE_PROT64) ? 
4186 - ~0ULL : ~0U), ctxt->vcpu) < 0) { 3548 + ~0ULL : ~0U)) < 0) { 4187 3549 /* #UD condition is already handled by the code above */ 4188 3550 emulate_gp(ctxt, 0); 4189 3551 rc = X86EMUL_PROPAGATE_FAULT; ··· 4189 3565 /* wrmsr */ 4190 3566 msr_data = (u32)c->regs[VCPU_REGS_RAX] 4191 3567 | ((u64)c->regs[VCPU_REGS_RDX] << 32); 4192 - if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { 3568 + if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) { 4193 3569 emulate_gp(ctxt, 0); 4194 3570 rc = X86EMUL_PROPAGATE_FAULT; 4195 3571 goto done; ··· 4198 3574 break; 4199 3575 case 0x32: 4200 3576 /* rdmsr */ 4201 - if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { 3577 + if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) { 4202 3578 emulate_gp(ctxt, 0); 4203 3579 rc = X86EMUL_PROPAGATE_FAULT; 4204 3580 goto done; ··· 4227 3603 c->dst.val = test_cc(c->b, ctxt->eflags); 4228 3604 break; 4229 3605 case 0xa0: /* push fs */ 4230 - emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); 3606 + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); 4231 3607 break; 4232 3608 case 0xa1: /* pop fs */ 4233 3609 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); ··· 4244 3620 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); 4245 3621 break; 4246 3622 case 0xa8: /* push gs */ 4247 - emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); 3623 + rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); 4248 3624 break; 4249 3625 case 0xa9: /* pop gs */ 4250 3626 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); ··· 4351 3727 (u64) c->src.val; 4352 3728 break; 4353 3729 case 0xc7: /* Grp9 (cmpxchg8b) */ 4354 - rc = emulate_grp9(ctxt, ops); 3730 + rc = em_grp9(ctxt); 4355 3731 break; 4356 3732 default: 4357 3733 goto cannot_emulate; ··· 4363 3739 goto writeback; 4364 3740 4365 3741 cannot_emulate: 4366 - return -1; 3742 + return EMULATION_FAILED; 4367 3743 }
-2
arch/x86/kvm/i8254.h
··· 33 33 }; 34 34 35 35 struct kvm_pit { 36 - unsigned long base_addresss; 37 36 struct kvm_io_device dev; 38 37 struct kvm_io_device speaker_dev; 39 38 struct kvm *kvm; ··· 50 51 #define KVM_MAX_PIT_INTR_INTERVAL HZ / 100 51 52 #define KVM_PIT_CHANNEL_MASK 0x3 52 53 53 - void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); 54 54 void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start); 55 55 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); 56 56 void kvm_free_pit(struct kvm *kvm);
-2
arch/x86/kvm/irq.h
··· 75 75 void kvm_destroy_pic(struct kvm *kvm); 76 76 int kvm_pic_read_irq(struct kvm *kvm); 77 77 void kvm_pic_update_irq(struct kvm_pic *s); 78 - void kvm_pic_clear_isr_ack(struct kvm *kvm); 79 78 80 79 static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) 81 80 { ··· 99 100 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); 100 101 void __kvm_migrate_timers(struct kvm_vcpu *vcpu); 101 102 102 - int pit_has_pending_timer(struct kvm_vcpu *vcpu); 103 103 int apic_has_pending_timer(struct kvm_vcpu *vcpu); 104 104 105 105 #endif
+5 -11
arch/x86/kvm/mmu.c
··· 1206 1206 1207 1207 static void nonpaging_update_pte(struct kvm_vcpu *vcpu, 1208 1208 struct kvm_mmu_page *sp, u64 *spte, 1209 - const void *pte, unsigned long mmu_seq) 1209 + const void *pte) 1210 1210 { 1211 1211 WARN_ON(1); 1212 1212 } ··· 3163 3163 } 3164 3164 3165 3165 static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, 3166 - struct kvm_mmu_page *sp, 3167 - u64 *spte, 3168 - const void *new, unsigned long mmu_seq) 3166 + struct kvm_mmu_page *sp, u64 *spte, 3167 + const void *new) 3169 3168 { 3170 3169 if (sp->role.level != PT_PAGE_TABLE_LEVEL) { 3171 3170 ++vcpu->kvm->stat.mmu_pde_zapped; ··· 3172 3173 } 3173 3174 3174 3175 ++vcpu->kvm->stat.mmu_pte_updated; 3175 - vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); 3176 + vcpu->arch.mmu.update_pte(vcpu, sp, spte, new); 3176 3177 } 3177 3178 3178 3179 static bool need_remote_flush(u64 old, u64 new) ··· 3228 3229 struct kvm_mmu_page *sp; 3229 3230 struct hlist_node *node; 3230 3231 LIST_HEAD(invalid_list); 3231 - unsigned long mmu_seq; 3232 3232 u64 entry, gentry, *spte; 3233 3233 unsigned pte_size, page_offset, misaligned, quadrant, offset; 3234 3234 int level, npte, invlpg_counter, r, flooded = 0; ··· 3268 3270 gentry = 0; 3269 3271 break; 3270 3272 } 3271 - 3272 - mmu_seq = vcpu->kvm->mmu_notifier_seq; 3273 - smp_rmb(); 3274 3273 3275 3274 spin_lock(&vcpu->kvm->mmu_lock); 3276 3275 if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) ··· 3340 3345 if (gentry && 3341 3346 !((sp->role.word ^ vcpu->arch.mmu.base_role.word) 3342 3347 & mask.word)) 3343 - mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, 3344 - mmu_seq); 3348 + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); 3345 3349 if (!remote_flush && need_remote_flush(entry, *spte)) 3346 3350 remote_flush = true; 3347 3351 ++spte;
+57 -26
arch/x86/kvm/paging_tmpl.h
··· 78 78 return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; 79 79 } 80 80 81 - static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, 82 - gfn_t table_gfn, unsigned index, 83 - pt_element_t orig_pte, pt_element_t new_pte) 81 + static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, 82 + pt_element_t __user *ptep_user, unsigned index, 83 + pt_element_t orig_pte, pt_element_t new_pte) 84 84 { 85 + int npages; 85 86 pt_element_t ret; 86 87 pt_element_t *table; 87 88 struct page *page; 88 89 89 - page = gfn_to_page(kvm, table_gfn); 90 + npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); 91 + /* Check if the user is doing something meaningless. */ 92 + if (unlikely(npages != 1)) 93 + return -EFAULT; 90 94 91 95 table = kmap_atomic(page, KM_USER0); 92 96 ret = CMPXCHG(&table[index], orig_pte, new_pte); ··· 121 117 gva_t addr, u32 access) 122 118 { 123 119 pt_element_t pte; 120 + pt_element_t __user *ptep_user; 124 121 gfn_t table_gfn; 125 122 unsigned index, pt_access, uninitialized_var(pte_access); 126 123 gpa_t pte_gpa; ··· 157 152 pt_access = ACC_ALL; 158 153 159 154 for (;;) { 155 + gfn_t real_gfn; 156 + unsigned long host_addr; 157 + 160 158 index = PT_INDEX(addr, walker->level); 161 159 162 160 table_gfn = gpte_to_gfn(pte); ··· 168 160 walker->table_gfn[walker->level - 1] = table_gfn; 169 161 walker->pte_gpa[walker->level - 1] = pte_gpa; 170 162 171 - if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, 172 - offset, sizeof(pte), 173 - PFERR_USER_MASK|PFERR_WRITE_MASK)) { 163 + real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), 164 + PFERR_USER_MASK|PFERR_WRITE_MASK); 165 + if (unlikely(real_gfn == UNMAPPED_GVA)) { 166 + present = false; 167 + break; 168 + } 169 + real_gfn = gpa_to_gfn(real_gfn); 170 + 171 + host_addr = gfn_to_hva(vcpu->kvm, real_gfn); 172 + if (unlikely(kvm_is_error_hva(host_addr))) { 173 + present = false; 174 + break; 175 + } 176 + 177 + ptep_user = (pt_element_t __user *)((void *)host_addr + offset); 
178 + if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) { 174 179 present = false; 175 180 break; 176 181 } 177 182 178 183 trace_kvm_mmu_paging_element(pte, walker->level); 179 184 180 - if (!is_present_gpte(pte)) { 185 + if (unlikely(!is_present_gpte(pte))) { 181 186 present = false; 182 187 break; 183 188 } 184 189 185 - if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { 190 + if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, 191 + walker->level))) { 186 192 rsvd_fault = true; 187 193 break; 188 194 } 189 195 190 - if (write_fault && !is_writable_pte(pte)) 191 - if (user_fault || is_write_protection(vcpu)) 192 - eperm = true; 196 + if (unlikely(write_fault && !is_writable_pte(pte) 197 + && (user_fault || is_write_protection(vcpu)))) 198 + eperm = true; 193 199 194 - if (user_fault && !(pte & PT_USER_MASK)) 200 + if (unlikely(user_fault && !(pte & PT_USER_MASK))) 195 201 eperm = true; 196 202 197 203 #if PTTYPE == 64 198 - if (fetch_fault && (pte & PT64_NX_MASK)) 204 + if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) 199 205 eperm = true; 200 206 #endif 201 207 202 - if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { 208 + if (!eperm && !rsvd_fault 209 + && unlikely(!(pte & PT_ACCESSED_MASK))) { 210 + int ret; 203 211 trace_kvm_mmu_set_accessed_bit(table_gfn, index, 204 212 sizeof(pte)); 205 - if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, 206 - index, pte, pte|PT_ACCESSED_MASK)) 213 + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, 214 + pte, pte|PT_ACCESSED_MASK); 215 + if (unlikely(ret < 0)) { 216 + present = false; 217 + break; 218 + } else if (ret) 207 219 goto walk; 220 + 208 221 mark_page_dirty(vcpu->kvm, table_gfn); 209 222 pte |= PT_ACCESSED_MASK; 210 223 } ··· 270 241 --walker->level; 271 242 } 272 243 273 - if (!present || eperm || rsvd_fault) 244 + if (unlikely(!present || eperm || rsvd_fault)) 274 245 goto error; 275 246 276 - if (write_fault && !is_dirty_gpte(pte)) { 277 - bool ret; 247 + if (write_fault && 
unlikely(!is_dirty_gpte(pte))) { 248 + int ret; 278 249 279 250 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); 280 - ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, 281 - pte|PT_DIRTY_MASK); 282 - if (ret) 251 + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, 252 + pte, pte|PT_DIRTY_MASK); 253 + if (unlikely(ret < 0)) { 254 + present = false; 255 + goto error; 256 + } else if (ret) 283 257 goto walk; 258 + 284 259 mark_page_dirty(vcpu->kvm, table_gfn); 285 260 pte |= PT_DIRTY_MASK; 286 261 walker->ptes[walker->level - 1] = pte; ··· 358 325 } 359 326 360 327 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 361 - u64 *spte, const void *pte, unsigned long mmu_seq) 328 + u64 *spte, const void *pte) 362 329 { 363 330 pt_element_t gpte; 364 331 unsigned pte_access; ··· 375 342 kvm_release_pfn_clean(pfn); 376 343 return; 377 344 } 378 - if (mmu_notifier_retry(vcpu, mmu_seq)) 379 - return; 380 345 381 346 /* 382 347 * we call mmu_set_spte() with host_writable = true because that
+434 -151
arch/x86/kvm/svm.c
··· 63 63 64 64 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) 65 65 66 + #define TSC_RATIO_RSVD 0xffffff0000000000ULL 67 + #define TSC_RATIO_MIN 0x0000000000000001ULL 68 + #define TSC_RATIO_MAX 0x000000ffffffffffULL 69 + 66 70 static bool erratum_383_found __read_mostly; 67 71 68 72 static const u32 host_save_user_msrs[] = { ··· 96 92 97 93 /* A VMEXIT is required but not yet emulated */ 98 94 bool exit_required; 99 - 100 - /* 101 - * If we vmexit during an instruction emulation we need this to restore 102 - * the l1 guest rip after the emulation 103 - */ 104 - unsigned long vmexit_rip; 105 - unsigned long vmexit_rsp; 106 - unsigned long vmexit_rax; 107 95 108 96 /* cache for intercepts of the guest */ 109 97 u32 intercept_cr; ··· 140 144 unsigned int3_injected; 141 145 unsigned long int3_rip; 142 146 u32 apf_reason; 147 + 148 + u64 tsc_ratio; 143 149 }; 150 + 151 + static DEFINE_PER_CPU(u64, current_tsc_ratio); 152 + #define TSC_RATIO_DEFAULT 0x0100000000ULL 144 153 145 154 #define MSR_INVALID 0xffffffffU 146 155 ··· 191 190 static int nested_svm_vmexit(struct vcpu_svm *svm); 192 191 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 193 192 bool has_error_code, u32 error_code); 193 + static u64 __scale_tsc(u64 ratio, u64 tsc); 194 194 195 195 enum { 196 196 VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, ··· 378 376 }; 379 377 380 378 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); 381 - static uint32_t svm_features; 382 379 383 380 struct svm_init_data { 384 381 int cpu; ··· 570 569 571 570 static void svm_hardware_disable(void *garbage) 572 571 { 572 + /* Make sure we clean up behind us */ 573 + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) 574 + wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); 575 + 573 576 cpu_svm_disable(); 574 577 } 575 578 ··· 614 609 wrmsrl(MSR_EFER, efer | EFER_SVME); 615 610 616 611 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); 612 + 613 + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { 614 
+ wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); 615 + __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; 616 + } 617 617 618 618 svm_init_erratum_383(); 619 619 ··· 801 791 if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) 802 792 kvm_enable_efer_bits(EFER_FFXSR); 803 793 794 + if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { 795 + u64 max; 796 + 797 + kvm_has_tsc_control = true; 798 + 799 + /* 800 + * Make sure the user can only configure tsc_khz values that 801 + * fit into a signed integer. 802 + * A min value is not calculated needed because it will always 803 + * be 1 on all machines and a value of 0 is used to disable 804 + * tsc-scaling for the vcpu. 805 + */ 806 + max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); 807 + 808 + kvm_max_guest_tsc_khz = max; 809 + } 810 + 804 811 if (nested) { 805 812 printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); 806 813 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); ··· 828 801 if (r) 829 802 goto err; 830 803 } 831 - 832 - svm_features = cpuid_edx(SVM_CPUID_FUNC); 833 804 834 805 if (!boot_cpu_has(X86_FEATURE_NPT)) 835 806 npt_enabled = false; ··· 879 854 seg->base = 0; 880 855 } 881 856 857 + static u64 __scale_tsc(u64 ratio, u64 tsc) 858 + { 859 + u64 mult, frac, _tsc; 860 + 861 + mult = ratio >> 32; 862 + frac = ratio & ((1ULL << 32) - 1); 863 + 864 + _tsc = tsc; 865 + _tsc *= mult; 866 + _tsc += (tsc >> 32) * frac; 867 + _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; 868 + 869 + return _tsc; 870 + } 871 + 872 + static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) 873 + { 874 + struct vcpu_svm *svm = to_svm(vcpu); 875 + u64 _tsc = tsc; 876 + 877 + if (svm->tsc_ratio != TSC_RATIO_DEFAULT) 878 + _tsc = __scale_tsc(svm->tsc_ratio, tsc); 879 + 880 + return _tsc; 881 + } 882 + 883 + static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) 884 + { 885 + struct vcpu_svm *svm = to_svm(vcpu); 886 + u64 ratio; 887 + u64 khz; 888 + 889 + /* TSC scaling supported? 
*/ 890 + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) 891 + return; 892 + 893 + /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ 894 + if (user_tsc_khz == 0) { 895 + vcpu->arch.virtual_tsc_khz = 0; 896 + svm->tsc_ratio = TSC_RATIO_DEFAULT; 897 + return; 898 + } 899 + 900 + khz = user_tsc_khz; 901 + 902 + /* TSC scaling required - calculate ratio */ 903 + ratio = khz << 32; 904 + do_div(ratio, tsc_khz); 905 + 906 + if (ratio == 0 || ratio & TSC_RATIO_RSVD) { 907 + WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", 908 + user_tsc_khz); 909 + return; 910 + } 911 + vcpu->arch.virtual_tsc_khz = user_tsc_khz; 912 + svm->tsc_ratio = ratio; 913 + } 914 + 882 915 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) 883 916 { 884 917 struct vcpu_svm *svm = to_svm(vcpu); ··· 961 878 if (is_guest_mode(vcpu)) 962 879 svm->nested.hsave->control.tsc_offset += adjustment; 963 880 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 881 + } 882 + 883 + static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) 884 + { 885 + u64 tsc; 886 + 887 + tsc = svm_scale_tsc(vcpu, native_read_tsc()); 888 + 889 + return target_tsc - tsc; 964 890 } 965 891 966 892 static void init_vmcb(struct vcpu_svm *svm) ··· 1067 975 svm_set_efer(&svm->vcpu, 0); 1068 976 save->dr6 = 0xffff0ff0; 1069 977 save->dr7 = 0x400; 1070 - save->rflags = 2; 978 + kvm_set_rflags(&svm->vcpu, 2); 1071 979 save->rip = 0x0000fff0; 1072 980 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; 1073 981 ··· 1139 1047 err = -ENOMEM; 1140 1048 goto out; 1141 1049 } 1050 + 1051 + svm->tsc_ratio = TSC_RATIO_DEFAULT; 1142 1052 1143 1053 err = kvm_vcpu_init(&svm->vcpu, kvm, id); 1144 1054 if (err) ··· 1235 1141 1236 1142 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 1237 1143 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); 1144 + 1145 + if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && 1146 + svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { 1147 + __get_cpu_var(current_tsc_ratio) = 
svm->tsc_ratio; 1148 + wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); 1149 + } 1238 1150 } 1239 1151 1240 1152 static void svm_vcpu_put(struct kvm_vcpu *vcpu) ··· 1464 1364 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1465 1365 { 1466 1366 struct vcpu_svm *svm = to_svm(vcpu); 1467 - 1468 - if (is_guest_mode(vcpu)) { 1469 - /* 1470 - * We are here because we run in nested mode, the host kvm 1471 - * intercepts cr0 writes but the l1 hypervisor does not. 1472 - * But the L1 hypervisor may intercept selective cr0 writes. 1473 - * This needs to be checked here. 1474 - */ 1475 - unsigned long old, new; 1476 - 1477 - /* Remove bits that would trigger a real cr0 write intercept */ 1478 - old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; 1479 - new = cr0 & SVM_CR0_SELECTIVE_MASK; 1480 - 1481 - if (old == new) { 1482 - /* cr0 write with ts and mp unchanged */ 1483 - svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; 1484 - if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { 1485 - svm->nested.vmexit_rip = kvm_rip_read(vcpu); 1486 - svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); 1487 - svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); 1488 - return; 1489 - } 1490 - } 1491 - } 1492 1367 1493 1368 #ifdef CONFIG_X86_64 1494 1369 if (vcpu->arch.efer & EFER_LME) { ··· 2202 2127 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); 2203 2128 nested_vmcb->save.cr2 = vmcb->save.cr2; 2204 2129 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; 2205 - nested_vmcb->save.rflags = vmcb->save.rflags; 2130 + nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); 2206 2131 nested_vmcb->save.rip = vmcb->save.rip; 2207 2132 nested_vmcb->save.rsp = vmcb->save.rsp; 2208 2133 nested_vmcb->save.rax = vmcb->save.rax; ··· 2259 2184 svm->vmcb->save.ds = hsave->save.ds; 2260 2185 svm->vmcb->save.gdtr = hsave->save.gdtr; 2261 2186 svm->vmcb->save.idtr = hsave->save.idtr; 2262 - svm->vmcb->save.rflags = hsave->save.rflags; 2187 + kvm_set_rflags(&svm->vcpu, 
hsave->save.rflags); 2263 2188 svm_set_efer(&svm->vcpu, hsave->save.efer); 2264 2189 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); 2265 2190 svm_set_cr4(&svm->vcpu, hsave->save.cr4); ··· 2387 2312 hsave->save.efer = svm->vcpu.arch.efer; 2388 2313 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); 2389 2314 hsave->save.cr4 = svm->vcpu.arch.cr4; 2390 - hsave->save.rflags = vmcb->save.rflags; 2315 + hsave->save.rflags = kvm_get_rflags(&svm->vcpu); 2391 2316 hsave->save.rip = kvm_rip_read(&svm->vcpu); 2392 2317 hsave->save.rsp = vmcb->save.rsp; 2393 2318 hsave->save.rax = vmcb->save.rax; ··· 2398 2323 2399 2324 copy_vmcb_control_area(hsave, vmcb); 2400 2325 2401 - if (svm->vmcb->save.rflags & X86_EFLAGS_IF) 2326 + if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) 2402 2327 svm->vcpu.arch.hflags |= HF_HIF_MASK; 2403 2328 else 2404 2329 svm->vcpu.arch.hflags &= ~HF_HIF_MASK; ··· 2416 2341 svm->vmcb->save.ds = nested_vmcb->save.ds; 2417 2342 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; 2418 2343 svm->vmcb->save.idtr = nested_vmcb->save.idtr; 2419 - svm->vmcb->save.rflags = nested_vmcb->save.rflags; 2344 + kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); 2420 2345 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); 2421 2346 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); 2422 2347 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); ··· 2518 2443 if (nested_svm_check_permissions(svm)) 2519 2444 return 1; 2520 2445 2521 - svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2522 - skip_emulated_instruction(&svm->vcpu); 2523 - 2524 2446 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2525 2447 if (!nested_vmcb) 2526 2448 return 1; 2449 + 2450 + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2451 + skip_emulated_instruction(&svm->vcpu); 2527 2452 2528 2453 nested_svm_vmloadsave(nested_vmcb, svm->vmcb); 2529 2454 nested_svm_unmap(page); ··· 2539 2464 if (nested_svm_check_permissions(svm)) 2540 2465 return 1; 2541 2466 2542 - svm->next_rip = kvm_rip_read(&svm->vcpu) + 
3; 2543 - skip_emulated_instruction(&svm->vcpu); 2544 - 2545 2467 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2546 2468 if (!nested_vmcb) 2547 2469 return 1; 2470 + 2471 + svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2472 + skip_emulated_instruction(&svm->vcpu); 2548 2473 2549 2474 nested_svm_vmloadsave(svm->vmcb, nested_vmcb); 2550 2475 nested_svm_unmap(page); ··· 2751 2676 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 2752 2677 } 2753 2678 2679 + bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) 2680 + { 2681 + unsigned long cr0 = svm->vcpu.arch.cr0; 2682 + bool ret = false; 2683 + u64 intercept; 2684 + 2685 + intercept = svm->nested.intercept; 2686 + 2687 + if (!is_guest_mode(&svm->vcpu) || 2688 + (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) 2689 + return false; 2690 + 2691 + cr0 &= ~SVM_CR0_SELECTIVE_MASK; 2692 + val &= ~SVM_CR0_SELECTIVE_MASK; 2693 + 2694 + if (cr0 ^ val) { 2695 + svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; 2696 + ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); 2697 + } 2698 + 2699 + return ret; 2700 + } 2701 + 2754 2702 #define CR_VALID (1ULL << 63) 2755 2703 2756 2704 static int cr_interception(struct vcpu_svm *svm) ··· 2797 2699 val = kvm_register_read(&svm->vcpu, reg); 2798 2700 switch (cr) { 2799 2701 case 0: 2800 - err = kvm_set_cr0(&svm->vcpu, val); 2702 + if (!check_selective_cr0_intercepted(svm, val)) 2703 + err = kvm_set_cr0(&svm->vcpu, val); 2704 + else 2705 + return 1; 2706 + 2801 2707 break; 2802 2708 case 3: 2803 2709 err = kvm_set_cr3(&svm->vcpu, val); ··· 2844 2742 kvm_complete_insn_gp(&svm->vcpu, err); 2845 2743 2846 2744 return 1; 2847 - } 2848 - 2849 - static int cr0_write_interception(struct vcpu_svm *svm) 2850 - { 2851 - struct kvm_vcpu *vcpu = &svm->vcpu; 2852 - int r; 2853 - 2854 - r = cr_interception(svm); 2855 - 2856 - if (svm->nested.vmexit_rip) { 2857 - kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip); 2858 - 
kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp); 2859 - kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax); 2860 - svm->nested.vmexit_rip = 0; 2861 - } 2862 - 2863 - return r; 2864 2745 } 2865 2746 2866 2747 static int dr_interception(struct vcpu_svm *svm) ··· 2898 2813 case MSR_IA32_TSC: { 2899 2814 struct vmcb *vmcb = get_host_vmcb(svm); 2900 2815 2901 - *data = vmcb->control.tsc_offset + native_read_tsc(); 2816 + *data = vmcb->control.tsc_offset + 2817 + svm_scale_tsc(vcpu, native_read_tsc()); 2818 + 2902 2819 break; 2903 2820 } 2904 2821 case MSR_STAR: ··· 3135 3048 [SVM_EXIT_READ_CR4] = cr_interception, 3136 3049 [SVM_EXIT_READ_CR8] = cr_interception, 3137 3050 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, 3138 - [SVM_EXIT_WRITE_CR0] = cr0_write_interception, 3051 + [SVM_EXIT_WRITE_CR0] = cr_interception, 3139 3052 [SVM_EXIT_WRITE_CR3] = cr_interception, 3140 3053 [SVM_EXIT_WRITE_CR4] = cr_interception, 3141 3054 [SVM_EXIT_WRITE_CR8] = cr8_write_interception, ··· 3191 3104 [SVM_EXIT_NPF] = pf_interception, 3192 3105 }; 3193 3106 3194 - void dump_vmcb(struct kvm_vcpu *vcpu) 3107 + static void dump_vmcb(struct kvm_vcpu *vcpu) 3195 3108 { 3196 3109 struct vcpu_svm *svm = to_svm(vcpu); 3197 3110 struct vmcb_control_area *control = &svm->vmcb->control; 3198 3111 struct vmcb_save_area *save = &svm->vmcb->save; 3199 3112 3200 3113 pr_err("VMCB Control Area:\n"); 3201 - pr_err("cr_read: %04x\n", control->intercept_cr & 0xffff); 3202 - pr_err("cr_write: %04x\n", control->intercept_cr >> 16); 3203 - pr_err("dr_read: %04x\n", control->intercept_dr & 0xffff); 3204 - pr_err("dr_write: %04x\n", control->intercept_dr >> 16); 3205 - pr_err("exceptions: %08x\n", control->intercept_exceptions); 3206 - pr_err("intercepts: %016llx\n", control->intercept); 3207 - pr_err("pause filter count: %d\n", control->pause_filter_count); 3208 - pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); 3209 - pr_err("msrpm_base_pa: %016llx\n", 
control->msrpm_base_pa); 3210 - pr_err("tsc_offset: %016llx\n", control->tsc_offset); 3211 - pr_err("asid: %d\n", control->asid); 3212 - pr_err("tlb_ctl: %d\n", control->tlb_ctl); 3213 - pr_err("int_ctl: %08x\n", control->int_ctl); 3214 - pr_err("int_vector: %08x\n", control->int_vector); 3215 - pr_err("int_state: %08x\n", control->int_state); 3216 - pr_err("exit_code: %08x\n", control->exit_code); 3217 - pr_err("exit_info1: %016llx\n", control->exit_info_1); 3218 - pr_err("exit_info2: %016llx\n", control->exit_info_2); 3219 - pr_err("exit_int_info: %08x\n", control->exit_int_info); 3220 - pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); 3221 - pr_err("nested_ctl: %lld\n", control->nested_ctl); 3222 - pr_err("nested_cr3: %016llx\n", control->nested_cr3); 3223 - pr_err("event_inj: %08x\n", control->event_inj); 3224 - pr_err("event_inj_err: %08x\n", control->event_inj_err); 3225 - pr_err("lbr_ctl: %lld\n", control->lbr_ctl); 3226 - pr_err("next_rip: %016llx\n", control->next_rip); 3114 + pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); 3115 + pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); 3116 + pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); 3117 + pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); 3118 + pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); 3119 + pr_err("%-20s%016llx\n", "intercepts:", control->intercept); 3120 + pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); 3121 + pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); 3122 + pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); 3123 + pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); 3124 + pr_err("%-20s%d\n", "asid:", control->asid); 3125 + pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); 3126 + pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); 3127 + pr_err("%-20s%08x\n", "int_vector:", control->int_vector); 3128 + 
pr_err("%-20s%08x\n", "int_state:", control->int_state); 3129 + pr_err("%-20s%08x\n", "exit_code:", control->exit_code); 3130 + pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); 3131 + pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); 3132 + pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); 3133 + pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); 3134 + pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); 3135 + pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); 3136 + pr_err("%-20s%08x\n", "event_inj:", control->event_inj); 3137 + pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); 3138 + pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); 3139 + pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); 3227 3140 pr_err("VMCB State Save Area:\n"); 3228 - pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", 3229 - save->es.selector, save->es.attrib, 3230 - save->es.limit, save->es.base); 3231 - pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", 3232 - save->cs.selector, save->cs.attrib, 3233 - save->cs.limit, save->cs.base); 3234 - pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", 3235 - save->ss.selector, save->ss.attrib, 3236 - save->ss.limit, save->ss.base); 3237 - pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", 3238 - save->ds.selector, save->ds.attrib, 3239 - save->ds.limit, save->ds.base); 3240 - pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", 3241 - save->fs.selector, save->fs.attrib, 3242 - save->fs.limit, save->fs.base); 3243 - pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", 3244 - save->gs.selector, save->gs.attrib, 3245 - save->gs.limit, save->gs.base); 3246 - pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", 3247 - save->gdtr.selector, save->gdtr.attrib, 3248 - save->gdtr.limit, save->gdtr.base); 3249 - pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", 3250 - save->ldtr.selector, save->ldtr.attrib, 3251 - save->ldtr.limit, save->ldtr.base); 3252 
- pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", 3253 - save->idtr.selector, save->idtr.attrib, 3254 - save->idtr.limit, save->idtr.base); 3255 - pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", 3256 - save->tr.selector, save->tr.attrib, 3257 - save->tr.limit, save->tr.base); 3141 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3142 + "es:", 3143 + save->es.selector, save->es.attrib, 3144 + save->es.limit, save->es.base); 3145 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3146 + "cs:", 3147 + save->cs.selector, save->cs.attrib, 3148 + save->cs.limit, save->cs.base); 3149 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3150 + "ss:", 3151 + save->ss.selector, save->ss.attrib, 3152 + save->ss.limit, save->ss.base); 3153 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3154 + "ds:", 3155 + save->ds.selector, save->ds.attrib, 3156 + save->ds.limit, save->ds.base); 3157 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3158 + "fs:", 3159 + save->fs.selector, save->fs.attrib, 3160 + save->fs.limit, save->fs.base); 3161 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3162 + "gs:", 3163 + save->gs.selector, save->gs.attrib, 3164 + save->gs.limit, save->gs.base); 3165 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3166 + "gdtr:", 3167 + save->gdtr.selector, save->gdtr.attrib, 3168 + save->gdtr.limit, save->gdtr.base); 3169 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3170 + "ldtr:", 3171 + save->ldtr.selector, save->ldtr.attrib, 3172 + save->ldtr.limit, save->ldtr.base); 3173 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3174 + "idtr:", 3175 + save->idtr.selector, save->idtr.attrib, 3176 + save->idtr.limit, save->idtr.base); 3177 + pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3178 + "tr:", 3179 + save->tr.selector, save->tr.attrib, 3180 + save->tr.limit, save->tr.base); 3258 3181 pr_err("cpl: %d efer: %016llx\n", 3259 3182 save->cpl, save->efer); 3260 - pr_err("cr0: %016llx cr2: %016llx\n", 3261 - 
save->cr0, save->cr2); 3262 - pr_err("cr3: %016llx cr4: %016llx\n", 3263 - save->cr3, save->cr4); 3264 - pr_err("dr6: %016llx dr7: %016llx\n", 3265 - save->dr6, save->dr7); 3266 - pr_err("rip: %016llx rflags: %016llx\n", 3267 - save->rip, save->rflags); 3268 - pr_err("rsp: %016llx rax: %016llx\n", 3269 - save->rsp, save->rax); 3270 - pr_err("star: %016llx lstar: %016llx\n", 3271 - save->star, save->lstar); 3272 - pr_err("cstar: %016llx sfmask: %016llx\n", 3273 - save->cstar, save->sfmask); 3274 - pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", 3275 - save->kernel_gs_base, save->sysenter_cs); 3276 - pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", 3277 - save->sysenter_esp, save->sysenter_eip); 3278 - pr_err("gpat: %016llx dbgctl: %016llx\n", 3279 - save->g_pat, save->dbgctl); 3280 - pr_err("br_from: %016llx br_to: %016llx\n", 3281 - save->br_from, save->br_to); 3282 - pr_err("excp_from: %016llx excp_to: %016llx\n", 3283 - save->last_excp_from, save->last_excp_to); 3284 - 3183 + pr_err("%-15s %016llx %-13s %016llx\n", 3184 + "cr0:", save->cr0, "cr2:", save->cr2); 3185 + pr_err("%-15s %016llx %-13s %016llx\n", 3186 + "cr3:", save->cr3, "cr4:", save->cr4); 3187 + pr_err("%-15s %016llx %-13s %016llx\n", 3188 + "dr6:", save->dr6, "dr7:", save->dr7); 3189 + pr_err("%-15s %016llx %-13s %016llx\n", 3190 + "rip:", save->rip, "rflags:", save->rflags); 3191 + pr_err("%-15s %016llx %-13s %016llx\n", 3192 + "rsp:", save->rsp, "rax:", save->rax); 3193 + pr_err("%-15s %016llx %-13s %016llx\n", 3194 + "star:", save->star, "lstar:", save->lstar); 3195 + pr_err("%-15s %016llx %-13s %016llx\n", 3196 + "cstar:", save->cstar, "sfmask:", save->sfmask); 3197 + pr_err("%-15s %016llx %-13s %016llx\n", 3198 + "kernel_gs_base:", save->kernel_gs_base, 3199 + "sysenter_cs:", save->sysenter_cs); 3200 + pr_err("%-15s %016llx %-13s %016llx\n", 3201 + "sysenter_esp:", save->sysenter_esp, 3202 + "sysenter_eip:", save->sysenter_eip); 3203 + pr_err("%-15s %016llx %-13s %016llx\n", 
3204 + "gpat:", save->g_pat, "dbgctl:", save->dbgctl); 3205 + pr_err("%-15s %016llx %-13s %016llx\n", 3206 + "br_from:", save->br_from, "br_to:", save->br_to); 3207 + pr_err("%-15s %016llx %-13s %016llx\n", 3208 + "excp_from:", save->last_excp_from, 3209 + "excp_to:", save->last_excp_to); 3285 3210 } 3286 3211 3287 3212 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) ··· 3483 3384 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) 3484 3385 return 0; 3485 3386 3486 - ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); 3387 + ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); 3487 3388 3488 3389 if (is_guest_mode(vcpu)) 3489 3390 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); ··· 3970 3871 update_cr0_intercept(svm); 3971 3872 } 3972 3873 3874 + #define PRE_EX(exit) { .exit_code = (exit), \ 3875 + .stage = X86_ICPT_PRE_EXCEPT, } 3876 + #define POST_EX(exit) { .exit_code = (exit), \ 3877 + .stage = X86_ICPT_POST_EXCEPT, } 3878 + #define POST_MEM(exit) { .exit_code = (exit), \ 3879 + .stage = X86_ICPT_POST_MEMACCESS, } 3880 + 3881 + static struct __x86_intercept { 3882 + u32 exit_code; 3883 + enum x86_intercept_stage stage; 3884 + } x86_intercept_map[] = { 3885 + [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0), 3886 + [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0), 3887 + [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0), 3888 + [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0), 3889 + [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0), 3890 + [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0), 3891 + [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0), 3892 + [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ), 3893 + [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ), 3894 + [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE), 3895 + [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE), 3896 + [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ), 3897 + [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ), 3898 + 
[x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE), 3899 + [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE), 3900 + [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN), 3901 + [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL), 3902 + [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD), 3903 + [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE), 3904 + [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI), 3905 + [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI), 3906 + [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT), 3907 + [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA), 3908 + [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP), 3909 + [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR), 3910 + [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT), 3911 + [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG), 3912 + [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD), 3913 + [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD), 3914 + [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR), 3915 + [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC), 3916 + [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR), 3917 + [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC), 3918 + [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID), 3919 + [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM), 3920 + [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE), 3921 + [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF), 3922 + [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF), 3923 + [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT), 3924 + [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET), 3925 + [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP), 3926 + [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT), 3927 + [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO), 3928 + [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO), 3929 + [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO), 3930 + [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO), 3931 + }; 3932 + 3933 + #undef PRE_EX 3934 + #undef POST_EX 3935 + #undef POST_MEM 3936 + 3937 + static int svm_check_intercept(struct kvm_vcpu *vcpu, 3938 + struct 
x86_instruction_info *info, 3939 + enum x86_intercept_stage stage) 3940 + { 3941 + struct vcpu_svm *svm = to_svm(vcpu); 3942 + int vmexit, ret = X86EMUL_CONTINUE; 3943 + struct __x86_intercept icpt_info; 3944 + struct vmcb *vmcb = svm->vmcb; 3945 + 3946 + if (info->intercept >= ARRAY_SIZE(x86_intercept_map)) 3947 + goto out; 3948 + 3949 + icpt_info = x86_intercept_map[info->intercept]; 3950 + 3951 + if (stage != icpt_info.stage) 3952 + goto out; 3953 + 3954 + switch (icpt_info.exit_code) { 3955 + case SVM_EXIT_READ_CR0: 3956 + if (info->intercept == x86_intercept_cr_read) 3957 + icpt_info.exit_code += info->modrm_reg; 3958 + break; 3959 + case SVM_EXIT_WRITE_CR0: { 3960 + unsigned long cr0, val; 3961 + u64 intercept; 3962 + 3963 + if (info->intercept == x86_intercept_cr_write) 3964 + icpt_info.exit_code += info->modrm_reg; 3965 + 3966 + if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) 3967 + break; 3968 + 3969 + intercept = svm->nested.intercept; 3970 + 3971 + if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) 3972 + break; 3973 + 3974 + cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK; 3975 + val = info->src_val & ~SVM_CR0_SELECTIVE_MASK; 3976 + 3977 + if (info->intercept == x86_intercept_lmsw) { 3978 + cr0 &= 0xfUL; 3979 + val &= 0xfUL; 3980 + /* lmsw can't clear PE - catch this here */ 3981 + if (cr0 & X86_CR0_PE) 3982 + val |= X86_CR0_PE; 3983 + } 3984 + 3985 + if (cr0 ^ val) 3986 + icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE; 3987 + 3988 + break; 3989 + } 3990 + case SVM_EXIT_READ_DR0: 3991 + case SVM_EXIT_WRITE_DR0: 3992 + icpt_info.exit_code += info->modrm_reg; 3993 + break; 3994 + case SVM_EXIT_MSR: 3995 + if (info->intercept == x86_intercept_wrmsr) 3996 + vmcb->control.exit_info_1 = 1; 3997 + else 3998 + vmcb->control.exit_info_1 = 0; 3999 + break; 4000 + case SVM_EXIT_PAUSE: 4001 + /* 4002 + * We get this for NOP only, but pause 4003 + * is rep not, check this here 4004 + */ 4005 + if (info->rep_prefix != REPE_PREFIX) 4006 + goto out; 4007 + case 
SVM_EXIT_IOIO: { 4008 + u64 exit_info; 4009 + u32 bytes; 4010 + 4011 + exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; 4012 + 4013 + if (info->intercept == x86_intercept_in || 4014 + info->intercept == x86_intercept_ins) { 4015 + exit_info |= SVM_IOIO_TYPE_MASK; 4016 + bytes = info->src_bytes; 4017 + } else { 4018 + bytes = info->dst_bytes; 4019 + } 4020 + 4021 + if (info->intercept == x86_intercept_outs || 4022 + info->intercept == x86_intercept_ins) 4023 + exit_info |= SVM_IOIO_STR_MASK; 4024 + 4025 + if (info->rep_prefix) 4026 + exit_info |= SVM_IOIO_REP_MASK; 4027 + 4028 + bytes = min(bytes, 4u); 4029 + 4030 + exit_info |= bytes << SVM_IOIO_SIZE_SHIFT; 4031 + 4032 + exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1); 4033 + 4034 + vmcb->control.exit_info_1 = exit_info; 4035 + vmcb->control.exit_info_2 = info->next_rip; 4036 + 4037 + break; 4038 + } 4039 + default: 4040 + break; 4041 + } 4042 + 4043 + vmcb->control.next_rip = info->next_rip; 4044 + vmcb->control.exit_code = icpt_info.exit_code; 4045 + vmexit = nested_svm_exit_handled(svm); 4046 + 4047 + ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED 4048 + : X86EMUL_CONTINUE; 4049 + 4050 + out: 4051 + return ret; 4052 + } 4053 + 3973 4054 static struct kvm_x86_ops svm_x86_ops = { 3974 4055 .cpu_has_kvm_support = has_svm, 3975 4056 .disabled_by_bios = is_disabled, ··· 4231 3952 4232 3953 .has_wbinvd_exit = svm_has_wbinvd_exit, 4233 3954 3955 + .set_tsc_khz = svm_set_tsc_khz, 4234 3956 .write_tsc_offset = svm_write_tsc_offset, 4235 3957 .adjust_tsc_offset = svm_adjust_tsc_offset, 3958 + .compute_tsc_offset = svm_compute_tsc_offset, 4236 3959 4237 3960 .set_tdp_cr3 = set_tdp_cr3, 3961 + 3962 + .check_intercept = svm_check_intercept, 4238 3963 }; 4239 3964 4240 3965 static int __init svm_init(void)
+192 -36
arch/x86/kvm/vmx.c
··· 128 128 unsigned long host_rsp; 129 129 int launched; 130 130 u8 fail; 131 + u8 cpl; 132 + bool nmi_known_unmasked; 131 133 u32 exit_intr_info; 132 134 u32 idt_vectoring_info; 135 + ulong rflags; 133 136 struct shared_msr_entry *guest_msrs; 134 137 int nmsrs; 135 138 int save_nmsrs; ··· 162 159 u32 ar; 163 160 } tr, es, ds, fs, gs; 164 161 } rmode; 162 + struct { 163 + u32 bitmask; /* 4 bits per segment (1 bit per field) */ 164 + struct kvm_save_segment seg[8]; 165 + } segment_cache; 165 166 int vpid; 166 167 bool emulation_required; 167 168 ··· 176 169 u32 exit_reason; 177 170 178 171 bool rdtscp_enabled; 172 + }; 173 + 174 + enum segment_cache_field { 175 + SEG_FIELD_SEL = 0, 176 + SEG_FIELD_BASE = 1, 177 + SEG_FIELD_LIMIT = 2, 178 + SEG_FIELD_AR = 3, 179 + 180 + SEG_FIELD_NR = 4 179 181 }; 180 182 181 183 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) ··· 659 643 vmcs_writel(field, vmcs_readl(field) | mask); 660 644 } 661 645 646 + static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) 647 + { 648 + vmx->segment_cache.bitmask = 0; 649 + } 650 + 651 + static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg, 652 + unsigned field) 653 + { 654 + bool ret; 655 + u32 mask = 1 << (seg * SEG_FIELD_NR + field); 656 + 657 + if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) { 658 + vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS); 659 + vmx->segment_cache.bitmask = 0; 660 + } 661 + ret = vmx->segment_cache.bitmask & mask; 662 + vmx->segment_cache.bitmask |= mask; 663 + return ret; 664 + } 665 + 666 + static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg) 667 + { 668 + u16 *p = &vmx->segment_cache.seg[seg].selector; 669 + 670 + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL)) 671 + *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector); 672 + return *p; 673 + } 674 + 675 + static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg) 676 + { 677 + ulong *p = 
&vmx->segment_cache.seg[seg].base; 678 + 679 + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE)) 680 + *p = vmcs_readl(kvm_vmx_segment_fields[seg].base); 681 + return *p; 682 + } 683 + 684 + static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg) 685 + { 686 + u32 *p = &vmx->segment_cache.seg[seg].limit; 687 + 688 + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT)) 689 + *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit); 690 + return *p; 691 + } 692 + 693 + static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg) 694 + { 695 + u32 *p = &vmx->segment_cache.seg[seg].ar; 696 + 697 + if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR)) 698 + *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes); 699 + return *p; 700 + } 701 + 662 702 static void update_exception_bitmap(struct kvm_vcpu *vcpu) 663 703 { 664 704 u32 eb; ··· 1042 970 { 1043 971 unsigned long rflags, save_rflags; 1044 972 1045 - rflags = vmcs_readl(GUEST_RFLAGS); 1046 - if (to_vmx(vcpu)->rmode.vm86_active) { 1047 - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; 1048 - save_rflags = to_vmx(vcpu)->rmode.save_rflags; 1049 - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; 973 + if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) { 974 + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); 975 + rflags = vmcs_readl(GUEST_RFLAGS); 976 + if (to_vmx(vcpu)->rmode.vm86_active) { 977 + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; 978 + save_rflags = to_vmx(vcpu)->rmode.save_rflags; 979 + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; 980 + } 981 + to_vmx(vcpu)->rflags = rflags; 1050 982 } 1051 - return rflags; 983 + return to_vmx(vcpu)->rflags; 1052 984 } 1053 985 1054 986 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 1055 987 { 988 + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); 989 + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 990 + to_vmx(vcpu)->rflags = rflags; 1056 991 
if (to_vmx(vcpu)->rmode.vm86_active) { 1057 992 to_vmx(vcpu)->rmode.save_rflags = rflags; 1058 993 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; ··· 1132 1053 } 1133 1054 1134 1055 if (vmx->rmode.vm86_active) { 1135 - if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE) 1056 + int inc_eip = 0; 1057 + if (kvm_exception_is_soft(nr)) 1058 + inc_eip = vcpu->arch.event_exit_inst_len; 1059 + if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) 1136 1060 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 1137 1061 return; 1138 1062 } ··· 1233 1151 } 1234 1152 1235 1153 /* 1154 + * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ 1155 + * ioctl. In this case the call-back should update internal vmx state to make 1156 + * the changes effective. 1157 + */ 1158 + static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) 1159 + { 1160 + /* Nothing to do here */ 1161 + } 1162 + 1163 + /* 1236 1164 * writes 'offset' into guest's timestamp counter offset register 1237 1165 */ 1238 1166 static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ··· 1254 1162 { 1255 1163 u64 offset = vmcs_read64(TSC_OFFSET); 1256 1164 vmcs_write64(TSC_OFFSET, offset + adjustment); 1165 + } 1166 + 1167 + static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) 1168 + { 1169 + return target_tsc - native_read_tsc(); 1257 1170 } 1258 1171 1259 1172 /* ··· 1340 1243 break; 1341 1244 #ifdef CONFIG_X86_64 1342 1245 case MSR_FS_BASE: 1246 + vmx_segment_cache_clear(vmx); 1343 1247 vmcs_writel(GUEST_FS_BASE, data); 1344 1248 break; 1345 1249 case MSR_GS_BASE: 1250 + vmx_segment_cache_clear(vmx); 1346 1251 vmcs_writel(GUEST_GS_BASE, data); 1347 1252 break; 1348 1253 case MSR_KERNEL_GS_BASE: ··· 1788 1689 vmx->emulation_required = 1; 1789 1690 vmx->rmode.vm86_active = 0; 1790 1691 1692 + vmx_segment_cache_clear(vmx); 1693 + 1791 1694 vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); 1792 1695 vmcs_writel(GUEST_TR_BASE, 
vmx->rmode.tr.base); 1793 1696 vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); ··· 1812 1711 fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds); 1813 1712 fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); 1814 1713 fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); 1714 + 1715 + vmx_segment_cache_clear(vmx); 1815 1716 1816 1717 vmcs_write16(GUEST_SS_SELECTOR, 0); 1817 1718 vmcs_write32(GUEST_SS_AR_BYTES, 0x93); ··· 1877 1774 vmx_set_tss_addr(vcpu->kvm, 0xfeffd000); 1878 1775 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 1879 1776 } 1777 + 1778 + vmx_segment_cache_clear(vmx); 1880 1779 1881 1780 vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); 1882 1781 vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); ··· 1955 1850 static void enter_lmode(struct kvm_vcpu *vcpu) 1956 1851 { 1957 1852 u32 guest_tr_ar; 1853 + 1854 + vmx_segment_cache_clear(to_vmx(vcpu)); 1958 1855 1959 1856 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); 1960 1857 if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { ··· 2105 1998 vmcs_writel(CR0_READ_SHADOW, cr0); 2106 1999 vmcs_writel(GUEST_CR0, hw_cr0); 2107 2000 vcpu->arch.cr0 = cr0; 2001 + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 2108 2002 } 2109 2003 2110 2004 static u64 construct_eptp(unsigned long root_hpa) ··· 2161 2053 struct kvm_segment *var, int seg) 2162 2054 { 2163 2055 struct vcpu_vmx *vmx = to_vmx(vcpu); 2164 - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2165 2056 struct kvm_save_segment *save; 2166 2057 u32 ar; 2167 2058 ··· 2182 2075 var->limit = save->limit; 2183 2076 ar = save->ar; 2184 2077 if (seg == VCPU_SREG_TR 2185 - || var->selector == vmcs_read16(sf->selector)) 2078 + || var->selector == vmx_read_guest_seg_selector(vmx, seg)) 2186 2079 goto use_saved_rmode_seg; 2187 2080 } 2188 - var->base = vmcs_readl(sf->base); 2189 - var->limit = vmcs_read32(sf->limit); 2190 - var->selector = vmcs_read16(sf->selector); 2191 - ar = vmcs_read32(sf->ar_bytes); 2081 + var->base = 
vmx_read_guest_seg_base(vmx, seg); 2082 + var->limit = vmx_read_guest_seg_limit(vmx, seg); 2083 + var->selector = vmx_read_guest_seg_selector(vmx, seg); 2084 + ar = vmx_read_guest_seg_ar(vmx, seg); 2192 2085 use_saved_rmode_seg: 2193 2086 if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) 2194 2087 ar = 0; ··· 2205 2098 2206 2099 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) 2207 2100 { 2208 - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2209 2101 struct kvm_segment s; 2210 2102 2211 2103 if (to_vmx(vcpu)->rmode.vm86_active) { 2212 2104 vmx_get_segment(vcpu, &s, seg); 2213 2105 return s.base; 2214 2106 } 2215 - return vmcs_readl(sf->base); 2107 + return vmx_read_guest_seg_base(to_vmx(vcpu), seg); 2216 2108 } 2217 2109 2218 - static int vmx_get_cpl(struct kvm_vcpu *vcpu) 2110 + static int __vmx_get_cpl(struct kvm_vcpu *vcpu) 2219 2111 { 2220 2112 if (!is_protmode(vcpu)) 2221 2113 return 0; 2222 2114 2223 - if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ 2115 + if (!is_long_mode(vcpu) 2116 + && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ 2224 2117 return 3; 2225 2118 2226 - return vmcs_read16(GUEST_CS_SELECTOR) & 3; 2119 + return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; 2227 2120 } 2121 + 2122 + static int vmx_get_cpl(struct kvm_vcpu *vcpu) 2123 + { 2124 + if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { 2125 + __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 2126 + to_vmx(vcpu)->cpl = __vmx_get_cpl(vcpu); 2127 + } 2128 + return to_vmx(vcpu)->cpl; 2129 + } 2130 + 2228 2131 2229 2132 static u32 vmx_segment_access_rights(struct kvm_segment *var) 2230 2133 { ··· 2264 2147 struct vcpu_vmx *vmx = to_vmx(vcpu); 2265 2148 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 2266 2149 u32 ar; 2150 + 2151 + vmx_segment_cache_clear(vmx); 2267 2152 2268 2153 if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { 2269 2154 
vmcs_write16(sf->selector, var->selector); ··· 2303 2184 ar |= 0x1; /* Accessed */ 2304 2185 2305 2186 vmcs_write32(sf->ar_bytes, ar); 2187 + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); 2306 2188 } 2307 2189 2308 2190 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 2309 2191 { 2310 - u32 ar = vmcs_read32(GUEST_CS_AR_BYTES); 2192 + u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS); 2311 2193 2312 2194 *db = (ar >> 14) & 1; 2313 2195 *l = (ar >> 13) & 1; ··· 2895 2775 if (ret != 0) 2896 2776 goto out; 2897 2777 2778 + vmx_segment_cache_clear(vmx); 2779 + 2898 2780 seg_setup(VCPU_SREG_CS); 2899 2781 /* 2900 2782 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode ··· 3026 2904 3027 2905 ++vcpu->stat.irq_injections; 3028 2906 if (vmx->rmode.vm86_active) { 3029 - if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) 2907 + int inc_eip = 0; 2908 + if (vcpu->arch.interrupt.soft) 2909 + inc_eip = vcpu->arch.event_exit_inst_len; 2910 + if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) 3030 2911 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 3031 2912 return; 3032 2913 } ··· 3062 2937 } 3063 2938 3064 2939 ++vcpu->stat.nmi_injections; 2940 + vmx->nmi_known_unmasked = false; 3065 2941 if (vmx->rmode.vm86_active) { 3066 - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE) 2942 + if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) 3067 2943 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); 3068 2944 return; 3069 2945 } ··· 3087 2961 { 3088 2962 if (!cpu_has_virtual_nmis()) 3089 2963 return to_vmx(vcpu)->soft_vnmi_blocked; 2964 + if (to_vmx(vcpu)->nmi_known_unmasked) 2965 + return false; 3090 2966 return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; 3091 2967 } 3092 2968 ··· 3102 2974 vmx->vnmi_blocked_time = 0; 3103 2975 } 3104 2976 } else { 2977 + vmx->nmi_known_unmasked = !masked; 3105 2978 if (masked) 3106 2979 
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3107 2980 GUEST_INTR_STATE_NMI); ··· 3220 3091 enum emulation_result er; 3221 3092 3222 3093 vect_info = vmx->idt_vectoring_info; 3223 - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3094 + intr_info = vmx->exit_intr_info; 3224 3095 3225 3096 if (is_machine_check(intr_info)) 3226 3097 return handle_machine_check(vcpu); ··· 3251 3122 } 3252 3123 3253 3124 error_code = 0; 3254 - rip = kvm_rip_read(vcpu); 3255 3125 if (intr_info & INTR_INFO_DELIVER_CODE_MASK) 3256 3126 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); 3257 3127 if (is_page_fault(intr_info)) { ··· 3297 3169 vmx->vcpu.arch.event_exit_inst_len = 3298 3170 vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 3299 3171 kvm_run->exit_reason = KVM_EXIT_DEBUG; 3172 + rip = kvm_rip_read(vcpu); 3300 3173 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; 3301 3174 kvm_run->debug.arch.exception = ex_no; 3302 3175 break; ··· 3634 3505 switch (type) { 3635 3506 case INTR_TYPE_NMI_INTR: 3636 3507 vcpu->arch.nmi_injected = false; 3637 - if (cpu_has_virtual_nmis()) 3638 - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3639 - GUEST_INTR_STATE_NMI); 3508 + vmx_set_nmi_mask(vcpu, true); 3640 3509 break; 3641 3510 case INTR_TYPE_EXT_INTR: 3642 3511 case INTR_TYPE_SOFT_INTR: ··· 3994 3867 3995 3868 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) 3996 3869 { 3997 - u32 exit_intr_info = vmx->exit_intr_info; 3870 + u32 exit_intr_info; 3871 + 3872 + if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY 3873 + || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)) 3874 + return; 3875 + 3876 + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3877 + exit_intr_info = vmx->exit_intr_info; 3998 3878 3999 3879 /* Handle machine checks before interrupts are enabled */ 4000 - if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) 4001 - || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI 4002 - && is_machine_check(exit_intr_info))) 3880 + if (is_machine_check(exit_intr_info)) 4003 3881 
kvm_machine_check(); 4004 3882 4005 3883 /* We need to handle NMIs before interrupts are enabled */ ··· 4018 3886 4019 3887 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) 4020 3888 { 4021 - u32 exit_intr_info = vmx->exit_intr_info; 3889 + u32 exit_intr_info; 4022 3890 bool unblock_nmi; 4023 3891 u8 vector; 4024 3892 bool idtv_info_valid; ··· 4026 3894 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; 4027 3895 4028 3896 if (cpu_has_virtual_nmis()) { 3897 + if (vmx->nmi_known_unmasked) 3898 + return; 3899 + /* 3900 + * Can't use vmx->exit_intr_info since we're not sure what 3901 + * the exit reason is. 3902 + */ 3903 + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 4029 3904 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; 4030 3905 vector = exit_intr_info & INTR_INFO_VECTOR_MASK; 4031 3906 /* ··· 4049 3910 vector != DF_VECTOR && !idtv_info_valid) 4050 3911 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 4051 3912 GUEST_INTR_STATE_NMI); 3913 + else 3914 + vmx->nmi_known_unmasked = 3915 + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) 3916 + & GUEST_INTR_STATE_NMI); 4052 3917 } else if (unlikely(vmx->soft_vnmi_blocked)) 4053 3918 vmx->vnmi_blocked_time += 4054 3919 ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); ··· 4089 3946 * Clear bit "block by NMI" before VM entry if a NMI 4090 3947 * delivery faulted. 
4091 3948 */ 4092 - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, 4093 - GUEST_INTR_STATE_NMI); 3949 + vmx_set_nmi_mask(&vmx->vcpu, false); 4094 3950 break; 4095 3951 case INTR_TYPE_SOFT_EXCEPTION: 4096 3952 vmx->vcpu.arch.event_exit_inst_len = ··· 4266 4124 ); 4267 4125 4268 4126 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) 4127 + | (1 << VCPU_EXREG_RFLAGS) 4128 + | (1 << VCPU_EXREG_CPL) 4269 4129 | (1 << VCPU_EXREG_PDPTR) 4130 + | (1 << VCPU_EXREG_SEGMENTS) 4270 4131 | (1 << VCPU_EXREG_CR3)); 4271 4132 vcpu->arch.regs_dirty = 0; 4272 4133 ··· 4279 4134 vmx->launched = 1; 4280 4135 4281 4136 vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); 4282 - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 4283 4137 4284 4138 vmx_complete_atomic_exit(vmx); 4285 4139 vmx_recover_nmi_blocking(vmx); ··· 4339 4195 goto free_vcpu; 4340 4196 4341 4197 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); 4198 + err = -ENOMEM; 4342 4199 if (!vmx->guest_msrs) { 4343 - err = -ENOMEM; 4344 4200 goto uninit_vcpu; 4345 4201 } 4346 4202 ··· 4359 4215 if (err) 4360 4216 goto free_vmcs; 4361 4217 if (vm_need_virtualize_apic_accesses(kvm)) 4362 - if (alloc_apic_access_page(kvm) != 0) 4218 + err = alloc_apic_access_page(kvm); 4219 + if (err) 4363 4220 goto free_vmcs; 4364 4221 4365 4222 if (enable_ept) { ··· 4513 4368 { 4514 4369 } 4515 4370 4371 + static int vmx_check_intercept(struct kvm_vcpu *vcpu, 4372 + struct x86_instruction_info *info, 4373 + enum x86_intercept_stage stage) 4374 + { 4375 + return X86EMUL_CONTINUE; 4376 + } 4377 + 4516 4378 static struct kvm_x86_ops vmx_x86_ops = { 4517 4379 .cpu_has_kvm_support = cpu_has_kvm_support, 4518 4380 .disabled_by_bios = vmx_disabled_by_bios, ··· 4601 4449 4602 4450 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, 4603 4451 4452 + .set_tsc_khz = vmx_set_tsc_khz, 4604 4453 .write_tsc_offset = vmx_write_tsc_offset, 4605 4454 .adjust_tsc_offset = vmx_adjust_tsc_offset, 4455 + .compute_tsc_offset = vmx_compute_tsc_offset, 4606 
4456 4607 4457 .set_tdp_cr3 = vmx_set_cr3, 4458 + 4459 + .check_intercept = vmx_check_intercept, 4608 4460 }; 4609 4461 4610 4462 static int __init vmx_init(void)
+397 -175
arch/x86/kvm/x86.c
··· 60 60 #include <asm/div64.h> 61 61 62 62 #define MAX_IO_MSRS 256 63 - #define CR0_RESERVED_BITS \ 64 - (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ 65 - | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ 66 - | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) 67 - #define CR4_RESERVED_BITS \ 68 - (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 69 - | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 70 - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ 71 - | X86_CR4_OSXSAVE \ 72 - | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 73 - 74 - #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 75 - 76 63 #define KVM_MAX_MCE_BANKS 32 77 64 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) 65 + 66 + #define emul_to_vcpu(ctxt) \ 67 + container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) 78 68 79 69 /* EFER defaults: 80 70 * - enable syscall per default because its emulated by KVM ··· 89 99 90 100 int ignore_msrs = 0; 91 101 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); 102 + 103 + bool kvm_has_tsc_control; 104 + EXPORT_SYMBOL_GPL(kvm_has_tsc_control); 105 + u32 kvm_max_guest_tsc_khz; 106 + EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); 92 107 93 108 #define KVM_NR_SHARED_MSRS 16 94 109 ··· 151 156 }; 152 157 153 158 u64 __read_mostly host_xcr0; 159 + 160 + int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); 154 161 155 162 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) 156 163 { ··· 358 361 359 362 void kvm_inject_nmi(struct kvm_vcpu *vcpu) 360 363 { 361 - kvm_make_request(KVM_REQ_NMI, vcpu); 362 364 kvm_make_request(KVM_REQ_EVENT, vcpu); 365 + vcpu->arch.nmi_pending = 1; 363 366 } 364 367 EXPORT_SYMBOL_GPL(kvm_inject_nmi); 365 368 ··· 979 982 return ret; 980 983 } 981 984 982 - static inline u64 nsec_to_cycles(u64 nsec) 985 + static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) 986 + { 987 + if (vcpu->arch.virtual_tsc_khz) 988 + return vcpu->arch.virtual_tsc_khz; 989 + else 990 + return 
__this_cpu_read(cpu_tsc_khz); 991 + } 992 + 993 + static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) 983 994 { 984 995 u64 ret; 985 996 ··· 995 990 if (kvm_tsc_changes_freq()) 996 991 printk_once(KERN_WARNING 997 992 "kvm: unreliable cycle conversion on adjustable rate TSC\n"); 998 - ret = nsec * __this_cpu_read(cpu_tsc_khz); 993 + ret = nsec * vcpu_tsc_khz(vcpu); 999 994 do_div(ret, USEC_PER_SEC); 1000 995 return ret; 1001 996 } 1002 997 1003 - static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz) 998 + static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) 1004 999 { 1005 1000 /* Compute a scale to convert nanoseconds in TSC cycles */ 1006 1001 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, 1007 - &kvm->arch.virtual_tsc_shift, 1008 - &kvm->arch.virtual_tsc_mult); 1009 - kvm->arch.virtual_tsc_khz = this_tsc_khz; 1002 + &vcpu->arch.tsc_catchup_shift, 1003 + &vcpu->arch.tsc_catchup_mult); 1010 1004 } 1011 1005 1012 1006 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) 1013 1007 { 1014 1008 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, 1015 - vcpu->kvm->arch.virtual_tsc_mult, 1016 - vcpu->kvm->arch.virtual_tsc_shift); 1009 + vcpu->arch.tsc_catchup_mult, 1010 + vcpu->arch.tsc_catchup_shift); 1017 1011 tsc += vcpu->arch.last_tsc_write; 1018 1012 return tsc; 1019 1013 } ··· 1025 1021 s64 sdiff; 1026 1022 1027 1023 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1028 - offset = data - native_read_tsc(); 1024 + offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); 1029 1025 ns = get_kernel_ns(); 1030 1026 elapsed = ns - kvm->arch.last_tsc_nsec; 1031 1027 sdiff = data - kvm->arch.last_tsc_write; ··· 1041 1037 * In that case, for a reliable TSC, we can match TSC offsets, 1042 1038 * or make a best guest using elapsed value. 
1043 1039 */ 1044 - if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) && 1040 + if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && 1045 1041 elapsed < 5ULL * NSEC_PER_SEC) { 1046 1042 if (!check_tsc_unstable()) { 1047 1043 offset = kvm->arch.last_tsc_offset; 1048 1044 pr_debug("kvm: matched tsc offset for %llu\n", data); 1049 1045 } else { 1050 - u64 delta = nsec_to_cycles(elapsed); 1046 + u64 delta = nsec_to_cycles(vcpu, elapsed); 1051 1047 offset += delta; 1052 1048 pr_debug("kvm: adjusted tsc offset by %llu\n", delta); 1053 1049 } ··· 1079 1075 local_irq_save(flags); 1080 1076 kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); 1081 1077 kernel_ns = get_kernel_ns(); 1082 - this_tsc_khz = __this_cpu_read(cpu_tsc_khz); 1083 - 1078 + this_tsc_khz = vcpu_tsc_khz(v); 1084 1079 if (unlikely(this_tsc_khz == 0)) { 1085 1080 local_irq_restore(flags); 1086 1081 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); ··· 1996 1993 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1997 1994 case KVM_CAP_XSAVE: 1998 1995 case KVM_CAP_ASYNC_PF: 1996 + case KVM_CAP_GET_TSC_KHZ: 1999 1997 r = 1; 2000 1998 break; 2001 1999 case KVM_CAP_COALESCED_MMIO: ··· 2022 2018 break; 2023 2019 case KVM_CAP_XCRS: 2024 2020 r = cpu_has_xsave; 2021 + break; 2022 + case KVM_CAP_TSC_CONTROL: 2023 + r = kvm_has_tsc_control; 2025 2024 break; 2026 2025 default: 2027 2026 r = 0; ··· 2127 2120 kvm_x86_ops->vcpu_load(vcpu, cpu); 2128 2121 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { 2129 2122 /* Make sure TSC doesn't go backwards */ 2130 - s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : 2131 - native_read_tsc() - vcpu->arch.last_host_tsc; 2123 + s64 tsc_delta; 2124 + u64 tsc; 2125 + 2126 + kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); 2127 + tsc_delta = !vcpu->arch.last_guest_tsc ? 
0 : 2128 + tsc - vcpu->arch.last_guest_tsc; 2129 + 2132 2130 if (tsc_delta < 0) 2133 2131 mark_tsc_unstable("KVM discovered backwards TSC"); 2134 2132 if (check_tsc_unstable()) { ··· 2151 2139 { 2152 2140 kvm_x86_ops->vcpu_put(vcpu); 2153 2141 kvm_put_guest_fpu(vcpu); 2154 - vcpu->arch.last_host_tsc = native_read_tsc(); 2142 + kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); 2155 2143 } 2156 2144 2157 2145 static int is_efer_nx(void) ··· 2336 2324 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 2337 2325 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); 2338 2326 2327 + /* cpuid 0xC0000001.edx */ 2328 + const u32 kvm_supported_word5_x86_features = 2329 + F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | 2330 + F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | 2331 + F(PMM) | F(PMM_EN); 2332 + 2339 2333 /* all calls to cpuid_count() should be made on the same cpu */ 2340 2334 get_cpu(); 2341 2335 do_cpuid_1_ent(entry, function, index); ··· 2436 2418 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | 2437 2419 (1 << KVM_FEATURE_NOP_IO_DELAY) | 2438 2420 (1 << KVM_FEATURE_CLOCKSOURCE2) | 2421 + (1 << KVM_FEATURE_ASYNC_PF) | 2439 2422 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); 2440 2423 entry->ebx = 0; 2441 2424 entry->ecx = 0; ··· 2450 2431 cpuid_mask(&entry->edx, 1); 2451 2432 entry->ecx &= kvm_supported_word6_x86_features; 2452 2433 cpuid_mask(&entry->ecx, 6); 2434 + break; 2435 + /*Add support for Centaur's CPUID instruction*/ 2436 + case 0xC0000000: 2437 + /*Just support up to 0xC0000004 now*/ 2438 + entry->eax = min(entry->eax, 0xC0000004); 2439 + break; 2440 + case 0xC0000001: 2441 + entry->edx &= kvm_supported_word5_x86_features; 2442 + cpuid_mask(&entry->edx, 5); 2443 + break; 2444 + case 0xC0000002: 2445 + case 0xC0000003: 2446 + case 0xC0000004: 2447 + /*Now nothing to do, reserved for the future*/ 2453 2448 break; 2454 2449 } 2455 2450 ··· 2510 2477 r = -E2BIG; 2511 2478 if (nent >= cpuid->nent) 2512 2479 goto out_free; 2480 + 2481 + /* Add support 
for Centaur's CPUID instruction. */ 2482 + if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { 2483 + do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, 2484 + &nent, cpuid->nent); 2485 + 2486 + r = -E2BIG; 2487 + if (nent >= cpuid->nent) 2488 + goto out_free; 2489 + 2490 + limit = cpuid_entries[nent - 1].eax; 2491 + for (func = 0xC0000001; 2492 + func <= limit && nent < cpuid->nent; ++func) 2493 + do_cpuid_ent(&cpuid_entries[nent], func, 0, 2494 + &nent, cpuid->nent); 2495 + 2496 + r = -E2BIG; 2497 + if (nent >= cpuid->nent) 2498 + goto out_free; 2499 + } 2513 2500 2514 2501 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, 2515 2502 cpuid->nent); ··· 3099 3046 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); 3100 3047 break; 3101 3048 } 3049 + case KVM_SET_TSC_KHZ: { 3050 + u32 user_tsc_khz; 3051 + 3052 + r = -EINVAL; 3053 + if (!kvm_has_tsc_control) 3054 + break; 3055 + 3056 + user_tsc_khz = (u32)arg; 3057 + 3058 + if (user_tsc_khz >= kvm_max_guest_tsc_khz) 3059 + goto out; 3060 + 3061 + kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); 3062 + 3063 + r = 0; 3064 + goto out; 3065 + } 3066 + case KVM_GET_TSC_KHZ: { 3067 + r = -EIO; 3068 + if (check_tsc_unstable()) 3069 + goto out; 3070 + 3071 + r = vcpu_tsc_khz(vcpu); 3072 + 3073 + goto out; 3074 + } 3102 3075 default: 3103 3076 r = -EINVAL; 3104 3077 } ··· 3674 3595 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, 3675 3596 const void *v) 3676 3597 { 3677 - if (vcpu->arch.apic && 3678 - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) 3679 - return 0; 3598 + int handled = 0; 3599 + int n; 3680 3600 3681 - return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3601 + do { 3602 + n = min(len, 8); 3603 + if (!(vcpu->arch.apic && 3604 + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) 3605 + && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) 3606 + break; 3607 + handled += n; 3608 + addr += n; 3609 + len -= n; 3610 + v += n; 3611 + } while (len); 3612 + 
3613 + return handled; 3682 3614 } 3683 3615 3684 3616 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) 3685 3617 { 3686 - if (vcpu->arch.apic && 3687 - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) 3688 - return 0; 3618 + int handled = 0; 3619 + int n; 3689 3620 3690 - return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3621 + do { 3622 + n = min(len, 8); 3623 + if (!(vcpu->arch.apic && 3624 + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) 3625 + && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) 3626 + break; 3627 + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); 3628 + handled += n; 3629 + addr += n; 3630 + len -= n; 3631 + v += n; 3632 + } while (len); 3633 + 3634 + return handled; 3691 3635 } 3692 3636 3693 3637 static void kvm_set_segment(struct kvm_vcpu *vcpu, ··· 3805 3703 } 3806 3704 3807 3705 /* used for instruction fetching */ 3808 - static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, 3809 - struct kvm_vcpu *vcpu, 3706 + static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, 3707 + gva_t addr, void *val, unsigned int bytes, 3810 3708 struct x86_exception *exception) 3811 3709 { 3710 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 3812 3711 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3712 + 3813 3713 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 3814 3714 access | PFERR_FETCH_MASK, 3815 3715 exception); 3816 3716 } 3817 3717 3818 - static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 3819 - struct kvm_vcpu *vcpu, 3718 + static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, 3719 + gva_t addr, void *val, unsigned int bytes, 3820 3720 struct x86_exception *exception) 3821 3721 { 3722 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 3822 3723 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? 
PFERR_USER_MASK : 0; 3724 + 3823 3725 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, 3824 3726 exception); 3825 3727 } 3826 3728 3827 - static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, 3828 - struct kvm_vcpu *vcpu, 3729 + static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, 3730 + gva_t addr, void *val, unsigned int bytes, 3829 3731 struct x86_exception *exception) 3830 3732 { 3733 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 3831 3734 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); 3832 3735 } 3833 3736 3834 - static int kvm_write_guest_virt_system(gva_t addr, void *val, 3737 + static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, 3738 + gva_t addr, void *val, 3835 3739 unsigned int bytes, 3836 - struct kvm_vcpu *vcpu, 3837 3740 struct x86_exception *exception) 3838 3741 { 3742 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 3839 3743 void *data = val; 3840 3744 int r = X86EMUL_CONTINUE; 3841 3745 ··· 3869 3761 return r; 3870 3762 } 3871 3763 3872 - static int emulator_read_emulated(unsigned long addr, 3764 + static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, 3765 + unsigned long addr, 3873 3766 void *val, 3874 3767 unsigned int bytes, 3875 - struct x86_exception *exception, 3876 - struct kvm_vcpu *vcpu) 3768 + struct x86_exception *exception) 3877 3769 { 3770 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 3878 3771 gpa_t gpa; 3772 + int handled; 3879 3773 3880 3774 if (vcpu->mmio_read_completed) { 3881 3775 memcpy(val, vcpu->mmio_data, bytes); ··· 3896 3786 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3897 3787 goto mmio; 3898 3788 3899 - if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception) 3789 + if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) 3900 3790 == X86EMUL_CONTINUE) 3901 3791 return X86EMUL_CONTINUE; 3902 3792 ··· 3904 3794 /* 3905 3795 * Is this MMIO handled locally? 
3906 3796 */ 3907 - if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { 3908 - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); 3797 + handled = vcpu_mmio_read(vcpu, gpa, bytes, val); 3798 + 3799 + if (handled == bytes) 3909 3800 return X86EMUL_CONTINUE; 3910 - } 3801 + 3802 + gpa += handled; 3803 + bytes -= handled; 3804 + val += handled; 3911 3805 3912 3806 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 3913 3807 3914 3808 vcpu->mmio_needed = 1; 3915 3809 vcpu->run->exit_reason = KVM_EXIT_MMIO; 3916 3810 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; 3917 - vcpu->run->mmio.len = vcpu->mmio_size = bytes; 3811 + vcpu->mmio_size = bytes; 3812 + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); 3918 3813 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; 3814 + vcpu->mmio_index = 0; 3919 3815 3920 3816 return X86EMUL_IO_NEEDED; 3921 3817 } ··· 3945 3829 struct kvm_vcpu *vcpu) 3946 3830 { 3947 3831 gpa_t gpa; 3832 + int handled; 3948 3833 3949 3834 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); 3950 3835 ··· 3964 3847 /* 3965 3848 * Is this MMIO handled locally? 
3966 3849 */ 3967 - if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) 3850 + handled = vcpu_mmio_write(vcpu, gpa, bytes, val); 3851 + if (handled == bytes) 3968 3852 return X86EMUL_CONTINUE; 3969 3853 3854 + gpa += handled; 3855 + bytes -= handled; 3856 + val += handled; 3857 + 3970 3858 vcpu->mmio_needed = 1; 3859 + memcpy(vcpu->mmio_data, val, bytes); 3971 3860 vcpu->run->exit_reason = KVM_EXIT_MMIO; 3972 3861 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; 3973 - vcpu->run->mmio.len = vcpu->mmio_size = bytes; 3862 + vcpu->mmio_size = bytes; 3863 + vcpu->run->mmio.len = min(vcpu->mmio_size, 8); 3974 3864 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; 3975 - memcpy(vcpu->run->mmio.data, val, bytes); 3865 + memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); 3866 + vcpu->mmio_index = 0; 3976 3867 3977 3868 return X86EMUL_CONTINUE; 3978 3869 } 3979 3870 3980 - int emulator_write_emulated(unsigned long addr, 3871 + int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, 3872 + unsigned long addr, 3981 3873 const void *val, 3982 3874 unsigned int bytes, 3983 - struct x86_exception *exception, 3984 - struct kvm_vcpu *vcpu) 3875 + struct x86_exception *exception) 3985 3876 { 3877 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 3878 + 3986 3879 /* Crossing a page boundary? 
*/ 3987 3880 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { 3988 3881 int rc, now; ··· 4020 3893 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) 4021 3894 #endif 4022 3895 4023 - static int emulator_cmpxchg_emulated(unsigned long addr, 3896 + static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, 3897 + unsigned long addr, 4024 3898 const void *old, 4025 3899 const void *new, 4026 3900 unsigned int bytes, 4027 - struct x86_exception *exception, 4028 - struct kvm_vcpu *vcpu) 3901 + struct x86_exception *exception) 4029 3902 { 3903 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 4030 3904 gpa_t gpa; 4031 3905 struct page *page; 4032 3906 char *kaddr; ··· 4083 3955 emul_write: 4084 3956 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 4085 3957 4086 - return emulator_write_emulated(addr, new, bytes, exception, vcpu); 3958 + return emulator_write_emulated(ctxt, addr, new, bytes, exception); 4087 3959 } 4088 3960 4089 3961 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) ··· 4102 3974 } 4103 3975 4104 3976 4105 - static int emulator_pio_in_emulated(int size, unsigned short port, void *val, 4106 - unsigned int count, struct kvm_vcpu *vcpu) 3977 + static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, 3978 + int size, unsigned short port, void *val, 3979 + unsigned int count) 4107 3980 { 3981 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 3982 + 4108 3983 if (vcpu->arch.pio.count) 4109 3984 goto data_avail; 4110 3985 ··· 4135 4004 return 0; 4136 4005 } 4137 4006 4138 - static int emulator_pio_out_emulated(int size, unsigned short port, 4139 - const void *val, unsigned int count, 4140 - struct kvm_vcpu *vcpu) 4007 + static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, 4008 + int size, unsigned short port, 4009 + const void *val, unsigned int count) 4141 4010 { 4011 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 4012 + 4142 4013 trace_kvm_pio(1, port, size, count); 4143 4014 4144 4015 
vcpu->arch.pio.port = port; ··· 4170 4037 return kvm_x86_ops->get_segment_base(vcpu, seg); 4171 4038 } 4172 4039 4173 - int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) 4040 + static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) 4174 4041 { 4175 - kvm_mmu_invlpg(vcpu, address); 4176 - return X86EMUL_CONTINUE; 4042 + kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); 4177 4043 } 4178 4044 4179 4045 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) ··· 4194 4062 } 4195 4063 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); 4196 4064 4197 - int emulate_clts(struct kvm_vcpu *vcpu) 4065 + static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) 4198 4066 { 4199 - kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); 4200 - kvm_x86_ops->fpu_activate(vcpu); 4201 - return X86EMUL_CONTINUE; 4067 + kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); 4202 4068 } 4203 4069 4204 - int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) 4070 + int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 4205 4071 { 4206 - return _kvm_get_dr(vcpu, dr, dest); 4072 + return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); 4207 4073 } 4208 4074 4209 - int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) 4075 + int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 4210 4076 { 4211 4077 4212 - return __kvm_set_dr(vcpu, dr, value); 4078 + return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); 4213 4079 } 4214 4080 4215 4081 static u64 mk_cr_64(u64 curr_cr, u32 new_val) ··· 4215 4085 return (curr_cr & ~((1ULL << 32) - 1)) | new_val; 4216 4086 } 4217 4087 4218 - static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) 4088 + static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) 4219 4089 { 4090 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 4220 4091 unsigned long value; 4221 4092 4222 4093 switch (cr) { ··· 4244 4113 return value; 4245 4114 } 4246 4115 4247 - static int 
emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) 4116 + static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) 4248 4117 { 4118 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 4249 4119 int res = 0; 4250 4120 4251 4121 switch (cr) { ··· 4273 4141 return res; 4274 4142 } 4275 4143 4276 - static int emulator_get_cpl(struct kvm_vcpu *vcpu) 4144 + static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) 4277 4145 { 4278 - return kvm_x86_ops->get_cpl(vcpu); 4146 + return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); 4279 4147 } 4280 4148 4281 - static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) 4149 + static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) 4282 4150 { 4283 - kvm_x86_ops->get_gdt(vcpu, dt); 4151 + kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); 4284 4152 } 4285 4153 4286 - static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) 4154 + static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) 4287 4155 { 4288 - kvm_x86_ops->get_idt(vcpu, dt); 4156 + kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); 4289 4157 } 4290 4158 4291 - static unsigned long emulator_get_cached_segment_base(int seg, 4292 - struct kvm_vcpu *vcpu) 4159 + static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) 4293 4160 { 4294 - return get_segment_base(vcpu, seg); 4161 + kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); 4295 4162 } 4296 4163 4297 - static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, 4298 - int seg, struct kvm_vcpu *vcpu) 4164 + static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) 4165 + { 4166 + kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); 4167 + } 4168 + 4169 + static unsigned long emulator_get_cached_segment_base( 4170 + struct x86_emulate_ctxt *ctxt, int seg) 4171 + { 4172 + return get_segment_base(emul_to_vcpu(ctxt), seg); 4173 + } 4174 + 4175 + static bool 
emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, 4176 + struct desc_struct *desc, u32 *base3, 4177 + int seg) 4299 4178 { 4300 4179 struct kvm_segment var; 4301 4180 4302 - kvm_get_segment(vcpu, &var, seg); 4181 + kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); 4182 + *selector = var.selector; 4303 4183 4304 4184 if (var.unusable) 4305 4185 return false; ··· 4336 4192 return true; 4337 4193 } 4338 4194 4339 - static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, 4340 - int seg, struct kvm_vcpu *vcpu) 4195 + static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, 4196 + struct desc_struct *desc, u32 base3, 4197 + int seg) 4341 4198 { 4199 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 4342 4200 struct kvm_segment var; 4343 4201 4344 - /* needed to preserve selector */ 4345 - kvm_get_segment(vcpu, &var, seg); 4346 - 4202 + var.selector = selector; 4347 4203 var.base = get_desc_base(desc); 4348 4204 #ifdef CONFIG_X86_64 4349 4205 var.base |= ((u64)base3) << 32; ··· 4367 4223 return; 4368 4224 } 4369 4225 4370 - static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) 4226 + static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, 4227 + u32 msr_index, u64 *pdata) 4371 4228 { 4372 - struct kvm_segment kvm_seg; 4373 - 4374 - kvm_get_segment(vcpu, &kvm_seg, seg); 4375 - return kvm_seg.selector; 4229 + return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); 4376 4230 } 4377 4231 4378 - static void emulator_set_segment_selector(u16 sel, int seg, 4379 - struct kvm_vcpu *vcpu) 4232 + static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, 4233 + u32 msr_index, u64 data) 4380 4234 { 4381 - struct kvm_segment kvm_seg; 4235 + return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); 4236 + } 4382 4237 4383 - kvm_get_segment(vcpu, &kvm_seg, seg); 4384 - kvm_seg.selector = sel; 4385 - kvm_set_segment(vcpu, &kvm_seg, seg); 4238 + static void emulator_halt(struct x86_emulate_ctxt *ctxt) 4239 + { 
4240 + emul_to_vcpu(ctxt)->arch.halt_request = 1; 4241 + } 4242 + 4243 + static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) 4244 + { 4245 + preempt_disable(); 4246 + kvm_load_guest_fpu(emul_to_vcpu(ctxt)); 4247 + /* 4248 + * CR0.TS may reference the host fpu state, not the guest fpu state, 4249 + * so it may be clear at this point. 4250 + */ 4251 + clts(); 4252 + } 4253 + 4254 + static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) 4255 + { 4256 + preempt_enable(); 4257 + } 4258 + 4259 + static int emulator_intercept(struct x86_emulate_ctxt *ctxt, 4260 + struct x86_instruction_info *info, 4261 + enum x86_intercept_stage stage) 4262 + { 4263 + return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); 4386 4264 } 4387 4265 4388 4266 static struct x86_emulate_ops emulate_ops = { ··· 4414 4248 .read_emulated = emulator_read_emulated, 4415 4249 .write_emulated = emulator_write_emulated, 4416 4250 .cmpxchg_emulated = emulator_cmpxchg_emulated, 4251 + .invlpg = emulator_invlpg, 4417 4252 .pio_in_emulated = emulator_pio_in_emulated, 4418 4253 .pio_out_emulated = emulator_pio_out_emulated, 4419 - .get_cached_descriptor = emulator_get_cached_descriptor, 4420 - .set_cached_descriptor = emulator_set_cached_descriptor, 4421 - .get_segment_selector = emulator_get_segment_selector, 4422 - .set_segment_selector = emulator_set_segment_selector, 4254 + .get_segment = emulator_get_segment, 4255 + .set_segment = emulator_set_segment, 4423 4256 .get_cached_segment_base = emulator_get_cached_segment_base, 4424 4257 .get_gdt = emulator_get_gdt, 4425 4258 .get_idt = emulator_get_idt, 4259 + .set_gdt = emulator_set_gdt, 4260 + .set_idt = emulator_set_idt, 4426 4261 .get_cr = emulator_get_cr, 4427 4262 .set_cr = emulator_set_cr, 4428 4263 .cpl = emulator_get_cpl, 4429 4264 .get_dr = emulator_get_dr, 4430 4265 .set_dr = emulator_set_dr, 4431 - .set_msr = kvm_set_msr, 4432 - .get_msr = kvm_get_msr, 4266 + .set_msr = emulator_set_msr, 4267 + .get_msr = 
emulator_get_msr, 4268 + .halt = emulator_halt, 4269 + .wbinvd = emulator_wbinvd, 4270 + .fix_hypercall = emulator_fix_hypercall, 4271 + .get_fpu = emulator_get_fpu, 4272 + .put_fpu = emulator_put_fpu, 4273 + .intercept = emulator_intercept, 4433 4274 }; 4434 4275 4435 4276 static void cache_all_regs(struct kvm_vcpu *vcpu) ··· 4478 4305 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; 4479 4306 int cs_db, cs_l; 4480 4307 4308 + /* 4309 + * TODO: fix emulate.c to use guest_read/write_register 4310 + * instead of direct ->regs accesses, can save hundred cycles 4311 + * on Intel for instructions that don't read/change RSP, for 4312 + * for example. 4313 + */ 4481 4314 cache_all_regs(vcpu); 4482 4315 4483 4316 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 4484 4317 4485 - vcpu->arch.emulate_ctxt.vcpu = vcpu; 4486 - vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); 4318 + vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 4487 4319 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); 4488 4320 vcpu->arch.emulate_ctxt.mode = 4489 4321 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : ··· 4496 4318 ? X86EMUL_MODE_VM86 : cs_l 4497 4319 ? X86EMUL_MODE_PROT64 : cs_db 4498 4320 ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 4321 + vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); 4499 4322 memset(c, 0, sizeof(struct decode_cache)); 4500 4323 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); 4324 + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; 4501 4325 } 4502 4326 4503 - int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) 4327 + int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) 4504 4328 { 4505 4329 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; 4506 4330 int ret; ··· 4511 4331 4512 4332 vcpu->arch.emulate_ctxt.decode.op_bytes = 2; 4513 4333 vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; 4514 - vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip; 4334 + vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + 4335 + inc_eip; 4515 4336 ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); 4516 4337 4517 4338 if (ret != X86EMUL_CONTINUE) ··· 4521 4340 vcpu->arch.emulate_ctxt.eip = c->eip; 4522 4341 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); 4523 4342 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); 4524 - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 4343 + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 4525 4344 4526 4345 if (irq == NMI_VECTOR) 4527 4346 vcpu->arch.nmi_pending = false; ··· 4583 4402 { 4584 4403 int r; 4585 4404 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; 4405 + bool writeback = true; 4586 4406 4587 4407 kvm_clear_exception_queue(vcpu); 4588 - vcpu->arch.mmio_fault_cr2 = cr2; 4589 - /* 4590 - * TODO: fix emulate.c to use guest_read/write_register 4591 - * instead of direct ->regs accesses, can save hundred cycles 4592 - * on Intel for instructions that don't read/change RSP, for 4593 - * for example. 
4594 - */ 4595 - cache_all_regs(vcpu); 4596 4408 4597 4409 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 4598 4410 init_emulate_ctxt(vcpu); ··· 4616 4442 return EMULATE_DONE; 4617 4443 } 4618 4444 4619 - /* this is needed for vmware backdor interface to work since it 4445 + /* this is needed for vmware backdoor interface to work since it 4620 4446 changes registers values during IO operation */ 4621 - memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); 4447 + if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { 4448 + vcpu->arch.emulate_regs_need_sync_from_vcpu = false; 4449 + memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); 4450 + } 4622 4451 4623 4452 restart: 4624 4453 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); 4454 + 4455 + if (r == EMULATION_INTERCEPTED) 4456 + return EMULATE_DONE; 4625 4457 4626 4458 if (r == EMULATION_FAILED) { 4627 4459 if (reexecute_instruction(vcpu, cr2)) ··· 4642 4462 } else if (vcpu->arch.pio.count) { 4643 4463 if (!vcpu->arch.pio.in) 4644 4464 vcpu->arch.pio.count = 0; 4465 + else 4466 + writeback = false; 4645 4467 r = EMULATE_DO_MMIO; 4646 4468 } else if (vcpu->mmio_needed) { 4647 - if (vcpu->mmio_is_write) 4648 - vcpu->mmio_needed = 0; 4469 + if (!vcpu->mmio_is_write) 4470 + writeback = false; 4649 4471 r = EMULATE_DO_MMIO; 4650 4472 } else if (r == EMULATION_RESTART) 4651 4473 goto restart; 4652 4474 else 4653 4475 r = EMULATE_DONE; 4654 4476 4655 - toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); 4656 - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 4657 - kvm_make_request(KVM_REQ_EVENT, vcpu); 4658 - memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); 4659 - kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); 4477 + if (writeback) { 4478 + toggle_interruptibility(vcpu, 4479 + vcpu->arch.emulate_ctxt.interruptibility); 4480 + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 4481 + kvm_make_request(KVM_REQ_EVENT, vcpu); 4482 + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); 4483 + 
vcpu->arch.emulate_regs_need_sync_to_vcpu = false; 4484 + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); 4485 + } else 4486 + vcpu->arch.emulate_regs_need_sync_to_vcpu = true; 4660 4487 4661 4488 return r; 4662 4489 } ··· 4672 4485 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) 4673 4486 { 4674 4487 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); 4675 - int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); 4488 + int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, 4489 + size, port, &val, 1); 4676 4490 /* do not return to emulator after return from userspace */ 4677 4491 vcpu->arch.pio.count = 0; 4678 4492 return ret; ··· 5067 4879 } 5068 4880 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); 5069 4881 5070 - int kvm_fix_hypercall(struct kvm_vcpu *vcpu) 4882 + int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) 5071 4883 { 4884 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 5072 4885 char instruction[3]; 5073 4886 unsigned long rip = kvm_rip_read(vcpu); 5074 4887 ··· 5082 4893 5083 4894 kvm_x86_ops->patch_hypercall(vcpu, instruction); 5084 4895 5085 - return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); 5086 - } 5087 - 5088 - void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 5089 - { 5090 - struct desc_ptr dt = { limit, base }; 5091 - 5092 - kvm_x86_ops->set_gdt(vcpu, &dt); 5093 - } 5094 - 5095 - void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 5096 - { 5097 - struct desc_ptr dt = { limit, base }; 5098 - 5099 - kvm_x86_ops->set_idt(vcpu, &dt); 4896 + return emulator_write_emulated(&vcpu->arch.emulate_ctxt, 4897 + rip, instruction, 3, NULL); 5100 4898 } 5101 4899 5102 4900 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) ··· 5346 5170 static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 5347 5171 { 5348 5172 int r; 5173 + bool nmi_pending; 5349 5174 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && 5350 5175 
vcpu->run->request_interrupt_window; 5351 5176 ··· 5384 5207 r = 1; 5385 5208 goto out; 5386 5209 } 5387 - if (kvm_check_request(KVM_REQ_NMI, vcpu)) 5388 - vcpu->arch.nmi_pending = true; 5389 5210 } 5390 5211 5391 5212 r = kvm_mmu_reload(vcpu); 5392 5213 if (unlikely(r)) 5393 5214 goto out; 5394 5215 5216 + /* 5217 + * An NMI can be injected between local nmi_pending read and 5218 + * vcpu->arch.nmi_pending read inside inject_pending_event(). 5219 + * But in that case, KVM_REQ_EVENT will be set, which makes 5220 + * the race described above benign. 5221 + */ 5222 + nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); 5223 + 5395 5224 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 5396 5225 inject_pending_event(vcpu); 5397 5226 5398 5227 /* enable NMI/IRQ window open exits if needed */ 5399 - if (vcpu->arch.nmi_pending) 5228 + if (nmi_pending) 5400 5229 kvm_x86_ops->enable_nmi_window(vcpu); 5401 5230 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) 5402 5231 kvm_x86_ops->enable_irq_window(vcpu); ··· 5582 5399 return r; 5583 5400 } 5584 5401 5402 + static int complete_mmio(struct kvm_vcpu *vcpu) 5403 + { 5404 + struct kvm_run *run = vcpu->run; 5405 + int r; 5406 + 5407 + if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) 5408 + return 1; 5409 + 5410 + if (vcpu->mmio_needed) { 5411 + vcpu->mmio_needed = 0; 5412 + if (!vcpu->mmio_is_write) 5413 + memcpy(vcpu->mmio_data + vcpu->mmio_index, 5414 + run->mmio.data, 8); 5415 + vcpu->mmio_index += 8; 5416 + if (vcpu->mmio_index < vcpu->mmio_size) { 5417 + run->exit_reason = KVM_EXIT_MMIO; 5418 + run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; 5419 + memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); 5420 + run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); 5421 + run->mmio.is_write = vcpu->mmio_is_write; 5422 + vcpu->mmio_needed = 1; 5423 + return 0; 5424 + } 5425 + if (vcpu->mmio_is_write) 5426 + return 1; 5427 + vcpu->mmio_read_completed = 1; 5428 + } 5429 + vcpu->srcu_idx 
= srcu_read_lock(&vcpu->kvm->srcu); 5430 + r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); 5431 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 5432 + if (r != EMULATE_DONE) 5433 + return 0; 5434 + return 1; 5435 + } 5436 + 5585 5437 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 5586 5438 { 5587 5439 int r; ··· 5643 5425 } 5644 5426 } 5645 5427 5646 - if (vcpu->arch.pio.count || vcpu->mmio_needed) { 5647 - if (vcpu->mmio_needed) { 5648 - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 5649 - vcpu->mmio_read_completed = 1; 5650 - vcpu->mmio_needed = 0; 5651 - } 5652 - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 5653 - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); 5654 - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 5655 - if (r != EMULATE_DONE) { 5656 - r = 0; 5657 - goto out; 5658 - } 5659 - } 5428 + r = complete_mmio(vcpu); 5429 + if (r <= 0) 5430 + goto out; 5431 + 5660 5432 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) 5661 5433 kvm_register_write(vcpu, VCPU_REGS_RAX, 5662 5434 kvm_run->hypercall.ret); ··· 5663 5455 5664 5456 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 5665 5457 { 5458 + if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { 5459 + /* 5460 + * We are here if userspace calls get_regs() in the middle of 5461 + * instruction emulation. Registers state needs to be copied 5462 + * back from emulation context to vcpu. 
Userspace shouldn't do 5463 + * that usually, but some badly designed PV devices (vmware 5464 + * backdoor interface) need this to work 5465 + */ 5466 + struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; 5467 + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); 5468 + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; 5469 + } 5666 5470 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); 5667 5471 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); 5668 5472 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); ··· 5702 5482 5703 5483 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 5704 5484 { 5485 + vcpu->arch.emulate_regs_need_sync_from_vcpu = true; 5486 + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; 5487 + 5705 5488 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); 5706 5489 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); 5707 5490 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); ··· 5815 5592 5816 5593 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); 5817 5594 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); 5818 - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 5595 + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 5819 5596 kvm_make_request(KVM_REQ_EVENT, vcpu); 5820 5597 return EMULATE_DONE; 5821 5598 } ··· 6197 5974 } 6198 5975 vcpu->arch.pio_data = page_address(page); 6199 5976 6200 - if (!kvm->arch.virtual_tsc_khz) 6201 - kvm_arch_set_tsc_khz(kvm, max_tsc_khz); 5977 + kvm_init_tsc_catchup(vcpu, max_tsc_khz); 6202 5978 6203 5979 r = kvm_mmu_create(vcpu); 6204 5980 if (r < 0)
+1 -1
arch/x86/kvm/x86.h
··· 77 77 78 78 void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 79 79 void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 80 - int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); 80 + int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); 81 81 82 82 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); 83 83
+6
include/linux/kvm.h
··· 541 541 #define KVM_CAP_PPC_GET_PVINFO 57 542 542 #define KVM_CAP_PPC_IRQ_LEVEL 58 543 543 #define KVM_CAP_ASYNC_PF 59 544 + #define KVM_CAP_TSC_CONTROL 60 545 + #define KVM_CAP_GET_TSC_KHZ 61 546 + #define KVM_CAP_PPC_BOOKE_SREGS 62 544 547 545 548 #ifdef KVM_CAP_IRQ_ROUTING 546 549 ··· 680 677 #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) 681 678 /* Available with KVM_CAP_PPC_GET_PVINFO */ 682 679 #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) 680 + /* Available with KVM_CAP_TSC_CONTROL */ 681 + #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) 682 + #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) 683 683 684 684 /* 685 685 * ioctls for vcpu fds
+24 -6
include/linux/kvm_host.h
··· 27 27 28 28 #include <asm/kvm_host.h> 29 29 30 + #ifndef KVM_MMIO_SIZE 31 + #define KVM_MMIO_SIZE 8 32 + #endif 33 + 30 34 /* 31 35 * vcpu->requests bit members 32 36 */ ··· 47 43 #define KVM_REQ_DEACTIVATE_FPU 10 48 44 #define KVM_REQ_EVENT 11 49 45 #define KVM_REQ_APF_HALT 12 50 - #define KVM_REQ_NMI 13 51 46 52 47 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 53 48 ··· 136 133 int mmio_read_completed; 137 134 int mmio_is_write; 138 135 int mmio_size; 139 - unsigned char mmio_data[8]; 136 + int mmio_index; 137 + unsigned char mmio_data[KVM_MMIO_SIZE]; 140 138 gpa_t mmio_phys_addr; 141 139 #endif 142 140 ··· 296 292 } 297 293 298 294 #define kvm_for_each_vcpu(idx, vcpup, kvm) \ 299 - for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \ 300 - idx < atomic_read(&kvm->online_vcpus) && vcpup; \ 301 - vcpup = kvm_get_vcpu(kvm, ++idx)) 295 + for (idx = 0; \ 296 + idx < atomic_read(&kvm->online_vcpus) && \ 297 + (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ 298 + idx++) 302 299 303 300 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); 304 301 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); ··· 370 365 bool *writable); 371 366 pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 372 367 struct kvm_memory_slot *slot, gfn_t gfn); 373 - int memslot_id(struct kvm *kvm, gfn_t gfn); 374 368 void kvm_release_pfn_dirty(pfn_t); 375 369 void kvm_release_pfn_clean(pfn_t pfn); 376 370 void kvm_set_pfn_dirty(pfn_t pfn); ··· 591 587 592 588 static inline void kvm_guest_enter(void) 593 589 { 590 + BUG_ON(preemptible()); 594 591 account_system_vtime(current); 595 592 current->flags |= PF_VCPU; 593 + /* KVM does not hold any references to rcu protected data when it 594 + * switches CPU into a guest mode. In fact switching to a guest mode 595 + * is very similar to exiting to userspace from rcu point of view. In 596 + * addition CPU may stay in a guest mode for quite a long time (up to 597 + * one time slice). 
Let's treat guest mode as quiescent state, just like 598 + * we do with user-mode execution. 599 + */ 600 + rcu_virt_note_context_switch(smp_processor_id()); 596 601 } 597 602 598 603 static inline void kvm_guest_exit(void) 599 604 { 600 605 account_system_vtime(current); 601 606 current->flags &= ~PF_VCPU; 607 + } 608 + 609 + static inline int memslot_id(struct kvm *kvm, gfn_t gfn) 610 + { 611 + return gfn_to_memslot(kvm, gfn)->id; 602 612 } 603 613 604 614 static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
+1 -1
virt/kvm/ioapic.c
··· 167 167 168 168 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " 169 169 "vector=%x trig_mode=%x\n", 170 - entry->fields.dest, entry->fields.dest_mode, 170 + entry->fields.dest_id, entry->fields.dest_mode, 171 171 entry->fields.delivery_mode, entry->fields.vector, 172 172 entry->fields.trig_mode); 173 173
+6 -20
virt/kvm/kvm_main.c
··· 467 467 if (!kvm->buses[i]) 468 468 goto out_err; 469 469 } 470 + spin_lock_init(&kvm->mmu_lock); 470 471 471 472 r = kvm_init_mmu_notifier(kvm); 472 473 if (r) ··· 475 474 476 475 kvm->mm = current->mm; 477 476 atomic_inc(&kvm->mm->mm_count); 478 - spin_lock_init(&kvm->mmu_lock); 479 477 kvm_eventfd_init(kvm); 480 478 mutex_init(&kvm->lock); 481 479 mutex_init(&kvm->irq_lock); ··· 648 648 goto out; 649 649 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) 650 650 goto out; 651 - if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) 651 + /* We can read the guest memory with __xxx_user() later on. */ 652 + if (user_alloc && 653 + ((mem->userspace_addr & (PAGE_SIZE - 1)) || 654 + !access_ok(VERIFY_WRITE, mem->userspace_addr, mem->memory_size))) 652 655 goto out; 653 656 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 654 657 goto out; ··· 999 996 return size; 1000 997 } 1001 998 1002 - int memslot_id(struct kvm *kvm, gfn_t gfn) 1003 - { 1004 - int i; 1005 - struct kvm_memslots *slots = kvm_memslots(kvm); 1006 - struct kvm_memory_slot *memslot = NULL; 1007 - 1008 - for (i = 0; i < slots->nmemslots; ++i) { 1009 - memslot = &slots->memslots[i]; 1010 - 1011 - if (gfn >= memslot->base_gfn 1012 - && gfn < memslot->base_gfn + memslot->npages) 1013 - break; 1014 - } 1015 - 1016 - return memslot - slots->memslots; 1017 - } 1018 - 1019 999 static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, 1020 1000 gfn_t *nr_pages) 1021 1001 { ··· 1286 1300 addr = gfn_to_hva(kvm, gfn); 1287 1301 if (kvm_is_error_hva(addr)) 1288 1302 return -EFAULT; 1289 - r = copy_from_user(data, (void __user *)addr + offset, len); 1303 + r = __copy_from_user(data, (void __user *)addr + offset, len); 1290 1304 if (r) 1291 1305 return -EFAULT; 1292 1306 return 0;