Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (66 commits)
KVM: Remove unused 'instruction_length'
KVM: Don't require explicit indication of completion of mmio or pio
KVM: Remove extraneous guest entry on mmio read
KVM: SVM: Only save/restore MSRs when needed
KVM: fix an if() condition
KVM: VMX: Add lazy FPU support for VT
KVM: VMX: Properly shadow the CR0 register in the vcpu struct
KVM: Don't complain about cpu erratum AA15
KVM: Lazy FPU support for SVM
KVM: Allow passing 64-bit values to the emulated read/write API
KVM: Per-vcpu statistics
KVM: VMX: Avoid unnecessary vcpu_load()/vcpu_put() cycles
KVM: MMU: Avoid heavy ASSERT at non debug mode.
KVM: VMX: Only save/restore MSR_K6_STAR if necessary
KVM: Fold drivers/kvm/kvm_vmx.h into drivers/kvm/vmx.c
KVM: VMX: Don't switch 64-bit msrs for 32-bit guests
KVM: VMX: Reduce unnecessary saving of host msrs
KVM: Handle guest page faults when emulating mmio
KVM: SVM: Report hardware exit reason to userspace instead of dmesg
KVM: Retry sleeping allocation if atomic allocation fails
...

+1301 -478
+68 -32
drivers/kvm/kvm.h
··· 51 51 #define UNMAPPED_GVA (~(gpa_t)0) 52 52 53 53 #define KVM_MAX_VCPUS 1 54 + #define KVM_ALIAS_SLOTS 4 54 55 #define KVM_MEMORY_SLOTS 4 55 56 #define KVM_NUM_MMU_PAGES 256 56 57 #define KVM_MIN_FREE_MMU_PAGES 5 57 58 #define KVM_REFILL_PAGES 25 59 + #define KVM_MAX_CPUID_ENTRIES 40 58 60 59 61 #define FX_IMAGE_SIZE 512 60 62 #define FX_IMAGE_ALIGN 16 61 63 #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) 62 64 63 65 #define DE_VECTOR 0 66 + #define NM_VECTOR 7 64 67 #define DF_VECTOR 8 65 68 #define TS_VECTOR 10 66 69 #define NP_VECTOR 11 ··· 75 72 #define SELECTOR_RPL_MASK 0x03 76 73 77 74 #define IOPL_SHIFT 12 75 + 76 + #define KVM_PIO_PAGE_OFFSET 1 78 77 79 78 /* 80 79 * Address types: ··· 111 106 * bits 4:7 - page table level for this shadow (1-4) 112 107 * bits 8:9 - page table quadrant for 2-level guests 113 108 * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) 109 + * bits 17:18 - "access" - the user and writable bits of a huge page pde 114 110 */ 115 111 union kvm_mmu_page_role { 116 112 unsigned word; ··· 121 115 unsigned quadrant : 2; 122 116 unsigned pad_for_nice_hex_output : 6; 123 117 unsigned metaphysical : 1; 118 + unsigned hugepage_access : 2; 124 119 }; 125 120 }; 126 121 ··· 140 133 unsigned long slot_bitmap; /* One bit set per slot which has memory 141 134 * in this shadow page. 142 135 */ 143 - int global; /* Set if all ptes in this page are global */ 144 136 int multimapped; /* More than one parent_pte? */ 145 137 int root_count; /* Currently serving as active root */ 146 138 union { ··· 225 219 VCPU_SREG_LDTR, 226 220 }; 227 221 222 + struct kvm_pio_request { 223 + unsigned long count; 224 + int cur_count; 225 + struct page *guest_pages[2]; 226 + unsigned guest_page_offset; 227 + int in; 228 + int size; 229 + int string; 230 + int down; 231 + int rep; 232 + }; 233 + 234 + struct kvm_stat { 235 + u32 pf_fixed; 236 + u32 pf_guest; 237 + u32 tlb_flush; 238 + u32 invlpg; 239 + 240 + u32 exits; 241 + u32 io_exits; 242 + u32 mmio_exits; 243 + u32 signal_exits; 244 + u32 irq_window_exits; 245 + u32 halt_exits; 246 + u32 request_irq_exits; 247 + u32 irq_exits; 248 + }; 249 + 228 250 struct kvm_vcpu { 229 251 struct kvm *kvm; 230 252 union { ··· 262 228 struct mutex mutex; 263 229 int cpu; 264 230 int launched; 231 + u64 host_tsc; 232 + struct kvm_run *run; 265 233 int interrupt_window_open; 266 234 unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ 267 235 #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) ··· 302 266 char fx_buf[FX_BUF_SIZE]; 303 267 char *host_fx_image; 304 268 char *guest_fx_image; 269 + int fpu_active; 305 270 306 271 int mmio_needed; 307 272 int mmio_read_completed; ··· 310 273 int mmio_size; 311 274 unsigned char mmio_data[8]; 312 275 gpa_t mmio_phys_addr; 276 + gva_t mmio_fault_cr2; 277 + struct kvm_pio_request pio; 278 + void *pio_data; 279 + 280 + int sigset_active; 281 + sigset_t sigset; 282 + 283 + struct kvm_stat stat; 313 284 314 285 struct { 315 286 int active; ··· 329 284 u32 ar; 330 285 } tr, es, ds, fs, gs; 331 286 } rmode; 287 + 288 + int cpuid_nent; 289 + struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; 290 + }; 291 + 292 + struct kvm_mem_alias { 293 + gfn_t base_gfn; 294 + unsigned long npages; 295 + gfn_t target_gfn; 332 296 }; 333 297 334 298 struct kvm_memory_slot { ··· 350 296 351 297 struct kvm { 352 298 spinlock_t lock; /* protects everything except vcpus */ 299 + int naliases; 300 + struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; 353 301 int nmemslots; 354 302 struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; 355 303 /* ··· 366 310 unsigned long rmap_overflow; 367 311 struct list_head vm_list; 368 312 struct file *filp; 369 - }; 370 - 371 - struct kvm_stat { 372 - u32 pf_fixed; 373 - u32 pf_guest; 374 - u32 tlb_flush; 375 - u32 invlpg; 376 - 377 - u32 exits; 378 - u32 io_exits; 379 - u32 mmio_exits; 380 - u32 signal_exits; 381 - u32 irq_window_exits; 382 - u32 halt_exits; 383 - u32 request_irq_exits; 384 - u32 irq_exits; 385 313 }; 386 314 387 315 struct descriptor_table { ··· 398 358 void (*set_segment)(struct kvm_vcpu *vcpu, 399 359 struct kvm_segment *var, int seg); 400 360 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); 401 - void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu); 361 + void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); 402 362 void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); 403 - void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu, 404 - unsigned long cr0); 405 363 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); 406 364 void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); 407 365 void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); ··· 429 391 unsigned char *hypercall_addr); 430 392 }; 431 393 432 - extern struct kvm_stat kvm_stat; 433 394 extern struct kvm_arch_ops *kvm_arch_ops; 434 395 435 396 #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) ··· 437 400 int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); 438 401 void kvm_exit_arch(void); 439 402 403 + int kvm_mmu_module_init(void); 404 + void kvm_mmu_module_exit(void); 405 + 440 406 void kvm_mmu_destroy(struct kvm_vcpu *vcpu); 441 407 int kvm_mmu_create(struct kvm_vcpu *vcpu); 442 408 int kvm_mmu_setup(struct kvm_vcpu *vcpu); 443 409 444 410 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 445 411 void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot); 412 + void kvm_mmu_zap_all(struct kvm_vcpu *vcpu); 446 413 447 414 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); 448 415 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) 449 416 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) 450 417 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } 451 418 hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); 419 + struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); 452 420 453 421 void kvm_emulator_want_group7_invlpg(void); 454 422 455 423 extern hpa_t bad_page_address; 456 424 457 - static inline struct page *gfn_to_page(struct kvm_memory_slot *slot, gfn_t gfn) 458 - { 459 - return slot->phys_mem[gfn - slot->base_gfn]; 460 - } 461 - 425 + struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); 462 426 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); 463 427 void mark_page_dirty(struct kvm *kvm, gfn_t gfn); 464 428 ··· 482 444 483 445 struct x86_emulate_ctxt; 484 446 447 + int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 448 + int size, unsigned long count, int string, int down, 449 + gva_t address, int rep, unsigned port); 450 + void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); 485 451 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); 486 452 int emulate_clts(struct kvm_vcpu *vcpu); 487 453 int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, ··· 533 491 if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) 534 492 kvm_mmu_free_some_pages(vcpu); 535 493 return vcpu->mmu.page_fault(vcpu, gva, error_code); 536 - } 537 - 538 - static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn) 539 - { 540 - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 541 - return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : NULL; 542 494 } 543 495 544 496 static inline int is_long_mode(struct kvm_vcpu *vcpu)
+675 -117
drivers/kvm/kvm_main.c
··· 51 51 static LIST_HEAD(vm_list); 52 52 53 53 struct kvm_arch_ops *kvm_arch_ops; 54 - struct kvm_stat kvm_stat; 55 - EXPORT_SYMBOL_GPL(kvm_stat); 54 + 55 + #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) 56 56 57 57 static struct kvm_stats_debugfs_item { 58 58 const char *name; 59 - u32 *data; 59 + int offset; 60 60 struct dentry *dentry; 61 61 } debugfs_entries[] = { 62 - { "pf_fixed", &kvm_stat.pf_fixed }, 63 - { "pf_guest", &kvm_stat.pf_guest }, 64 - { "tlb_flush", &kvm_stat.tlb_flush }, 65 - { "invlpg", &kvm_stat.invlpg }, 66 - { "exits", &kvm_stat.exits }, 67 - { "io_exits", &kvm_stat.io_exits }, 68 - { "mmio_exits", &kvm_stat.mmio_exits }, 69 - { "signal_exits", &kvm_stat.signal_exits }, 70 - { "irq_window", &kvm_stat.irq_window_exits }, 71 - { "halt_exits", &kvm_stat.halt_exits }, 72 - { "request_irq", &kvm_stat.request_irq_exits }, 73 - { "irq_exits", &kvm_stat.irq_exits }, 74 - { NULL, NULL } 62 + { "pf_fixed", STAT_OFFSET(pf_fixed) }, 63 + { "pf_guest", STAT_OFFSET(pf_guest) }, 64 + { "tlb_flush", STAT_OFFSET(tlb_flush) }, 65 + { "invlpg", STAT_OFFSET(invlpg) }, 66 + { "exits", STAT_OFFSET(exits) }, 67 + { "io_exits", STAT_OFFSET(io_exits) }, 68 + { "mmio_exits", STAT_OFFSET(mmio_exits) }, 69 + { "signal_exits", STAT_OFFSET(signal_exits) }, 70 + { "irq_window", STAT_OFFSET(irq_window_exits) }, 71 + { "halt_exits", STAT_OFFSET(halt_exits) }, 72 + { "request_irq", STAT_OFFSET(request_irq_exits) }, 73 + { "irq_exits", STAT_OFFSET(irq_exits) }, 74 + { NULL } 75 75 }; 76 76 77 77 static struct dentry *debugfs_dir; ··· 346 346 kvm_free_physmem_slot(&kvm->memslots[i], NULL); 347 347 } 348 348 349 + static void free_pio_guest_pages(struct kvm_vcpu *vcpu) 350 + { 351 + int i; 352 + 353 + for (i = 0; i < 2; ++i) 354 + if (vcpu->pio.guest_pages[i]) { 355 + __free_page(vcpu->pio.guest_pages[i]); 356 + vcpu->pio.guest_pages[i] = NULL; 357 + } 358 + } 359 + 349 360 static void kvm_free_vcpu(struct kvm_vcpu *vcpu) 350 361 { 351 362 if (!vcpu->vmcs) ··· 366 355 kvm_mmu_destroy(vcpu); 367 356 vcpu_put(vcpu); 368 357 kvm_arch_ops->vcpu_free(vcpu); 358 + free_page((unsigned long)vcpu->run); 359 + vcpu->run = NULL; 360 + free_page((unsigned long)vcpu->pio_data); 361 + vcpu->pio_data = NULL; 362 + free_pio_guest_pages(vcpu); 369 363 } 370 364 371 365 static void kvm_free_vcpus(struct kvm *kvm) ··· 420 404 u64 pdpte; 421 405 u64 *pdpt; 422 406 int ret; 423 - struct kvm_memory_slot *memslot; 407 + struct page *page; 424 408 425 409 spin_lock(&vcpu->kvm->lock); 426 - memslot = gfn_to_memslot(vcpu->kvm, pdpt_gfn); 427 - /* FIXME: !memslot - emulate? 0xff? */ 428 - pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0); 410 + page = gfn_to_page(vcpu->kvm, pdpt_gfn); 411 + /* FIXME: !page - emulate? 0xff? */ 412 + pdpt = kmap_atomic(page, KM_USER0); 429 413 430 414 ret = 1; 431 415 for (i = 0; i < 4; ++i) { ··· 510 494 511 495 void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 512 496 { 513 - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); 514 497 set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); 515 498 } 516 499 EXPORT_SYMBOL_GPL(lmsw); ··· 845 830 return r; 846 831 } 847 832 848 - struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 833 + /* 834 + * Set a new alias region. Aliases map a portion of physical memory into 835 + * another portion. This is useful for memory windows, for example the PC 836 + * VGA region. 837 + */ 838 + static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, 839 + struct kvm_memory_alias *alias) 840 + { 841 + int r, n; 842 + struct kvm_mem_alias *p; 843 + 844 + r = -EINVAL; 845 + /* General sanity checks */ 846 + if (alias->memory_size & (PAGE_SIZE - 1)) 847 + goto out; 848 + if (alias->guest_phys_addr & (PAGE_SIZE - 1)) 849 + goto out; 850 + if (alias->slot >= KVM_ALIAS_SLOTS) 851 + goto out; 852 + if (alias->guest_phys_addr + alias->memory_size 853 + < alias->guest_phys_addr) 854 + goto out; 855 + if (alias->target_phys_addr + alias->memory_size 856 + < alias->target_phys_addr) 857 + goto out; 858 + 859 + spin_lock(&kvm->lock); 860 + 861 + p = &kvm->aliases[alias->slot]; 862 + p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 863 + p->npages = alias->memory_size >> PAGE_SHIFT; 864 + p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; 865 + 866 + for (n = KVM_ALIAS_SLOTS; n > 0; --n) 867 + if (kvm->aliases[n - 1].npages) 868 + break; 869 + kvm->naliases = n; 870 + 871 + spin_unlock(&kvm->lock); 872 + 873 + vcpu_load(&kvm->vcpus[0]); 874 + spin_lock(&kvm->lock); 875 + kvm_mmu_zap_all(&kvm->vcpus[0]); 876 + spin_unlock(&kvm->lock); 877 + vcpu_put(&kvm->vcpus[0]); 878 + 879 + return 0; 880 + 881 + out: 882 + return r; 883 + } 884 + 885 + static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 886 + { 887 + int i; 888 + struct kvm_mem_alias *alias; 889 + 890 + for (i = 0; i < kvm->naliases; ++i) { 891 + alias = &kvm->aliases[i]; 892 + if (gfn >= alias->base_gfn 893 + && gfn < alias->base_gfn + alias->npages) 894 + return alias->target_gfn + gfn - alias->base_gfn; 895 + } 896 + return gfn; 897 + } 898 + 899 + static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 849 900 { 850 901 int i; 851 902 ··· 924 843 } 925 844 return NULL; 926 845 } 927 - EXPORT_SYMBOL_GPL(gfn_to_memslot); 846 + 847 + struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 848 + { 849 + gfn = unalias_gfn(kvm, gfn); 850 + return __gfn_to_memslot(kvm, gfn); 851 + } 852 + 853 + struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) 854 + { 855 + struct kvm_memory_slot *slot; 856 + 857 + gfn = unalias_gfn(kvm, gfn); 858 + slot = __gfn_to_memslot(kvm, gfn); 859 + if (!slot) 860 + return NULL; 861 + return slot->phys_mem[gfn - slot->base_gfn]; 862 + } 863 + EXPORT_SYMBOL_GPL(gfn_to_page); 928 864 929 865 void mark_page_dirty(struct kvm *kvm, gfn_t gfn) 930 866 { ··· 969 871 } 970 872 971 873 static int emulator_read_std(unsigned long addr, 972 - unsigned long *val, 874 + void *val, 973 875 unsigned int bytes, 974 876 struct x86_emulate_ctxt *ctxt) 975 877 { ··· 981 883 unsigned offset = addr & (PAGE_SIZE-1); 982 884 unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); 983 885 unsigned long pfn; 984 - struct kvm_memory_slot *memslot; 985 - void *page; 886 + struct page *page; 887 + void *page_virt; 986 888 987 889 if (gpa == UNMAPPED_GVA) 988 890 return X86EMUL_PROPAGATE_FAULT; 989 891 pfn = gpa >> PAGE_SHIFT; 990 - memslot = gfn_to_memslot(vcpu->kvm, pfn); 991 - if (!memslot) 892 + page = gfn_to_page(vcpu->kvm, pfn); 893 + if (!page) 992 894 return X86EMUL_UNHANDLEABLE; 993 - page = kmap_atomic(gfn_to_page(memslot, pfn), KM_USER0); 895 + page_virt = kmap_atomic(page, KM_USER0); 994 896 995 - memcpy(data, page + offset, tocopy); 897 + memcpy(data, page_virt + offset, tocopy); 996 898 997 - kunmap_atomic(page, KM_USER0); 899 + kunmap_atomic(page_virt, KM_USER0); 998 900 999 901 bytes -= tocopy; 1000 902 data += tocopy; ··· 1005 907 } 1006 908 1007 909 static int emulator_write_std(unsigned long addr, 1008 - unsigned long val, 910 + const void *val, 1009 911 unsigned int bytes, 1010 912 struct x86_emulate_ctxt *ctxt) 1011 913 { ··· 1015 917 } 1016 918 1017 919 static int emulator_read_emulated(unsigned long addr, 1018 - unsigned long *val, 920 + void *val, 1019 921 unsigned int bytes, 1020 922 struct x86_emulate_ctxt *ctxt) 1021 923 { ··· 1043 945 } 1044 946 1045 947 static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, 1046 - unsigned long val, int bytes) 948 + const void *val, int bytes) 1047 949 { 1048 - struct kvm_memory_slot *m; 1049 950 struct page *page; 1050 951 void *virt; 1051 952 1052 953 if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) 1053 954 return 0; 1054 - m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); 1055 - if (!m) 955 + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 956 + if (!page) 1056 957 return 0; 1057 - page = gfn_to_page(m, gpa >> PAGE_SHIFT); 1058 958 kvm_mmu_pre_write(vcpu, gpa, bytes); 1059 959 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); 1060 960 virt = kmap_atomic(page, KM_USER0); 1061 - memcpy(virt + offset_in_page(gpa), &val, bytes); 961 + memcpy(virt + offset_in_page(gpa), val, bytes); 1062 962 kunmap_atomic(virt, KM_USER0); 1063 963 kvm_mmu_post_write(vcpu, gpa, bytes); 1064 964 return 1; 1065 965 } 1066 966 1067 967 static int emulator_write_emulated(unsigned long addr, 1068 - unsigned long val, 968 + const void *val, 1069 969 unsigned int bytes, 1070 970 struct x86_emulate_ctxt *ctxt) 1071 971 { 1072 972 struct kvm_vcpu *vcpu = ctxt->vcpu; 1073 973 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); 1074 974 1075 - if (gpa == UNMAPPED_GVA) 975 + if (gpa == UNMAPPED_GVA) { 976 + kvm_arch_ops->inject_page_fault(vcpu, addr, 2); 1076 977 return X86EMUL_PROPAGATE_FAULT; 978 + } 1077 979 1078 980 if (emulator_write_phys(vcpu, gpa, val, bytes)) 1079 981 return X86EMUL_CONTINUE; ··· 1082 984 vcpu->mmio_phys_addr = gpa; 1083 985 vcpu->mmio_size = bytes; 1084 986 vcpu->mmio_is_write = 1; 1085 - memcpy(vcpu->mmio_data, &val, bytes); 987 + memcpy(vcpu->mmio_data, val, bytes); 1086 988 1087 989 return X86EMUL_CONTINUE; 1088 990 } 1089 991 1090 992 static int emulator_cmpxchg_emulated(unsigned long addr, 1091 - unsigned long old, 1092 - unsigned long new, 993 + const void *old, 994 + const void *new, 1093 995 unsigned int bytes, 1094 996 struct x86_emulate_ctxt *ctxt) 1095 997 { ··· 1101 1003 } 1102 1004 return emulator_write_emulated(addr, new, bytes, ctxt); 1103 1005 } 1104 - 1105 - #ifdef CONFIG_X86_32 1106 - 1107 - static int emulator_cmpxchg8b_emulated(unsigned long addr, 1108 - unsigned long old_lo, 1109 - unsigned long old_hi, 1110 - unsigned long new_lo, 1111 - unsigned long new_hi, 1112 - struct x86_emulate_ctxt *ctxt) 1113 - { 1114 - static int reported; 1115 - int r; 1116 - 1117 - if (!reported) { 1118 - reported = 1; 1119 - printk(KERN_WARNING "kvm: emulating exchange8b as write\n"); 1120 - } 1121 - r = emulator_write_emulated(addr, new_lo, 4, ctxt); 1122 - if (r != X86EMUL_CONTINUE) 1123 - return r; 1124 - return emulator_write_emulated(addr+4, new_hi, 4, ctxt); 1125 - } 1126 - 1127 - #endif 1128 1006 1129 1007 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) 1130 1008 { ··· 1116 1042 { 1117 1043 unsigned long cr0; 1118 1044 1119 - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); 1120 1045 cr0 = vcpu->cr0 & ~CR0_TS_MASK; 1121 1046 kvm_arch_ops->set_cr0(vcpu, cr0); 1122 1047 return X86EMUL_CONTINUE; ··· 1175 1102 .read_emulated = emulator_read_emulated, 1176 1103 .write_emulated = emulator_write_emulated, 1177 1104 .cmpxchg_emulated = emulator_cmpxchg_emulated, 1178 - #ifdef CONFIG_X86_32 1179 - .cmpxchg8b_emulated = emulator_cmpxchg8b_emulated, 1180 - #endif 1181 1105 }; 1182 1106 1183 1107 int emulate_instruction(struct kvm_vcpu *vcpu, ··· 1186 1116 int r; 1187 1117 int cs_db, cs_l; 1188 1118 1119 + vcpu->mmio_fault_cr2 = cr2; 1189 1120 kvm_arch_ops->cache_regs(vcpu); 1190 1121 1191 1122 kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ··· 1237 1166 kvm_arch_ops->decache_regs(vcpu); 1238 1167 kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); 1239 1168 1240 - if (vcpu->mmio_is_write) 1169 + if (vcpu->mmio_is_write) { 1170 + vcpu->mmio_needed = 0; 1241 1171 return EMULATE_DO_MMIO; 1172 + } 1242 1173 1243 1174 return EMULATE_DONE; 1244 1175 } ··· 1250 1177 { 1251 1178 unsigned long nr, a0, a1, a2, a3, a4, a5, ret; 1252 1179 1253 - kvm_arch_ops->decache_regs(vcpu); 1180 + kvm_arch_ops->cache_regs(vcpu); 1254 1181 ret = -KVM_EINVAL; 1255 1182 #ifdef CONFIG_X86_64 1256 1183 if (is_long_mode(vcpu)) { ··· 1274 1201 } 1275 1202 switch (nr) { 1276 1203 default: 1277 - ; 1204 + run->hypercall.args[0] = a0; 1205 + run->hypercall.args[1] = a1; 1206 + run->hypercall.args[2] = a2; 1207 + run->hypercall.args[3] = a3; 1208 + run->hypercall.args[4] = a4; 1209 + run->hypercall.args[5] = a5; 1210 + run->hypercall.ret = ret; 1211 + run->hypercall.longmode = is_long_mode(vcpu); 1212 + kvm_arch_ops->decache_regs(vcpu); 1213 + return 0; 1278 1214 } 1279 1215 vcpu->regs[VCPU_REGS_RAX] = ret; 1280 - kvm_arch_ops->cache_regs(vcpu); 1216 + kvm_arch_ops->decache_regs(vcpu); 1281 1217 return 1; 1282 1218 } 1283 1219 EXPORT_SYMBOL_GPL(kvm_hypercall); ··· 1319 1237 1320 1238 unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) 1321 1239 { 1322 - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); 1240 + kvm_arch_ops->decache_cr4_guest_bits(vcpu); 1323 1241 switch (cr) { 1324 1242 case 0: 1325 1243 return vcpu->cr0; ··· 1524 1442 printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", 1525 1443 __FUNCTION__, data); 1526 1444 break; 1445 + case MSR_IA32_MCG_STATUS: 1446 + printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", 1447 + __FUNCTION__, data); 1448 + break; 1527 1449 case MSR_IA32_UCODE_REV: 1528 1450 case MSR_IA32_UCODE_WRITE: 1529 1451 case 0x200 ... 0x2ff: /* MTRRs */ ··· 1564 1478 1565 1479 void kvm_resched(struct kvm_vcpu *vcpu) 1566 1480 { 1481 + if (!need_resched()) 1482 + return; 1567 1483 vcpu_put(vcpu); 1568 1484 cond_resched(); 1569 1485 vcpu_load(vcpu); ··· 1590 1502 } 1591 1503 EXPORT_SYMBOL_GPL(save_msrs); 1592 1504 1505 + void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 1506 + { 1507 + int i; 1508 + u32 function; 1509 + struct kvm_cpuid_entry *e, *best; 1510 + 1511 + kvm_arch_ops->cache_regs(vcpu); 1512 + function = vcpu->regs[VCPU_REGS_RAX]; 1513 + vcpu->regs[VCPU_REGS_RAX] = 0; 1514 + vcpu->regs[VCPU_REGS_RBX] = 0; 1515 + vcpu->regs[VCPU_REGS_RCX] = 0; 1516 + vcpu->regs[VCPU_REGS_RDX] = 0; 1517 + best = NULL; 1518 + for (i = 0; i < vcpu->cpuid_nent; ++i) { 1519 + e = &vcpu->cpuid_entries[i]; 1520 + if (e->function == function) { 1521 + best = e; 1522 + break; 1523 + } 1524 + /* 1525 + * Both basic or both extended? 1526 + */ 1527 + if (((e->function ^ function) & 0x80000000) == 0) 1528 + if (!best || e->function > best->function) 1529 + best = e; 1530 + } 1531 + if (best) { 1532 + vcpu->regs[VCPU_REGS_RAX] = best->eax; 1533 + vcpu->regs[VCPU_REGS_RBX] = best->ebx; 1534 + vcpu->regs[VCPU_REGS_RCX] = best->ecx; 1535 + vcpu->regs[VCPU_REGS_RDX] = best->edx; 1536 + } 1537 + kvm_arch_ops->decache_regs(vcpu); 1538 + kvm_arch_ops->skip_emulated_instruction(vcpu); 1539 + } 1540 + EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 1541 + 1542 + static int pio_copy_data(struct kvm_vcpu *vcpu) 1543 + { 1544 + void *p = vcpu->pio_data; 1545 + void *q; 1546 + unsigned bytes; 1547 + int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; 1548 + 1549 + kvm_arch_ops->vcpu_put(vcpu); 1550 + q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, 1551 + PAGE_KERNEL); 1552 + if (!q) { 1553 + kvm_arch_ops->vcpu_load(vcpu); 1554 + free_pio_guest_pages(vcpu); 1555 + return -ENOMEM; 1556 + } 1557 + q += vcpu->pio.guest_page_offset; 1558 + bytes = vcpu->pio.size * vcpu->pio.cur_count; 1559 + if (vcpu->pio.in) 1560 + memcpy(q, p, bytes); 1561 + else 1562 + memcpy(p, q, bytes); 1563 + q -= vcpu->pio.guest_page_offset; 1564 + vunmap(q); 1565 + kvm_arch_ops->vcpu_load(vcpu); 1566 + free_pio_guest_pages(vcpu); 1567 + return 0; 1568 + } 1569 + 1570 + static int complete_pio(struct kvm_vcpu *vcpu) 1571 + { 1572 + struct kvm_pio_request *io = &vcpu->pio; 1573 + long delta; 1574 + int r; 1575 + 1576 + kvm_arch_ops->cache_regs(vcpu); 1577 + 1578 + if (!io->string) { 1579 + if (io->in) 1580 + memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data, 1581 + io->size); 1582 + } else { 1583 + if (io->in) { 1584 + r = pio_copy_data(vcpu); 1585 + if (r) { 1586 + kvm_arch_ops->cache_regs(vcpu); 1587 + return r; 1588 + } 1589 + } 1590 + 1591 + delta = 1; 1592 + if (io->rep) { 1593 + delta *= io->cur_count; 1594 + /* 1595 + * The size of the register should really depend on 1596 + * current address size. 1597 + */ 1598 + vcpu->regs[VCPU_REGS_RCX] -= delta; 1599 + } 1600 + if (io->down) 1601 + delta = -delta; 1602 + delta *= io->size; 1603 + if (io->in) 1604 + vcpu->regs[VCPU_REGS_RDI] += delta; 1605 + else 1606 + vcpu->regs[VCPU_REGS_RSI] += delta; 1607 + } 1608 + 1609 + kvm_arch_ops->decache_regs(vcpu); 1610 + 1611 + io->count -= io->cur_count; 1612 + io->cur_count = 0; 1613 + 1614 + if (!io->count) 1615 + kvm_arch_ops->skip_emulated_instruction(vcpu); 1616 + return 0; 1617 + } 1618 + 1619 + int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 1620 + int size, unsigned long count, int string, int down, 1621 + gva_t address, int rep, unsigned port) 1622 + { 1623 + unsigned now, in_page; 1624 + int i; 1625 + int nr_pages = 1; 1626 + struct page *page; 1627 + 1628 + vcpu->run->exit_reason = KVM_EXIT_IO; 1629 + vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 1630 + vcpu->run->io.size = size; 1631 + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 1632 + vcpu->run->io.count = count; 1633 + vcpu->run->io.port = port; 1634 + vcpu->pio.count = count; 1635 + vcpu->pio.cur_count = count; 1636 + vcpu->pio.size = size; 1637 + vcpu->pio.in = in; 1638 + vcpu->pio.string = string; 1639 + vcpu->pio.down = down; 1640 + vcpu->pio.guest_page_offset = offset_in_page(address); 1641 + vcpu->pio.rep = rep; 1642 + 1643 + if (!string) { 1644 + kvm_arch_ops->cache_regs(vcpu); 1645 + memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); 1646 + kvm_arch_ops->decache_regs(vcpu); 1647 + return 0; 1648 + } 1649 + 1650 + if (!count) { 1651 + kvm_arch_ops->skip_emulated_instruction(vcpu); 1652 + return 1; 1653 + } 1654 + 1655 + now = min(count, PAGE_SIZE / size); 1656 + 1657 + if (!down) 1658 + in_page = PAGE_SIZE - offset_in_page(address); 1659 + else 1660 + in_page = offset_in_page(address) + size; 1661 + now = min(count, (unsigned long)in_page / size); 1662 + if (!now) { 1663 + /* 1664 + * String I/O straddles page boundary. Pin two guest pages 1665 + * so that we satisfy atomicity constraints. Do just one 1666 + * transaction to avoid complexity. 1667 + */ 1668 + nr_pages = 2; 1669 + now = 1; 1670 + } 1671 + if (down) { 1672 + /* 1673 + * String I/O in reverse. Yuck. Kill the guest, fix later. 1674 + */ 1675 + printk(KERN_ERR "kvm: guest string pio down\n"); 1676 + inject_gp(vcpu); 1677 + return 1; 1678 + } 1679 + vcpu->run->io.count = now; 1680 + vcpu->pio.cur_count = now; 1681 + 1682 + for (i = 0; i < nr_pages; ++i) { 1683 + spin_lock(&vcpu->kvm->lock); 1684 + page = gva_to_page(vcpu, address + i * PAGE_SIZE); 1685 + if (page) 1686 + get_page(page); 1687 + vcpu->pio.guest_pages[i] = page; 1688 + spin_unlock(&vcpu->kvm->lock); 1689 + if (!page) { 1690 + inject_gp(vcpu); 1691 + free_pio_guest_pages(vcpu); 1692 + return 1; 1693 + } 1694 + } 1695 + 1696 + if (!vcpu->pio.in) 1697 + return pio_copy_data(vcpu); 1698 + return 0; 1699 + } 1700 + EXPORT_SYMBOL_GPL(kvm_setup_pio); 1701 + 1593 1702 static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1594 1703 { 1595 1704 int r; 1705 + sigset_t sigsaved; 1596 1706 1597 1707 vcpu_load(vcpu); 1708 + 1709 + if (vcpu->sigset_active) 1710 + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 1598 1711 1599 1712 /* re-sync apic's tpr */ 1600 1713 vcpu->cr8 = kvm_run->cr8; 1601 1714 1602 - if (kvm_run->emulated) { 1603 - kvm_arch_ops->skip_emulated_instruction(vcpu); 1604 - kvm_run->emulated = 0; 1715 + if (vcpu->pio.cur_count) { 1716 + r = complete_pio(vcpu); 1717 + if (r) 1718 + goto out; 1605 1719 } 1606 1720 1607 - if (kvm_run->mmio_completed) { 1721 + if (vcpu->mmio_needed) { 1608 1722 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 1609 1723 vcpu->mmio_read_completed = 1; 1724 + vcpu->mmio_needed = 0; 1725 + r = emulate_instruction(vcpu, kvm_run, 1726 + vcpu->mmio_fault_cr2, 0); 1727 + if (r == EMULATE_DO_MMIO) { 1728 + /* 1729 + * Read-modify-write. Back to userspace. 1730 + */ 1731 + kvm_run->exit_reason = KVM_EXIT_MMIO; 1732 + r = 0; 1733 + goto out; 1734 + } 1610 1735 } 1611 1736 1612 - vcpu->mmio_needed = 0; 1737 + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { 1738 + kvm_arch_ops->cache_regs(vcpu); 1739 + vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; 1740 + kvm_arch_ops->decache_regs(vcpu); 1741 + } 1613 1742 1614 1743 r = kvm_arch_ops->run(vcpu, kvm_run); 1744 + 1745 + out: 1746 + if (vcpu->sigset_active) 1747 + sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1615 1748 1616 1749 vcpu_put(vcpu); 1617 1750 return r; ··· 1942 1633 sregs->gdt.limit = dt.limit; 1943 1634 sregs->gdt.base = dt.base; 1944 1635 1945 - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); 1636 + kvm_arch_ops->decache_cr4_guest_bits(vcpu); 1946 1637 sregs->cr0 = vcpu->cr0; 1947 1638 sregs->cr2 = vcpu->cr2; 1948 1639 sregs->cr3 = vcpu->cr3; ··· 1974 1665 1975 1666 vcpu_load(vcpu); 1976 1667 1977 - set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 1978 - set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 1979 - set_segment(vcpu, &sregs->es, VCPU_SREG_ES); 1980 - set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 1981 - set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 1982 - set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 1983 - 1984 - set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 1985 - set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 1986 - 1987 1668 dt.limit = sregs->idt.limit; 1988 1669 dt.base = sregs->idt.base; 1989 1670 kvm_arch_ops->set_idt(vcpu, &dt); ··· 1993 1694 #endif 1994 1695 vcpu->apic_base = sregs->apic_base; 1995 1696 1996 - kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu); 1697 + kvm_arch_ops->decache_cr4_guest_bits(vcpu); 1997 1698 1998 1699 mmu_reset_needed |= vcpu->cr0 != sregs->cr0; 1999 - kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0); 1700 + kvm_arch_ops->set_cr0(vcpu, sregs->cr0); 2000 1701 2001 1702 mmu_reset_needed |= vcpu->cr4 != sregs->cr4; 2002 1703 kvm_arch_ops->set_cr4(vcpu, sregs->cr4); ··· 2012 1713 for (i = 0; i < NR_IRQ_WORDS; ++i) 2013 1714 if (vcpu->irq_pending[i]) 2014 1715 __set_bit(i, &vcpu->irq_summary); 1716 + 1717 + set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 1718 + set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 1719 + set_segment(vcpu, &sregs->es, VCPU_SREG_ES); 1720 + set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 1721 + set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 1722 + set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 1723 + 1724 + set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 1725 + set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 2015 1726 2016 1727 vcpu_put(vcpu); 2017 1728 ··· 2196 1887 return r; 2197 1888 } 2198 1889 1890 + static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, 1891 + unsigned long address, 1892 + int *type) 1893 + { 1894 + struct kvm_vcpu *vcpu = vma->vm_file->private_data; 1895 + unsigned long pgoff; 1896 + struct page *page; 1897 + 1898 + *type = VM_FAULT_MINOR; 1899 + pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 1900 + if (pgoff == 0) 1901 + page = virt_to_page(vcpu->run); 1902 + else if (pgoff == KVM_PIO_PAGE_OFFSET) 1903 + page = virt_to_page(vcpu->pio_data); 1904 + else 1905 + return NOPAGE_SIGBUS; 1906 + get_page(page); 1907 + return page; 1908 + } 1909 + 1910 + static struct vm_operations_struct kvm_vcpu_vm_ops = { 1911 + .nopage = kvm_vcpu_nopage, 1912 + }; 1913 + 1914 + static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) 1915 + { 1916 + vma->vm_ops = &kvm_vcpu_vm_ops; 1917 + return 0; 1918 + } 1919 + 2199 1920 static int kvm_vcpu_release(struct inode *inode, struct file *filp) 2200 1921 { 2201 1922 struct kvm_vcpu *vcpu = filp->private_data; ··· 2238 1899 .release = kvm_vcpu_release, 2239 1900 .unlocked_ioctl = kvm_vcpu_ioctl, 2240 1901 .compat_ioctl = kvm_vcpu_ioctl, 1902 + .mmap = kvm_vcpu_mmap, 2241 1903 }; 2242 1904 2243 1905 /* ··· 2287 1947 { 2288 1948 int r; 2289 1949 struct kvm_vcpu *vcpu; 1950 + struct page *page; 2290 1951 2291 1952 r = -EINVAL; 2292 1953 if (!valid_vcpu(n)) ··· 2302 1961 return -EEXIST; 2303 1962 } 2304 1963 1964 + page = alloc_page(GFP_KERNEL | __GFP_ZERO); 1965 + r = -ENOMEM; 1966 + if (!page) 1967 + goto out_unlock; 1968 + vcpu->run = page_address(page); 1969 + 1970 + page = alloc_page(GFP_KERNEL | __GFP_ZERO); 1971 + r = -ENOMEM; 1972 + if (!page) 1973 + goto out_free_run; 1974 + vcpu->pio_data = page_address(page); 1975 + 2305 1976 vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, 2306 1977 FX_IMAGE_ALIGN); 2307 1978 vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; 1979 + vcpu->cr0 = 0x10; 2308 1980 2309 1981 r = kvm_arch_ops->vcpu_create(vcpu); 2310 1982 if (r < 0) ··· 2344 1990 2345 1991 out_free_vcpus: 2346 1992 kvm_free_vcpu(vcpu); 1993 + out_free_run: 1994 + free_page((unsigned long)vcpu->run); 1995 + vcpu->run = NULL; 1996 + out_unlock: 2347 1997 mutex_unlock(&vcpu->mutex); 2348 1998 out: 2349 1999 return r; 2000 + } 2001 + 2002 + static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, 2003 + struct kvm_cpuid *cpuid, 2004 + struct kvm_cpuid_entry __user *entries) 2005 + { 2006 + int r; 2007 + 2008 + r = -E2BIG; 2009 + if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 2010 + goto out; 2011 + r = -EFAULT; 2012 + if (copy_from_user(&vcpu->cpuid_entries, entries, 2013 + cpuid->nent * sizeof(struct kvm_cpuid_entry))) 2014 + goto out; 2015 + vcpu->cpuid_nent = cpuid->nent; 2016 + return 0; 2017 + 2018 + out: 2019 + return r; 2020 + } 2021 + 2022 + static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) 2023 + { 2024 + if (sigset) { 2025 + sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); 2026 + vcpu->sigset_active = 1; 2027 + vcpu->sigset = *sigset; 2028 + } else 2029 + vcpu->sigset_active = 0; 2030 + return 0; 2031 + } 2032 + 2033 + /* 2034 + * fxsave fpu state. Taken from x86_64/processor.h. To be killed when 2035 + * we have asm/x86/processor.h 2036 + */ 2037 + struct fxsave { 2038 + u16 cwd; 2039 + u16 swd; 2040 + u16 twd; 2041 + u16 fop; 2042 + u64 rip; 2043 + u64 rdp; 2044 + u32 mxcsr; 2045 + u32 mxcsr_mask; 2046 + u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ 2047 + #ifdef CONFIG_X86_64 2048 + u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ 2049 + #else 2050 + u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ 2051 + #endif 2052 + }; 2053 + 2054 + static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 2055 + { 2056 + struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; 2057 + 2058 + vcpu_load(vcpu); 2059 + 2060 + memcpy(fpu->fpr, fxsave->st_space, 128); 2061 + fpu->fcw = fxsave->cwd; 2062 + fpu->fsw = fxsave->swd; 2063 + fpu->ftwx = fxsave->twd; 2064 + fpu->last_opcode = fxsave->fop; 2065 + fpu->last_ip = fxsave->rip; 2066 + fpu->last_dp = fxsave->rdp; 2067 + memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); 2068 + 2069 + vcpu_put(vcpu); 2070 + 2071 + return 0; 2072 + } 2073 + 2074 + static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 2075 + { 2076 + struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; 2077 + 2078 + vcpu_load(vcpu); 2079 + 2080 + memcpy(fxsave->st_space, fpu->fpr, 128); 2081 + fxsave->cwd = fpu->fcw; 2082 + fxsave->swd = fpu->fsw; 2083 + fxsave->twd = fpu->ftwx; 2084 + fxsave->fop = fpu->last_opcode; 2085 + fxsave->rip = fpu->last_ip; 2086 + fxsave->rdp = fpu->last_dp; 2087 + memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); 2088 + 2089 + vcpu_put(vcpu); 2090 + 2091 + return 0; 2350 2092 } 2351 2093 2352 2094 static long kvm_vcpu_ioctl(struct file *filp, ··· 2453 2003 int r = -EINVAL; 2454 2004 2455 2005 switch (ioctl) { 2456 - case KVM_RUN: { 2457 - struct kvm_run kvm_run; 2458 - 2459 - r = -EFAULT; 2460 - if (copy_from_user(&kvm_run, argp, sizeof kvm_run)) 2006 + case KVM_RUN: 2007 + r = -EINVAL; 2008 + if (arg) 2461 2009 goto out; 2462 - r = kvm_vcpu_ioctl_run(vcpu, &kvm_run); 2463 - if (r < 0 && r != -EINTR) 2464 - goto out; 2465 - if (copy_to_user(argp, &kvm_run, sizeof kvm_run)) { 2466 - r = -EFAULT; 2467 - goto out; 2468 - } 2010 + r = kvm_vcpu_ioctl_run(vcpu, vcpu->run); 2469 2011 break; 2470 - } 2471 2012 case KVM_GET_REGS: { 2472 2013 struct kvm_regs kvm_regs; 2473 2014 ··· 2554 2113 case KVM_SET_MSRS: 2555 2114 r = msr_io(vcpu, argp, do_set_msr, 0); 2556 2115 break; 2116 + case KVM_SET_CPUID: { 2117 + struct kvm_cpuid __user *cpuid_arg = argp; 2118 + struct kvm_cpuid cpuid; 2119 + 2120 + r = -EFAULT; 2121 + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2122 + goto out; 2123 + r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); 2124 + if (r) 2125 + goto out; 2126 + break; 2127 + } 2128 + case KVM_SET_SIGNAL_MASK: { 2129 + struct kvm_signal_mask __user *sigmask_arg = argp; 2130 + struct kvm_signal_mask kvm_sigmask; 2131 + sigset_t sigset, *p; 2132 + 2133 + p = NULL; 2134 + if (argp) { 2135 + r = -EFAULT; 2136 + if (copy_from_user(&kvm_sigmask, argp, 2137 + sizeof kvm_sigmask)) 2138 + goto out; 2139 + r = -EINVAL; 2140 + if (kvm_sigmask.len != sizeof sigset) 2141 + goto out; 2142 + r = -EFAULT; 2143 + if (copy_from_user(&sigset, sigmask_arg->sigset, 2144 + sizeof sigset)) 2145 + goto out; 2146 + p = &sigset; 2147 + } 2148 + r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); 2149 + break; 2150 + } 2151 + case KVM_GET_FPU: { 2152 + struct kvm_fpu fpu; 2153 + 2154 + memset(&fpu, 0, sizeof fpu); 2155 + r = kvm_vcpu_ioctl_get_fpu(vcpu, &fpu); 2156 + if (r) 2157 + goto out; 2158 + r = -EFAULT; 2159 + if (copy_to_user(argp, &fpu, sizeof fpu)) 2160 + goto out; 2161 + r = 0; 2162 + break; 2163 + } 2164 + case KVM_SET_FPU: { 2165 + struct kvm_fpu fpu; 2166 + 2167 + r = -EFAULT; 2168 + if (copy_from_user(&fpu, argp, sizeof fpu)) 2169 + goto out; 2170 + r = kvm_vcpu_ioctl_set_fpu(vcpu, &fpu); 2171 + if (r) 2172 + goto out; 2173 + r = 0; 2174 + break; 2175 + } 2557 2176 default: 2558 2177 ; 2559 2178 } ··· 2656 2155 goto out; 2657 2156 break; 2658 2157 } 2158 + case KVM_SET_MEMORY_ALIAS: { 2159 + struct kvm_memory_alias alias; 2160 + 2161 + r = -EFAULT; 2162 + if (copy_from_user(&alias, argp, sizeof alias)) 2163 + goto out; 2164 + r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); 2165 + if (r) 2166 + goto out; 2167 + break; 2168 + } 2659 2169 default: 2660 2170 ; 2661 2171 } ··· 2680 2168 { 2681 2169 struct kvm *kvm = vma->vm_file->private_data; 2682 2170 unsigned long pgoff; 2683 - struct kvm_memory_slot *slot; 2684 2171 struct page *page; 2685 2172 2686 2173 *type = VM_FAULT_MINOR; 2687 2174 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 2688 - slot = gfn_to_memslot(kvm, pgoff); 2689 - if (!slot) 2690 - return NOPAGE_SIGBUS; 2691 - page = gfn_to_page(slot, pgoff); 2175 + page = gfn_to_page(kvm, pgoff); 2692 2176 if (!page) 2693 2177 return NOPAGE_SIGBUS; 2694 2178 get_page(page); ··· 2756 2248 unsigned int ioctl, unsigned long arg) 2757 2249 { 2758 2250 void __user *argp = (void __user *)arg; 2759 - int r = -EINVAL; 2251 + long r = -EINVAL; 2760 2252 2761 2253 switch (ioctl) { 2762 2254 case KVM_GET_API_VERSION: 2255 + r = -EINVAL; 2256 + if (arg) 2257 + goto out; 2763 2258 r = KVM_API_VERSION; 2764 2259 break; 2765 2260 case KVM_CREATE_VM: 2261 + r = -EINVAL; 2262 + if (arg) 2263 + goto out; 2766 2264 r = kvm_dev_ioctl_create_vm(); 2767 2265 break; 2768 2266 case KVM_GET_MSR_INDEX_LIST: { ··· 2798 2284 r = 0; 2799 2285 break; 2800 2286 } 2287 + case KVM_CHECK_EXTENSION: 2288 + /* 2289 + * No extensions defined at present. 2290 + */ 2291 + r = 0; 2292 + break; 2293 + case KVM_GET_VCPU_MMAP_SIZE: 2294 + r = -EINVAL; 2295 + if (arg) 2296 + goto out; 2297 + r = 2 * PAGE_SIZE; 2298 + break; 2801 2299 default: 2802 2300 ; 2803 2301 } ··· 2825 2299 }; 2826 2300 2827 2301 static struct miscdevice kvm_dev = { 2828 - MISC_DYNAMIC_MINOR, 2302 + KVM_MINOR, 2829 2303 "kvm", 2830 2304 &kvm_chardev_ops, 2831 2305 }; ··· 2911 2385 .priority = 20, /* must be > scheduler priority */ 2912 2386 }; 2913 2387 2388 + static u64 stat_get(void *_offset) 2389 + { 2390 + unsigned offset = (long)_offset; 2391 + u64 total = 0; 2392 + struct kvm *kvm; 2393 + struct kvm_vcpu *vcpu; 2394 + int i; 2395 + 2396 + spin_lock(&kvm_lock); 2397 + list_for_each_entry(kvm, &vm_list, vm_list) 2398 + for (i = 0; i < KVM_MAX_VCPUS; ++i) { 2399 + vcpu = &kvm->vcpus[i]; 2400 + total += *(u32 *)((void *)vcpu + offset); 2401 + } 2402 + spin_unlock(&kvm_lock); 2403 + return total; 2404 + } 2405 + 2406 + static void stat_set(void *offset, u64 val) 2407 + { 2408 + } 2409 + 2410 + DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n"); 2411 + 2914 2412 static __init void kvm_init_debug(void) 2915 2413 { 2916 2414 struct kvm_stats_debugfs_item *p; 2917 2415 2918 2416 debugfs_dir = debugfs_create_dir("kvm", NULL); 2919 2417 for (p = debugfs_entries; p->name; ++p) 2920 - p->dentry = debugfs_create_u32(p->name, 0444, debugfs_dir, 2921 - p->data); 2418 + p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, 2419 + (void *)(long)p->offset, 2420 + &stat_fops); 2922 2421 } 2923 2422 2924 2423 static void kvm_exit_debug(void) ··· 3073 2522 static struct page *bad_page; 3074 2523 int r; 3075 2524 2525 + r = kvm_mmu_module_init(); 2526 + if (r) 2527 + goto out4; 2528 + 3076 2529 r = register_filesystem(&kvm_fs_type); 3077 2530 if (r) 3078 2531 goto out3; ··· 3105 2550 out2: 3106 2551 unregister_filesystem(&kvm_fs_type); 3107 2552 out3: 2553 + kvm_mmu_module_exit(); 2554 + out4: 3108 2555 return r; 3109 2556 } 3110 2557 ··· 3116 2559 __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); 3117 2560 mntput(kvmfs_mnt); 3118 2561 unregister_filesystem(&kvm_fs_type); 2562 + kvm_mmu_module_exit(); 3119 2563 } 3120 2564 3121 2565 module_init(kvm_init)
+5 -8
drivers/kvm/kvm_svm.h
··· 9 9 #include "svm.h" 10 10 #include "kvm.h" 11 11 12 - static const u32 host_save_msrs[] = { 12 + static const u32 host_save_user_msrs[] = { 13 13 #ifdef CONFIG_X86_64 14 14 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, 15 - MSR_FS_BASE, MSR_GS_BASE, 15 + MSR_FS_BASE, 16 16 #endif 17 17 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 18 - MSR_IA32_DEBUGCTLMSR, /*MSR_IA32_LASTBRANCHFROMIP, 19 - MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/ 20 18 }; 21 19 22 - #define NR_HOST_SAVE_MSRS ARRAY_SIZE(host_save_msrs) 20 + #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) 23 21 #define NUM_DB_REGS 4 24 22 25 23 struct vcpu_svm { ··· 26 28 struct svm_cpu_data *svm_data; 27 29 uint64_t asid_generation; 28 30 29 - unsigned long cr0; 30 - unsigned long cr4; 31 31 unsigned long db_regs[NUM_DB_REGS]; 32 32 33 33 u64 next_rip; 34 34 35 - u64 host_msrs[NR_HOST_SAVE_MSRS]; 35 + u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; 36 + u64 host_gs_base; 36 37 unsigned long host_cr2; 37 38 unsigned long host_db_regs[NUM_DB_REGS]; 38 39 unsigned long host_dr6;
-14
drivers/kvm/kvm_vmx.h
··· 1 - #ifndef __KVM_VMX_H 2 - #define __KVM_VMX_H 3 - 4 - #ifdef CONFIG_X86_64 5 - /* 6 - * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt 7 - * mechanism (cpu bug AA24) 8 - */ 9 - #define NR_BAD_MSRS 2 10 - #else 11 - #define NR_BAD_MSRS 0 12 - #endif 13 - 14 - #endif
+112 -42
drivers/kvm/mmu.c
··· 52 52 static int dbg = 1; 53 53 #endif 54 54 55 + #ifndef MMU_DEBUG 56 + #define ASSERT(x) do { } while (0) 57 + #else 55 58 #define ASSERT(x) \ 56 59 if (!(x)) { \ 57 60 printk(KERN_WARNING "assertion failed %s:%d: %s\n", \ 58 61 __FILE__, __LINE__, #x); \ 59 62 } 63 + #endif 60 64 61 65 #define PT64_PT_BITS 9 62 66 #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) ··· 163 159 struct kvm_rmap_desc *more; 164 160 }; 165 161 162 + static struct kmem_cache *pte_chain_cache; 163 + static struct kmem_cache *rmap_desc_cache; 164 + 166 165 static int is_write_protection(struct kvm_vcpu *vcpu) 167 166 { 168 167 return vcpu->cr0 & CR0_WP_MASK; ··· 203 196 } 204 197 205 198 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, 206 - size_t objsize, int min) 199 + struct kmem_cache *base_cache, int min, 200 + gfp_t gfp_flags) 207 201 { 208 202 void *obj; 209 203 210 204 if (cache->nobjs >= min) 211 205 return 0; 212 206 while (cache->nobjs < ARRAY_SIZE(cache->objects)) { 213 - obj = kzalloc(objsize, GFP_NOWAIT); 207 + obj = kmem_cache_zalloc(base_cache, gfp_flags); 214 208 if (!obj) 215 209 return -ENOMEM; 216 210 cache->objects[cache->nobjs++] = obj; ··· 225 217 kfree(mc->objects[--mc->nobjs]); 226 218 } 227 219 228 - static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) 220 + static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) 229 221 { 230 222 int r; 231 223 232 224 r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, 233 - sizeof(struct kvm_pte_chain), 4); 225 + pte_chain_cache, 4, gfp_flags); 234 226 if (r) 235 227 goto out; 236 228 r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, 237 - sizeof(struct kvm_rmap_desc), 1); 229 + rmap_desc_cache, 1, gfp_flags); 238 230 out: 231 + return r; 232 + } 233 + 234 + static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) 235 + { 236 + int r; 237 + 238 + r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT); 239 + if (r < 0) { 240 + spin_unlock(&vcpu->kvm->lock); 241 + kvm_arch_ops->vcpu_put(vcpu); 242 + r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); 243 + kvm_arch_ops->vcpu_load(vcpu); 244 + spin_lock(&vcpu->kvm->lock); 245 + } 239 246 return r; 240 247 } 241 248 ··· 413 390 { 414 391 struct kvm *kvm = vcpu->kvm; 415 392 struct page *page; 416 - struct kvm_memory_slot *slot; 417 393 struct kvm_rmap_desc *desc; 418 394 u64 *spte; 419 395 420 - slot = gfn_to_memslot(kvm, gfn); 421 - BUG_ON(!slot); 422 - page = gfn_to_page(slot, gfn); 396 + page = gfn_to_page(kvm, gfn); 397 + BUG_ON(!page); 423 398 424 399 while (page_private(page)) { 425 400 if (!(page_private(page) & 1)) ··· 438 417 } 439 418 } 440 419 420 + #ifdef MMU_DEBUG 441 421 static int is_empty_shadow_page(hpa_t page_hpa) 442 422 { 443 423 u64 *pos; ··· 453 431 } 454 432 return 1; 455 433 } 434 + #endif 456 435 457 436 static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) 458 437 { 459 438 struct kvm_mmu_page *page_head = page_header(page_hpa); 460 439 461 440 ASSERT(is_empty_shadow_page(page_hpa)); 462 - list_del(&page_head->link); 463 441 page_head->page_hpa = page_hpa; 464 - list_add(&page_head->link, &vcpu->free_pages); 442 + list_move(&page_head->link, &vcpu->free_pages); 465 443 ++vcpu->kvm->n_free_mmu_pages; 466 444 } 467 445 ··· 479 457 return NULL; 480 458 481 459 page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); 482 - list_del(&page->link); 483 - list_add(&page->link, &vcpu->kvm->active_mmu_pages); 460 + list_move(&page->link, &vcpu->kvm->active_mmu_pages); 484 461 ASSERT(is_empty_shadow_page(page->page_hpa)); 485 462 page->slot_bitmap = 0; 486 - page->global = 1; 487 463 page->multimapped = 0; 488 464 page->parent_pte = parent_pte; 489 465 --vcpu->kvm->n_free_mmu_pages; ··· 589 569 gva_t gaddr, 590 570 unsigned level, 591 571 int metaphysical, 572 + unsigned hugepage_access, 592 573 u64 *parent_pte) 593 574 { 594 575 union kvm_mmu_page_role role; ··· 603 582 role.glevels = vcpu->mmu.root_level; 604 583 role.level = level; 605 584 role.metaphysical = metaphysical; 585 + role.hugepage_access = hugepage_access; 606 586 if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) { 607 587 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); 608 588 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; ··· 691 669 if (!page->root_count) { 692 670 hlist_del(&page->hash_link); 693 671 kvm_mmu_free_page(vcpu, page->page_hpa); 694 - } else { 695 - list_del(&page->link); 696 - list_add(&page->link, &vcpu->kvm->active_mmu_pages); 697 - } 672 + } else 673 + list_move(&page->link, &vcpu->kvm->active_mmu_pages); 698 674 } 699 675 700 676 static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) ··· 734 714 735 715 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa) 736 716 { 737 - struct kvm_memory_slot *slot; 738 717 struct page *page; 739 718 740 719 ASSERT((gpa & HPA_ERR_MASK) == 0); 741 - slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT); 742 - if (!slot) 720 + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 721 + if (!page) 743 722 return gpa | HPA_ERR_MASK; 744 - page = gfn_to_page(slot, gpa >> PAGE_SHIFT); 745 723 return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) 746 724 | (gpa & (PAGE_SIZE-1)); 747 725 } ··· 751 733 if (gpa == UNMAPPED_GVA) 752 734 return UNMAPPED_GVA; 753 735 return gpa_to_hpa(vcpu, gpa); 736 + } 737 + 738 + struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) 739 + { 740 + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); 741 + 742 + if (gpa == UNMAPPED_GVA) 743 + return NULL; 744 + return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT); 754 745 } 755 746 756 747 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) ··· 799 772 >> PAGE_SHIFT; 800 773 new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, 801 774 v, level - 1, 802 - 1, &table[index]); 775 + 1, 0, &table[index]); 803 776 if (!new_table) { 804 777 pgprintk("nonpaging_map: ENOMEM\n"); 805 778 return -ENOMEM; ··· 831 804 for (i = 0; i < 4; ++i) { 832 805 hpa_t root = vcpu->mmu.pae_root[i]; 833 806 834 - ASSERT(VALID_PAGE(root)); 835 - root &= PT64_BASE_ADDR_MASK; 836 - page = page_header(root); 837 - --page->root_count; 807 + if (root) { 808 + ASSERT(VALID_PAGE(root)); 809 + root &= PT64_BASE_ADDR_MASK; 810 + page = page_header(root); 811 + --page->root_count; 812 + } 838 813 vcpu->mmu.pae_root[i] = INVALID_PAGE; 839 814 } 840 815 vcpu->mmu.root_hpa = INVALID_PAGE; ··· 856 827 857 828 ASSERT(!VALID_PAGE(root)); 858 829 page = kvm_mmu_get_page(vcpu, root_gfn, 0, 859 - PT64_ROOT_LEVEL, 0, NULL); 830 + PT64_ROOT_LEVEL, 0, 0, NULL); 860 831 root = page->page_hpa; 861 832 ++page->root_count; 862 833 vcpu->mmu.root_hpa = root; ··· 867 838 hpa_t root = vcpu->mmu.pae_root[i]; 868 839 869 840 ASSERT(!VALID_PAGE(root)); 870 - if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) 841 + if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) { 842 + if (!is_present_pte(vcpu->pdptrs[i])) { 843 + vcpu->mmu.pae_root[i] = 0; 844 + continue; 845 + } 871 846 root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT; 872 - else if (vcpu->mmu.root_level == 0) 847 + } else if (vcpu->mmu.root_level == 0) 873 848 root_gfn = 0; 874 849 page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 875 850 PT32_ROOT_LEVEL, !is_paging(vcpu), 876 - NULL); 851 + 0, NULL); 877 852 root = page->page_hpa; 878 853 ++page->root_count; 879 854 vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; ··· 936 903 937 904 static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) 938 905 { 939 - ++kvm_stat.tlb_flush; 906 + ++vcpu->stat.tlb_flush; 940 907 kvm_arch_ops->tlb_flush(vcpu); 941 908 } 942 909 ··· 949 916 mmu_alloc_roots(vcpu); 950 917 kvm_mmu_flush_tlb(vcpu); 951 918 kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); 952 - } 953 - 954 - static void mark_pagetable_nonglobal(void *shadow_pte) 955 - { 956 - page_header(__pa(shadow_pte))->global = 0; 957 919 } 958 920 959 921 static inline void set_pte_common(struct kvm_vcpu *vcpu, ··· 967 939 paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); 968 940 969 941 *shadow_pte |= access_bits; 970 - 971 - if (!(*shadow_pte & PT_GLOBAL_MASK)) 972 - mark_pagetable_nonglobal(shadow_pte); 973 942 974 943 if (is_error_hpa(paddr)) { 975 944 *shadow_pte |= gaddr; ··· 1341 1316 } 1342 1317 } 1343 1318 1319 + void kvm_mmu_zap_all(struct kvm_vcpu *vcpu) 1320 + { 1321 + destroy_kvm_mmu(vcpu); 1322 + 1323 + while (!list_empty(&vcpu->kvm->active_mmu_pages)) { 1324 + struct kvm_mmu_page *page; 1325 + 1326 + page = container_of(vcpu->kvm->active_mmu_pages.next, 1327 + struct kvm_mmu_page, link); 1328 + kvm_mmu_zap_page(vcpu, page); 1329 + } 1330 + 1331 + mmu_free_memory_caches(vcpu); 1332 + kvm_arch_ops->tlb_flush(vcpu); 1333 + init_kvm_mmu(vcpu); 1334 + } 1335 + 1336 + void kvm_mmu_module_exit(void) 1337 + { 1338 + if (pte_chain_cache) 1339 + kmem_cache_destroy(pte_chain_cache); 1340 + if (rmap_desc_cache) 1341 + kmem_cache_destroy(rmap_desc_cache); 1342 + } 1343 + 1344 + int kvm_mmu_module_init(void) 1345 + { 1346 + pte_chain_cache = kmem_cache_create("kvm_pte_chain", 1347 + sizeof(struct kvm_pte_chain), 1348 + 0, 0, NULL, NULL); 1349 + if (!pte_chain_cache) 1350 + goto nomem; 1351 + rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", 1352 + sizeof(struct kvm_rmap_desc), 1353 + 0, 0, NULL, NULL); 1354 + if (!rmap_desc_cache) 1355 + goto nomem; 1356 + 1357 + return 0; 1358 + 1359 + nomem: 1360 + kvm_mmu_module_exit(); 1361 + return -ENOMEM; 1362 + } 1363 + 1344 1364 #ifdef AUDIT 1345 1365 1346 1366 static const char *audit_msg; ··· 1408 1338 for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { 1409 1339 u64 ent = pt[i]; 1410 1340 1411 - if (!ent & PT_PRESENT_MASK) 1341 + if (!(ent & PT_PRESENT_MASK)) 1412 1342 continue; 1413 1343 1414 1344 va = canonicalize(va); ··· 1430 1360 1431 1361 static void audit_mappings(struct kvm_vcpu *vcpu) 1432 1362 { 1433 - int i; 1363 + unsigned i; 1434 1364 1435 1365 if (vcpu->mmu.root_level == 4) 1436 1366 audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
+8 -4
drivers/kvm/paging_tmpl.h
··· 148 148 break; 149 149 } 150 150 151 - if (walker->level != 3 || is_long_mode(vcpu)) 152 - walker->inherited_ar &= walker->table[index]; 151 + walker->inherited_ar &= walker->table[index]; 153 152 table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; 154 153 paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK); 155 154 kunmap_atomic(walker->table, KM_USER0); ··· 247 248 u64 shadow_pte; 248 249 int metaphysical; 249 250 gfn_t table_gfn; 251 + unsigned hugepage_access = 0; 250 252 251 253 if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { 252 254 if (level == PT_PAGE_TABLE_LEVEL) ··· 277 277 if (level - 1 == PT_PAGE_TABLE_LEVEL 278 278 && walker->level == PT_DIRECTORY_LEVEL) { 279 279 metaphysical = 1; 280 + hugepage_access = *guest_ent; 281 + hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK; 282 + hugepage_access >>= PT_WRITABLE_SHIFT; 280 283 table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) 281 284 >> PAGE_SHIFT; 282 285 } else { ··· 287 284 table_gfn = walker->table_gfn[level - 2]; 288 285 } 289 286 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, 290 - metaphysical, shadow_ent); 287 + metaphysical, hugepage_access, 288 + shadow_ent); 291 289 shadow_addr = shadow_page->page_hpa; 292 290 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK 293 291 | PT_WRITABLE_MASK | PT_USER_MASK; ··· 448 444 if (is_io_pte(*shadow_pte)) 449 445 return 1; 450 446 451 - ++kvm_stat.pf_fixed; 447 + ++vcpu->stat.pf_fixed; 452 448 kvm_mmu_audit(vcpu, "post page fault (fixed)"); 453 449 454 450 return write_pt;
+125 -72
drivers/kvm/svm.c
··· 44 44 #define KVM_EFER_LMA (1 << 10) 45 45 #define KVM_EFER_LME (1 << 8) 46 46 47 + #define SVM_FEATURE_NPT (1 << 0) 48 + #define SVM_FEATURE_LBRV (1 << 1) 49 + #define SVM_DEATURE_SVML (1 << 2) 50 + 47 51 unsigned long iopm_base; 48 52 unsigned long msrpm_base; 49 53 ··· 63 59 struct svm_cpu_data { 64 60 int cpu; 65 61 66 - uint64_t asid_generation; 67 - uint32_t max_asid; 68 - uint32_t next_asid; 62 + u64 asid_generation; 63 + u32 max_asid; 64 + u32 next_asid; 69 65 struct kvm_ldttss_desc *tss_desc; 70 66 71 67 struct page *save_area; 72 68 }; 73 69 74 70 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); 71 + static uint32_t svm_features; 75 72 76 73 struct svm_init_data { 77 74 int cpu; ··· 86 81 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) 87 82 88 83 #define MAX_INST_SIZE 15 84 + 85 + static inline u32 svm_has(u32 feat) 86 + { 87 + return svm_features & feat; 88 + } 89 89 90 90 static unsigned get_addr_size(struct kvm_vcpu *vcpu) 91 91 { ··· 213 203 UD_VECTOR; 214 204 } 215 205 216 - static void inject_db(struct kvm_vcpu *vcpu) 217 - { 218 - vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | 219 - SVM_EVTINJ_TYPE_EXEPT | 220 - DB_VECTOR; 221 - } 222 - 223 206 static int is_page_fault(uint32_t info) 224 207 { 225 208 info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; ··· 312 309 svm_data->asid_generation = 1; 313 310 svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; 314 311 svm_data->next_asid = svm_data->max_asid + 1; 312 + svm_features = cpuid_edx(SVM_CPUID_FUNC); 315 313 316 314 asm volatile ( "sgdt %0" : "=m"(gdt_descr) ); 317 315 gdt = (struct desc_struct *)gdt_descr.address; ··· 463 459 { 464 460 struct vmcb_control_area *control = &vmcb->control; 465 461 struct vmcb_save_area *save = &vmcb->save; 466 - u64 tsc; 467 462 468 463 control->intercept_cr_read = INTERCEPT_CR0_MASK | 469 464 INTERCEPT_CR3_MASK | ··· 514 511 (1ULL << INTERCEPT_VMSAVE) | 515 512 (1ULL << INTERCEPT_STGI) | 516 513 (1ULL << INTERCEPT_CLGI) | 517 - (1ULL << INTERCEPT_SKINIT); 514 + (1ULL << INTERCEPT_SKINIT) | 515 + (1ULL << INTERCEPT_MONITOR) | 516 + (1ULL << INTERCEPT_MWAIT); 518 517 519 518 control->iopm_base_pa = iopm_base; 520 519 control->msrpm_base_pa = msrpm_base; 521 - rdtscll(tsc); 522 - control->tsc_offset = -tsc; 520 + control->tsc_offset = 0; 523 521 control->int_ctl = V_INTR_MASKING_MASK; 524 522 525 523 init_seg(&save->es); ··· 580 576 vcpu->svm->vmcb = page_address(page); 581 577 memset(vcpu->svm->vmcb, 0, PAGE_SIZE); 582 578 vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 583 - vcpu->svm->cr0 = 0x00000010; 584 579 vcpu->svm->asid_generation = 0; 585 580 memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); 586 581 init_vmcb(vcpu->svm->vmcb); 587 582 588 583 fx_init(vcpu); 584 + vcpu->fpu_active = 1; 585 + vcpu->apic_base = 0xfee00000 | 586 + /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | 587 + MSR_IA32_APICBASE_ENABLE; 589 588 590 589 return 0; 591 590 ··· 609 602 610 603 static void svm_vcpu_load(struct kvm_vcpu *vcpu) 611 604 { 612 - get_cpu(); 605 + int cpu, i; 606 + 607 + cpu = get_cpu(); 608 + if (unlikely(cpu != vcpu->cpu)) { 609 + u64 tsc_this, delta; 610 + 611 + /* 612 + * Make sure that the guest sees a monotonically 613 + * increasing TSC. 614 + */ 615 + rdtscll(tsc_this); 616 + delta = vcpu->host_tsc - tsc_this; 617 + vcpu->svm->vmcb->control.tsc_offset += delta; 618 + vcpu->cpu = cpu; 619 + } 620 + 621 + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 622 + rdmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); 613 623 } 614 624 615 625 static void svm_vcpu_put(struct kvm_vcpu *vcpu) 616 626 { 627 + int i; 628 + 629 + for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 630 + wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); 631 + 632 + rdtscll(vcpu->host_tsc); 617 633 put_cpu(); 618 634 } 619 635 ··· 744 714 vcpu->svm->vmcb->save.gdtr.base = dt->base ; 745 715 } 746 716 747 - static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) 717 + static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 748 718 { 749 719 } 750 720 ··· 763 733 } 764 734 } 765 735 #endif 766 - vcpu->svm->cr0 = cr0; 767 - vcpu->svm->vmcb->save.cr0 = cr0 | CR0_PG_MASK | CR0_WP_MASK; 736 + if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) { 737 + vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 738 + vcpu->fpu_active = 1; 739 + } 740 + 768 741 vcpu->cr0 = cr0; 742 + cr0 |= CR0_PG_MASK | CR0_WP_MASK; 743 + cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK); 744 + vcpu->svm->vmcb->save.cr0 = cr0; 769 745 } 770 746 771 747 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ··· 821 785 822 786 static void load_host_msrs(struct kvm_vcpu *vcpu) 823 787 { 824 - int i; 825 - 826 - for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) 827 - wrmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); 788 + #ifdef CONFIG_X86_64 789 + wrmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); 790 + #endif 828 791 } 829 792 830 793 static void save_host_msrs(struct kvm_vcpu *vcpu) 831 794 { 832 - int i; 833 - 834 - for ( i = 0; i < NR_HOST_SAVE_MSRS; i++) 835 - rdmsrl(host_save_msrs[i], vcpu->svm->host_msrs[i]); 795 + #ifdef CONFIG_X86_64 796 + rdmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); 797 + #endif 836 798 } 837 799 838 800 static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) ··· 924 890 case EMULATE_DONE: 925 891 return 1; 926 892 case EMULATE_DO_MMIO: 927 - ++kvm_stat.mmio_exits; 893 + ++vcpu->stat.mmio_exits; 928 894 kvm_run->exit_reason = KVM_EXIT_MMIO; 929 895 return 0; 930 896 case EMULATE_FAIL: ··· 936 902 937 903 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 938 904 return 0; 905 + } 906 + 907 + static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 908 + { 909 + vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 910 + if (!(vcpu->cr0 & CR0_TS_MASK)) 911 + vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK; 912 + vcpu->fpu_active = 1; 913 + 914 + return 1; 939 915 } 940 916 941 917 static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ··· 1025 981 return 0; 1026 982 } 1027 983 1028 - static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address) 984 + static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) 1029 985 { 1030 986 unsigned long addr_mask; 1031 987 unsigned long *reg; ··· 1069 1025 static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1070 1026 { 1071 1027 u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? 1072 - int _in = io_info & SVM_IOIO_TYPE_MASK; 1028 + int size, down, in, string, rep; 1029 + unsigned port; 1030 + unsigned long count; 1031 + gva_t address = 0; 1073 1032 1074 - ++kvm_stat.io_exits; 1033 + ++vcpu->stat.io_exits; 1075 1034 1076 1035 vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; 1077 1036 1078 - kvm_run->exit_reason = KVM_EXIT_IO; 1079 - kvm_run->io.port = io_info >> 16; 1080 - kvm_run->io.direction = (_in) ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 1081 - kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT); 1082 - kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0; 1083 - kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0; 1037 + in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1038 + port = io_info >> 16; 1039 + size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; 1040 + string = (io_info & SVM_IOIO_STR_MASK) != 0; 1041 + rep = (io_info & SVM_IOIO_REP_MASK) != 0; 1042 + count = 1; 1043 + down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; 1084 1044 1085 - if (kvm_run->io.string) { 1045 + if (string) { 1086 1046 unsigned addr_mask; 1087 1047 1088 - addr_mask = io_adress(vcpu, _in, &kvm_run->io.address); 1048 + addr_mask = io_adress(vcpu, in, &address); 1089 1049 if (!addr_mask) { 1090 1050 printk(KERN_DEBUG "%s: get io address failed\n", 1091 1051 __FUNCTION__); 1092 1052 return 1; 1093 1053 } 1094 1054 1095 - if (kvm_run->io.rep) { 1096 - kvm_run->io.count 1097 - = vcpu->regs[VCPU_REGS_RCX] & addr_mask; 1098 - kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags 1099 - & X86_EFLAGS_DF) != 0; 1100 - } 1101 - } else 1102 - kvm_run->io.value = vcpu->svm->vmcb->save.rax; 1103 - return 0; 1055 + if (rep) 1056 + count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; 1057 + } 1058 + return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, 1059 + address, rep, port); 1104 1060 } 1105 1061 1106 1062 static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ··· 1116 1072 return 1; 1117 1073 1118 1074 kvm_run->exit_reason = KVM_EXIT_HLT; 1119 - ++kvm_stat.halt_exits; 1075 + ++vcpu->stat.halt_exits; 1120 1076 return 0; 1121 1077 } 1122 1078 1123 1079 static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1124 1080 { 1125 - vcpu->svm->vmcb->save.rip += 3; 1081 + vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 3; 1082 + skip_emulated_instruction(vcpu); 1126 1083 return kvm_hypercall(vcpu, kvm_run); 1127 1084 } 1128 1085 ··· 1143 1098 static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1144 1099 { 1145 1100 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; 1146 - kvm_run->exit_reason = KVM_EXIT_CPUID; 1147 - return 0; 1101 + kvm_emulate_cpuid(vcpu); 1102 + return 1; 1148 1103 } 1149 1104 1150 1105 static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ··· 1284 1239 */ 1285 1240 if (kvm_run->request_interrupt_window && 1286 1241 !vcpu->irq_summary) { 1287 - ++kvm_stat.irq_window_exits; 1242 + ++vcpu->stat.irq_window_exits; 1288 1243 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 1289 1244 return 0; 1290 1245 } ··· 1312 1267 [SVM_EXIT_WRITE_DR5] = emulate_on_interception, 1313 1268 [SVM_EXIT_WRITE_DR7] = emulate_on_interception, 1314 1269 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 1270 + [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 1315 1271 [SVM_EXIT_INTR] = nop_on_interception, 1316 1272 [SVM_EXIT_NMI] = nop_on_interception, 1317 1273 [SVM_EXIT_SMI] = nop_on_interception, ··· 1334 1288 [SVM_EXIT_STGI] = invalid_op_interception, 1335 1289 [SVM_EXIT_CLGI] = invalid_op_interception, 1336 1290 [SVM_EXIT_SKINIT] = invalid_op_interception, 1291 + [SVM_EXIT_MONITOR] = invalid_op_interception, 1292 + [SVM_EXIT_MWAIT] = invalid_op_interception, 1337 1293 }; 1338 1294 1339 1295 1340 1296 static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1341 1297 { 1342 1298 u32 exit_code = vcpu->svm->vmcb->control.exit_code; 1343 - 1344 - kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; 1345 1299 1346 1300 if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && 1347 1301 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) ··· 1353 1307 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 1354 1308 || svm_exit_handlers[exit_code] == 0) { 1355 1309 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 1356 - printk(KERN_ERR "%s: 0x%x @ 0x%llx cr0 0x%lx rflags 0x%llx\n", 1357 - __FUNCTION__, 1358 - exit_code, 1359 - vcpu->svm->vmcb->save.rip, 1360 - vcpu->cr0, 1361 - vcpu->svm->vmcb->save.rflags); 1310 + kvm_run->hw.hardware_exit_reason = exit_code; 1362 1311 return 0; 1363 1312 } 1364 1313 ··· 1502 1461 load_db_regs(vcpu->svm->db_regs); 1503 1462 } 1504 1463 1505 - fx_save(vcpu->host_fx_image); 1506 - fx_restore(vcpu->guest_fx_image); 1464 + if (vcpu->fpu_active) { 1465 + fx_save(vcpu->host_fx_image); 1466 + fx_restore(vcpu->guest_fx_image); 1467 + } 1507 1468 1508 1469 asm volatile ( 1509 1470 #ifdef CONFIG_X86_64 ··· 1616 1573 #endif 1617 1574 : "cc", "memory" ); 1618 1575 1619 - fx_save(vcpu->guest_fx_image); 1620 - fx_restore(vcpu->host_fx_image); 1576 + if (vcpu->fpu_active) { 1577 + fx_save(vcpu->guest_fx_image); 1578 + fx_restore(vcpu->host_fx_image); 1579 + } 1621 1580 1622 1581 if ((vcpu->svm->vmcb->save.dr7 & 0xff)) 1623 1582 load_db_regs(vcpu->svm->host_db_regs); ··· 1651 1606 vcpu->svm->next_rip = 0; 1652 1607 1653 1608 if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 1654 - kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; 1655 - kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; 1609 + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 1610 + kvm_run->fail_entry.hardware_entry_failure_reason 1611 + = vcpu->svm->vmcb->control.exit_code; 1656 1612 post_kvm_run_save(vcpu, kvm_run); 1657 1613 return 0; 1658 1614 } ··· 1661 1615 r = handle_exit(vcpu, kvm_run); 1662 1616 if (r > 0) { 1663 1617 if (signal_pending(current)) { 1664 - ++kvm_stat.signal_exits; 1618 + ++vcpu->stat.signal_exits; 1665 1619 post_kvm_run_save(vcpu, kvm_run); 1620 + kvm_run->exit_reason = KVM_EXIT_INTR; 1666 1621 return -EINTR; 1667 1622 } 1668 1623 1669 1624 if (dm_request_for_irq_injection(vcpu, kvm_run)) { 1670 - ++kvm_stat.request_irq_exits; 1625 + ++vcpu->stat.request_irq_exits; 1671 1626 post_kvm_run_save(vcpu, kvm_run); 1627 + kvm_run->exit_reason = KVM_EXIT_INTR; 1672 1628 return -EINTR; 1673 1629 } 1674 1630 kvm_resched(vcpu); ··· 1689 1641 { 1690 1642 vcpu->svm->vmcb->save.cr3 = root; 1691 1643 force_new_asid(vcpu); 1644 + 1645 + if (vcpu->fpu_active) { 1646 + vcpu->svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); 1647 + vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK; 1648 + vcpu->fpu_active = 0; 1649 + } 1692 1650 } 1693 1651 1694 1652 static void svm_inject_page_fault(struct kvm_vcpu *vcpu, ··· 1703 1649 { 1704 1650 uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info; 1705 1651 1706 - ++kvm_stat.pf_guest; 1652 + ++vcpu->stat.pf_guest; 1707 1653 1708 1654 if (is_page_fault(exit_int_info)) { 1709 1655 ··· 1763 1709 .get_segment = svm_get_segment, 1764 1710 .set_segment = svm_set_segment, 1765 1711 .get_cs_db_l_bits = svm_get_cs_db_l_bits, 1766 - .decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits, 1712 + .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, 1767 1713 .set_cr0 = svm_set_cr0, 1768 - .set_cr0_no_modeswitch = svm_set_cr0, 1769 1714 .set_cr3 = svm_set_cr3, 1770 1715 .set_cr4 = svm_set_cr4, 1771 1716 .set_efer = svm_set_efer,
+6
drivers/kvm/svm.h
··· 44 44 INTERCEPT_RDTSCP, 45 45 INTERCEPT_ICEBP, 46 46 INTERCEPT_WBINVD, 47 + INTERCEPT_MONITOR, 48 + INTERCEPT_MWAIT, 49 + INTERCEPT_MWAIT_COND, 47 50 }; 48 51 49 52 ··· 301 298 #define SVM_EXIT_RDTSCP 0x087 302 299 #define SVM_EXIT_ICEBP 0x088 303 300 #define SVM_EXIT_WBINVD 0x089 301 + #define SVM_EXIT_MONITOR 0x08a 302 + #define SVM_EXIT_MWAIT 0x08b 303 + #define SVM_EXIT_MWAIT_COND 0x08c 304 304 #define SVM_EXIT_NPF 0x400 305 305 306 306 #define SVM_EXIT_ERR -1
+187 -86
drivers/kvm/vmx.c
··· 17 17 18 18 #include "kvm.h" 19 19 #include "vmx.h" 20 - #include "kvm_vmx.h" 21 20 #include <linux/module.h> 22 21 #include <linux/kernel.h> 23 22 #include <linux/mm.h> ··· 69 70 VMX_SEGMENT_FIELD(LDTR), 70 71 }; 71 72 73 + /* 74 + * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it 75 + * away by decrementing the array size. 76 + */ 72 77 static const u32 vmx_msr_index[] = { 73 78 #ifdef CONFIG_X86_64 74 79 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, ··· 81 78 }; 82 79 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) 83 80 81 + #ifdef CONFIG_X86_64 82 + static unsigned msr_offset_kernel_gs_base; 83 + #define NR_64BIT_MSRS 4 84 + /* 85 + * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt 86 + * mechanism (cpu bug AA24) 87 + */ 88 + #define NR_BAD_MSRS 2 89 + #else 90 + #define NR_64BIT_MSRS 0 91 + #define NR_BAD_MSRS 0 92 + #endif 93 + 84 94 static inline int is_page_fault(u32 intr_info) 85 95 { 86 96 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 87 97 INTR_INFO_VALID_MASK)) == 88 98 (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); 99 + } 100 + 101 + static inline int is_no_device(u32 intr_info) 102 + { 103 + return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 104 + INTR_INFO_VALID_MASK)) == 105 + (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); 89 106 } 90 107 91 108 static inline int is_external_interrupt(u32 intr_info) ··· 223 200 #endif 224 201 } 225 202 203 + static void vmcs_clear_bits(unsigned long field, u32 mask) 204 + { 205 + vmcs_writel(field, vmcs_readl(field) & ~mask); 206 + } 207 + 208 + static void vmcs_set_bits(unsigned long field, u32 mask) 209 + { 210 + vmcs_writel(field, vmcs_readl(field) | mask); 211 + } 212 + 226 213 /* 227 214 * Switches to specified vcpu, until a matching vcpu_put(), but assumes 228 215 * vcpu mutex is already taken. ··· 327 294 INTR_TYPE_EXCEPTION | 328 295 INTR_INFO_DELIEVER_CODE_MASK | 329 296 INTR_INFO_VALID_MASK); 297 + } 298 + 299 + /* 300 + * Set up the vmcs to automatically save and restore system 301 + * msrs. Don't touch the 64-bit msrs if the guest is in legacy 302 + * mode, as fiddling with msrs is very expensive. 303 + */ 304 + static void setup_msrs(struct kvm_vcpu *vcpu) 305 + { 306 + int nr_skip, nr_good_msrs; 307 + 308 + if (is_long_mode(vcpu)) 309 + nr_skip = NR_BAD_MSRS; 310 + else 311 + nr_skip = NR_64BIT_MSRS; 312 + nr_good_msrs = vcpu->nmsrs - nr_skip; 313 + 314 + /* 315 + * MSR_K6_STAR is only needed on long mode guests, and only 316 + * if efer.sce is enabled. 317 + */ 318 + if (find_msr_entry(vcpu, MSR_K6_STAR)) { 319 + --nr_good_msrs; 320 + #ifdef CONFIG_X86_64 321 + if (is_long_mode(vcpu) && (vcpu->shadow_efer & EFER_SCE)) 322 + ++nr_good_msrs; 323 + #endif 324 + } 325 + 326 + vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, 327 + virt_to_phys(vcpu->guest_msrs + nr_skip)); 328 + vmcs_writel(VM_EXIT_MSR_STORE_ADDR, 329 + virt_to_phys(vcpu->guest_msrs + nr_skip)); 330 + vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, 331 + virt_to_phys(vcpu->host_msrs + nr_skip)); 332 + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */ 333 + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ 334 + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ 330 335 } 331 336 332 337 /* ··· 783 712 784 713 vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); 785 714 vmcs_write32(GUEST_CS_LIMIT, 0xffff); 715 + if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) 716 + vmcs_writel(GUEST_CS_BASE, 0xf0000); 786 717 vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4); 787 718 788 719 fix_rmode_seg(VCPU_SREG_ES, &vcpu->rmode.es); ··· 827 754 828 755 #endif 829 756 830 - static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu) 757 + static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 831 758 { 832 - vcpu->cr0 &= KVM_GUEST_CR0_MASK; 833 - vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK; 834 - 835 759 vcpu->cr4 &= KVM_GUEST_CR4_MASK; 836 760 vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; 837 761 } ··· 850 780 } 851 781 #endif 852 782 853 - vmcs_writel(CR0_READ_SHADOW, cr0); 854 - vmcs_writel(GUEST_CR0, 855 - (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); 856 - vcpu->cr0 = cr0; 857 - } 783 + if (!(cr0 & CR0_TS_MASK)) { 784 + vcpu->fpu_active = 1; 785 + vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK); 786 + } 858 787 859 - /* 860 - * Used when restoring the VM to avoid corrupting segment registers 861 - */ 862 - static void vmx_set_cr0_no_modeswitch(struct kvm_vcpu *vcpu, unsigned long cr0) 863 - { 864 - if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK)) 865 - enter_rmode(vcpu); 866 - 867 - vcpu->rmode.active = ((cr0 & CR0_PE_MASK) == 0); 868 - update_exception_bitmap(vcpu); 869 788 vmcs_writel(CR0_READ_SHADOW, cr0); 870 789 vmcs_writel(GUEST_CR0, 871 790 (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); ··· 864 805 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 865 806 { 866 807 vmcs_writel(GUEST_CR3, cr3); 808 + 809 + if (!(vcpu->cr0 & CR0_TS_MASK)) { 810 + vcpu->fpu_active = 0; 811 + vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); 812 + vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); 813 + } 867 814 } 868 815 869 816 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ··· 900 835 901 836 msr->data = efer & ~EFER_LME; 902 837 } 838 + setup_msrs(vcpu); 903 839 } 904 840 905 841 #endif ··· 944 878 vmcs_writel(sf->base, var->base); 945 879 vmcs_write32(sf->limit, var->limit); 946 880 vmcs_write16(sf->selector, var->selector); 947 - if (var->unusable) 881 + if (vcpu->rmode.active && var->s) { 882 + /* 883 + * Hack real-mode segments into vm86 compatibility. 884 + */ 885 + if (var->base == 0xffff0000 && var->selector == 0xf000) 886 + vmcs_writel(sf->base, 0xf0000); 887 + ar = 0xf3; 888 + } else if (var->unusable) 948 889 ar = 1 << 16; 949 890 else { 950 891 ar = var->type & 15; ··· 1006 933 gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; 1007 934 char *page; 1008 935 1009 - p1 = _gfn_to_page(kvm, fn++); 1010 - p2 = _gfn_to_page(kvm, fn++); 1011 - p3 = _gfn_to_page(kvm, fn); 936 + p1 = gfn_to_page(kvm, fn++); 937 + p2 = gfn_to_page(kvm, fn++); 938 + p3 = gfn_to_page(kvm, fn); 1012 939 1013 940 if (!p1 || !p2 || !p3) { 1014 941 kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__); ··· 1064 991 struct descriptor_table dt; 1065 992 int i; 1066 993 int ret = 0; 1067 - int nr_good_msrs; 1068 994 extern asmlinkage void kvm_vmx_return(void); 1069 995 1070 996 if (!init_rmode_tss(vcpu->kvm)) { ··· 1208 1136 vcpu->host_msrs[j].reserved = 0; 1209 1137 vcpu->host_msrs[j].data = data; 1210 1138 vcpu->guest_msrs[j] = vcpu->host_msrs[j]; 1139 + #ifdef CONFIG_X86_64 1140 + if (index == MSR_KERNEL_GS_BASE) 1141 + msr_offset_kernel_gs_base = j; 1142 + #endif 1211 1143 ++vcpu->nmsrs; 1212 1144 } 1213 - printk(KERN_DEBUG "kvm: msrs: %d\n", vcpu->nmsrs); 1214 1145 1215 - nr_good_msrs = vcpu->nmsrs - NR_BAD_MSRS; 1216 - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, 1217 - virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS)); 1218 - vmcs_writel(VM_EXIT_MSR_STORE_ADDR, 1219 - virt_to_phys(vcpu->guest_msrs + NR_BAD_MSRS)); 1220 - vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, 1221 - virt_to_phys(vcpu->host_msrs + NR_BAD_MSRS)); 1146 + setup_msrs(vcpu); 1147 + 1222 1148 vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS, 1223 1149 (HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */ 1224 - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */ 1225 - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ 1226 - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ 1227 - 1228 1150 1229 1151 /* 22.2.1, 20.8.1 */ 1230 1152 vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS, ··· 1230 1164 vmcs_writel(TPR_THRESHOLD, 0); 1231 1165 #endif 1232 1166 1233 - vmcs_writel(CR0_GUEST_HOST_MASK, KVM_GUEST_CR0_MASK); 1167 + vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 1234 1168 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); 1235 1169 1236 1170 vcpu->cr0 = 0x60000010; ··· 1256 1190 u16 sp = vmcs_readl(GUEST_RSP); 1257 1191 u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT); 1258 1192 1259 - if (sp > ss_limit || sp - 6 > sp) { 1193 + if (sp > ss_limit || sp < 6 ) { 1260 1194 vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n", 1261 1195 __FUNCTION__, 1262 1196 vmcs_readl(GUEST_RSP), ··· 1396 1330 asm ("int $2"); 1397 1331 return 1; 1398 1332 } 1333 + 1334 + if (is_no_device(intr_info)) { 1335 + vcpu->fpu_active = 1; 1336 + vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); 1337 + if (!(vcpu->cr0 & CR0_TS_MASK)) 1338 + vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); 1339 + return 1; 1340 + } 1341 + 1399 1342 error_code = 0; 1400 1343 rip = vmcs_readl(GUEST_RIP); 1401 1344 if (intr_info & INTR_INFO_DELIEVER_CODE_MASK) ··· 1430 1355 case EMULATE_DONE: 1431 1356 return 1; 1432 1357 case EMULATE_DO_MMIO: 1433 - ++kvm_stat.mmio_exits; 1358 + ++vcpu->stat.mmio_exits; 1434 1359 kvm_run->exit_reason = KVM_EXIT_MMIO; 1435 1360 return 0; 1436 1361 case EMULATE_FAIL: ··· 1459 1384 static int handle_external_interrupt(struct kvm_vcpu *vcpu, 1460 1385 struct kvm_run *kvm_run) 1461 1386 { 1462 - ++kvm_stat.irq_exits; 1387 + ++vcpu->stat.irq_exits; 1463 1388 return 1; 1464 1389 } 1465 1390 ··· 1469 1394 return 0; 1470 1395 } 1471 1396 1472 - static int get_io_count(struct kvm_vcpu *vcpu, u64 *count) 1397 + static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count) 1473 1398 { 1474 1399 u64 inst; 1475 1400 gva_t rip; ··· 1514 1439 done: 1515 1440 countr_size *= 8; 1516 1441 *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); 1442 + //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]); 1517 1443 return 1; 1518 1444 } 1519 1445 1520 1446 static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1521 1447 { 1522 1448 u64 exit_qualification; 1449 + int size, down, in, string, rep; 1450 + unsigned port; 1451 + unsigned long count; 1452 + gva_t address; 1523 1453 1524 - ++kvm_stat.io_exits; 1454 + ++vcpu->stat.io_exits; 1525 1455 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 1526 - kvm_run->exit_reason = KVM_EXIT_IO; 1527 - if (exit_qualification & 8) 1528 - kvm_run->io.direction = KVM_EXIT_IO_IN; 1529 - else 1530 - kvm_run->io.direction = KVM_EXIT_IO_OUT; 1531 - kvm_run->io.size = (exit_qualification & 7) + 1; 1532 - kvm_run->io.string = (exit_qualification & 16) != 0; 1533 - kvm_run->io.string_down 1534 - = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; 1535 - kvm_run->io.rep = (exit_qualification & 32) != 0; 1536 - kvm_run->io.port = exit_qualification >> 16; 1537 - if (kvm_run->io.string) { 1538 - if (!get_io_count(vcpu, &kvm_run->io.count)) 1456 + in = (exit_qualification & 8) != 0; 1457 + size = (exit_qualification & 7) + 1; 1458 + string = (exit_qualification & 16) != 0; 1459 + down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; 1460 + count = 1; 1461 + rep = (exit_qualification & 32) != 0; 1462 + port = exit_qualification >> 16; 1463 + address = 0; 1464 + if (string) { 1465 + if (rep && !get_io_count(vcpu, &count)) 1539 1466 return 1; 1540 - kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS); 1541 - } else 1542 - kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */ 1543 - return 0; 1467 + address = vmcs_readl(GUEST_LINEAR_ADDRESS); 1468 + } 1469 + return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, 1470 + address, rep, port); 1544 1471 } 1545 1472 1546 1473 static void ··· 1591 1514 return 1; 1592 1515 }; 1593 1516 break; 1517 + case 2: /* clts */ 1518 + vcpu_load_rsp_rip(vcpu); 1519 + vcpu->fpu_active = 1; 1520 + vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); 1521 + vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); 1522 + vcpu->cr0 &= ~CR0_TS_MASK; 1523 + vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); 1524 + skip_emulated_instruction(vcpu); 1525 + return 1; 1594 1526 case 1: /*mov from cr*/ 1595 1527 switch (cr) { 1596 1528 case 3: ··· 1609 1523 skip_emulated_instruction(vcpu); 1610 1524 return 1; 1611 1525 case 8: 1612 - printk(KERN_DEBUG "handle_cr: read CR8 " 1613 - "cpu erratum AA15\n"); 1614 1526 vcpu_load_rsp_rip(vcpu); 1615 1527 vcpu->regs[reg] = vcpu->cr8; 1616 1528 vcpu_put_rsp_rip(vcpu); ··· 1667 1583 1668 1584 static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1669 1585 { 1670 - kvm_run->exit_reason = KVM_EXIT_CPUID; 1671 - return 0; 1586 + kvm_emulate_cpuid(vcpu); 1587 + return 1; 1672 1588 } 1673 1589 1674 1590 static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ··· 1723 1639 if (kvm_run->request_interrupt_window && 1724 1640 !vcpu->irq_summary) { 1725 1641 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 1726 - ++kvm_stat.irq_window_exits; 1642 + ++vcpu->stat.irq_window_exits; 1727 1643 return 0; 1728 1644 } 1729 1645 return 1; ··· 1736 1652 return 1; 1737 1653 1738 1654 kvm_run->exit_reason = KVM_EXIT_HLT; 1739 - ++kvm_stat.halt_exits; 1655 + ++vcpu->stat.halt_exits; 1740 1656 return 0; 1741 1657 } 1742 1658 1743 1659 static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1744 1660 { 1745 - vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3); 1661 + skip_emulated_instruction(vcpu); 1746 1662 return kvm_hypercall(vcpu, kvm_run); 1747 1663 } 1748 1664 ··· 1783 1699 exit_reason != EXIT_REASON_EXCEPTION_NMI ) 1784 1700 printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 1785 1701 "exit reason is 0x%x\n", __FUNCTION__, exit_reason); 1786 - kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); 1787 1702 if (exit_reason < kvm_vmx_max_exit_handlers 1788 1703 && kvm_vmx_exit_handlers[exit_reason]) 1789 1704 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); ··· 1846 1763 if (vcpu->guest_debug.enabled) 1847 1764 kvm_guest_debug_pre(vcpu); 1848 1765 1849 - fx_save(vcpu->host_fx_image); 1850 - fx_restore(vcpu->guest_fx_image); 1766 + if (vcpu->fpu_active) { 1767 + fx_save(vcpu->host_fx_image); 1768 + fx_restore(vcpu->guest_fx_image); 1769 + } 1770 + /* 1771 + * Loading guest fpu may have cleared host cr0.ts 1772 + */ 1773 + vmcs_writel(HOST_CR0, read_cr0()); 1851 1774 1852 - save_msrs(vcpu->host_msrs, vcpu->nmsrs); 1853 - load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); 1775 + #ifdef CONFIG_X86_64 1776 + if (is_long_mode(vcpu)) { 1777 + save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); 1778 + load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); 1779 + } 1780 + #endif 1854 1781 1855 1782 asm ( 1856 1783 /* Store host registers */ ··· 2002 1909 2003 1910 reload_tss(); 2004 1911 } 2005 - ++kvm_stat.exits; 1912 + ++vcpu->stat.exits; 2006 1913 2007 - save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); 2008 - load_msrs(vcpu->host_msrs, NR_BAD_MSRS); 1914 + #ifdef CONFIG_X86_64 1915 + if (is_long_mode(vcpu)) { 1916 + save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); 1917 + load_msrs(vcpu->host_msrs, NR_BAD_MSRS); 1918 + } 1919 + #endif 2009 1920 2010 - fx_save(vcpu->guest_fx_image); 2011 - fx_restore(vcpu->host_fx_image); 1921 + if (vcpu->fpu_active) { 1922 + fx_save(vcpu->guest_fx_image); 1923 + fx_restore(vcpu->host_fx_image); 1924 + } 1925 + 2012 1926 vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; 2013 1927 2014 1928 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 2015 1929 2016 - kvm_run->exit_type = 0; 2017 1930 if (fail) { 2018 - kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; 2019 - kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); 1931 + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 1932 + kvm_run->fail_entry.hardware_entry_failure_reason 1933 + = vmcs_read32(VM_INSTRUCTION_ERROR); 2020 1934 r = 0; 2021 1935 } else { 2022 1936 /* ··· 2033 1933 profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); 2034 1934 2035 1935 vcpu->launched = 1; 2036 - kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; 2037 1936 r = kvm_handle_exit(kvm_run, vcpu); 2038 1937 if (r > 0) { 2039 1938 /* Give scheduler a change to reschedule. */ 2040 1939 if (signal_pending(current)) { 2041 - ++kvm_stat.signal_exits; 1940 + ++vcpu->stat.signal_exits; 2042 1941 post_kvm_run_save(vcpu, kvm_run); 1942 + kvm_run->exit_reason = KVM_EXIT_INTR; 2043 1943 return -EINTR; 2044 1944 } 2045 1945 2046 1946 if (dm_request_for_irq_injection(vcpu, kvm_run)) { 2047 - ++kvm_stat.request_irq_exits; 1947 + ++vcpu->stat.request_irq_exits; 2048 1948 post_kvm_run_save(vcpu, kvm_run); 1949 + kvm_run->exit_reason = KVM_EXIT_INTR; 2049 1950 return -EINTR; 2050 1951 } 2051 1952 ··· 2070 1969 { 2071 1970 u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 2072 1971 2073 - ++kvm_stat.pf_guest; 1972 + ++vcpu->stat.pf_guest; 2074 1973 2075 1974 if (is_page_fault(vect_info)) { 2076 1975 printk(KERN_DEBUG "inject_page_fault: " ··· 2127 2026 vmcs_clear(vmcs); 2128 2027 vcpu->vmcs = vmcs; 2129 2028 vcpu->launched = 0; 2029 + vcpu->fpu_active = 1; 2130 2030 2131 2031 return 0; 2132 2032 ··· 2164 2062 .get_segment = vmx_get_segment, 2165 2063 .set_segment = vmx_set_segment, 2166 2064 .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 2167 - .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits, 2065 + .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, 2168 2066 .set_cr0 = vmx_set_cr0, 2169 - .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch, 2170 2067 .set_cr3 = vmx_set_cr3, 2171 2068 .set_cr4 = vmx_set_cr4, 2172 2069 #ifdef CONFIG_X86_64
+12 -39
drivers/kvm/x86_emulate.c
··· 833 833 dst.ptr = (unsigned long *)cr2; 834 834 dst.bytes = (d & ByteOp) ? 1 : op_bytes; 835 835 if (d & BitOp) { 836 - dst.ptr += src.val / BITS_PER_LONG; 837 - dst.bytes = sizeof(long); 836 + unsigned long mask = ~(dst.bytes * 8 - 1); 837 + 838 + dst.ptr = (void *)dst.ptr + (src.val & mask) / 8; 838 839 } 839 840 if (!(d & Mov) && /* optimisation - avoid slow emulated read */ 840 841 ((rc = ops->read_emulated((unsigned long)dst.ptr, ··· 1045 1044 if ((rc = ops->write_std( 1046 1045 register_address(ctxt->ss_base, 1047 1046 _regs[VCPU_REGS_RSP]), 1048 - dst.val, dst.bytes, ctxt)) != 0) 1047 + &dst.val, dst.bytes, ctxt)) != 0) 1049 1048 goto done; 1050 1049 dst.val = dst.orig_val; /* skanky: disable writeback */ 1051 1050 break; ··· 1078 1077 case OP_MEM: 1079 1078 if (lock_prefix) 1080 1079 rc = ops->cmpxchg_emulated((unsigned long)dst. 1081 - ptr, dst.orig_val, 1082 - dst.val, dst.bytes, 1080 + ptr, &dst.orig_val, 1081 + &dst.val, dst.bytes, 1083 1082 ctxt); 1084 1083 else 1085 1084 rc = ops->write_emulated((unsigned long)dst.ptr, 1086 - dst.val, dst.bytes, 1085 + &dst.val, dst.bytes, 1087 1086 ctxt); 1088 1087 if (rc != 0) 1089 1088 goto done; ··· 1321 1320 realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags); 1322 1321 break; 1323 1322 case 0xc7: /* Grp9 (cmpxchg8b) */ 1324 - #if defined(__i386__) 1325 1323 { 1326 - unsigned long old_lo, old_hi; 1327 - if (((rc = ops->read_emulated(cr2 + 0, &old_lo, 4, 1328 - ctxt)) != 0) 1329 - || ((rc = ops->read_emulated(cr2 + 4, &old_hi, 4, 1330 - ctxt)) != 0)) 1331 - goto done; 1332 - if ((old_lo != _regs[VCPU_REGS_RAX]) 1333 - || (old_hi != _regs[VCPU_REGS_RDX])) { 1334 - _regs[VCPU_REGS_RAX] = old_lo; 1335 - _regs[VCPU_REGS_RDX] = old_hi; 1336 - _eflags &= ~EFLG_ZF; 1337 - } else if (ops->cmpxchg8b_emulated == NULL) { 1338 - rc = X86EMUL_UNHANDLEABLE; 1339 - goto done; 1340 - } else { 1341 - if ((rc = ops->cmpxchg8b_emulated(cr2, old_lo, 1342 - old_hi, 1343 - _regs[VCPU_REGS_RBX], 1344 - _regs[VCPU_REGS_RCX], 1345 - ctxt)) != 0) 1346 - goto done; 1347 - _eflags |= EFLG_ZF; 1348 - } 1349 - break; 1350 - } 1351 - #elif defined(CONFIG_X86_64) 1352 - { 1353 - unsigned long old, new; 1324 + u64 old, new; 1354 1325 if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0) 1355 1326 goto done; 1356 1327 if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) || ··· 1331 1358 _regs[VCPU_REGS_RDX] = (u32) (old >> 32); 1332 1359 _eflags &= ~EFLG_ZF; 1333 1360 } else { 1334 - new = (_regs[VCPU_REGS_RCX] << 32) | (u32) _regs[VCPU_REGS_RBX]; 1335 - if ((rc = ops->cmpxchg_emulated(cr2, old, 1336 - new, 8, ctxt)) != 0) 1361 + new = ((u64)_regs[VCPU_REGS_RCX] << 32) 1362 + | (u32) _regs[VCPU_REGS_RBX]; 1363 + if ((rc = ops->cmpxchg_emulated(cr2, &old, 1364 + &new, 8, ctxt)) != 0) 1337 1365 goto done; 1338 1366 _eflags |= EFLG_ZF; 1339 1367 } 1340 1368 break; 1341 1369 } 1342 - #endif 1343 1370 } 1344 1371 goto writeback; 1345 1372
+6 -26
drivers/kvm/x86_emulate.h
··· 59 59 * @val: [OUT] Value read from memory, zero-extended to 'u_long'. 60 60 * @bytes: [IN ] Number of bytes to read from memory. 61 61 */ 62 - int (*read_std)(unsigned long addr, 63 - unsigned long *val, 62 + int (*read_std)(unsigned long addr, void *val, 64 63 unsigned int bytes, struct x86_emulate_ctxt * ctxt); 65 64 66 65 /* ··· 70 71 * required). 71 72 * @bytes: [IN ] Number of bytes to write to memory. 72 73 */ 73 - int (*write_std)(unsigned long addr, 74 - unsigned long val, 74 + int (*write_std)(unsigned long addr, const void *val, 75 75 unsigned int bytes, struct x86_emulate_ctxt * ctxt); 76 76 77 77 /* ··· 80 82 * @bytes: [IN ] Number of bytes to read from memory. 81 83 */ 82 84 int (*read_emulated) (unsigned long addr, 83 - unsigned long *val, 85 + void *val, 84 86 unsigned int bytes, 85 87 struct x86_emulate_ctxt * ctxt); 86 88 ··· 92 94 * @bytes: [IN ] Number of bytes to write to memory. 93 95 */ 94 96 int (*write_emulated) (unsigned long addr, 95 - unsigned long val, 97 + const void *val, 96 98 unsigned int bytes, 97 99 struct x86_emulate_ctxt * ctxt); 98 100 ··· 105 107 * @bytes: [IN ] Number of bytes to access using CMPXCHG. 106 108 */ 107 109 int (*cmpxchg_emulated) (unsigned long addr, 108 - unsigned long old, 109 - unsigned long new, 110 + const void *old, 111 + const void *new, 110 112 unsigned int bytes, 111 113 struct x86_emulate_ctxt * ctxt); 112 114 113 - /* 114 - * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an 115 - * emulated/special memory area. 116 - * @addr: [IN ] Linear address to access. 117 - * @old: [IN ] Value expected to be current at @addr. 118 - * @new: [IN ] Value to write to @addr. 119 - * NOTES: 120 - * 1. This function is only ever called when emulating a real CMPXCHG8B. 121 - * 2. This function is *never* called on x86/64 systems. 122 - * 2. Not defining this function (i.e., specifying NULL) is equivalent 123 - * to defining a function that always returns X86EMUL_UNHANDLEABLE. 124 - */ 125 - int (*cmpxchg8b_emulated) (unsigned long addr, 126 - unsigned long old_lo, 127 - unsigned long old_hi, 128 - unsigned long new_lo, 129 - unsigned long new_hi, 130 - struct x86_emulate_ctxt * ctxt); 131 115 }; 132 116 133 117 struct cpu_user_regs;
+1
include/linux/Kbuild
··· 96 96 header-y += ixjuser.h 97 97 header-y += jffs2.h 98 98 header-y += keyctl.h 99 + header-y += kvm.h 99 100 header-y += limits.h 100 101 header-y += lock_dlm_plock.h 101 102 header-y += magic.h
+95 -38
include/linux/kvm.h
··· 11 11 #include <asm/types.h> 12 12 #include <linux/ioctl.h> 13 13 14 - #define KVM_API_VERSION 4 14 + #define KVM_API_VERSION 12 15 15 16 16 /* 17 17 * Architectural interrupt line count, and the size of the bitmap needed ··· 33 33 /* for kvm_memory_region::flags */ 34 34 #define KVM_MEM_LOG_DIRTY_PAGES 1UL 35 35 36 - 37 - #define KVM_EXIT_TYPE_FAIL_ENTRY 1 38 - #define KVM_EXIT_TYPE_VM_EXIT 2 36 + struct kvm_memory_alias { 37 + __u32 slot; /* this has a different namespace than memory slots */ 38 + __u32 flags; 39 + __u64 guest_phys_addr; 40 + __u64 memory_size; 41 + __u64 target_phys_addr; 42 + }; 39 43 40 44 enum kvm_exit_reason { 41 45 KVM_EXIT_UNKNOWN = 0, 42 46 KVM_EXIT_EXCEPTION = 1, 43 47 KVM_EXIT_IO = 2, 44 - KVM_EXIT_CPUID = 3, 48 + KVM_EXIT_HYPERCALL = 3, 45 49 KVM_EXIT_DEBUG = 4, 46 50 KVM_EXIT_HLT = 5, 47 51 KVM_EXIT_MMIO = 6, 48 52 KVM_EXIT_IRQ_WINDOW_OPEN = 7, 49 53 KVM_EXIT_SHUTDOWN = 8, 54 + KVM_EXIT_FAIL_ENTRY = 9, 55 + KVM_EXIT_INTR = 10, 50 56 }; 51 57 52 - /* for KVM_RUN */ 58 + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ 53 59 struct kvm_run { 54 60 /* in */ 55 - __u32 emulated; /* skip current instruction */ 56 - __u32 mmio_completed; /* mmio request completed */ 57 61 __u8 request_interrupt_window; 58 62 __u8 padding1[7]; 59 63 60 64 /* out */ 61 - __u32 exit_type; 62 65 __u32 exit_reason; 63 - __u32 instruction_length; 64 66 __u8 ready_for_interrupt_injection; 65 67 __u8 if_flag; 66 - __u16 padding2; 68 + __u8 padding2[2]; 67 69 68 70 /* in (pre_kvm_run), out (post_kvm_run) */ 69 71 __u64 cr8; ··· 74 72 union { 75 73 /* KVM_EXIT_UNKNOWN */ 76 74 struct { 77 - __u32 hardware_exit_reason; 75 + __u64 hardware_exit_reason; 78 76 } hw; 77 + /* KVM_EXIT_FAIL_ENTRY */ 78 + struct { 79 + __u64 hardware_entry_failure_reason; 80 + } fail_entry; 79 81 /* KVM_EXIT_EXCEPTION */ 80 82 struct { 81 83 __u32 exception; 82 84 __u32 error_code; 83 85 } ex; 84 86 /* KVM_EXIT_IO */ 85 - struct { 87 + struct kvm_io { 86 88 #define KVM_EXIT_IO_IN 0 87 89 #define KVM_EXIT_IO_OUT 1 88 90 __u8 direction; 89 91 __u8 size; /* bytes */ 90 - __u8 string; 91 - __u8 string_down; 92 - __u8 rep; 93 - __u8 pad; 94 92 __u16 port; 95 - __u64 count; 96 - union { 97 - __u64 address; 98 - __u32 value; 99 - }; 93 + __u32 count; 94 + __u64 data_offset; /* relative to kvm_run start */ 100 95 } io; 101 96 struct { 102 97 } debug; ··· 104 105 __u32 len; 105 106 __u8 is_write; 106 107 } mmio; 108 + /* KVM_EXIT_HYPERCALL */ 109 + struct { 110 + __u64 args[6]; 111 + __u64 ret; 112 + __u32 longmode; 113 + __u32 pad; 114 + } hypercall; 107 115 }; 108 116 }; 109 117 ··· 122 116 __u64 r8, r9, r10, r11; 123 117 __u64 r12, r13, r14, r15; 124 118 __u64 rip, rflags; 119 + }; 120 + 121 + /* for KVM_GET_FPU and KVM_SET_FPU */ 122 + struct kvm_fpu { 123 + __u8 fpr[8][16]; 124 + __u16 fcw; 125 + __u16 fsw; 126 + __u8 ftwx; /* in fxsave format */ 127 + __u8 pad1; 128 + __u16 last_opcode; 129 + __u64 last_ip; 130 + __u64 last_dp; 131 + __u8 xmm[16][16]; 132 + __u32 mxcsr; 133 + __u32 pad2; 125 134 }; 126 135 127 136 struct kvm_segment { ··· 231 210 }; 232 211 }; 233 212 213 + struct kvm_cpuid_entry { 214 + __u32 function; 215 + __u32 eax; 216 + __u32 ebx; 217 + __u32 ecx; 218 + __u32 edx; 219 + __u32 padding; 220 + }; 221 + 222 + /* for KVM_SET_CPUID */ 223 + struct kvm_cpuid { 224 + __u32 nent; 225 + __u32 padding; 226 + struct kvm_cpuid_entry entries[0]; 227 + }; 228 + 229 + /* for KVM_SET_SIGNAL_MASK */ 230 + struct kvm_signal_mask { 231 + __u32 len; 232 + __u8 sigset[0]; 233 + }; 234 + 234 235 #define KVMIO 0xAE 235 236 236 237 /* 237 238 * ioctls for /dev/kvm fds: 238 239 */ 239 - #define KVM_GET_API_VERSION _IO(KVMIO, 1) 240 - #define KVM_CREATE_VM _IO(KVMIO, 2) /* returns a VM fd */ 241 - #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list) 240 + #define KVM_GET_API_VERSION _IO(KVMIO, 0x00) 241 + #define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ 242 + #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) 243 + /* 244 + * Check if a kvm extension is available. Argument is extension number, 245 + * return is 1 (yes) or 0 (no, sorry). 246 + */ 247 + #define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03) 248 + /* 249 + * Get size for mmap(vcpu_fd) 250 + */ 251 + #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ 242 252 243 253 /* 244 254 * ioctls for VM fds 245 255 */ 246 - #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region) 256 + #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) 247 257 /* 248 258 * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns 249 259 * a vcpu fd. 250 260 */ 251 - #define KVM_CREATE_VCPU _IOW(KVMIO, 11, int) 252 - #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log) 261 + #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) 262 + #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) 263 + #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) 253 264 254 265 /* 255 266 * ioctls for vcpu fds 256 267 */ 257 - #define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run) 258 - #define KVM_GET_REGS _IOR(KVMIO, 3, struct kvm_regs) 259 - #define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs) 260 - #define KVM_GET_SREGS _IOR(KVMIO, 5, struct kvm_sregs) 261 - #define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs) 262 - #define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation) 263 - #define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt) 264 - #define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest) 265 - #define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs) 266 - #define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs) 268 + #define KVM_RUN _IO(KVMIO, 0x80) 269 + #define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs) 270 + #define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs) 271 + #define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs) 272 + #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) 273 + #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) 274 + #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) 275 + #define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) 276 + #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) 277 + #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) 278 + #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) 279 + #define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) 280 + #define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu) 281 + #define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) 267 282 268 283 #endif
+1
include/linux/miscdevice.h
··· 29 29 30 30 #define TUN_MINOR 200 31 31 #define HPET_MINOR 228 32 + #define KVM_MINOR 232 32 33 33 34 struct device; 34 35