Merge branch 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (140 commits)
KVM: MMU: handle large host sptes on invlpg/resync
KVM: Add locking to virtual i8259 interrupt controller
KVM: MMU: Don't treat a global pte as such if cr4.pge is cleared
MAINTAINERS: Maintainership changes for kvm/ia64
KVM: ia64: Fix kvm_arch_vcpu_ioctl_[gs]et_regs()
KVM: x86: Rework user space NMI injection as KVM_CAP_USER_NMI
KVM: VMX: Fix pending NMI-vs.-IRQ race for user space irqchip
KVM: fix handling of ACK from shared guest IRQ
KVM: MMU: check for present pdptr shadow page in walk_shadow
KVM: Consolidate userspace memory capability reporting into common code
KVM: Advertise the bug in memory region destruction as fixed
KVM: use cpumask_var_t for cpus_hardware_enabled
KVM: use modern cpumask primitives, no cpumask_t on stack
KVM: Extract core of kvm_flush_remote_tlbs/kvm_reload_remote_mmus
KVM: set owner of cpu and vm file operations
anon_inodes: use fops->owner for module refcount
x86: KVM guest: kvm_get_tsc_khz: return khz, not lpj
KVM: MMU: prepopulate the shadow on invlpg
KVM: MMU: skip global pgtables on sync due to cr3 switch
KVM: MMU: collapse remote TLB flushes on root sync
...

+4752 -2598
-2
MAINTAINERS
···
 S:	Supported
 
 KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
-P:	Anthony Xu
-M:	anthony.xu@intel.com
 P:	Xiantao Zhang
 M:	xiantao.zhang@intel.com
 L:	kvm-ia64@vger.kernel.org
+4 -2
arch/ia64/include/asm/kvm.h
···
 };
 
 struct kvm_regs {
-	char *saved_guest;
-	char *saved_stack;
 	struct saved_vpd vpd;
 	/*Arch-regs*/
 	int mp_state;
···
 	unsigned long fp_psr;	/*used for lazy float register */
 	unsigned long saved_gp;
 	/*for phycial emulation */
+
+	union context saved_guest;
+
+	unsigned long reserved[64];	/* for future use */
 };
 
 struct kvm_sregs {
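The switch from user-space pointers to an embedded union context saved_guest (plus a reserved pad) means the guest context now travels inside struct kvm_regs itself, so the host copies it with memcpy instead of extra copy_from_user/copy_to_user of caller buffers (see the kvm-ia64.c hunks further down). A minimal userspace-side sketch of what that buys, not part of the patch, assuming an already-created vcpu fd and omitting error handling:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Save and restore a vcpu's register state as one flat struct. */
	static int roundtrip_regs(int vcpu_fd)
	{
		struct kvm_regs regs;

		/* saved_guest comes along inside regs, no extra pointers to marshal */
		if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
			return -1;
		/* ... checkpoint or migrate 'regs' as a single blob ... */
		return ioctl(vcpu_fd, KVM_SET_REGS, &regs);
	}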
+114 -62
arch/ia64/include/asm/kvm_host.h
··· 23 #ifndef __ASM_KVM_HOST_H 24 #define __ASM_KVM_HOST_H 25 26 - 27 - #include <linux/types.h> 28 - #include <linux/mm.h> 29 - #include <linux/kvm.h> 30 - #include <linux/kvm_para.h> 31 - #include <linux/kvm_types.h> 32 - 33 - #include <asm/pal.h> 34 - #include <asm/sal.h> 35 - 36 - #define KVM_MAX_VCPUS 4 37 #define KVM_MEMORY_SLOTS 32 38 /* memory slots that does not exposed to userspace */ 39 #define KVM_PRIVATE_MEM_SLOTS 4 ··· 39 #define EXIT_REASON_EXTERNAL_INTERRUPT 6 40 #define EXIT_REASON_IPI 7 41 #define EXIT_REASON_PTC_G 8 42 43 /*Define vmm address space and vm data space.*/ 44 - #define KVM_VMM_SIZE (16UL<<20) 45 #define KVM_VMM_SHIFT 24 46 - #define KVM_VMM_BASE 0xD000000000000000UL 47 - #define VMM_SIZE (8UL<<20) 48 49 /* 50 * Define vm_buffer, used by PAL Services, base address. 51 - * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M 52 */ 53 #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) 54 - #define KVM_VM_BUFFER_SIZE (8UL<<20) 55 56 - /*Define Virtual machine data layout.*/ 57 - #define KVM_VM_DATA_SHIFT 24 58 - #define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT) 59 - #define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE) 60 61 62 - #define KVM_P2M_BASE KVM_VM_DATA_BASE 63 - #define KVM_P2M_OFS 0 64 - #define KVM_P2M_SIZE (8UL << 20) 65 66 - #define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE) 67 - #define KVM_VHPT_OFS KVM_P2M_SIZE 68 - #define KVM_VHPT_BLOCK_SIZE (2UL << 20) 69 - #define VHPT_SHIFT 18 70 - #define VHPT_SIZE (1UL << VHPT_SHIFT) 71 - #define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5)) 72 73 - #define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE) 74 - #define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE) 75 - #define KVM_VTLB_BLOCK_SIZE (1UL<<20) 76 - #define VTLB_SHIFT 17 77 - #define VTLB_SIZE (1UL<<VTLB_SHIFT) 78 - #define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5)) 79 80 - #define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE) 81 - #define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE) 82 - #define KVM_VPD_BLOCK_SIZE (2UL<<20) 83 - #define VPD_SHIFT 16 84 - #define VPD_SIZE (1UL<<VPD_SHIFT) 85 86 - #define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE) 87 - #define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE) 88 - #define KVM_VCPU_BLOCK_SIZE (2UL<<20) 89 - #define VCPU_SHIFT 18 90 - #define VCPU_SIZE (1UL<<VCPU_SHIFT) 91 - #define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE 92 93 - #define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE) 94 - #define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE) 95 - #define KVM_VM_BLOCK_SIZE (1UL<<19) 96 97 - #define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE) 98 - #define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE) 99 - #define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19) 100 101 - /* Get vpd, vhpt, tlb, vcpu, base*/ 102 - #define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE) 103 - #define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE) 104 - #define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE) 105 - #define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE) 106 107 /*IO section definitions*/ 108 #define IOREQ_READ 1 ··· 440 441 unsigned long opcode; 442 unsigned long cause; 443 union context host; 444 union context guest; 445 }; ··· 455 }; 456 457 struct kvm_arch { 458 unsigned long vm_base; 459 unsigned long metaphysical_rr0; 460 unsigned long metaphysical_rr4; 461 unsigned long vmm_init_rr; 462 - unsigned long vhpt_base; 463 - unsigned long vtlb_base; 464 - unsigned long vpd_base; 465 - spinlock_t dirty_log_lock; 466 struct kvm_ioapic *vioapic; 467 struct kvm_vm_stat stat; 468 struct kvm_sal_data rdv_sal_data; ··· 563 564 
static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) 565 { 566 - return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; 567 } 568 569 typedef int kvm_vmm_entry(void); ··· 582 void kvm_sal_emul(struct kvm_vcpu *vcpu); 583 584 static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {} 585 586 #endif
··· 23 #ifndef __ASM_KVM_HOST_H 24 #define __ASM_KVM_HOST_H 25 26 #define KVM_MEMORY_SLOTS 32 27 /* memory slots that does not exposed to userspace */ 28 #define KVM_PRIVATE_MEM_SLOTS 4 ··· 50 #define EXIT_REASON_EXTERNAL_INTERRUPT 6 51 #define EXIT_REASON_IPI 7 52 #define EXIT_REASON_PTC_G 8 53 + #define EXIT_REASON_DEBUG 20 54 55 /*Define vmm address space and vm data space.*/ 56 + #define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20) 57 #define KVM_VMM_SHIFT 24 58 + #define KVM_VMM_BASE 0xD000000000000000 59 + #define VMM_SIZE (__IA64_UL_CONST(8)<<20) 60 61 /* 62 * Define vm_buffer, used by PAL Services, base address. 63 + * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M 64 */ 65 #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) 66 + #define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20) 67 68 + /* 69 + * kvm guest's data area looks as follow: 70 + * 71 + * +----------------------+ ------- KVM_VM_DATA_SIZE 72 + * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET 73 + * | | | / | 74 + * | .......... | | /vcpu's struct&stack | 75 + * | .......... | | /---------------------|---- 0 76 + * | vcpu[5]'s data | | / vpd | 77 + * | vcpu[4]'s data | |/-----------------------| 78 + * | vcpu[3]'s data | / vtlb | 79 + * | vcpu[2]'s data | /|------------------------| 80 + * | vcpu[1]'s data |/ | vhpt | 81 + * | vcpu[0]'s data |____________________________| 82 + * +----------------------+ | 83 + * | memory dirty log | | 84 + * +----------------------+ | 85 + * | vm's data struct | | 86 + * +----------------------+ | 87 + * | | | 88 + * | | | 89 + * | | | 90 + * | | | 91 + * | | | 92 + * | | | 93 + * | | | 94 + * | vm's p2m table | | 95 + * | | | 96 + * | | | 97 + * | | | | 98 + * vm's data->| | | | 99 + * +----------------------+ ------- 0 100 + * To support large memory, needs to increase the size of p2m. 101 + * To support more vcpus, needs to ensure it has enough space to 102 + * hold vcpus' data. 
103 + */ 104 105 + #define KVM_VM_DATA_SHIFT 26 106 + #define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT) 107 + #define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE) 108 109 + #define KVM_P2M_BASE KVM_VM_DATA_BASE 110 + #define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20) 111 112 + #define VHPT_SHIFT 16 113 + #define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT) 114 + #define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5)) 115 116 + #define VTLB_SHIFT 16 117 + #define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT) 118 + #define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5)) 119 120 + #define VPD_SHIFT 16 121 + #define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT) 122 123 + #define VCPU_STRUCT_SHIFT 16 124 + #define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT) 125 126 + #define KVM_STK_OFFSET VCPU_STRUCT_SIZE 127 128 + #define KVM_VM_STRUCT_SHIFT 19 129 + #define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT) 130 131 + #define KVM_MEM_DIRY_LOG_SHIFT 19 132 + #define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT) 133 + 134 + #ifndef __ASSEMBLY__ 135 + 136 + /*Define the max vcpus and memory for Guests.*/ 137 + #define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\ 138 + KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data) 139 + #define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT) 140 + 141 + #define VMM_LOG_LEN 256 142 + 143 + #include <linux/types.h> 144 + #include <linux/mm.h> 145 + #include <linux/kvm.h> 146 + #include <linux/kvm_para.h> 147 + #include <linux/kvm_types.h> 148 + 149 + #include <asm/pal.h> 150 + #include <asm/sal.h> 151 + #include <asm/page.h> 152 + 153 + struct kvm_vcpu_data { 154 + char vcpu_vhpt[VHPT_SIZE]; 155 + char vcpu_vtlb[VTLB_SIZE]; 156 + char vcpu_vpd[VPD_SIZE]; 157 + char vcpu_struct[VCPU_STRUCT_SIZE]; 158 + }; 159 + 160 + struct kvm_vm_data { 161 + char kvm_p2m[KVM_P2M_SIZE]; 162 + char kvm_vm_struct[KVM_VM_STRUCT_SIZE]; 163 + char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE]; 164 + struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS]; 165 + }; 166 + 167 + #define VCPU_BASE(n) KVM_VM_DATA_BASE + \ 168 + offsetof(struct kvm_vm_data, vcpu_data[n]) 169 + #define VM_BASE KVM_VM_DATA_BASE + \ 170 + offsetof(struct kvm_vm_data, kvm_vm_struct) 171 + #define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \ 172 + offsetof(struct kvm_vm_data, kvm_mem_dirty_log) 173 + 174 + #define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt)) 175 + #define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb)) 176 + #define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd)) 177 + #define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \ 178 + offsetof(struct kvm_vcpu_data, vcpu_struct)) 179 180 /*IO section definitions*/ 181 #define IOREQ_READ 1 ··· 389 390 unsigned long opcode; 391 unsigned long cause; 392 + char log_buf[VMM_LOG_LEN]; 393 union context host; 394 union context guest; 395 }; ··· 403 }; 404 405 struct kvm_arch { 406 + spinlock_t dirty_log_lock; 407 + 408 unsigned long vm_base; 409 unsigned long metaphysical_rr0; 410 unsigned long metaphysical_rr4; 411 unsigned long vmm_init_rr; 412 + 413 struct kvm_ioapic *vioapic; 414 struct kvm_vm_stat stat; 415 struct kvm_sal_data rdv_sal_data; ··· 512 513 static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) 514 { 515 + return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1; 516 } 517 518 typedef int kvm_vmm_entry(void); ··· 531 void kvm_sal_emul(struct kvm_vcpu *vcpu); 532 533 
static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {} 534 + #endif /* __ASSEMBLY__*/ 535 536 #endif
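The before/after hunks above replace the old fixed-offset carve-up (KVM_VHPT_OFS, KVM_VTLB_OFS, ...) with two plain structs, kvm_vcpu_data and kvm_vm_data, so every per-VM and per-vcpu area is located with offsetof() inside one allocation. A standalone sketch of that addressing pattern, with a made-up base address and a trimmed-down struct (the real header computes KVM_MAX_VCPUS from the leftover space, and its VCPU_BASE(n) indexes vcpu_data[n] directly inside offsetof()):

	#include <stddef.h>
	#include <stdio.h>

	#define AREA_SIZE	(1UL << 16)	/* VHPT/VTLB/VPD/VCPU_STRUCT are each 64 KB */

	struct vcpu_data {			/* stand-in for struct kvm_vcpu_data */
		char vcpu_vhpt[AREA_SIZE];
		char vcpu_vtlb[AREA_SIZE];
		char vcpu_vpd[AREA_SIZE];
		char vcpu_struct[AREA_SIZE];
	};

	struct vm_data {			/* trimmed stand-in for struct kvm_vm_data */
		char kvm_p2m[24UL << 20];
		char kvm_vm_struct[1UL << 19];
		char kvm_mem_dirty_log[1UL << 19];
		struct vcpu_data vcpu_data[4];	/* real KVM_MAX_VCPUS is computed, not 4 */
	};

	#define VM_DATA_BASE	0xD000000004000000UL	/* illustrative stand-in for KVM_VM_DATA_BASE */
	#define VCPU_BASE(n)	(VM_DATA_BASE + offsetof(struct vm_data, vcpu_data) + \
					(n) * sizeof(struct vcpu_data))
	#define VHPT_BASE(n)	(VCPU_BASE(n) + offsetof(struct vcpu_data, vcpu_vhpt))
	#define VPD_BASE(n)	(VCPU_BASE(n) + offsetof(struct vcpu_data, vcpu_vpd))

	int main(void)
	{
		/* Every area is a compile-time offset from the one big allocation. */
		printf("vcpu1: vhpt at 0x%lx, vpd at 0x%lx\n", VHPT_BASE(1), VPD_BASE(1));
		return 0;
	}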
+1 -1
arch/ia64/kvm/Makefile
···
 CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
 kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-	vtlb.o process.o
+	vtlb.o process.o kvm_lib.o
 #Add link memcpy and memset to avoid possible structure assignment error
 kvm-intel-objs += memcpy.o memset.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
+1 -10
arch/ia64/kvm/asm-offsets.c
···
 #include <linux/autoconf.h>
 #include <linux/kvm_host.h>
+#include <linux/kbuild.h>
 
 #include "vcpu.h"
-
-#define task_struct kvm_vcpu
-
-#define DEFINE(sym, val) \
-	asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : :)
-
-#define OFFSET(_sym, _str, _mem) \
-	DEFINE(_sym, offsetof(_str, _mem));
 
 void foo(void)
 {
+47 -60
arch/ia64/kvm/kvm-ia64.c
··· 180 181 switch (ext) { 182 case KVM_CAP_IRQCHIP: 183 - case KVM_CAP_USER_MEMORY: 184 case KVM_CAP_MP_STATE: 185 186 r = 1; ··· 438 expires = div64_u64(itc_diff, cyc_per_usec); 439 kt = ktime_set(0, 1000 * expires); 440 441 - down_read(&vcpu->kvm->slots_lock); 442 vcpu->arch.ht_active = 1; 443 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); 444 ··· 450 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 451 vcpu->arch.mp_state = 452 KVM_MP_STATE_RUNNABLE; 453 - up_read(&vcpu->kvm->slots_lock); 454 455 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) 456 return -EINTR; ··· 473 return 1; 474 } 475 476 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, 477 struct kvm_run *kvm_run) = { 478 [EXIT_REASON_VM_PANIC] = handle_vm_error, ··· 491 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 492 [EXIT_REASON_IPI] = handle_ipi, 493 [EXIT_REASON_PTC_G] = handle_global_purge, 494 495 }; 496 ··· 703 return r; 704 } 705 706 - /* 707 - * Allocate 16M memory for every vm to hold its specific data. 708 - * Its memory map is defined in kvm_host.h. 709 - */ 710 static struct kvm *kvm_alloc_kvm(void) 711 { 712 713 struct kvm *kvm; 714 uint64_t vm_base; 715 716 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); 717 718 if (!vm_base) 719 return ERR_PTR(-ENOMEM); 720 - printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base); 721 722 - /* Zero all pages before use! */ 723 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); 724 - 725 - kvm = (struct kvm *)(vm_base + KVM_VM_OFS); 726 kvm->arch.vm_base = vm_base; 727 728 return kvm; 729 } ··· 762 763 static void kvm_init_vm(struct kvm *kvm) 764 { 765 - long vm_base; 766 - 767 BUG_ON(!kvm); 768 769 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; 770 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; 771 kvm->arch.vmm_init_rr = VMM_INIT_RR; 772 - 773 - vm_base = kvm->arch.vm_base; 774 - if (vm_base) { 775 - kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS; 776 - kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS; 777 - kvm->arch.vpd_base = vm_base + KVM_VPD_OFS; 778 - } 779 780 /* 781 *Fill P2M entries for MMIO/IO ranges ··· 831 832 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 833 { 834 - int i; 835 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 836 - int r; 837 838 vcpu_load(vcpu); 839 ··· 849 850 vpd->vpr = regs->vpd.vpr; 851 852 - r = -EFAULT; 853 - r = copy_from_user(&vcpu->arch.guest, regs->saved_guest, 854 - sizeof(union context)); 855 - if (r) 856 - goto out; 857 - r = copy_from_user(vcpu + 1, regs->saved_stack + 858 - sizeof(struct kvm_vcpu), 859 - IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); 860 - if (r) 861 - goto out; 862 - vcpu->arch.exit_data = 863 - ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data; 864 865 RESTORE_REGS(mp_state); 866 RESTORE_REGS(vmm_rr); ··· 883 set_bit(KVM_REQ_RESUME, &vcpu->requests); 884 885 vcpu_put(vcpu); 886 - r = 0; 887 - out: 888 - return r; 889 } 890 891 long kvm_arch_vm_ioctl(struct file *filp, ··· 1146 /*Set entry address for first run.*/ 1147 regs->cr_iip = PALE_RESET_ENTRY; 1148 1149 - /*Initilize itc offset for vcpus*/ 1150 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); 1151 - for (i = 0; i < MAX_VCPU_NUM; i++) { 1152 - v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); 1153 v->arch.itc_offset = itc_offset; 1154 v->arch.last_itc = 0; 1155 } ··· 1164 vcpu->arch.apic->vcpu = vcpu; 1165 1166 p_ctx->gr[1] = 0; 1167 - p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); 1168 p_ctx->gr[13] = (unsigned long)vmm_vcpu; 1169 p_ctx->psr 
= 0x1008522000UL; 1170 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ ··· 1199 vcpu->arch.hlt_timer.function = hlt_timer_fn; 1200 1201 vcpu->arch.last_run_cpu = -1; 1202 - vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); 1203 vcpu->arch.vsa_base = kvm_vsa_base; 1204 vcpu->arch.__gp = kvm_vmm_gp; 1205 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); 1206 - vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); 1207 - vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); 1208 init_ptce_info(vcpu); 1209 1210 r = 0; ··· 1254 int r; 1255 int cpu; 1256 1257 r = -ENOMEM; 1258 if (!vm_base) { 1259 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); 1260 goto fail; 1261 } 1262 - vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); 1263 vcpu->kvm = kvm; 1264 1265 cpu = get_cpu(); ··· 1365 1366 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1367 { 1368 - int i; 1369 - int r; 1370 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1371 vcpu_load(vcpu); 1372 1373 for (i = 0; i < 16; i++) { ··· 1382 regs->vpd.vpsr = vpd->vpsr; 1383 regs->vpd.vpr = vpd->vpr; 1384 1385 - r = -EFAULT; 1386 - r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, 1387 - sizeof(union context)); 1388 - if (r) 1389 - goto out; 1390 - r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); 1391 - if (r) 1392 - goto out; 1393 SAVE_REGS(mp_state); 1394 SAVE_REGS(vmm_rr); 1395 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); ··· 1411 SAVE_REGS(metaphysical_saved_rr4); 1412 SAVE_REGS(fp_psr); 1413 SAVE_REGS(saved_gp); 1414 vcpu_put(vcpu); 1415 - r = 0; 1416 - out: 1417 - return r; 1418 } 1419 1420 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) ··· 1440 int npages = mem->memory_size >> PAGE_SHIFT; 1441 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; 1442 unsigned long base_gfn = memslot->base_gfn; 1443 1444 for (i = 0; i < npages; i++) { 1445 pfn = gfn_to_pfn(kvm, base_gfn + i); ··· 1618 struct kvm_memory_slot *memslot; 1619 int r, i; 1620 long n, base; 1621 - unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS 1622 - + KVM_MEM_DIRTY_LOG_OFS); 1623 1624 r = -EINVAL; 1625 if (log->slot >= KVM_MEMORY_SLOTS)
··· 180 181 switch (ext) { 182 case KVM_CAP_IRQCHIP: 183 case KVM_CAP_MP_STATE: 184 185 r = 1; ··· 439 expires = div64_u64(itc_diff, cyc_per_usec); 440 kt = ktime_set(0, 1000 * expires); 441 442 vcpu->arch.ht_active = 1; 443 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); 444 ··· 452 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 453 vcpu->arch.mp_state = 454 KVM_MP_STATE_RUNNABLE; 455 456 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) 457 return -EINTR; ··· 476 return 1; 477 } 478 479 + static int handle_vcpu_debug(struct kvm_vcpu *vcpu, 480 + struct kvm_run *kvm_run) 481 + { 482 + printk("VMM: %s", vcpu->arch.log_buf); 483 + return 1; 484 + } 485 + 486 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, 487 struct kvm_run *kvm_run) = { 488 [EXIT_REASON_VM_PANIC] = handle_vm_error, ··· 487 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 488 [EXIT_REASON_IPI] = handle_ipi, 489 [EXIT_REASON_PTC_G] = handle_global_purge, 490 + [EXIT_REASON_DEBUG] = handle_vcpu_debug, 491 492 }; 493 ··· 698 return r; 699 } 700 701 static struct kvm *kvm_alloc_kvm(void) 702 { 703 704 struct kvm *kvm; 705 uint64_t vm_base; 706 707 + BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE); 708 + 709 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); 710 711 if (!vm_base) 712 return ERR_PTR(-ENOMEM); 713 714 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); 715 + kvm = (struct kvm *)(vm_base + 716 + offsetof(struct kvm_vm_data, kvm_vm_struct)); 717 kvm->arch.vm_base = vm_base; 718 + printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base); 719 720 return kvm; 721 } ··· 760 761 static void kvm_init_vm(struct kvm *kvm) 762 { 763 BUG_ON(!kvm); 764 765 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; 766 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; 767 kvm->arch.vmm_init_rr = VMM_INIT_RR; 768 769 /* 770 *Fill P2M entries for MMIO/IO ranges ··· 838 839 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 840 { 841 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 842 + int i; 843 844 vcpu_load(vcpu); 845 ··· 857 858 vpd->vpr = regs->vpd.vpr; 859 860 + memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context)); 861 862 RESTORE_REGS(mp_state); 863 RESTORE_REGS(vmm_rr); ··· 902 set_bit(KVM_REQ_RESUME, &vcpu->requests); 903 904 vcpu_put(vcpu); 905 + 906 + return 0; 907 } 908 909 long kvm_arch_vm_ioctl(struct file *filp, ··· 1166 /*Set entry address for first run.*/ 1167 regs->cr_iip = PALE_RESET_ENTRY; 1168 1169 + /*Initialize itc offset for vcpus*/ 1170 itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); 1171 + for (i = 0; i < KVM_MAX_VCPUS; i++) { 1172 + v = (struct kvm_vcpu *)((char *)vcpu + 1173 + sizeof(struct kvm_vcpu_data) * i); 1174 v->arch.itc_offset = itc_offset; 1175 v->arch.last_itc = 0; 1176 } ··· 1183 vcpu->arch.apic->vcpu = vcpu; 1184 1185 p_ctx->gr[1] = 0; 1186 + p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET); 1187 p_ctx->gr[13] = (unsigned long)vmm_vcpu; 1188 p_ctx->psr = 0x1008522000UL; 1189 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ ··· 1218 vcpu->arch.hlt_timer.function = hlt_timer_fn; 1219 1220 vcpu->arch.last_run_cpu = -1; 1221 + vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id); 1222 vcpu->arch.vsa_base = kvm_vsa_base; 1223 vcpu->arch.__gp = kvm_vmm_gp; 1224 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); 1225 + vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id); 1226 + vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id); 1227 init_ptce_info(vcpu); 1228 1229 r 
= 0; ··· 1273 int r; 1274 int cpu; 1275 1276 + BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2); 1277 + 1278 + r = -EINVAL; 1279 + if (id >= KVM_MAX_VCPUS) { 1280 + printk(KERN_ERR"kvm: Can't configure vcpus > %ld", 1281 + KVM_MAX_VCPUS); 1282 + goto fail; 1283 + } 1284 + 1285 r = -ENOMEM; 1286 if (!vm_base) { 1287 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); 1288 goto fail; 1289 } 1290 + vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data, 1291 + vcpu_data[id].vcpu_struct)); 1292 vcpu->kvm = kvm; 1293 1294 cpu = get_cpu(); ··· 1374 1375 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1376 { 1377 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1378 + int i; 1379 + 1380 vcpu_load(vcpu); 1381 1382 for (i = 0; i < 16; i++) { ··· 1391 regs->vpd.vpsr = vpd->vpsr; 1392 regs->vpd.vpr = vpd->vpr; 1393 1394 + memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context)); 1395 + 1396 SAVE_REGS(mp_state); 1397 SAVE_REGS(vmm_rr); 1398 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); ··· 1426 SAVE_REGS(metaphysical_saved_rr4); 1427 SAVE_REGS(fp_psr); 1428 SAVE_REGS(saved_gp); 1429 + 1430 vcpu_put(vcpu); 1431 + return 0; 1432 } 1433 1434 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) ··· 1456 int npages = mem->memory_size >> PAGE_SHIFT; 1457 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; 1458 unsigned long base_gfn = memslot->base_gfn; 1459 + 1460 + if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) 1461 + return -ENOMEM; 1462 1463 for (i = 0; i < npages; i++) { 1464 pfn = gfn_to_pfn(kvm, base_gfn + i); ··· 1631 struct kvm_memory_slot *memslot; 1632 int r, i; 1633 long n, base; 1634 + unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + 1635 + offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); 1636 1637 r = -EINVAL; 1638 if (log->slot >= KVM_MEMORY_SLOTS)
+15
arch/ia64/kvm/kvm_lib.c
+/*
+ * kvm_lib.c: Compile some libraries for kvm-intel module.
+ *
+ * Just include kernel's library, and disable symbols export.
+ * Copyright (C) 2008, Intel Corporation.
+ * Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#undef CONFIG_MODULES
+#include "../../../lib/vsprintf.c"
+#include "../../../lib/ctype.c"
+3 -1
arch/ia64/kvm/kvm_minstate.h
···
 #include <asm/asmmacro.h>
 #include <asm/types.h>
 #include <asm/kregs.h>
+#include <asm/kvm_host.h>
+
 #include "asm-offsets.h"
 
 #define KVM_MINSTATE_START_SAVE_MIN					\
···
 	addl r22 = VMM_RBS_OFFSET,r1;	/* compute base of RBS */	\
 	;;								\
 	lfetch.fault.excl.nt1 [r22];					\
-	addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1;	/* compute base of memory stack */ \
+	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;			\
 	mov r23 = ar.bspstore;		/* save ar.bspstore */		\
 	;;								\
 	mov ar.bspstore = r22;		/* switch to kernel RBS */	\
+2 -1
arch/ia64/kvm/misc.h
···
  */
 static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
 {
-	return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
+	return (uint64_t *)(kvm->arch.vm_base +
+				offsetof(struct kvm_vm_data, kvm_p2m));
 }
 
 static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
+15 -23
arch/ia64/kvm/mmio.c
···
 	switch (addr) {
 	case PIB_OFST_INTA:
-		/*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
-		panic_vm(v);
+		panic_vm(v, "Undefined write on PIB INTA\n");
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			vlsapic_write_xtp(v, val);
 		} else {
-			/*panic_domain(NULL,
-			"Undefined write on PIB XTP\n");*/
-			panic_vm(v);
+			panic_vm(v, "Undefined write on PIB XTP\n");
 		}
 		break;
 	default:
 		if (PIB_LOW_HALF(addr)) {
-			/*lower half */
+			/*Lower half */
 			if (length != 8)
-				/*panic_domain(NULL,
-				"Can't LHF write with size %ld!\n",
-				length);*/
-				panic_vm(v);
+				panic_vm(v, "Can't LHF write with size %ld!\n",
+						length);
 			else
 				vlsapic_write_ipi(v, addr, val);
-		} else {   /* upper half
-			      printk("IPI-UHF write %lx\n",addr);*/
-			panic_vm(v);
+		} else {   /*Upper half */
+			panic_vm(v, "IPI-UHF write %lx\n", addr);
 		}
 		break;
 	}
···
 	if (length == 1) /* 1 byte load */
 			; /* There is no i8259, there is no INTA access*/
 		else
-			/*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
-			panic_vm(v);
+			panic_vm(v, "Undefined read on PIB INTA\n");
 
 		break;
 	case PIB_OFST_XTP:
 		if (length == 1) {
 			result = VLSAPIC_XTP(v);
-			/* printk("read xtp %lx\n", result); */
 		} else {
-			/*panic_domain(NULL,
-			"Undefined read on PIB XTP\n");*/
-			panic_vm(v);
+			panic_vm(v, "Undefined read on PIB XTP\n");
 		}
 		break;
 	default:
-		panic_vm(v);
+		panic_vm(v, "Undefined addr access for lsapic!\n");
 		break;
 	}
 	return result;
···
 		/* it's necessary to ensure zero extending */
 		*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
 	} else
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Unhandled mmio access returned!\n");
 out:
 	local_irq_restore(psr);
 	return ;
···
 		return;
 	} else {
 		inst_type = -1;
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Unsupported MMIO access instruction! \
+				Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
+				bundle.i64[0], bundle.i64[1]);
 	}
 
 	size = 1 << size;
···
 		if (inst_type == SL_INTEGER)
 			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
 		else
-			panic_vm(vcpu);
+			panic_vm(vcpu, "Unsupported instruction type!\n");
 
 	}
 	vcpu_increment_iip(vcpu);
+25 -4
arch/ia64/kvm/process.c
···
 	vector = vec2off[vec];
 
 	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
+						"with psr.ic = 0\n", vector);
 		return;
 	}
 
···
 		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
 		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
 	} else
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
 }
 
 static void set_sal_call_data(struct kvm_vcpu *vcpu)
···
 		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
 		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
 	} else
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Mis-set for exit reason!\n");
 }
 
 void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
···
 	vpsr = VCPU(vcpu, vpsr);
 	isr = vpsr & IA64_PSR_RI;
 	if (!(vpsr & IA64_PSR_IC))
-		panic_vm(vcpu);
+		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
 	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
 }
 
···
 	ia64_set_pta(vcpu->arch.vhpt.pta.val);
 }
 
+static void vmm_sanity_check(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
+		panic_vm(vcpu, "Failed to do vmm sanity check,"
+			"it maybe caused by crashed vmm!!\n\n");
+	}
+}
+
 static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
 {
+	vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
+
 	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
 		vcpu_do_resume(vcpu);
 		return;
···
 	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
 						1, 0, 0, 0, 0, 0);
 	kvm_do_resume_op(vcpu);
+}
+
+void vmm_panic_handler(u64 vec)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	vmm_sanity = 0;
+	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
+			vec2off[vec]);
 }
+68 -6
arch/ia64/kvm/vcpu.c
··· 816 unsigned long vitv = VCPU(vcpu, itv); 817 818 if (vcpu->vcpu_id == 0) { 819 - for (i = 0; i < MAX_VCPU_NUM; i++) { 820 - v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); 821 VMX(v, itc_offset) = itc_offset; 822 VMX(v, last_itc) = 0; 823 } ··· 1651 * Otherwise panic 1652 */ 1653 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) 1654 - panic_vm(vcpu); 1655 1656 /* 1657 * For those IA64_PSR bits: id/da/dd/ss/ed/ia ··· 2105 2106 if (is_physical_mode(vcpu)) { 2107 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) 2108 - panic_vm(vcpu); 2109 2110 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); 2111 ia64_dv_serialize_data(); ··· 2154 return 0; 2155 } 2156 2157 - void panic_vm(struct kvm_vcpu *v) 2158 { 2159 - struct exit_ctl_data *p = &v->arch.exit_data; 2160 2161 p->exit_reason = EXIT_REASON_VM_PANIC; 2162 vmm_transition(v); 2163 /*Never to return*/
··· 816 unsigned long vitv = VCPU(vcpu, itv); 817 818 if (vcpu->vcpu_id == 0) { 819 + for (i = 0; i < KVM_MAX_VCPUS; i++) { 820 + v = (struct kvm_vcpu *)((char *)vcpu + 821 + sizeof(struct kvm_vcpu_data) * i); 822 VMX(v, itc_offset) = itc_offset; 823 VMX(v, last_itc) = 0; 824 } ··· 1650 * Otherwise panic 1651 */ 1652 if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) 1653 + panic_vm(vcpu, "Only support guests with vpsr.pk =0 \ 1654 + & vpsr.is=0\n"); 1655 1656 /* 1657 * For those IA64_PSR bits: id/da/dd/ss/ed/ia ··· 2103 2104 if (is_physical_mode(vcpu)) { 2105 if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) 2106 + panic_vm(vcpu, "Machine Status conflicts!\n"); 2107 2108 ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); 2109 ia64_dv_serialize_data(); ··· 2152 return 0; 2153 } 2154 2155 + static void kvm_show_registers(struct kvm_pt_regs *regs) 2156 { 2157 + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; 2158 2159 + struct kvm_vcpu *vcpu = current_vcpu; 2160 + if (vcpu != NULL) 2161 + printk("vcpu 0x%p vcpu %d\n", 2162 + vcpu, vcpu->vcpu_id); 2163 + 2164 + printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n", 2165 + regs->cr_ipsr, regs->cr_ifs, ip); 2166 + 2167 + printk("unat: %016lx pfs : %016lx rsc : %016lx\n", 2168 + regs->ar_unat, regs->ar_pfs, regs->ar_rsc); 2169 + printk("rnat: %016lx bspstore: %016lx pr : %016lx\n", 2170 + regs->ar_rnat, regs->ar_bspstore, regs->pr); 2171 + printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", 2172 + regs->loadrs, regs->ar_ccv, regs->ar_fpsr); 2173 + printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); 2174 + printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, 2175 + regs->b6, regs->b7); 2176 + printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", 2177 + regs->f6.u.bits[1], regs->f6.u.bits[0], 2178 + regs->f7.u.bits[1], regs->f7.u.bits[0]); 2179 + printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", 2180 + regs->f8.u.bits[1], regs->f8.u.bits[0], 2181 + regs->f9.u.bits[1], regs->f9.u.bits[0]); 2182 + printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", 2183 + regs->f10.u.bits[1], regs->f10.u.bits[0], 2184 + regs->f11.u.bits[1], regs->f11.u.bits[0]); 2185 + 2186 + printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, 2187 + regs->r2, regs->r3); 2188 + printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, 2189 + regs->r9, regs->r10); 2190 + printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, 2191 + regs->r12, regs->r13); 2192 + printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, 2193 + regs->r15, regs->r16); 2194 + printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, 2195 + regs->r18, regs->r19); 2196 + printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, 2197 + regs->r21, regs->r22); 2198 + printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, 2199 + regs->r24, regs->r25); 2200 + printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, 2201 + regs->r27, regs->r28); 2202 + printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, 2203 + regs->r30, regs->r31); 2204 + 2205 + } 2206 + 2207 + void panic_vm(struct kvm_vcpu *v, const char *fmt, ...) 2208 + { 2209 + va_list args; 2210 + char buf[256]; 2211 + 2212 + struct kvm_pt_regs *regs = vcpu_regs(v); 2213 + struct exit_ctl_data *p = &v->arch.exit_data; 2214 + va_start(args, fmt); 2215 + vsnprintf(buf, sizeof(buf), fmt, args); 2216 + va_end(args); 2217 + printk(buf); 2218 + kvm_show_registers(regs); 2219 p->exit_reason = EXIT_REASON_VM_PANIC; 2220 vmm_transition(v); 2221 /*Never to return*/
+4 -1
arch/ia64/kvm/vcpu.h
···
 void kvm_init_vhpt(struct kvm_vcpu *v);
 void thash_init(struct thash_cb *hcb, u64 sz);
 
-void panic_vm(struct kvm_vcpu *v);
+void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
 
 extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
 		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+
+extern long vmm_sanity;
+
 #endif
 #endif	/* __VCPU_H__ */
+29
arch/ia64/kvm/vmm.c
···
  */
 
 
+#include<linux/kernel.h>
 #include<linux/module.h>
 #include<asm/fpswa.h>
 
···
 extern char kvm_ia64_ivt;
 extern fpswa_interface_t *vmm_fpswa_interface;
+
+long vmm_sanity = 1;
 
 struct kvm_vmm_info vmm_info = {
 	.module			= THIS_MODULE,
···
 {
 	_vmm_raw_spin_unlock(lock);
 }
+
+static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	long psr;
+
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_DEBUG;
+	vmm_transition(vcpu);
+	local_irq_restore(psr);
+}
+
+asmlinkage int printk(const char *fmt, ...)
+{
+	struct kvm_vcpu *vcpu = current_vcpu;
+	va_list args;
+	int r;
+
+	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
+	va_start(args, fmt);
+	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
+	va_end(args);
+	vcpu_debug_exit(vcpu);
+	return r;
+}
+
 module_init(kvm_vmm_init)
 module_exit(kvm_vmm_exit)
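Taken together with the new EXIT_REASON_DEBUG and handle_vcpu_debug() in kvm-ia64.c, the printk() override above lets the isolated VMM forward messages to the host: it formats into vcpu->arch.log_buf, forces a debug exit, and the host side prints the buffer. A small userspace model of that hand-off, with hypothetical stand-in types (fake_vcpu, fake_transition) in place of the real vcpu and vmm_transition(); it only mirrors the shape of the mechanism:

	#include <stdarg.h>
	#include <stdio.h>
	#include <string.h>

	#define VMM_LOG_LEN		256
	#define EXIT_REASON_DEBUG	20

	struct fake_vcpu {
		char log_buf[VMM_LOG_LEN];
		int exit_reason;
	};

	/* Stand-in for vmm_transition(): hand control back to the host side. */
	static void fake_transition(struct fake_vcpu *v)
	{
		if (v->exit_reason == EXIT_REASON_DEBUG)
			printf("VMM: %s", v->log_buf);	/* mirrors handle_vcpu_debug() */
	}

	/* VMM-side printk replacement, shaped like the one added in vmm.c. */
	static int vmm_printk(struct fake_vcpu *v, const char *fmt, ...)
	{
		va_list args;
		int r;

		memset(v->log_buf, 0, VMM_LOG_LEN);
		va_start(args, fmt);
		r = vsnprintf(v->log_buf, VMM_LOG_LEN, fmt, args);
		va_end(args);
		v->exit_reason = EXIT_REASON_DEBUG;
		fake_transition(v);
		return r;
	}

	int main(void)
	{
		struct fake_vcpu v = { .exit_reason = 0 };

		vmm_printk(&v, "sanity check failed at vector 0x%x\n", 0x6100);
		return 0;
	}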
+730 -735
arch/ia64/kvm/vmm_ivt.S
··· 1 /* 2 - * /ia64/kvm_ivt.S 3 * 4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co 5 * Stephane Eranian <eranian@hpl.hp.com> ··· 70 # define PSR_DEFAULT_BITS 0 71 #endif 72 73 - 74 #define KVM_FAULT(n) \ 75 - kvm_fault_##n:; \ 76 - mov r19=n;; \ 77 - br.sptk.many kvm_fault_##n; \ 78 - ;; \ 79 - 80 81 #define KVM_REFLECT(n) \ 82 - mov r31=pr; \ 83 - mov r19=n; /* prepare to save predicates */ \ 84 - mov r29=cr.ipsr; \ 85 - ;; \ 86 - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ 87 - (p7)br.sptk.many kvm_dispatch_reflection; \ 88 - br.sptk.many kvm_panic; \ 89 90 - 91 - GLOBAL_ENTRY(kvm_panic) 92 - br.sptk.many kvm_panic 93 - ;; 94 - END(kvm_panic) 95 - 96 - 97 - 98 - 99 100 .section .text.ivt,"ax" 101 ··· 112 /////////////////////////////////////////////////////////////// 113 // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) 114 ENTRY(kvm_vhpt_miss) 115 - KVM_FAULT(0) 116 END(kvm_vhpt_miss) 117 - 118 119 .org kvm_ia64_ivt+0x400 120 //////////////////////////////////////////////////////////////// 121 // 0x0400 Entry 1 (size 64 bundles) ITLB (21) 122 ENTRY(kvm_itlb_miss) 123 - mov r31 = pr 124 - mov r29=cr.ipsr; 125 - ;; 126 - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 127 - (p6) br.sptk kvm_alt_itlb_miss 128 - mov r19 = 1 129 - br.sptk kvm_itlb_miss_dispatch 130 - KVM_FAULT(1); 131 END(kvm_itlb_miss) 132 133 .org kvm_ia64_ivt+0x0800 134 ////////////////////////////////////////////////////////////////// 135 // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) 136 ENTRY(kvm_dtlb_miss) 137 - mov r31 = pr 138 - mov r29=cr.ipsr; 139 - ;; 140 - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 141 - (p6)br.sptk kvm_alt_dtlb_miss 142 - br.sptk kvm_dtlb_miss_dispatch 143 END(kvm_dtlb_miss) 144 145 .org kvm_ia64_ivt+0x0c00 146 //////////////////////////////////////////////////////////////////// 147 // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) 148 ENTRY(kvm_alt_itlb_miss) 149 - mov r16=cr.ifa // get address that caused the TLB miss 150 - ;; 151 - movl r17=PAGE_KERNEL 152 - mov r24=cr.ipsr 153 - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 154 - ;; 155 - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 156 - ;; 157 - or r19=r17,r19 // insert PTE control bits into r19 158 - ;; 159 - movl r20=IA64_GRANULE_SHIFT<<2 160 - ;; 161 - mov cr.itir=r20 162 - ;; 163 - itc.i r19 // insert the TLB entry 164 - mov pr=r31,-1 165 - rfi 166 END(kvm_alt_itlb_miss) 167 168 .org kvm_ia64_ivt+0x1000 169 ///////////////////////////////////////////////////////////////////// 170 // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) 171 ENTRY(kvm_alt_dtlb_miss) 172 - mov r16=cr.ifa // get address that caused the TLB miss 173 - ;; 174 - movl r17=PAGE_KERNEL 175 - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 176 - mov r24=cr.ipsr 177 - ;; 178 - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 179 - ;; 180 - or r19=r19,r17 // insert PTE control bits into r19 181 - ;; 182 - movl r20=IA64_GRANULE_SHIFT<<2 183 - ;; 184 - mov cr.itir=r20 185 - ;; 186 - itc.d r19 // insert the TLB entry 187 - mov pr=r31,-1 188 - rfi 189 END(kvm_alt_dtlb_miss) 190 191 .org kvm_ia64_ivt+0x1400 192 ////////////////////////////////////////////////////////////////////// 193 // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) 194 ENTRY(kvm_nested_dtlb_miss) 195 - KVM_FAULT(5) 196 END(kvm_nested_dtlb_miss) 197 198 .org kvm_ia64_ivt+0x1800 199 ///////////////////////////////////////////////////////////////////// 200 // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) 201 ENTRY(kvm_ikey_miss) 202 - KVM_REFLECT(6) 203 
END(kvm_ikey_miss) 204 205 .org kvm_ia64_ivt+0x1c00 206 ///////////////////////////////////////////////////////////////////// 207 // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) 208 ENTRY(kvm_dkey_miss) 209 - KVM_REFLECT(7) 210 END(kvm_dkey_miss) 211 212 .org kvm_ia64_ivt+0x2000 213 //////////////////////////////////////////////////////////////////// 214 // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) 215 ENTRY(kvm_dirty_bit) 216 - KVM_REFLECT(8) 217 END(kvm_dirty_bit) 218 219 .org kvm_ia64_ivt+0x2400 220 //////////////////////////////////////////////////////////////////// 221 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) 222 ENTRY(kvm_iaccess_bit) 223 - KVM_REFLECT(9) 224 END(kvm_iaccess_bit) 225 226 .org kvm_ia64_ivt+0x2800 227 /////////////////////////////////////////////////////////////////// 228 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) 229 ENTRY(kvm_daccess_bit) 230 - KVM_REFLECT(10) 231 END(kvm_daccess_bit) 232 233 .org kvm_ia64_ivt+0x2c00 234 ///////////////////////////////////////////////////////////////// 235 // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) 236 ENTRY(kvm_break_fault) 237 - mov r31=pr 238 - mov r19=11 239 - mov r29=cr.ipsr 240 - ;; 241 - KVM_SAVE_MIN_WITH_COVER_R19 242 - ;; 243 - alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) 244 - mov out0=cr.ifa 245 - mov out2=cr.isr // FIXME: pity to make this slow access twice 246 - mov out3=cr.iim // FIXME: pity to make this slow access twice 247 - adds r3=8,r2 // set up second base pointer 248 - ;; 249 - ssm psr.ic 250 - ;; 251 - srlz.i // guarantee that interruption collection is on 252 - ;; 253 - //(p15)ssm psr.i // restore psr.i 254 - addl r14=@gprel(ia64_leave_hypervisor),gp 255 - ;; 256 - KVM_SAVE_REST 257 - mov rp=r14 258 - ;; 259 - adds out1=16,sp 260 - br.call.sptk.many b6=kvm_ia64_handle_break 261 - ;; 262 END(kvm_break_fault) 263 264 .org kvm_ia64_ivt+0x3000 265 ///////////////////////////////////////////////////////////////// 266 // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) 267 ENTRY(kvm_interrupt) 268 - mov r31=pr // prepare to save predicates 269 - mov r19=12 270 - mov r29=cr.ipsr 271 - ;; 272 - tbit.z p6,p7=r29,IA64_PSR_VM_BIT 273 - tbit.z p0,p15=r29,IA64_PSR_I_BIT 274 - ;; 275 - (p7) br.sptk kvm_dispatch_interrupt 276 - ;; 277 - mov r27=ar.rsc /* M */ 278 - mov r20=r1 /* A */ 279 - mov r25=ar.unat /* M */ 280 - mov r26=ar.pfs /* I */ 281 - mov r28=cr.iip /* M */ 282 - cover /* B (or nothing) */ 283 - ;; 284 - mov r1=sp 285 - ;; 286 - invala /* M */ 287 - mov r30=cr.ifs 288 - ;; 289 - addl r1=-VMM_PT_REGS_SIZE,r1 290 - ;; 291 - adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ 292 - adds r16=PT(CR_IPSR),r1 293 - ;; 294 - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES 295 - st8 [r16]=r29 /* save cr.ipsr */ 296 - ;; 297 - lfetch.fault.excl.nt1 [r17] 298 - mov r29=b0 299 - ;; 300 - adds r16=PT(R8),r1 /* initialize first base pointer */ 301 - adds r17=PT(R9),r1 /* initialize second base pointer */ 302 - mov r18=r0 /* make sure r18 isn't NaT */ 303 - ;; 304 .mem.offset 0,0; st8.spill [r16]=r8,16 305 .mem.offset 8,0; st8.spill [r17]=r9,16 306 ;; 307 .mem.offset 0,0; st8.spill [r16]=r10,24 308 .mem.offset 8,0; st8.spill [r17]=r11,24 309 ;; 310 - st8 [r16]=r28,16 /* save cr.iip */ 311 - st8 [r17]=r30,16 /* save cr.ifs */ 312 - mov r8=ar.fpsr /* M */ 313 - mov r9=ar.csd 314 - mov r10=ar.ssd 315 - movl r11=FPSR_DEFAULT /* L-unit */ 316 - ;; 317 - st8 [r16]=r25,16 /* save ar.unat */ 318 - st8 [r17]=r26,16 /* 
save ar.pfs */ 319 - shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ 320 - ;; 321 - st8 [r16]=r27,16 /* save ar.rsc */ 322 - adds r17=16,r17 /* skip over ar_rnat field */ 323 - ;; 324 - st8 [r17]=r31,16 /* save predicates */ 325 - adds r16=16,r16 /* skip over ar_bspstore field */ 326 - ;; 327 - st8 [r16]=r29,16 /* save b0 */ 328 - st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ 329 - ;; 330 .mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ 331 .mem.offset 8,0; st8.spill [r17]=r12,16 332 - adds r12=-16,r1 333 - /* switch to kernel memory stack (with 16 bytes of scratch) */ 334 - ;; 335 .mem.offset 0,0; st8.spill [r16]=r13,16 336 .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ 337 - ;; 338 .mem.offset 0,0; st8.spill [r16]=r15,16 339 .mem.offset 8,0; st8.spill [r17]=r14,16 340 - dep r14=-1,r0,60,4 341 - ;; 342 .mem.offset 0,0; st8.spill [r16]=r2,16 343 .mem.offset 8,0; st8.spill [r17]=r3,16 344 - adds r2=VMM_PT_REGS_R16_OFFSET,r1 345 - adds r14 = VMM_VCPU_GP_OFFSET,r13 346 - ;; 347 - mov r8=ar.ccv 348 - ld8 r14 = [r14] 349 - ;; 350 - mov r1=r14 /* establish kernel global pointer */ 351 - ;; \ 352 - bsw.1 353 - ;; 354 - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 355 - mov out0=r13 356 - ;; 357 - ssm psr.ic 358 - ;; 359 - srlz.i 360 - ;; 361 - //(p15) ssm psr.i 362 - adds r3=8,r2 // set up second base pointer for SAVE_REST 363 - srlz.i // ensure everybody knows psr.ic is back on 364 - ;; 365 .mem.offset 0,0; st8.spill [r2]=r16,16 366 .mem.offset 8,0; st8.spill [r3]=r17,16 367 - ;; 368 .mem.offset 0,0; st8.spill [r2]=r18,16 369 .mem.offset 8,0; st8.spill [r3]=r19,16 370 - ;; 371 .mem.offset 0,0; st8.spill [r2]=r20,16 372 .mem.offset 8,0; st8.spill [r3]=r21,16 373 - mov r18=b6 374 - ;; 375 .mem.offset 0,0; st8.spill [r2]=r22,16 376 .mem.offset 8,0; st8.spill [r3]=r23,16 377 - mov r19=b7 378 - ;; 379 .mem.offset 0,0; st8.spill [r2]=r24,16 380 .mem.offset 8,0; st8.spill [r3]=r25,16 381 - ;; 382 .mem.offset 0,0; st8.spill [r2]=r26,16 383 .mem.offset 8,0; st8.spill [r3]=r27,16 384 - ;; 385 .mem.offset 0,0; st8.spill [r2]=r28,16 386 .mem.offset 8,0; st8.spill [r3]=r29,16 387 - ;; 388 .mem.offset 0,0; st8.spill [r2]=r30,16 389 .mem.offset 8,0; st8.spill [r3]=r31,32 390 - ;; 391 - mov ar.fpsr=r11 /* M-unit */ 392 - st8 [r2]=r8,8 /* ar.ccv */ 393 - adds r24=PT(B6)-PT(F7),r3 394 - ;; 395 - stf.spill [r2]=f6,32 396 - stf.spill [r3]=f7,32 397 - ;; 398 - stf.spill [r2]=f8,32 399 - stf.spill [r3]=f9,32 400 - ;; 401 - stf.spill [r2]=f10 402 - stf.spill [r3]=f11 403 - adds r25=PT(B7)-PT(F11),r3 404 - ;; 405 - st8 [r24]=r18,16 /* b6 */ 406 - st8 [r25]=r19,16 /* b7 */ 407 - ;; 408 - st8 [r24]=r9 /* ar.csd */ 409 - st8 [r25]=r10 /* ar.ssd */ 410 - ;; 411 - srlz.d // make sure we see the effect of cr.ivr 412 - addl r14=@gprel(ia64_leave_nested),gp 413 - ;; 414 - mov rp=r14 415 - br.call.sptk.many b6=kvm_ia64_handle_irq 416 - ;; 417 END(kvm_interrupt) 418 419 .global kvm_dispatch_vexirq ··· 420 ////////////////////////////////////////////////////////////////////// 421 // 0x3400 Entry 13 (size 64 bundles) Reserved 422 ENTRY(kvm_virtual_exirq) 423 - mov r31=pr 424 - mov r19=13 425 - mov r30 =r0 426 - ;; 427 kvm_dispatch_vexirq: 428 - cmp.eq p6,p0 = 1,r30 429 - ;; 430 - (p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 431 - ;; 432 - (p6)ld8 r1 = [r29] 433 - ;; 434 - KVM_SAVE_MIN_WITH_COVER_R19 435 - alloc r14=ar.pfs,0,0,1,0 436 - mov out0=r13 437 438 - ssm psr.ic 439 - ;; 440 - srlz.i // guarantee that interruption collection is on 441 - ;; 442 - //(p15) ssm psr.i // 
restore psr.i 443 - adds r3=8,r2 // set up second base pointer 444 - ;; 445 - KVM_SAVE_REST 446 - addl r14=@gprel(ia64_leave_hypervisor),gp 447 - ;; 448 - mov rp=r14 449 - br.call.sptk.many b6=kvm_vexirq 450 END(kvm_virtual_exirq) 451 452 .org kvm_ia64_ivt+0x3800 453 ///////////////////////////////////////////////////////////////////// 454 // 0x3800 Entry 14 (size 64 bundles) Reserved 455 - KVM_FAULT(14) 456 - // this code segment is from 2.6.16.13 457 - 458 459 .org kvm_ia64_ivt+0x3c00 460 /////////////////////////////////////////////////////////////////////// 461 // 0x3c00 Entry 15 (size 64 bundles) Reserved 462 - KVM_FAULT(15) 463 - 464 465 .org kvm_ia64_ivt+0x4000 466 /////////////////////////////////////////////////////////////////////// 467 // 0x4000 Entry 16 (size 64 bundles) Reserved 468 - KVM_FAULT(16) 469 470 .org kvm_ia64_ivt+0x4400 471 ////////////////////////////////////////////////////////////////////// 472 // 0x4400 Entry 17 (size 64 bundles) Reserved 473 - KVM_FAULT(17) 474 475 .org kvm_ia64_ivt+0x4800 476 ////////////////////////////////////////////////////////////////////// 477 // 0x4800 Entry 18 (size 64 bundles) Reserved 478 - KVM_FAULT(18) 479 480 .org kvm_ia64_ivt+0x4c00 481 ////////////////////////////////////////////////////////////////////// 482 // 0x4c00 Entry 19 (size 64 bundles) Reserved 483 - KVM_FAULT(19) 484 485 .org kvm_ia64_ivt+0x5000 486 ////////////////////////////////////////////////////////////////////// 487 // 0x5000 Entry 20 (size 16 bundles) Page Not Present 488 ENTRY(kvm_page_not_present) 489 - KVM_REFLECT(20) 490 END(kvm_page_not_present) 491 492 .org kvm_ia64_ivt+0x5100 493 /////////////////////////////////////////////////////////////////////// 494 // 0x5100 Entry 21 (size 16 bundles) Key Permission vector 495 ENTRY(kvm_key_permission) 496 - KVM_REFLECT(21) 497 END(kvm_key_permission) 498 499 .org kvm_ia64_ivt+0x5200 500 ////////////////////////////////////////////////////////////////////// 501 // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) 502 ENTRY(kvm_iaccess_rights) 503 - KVM_REFLECT(22) 504 END(kvm_iaccess_rights) 505 506 .org kvm_ia64_ivt+0x5300 507 ////////////////////////////////////////////////////////////////////// 508 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) 509 ENTRY(kvm_daccess_rights) 510 - KVM_REFLECT(23) 511 END(kvm_daccess_rights) 512 513 .org kvm_ia64_ivt+0x5400 514 ///////////////////////////////////////////////////////////////////// 515 // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) 516 ENTRY(kvm_general_exception) 517 - KVM_REFLECT(24) 518 - KVM_FAULT(24) 519 END(kvm_general_exception) 520 521 .org kvm_ia64_ivt+0x5500 522 ////////////////////////////////////////////////////////////////////// 523 // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) 524 ENTRY(kvm_disabled_fp_reg) 525 - KVM_REFLECT(25) 526 END(kvm_disabled_fp_reg) 527 528 .org kvm_ia64_ivt+0x5600 529 //////////////////////////////////////////////////////////////////// 530 // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) 531 ENTRY(kvm_nat_consumption) 532 - KVM_REFLECT(26) 533 END(kvm_nat_consumption) 534 535 .org kvm_ia64_ivt+0x5700 536 ///////////////////////////////////////////////////////////////////// 537 // 0x5700 Entry 27 (size 16 bundles) Speculation (40) 538 ENTRY(kvm_speculation_vector) 539 - KVM_REFLECT(27) 540 END(kvm_speculation_vector) 541 542 .org kvm_ia64_ivt+0x5800 543 ///////////////////////////////////////////////////////////////////// 544 // 
0x5800 Entry 28 (size 16 bundles) Reserved 545 - KVM_FAULT(28) 546 547 .org kvm_ia64_ivt+0x5900 548 /////////////////////////////////////////////////////////////////// 549 // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) 550 ENTRY(kvm_debug_vector) 551 - KVM_FAULT(29) 552 END(kvm_debug_vector) 553 554 .org kvm_ia64_ivt+0x5a00 555 /////////////////////////////////////////////////////////////// 556 // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) 557 ENTRY(kvm_unaligned_access) 558 - KVM_REFLECT(30) 559 END(kvm_unaligned_access) 560 561 .org kvm_ia64_ivt+0x5b00 562 ////////////////////////////////////////////////////////////////////// 563 // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) 564 ENTRY(kvm_unsupported_data_reference) 565 - KVM_REFLECT(31) 566 END(kvm_unsupported_data_reference) 567 568 .org kvm_ia64_ivt+0x5c00 569 //////////////////////////////////////////////////////////////////// 570 // 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) 571 ENTRY(kvm_floating_point_fault) 572 - KVM_REFLECT(32) 573 END(kvm_floating_point_fault) 574 575 .org kvm_ia64_ivt+0x5d00 576 ///////////////////////////////////////////////////////////////////// 577 // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) 578 ENTRY(kvm_floating_point_trap) 579 - KVM_REFLECT(33) 580 END(kvm_floating_point_trap) 581 582 .org kvm_ia64_ivt+0x5e00 583 ////////////////////////////////////////////////////////////////////// 584 // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) 585 ENTRY(kvm_lower_privilege_trap) 586 - KVM_REFLECT(34) 587 END(kvm_lower_privilege_trap) 588 589 .org kvm_ia64_ivt+0x5f00 590 ////////////////////////////////////////////////////////////////////// 591 // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) 592 ENTRY(kvm_taken_branch_trap) 593 - KVM_REFLECT(35) 594 END(kvm_taken_branch_trap) 595 596 .org kvm_ia64_ivt+0x6000 597 //////////////////////////////////////////////////////////////////// 598 // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) 599 ENTRY(kvm_single_step_trap) 600 - KVM_REFLECT(36) 601 END(kvm_single_step_trap) 602 .global kvm_virtualization_fault_back 603 .org kvm_ia64_ivt+0x6100 604 ///////////////////////////////////////////////////////////////////// 605 // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault 606 ENTRY(kvm_virtualization_fault) 607 - mov r31=pr 608 - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 609 - ;; 610 - st8 [r16] = r1 611 - adds r17 = VMM_VCPU_GP_OFFSET, r21 612 - ;; 613 - ld8 r1 = [r17] 614 - cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 615 - cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 616 - cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 617 - cmp.eq p9,p0=EVENT_RSM,r24 618 - cmp.eq p10,p0=EVENT_SSM,r24 619 - cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 620 - cmp.eq p12,p0=EVENT_THASH,r24 621 - (p6) br.dptk.many kvm_asm_mov_from_ar 622 - (p7) br.dptk.many kvm_asm_mov_from_rr 623 - (p8) br.dptk.many kvm_asm_mov_to_rr 624 - (p9) br.dptk.many kvm_asm_rsm 625 - (p10) br.dptk.many kvm_asm_ssm 626 - (p11) br.dptk.many kvm_asm_mov_to_psr 627 - (p12) br.dptk.many kvm_asm_thash 628 - ;; 629 kvm_virtualization_fault_back: 630 - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 631 - ;; 632 - ld8 r1 = [r16] 633 - ;; 634 - mov r19=37 635 - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 636 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 637 - ;; 638 - st8 [r16] = r24 639 - st8 [r17] = r25 640 - ;; 641 - cmp.ne p6,p0=EVENT_RFI, r24 642 - (p6) br.sptk kvm_dispatch_virtualization_fault 643 - ;; 644 - adds r18=VMM_VPD_BASE_OFFSET,r21 645 - ;; 646 - 
ld8 r18=[r18] 647 - ;; 648 - adds r18=VMM_VPD_VIFS_OFFSET,r18 649 - ;; 650 - ld8 r18=[r18] 651 - ;; 652 - tbit.z p6,p0=r18,63 653 - (p6) br.sptk kvm_dispatch_virtualization_fault 654 - ;; 655 - //if vifs.v=1 desert current register frame 656 - alloc r18=ar.pfs,0,0,0,0 657 - br.sptk kvm_dispatch_virtualization_fault 658 END(kvm_virtualization_fault) 659 660 .org kvm_ia64_ivt+0x6200 661 ////////////////////////////////////////////////////////////// 662 // 0x6200 Entry 38 (size 16 bundles) Reserved 663 - KVM_FAULT(38) 664 665 .org kvm_ia64_ivt+0x6300 666 ///////////////////////////////////////////////////////////////// 667 // 0x6300 Entry 39 (size 16 bundles) Reserved 668 - KVM_FAULT(39) 669 670 .org kvm_ia64_ivt+0x6400 671 ///////////////////////////////////////////////////////////////// 672 // 0x6400 Entry 40 (size 16 bundles) Reserved 673 - KVM_FAULT(40) 674 675 .org kvm_ia64_ivt+0x6500 676 ////////////////////////////////////////////////////////////////// 677 // 0x6500 Entry 41 (size 16 bundles) Reserved 678 - KVM_FAULT(41) 679 680 .org kvm_ia64_ivt+0x6600 681 ////////////////////////////////////////////////////////////////// 682 // 0x6600 Entry 42 (size 16 bundles) Reserved 683 - KVM_FAULT(42) 684 685 .org kvm_ia64_ivt+0x6700 686 ////////////////////////////////////////////////////////////////// 687 // 0x6700 Entry 43 (size 16 bundles) Reserved 688 - KVM_FAULT(43) 689 690 .org kvm_ia64_ivt+0x6800 691 ////////////////////////////////////////////////////////////////// 692 // 0x6800 Entry 44 (size 16 bundles) Reserved 693 - KVM_FAULT(44) 694 695 .org kvm_ia64_ivt+0x6900 696 /////////////////////////////////////////////////////////////////// 697 // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception 698 //(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) 699 ENTRY(kvm_ia32_exception) 700 - KVM_FAULT(45) 701 END(kvm_ia32_exception) 702 703 .org kvm_ia64_ivt+0x6a00 704 //////////////////////////////////////////////////////////////////// 705 // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) 706 ENTRY(kvm_ia32_intercept) 707 - KVM_FAULT(47) 708 END(kvm_ia32_intercept) 709 710 .org kvm_ia64_ivt+0x6c00 711 ///////////////////////////////////////////////////////////////////// 712 // 0x6c00 Entry 48 (size 16 bundles) Reserved 713 - KVM_FAULT(48) 714 715 .org kvm_ia64_ivt+0x6d00 716 ////////////////////////////////////////////////////////////////////// 717 // 0x6d00 Entry 49 (size 16 bundles) Reserved 718 - KVM_FAULT(49) 719 720 .org kvm_ia64_ivt+0x6e00 721 ////////////////////////////////////////////////////////////////////// 722 // 0x6e00 Entry 50 (size 16 bundles) Reserved 723 - KVM_FAULT(50) 724 725 .org kvm_ia64_ivt+0x6f00 726 ///////////////////////////////////////////////////////////////////// 727 // 0x6f00 Entry 51 (size 16 bundles) Reserved 728 - KVM_FAULT(52) 729 730 .org kvm_ia64_ivt+0x7100 731 //////////////////////////////////////////////////////////////////// 732 // 0x7100 Entry 53 (size 16 bundles) Reserved 733 - KVM_FAULT(53) 734 735 .org kvm_ia64_ivt+0x7200 736 ///////////////////////////////////////////////////////////////////// 737 // 0x7200 Entry 54 (size 16 bundles) Reserved 738 - KVM_FAULT(54) 739 740 .org kvm_ia64_ivt+0x7300 741 //////////////////////////////////////////////////////////////////// 742 // 0x7300 Entry 55 (size 16 bundles) Reserved 743 - KVM_FAULT(55) 744 745 .org kvm_ia64_ivt+0x7400 746 //////////////////////////////////////////////////////////////////// 747 // 0x7400 Entry 56 (size 16 bundles) Reserved 748 - KVM_FAULT(56) 749 750 .org 
kvm_ia64_ivt+0x7500 751 ///////////////////////////////////////////////////////////////////// 752 // 0x7500 Entry 57 (size 16 bundles) Reserved 753 - KVM_FAULT(57) 754 755 .org kvm_ia64_ivt+0x7600 756 ///////////////////////////////////////////////////////////////////// 757 // 0x7600 Entry 58 (size 16 bundles) Reserved 758 - KVM_FAULT(58) 759 760 .org kvm_ia64_ivt+0x7700 761 //////////////////////////////////////////////////////////////////// 762 // 0x7700 Entry 59 (size 16 bundles) Reserved 763 - KVM_FAULT(59) 764 765 .org kvm_ia64_ivt+0x7800 766 //////////////////////////////////////////////////////////////////// 767 // 0x7800 Entry 60 (size 16 bundles) Reserved 768 - KVM_FAULT(60) 769 770 .org kvm_ia64_ivt+0x7900 771 ///////////////////////////////////////////////////////////////////// 772 // 0x7900 Entry 61 (size 16 bundles) Reserved 773 - KVM_FAULT(61) 774 775 .org kvm_ia64_ivt+0x7a00 776 ///////////////////////////////////////////////////////////////////// 777 // 0x7a00 Entry 62 (size 16 bundles) Reserved 778 - KVM_FAULT(62) 779 780 .org kvm_ia64_ivt+0x7b00 781 ///////////////////////////////////////////////////////////////////// 782 // 0x7b00 Entry 63 (size 16 bundles) Reserved 783 - KVM_FAULT(63) 784 785 .org kvm_ia64_ivt+0x7c00 786 //////////////////////////////////////////////////////////////////// 787 // 0x7c00 Entry 64 (size 16 bundles) Reserved 788 - KVM_FAULT(64) 789 790 .org kvm_ia64_ivt+0x7d00 791 ///////////////////////////////////////////////////////////////////// 792 // 0x7d00 Entry 65 (size 16 bundles) Reserved 793 - KVM_FAULT(65) 794 795 .org kvm_ia64_ivt+0x7e00 796 ///////////////////////////////////////////////////////////////////// 797 // 0x7e00 Entry 66 (size 16 bundles) Reserved 798 - KVM_FAULT(66) 799 800 .org kvm_ia64_ivt+0x7f00 801 //////////////////////////////////////////////////////////////////// 802 // 0x7f00 Entry 67 (size 16 bundles) Reserved 803 - KVM_FAULT(67) 804 805 .org kvm_ia64_ivt+0x8000 806 // There is no particular reason for this code to be here, other than that ··· 808 809 810 ENTRY(kvm_dtlb_miss_dispatch) 811 - mov r19 = 2 812 - KVM_SAVE_MIN_WITH_COVER_R19 813 - alloc r14=ar.pfs,0,0,3,0 814 - mov out0=cr.ifa 815 - mov out1=r15 816 - adds r3=8,r2 // set up second base pointer 817 - ;; 818 - ssm psr.ic 819 - ;; 820 - srlz.i // guarantee that interruption collection is on 821 - ;; 822 - //(p15) ssm psr.i // restore psr.i 823 - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 824 - ;; 825 - KVM_SAVE_REST 826 - KVM_SAVE_EXTRA 827 - mov rp=r14 828 - ;; 829 - adds out2=16,r12 830 - br.call.sptk.many b6=kvm_page_fault 831 END(kvm_dtlb_miss_dispatch) 832 833 ENTRY(kvm_itlb_miss_dispatch) 834 835 - KVM_SAVE_MIN_WITH_COVER_R19 836 - alloc r14=ar.pfs,0,0,3,0 837 - mov out0=cr.ifa 838 - mov out1=r15 839 - adds r3=8,r2 // set up second base pointer 840 - ;; 841 - ssm psr.ic 842 - ;; 843 - srlz.i // guarantee that interruption collection is on 844 - ;; 845 - //(p15) ssm psr.i // restore psr.i 846 - addl r14=@gprel(ia64_leave_hypervisor),gp 847 - ;; 848 - KVM_SAVE_REST 849 - mov rp=r14 850 - ;; 851 - adds out2=16,r12 852 - br.call.sptk.many b6=kvm_page_fault 853 END(kvm_itlb_miss_dispatch) 854 855 ENTRY(kvm_dispatch_reflection) 856 - /* 857 - * Input: 858 - * psr.ic: off 859 - * r19: intr type (offset into ivt, see ia64_int.h) 860 - * r31: contains saved predicates (pr) 861 - */ 862 - KVM_SAVE_MIN_WITH_COVER_R19 863 - alloc r14=ar.pfs,0,0,5,0 864 - mov out0=cr.ifa 865 - mov out1=cr.isr 866 - mov out2=cr.iim 867 - mov out3=r15 868 - adds r3=8,r2 // set up 
second base pointer 869 - ;; 870 - ssm psr.ic 871 - ;; 872 - srlz.i // guarantee that interruption collection is on 873 - ;; 874 - //(p15) ssm psr.i // restore psr.i 875 - addl r14=@gprel(ia64_leave_hypervisor),gp 876 - ;; 877 - KVM_SAVE_REST 878 - mov rp=r14 879 - ;; 880 - adds out4=16,r12 881 - br.call.sptk.many b6=reflect_interruption 882 END(kvm_dispatch_reflection) 883 884 ENTRY(kvm_dispatch_virtualization_fault) 885 - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 886 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 887 - ;; 888 - st8 [r16] = r24 889 - st8 [r17] = r25 890 - ;; 891 - KVM_SAVE_MIN_WITH_COVER_R19 892 - ;; 893 - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) 894 - mov out0=r13 //vcpu 895 - adds r3=8,r2 // set up second base pointer 896 - ;; 897 - ssm psr.ic 898 - ;; 899 - srlz.i // guarantee that interruption collection is on 900 - ;; 901 - //(p15) ssm psr.i // restore psr.i 902 - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 903 - ;; 904 - KVM_SAVE_REST 905 - KVM_SAVE_EXTRA 906 - mov rp=r14 907 - ;; 908 - adds out1=16,sp //regs 909 - br.call.sptk.many b6=kvm_emulate 910 END(kvm_dispatch_virtualization_fault) 911 912 913 ENTRY(kvm_dispatch_interrupt) 914 - KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 915 - ;; 916 - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 917 - //mov out0=cr.ivr // pass cr.ivr as first arg 918 - adds r3=8,r2 // set up second base pointer for SAVE_REST 919 - ;; 920 - ssm psr.ic 921 - ;; 922 - srlz.i 923 - ;; 924 - //(p15) ssm psr.i 925 - addl r14=@gprel(ia64_leave_hypervisor),gp 926 - ;; 927 - KVM_SAVE_REST 928 - mov rp=r14 929 - ;; 930 - mov out0=r13 // pass pointer to pt_regs as second arg 931 - br.call.sptk.many b6=kvm_ia64_handle_irq 932 END(kvm_dispatch_interrupt) 933 - 934 - 935 - 936 937 GLOBAL_ENTRY(ia64_leave_nested) 938 rsm psr.i ··· 1008 ;; 1009 ldf.fill f11=[r2] 1010 // mov r18=r13 1011 - // mov r21=r13 1012 adds r16=PT(CR_IPSR)+16,r12 1013 adds r17=PT(CR_IIP)+16,r12 1014 ;; ··· 1058 rfi 1059 END(ia64_leave_nested) 1060 1061 - 1062 - 1063 GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) 1064 - /* 1065 - * work.need_resched etc. 
mustn't get changed 1066 - *by this CPU before it returns to 1067 - ;; 1068 - * user- or fsys-mode, hence we disable interrupts early on: 1069 - */ 1070 - adds r2 = PT(R4)+16,r12 1071 - adds r3 = PT(R5)+16,r12 1072 - adds r8 = PT(EML_UNAT)+16,r12 1073 - ;; 1074 - ld8 r8 = [r8] 1075 - ;; 1076 - mov ar.unat=r8 1077 - ;; 1078 - ld8.fill r4=[r2],16 //load r4 1079 - ld8.fill r5=[r3],16 //load r5 1080 - ;; 1081 - ld8.fill r6=[r2] //load r6 1082 - ld8.fill r7=[r3] //load r7 1083 - ;; 1084 END(ia64_leave_hypervisor_prepare) 1085 //fall through 1086 GLOBAL_ENTRY(ia64_leave_hypervisor) 1087 - rsm psr.i 1088 - ;; 1089 - br.call.sptk.many b0=leave_hypervisor_tail 1090 - ;; 1091 - adds r20=PT(PR)+16,r12 1092 - adds r8=PT(EML_UNAT)+16,r12 1093 - ;; 1094 - ld8 r8=[r8] 1095 - ;; 1096 - mov ar.unat=r8 1097 - ;; 1098 - lfetch [r20],PT(CR_IPSR)-PT(PR) 1099 - adds r2 = PT(B6)+16,r12 1100 - adds r3 = PT(B7)+16,r12 1101 - ;; 1102 - lfetch [r20] 1103 - ;; 1104 - ld8 r24=[r2],16 /* B6 */ 1105 - ld8 r25=[r3],16 /* B7 */ 1106 - ;; 1107 - ld8 r26=[r2],16 /* ar_csd */ 1108 - ld8 r27=[r3],16 /* ar_ssd */ 1109 - mov b6 = r24 1110 - ;; 1111 - ld8.fill r8=[r2],16 1112 - ld8.fill r9=[r3],16 1113 - mov b7 = r25 1114 - ;; 1115 - mov ar.csd = r26 1116 - mov ar.ssd = r27 1117 - ;; 1118 - ld8.fill r10=[r2],PT(R15)-PT(R10) 1119 - ld8.fill r11=[r3],PT(R14)-PT(R11) 1120 - ;; 1121 - ld8.fill r15=[r2],PT(R16)-PT(R15) 1122 - ld8.fill r14=[r3],PT(R17)-PT(R14) 1123 - ;; 1124 - ld8.fill r16=[r2],16 1125 - ld8.fill r17=[r3],16 1126 - ;; 1127 - ld8.fill r18=[r2],16 1128 - ld8.fill r19=[r3],16 1129 - ;; 1130 - ld8.fill r20=[r2],16 1131 - ld8.fill r21=[r3],16 1132 - ;; 1133 - ld8.fill r22=[r2],16 1134 - ld8.fill r23=[r3],16 1135 - ;; 1136 - ld8.fill r24=[r2],16 1137 - ld8.fill r25=[r3],16 1138 - ;; 1139 - ld8.fill r26=[r2],16 1140 - ld8.fill r27=[r3],16 1141 - ;; 1142 - ld8.fill r28=[r2],16 1143 - ld8.fill r29=[r3],16 1144 - ;; 1145 - ld8.fill r30=[r2],PT(F6)-PT(R30) 1146 - ld8.fill r31=[r3],PT(F7)-PT(R31) 1147 - ;; 1148 - rsm psr.i | psr.ic 1149 - // initiate turning off of interrupt and interruption collection 1150 - invala // invalidate ALAT 1151 - ;; 1152 - srlz.i // ensure interruption collection is off 1153 - ;; 1154 - bsw.0 1155 - ;; 1156 - adds r16 = PT(CR_IPSR)+16,r12 1157 - adds r17 = PT(CR_IIP)+16,r12 1158 - mov r21=r13 // get current 1159 - ;; 1160 - ld8 r31=[r16],16 // load cr.ipsr 1161 - ld8 r30=[r17],16 // load cr.iip 1162 - ;; 1163 - ld8 r29=[r16],16 // load cr.ifs 1164 - ld8 r28=[r17],16 // load ar.unat 1165 - ;; 1166 - ld8 r27=[r16],16 // load ar.pfs 1167 - ld8 r26=[r17],16 // load ar.rsc 1168 - ;; 1169 - ld8 r25=[r16],16 // load ar.rnat 1170 - ld8 r24=[r17],16 // load ar.bspstore 1171 - ;; 1172 - ld8 r23=[r16],16 // load predicates 1173 - ld8 r22=[r17],16 // load b0 1174 - ;; 1175 - ld8 r20=[r16],16 // load ar.rsc value for "loadrs" 1176 - ld8.fill r1=[r17],16 //load r1 1177 - ;; 1178 - ld8.fill r12=[r16],16 //load r12 1179 - ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 1180 - ;; 1181 - ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr 1182 - ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 1183 - ;; 1184 - ld8.fill r3=[r16] //load r3 1185 - ld8 r18=[r17] //load ar_ccv 1186 - ;; 1187 - mov ar.fpsr=r19 1188 - mov ar.ccv=r18 1189 - shr.u r18=r20,16 1190 - ;; 1191 kvm_rbs_switch: 1192 - mov r19=96 1193 1194 kvm_dont_preserve_current_frame: 1195 /* ··· 1198 # define pReturn p7 1199 # define Nregs 14 1200 1201 - alloc loc0=ar.pfs,2,Nregs-2,2,0 1202 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) 1203 - sub r19=r19,r18 
// r19 = (physStackedSize + 8) - dirtySize 1204 - ;; 1205 - mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" 1206 - shladd in0=loc1,3,r19 1207 - mov in1=0 1208 - ;; 1209 - TEXT_ALIGN(32) 1210 kvm_rse_clear_invalid: 1211 - alloc loc0=ar.pfs,2,Nregs-2,2,0 1212 - cmp.lt pRecurse,p0=Nregs*8,in0 1213 - // if more than Nregs regs left to clear, (re)curse 1214 - add out0=-Nregs*8,in0 1215 - add out1=1,in1 // increment recursion count 1216 - mov loc1=0 1217 - mov loc2=0 1218 - ;; 1219 - mov loc3=0 1220 - mov loc4=0 1221 - mov loc5=0 1222 - mov loc6=0 1223 - mov loc7=0 1224 (pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid 1225 - ;; 1226 - mov loc8=0 1227 - mov loc9=0 1228 - cmp.ne pReturn,p0=r0,in1 1229 - // if recursion count != 0, we need to do a br.ret 1230 - mov loc10=0 1231 - mov loc11=0 1232 (pReturn) br.ret.dptk.many b0 1233 1234 # undef pRecurse 1235 # undef pReturn 1236 1237 // loadrs has already been shifted 1238 - alloc r16=ar.pfs,0,0,0,0 // drop current register frame 1239 - ;; 1240 - loadrs 1241 - ;; 1242 - mov ar.bspstore=r24 1243 - ;; 1244 - mov ar.unat=r28 1245 - mov ar.rnat=r25 1246 - mov ar.rsc=r26 1247 - ;; 1248 - mov cr.ipsr=r31 1249 - mov cr.iip=r30 1250 - mov cr.ifs=r29 1251 - mov ar.pfs=r27 1252 - adds r18=VMM_VPD_BASE_OFFSET,r21 1253 - ;; 1254 - ld8 r18=[r18] //vpd 1255 - adds r17=VMM_VCPU_ISR_OFFSET,r21 1256 - ;; 1257 - ld8 r17=[r17] 1258 - adds r19=VMM_VPD_VPSR_OFFSET,r18 1259 - ;; 1260 - ld8 r19=[r19] //vpsr 1261 - mov r25=r18 1262 - adds r16= VMM_VCPU_GP_OFFSET,r21 1263 - ;; 1264 - ld8 r16= [r16] // Put gp in r24 1265 - movl r24=@gprel(ia64_vmm_entry) // calculate return address 1266 - ;; 1267 - add r24=r24,r16 1268 - ;; 1269 - br.sptk.many kvm_vps_sync_write // call the service 1270 - ;; 1271 END(ia64_leave_hypervisor) 1272 // fall through 1273 GLOBAL_ENTRY(ia64_vmm_entry) ··· 1280 * r22:b0 1281 * r23:predicate 1282 */ 1283 - mov r24=r22 1284 - mov r25=r18 1285 - tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic 1286 - (p1) br.cond.sptk.few kvm_vps_resume_normal 1287 - (p2) br.cond.sptk.many kvm_vps_resume_handler 1288 - ;; 1289 END(ia64_vmm_entry) 1290 - 1291 - 1292 1293 /* 1294 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, ··· 1305 entry = loc3 1306 hostret = r24 1307 1308 - alloc pfssave=ar.pfs,4,4,0,0 1309 - mov rpsave=rp 1310 - adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 1311 - ;; 1312 - ld8 entry=[entry] 1313 - 1: mov hostret=ip 1314 - mov r25=in1 // copy arguments 1315 - mov r26=in2 1316 - mov r27=in3 1317 - mov psrsave=psr 1318 - ;; 1319 - tbit.nz p6,p0=psrsave,14 // IA64_PSR_I 1320 - tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC 1321 - ;; 1322 - add hostret=2f-1b,hostret // calculate return address 1323 - add entry=entry,in0 1324 - ;; 1325 - rsm psr.i | psr.ic 1326 - ;; 1327 - srlz.i 1328 - mov b6=entry 1329 - br.cond.sptk b6 // call the service 1330 2: 1331 - // Architectural sequence for enabling interrupts if necessary 1332 (p7) ssm psr.ic 1333 - ;; 1334 (p7) srlz.i 1335 - ;; 1336 //(p6) ssm psr.i 1337 - ;; 1338 - mov rp=rpsave 1339 - mov ar.pfs=pfssave 1340 - mov r8=r31 1341 - ;; 1342 - srlz.d 1343 - br.ret.sptk rp 1344 1345 END(ia64_call_vsa) 1346 1347 #define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) 1348 1349 GLOBAL_ENTRY(vmm_reset_entry) 1350 - //set up ipsr, iip, vpd.vpsr, dcr 1351 - // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 1352 - // For DCR: all bits 0 1353 - bsw.0 1354 - ;; 1355 - mov r21 =r13 1356 - adds r14=-VMM_PT_REGS_SIZE, r12 1357 - ;; 1358 - movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 1359 - movl 
r10=0x8000000000000000 1360 - adds r16=PT(CR_IIP), r14 1361 - adds r20=PT(R1), r14 1362 - ;; 1363 - rsm psr.ic | psr.i 1364 - ;; 1365 - srlz.i 1366 - ;; 1367 - mov ar.rsc = 0 1368 - ;; 1369 - flushrs 1370 - ;; 1371 - mov ar.bspstore = 0 1372 - // clear BSPSTORE 1373 - ;; 1374 - mov cr.ipsr=r6 1375 - mov cr.ifs=r10 1376 - ld8 r4 = [r16] // Set init iip for first run. 1377 - ld8 r1 = [r20] 1378 - ;; 1379 - mov cr.iip=r4 1380 - adds r16=VMM_VPD_BASE_OFFSET,r13 1381 - ;; 1382 - ld8 r18=[r16] 1383 - ;; 1384 - adds r19=VMM_VPD_VPSR_OFFSET,r18 1385 - ;; 1386 - ld8 r19=[r19] 1387 - mov r17=r0 1388 - mov r22=r0 1389 - mov r23=r0 1390 - br.cond.sptk ia64_vmm_entry 1391 - br.ret.sptk b0 1392 END(vmm_reset_entry)
··· 1 /* 2 + * arch/ia64/kvm/vmm_ivt.S 3 * 4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co 5 * Stephane Eranian <eranian@hpl.hp.com> ··· 70 # define PSR_DEFAULT_BITS 0 71 #endif 72 73 #define KVM_FAULT(n) \ 74 + kvm_fault_##n:; \ 75 + mov r19=n;; \ 76 + br.sptk.many kvm_vmm_panic; \ 77 + ;; \ 78 79 #define KVM_REFLECT(n) \ 80 + mov r31=pr; \ 81 + mov r19=n; /* prepare to save predicates */ \ 82 + mov r29=cr.ipsr; \ 83 + ;; \ 84 + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ 85 + (p7) br.sptk.many kvm_dispatch_reflection; \ 86 + br.sptk.many kvm_vmm_panic; \ 87 88 + GLOBAL_ENTRY(kvm_vmm_panic) 89 + KVM_SAVE_MIN_WITH_COVER_R19 90 + alloc r14=ar.pfs,0,0,1,0 91 + mov out0=r15 92 + adds r3=8,r2 // set up second base pointer 93 + ;; 94 + ssm psr.ic 95 + ;; 96 + srlz.i // guarantee that interruption collection is on 97 + ;; 98 + //(p15) ssm psr.i // restore psr.i 99 + addl r14=@gprel(ia64_leave_hypervisor),gp 100 + ;; 101 + KVM_SAVE_REST 102 + mov rp=r14 103 + ;; 104 + br.call.sptk.many b6=vmm_panic_handler; 105 + END(kvm_vmm_panic) 106 107 .section .text.ivt,"ax" 108 ··· 105 /////////////////////////////////////////////////////////////// 106 // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) 107 ENTRY(kvm_vhpt_miss) 108 + KVM_FAULT(0) 109 END(kvm_vhpt_miss) 110 111 .org kvm_ia64_ivt+0x400 112 //////////////////////////////////////////////////////////////// 113 // 0x0400 Entry 1 (size 64 bundles) ITLB (21) 114 ENTRY(kvm_itlb_miss) 115 + mov r31 = pr 116 + mov r29=cr.ipsr; 117 + ;; 118 + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 119 + (p6) br.sptk kvm_alt_itlb_miss 120 + mov r19 = 1 121 + br.sptk kvm_itlb_miss_dispatch 122 + KVM_FAULT(1); 123 END(kvm_itlb_miss) 124 125 .org kvm_ia64_ivt+0x0800 126 ////////////////////////////////////////////////////////////////// 127 // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) 128 ENTRY(kvm_dtlb_miss) 129 + mov r31 = pr 130 + mov r29=cr.ipsr; 131 + ;; 132 + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; 133 + (p6) br.sptk kvm_alt_dtlb_miss 134 + br.sptk kvm_dtlb_miss_dispatch 135 END(kvm_dtlb_miss) 136 137 .org kvm_ia64_ivt+0x0c00 138 //////////////////////////////////////////////////////////////////// 139 // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) 140 ENTRY(kvm_alt_itlb_miss) 141 + mov r16=cr.ifa // get address that caused the TLB miss 142 + ;; 143 + movl r17=PAGE_KERNEL 144 + mov r24=cr.ipsr 145 + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 146 + ;; 147 + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 148 + ;; 149 + or r19=r17,r19 // insert PTE control bits into r19 150 + ;; 151 + movl r20=IA64_GRANULE_SHIFT<<2 152 + ;; 153 + mov cr.itir=r20 154 + ;; 155 + itc.i r19 // insert the TLB entry 156 + mov pr=r31,-1 157 + rfi 158 END(kvm_alt_itlb_miss) 159 160 .org kvm_ia64_ivt+0x1000 161 ///////////////////////////////////////////////////////////////////// 162 // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) 163 ENTRY(kvm_alt_dtlb_miss) 164 + mov r16=cr.ifa // get address that caused the TLB miss 165 + ;; 166 + movl r17=PAGE_KERNEL 167 + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 168 + mov r24=cr.ipsr 169 + ;; 170 + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits 171 + ;; 172 + or r19=r19,r17 // insert PTE control bits into r19 173 + ;; 174 + movl r20=IA64_GRANULE_SHIFT<<2 175 + ;; 176 + mov cr.itir=r20 177 + ;; 178 + itc.d r19 // insert the TLB entry 179 + mov pr=r31,-1 180 + rfi 181 END(kvm_alt_dtlb_miss) 182 183 .org kvm_ia64_ivt+0x1400 184 ////////////////////////////////////////////////////////////////////// 
185 // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) 186 ENTRY(kvm_nested_dtlb_miss) 187 + KVM_FAULT(5) 188 END(kvm_nested_dtlb_miss) 189 190 .org kvm_ia64_ivt+0x1800 191 ///////////////////////////////////////////////////////////////////// 192 // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) 193 ENTRY(kvm_ikey_miss) 194 + KVM_REFLECT(6) 195 END(kvm_ikey_miss) 196 197 .org kvm_ia64_ivt+0x1c00 198 ///////////////////////////////////////////////////////////////////// 199 // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) 200 ENTRY(kvm_dkey_miss) 201 + KVM_REFLECT(7) 202 END(kvm_dkey_miss) 203 204 .org kvm_ia64_ivt+0x2000 205 //////////////////////////////////////////////////////////////////// 206 // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) 207 ENTRY(kvm_dirty_bit) 208 + KVM_REFLECT(8) 209 END(kvm_dirty_bit) 210 211 .org kvm_ia64_ivt+0x2400 212 //////////////////////////////////////////////////////////////////// 213 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) 214 ENTRY(kvm_iaccess_bit) 215 + KVM_REFLECT(9) 216 END(kvm_iaccess_bit) 217 218 .org kvm_ia64_ivt+0x2800 219 /////////////////////////////////////////////////////////////////// 220 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) 221 ENTRY(kvm_daccess_bit) 222 + KVM_REFLECT(10) 223 END(kvm_daccess_bit) 224 225 .org kvm_ia64_ivt+0x2c00 226 ///////////////////////////////////////////////////////////////// 227 // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) 228 ENTRY(kvm_break_fault) 229 + mov r31=pr 230 + mov r19=11 231 + mov r29=cr.ipsr 232 + ;; 233 + KVM_SAVE_MIN_WITH_COVER_R19 234 + ;; 235 + alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!) 236 + mov out0=cr.ifa 237 + mov out2=cr.isr // FIXME: pity to make this slow access twice 238 + mov out3=cr.iim // FIXME: pity to make this slow access twice 239 + adds r3=8,r2 // set up second base pointer 240 + ;; 241 + ssm psr.ic 242 + ;; 243 + srlz.i // guarantee that interruption collection is on 244 + ;; 245 + //(p15)ssm psr.i // restore psr.i 246 + addl r14=@gprel(ia64_leave_hypervisor),gp 247 + ;; 248 + KVM_SAVE_REST 249 + mov rp=r14 250 + ;; 251 + adds out1=16,sp 252 + br.call.sptk.many b6=kvm_ia64_handle_break 253 + ;; 254 END(kvm_break_fault) 255 256 .org kvm_ia64_ivt+0x3000 257 ///////////////////////////////////////////////////////////////// 258 // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) 259 ENTRY(kvm_interrupt) 260 + mov r31=pr // prepare to save predicates 261 + mov r19=12 262 + mov r29=cr.ipsr 263 + ;; 264 + tbit.z p6,p7=r29,IA64_PSR_VM_BIT 265 + tbit.z p0,p15=r29,IA64_PSR_I_BIT 266 + ;; 267 + (p7) br.sptk kvm_dispatch_interrupt 268 + ;; 269 + mov r27=ar.rsc /* M */ 270 + mov r20=r1 /* A */ 271 + mov r25=ar.unat /* M */ 272 + mov r26=ar.pfs /* I */ 273 + mov r28=cr.iip /* M */ 274 + cover /* B (or nothing) */ 275 + ;; 276 + mov r1=sp 277 + ;; 278 + invala /* M */ 279 + mov r30=cr.ifs 280 + ;; 281 + addl r1=-VMM_PT_REGS_SIZE,r1 282 + ;; 283 + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ 284 + adds r16=PT(CR_IPSR),r1 285 + ;; 286 + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES 287 + st8 [r16]=r29 /* save cr.ipsr */ 288 + ;; 289 + lfetch.fault.excl.nt1 [r17] 290 + mov r29=b0 291 + ;; 292 + adds r16=PT(R8),r1 /* initialize first base pointer */ 293 + adds r17=PT(R9),r1 /* initialize second base pointer */ 294 + mov r18=r0 /* make sure r18 isn't NaT */ 295 + ;; 296 .mem.offset 0,0; st8.spill [r16]=r8,16 297 .mem.offset 8,0; st8.spill [r17]=r9,16 298 ;; 299 
.mem.offset 0,0; st8.spill [r16]=r10,24 300 .mem.offset 8,0; st8.spill [r17]=r11,24 301 ;; 302 + st8 [r16]=r28,16 /* save cr.iip */ 303 + st8 [r17]=r30,16 /* save cr.ifs */ 304 + mov r8=ar.fpsr /* M */ 305 + mov r9=ar.csd 306 + mov r10=ar.ssd 307 + movl r11=FPSR_DEFAULT /* L-unit */ 308 + ;; 309 + st8 [r16]=r25,16 /* save ar.unat */ 310 + st8 [r17]=r26,16 /* save ar.pfs */ 311 + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ 312 + ;; 313 + st8 [r16]=r27,16 /* save ar.rsc */ 314 + adds r17=16,r17 /* skip over ar_rnat field */ 315 + ;; 316 + st8 [r17]=r31,16 /* save predicates */ 317 + adds r16=16,r16 /* skip over ar_bspstore field */ 318 + ;; 319 + st8 [r16]=r29,16 /* save b0 */ 320 + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ 321 + ;; 322 .mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ 323 .mem.offset 8,0; st8.spill [r17]=r12,16 324 + adds r12=-16,r1 325 + /* switch to kernel memory stack (with 16 bytes of scratch) */ 326 + ;; 327 .mem.offset 0,0; st8.spill [r16]=r13,16 328 .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ 329 + ;; 330 .mem.offset 0,0; st8.spill [r16]=r15,16 331 .mem.offset 8,0; st8.spill [r17]=r14,16 332 + dep r14=-1,r0,60,4 333 + ;; 334 .mem.offset 0,0; st8.spill [r16]=r2,16 335 .mem.offset 8,0; st8.spill [r17]=r3,16 336 + adds r2=VMM_PT_REGS_R16_OFFSET,r1 337 + adds r14 = VMM_VCPU_GP_OFFSET,r13 338 + ;; 339 + mov r8=ar.ccv 340 + ld8 r14 = [r14] 341 + ;; 342 + mov r1=r14 /* establish kernel global pointer */ 343 + ;; \ 344 + bsw.1 345 + ;; 346 + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 347 + mov out0=r13 348 + ;; 349 + ssm psr.ic 350 + ;; 351 + srlz.i 352 + ;; 353 + //(p15) ssm psr.i 354 + adds r3=8,r2 // set up second base pointer for SAVE_REST 355 + srlz.i // ensure everybody knows psr.ic is back on 356 + ;; 357 .mem.offset 0,0; st8.spill [r2]=r16,16 358 .mem.offset 8,0; st8.spill [r3]=r17,16 359 + ;; 360 .mem.offset 0,0; st8.spill [r2]=r18,16 361 .mem.offset 8,0; st8.spill [r3]=r19,16 362 + ;; 363 .mem.offset 0,0; st8.spill [r2]=r20,16 364 .mem.offset 8,0; st8.spill [r3]=r21,16 365 + mov r18=b6 366 + ;; 367 .mem.offset 0,0; st8.spill [r2]=r22,16 368 .mem.offset 8,0; st8.spill [r3]=r23,16 369 + mov r19=b7 370 + ;; 371 .mem.offset 0,0; st8.spill [r2]=r24,16 372 .mem.offset 8,0; st8.spill [r3]=r25,16 373 + ;; 374 .mem.offset 0,0; st8.spill [r2]=r26,16 375 .mem.offset 8,0; st8.spill [r3]=r27,16 376 + ;; 377 .mem.offset 0,0; st8.spill [r2]=r28,16 378 .mem.offset 8,0; st8.spill [r3]=r29,16 379 + ;; 380 .mem.offset 0,0; st8.spill [r2]=r30,16 381 .mem.offset 8,0; st8.spill [r3]=r31,32 382 + ;; 383 + mov ar.fpsr=r11 /* M-unit */ 384 + st8 [r2]=r8,8 /* ar.ccv */ 385 + adds r24=PT(B6)-PT(F7),r3 386 + ;; 387 + stf.spill [r2]=f6,32 388 + stf.spill [r3]=f7,32 389 + ;; 390 + stf.spill [r2]=f8,32 391 + stf.spill [r3]=f9,32 392 + ;; 393 + stf.spill [r2]=f10 394 + stf.spill [r3]=f11 395 + adds r25=PT(B7)-PT(F11),r3 396 + ;; 397 + st8 [r24]=r18,16 /* b6 */ 398 + st8 [r25]=r19,16 /* b7 */ 399 + ;; 400 + st8 [r24]=r9 /* ar.csd */ 401 + st8 [r25]=r10 /* ar.ssd */ 402 + ;; 403 + srlz.d // make sure we see the effect of cr.ivr 404 + addl r14=@gprel(ia64_leave_nested),gp 405 + ;; 406 + mov rp=r14 407 + br.call.sptk.many b6=kvm_ia64_handle_irq 408 + ;; 409 END(kvm_interrupt) 410 411 .global kvm_dispatch_vexirq ··· 414 ////////////////////////////////////////////////////////////////////// 415 // 0x3400 Entry 13 (size 64 bundles) Reserved 416 ENTRY(kvm_virtual_exirq) 417 + mov r31=pr 418 + mov r19=13 419 + mov r30 =r0 420 + ;; 421 
kvm_dispatch_vexirq: 422 + cmp.eq p6,p0 = 1,r30 423 + ;; 424 + (p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 425 + ;; 426 + (p6) ld8 r1 = [r29] 427 + ;; 428 + KVM_SAVE_MIN_WITH_COVER_R19 429 + alloc r14=ar.pfs,0,0,1,0 430 + mov out0=r13 431 432 + ssm psr.ic 433 + ;; 434 + srlz.i // guarantee that interruption collection is on 435 + ;; 436 + //(p15) ssm psr.i // restore psr.i 437 + adds r3=8,r2 // set up second base pointer 438 + ;; 439 + KVM_SAVE_REST 440 + addl r14=@gprel(ia64_leave_hypervisor),gp 441 + ;; 442 + mov rp=r14 443 + br.call.sptk.many b6=kvm_vexirq 444 END(kvm_virtual_exirq) 445 446 .org kvm_ia64_ivt+0x3800 447 ///////////////////////////////////////////////////////////////////// 448 // 0x3800 Entry 14 (size 64 bundles) Reserved 449 + KVM_FAULT(14) 450 + // this code segment is from 2.6.16.13 451 452 .org kvm_ia64_ivt+0x3c00 453 /////////////////////////////////////////////////////////////////////// 454 // 0x3c00 Entry 15 (size 64 bundles) Reserved 455 + KVM_FAULT(15) 456 457 .org kvm_ia64_ivt+0x4000 458 /////////////////////////////////////////////////////////////////////// 459 // 0x4000 Entry 16 (size 64 bundles) Reserved 460 + KVM_FAULT(16) 461 462 .org kvm_ia64_ivt+0x4400 463 ////////////////////////////////////////////////////////////////////// 464 // 0x4400 Entry 17 (size 64 bundles) Reserved 465 + KVM_FAULT(17) 466 467 .org kvm_ia64_ivt+0x4800 468 ////////////////////////////////////////////////////////////////////// 469 // 0x4800 Entry 18 (size 64 bundles) Reserved 470 + KVM_FAULT(18) 471 472 .org kvm_ia64_ivt+0x4c00 473 ////////////////////////////////////////////////////////////////////// 474 // 0x4c00 Entry 19 (size 64 bundles) Reserved 475 + KVM_FAULT(19) 476 477 .org kvm_ia64_ivt+0x5000 478 ////////////////////////////////////////////////////////////////////// 479 // 0x5000 Entry 20 (size 16 bundles) Page Not Present 480 ENTRY(kvm_page_not_present) 481 + KVM_REFLECT(20) 482 END(kvm_page_not_present) 483 484 .org kvm_ia64_ivt+0x5100 485 /////////////////////////////////////////////////////////////////////// 486 // 0x5100 Entry 21 (size 16 bundles) Key Permission vector 487 ENTRY(kvm_key_permission) 488 + KVM_REFLECT(21) 489 END(kvm_key_permission) 490 491 .org kvm_ia64_ivt+0x5200 492 ////////////////////////////////////////////////////////////////////// 493 // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) 494 ENTRY(kvm_iaccess_rights) 495 + KVM_REFLECT(22) 496 END(kvm_iaccess_rights) 497 498 .org kvm_ia64_ivt+0x5300 499 ////////////////////////////////////////////////////////////////////// 500 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) 501 ENTRY(kvm_daccess_rights) 502 + KVM_REFLECT(23) 503 END(kvm_daccess_rights) 504 505 .org kvm_ia64_ivt+0x5400 506 ///////////////////////////////////////////////////////////////////// 507 // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) 508 ENTRY(kvm_general_exception) 509 + KVM_REFLECT(24) 510 + KVM_FAULT(24) 511 END(kvm_general_exception) 512 513 .org kvm_ia64_ivt+0x5500 514 ////////////////////////////////////////////////////////////////////// 515 // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) 516 ENTRY(kvm_disabled_fp_reg) 517 + KVM_REFLECT(25) 518 END(kvm_disabled_fp_reg) 519 520 .org kvm_ia64_ivt+0x5600 521 //////////////////////////////////////////////////////////////////// 522 // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) 523 ENTRY(kvm_nat_consumption) 524 + KVM_REFLECT(26) 525 END(kvm_nat_consumption) 526 527 .org 
kvm_ia64_ivt+0x5700 528 ///////////////////////////////////////////////////////////////////// 529 // 0x5700 Entry 27 (size 16 bundles) Speculation (40) 530 ENTRY(kvm_speculation_vector) 531 + KVM_REFLECT(27) 532 END(kvm_speculation_vector) 533 534 .org kvm_ia64_ivt+0x5800 535 ///////////////////////////////////////////////////////////////////// 536 // 0x5800 Entry 28 (size 16 bundles) Reserved 537 + KVM_FAULT(28) 538 539 .org kvm_ia64_ivt+0x5900 540 /////////////////////////////////////////////////////////////////// 541 // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) 542 ENTRY(kvm_debug_vector) 543 + KVM_FAULT(29) 544 END(kvm_debug_vector) 545 546 .org kvm_ia64_ivt+0x5a00 547 /////////////////////////////////////////////////////////////// 548 // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) 549 ENTRY(kvm_unaligned_access) 550 + KVM_REFLECT(30) 551 END(kvm_unaligned_access) 552 553 .org kvm_ia64_ivt+0x5b00 554 ////////////////////////////////////////////////////////////////////// 555 // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) 556 ENTRY(kvm_unsupported_data_reference) 557 + KVM_REFLECT(31) 558 END(kvm_unsupported_data_reference) 559 560 .org kvm_ia64_ivt+0x5c00 561 //////////////////////////////////////////////////////////////////// 562 // 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) 563 ENTRY(kvm_floating_point_fault) 564 + KVM_REFLECT(32) 565 END(kvm_floating_point_fault) 566 567 .org kvm_ia64_ivt+0x5d00 568 ///////////////////////////////////////////////////////////////////// 569 // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) 570 ENTRY(kvm_floating_point_trap) 571 + KVM_REFLECT(33) 572 END(kvm_floating_point_trap) 573 574 .org kvm_ia64_ivt+0x5e00 575 ////////////////////////////////////////////////////////////////////// 576 // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) 577 ENTRY(kvm_lower_privilege_trap) 578 + KVM_REFLECT(34) 579 END(kvm_lower_privilege_trap) 580 581 .org kvm_ia64_ivt+0x5f00 582 ////////////////////////////////////////////////////////////////////// 583 // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) 584 ENTRY(kvm_taken_branch_trap) 585 + KVM_REFLECT(35) 586 END(kvm_taken_branch_trap) 587 588 .org kvm_ia64_ivt+0x6000 589 //////////////////////////////////////////////////////////////////// 590 // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) 591 ENTRY(kvm_single_step_trap) 592 + KVM_REFLECT(36) 593 END(kvm_single_step_trap) 594 .global kvm_virtualization_fault_back 595 .org kvm_ia64_ivt+0x6100 596 ///////////////////////////////////////////////////////////////////// 597 // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault 598 ENTRY(kvm_virtualization_fault) 599 + mov r31=pr 600 + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 601 + ;; 602 + st8 [r16] = r1 603 + adds r17 = VMM_VCPU_GP_OFFSET, r21 604 + ;; 605 + ld8 r1 = [r17] 606 + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 607 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 608 + cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 609 + cmp.eq p9,p0=EVENT_RSM,r24 610 + cmp.eq p10,p0=EVENT_SSM,r24 611 + cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 612 + cmp.eq p12,p0=EVENT_THASH,r24 613 + (p6) br.dptk.many kvm_asm_mov_from_ar 614 + (p7) br.dptk.many kvm_asm_mov_from_rr 615 + (p8) br.dptk.many kvm_asm_mov_to_rr 616 + (p9) br.dptk.many kvm_asm_rsm 617 + (p10) br.dptk.many kvm_asm_ssm 618 + (p11) br.dptk.many kvm_asm_mov_to_psr 619 + (p12) br.dptk.many kvm_asm_thash 620 + ;; 621 kvm_virtualization_fault_back: 622 + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 
623 + ;; 624 + ld8 r1 = [r16] 625 + ;; 626 + mov r19=37 627 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 628 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 629 + ;; 630 + st8 [r16] = r24 631 + st8 [r17] = r25 632 + ;; 633 + cmp.ne p6,p0=EVENT_RFI, r24 634 + (p6) br.sptk kvm_dispatch_virtualization_fault 635 + ;; 636 + adds r18=VMM_VPD_BASE_OFFSET,r21 637 + ;; 638 + ld8 r18=[r18] 639 + ;; 640 + adds r18=VMM_VPD_VIFS_OFFSET,r18 641 + ;; 642 + ld8 r18=[r18] 643 + ;; 644 + tbit.z p6,p0=r18,63 645 + (p6) br.sptk kvm_dispatch_virtualization_fault 646 + ;; 647 + //if vifs.v=1 desert current register frame 648 + alloc r18=ar.pfs,0,0,0,0 649 + br.sptk kvm_dispatch_virtualization_fault 650 END(kvm_virtualization_fault) 651 652 .org kvm_ia64_ivt+0x6200 653 ////////////////////////////////////////////////////////////// 654 // 0x6200 Entry 38 (size 16 bundles) Reserved 655 + KVM_FAULT(38) 656 657 .org kvm_ia64_ivt+0x6300 658 ///////////////////////////////////////////////////////////////// 659 // 0x6300 Entry 39 (size 16 bundles) Reserved 660 + KVM_FAULT(39) 661 662 .org kvm_ia64_ivt+0x6400 663 ///////////////////////////////////////////////////////////////// 664 // 0x6400 Entry 40 (size 16 bundles) Reserved 665 + KVM_FAULT(40) 666 667 .org kvm_ia64_ivt+0x6500 668 ////////////////////////////////////////////////////////////////// 669 // 0x6500 Entry 41 (size 16 bundles) Reserved 670 + KVM_FAULT(41) 671 672 .org kvm_ia64_ivt+0x6600 673 ////////////////////////////////////////////////////////////////// 674 // 0x6600 Entry 42 (size 16 bundles) Reserved 675 + KVM_FAULT(42) 676 677 .org kvm_ia64_ivt+0x6700 678 ////////////////////////////////////////////////////////////////// 679 // 0x6700 Entry 43 (size 16 bundles) Reserved 680 + KVM_FAULT(43) 681 682 .org kvm_ia64_ivt+0x6800 683 ////////////////////////////////////////////////////////////////// 684 // 0x6800 Entry 44 (size 16 bundles) Reserved 685 + KVM_FAULT(44) 686 687 .org kvm_ia64_ivt+0x6900 688 /////////////////////////////////////////////////////////////////// 689 // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception 690 //(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) 691 ENTRY(kvm_ia32_exception) 692 + KVM_FAULT(45) 693 END(kvm_ia32_exception) 694 695 .org kvm_ia64_ivt+0x6a00 696 //////////////////////////////////////////////////////////////////// 697 // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) 698 ENTRY(kvm_ia32_intercept) 699 + KVM_FAULT(47) 700 END(kvm_ia32_intercept) 701 702 .org kvm_ia64_ivt+0x6c00 703 ///////////////////////////////////////////////////////////////////// 704 // 0x6c00 Entry 48 (size 16 bundles) Reserved 705 + KVM_FAULT(48) 706 707 .org kvm_ia64_ivt+0x6d00 708 ////////////////////////////////////////////////////////////////////// 709 // 0x6d00 Entry 49 (size 16 bundles) Reserved 710 + KVM_FAULT(49) 711 712 .org kvm_ia64_ivt+0x6e00 713 ////////////////////////////////////////////////////////////////////// 714 // 0x6e00 Entry 50 (size 16 bundles) Reserved 715 + KVM_FAULT(50) 716 717 .org kvm_ia64_ivt+0x6f00 718 ///////////////////////////////////////////////////////////////////// 719 // 0x6f00 Entry 51 (size 16 bundles) Reserved 720 + KVM_FAULT(52) 721 722 .org kvm_ia64_ivt+0x7100 723 //////////////////////////////////////////////////////////////////// 724 // 0x7100 Entry 53 (size 16 bundles) Reserved 725 + KVM_FAULT(53) 726 727 .org kvm_ia64_ivt+0x7200 728 ///////////////////////////////////////////////////////////////////// 729 // 0x7200 Entry 54 (size 16 bundles) Reserved 730 + KVM_FAULT(54) 731 732 .org 
kvm_ia64_ivt+0x7300 733 //////////////////////////////////////////////////////////////////// 734 // 0x7300 Entry 55 (size 16 bundles) Reserved 735 + KVM_FAULT(55) 736 737 .org kvm_ia64_ivt+0x7400 738 //////////////////////////////////////////////////////////////////// 739 // 0x7400 Entry 56 (size 16 bundles) Reserved 740 + KVM_FAULT(56) 741 742 .org kvm_ia64_ivt+0x7500 743 ///////////////////////////////////////////////////////////////////// 744 // 0x7500 Entry 57 (size 16 bundles) Reserved 745 + KVM_FAULT(57) 746 747 .org kvm_ia64_ivt+0x7600 748 ///////////////////////////////////////////////////////////////////// 749 // 0x7600 Entry 58 (size 16 bundles) Reserved 750 + KVM_FAULT(58) 751 752 .org kvm_ia64_ivt+0x7700 753 //////////////////////////////////////////////////////////////////// 754 // 0x7700 Entry 59 (size 16 bundles) Reserved 755 + KVM_FAULT(59) 756 757 .org kvm_ia64_ivt+0x7800 758 //////////////////////////////////////////////////////////////////// 759 // 0x7800 Entry 60 (size 16 bundles) Reserved 760 + KVM_FAULT(60) 761 762 .org kvm_ia64_ivt+0x7900 763 ///////////////////////////////////////////////////////////////////// 764 // 0x7900 Entry 61 (size 16 bundles) Reserved 765 + KVM_FAULT(61) 766 767 .org kvm_ia64_ivt+0x7a00 768 ///////////////////////////////////////////////////////////////////// 769 // 0x7a00 Entry 62 (size 16 bundles) Reserved 770 + KVM_FAULT(62) 771 772 .org kvm_ia64_ivt+0x7b00 773 ///////////////////////////////////////////////////////////////////// 774 // 0x7b00 Entry 63 (size 16 bundles) Reserved 775 + KVM_FAULT(63) 776 777 .org kvm_ia64_ivt+0x7c00 778 //////////////////////////////////////////////////////////////////// 779 // 0x7c00 Entry 64 (size 16 bundles) Reserved 780 + KVM_FAULT(64) 781 782 .org kvm_ia64_ivt+0x7d00 783 ///////////////////////////////////////////////////////////////////// 784 // 0x7d00 Entry 65 (size 16 bundles) Reserved 785 + KVM_FAULT(65) 786 787 .org kvm_ia64_ivt+0x7e00 788 ///////////////////////////////////////////////////////////////////// 789 // 0x7e00 Entry 66 (size 16 bundles) Reserved 790 + KVM_FAULT(66) 791 792 .org kvm_ia64_ivt+0x7f00 793 //////////////////////////////////////////////////////////////////// 794 // 0x7f00 Entry 67 (size 16 bundles) Reserved 795 + KVM_FAULT(67) 796 797 .org kvm_ia64_ivt+0x8000 798 // There is no particular reason for this code to be here, other than that ··· 804 805 806 ENTRY(kvm_dtlb_miss_dispatch) 807 + mov r19 = 2 808 + KVM_SAVE_MIN_WITH_COVER_R19 809 + alloc r14=ar.pfs,0,0,3,0 810 + mov out0=cr.ifa 811 + mov out1=r15 812 + adds r3=8,r2 // set up second base pointer 813 + ;; 814 + ssm psr.ic 815 + ;; 816 + srlz.i // guarantee that interruption collection is on 817 + ;; 818 + //(p15) ssm psr.i // restore psr.i 819 + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 820 + ;; 821 + KVM_SAVE_REST 822 + KVM_SAVE_EXTRA 823 + mov rp=r14 824 + ;; 825 + adds out2=16,r12 826 + br.call.sptk.many b6=kvm_page_fault 827 END(kvm_dtlb_miss_dispatch) 828 829 ENTRY(kvm_itlb_miss_dispatch) 830 831 + KVM_SAVE_MIN_WITH_COVER_R19 832 + alloc r14=ar.pfs,0,0,3,0 833 + mov out0=cr.ifa 834 + mov out1=r15 835 + adds r3=8,r2 // set up second base pointer 836 + ;; 837 + ssm psr.ic 838 + ;; 839 + srlz.i // guarantee that interruption collection is on 840 + ;; 841 + //(p15) ssm psr.i // restore psr.i 842 + addl r14=@gprel(ia64_leave_hypervisor),gp 843 + ;; 844 + KVM_SAVE_REST 845 + mov rp=r14 846 + ;; 847 + adds out2=16,r12 848 + br.call.sptk.many b6=kvm_page_fault 849 END(kvm_itlb_miss_dispatch) 850 851 
ENTRY(kvm_dispatch_reflection) 852 + /* 853 + * Input: 854 + * psr.ic: off 855 + * r19: intr type (offset into ivt, see ia64_int.h) 856 + * r31: contains saved predicates (pr) 857 + */ 858 + KVM_SAVE_MIN_WITH_COVER_R19 859 + alloc r14=ar.pfs,0,0,5,0 860 + mov out0=cr.ifa 861 + mov out1=cr.isr 862 + mov out2=cr.iim 863 + mov out3=r15 864 + adds r3=8,r2 // set up second base pointer 865 + ;; 866 + ssm psr.ic 867 + ;; 868 + srlz.i // guarantee that interruption collection is on 869 + ;; 870 + //(p15) ssm psr.i // restore psr.i 871 + addl r14=@gprel(ia64_leave_hypervisor),gp 872 + ;; 873 + KVM_SAVE_REST 874 + mov rp=r14 875 + ;; 876 + adds out4=16,r12 877 + br.call.sptk.many b6=reflect_interruption 878 END(kvm_dispatch_reflection) 879 880 ENTRY(kvm_dispatch_virtualization_fault) 881 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 882 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 883 + ;; 884 + st8 [r16] = r24 885 + st8 [r17] = r25 886 + ;; 887 + KVM_SAVE_MIN_WITH_COVER_R19 888 + ;; 889 + alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!) 890 + mov out0=r13 //vcpu 891 + adds r3=8,r2 // set up second base pointer 892 + ;; 893 + ssm psr.ic 894 + ;; 895 + srlz.i // guarantee that interruption collection is on 896 + ;; 897 + //(p15) ssm psr.i // restore psr.i 898 + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp 899 + ;; 900 + KVM_SAVE_REST 901 + KVM_SAVE_EXTRA 902 + mov rp=r14 903 + ;; 904 + adds out1=16,sp //regs 905 + br.call.sptk.many b6=kvm_emulate 906 END(kvm_dispatch_virtualization_fault) 907 908 909 ENTRY(kvm_dispatch_interrupt) 910 + KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 911 + ;; 912 + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group 913 + adds r3=8,r2 // set up second base pointer for SAVE_REST 914 + ;; 915 + ssm psr.ic 916 + ;; 917 + srlz.i 918 + ;; 919 + //(p15) ssm psr.i 920 + addl r14=@gprel(ia64_leave_hypervisor),gp 921 + ;; 922 + KVM_SAVE_REST 923 + mov rp=r14 924 + ;; 925 + mov out0=r13 // pass pointer to pt_regs as second arg 926 + br.call.sptk.many b6=kvm_ia64_handle_irq 927 END(kvm_dispatch_interrupt) 928 929 GLOBAL_ENTRY(ia64_leave_nested) 930 rsm psr.i ··· 1008 ;; 1009 ldf.fill f11=[r2] 1010 // mov r18=r13 1011 + // mov r21=r13 1012 adds r16=PT(CR_IPSR)+16,r12 1013 adds r17=PT(CR_IIP)+16,r12 1014 ;; ··· 1058 rfi 1059 END(ia64_leave_nested) 1060 1061 GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) 1062 + /* 1063 + * work.need_resched etc. 
mustn't get changed 1064 + *by this CPU before it returns to 1065 + * user- or fsys-mode, hence we disable interrupts early on: 1066 + */ 1067 + adds r2 = PT(R4)+16,r12 1068 + adds r3 = PT(R5)+16,r12 1069 + adds r8 = PT(EML_UNAT)+16,r12 1070 + ;; 1071 + ld8 r8 = [r8] 1072 + ;; 1073 + mov ar.unat=r8 1074 + ;; 1075 + ld8.fill r4=[r2],16 //load r4 1076 + ld8.fill r5=[r3],16 //load r5 1077 + ;; 1078 + ld8.fill r6=[r2] //load r6 1079 + ld8.fill r7=[r3] //load r7 1080 + ;; 1081 END(ia64_leave_hypervisor_prepare) 1082 //fall through 1083 GLOBAL_ENTRY(ia64_leave_hypervisor) 1084 + rsm psr.i 1085 + ;; 1086 + br.call.sptk.many b0=leave_hypervisor_tail 1087 + ;; 1088 + adds r20=PT(PR)+16,r12 1089 + adds r8=PT(EML_UNAT)+16,r12 1090 + ;; 1091 + ld8 r8=[r8] 1092 + ;; 1093 + mov ar.unat=r8 1094 + ;; 1095 + lfetch [r20],PT(CR_IPSR)-PT(PR) 1096 + adds r2 = PT(B6)+16,r12 1097 + adds r3 = PT(B7)+16,r12 1098 + ;; 1099 + lfetch [r20] 1100 + ;; 1101 + ld8 r24=[r2],16 /* B6 */ 1102 + ld8 r25=[r3],16 /* B7 */ 1103 + ;; 1104 + ld8 r26=[r2],16 /* ar_csd */ 1105 + ld8 r27=[r3],16 /* ar_ssd */ 1106 + mov b6 = r24 1107 + ;; 1108 + ld8.fill r8=[r2],16 1109 + ld8.fill r9=[r3],16 1110 + mov b7 = r25 1111 + ;; 1112 + mov ar.csd = r26 1113 + mov ar.ssd = r27 1114 + ;; 1115 + ld8.fill r10=[r2],PT(R15)-PT(R10) 1116 + ld8.fill r11=[r3],PT(R14)-PT(R11) 1117 + ;; 1118 + ld8.fill r15=[r2],PT(R16)-PT(R15) 1119 + ld8.fill r14=[r3],PT(R17)-PT(R14) 1120 + ;; 1121 + ld8.fill r16=[r2],16 1122 + ld8.fill r17=[r3],16 1123 + ;; 1124 + ld8.fill r18=[r2],16 1125 + ld8.fill r19=[r3],16 1126 + ;; 1127 + ld8.fill r20=[r2],16 1128 + ld8.fill r21=[r3],16 1129 + ;; 1130 + ld8.fill r22=[r2],16 1131 + ld8.fill r23=[r3],16 1132 + ;; 1133 + ld8.fill r24=[r2],16 1134 + ld8.fill r25=[r3],16 1135 + ;; 1136 + ld8.fill r26=[r2],16 1137 + ld8.fill r27=[r3],16 1138 + ;; 1139 + ld8.fill r28=[r2],16 1140 + ld8.fill r29=[r3],16 1141 + ;; 1142 + ld8.fill r30=[r2],PT(F6)-PT(R30) 1143 + ld8.fill r31=[r3],PT(F7)-PT(R31) 1144 + ;; 1145 + rsm psr.i | psr.ic 1146 + // initiate turning off of interrupt and interruption collection 1147 + invala // invalidate ALAT 1148 + ;; 1149 + srlz.i // ensure interruption collection is off 1150 + ;; 1151 + bsw.0 1152 + ;; 1153 + adds r16 = PT(CR_IPSR)+16,r12 1154 + adds r17 = PT(CR_IIP)+16,r12 1155 + mov r21=r13 // get current 1156 + ;; 1157 + ld8 r31=[r16],16 // load cr.ipsr 1158 + ld8 r30=[r17],16 // load cr.iip 1159 + ;; 1160 + ld8 r29=[r16],16 // load cr.ifs 1161 + ld8 r28=[r17],16 // load ar.unat 1162 + ;; 1163 + ld8 r27=[r16],16 // load ar.pfs 1164 + ld8 r26=[r17],16 // load ar.rsc 1165 + ;; 1166 + ld8 r25=[r16],16 // load ar.rnat 1167 + ld8 r24=[r17],16 // load ar.bspstore 1168 + ;; 1169 + ld8 r23=[r16],16 // load predicates 1170 + ld8 r22=[r17],16 // load b0 1171 + ;; 1172 + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" 1173 + ld8.fill r1=[r17],16 //load r1 1174 + ;; 1175 + ld8.fill r12=[r16],16 //load r12 1176 + ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 1177 + ;; 1178 + ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr 1179 + ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 1180 + ;; 1181 + ld8.fill r3=[r16] //load r3 1182 + ld8 r18=[r17] //load ar_ccv 1183 + ;; 1184 + mov ar.fpsr=r19 1185 + mov ar.ccv=r18 1186 + shr.u r18=r20,16 1187 + ;; 1188 kvm_rbs_switch: 1189 + mov r19=96 1190 1191 kvm_dont_preserve_current_frame: 1192 /* ··· 1201 # define pReturn p7 1202 # define Nregs 14 1203 1204 + alloc loc0=ar.pfs,2,Nregs-2,2,0 1205 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) 1206 + sub r19=r19,r18 // r19 = 
(physStackedSize + 8) - dirtySize 1207 + ;; 1208 + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" 1209 + shladd in0=loc1,3,r19 1210 + mov in1=0 1211 + ;; 1212 + TEXT_ALIGN(32) 1213 kvm_rse_clear_invalid: 1214 + alloc loc0=ar.pfs,2,Nregs-2,2,0 1215 + cmp.lt pRecurse,p0=Nregs*8,in0 1216 + // if more than Nregs regs left to clear, (re)curse 1217 + add out0=-Nregs*8,in0 1218 + add out1=1,in1 // increment recursion count 1219 + mov loc1=0 1220 + mov loc2=0 1221 + ;; 1222 + mov loc3=0 1223 + mov loc4=0 1224 + mov loc5=0 1225 + mov loc6=0 1226 + mov loc7=0 1227 (pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid 1228 + ;; 1229 + mov loc8=0 1230 + mov loc9=0 1231 + cmp.ne pReturn,p0=r0,in1 1232 + // if recursion count != 0, we need to do a br.ret 1233 + mov loc10=0 1234 + mov loc11=0 1235 (pReturn) br.ret.dptk.many b0 1236 1237 # undef pRecurse 1238 # undef pReturn 1239 1240 // loadrs has already been shifted 1241 + alloc r16=ar.pfs,0,0,0,0 // drop current register frame 1242 + ;; 1243 + loadrs 1244 + ;; 1245 + mov ar.bspstore=r24 1246 + ;; 1247 + mov ar.unat=r28 1248 + mov ar.rnat=r25 1249 + mov ar.rsc=r26 1250 + ;; 1251 + mov cr.ipsr=r31 1252 + mov cr.iip=r30 1253 + mov cr.ifs=r29 1254 + mov ar.pfs=r27 1255 + adds r18=VMM_VPD_BASE_OFFSET,r21 1256 + ;; 1257 + ld8 r18=[r18] //vpd 1258 + adds r17=VMM_VCPU_ISR_OFFSET,r21 1259 + ;; 1260 + ld8 r17=[r17] 1261 + adds r19=VMM_VPD_VPSR_OFFSET,r18 1262 + ;; 1263 + ld8 r19=[r19] //vpsr 1264 + mov r25=r18 1265 + adds r16= VMM_VCPU_GP_OFFSET,r21 1266 + ;; 1267 + ld8 r16= [r16] // Put gp in r24 1268 + movl r24=@gprel(ia64_vmm_entry) // calculate return address 1269 + ;; 1270 + add r24=r24,r16 1271 + ;; 1272 + br.sptk.many kvm_vps_sync_write // call the service 1273 + ;; 1274 END(ia64_leave_hypervisor) 1275 // fall through 1276 GLOBAL_ENTRY(ia64_vmm_entry) ··· 1283 * r22:b0 1284 * r23:predicate 1285 */ 1286 + mov r24=r22 1287 + mov r25=r18 1288 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic 1289 + (p1) br.cond.sptk.few kvm_vps_resume_normal 1290 + (p2) br.cond.sptk.many kvm_vps_resume_handler 1291 + ;; 1292 END(ia64_vmm_entry) 1293 1294 /* 1295 * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, ··· 1310 entry = loc3 1311 hostret = r24 1312 1313 + alloc pfssave=ar.pfs,4,4,0,0 1314 + mov rpsave=rp 1315 + adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 1316 + ;; 1317 + ld8 entry=[entry] 1318 + 1: mov hostret=ip 1319 + mov r25=in1 // copy arguments 1320 + mov r26=in2 1321 + mov r27=in3 1322 + mov psrsave=psr 1323 + ;; 1324 + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I 1325 + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC 1326 + ;; 1327 + add hostret=2f-1b,hostret // calculate return address 1328 + add entry=entry,in0 1329 + ;; 1330 + rsm psr.i | psr.ic 1331 + ;; 1332 + srlz.i 1333 + mov b6=entry 1334 + br.cond.sptk b6 // call the service 1335 2: 1336 + // Architectural sequence for enabling interrupts if necessary 1337 (p7) ssm psr.ic 1338 + ;; 1339 (p7) srlz.i 1340 + ;; 1341 //(p6) ssm psr.i 1342 + ;; 1343 + mov rp=rpsave 1344 + mov ar.pfs=pfssave 1345 + mov r8=r31 1346 + ;; 1347 + srlz.d 1348 + br.ret.sptk rp 1349 1350 END(ia64_call_vsa) 1351 1352 #define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) 1353 1354 GLOBAL_ENTRY(vmm_reset_entry) 1355 + //set up ipsr, iip, vpd.vpsr, dcr 1356 + // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 1357 + // For DCR: all bits 0 1358 + bsw.0 1359 + ;; 1360 + mov r21 =r13 1361 + adds r14=-VMM_PT_REGS_SIZE, r12 1362 + ;; 1363 + movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 1364 + movl r10=0x8000000000000000 1365 + adds 
r16=PT(CR_IIP), r14 1366 + adds r20=PT(R1), r14 1367 + ;; 1368 + rsm psr.ic | psr.i 1369 + ;; 1370 + srlz.i 1371 + ;; 1372 + mov ar.rsc = 0 1373 + ;; 1374 + flushrs 1375 + ;; 1376 + mov ar.bspstore = 0 1377 + // clear BSPSTORE 1378 + ;; 1379 + mov cr.ipsr=r6 1380 + mov cr.ifs=r10 1381 + ld8 r4 = [r16] // Set init iip for first run. 1382 + ld8 r1 = [r20] 1383 + ;; 1384 + mov cr.iip=r4 1385 + adds r16=VMM_VPD_BASE_OFFSET,r13 1386 + ;; 1387 + ld8 r18=[r16] 1388 + ;; 1389 + adds r19=VMM_VPD_VPSR_OFFSET,r18 1390 + ;; 1391 + ld8 r19=[r19] 1392 + mov r17=r0 1393 + mov r22=r0 1394 + mov r23=r0 1395 + br.cond.sptk ia64_vmm_entry 1396 + br.ret.sptk b0 1397 END(vmm_reset_entry)
+2 -2
arch/ia64/kvm/vtlb.c
··· 183 u64 i, dirty_pages = 1; 184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; 185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); 186 - void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) 187 - + KVM_MEM_DIRTY_LOG_OFS; 188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; 189 190 vmm_spin_lock(lock);
··· 183 u64 i, dirty_pages = 1; 184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; 185 spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); 186 + void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; 187 + 188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; 189 190 vmm_spin_lock(lock);
+80
arch/powerpc/include/asm/disassemble.h
···
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright IBM Corp. 2008 16 + * 17 + * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 + */ 19 + 20 + #ifndef __ASM_PPC_DISASSEMBLE_H__ 21 + #define __ASM_PPC_DISASSEMBLE_H__ 22 + 23 + #include <linux/types.h> 24 + 25 + static inline unsigned int get_op(u32 inst) 26 + { 27 + return inst >> 26; 28 + } 29 + 30 + static inline unsigned int get_xop(u32 inst) 31 + { 32 + return (inst >> 1) & 0x3ff; 33 + } 34 + 35 + static inline unsigned int get_sprn(u32 inst) 36 + { 37 + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); 38 + } 39 + 40 + static inline unsigned int get_dcrn(u32 inst) 41 + { 42 + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); 43 + } 44 + 45 + static inline unsigned int get_rt(u32 inst) 46 + { 47 + return (inst >> 21) & 0x1f; 48 + } 49 + 50 + static inline unsigned int get_rs(u32 inst) 51 + { 52 + return (inst >> 21) & 0x1f; 53 + } 54 + 55 + static inline unsigned int get_ra(u32 inst) 56 + { 57 + return (inst >> 16) & 0x1f; 58 + } 59 + 60 + static inline unsigned int get_rb(u32 inst) 61 + { 62 + return (inst >> 11) & 0x1f; 63 + } 64 + 65 + static inline unsigned int get_rc(u32 inst) 66 + { 67 + return inst & 0x1; 68 + } 69 + 70 + static inline unsigned int get_ws(u32 inst) 71 + { 72 + return (inst >> 11) & 0x1f; 73 + } 74 + 75 + static inline unsigned int get_d(u32 inst) 76 + { 77 + return inst & 0xffff; 78 + } 79 + 80 + #endif /* __ASM_PPC_DISASSEMBLE_H__ */
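(Aside, not part of the patch: the accessors in disassemble.h are plain bit-field extractions from a 32-bit PowerPC instruction word. A minimal stand-alone sketch of the same layout, composing a word from known fields and checking that the helpers recover them; the helpers are copied verbatim from the header above, the test values are arbitrary.)

	/* Stand-alone check of the field helpers above on a synthetic
	 * instruction word.  Bit layouts match disassemble.h. */
	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	static unsigned int get_op(uint32_t inst) { return inst >> 26; }
	static unsigned int get_rt(uint32_t inst) { return (inst >> 21) & 0x1f; }
	static unsigned int get_ra(uint32_t inst) { return (inst >> 16) & 0x1f; }
	static unsigned int get_rb(uint32_t inst) { return (inst >> 11) & 0x1f; }

	int main(void)
	{
		/* Compose op=31, rt=5, ra=6, rb=7 into one word, then decode it. */
		uint32_t inst = (31u << 26) | (5u << 21) | (6u << 16) | (7u << 11);

		assert(get_op(inst) == 31);
		assert(get_rt(inst) == 5);
		assert(get_ra(inst) == 6);
		assert(get_rb(inst) == 7);
		printf("op=%u rt=%u ra=%u rb=%u\n",
		       get_op(inst), get_rt(inst), get_ra(inst), get_rb(inst));
		return 0;
	}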
+61
arch/powerpc/include/asm/kvm_44x.h
···
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright IBM Corp. 2008 16 + * 17 + * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 + */ 19 + 20 + #ifndef __ASM_44X_H__ 21 + #define __ASM_44X_H__ 22 + 23 + #include <linux/kvm_host.h> 24 + 25 + #define PPC44x_TLB_SIZE 64 26 + 27 + /* If the guest is expecting it, this can be as large as we like; we'd just 28 + * need to find some way of advertising it. */ 29 + #define KVM44x_GUEST_TLB_SIZE 64 30 + 31 + struct kvmppc_44x_shadow_ref { 32 + struct page *page; 33 + u16 gtlb_index; 34 + u8 writeable; 35 + u8 tid; 36 + }; 37 + 38 + struct kvmppc_vcpu_44x { 39 + /* Unmodified copy of the guest's TLB. */ 40 + struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; 41 + 42 + /* References to guest pages in the hardware TLB. */ 43 + struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; 44 + 45 + /* State of the shadow TLB at guest context switch time. */ 46 + struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; 47 + u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; 48 + 49 + struct kvm_vcpu vcpu; 50 + }; 51 + 52 + static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) 53 + { 54 + return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); 55 + } 56 + 57 + void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); 58 + void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); 59 + void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); 60 + 61 + #endif /* __ASM_44X_H__ */
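(Aside, not part of the patch: to_44x() above is the usual container_of() round trip — the generic struct kvm_vcpu is embedded as the last member of the 44x-specific structure, and the wrapper recovers the outer structure from a pointer to the embedded member. A user-space illustration of the same idiom with placeholder struct contents:)

	/* Minimal illustration of the container_of idiom used by to_44x();
	 * the struct members here are placeholders, not the kernel types. */
	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct kvm_vcpu { int vcpu_id; };

	struct kvmppc_vcpu_44x {
		int guest_tlb[64];	/* stands in for the real TLB arrays */
		struct kvm_vcpu vcpu;	/* generic part embedded last, as above */
	};

	static struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
	{
		return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
	}

	int main(void)
	{
		struct kvmppc_vcpu_44x v44 = { .vcpu = { .vcpu_id = 3 } };
		struct kvm_vcpu *generic = &v44.vcpu;

		/* Recover the containing 44x structure from the generic pointer. */
		printf("round trip ok: %d\n", to_44x(generic) == &v44);
		return 0;
	}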
+80 -36
arch/powerpc/include/asm/kvm_host.h
··· 64 u32 halt_wakeup; 65 }; 66 67 - struct tlbe { 68 u32 tid; /* Only the low 8 bits are used. */ 69 u32 word0; 70 u32 word1; 71 u32 word2; 72 }; 73 74 struct kvm_arch { 75 }; 76 77 struct kvm_vcpu_arch { 78 - /* Unmodified copy of the guest's TLB. */ 79 - struct tlbe guest_tlb[PPC44x_TLB_SIZE]; 80 - /* TLB that's actually used when the guest is running. */ 81 - struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; 82 - /* Pages which are referenced in the shadow TLB. */ 83 - struct page *shadow_pages[PPC44x_TLB_SIZE]; 84 - 85 - /* Track which TLB entries we've modified in the current exit. */ 86 - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; 87 - 88 u32 host_stack; 89 u32 host_pid; 90 u32 host_dbcr0; ··· 125 u32 host_msr; 126 127 u64 fpr[32]; 128 - u32 gpr[32]; 129 130 - u32 pc; 131 u32 cr; 132 - u32 ctr; 133 - u32 lr; 134 - u32 xer; 135 136 - u32 msr; 137 u32 mmucr; 138 - u32 sprg0; 139 - u32 sprg1; 140 - u32 sprg2; 141 - u32 sprg3; 142 - u32 sprg4; 143 - u32 sprg5; 144 - u32 sprg6; 145 - u32 sprg7; 146 - u32 srr0; 147 - u32 srr1; 148 - u32 csrr0; 149 - u32 csrr1; 150 - u32 dsrr0; 151 - u32 dsrr1; 152 - u32 dear; 153 - u32 esr; 154 u32 dec; 155 u32 decar; 156 u32 tbl; ··· 158 u32 tcr; 159 u32 tsr; 160 u32 ivor[16]; 161 - u32 ivpr; 162 u32 pir; 163 164 u32 shadow_pid; ··· 171 u32 dbcr0; 172 u32 dbcr1; 173 174 u32 last_inst; 175 - u32 fault_dear; 176 - u32 fault_esr; 177 gpa_t paddr_accessed; 178 179 u8 io_gpr; /* GPR used as IO source/target */
··· 64 u32 halt_wakeup; 65 }; 66 67 + struct kvmppc_44x_tlbe { 68 u32 tid; /* Only the low 8 bits are used. */ 69 u32 word0; 70 u32 word1; 71 u32 word2; 72 }; 73 74 + enum kvm_exit_types { 75 + MMIO_EXITS, 76 + DCR_EXITS, 77 + SIGNAL_EXITS, 78 + ITLB_REAL_MISS_EXITS, 79 + ITLB_VIRT_MISS_EXITS, 80 + DTLB_REAL_MISS_EXITS, 81 + DTLB_VIRT_MISS_EXITS, 82 + SYSCALL_EXITS, 83 + ISI_EXITS, 84 + DSI_EXITS, 85 + EMULATED_INST_EXITS, 86 + EMULATED_MTMSRWE_EXITS, 87 + EMULATED_WRTEE_EXITS, 88 + EMULATED_MTSPR_EXITS, 89 + EMULATED_MFSPR_EXITS, 90 + EMULATED_MTMSR_EXITS, 91 + EMULATED_MFMSR_EXITS, 92 + EMULATED_TLBSX_EXITS, 93 + EMULATED_TLBWE_EXITS, 94 + EMULATED_RFI_EXITS, 95 + DEC_EXITS, 96 + EXT_INTR_EXITS, 97 + HALT_WAKEUP, 98 + USR_PR_INST, 99 + FP_UNAVAIL, 100 + DEBUG_EXITS, 101 + TIMEINGUEST, 102 + __NUMBER_OF_KVM_EXIT_TYPES 103 + }; 104 + 105 + /* allow access to big endian 32bit upper/lower parts and 64bit var */ 106 + struct kvmppc_exit_timing { 107 + union { 108 + u64 tv64; 109 + struct { 110 + u32 tbu, tbl; 111 + } tv32; 112 + }; 113 + }; 114 + 115 struct kvm_arch { 116 }; 117 118 struct kvm_vcpu_arch { 119 u32 host_stack; 120 u32 host_pid; 121 u32 host_dbcr0; ··· 94 u32 host_msr; 95 96 u64 fpr[32]; 97 + ulong gpr[32]; 98 99 + ulong pc; 100 u32 cr; 101 + ulong ctr; 102 + ulong lr; 103 + ulong xer; 104 105 + ulong msr; 106 u32 mmucr; 107 + ulong sprg0; 108 + ulong sprg1; 109 + ulong sprg2; 110 + ulong sprg3; 111 + ulong sprg4; 112 + ulong sprg5; 113 + ulong sprg6; 114 + ulong sprg7; 115 + ulong srr0; 116 + ulong srr1; 117 + ulong csrr0; 118 + ulong csrr1; 119 + ulong dsrr0; 120 + ulong dsrr1; 121 + ulong dear; 122 + ulong esr; 123 u32 dec; 124 u32 decar; 125 u32 tbl; ··· 127 u32 tcr; 128 u32 tsr; 129 u32 ivor[16]; 130 + ulong ivpr; 131 u32 pir; 132 133 u32 shadow_pid; ··· 140 u32 dbcr0; 141 u32 dbcr1; 142 143 + #ifdef CONFIG_KVM_EXIT_TIMING 144 + struct kvmppc_exit_timing timing_exit; 145 + struct kvmppc_exit_timing timing_last_enter; 146 + u32 last_exit_type; 147 + u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; 148 + u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; 149 + u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES]; 150 + u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; 151 + u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; 152 + u64 timing_last_exit; 153 + struct dentry *debugfs_exit_timing; 154 + #endif 155 + 156 u32 last_inst; 157 + ulong fault_dear; 158 + ulong fault_esr; 159 gpa_t paddr_accessed; 160 161 u8 io_gpr; /* GPR used as IO source/target */
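The kvmppc_exit_timing union lets the timing code read one 64-bit timestamp while low-level code stores the two 32-bit timebase halves separately (the TBU/TBL offsets added to asm-offsets.c below exist for that). On the big-endian 440 target the tv32 halves overlay the high and low words of tv64; the arithmetic being expressed is simply the one below, done with explicit shifts here because on a little-endian build host the union members would overlay the other way round:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t tbu = 0x00000002;      /* upper timebase word */
        uint32_t tbl = 0x9abcdef0;      /* lower timebase word */
        uint64_t tv64 = ((uint64_t)tbu << 32) | tbl;

        printf("tv64 = 0x%016llx\n", (unsigned long long)tv64);
        /* 0x000000029abcdef0 */
        return 0;
}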
+28 -43
arch/powerpc/include/asm/kvm_ppc.h
··· 29 #include <linux/kvm_types.h> 30 #include <linux/kvm_host.h> 31 32 - struct kvm_tlb { 33 - struct tlbe guest_tlb[PPC44x_TLB_SIZE]; 34 - struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; 35 - }; 36 - 37 enum emulation_result { 38 EMULATE_DONE, /* no further processing */ 39 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ 40 EMULATE_DO_DCR, /* kvm_run filled with DCR request */ 41 EMULATE_FAIL, /* can't emulate this instruction */ 42 }; 43 - 44 - extern const unsigned char exception_priority[]; 45 - extern const unsigned char priority_exception[]; 46 47 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 48 extern char kvmppc_handlers_start[]; ··· 50 extern int kvmppc_emulate_instruction(struct kvm_run *run, 51 struct kvm_vcpu *vcpu); 52 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); 53 54 - extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, 55 - u64 asid, u32 flags); 56 - extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, 57 - gva_t eend, u32 asid); 58 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); 59 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); 60 61 - /* XXX Book E specific */ 62 - extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); 63 64 - extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); 65 66 - static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) 67 - { 68 - unsigned int priority = exception_priority[exception]; 69 - set_bit(priority, &vcpu->arch.pending_exceptions); 70 - } 71 72 - static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) 73 - { 74 - unsigned int priority = exception_priority[exception]; 75 - clear_bit(priority, &vcpu->arch.pending_exceptions); 76 - } 77 78 - /* Helper function for "full" MSR writes. No need to call this if only EE is 79 - * changing. */ 80 - static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) 81 - { 82 - if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) 83 - kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); 84 85 - vcpu->arch.msr = new_msr; 86 87 - if (vcpu->arch.msr & MSR_WE) 88 - kvm_vcpu_block(vcpu); 89 - } 90 - 91 - static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) 92 - { 93 - if (vcpu->arch.pid != new_pid) { 94 - vcpu->arch.pid = new_pid; 95 - vcpu->arch.swap_pid = 1; 96 - } 97 - } 98 99 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); 100
··· 29 #include <linux/kvm_types.h> 30 #include <linux/kvm_host.h> 31 32 enum emulation_result { 33 EMULATE_DONE, /* no further processing */ 34 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ 35 EMULATE_DO_DCR, /* kvm_run filled with DCR request */ 36 EMULATE_FAIL, /* can't emulate this instruction */ 37 }; 38 39 extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 40 extern char kvmppc_handlers_start[]; ··· 58 extern int kvmppc_emulate_instruction(struct kvm_run *run, 59 struct kvm_vcpu *vcpu); 60 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); 61 + extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); 62 63 + extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, 64 + u64 asid, u32 flags, u32 max_bytes, 65 + unsigned int gtlb_idx); 66 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); 67 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); 68 69 + /* Core-specific hooks */ 70 71 + extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, 72 + unsigned int id); 73 + extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu); 74 + extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu); 75 + extern int kvmppc_core_check_processor_compat(void); 76 + extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, 77 + struct kvm_translation *tr); 78 79 + extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); 80 + extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); 81 82 + extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu); 83 + extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu); 84 85 + extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); 86 + extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); 87 + extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); 88 + extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); 89 + extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 90 + struct kvm_interrupt *irq); 91 92 + extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 93 + unsigned int op, int *advance); 94 + extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); 95 + extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); 96 97 + extern int kvmppc_booke_init(void); 98 + extern void kvmppc_booke_exit(void); 99 100 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); 101
+1
arch/powerpc/include/asm/mmu-44x.h
··· 56 #ifndef __ASSEMBLY__ 57 58 extern unsigned int tlb_44x_hwater; 59 60 typedef struct { 61 unsigned int id;
··· 56 #ifndef __ASSEMBLY__ 57 58 extern unsigned int tlb_44x_hwater; 59 + extern unsigned int tlb_44x_index; 60 61 typedef struct { 62 unsigned int id;
+15 -6
arch/powerpc/kernel/asm-offsets.c
··· 23 #include <linux/mm.h> 24 #include <linux/suspend.h> 25 #include <linux/hrtimer.h> 26 - #ifdef CONFIG_KVM 27 - #include <linux/kvm_host.h> 28 - #endif 29 #ifdef CONFIG_PPC64 30 #include <linux/time.h> 31 #include <linux/hardirq.h> ··· 47 #endif 48 #ifdef CONFIG_PPC_ISERIES 49 #include <asm/iseries/alpaca.h> 50 #endif 51 52 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) ··· 357 DEFINE(PTE_SIZE, sizeof(pte_t)); 358 359 #ifdef CONFIG_KVM 360 - DEFINE(TLBE_BYTES, sizeof(struct tlbe)); 361 362 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 363 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 364 - DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); 365 - DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod)); 366 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 367 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 368 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); ··· 381 #ifdef CONFIG_44x 382 DEFINE(PGD_T_LOG2, PGD_T_LOG2); 383 DEFINE(PTE_T_LOG2, PTE_T_LOG2); 384 #endif 385 386 return 0;
··· 23 #include <linux/mm.h> 24 #include <linux/suspend.h> 25 #include <linux/hrtimer.h> 26 #ifdef CONFIG_PPC64 27 #include <linux/time.h> 28 #include <linux/hardirq.h> ··· 50 #endif 51 #ifdef CONFIG_PPC_ISERIES 52 #include <asm/iseries/alpaca.h> 53 + #endif 54 + #ifdef CONFIG_KVM 55 + #include <asm/kvm_44x.h> 56 #endif 57 58 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) ··· 357 DEFINE(PTE_SIZE, sizeof(pte_t)); 358 359 #ifdef CONFIG_KVM 360 + DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe)); 361 362 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 363 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 364 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 365 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 366 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); ··· 383 #ifdef CONFIG_44x 384 DEFINE(PGD_T_LOG2, PGD_T_LOG2); 385 DEFINE(PTE_T_LOG2, PTE_T_LOG2); 386 + #endif 387 + 388 + #ifdef CONFIG_KVM_EXIT_TIMING 389 + DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, 390 + arch.timing_exit.tv32.tbu)); 391 + DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, 392 + arch.timing_exit.tv32.tbl)); 393 + DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, 394 + arch.timing_last_enter.tv32.tbu)); 395 + DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, 396 + arch.timing_last_enter.tv32.tbl)); 397 #endif 398 399 return 0;
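The DEFINE() entries turn struct offsets into plain numeric constants that assembly code such as booke_interrupts.S (listed in the Makefile below) can use, since the assembler knows nothing about C struct layout. A toy userspace rendition of the idea, with made-up names; the real build extracts the constants from the compiled object rather than by running a program, but the resulting #defines play the same role:

#include <stdio.h>
#include <stddef.h>

struct toy_vcpu {
        unsigned long host_stack;
        unsigned long gpr[32];
};

#define DEFINE(sym, val) \
        printf("#define " #sym " %lu\n", (unsigned long)(val))

int main(void)
{
        DEFINE(TOY_VCPU_HOST_STACK, offsetof(struct toy_vcpu, host_stack));
        DEFINE(TOY_VCPU_GPRS, offsetof(struct toy_vcpu, gpr));
        return 0;
}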
+228
arch/powerpc/kvm/44x.c
···
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright IBM Corp. 2008 16 + * 17 + * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 + */ 19 + 20 + #include <linux/kvm_host.h> 21 + #include <linux/err.h> 22 + 23 + #include <asm/reg.h> 24 + #include <asm/cputable.h> 25 + #include <asm/tlbflush.h> 26 + #include <asm/kvm_44x.h> 27 + #include <asm/kvm_ppc.h> 28 + 29 + #include "44x_tlb.h" 30 + 31 + /* Note: clearing MSR[DE] just means that the debug interrupt will not be 32 + * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. 33 + * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt 34 + * will be delivered as an "imprecise debug event" (which is indicated by 35 + * DBSR[IDE]. 36 + */ 37 + static void kvm44x_disable_debug_interrupts(void) 38 + { 39 + mtmsr(mfmsr() & ~MSR_DE); 40 + } 41 + 42 + void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) 43 + { 44 + kvm44x_disable_debug_interrupts(); 45 + 46 + mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); 47 + mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); 48 + mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); 49 + mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); 50 + mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); 51 + mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); 52 + mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); 53 + mtmsr(vcpu->arch.host_msr); 54 + } 55 + 56 + void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) 57 + { 58 + struct kvm_guest_debug *dbg = &vcpu->guest_debug; 59 + u32 dbcr0 = 0; 60 + 61 + vcpu->arch.host_msr = mfmsr(); 62 + kvm44x_disable_debug_interrupts(); 63 + 64 + /* Save host debug register state. 
*/ 65 + vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); 66 + vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); 67 + vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); 68 + vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); 69 + vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); 70 + vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); 71 + vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); 72 + 73 + /* set registers up for guest */ 74 + 75 + if (dbg->bp[0]) { 76 + mtspr(SPRN_IAC1, dbg->bp[0]); 77 + dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; 78 + } 79 + if (dbg->bp[1]) { 80 + mtspr(SPRN_IAC2, dbg->bp[1]); 81 + dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; 82 + } 83 + if (dbg->bp[2]) { 84 + mtspr(SPRN_IAC3, dbg->bp[2]); 85 + dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; 86 + } 87 + if (dbg->bp[3]) { 88 + mtspr(SPRN_IAC4, dbg->bp[3]); 89 + dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; 90 + } 91 + 92 + mtspr(SPRN_DBCR0, dbcr0); 93 + mtspr(SPRN_DBCR1, 0); 94 + mtspr(SPRN_DBCR2, 0); 95 + } 96 + 97 + void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 98 + { 99 + kvmppc_44x_tlb_load(vcpu); 100 + } 101 + 102 + void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 103 + { 104 + kvmppc_44x_tlb_put(vcpu); 105 + } 106 + 107 + int kvmppc_core_check_processor_compat(void) 108 + { 109 + int r; 110 + 111 + if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) 112 + r = 0; 113 + else 114 + r = -ENOTSUPP; 115 + 116 + return r; 117 + } 118 + 119 + int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) 120 + { 121 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 122 + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; 123 + int i; 124 + 125 + tlbe->tid = 0; 126 + tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; 127 + tlbe->word1 = 0; 128 + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; 129 + 130 + tlbe++; 131 + tlbe->tid = 0; 132 + tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; 133 + tlbe->word1 = 0xef600000; 134 + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR 135 + | PPC44x_TLB_I | PPC44x_TLB_G; 136 + 137 + /* Since the guest can directly access the timebase, it must know the 138 + * real timebase frequency. Accordingly, it must see the state of 139 + * CCR1[TCS]. */ 140 + vcpu->arch.ccr1 = mfspr(SPRN_CCR1); 141 + 142 + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) 143 + vcpu_44x->shadow_refs[i].gtlb_index = -1; 144 + 145 + return 0; 146 + } 147 + 148 + /* 'linear_address' is actually an encoding of AS|PID|EADDR . */ 149 + int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, 150 + struct kvm_translation *tr) 151 + { 152 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 153 + struct kvmppc_44x_tlbe *gtlbe; 154 + int index; 155 + gva_t eaddr; 156 + u8 pid; 157 + u8 as; 158 + 159 + eaddr = tr->linear_address; 160 + pid = (tr->linear_address >> 32) & 0xff; 161 + as = (tr->linear_address >> 40) & 0x1; 162 + 163 + index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); 164 + if (index == -1) { 165 + tr->valid = 0; 166 + return 0; 167 + } 168 + 169 + gtlbe = &vcpu_44x->guest_tlb[index]; 170 + 171 + tr->physical_address = tlb_xlate(gtlbe, eaddr); 172 + /* XXX what does "writeable" and "usermode" even mean? 
*/ 173 + tr->valid = 1; 174 + 175 + return 0; 176 + } 177 + 178 + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 179 + { 180 + struct kvmppc_vcpu_44x *vcpu_44x; 181 + struct kvm_vcpu *vcpu; 182 + int err; 183 + 184 + vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 185 + if (!vcpu_44x) { 186 + err = -ENOMEM; 187 + goto out; 188 + } 189 + 190 + vcpu = &vcpu_44x->vcpu; 191 + err = kvm_vcpu_init(vcpu, kvm, id); 192 + if (err) 193 + goto free_vcpu; 194 + 195 + return vcpu; 196 + 197 + free_vcpu: 198 + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); 199 + out: 200 + return ERR_PTR(err); 201 + } 202 + 203 + void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 204 + { 205 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 206 + 207 + kvm_vcpu_uninit(vcpu); 208 + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); 209 + } 210 + 211 + static int kvmppc_44x_init(void) 212 + { 213 + int r; 214 + 215 + r = kvmppc_booke_init(); 216 + if (r) 217 + return r; 218 + 219 + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); 220 + } 221 + 222 + static void kvmppc_44x_exit(void) 223 + { 224 + kvmppc_booke_exit(); 225 + } 226 + 227 + module_init(kvmppc_44x_init); 228 + module_exit(kvmppc_44x_exit);
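kvmppc_core_vcpu_translate() above unpacks a single 64-bit value into EADDR (low 32 bits), PID (bits 32-39) and AS (bit 40). A sketch of the matching userspace-side packing; whether a given userspace issues KVM_TRANSLATE with this layout is an assumption here, but the field positions are taken directly from the decode above:

#include <stdio.h>
#include <stdint.h>

/* Pack AS|PID|EADDR the way kvmppc_core_vcpu_translate() unpacks it. */
static uint64_t pack_linear_address(uint32_t eaddr, uint8_t pid, uint8_t as)
{
        return ((uint64_t)(as & 0x1) << 40) | ((uint64_t)pid << 32) | eaddr;
}

int main(void)
{
        /* e.g. an address in the 0xef600000 I/O region mapped by guest TLB
         * entry 1 above, guest PID 1, AS 0 -- the value to place in
         * kvm_translation's linear_address before the KVM_TRANSLATE ioctl. */
        printf("0x%llx\n",
               (unsigned long long)pack_linear_address(0xef600300, 1, 0));
        return 0;
}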
+371
arch/powerpc/kvm/44x_emulate.c
···
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright IBM Corp. 2008 16 + * 17 + * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 + */ 19 + 20 + #include <asm/kvm_ppc.h> 21 + #include <asm/dcr.h> 22 + #include <asm/dcr-regs.h> 23 + #include <asm/disassemble.h> 24 + #include <asm/kvm_44x.h> 25 + #include "timing.h" 26 + 27 + #include "booke.h" 28 + #include "44x_tlb.h" 29 + 30 + #define OP_RFI 19 31 + 32 + #define XOP_RFI 50 33 + #define XOP_MFMSR 83 34 + #define XOP_WRTEE 131 35 + #define XOP_MTMSR 146 36 + #define XOP_WRTEEI 163 37 + #define XOP_MFDCR 323 38 + #define XOP_MTDCR 451 39 + #define XOP_TLBSX 914 40 + #define XOP_ICCCI 966 41 + #define XOP_TLBWE 978 42 + 43 + static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) 44 + { 45 + vcpu->arch.pc = vcpu->arch.srr0; 46 + kvmppc_set_msr(vcpu, vcpu->arch.srr1); 47 + } 48 + 49 + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 50 + unsigned int inst, int *advance) 51 + { 52 + int emulated = EMULATE_DONE; 53 + int dcrn; 54 + int ra; 55 + int rb; 56 + int rc; 57 + int rs; 58 + int rt; 59 + int ws; 60 + 61 + switch (get_op(inst)) { 62 + case OP_RFI: 63 + switch (get_xop(inst)) { 64 + case XOP_RFI: 65 + kvmppc_emul_rfi(vcpu); 66 + kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); 67 + *advance = 0; 68 + break; 69 + 70 + default: 71 + emulated = EMULATE_FAIL; 72 + break; 73 + } 74 + break; 75 + 76 + case 31: 77 + switch (get_xop(inst)) { 78 + 79 + case XOP_MFMSR: 80 + rt = get_rt(inst); 81 + vcpu->arch.gpr[rt] = vcpu->arch.msr; 82 + kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); 83 + break; 84 + 85 + case XOP_MTMSR: 86 + rs = get_rs(inst); 87 + kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); 88 + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); 89 + break; 90 + 91 + case XOP_WRTEE: 92 + rs = get_rs(inst); 93 + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) 94 + | (vcpu->arch.gpr[rs] & MSR_EE); 95 + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); 96 + break; 97 + 98 + case XOP_WRTEEI: 99 + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) 100 + | (inst & MSR_EE); 101 + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); 102 + break; 103 + 104 + case XOP_MFDCR: 105 + dcrn = get_dcrn(inst); 106 + rt = get_rt(inst); 107 + 108 + /* The guest may access CPR0 registers to determine the timebase 109 + * frequency, and it must know the real host frequency because it 110 + * can directly access the timebase registers. 111 + * 112 + * It would be possible to emulate those accesses in userspace, 113 + * but userspace can really only figure out the end frequency. 114 + * We could decompose that into the factors that compute it, but 115 + * that's tricky math, and it's easier to just report the real 116 + * CPR0 values. 
117 + */ 118 + switch (dcrn) { 119 + case DCRN_CPR0_CONFIG_ADDR: 120 + vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; 121 + break; 122 + case DCRN_CPR0_CONFIG_DATA: 123 + local_irq_disable(); 124 + mtdcr(DCRN_CPR0_CONFIG_ADDR, 125 + vcpu->arch.cpr0_cfgaddr); 126 + vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); 127 + local_irq_enable(); 128 + break; 129 + default: 130 + run->dcr.dcrn = dcrn; 131 + run->dcr.data = 0; 132 + run->dcr.is_write = 0; 133 + vcpu->arch.io_gpr = rt; 134 + vcpu->arch.dcr_needed = 1; 135 + kvmppc_account_exit(vcpu, DCR_EXITS); 136 + emulated = EMULATE_DO_DCR; 137 + } 138 + 139 + break; 140 + 141 + case XOP_MTDCR: 142 + dcrn = get_dcrn(inst); 143 + rs = get_rs(inst); 144 + 145 + /* emulate some access in kernel */ 146 + switch (dcrn) { 147 + case DCRN_CPR0_CONFIG_ADDR: 148 + vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; 149 + break; 150 + default: 151 + run->dcr.dcrn = dcrn; 152 + run->dcr.data = vcpu->arch.gpr[rs]; 153 + run->dcr.is_write = 1; 154 + vcpu->arch.dcr_needed = 1; 155 + kvmppc_account_exit(vcpu, DCR_EXITS); 156 + emulated = EMULATE_DO_DCR; 157 + } 158 + 159 + break; 160 + 161 + case XOP_TLBWE: 162 + ra = get_ra(inst); 163 + rs = get_rs(inst); 164 + ws = get_ws(inst); 165 + emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); 166 + break; 167 + 168 + case XOP_TLBSX: 169 + rt = get_rt(inst); 170 + ra = get_ra(inst); 171 + rb = get_rb(inst); 172 + rc = get_rc(inst); 173 + emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); 174 + break; 175 + 176 + case XOP_ICCCI: 177 + break; 178 + 179 + default: 180 + emulated = EMULATE_FAIL; 181 + } 182 + 183 + break; 184 + 185 + default: 186 + emulated = EMULATE_FAIL; 187 + } 188 + 189 + return emulated; 190 + } 191 + 192 + int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 193 + { 194 + switch (sprn) { 195 + case SPRN_MMUCR: 196 + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; 197 + case SPRN_PID: 198 + kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; 199 + case SPRN_CCR0: 200 + vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; 201 + case SPRN_CCR1: 202 + vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; 203 + case SPRN_DEAR: 204 + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; 205 + case SPRN_ESR: 206 + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; 207 + case SPRN_DBCR0: 208 + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; 209 + case SPRN_DBCR1: 210 + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; 211 + case SPRN_TSR: 212 + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; 213 + case SPRN_TCR: 214 + vcpu->arch.tcr = vcpu->arch.gpr[rs]; 215 + kvmppc_emulate_dec(vcpu); 216 + break; 217 + 218 + /* Note: SPRG4-7 are user-readable. These values are 219 + * loaded into the real SPRGs when resuming the 220 + * guest. 
*/ 221 + case SPRN_SPRG4: 222 + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; 223 + case SPRN_SPRG5: 224 + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; 225 + case SPRN_SPRG6: 226 + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; 227 + case SPRN_SPRG7: 228 + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; 229 + 230 + case SPRN_IVPR: 231 + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; 232 + break; 233 + case SPRN_IVOR0: 234 + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; 235 + break; 236 + case SPRN_IVOR1: 237 + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; 238 + break; 239 + case SPRN_IVOR2: 240 + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; 241 + break; 242 + case SPRN_IVOR3: 243 + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; 244 + break; 245 + case SPRN_IVOR4: 246 + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; 247 + break; 248 + case SPRN_IVOR5: 249 + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; 250 + break; 251 + case SPRN_IVOR6: 252 + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; 253 + break; 254 + case SPRN_IVOR7: 255 + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; 256 + break; 257 + case SPRN_IVOR8: 258 + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; 259 + break; 260 + case SPRN_IVOR9: 261 + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; 262 + break; 263 + case SPRN_IVOR10: 264 + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; 265 + break; 266 + case SPRN_IVOR11: 267 + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; 268 + break; 269 + case SPRN_IVOR12: 270 + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; 271 + break; 272 + case SPRN_IVOR13: 273 + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; 274 + break; 275 + case SPRN_IVOR14: 276 + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; 277 + break; 278 + case SPRN_IVOR15: 279 + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; 280 + break; 281 + 282 + default: 283 + return EMULATE_FAIL; 284 + } 285 + 286 + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); 287 + return EMULATE_DONE; 288 + } 289 + 290 + int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) 291 + { 292 + switch (sprn) { 293 + /* 440 */ 294 + case SPRN_MMUCR: 295 + vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; 296 + case SPRN_CCR0: 297 + vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; 298 + case SPRN_CCR1: 299 + vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; 300 + 301 + /* Book E */ 302 + case SPRN_PID: 303 + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; 304 + case SPRN_IVPR: 305 + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; 306 + case SPRN_DEAR: 307 + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; 308 + case SPRN_ESR: 309 + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; 310 + case SPRN_DBCR0: 311 + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; 312 + case SPRN_DBCR1: 313 + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; 314 + 315 + case SPRN_IVOR0: 316 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; 317 + break; 318 + case SPRN_IVOR1: 319 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; 320 + break; 321 + case SPRN_IVOR2: 322 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; 323 + break; 324 + case SPRN_IVOR3: 325 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; 326 + break; 327 + case SPRN_IVOR4: 328 + vcpu->arch.gpr[rt] = 
vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; 329 + break; 330 + case SPRN_IVOR5: 331 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; 332 + break; 333 + case SPRN_IVOR6: 334 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; 335 + break; 336 + case SPRN_IVOR7: 337 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; 338 + break; 339 + case SPRN_IVOR8: 340 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; 341 + break; 342 + case SPRN_IVOR9: 343 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; 344 + break; 345 + case SPRN_IVOR10: 346 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; 347 + break; 348 + case SPRN_IVOR11: 349 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; 350 + break; 351 + case SPRN_IVOR12: 352 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; 353 + break; 354 + case SPRN_IVOR13: 355 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; 356 + break; 357 + case SPRN_IVOR14: 358 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; 359 + break; 360 + case SPRN_IVOR15: 361 + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 362 + break; 363 + 364 + default: 365 + return EMULATE_FAIL; 366 + } 367 + 368 + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); 369 + return EMULATE_DONE; 370 + } 371 +
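One detail in the emulation above: the XOP_WRTEEI case updates MSR[EE] with "inst & MSR_EE", which works because the instruction's E field and MSR[EE] occupy the same bit position (0x8000), so no shifting is needed. A small standalone check, building the instruction word from the opcode values defined above:

#include <assert.h>
#include <stdint.h>

#define MSR_EE     0x8000u      /* external interrupt enable */
#define XOP_WRTEEI 163          /* from the table above */

int main(void)
{
        uint32_t msr  = 0x00001000;                             /* EE currently clear */
        uint32_t inst = (31u << 26) | (XOP_WRTEEI << 1) | MSR_EE; /* wrteei 1 */

        msr = (msr & ~MSR_EE) | (inst & MSR_EE);
        assert(msr & MSR_EE);                                   /* EE is now set */
        return 0;
}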
+358 -109
arch/powerpc/kvm/44x_tlb.c
··· 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/highmem.h> 25 #include <asm/mmu-44x.h> 26 #include <asm/kvm_ppc.h> 27 28 #include "44x_tlb.h" 29 30 #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) 31 #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) 32 33 - static unsigned int kvmppc_tlb_44x_pos; 34 35 static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) 36 { 37 - /* Mask off reserved bits. */ 38 - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; 39 40 if (!usermode) { 41 /* Guest is in supervisor mode, so we need to translate guest ··· 130 /* Make sure host can always access this memory. */ 131 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; 132 133 return attrib; 134 } 135 136 /* Search the guest TLB for a matching entry. */ 137 int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, 138 unsigned int as) 139 { 140 int i; 141 142 /* XXX Replace loop with fancy data structures. */ 143 - for (i = 0; i < PPC44x_TLB_SIZE; i++) { 144 - struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; 145 unsigned int tid; 146 147 if (eaddr < get_tlb_eaddr(tlbe)) ··· 208 return -1; 209 } 210 211 - struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) 212 { 213 unsigned int as = !!(vcpu->arch.msr & MSR_IS); 214 - unsigned int index; 215 216 - index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 217 - if (index == -1) 218 - return NULL; 219 - return &vcpu->arch.guest_tlb[index]; 220 } 221 222 - struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) 223 { 224 unsigned int as = !!(vcpu->arch.msr & MSR_DS); 225 - unsigned int index; 226 227 - index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 228 - if (index == -1) 229 - return NULL; 230 - return &vcpu->arch.guest_tlb[index]; 231 } 232 233 - static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) 234 { 235 - return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); 236 - } 237 238 - static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, 239 - unsigned int index) 240 - { 241 - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index]; 242 - struct page *page = vcpu->arch.shadow_pages[index]; 243 244 - if (get_tlb_v(stlbe)) { 245 - if (kvmppc_44x_tlbe_is_writable(stlbe)) 246 - kvm_release_page_dirty(page); 247 - else 248 - kvm_release_page_clean(page); 249 - } 250 } 251 252 void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) 253 { 254 int i; 255 256 for (i = 0; i <= tlb_44x_hwater; i++) 257 - kvmppc_44x_shadow_release(vcpu, i); 258 } 259 260 - void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i) 261 { 262 - vcpu->arch.shadow_tlb_mod[i] = 1; 263 - } 264 - 265 - /* Caller must ensure that the specified guest TLB entry is safe to insert into 266 - * the shadow TLB. */ 267 - void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, 268 - u32 flags) 269 - { 270 struct page *new_page; 271 - struct tlbe *stlbe; 272 hpa_t hpaddr; 273 unsigned int victim; 274 275 - /* Future optimization: don't overwrite the TLB entry containing the 276 - * current PC (or stack?). */ 277 - victim = kvmppc_tlb_44x_pos++; 278 - if (kvmppc_tlb_44x_pos > tlb_44x_hwater) 279 - kvmppc_tlb_44x_pos = 0; 280 - stlbe = &vcpu->arch.shadow_tlb[victim]; 281 282 /* Get reference to new page. 
*/ 283 new_page = gfn_to_page(vcpu->kvm, gfn); 284 if (is_error_page(new_page)) { 285 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); ··· 299 } 300 hpaddr = page_to_phys(new_page); 301 302 - /* Drop reference to old page. */ 303 - kvmppc_44x_shadow_release(vcpu, victim); 304 - 305 - vcpu->arch.shadow_pages[victim] = new_page; 306 307 /* XXX Make sure (va, size) doesn't overlap any other 308 * entries. 440x6 user manual says the result would be ··· 308 309 /* XXX what about AS? */ 310 311 - stlbe->tid = !(asid & 0xff); 312 - 313 /* Force TS=1 for all guest mappings. */ 314 - /* For now we hardcode 4KB mappings, but it will be important to 315 - * use host large pages in the future. */ 316 - stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS 317 - | PPC44x_TLB_4K; 318 - stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); 319 - stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, 320 - vcpu->arch.msr & MSR_PR); 321 - kvmppc_tlbe_set_modified(vcpu, victim); 322 323 - KVMTRACE_5D(STLB_WRITE, vcpu, victim, 324 - stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, 325 - handler); 326 } 327 328 - void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, 329 - gva_t eend, u32 asid) 330 { 331 - unsigned int pid = !(asid & 0xff); 332 int i; 333 334 - /* XXX Replace loop with fancy data structures. */ 335 - for (i = 0; i <= tlb_44x_hwater; i++) { 336 - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; 337 - unsigned int tid; 338 - 339 - if (!get_tlb_v(stlbe)) 340 - continue; 341 - 342 - if (eend < get_tlb_eaddr(stlbe)) 343 - continue; 344 - 345 - if (eaddr > get_tlb_end(stlbe)) 346 - continue; 347 - 348 - tid = get_tlb_tid(stlbe); 349 - if (tid && (tid != pid)) 350 - continue; 351 - 352 - kvmppc_44x_shadow_release(vcpu, i); 353 - stlbe->word0 = 0; 354 - kvmppc_tlbe_set_modified(vcpu, i); 355 - KVMTRACE_5D(STLB_INVAL, vcpu, i, 356 - stlbe->tid, stlbe->word0, stlbe->word1, 357 - stlbe->word2, handler); 358 } 359 } 360 361 - /* Invalidate all mappings on the privilege switch after PID has been changed. 362 - * The guest always runs with PID=1, so we must clear the entire TLB when 363 - * switching address spaces. */ 364 void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) 365 { 366 int i; 367 368 - if (vcpu->arch.swap_pid) { 369 - /* XXX Replace loop with fancy data structures. */ 370 - for (i = 0; i <= tlb_44x_hwater; i++) { 371 - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; 372 373 - /* Future optimization: clear only userspace mappings. */ 374 - kvmppc_44x_shadow_release(vcpu, i); 375 - stlbe->word0 = 0; 376 - kvmppc_tlbe_set_modified(vcpu, i); 377 - KVMTRACE_5D(STLB_INVAL, vcpu, i, 378 - stlbe->tid, stlbe->word0, stlbe->word1, 379 - stlbe->word2, handler); 380 - } 381 - vcpu->arch.swap_pid = 0; 382 } 383 384 - vcpu->arch.shadow_pid = !usermode; 385 }
··· 22 #include <linux/kvm.h> 23 #include <linux/kvm_host.h> 24 #include <linux/highmem.h> 25 + 26 + #include <asm/tlbflush.h> 27 #include <asm/mmu-44x.h> 28 #include <asm/kvm_ppc.h> 29 + #include <asm/kvm_44x.h> 30 + #include "timing.h" 31 32 #include "44x_tlb.h" 33 34 + #ifndef PPC44x_TLBE_SIZE 35 + #define PPC44x_TLBE_SIZE PPC44x_TLB_4K 36 + #endif 37 + 38 + #define PAGE_SIZE_4K (1<<12) 39 + #define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) 40 + 41 + #define PPC44x_TLB_UATTR_MASK \ 42 + (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) 43 #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) 44 #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) 45 46 + #ifdef DEBUG 47 + void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) 48 + { 49 + struct kvmppc_44x_tlbe *tlbe; 50 + int i; 51 + 52 + printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); 53 + printk("| %2s | %3s | %8s | %8s | %8s |\n", 54 + "nr", "tid", "word0", "word1", "word2"); 55 + 56 + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { 57 + tlbe = &vcpu_44x->guest_tlb[i]; 58 + if (tlbe->word0 & PPC44x_TLB_VALID) 59 + printk(" G%2d | %02X | %08X | %08X | %08X |\n", 60 + i, tlbe->tid, tlbe->word0, tlbe->word1, 61 + tlbe->word2); 62 + } 63 + } 64 + #endif 65 + 66 + static inline void kvmppc_44x_tlbie(unsigned int index) 67 + { 68 + /* 0 <= index < 64, so the V bit is clear and we can use the index as 69 + * word0. */ 70 + asm volatile( 71 + "tlbwe %[index], %[index], 0\n" 72 + : 73 + : [index] "r"(index) 74 + ); 75 + } 76 + 77 + static inline void kvmppc_44x_tlbre(unsigned int index, 78 + struct kvmppc_44x_tlbe *tlbe) 79 + { 80 + asm volatile( 81 + "tlbre %[word0], %[index], 0\n" 82 + "mfspr %[tid], %[sprn_mmucr]\n" 83 + "andi. %[tid], %[tid], 0xff\n" 84 + "tlbre %[word1], %[index], 1\n" 85 + "tlbre %[word2], %[index], 2\n" 86 + : [word0] "=r"(tlbe->word0), 87 + [word1] "=r"(tlbe->word1), 88 + [word2] "=r"(tlbe->word2), 89 + [tid] "=r"(tlbe->tid) 90 + : [index] "r"(index), 91 + [sprn_mmucr] "i"(SPRN_MMUCR) 92 + : "cc" 93 + ); 94 + } 95 + 96 + static inline void kvmppc_44x_tlbwe(unsigned int index, 97 + struct kvmppc_44x_tlbe *stlbe) 98 + { 99 + unsigned long tmp; 100 + 101 + asm volatile( 102 + "mfspr %[tmp], %[sprn_mmucr]\n" 103 + "rlwimi %[tmp], %[tid], 0, 0xff\n" 104 + "mtspr %[sprn_mmucr], %[tmp]\n" 105 + "tlbwe %[word0], %[index], 0\n" 106 + "tlbwe %[word1], %[index], 1\n" 107 + "tlbwe %[word2], %[index], 2\n" 108 + : [tmp] "=&r"(tmp) 109 + : [word0] "r"(stlbe->word0), 110 + [word1] "r"(stlbe->word1), 111 + [word2] "r"(stlbe->word2), 112 + [tid] "r"(stlbe->tid), 113 + [index] "r"(index), 114 + [sprn_mmucr] "i"(SPRN_MMUCR) 115 + ); 116 + } 117 118 static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) 119 { 120 + /* We only care about the guest's permission and user bits. */ 121 + attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; 122 123 if (!usermode) { 124 /* Guest is in supervisor mode, so we need to translate guest ··· 47 /* Make sure host can always access this memory. */ 48 attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; 49 50 + /* WIMGE = 0b00100 */ 51 + attrib |= PPC44x_TLB_M; 52 + 53 return attrib; 54 } 55 + 56 + /* Load shadow TLB back into hardware. 
*/ 57 + void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) 58 + { 59 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 60 + int i; 61 + 62 + for (i = 0; i <= tlb_44x_hwater; i++) { 63 + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; 64 + 65 + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) 66 + kvmppc_44x_tlbwe(i, stlbe); 67 + } 68 + } 69 + 70 + static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, 71 + unsigned int i) 72 + { 73 + vcpu_44x->shadow_tlb_mod[i] = 1; 74 + } 75 + 76 + /* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ 77 + void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) 78 + { 79 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 80 + int i; 81 + 82 + for (i = 0; i <= tlb_44x_hwater; i++) { 83 + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; 84 + 85 + if (vcpu_44x->shadow_tlb_mod[i]) 86 + kvmppc_44x_tlbre(i, stlbe); 87 + 88 + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) 89 + kvmppc_44x_tlbie(i); 90 + } 91 + } 92 + 93 94 /* Search the guest TLB for a matching entry. */ 95 int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, 96 unsigned int as) 97 { 98 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 99 int i; 100 101 /* XXX Replace loop with fancy data structures. */ 102 + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { 103 + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; 104 unsigned int tid; 105 106 if (eaddr < get_tlb_eaddr(tlbe)) ··· 83 return -1; 84 } 85 86 + int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) 87 { 88 unsigned int as = !!(vcpu->arch.msr & MSR_IS); 89 90 + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 91 } 92 93 + int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) 94 { 95 unsigned int as = !!(vcpu->arch.msr & MSR_DS); 96 97 + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); 98 } 99 100 + static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, 101 + unsigned int stlb_index) 102 { 103 + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; 104 105 + if (!ref->page) 106 + return; 107 108 + /* Discard from the TLB. */ 109 + /* Note: we could actually invalidate a host mapping, if the host overwrote 110 + * this TLB entry since we inserted a guest mapping. */ 111 + kvmppc_44x_tlbie(stlb_index); 112 + 113 + /* Now release the page. */ 114 + if (ref->writeable) 115 + kvm_release_page_dirty(ref->page); 116 + else 117 + kvm_release_page_clean(ref->page); 118 + 119 + ref->page = NULL; 120 + 121 + /* XXX set tlb_44x_index to stlb_index? */ 122 + 123 + KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); 124 } 125 126 void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) 127 { 128 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 129 int i; 130 131 for (i = 0; i <= tlb_44x_hwater; i++) 132 + kvmppc_44x_shadow_release(vcpu_44x, i); 133 } 134 135 + /** 136 + * kvmppc_mmu_map -- create a host mapping for guest memory 137 + * 138 + * If the guest wanted a larger page than the host supports, only the first 139 + * host page is mapped here and the rest are demand faulted. 140 + * 141 + * If the guest wanted a smaller page than the host page size, we map only the 142 + * guest-size page (i.e. not a full host page mapping). 143 + * 144 + * Caller must ensure that the specified guest TLB entry is safe to insert into 145 + * the shadow TLB. 
146 + */ 147 + void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, 148 + u32 flags, u32 max_bytes, unsigned int gtlb_index) 149 { 150 + struct kvmppc_44x_tlbe stlbe; 151 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 152 + struct kvmppc_44x_shadow_ref *ref; 153 struct page *new_page; 154 hpa_t hpaddr; 155 + gfn_t gfn; 156 unsigned int victim; 157 158 + /* Select TLB entry to clobber. Indirectly guard against races with the TLB 159 + * miss handler by disabling interrupts. */ 160 + local_irq_disable(); 161 + victim = ++tlb_44x_index; 162 + if (victim > tlb_44x_hwater) 163 + victim = 0; 164 + tlb_44x_index = victim; 165 + local_irq_enable(); 166 167 /* Get reference to new page. */ 168 + gfn = gpaddr >> PAGE_SHIFT; 169 new_page = gfn_to_page(vcpu->kvm, gfn); 170 if (is_error_page(new_page)) { 171 printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); ··· 163 } 164 hpaddr = page_to_phys(new_page); 165 166 + /* Invalidate any previous shadow mappings. */ 167 + kvmppc_44x_shadow_release(vcpu_44x, victim); 168 169 /* XXX Make sure (va, size) doesn't overlap any other 170 * entries. 440x6 user manual says the result would be ··· 174 175 /* XXX what about AS? */ 176 177 /* Force TS=1 for all guest mappings. */ 178 + stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; 179 180 + if (max_bytes >= PAGE_SIZE) { 181 + /* Guest mapping is larger than or equal to host page size. We can use 182 + * a "native" host mapping. */ 183 + stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; 184 + } else { 185 + /* Guest mapping is smaller than host page size. We must restrict the 186 + * size of the mapping to be at most the smaller of the two, but for 187 + * simplicity we fall back to a 4K mapping (this is probably what the 188 + * guest is using anyways). */ 189 + stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; 190 + 191 + /* 'hpaddr' is a host page, which is larger than the mapping we're 192 + * inserting here. To compensate, we must add the in-page offset to the 193 + * sub-page. */ 194 + hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); 195 + } 196 + 197 + stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); 198 + stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, 199 + vcpu->arch.msr & MSR_PR); 200 + stlbe.tid = !(asid & 0xff); 201 + 202 + /* Keep track of the reference so we can properly release it later. */ 203 + ref = &vcpu_44x->shadow_refs[victim]; 204 + ref->page = new_page; 205 + ref->gtlb_index = gtlb_index; 206 + ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); 207 + ref->tid = stlbe.tid; 208 + 209 + /* Insert shadow mapping into hardware TLB. */ 210 + kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); 211 + kvmppc_44x_tlbwe(victim, &stlbe); 212 + KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1, 213 + stlbe.word2, handler); 214 } 215 216 + /* For a particular guest TLB entry, invalidate the corresponding host TLB 217 + * mappings and release the host pages. 
*/ 218 + static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, 219 + unsigned int gtlb_index) 220 { 221 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 222 int i; 223 224 + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { 225 + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; 226 + if (ref->gtlb_index == gtlb_index) 227 + kvmppc_44x_shadow_release(vcpu_44x, i); 228 } 229 } 230 231 void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) 232 { 233 + vcpu->arch.shadow_pid = !usermode; 234 + } 235 + 236 + void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) 237 + { 238 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 239 int i; 240 241 + if (unlikely(vcpu->arch.pid == new_pid)) 242 + return; 243 244 + vcpu->arch.pid = new_pid; 245 + 246 + /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it 247 + * can't access guest kernel mappings (TID=1). When we switch to a new 248 + * guest PID, which will also use host PID=0, we must discard the old guest 249 + * userspace mappings. */ 250 + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { 251 + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; 252 + 253 + if (ref->tid == 0) 254 + kvmppc_44x_shadow_release(vcpu_44x, i); 255 + } 256 + } 257 + 258 + static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, 259 + const struct kvmppc_44x_tlbe *tlbe) 260 + { 261 + gpa_t gpa; 262 + 263 + if (!get_tlb_v(tlbe)) 264 + return 0; 265 + 266 + /* Does it match current guest AS? */ 267 + /* XXX what about IS != DS? */ 268 + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) 269 + return 0; 270 + 271 + gpa = get_tlb_raddr(tlbe); 272 + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) 273 + /* Mapping is not for RAM. */ 274 + return 0; 275 + 276 + return 1; 277 + } 278 + 279 + int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) 280 + { 281 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 282 + struct kvmppc_44x_tlbe *tlbe; 283 + unsigned int gtlb_index; 284 + 285 + gtlb_index = vcpu->arch.gpr[ra]; 286 + if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { 287 + printk("%s: index %d\n", __func__, gtlb_index); 288 + kvmppc_dump_vcpu(vcpu); 289 + return EMULATE_FAIL; 290 } 291 292 + tlbe = &vcpu_44x->guest_tlb[gtlb_index]; 293 + 294 + /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ 295 + if (tlbe->word0 & PPC44x_TLB_VALID) 296 + kvmppc_44x_invalidate(vcpu, gtlb_index); 297 + 298 + switch (ws) { 299 + case PPC44x_TLB_PAGEID: 300 + tlbe->tid = get_mmucr_stid(vcpu); 301 + tlbe->word0 = vcpu->arch.gpr[rs]; 302 + break; 303 + 304 + case PPC44x_TLB_XLAT: 305 + tlbe->word1 = vcpu->arch.gpr[rs]; 306 + break; 307 + 308 + case PPC44x_TLB_ATTRIB: 309 + tlbe->word2 = vcpu->arch.gpr[rs]; 310 + break; 311 + 312 + default: 313 + return EMULATE_FAIL; 314 + } 315 + 316 + if (tlbe_is_host_safe(vcpu, tlbe)) { 317 + u64 asid; 318 + gva_t eaddr; 319 + gpa_t gpaddr; 320 + u32 flags; 321 + u32 bytes; 322 + 323 + eaddr = get_tlb_eaddr(tlbe); 324 + gpaddr = get_tlb_raddr(tlbe); 325 + 326 + /* Use the advertised page size to mask effective and real addrs. 
*/ 327 + bytes = get_tlb_bytes(tlbe); 328 + eaddr &= ~(bytes - 1); 329 + gpaddr &= ~(bytes - 1); 330 + 331 + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; 332 + flags = tlbe->word2 & 0xffff; 333 + 334 + kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index); 335 + } 336 + 337 + KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, 338 + tlbe->word1, tlbe->word2, handler); 339 + 340 + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); 341 + return EMULATE_DONE; 342 + } 343 + 344 + int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) 345 + { 346 + u32 ea; 347 + int gtlb_index; 348 + unsigned int as = get_mmucr_sts(vcpu); 349 + unsigned int pid = get_mmucr_stid(vcpu); 350 + 351 + ea = vcpu->arch.gpr[rb]; 352 + if (ra) 353 + ea += vcpu->arch.gpr[ra]; 354 + 355 + gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); 356 + if (rc) { 357 + if (gtlb_index < 0) 358 + vcpu->arch.cr &= ~0x20000000; 359 + else 360 + vcpu->arch.cr |= 0x20000000; 361 + } 362 + vcpu->arch.gpr[rt] = gtlb_index; 363 + 364 + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); 365 + return EMULATE_DONE; 366 }
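When the guest asks for a mapping smaller than the host page, kvmppc_mmu_map() above falls back to a 4K shadow entry and has to add the 4K-slot offset of the guest page back into the host physical address, because page_to_phys() only yields the base of the (possibly larger) host page. A worked example, hypothetically assuming a 64K host PAGE_SIZE:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT   16
#define PAGE_MASK    (~((1ul << PAGE_SHIFT) - 1))   /* ...ffff0000 */
#define PAGE_MASK_4K (~0xffful)                     /* ...fffff000 */

int main(void)
{
        uint64_t gpaddr = 0x00123000;   /* guest physical address, 4K aligned */
        uint64_t hpaddr = 0x45670000;   /* base of the host page holding it */

        /* PAGE_MASK ^ PAGE_MASK_4K == 0xf000: the 4K-slot index inside the
         * 64K host page. */
        hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);

        printf("hpaddr = 0x%llx\n", (unsigned long long)hpaddr);  /* 0x45673000 */
        return 0;
}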
+15 -11
arch/powerpc/kvm/44x_tlb.h
··· 25 26 extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, 27 unsigned int pid, unsigned int as); 28 - extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); 29 - extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); 30 31 /* TLB helper functions */ 32 - static inline unsigned int get_tlb_size(const struct tlbe *tlbe) 33 { 34 return (tlbe->word0 >> 4) & 0xf; 35 } 36 37 - static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) 38 { 39 return tlbe->word0 & 0xfffffc00; 40 } 41 42 - static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) 43 { 44 unsigned int pgsize = get_tlb_size(tlbe); 45 return 1 << 10 << (pgsize << 1); 46 } 47 48 - static inline gva_t get_tlb_end(const struct tlbe *tlbe) 49 { 50 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; 51 } 52 53 - static inline u64 get_tlb_raddr(const struct tlbe *tlbe) 54 { 55 u64 word1 = tlbe->word1; 56 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); 57 } 58 59 - static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) 60 { 61 return tlbe->tid & 0xff; 62 } 63 64 - static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) 65 { 66 return (tlbe->word0 >> 8) & 0x1; 67 } 68 69 - static inline unsigned int get_tlb_v(const struct tlbe *tlbe) 70 { 71 return (tlbe->word0 >> 9) & 0x1; 72 } ··· 85 return (vcpu->arch.mmucr >> 16) & 0x1; 86 } 87 88 - static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) 89 { 90 unsigned int pgmask = get_tlb_bytes(tlbe) - 1; 91
··· 25 26 extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, 27 unsigned int pid, unsigned int as); 28 + extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); 29 + extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); 30 + 31 + extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, 32 + u8 rc); 33 + extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); 34 35 /* TLB helper functions */ 36 + static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) 37 { 38 return (tlbe->word0 >> 4) & 0xf; 39 } 40 41 + static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) 42 { 43 return tlbe->word0 & 0xfffffc00; 44 } 45 46 + static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) 47 { 48 unsigned int pgsize = get_tlb_size(tlbe); 49 return 1 << 10 << (pgsize << 1); 50 } 51 52 + static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) 53 { 54 return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; 55 } 56 57 + static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) 58 { 59 u64 word1 = tlbe->word1; 60 return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); 61 } 62 63 + static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) 64 { 65 return tlbe->tid & 0xff; 66 } 67 68 + static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) 69 { 70 return (tlbe->word0 >> 8) & 0x1; 71 } 72 73 + static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) 74 { 75 return (tlbe->word0 >> 9) & 0x1; 76 } ··· 81 return (vcpu->arch.mmucr >> 16) & 0x1; 82 } 83 84 + static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr) 85 { 86 unsigned int pgmask = get_tlb_bytes(tlbe) - 1; 87
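get_tlb_bytes() above expands the 440 SIZE field with 1 << 10 << (pgsize << 1), i.e. each step of the field quadruples the size starting from 1K. A quick standalone decode (only some encodings are actually written by the code above, which uses PPC44x_TLB_4K and PPC44x_TLB_16M):

#include <stdio.h>

int main(void)
{
        unsigned int pgsize;

        for (pgsize = 0; pgsize <= 7; pgsize++)
                printf("SIZE=%u -> %lu bytes\n", pgsize,
                       1ul << 10 << (pgsize << 1));
        /* 1K, 4K, 16K, 64K, 256K, 1M, 4M, 16M */
        return 0;
}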
+17 -11
arch/powerpc/kvm/Kconfig
··· 15 if VIRTUALIZATION 16 17 config KVM 18 - bool "Kernel-based Virtual Machine (KVM) support" 19 - depends on 44x && EXPERIMENTAL 20 select PREEMPT_NOTIFIERS 21 select ANON_INODES 22 - # We can only run on Book E hosts so far 23 - select KVM_BOOKE_HOST 24 ---help--- 25 - Support hosting virtualized guest machines. You will also 26 - need to select one or more of the processor modules below. 27 28 This module provides access to the hardware capabilities through 29 a character device node named /dev/kvm. 30 31 If unsure, say N. 32 33 - config KVM_BOOKE_HOST 34 - bool "KVM host support for Book E PowerPC processors" 35 - depends on KVM && 44x 36 ---help--- 37 - Provides host support for KVM on Book E PowerPC processors. Currently 38 - this works on 440 processors only. 39 40 config KVM_TRACE 41 bool "KVM trace support"
··· 15 if VIRTUALIZATION 16 17 config KVM 18 + bool 19 select PREEMPT_NOTIFIERS 20 select ANON_INODES 21 + 22 + config KVM_440 23 + bool "KVM support for PowerPC 440 processors" 24 + depends on EXPERIMENTAL && 44x 25 + select KVM 26 ---help--- 27 + Support running unmodified 440 guest kernels in virtual machines on 28 + 440 host processors. 29 30 This module provides access to the hardware capabilities through 31 a character device node named /dev/kvm. 32 33 If unsure, say N. 34 35 + config KVM_EXIT_TIMING 36 + bool "Detailed exit timing" 37 + depends on KVM 38 ---help--- 39 + Calculate elapsed time for every exit/enter cycle. A per-vcpu 40 + report is available in debugfs kvm/vm#_vcpu#_timing. 41 + The overhead is relatively small, however it is not recommended for 42 + production environments. 43 + 44 + If unsure, say N. 45 46 config KVM_TRACE 47 bool "KVM trace support"
+9 -3
arch/powerpc/kvm/Makefile
··· 8 9 common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) 10 11 - kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o 12 obj-$(CONFIG_KVM) += kvm.o 13 14 AFLAGS_booke_interrupts.o := -I$(obj) 15 16 - kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o 17 - obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
··· 8 9 common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) 10 11 + kvm-objs := $(common-objs-y) powerpc.o emulate.o 12 + obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o 13 obj-$(CONFIG_KVM) += kvm.o 14 15 AFLAGS_booke_interrupts.o := -I$(obj) 16 17 + kvm-440-objs := \ 18 + booke.o \ 19 + booke_interrupts.o \ 20 + 44x.o \ 21 + 44x_tlb.o \ 22 + 44x_emulate.o 23 + obj-$(CONFIG_KVM_440) += kvm-440.o
+60
arch/powerpc/kvm/booke.h
···
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright IBM Corp. 2008 16 + * 17 + * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 + */ 19 + 20 + #ifndef __KVM_BOOKE_H__ 21 + #define __KVM_BOOKE_H__ 22 + 23 + #include <linux/types.h> 24 + #include <linux/kvm_host.h> 25 + #include "timing.h" 26 + 27 + /* interrupt priortity ordering */ 28 + #define BOOKE_IRQPRIO_DATA_STORAGE 0 29 + #define BOOKE_IRQPRIO_INST_STORAGE 1 30 + #define BOOKE_IRQPRIO_ALIGNMENT 2 31 + #define BOOKE_IRQPRIO_PROGRAM 3 32 + #define BOOKE_IRQPRIO_FP_UNAVAIL 4 33 + #define BOOKE_IRQPRIO_SYSCALL 5 34 + #define BOOKE_IRQPRIO_AP_UNAVAIL 6 35 + #define BOOKE_IRQPRIO_DTLB_MISS 7 36 + #define BOOKE_IRQPRIO_ITLB_MISS 8 37 + #define BOOKE_IRQPRIO_MACHINE_CHECK 9 38 + #define BOOKE_IRQPRIO_DEBUG 10 39 + #define BOOKE_IRQPRIO_CRITICAL 11 40 + #define BOOKE_IRQPRIO_WATCHDOG 12 41 + #define BOOKE_IRQPRIO_EXTERNAL 13 42 + #define BOOKE_IRQPRIO_FIT 14 43 + #define BOOKE_IRQPRIO_DECREMENTER 15 44 + 45 + /* Helper function for "full" MSR writes. No need to call this if only EE is 46 + * changing. */ 47 + static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) 48 + { 49 + if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) 50 + kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); 51 + 52 + vcpu->arch.msr = new_msr; 53 + 54 + if (vcpu->arch.msr & MSR_WE) { 55 + kvm_vcpu_block(vcpu); 56 + kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); 57 + }; 58 + } 59 + 60 + #endif /* __KVM_BOOKE_H__ */
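The BOOKE_IRQPRIO_* values above are bit positions in vcpu->arch.pending_exceptions, ordered so that a lower number means a more urgent interrupt. A sketch, not the actual booke.c delivery loop, of how such a bitmap is consumed: the lowest set bit is the next priority to deliver.

#include <stdio.h>

#define BOOKE_IRQPRIO_PROGRAM      3
#define BOOKE_IRQPRIO_EXTERNAL    13
#define BOOKE_IRQPRIO_DECREMENTER 15

int main(void)
{
        unsigned long pending = 0;

        pending |= 1ul << BOOKE_IRQPRIO_DECREMENTER;
        pending |= 1ul << BOOKE_IRQPRIO_PROGRAM;

        /* __builtin_ctzl stands in for the kernel's __ffs()/find_first_bit(). */
        printf("deliver priority %d first\n", (int)__builtin_ctzl(pending)); /* 3 */
        return 0;
}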
+193 -233
arch/powerpc/kvm/booke_guest.c → arch/powerpc/kvm/booke.c
··· 24 #include <linux/module.h> 25 #include <linux/vmalloc.h> 26 #include <linux/fs.h> 27 #include <asm/cputable.h> 28 #include <asm/uaccess.h> 29 #include <asm/kvm_ppc.h> 30 31 #include "44x_tlb.h" 32 33 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 34 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 35 36 struct kvm_stats_debugfs_item debugfs_entries[] = { 37 - { "exits", VCPU_STAT(sum_exits) }, 38 { "mmio", VCPU_STAT(mmio_exits) }, 39 { "dcr", VCPU_STAT(dcr_exits) }, 40 { "sig", VCPU_STAT(signal_exits) }, 41 - { "light", VCPU_STAT(light_exits) }, 42 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, 43 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, 44 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, ··· 58 { NULL } 59 }; 60 61 - static const u32 interrupt_msr_mask[16] = { 62 - [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, 63 - [BOOKE_INTERRUPT_MACHINE_CHECK] = 0, 64 - [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE, 65 - [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE, 66 - [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE, 67 - [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE, 68 - [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE, 69 - [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, 70 - [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE, 71 - [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, 72 - [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE, 73 - [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE, 74 - [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME, 75 - [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE, 76 - [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE, 77 - [BOOKE_INTERRUPT_DEBUG] = MSR_ME, 78 - }; 79 - 80 - const unsigned char exception_priority[] = { 81 - [BOOKE_INTERRUPT_DATA_STORAGE] = 0, 82 - [BOOKE_INTERRUPT_INST_STORAGE] = 1, 83 - [BOOKE_INTERRUPT_ALIGNMENT] = 2, 84 - [BOOKE_INTERRUPT_PROGRAM] = 3, 85 - [BOOKE_INTERRUPT_FP_UNAVAIL] = 4, 86 - [BOOKE_INTERRUPT_SYSCALL] = 5, 87 - [BOOKE_INTERRUPT_AP_UNAVAIL] = 6, 88 - [BOOKE_INTERRUPT_DTLB_MISS] = 7, 89 - [BOOKE_INTERRUPT_ITLB_MISS] = 8, 90 - [BOOKE_INTERRUPT_MACHINE_CHECK] = 9, 91 - [BOOKE_INTERRUPT_DEBUG] = 10, 92 - [BOOKE_INTERRUPT_CRITICAL] = 11, 93 - [BOOKE_INTERRUPT_WATCHDOG] = 12, 94 - [BOOKE_INTERRUPT_EXTERNAL] = 13, 95 - [BOOKE_INTERRUPT_FIT] = 14, 96 - [BOOKE_INTERRUPT_DECREMENTER] = 15, 97 - }; 98 - 99 - const unsigned char priority_exception[] = { 100 - BOOKE_INTERRUPT_DATA_STORAGE, 101 - BOOKE_INTERRUPT_INST_STORAGE, 102 - BOOKE_INTERRUPT_ALIGNMENT, 103 - BOOKE_INTERRUPT_PROGRAM, 104 - BOOKE_INTERRUPT_FP_UNAVAIL, 105 - BOOKE_INTERRUPT_SYSCALL, 106 - BOOKE_INTERRUPT_AP_UNAVAIL, 107 - BOOKE_INTERRUPT_DTLB_MISS, 108 - BOOKE_INTERRUPT_ITLB_MISS, 109 - BOOKE_INTERRUPT_MACHINE_CHECK, 110 - BOOKE_INTERRUPT_DEBUG, 111 - BOOKE_INTERRUPT_CRITICAL, 112 - BOOKE_INTERRUPT_WATCHDOG, 113 - BOOKE_INTERRUPT_EXTERNAL, 114 - BOOKE_INTERRUPT_FIT, 115 - BOOKE_INTERRUPT_DECREMENTER, 116 - }; 117 - 118 - 119 - void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) 120 - { 121 - struct tlbe *tlbe; 122 - int i; 123 - 124 - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); 125 - printk("| %2s | %3s | %8s | %8s | %8s |\n", 126 - "nr", "tid", "word0", "word1", "word2"); 127 - 128 - for (i = 0; i < PPC44x_TLB_SIZE; i++) { 129 - tlbe = &vcpu->arch.guest_tlb[i]; 130 - if (tlbe->word0 & PPC44x_TLB_VALID) 131 - printk(" G%2d | %02X | %08X | %08X | %08X |\n", 132 - i, tlbe->tid, tlbe->word0, tlbe->word1, 133 - tlbe->word2); 134 - } 135 - 136 - for (i = 0; i < PPC44x_TLB_SIZE; i++) { 137 - tlbe = &vcpu->arch.shadow_tlb[i]; 
138 - if (tlbe->word0 & PPC44x_TLB_VALID) 139 - printk(" S%2d | %02X | %08X | %08X | %08X |\n", 140 - i, tlbe->tid, tlbe->word0, tlbe->word1, 141 - tlbe->word2); 142 - } 143 - } 144 - 145 /* TODO: use vcpu_printf() */ 146 void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) 147 { 148 int i; 149 150 - printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); 151 - printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); 152 - printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); 153 154 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); 155 156 for (i = 0; i < 32; i += 4) { 157 - printk("gpr%02d: %08x %08x %08x %08x\n", i, 158 vcpu->arch.gpr[i], 159 vcpu->arch.gpr[i+1], 160 vcpu->arch.gpr[i+2], ··· 78 } 79 } 80 81 - /* Check if we are ready to deliver the interrupt */ 82 - static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) 83 { 84 - int r; 85 - 86 - switch (interrupt) { 87 - case BOOKE_INTERRUPT_CRITICAL: 88 - r = vcpu->arch.msr & MSR_CE; 89 - break; 90 - case BOOKE_INTERRUPT_MACHINE_CHECK: 91 - r = vcpu->arch.msr & MSR_ME; 92 - break; 93 - case BOOKE_INTERRUPT_EXTERNAL: 94 - r = vcpu->arch.msr & MSR_EE; 95 - break; 96 - case BOOKE_INTERRUPT_DECREMENTER: 97 - r = vcpu->arch.msr & MSR_EE; 98 - break; 99 - case BOOKE_INTERRUPT_FIT: 100 - r = vcpu->arch.msr & MSR_EE; 101 - break; 102 - case BOOKE_INTERRUPT_WATCHDOG: 103 - r = vcpu->arch.msr & MSR_CE; 104 - break; 105 - case BOOKE_INTERRUPT_DEBUG: 106 - r = vcpu->arch.msr & MSR_DE; 107 - break; 108 - default: 109 - r = 1; 110 - } 111 - 112 - return r; 113 } 114 115 - static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) 116 { 117 - switch (interrupt) { 118 - case BOOKE_INTERRUPT_DECREMENTER: 119 - vcpu->arch.tsr |= TSR_DIS; 120 break; 121 } 122 123 - vcpu->arch.srr0 = vcpu->arch.pc; 124 - vcpu->arch.srr1 = vcpu->arch.msr; 125 - vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt]; 126 - kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]); 127 } 128 129 /* Check pending exceptions and deliver one, if possible. */ 130 - void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) 131 { 132 unsigned long *pending = &vcpu->arch.pending_exceptions; 133 - unsigned int exception; 134 unsigned int priority; 135 136 - priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); 137 while (priority <= BOOKE_MAX_INTERRUPT) { 138 - exception = priority_exception[priority]; 139 - if (kvmppc_can_deliver_interrupt(vcpu, exception)) { 140 - kvmppc_clear_exception(vcpu, exception); 141 - kvmppc_deliver_interrupt(vcpu, exception); 142 break; 143 - } 144 145 priority = find_next_bit(pending, 146 BITS_PER_BYTE * sizeof(*pending), ··· 186 enum emulation_result er; 187 int r = RESUME_HOST; 188 189 local_irq_enable(); 190 191 run->exit_reason = KVM_EXIT_UNKNOWN; ··· 202 break; 203 204 case BOOKE_INTERRUPT_EXTERNAL: 205 case BOOKE_INTERRUPT_DECREMENTER: 206 /* Since we switched IVPR back to the host's value, the host 207 * handled this interrupt the moment we enabled interrupts. 208 * Now we just offer it a chance to reschedule the guest. */ 209 - 210 - /* XXX At this point the TLB still holds our shadow TLB, so if 211 - * we do reschedule the host will fault over it. Perhaps we 212 - * should politely restore the host's entries to minimize 213 - * misses before ceding control. 
*/ 214 if (need_resched()) 215 cond_resched(); 216 - if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) 217 - vcpu->stat.dec_exits++; 218 - else 219 - vcpu->stat.ext_intr_exits++; 220 r = RESUME_GUEST; 221 break; 222 ··· 223 /* Program traps generated by user-level software must be handled 224 * by the guest kernel. */ 225 vcpu->arch.esr = vcpu->arch.fault_esr; 226 - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); 227 r = RESUME_GUEST; 228 break; 229 } 230 231 er = kvmppc_emulate_instruction(run, vcpu); 232 switch (er) { 233 case EMULATE_DONE: 234 /* Future optimization: only reload non-volatiles if 235 * they were actually modified by emulation. */ 236 - vcpu->stat.emulated_inst_exits++; 237 r = RESUME_GUEST_NV; 238 break; 239 case EMULATE_DO_DCR: ··· 244 break; 245 case EMULATE_FAIL: 246 /* XXX Deliver Program interrupt to guest. */ 247 - printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", 248 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 249 /* For debugging, encode the failing instruction and 250 * report it to userspace. */ ··· 258 break; 259 260 case BOOKE_INTERRUPT_FP_UNAVAIL: 261 - kvmppc_queue_exception(vcpu, exit_nr); 262 r = RESUME_GUEST; 263 break; 264 265 case BOOKE_INTERRUPT_DATA_STORAGE: 266 vcpu->arch.dear = vcpu->arch.fault_dear; 267 vcpu->arch.esr = vcpu->arch.fault_esr; 268 - kvmppc_queue_exception(vcpu, exit_nr); 269 - vcpu->stat.dsi_exits++; 270 r = RESUME_GUEST; 271 break; 272 273 case BOOKE_INTERRUPT_INST_STORAGE: 274 vcpu->arch.esr = vcpu->arch.fault_esr; 275 - kvmppc_queue_exception(vcpu, exit_nr); 276 - vcpu->stat.isi_exits++; 277 r = RESUME_GUEST; 278 break; 279 280 case BOOKE_INTERRUPT_SYSCALL: 281 - kvmppc_queue_exception(vcpu, exit_nr); 282 - vcpu->stat.syscall_exits++; 283 r = RESUME_GUEST; 284 break; 285 286 case BOOKE_INTERRUPT_DTLB_MISS: { 287 - struct tlbe *gtlbe; 288 unsigned long eaddr = vcpu->arch.fault_dear; 289 gfn_t gfn; 290 291 /* Check the guest TLB. */ 292 - gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); 293 - if (!gtlbe) { 294 /* The guest didn't have a mapping for it. */ 295 - kvmppc_queue_exception(vcpu, exit_nr); 296 vcpu->arch.dear = vcpu->arch.fault_dear; 297 vcpu->arch.esr = vcpu->arch.fault_esr; 298 - vcpu->stat.dtlb_real_miss_exits++; 299 r = RESUME_GUEST; 300 break; 301 } 302 303 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); 304 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; 305 ··· 315 * b) the guest used a large mapping which we're faking 316 * Either way, we need to satisfy the fault without 317 * invoking the guest. */ 318 - kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, 319 - gtlbe->word2); 320 - vcpu->stat.dtlb_virt_miss_exits++; 321 r = RESUME_GUEST; 322 } else { 323 /* Guest has mapped and accessed a page which is not 324 * actually RAM. */ 325 r = kvmppc_emulate_mmio(run, vcpu); 326 } 327 328 break; 329 } 330 331 case BOOKE_INTERRUPT_ITLB_MISS: { 332 - struct tlbe *gtlbe; 333 unsigned long eaddr = vcpu->arch.pc; 334 gfn_t gfn; 335 336 r = RESUME_GUEST; 337 338 /* Check the guest TLB. */ 339 - gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); 340 - if (!gtlbe) { 341 /* The guest didn't have a mapping for it. 
*/ 342 - kvmppc_queue_exception(vcpu, exit_nr); 343 - vcpu->stat.itlb_real_miss_exits++; 344 break; 345 } 346 347 - vcpu->stat.itlb_virt_miss_exits++; 348 349 - gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; 350 351 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { 352 /* The guest TLB had a mapping, but the shadow TLB ··· 362 * b) the guest used a large mapping which we're faking 363 * Either way, we need to satisfy the fault without 364 * invoking the guest. */ 365 - kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, 366 - gtlbe->word2); 367 } else { 368 /* Guest mapped and leaped at non-RAM! */ 369 - kvmppc_queue_exception(vcpu, 370 - BOOKE_INTERRUPT_MACHINE_CHECK); 371 } 372 373 break; ··· 383 mtspr(SPRN_DBSR, dbsr); 384 385 run->exit_reason = KVM_EXIT_DEBUG; 386 r = RESUME_HOST; 387 break; 388 } ··· 395 396 local_irq_disable(); 397 398 - kvmppc_check_and_deliver_interrupts(vcpu); 399 400 - /* Do some exit accounting. */ 401 - vcpu->stat.sum_exits++; 402 if (!(r & RESUME_HOST)) { 403 /* To avoid clobbering exit_reason, only check for signals if 404 * we aren't already exiting to userspace for some other ··· 404 if (signal_pending(current)) { 405 run->exit_reason = KVM_EXIT_INTR; 406 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); 407 - 408 - vcpu->stat.signal_exits++; 409 - } else { 410 - vcpu->stat.light_exits++; 411 - } 412 - } else { 413 - switch (run->exit_reason) { 414 - case KVM_EXIT_MMIO: 415 - vcpu->stat.mmio_exits++; 416 - break; 417 - case KVM_EXIT_DCR: 418 - vcpu->stat.dcr_exits++; 419 - break; 420 - case KVM_EXIT_INTR: 421 - vcpu->stat.signal_exits++; 422 - break; 423 } 424 } 425 ··· 414 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ 415 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 416 { 417 - struct tlbe *tlbe = &vcpu->arch.guest_tlb[0]; 418 - 419 - tlbe->tid = 0; 420 - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; 421 - tlbe->word1 = 0; 422 - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; 423 - 424 - tlbe++; 425 - tlbe->tid = 0; 426 - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; 427 - tlbe->word1 = 0xef600000; 428 - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR 429 - | PPC44x_TLB_I | PPC44x_TLB_G; 430 - 431 vcpu->arch.pc = 0; 432 vcpu->arch.msr = 0; 433 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ ··· 424 * before it's programmed its own IVPR. */ 425 vcpu->arch.ivpr = 0x55550000; 426 427 - /* Since the guest can directly access the timebase, it must know the 428 - * real timebase frequency. Accordingly, it must see the state of 429 - * CCR1[TCS]. */ 430 - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); 431 432 - return 0; 433 } 434 435 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ··· 465 vcpu->arch.ctr = regs->ctr; 466 vcpu->arch.lr = regs->lr; 467 vcpu->arch.xer = regs->xer; 468 - vcpu->arch.msr = regs->msr; 469 vcpu->arch.srr0 = regs->srr0; 470 vcpu->arch.srr1 = regs->srr1; 471 vcpu->arch.sprg0 = regs->sprg0; ··· 504 return -ENOTSUPP; 505 } 506 507 - /* 'linear_address' is actually an encoding of AS|PID|EADDR . 
*/ 508 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 509 struct kvm_translation *tr) 510 { 511 - struct tlbe *gtlbe; 512 - int index; 513 - gva_t eaddr; 514 - u8 pid; 515 - u8 as; 516 517 - eaddr = tr->linear_address; 518 - pid = (tr->linear_address >> 32) & 0xff; 519 - as = (tr->linear_address >> 40) & 0x1; 520 521 - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); 522 - if (index == -1) { 523 - tr->valid = 0; 524 - return 0; 525 } 526 - 527 - gtlbe = &vcpu->arch.guest_tlb[index]; 528 - 529 - tr->physical_address = tlb_xlate(gtlbe, eaddr); 530 - /* XXX what does "writeable" and "usermode" even mean? */ 531 - tr->valid = 1; 532 533 return 0; 534 }
··· 24 #include <linux/module.h> 25 #include <linux/vmalloc.h> 26 #include <linux/fs.h> 27 + 28 #include <asm/cputable.h> 29 #include <asm/uaccess.h> 30 #include <asm/kvm_ppc.h> 31 + #include "timing.h" 32 + #include <asm/cacheflush.h> 33 + #include <asm/kvm_44x.h> 34 35 + #include "booke.h" 36 #include "44x_tlb.h" 37 + 38 + unsigned long kvmppc_booke_handlers; 39 40 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 41 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 42 43 struct kvm_stats_debugfs_item debugfs_entries[] = { 44 { "mmio", VCPU_STAT(mmio_exits) }, 45 { "dcr", VCPU_STAT(dcr_exits) }, 46 { "sig", VCPU_STAT(signal_exits) }, 47 { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, 48 { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, 49 { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, ··· 53 { NULL } 54 }; 55 56 /* TODO: use vcpu_printf() */ 57 void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) 58 { 59 int i; 60 61 + printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); 62 + printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); 63 + printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); 64 65 printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); 66 67 for (i = 0; i < 32; i += 4) { 68 + printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, 69 vcpu->arch.gpr[i], 70 vcpu->arch.gpr[i+1], 71 vcpu->arch.gpr[i+2], ··· 157 } 158 } 159 160 + static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, 161 + unsigned int priority) 162 { 163 + set_bit(priority, &vcpu->arch.pending_exceptions); 164 } 165 166 + void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) 167 { 168 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); 169 + } 170 + 171 + void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 172 + { 173 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); 174 + } 175 + 176 + int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) 177 + { 178 + return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 179 + } 180 + 181 + void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 182 + struct kvm_interrupt *irq) 183 + { 184 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); 185 + } 186 + 187 + /* Deliver the interrupt of the corresponding priority, if possible. 
*/ 188 + static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, 189 + unsigned int priority) 190 + { 191 + int allowed = 0; 192 + ulong msr_mask; 193 + 194 + switch (priority) { 195 + case BOOKE_IRQPRIO_PROGRAM: 196 + case BOOKE_IRQPRIO_DTLB_MISS: 197 + case BOOKE_IRQPRIO_ITLB_MISS: 198 + case BOOKE_IRQPRIO_SYSCALL: 199 + case BOOKE_IRQPRIO_DATA_STORAGE: 200 + case BOOKE_IRQPRIO_INST_STORAGE: 201 + case BOOKE_IRQPRIO_FP_UNAVAIL: 202 + case BOOKE_IRQPRIO_AP_UNAVAIL: 203 + case BOOKE_IRQPRIO_ALIGNMENT: 204 + allowed = 1; 205 + msr_mask = MSR_CE|MSR_ME|MSR_DE; 206 + break; 207 + case BOOKE_IRQPRIO_CRITICAL: 208 + case BOOKE_IRQPRIO_WATCHDOG: 209 + allowed = vcpu->arch.msr & MSR_CE; 210 + msr_mask = MSR_ME; 211 + break; 212 + case BOOKE_IRQPRIO_MACHINE_CHECK: 213 + allowed = vcpu->arch.msr & MSR_ME; 214 + msr_mask = 0; 215 + break; 216 + case BOOKE_IRQPRIO_EXTERNAL: 217 + case BOOKE_IRQPRIO_DECREMENTER: 218 + case BOOKE_IRQPRIO_FIT: 219 + allowed = vcpu->arch.msr & MSR_EE; 220 + msr_mask = MSR_CE|MSR_ME|MSR_DE; 221 + break; 222 + case BOOKE_IRQPRIO_DEBUG: 223 + allowed = vcpu->arch.msr & MSR_DE; 224 + msr_mask = MSR_ME; 225 break; 226 } 227 228 + if (allowed) { 229 + vcpu->arch.srr0 = vcpu->arch.pc; 230 + vcpu->arch.srr1 = vcpu->arch.msr; 231 + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; 232 + kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); 233 + 234 + clear_bit(priority, &vcpu->arch.pending_exceptions); 235 + } 236 + 237 + return allowed; 238 } 239 240 /* Check pending exceptions and deliver one, if possible. */ 241 + void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) 242 { 243 unsigned long *pending = &vcpu->arch.pending_exceptions; 244 unsigned int priority; 245 246 + priority = __ffs(*pending); 247 while (priority <= BOOKE_MAX_INTERRUPT) { 248 + if (kvmppc_booke_irqprio_deliver(vcpu, priority)) 249 break; 250 251 priority = find_next_bit(pending, 252 BITS_PER_BYTE * sizeof(*pending), ··· 238 enum emulation_result er; 239 int r = RESUME_HOST; 240 241 + /* update before a new last_exit_type is rewritten */ 242 + kvmppc_update_timing_stats(vcpu); 243 + 244 local_irq_enable(); 245 246 run->exit_reason = KVM_EXIT_UNKNOWN; ··· 251 break; 252 253 case BOOKE_INTERRUPT_EXTERNAL: 254 + kvmppc_account_exit(vcpu, EXT_INTR_EXITS); 255 + if (need_resched()) 256 + cond_resched(); 257 + r = RESUME_GUEST; 258 + break; 259 + 260 case BOOKE_INTERRUPT_DECREMENTER: 261 /* Since we switched IVPR back to the host's value, the host 262 * handled this interrupt the moment we enabled interrupts. 263 * Now we just offer it a chance to reschedule the guest. */ 264 + kvmppc_account_exit(vcpu, DEC_EXITS); 265 if (need_resched()) 266 cond_resched(); 267 r = RESUME_GUEST; 268 break; 269 ··· 274 /* Program traps generated by user-level software must be handled 275 * by the guest kernel. */ 276 vcpu->arch.esr = vcpu->arch.fault_esr; 277 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); 278 r = RESUME_GUEST; 279 + kvmppc_account_exit(vcpu, USR_PR_INST); 280 break; 281 } 282 283 er = kvmppc_emulate_instruction(run, vcpu); 284 switch (er) { 285 case EMULATE_DONE: 286 + /* don't overwrite subtypes, just account kvm_stats */ 287 + kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); 288 /* Future optimization: only reload non-volatiles if 289 * they were actually modified by emulation. */ 290 r = RESUME_GUEST_NV; 291 break; 292 case EMULATE_DO_DCR: ··· 293 break; 294 case EMULATE_FAIL: 295 /* XXX Deliver Program interrupt to guest. 
*/ 296 + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 297 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 298 /* For debugging, encode the failing instruction and 299 * report it to userspace. */ ··· 307 break; 308 309 case BOOKE_INTERRUPT_FP_UNAVAIL: 310 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); 311 + kvmppc_account_exit(vcpu, FP_UNAVAIL); 312 r = RESUME_GUEST; 313 break; 314 315 case BOOKE_INTERRUPT_DATA_STORAGE: 316 vcpu->arch.dear = vcpu->arch.fault_dear; 317 vcpu->arch.esr = vcpu->arch.fault_esr; 318 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); 319 + kvmppc_account_exit(vcpu, DSI_EXITS); 320 r = RESUME_GUEST; 321 break; 322 323 case BOOKE_INTERRUPT_INST_STORAGE: 324 vcpu->arch.esr = vcpu->arch.fault_esr; 325 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); 326 + kvmppc_account_exit(vcpu, ISI_EXITS); 327 r = RESUME_GUEST; 328 break; 329 330 case BOOKE_INTERRUPT_SYSCALL: 331 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); 332 + kvmppc_account_exit(vcpu, SYSCALL_EXITS); 333 r = RESUME_GUEST; 334 break; 335 336 + /* XXX move to a 440-specific file. */ 337 case BOOKE_INTERRUPT_DTLB_MISS: { 338 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 339 + struct kvmppc_44x_tlbe *gtlbe; 340 unsigned long eaddr = vcpu->arch.fault_dear; 341 + int gtlb_index; 342 gfn_t gfn; 343 344 /* Check the guest TLB. */ 345 + gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr); 346 + if (gtlb_index < 0) { 347 /* The guest didn't have a mapping for it. */ 348 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); 349 vcpu->arch.dear = vcpu->arch.fault_dear; 350 vcpu->arch.esr = vcpu->arch.fault_esr; 351 + kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); 352 r = RESUME_GUEST; 353 break; 354 } 355 356 + gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; 357 vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); 358 gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; 359 ··· 359 * b) the guest used a large mapping which we're faking 360 * Either way, we need to satisfy the fault without 361 * invoking the guest. */ 362 + kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid, 363 + gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); 364 + kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); 365 r = RESUME_GUEST; 366 } else { 367 /* Guest has mapped and accessed a page which is not 368 * actually RAM. */ 369 r = kvmppc_emulate_mmio(run, vcpu); 370 + kvmppc_account_exit(vcpu, MMIO_EXITS); 371 } 372 373 break; 374 } 375 376 + /* XXX move to a 440-specific file. */ 377 case BOOKE_INTERRUPT_ITLB_MISS: { 378 + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); 379 + struct kvmppc_44x_tlbe *gtlbe; 380 unsigned long eaddr = vcpu->arch.pc; 381 + gpa_t gpaddr; 382 gfn_t gfn; 383 + int gtlb_index; 384 385 r = RESUME_GUEST; 386 387 /* Check the guest TLB. */ 388 + gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr); 389 + if (gtlb_index < 0) { 390 /* The guest didn't have a mapping for it. */ 391 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); 392 + kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); 393 break; 394 } 395 396 + kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); 397 398 + gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; 399 + gpaddr = tlb_xlate(gtlbe, eaddr); 400 + gfn = gpaddr >> PAGE_SHIFT; 401 402 if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { 403 /* The guest TLB had a mapping, but the shadow TLB ··· 399 * b) the guest used a large mapping which we're faking 400 * Either way, we need to satisfy the fault without 401 * invoking the guest. 
*/ 402 + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, 403 + gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); 404 } else { 405 /* Guest mapped and leaped at non-RAM! */ 406 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); 407 } 408 409 break; ··· 421 mtspr(SPRN_DBSR, dbsr); 422 423 run->exit_reason = KVM_EXIT_DEBUG; 424 + kvmppc_account_exit(vcpu, DEBUG_EXITS); 425 r = RESUME_HOST; 426 break; 427 } ··· 432 433 local_irq_disable(); 434 435 + kvmppc_core_deliver_interrupts(vcpu); 436 437 if (!(r & RESUME_HOST)) { 438 /* To avoid clobbering exit_reason, only check for signals if 439 * we aren't already exiting to userspace for some other ··· 443 if (signal_pending(current)) { 444 run->exit_reason = KVM_EXIT_INTR; 445 r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); 446 + kvmppc_account_exit(vcpu, SIGNAL_EXITS); 447 } 448 } 449 ··· 468 /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ 469 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 470 { 471 vcpu->arch.pc = 0; 472 vcpu->arch.msr = 0; 473 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ ··· 492 * before it's programmed its own IVPR. */ 493 vcpu->arch.ivpr = 0x55550000; 494 495 + kvmppc_init_timing_stats(vcpu); 496 497 + return kvmppc_core_vcpu_setup(vcpu); 498 } 499 500 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) ··· 536 vcpu->arch.ctr = regs->ctr; 537 vcpu->arch.lr = regs->lr; 538 vcpu->arch.xer = regs->xer; 539 + kvmppc_set_msr(vcpu, regs->msr); 540 vcpu->arch.srr0 = regs->srr0; 541 vcpu->arch.srr1 = regs->srr1; 542 vcpu->arch.sprg0 = regs->sprg0; ··· 575 return -ENOTSUPP; 576 } 577 578 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 579 struct kvm_translation *tr) 580 { 581 + return kvmppc_core_vcpu_translate(vcpu, tr); 582 + } 583 584 + int kvmppc_booke_init(void) 585 + { 586 + unsigned long ivor[16]; 587 + unsigned long max_ivor = 0; 588 + int i; 589 590 + /* We install our own exception handlers by hijacking IVPR. IVPR must 591 + * be 16-bit aligned, so we need a 64KB allocation. */ 592 + kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, 593 + VCPU_SIZE_ORDER); 594 + if (!kvmppc_booke_handlers) 595 + return -ENOMEM; 596 + 597 + /* XXX make sure our handlers are smaller than Linux's */ 598 + 599 + /* Copy our interrupt handlers to match host IVORs. That way we don't 600 + * have to swap the IVORs on every guest/host transition. */ 601 + ivor[0] = mfspr(SPRN_IVOR0); 602 + ivor[1] = mfspr(SPRN_IVOR1); 603 + ivor[2] = mfspr(SPRN_IVOR2); 604 + ivor[3] = mfspr(SPRN_IVOR3); 605 + ivor[4] = mfspr(SPRN_IVOR4); 606 + ivor[5] = mfspr(SPRN_IVOR5); 607 + ivor[6] = mfspr(SPRN_IVOR6); 608 + ivor[7] = mfspr(SPRN_IVOR7); 609 + ivor[8] = mfspr(SPRN_IVOR8); 610 + ivor[9] = mfspr(SPRN_IVOR9); 611 + ivor[10] = mfspr(SPRN_IVOR10); 612 + ivor[11] = mfspr(SPRN_IVOR11); 613 + ivor[12] = mfspr(SPRN_IVOR12); 614 + ivor[13] = mfspr(SPRN_IVOR13); 615 + ivor[14] = mfspr(SPRN_IVOR14); 616 + ivor[15] = mfspr(SPRN_IVOR15); 617 + 618 + for (i = 0; i < 16; i++) { 619 + if (ivor[i] > max_ivor) 620 + max_ivor = ivor[i]; 621 + 622 + memcpy((void *)kvmppc_booke_handlers + ivor[i], 623 + kvmppc_handlers_start + i * kvmppc_handler_len, 624 + kvmppc_handler_len); 625 } 626 + flush_icache_range(kvmppc_booke_handlers, 627 + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); 628 629 return 0; 630 + } 631 + 632 + void __exit kvmppc_booke_exit(void) 633 + { 634 + free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); 635 + kvm_exit(); 636 }
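The reworked delivery path above drops the three parallel lookup tables (interrupt_msr_mask[], exception_priority[], priority_exception[]) in favour of a single pending bitmap indexed directly by the BOOKE_IRQPRIO_* values, so the lowest set bit is always the highest-priority interrupt and masked priorities are simply skipped via find_next_bit(). A stand-alone user-space sketch of that pattern, using GCC builtins in place of the kernel's set_bit/__ffs/find_next_bit helpers; names, the MSR constant, and the gating rules are simplified stand-ins, not the kernel API (priorities 13 and 15 mirror the EXTERNAL and DECREMENTER values from booke.h above):

	#include <stdio.h>

	#define MAX_PRIO	15		/* stand-in for BOOKE_MAX_INTERRUPT */
	#define MSR_EE		0x8000u		/* external-interrupt enable */

	static unsigned long pending;		/* one bit per priority */
	static unsigned int msr;		/* guest MSR, gates some priorities */

	static void queue_irqprio(unsigned int prio)
	{
		pending |= 1UL << prio;		/* the kernel uses set_bit() */
	}

	/* Deliver one priority if the current MSR allows it; 1 = delivered. */
	static int irqprio_deliver(unsigned int prio)
	{
		int allowed = 1;		/* most priorities are always deliverable */

		if (prio == 13 || prio == 15)	/* EXTERNAL, DECREMENTER */
			allowed = msr & MSR_EE;

		if (allowed) {
			printf("delivering priority %u\n", prio);
			pending &= ~(1UL << prio);
		}
		return allowed != 0;
	}

	/* Lowest pending bit at or above 'from', or MAX_PRIO + 1 if none. */
	static unsigned int next_pending(unsigned int from)
	{
		unsigned long bits = pending & (~0UL << from);

		return bits ? (unsigned int)__builtin_ctzl(bits) : MAX_PRIO + 1;
	}

	static void deliver_interrupts(void)
	{
		unsigned int prio = next_pending(0);

		while (prio <= MAX_PRIO) {
			if (irqprio_deliver(prio))
				break;		/* one interrupt delivered per call */
			prio = next_pending(prio + 1);	/* masked: try the next bit */
		}
	}

	int main(void)
	{
		queue_irqprio(3);		/* e.g. a program interrupt */
		queue_irqprio(15);		/* e.g. the decrementer */

		msr = 0;			/* MSR[EE] clear: only priority 3 goes in */
		deliver_interrupts();

		msr = MSR_EE;			/* MSR[EE] set: the decrementer follows */
		deliver_interrupts();
		return 0;
	}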
-83
arch/powerpc/kvm/booke_host.c
··· 1 - /* 2 - * This program is free software; you can redistribute it and/or modify 3 - * it under the terms of the GNU General Public License, version 2, as 4 - * published by the Free Software Foundation. 5 - * 6 - * This program is distributed in the hope that it will be useful, 7 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 - * GNU General Public License for more details. 10 - * 11 - * You should have received a copy of the GNU General Public License 12 - * along with this program; if not, write to the Free Software 13 - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 - * 15 - * Copyright IBM Corp. 2008 16 - * 17 - * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 - */ 19 - 20 - #include <linux/errno.h> 21 - #include <linux/kvm_host.h> 22 - #include <linux/module.h> 23 - #include <asm/cacheflush.h> 24 - #include <asm/kvm_ppc.h> 25 - 26 - unsigned long kvmppc_booke_handlers; 27 - 28 - static int kvmppc_booke_init(void) 29 - { 30 - unsigned long ivor[16]; 31 - unsigned long max_ivor = 0; 32 - int i; 33 - 34 - /* We install our own exception handlers by hijacking IVPR. IVPR must 35 - * be 16-bit aligned, so we need a 64KB allocation. */ 36 - kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, 37 - VCPU_SIZE_ORDER); 38 - if (!kvmppc_booke_handlers) 39 - return -ENOMEM; 40 - 41 - /* XXX make sure our handlers are smaller than Linux's */ 42 - 43 - /* Copy our interrupt handlers to match host IVORs. That way we don't 44 - * have to swap the IVORs on every guest/host transition. */ 45 - ivor[0] = mfspr(SPRN_IVOR0); 46 - ivor[1] = mfspr(SPRN_IVOR1); 47 - ivor[2] = mfspr(SPRN_IVOR2); 48 - ivor[3] = mfspr(SPRN_IVOR3); 49 - ivor[4] = mfspr(SPRN_IVOR4); 50 - ivor[5] = mfspr(SPRN_IVOR5); 51 - ivor[6] = mfspr(SPRN_IVOR6); 52 - ivor[7] = mfspr(SPRN_IVOR7); 53 - ivor[8] = mfspr(SPRN_IVOR8); 54 - ivor[9] = mfspr(SPRN_IVOR9); 55 - ivor[10] = mfspr(SPRN_IVOR10); 56 - ivor[11] = mfspr(SPRN_IVOR11); 57 - ivor[12] = mfspr(SPRN_IVOR12); 58 - ivor[13] = mfspr(SPRN_IVOR13); 59 - ivor[14] = mfspr(SPRN_IVOR14); 60 - ivor[15] = mfspr(SPRN_IVOR15); 61 - 62 - for (i = 0; i < 16; i++) { 63 - if (ivor[i] > max_ivor) 64 - max_ivor = ivor[i]; 65 - 66 - memcpy((void *)kvmppc_booke_handlers + ivor[i], 67 - kvmppc_handlers_start + i * kvmppc_handler_len, 68 - kvmppc_handler_len); 69 - } 70 - flush_icache_range(kvmppc_booke_handlers, 71 - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); 72 - 73 - return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); 74 - } 75 - 76 - static void __exit kvmppc_booke_exit(void) 77 - { 78 - free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); 79 - kvm_exit(); 80 - } 81 - 82 - module_init(kvmppc_booke_init) 83 - module_exit(kvmppc_booke_exit)
···
+24 -48
arch/powerpc/kvm/booke_interrupts.S
··· 107 li r6, 1 108 slw r6, r6, r5 109 110 /* Save the faulting instruction and all GPRs for emulation. */ 111 andi. r7, r6, NEED_INST_MASK 112 beq ..skip_inst_copy ··· 347 lwz r3, VCPU_SHADOW_PID(r4) 348 mtspr SPRN_PID, r3 349 350 - /* Prevent all asynchronous TLB updates. */ 351 - mfmsr r5 352 - lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h 353 - ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l 354 - andc r6, r5, r6 355 - mtmsr r6 356 - 357 - /* Load the guest mappings, leaving the host's "pinned" kernel mappings 358 - * in place. */ 359 - mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ 360 - li r5, PPC44x_TLB_SIZE 361 - lis r5, tlb_44x_hwater@ha 362 - lwz r5, tlb_44x_hwater@l(r5) 363 - mtctr r5 364 - addi r9, r4, VCPU_SHADOW_TLB 365 - addi r5, r4, VCPU_SHADOW_MOD 366 - li r3, 0 367 - 1: 368 - lbzx r7, r3, r5 369 - cmpwi r7, 0 370 - beq 3f 371 - 372 - /* Load guest entry. */ 373 - mulli r11, r3, TLBE_BYTES 374 - add r11, r11, r9 375 - lwz r7, 0(r11) 376 - mtspr SPRN_MMUCR, r7 377 - lwz r7, 4(r11) 378 - tlbwe r7, r3, PPC44x_TLB_PAGEID 379 - lwz r7, 8(r11) 380 - tlbwe r7, r3, PPC44x_TLB_XLAT 381 - lwz r7, 12(r11) 382 - tlbwe r7, r3, PPC44x_TLB_ATTRIB 383 - 3: 384 - addi r3, r3, 1 /* Increment index. */ 385 - bdnz 1b 386 - 387 - mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ 388 - 389 - /* Clear bitmap of modified TLB entries */ 390 - li r5, PPC44x_TLB_SIZE>>2 391 - mtctr r5 392 - addi r5, r4, VCPU_SHADOW_MOD - 4 393 - li r6, 0 394 - 1: 395 - stwu r6, 4(r5) 396 - bdnz 1b 397 - 398 iccci 0, 0 /* XXX hack */ 399 400 /* Load some guest volatiles. */ ··· 386 mtspr SPRN_SPRG6, r3 387 lwz r3, VCPU_SPRG7(r4) 388 mtspr SPRN_SPRG7, r3 389 390 /* Finish loading guest volatiles and jump to guest. */ 391 lwz r3, VCPU_CTR(r4)
··· 107 li r6, 1 108 slw r6, r6, r5 109 110 + #ifdef CONFIG_KVM_EXIT_TIMING 111 + /* save exit time */ 112 + 1: 113 + mfspr r7, SPRN_TBRU 114 + mfspr r8, SPRN_TBRL 115 + mfspr r9, SPRN_TBRU 116 + cmpw r9, r7 117 + bne 1b 118 + stw r8, VCPU_TIMING_EXIT_TBL(r4) 119 + stw r9, VCPU_TIMING_EXIT_TBU(r4) 120 + #endif 121 + 122 /* Save the faulting instruction and all GPRs for emulation. */ 123 andi. r7, r6, NEED_INST_MASK 124 beq ..skip_inst_copy ··· 335 lwz r3, VCPU_SHADOW_PID(r4) 336 mtspr SPRN_PID, r3 337 338 iccci 0, 0 /* XXX hack */ 339 340 /* Load some guest volatiles. */ ··· 422 mtspr SPRN_SPRG6, r3 423 lwz r3, VCPU_SPRG7(r4) 424 mtspr SPRN_SPRG7, r3 425 + 426 + #ifdef CONFIG_KVM_EXIT_TIMING 427 + /* save enter time */ 428 + 1: 429 + mfspr r6, SPRN_TBRU 430 + mfspr r7, SPRN_TBRL 431 + mfspr r8, SPRN_TBRU 432 + cmpw r8, r6 433 + bne 1b 434 + stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4) 435 + stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) 436 + #endif 437 438 /* Finish loading guest volatiles and jump to guest. */ 439 lwz r3, VCPU_CTR(r4)
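The new exit/enter stamps read TBU, then TBL, then TBU again and retry until both TBU reads agree, which is the usual way to take a consistent 64-bit timebase snapshot on a 32-bit core without masking interrupts. The same retry pattern in plain C, with a simulated counter standing in for the SPRN_TBRU/SPRN_TBRL reads in the assembly above:

	#include <stdint.h>
	#include <stdio.h>

	/* Simulated timebase; on hardware the two halves would come from
	 * mfspr(SPRN_TBRU) and mfspr(SPRN_TBRL). */
	static uint64_t fake_tb = 0xFFFFFFF8ull;

	static uint32_t read_tbu(void)
	{
		return (uint32_t)(fake_tb >> 32);
	}

	static uint32_t read_tbl(void)
	{
		fake_tb += 0x10;	/* the timebase keeps ticking between reads */
		return (uint32_t)fake_tb;
	}

	/* Consistent 64-bit snapshot of a counter exposed as two 32-bit halves. */
	static uint64_t read_timebase(void)
	{
		uint32_t hi, lo, hi2;

		do {
			hi  = read_tbu();	/* upper half */
			lo  = read_tbl();	/* lower half */
			hi2 = read_tbu();	/* upper half again */
		} while (hi != hi2);		/* retry if the low word wrapped */

		return ((uint64_t)hi << 32) | lo;
	}

	int main(void)
	{
		/* Starting just below a low-word wrap forces one retry. */
		printf("tb = 0x%016llx\n", (unsigned long long)read_timebase());
		return 0;
	}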
+31 -416
arch/powerpc/kvm/emulate.c
··· 23 #include <linux/string.h> 24 #include <linux/kvm_host.h> 25 26 - #include <asm/dcr.h> 27 - #include <asm/dcr-regs.h> 28 #include <asm/time.h> 29 #include <asm/byteorder.h> 30 #include <asm/kvm_ppc.h> 31 32 - #include "44x_tlb.h" 33 - 34 - /* Instruction decoding */ 35 - static inline unsigned int get_op(u32 inst) 36 - { 37 - return inst >> 26; 38 - } 39 - 40 - static inline unsigned int get_xop(u32 inst) 41 - { 42 - return (inst >> 1) & 0x3ff; 43 - } 44 - 45 - static inline unsigned int get_sprn(u32 inst) 46 - { 47 - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); 48 - } 49 - 50 - static inline unsigned int get_dcrn(u32 inst) 51 - { 52 - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); 53 - } 54 - 55 - static inline unsigned int get_rt(u32 inst) 56 - { 57 - return (inst >> 21) & 0x1f; 58 - } 59 - 60 - static inline unsigned int get_rs(u32 inst) 61 - { 62 - return (inst >> 21) & 0x1f; 63 - } 64 - 65 - static inline unsigned int get_ra(u32 inst) 66 - { 67 - return (inst >> 16) & 0x1f; 68 - } 69 - 70 - static inline unsigned int get_rb(u32 inst) 71 - { 72 - return (inst >> 11) & 0x1f; 73 - } 74 - 75 - static inline unsigned int get_rc(u32 inst) 76 - { 77 - return inst & 0x1; 78 - } 79 - 80 - static inline unsigned int get_ws(u32 inst) 81 - { 82 - return (inst >> 11) & 0x1f; 83 - } 84 - 85 - static inline unsigned int get_d(u32 inst) 86 - { 87 - return inst & 0xffff; 88 - } 89 - 90 - static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, 91 - const struct tlbe *tlbe) 92 - { 93 - gpa_t gpa; 94 - 95 - if (!get_tlb_v(tlbe)) 96 - return 0; 97 - 98 - /* Does it match current guest AS? */ 99 - /* XXX what about IS != DS? */ 100 - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) 101 - return 0; 102 - 103 - gpa = get_tlb_raddr(tlbe); 104 - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) 105 - /* Mapping is not for RAM. */ 106 - return 0; 107 - 108 - return 1; 109 - } 110 - 111 - static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) 112 - { 113 - u64 eaddr; 114 - u64 raddr; 115 - u64 asid; 116 - u32 flags; 117 - struct tlbe *tlbe; 118 - unsigned int ra; 119 - unsigned int rs; 120 - unsigned int ws; 121 - unsigned int index; 122 - 123 - ra = get_ra(inst); 124 - rs = get_rs(inst); 125 - ws = get_ws(inst); 126 - 127 - index = vcpu->arch.gpr[ra]; 128 - if (index > PPC44x_TLB_SIZE) { 129 - printk("%s: index %d\n", __func__, index); 130 - kvmppc_dump_vcpu(vcpu); 131 - return EMULATE_FAIL; 132 - } 133 - 134 - tlbe = &vcpu->arch.guest_tlb[index]; 135 - 136 - /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ 137 - if (tlbe->word0 & PPC44x_TLB_VALID) { 138 - eaddr = get_tlb_eaddr(tlbe); 139 - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; 140 - kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid); 141 - } 142 - 143 - switch (ws) { 144 - case PPC44x_TLB_PAGEID: 145 - tlbe->tid = vcpu->arch.mmucr & 0xff; 146 - tlbe->word0 = vcpu->arch.gpr[rs]; 147 - break; 148 - 149 - case PPC44x_TLB_XLAT: 150 - tlbe->word1 = vcpu->arch.gpr[rs]; 151 - break; 152 - 153 - case PPC44x_TLB_ATTRIB: 154 - tlbe->word2 = vcpu->arch.gpr[rs]; 155 - break; 156 - 157 - default: 158 - return EMULATE_FAIL; 159 - } 160 - 161 - if (tlbe_is_host_safe(vcpu, tlbe)) { 162 - eaddr = get_tlb_eaddr(tlbe); 163 - raddr = get_tlb_raddr(tlbe); 164 - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; 165 - flags = tlbe->word2 & 0xffff; 166 - 167 - /* Create a 4KB mapping on the host. 
If the guest wanted a 168 - * large page, only the first 4KB is mapped here and the rest 169 - * are mapped on the fly. */ 170 - kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); 171 - } 172 - 173 - KVMTRACE_5D(GTLB_WRITE, vcpu, index, 174 - tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2, 175 - handler); 176 - 177 - return EMULATE_DONE; 178 - } 179 - 180 - static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) 181 { 182 if (vcpu->arch.tcr & TCR_DIE) { 183 /* The decrementer ticks at the same rate as the timebase, so ··· 44 } else { 45 del_timer(&vcpu->arch.dec_timer); 46 } 47 - } 48 - 49 - static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) 50 - { 51 - vcpu->arch.pc = vcpu->arch.srr0; 52 - kvmppc_set_msr(vcpu, vcpu->arch.srr1); 53 } 54 55 /* XXX to do: ··· 60 * 61 * XXX is_bigendian should depend on MMU mapping or MSR[LE] 62 */ 63 int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 64 { 65 u32 inst = vcpu->arch.last_inst; 66 u32 ea; 67 int ra; 68 int rb; 69 - int rc; 70 int rs; 71 int rt; 72 int sprn; 73 - int dcrn; 74 enum emulation_result emulated = EMULATE_DONE; 75 int advance = 1; 76 77 switch (get_op(inst)) { 78 - case 3: /* trap */ 79 - printk("trap!\n"); 80 - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); 81 advance = 0; 82 - break; 83 - 84 - case 19: 85 - switch (get_xop(inst)) { 86 - case 50: /* rfi */ 87 - kvmppc_emul_rfi(vcpu); 88 - advance = 0; 89 - break; 90 - 91 - default: 92 - emulated = EMULATE_FAIL; 93 - break; 94 - } 95 break; 96 97 case 31: ··· 92 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 93 break; 94 95 - case 83: /* mfmsr */ 96 - rt = get_rt(inst); 97 - vcpu->arch.gpr[rt] = vcpu->arch.msr; 98 - break; 99 - 100 case 87: /* lbzx */ 101 rt = get_rt(inst); 102 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 103 - break; 104 - 105 - case 131: /* wrtee */ 106 - rs = get_rs(inst); 107 - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) 108 - | (vcpu->arch.gpr[rs] & MSR_EE); 109 - break; 110 - 111 - case 146: /* mtmsr */ 112 - rs = get_rs(inst); 113 - kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); 114 break; 115 116 case 151: /* stwx */ ··· 102 emulated = kvmppc_handle_store(run, vcpu, 103 vcpu->arch.gpr[rs], 104 4, 1); 105 - break; 106 - 107 - case 163: /* wrteei */ 108 - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) 109 - | (inst & MSR_EE); 110 break; 111 112 case 215: /* stbx */ ··· 144 vcpu->arch.gpr[ra] = ea; 145 break; 146 147 - case 323: /* mfdcr */ 148 - dcrn = get_dcrn(inst); 149 - rt = get_rt(inst); 150 - 151 - /* The guest may access CPR0 registers to determine the timebase 152 - * frequency, and it must know the real host frequency because it 153 - * can directly access the timebase registers. 154 - * 155 - * It would be possible to emulate those accesses in userspace, 156 - * but userspace can really only figure out the end frequency. 157 - * We could decompose that into the factors that compute it, but 158 - * that's tricky math, and it's easier to just report the real 159 - * CPR0 values. 
160 - */ 161 - switch (dcrn) { 162 - case DCRN_CPR0_CONFIG_ADDR: 163 - vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; 164 - break; 165 - case DCRN_CPR0_CONFIG_DATA: 166 - local_irq_disable(); 167 - mtdcr(DCRN_CPR0_CONFIG_ADDR, 168 - vcpu->arch.cpr0_cfgaddr); 169 - vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); 170 - local_irq_enable(); 171 - break; 172 - default: 173 - run->dcr.dcrn = dcrn; 174 - run->dcr.data = 0; 175 - run->dcr.is_write = 0; 176 - vcpu->arch.io_gpr = rt; 177 - vcpu->arch.dcr_needed = 1; 178 - emulated = EMULATE_DO_DCR; 179 - } 180 - 181 - break; 182 - 183 case 339: /* mfspr */ 184 sprn = get_sprn(inst); 185 rt = get_rt(inst); ··· 153 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; 154 case SPRN_SRR1: 155 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; 156 - case SPRN_MMUCR: 157 - vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; 158 - case SPRN_PID: 159 - vcpu->arch.gpr[rt] = vcpu->arch.pid; break; 160 - case SPRN_IVPR: 161 - vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; 162 - case SPRN_CCR0: 163 - vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; 164 - case SPRN_CCR1: 165 - vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; 166 case SPRN_PVR: 167 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; 168 - case SPRN_DEAR: 169 - vcpu->arch.gpr[rt] = vcpu->arch.dear; break; 170 - case SPRN_ESR: 171 - vcpu->arch.gpr[rt] = vcpu->arch.esr; break; 172 - case SPRN_DBCR0: 173 - vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; 174 - case SPRN_DBCR1: 175 - vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; 176 177 /* Note: mftb and TBRL/TBWL are user-accessible, so 178 * the guest can always access the real TB anyways. ··· 175 /* Note: SPRG4-7 are user-readable, so we don't get 176 * a trap. */ 177 178 - case SPRN_IVOR0: 179 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break; 180 - case SPRN_IVOR1: 181 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break; 182 - case SPRN_IVOR2: 183 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break; 184 - case SPRN_IVOR3: 185 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break; 186 - case SPRN_IVOR4: 187 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break; 188 - case SPRN_IVOR5: 189 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break; 190 - case SPRN_IVOR6: 191 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break; 192 - case SPRN_IVOR7: 193 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break; 194 - case SPRN_IVOR8: 195 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break; 196 - case SPRN_IVOR9: 197 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break; 198 - case SPRN_IVOR10: 199 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break; 200 - case SPRN_IVOR11: 201 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break; 202 - case SPRN_IVOR12: 203 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break; 204 - case SPRN_IVOR13: 205 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break; 206 - case SPRN_IVOR14: 207 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break; 208 - case SPRN_IVOR15: 209 - vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break; 210 - 211 default: 212 - printk("mfspr: unknown spr %x\n", sprn); 213 - vcpu->arch.gpr[rt] = 0; 214 break; 215 } 216 break; ··· 210 vcpu->arch.gpr[ra] = ea; 211 break; 212 213 - case 451: /* mtdcr */ 214 - dcrn = get_dcrn(inst); 215 - rs = get_rs(inst); 216 - 217 - /* emulate some access in kernel */ 218 - switch (dcrn) { 219 - case DCRN_CPR0_CONFIG_ADDR: 220 - vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; 221 - break; 222 - default: 223 - run->dcr.dcrn = dcrn; 224 - run->dcr.data = vcpu->arch.gpr[rs]; 225 - run->dcr.is_write = 1; 226 - vcpu->arch.dcr_needed = 1; 227 - emulated = EMULATE_DO_DCR; 228 - } 
229 - 230 - break; 231 - 232 case 467: /* mtspr */ 233 sprn = get_sprn(inst); 234 rs = get_rs(inst); ··· 218 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; 219 case SPRN_SRR1: 220 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; 221 - case SPRN_MMUCR: 222 - vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; 223 - case SPRN_PID: 224 - kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; 225 - case SPRN_CCR0: 226 - vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; 227 - case SPRN_CCR1: 228 - vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; 229 - case SPRN_DEAR: 230 - vcpu->arch.dear = vcpu->arch.gpr[rs]; break; 231 - case SPRN_ESR: 232 - vcpu->arch.esr = vcpu->arch.gpr[rs]; break; 233 - case SPRN_DBCR0: 234 - vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; 235 - case SPRN_DBCR1: 236 - vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; 237 238 /* XXX We need to context-switch the timebase for 239 * watchdog and FIT. */ ··· 226 227 case SPRN_DEC: 228 vcpu->arch.dec = vcpu->arch.gpr[rs]; 229 - kvmppc_emulate_dec(vcpu); 230 - break; 231 - 232 - case SPRN_TSR: 233 - vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; 234 - 235 - case SPRN_TCR: 236 - vcpu->arch.tcr = vcpu->arch.gpr[rs]; 237 kvmppc_emulate_dec(vcpu); 238 break; 239 ··· 238 case SPRN_SPRG3: 239 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; 240 241 - /* Note: SPRG4-7 are user-readable. These values are 242 - * loaded into the real SPRGs when resuming the 243 - * guest. */ 244 - case SPRN_SPRG4: 245 - vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; 246 - case SPRN_SPRG5: 247 - vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; 248 - case SPRN_SPRG6: 249 - vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; 250 - case SPRN_SPRG7: 251 - vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; 252 - 253 - case SPRN_IVPR: 254 - vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break; 255 - case SPRN_IVOR0: 256 - vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break; 257 - case SPRN_IVOR1: 258 - vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break; 259 - case SPRN_IVOR2: 260 - vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break; 261 - case SPRN_IVOR3: 262 - vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break; 263 - case SPRN_IVOR4: 264 - vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break; 265 - case SPRN_IVOR5: 266 - vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break; 267 - case SPRN_IVOR6: 268 - vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break; 269 - case SPRN_IVOR7: 270 - vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break; 271 - case SPRN_IVOR8: 272 - vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break; 273 - case SPRN_IVOR9: 274 - vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break; 275 - case SPRN_IVOR10: 276 - vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break; 277 - case SPRN_IVOR11: 278 - vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break; 279 - case SPRN_IVOR12: 280 - vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break; 281 - case SPRN_IVOR13: 282 - vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break; 283 - case SPRN_IVOR14: 284 - vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break; 285 - case SPRN_IVOR15: 286 - vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break; 287 - 288 default: 289 - printk("mtspr: unknown spr %x\n", sprn); 290 - emulated = EMULATE_FAIL; 291 break; 292 } 293 break; ··· 272 4, 0); 273 break; 274 275 - case 978: /* tlbwe */ 276 - emulated = kvmppc_emul_tlbwe(vcpu, inst); 277 - break; 278 - 279 - case 914: { /* tlbsx */ 280 - int index; 281 - unsigned int as = get_mmucr_sts(vcpu); 282 - unsigned int pid = get_mmucr_stid(vcpu); 283 - 284 - rt = get_rt(inst); 285 - ra = get_ra(inst); 286 - rb = get_rb(inst); 287 - rc = get_rc(inst); 288 - 289 - ea = 
vcpu->arch.gpr[rb]; 290 - if (ra) 291 - ea += vcpu->arch.gpr[ra]; 292 - 293 - index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); 294 - if (rc) { 295 - if (index < 0) 296 - vcpu->arch.cr &= ~0x20000000; 297 - else 298 - vcpu->arch.cr |= 0x20000000; 299 - } 300 - vcpu->arch.gpr[rt] = index; 301 - 302 - } 303 - break; 304 - 305 case 790: /* lhbrx */ 306 rt = get_rt(inst); 307 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); ··· 287 2, 0); 288 break; 289 290 - case 966: /* iccci */ 291 - break; 292 - 293 default: 294 - printk("unknown: op %d xop %d\n", get_op(inst), 295 - get_xop(inst)); 296 emulated = EMULATE_FAIL; 297 - break; 298 } 299 break; 300 ··· 372 break; 373 374 default: 375 - printk("unknown op %d\n", get_op(inst)); 376 emulated = EMULATE_FAIL; 377 - break; 378 } 379 380 - KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); 381 382 if (advance) 383 vcpu->arch.pc += 4; /* Advance past emulated instruction. */
··· 23 #include <linux/string.h> 24 #include <linux/kvm_host.h> 25 26 + #include <asm/reg.h> 27 #include <asm/time.h> 28 #include <asm/byteorder.h> 29 #include <asm/kvm_ppc.h> 30 + #include <asm/disassemble.h> 31 + #include "timing.h" 32 33 + void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) 34 { 35 if (vcpu->arch.tcr & TCR_DIE) { 36 /* The decrementer ticks at the same rate as the timebase, so ··· 191 } else { 192 del_timer(&vcpu->arch.dec_timer); 193 } 194 } 195 196 /* XXX to do: ··· 213 * 214 * XXX is_bigendian should depend on MMU mapping or MSR[LE] 215 */ 216 + /* XXX Should probably auto-generate instruction decoding for a particular core 217 + * from opcode tables in the future. */ 218 int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 219 { 220 u32 inst = vcpu->arch.last_inst; 221 u32 ea; 222 int ra; 223 int rb; 224 int rs; 225 int rt; 226 int sprn; 227 enum emulation_result emulated = EMULATE_DONE; 228 int advance = 1; 229 230 + /* this default type might be overwritten by subcategories */ 231 + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); 232 + 233 switch (get_op(inst)) { 234 + case 3: /* trap */ 235 + vcpu->arch.esr |= ESR_PTR; 236 + kvmppc_core_queue_program(vcpu); 237 advance = 0; 238 break; 239 240 case 31: ··· 255 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 256 break; 257 258 case 87: /* lbzx */ 259 rt = get_rt(inst); 260 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 261 break; 262 263 case 151: /* stwx */ ··· 281 emulated = kvmppc_handle_store(run, vcpu, 282 vcpu->arch.gpr[rs], 283 4, 1); 284 break; 285 286 case 215: /* stbx */ ··· 328 vcpu->arch.gpr[ra] = ea; 329 break; 330 331 case 339: /* mfspr */ 332 sprn = get_sprn(inst); 333 rt = get_rt(inst); ··· 373 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; 374 case SPRN_SRR1: 375 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; 376 case SPRN_PVR: 377 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; 378 379 /* Note: mftb and TBRL/TBWL are user-accessible, so 380 * the guest can always access the real TB anyways. ··· 413 /* Note: SPRG4-7 are user-readable, so we don't get 414 * a trap. */ 415 416 default: 417 + emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); 418 + if (emulated == EMULATE_FAIL) { 419 + printk("mfspr: unknown spr %x\n", sprn); 420 + vcpu->arch.gpr[rt] = 0; 421 + } 422 break; 423 } 424 break; ··· 478 vcpu->arch.gpr[ra] = ea; 479 break; 480 481 case 467: /* mtspr */ 482 sprn = get_sprn(inst); 483 rs = get_rs(inst); ··· 505 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; 506 case SPRN_SRR1: 507 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; 508 509 /* XXX We need to context-switch the timebase for 510 * watchdog and FIT. */ ··· 529 530 case SPRN_DEC: 531 vcpu->arch.dec = vcpu->arch.gpr[rs]; 532 kvmppc_emulate_dec(vcpu); 533 break; 534 ··· 549 case SPRN_SPRG3: 550 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; 551 552 default: 553 + emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); 554 + if (emulated == EMULATE_FAIL) 555 + printk("mtspr: unknown spr %x\n", sprn); 556 break; 557 } 558 break; ··· 629 4, 0); 630 break; 631 632 case 790: /* lhbrx */ 633 rt = get_rt(inst); 634 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); ··· 674 2, 0); 675 break; 676 677 default: 678 + /* Attempt core-specific emulation below. 
*/ 679 emulated = EMULATE_FAIL; 680 } 681 break; 682 ··· 764 break; 765 766 default: 767 emulated = EMULATE_FAIL; 768 } 769 770 + if (emulated == EMULATE_FAIL) { 771 + emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); 772 + if (emulated == EMULATE_FAIL) { 773 + advance = 0; 774 + printk(KERN_ERR "Couldn't emulate instruction 0x%08x " 775 + "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); 776 + } 777 + } 778 + 779 + KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit); 780 781 if (advance) 782 vcpu->arch.pc += 4; /* Advance past emulated instruction. */
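The trimmed switch above keeps only the architecture-neutral instructions and SPRs in the common emulator and falls back to kvmppc_core_emulate_op()/_mtspr()/_mfspr() for anything core-specific; the 440 side of that split lives in 44x_emulate.c, which is not shown in this hunk. A small stand-alone sketch of the two-level dispatch, with invented opcode handling rather than the real decode tables (978 is the tlbwe extended opcode that moved out of the common layer here):

	#include <stdio.h>

	enum emu_result { EMU_DONE, EMU_FAIL };

	/* Core-specific layer: a 440 backend would decode tlbwe, tlbsx, mfdcr, ... */
	static enum emu_result core_emulate(unsigned int inst)
	{
		if ((inst >> 26) == 31 && ((inst >> 1) & 0x3ff) == 978)
			return EMU_DONE;	/* pretend this core handles tlbwe */
		return EMU_FAIL;
	}

	/* Generic layer: handle the common cases, then try the core hook. */
	static enum emu_result emulate(unsigned int inst)
	{
		enum emu_result r;

		switch (inst >> 26) {		/* primary opcode */
		case 32:			/* lwz: generic load path */
			r = EMU_DONE;
			break;
		default:
			r = EMU_FAIL;		/* not ours; let the core try */
			break;
		}

		if (r == EMU_FAIL) {
			r = core_emulate(inst);
			if (r == EMU_FAIL)
				printf("couldn't emulate 0x%08x (op %u)\n",
				       inst, inst >> 26);
		}
		return r;
	}

	int main(void)
	{
		emulate(32u << 26);			/* handled generically */
		emulate((31u << 26) | (978u << 1));	/* handled by the core layer */
		emulate(19u << 26);			/* neither layer knows it */
		return 0;
	}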
+16 -114
arch/powerpc/kvm/powerpc.c
··· 28 #include <asm/uaccess.h> 29 #include <asm/kvm_ppc.h> 30 #include <asm/tlbflush.h> 31 #include "../mm/mmu_decl.h" 32 - 33 34 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 35 { ··· 99 100 void kvm_arch_check_processor_compat(void *rtn) 101 { 102 - int r; 103 - 104 - if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) 105 - r = 0; 106 - else 107 - r = -ENOTSUPP; 108 - 109 - *(int *)rtn = r; 110 } 111 112 struct kvm *kvm_arch_create_vm(void) ··· 137 int r; 138 139 switch (ext) { 140 - case KVM_CAP_USER_MEMORY: 141 - r = 1; 142 - break; 143 case KVM_CAP_COALESCED_MMIO: 144 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 145 break; ··· 169 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) 170 { 171 struct kvm_vcpu *vcpu; 172 - int err; 173 - 174 - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 175 - if (!vcpu) { 176 - err = -ENOMEM; 177 - goto out; 178 - } 179 - 180 - err = kvm_vcpu_init(vcpu, kvm, id); 181 - if (err) 182 - goto free_vcpu; 183 - 184 return vcpu; 185 - 186 - free_vcpu: 187 - kmem_cache_free(kvm_vcpu_cache, vcpu); 188 - out: 189 - return ERR_PTR(err); 190 } 191 192 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 193 { 194 - kvm_vcpu_uninit(vcpu); 195 - kmem_cache_free(kvm_vcpu_cache, vcpu); 196 } 197 198 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) ··· 187 188 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 189 { 190 - unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; 191 - 192 - return test_bit(priority, &vcpu->arch.pending_exceptions); 193 } 194 195 static void kvmppc_decrementer_func(unsigned long data) 196 { 197 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 198 199 - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); 200 201 if (waitqueue_active(&vcpu->wq)) { 202 wake_up_interruptible(&vcpu->wq); ··· 215 kvmppc_core_destroy_mmu(vcpu); 216 } 217 218 - /* Note: clearing MSR[DE] just means that the debug interrupt will not be 219 - * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. 220 - * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt 221 - * will be delivered as an "imprecise debug event" (which is indicated by 222 - * DBSR[IDE]. 223 - */ 224 - static void kvmppc_disable_debug_interrupts(void) 225 - { 226 - mtmsr(mfmsr() & ~MSR_DE); 227 - } 228 - 229 - static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu) 230 - { 231 - kvmppc_disable_debug_interrupts(); 232 - 233 - mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); 234 - mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); 235 - mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); 236 - mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); 237 - mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); 238 - mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); 239 - mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); 240 - mtmsr(vcpu->arch.host_msr); 241 - } 242 - 243 - static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu) 244 - { 245 - struct kvm_guest_debug *dbg = &vcpu->guest_debug; 246 - u32 dbcr0 = 0; 247 - 248 - vcpu->arch.host_msr = mfmsr(); 249 - kvmppc_disable_debug_interrupts(); 250 - 251 - /* Save host debug register state. 
*/ 252 - vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); 253 - vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); 254 - vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); 255 - vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); 256 - vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); 257 - vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); 258 - vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); 259 - 260 - /* set registers up for guest */ 261 - 262 - if (dbg->bp[0]) { 263 - mtspr(SPRN_IAC1, dbg->bp[0]); 264 - dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; 265 - } 266 - if (dbg->bp[1]) { 267 - mtspr(SPRN_IAC2, dbg->bp[1]); 268 - dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; 269 - } 270 - if (dbg->bp[2]) { 271 - mtspr(SPRN_IAC3, dbg->bp[2]); 272 - dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; 273 - } 274 - if (dbg->bp[3]) { 275 - mtspr(SPRN_IAC4, dbg->bp[3]); 276 - dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; 277 - } 278 - 279 - mtspr(SPRN_DBCR0, dbcr0); 280 - mtspr(SPRN_DBCR1, 0); 281 - mtspr(SPRN_DBCR2, 0); 282 - } 283 - 284 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 285 { 286 - int i; 287 - 288 if (vcpu->guest_debug.enabled) 289 - kvmppc_load_guest_debug_registers(vcpu); 290 291 - /* Mark every guest entry in the shadow TLB entry modified, so that they 292 - * will all be reloaded on the next vcpu run (instead of being 293 - * demand-faulted). */ 294 - for (i = 0; i <= tlb_44x_hwater; i++) 295 - kvmppc_tlbe_set_modified(vcpu, i); 296 } 297 298 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 299 { 300 if (vcpu->guest_debug.enabled) 301 - kvmppc_restore_host_debug_state(vcpu); 302 303 /* Don't leave guest TLB entries resident when being de-scheduled. */ 304 /* XXX It would be nice to differentiate between heavyweight exit and 305 * sched_out here, since we could avoid the TLB flush for heavyweight 306 * exits. */ 307 _tlbil_all(); 308 } 309 310 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, ··· 257 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 258 struct kvm_run *run) 259 { 260 - u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 261 *gpr = run->dcr.data; 262 } 263 264 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 265 struct kvm_run *run) 266 { 267 - u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 268 269 if (run->mmio.len > sizeof(*gpr)) { 270 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); ··· 362 vcpu->arch.dcr_needed = 0; 363 } 364 365 - kvmppc_check_and_deliver_interrupts(vcpu); 366 367 local_irq_disable(); 368 kvm_guest_enter(); ··· 380 381 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 382 { 383 - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); 384 385 if (waitqueue_active(&vcpu->wq)) { 386 wake_up_interruptible(&vcpu->wq);
··· 28 #include <asm/uaccess.h> 29 #include <asm/kvm_ppc.h> 30 #include <asm/tlbflush.h> 31 + #include "timing.h" 32 #include "../mm/mmu_decl.h" 33 34 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 35 { ··· 99 100 void kvm_arch_check_processor_compat(void *rtn) 101 { 102 + *(int *)rtn = kvmppc_core_check_processor_compat(); 103 } 104 105 struct kvm *kvm_arch_create_vm(void) ··· 144 int r; 145 146 switch (ext) { 147 case KVM_CAP_COALESCED_MMIO: 148 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 149 break; ··· 179 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) 180 { 181 struct kvm_vcpu *vcpu; 182 + vcpu = kvmppc_core_vcpu_create(kvm, id); 183 + kvmppc_create_vcpu_debugfs(vcpu, id); 184 return vcpu; 185 } 186 187 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 188 { 189 + kvmppc_remove_vcpu_debugfs(vcpu); 190 + kvmppc_core_vcpu_free(vcpu); 191 } 192 193 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) ··· 212 213 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 214 { 215 + return kvmppc_core_pending_dec(vcpu); 216 } 217 218 static void kvmppc_decrementer_func(unsigned long data) 219 { 220 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; 221 222 + kvmppc_core_queue_dec(vcpu); 223 224 if (waitqueue_active(&vcpu->wq)) { 225 wake_up_interruptible(&vcpu->wq); ··· 242 kvmppc_core_destroy_mmu(vcpu); 243 } 244 245 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 246 { 247 if (vcpu->guest_debug.enabled) 248 + kvmppc_core_load_guest_debugstate(vcpu); 249 250 + kvmppc_core_vcpu_load(vcpu, cpu); 251 } 252 253 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 254 { 255 if (vcpu->guest_debug.enabled) 256 + kvmppc_core_load_host_debugstate(vcpu); 257 258 /* Don't leave guest TLB entries resident when being de-scheduled. */ 259 /* XXX It would be nice to differentiate between heavyweight exit and 260 * sched_out here, since we could avoid the TLB flush for heavyweight 261 * exits. */ 262 _tlbil_all(); 263 + kvmppc_core_vcpu_put(vcpu); 264 } 265 266 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, ··· 355 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 356 struct kvm_run *run) 357 { 358 + ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 359 *gpr = run->dcr.data; 360 } 361 362 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 363 struct kvm_run *run) 364 { 365 + ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 366 367 if (run->mmio.len > sizeof(*gpr)) { 368 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); ··· 460 vcpu->arch.dcr_needed = 0; 461 } 462 463 + kvmppc_core_deliver_interrupts(vcpu); 464 465 local_irq_disable(); 466 kvm_guest_enter(); ··· 478 479 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 480 { 481 + kvmppc_core_queue_external(vcpu, irq); 482 483 if (waitqueue_active(&vcpu->wq)) { 484 wake_up_interruptible(&vcpu->wq);
+239
arch/powerpc/kvm/timing.c
···
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright IBM Corp. 2008 16 + * 17 + * Authors: Hollis Blanchard <hollisb@us.ibm.com> 18 + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 19 + */ 20 + 21 + #include <linux/kvm_host.h> 22 + #include <linux/fs.h> 23 + #include <linux/seq_file.h> 24 + #include <linux/debugfs.h> 25 + #include <linux/uaccess.h> 26 + 27 + #include <asm/time.h> 28 + #include <asm-generic/div64.h> 29 + 30 + #include "timing.h" 31 + 32 + void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) 33 + { 34 + int i; 35 + 36 + /* pause guest execution to avoid concurrent updates */ 37 + local_irq_disable(); 38 + mutex_lock(&vcpu->mutex); 39 + 40 + vcpu->arch.last_exit_type = 0xDEAD; 41 + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { 42 + vcpu->arch.timing_count_type[i] = 0; 43 + vcpu->arch.timing_max_duration[i] = 0; 44 + vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF; 45 + vcpu->arch.timing_sum_duration[i] = 0; 46 + vcpu->arch.timing_sum_quad_duration[i] = 0; 47 + } 48 + vcpu->arch.timing_last_exit = 0; 49 + vcpu->arch.timing_exit.tv64 = 0; 50 + vcpu->arch.timing_last_enter.tv64 = 0; 51 + 52 + mutex_unlock(&vcpu->mutex); 53 + local_irq_enable(); 54 + } 55 + 56 + static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) 57 + { 58 + u64 old; 59 + 60 + do_div(duration, tb_ticks_per_usec); 61 + if (unlikely(duration > 0xFFFFFFFF)) { 62 + printk(KERN_ERR"%s - duration too big -> overflow" 63 + " duration %lld type %d exit #%d\n", 64 + __func__, duration, type, 65 + vcpu->arch.timing_count_type[type]); 66 + return; 67 + } 68 + 69 + vcpu->arch.timing_count_type[type]++; 70 + 71 + /* sum */ 72 + old = vcpu->arch.timing_sum_duration[type]; 73 + vcpu->arch.timing_sum_duration[type] += duration; 74 + if (unlikely(old > vcpu->arch.timing_sum_duration[type])) { 75 + printk(KERN_ERR"%s - wrap adding sum of durations" 76 + " old %lld new %lld type %d exit # of type %d\n", 77 + __func__, old, vcpu->arch.timing_sum_duration[type], 78 + type, vcpu->arch.timing_count_type[type]); 79 + } 80 + 81 + /* square sum */ 82 + old = vcpu->arch.timing_sum_quad_duration[type]; 83 + vcpu->arch.timing_sum_quad_duration[type] += (duration*duration); 84 + if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) { 85 + printk(KERN_ERR"%s - wrap adding sum of squared durations" 86 + " old %lld new %lld type %d exit # of type %d\n", 87 + __func__, old, 88 + vcpu->arch.timing_sum_quad_duration[type], 89 + type, vcpu->arch.timing_count_type[type]); 90 + } 91 + 92 + /* set min/max */ 93 + if (unlikely(duration < vcpu->arch.timing_min_duration[type])) 94 + vcpu->arch.timing_min_duration[type] = duration; 95 + if (unlikely(duration > vcpu->arch.timing_max_duration[type])) 96 + vcpu->arch.timing_max_duration[type] = duration; 97 + } 98 + 99 + void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) 100 + { 101 + u64 exit = 
vcpu->arch.timing_last_exit; 102 + u64 enter = vcpu->arch.timing_last_enter.tv64; 103 + 104 + /* save exit time, used next exit when the reenter time is known */ 105 + vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64; 106 + 107 + if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0)) 108 + return; /* skip incomplete cycle (e.g. after reset) */ 109 + 110 + /* update statistics for average and standard deviation */ 111 + add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type); 112 + /* enter -> timing_last_exit is time spent in guest - log this too */ 113 + add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter), 114 + TIMEINGUEST); 115 + } 116 + 117 + static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { 118 + [MMIO_EXITS] = "MMIO", 119 + [DCR_EXITS] = "DCR", 120 + [SIGNAL_EXITS] = "SIGNAL", 121 + [ITLB_REAL_MISS_EXITS] = "ITLBREAL", 122 + [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", 123 + [DTLB_REAL_MISS_EXITS] = "DTLBREAL", 124 + [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT", 125 + [SYSCALL_EXITS] = "SYSCALL", 126 + [ISI_EXITS] = "ISI", 127 + [DSI_EXITS] = "DSI", 128 + [EMULATED_INST_EXITS] = "EMULINST", 129 + [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT", 130 + [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE", 131 + [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR", 132 + [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR", 133 + [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR", 134 + [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR", 135 + [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX", 136 + [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE", 137 + [EMULATED_RFI_EXITS] = "EMUL_RFI", 138 + [DEC_EXITS] = "DEC", 139 + [EXT_INTR_EXITS] = "EXTINT", 140 + [HALT_WAKEUP] = "HALT", 141 + [USR_PR_INST] = "USR_PR_INST", 142 + [FP_UNAVAIL] = "FP_UNAVAIL", 143 + [DEBUG_EXITS] = "DEBUG", 144 + [TIMEINGUEST] = "TIMEINGUEST" 145 + }; 146 + 147 + static int kvmppc_exit_timing_show(struct seq_file *m, void *private) 148 + { 149 + struct kvm_vcpu *vcpu = m->private; 150 + int i; 151 + 152 + seq_printf(m, "%s", "type count min max sum sum_squared\n"); 153 + 154 + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { 155 + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", 156 + kvm_exit_names[i], 157 + vcpu->arch.timing_count_type[i], 158 + vcpu->arch.timing_min_duration[i], 159 + vcpu->arch.timing_max_duration[i], 160 + vcpu->arch.timing_sum_duration[i], 161 + vcpu->arch.timing_sum_quad_duration[i]); 162 + } 163 + return 0; 164 + } 165 + 166 + /* Write 'c' to clear the timing statistics. */ 167 + static ssize_t kvmppc_exit_timing_write(struct file *file, 168 + const char __user *user_buf, 169 + size_t count, loff_t *ppos) 170 + { 171 + int err = -EINVAL; 172 + char c; 173 + 174 + if (count > 1) { 175 + goto done; 176 + } 177 + 178 + if (get_user(c, user_buf)) { 179 + err = -EFAULT; 180 + goto done; 181 + } 182 + 183 + if (c == 'c') { 184 + struct seq_file *seqf = (struct seq_file *)file->private_data; 185 + struct kvm_vcpu *vcpu = seqf->private; 186 + /* Write does not affect our buffers previously generated with 187 + * show. 
seq_file is locked here to prevent races of init with 188 + * a show call */ 189 + mutex_lock(&seqf->lock); 190 + kvmppc_init_timing_stats(vcpu); 191 + mutex_unlock(&seqf->lock); 192 + err = count; 193 + } 194 + 195 + done: 196 + return err; 197 + } 198 + 199 + static int kvmppc_exit_timing_open(struct inode *inode, struct file *file) 200 + { 201 + return single_open(file, kvmppc_exit_timing_show, inode->i_private); 202 + } 203 + 204 + static struct file_operations kvmppc_exit_timing_fops = { 205 + .owner = THIS_MODULE, 206 + .open = kvmppc_exit_timing_open, 207 + .read = seq_read, 208 + .write = kvmppc_exit_timing_write, 209 + .llseek = seq_lseek, 210 + .release = single_release, 211 + }; 212 + 213 + void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) 214 + { 215 + static char dbg_fname[50]; 216 + struct dentry *debugfs_file; 217 + 218 + snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", 219 + current->pid, id); 220 + debugfs_file = debugfs_create_file(dbg_fname, 0666, 221 + kvm_debugfs_dir, vcpu, 222 + &kvmppc_exit_timing_fops); 223 + 224 + if (!debugfs_file) { 225 + printk(KERN_ERR"%s: error creating debugfs file %s\n", 226 + __func__, dbg_fname); 227 + return; 228 + } 229 + 230 + vcpu->arch.debugfs_exit_timing = debugfs_file; 231 + } 232 + 233 + void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) 234 + { 235 + if (vcpu->arch.debugfs_exit_timing) { 236 + debugfs_remove(vcpu->arch.debugfs_exit_timing); 237 + vcpu->arch.debugfs_exit_timing = NULL; 238 + } 239 + }
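Editorial note: the new timing code keeps, per exit type, a count, minimum, maximum, sum and sum of squares; mean and standard deviation can then be derived offline from the debugfs dump. A small userspace sketch of that arithmetic (not kernel code; the struct and the sample values are made up, link with -lm):

#include <stdio.h>
#include <math.h>

struct exit_timing {
        unsigned int count;
        unsigned long long sum;         /* sum of durations, usecs */
        unsigned long long sum_quad;    /* sum of squared durations */
};

static void report(const char *name, const struct exit_timing *t)
{
        double mean, var;

        if (!t->count)
                return;
        mean = (double)t->sum / t->count;
        /* E[x^2] - E[x]^2: this is why both sums are maintained */
        var = (double)t->sum_quad / t->count - mean * mean;
        printf("%-12s count=%u mean=%.2fus stddev=%.2fus\n",
               name, t->count, mean, sqrt(var > 0 ? var : 0));
}

int main(void)
{
        struct exit_timing mmio = { .count = 4, .sum = 40, .sum_quad = 420 };

        report("MMIO", &mmio);          /* mean 10.00us, stddev ~2.24us */
        return 0;
}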
+102
arch/powerpc/kvm/timing.h
···
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * This program is distributed in the hope that it will be useful, 7 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 + * GNU General Public License for more details. 10 + * 11 + * You should have received a copy of the GNU General Public License 12 + * along with this program; if not, write to the Free Software 13 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 + * 15 + * Copyright IBM Corp. 2008 16 + * 17 + * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> 18 + */ 19 + 20 + #ifndef __POWERPC_KVM_EXITTIMING_H__ 21 + #define __POWERPC_KVM_EXITTIMING_H__ 22 + 23 + #include <linux/kvm_host.h> 24 + #include <asm/kvm_host.h> 25 + 26 + #ifdef CONFIG_KVM_EXIT_TIMING 27 + void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); 28 + void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); 29 + void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); 30 + void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); 31 + 32 + static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) 33 + { 34 + vcpu->arch.last_exit_type = type; 35 + } 36 + 37 + #else 38 + /* if exit timing is not configured there is no need to build the c file */ 39 + static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} 40 + static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} 41 + static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, 42 + unsigned int id) {} 43 + static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} 44 + static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} 45 + #endif /* CONFIG_KVM_EXIT_TIMING */ 46 + 47 + /* account the exit in kvm_stats */ 48 + static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) 49 + { 50 + /* type has to be known at build time for optimization */ 51 + BUILD_BUG_ON(__builtin_constant_p(type)); 52 + switch (type) { 53 + case EXT_INTR_EXITS: 54 + vcpu->stat.ext_intr_exits++; 55 + break; 56 + case DEC_EXITS: 57 + vcpu->stat.dec_exits++; 58 + break; 59 + case EMULATED_INST_EXITS: 60 + vcpu->stat.emulated_inst_exits++; 61 + break; 62 + case DCR_EXITS: 63 + vcpu->stat.dcr_exits++; 64 + break; 65 + case DSI_EXITS: 66 + vcpu->stat.dsi_exits++; 67 + break; 68 + case ISI_EXITS: 69 + vcpu->stat.isi_exits++; 70 + break; 71 + case SYSCALL_EXITS: 72 + vcpu->stat.syscall_exits++; 73 + break; 74 + case DTLB_REAL_MISS_EXITS: 75 + vcpu->stat.dtlb_real_miss_exits++; 76 + break; 77 + case DTLB_VIRT_MISS_EXITS: 78 + vcpu->stat.dtlb_virt_miss_exits++; 79 + break; 80 + case MMIO_EXITS: 81 + vcpu->stat.mmio_exits++; 82 + break; 83 + case ITLB_REAL_MISS_EXITS: 84 + vcpu->stat.itlb_real_miss_exits++; 85 + break; 86 + case ITLB_VIRT_MISS_EXITS: 87 + vcpu->stat.itlb_virt_miss_exits++; 88 + break; 89 + case SIGNAL_EXITS: 90 + vcpu->stat.signal_exits++; 91 + break; 92 + } 93 + } 94 + 95 + /* wrapper to set exit time and account for it in kvm_stats */ 96 + static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type) 97 + { 98 + kvmppc_set_exit_type(vcpu, type); 99 + kvmppc_account_exit_stat(vcpu, type); 100 + } 101 + 102 + #endif /* __POWERPC_KVM_EXITTIMING_H__ */
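Editorial note: timing.h compiles the whole facility away when CONFIG_KVM_EXIT_TIMING is off, and the stat-accounting switch is written so the compiler can fold it whenever the exit type is a literal. A GCC-only userspace sketch of the __builtin_constant_p property that this folding relies on (not kernel code):

#include <stdio.h>

int main(int argc, char **argv)
{
        (void)argv;
        printf("literal 3    : %d\n", __builtin_constant_p(3));    /* 1 */
        printf("runtime argc : %d\n", __builtin_constant_p(argc)); /* 0 */
        return 0;
}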
+23 -18
arch/s390/kvm/kvm-s390.c
··· 113 int kvm_dev_ioctl_check_extension(long ext) 114 { 115 switch (ext) { 116 - case KVM_CAP_USER_MEMORY: 117 - return 1; 118 default: 119 return 0; 120 } ··· 183 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 184 VM_EVENT(kvm, 3, "%s", "vm created"); 185 186 - try_module_get(THIS_MODULE); 187 - 188 return kvm; 189 out_nodbf: 190 free_page((unsigned long)(kvm->arch.sca)); ··· 192 return ERR_PTR(rc); 193 } 194 195 void kvm_arch_destroy_vm(struct kvm *kvm) 196 { 197 - debug_unregister(kvm->arch.dbf); 198 kvm_free_physmem(kvm); 199 free_page((unsigned long)(kvm->arch.sca)); 200 kfree(kvm); 201 - module_put(THIS_MODULE); 202 } 203 204 /* Section: vcpu related */ ··· 229 230 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 231 { 232 - /* kvm common code refers to this, but does'nt call it */ 233 - BUG(); 234 } 235 236 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ··· 323 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, 324 vcpu->arch.sie_block); 325 326 - try_module_get(THIS_MODULE); 327 - 328 return vcpu; 329 out_free_cpu: 330 kfree(vcpu); 331 out_nomem: 332 return ERR_PTR(rc); 333 - } 334 - 335 - void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 336 - { 337 - VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); 338 - free_page((unsigned long)(vcpu->arch.sie_block)); 339 - kfree(vcpu); 340 - module_put(THIS_MODULE); 341 } 342 343 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
··· 113 int kvm_dev_ioctl_check_extension(long ext) 114 { 115 switch (ext) { 116 default: 117 return 0; 118 } ··· 185 debug_register_view(kvm->arch.dbf, &debug_sprintf_view); 186 VM_EVENT(kvm, 3, "%s", "vm created"); 187 188 return kvm; 189 out_nodbf: 190 free_page((unsigned long)(kvm->arch.sca)); ··· 196 return ERR_PTR(rc); 197 } 198 199 + void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 200 + { 201 + VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 202 + free_page((unsigned long)(vcpu->arch.sie_block)); 203 + kvm_vcpu_uninit(vcpu); 204 + kfree(vcpu); 205 + } 206 + 207 + static void kvm_free_vcpus(struct kvm *kvm) 208 + { 209 + unsigned int i; 210 + 211 + for (i = 0; i < KVM_MAX_VCPUS; ++i) { 212 + if (kvm->vcpus[i]) { 213 + kvm_arch_vcpu_destroy(kvm->vcpus[i]); 214 + kvm->vcpus[i] = NULL; 215 + } 216 + } 217 + } 218 + 219 void kvm_arch_destroy_vm(struct kvm *kvm) 220 { 221 + kvm_free_vcpus(kvm); 222 kvm_free_physmem(kvm); 223 free_page((unsigned long)(kvm->arch.sca)); 224 + debug_unregister(kvm->arch.dbf); 225 kfree(kvm); 226 } 227 228 /* Section: vcpu related */ ··· 213 214 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 215 { 216 + /* Nothing todo */ 217 } 218 219 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ··· 308 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, 309 vcpu->arch.sie_block); 310 311 return vcpu; 312 out_free_cpu: 313 kfree(vcpu); 314 out_nomem: 315 return ERR_PTR(rc); 316 } 317 318 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+26 -19
arch/x86/include/asm/kvm_host.h
··· 21 22 #include <asm/pvclock-abi.h> 23 #include <asm/desc.h> 24 25 #define KVM_MAX_VCPUS 16 26 #define KVM_MEMORY_SLOTS 32 ··· 87 #define KVM_MIN_FREE_MMU_PAGES 5 88 #define KVM_REFILL_PAGES 25 89 #define KVM_MAX_CPUID_ENTRIES 40 90 #define KVM_NR_VAR_MTRR 8 91 92 extern spinlock_t kvm_lock; ··· 182 struct list_head link; 183 struct hlist_node hash_link; 184 185 /* 186 * The following two entries are used to key the shadow page in the 187 * hash table. ··· 194 u64 *spt; 195 /* hold the gfn of each spte inside spt */ 196 gfn_t *gfns; 197 - unsigned long slot_bitmap; /* One bit set per slot which has memory 198 - * in this shadow page. 199 - */ 200 int multimapped; /* More than one parent_pte? */ 201 int root_count; /* Currently serving as active root */ 202 bool unsync; 203 - bool unsync_children; 204 union { 205 u64 *parent_pte; /* !multimapped */ 206 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ ··· 334 335 bool nmi_pending; 336 bool nmi_injected; 337 338 - u64 mtrr[0x100]; 339 }; 340 341 struct kvm_mem_alias { ··· 359 */ 360 struct list_head active_mmu_pages; 361 struct list_head assigned_dev_head; 362 struct dmar_domain *intel_iommu_domain; 363 struct kvm_pic *vpic; 364 struct kvm_ioapic *vioapic; 365 struct kvm_pit *vpit; 366 struct hlist_head irq_ack_notifier_list; 367 368 int round_robin_prev_vcpu; 369 unsigned int tss_addr; ··· 389 u32 mmu_recycled; 390 u32 mmu_cache_miss; 391 u32 mmu_unsync; 392 u32 remote_tlb_flush; 393 u32 lpages; 394 }; ··· 409 u32 halt_exits; 410 u32 halt_wakeup; 411 u32 request_irq_exits; 412 u32 irq_exits; 413 u32 host_state_reload; 414 u32 efer_reload; ··· 418 u32 insn_emulation_fail; 419 u32 hypercalls; 420 u32 irq_injections; 421 }; 422 423 struct descriptor_table { ··· 491 492 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 493 int (*get_tdp_level)(void); 494 }; 495 496 extern struct kvm_x86_ops *kvm_x86_ops; ··· 505 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); 506 void kvm_mmu_set_base_ptes(u64 base_pte); 507 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 508 - u64 dirty_mask, u64 nx_mask, u64 x_mask); 509 510 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 511 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); ··· 602 603 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); 604 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 605 - const u8 *new, int bytes); 606 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); 607 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); 608 int kvm_mmu_load(struct kvm_vcpu *vcpu); 609 void kvm_mmu_unload(struct kvm_vcpu *vcpu); 610 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 611 612 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 613 ··· 623 624 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 625 int complete_pio(struct kvm_vcpu *vcpu); 626 627 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) 628 { ··· 720 { 721 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); 722 } 723 - 724 - #define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" 725 - #define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" 726 - #define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" 727 - #define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" 728 - #define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" 729 - #define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" 730 - #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" 731 - #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" 732 - #define ASM_VMX_VMXON_RAX ".byte 0xf3, 
0x0f, 0xc7, 0x30" 733 - #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" 734 - #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" 735 736 #define MSR_IA32_TIME_STAMP_COUNTER 0x010 737
··· 21 22 #include <asm/pvclock-abi.h> 23 #include <asm/desc.h> 24 + #include <asm/mtrr.h> 25 26 #define KVM_MAX_VCPUS 16 27 #define KVM_MEMORY_SLOTS 32 ··· 86 #define KVM_MIN_FREE_MMU_PAGES 5 87 #define KVM_REFILL_PAGES 25 88 #define KVM_MAX_CPUID_ENTRIES 40 89 + #define KVM_NR_FIXED_MTRR_REGION 88 90 #define KVM_NR_VAR_MTRR 8 91 92 extern spinlock_t kvm_lock; ··· 180 struct list_head link; 181 struct hlist_node hash_link; 182 183 + struct list_head oos_link; 184 + 185 /* 186 * The following two entries are used to key the shadow page in the 187 * hash table. ··· 190 u64 *spt; 191 /* hold the gfn of each spte inside spt */ 192 gfn_t *gfns; 193 + /* 194 + * One bit set per slot which has memory 195 + * in this shadow page. 196 + */ 197 + DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 198 int multimapped; /* More than one parent_pte? */ 199 int root_count; /* Currently serving as active root */ 200 bool unsync; 201 + bool global; 202 + unsigned int unsync_children; 203 union { 204 u64 *parent_pte; /* !multimapped */ 205 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ ··· 327 328 bool nmi_pending; 329 bool nmi_injected; 330 + bool nmi_window_open; 331 332 + struct mtrr_state_type mtrr_state; 333 + u32 pat; 334 }; 335 336 struct kvm_mem_alias { ··· 350 */ 351 struct list_head active_mmu_pages; 352 struct list_head assigned_dev_head; 353 + struct list_head oos_global_pages; 354 struct dmar_domain *intel_iommu_domain; 355 struct kvm_pic *vpic; 356 struct kvm_ioapic *vioapic; 357 struct kvm_pit *vpit; 358 struct hlist_head irq_ack_notifier_list; 359 + int vapics_in_nmi_mode; 360 361 int round_robin_prev_vcpu; 362 unsigned int tss_addr; ··· 378 u32 mmu_recycled; 379 u32 mmu_cache_miss; 380 u32 mmu_unsync; 381 + u32 mmu_unsync_global; 382 u32 remote_tlb_flush; 383 u32 lpages; 384 }; ··· 397 u32 halt_exits; 398 u32 halt_wakeup; 399 u32 request_irq_exits; 400 + u32 request_nmi_exits; 401 u32 irq_exits; 402 u32 host_state_reload; 403 u32 efer_reload; ··· 405 u32 insn_emulation_fail; 406 u32 hypercalls; 407 u32 irq_injections; 408 + u32 nmi_injections; 409 }; 410 411 struct descriptor_table { ··· 477 478 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 479 int (*get_tdp_level)(void); 480 + int (*get_mt_mask_shift)(void); 481 }; 482 483 extern struct kvm_x86_ops *kvm_x86_ops; ··· 490 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); 491 void kvm_mmu_set_base_ptes(u64 base_pte); 492 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 493 + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); 494 495 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 496 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); ··· 587 588 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); 589 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 590 + const u8 *new, int bytes, 591 + bool guest_initiated); 592 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); 593 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); 594 int kvm_mmu_load(struct kvm_vcpu *vcpu); 595 void kvm_mmu_unload(struct kvm_vcpu *vcpu); 596 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 597 + void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); 598 599 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 600 ··· 606 607 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 608 int complete_pio(struct kvm_vcpu *vcpu); 609 + 610 + struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); 611 612 static inline struct kvm_mmu_page 
*page_header(hpa_t shadow_page) 613 { ··· 701 { 702 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); 703 } 704 705 #define MSR_IA32_TIME_STAMP_COUNTER 0x010 706
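Editorial note: one detail worth calling out in the kvm_host.h hunk is that slot_bitmap grows from a single unsigned long to a real bitmap, because 32 user slots plus 4 private slots no longer fit in one long on 32-bit hosts. A userspace sketch of the sizing, re-implementing the kernel macros purely for illustration:

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG           (sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(n)        (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)
#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]

#define KVM_MEMORY_SLOTS        32
#define KVM_PRIVATE_MEM_SLOTS    4

int main(void)
{
        DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);

        printf("slots: %d, longs needed: %zu\n",
               KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS,
               sizeof(slot_bitmap) / sizeof(slot_bitmap[0]));
        /* 2 longs on a 32-bit host, 1 on a 64-bit host */
        return 0;
}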
+4 -7
arch/x86/include/asm/kvm_x86_emulate.h
··· 123 u8 ad_bytes; 124 u8 rex_prefix; 125 struct operand src; 126 struct operand dst; 127 bool has_seg_override; 128 u8 seg_override; ··· 147 /* Register state before/after emulation. */ 148 struct kvm_vcpu *vcpu; 149 150 - /* Linear faulting address (if emulating a page-faulting instruction) */ 151 unsigned long eflags; 152 - 153 /* Emulated execution mode, represented by an X86EMUL_MODE value. */ 154 int mode; 155 - 156 u32 cs_base; 157 158 /* decode cache */ 159 - 160 struct decode_cache decode; 161 }; 162 163 /* Repeat String Operation Prefix */ 164 - #define REPE_PREFIX 1 165 - #define REPNE_PREFIX 2 166 167 /* Execution mode, passed to the emulator. */ 168 #define X86EMUL_MODE_REAL 0 /* Real mode. */ ··· 167 #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ 168 169 /* Host execution mode. */ 170 - #if defined(__i386__) 171 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 172 #elif defined(CONFIG_X86_64) 173 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
··· 123 u8 ad_bytes; 124 u8 rex_prefix; 125 struct operand src; 126 + struct operand src2; 127 struct operand dst; 128 bool has_seg_override; 129 u8 seg_override; ··· 146 /* Register state before/after emulation. */ 147 struct kvm_vcpu *vcpu; 148 149 unsigned long eflags; 150 /* Emulated execution mode, represented by an X86EMUL_MODE value. */ 151 int mode; 152 u32 cs_base; 153 154 /* decode cache */ 155 struct decode_cache decode; 156 }; 157 158 /* Repeat String Operation Prefix */ 159 + #define REPE_PREFIX 1 160 + #define REPNE_PREFIX 2 161 162 /* Execution mode, passed to the emulator. */ 163 #define X86EMUL_MODE_REAL 0 /* Real mode. */ ··· 170 #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ 171 172 /* Host execution mode. */ 173 + #if defined(CONFIG_X86_32) 174 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 175 #elif defined(CONFIG_X86_64) 176 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
+25
arch/x86/include/asm/mtrr.h
··· 57 }; 58 #endif /* !__i386__ */ 59 60 /* These are the various ioctls */ 61 #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) 62 #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
··· 57 }; 58 #endif /* !__i386__ */ 59 60 + struct mtrr_var_range { 61 + u32 base_lo; 62 + u32 base_hi; 63 + u32 mask_lo; 64 + u32 mask_hi; 65 + }; 66 + 67 + /* In the Intel processor's MTRR interface, the MTRR type is always held in 68 + an 8 bit field: */ 69 + typedef u8 mtrr_type; 70 + 71 + #define MTRR_NUM_FIXED_RANGES 88 72 + #define MTRR_MAX_VAR_RANGES 256 73 + 74 + struct mtrr_state_type { 75 + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; 76 + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; 77 + unsigned char enabled; 78 + unsigned char have_fixed; 79 + mtrr_type def_type; 80 + }; 81 + 82 + #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) 83 + #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) 84 + 85 /* These are the various ioctls */ 86 #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) 87 #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
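Editorial note: the two MSR macros moved into mtrr.h encode the base/mask pair layout of the variable-range MTRR MSRs, which start at MSR 0x200. A trivial standalone check of the numbering:

#include <stdio.h>

#define MTRRphysBase_MSR(reg)   (0x200 + 2 * (reg))
#define MTRRphysMask_MSR(reg)   (0x200 + 2 * (reg) + 1)

int main(void)
{
        int reg;

        for (reg = 0; reg < 4; reg++)
                printf("MTRR%d: base MSR 0x%x, mask MSR 0x%x\n",
                       reg, MTRRphysBase_MSR(reg), MTRRphysMask_MSR(reg));
        /* 0x200/0x201, 0x202/0x203, ... */
        return 0;
}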
+132
arch/x86/include/asm/virtext.h
···
··· 1 + /* CPU virtualization extensions handling 2 + * 3 + * This should carry the code for handling CPU virtualization extensions 4 + * that needs to live in the kernel core. 5 + * 6 + * Author: Eduardo Habkost <ehabkost@redhat.com> 7 + * 8 + * Copyright (C) 2008, Red Hat Inc. 9 + * 10 + * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. 11 + * 12 + * This work is licensed under the terms of the GNU GPL, version 2. See 13 + * the COPYING file in the top-level directory. 14 + */ 15 + #ifndef _ASM_X86_VIRTEX_H 16 + #define _ASM_X86_VIRTEX_H 17 + 18 + #include <asm/processor.h> 19 + #include <asm/system.h> 20 + 21 + #include <asm/vmx.h> 22 + #include <asm/svm.h> 23 + 24 + /* 25 + * VMX functions: 26 + */ 27 + 28 + static inline int cpu_has_vmx(void) 29 + { 30 + unsigned long ecx = cpuid_ecx(1); 31 + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ 32 + } 33 + 34 + 35 + /** Disable VMX on the current CPU 36 + * 37 + * vmxoff causes a undefined-opcode exception if vmxon was not run 38 + * on the CPU previously. Only call this function if you know VMX 39 + * is enabled. 40 + */ 41 + static inline void cpu_vmxoff(void) 42 + { 43 + asm volatile (ASM_VMX_VMXOFF : : : "cc"); 44 + write_cr4(read_cr4() & ~X86_CR4_VMXE); 45 + } 46 + 47 + static inline int cpu_vmx_enabled(void) 48 + { 49 + return read_cr4() & X86_CR4_VMXE; 50 + } 51 + 52 + /** Disable VMX if it is enabled on the current CPU 53 + * 54 + * You shouldn't call this if cpu_has_vmx() returns 0. 55 + */ 56 + static inline void __cpu_emergency_vmxoff(void) 57 + { 58 + if (cpu_vmx_enabled()) 59 + cpu_vmxoff(); 60 + } 61 + 62 + /** Disable VMX if it is supported and enabled on the current CPU 63 + */ 64 + static inline void cpu_emergency_vmxoff(void) 65 + { 66 + if (cpu_has_vmx()) 67 + __cpu_emergency_vmxoff(); 68 + } 69 + 70 + 71 + 72 + 73 + /* 74 + * SVM functions: 75 + */ 76 + 77 + /** Check if the CPU has SVM support 78 + * 79 + * You can use the 'msg' arg to get a message describing the problem, 80 + * if the function returns zero. Simply pass NULL if you are not interested 81 + * on the messages; gcc should take care of not generating code for 82 + * the messages on this case. 83 + */ 84 + static inline int cpu_has_svm(const char **msg) 85 + { 86 + uint32_t eax, ebx, ecx, edx; 87 + 88 + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { 89 + if (msg) 90 + *msg = "not amd"; 91 + return 0; 92 + } 93 + 94 + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); 95 + if (eax < SVM_CPUID_FUNC) { 96 + if (msg) 97 + *msg = "can't execute cpuid_8000000a"; 98 + return 0; 99 + } 100 + 101 + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); 102 + if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { 103 + if (msg) 104 + *msg = "svm not available"; 105 + return 0; 106 + } 107 + return 1; 108 + } 109 + 110 + 111 + /** Disable SVM on the current CPU 112 + * 113 + * You should call this only if cpu_has_svm() returned true. 114 + */ 115 + static inline void cpu_svm_disable(void) 116 + { 117 + uint64_t efer; 118 + 119 + wrmsrl(MSR_VM_HSAVE_PA, 0); 120 + rdmsrl(MSR_EFER, efer); 121 + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); 122 + } 123 + 124 + /** Makes sure SVM is disabled, if it is supported on the CPU 125 + */ 126 + static inline void cpu_emergency_svm_disable(void) 127 + { 128 + if (cpu_has_svm(NULL)) 129 + cpu_svm_disable(); 130 + } 131 + 132 + #endif /* _ASM_X86_VIRTEX_H */
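Editorial note: virtext.h probes for the extensions via CPUID; VMX is CPUID.1:ECX bit 5 and SVM sits in leaf 0x8000_0001. A rough userspace approximation using GCC's <cpuid.h> (x86 only; this reports hardware capability, not whether the feature is currently enabled in CR4 or EFER, which is what the cpu_*_enabled helpers check):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                printf("VMX: %s\n", (ecx & (1 << 5)) ? "yes" : "no");

        if (__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
                printf("SVM: %s\n", (ecx & (1 << 2)) ? "yes" : "no");

        return 0;
}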
+3 -9
arch/x86/kernel/cpu/mtrr/generic.c
··· 14 #include <asm/pat.h> 15 #include "mtrr.h" 16 17 - struct mtrr_state { 18 - struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; 19 - mtrr_type fixed_ranges[NUM_FIXED_RANGES]; 20 - unsigned char enabled; 21 - unsigned char have_fixed; 22 - mtrr_type def_type; 23 - }; 24 - 25 struct fixed_range_block { 26 int base_msr; /* start address of an MTRR block */ 27 int ranges; /* number of MTRRs in this block */ ··· 27 }; 28 29 static unsigned long smp_changes_mask; 30 - static struct mtrr_state mtrr_state = {}; 31 static int mtrr_state_set; 32 u64 mtrr_tom2; 33 34 #undef MODULE_PARAM_PREFIX 35 #define MODULE_PARAM_PREFIX "mtrr."
··· 14 #include <asm/pat.h> 15 #include "mtrr.h" 16 17 struct fixed_range_block { 18 int base_msr; /* start address of an MTRR block */ 19 int ranges; /* number of MTRRs in this block */ ··· 35 }; 36 37 static unsigned long smp_changes_mask; 38 static int mtrr_state_set; 39 u64 mtrr_tom2; 40 + 41 + struct mtrr_state_type mtrr_state = {}; 42 + EXPORT_SYMBOL_GPL(mtrr_state); 43 44 #undef MODULE_PARAM_PREFIX 45 #define MODULE_PARAM_PREFIX "mtrr."
+2 -2
arch/x86/kernel/cpu/mtrr/main.c
··· 49 50 u32 num_var_ranges = 0; 51 52 - unsigned int mtrr_usage_table[MAX_VAR_RANGES]; 53 static DEFINE_MUTEX(mtrr_mutex); 54 55 u64 size_or_mask, size_and_mask; ··· 574 unsigned long lsize; 575 }; 576 577 - static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; 578 579 static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 580 {
··· 49 50 u32 num_var_ranges = 0; 51 52 + unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; 53 static DEFINE_MUTEX(mtrr_mutex); 54 55 u64 size_or_mask, size_and_mask; ··· 574 unsigned long lsize; 575 }; 576 577 + static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; 578 579 static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 580 {
+1 -17
arch/x86/kernel/cpu/mtrr/mtrr.h
··· 8 #define MTRRcap_MSR 0x0fe 9 #define MTRRdefType_MSR 0x2ff 10 11 - #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) 12 - #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) 13 - 14 - #define NUM_FIXED_RANGES 88 15 - #define MAX_VAR_RANGES 256 16 #define MTRRfix64K_00000_MSR 0x250 17 #define MTRRfix16K_80000_MSR 0x258 18 #define MTRRfix16K_A0000_MSR 0x259 ··· 24 #define MTRR_CHANGE_MASK_VARIABLE 0x02 25 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 26 27 - /* In the Intel processor's MTRR interface, the MTRR type is always held in 28 - an 8 bit field: */ 29 - typedef u8 mtrr_type; 30 - 31 - extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; 32 33 struct mtrr_ops { 34 u32 vendor; ··· 59 u32 deftype_lo; 60 u32 deftype_hi; 61 u32 ccr3; 62 - }; 63 - 64 - struct mtrr_var_range { 65 - u32 base_lo; 66 - u32 base_hi; 67 - u32 mask_lo; 68 - u32 mask_hi; 69 }; 70 71 void set_mtrr_done(struct set_mtrr_context *ctxt);
··· 8 #define MTRRcap_MSR 0x0fe 9 #define MTRRdefType_MSR 0x2ff 10 11 #define MTRRfix64K_00000_MSR 0x250 12 #define MTRRfix16K_80000_MSR 0x258 13 #define MTRRfix16K_A0000_MSR 0x259 ··· 29 #define MTRR_CHANGE_MASK_VARIABLE 0x02 30 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 31 32 + extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; 33 34 struct mtrr_ops { 35 u32 vendor; ··· 68 u32 deftype_lo; 69 u32 deftype_hi; 70 u32 ccr3; 71 }; 72 73 void set_mtrr_done(struct set_mtrr_context *ctxt);
+18
arch/x86/kernel/crash.c
··· 26 #include <linux/kdebug.h> 27 #include <asm/smp.h> 28 #include <asm/reboot.h> 29 30 #include <mach_ipi.h> 31 ··· 49 } 50 #endif 51 crash_save_cpu(regs, cpu); 52 53 disable_local_APIC(); 54 } ··· 90 local_irq_disable(); 91 92 kdump_nmi_shootdown_cpus(); 93 lapic_shutdown(); 94 #if defined(CONFIG_X86_IO_APIC) 95 disable_IO_APIC();
··· 26 #include <linux/kdebug.h> 27 #include <asm/smp.h> 28 #include <asm/reboot.h> 29 + #include <asm/virtext.h> 30 31 #include <mach_ipi.h> 32 ··· 48 } 49 #endif 50 crash_save_cpu(regs, cpu); 51 + 52 + /* Disable VMX or SVM if needed. 53 + * 54 + * We need to disable virtualization on all CPUs. 55 + * Having VMX or SVM enabled on any CPU may break rebooting 56 + * after the kdump kernel has finished its task. 57 + */ 58 + cpu_emergency_vmxoff(); 59 + cpu_emergency_svm_disable(); 60 61 disable_local_APIC(); 62 } ··· 80 local_irq_disable(); 81 82 kdump_nmi_shootdown_cpus(); 83 + 84 + /* Booting kdump kernel with VMX or SVM enabled won't work, 85 + * because (among other limitations) we can't disable paging 86 + * with the virt flags. 87 + */ 88 + cpu_emergency_vmxoff(); 89 + cpu_emergency_svm_disable(); 90 + 91 lapic_shutdown(); 92 #if defined(CONFIG_X86_IO_APIC) 93 disable_IO_APIC();
+6 -4
arch/x86/kernel/kvmclock.c
··· 89 */ 90 static unsigned long kvm_get_tsc_khz(void) 91 { 92 - return preset_lpj; 93 } 94 95 static void kvm_get_preset_lpj(void) 96 { 97 - struct pvclock_vcpu_time_info *src; 98 unsigned long khz; 99 u64 lpj; 100 101 - src = &per_cpu(hv_clock, 0); 102 - khz = pvclock_tsc_khz(src); 103 104 lpj = ((u64)khz * 1000); 105 do_div(lpj, HZ); ··· 194 #endif 195 kvm_get_preset_lpj(); 196 clocksource_register(&kvm_clock); 197 } 198 }
··· 89 */ 90 static unsigned long kvm_get_tsc_khz(void) 91 { 92 + struct pvclock_vcpu_time_info *src; 93 + src = &per_cpu(hv_clock, 0); 94 + return pvclock_tsc_khz(src); 95 } 96 97 static void kvm_get_preset_lpj(void) 98 { 99 unsigned long khz; 100 u64 lpj; 101 102 + khz = kvm_get_tsc_khz(); 103 104 lpj = ((u64)khz * 1000); 105 do_div(lpj, HZ); ··· 194 #endif 195 kvm_get_preset_lpj(); 196 clocksource_register(&kvm_clock); 197 + pv_info.paravirt_enabled = 1; 198 + pv_info.name = "KVM"; 199 } 200 }
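Editorial note: kvmclock now returns the TSC frequency in kHz and derives loops-per-jiffy from it as khz * 1000 / HZ. A worked standalone example with made-up numbers:

#include <stdio.h>

int main(void)
{
        unsigned long khz = 2400000;    /* 2.4 GHz TSC, for illustration */
        unsigned long hz = 250;         /* one common CONFIG_HZ value */
        unsigned long long lpj = (unsigned long long)khz * 1000 / hz;

        printf("tsc=%lu kHz, HZ=%lu -> lpj=%llu\n", khz, hz, lpj);
        /* 2.4e9 ticks/s / 250 jiffies/s = 9,600,000 ticks per jiffy */
        return 0;
}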
+60 -2
arch/x86/kernel/reboot.c
··· 12 #include <asm/proto.h> 13 #include <asm/reboot_fixups.h> 14 #include <asm/reboot.h> 15 16 #ifdef CONFIG_X86_32 17 # include <linux/dmi.h> ··· 39 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) 40 static int reboot_cpu = -1; 41 #endif 42 43 /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ 44 bool port_cf9_safe = false; ··· 375 } 376 } 377 378 void __attribute__((weak)) mach_reboot_fixups(void) 379 { 380 } ··· 424 static void native_machine_emergency_restart(void) 425 { 426 int i; 427 428 /* Tell the BIOS if we want cold or warm reboot */ 429 *((unsigned short *)__va(0x472)) = reboot_mode; ··· 534 #endif 535 } 536 537 static void native_machine_restart(char *__unused) 538 { 539 printk("machine restart\n"); 540 541 if (!reboot_force) 542 machine_shutdown(); 543 - machine_emergency_restart(); 544 } 545 546 static void native_machine_halt(void) ··· 590 591 void machine_emergency_restart(void) 592 { 593 - machine_ops.emergency_restart(); 594 } 595 596 void machine_restart(char *cmd)
··· 12 #include <asm/proto.h> 13 #include <asm/reboot_fixups.h> 14 #include <asm/reboot.h> 15 + #include <asm/virtext.h> 16 17 #ifdef CONFIG_X86_32 18 # include <linux/dmi.h> ··· 38 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) 39 static int reboot_cpu = -1; 40 #endif 41 + 42 + /* This is set if we need to go through the 'emergency' path. 43 + * When machine_emergency_restart() is called, we may be on 44 + * an inconsistent state and won't be able to do a clean cleanup 45 + */ 46 + static int reboot_emergency; 47 48 /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ 49 bool port_cf9_safe = false; ··· 368 } 369 } 370 371 + static void vmxoff_nmi(int cpu, struct die_args *args) 372 + { 373 + cpu_emergency_vmxoff(); 374 + } 375 + 376 + /* Use NMIs as IPIs to tell all CPUs to disable virtualization 377 + */ 378 + static void emergency_vmx_disable_all(void) 379 + { 380 + /* Just make sure we won't change CPUs while doing this */ 381 + local_irq_disable(); 382 + 383 + /* We need to disable VMX on all CPUs before rebooting, otherwise 384 + * we risk hanging up the machine, because the CPU ignore INIT 385 + * signals when VMX is enabled. 386 + * 387 + * We can't take any locks and we may be on an inconsistent 388 + * state, so we use NMIs as IPIs to tell the other CPUs to disable 389 + * VMX and halt. 390 + * 391 + * For safety, we will avoid running the nmi_shootdown_cpus() 392 + * stuff unnecessarily, but we don't have a way to check 393 + * if other CPUs have VMX enabled. So we will call it only if the 394 + * CPU we are running on has VMX enabled. 395 + * 396 + * We will miss cases where VMX is not enabled on all CPUs. This 397 + * shouldn't do much harm because KVM always enable VMX on all 398 + * CPUs anyway. But we can miss it on the small window where KVM 399 + * is still enabling VMX. 400 + */ 401 + if (cpu_has_vmx() && cpu_vmx_enabled()) { 402 + /* Disable VMX on this CPU. 403 + */ 404 + cpu_vmxoff(); 405 + 406 + /* Halt and disable VMX on the other CPUs */ 407 + nmi_shootdown_cpus(vmxoff_nmi); 408 + 409 + } 410 + } 411 + 412 + 413 void __attribute__((weak)) mach_reboot_fixups(void) 414 { 415 } ··· 375 static void native_machine_emergency_restart(void) 376 { 377 int i; 378 + 379 + if (reboot_emergency) 380 + emergency_vmx_disable_all(); 381 382 /* Tell the BIOS if we want cold or warm reboot */ 383 *((unsigned short *)__va(0x472)) = reboot_mode; ··· 482 #endif 483 } 484 485 + static void __machine_emergency_restart(int emergency) 486 + { 487 + reboot_emergency = emergency; 488 + machine_ops.emergency_restart(); 489 + } 490 + 491 static void native_machine_restart(char *__unused) 492 { 493 printk("machine restart\n"); 494 495 if (!reboot_force) 496 machine_shutdown(); 497 + __machine_emergency_restart(0); 498 } 499 500 static void native_machine_halt(void) ··· 532 533 void machine_emergency_restart(void) 534 { 535 + __machine_emergency_restart(1); 536 } 537 538 void machine_restart(char *cmd)
+19
arch/x86/kvm/i8254.c
··· 603 604 static void __inject_pit_timer_intr(struct kvm *kvm) 605 { 606 mutex_lock(&kvm->lock); 607 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); 608 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); 609 mutex_unlock(&kvm->lock); 610 } 611 612 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
··· 603 604 static void __inject_pit_timer_intr(struct kvm *kvm) 605 { 606 + struct kvm_vcpu *vcpu; 607 + int i; 608 + 609 mutex_lock(&kvm->lock); 610 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); 611 kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); 612 mutex_unlock(&kvm->lock); 613 + 614 + /* 615 + * Provides NMI watchdog support via Virtual Wire mode. 616 + * The route is: PIT -> PIC -> LVT0 in NMI mode. 617 + * 618 + * Note: Our Virtual Wire implementation is simplified, only 619 + * propagating PIT interrupts to all VCPUs when they have set 620 + * LVT0 to NMI delivery. Other PIC interrupts are just sent to 621 + * VCPU0, and only if its LVT0 is in EXTINT mode. 622 + */ 623 + if (kvm->arch.vapics_in_nmi_mode > 0) 624 + for (i = 0; i < KVM_MAX_VCPUS; ++i) { 625 + vcpu = kvm->vcpus[i]; 626 + if (vcpu) 627 + kvm_apic_nmi_wd_deliver(vcpu); 628 + } 629 } 630 631 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
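Editorial note: the PIT path only fans its tick out as NMIs when some vcpu's LVT0 is programmed for NMI delivery, which the new vapics_in_nmi_mode counter tracks. A stand-in sketch of that guard (structures here are hypothetical, not the kernel's):

#include <stdio.h>

#define MAX_VCPUS 16

struct vcpu { int id; };

struct vm {
        int vapics_in_nmi_mode;         /* maintained by the LVT0 write path */
        struct vcpu *vcpus[MAX_VCPUS];
};

static void deliver_nmi_watchdog(struct vcpu *v)
{
        printf("NMI watchdog to vcpu %d\n", v->id);
}

static void pit_tick(struct vm *vm)
{
        int i;

        /* normal PIT -> PIC injection would happen here */

        if (vm->vapics_in_nmi_mode > 0)
                for (i = 0; i < MAX_VCPUS; i++)
                        if (vm->vcpus[i])
                                deliver_nmi_watchdog(vm->vcpus[i]);
}

int main(void)
{
        struct vcpu v0 = { 0 }, v1 = { 1 };
        struct vm vm = { .vapics_in_nmi_mode = 1, .vcpus = { &v0, &v1 } };

        pit_tick(&vm);
        return 0;
}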
+48 -4
arch/x86/kvm/i8259.c
··· 26 * Port from Qemu. 27 */ 28 #include <linux/mm.h> 29 #include "irq.h" 30 31 #include <linux/kvm_host.h> 32 33 static void pic_clear_isr(struct kvm_kpic_state *s, int irq) 34 { ··· 166 167 void kvm_pic_update_irq(struct kvm_pic *s) 168 { 169 pic_update_irq(s); 170 } 171 172 void kvm_pic_set_irq(void *opaque, int irq, int level) 173 { 174 struct kvm_pic *s = opaque; 175 176 if (irq >= 0 && irq < PIC_NUM_PINS) { 177 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 178 pic_update_irq(s); 179 } 180 } 181 182 /* ··· 206 int irq, irq2, intno; 207 struct kvm_pic *s = pic_irqchip(kvm); 208 209 irq = pic_get_irq(&s->pics[0]); 210 if (irq >= 0) { 211 pic_intack(&s->pics[0], irq); ··· 231 intno = s->pics[0].irq_base + irq; 232 } 233 pic_update_irq(s); 234 kvm_notify_acked_irq(kvm, irq); 235 236 return intno; ··· 239 240 void kvm_pic_reset(struct kvm_kpic_state *s) 241 { 242 - int irq, irqbase; 243 struct kvm *kvm = s->pics_state->irq_request_opaque; 244 struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; 245 ··· 250 251 for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { 252 if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) 253 - if (s->irr & (1 << irq) || s->isr & (1 << irq)) 254 - kvm_notify_acked_irq(kvm, irq+irqbase); 255 } 256 s->last_irr = 0; 257 s->irr = 0; ··· 444 printk(KERN_ERR "PIC: non byte write\n"); 445 return; 446 } 447 switch (addr) { 448 case 0x20: 449 case 0x21: ··· 457 elcr_ioport_write(&s->pics[addr & 1], addr, data); 458 break; 459 } 460 } 461 462 static void picdev_read(struct kvm_io_device *this, ··· 471 printk(KERN_ERR "PIC: non byte read\n"); 472 return; 473 } 474 switch (addr) { 475 case 0x20: 476 case 0x21: ··· 485 break; 486 } 487 *(unsigned char *)val = data; 488 } 489 490 /* ··· 501 s->output = level; 502 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { 503 s->pics[0].isr_ack &= ~(1 << irq); 504 - kvm_vcpu_kick(vcpu); 505 } 506 } 507 ··· 511 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); 512 if (!s) 513 return NULL; 514 s->pics[0].elcr_mask = 0xf8; 515 s->pics[1].elcr_mask = 0xde; 516 s->irq_request = pic_irq_request;
··· 26 * Port from Qemu. 27 */ 28 #include <linux/mm.h> 29 + #include <linux/bitops.h> 30 #include "irq.h" 31 32 #include <linux/kvm_host.h> 33 + 34 + static void pic_lock(struct kvm_pic *s) 35 + { 36 + spin_lock(&s->lock); 37 + } 38 + 39 + static void pic_unlock(struct kvm_pic *s) 40 + { 41 + struct kvm *kvm = s->kvm; 42 + unsigned acks = s->pending_acks; 43 + bool wakeup = s->wakeup_needed; 44 + struct kvm_vcpu *vcpu; 45 + 46 + s->pending_acks = 0; 47 + s->wakeup_needed = false; 48 + 49 + spin_unlock(&s->lock); 50 + 51 + while (acks) { 52 + kvm_notify_acked_irq(kvm, __ffs(acks)); 53 + acks &= acks - 1; 54 + } 55 + 56 + if (wakeup) { 57 + vcpu = s->kvm->vcpus[0]; 58 + if (vcpu) 59 + kvm_vcpu_kick(vcpu); 60 + } 61 + } 62 63 static void pic_clear_isr(struct kvm_kpic_state *s, int irq) 64 { ··· 136 137 void kvm_pic_update_irq(struct kvm_pic *s) 138 { 139 + pic_lock(s); 140 pic_update_irq(s); 141 + pic_unlock(s); 142 } 143 144 void kvm_pic_set_irq(void *opaque, int irq, int level) 145 { 146 struct kvm_pic *s = opaque; 147 148 + pic_lock(s); 149 if (irq >= 0 && irq < PIC_NUM_PINS) { 150 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 151 pic_update_irq(s); 152 } 153 + pic_unlock(s); 154 } 155 156 /* ··· 172 int irq, irq2, intno; 173 struct kvm_pic *s = pic_irqchip(kvm); 174 175 + pic_lock(s); 176 irq = pic_get_irq(&s->pics[0]); 177 if (irq >= 0) { 178 pic_intack(&s->pics[0], irq); ··· 196 intno = s->pics[0].irq_base + irq; 197 } 198 pic_update_irq(s); 199 + pic_unlock(s); 200 kvm_notify_acked_irq(kvm, irq); 201 202 return intno; ··· 203 204 void kvm_pic_reset(struct kvm_kpic_state *s) 205 { 206 + int irq, irqbase, n; 207 struct kvm *kvm = s->pics_state->irq_request_opaque; 208 struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; 209 ··· 214 215 for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { 216 if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) 217 + if (s->irr & (1 << irq) || s->isr & (1 << irq)) { 218 + n = irq + irqbase; 219 + s->pics_state->pending_acks |= 1 << n; 220 + } 221 } 222 s->last_irr = 0; 223 s->irr = 0; ··· 406 printk(KERN_ERR "PIC: non byte write\n"); 407 return; 408 } 409 + pic_lock(s); 410 switch (addr) { 411 case 0x20: 412 case 0x21: ··· 418 elcr_ioport_write(&s->pics[addr & 1], addr, data); 419 break; 420 } 421 + pic_unlock(s); 422 } 423 424 static void picdev_read(struct kvm_io_device *this, ··· 431 printk(KERN_ERR "PIC: non byte read\n"); 432 return; 433 } 434 + pic_lock(s); 435 switch (addr) { 436 case 0x20: 437 case 0x21: ··· 444 break; 445 } 446 *(unsigned char *)val = data; 447 + pic_unlock(s); 448 } 449 450 /* ··· 459 s->output = level; 460 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { 461 s->pics[0].isr_ack &= ~(1 << irq); 462 + s->wakeup_needed = true; 463 } 464 } 465 ··· 469 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); 470 if (!s) 471 return NULL; 472 + spin_lock_init(&s->lock); 473 + s->kvm = kvm; 474 s->pics[0].elcr_mask = 0xf8; 475 s->pics[1].elcr_mask = 0xde; 476 s->irq_request = pic_irq_request;
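Editorial note: the i8259 change takes a spinlock around PIC state but defers ack notifications and the vcpu wakeup until after the lock is dropped, so those callbacks never run with the PIC lock held. A userspace sketch of that deferral pattern (the pthread mutex and names are stand-ins for the kernel primitives):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct pic {
        pthread_mutex_t lock;
        unsigned pending_acks;          /* bit n set => irq n needs an ack */
        bool wakeup_needed;
};

static void notify_acked(unsigned irq) { printf("ack irq %u\n", irq); }
static void wake_vcpu(void)            { printf("kick vcpu0\n"); }

static void pic_lock(struct pic *s)    { pthread_mutex_lock(&s->lock); }

static void pic_unlock(struct pic *s)
{
        unsigned acks = s->pending_acks;
        bool wakeup = s->wakeup_needed;

        s->pending_acks = 0;
        s->wakeup_needed = false;
        pthread_mutex_unlock(&s->lock);

        while (acks) {                  /* lowest set bit first, like __ffs */
                notify_acked(__builtin_ctz(acks));
                acks &= acks - 1;
        }
        if (wakeup)
                wake_vcpu();
}

int main(void)
{
        struct pic s = { .lock = PTHREAD_MUTEX_INITIALIZER };

        pic_lock(&s);
        s.pending_acks |= (1 << 3) | (1 << 5);
        s.wakeup_needed = true;
        pic_unlock(&s);                 /* prints: ack irq 3, ack irq 5, kick vcpu0 */
        return 0;
}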
+6
arch/x86/kvm/irq.h
··· 25 #include <linux/mm_types.h> 26 #include <linux/hrtimer.h> 27 #include <linux/kvm_host.h> 28 29 #include "iodev.h" 30 #include "ioapic.h" ··· 60 }; 61 62 struct kvm_pic { 63 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 64 irq_request_func *irq_request; 65 void *irq_request_opaque; ··· 92 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); 93 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 94 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 95 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); 96 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); 97 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
··· 25 #include <linux/mm_types.h> 26 #include <linux/hrtimer.h> 27 #include <linux/kvm_host.h> 28 + #include <linux/spinlock.h> 29 30 #include "iodev.h" 31 #include "ioapic.h" ··· 59 }; 60 61 struct kvm_pic { 62 + spinlock_t lock; 63 + bool wakeup_needed; 64 + unsigned pending_acks; 65 + struct kvm *kvm; 66 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 67 irq_request_func *irq_request; 68 void *irq_request_opaque; ··· 87 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); 88 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 89 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 90 + void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); 91 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); 92 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); 93 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
+1 -1
arch/x86/kvm/kvm_svm.h
··· 7 #include <linux/kvm_host.h> 8 #include <asm/msr.h> 9 10 - #include "svm.h" 11 12 static const u32 host_save_user_msrs[] = { 13 #ifdef CONFIG_X86_64
··· 7 #include <linux/kvm_host.h> 8 #include <asm/msr.h> 9 10 + #include <asm/svm.h> 11 12 static const u32 host_save_user_msrs[] = { 13 #ifdef CONFIG_X86_64
+50 -8
arch/x86/kvm/lapic.c
··· 130 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; 131 } 132 133 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { 134 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ 135 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ ··· 359 360 case APIC_DM_NMI: 361 kvm_inject_nmi(vcpu); 362 break; 363 364 case APIC_DM_INIT: ··· 384 vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; 385 kvm_vcpu_kick(vcpu); 386 } 387 break; 388 389 default: ··· 677 apic->timer.period))); 678 } 679 680 static void apic_mmio_write(struct kvm_io_device *this, 681 gpa_t address, int len, const void *data) 682 { ··· 771 apic_set_reg(apic, APIC_ICR2, val & 0xff000000); 772 break; 773 774 case APIC_LVTT: 775 case APIC_LVTTHMR: 776 case APIC_LVTPC: 777 - case APIC_LVT0: 778 case APIC_LVT1: 779 case APIC_LVTERR: 780 /* TODO: Check vector */ ··· 990 return 0; 991 } 992 993 - static int __inject_apic_timer_irq(struct kvm_lapic *apic) 994 { 995 - int vector; 996 997 - vector = apic_lvt_vector(apic, APIC_LVTT); 998 - return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); 999 } 1000 1001 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) ··· 1104 { 1105 struct kvm_lapic *apic = vcpu->arch.apic; 1106 1107 - if (apic && apic_lvt_enabled(apic, APIC_LVTT) && 1108 - atomic_read(&apic->timer.pending) > 0) { 1109 - if (__inject_apic_timer_irq(apic)) 1110 atomic_dec(&apic->timer.pending); 1111 } 1112 }
··· 130 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; 131 } 132 133 + static inline int apic_lvt_nmi_mode(u32 lvt_val) 134 + { 135 + return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; 136 + } 137 + 138 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { 139 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ 140 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ ··· 354 355 case APIC_DM_NMI: 356 kvm_inject_nmi(vcpu); 357 + kvm_vcpu_kick(vcpu); 358 break; 359 360 case APIC_DM_INIT: ··· 378 vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; 379 kvm_vcpu_kick(vcpu); 380 } 381 + break; 382 + 383 + case APIC_DM_EXTINT: 384 + /* 385 + * Should only be called by kvm_apic_local_deliver() with LVT0, 386 + * before NMI watchdog was enabled. Already handled by 387 + * kvm_apic_accept_pic_intr(). 388 + */ 389 break; 390 391 default: ··· 663 apic->timer.period))); 664 } 665 666 + static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) 667 + { 668 + int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); 669 + 670 + if (apic_lvt_nmi_mode(lvt0_val)) { 671 + if (!nmi_wd_enabled) { 672 + apic_debug("Receive NMI setting on APIC_LVT0 " 673 + "for cpu %d\n", apic->vcpu->vcpu_id); 674 + apic->vcpu->kvm->arch.vapics_in_nmi_mode++; 675 + } 676 + } else if (nmi_wd_enabled) 677 + apic->vcpu->kvm->arch.vapics_in_nmi_mode--; 678 + } 679 + 680 static void apic_mmio_write(struct kvm_io_device *this, 681 gpa_t address, int len, const void *data) 682 { ··· 743 apic_set_reg(apic, APIC_ICR2, val & 0xff000000); 744 break; 745 746 + case APIC_LVT0: 747 + apic_manage_nmi_watchdog(apic, val); 748 case APIC_LVTT: 749 case APIC_LVTTHMR: 750 case APIC_LVTPC: 751 case APIC_LVT1: 752 case APIC_LVTERR: 753 /* TODO: Check vector */ ··· 961 return 0; 962 } 963 964 + static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) 965 { 966 + u32 reg = apic_get_reg(apic, lvt_type); 967 + int vector, mode, trig_mode; 968 969 + if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { 970 + vector = reg & APIC_VECTOR_MASK; 971 + mode = reg & APIC_MODE_MASK; 972 + trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; 973 + return __apic_accept_irq(apic, mode, vector, 1, trig_mode); 974 + } 975 + return 0; 976 + } 977 + 978 + void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) 979 + { 980 + struct kvm_lapic *apic = vcpu->arch.apic; 981 + 982 + if (apic) 983 + kvm_apic_local_deliver(apic, APIC_LVT0); 984 } 985 986 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) ··· 1061 { 1062 struct kvm_lapic *apic = vcpu->arch.apic; 1063 1064 + if (apic && atomic_read(&apic->timer.pending) > 0) { 1065 + if (kvm_apic_local_deliver(apic, APIC_LVTT)) 1066 atomic_dec(&apic->timer.pending); 1067 } 1068 }
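Editorial note: on the LAPIC side, NMI-watchdog mode is decided by the delivery-mode and mask bits of LVT0, and local delivery pulls vector, mode and trigger out of the same LVT word. A standalone sketch of the bit test (the constants mirror the APIC register layout, vector in bits 0-7, delivery mode in bits 8-10, mask in bit 16; this is not kernel code):

#include <stdio.h>

#define APIC_VECTOR_MASK        0x000FF
#define APIC_MODE_MASK          0x00700
#define APIC_DM_FIXED           0x00000
#define APIC_DM_NMI             0x00400
#define APIC_LVT_MASKED         0x10000

static int lvt_nmi_mode(unsigned int lvt)
{
        return (lvt & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

int main(void)
{
        unsigned int lvt0_nmi    = APIC_DM_NMI;                  /* NMI, unmasked */
        unsigned int lvt0_masked = APIC_DM_NMI | APIC_LVT_MASKED;
        unsigned int lvtt_fixed  = APIC_DM_FIXED | 0xef;         /* fixed, vector 0xef */

        printf("NMI unmasked : %d\n", lvt_nmi_mode(lvt0_nmi));    /* 1 */
        printf("NMI masked   : %d\n", lvt_nmi_mode(lvt0_masked)); /* 0 */
        printf("fixed 0x%02x  : %d\n",
               lvtt_fixed & APIC_VECTOR_MASK, lvt_nmi_mode(lvtt_fixed)); /* 0 */
        return 0;
}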
+357 -89
arch/x86/kvm/mmu.c
··· 17 * 18 */ 19 20 - #include "vmx.h" 21 #include "mmu.h" 22 23 #include <linux/kvm_host.h> ··· 32 #include <asm/page.h> 33 #include <asm/cmpxchg.h> 34 #include <asm/io.h> 35 36 /* 37 * When setting this variable to true it enables Two-Dimensional-Paging ··· 168 static u64 __read_mostly shadow_user_mask; 169 static u64 __read_mostly shadow_accessed_mask; 170 static u64 __read_mostly shadow_dirty_mask; 171 172 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) 173 { ··· 184 EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); 185 186 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 187 - u64 dirty_mask, u64 nx_mask, u64 x_mask) 188 { 189 shadow_user_mask = user_mask; 190 shadow_accessed_mask = accessed_mask; 191 shadow_dirty_mask = dirty_mask; 192 shadow_nx_mask = nx_mask; 193 shadow_x_mask = x_mask; 194 } 195 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); 196 ··· 386 { 387 int *write_count; 388 389 - write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); 390 *write_count += 1; 391 } 392 ··· 396 { 397 int *write_count; 398 399 - write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); 400 *write_count -= 1; 401 WARN_ON(*write_count < 0); 402 } 403 404 static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) 405 { 406 - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 407 int *largepage_idx; 408 409 if (slot) { 410 largepage_idx = slot_largepage_idx(gfn, slot); 411 return *largepage_idx; ··· 621 return NULL; 622 } 623 624 - static void rmap_write_protect(struct kvm *kvm, u64 gfn) 625 { 626 unsigned long *rmapp; 627 u64 *spte; ··· 667 spte = rmap_next(kvm, rmapp, spte); 668 } 669 670 - if (write_protected) 671 - kvm_flush_remote_tlbs(kvm); 672 } 673 674 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) ··· 793 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 794 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 795 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 796 ASSERT(is_empty_shadow_page(sp->spt)); 797 - sp->slot_bitmap = 0; 798 sp->multimapped = 0; 799 sp->parent_pte = parent_pte; 800 --vcpu->kvm->arch.n_free_mmu_pages; 801 return sp; ··· 909 struct kvm_mmu_page *sp = page_header(__pa(spte)); 910 911 index = spte - sp->spt; 912 - __set_bit(index, sp->unsync_child_bitmap); 913 - sp->unsync_children = 1; 914 } 915 916 static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) ··· 938 939 static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 940 { 941 - sp->unsync_children = 1; 942 kvm_mmu_update_parents_unsync(sp); 943 return 1; 944 } ··· 968 { 969 } 970 971 #define for_each_unsync_children(bitmap, idx) \ 972 for (idx = find_first_bit(bitmap, 512); \ 973 idx < 512; \ 974 idx = find_next_bit(bitmap, 512, idx+1)) 975 976 - static int mmu_unsync_walk(struct kvm_mmu_page *sp, 977 - struct kvm_unsync_walk *walker) 978 { 979 - int i, ret; 980 981 - if (!sp->unsync_children) 982 - return 0; 983 984 for_each_unsync_children(sp->unsync_child_bitmap, i) { 985 u64 ent = sp->spt[i]; 986 987 - if (is_shadow_present_pte(ent)) { 988 struct kvm_mmu_page *child; 989 child = page_header(ent & PT64_BASE_ADDR_MASK); 990 991 if (child->unsync_children) { 992 - ret = mmu_unsync_walk(child, walker); 993 - if (ret) 994 return ret; 995 - __clear_bit(i, sp->unsync_child_bitmap); 996 } 997 998 if (child->unsync) { 999 - ret = walker->entry(child, walker); 1000 - __clear_bit(i, sp->unsync_child_bitmap); 1001 - if (ret) 1002 - return ret; 1003 } 1004 } 1005 } ··· 1035 if 
(find_first_bit(sp->unsync_child_bitmap, 512) == 512) 1036 sp->unsync_children = 0; 1037 1038 - return 0; 1039 } 1040 1041 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) ··· 1068 return NULL; 1069 } 1070 1071 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1072 { 1073 WARN_ON(!sp->unsync); 1074 sp->unsync = 0; 1075 --kvm->stat.mmu_unsync; 1076 } 1077 ··· 1092 return 1; 1093 } 1094 1095 - rmap_write_protect(vcpu->kvm, sp->gfn); 1096 kvm_unlink_unsync_page(vcpu->kvm, sp); 1097 if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1098 kvm_mmu_zap_page(vcpu->kvm, sp); ··· 1104 return 0; 1105 } 1106 1107 - struct sync_walker { 1108 - struct kvm_vcpu *vcpu; 1109 - struct kvm_unsync_walk walker; 1110 }; 1111 1112 - static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) 1113 - { 1114 - struct sync_walker *sync_walk = container_of(walk, struct sync_walker, 1115 - walker); 1116 - struct kvm_vcpu *vcpu = sync_walk->vcpu; 1117 1118 - kvm_sync_page(vcpu, sp); 1119 - return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); 1120 } 1121 1122 - static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1123 { 1124 - struct sync_walker walker = { 1125 - .walker = { .entry = mmu_sync_fn, }, 1126 - .vcpu = vcpu, 1127 - }; 1128 1129 - while (mmu_unsync_walk(sp, &walker.walker)) 1130 cond_resched_lock(&vcpu->kvm->mmu_lock); 1131 } 1132 1133 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, ··· 1244 sp->role = role; 1245 hlist_add_head(&sp->hash_link, bucket); 1246 if (!metaphysical) { 1247 - rmap_write_protect(vcpu->kvm, gfn); 1248 account_shadowed(vcpu->kvm, gfn); 1249 } 1250 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) ··· 1269 if (level == PT32E_ROOT_LEVEL) { 1270 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; 1271 shadow_addr &= PT64_BASE_ADDR_MASK; 1272 --level; 1273 } 1274 ··· 1355 } 1356 } 1357 1358 - struct zap_walker { 1359 - struct kvm_unsync_walk walker; 1360 - struct kvm *kvm; 1361 - int zapped; 1362 - }; 1363 - 1364 - static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) 1365 { 1366 - struct zap_walker *zap_walk = container_of(walk, struct zap_walker, 1367 - walker); 1368 - kvm_mmu_zap_page(zap_walk->kvm, sp); 1369 - zap_walk->zapped = 1; 1370 - return 0; 1371 - } 1372 1373 - static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) 1374 - { 1375 - struct zap_walker walker = { 1376 - .walker = { .entry = mmu_zap_fn, }, 1377 - .kvm = kvm, 1378 - .zapped = 0, 1379 - }; 1380 - 1381 - if (sp->role.level == PT_PAGE_TABLE_LEVEL) 1382 return 0; 1383 - mmu_unsync_walk(sp, &walker.walker); 1384 - return walker.zapped; 1385 } 1386 1387 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) ··· 1476 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); 1477 struct kvm_mmu_page *sp = page_header(__pa(pte)); 1478 1479 - __set_bit(slot, &sp->slot_bitmap); 1480 } 1481 1482 static void mmu_convert_notrap(struct kvm_mmu_page *sp) ··· 1507 return page; 1508 } 1509 1510 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1511 { 1512 unsigned index; ··· 1627 if (s->role.word != sp->role.word) 1628 return 1; 1629 } 1630 - kvm_mmu_mark_parents_unsync(vcpu, sp); 1631 ++vcpu->kvm->stat.mmu_unsync; 1632 sp->unsync = 1; 1633 mmu_convert_notrap(sp); 1634 return 0; 1635 } ··· 1661 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1662 unsigned pte_access, int user_fault, 1663 int write_fault, int 
dirty, int largepage, 1664 - gfn_t gfn, pfn_t pfn, bool speculative, 1665 bool can_unsync) 1666 { 1667 u64 spte; 1668 int ret = 0; 1669 /* 1670 * We don't set the accessed bit, since we sometimes want to see 1671 * whether the guest actually used the pte (in order to detect ··· 1697 spte |= shadow_user_mask; 1698 if (largepage) 1699 spte |= PT_PAGE_SIZE_MASK; 1700 1701 spte |= (u64)pfn << PAGE_SHIFT; 1702 ··· 1715 } 1716 1717 spte |= PT_WRITABLE_MASK; 1718 1719 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { 1720 pgprintk("%s: found shadow page for %lx, marking ro\n", ··· 1746 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1747 unsigned pt_access, unsigned pte_access, 1748 int user_fault, int write_fault, int dirty, 1749 - int *ptwrite, int largepage, gfn_t gfn, 1750 - pfn_t pfn, bool speculative) 1751 { 1752 int was_rmapped = 0; 1753 int was_writeble = is_writeble_pte(*shadow_pte); ··· 1780 } 1781 } 1782 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1783 - dirty, largepage, gfn, pfn, speculative, true)) { 1784 if (write_fault) 1785 *ptwrite = 1; 1786 kvm_x86_ops->tlb_flush(vcpu); ··· 1837 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { 1838 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 1839 0, walk->write, 1, &walk->pt_write, 1840 - walk->largepage, gfn, walk->pfn, false); 1841 ++vcpu->stat.pf_fixed; 1842 return 1; 1843 } ··· 2024 } 2025 } 2026 2027 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) 2028 { 2029 spin_lock(&vcpu->kvm->mmu_lock); 2030 mmu_sync_roots(vcpu); 2031 spin_unlock(&vcpu->kvm->mmu_lock); 2032 } 2033 ··· 2445 } 2446 2447 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 2448 - const u8 *new, int bytes) 2449 { 2450 gfn_t gfn = gpa >> PAGE_SHIFT; 2451 struct kvm_mmu_page *sp; ··· 2472 kvm_mmu_free_some_pages(vcpu); 2473 ++vcpu->kvm->stat.mmu_pte_write; 2474 kvm_mmu_audit(vcpu, "pre pte write"); 2475 - if (gfn == vcpu->arch.last_pt_write_gfn 2476 - && !last_updated_pte_accessed(vcpu)) { 2477 - ++vcpu->arch.last_pt_write_count; 2478 - if (vcpu->arch.last_pt_write_count >= 3) 2479 - flooded = 1; 2480 - } else { 2481 - vcpu->arch.last_pt_write_gfn = gfn; 2482 - vcpu->arch.last_pt_write_count = 1; 2483 - vcpu->arch.last_pte_updated = NULL; 2484 } 2485 index = kvm_page_table_hashfn(gfn); 2486 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; ··· 2622 2623 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) 2624 { 2625 - spin_lock(&vcpu->kvm->mmu_lock); 2626 vcpu->arch.mmu.invlpg(vcpu, gva); 2627 - spin_unlock(&vcpu->kvm->mmu_lock); 2628 kvm_mmu_flush_tlb(vcpu); 2629 ++vcpu->stat.invlpg; 2630 } ··· 2719 int i; 2720 u64 *pt; 2721 2722 - if (!test_bit(slot, &sp->slot_bitmap)) 2723 continue; 2724 2725 pt = sp->spt; ··· 3128 if (sp->role.metaphysical) 3129 continue; 3130 3131 - slot = gfn_to_memslot(vcpu->kvm, sp->gfn); 3132 gfn = unalias_gfn(vcpu->kvm, sp->gfn); 3133 rmapp = &slot->rmap[gfn - slot->base_gfn]; 3134 if (*rmapp) 3135 printk(KERN_ERR "%s: (%s) shadow page has writable"
··· 17 * 18 */ 19 20 #include "mmu.h" 21 22 #include <linux/kvm_host.h> ··· 33 #include <asm/page.h> 34 #include <asm/cmpxchg.h> 35 #include <asm/io.h> 36 + #include <asm/vmx.h> 37 38 /* 39 * When setting this variable to true it enables Two-Dimensional-Paging ··· 168 static u64 __read_mostly shadow_user_mask; 169 static u64 __read_mostly shadow_accessed_mask; 170 static u64 __read_mostly shadow_dirty_mask; 171 + static u64 __read_mostly shadow_mt_mask; 172 173 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) 174 { ··· 183 EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); 184 185 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, 186 + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) 187 { 188 shadow_user_mask = user_mask; 189 shadow_accessed_mask = accessed_mask; 190 shadow_dirty_mask = dirty_mask; 191 shadow_nx_mask = nx_mask; 192 shadow_x_mask = x_mask; 193 + shadow_mt_mask = mt_mask; 194 } 195 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); 196 ··· 384 { 385 int *write_count; 386 387 + gfn = unalias_gfn(kvm, gfn); 388 + write_count = slot_largepage_idx(gfn, 389 + gfn_to_memslot_unaliased(kvm, gfn)); 390 *write_count += 1; 391 } 392 ··· 392 { 393 int *write_count; 394 395 + gfn = unalias_gfn(kvm, gfn); 396 + write_count = slot_largepage_idx(gfn, 397 + gfn_to_memslot_unaliased(kvm, gfn)); 398 *write_count -= 1; 399 WARN_ON(*write_count < 0); 400 } 401 402 static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) 403 { 404 + struct kvm_memory_slot *slot; 405 int *largepage_idx; 406 407 + gfn = unalias_gfn(kvm, gfn); 408 + slot = gfn_to_memslot_unaliased(kvm, gfn); 409 if (slot) { 410 largepage_idx = slot_largepage_idx(gfn, slot); 411 return *largepage_idx; ··· 613 return NULL; 614 } 615 616 + static int rmap_write_protect(struct kvm *kvm, u64 gfn) 617 { 618 unsigned long *rmapp; 619 u64 *spte; ··· 659 spte = rmap_next(kvm, rmapp, spte); 660 } 661 662 + return write_protected; 663 } 664 665 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) ··· 786 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 787 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 788 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 789 + INIT_LIST_HEAD(&sp->oos_link); 790 ASSERT(is_empty_shadow_page(sp->spt)); 791 + bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 792 sp->multimapped = 0; 793 + sp->global = 1; 794 sp->parent_pte = parent_pte; 795 --vcpu->kvm->arch.n_free_mmu_pages; 796 return sp; ··· 900 struct kvm_mmu_page *sp = page_header(__pa(spte)); 901 902 index = spte - sp->spt; 903 + if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) 904 + sp->unsync_children++; 905 + WARN_ON(!sp->unsync_children); 906 } 907 908 static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) ··· 928 929 static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 930 { 931 kvm_mmu_update_parents_unsync(sp); 932 return 1; 933 } ··· 959 { 960 } 961 962 + #define KVM_PAGE_ARRAY_NR 16 963 + 964 + struct kvm_mmu_pages { 965 + struct mmu_page_and_offset { 966 + struct kvm_mmu_page *sp; 967 + unsigned int idx; 968 + } page[KVM_PAGE_ARRAY_NR]; 969 + unsigned int nr; 970 + }; 971 + 972 #define for_each_unsync_children(bitmap, idx) \ 973 for (idx = find_first_bit(bitmap, 512); \ 974 idx < 512; \ 975 idx = find_next_bit(bitmap, 512, idx+1)) 976 977 + int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, 978 + int idx) 979 { 980 + int i; 981 982 + if (sp->unsync) 983 + for (i=0; i < pvec->nr; i++) 984 + if 
(pvec->page[i].sp == sp) 985 + return 0; 986 + 987 + pvec->page[pvec->nr].sp = sp; 988 + pvec->page[pvec->nr].idx = idx; 989 + pvec->nr++; 990 + return (pvec->nr == KVM_PAGE_ARRAY_NR); 991 + } 992 + 993 + static int __mmu_unsync_walk(struct kvm_mmu_page *sp, 994 + struct kvm_mmu_pages *pvec) 995 + { 996 + int i, ret, nr_unsync_leaf = 0; 997 998 for_each_unsync_children(sp->unsync_child_bitmap, i) { 999 u64 ent = sp->spt[i]; 1000 1001 + if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { 1002 struct kvm_mmu_page *child; 1003 child = page_header(ent & PT64_BASE_ADDR_MASK); 1004 1005 if (child->unsync_children) { 1006 + if (mmu_pages_add(pvec, child, i)) 1007 + return -ENOSPC; 1008 + 1009 + ret = __mmu_unsync_walk(child, pvec); 1010 + if (!ret) 1011 + __clear_bit(i, sp->unsync_child_bitmap); 1012 + else if (ret > 0) 1013 + nr_unsync_leaf += ret; 1014 + else 1015 return ret; 1016 } 1017 1018 if (child->unsync) { 1019 + nr_unsync_leaf++; 1020 + if (mmu_pages_add(pvec, child, i)) 1021 + return -ENOSPC; 1022 } 1023 } 1024 } ··· 998 if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) 999 sp->unsync_children = 0; 1000 1001 + return nr_unsync_leaf; 1002 + } 1003 + 1004 + static int mmu_unsync_walk(struct kvm_mmu_page *sp, 1005 + struct kvm_mmu_pages *pvec) 1006 + { 1007 + if (!sp->unsync_children) 1008 + return 0; 1009 + 1010 + mmu_pages_add(pvec, sp, 0); 1011 + return __mmu_unsync_walk(sp, pvec); 1012 } 1013 1014 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) ··· 1021 return NULL; 1022 } 1023 1024 + static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) 1025 + { 1026 + list_del(&sp->oos_link); 1027 + --kvm->stat.mmu_unsync_global; 1028 + } 1029 + 1030 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1031 { 1032 WARN_ON(!sp->unsync); 1033 sp->unsync = 0; 1034 + if (sp->global) 1035 + kvm_unlink_unsync_global(kvm, sp); 1036 --kvm->stat.mmu_unsync; 1037 } 1038 ··· 1037 return 1; 1038 } 1039 1040 + if (rmap_write_protect(vcpu->kvm, sp->gfn)) 1041 + kvm_flush_remote_tlbs(vcpu->kvm); 1042 kvm_unlink_unsync_page(vcpu->kvm, sp); 1043 if (vcpu->arch.mmu.sync_page(vcpu, sp)) { 1044 kvm_mmu_zap_page(vcpu->kvm, sp); ··· 1048 return 0; 1049 } 1050 1051 + struct mmu_page_path { 1052 + struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; 1053 + unsigned int idx[PT64_ROOT_LEVEL-1]; 1054 }; 1055 1056 + #define for_each_sp(pvec, sp, parents, i) \ 1057 + for (i = mmu_pages_next(&pvec, &parents, -1), \ 1058 + sp = pvec.page[i].sp; \ 1059 + i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ 1060 + i = mmu_pages_next(&pvec, &parents, i)) 1061 1062 + int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, 1063 + int i) 1064 + { 1065 + int n; 1066 + 1067 + for (n = i+1; n < pvec->nr; n++) { 1068 + struct kvm_mmu_page *sp = pvec->page[n].sp; 1069 + 1070 + if (sp->role.level == PT_PAGE_TABLE_LEVEL) { 1071 + parents->idx[0] = pvec->page[n].idx; 1072 + return n; 1073 + } 1074 + 1075 + parents->parent[sp->role.level-2] = sp; 1076 + parents->idx[sp->role.level-1] = pvec->page[n].idx; 1077 + } 1078 + 1079 + return n; 1080 } 1081 1082 + void mmu_pages_clear_parents(struct mmu_page_path *parents) 1083 { 1084 + struct kvm_mmu_page *sp; 1085 + unsigned int level = 0; 1086 1087 + do { 1088 + unsigned int idx = parents->idx[level]; 1089 + 1090 + sp = parents->parent[level]; 1091 + if (!sp) 1092 + return; 1093 + 1094 + --sp->unsync_children; 1095 + WARN_ON((int)sp->unsync_children < 0); 1096 + __clear_bit(idx, 
sp->unsync_child_bitmap); 1097 + level++; 1098 + } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); 1099 + } 1100 + 1101 + static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, 1102 + struct mmu_page_path *parents, 1103 + struct kvm_mmu_pages *pvec) 1104 + { 1105 + parents->parent[parent->role.level-1] = NULL; 1106 + pvec->nr = 0; 1107 + } 1108 + 1109 + static void mmu_sync_children(struct kvm_vcpu *vcpu, 1110 + struct kvm_mmu_page *parent) 1111 + { 1112 + int i; 1113 + struct kvm_mmu_page *sp; 1114 + struct mmu_page_path parents; 1115 + struct kvm_mmu_pages pages; 1116 + 1117 + kvm_mmu_pages_init(parent, &parents, &pages); 1118 + while (mmu_unsync_walk(parent, &pages)) { 1119 + int protected = 0; 1120 + 1121 + for_each_sp(pages, sp, parents, i) 1122 + protected |= rmap_write_protect(vcpu->kvm, sp->gfn); 1123 + 1124 + if (protected) 1125 + kvm_flush_remote_tlbs(vcpu->kvm); 1126 + 1127 + for_each_sp(pages, sp, parents, i) { 1128 + kvm_sync_page(vcpu, sp); 1129 + mmu_pages_clear_parents(&parents); 1130 + } 1131 cond_resched_lock(&vcpu->kvm->mmu_lock); 1132 + kvm_mmu_pages_init(parent, &parents, &pages); 1133 + } 1134 } 1135 1136 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, ··· 1129 sp->role = role; 1130 hlist_add_head(&sp->hash_link, bucket); 1131 if (!metaphysical) { 1132 + if (rmap_write_protect(vcpu->kvm, gfn)) 1133 + kvm_flush_remote_tlbs(vcpu->kvm); 1134 account_shadowed(vcpu->kvm, gfn); 1135 } 1136 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) ··· 1153 if (level == PT32E_ROOT_LEVEL) { 1154 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; 1155 shadow_addr &= PT64_BASE_ADDR_MASK; 1156 + if (!shadow_addr) 1157 + return 1; 1158 --level; 1159 } 1160 ··· 1237 } 1238 } 1239 1240 + static int mmu_zap_unsync_children(struct kvm *kvm, 1241 + struct kvm_mmu_page *parent) 1242 { 1243 + int i, zapped = 0; 1244 + struct mmu_page_path parents; 1245 + struct kvm_mmu_pages pages; 1246 1247 + if (parent->role.level == PT_PAGE_TABLE_LEVEL) 1248 return 0; 1249 + 1250 + kvm_mmu_pages_init(parent, &parents, &pages); 1251 + while (mmu_unsync_walk(parent, &pages)) { 1252 + struct kvm_mmu_page *sp; 1253 + 1254 + for_each_sp(pages, sp, parents, i) { 1255 + kvm_mmu_zap_page(kvm, sp); 1256 + mmu_pages_clear_parents(&parents); 1257 + } 1258 + zapped += pages.nr; 1259 + kvm_mmu_pages_init(parent, &parents, &pages); 1260 + } 1261 + 1262 + return zapped; 1263 } 1264 1265 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) ··· 1362 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); 1363 struct kvm_mmu_page *sp = page_header(__pa(pte)); 1364 1365 + __set_bit(slot, sp->slot_bitmap); 1366 } 1367 1368 static void mmu_convert_notrap(struct kvm_mmu_page *sp) ··· 1393 return page; 1394 } 1395 1396 + /* 1397 + * The function is based on mtrr_type_lookup() in 1398 + * arch/x86/kernel/cpu/mtrr/generic.c 1399 + */ 1400 + static int get_mtrr_type(struct mtrr_state_type *mtrr_state, 1401 + u64 start, u64 end) 1402 + { 1403 + int i; 1404 + u64 base, mask; 1405 + u8 prev_match, curr_match; 1406 + int num_var_ranges = KVM_NR_VAR_MTRR; 1407 + 1408 + if (!mtrr_state->enabled) 1409 + return 0xFF; 1410 + 1411 + /* Make end inclusive end, instead of exclusive */ 1412 + end--; 1413 + 1414 + /* Look in fixed ranges. 
Just return the type as per start */ 1415 + if (mtrr_state->have_fixed && (start < 0x100000)) { 1416 + int idx; 1417 + 1418 + if (start < 0x80000) { 1419 + idx = 0; 1420 + idx += (start >> 16); 1421 + return mtrr_state->fixed_ranges[idx]; 1422 + } else if (start < 0xC0000) { 1423 + idx = 1 * 8; 1424 + idx += ((start - 0x80000) >> 14); 1425 + return mtrr_state->fixed_ranges[idx]; 1426 + } else if (start < 0x1000000) { 1427 + idx = 3 * 8; 1428 + idx += ((start - 0xC0000) >> 12); 1429 + return mtrr_state->fixed_ranges[idx]; 1430 + } 1431 + } 1432 + 1433 + /* 1434 + * Look in variable ranges 1435 + * Look of multiple ranges matching this address and pick type 1436 + * as per MTRR precedence 1437 + */ 1438 + if (!(mtrr_state->enabled & 2)) 1439 + return mtrr_state->def_type; 1440 + 1441 + prev_match = 0xFF; 1442 + for (i = 0; i < num_var_ranges; ++i) { 1443 + unsigned short start_state, end_state; 1444 + 1445 + if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) 1446 + continue; 1447 + 1448 + base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + 1449 + (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); 1450 + mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + 1451 + (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); 1452 + 1453 + start_state = ((start & mask) == (base & mask)); 1454 + end_state = ((end & mask) == (base & mask)); 1455 + if (start_state != end_state) 1456 + return 0xFE; 1457 + 1458 + if ((start & mask) != (base & mask)) 1459 + continue; 1460 + 1461 + curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; 1462 + if (prev_match == 0xFF) { 1463 + prev_match = curr_match; 1464 + continue; 1465 + } 1466 + 1467 + if (prev_match == MTRR_TYPE_UNCACHABLE || 1468 + curr_match == MTRR_TYPE_UNCACHABLE) 1469 + return MTRR_TYPE_UNCACHABLE; 1470 + 1471 + if ((prev_match == MTRR_TYPE_WRBACK && 1472 + curr_match == MTRR_TYPE_WRTHROUGH) || 1473 + (prev_match == MTRR_TYPE_WRTHROUGH && 1474 + curr_match == MTRR_TYPE_WRBACK)) { 1475 + prev_match = MTRR_TYPE_WRTHROUGH; 1476 + curr_match = MTRR_TYPE_WRTHROUGH; 1477 + } 1478 + 1479 + if (prev_match != curr_match) 1480 + return MTRR_TYPE_UNCACHABLE; 1481 + } 1482 + 1483 + if (prev_match != 0xFF) 1484 + return prev_match; 1485 + 1486 + return mtrr_state->def_type; 1487 + } 1488 + 1489 + static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) 1490 + { 1491 + u8 mtrr; 1492 + 1493 + mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, 1494 + (gfn << PAGE_SHIFT) + PAGE_SIZE); 1495 + if (mtrr == 0xfe || mtrr == 0xff) 1496 + mtrr = MTRR_TYPE_WRBACK; 1497 + return mtrr; 1498 + } 1499 + 1500 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1501 { 1502 unsigned index; ··· 1409 if (s->role.word != sp->role.word) 1410 return 1; 1411 } 1412 ++vcpu->kvm->stat.mmu_unsync; 1413 sp->unsync = 1; 1414 + 1415 + if (sp->global) { 1416 + list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); 1417 + ++vcpu->kvm->stat.mmu_unsync_global; 1418 + } else 1419 + kvm_mmu_mark_parents_unsync(vcpu, sp); 1420 + 1421 mmu_convert_notrap(sp); 1422 return 0; 1423 } ··· 1437 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1438 unsigned pte_access, int user_fault, 1439 int write_fault, int dirty, int largepage, 1440 + int global, gfn_t gfn, pfn_t pfn, bool speculative, 1441 bool can_unsync) 1442 { 1443 u64 spte; 1444 int ret = 0; 1445 + u64 mt_mask = shadow_mt_mask; 1446 + struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); 1447 + 1448 + if (!(vcpu->arch.cr4 & X86_CR4_PGE)) 1449 + global = 0; 1450 + if (!global && 
sp->global) { 1451 + sp->global = 0; 1452 + if (sp->unsync) { 1453 + kvm_unlink_unsync_global(vcpu->kvm, sp); 1454 + kvm_mmu_mark_parents_unsync(vcpu, sp); 1455 + } 1456 + } 1457 + 1458 /* 1459 * We don't set the accessed bit, since we sometimes want to see 1460 * whether the guest actually used the pte (in order to detect ··· 1460 spte |= shadow_user_mask; 1461 if (largepage) 1462 spte |= PT_PAGE_SIZE_MASK; 1463 + if (mt_mask) { 1464 + mt_mask = get_memory_type(vcpu, gfn) << 1465 + kvm_x86_ops->get_mt_mask_shift(); 1466 + spte |= mt_mask; 1467 + } 1468 1469 spte |= (u64)pfn << PAGE_SHIFT; 1470 ··· 1473 } 1474 1475 spte |= PT_WRITABLE_MASK; 1476 + 1477 + /* 1478 + * Optimization: for pte sync, if spte was writable the hash 1479 + * lookup is unnecessary (and expensive). Write protection 1480 + * is responsibility of mmu_get_page / kvm_sync_page. 1481 + * Same reasoning can be applied to dirty page accounting. 1482 + */ 1483 + if (!can_unsync && is_writeble_pte(*shadow_pte)) 1484 + goto set_pte; 1485 1486 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { 1487 pgprintk("%s: found shadow page for %lx, marking ro\n", ··· 1495 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1496 unsigned pt_access, unsigned pte_access, 1497 int user_fault, int write_fault, int dirty, 1498 + int *ptwrite, int largepage, int global, 1499 + gfn_t gfn, pfn_t pfn, bool speculative) 1500 { 1501 int was_rmapped = 0; 1502 int was_writeble = is_writeble_pte(*shadow_pte); ··· 1529 } 1530 } 1531 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1532 + dirty, largepage, global, gfn, pfn, speculative, true)) { 1533 if (write_fault) 1534 *ptwrite = 1; 1535 kvm_x86_ops->tlb_flush(vcpu); ··· 1586 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { 1587 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 1588 0, walk->write, 1, &walk->pt_write, 1589 + walk->largepage, 0, gfn, walk->pfn, false); 1590 ++vcpu->stat.pf_fixed; 1591 return 1; 1592 } ··· 1773 } 1774 } 1775 1776 + static void mmu_sync_global(struct kvm_vcpu *vcpu) 1777 + { 1778 + struct kvm *kvm = vcpu->kvm; 1779 + struct kvm_mmu_page *sp, *n; 1780 + 1781 + list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) 1782 + kvm_sync_page(vcpu, sp); 1783 + } 1784 + 1785 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) 1786 { 1787 spin_lock(&vcpu->kvm->mmu_lock); 1788 mmu_sync_roots(vcpu); 1789 + spin_unlock(&vcpu->kvm->mmu_lock); 1790 + } 1791 + 1792 + void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) 1793 + { 1794 + spin_lock(&vcpu->kvm->mmu_lock); 1795 + mmu_sync_global(vcpu); 1796 spin_unlock(&vcpu->kvm->mmu_lock); 1797 } 1798 ··· 2178 } 2179 2180 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 2181 + const u8 *new, int bytes, 2182 + bool guest_initiated) 2183 { 2184 gfn_t gfn = gpa >> PAGE_SHIFT; 2185 struct kvm_mmu_page *sp; ··· 2204 kvm_mmu_free_some_pages(vcpu); 2205 ++vcpu->kvm->stat.mmu_pte_write; 2206 kvm_mmu_audit(vcpu, "pre pte write"); 2207 + if (guest_initiated) { 2208 + if (gfn == vcpu->arch.last_pt_write_gfn 2209 + && !last_updated_pte_accessed(vcpu)) { 2210 + ++vcpu->arch.last_pt_write_count; 2211 + if (vcpu->arch.last_pt_write_count >= 3) 2212 + flooded = 1; 2213 + } else { 2214 + vcpu->arch.last_pt_write_gfn = gfn; 2215 + vcpu->arch.last_pt_write_count = 1; 2216 + vcpu->arch.last_pte_updated = NULL; 2217 + } 2218 } 2219 index = kvm_page_table_hashfn(gfn); 2220 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; ··· 2352 2353 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) 2354 { 2355 
vcpu->arch.mmu.invlpg(vcpu, gva); 2356 kvm_mmu_flush_tlb(vcpu); 2357 ++vcpu->stat.invlpg; 2358 } ··· 2451 int i; 2452 u64 *pt; 2453 2454 + if (!test_bit(slot, sp->slot_bitmap)) 2455 continue; 2456 2457 pt = sp->spt; ··· 2860 if (sp->role.metaphysical) 2861 continue; 2862 2863 gfn = unalias_gfn(vcpu->kvm, sp->gfn); 2864 + slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); 2865 rmapp = &slot->rmap[gfn - slot->base_gfn]; 2866 if (*rmapp) 2867 printk(KERN_ERR "%s: (%s) shadow page has writable"
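For reference, a stand-alone sketch (not part of the patch) of the precedence rules the new get_mtrr_type() applies when two variable MTRR ranges overlap: uncacheable always wins, write-back combined with write-through degrades to write-through, and any other mismatch is treated as uncacheable. The type values are the architectural MTRR encodings.

/* Illustrative sketch only -- mirrors the overlap handling in get_mtrr_type(). */
#include <stdint.h>

#define MTRR_TYPE_UNCACHABLE	0
#define MTRR_TYPE_WRTHROUGH	4
#define MTRR_TYPE_WRBACK	6

static uint8_t combine_var_mtrr_types(uint8_t prev, uint8_t curr)
{
	/* UC dominates any other type. */
	if (prev == MTRR_TYPE_UNCACHABLE || curr == MTRR_TYPE_UNCACHABLE)
		return MTRR_TYPE_UNCACHABLE;

	/* WB overlapping WT degrades to WT. */
	if ((prev == MTRR_TYPE_WRBACK && curr == MTRR_TYPE_WRTHROUGH) ||
	    (prev == MTRR_TYPE_WRTHROUGH && curr == MTRR_TYPE_WRBACK))
		return MTRR_TYPE_WRTHROUGH;

	/* Any other mismatch is undefined; treat it as UC. */
	if (prev != curr)
		return MTRR_TYPE_UNCACHABLE;

	return prev;
}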
+37 -7
arch/x86/kvm/paging_tmpl.h
··· 82 int *ptwrite; 83 pfn_t pfn; 84 u64 *sptep; 85 }; 86 87 static gfn_t gpte_to_gfn(pt_element_t gpte) ··· 223 if (ret) 224 goto walk; 225 pte |= PT_DIRTY_MASK; 226 - kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); 227 walker->ptes[walker->level - 1] = pte; 228 } 229 ··· 275 return; 276 kvm_get_pfn(pfn); 277 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 278 - gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), 279 pfn, true); 280 } 281 ··· 303 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, 304 sw->user_fault, sw->write_fault, 305 gw->ptes[gw->level-1] & PT_DIRTY_MASK, 306 - sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, 307 - false); 308 sw->sptep = sptep; 309 return 1; 310 } ··· 469 struct kvm_vcpu *vcpu, u64 addr, 470 u64 *sptep, int level) 471 { 472 473 - if (level == PT_PAGE_TABLE_LEVEL) { 474 - if (is_shadow_present_pte(*sptep)) 475 rmap_remove(vcpu->kvm, sptep); 476 set_shadow_pte(sptep, shadow_trap_nonpresent_pte); 477 return 1; 478 } ··· 495 496 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 497 { 498 struct shadow_walker walker = { 499 .walker = { .entry = FNAME(shadow_invlpg_entry), }, 500 }; 501 502 walk_shadow(&walker.walker, vcpu, gva); 503 } 504 505 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) ··· 610 nr_present++; 611 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 612 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 613 - is_dirty_pte(gpte), 0, gfn, 614 spte_to_pfn(sp->spt[i]), true, false); 615 } 616
··· 82 int *ptwrite; 83 pfn_t pfn; 84 u64 *sptep; 85 + gpa_t pte_gpa; 86 }; 87 88 static gfn_t gpte_to_gfn(pt_element_t gpte) ··· 222 if (ret) 223 goto walk; 224 pte |= PT_DIRTY_MASK; 225 + kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); 226 walker->ptes[walker->level - 1] = pte; 227 } 228 ··· 274 return; 275 kvm_get_pfn(pfn); 276 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 277 + gpte & PT_DIRTY_MASK, NULL, largepage, 278 + gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), 279 pfn, true); 280 } 281 ··· 301 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, 302 sw->user_fault, sw->write_fault, 303 gw->ptes[gw->level-1] & PT_DIRTY_MASK, 304 + sw->ptwrite, sw->largepage, 305 + gw->ptes[gw->level-1] & PT_GLOBAL_MASK, 306 + gw->gfn, sw->pfn, false); 307 sw->sptep = sptep; 308 return 1; 309 } ··· 466 struct kvm_vcpu *vcpu, u64 addr, 467 u64 *sptep, int level) 468 { 469 + struct shadow_walker *sw = 470 + container_of(_sw, struct shadow_walker, walker); 471 472 + /* FIXME: properly handle invlpg on large guest pages */ 473 + if (level == PT_PAGE_TABLE_LEVEL || 474 + ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { 475 + struct kvm_mmu_page *sp = page_header(__pa(sptep)); 476 + 477 + sw->pte_gpa = (sp->gfn << PAGE_SHIFT); 478 + sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); 479 + 480 + if (is_shadow_present_pte(*sptep)) { 481 rmap_remove(vcpu->kvm, sptep); 482 + if (is_large_pte(*sptep)) 483 + --vcpu->kvm->stat.lpages; 484 + } 485 set_shadow_pte(sptep, shadow_trap_nonpresent_pte); 486 return 1; 487 } ··· 480 481 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 482 { 483 + pt_element_t gpte; 484 struct shadow_walker walker = { 485 .walker = { .entry = FNAME(shadow_invlpg_entry), }, 486 + .pte_gpa = -1, 487 }; 488 489 + spin_lock(&vcpu->kvm->mmu_lock); 490 walk_shadow(&walker.walker, vcpu, gva); 491 + spin_unlock(&vcpu->kvm->mmu_lock); 492 + if (walker.pte_gpa == -1) 493 + return; 494 + if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, 495 + sizeof(pt_element_t))) 496 + return; 497 + if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { 498 + if (mmu_topup_memory_caches(vcpu)) 499 + return; 500 + kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, 501 + sizeof(pt_element_t), 0); 502 + } 503 } 504 505 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) ··· 580 nr_present++; 581 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 582 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 583 + is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, 584 spte_to_pfn(sp->spt[i]), true, false); 585 } 586
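For reference, the guest-pte address arithmetic the new invlpg path relies on, as a stand-alone sketch (not part of the patch): the shadow page records the guest frame it shadows in sp->gfn, and the index of the spte within the shadow page (sptep - sp->spt) gives the offset of the corresponding guest pte.

/* Illustrative sketch only: reconstructing the guest pte's physical
 * address from the shadowed gfn and the spte index. */
#include <stdint.h>

#define PAGE_SHIFT	12

static uint64_t guest_pte_gpa(uint64_t shadowed_gfn, unsigned int spte_index,
			      unsigned int guest_pte_size /* 4 or 8 bytes */)
{
	return (shadowed_gfn << PAGE_SHIFT) +
	       (uint64_t)spte_index * guest_pte_size;
}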
+29 -19
arch/x86/kvm/svm.c
··· 28 29 #include <asm/desc.h> 30 31 #define __ex(x) __kvm_handle_fault_on_reboot(x) 32 33 MODULE_AUTHOR("Qumranet"); ··· 247 248 static int has_svm(void) 249 { 250 - uint32_t eax, ebx, ecx, edx; 251 252 - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { 253 - printk(KERN_INFO "has_svm: not amd\n"); 254 return 0; 255 } 256 257 - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); 258 - if (eax < SVM_CPUID_FUNC) { 259 - printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); 260 - return 0; 261 - } 262 - 263 - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); 264 - if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { 265 - printk(KERN_DEBUG "has_svm: svm not available\n"); 266 - return 0; 267 - } 268 return 1; 269 } 270 271 static void svm_hardware_disable(void *garbage) 272 { 273 - uint64_t efer; 274 - 275 - wrmsrl(MSR_VM_HSAVE_PA, 0); 276 - rdmsrl(MSR_EFER, efer); 277 - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); 278 } 279 280 static void svm_hardware_enable(void *garbage) ··· 759 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 760 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 761 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 762 var->unusable = !var->present; 763 } 764 ··· 1102 rep = (io_info & SVM_IOIO_REP_MASK) != 0; 1103 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; 1104 1105 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); 1106 } 1107 ··· 1916 #endif 1917 } 1918 1919 static struct kvm_x86_ops svm_x86_ops = { 1920 .cpu_has_kvm_support = has_svm, 1921 .disabled_by_bios = is_disabled, ··· 1976 1977 .set_tss_addr = svm_set_tss_addr, 1978 .get_tdp_level = get_npt_level, 1979 }; 1980 1981 static int __init svm_init(void)
··· 28 29 #include <asm/desc.h> 30 31 + #include <asm/virtext.h> 32 + 33 #define __ex(x) __kvm_handle_fault_on_reboot(x) 34 35 MODULE_AUTHOR("Qumranet"); ··· 245 246 static int has_svm(void) 247 { 248 + const char *msg; 249 250 + if (!cpu_has_svm(&msg)) { 251 + printk(KERN_INFO "has_svn: %s\n", msg); 252 return 0; 253 } 254 255 return 1; 256 } 257 258 static void svm_hardware_disable(void *garbage) 259 { 260 + cpu_svm_disable(); 261 } 262 263 static void svm_hardware_enable(void *garbage) ··· 772 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 773 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 774 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 775 + 776 + /* 777 + * SVM always stores 0 for the 'G' bit in the CS selector in 778 + * the VMCB on a VMEXIT. This hurts cross-vendor migration: 779 + * Intel's VMENTRY has a check on the 'G' bit. 780 + */ 781 + if (seg == VCPU_SREG_CS) 782 + var->g = s->limit > 0xfffff; 783 + 784 + /* 785 + * Work around a bug where the busy flag in the tr selector 786 + * isn't exposed 787 + */ 788 + if (seg == VCPU_SREG_TR) 789 + var->type |= 0x2; 790 + 791 var->unusable = !var->present; 792 } 793 ··· 1099 rep = (io_info & SVM_IOIO_REP_MASK) != 0; 1100 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; 1101 1102 + skip_emulated_instruction(&svm->vcpu); 1103 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); 1104 } 1105 ··· 1912 #endif 1913 } 1914 1915 + static int svm_get_mt_mask_shift(void) 1916 + { 1917 + return 0; 1918 + } 1919 + 1920 static struct kvm_x86_ops svm_x86_ops = { 1921 .cpu_has_kvm_support = has_svm, 1922 .disabled_by_bios = is_disabled, ··· 1967 1968 .set_tss_addr = svm_set_tss_addr, 1969 .get_tdp_level = get_npt_level, 1970 + .get_mt_mask_shift = svm_get_mt_mask_shift, 1971 }; 1972 1973 static int __init svm_init(void)
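For reference, the reasoning behind the CS granularity fix-up added to svm_get_segment(), as a stand-alone sketch (not part of the patch): a descriptor's 20-bit limit field can describe at most 0xfffff bytes when G=0, so any larger limit must have been scaled in 4 KiB pages, i.e. G=1.

/* Illustrative sketch only: recovering the granularity bit that SVM
 * stores as 0 in the CS attributes on #VMEXIT. */
static int segment_granularity_from_limit(unsigned int limit)
{
	return limit > 0xfffff;
}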
arch/x86/kvm/svm.h → arch/x86/include/asm/svm.h
+236 -114
arch/x86/kvm/vmx.c
··· 16 */ 17 18 #include "irq.h" 19 - #include "vmx.h" 20 #include "mmu.h" 21 22 #include <linux/kvm_host.h> ··· 30 31 #include <asm/io.h> 32 #include <asm/desc.h> 33 34 #define __ex(x) __kvm_handle_fault_on_reboot(x) 35 ··· 91 } rmode; 92 int vpid; 93 bool emulation_required; 94 }; 95 96 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) ··· 128 u32 vmentry_ctrl; 129 } vmcs_config; 130 131 - struct vmx_capability { 132 u32 ept; 133 u32 vpid; 134 } vmx_capability; ··· 963 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); 964 965 break; 966 default: 967 vmx_load_host_state(vmx); 968 msr = find_msr_entry(vmx, msr_index); ··· 1045 1046 static __init int cpu_has_kvm_support(void) 1047 { 1048 - unsigned long ecx = cpuid_ecx(1); 1049 - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ 1050 } 1051 1052 static __init int vmx_disabled_by_bios(void) ··· 1091 __vcpu_clear(vmx); 1092 } 1093 1094 static void hardware_disable(void *garbage) 1095 { 1096 vmclear_local_vcpus(); 1097 - asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 1098 - write_cr4(read_cr4() & ~X86_CR4_VMXE); 1099 } 1100 1101 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, ··· 1197 #ifdef CONFIG_X86_64 1198 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; 1199 #endif 1200 - opt = 0; 1201 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, 1202 &_vmexit_control) < 0) 1203 return -EIO; 1204 1205 - min = opt = 0; 1206 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, 1207 &_vmentry_control) < 0) 1208 return -EIO; ··· 2109 */ 2110 static int vmx_vcpu_setup(struct vcpu_vmx *vmx) 2111 { 2112 - u32 host_sysenter_cs; 2113 u32 junk; 2114 unsigned long a; 2115 struct descriptor_table dt; 2116 int i; ··· 2199 rdmsrl(MSR_IA32_SYSENTER_EIP, a); 2200 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ 2201 2202 for (i = 0; i < NR_VMX_MSR; ++i) { 2203 u32 index = vmx_msr_index[i]; 2204 u32 data_low, data_high; ··· 2266 } 2267 2268 vmx->vcpu.arch.rmode.active = 0; 2269 2270 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 2271 kvm_set_cr8(&vmx->vcpu, 0); ··· 2374 return ret; 2375 } 2376 2377 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) 2378 { 2379 struct vcpu_vmx *vmx = to_vmx(vcpu); ··· 2420 2421 static void vmx_inject_nmi(struct kvm_vcpu *vcpu) 2422 { 2423 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2424 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); 2425 } 2426 2427 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) ··· 2480 kvm_queue_interrupt(vcpu, irq); 2481 } 2482 2483 - 2484 static void do_interrupt_requests(struct kvm_vcpu *vcpu, 2485 struct kvm_run *kvm_run) 2486 { 2487 - u32 cpu_based_vm_exec_control; 2488 2489 - vcpu->arch.interrupt_window_open = 2490 - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && 2491 - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); 2492 2493 - if (vcpu->arch.interrupt_window_open && 2494 - vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) 2495 - kvm_do_inject_irq(vcpu); 2496 2497 - if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) 2498 - vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 2499 - 2500 - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 2501 if (!vcpu->arch.interrupt_window_open && 2502 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) 2503 - /* 2504 - * Interrupts blocked. Wait for unblock. 
2505 - */ 2506 - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; 2507 - else 2508 - cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; 2509 - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2510 } 2511 2512 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) 2513 { 2514 int ret; 2515 struct kvm_userspace_memory_region tss_mem = { 2516 - .slot = 8, 2517 .guest_phys_addr = addr, 2518 .memory_size = PAGE_SIZE * 3, 2519 .flags = 0, ··· 2607 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); 2608 } 2609 2610 - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ 2611 return 1; /* already handled by vmx_vcpu_run() */ 2612 2613 if (is_no_device(intr_info)) { ··· 2696 rep = (exit_qualification & 32) != 0; 2697 port = exit_qualification >> 16; 2698 2699 return kvm_emulate_pio(vcpu, kvm_run, in, size, port); 2700 } 2701 ··· 2883 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2884 2885 KVMTRACE_0D(PEND_INTR, vcpu, handler); 2886 2887 /* 2888 * If the user space waits to inject interrupts, exit as soon as ··· 2892 if (kvm_run->request_interrupt_window && 2893 !vcpu->arch.irq_summary) { 2894 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 2895 - ++vcpu->stat.irq_window_exits; 2896 return 0; 2897 } 2898 return 1; ··· 2948 2949 static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2950 { 2951 unsigned long exit_qualification; 2952 u16 tss_selector; 2953 int reason; ··· 2956 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 2957 2958 reason = (u32)exit_qualification >> 30; 2959 tss_selector = exit_qualification; 2960 2961 return kvm_task_switch(vcpu, tss_selector, reason); ··· 3053 while (!guest_state_valid(vcpu)) { 3054 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3055 3056 - switch (err) { 3057 - case EMULATE_DONE: 3058 - break; 3059 - case EMULATE_DO_MMIO: 3060 - kvm_report_emulation_failure(vcpu, "mmio"); 3061 - /* TODO: Handle MMIO */ 3062 - return; 3063 - default: 3064 - kvm_report_emulation_failure(vcpu, "emulation failure"); 3065 - return; 3066 } 3067 3068 if (signal_pending(current)) ··· 3070 local_irq_disable(); 3071 preempt_disable(); 3072 3073 - /* Guest state should be valid now, no more emulation should be needed */ 3074 - vmx->emulation_required = 0; 3075 } 3076 3077 /* ··· 3120 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), 3121 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); 3122 3123 /* Access CR3 don't cause VMExit in paging mode, so we need 3124 * to sync with guest real CR3. */ 3125 if (vm_need_ept() && is_paging(vcpu)) { ··· 3141 3142 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 3143 (exit_reason != EXIT_REASON_EXCEPTION_NMI && 3144 - exit_reason != EXIT_REASON_EPT_VIOLATION)) 3145 - printk(KERN_WARNING "%s: unexpected, valid vectoring info and " 3146 - "exit reason is 0x%x\n", __func__, exit_reason); 3147 if (exit_reason < kvm_vmx_max_exit_handlers 3148 && kvm_vmx_exit_handlers[exit_reason]) 3149 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); ··· 3194 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? 
tpr >> 4 : max_irr >> 4); 3195 } 3196 3197 - static void enable_irq_window(struct kvm_vcpu *vcpu) 3198 - { 3199 - u32 cpu_based_vm_exec_control; 3200 - 3201 - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 3202 - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; 3203 - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 3204 - } 3205 - 3206 - static void enable_nmi_window(struct kvm_vcpu *vcpu) 3207 - { 3208 - u32 cpu_based_vm_exec_control; 3209 - 3210 - if (!cpu_has_virtual_nmis()) 3211 - return; 3212 - 3213 - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 3214 - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; 3215 - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 3216 - } 3217 - 3218 - static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) 3219 - { 3220 - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 3221 - return !(guest_intr & (GUEST_INTR_STATE_NMI | 3222 - GUEST_INTR_STATE_MOV_SS | 3223 - GUEST_INTR_STATE_STI)); 3224 - } 3225 - 3226 - static int vmx_irq_enabled(struct kvm_vcpu *vcpu) 3227 - { 3228 - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 3229 - return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | 3230 - GUEST_INTR_STATE_STI)) && 3231 - (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); 3232 - } 3233 - 3234 - static void enable_intr_window(struct kvm_vcpu *vcpu) 3235 - { 3236 - if (vcpu->arch.nmi_pending) 3237 - enable_nmi_window(vcpu); 3238 - else if (kvm_cpu_has_interrupt(vcpu)) 3239 - enable_irq_window(vcpu); 3240 - } 3241 - 3242 static void vmx_complete_interrupts(struct vcpu_vmx *vmx) 3243 { 3244 u32 exit_intr_info; ··· 3216 if (unblock_nmi && vector != DF_VECTOR) 3217 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3218 GUEST_INTR_STATE_NMI); 3219 - } 3220 3221 idt_vectoring_info = vmx->idt_vectoring_info; 3222 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; ··· 3256 { 3257 update_tpr_threshold(vcpu); 3258 3259 - if (cpu_has_virtual_nmis()) { 3260 - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3261 - if (vcpu->arch.interrupt.pending) { 3262 - enable_nmi_window(vcpu); 3263 - } else if (vmx_nmi_enabled(vcpu)) { 3264 - vcpu->arch.nmi_pending = false; 3265 - vcpu->arch.nmi_injected = true; 3266 - } else { 3267 - enable_intr_window(vcpu); 3268 - return; 3269 - } 3270 - } 3271 - if (vcpu->arch.nmi_injected) { 3272 - vmx_inject_nmi(vcpu); 3273 - enable_intr_window(vcpu); 3274 return; 3275 } 3276 } 3277 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { 3278 - if (vmx_irq_enabled(vcpu)) 3279 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); 3280 else 3281 enable_irq_window(vcpu); ··· 3286 if (vcpu->arch.interrupt.pending) { 3287 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 3288 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); 3289 } 3290 } 3291 ··· 3326 { 3327 struct vcpu_vmx *vmx = to_vmx(vcpu); 3328 u32 intr_info; 3329 3330 /* Handle invalid guest state instead of entering VMX */ 3331 if (vmx->emulation_required && emulate_invalid_guest_state) { ··· 3445 if (vmx->rmode.irq.pending) 3446 fixup_rmode_irq(vmx); 3447 3448 - vcpu->arch.interrupt_window_open = 3449 - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3450 - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; 3451 3452 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 3453 vmx->launched = 1; ··· 3453 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3454 3455 /* We need to handle NMIs before interrupts are enabled */ 3456 - if ((intr_info & 
INTR_INFO_INTR_TYPE_MASK) == 0x200 && 3457 (intr_info & INTR_INFO_VALID_MASK)) { 3458 KVMTRACE_0D(NMI, vcpu, handler); 3459 asm("int $2"); ··· 3571 return VMX_EPT_DEFAULT_GAW + 1; 3572 } 3573 3574 static struct kvm_x86_ops vmx_x86_ops = { 3575 .cpu_has_kvm_support = cpu_has_kvm_support, 3576 .disabled_by_bios = vmx_disabled_by_bios, ··· 3631 3632 .set_tss_addr = vmx_set_tss_addr, 3633 .get_tdp_level = get_ept_level, 3634 }; 3635 3636 static int __init vmx_init(void) ··· 3688 bypass_guest_pf = 0; 3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3690 VMX_EPT_WRITABLE_MASK | 3691 - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | 3692 VMX_EPT_IGMT_BIT); 3693 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3694 - VMX_EPT_EXECUTABLE_MASK); 3695 kvm_enable_tdp(); 3696 } else 3697 kvm_disable_tdp();
··· 16 */ 17 18 #include "irq.h" 19 #include "mmu.h" 20 21 #include <linux/kvm_host.h> ··· 31 32 #include <asm/io.h> 33 #include <asm/desc.h> 34 + #include <asm/vmx.h> 35 + #include <asm/virtext.h> 36 37 #define __ex(x) __kvm_handle_fault_on_reboot(x) 38 ··· 90 } rmode; 91 int vpid; 92 bool emulation_required; 93 + 94 + /* Support for vnmi-less CPUs */ 95 + int soft_vnmi_blocked; 96 + ktime_t entry_time; 97 + s64 vnmi_blocked_time; 98 }; 99 100 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) ··· 122 u32 vmentry_ctrl; 123 } vmcs_config; 124 125 + static struct vmx_capability { 126 u32 ept; 127 u32 vpid; 128 } vmx_capability; ··· 957 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); 958 959 break; 960 + case MSR_IA32_CR_PAT: 961 + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { 962 + vmcs_write64(GUEST_IA32_PAT, data); 963 + vcpu->arch.pat = data; 964 + break; 965 + } 966 + /* Otherwise falls through to kvm_set_msr_common */ 967 default: 968 vmx_load_host_state(vmx); 969 msr = find_msr_entry(vmx, msr_index); ··· 1032 1033 static __init int cpu_has_kvm_support(void) 1034 { 1035 + return cpu_has_vmx(); 1036 } 1037 1038 static __init int vmx_disabled_by_bios(void) ··· 1079 __vcpu_clear(vmx); 1080 } 1081 1082 + 1083 + /* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() 1084 + * tricks. 1085 + */ 1086 + static void kvm_cpu_vmxoff(void) 1087 + { 1088 + asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 1089 + write_cr4(read_cr4() & ~X86_CR4_VMXE); 1090 + } 1091 + 1092 static void hardware_disable(void *garbage) 1093 { 1094 vmclear_local_vcpus(); 1095 + kvm_cpu_vmxoff(); 1096 } 1097 1098 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, ··· 1176 #ifdef CONFIG_X86_64 1177 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; 1178 #endif 1179 + opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; 1180 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, 1181 &_vmexit_control) < 0) 1182 return -EIO; 1183 1184 + min = 0; 1185 + opt = VM_ENTRY_LOAD_IA32_PAT; 1186 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, 1187 &_vmentry_control) < 0) 1188 return -EIO; ··· 2087 */ 2088 static int vmx_vcpu_setup(struct vcpu_vmx *vmx) 2089 { 2090 + u32 host_sysenter_cs, msr_low, msr_high; 2091 u32 junk; 2092 + u64 host_pat; 2093 unsigned long a; 2094 struct descriptor_table dt; 2095 int i; ··· 2176 rdmsrl(MSR_IA32_SYSENTER_EIP, a); 2177 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ 2178 2179 + if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { 2180 + rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); 2181 + host_pat = msr_low | ((u64) msr_high << 32); 2182 + vmcs_write64(HOST_IA32_PAT, host_pat); 2183 + } 2184 + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { 2185 + rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); 2186 + host_pat = msr_low | ((u64) msr_high << 32); 2187 + /* Write the default value follow host pat */ 2188 + vmcs_write64(GUEST_IA32_PAT, host_pat); 2189 + /* Keep arch.pat sync with GUEST_IA32_PAT */ 2190 + vmx->vcpu.arch.pat = host_pat; 2191 + } 2192 + 2193 for (i = 0; i < NR_VMX_MSR; ++i) { 2194 u32 index = vmx_msr_index[i]; 2195 u32 data_low, data_high; ··· 2229 } 2230 2231 vmx->vcpu.arch.rmode.active = 0; 2232 + 2233 + vmx->soft_vnmi_blocked = 0; 2234 2235 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 2236 kvm_set_cr8(&vmx->vcpu, 0); ··· 2335 return ret; 2336 } 2337 2338 + static void enable_irq_window(struct kvm_vcpu *vcpu) 2339 + { 2340 + u32 cpu_based_vm_exec_control; 2341 + 2342 + 
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 2343 + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; 2344 + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2345 + } 2346 + 2347 + static void enable_nmi_window(struct kvm_vcpu *vcpu) 2348 + { 2349 + u32 cpu_based_vm_exec_control; 2350 + 2351 + if (!cpu_has_virtual_nmis()) { 2352 + enable_irq_window(vcpu); 2353 + return; 2354 + } 2355 + 2356 + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 2357 + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; 2358 + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2359 + } 2360 + 2361 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) 2362 { 2363 struct vcpu_vmx *vmx = to_vmx(vcpu); ··· 2358 2359 static void vmx_inject_nmi(struct kvm_vcpu *vcpu) 2360 { 2361 + struct vcpu_vmx *vmx = to_vmx(vcpu); 2362 + 2363 + if (!cpu_has_virtual_nmis()) { 2364 + /* 2365 + * Tracking the NMI-blocked state in software is built upon 2366 + * finding the next open IRQ window. This, in turn, depends on 2367 + * well-behaving guests: They have to keep IRQs disabled at 2368 + * least as long as the NMI handler runs. Otherwise we may 2369 + * cause NMI nesting, maybe breaking the guest. But as this is 2370 + * highly unlikely, we can live with the residual risk. 2371 + */ 2372 + vmx->soft_vnmi_blocked = 1; 2373 + vmx->vnmi_blocked_time = 0; 2374 + } 2375 + 2376 + ++vcpu->stat.nmi_injections; 2377 + if (vcpu->arch.rmode.active) { 2378 + vmx->rmode.irq.pending = true; 2379 + vmx->rmode.irq.vector = NMI_VECTOR; 2380 + vmx->rmode.irq.rip = kvm_rip_read(vcpu); 2381 + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2382 + NMI_VECTOR | INTR_TYPE_SOFT_INTR | 2383 + INTR_INFO_VALID_MASK); 2384 + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); 2385 + kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); 2386 + return; 2387 + } 2388 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 2389 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); 2390 + } 2391 + 2392 + static void vmx_update_window_states(struct kvm_vcpu *vcpu) 2393 + { 2394 + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); 2395 + 2396 + vcpu->arch.nmi_window_open = 2397 + !(guest_intr & (GUEST_INTR_STATE_STI | 2398 + GUEST_INTR_STATE_MOV_SS | 2399 + GUEST_INTR_STATE_NMI)); 2400 + if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) 2401 + vcpu->arch.nmi_window_open = 0; 2402 + 2403 + vcpu->arch.interrupt_window_open = 2404 + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && 2405 + !(guest_intr & (GUEST_INTR_STATE_STI | 2406 + GUEST_INTR_STATE_MOV_SS))); 2407 } 2408 2409 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) ··· 2374 kvm_queue_interrupt(vcpu, irq); 2375 } 2376 2377 static void do_interrupt_requests(struct kvm_vcpu *vcpu, 2378 struct kvm_run *kvm_run) 2379 { 2380 + vmx_update_window_states(vcpu); 2381 2382 + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 2383 + if (vcpu->arch.interrupt.pending) { 2384 + enable_nmi_window(vcpu); 2385 + } else if (vcpu->arch.nmi_window_open) { 2386 + vcpu->arch.nmi_pending = false; 2387 + vcpu->arch.nmi_injected = true; 2388 + } else { 2389 + enable_nmi_window(vcpu); 2390 + return; 2391 + } 2392 + } 2393 + if (vcpu->arch.nmi_injected) { 2394 + vmx_inject_nmi(vcpu); 2395 + if (vcpu->arch.nmi_pending) 2396 + enable_nmi_window(vcpu); 2397 + else if (vcpu->arch.irq_summary 2398 + || kvm_run->request_interrupt_window) 2399 + enable_irq_window(vcpu); 2400 + return; 2401 + } 2402 2403 + if (vcpu->arch.interrupt_window_open) { 2404 + 
if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) 2405 + kvm_do_inject_irq(vcpu); 2406 2407 + if (vcpu->arch.interrupt.pending) 2408 + vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 2409 + } 2410 if (!vcpu->arch.interrupt_window_open && 2411 (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) 2412 + enable_irq_window(vcpu); 2413 } 2414 2415 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) 2416 { 2417 int ret; 2418 struct kvm_userspace_memory_region tss_mem = { 2419 + .slot = TSS_PRIVATE_MEMSLOT, 2420 .guest_phys_addr = addr, 2421 .memory_size = PAGE_SIZE * 3, 2422 .flags = 0, ··· 2492 set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); 2493 } 2494 2495 + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) 2496 return 1; /* already handled by vmx_vcpu_run() */ 2497 2498 if (is_no_device(intr_info)) { ··· 2581 rep = (exit_qualification & 32) != 0; 2582 port = exit_qualification >> 16; 2583 2584 + skip_emulated_instruction(vcpu); 2585 return kvm_emulate_pio(vcpu, kvm_run, in, size, port); 2586 } 2587 ··· 2767 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 2768 2769 KVMTRACE_0D(PEND_INTR, vcpu, handler); 2770 + ++vcpu->stat.irq_window_exits; 2771 2772 /* 2773 * If the user space waits to inject interrupts, exit as soon as ··· 2775 if (kvm_run->request_interrupt_window && 2776 !vcpu->arch.irq_summary) { 2777 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 2778 return 0; 2779 } 2780 return 1; ··· 2832 2833 static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2834 { 2835 + struct vcpu_vmx *vmx = to_vmx(vcpu); 2836 unsigned long exit_qualification; 2837 u16 tss_selector; 2838 int reason; ··· 2839 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 2840 2841 reason = (u32)exit_qualification >> 30; 2842 + if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && 2843 + (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && 2844 + (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) 2845 + == INTR_TYPE_NMI_INTR) { 2846 + vcpu->arch.nmi_injected = false; 2847 + if (cpu_has_virtual_nmis()) 2848 + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 2849 + GUEST_INTR_STATE_NMI); 2850 + } 2851 tss_selector = exit_qualification; 2852 2853 return kvm_task_switch(vcpu, tss_selector, reason); ··· 2927 while (!guest_state_valid(vcpu)) { 2928 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 2929 2930 + if (err == EMULATE_DO_MMIO) 2931 + break; 2932 + 2933 + if (err != EMULATE_DONE) { 2934 + kvm_report_emulation_failure(vcpu, "emulation failure"); 2935 + return; 2936 } 2937 2938 if (signal_pending(current)) ··· 2948 local_irq_disable(); 2949 preempt_disable(); 2950 2951 + /* Guest state should be valid now except if we need to 2952 + * emulate an MMIO */ 2953 + if (guest_state_valid(vcpu)) 2954 + vmx->emulation_required = 0; 2955 } 2956 2957 /* ··· 2996 KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), 2997 (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); 2998 2999 + /* If we need to emulate an MMIO from handle_invalid_guest_state 3000 + * we just return 0 */ 3001 + if (vmx->emulation_required && emulate_invalid_guest_state) 3002 + return 0; 3003 + 3004 /* Access CR3 don't cause VMExit in paging mode, so we need 3005 * to sync with guest real CR3. 
*/ 3006 if (vm_need_ept() && is_paging(vcpu)) { ··· 3012 3013 if ((vectoring_info & VECTORING_INFO_VALID_MASK) && 3014 (exit_reason != EXIT_REASON_EXCEPTION_NMI && 3015 + exit_reason != EXIT_REASON_EPT_VIOLATION && 3016 + exit_reason != EXIT_REASON_TASK_SWITCH)) 3017 + printk(KERN_WARNING "%s: unexpected, valid vectoring info " 3018 + "(0x%x) and exit reason is 0x%x\n", 3019 + __func__, vectoring_info, exit_reason); 3020 + 3021 + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { 3022 + if (vcpu->arch.interrupt_window_open) { 3023 + vmx->soft_vnmi_blocked = 0; 3024 + vcpu->arch.nmi_window_open = 1; 3025 + } else if (vmx->vnmi_blocked_time > 1000000000LL && 3026 + vcpu->arch.nmi_pending) { 3027 + /* 3028 + * This CPU don't support us in finding the end of an 3029 + * NMI-blocked window if the guest runs with IRQs 3030 + * disabled. So we pull the trigger after 1 s of 3031 + * futile waiting, but inform the user about this. 3032 + */ 3033 + printk(KERN_WARNING "%s: Breaking out of NMI-blocked " 3034 + "state on VCPU %d after 1 s timeout\n", 3035 + __func__, vcpu->vcpu_id); 3036 + vmx->soft_vnmi_blocked = 0; 3037 + vmx->vcpu.arch.nmi_window_open = 1; 3038 + } 3039 + } 3040 + 3041 if (exit_reason < kvm_vmx_max_exit_handlers 3042 && kvm_vmx_exit_handlers[exit_reason]) 3043 return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); ··· 3042 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); 3043 } 3044 3045 static void vmx_complete_interrupts(struct vcpu_vmx *vmx) 3046 { 3047 u32 exit_intr_info; ··· 3109 if (unblock_nmi && vector != DF_VECTOR) 3110 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 3111 GUEST_INTR_STATE_NMI); 3112 + } else if (unlikely(vmx->soft_vnmi_blocked)) 3113 + vmx->vnmi_blocked_time += 3114 + ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); 3115 3116 idt_vectoring_info = vmx->idt_vectoring_info; 3117 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; ··· 3147 { 3148 update_tpr_threshold(vcpu); 3149 3150 + vmx_update_window_states(vcpu); 3151 + 3152 + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { 3153 + if (vcpu->arch.interrupt.pending) { 3154 + enable_nmi_window(vcpu); 3155 + } else if (vcpu->arch.nmi_window_open) { 3156 + vcpu->arch.nmi_pending = false; 3157 + vcpu->arch.nmi_injected = true; 3158 + } else { 3159 + enable_nmi_window(vcpu); 3160 return; 3161 } 3162 } 3163 + if (vcpu->arch.nmi_injected) { 3164 + vmx_inject_nmi(vcpu); 3165 + if (vcpu->arch.nmi_pending) 3166 + enable_nmi_window(vcpu); 3167 + else if (kvm_cpu_has_interrupt(vcpu)) 3168 + enable_irq_window(vcpu); 3169 + return; 3170 + } 3171 if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { 3172 + if (vcpu->arch.interrupt_window_open) 3173 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); 3174 else 3175 enable_irq_window(vcpu); ··· 3174 if (vcpu->arch.interrupt.pending) { 3175 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 3176 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); 3177 + if (kvm_cpu_has_interrupt(vcpu)) 3178 + enable_irq_window(vcpu); 3179 } 3180 } 3181 ··· 3212 { 3213 struct vcpu_vmx *vmx = to_vmx(vcpu); 3214 u32 intr_info; 3215 + 3216 + /* Record the guest's net vcpu time for enforced NMI injections. 
*/ 3217 + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) 3218 + vmx->entry_time = ktime_get(); 3219 3220 /* Handle invalid guest state instead of entering VMX */ 3221 if (vmx->emulation_required && emulate_invalid_guest_state) { ··· 3327 if (vmx->rmode.irq.pending) 3328 fixup_rmode_irq(vmx); 3329 3330 + vmx_update_window_states(vcpu); 3331 3332 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 3333 vmx->launched = 1; ··· 3337 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3338 3339 /* We need to handle NMIs before interrupts are enabled */ 3340 + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && 3341 (intr_info & INTR_INFO_VALID_MASK)) { 3342 KVMTRACE_0D(NMI, vcpu, handler); 3343 asm("int $2"); ··· 3455 return VMX_EPT_DEFAULT_GAW + 1; 3456 } 3457 3458 + static int vmx_get_mt_mask_shift(void) 3459 + { 3460 + return VMX_EPT_MT_EPTE_SHIFT; 3461 + } 3462 + 3463 static struct kvm_x86_ops vmx_x86_ops = { 3464 .cpu_has_kvm_support = cpu_has_kvm_support, 3465 .disabled_by_bios = vmx_disabled_by_bios, ··· 3510 3511 .set_tss_addr = vmx_set_tss_addr, 3512 .get_tdp_level = get_ept_level, 3513 + .get_mt_mask_shift = vmx_get_mt_mask_shift, 3514 }; 3515 3516 static int __init vmx_init(void) ··· 3566 bypass_guest_pf = 0; 3567 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3568 VMX_EPT_WRITABLE_MASK | 3569 VMX_EPT_IGMT_BIT); 3570 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3571 + VMX_EPT_EXECUTABLE_MASK, 3572 + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); 3573 kvm_enable_tdp(); 3574 } else 3575 kvm_disable_tdp();
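For reference, a stand-alone sketch (not part of the patch) of the bookkeeping behind the new soft-vNMI support for CPUs without virtual NMIs: each exit adds the guest's net run time to a counter, and the blocked state is dropped either when an interrupt window opens or after roughly one second of futile waiting with an NMI still pending.

/* Illustrative sketch only -- mirrors the soft_vnmi_blocked /
 * vnmi_blocked_time handling added to vmx.c; all names are local
 * to the sketch. */
#include <stdint.h>
#include <stdbool.h>

struct soft_vnmi {
	bool	blocked;	/* NMI window assumed closed */
	int64_t	blocked_ns;	/* accumulated guest run time */
};

static void soft_vnmi_account_exit(struct soft_vnmi *s, int64_t guest_run_ns,
				   bool irq_window_open, bool nmi_pending)
{
	if (!s->blocked)
		return;

	s->blocked_ns += guest_run_ns;

	/* An open interrupt window ends the blocked period; otherwise
	 * give up after ~1 s so NMIs are not deferred forever. */
	if (irq_window_open ||
	    (s->blocked_ns > 1000000000LL && nmi_pending)) {
		s->blocked = false;
		s->blocked_ns = 0;
	}
}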
+25 -2
arch/x86/kvm/vmx.h → arch/x86/include/asm/vmx.h
··· 63 64 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 65 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 66 67 #define VM_ENTRY_IA32E_MODE 0x00000200 68 #define VM_ENTRY_SMM 0x00000400 69 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 70 71 /* VMCS Encodings */ 72 enum vmcs_field { ··· 115 VMCS_LINK_POINTER_HIGH = 0x00002801, 116 GUEST_IA32_DEBUGCTL = 0x00002802, 117 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, 118 GUEST_PDPTR0 = 0x0000280a, 119 GUEST_PDPTR0_HIGH = 0x0000280b, 120 GUEST_PDPTR1 = 0x0000280c, ··· 125 GUEST_PDPTR2_HIGH = 0x0000280f, 126 GUEST_PDPTR3 = 0x00002810, 127 GUEST_PDPTR3_HIGH = 0x00002811, 128 PIN_BASED_VM_EXEC_CONTROL = 0x00004000, 129 CPU_BASED_VM_EXEC_CONTROL = 0x00004002, 130 EXCEPTION_BITMAP = 0x00004004, ··· 338 339 #define AR_RESERVD_MASK 0xfffe0f00 340 341 - #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 342 - #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 343 344 #define VMX_NR_VPIDS (1 << 16) 345 #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 ··· 363 #define VMX_EPT_IGMT_BIT (1ull << 6) 364 365 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 366 367 #endif
··· 63 64 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 65 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 66 + #define VM_EXIT_SAVE_IA32_PAT 0x00040000 67 + #define VM_EXIT_LOAD_IA32_PAT 0x00080000 68 69 #define VM_ENTRY_IA32E_MODE 0x00000200 70 #define VM_ENTRY_SMM 0x00000400 71 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 72 + #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 73 74 /* VMCS Encodings */ 75 enum vmcs_field { ··· 112 VMCS_LINK_POINTER_HIGH = 0x00002801, 113 GUEST_IA32_DEBUGCTL = 0x00002802, 114 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, 115 + GUEST_IA32_PAT = 0x00002804, 116 + GUEST_IA32_PAT_HIGH = 0x00002805, 117 GUEST_PDPTR0 = 0x0000280a, 118 GUEST_PDPTR0_HIGH = 0x0000280b, 119 GUEST_PDPTR1 = 0x0000280c, ··· 120 GUEST_PDPTR2_HIGH = 0x0000280f, 121 GUEST_PDPTR3 = 0x00002810, 122 GUEST_PDPTR3_HIGH = 0x00002811, 123 + HOST_IA32_PAT = 0x00002c00, 124 + HOST_IA32_PAT_HIGH = 0x00002c01, 125 PIN_BASED_VM_EXEC_CONTROL = 0x00004000, 126 CPU_BASED_VM_EXEC_CONTROL = 0x00004002, 127 EXCEPTION_BITMAP = 0x00004004, ··· 331 332 #define AR_RESERVD_MASK 0xfffe0f00 333 334 + #define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) 335 + #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) 336 + #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) 337 338 #define VMX_NR_VPIDS (1 << 16) 339 #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 ··· 355 #define VMX_EPT_IGMT_BIT (1ull << 6) 356 357 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 358 + 359 + 360 + #define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" 361 + #define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" 362 + #define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" 363 + #define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" 364 + #define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" 365 + #define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" 366 + #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" 367 + #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" 368 + #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" 369 + #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" 370 + #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" 371 + 372 + 373 374 #endif
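For reference, the effect of re-basing the private memslots on KVM_MEMORY_SLOTS, as a stand-alone sketch (not part of the patch; the value 32 is an assumption taken from the slot counts used elsewhere in this merge): the TSS, APIC-access and identity-pagetable slots now sit after the user-visible slots instead of occupying fixed low indices that user memory could collide with.

/* Illustrative sketch only; assumes KVM_MEMORY_SLOTS == 32 and
 * KVM_PRIVATE_MEM_SLOTS == 4. */
enum {
	USER_SLOTS		= 32,			/* KVM_MEMORY_SLOTS */
	TSS_SLOT		= USER_SLOTS + 0,	/* was hard-coded 8 */
	APIC_ACCESS_SLOT	= USER_SLOTS + 1,	/* was hard-coded 9 */
	IDENTITY_PT_SLOT	= USER_SLOTS + 2,	/* was hard-coded 10 */
};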
+99 -18
arch/x86/kvm/x86.c
··· 39 #include <asm/uaccess.h> 40 #include <asm/msr.h> 41 #include <asm/desc.h> 42 43 #define MAX_IO_MSRS 256 44 #define CR0_RESERVED_BITS \ ··· 87 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 88 { "hypercalls", VCPU_STAT(hypercalls) }, 89 { "request_irq", VCPU_STAT(request_irq_exits) }, 90 { "irq_exits", VCPU_STAT(irq_exits) }, 91 { "host_state_reload", VCPU_STAT(host_state_reload) }, 92 { "efer_reload", VCPU_STAT(efer_reload) }, ··· 95 { "insn_emulation", VCPU_STAT(insn_emulation) }, 96 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, 97 { "irq_injections", VCPU_STAT(irq_injections) }, 98 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, 99 { "mmu_pte_write", VM_STAT(mmu_pte_write) }, 100 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, ··· 104 { "mmu_recycled", VM_STAT(mmu_recycled) }, 105 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 106 { "mmu_unsync", VM_STAT(mmu_unsync) }, 107 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 108 { "largepages", VM_STAT(lpages) }, 109 { NULL } ··· 316 kvm_x86_ops->set_cr0(vcpu, cr0); 317 vcpu->arch.cr0 = cr0; 318 319 kvm_mmu_reset_context(vcpu); 320 return; 321 } ··· 360 } 361 kvm_x86_ops->set_cr4(vcpu, cr4); 362 vcpu->arch.cr4 = cr4; 363 kvm_mmu_reset_context(vcpu); 364 } 365 EXPORT_SYMBOL_GPL(kvm_set_cr4); ··· 455 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 456 #endif 457 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 458 - MSR_IA32_PERF_STATUS, 459 }; 460 461 static unsigned num_msrs_to_save; ··· 654 655 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 656 { 657 if (!msr_mtrr_valid(msr)) 658 return 1; 659 660 - vcpu->arch.mtrr[msr - 0x200] = data; 661 return 0; 662 } 663 ··· 781 782 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 783 { 784 if (!msr_mtrr_valid(msr)) 785 return 1; 786 787 - *pdata = vcpu->arch.mtrr[msr - 0x200]; 788 return 0; 789 } 790 ··· 964 case KVM_CAP_IRQCHIP: 965 case KVM_CAP_HLT: 966 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 967 - case KVM_CAP_USER_MEMORY: 968 case KVM_CAP_SET_TSS_ADDR: 969 case KVM_CAP_EXT_CPUID: 970 case KVM_CAP_CLOCKSOURCE: ··· 1248 int t, times = entry->eax & 0xff; 1249 1250 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1251 for (t = 1; t < times && *nent < maxnent; ++t) { 1252 do_cpuid_1_ent(&entry[t], function, 0); 1253 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; ··· 1279 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1280 /* read more entries until level_type is zero */ 1281 for (i = 1; *nent < maxnent; ++i) { 1282 - level_type = entry[i - 1].ecx & 0xff; 1283 if (!level_type) 1284 break; 1285 do_cpuid_1_ent(&entry[i], function, i); ··· 1379 return 0; 1380 } 1381 1382 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, 1383 struct kvm_tpr_access_ctl *tac) 1384 { ··· 1442 if (copy_from_user(&irq, argp, sizeof irq)) 1443 goto out; 1444 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 1445 if (r) 1446 goto out; 1447 r = 0; ··· 2045 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 2046 if (ret < 0) 2047 return 0; 2048 - kvm_mmu_pte_write(vcpu, gpa, val, bytes); 2049 return 1; 2050 } 2051 ··· 2481 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 2482 memcpy(vcpu->arch.pio_data, &val, 4); 2483 2484 - kvm_x86_ops->skip_emulated_instruction(vcpu); 2485 - 2486 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); 2487 if (pio_dev) { 2488 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); ··· 2616 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 2617 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 2618 
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 2619 - PT_DIRTY_MASK, PT64_NX_MASK, 0); 2620 return 0; 2621 2622 out: ··· 2804 2805 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; 2806 /* when no next entry is found, the current entry[i] is reselected */ 2807 - for (j = i + 1; j == i; j = (j + 1) % nent) { 2808 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; 2809 if (ej->function == e->function) { 2810 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; ··· 3048 pr_debug("vcpu %d received sipi with vector # %x\n", 3049 vcpu->vcpu_id, vcpu->arch.sipi_vector); 3050 kvm_lapic_reset(vcpu); 3051 - r = kvm_x86_ops->vcpu_reset(vcpu); 3052 if (r) 3053 return r; 3054 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ··· 3350 kvm_desct->padding = 0; 3351 } 3352 3353 - static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, 3354 - u16 selector, 3355 - struct descriptor_table *dtable) 3356 { 3357 if (selector & 1 << 2) { 3358 struct kvm_segment kvm_seg; ··· 3377 struct descriptor_table dtable; 3378 u16 index = selector >> 3; 3379 3380 - get_segment_descritptor_dtable(vcpu, selector, &dtable); 3381 3382 if (dtable.limit < index * 8 + 7) { 3383 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); ··· 3396 struct descriptor_table dtable; 3397 u16 index = selector >> 3; 3398 3399 - get_segment_descritptor_dtable(vcpu, selector, &dtable); 3400 3401 if (dtable.limit < index * 8 + 7) 3402 return 1; ··· 3975 /* We do fxsave: this must be aligned. */ 3976 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); 3977 3978 vcpu_load(vcpu); 3979 r = kvm_arch_vcpu_reset(vcpu); 3980 if (r == 0) ··· 4001 4002 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) 4003 { 4004 return kvm_x86_ops->vcpu_reset(vcpu); 4005 } 4006 ··· 4091 return ERR_PTR(-ENOMEM); 4092 4093 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4094 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 4095 4096 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ ··· 4128 4129 void kvm_arch_destroy_vm(struct kvm *kvm) 4130 { 4131 - kvm_iommu_unmap_guest(kvm); 4132 kvm_free_all_assigned_devices(kvm); 4133 kvm_free_pit(kvm); 4134 kfree(kvm->arch.vpic); 4135 kfree(kvm->arch.vioapic); ··· 4207 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4208 { 4209 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 4210 - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; 4211 } 4212 4213 static void vcpu_kick_intr(void *info)
··· 39 #include <asm/uaccess.h> 40 #include <asm/msr.h> 41 #include <asm/desc.h> 42 + #include <asm/mtrr.h> 43 44 #define MAX_IO_MSRS 256 45 #define CR0_RESERVED_BITS \ ··· 86 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 87 { "hypercalls", VCPU_STAT(hypercalls) }, 88 { "request_irq", VCPU_STAT(request_irq_exits) }, 89 + { "request_nmi", VCPU_STAT(request_nmi_exits) }, 90 { "irq_exits", VCPU_STAT(irq_exits) }, 91 { "host_state_reload", VCPU_STAT(host_state_reload) }, 92 { "efer_reload", VCPU_STAT(efer_reload) }, ··· 93 { "insn_emulation", VCPU_STAT(insn_emulation) }, 94 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, 95 { "irq_injections", VCPU_STAT(irq_injections) }, 96 + { "nmi_injections", VCPU_STAT(nmi_injections) }, 97 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, 98 { "mmu_pte_write", VM_STAT(mmu_pte_write) }, 99 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, ··· 101 { "mmu_recycled", VM_STAT(mmu_recycled) }, 102 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 103 { "mmu_unsync", VM_STAT(mmu_unsync) }, 104 + { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, 105 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 106 { "largepages", VM_STAT(lpages) }, 107 { NULL } ··· 312 kvm_x86_ops->set_cr0(vcpu, cr0); 313 vcpu->arch.cr0 = cr0; 314 315 + kvm_mmu_sync_global(vcpu); 316 kvm_mmu_reset_context(vcpu); 317 return; 318 } ··· 355 } 356 kvm_x86_ops->set_cr4(vcpu, cr4); 357 vcpu->arch.cr4 = cr4; 358 + kvm_mmu_sync_global(vcpu); 359 kvm_mmu_reset_context(vcpu); 360 } 361 EXPORT_SYMBOL_GPL(kvm_set_cr4); ··· 449 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 450 #endif 451 MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 452 + MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT 453 }; 454 455 static unsigned num_msrs_to_save; ··· 648 649 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 650 { 651 + u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 652 + 653 if (!msr_mtrr_valid(msr)) 654 return 1; 655 656 + if (msr == MSR_MTRRdefType) { 657 + vcpu->arch.mtrr_state.def_type = data; 658 + vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; 659 + } else if (msr == MSR_MTRRfix64K_00000) 660 + p[0] = data; 661 + else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) 662 + p[1 + msr - MSR_MTRRfix16K_80000] = data; 663 + else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) 664 + p[3 + msr - MSR_MTRRfix4K_C0000] = data; 665 + else if (msr == MSR_IA32_CR_PAT) 666 + vcpu->arch.pat = data; 667 + else { /* Variable MTRRs */ 668 + int idx, is_mtrr_mask; 669 + u64 *pt; 670 + 671 + idx = (msr - 0x200) / 2; 672 + is_mtrr_mask = msr - 0x200 - 2 * idx; 673 + if (!is_mtrr_mask) 674 + pt = 675 + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; 676 + else 677 + pt = 678 + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; 679 + *pt = data; 680 + } 681 + 682 + kvm_mmu_reset_context(vcpu); 683 return 0; 684 } 685 ··· 747 748 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 749 { 750 + u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 751 + 752 if (!msr_mtrr_valid(msr)) 753 return 1; 754 755 + if (msr == MSR_MTRRdefType) 756 + *pdata = vcpu->arch.mtrr_state.def_type + 757 + (vcpu->arch.mtrr_state.enabled << 10); 758 + else if (msr == MSR_MTRRfix64K_00000) 759 + *pdata = p[0]; 760 + else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) 761 + *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; 762 + else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) 763 + *pdata = p[3 + msr - 
MSR_MTRRfix4K_C0000]; 764 + else if (msr == MSR_IA32_CR_PAT) 765 + *pdata = vcpu->arch.pat; 766 + else { /* Variable MTRRs */ 767 + int idx, is_mtrr_mask; 768 + u64 *pt; 769 + 770 + idx = (msr - 0x200) / 2; 771 + is_mtrr_mask = msr - 0x200 - 2 * idx; 772 + if (!is_mtrr_mask) 773 + pt = 774 + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; 775 + else 776 + pt = 777 + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; 778 + *pdata = *pt; 779 + } 780 + 781 return 0; 782 } 783 ··· 903 case KVM_CAP_IRQCHIP: 904 case KVM_CAP_HLT: 905 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 906 case KVM_CAP_SET_TSS_ADDR: 907 case KVM_CAP_EXT_CPUID: 908 case KVM_CAP_CLOCKSOURCE: ··· 1188 int t, times = entry->eax & 0xff; 1189 1190 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1191 + entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; 1192 for (t = 1; t < times && *nent < maxnent; ++t) { 1193 do_cpuid_1_ent(&entry[t], function, 0); 1194 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; ··· 1218 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1219 /* read more entries until level_type is zero */ 1220 for (i = 1; *nent < maxnent; ++i) { 1221 + level_type = entry[i - 1].ecx & 0xff00; 1222 if (!level_type) 1223 break; 1224 do_cpuid_1_ent(&entry[i], function, i); ··· 1318 return 0; 1319 } 1320 1321 + static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) 1322 + { 1323 + vcpu_load(vcpu); 1324 + kvm_inject_nmi(vcpu); 1325 + vcpu_put(vcpu); 1326 + 1327 + return 0; 1328 + } 1329 + 1330 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, 1331 struct kvm_tpr_access_ctl *tac) 1332 { ··· 1372 if (copy_from_user(&irq, argp, sizeof irq)) 1373 goto out; 1374 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 1375 + if (r) 1376 + goto out; 1377 + r = 0; 1378 + break; 1379 + } 1380 + case KVM_NMI: { 1381 + r = kvm_vcpu_ioctl_nmi(vcpu); 1382 if (r) 1383 goto out; 1384 r = 0; ··· 1968 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 1969 if (ret < 0) 1970 return 0; 1971 + kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); 1972 return 1; 1973 } 1974 ··· 2404 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 2405 memcpy(vcpu->arch.pio_data, &val, 4); 2406 2407 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); 2408 if (pio_dev) { 2409 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); ··· 2541 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 2542 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 2543 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 2544 + PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); 2545 return 0; 2546 2547 out: ··· 2729 2730 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; 2731 /* when no next entry is found, the current entry[i] is reselected */ 2732 + for (j = i + 1; ; j = (j + 1) % nent) { 2733 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; 2734 if (ej->function == e->function) { 2735 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; ··· 2973 pr_debug("vcpu %d received sipi with vector # %x\n", 2974 vcpu->vcpu_id, vcpu->arch.sipi_vector); 2975 kvm_lapic_reset(vcpu); 2976 + r = kvm_arch_vcpu_reset(vcpu); 2977 if (r) 2978 return r; 2979 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; ··· 3275 kvm_desct->padding = 0; 3276 } 3277 3278 + static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, 3279 + u16 selector, 3280 + struct descriptor_table *dtable) 3281 { 3282 if (selector & 1 << 2) { 3283 struct kvm_segment kvm_seg; ··· 3302 struct descriptor_table dtable; 3303 u16 index = selector >> 3; 3304 3305 + get_segment_descriptor_dtable(vcpu, selector, &dtable); 3306 3307 if (dtable.limit < index * 8 + 7) { 3308 
kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); ··· 3321 struct descriptor_table dtable; 3322 u16 index = selector >> 3; 3323 3324 + get_segment_descriptor_dtable(vcpu, selector, &dtable); 3325 3326 if (dtable.limit < index * 8 + 7) 3327 return 1; ··· 3900 /* We do fxsave: this must be aligned. */ 3901 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); 3902 3903 + vcpu->arch.mtrr_state.have_fixed = 1; 3904 vcpu_load(vcpu); 3905 r = kvm_arch_vcpu_reset(vcpu); 3906 if (r == 0) ··· 3925 3926 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) 3927 { 3928 + vcpu->arch.nmi_pending = false; 3929 + vcpu->arch.nmi_injected = false; 3930 + 3931 return kvm_x86_ops->vcpu_reset(vcpu); 3932 } 3933 ··· 4012 return ERR_PTR(-ENOMEM); 4013 4014 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 4015 + INIT_LIST_HEAD(&kvm->arch.oos_global_pages); 4016 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 4017 4018 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ ··· 4048 4049 void kvm_arch_destroy_vm(struct kvm *kvm) 4050 { 4051 kvm_free_all_assigned_devices(kvm); 4052 + kvm_iommu_unmap_guest(kvm); 4053 kvm_free_pit(kvm); 4054 kfree(kvm->arch.vpic); 4055 kfree(kvm->arch.vioapic); ··· 4127 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4128 { 4129 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 4130 + || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED 4131 + || vcpu->arch.nmi_pending; 4132 } 4133 4134 static void vcpu_kick_intr(void *info)
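The MTRR plumbing above maps guest MSR accesses onto a struct mtrr_state: MSR_MTRRfix64K_00000 lands in fixed_ranges[0], the two 16K MSRs in [1..2], the eight 4K MSRs in [3..10], and each variable-range pair 0x200+2n / 0x201+2n resolves to var_ranges[n].base or .mask. A small standalone illustration of that index arithmetic, userspace only and with the MSR numbers hard-coded (it never touches real MSRs):

	#include <stdio.h>

	int main(void)
	{
		unsigned msr;

		/* walk MTRRphysBase0/Mask0 .. MTRRphysBase3/Mask3 */
		for (msr = 0x200; msr < 0x208; msr++) {
			int idx = (msr - 0x200) / 2;              /* which MTRR pair */
			int is_mtrr_mask = msr - 0x200 - 2 * idx; /* 0 = base, 1 = mask */

			printf("MSR %#x -> var_ranges[%d].%s\n",
			       msr, idx, is_mtrr_mask ? "mask_lo" : "base_lo");
		}
		return 0;
	}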
+174 -123
arch/x86/kvm/x86_emulate.c
··· 58 #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ 59 #define SrcImm (5<<4) /* Immediate operand. */ 60 #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ 61 #define SrcMask (7<<4) 62 /* Generic ModRM decode. */ 63 #define ModRM (1<<7) ··· 71 #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ 72 #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 73 #define GroupMask 0xff /* Group number stored in bits 0:7 */ 74 75 enum { 76 Group1_80, Group1_81, Group1_82, Group1_83, 77 Group1A, Group3_Byte, Group3, Group4, Group5, Group7, 78 }; 79 80 - static u16 opcode_table[256] = { 81 /* 0x00 - 0x07 */ 82 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 83 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 84 - 0, 0, 0, 0, 85 /* 0x08 - 0x0F */ 86 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 87 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ··· 202 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, 203 }; 204 205 - static u16 twobyte_table[256] = { 206 /* 0x00 - 0x0F */ 207 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, 208 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, ··· 237 /* 0x90 - 0x9F */ 238 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 239 /* 0xA0 - 0xA7 */ 240 - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, 241 /* 0xA8 - 0xAF */ 242 - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, 243 /* 0xB0 - 0xB7 */ 244 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, 245 DstMem | SrcReg | ModRM | BitOp, ··· 265 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 266 }; 267 268 - static u16 group_table[] = { 269 [Group1_80*8] = 270 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 271 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ··· 309 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, 310 }; 311 312 - static u16 group2_table[] = { 313 [Group7*8] = 314 - SrcNone | ModRM, 0, 0, 0, 315 SrcNone | ModRM | DstMem | Mov, 0, 316 SrcMem16 | ModRM | Mov, 0, 317 }; ··· 371 "andl %"_msk",%"_LO32 _tmp"; " \ 372 "orl %"_LO32 _tmp",%"_sav"; " 373 374 /* Raw emulation: instruction has two explicit operands. 
*/ 375 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ 376 - do { \ 377 - unsigned long _tmp; \ 378 - \ 379 - switch ((_dst).bytes) { \ 380 - case 2: \ 381 - __asm__ __volatile__ ( \ 382 - _PRE_EFLAGS("0", "4", "2") \ 383 - _op"w %"_wx"3,%1; " \ 384 - _POST_EFLAGS("0", "4", "2") \ 385 - : "=m" (_eflags), "=m" ((_dst).val), \ 386 - "=&r" (_tmp) \ 387 - : _wy ((_src).val), "i" (EFLAGS_MASK)); \ 388 - break; \ 389 - case 4: \ 390 - __asm__ __volatile__ ( \ 391 - _PRE_EFLAGS("0", "4", "2") \ 392 - _op"l %"_lx"3,%1; " \ 393 - _POST_EFLAGS("0", "4", "2") \ 394 - : "=m" (_eflags), "=m" ((_dst).val), \ 395 - "=&r" (_tmp) \ 396 - : _ly ((_src).val), "i" (EFLAGS_MASK)); \ 397 - break; \ 398 - case 8: \ 399 - __emulate_2op_8byte(_op, _src, _dst, \ 400 - _eflags, _qx, _qy); \ 401 - break; \ 402 - } \ 403 } while (0) 404 405 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ 406 do { \ 407 - unsigned long __tmp; \ 408 switch ((_dst).bytes) { \ 409 case 1: \ 410 - __asm__ __volatile__ ( \ 411 - _PRE_EFLAGS("0", "4", "2") \ 412 - _op"b %"_bx"3,%1; " \ 413 - _POST_EFLAGS("0", "4", "2") \ 414 - : "=m" (_eflags), "=m" ((_dst).val), \ 415 - "=&r" (__tmp) \ 416 - : _by ((_src).val), "i" (EFLAGS_MASK)); \ 417 break; \ 418 default: \ 419 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ ··· 436 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ 437 "w", "r", _LO32, "r", "", "r") 438 439 - /* Instruction has only one explicit operand (no source operand). */ 440 - #define emulate_1op(_op, _dst, _eflags) \ 441 do { \ 442 unsigned long _tmp; \ 443 \ 444 switch ((_dst).bytes) { \ 445 - case 1: \ 446 - __asm__ __volatile__ ( \ 447 - _PRE_EFLAGS("0", "3", "2") \ 448 - _op"b %1; " \ 449 - _POST_EFLAGS("0", "3", "2") \ 450 - : "=m" (_eflags), "=m" ((_dst).val), \ 451 - "=&r" (_tmp) \ 452 - : "i" (EFLAGS_MASK)); \ 453 - break; \ 454 - case 2: \ 455 - __asm__ __volatile__ ( \ 456 - _PRE_EFLAGS("0", "3", "2") \ 457 - _op"w %1; " \ 458 - _POST_EFLAGS("0", "3", "2") \ 459 - : "=m" (_eflags), "=m" ((_dst).val), \ 460 - "=&r" (_tmp) \ 461 - : "i" (EFLAGS_MASK)); \ 462 - break; \ 463 - case 4: \ 464 - __asm__ __volatile__ ( \ 465 - _PRE_EFLAGS("0", "3", "2") \ 466 - _op"l %1; " \ 467 - _POST_EFLAGS("0", "3", "2") \ 468 - : "=m" (_eflags), "=m" ((_dst).val), \ 469 - "=&r" (_tmp) \ 470 - : "i" (EFLAGS_MASK)); \ 471 - break; \ 472 - case 8: \ 473 - __emulate_1op_8byte(_op, _dst, _eflags); \ 474 - break; \ 475 } \ 476 } while (0) 477 - 478 - /* Emulate an instruction with quadword operands (x86/64 only). */ 479 - #if defined(CONFIG_X86_64) 480 - #define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ 481 - do { \ 482 - __asm__ __volatile__ ( \ 483 - _PRE_EFLAGS("0", "4", "2") \ 484 - _op"q %"_qx"3,%1; " \ 485 - _POST_EFLAGS("0", "4", "2") \ 486 - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ 487 - : _qy ((_src).val), "i" (EFLAGS_MASK)); \ 488 - } while (0) 489 - 490 - #define __emulate_1op_8byte(_op, _dst, _eflags) \ 491 - do { \ 492 - __asm__ __volatile__ ( \ 493 - _PRE_EFLAGS("0", "3", "2") \ 494 - _op"q %1; " \ 495 - _POST_EFLAGS("0", "3", "2") \ 496 - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ 497 - : "i" (EFLAGS_MASK)); \ 498 - } while (0) 499 - 500 - #elif defined(__i386__) 501 - #define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) 502 - #define __emulate_1op_8byte(_op, _dst, _eflags) 503 - #endif /* __i386__ */ 504 505 /* Fetch next part of the instruction being emulated. 
*/ 506 #define insn_fetch(_type, _size, _eip) \ ··· 1049 c->src.bytes = 1; 1050 c->src.val = insn_fetch(s8, 1, c->eip); 1051 break; 1052 } 1053 1054 /* Decode and fetch the destination operand: register or memory. */ ··· 1135 c->regs[VCPU_REGS_RSP]); 1136 } 1137 1138 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, 1139 struct x86_emulate_ops *ops) 1140 { 1141 struct decode_cache *c = &ctxt->decode; 1142 int rc; 1143 1144 - rc = ops->read_std(register_address(c, ss_base(ctxt), 1145 - c->regs[VCPU_REGS_RSP]), 1146 - &c->dst.val, c->dst.bytes, ctxt->vcpu); 1147 if (rc != 0) 1148 return rc; 1149 - 1150 - register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); 1151 - 1152 return 0; 1153 } 1154 ··· 1463 emulate_1op("dec", c->dst, ctxt->eflags); 1464 break; 1465 case 0x50 ... 0x57: /* push reg */ 1466 - c->dst.type = OP_MEM; 1467 - c->dst.bytes = c->op_bytes; 1468 - c->dst.val = c->src.val; 1469 - register_address_increment(c, &c->regs[VCPU_REGS_RSP], 1470 - -c->op_bytes); 1471 - c->dst.ptr = (void *) register_address( 1472 - c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); 1473 break; 1474 case 0x58 ... 0x5f: /* pop reg */ 1475 pop_instruction: 1476 - if ((rc = ops->read_std(register_address(c, ss_base(ctxt), 1477 - c->regs[VCPU_REGS_RSP]), c->dst.ptr, 1478 - c->op_bytes, ctxt->vcpu)) != 0) 1479 goto done; 1480 - 1481 - register_address_increment(c, &c->regs[VCPU_REGS_RSP], 1482 - c->op_bytes); 1483 - c->dst.type = OP_NONE; /* Disable writeback. */ 1484 break; 1485 case 0x63: /* movsxd */ 1486 if (ctxt->mode != X86EMUL_MODE_PROT64) ··· 1630 emulate_push(ctxt); 1631 break; 1632 case 0x9d: /* popf */ 1633 c->dst.ptr = (unsigned long *) &ctxt->eflags; 1634 goto pop_instruction; 1635 case 0xa0 ... 0xa1: /* mov */ 1636 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; ··· 1730 emulate_grp2(ctxt); 1731 break; 1732 case 0xc3: /* ret */ 1733 c->dst.ptr = &c->eip; 1734 goto pop_instruction; 1735 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ 1736 mov: ··· 1821 c->eip = saved_eip; 1822 goto cannot_emulate; 1823 } 1824 - return 0; 1825 case 0xf4: /* hlt */ 1826 ctxt->vcpu->arch.halt_request = 1; 1827 break; ··· 2042 c->src.val &= (c->dst.bytes << 3) - 1; 2043 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); 2044 break; 2045 case 0xab: 2046 bts: /* bts */ 2047 /* only subword offset */ 2048 c->src.val &= (c->dst.bytes << 3) - 1; 2049 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); 2050 break; 2051 case 0xae: /* clflush */ 2052 break;
··· 58 #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ 59 #define SrcImm (5<<4) /* Immediate operand. */ 60 #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ 61 + #define SrcOne (7<<4) /* Implied '1' */ 62 #define SrcMask (7<<4) 63 /* Generic ModRM decode. */ 64 #define ModRM (1<<7) ··· 70 #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ 71 #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 72 #define GroupMask 0xff /* Group number stored in bits 0:7 */ 73 + /* Source 2 operand type */ 74 + #define Src2None (0<<29) 75 + #define Src2CL (1<<29) 76 + #define Src2ImmByte (2<<29) 77 + #define Src2One (3<<29) 78 + #define Src2Mask (7<<29) 79 80 enum { 81 Group1_80, Group1_81, Group1_82, Group1_83, 82 Group1A, Group3_Byte, Group3, Group4, Group5, Group7, 83 }; 84 85 + static u32 opcode_table[256] = { 86 /* 0x00 - 0x07 */ 87 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 88 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 89 + ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, 90 /* 0x08 - 0x0F */ 91 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 92 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ··· 195 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, 196 }; 197 198 + static u32 twobyte_table[256] = { 199 /* 0x00 - 0x0F */ 200 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, 201 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, ··· 230 /* 0x90 - 0x9F */ 231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232 /* 0xA0 - 0xA7 */ 233 + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 234 + DstMem | SrcReg | Src2ImmByte | ModRM, 235 + DstMem | SrcReg | Src2CL | ModRM, 0, 0, 236 /* 0xA8 - 0xAF */ 237 + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 238 + DstMem | SrcReg | Src2ImmByte | ModRM, 239 + DstMem | SrcReg | Src2CL | ModRM, 240 + ModRM, 0, 241 /* 0xB0 - 0xB7 */ 242 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, 243 DstMem | SrcReg | ModRM | BitOp, ··· 253 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 254 }; 255 256 + static u32 group_table[] = { 257 [Group1_80*8] = 258 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 259 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ··· 297 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, 298 }; 299 300 + static u32 group2_table[] = { 301 [Group7*8] = 302 + SrcNone | ModRM, 0, 0, SrcNone | ModRM, 303 SrcNone | ModRM | DstMem | Mov, 0, 304 SrcMem16 | ModRM | Mov, 0, 305 }; ··· 359 "andl %"_msk",%"_LO32 _tmp"; " \ 360 "orl %"_LO32 _tmp",%"_sav"; " 361 362 + #ifdef CONFIG_X86_64 363 + #define ON64(x) x 364 + #else 365 + #define ON64(x) 366 + #endif 367 + 368 + #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \ 369 + do { \ 370 + __asm__ __volatile__ ( \ 371 + _PRE_EFLAGS("0", "4", "2") \ 372 + _op _suffix " %"_x"3,%1; " \ 373 + _POST_EFLAGS("0", "4", "2") \ 374 + : "=m" (_eflags), "=m" ((_dst).val), \ 375 + "=&r" (_tmp) \ 376 + : _y ((_src).val), "i" (EFLAGS_MASK)); \ 377 + } while (0) 378 + 379 + 380 /* Raw emulation: instruction has two explicit operands. 
*/ 381 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ 382 + do { \ 383 + unsigned long _tmp; \ 384 + \ 385 + switch ((_dst).bytes) { \ 386 + case 2: \ 387 + ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \ 388 + break; \ 389 + case 4: \ 390 + ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \ 391 + break; \ 392 + case 8: \ 393 + ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \ 394 + break; \ 395 + } \ 396 } while (0) 397 398 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ 399 do { \ 400 + unsigned long _tmp; \ 401 switch ((_dst).bytes) { \ 402 case 1: \ 403 + ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \ 404 break; \ 405 default: \ 406 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ ··· 425 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ 426 "w", "r", _LO32, "r", "", "r") 427 428 + /* Instruction has three operands and one operand is stored in ECX register */ 429 + #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ 430 + do { \ 431 + unsigned long _tmp; \ 432 + _type _clv = (_cl).val; \ 433 + _type _srcv = (_src).val; \ 434 + _type _dstv = (_dst).val; \ 435 + \ 436 + __asm__ __volatile__ ( \ 437 + _PRE_EFLAGS("0", "5", "2") \ 438 + _op _suffix " %4,%1 \n" \ 439 + _POST_EFLAGS("0", "5", "2") \ 440 + : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ 441 + : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ 442 + ); \ 443 + \ 444 + (_cl).val = (unsigned long) _clv; \ 445 + (_src).val = (unsigned long) _srcv; \ 446 + (_dst).val = (unsigned long) _dstv; \ 447 + } while (0) 448 + 449 + #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ 450 + do { \ 451 + switch ((_dst).bytes) { \ 452 + case 2: \ 453 + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 454 + "w", unsigned short); \ 455 + break; \ 456 + case 4: \ 457 + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 458 + "l", unsigned int); \ 459 + break; \ 460 + case 8: \ 461 + ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ 462 + "q", unsigned long)); \ 463 + break; \ 464 + } \ 465 + } while (0) 466 + 467 + #define __emulate_1op(_op, _dst, _eflags, _suffix) \ 468 do { \ 469 unsigned long _tmp; \ 470 \ 471 + __asm__ __volatile__ ( \ 472 + _PRE_EFLAGS("0", "3", "2") \ 473 + _op _suffix " %1; " \ 474 + _POST_EFLAGS("0", "3", "2") \ 475 + : "=m" (_eflags), "+m" ((_dst).val), \ 476 + "=&r" (_tmp) \ 477 + : "i" (EFLAGS_MASK)); \ 478 + } while (0) 479 + 480 + /* Instruction has only one explicit operand (no source operand). */ 481 + #define emulate_1op(_op, _dst, _eflags) \ 482 + do { \ 483 switch ((_dst).bytes) { \ 484 + case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \ 485 + case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \ 486 + case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \ 487 + case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \ 488 } \ 489 } while (0) 490 491 /* Fetch next part of the instruction being emulated. */ 492 #define insn_fetch(_type, _size, _eip) \ ··· 1041 c->src.bytes = 1; 1042 c->src.val = insn_fetch(s8, 1, c->eip); 1043 break; 1044 + case SrcOne: 1045 + c->src.bytes = 1; 1046 + c->src.val = 1; 1047 + break; 1048 + } 1049 + 1050 + /* 1051 + * Decode and fetch the second source operand: register, memory 1052 + * or immediate. 
1053 + */ 1054 + switch (c->d & Src2Mask) { 1055 + case Src2None: 1056 + break; 1057 + case Src2CL: 1058 + c->src2.bytes = 1; 1059 + c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8; 1060 + break; 1061 + case Src2ImmByte: 1062 + c->src2.type = OP_IMM; 1063 + c->src2.ptr = (unsigned long *)c->eip; 1064 + c->src2.bytes = 1; 1065 + c->src2.val = insn_fetch(u8, 1, c->eip); 1066 + break; 1067 + case Src2One: 1068 + c->src2.bytes = 1; 1069 + c->src2.val = 1; 1070 + break; 1071 } 1072 1073 /* Decode and fetch the destination operand: register or memory. */ ··· 1100 c->regs[VCPU_REGS_RSP]); 1101 } 1102 1103 + static int emulate_pop(struct x86_emulate_ctxt *ctxt, 1104 + struct x86_emulate_ops *ops) 1105 + { 1106 + struct decode_cache *c = &ctxt->decode; 1107 + int rc; 1108 + 1109 + rc = ops->read_emulated(register_address(c, ss_base(ctxt), 1110 + c->regs[VCPU_REGS_RSP]), 1111 + &c->src.val, c->src.bytes, ctxt->vcpu); 1112 + if (rc != 0) 1113 + return rc; 1114 + 1115 + register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); 1116 + return rc; 1117 + } 1118 + 1119 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, 1120 struct x86_emulate_ops *ops) 1121 { 1122 struct decode_cache *c = &ctxt->decode; 1123 int rc; 1124 1125 + c->src.bytes = c->dst.bytes; 1126 + rc = emulate_pop(ctxt, ops); 1127 if (rc != 0) 1128 return rc; 1129 + c->dst.val = c->src.val; 1130 return 0; 1131 } 1132 ··· 1415 emulate_1op("dec", c->dst, ctxt->eflags); 1416 break; 1417 case 0x50 ... 0x57: /* push reg */ 1418 + emulate_push(ctxt); 1419 break; 1420 case 0x58 ... 0x5f: /* pop reg */ 1421 pop_instruction: 1422 + c->src.bytes = c->op_bytes; 1423 + rc = emulate_pop(ctxt, ops); 1424 + if (rc != 0) 1425 goto done; 1426 + c->dst.val = c->src.val; 1427 break; 1428 case 0x63: /* movsxd */ 1429 if (ctxt->mode != X86EMUL_MODE_PROT64) ··· 1591 emulate_push(ctxt); 1592 break; 1593 case 0x9d: /* popf */ 1594 + c->dst.type = OP_REG; 1595 c->dst.ptr = (unsigned long *) &ctxt->eflags; 1596 + c->dst.bytes = c->op_bytes; 1597 goto pop_instruction; 1598 case 0xa0 ... 0xa1: /* mov */ 1599 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; ··· 1689 emulate_grp2(ctxt); 1690 break; 1691 case 0xc3: /* ret */ 1692 + c->dst.type = OP_REG; 1693 c->dst.ptr = &c->eip; 1694 + c->dst.bytes = c->op_bytes; 1695 goto pop_instruction; 1696 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ 1697 mov: ··· 1778 c->eip = saved_eip; 1779 goto cannot_emulate; 1780 } 1781 + break; 1782 case 0xf4: /* hlt */ 1783 ctxt->vcpu->arch.halt_request = 1; 1784 break; ··· 1999 c->src.val &= (c->dst.bytes << 3) - 1; 2000 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); 2001 break; 2002 + case 0xa4: /* shld imm8, r, r/m */ 2003 + case 0xa5: /* shld cl, r, r/m */ 2004 + emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); 2005 + break; 2006 case 0xab: 2007 bts: /* bts */ 2008 /* only subword offset */ 2009 c->src.val &= (c->dst.bytes << 3) - 1; 2010 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); 2011 + break; 2012 + case 0xac: /* shrd imm8, r, r/m */ 2013 + case 0xad: /* shrd cl, r, r/m */ 2014 + emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags); 2015 break; 2016 case 0xae: /* clflush */ 2017 break;
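The Src2 decoding and the emulate_2op_cl() macro added above let the emulator cover the three-operand double shifts (opcodes 0F A4/A5 shld and 0F AC/AD shrd). For reference, this is the arithmetic being emulated; the snippet below is a userspace illustration (x86 only, not emulator code) that compares the hardware shld result against the equivalent plain-C expression, valid for shift counts 1..31.

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t dst = 0x12345678, src = 0x9abcdef0, cnt = 8;
		uint32_t hw = dst, sw;

		/* SHLD dst, src, CL: shift dst left, filling from src's high bits */
		asm("shldl %%cl, %2, %0" : "+r"(hw) : "c"(cnt), "r"(src) : "cc");
		sw = (dst << cnt) | (src >> (32 - cnt));

		printf("shld: hw=%#x sw=%#x\n", hw, sw);
		return 0;
	}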
+6 -1
fs/anon_inodes.c
··· 79 if (IS_ERR(anon_inode_inode)) 80 return -ENODEV; 81 82 error = get_unused_fd_flags(flags); 83 if (error < 0) 84 - return error; 85 fd = error; 86 87 /* ··· 131 dput(dentry); 132 err_put_unused_fd: 133 put_unused_fd(fd); 134 return error; 135 } 136 EXPORT_SYMBOL_GPL(anon_inode_getfd);
··· 79 if (IS_ERR(anon_inode_inode)) 80 return -ENODEV; 81 82 + if (fops->owner && !try_module_get(fops->owner)) 83 + return -ENOENT; 84 + 85 error = get_unused_fd_flags(flags); 86 if (error < 0) 87 + goto err_module; 88 fd = error; 89 90 /* ··· 128 dput(dentry); 129 err_put_unused_fd: 130 put_unused_fd(fd); 131 + err_module: 132 + module_put(fops->owner); 133 return error; 134 } 135 EXPORT_SYMBOL_GPL(anon_inode_getfd);
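With this change anon_inode_getfd() holds a reference on fops->owner for the lifetime of the descriptor (and the kvm_main.c hunk later in this series sets .owner on the vm/vcpu fops at init time so the KVM module cannot be unloaded under an open fd). A sketch of what a caller needs to provide for that to work; the example_* names are hypothetical and the snippet assumes normal kernel-module context rather than being standalone-buildable.

	#include <linux/fs.h>
	#include <linux/module.h>

	static int example_release(struct inode *inode, struct file *filp)
	{
		return 0;	/* nothing to tear down in this sketch */
	}

	static const struct file_operations example_fops = {
		.owner	 = THIS_MODULE,	/* pinned by anon_inode_getfd() */
		.release = example_release,
	};

	/* roughly: fd = anon_inode_getfd("example", &example_fops, priv, 0); */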
+18
include/linux/kvm.h
··· 83 #define KVM_EXIT_S390_SIEIC 13 84 #define KVM_EXIT_S390_RESET 14 85 #define KVM_EXIT_DCR 15 86 87 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ 88 struct kvm_run { ··· 388 #define KVM_CAP_DEVICE_ASSIGNMENT 17 389 #endif 390 #define KVM_CAP_IOMMU 18 391 392 /* 393 * ioctls for VM fds ··· 467 #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) 468 #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) 469 #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) 470 471 #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) 472 #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) ··· 511 __u32 guest_irq; 512 __u32 flags; 513 union { 514 __u32 reserved[12]; 515 }; 516 }; 517 518 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 519 520 #endif
··· 83 #define KVM_EXIT_S390_SIEIC 13 84 #define KVM_EXIT_S390_RESET 14 85 #define KVM_EXIT_DCR 15 86 + #define KVM_EXIT_NMI 16 87 88 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ 89 struct kvm_run { ··· 387 #define KVM_CAP_DEVICE_ASSIGNMENT 17 388 #endif 389 #define KVM_CAP_IOMMU 18 390 + #if defined(CONFIG_X86) 391 + #define KVM_CAP_DEVICE_MSI 20 392 + #endif 393 + /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ 394 + #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 395 + #if defined(CONFIG_X86) 396 + #define KVM_CAP_USER_NMI 22 397 + #endif 398 399 /* 400 * ioctls for VM fds ··· 458 #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) 459 #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) 460 #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) 461 + /* Available with KVM_CAP_NMI */ 462 + #define KVM_NMI _IO(KVMIO, 0x9a) 463 464 #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) 465 #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) ··· 500 __u32 guest_irq; 501 __u32 flags; 502 union { 503 + struct { 504 + __u32 addr_lo; 505 + __u32 addr_hi; 506 + __u32 data; 507 + } guest_msi; 508 __u32 reserved[12]; 509 }; 510 }; 511 512 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 513 + 514 + #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) 515 516 #endif
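KVM_CAP_USER_NMI (22) and the argument-less KVM_NMI ioctl are the user-visible side of the reworked NMI injection. A hypothetical userspace snippet showing the intended probe-then-inject sequence; the fds are assumed to come from the usual /dev/kvm, KVM_CREATE_VM and KVM_CREATE_VCPU calls, and the fallback defines simply mirror the values added above for builds against an older header.

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	#ifndef KVM_CAP_USER_NMI
	#define KVM_CAP_USER_NMI 22
	#endif
	#ifndef KVM_NMI
	#define KVM_NMI _IO(KVMIO, 0x9a)
	#endif

	static int inject_nmi(int sys_fd, int vcpu_fd)
	{
		if (ioctl(sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_NMI) <= 0)
			return -1;		/* capability not offered */
		return ioctl(vcpu_fd, KVM_NMI);	/* 0 on success */
	}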
+9 -3
include/linux/kvm_host.h
··· 16 #include <linux/mm.h> 17 #include <linux/preempt.h> 18 #include <linux/marker.h> 19 #include <asm/signal.h> 20 21 #include <linux/kvm.h> ··· 307 int host_busnr; 308 int host_devfn; 309 int host_irq; 310 int guest_irq; 311 - int irq_requested; 312 int irq_source_id; 313 struct pci_dev *dev; 314 struct kvm *kvm; ··· 323 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); 324 void kvm_register_irq_ack_notifier(struct kvm *kvm, 325 struct kvm_irq_ack_notifier *kian); 326 - void kvm_unregister_irq_ack_notifier(struct kvm *kvm, 327 - struct kvm_irq_ack_notifier *kian); 328 int kvm_request_irq_source_id(struct kvm *kvm); 329 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 330
··· 16 #include <linux/mm.h> 17 #include <linux/preempt.h> 18 #include <linux/marker.h> 19 + #include <linux/msi.h> 20 #include <asm/signal.h> 21 22 #include <linux/kvm.h> ··· 306 int host_busnr; 307 int host_devfn; 308 int host_irq; 309 + bool host_irq_disabled; 310 int guest_irq; 311 + struct msi_msg guest_msi; 312 + #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) 313 + #define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) 314 + #define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) 315 + #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) 316 + unsigned long irq_requested_type; 317 int irq_source_id; 318 struct pci_dev *dev; 319 struct kvm *kvm; ··· 316 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); 317 void kvm_register_irq_ack_notifier(struct kvm *kvm, 318 struct kvm_irq_ack_notifier *kian); 319 + void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); 320 int kvm_request_irq_source_id(struct kvm *kvm); 321 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 322
+5 -3
virt/kvm/ioapic.c
··· 150 static void ioapic_inj_nmi(struct kvm_vcpu *vcpu) 151 { 152 kvm_inject_nmi(vcpu); 153 } 154 155 - static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, 156 - u8 dest_mode) 157 { 158 u32 mask = 0; 159 int i; ··· 208 "vector=%x trig_mode=%x\n", 209 dest, dest_mode, delivery_mode, vector, trig_mode); 210 211 - deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode); 212 if (!deliver_bitmask) { 213 ioapic_debug("no target on destination\n"); 214 return 0;
··· 150 static void ioapic_inj_nmi(struct kvm_vcpu *vcpu) 151 { 152 kvm_inject_nmi(vcpu); 153 + kvm_vcpu_kick(vcpu); 154 } 155 156 + u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, 157 + u8 dest_mode) 158 { 159 u32 mask = 0; 160 int i; ··· 207 "vector=%x trig_mode=%x\n", 208 dest, dest_mode, delivery_mode, vector, trig_mode); 209 210 + deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest, 211 + dest_mode); 212 if (!deliver_bitmask) { 213 ioapic_debug("no target on destination\n"); 214 return 0;
+2
virt/kvm/ioapic.h
··· 85 int kvm_ioapic_init(struct kvm *kvm); 86 void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); 87 void kvm_ioapic_reset(struct kvm_ioapic *ioapic); 88 89 #endif
··· 85 int kvm_ioapic_init(struct kvm *kvm); 86 void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); 87 void kvm_ioapic_reset(struct kvm_ioapic *ioapic); 88 + u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, 89 + u8 dest_mode); 90 91 #endif
+12 -7
virt/kvm/irq_comm.c
··· 61 hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); 62 } 63 64 - void kvm_unregister_irq_ack_notifier(struct kvm *kvm, 65 - struct kvm_irq_ack_notifier *kian) 66 { 67 - hlist_del(&kian->link); 68 } 69 70 /* The caller must hold kvm->lock mutex */ ··· 72 unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; 73 int irq_source_id = find_first_zero_bit(bitmap, 74 sizeof(kvm->arch.irq_sources_bitmap)); 75 if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { 76 printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); 77 - irq_source_id = -EFAULT; 78 - } else 79 - set_bit(irq_source_id, bitmap); 80 return irq_source_id; 81 } 82 ··· 88 { 89 int i; 90 91 - if (irq_source_id <= 0 || 92 irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { 93 printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); 94 return;
··· 61 hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); 62 } 63 64 + void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian) 65 { 66 + hlist_del_init(&kian->link); 67 } 68 69 /* The caller must hold kvm->lock mutex */ ··· 73 unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; 74 int irq_source_id = find_first_zero_bit(bitmap, 75 sizeof(kvm->arch.irq_sources_bitmap)); 76 + 77 if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { 78 printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); 79 + return -EFAULT; 80 + } 81 + 82 + ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); 83 + set_bit(irq_source_id, bitmap); 84 + 85 return irq_source_id; 86 } 87 ··· 85 { 86 int i; 87 88 + ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); 89 + 90 + if (irq_source_id < 0 || 91 irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { 92 printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); 93 return;
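The ack-notifier and IRQ source-id API above is driven by the device-assignment code in kvm_main.c further down: register an ack notifier (gsi stays -1 until the guest irq is known), then allocate a source id to tag injections with. A condensed sketch of that call sequence, kernel context only, with the example_* names being placeholders rather than real functions:

	static void example_irq_acked(struct kvm_irq_ack_notifier *kian)
	{
		/* guest acked the interrupt; safe to re-enable the host line */
	}

	static int example_attach_irq(struct kvm *kvm,
				      struct kvm_assigned_dev_kernel *dev)
	{
		int id;

		dev->ack_notifier.gsi = -1;	/* filled in once the guest irq is known */
		dev->ack_notifier.irq_acked = example_irq_acked;
		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);

		id = kvm_request_irq_source_id(kvm);
		if (id < 0)
			return id;
		dev->irq_source_id = id;

		/* later: kvm_set_irq(kvm, dev->irq_source_id, dev->guest_irq, 1); */
		return 0;
	}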
+330 -98
virt/kvm/kvm_main.c
··· 47 #include <asm/uaccess.h> 48 #include <asm/pgtable.h> 49 50 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 51 #include "coalesced_mmio.h" 52 #endif ··· 64 MODULE_AUTHOR("Qumranet"); 65 MODULE_LICENSE("GPL"); 66 67 DEFINE_SPINLOCK(kvm_lock); 68 LIST_HEAD(vm_list); 69 70 - static cpumask_t cpus_hardware_enabled; 71 72 struct kmem_cache *kvm_vcpu_cache; 73 EXPORT_SYMBOL_GPL(kvm_vcpu_cache); ··· 82 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 83 unsigned long arg); 84 85 - bool kvm_rebooting; 86 87 #ifdef KVM_CAP_DEVICE_ASSIGNMENT 88 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, 89 int assigned_dev_id) 90 { ··· 162 * finer-grained lock, update this 163 */ 164 mutex_lock(&assigned_dev->kvm->lock); 165 - kvm_set_irq(assigned_dev->kvm, 166 - assigned_dev->irq_source_id, 167 - assigned_dev->guest_irq, 1); 168 mutex_unlock(&assigned_dev->kvm->lock); 169 kvm_put_kvm(assigned_dev->kvm); 170 } ··· 182 (struct kvm_assigned_dev_kernel *) dev_id; 183 184 kvm_get_kvm(assigned_dev->kvm); 185 schedule_work(&assigned_dev->interrupt_work); 186 disable_irq_nosync(irq); 187 return IRQ_HANDLED; 188 } 189 ··· 201 202 dev = container_of(kian, struct kvm_assigned_dev_kernel, 203 ack_notifier); 204 kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); 205 - enable_irq(dev->host_irq); 206 } 207 208 - static void kvm_free_assigned_device(struct kvm *kvm, 209 - struct kvm_assigned_dev_kernel 210 - *assigned_dev) 211 { 212 - if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) 213 - free_irq(assigned_dev->host_irq, (void *)assigned_dev); 214 215 - kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); 216 - kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); 217 218 if (cancel_work_sync(&assigned_dev->interrupt_work)) 219 /* We had pending work. That means we will have to take 220 * care of kvm_put_kvm. 221 */ 222 kvm_put_kvm(kvm); 223 224 pci_release_regions(assigned_dev->dev); 225 pci_disable_device(assigned_dev->dev); ··· 273 } 274 } 275 276 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, 277 struct kvm_assigned_irq 278 *assigned_irq) ··· 378 return -EINVAL; 379 } 380 381 - if (match->irq_requested) { 382 - match->guest_irq = assigned_irq->guest_irq; 383 - match->ack_notifier.gsi = assigned_irq->guest_irq; 384 - mutex_unlock(&kvm->lock); 385 - return 0; 386 - } 387 388 - INIT_WORK(&match->interrupt_work, 389 - kvm_assigned_dev_interrupt_work_handler); 390 391 - if (irqchip_in_kernel(kvm)) { 392 - if (!capable(CAP_SYS_RAWIO)) { 393 - r = -EPERM; 394 - goto out_release; 395 - } 396 - 397 - if (assigned_irq->host_irq) 398 - match->host_irq = assigned_irq->host_irq; 399 - else 400 - match->host_irq = match->dev->irq; 401 - match->guest_irq = assigned_irq->guest_irq; 402 - match->ack_notifier.gsi = assigned_irq->guest_irq; 403 - match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; 404 - kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); 405 - r = kvm_request_irq_source_id(kvm); 406 - if (r < 0) 407 - goto out_release; 408 - else 409 - match->irq_source_id = r; 410 - 411 - /* Even though this is PCI, we don't want to use shared 412 - * interrupts. Sharing host devices with guest-assigned devices 413 - * on the same interrupt line is not a happy situation: there 414 - * are going to be long delays in accepting, acking, etc. 
415 - */ 416 - if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, 417 - "kvm_assigned_device", (void *)match)) { 418 - r = -EIO; 419 - goto out_release; 420 } 421 } 422 423 - match->irq_requested = true; 424 mutex_unlock(&kvm->lock); 425 return r; 426 out_release: ··· 490 __func__); 491 goto out_disable; 492 } 493 match->assigned_dev_id = assigned_dev->assigned_dev_id; 494 match->host_busnr = assigned_dev->busnr; 495 match->host_devfn = assigned_dev->devfn; 496 match->dev = dev; 497 - 498 match->kvm = kvm; 499 500 list_add(&match->list, &kvm->arch.assigned_dev_head); ··· 565 { 566 } 567 568 - void kvm_flush_remote_tlbs(struct kvm *kvm) 569 { 570 int i, cpu, me; 571 - cpumask_t cpus; 572 struct kvm_vcpu *vcpu; 573 574 me = get_cpu(); 575 - cpus_clear(cpus); 576 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 577 vcpu = kvm->vcpus[i]; 578 if (!vcpu) 579 continue; 580 - if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 581 continue; 582 cpu = vcpu->cpu; 583 - if (cpu != -1 && cpu != me) 584 - cpu_set(cpu, cpus); 585 } 586 - if (cpus_empty(cpus)) 587 - goto out; 588 - ++kvm->stat.remote_tlb_flush; 589 - smp_call_function_mask(cpus, ack_flush, NULL, 1); 590 - out: 591 put_cpu(); 592 } 593 594 void kvm_reload_remote_mmus(struct kvm *kvm) 595 { 596 - int i, cpu, me; 597 - cpumask_t cpus; 598 - struct kvm_vcpu *vcpu; 599 - 600 - me = get_cpu(); 601 - cpus_clear(cpus); 602 - for (i = 0; i < KVM_MAX_VCPUS; ++i) { 603 - vcpu = kvm->vcpus[i]; 604 - if (!vcpu) 605 - continue; 606 - if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 607 - continue; 608 - cpu = vcpu->cpu; 609 - if (cpu != -1 && cpu != me) 610 - cpu_set(cpu, cpus); 611 - } 612 - if (cpus_empty(cpus)) 613 - goto out; 614 - smp_call_function_mask(cpus, ack_flush, NULL, 1); 615 - out: 616 - put_cpu(); 617 } 618 - 619 620 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 621 { ··· 911 goto out; 912 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) 913 goto out; 914 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 915 goto out; 916 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) ··· 1024 goto out_free; 1025 } 1026 1027 - kvm_free_physmem_slot(&old, &new); 1028 #ifdef CONFIG_DMAR 1029 /* map the pages in iommu page table */ 1030 r = kvm_iommu_map_pages(kvm, base_gfn, npages); ··· 1124 } 1125 EXPORT_SYMBOL_GPL(kvm_is_error_hva); 1126 1127 - static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 1128 { 1129 int i; 1130 ··· 1137 } 1138 return NULL; 1139 } 1140 1141 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 1142 { 1143 gfn = unalias_gfn(kvm, gfn); 1144 - return __gfn_to_memslot(kvm, gfn); 1145 } 1146 1147 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) ··· 1166 struct kvm_memory_slot *slot; 1167 1168 gfn = unalias_gfn(kvm, gfn); 1169 - slot = __gfn_to_memslot(kvm, gfn); 1170 if (!slot) 1171 return bad_hva(); 1172 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); ··· 1417 struct kvm_memory_slot *memslot; 1418 1419 gfn = unalias_gfn(kvm, gfn); 1420 - memslot = __gfn_to_memslot(kvm, gfn); 1421 if (memslot && memslot->dirty_bitmap) { 1422 unsigned long rel_gfn = gfn - memslot->base_gfn; 1423 ··· 1502 return 0; 1503 } 1504 1505 - static const struct file_operations kvm_vcpu_fops = { 1506 .release = kvm_vcpu_release, 1507 .unlocked_ioctl = kvm_vcpu_ioctl, 1508 .compat_ioctl = kvm_vcpu_ioctl, ··· 1896 return 0; 1897 } 1898 1899 - static const struct file_operations kvm_vm_fops = { 1900 .release = kvm_vm_release, 1901 
.unlocked_ioctl = kvm_vm_ioctl, 1902 .compat_ioctl = kvm_vm_ioctl, ··· 1918 return fd; 1919 } 1920 1921 static long kvm_dev_ioctl(struct file *filp, 1922 unsigned int ioctl, unsigned long arg) 1923 { ··· 1949 r = kvm_dev_ioctl_create_vm(); 1950 break; 1951 case KVM_CHECK_EXTENSION: 1952 - r = kvm_dev_ioctl_check_extension(arg); 1953 break; 1954 case KVM_GET_VCPU_MMAP_SIZE: 1955 r = -EINVAL; ··· 1990 { 1991 int cpu = raw_smp_processor_id(); 1992 1993 - if (cpu_isset(cpu, cpus_hardware_enabled)) 1994 return; 1995 - cpu_set(cpu, cpus_hardware_enabled); 1996 kvm_arch_hardware_enable(NULL); 1997 } 1998 ··· 2000 { 2001 int cpu = raw_smp_processor_id(); 2002 2003 - if (!cpu_isset(cpu, cpus_hardware_enabled)) 2004 return; 2005 - cpu_clear(cpu, cpus_hardware_enabled); 2006 kvm_arch_hardware_disable(NULL); 2007 } 2008 ··· 2236 2237 bad_pfn = page_to_pfn(bad_page); 2238 2239 r = kvm_arch_hardware_setup(); 2240 if (r < 0) 2241 - goto out_free_0; 2242 2243 for_each_online_cpu(cpu) { 2244 smp_call_function_single(cpu, ··· 2277 } 2278 2279 kvm_chardev_ops.owner = module; 2280 2281 r = misc_register(&kvm_dev); 2282 if (r) { ··· 2288 2289 kvm_preempt_ops.sched_in = kvm_sched_in; 2290 kvm_preempt_ops.sched_out = kvm_sched_out; 2291 2292 return 0; 2293 ··· 2307 on_each_cpu(hardware_disable, NULL, 1); 2308 out_free_1: 2309 kvm_arch_hardware_unsetup(); 2310 out_free_0: 2311 __free_page(bad_page); 2312 out: ··· 2332 kvm_arch_hardware_unsetup(); 2333 kvm_arch_exit(); 2334 kvm_exit_debug(); 2335 __free_page(bad_page); 2336 } 2337 EXPORT_SYMBOL_GPL(kvm_exit);
··· 47 #include <asm/uaccess.h> 48 #include <asm/pgtable.h> 49 50 + #ifdef CONFIG_X86 51 + #include <asm/msidef.h> 52 + #endif 53 + 54 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 55 #include "coalesced_mmio.h" 56 #endif ··· 60 MODULE_AUTHOR("Qumranet"); 61 MODULE_LICENSE("GPL"); 62 63 + static int msi2intx = 1; 64 + module_param(msi2intx, bool, 0); 65 + 66 DEFINE_SPINLOCK(kvm_lock); 67 LIST_HEAD(vm_list); 68 69 + static cpumask_var_t cpus_hardware_enabled; 70 71 struct kmem_cache *kvm_vcpu_cache; 72 EXPORT_SYMBOL_GPL(kvm_vcpu_cache); ··· 75 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 76 unsigned long arg); 77 78 + static bool kvm_rebooting; 79 80 #ifdef KVM_CAP_DEVICE_ASSIGNMENT 81 + 82 + #ifdef CONFIG_X86 83 + static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) 84 + { 85 + int vcpu_id; 86 + struct kvm_vcpu *vcpu; 87 + struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm); 88 + int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK) 89 + >> MSI_ADDR_DEST_ID_SHIFT; 90 + int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK) 91 + >> MSI_DATA_VECTOR_SHIFT; 92 + int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, 93 + (unsigned long *)&dev->guest_msi.address_lo); 94 + int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, 95 + (unsigned long *)&dev->guest_msi.data); 96 + int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, 97 + (unsigned long *)&dev->guest_msi.data); 98 + u32 deliver_bitmask; 99 + 100 + BUG_ON(!ioapic); 101 + 102 + deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, 103 + dest_id, dest_mode); 104 + /* IOAPIC delivery mode value is the same as MSI here */ 105 + switch (delivery_mode) { 106 + case IOAPIC_LOWEST_PRIORITY: 107 + vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, 108 + deliver_bitmask); 109 + if (vcpu != NULL) 110 + kvm_apic_set_irq(vcpu, vector, trig_mode); 111 + else 112 + printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); 113 + break; 114 + case IOAPIC_FIXED: 115 + for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { 116 + if (!(deliver_bitmask & (1 << vcpu_id))) 117 + continue; 118 + deliver_bitmask &= ~(1 << vcpu_id); 119 + vcpu = ioapic->kvm->vcpus[vcpu_id]; 120 + if (vcpu) 121 + kvm_apic_set_irq(vcpu, vector, trig_mode); 122 + } 123 + break; 124 + default: 125 + printk(KERN_INFO "kvm: unsupported MSI delivery mode\n"); 126 + } 127 + } 128 + #else 129 + static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {} 130 + #endif 131 + 132 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, 133 int assigned_dev_id) 134 { ··· 104 * finer-grained lock, update this 105 */ 106 mutex_lock(&assigned_dev->kvm->lock); 107 + if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) 108 + kvm_set_irq(assigned_dev->kvm, 109 + assigned_dev->irq_source_id, 110 + assigned_dev->guest_irq, 1); 111 + else if (assigned_dev->irq_requested_type & 112 + KVM_ASSIGNED_DEV_GUEST_MSI) { 113 + assigned_device_msi_dispatch(assigned_dev); 114 + enable_irq(assigned_dev->host_irq); 115 + assigned_dev->host_irq_disabled = false; 116 + } 117 mutex_unlock(&assigned_dev->kvm->lock); 118 kvm_put_kvm(assigned_dev->kvm); 119 } ··· 117 (struct kvm_assigned_dev_kernel *) dev_id; 118 119 kvm_get_kvm(assigned_dev->kvm); 120 + 121 schedule_work(&assigned_dev->interrupt_work); 122 + 123 disable_irq_nosync(irq); 124 + assigned_dev->host_irq_disabled = true; 125 + 126 return IRQ_HANDLED; 127 } 128 ··· 132 133 dev = container_of(kian, struct kvm_assigned_dev_kernel, 134 
ack_notifier); 135 + 136 kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); 137 + 138 + /* The guest irq may be shared so this ack may be 139 + * from another device. 140 + */ 141 + if (dev->host_irq_disabled) { 142 + enable_irq(dev->host_irq); 143 + dev->host_irq_disabled = false; 144 + } 145 } 146 147 + static void kvm_free_assigned_irq(struct kvm *kvm, 148 + struct kvm_assigned_dev_kernel *assigned_dev) 149 { 150 + if (!irqchip_in_kernel(kvm)) 151 + return; 152 153 + kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); 154 + 155 + if (assigned_dev->irq_source_id != -1) 156 + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); 157 + assigned_dev->irq_source_id = -1; 158 + 159 + if (!assigned_dev->irq_requested_type) 160 + return; 161 162 if (cancel_work_sync(&assigned_dev->interrupt_work)) 163 /* We had pending work. That means we will have to take 164 * care of kvm_put_kvm. 165 */ 166 kvm_put_kvm(kvm); 167 + 168 + free_irq(assigned_dev->host_irq, (void *)assigned_dev); 169 + 170 + if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) 171 + pci_disable_msi(assigned_dev->dev); 172 + 173 + assigned_dev->irq_requested_type = 0; 174 + } 175 + 176 + 177 + static void kvm_free_assigned_device(struct kvm *kvm, 178 + struct kvm_assigned_dev_kernel 179 + *assigned_dev) 180 + { 181 + kvm_free_assigned_irq(kvm, assigned_dev); 182 + 183 + pci_reset_function(assigned_dev->dev); 184 185 pci_release_regions(assigned_dev->dev); 186 pci_disable_device(assigned_dev->dev); ··· 174 } 175 } 176 177 + static int assigned_device_update_intx(struct kvm *kvm, 178 + struct kvm_assigned_dev_kernel *adev, 179 + struct kvm_assigned_irq *airq) 180 + { 181 + adev->guest_irq = airq->guest_irq; 182 + adev->ack_notifier.gsi = airq->guest_irq; 183 + 184 + if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) 185 + return 0; 186 + 187 + if (irqchip_in_kernel(kvm)) { 188 + if (!msi2intx && 189 + adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) { 190 + free_irq(adev->host_irq, (void *)kvm); 191 + pci_disable_msi(adev->dev); 192 + } 193 + 194 + if (!capable(CAP_SYS_RAWIO)) 195 + return -EPERM; 196 + 197 + if (airq->host_irq) 198 + adev->host_irq = airq->host_irq; 199 + else 200 + adev->host_irq = adev->dev->irq; 201 + 202 + /* Even though this is PCI, we don't want to use shared 203 + * interrupts. Sharing host devices with guest-assigned devices 204 + * on the same interrupt line is not a happy situation: there 205 + * are going to be long delays in accepting, acking, etc. 
206 + */ 207 + if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 208 + 0, "kvm_assigned_intx_device", (void *)adev)) 209 + return -EIO; 210 + } 211 + 212 + adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX | 213 + KVM_ASSIGNED_DEV_HOST_INTX; 214 + return 0; 215 + } 216 + 217 + #ifdef CONFIG_X86 218 + static int assigned_device_update_msi(struct kvm *kvm, 219 + struct kvm_assigned_dev_kernel *adev, 220 + struct kvm_assigned_irq *airq) 221 + { 222 + int r; 223 + 224 + if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { 225 + /* x86 don't care upper address of guest msi message addr */ 226 + adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; 227 + adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; 228 + adev->guest_msi.address_lo = airq->guest_msi.addr_lo; 229 + adev->guest_msi.data = airq->guest_msi.data; 230 + adev->ack_notifier.gsi = -1; 231 + } else if (msi2intx) { 232 + adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; 233 + adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; 234 + adev->guest_irq = airq->guest_irq; 235 + adev->ack_notifier.gsi = airq->guest_irq; 236 + } 237 + 238 + if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) 239 + return 0; 240 + 241 + if (irqchip_in_kernel(kvm)) { 242 + if (!msi2intx) { 243 + if (adev->irq_requested_type & 244 + KVM_ASSIGNED_DEV_HOST_INTX) 245 + free_irq(adev->host_irq, (void *)adev); 246 + 247 + r = pci_enable_msi(adev->dev); 248 + if (r) 249 + return r; 250 + } 251 + 252 + adev->host_irq = adev->dev->irq; 253 + if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, 254 + "kvm_assigned_msi_device", (void *)adev)) 255 + return -EIO; 256 + } 257 + 258 + if (!msi2intx) 259 + adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; 260 + 261 + adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; 262 + return 0; 263 + } 264 + #endif 265 + 266 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, 267 struct kvm_assigned_irq 268 *assigned_irq) ··· 190 return -EINVAL; 191 } 192 193 + if (!match->irq_requested_type) { 194 + INIT_WORK(&match->interrupt_work, 195 + kvm_assigned_dev_interrupt_work_handler); 196 + if (irqchip_in_kernel(kvm)) { 197 + /* Register ack nofitier */ 198 + match->ack_notifier.gsi = -1; 199 + match->ack_notifier.irq_acked = 200 + kvm_assigned_dev_ack_irq; 201 + kvm_register_irq_ack_notifier(kvm, 202 + &match->ack_notifier); 203 204 + /* Request IRQ source ID */ 205 + r = kvm_request_irq_source_id(kvm); 206 + if (r < 0) 207 + goto out_release; 208 + else 209 + match->irq_source_id = r; 210 211 + #ifdef CONFIG_X86 212 + /* Determine host device irq type, we can know the 213 + * result from dev->msi_enabled */ 214 + if (msi2intx) 215 + pci_enable_msi(match->dev); 216 + #endif 217 } 218 } 219 220 + if ((!msi2intx && 221 + (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) || 222 + (msi2intx && match->dev->msi_enabled)) { 223 + #ifdef CONFIG_X86 224 + r = assigned_device_update_msi(kvm, match, assigned_irq); 225 + if (r) { 226 + printk(KERN_WARNING "kvm: failed to enable " 227 + "MSI device!\n"); 228 + goto out_release; 229 + } 230 + #else 231 + r = -ENOTTY; 232 + #endif 233 + } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) { 234 + /* Host device IRQ 0 means don't support INTx */ 235 + if (!msi2intx) { 236 + printk(KERN_WARNING 237 + "kvm: wait device to enable MSI!\n"); 238 + r = 0; 239 + } else { 240 + printk(KERN_WARNING 241 + "kvm: failed to enable MSI device!\n"); 242 + r = -ENOTTY; 243 + goto out_release; 244 + } 245 + } else { 246 + /* Non-sharing INTx mode */ 247 + r = 
assigned_device_update_intx(kvm, match, assigned_irq); 248 + if (r) { 249 + printk(KERN_WARNING "kvm: failed to enable " 250 + "INTx device!\n"); 251 + goto out_release; 252 + } 253 + } 254 + 255 mutex_unlock(&kvm->lock); 256 return r; 257 out_release: ··· 283 __func__); 284 goto out_disable; 285 } 286 + 287 + pci_reset_function(dev); 288 + 289 match->assigned_dev_id = assigned_dev->assigned_dev_id; 290 match->host_busnr = assigned_dev->busnr; 291 match->host_devfn = assigned_dev->devfn; 292 match->dev = dev; 293 + match->irq_source_id = -1; 294 match->kvm = kvm; 295 296 list_add(&match->list, &kvm->arch.assigned_dev_head); ··· 355 { 356 } 357 358 + static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) 359 { 360 int i, cpu, me; 361 + cpumask_var_t cpus; 362 + bool called = true; 363 struct kvm_vcpu *vcpu; 364 365 + if (alloc_cpumask_var(&cpus, GFP_ATOMIC)) 366 + cpumask_clear(cpus); 367 + 368 me = get_cpu(); 369 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 370 vcpu = kvm->vcpus[i]; 371 if (!vcpu) 372 continue; 373 + if (test_and_set_bit(req, &vcpu->requests)) 374 continue; 375 cpu = vcpu->cpu; 376 + if (cpus != NULL && cpu != -1 && cpu != me) 377 + cpumask_set_cpu(cpu, cpus); 378 } 379 + if (unlikely(cpus == NULL)) 380 + smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); 381 + else if (!cpumask_empty(cpus)) 382 + smp_call_function_many(cpus, ack_flush, NULL, 1); 383 + else 384 + called = false; 385 put_cpu(); 386 + free_cpumask_var(cpus); 387 + return called; 388 + } 389 + 390 + void kvm_flush_remote_tlbs(struct kvm *kvm) 391 + { 392 + if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) 393 + ++kvm->stat.remote_tlb_flush; 394 } 395 396 void kvm_reload_remote_mmus(struct kvm *kvm) 397 { 398 + make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); 399 } 400 401 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 402 { ··· 710 goto out; 711 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) 712 goto out; 713 + if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) 714 + goto out; 715 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 716 goto out; 717 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) ··· 821 goto out_free; 822 } 823 824 + kvm_free_physmem_slot(&old, npages ? 
&new : NULL); 825 + /* Slot deletion case: we have to update the current slot */ 826 + if (!npages) 827 + *memslot = old; 828 #ifdef CONFIG_DMAR 829 /* map the pages in iommu page table */ 830 r = kvm_iommu_map_pages(kvm, base_gfn, npages); ··· 918 } 919 EXPORT_SYMBOL_GPL(kvm_is_error_hva); 920 921 + struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) 922 { 923 int i; 924 ··· 931 } 932 return NULL; 933 } 934 + EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased); 935 936 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 937 { 938 gfn = unalias_gfn(kvm, gfn); 939 + return gfn_to_memslot_unaliased(kvm, gfn); 940 } 941 942 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) ··· 959 struct kvm_memory_slot *slot; 960 961 gfn = unalias_gfn(kvm, gfn); 962 + slot = gfn_to_memslot_unaliased(kvm, gfn); 963 if (!slot) 964 return bad_hva(); 965 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); ··· 1210 struct kvm_memory_slot *memslot; 1211 1212 gfn = unalias_gfn(kvm, gfn); 1213 + memslot = gfn_to_memslot_unaliased(kvm, gfn); 1214 if (memslot && memslot->dirty_bitmap) { 1215 unsigned long rel_gfn = gfn - memslot->base_gfn; 1216 ··· 1295 return 0; 1296 } 1297 1298 + static struct file_operations kvm_vcpu_fops = { 1299 .release = kvm_vcpu_release, 1300 .unlocked_ioctl = kvm_vcpu_ioctl, 1301 .compat_ioctl = kvm_vcpu_ioctl, ··· 1689 return 0; 1690 } 1691 1692 + static struct file_operations kvm_vm_fops = { 1693 .release = kvm_vm_release, 1694 .unlocked_ioctl = kvm_vm_ioctl, 1695 .compat_ioctl = kvm_vm_ioctl, ··· 1711 return fd; 1712 } 1713 1714 + static long kvm_dev_ioctl_check_extension_generic(long arg) 1715 + { 1716 + switch (arg) { 1717 + case KVM_CAP_USER_MEMORY: 1718 + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 1719 + return 1; 1720 + default: 1721 + break; 1722 + } 1723 + return kvm_dev_ioctl_check_extension(arg); 1724 + } 1725 + 1726 static long kvm_dev_ioctl(struct file *filp, 1727 unsigned int ioctl, unsigned long arg) 1728 { ··· 1730 r = kvm_dev_ioctl_create_vm(); 1731 break; 1732 case KVM_CHECK_EXTENSION: 1733 + r = kvm_dev_ioctl_check_extension_generic(arg); 1734 break; 1735 case KVM_GET_VCPU_MMAP_SIZE: 1736 r = -EINVAL; ··· 1771 { 1772 int cpu = raw_smp_processor_id(); 1773 1774 + if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) 1775 return; 1776 + cpumask_set_cpu(cpu, cpus_hardware_enabled); 1777 kvm_arch_hardware_enable(NULL); 1778 } 1779 ··· 1781 { 1782 int cpu = raw_smp_processor_id(); 1783 1784 + if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) 1785 return; 1786 + cpumask_clear_cpu(cpu, cpus_hardware_enabled); 1787 kvm_arch_hardware_disable(NULL); 1788 } 1789 ··· 2017 2018 bad_pfn = page_to_pfn(bad_page); 2019 2020 + if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { 2021 + r = -ENOMEM; 2022 + goto out_free_0; 2023 + } 2024 + 2025 r = kvm_arch_hardware_setup(); 2026 if (r < 0) 2027 + goto out_free_0a; 2028 2029 for_each_online_cpu(cpu) { 2030 smp_call_function_single(cpu, ··· 2053 } 2054 2055 kvm_chardev_ops.owner = module; 2056 + kvm_vm_fops.owner = module; 2057 + kvm_vcpu_fops.owner = module; 2058 2059 r = misc_register(&kvm_dev); 2060 if (r) { ··· 2062 2063 kvm_preempt_ops.sched_in = kvm_sched_in; 2064 kvm_preempt_ops.sched_out = kvm_sched_out; 2065 + #ifndef CONFIG_X86 2066 + msi2intx = 0; 2067 + #endif 2068 2069 return 0; 2070 ··· 2078 on_each_cpu(hardware_disable, NULL, 1); 2079 out_free_1: 2080 kvm_arch_hardware_unsetup(); 2081 + out_free_0a: 2082 + free_cpumask_var(cpus_hardware_enabled); 2083 out_free_0: 2084 
__free_page(bad_page); 2085 out: ··· 2101 kvm_arch_hardware_unsetup(); 2102 kvm_arch_exit(); 2103 kvm_exit_debug(); 2104 + free_cpumask_var(cpus_hardware_enabled); 2105 __free_page(bad_page); 2106 } 2107 EXPORT_SYMBOL_GPL(kvm_exit);
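The make_all_cpus_request() hunk above is the core that kvm_flush_remote_tlbs() and kvm_reload_remote_mmus() now share: it swaps the old on-stack cpumask_t for a heap-allocated cpumask_var_t and, if the GFP_ATOMIC allocation fails, simply kicks every online cpu instead of failing the request. A minimal sketch of that allocate-or-broadcast pattern, with the KVM vcpu bookkeeping stripped out (kick_other_cpus() and do_kick() are illustrative names, not anything from the patch):

/* Sketch only: mirrors the pattern of the hunk above, not its vcpu logic. */
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/types.h>

static void do_kick(void *info)
{
	/* per-cpu work; the patch uses an empty ack_flush() here */
}

static bool kick_other_cpus(const struct cpumask *wanted)
{
	cpumask_var_t cpus;
	bool called = true;
	int cpu, me;

	/*
	 * GFP_ATOMIC because callers may not be able to sleep.  Failure is
	 * not fatal: it only means we broadcast instead of targeting cpus.
	 */
	if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
		cpumask_clear(cpus);

	me = get_cpu();			/* no migration while raising IPIs */
	if (cpus != NULL)
		for_each_cpu(cpu, wanted)
			if (cpu != me)
				cpumask_set_cpu(cpu, cpus);

	if (unlikely(cpus == NULL))	/* allocation failed: over-broadcast */
		smp_call_function_many(cpu_online_mask, do_kick, NULL, 1);
	else if (!cpumask_empty(cpus))
		smp_call_function_many(cpus, do_kick, NULL, 1);
	else
		called = false;
	put_cpu();
	free_cpumask_var(cpus);
	return called;
}

Over-broadcasting in the failure path only costs spurious IPIs; in the patch the real work is carried by the per-vcpu request bits set with test_and_set_bit(), not by which cpus receive the interrupt.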
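kvm_dev_ioctl_check_extension_generic() above answers the capabilities implemented entirely in common code, KVM_CAP_USER_MEMORY and the newly advertised KVM_CAP_DESTROY_MEMORY_REGION_WORKS, before deferring everything else to the per-arch kvm_dev_ioctl_check_extension(). User space sees both through the usual KVM_CHECK_EXTENSION ioctl on /dev/kvm; a small stand-alone probe (error handling trimmed) might look like:

/* Query two of the capabilities handled by the common code above. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}
	/* KVM_CHECK_EXTENSION returns > 0 when the capability is present. */
	printf("KVM_CAP_USER_MEMORY: %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY));
	printf("KVM_CAP_DESTROY_MEMORY_REGION_WORKS: %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION,
		     KVM_CAP_DESTROY_MEMORY_REGION_WORKS));
	return 0;
}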
+1
virt/kvm/kvm_trace.c
··· 252 struct kvm_trace_probe *p = &kvm_trace_probes[i]; 253 marker_probe_unregister(p->name, p->probe_func, p); 254 } 255 256 relay_close(kt->rchan); 257 debugfs_remove(kt->lost_file);
··· 252 struct kvm_trace_probe *p = &kvm_trace_probes[i]; 253 marker_probe_unregister(p->name, p->probe_func, p); 254 } 255 + marker_synchronize_unregister(); 256 257 relay_close(kt->rchan); 258 debugfs_remove(kt->lost_file);
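The one-line kvm_trace.c change above puts marker_synchronize_unregister() between unregistering the trace probes and closing the relay channel they log into, so a probe still executing on another cpu cannot write into buffers that are being torn down. A hedged sketch of that teardown ordering, with made-up bookkeeping (struct my_probe, my_probes, my_chan) standing in for kvm_trace's own; only the call ordering mirrors the hunk:

#include <linux/kernel.h>
#include <linux/marker.h>
#include <linux/relay.h>

struct my_probe {
	const char *name;
	marker_probe_func *probe_func;
};

static struct my_probe my_probes[2];	/* registered during setup */
static struct rchan *my_chan;		/* relay channel the probes write to */

static void my_trace_cleanup(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(my_probes); i++)
		marker_probe_unregister(my_probes[i].name,
					my_probes[i].probe_func,
					&my_probes[i]);
	/*
	 * Unregistration does not wait for probes already in flight;
	 * make sure they have all returned before freeing what they use.
	 */
	marker_synchronize_unregister();

	relay_close(my_chan);		/* now safe: no probe can touch it */
}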