Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
"PPC changes will come next week.

- s390: Support for runtime instrumentation within guests, support of
248 VCPUs.

- ARM: rewrite of the arm64 world switch in C, support for 16-bit VM
identifiers. Performance counter virtualization missed the boat.

- x86: Support for more Hyper-V features (synthetic interrupt
controller), MMU cleanups"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (115 commits)
kvm: x86: Fix vmwrite to SECONDARY_VM_EXEC_CONTROL
kvm/x86: Hyper-V SynIC timers tracepoints
kvm/x86: Hyper-V SynIC tracepoints
kvm/x86: Update SynIC timers on guest entry only
kvm/x86: Skip SynIC vector check for QEMU side
kvm/x86: Hyper-V fix SynIC timer disabling condition
kvm/x86: Reorg stimer_expiration() to better control timer restart
kvm/x86: Hyper-V unify stimer_start() and stimer_restart()
kvm/x86: Drop stimer_stop() function
kvm/x86: Hyper-V timers fix incorrect logical operation
KVM: move architecture-dependent requests to arch/
KVM: renumber vcpu->request bits
KVM: document which architecture uses each request bit
KVM: Remove unused KVM_REQ_KICK to save a bit in vcpu->requests
kvm: x86: Check kvm_write_guest return value in kvm_write_wall_clock
KVM: s390: implement the RI support of guest
kvm/s390: drop unpaired smp_mb
kvm: x86: fix comment about {mmu,nested_mmu}.gva_to_gpa
KVM: x86: MMU: Use clear_page() instead of init_shadow_page_table()
arm/arm64: KVM: Detect vGIC presence at runtime
...

+4043 -2393
+41
Documentation/virtual/kvm/api.txt
··· 1451 1451 struct kvm_irq_routing_irqchip irqchip; 1452 1452 struct kvm_irq_routing_msi msi; 1453 1453 struct kvm_irq_routing_s390_adapter adapter; 1454 + struct kvm_irq_routing_hv_sint hv_sint; 1454 1455 __u32 pad[8]; 1455 1456 } u; 1456 1457 }; ··· 1460 1459 #define KVM_IRQ_ROUTING_IRQCHIP 1 1461 1460 #define KVM_IRQ_ROUTING_MSI 2 1462 1461 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 1462 + #define KVM_IRQ_ROUTING_HV_SINT 4 1463 1463 1464 1464 No flags are specified so far, the corresponding field must be set to zero. 1465 1465 ··· 1484 1482 __u32 adapter_id; 1485 1483 }; 1486 1484 1485 + struct kvm_irq_routing_hv_sint { 1486 + __u32 vcpu; 1487 + __u32 sint; 1488 + }; 1487 1489 1488 1490 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated) 1489 1491 ··· 3337 3331 it is still asserted. Vector is the LAPIC interrupt vector for which the 3338 3332 EOI was received. 3339 3333 3334 + struct kvm_hyperv_exit { 3335 + #define KVM_EXIT_HYPERV_SYNIC 1 3336 + __u32 type; 3337 + union { 3338 + struct { 3339 + __u32 msr; 3340 + __u64 control; 3341 + __u64 evt_page; 3342 + __u64 msg_page; 3343 + } synic; 3344 + } u; 3345 + }; 3346 + /* KVM_EXIT_HYPERV */ 3347 + struct kvm_hyperv_exit hyperv; 3348 + Indicates that the VCPU exits into userspace to process some tasks 3349 + related to Hyper-V emulation. 3350 + Valid values for 'type' are: 3351 + KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about 3352 + Hyper-V SynIC state change. Notification is used to remap SynIC 3353 + event/message pages and to enable/disable SynIC messages/events processing 3354 + in userspace. 3355 + 3340 3356 /* Fix the size of the union. */ 3341 3357 char padding[256]; 3342 3358 }; ··· 3713 3685 H_RANDOM hypercall backed by a hardware random-number generator. 3714 3686 If present, the kernel H_RANDOM handler can be enabled for guest use 3715 3687 with the KVM_CAP_PPC_ENABLE_HCALL capability. 
3688 + 3689 + 8.2 KVM_CAP_HYPERV_SYNIC 3690 + 3691 + Architectures: x86 3692 + This capability, if KVM_CHECK_EXTENSION indicates that it is 3693 + available, means that the kernel has an implementation of the 3694 + Hyper-V Synthetic interrupt controller (SynIC). Hyper-V SynIC is 3695 + used to support Windows Hyper-V based guest paravirt drivers (VMBus). 3696 + 3697 + In order to use SynIC, it has to be activated by setting this 3698 + capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this 3699 + will disable the use of APIC hardware virtualization even if supported 3700 + by the CPU, as it's incompatible with SynIC auto-EOI behavior.
+2 -1
Documentation/virtual/kvm/devices/vm.txt
··· 37 37 Allows userspace to query the actual limit and set a new limit for 38 38 the maximum guest memory size. The limit will be rounded up to 39 39 2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by 40 - the number of page table levels. 40 + the number of page table levels. In the case that there is no limit we will set 41 + the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX). 41 42 42 43 2. GROUP: KVM_S390_VM_CPU_MODEL 43 44 Architectures: s390
+2 -2
Documentation/virtual/kvm/mmu.txt
··· 203 203 page cannot be destroyed. See role.invalid. 204 204 parent_ptes: 205 205 The reverse mapping for the pte/ptes pointing at this page's spt. If 206 - parent_ptes bit 0 is zero, only one spte points at this pages and 206 + parent_ptes bit 0 is zero, only one spte points at this page and 207 207 parent_ptes points at this single spte, otherwise, there exists multiple 208 208 sptes pointing at this page and (parent_ptes & ~0x1) points at a data 209 - structure with a list of parent_ptes. 209 + structure with a list of parent sptes. 210 210 unsync: 211 211 If true, then the translations in this page may not match the guest's 212 212 translation. This is equivalent to the state of the tlb when a pte is
+1
MAINTAINERS
··· 6089 6089 L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) 6090 6090 L: kvmarm@lists.cs.columbia.edu 6091 6091 W: http://systems.cs.columbia.edu/projects/kvm-arm 6092 + T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 6092 6093 S: Supported 6093 6094 F: arch/arm/include/uapi/asm/kvm* 6094 6095 F: arch/arm/include/asm/kvm*
+17 -17
arch/arm/include/asm/kvm_arm.h
··· 19 19 #ifndef __ARM_KVM_ARM_H__ 20 20 #define __ARM_KVM_ARM_H__ 21 21 22 + #include <linux/const.h> 22 23 #include <linux/types.h> 23 24 24 25 /* Hyp Configuration Register (HCR) bits */ ··· 133 132 * space. 134 133 */ 135 134 #define KVM_PHYS_SHIFT (40) 136 - #define KVM_PHYS_SIZE (1ULL << KVM_PHYS_SHIFT) 137 - #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) 138 - #define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) 139 - #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) 135 + #define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) 136 + #define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) 137 + #define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30)) 140 138 141 139 /* Virtualization Translation Control Register (VTCR) bits */ 142 140 #define VTCR_SH0 (3 << 12) ··· 162 162 #define VTTBR_X (5 - KVM_T0SZ) 163 163 #endif 164 164 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) 165 - #define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) 166 - #define VTTBR_VMID_SHIFT (48LLU) 167 - #define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) 165 + #define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) 166 + #define VTTBR_VMID_SHIFT _AC(48, ULL) 167 + #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) 168 168 169 169 /* Hyp Syndrome Register (HSR) bits */ 170 170 #define HSR_EC_SHIFT (26) 171 - #define HSR_EC (0x3fU << HSR_EC_SHIFT) 172 - #define HSR_IL (1U << 25) 171 + #define HSR_EC (_AC(0x3f, UL) << HSR_EC_SHIFT) 172 + #define HSR_IL (_AC(1, UL) << 25) 173 173 #define HSR_ISS (HSR_IL - 1) 174 174 #define HSR_ISV_SHIFT (24) 175 - #define HSR_ISV (1U << HSR_ISV_SHIFT) 175 + #define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT) 176 176 #define HSR_SRT_SHIFT (16) 177 177 #define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) 178 178 #define HSR_FSC (0x3f) ··· 180 180 #define HSR_SSE (1 << 21) 181 181 #define HSR_WNR (1 << 6) 182 182 #define HSR_CV_SHIFT (24) 183 - #define HSR_CV (1U << HSR_CV_SHIFT) 
183 + #define HSR_CV (_AC(1, UL) << HSR_CV_SHIFT) 184 184 #define HSR_COND_SHIFT (20) 185 - #define HSR_COND (0xfU << HSR_COND_SHIFT) 185 + #define HSR_COND (_AC(0xf, UL) << HSR_COND_SHIFT) 186 186 187 187 #define FSC_FAULT (0x04) 188 188 #define FSC_ACCESS (0x08) ··· 210 210 #define HSR_EC_DABT (0x24) 211 211 #define HSR_EC_DABT_HYP (0x25) 212 212 213 - #define HSR_WFI_IS_WFE (1U << 0) 213 + #define HSR_WFI_IS_WFE (_AC(1, UL) << 0) 214 214 215 - #define HSR_HVC_IMM_MASK ((1UL << 16) - 1) 215 + #define HSR_HVC_IMM_MASK ((_AC(1, UL) << 16) - 1) 216 216 217 - #define HSR_DABT_S1PTW (1U << 7) 218 - #define HSR_DABT_CM (1U << 8) 219 - #define HSR_DABT_EA (1U << 9) 217 + #define HSR_DABT_S1PTW (_AC(1, UL) << 7) 218 + #define HSR_DABT_CM (_AC(1, UL) << 8) 219 + #define HSR_DABT_EA (_AC(1, UL) << 9) 220 220 221 221 #define kvm_arm_exception_type \ 222 222 {0, "RESET" }, \
+6
arch/arm/include/asm/kvm_host.h
··· 150 150 u32 halt_successful_poll; 151 151 u32 halt_attempted_poll; 152 152 u32 halt_wakeup; 153 + u32 hvc_exit_stat; 154 + u64 wfe_exit_stat; 155 + u64 wfi_exit_stat; 156 + u64 mmio_exit_user; 157 + u64 mmio_exit_kernel; 158 + u64 exits; 153 159 }; 154 160 155 161 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
+5
arch/arm/include/asm/kvm_mmu.h
··· 279 279 pgd_t *merged_hyp_pgd, 280 280 unsigned long hyp_idmap_start) { } 281 281 282 + static inline unsigned int kvm_get_vmid_bits(void) 283 + { 284 + return 8; 285 + } 286 + 282 287 #endif /* !__ASSEMBLY__ */ 283 288 284 289 #endif /* __ARM_KVM_MMU_H__ */
+36 -4
arch/arm/kvm/arm.c
··· 44 44 #include <asm/kvm_emulate.h> 45 45 #include <asm/kvm_coproc.h> 46 46 #include <asm/kvm_psci.h> 47 + #include <asm/sections.h> 47 48 48 49 #ifdef REQUIRES_VIRT 49 50 __asm__(".arch_extension virt"); ··· 59 58 60 59 /* The VMID used in the VTTBR */ 61 60 static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); 62 - static u8 kvm_next_vmid; 61 + static u32 kvm_next_vmid; 62 + static unsigned int kvm_vmid_bits __read_mostly; 63 63 static DEFINE_SPINLOCK(kvm_vmid_lock); 64 + 65 + static bool vgic_present; 64 66 65 67 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) 66 68 { ··· 136 132 kvm->arch.vmid_gen = 0; 137 133 138 134 /* The maximum number of VCPUs is limited by the host's GIC model */ 139 - kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus(); 135 + kvm->arch.max_vcpus = vgic_present ? 136 + kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; 140 137 141 138 return ret; 142 139 out_free_stage2_pgd: ··· 177 172 int r; 178 173 switch (ext) { 179 174 case KVM_CAP_IRQCHIP: 175 + r = vgic_present; 176 + break; 180 177 case KVM_CAP_IOEVENTFD: 181 178 case KVM_CAP_DEVICE_CTRL: 182 179 case KVM_CAP_USER_MEMORY: ··· 440 433 kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen); 441 434 kvm->arch.vmid = kvm_next_vmid; 442 435 kvm_next_vmid++; 436 + kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; 443 437 444 438 /* update vttbr to be used with the new vmid */ 445 439 pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); 446 440 BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); 447 - vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; 441 + vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); 448 442 kvm->arch.vttbr = pgd_phys | vmid; 449 443 450 444 spin_unlock(&kvm_vmid_lock); ··· 611 603 ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); 612 604 613 605 vcpu->mode = OUTSIDE_GUEST_MODE; 606 + vcpu->stat.exits++; 614 607 /* 615 608 * Back from guest 616 609 *************************************************************/ ··· 922 913 923 914 switch (dev_id) { 924 915 
case KVM_ARM_DEVICE_VGIC_V2: 916 + if (!vgic_present) 917 + return -ENXIO; 925 918 return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); 926 919 default: 927 920 return -ENODEV; ··· 938 927 939 928 switch (ioctl) { 940 929 case KVM_CREATE_IRQCHIP: { 930 + if (!vgic_present) 931 + return -ENXIO; 941 932 return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); 942 933 } 943 934 case KVM_ARM_SET_DEVICE_ADDR: { ··· 1080 1067 goto out_free_mappings; 1081 1068 } 1082 1069 1070 + err = create_hyp_mappings(__start_rodata, __end_rodata); 1071 + if (err) { 1072 + kvm_err("Cannot map rodata section\n"); 1073 + goto out_free_mappings; 1074 + } 1075 + 1083 1076 /* 1084 1077 * Map the Hyp stack pages 1085 1078 */ ··· 1130 1111 * Init HYP view of VGIC 1131 1112 */ 1132 1113 err = kvm_vgic_hyp_init(); 1133 - if (err) 1114 + switch (err) { 1115 + case 0: 1116 + vgic_present = true; 1117 + break; 1118 + case -ENODEV: 1119 + case -ENXIO: 1120 + vgic_present = false; 1121 + break; 1122 + default: 1134 1123 goto out_free_context; 1124 + } 1135 1125 1136 1126 /* 1137 1127 * Init HYP architected timer support ··· 1154 1126 #endif 1155 1127 1156 1128 kvm_perf_init(); 1129 + 1130 + /* set size of VMID supported by CPU */ 1131 + kvm_vmid_bits = kvm_get_vmid_bits(); 1132 + kvm_info("%d-bit VMID\n", kvm_vmid_bits); 1157 1133 1158 1134 kvm_info("Hyp mode initialized successfully\n"); 1159 1135
+38 -36
arch/arm/kvm/emulate.c
··· 275 275 return vbar; 276 276 } 277 277 278 + /* 279 + * Switch to an exception mode, updating both CPSR and SPSR. Follow 280 + * the logic described in AArch32.EnterMode() from the ARMv8 ARM. 281 + */ 282 + static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) 283 + { 284 + unsigned long cpsr = *vcpu_cpsr(vcpu); 285 + u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; 286 + 287 + *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; 288 + 289 + switch (mode) { 290 + case FIQ_MODE: 291 + *vcpu_cpsr(vcpu) |= PSR_F_BIT; 292 + /* Fall through */ 293 + case ABT_MODE: 294 + case IRQ_MODE: 295 + *vcpu_cpsr(vcpu) |= PSR_A_BIT; 296 + /* Fall through */ 297 + default: 298 + *vcpu_cpsr(vcpu) |= PSR_I_BIT; 299 + } 300 + 301 + *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); 302 + 303 + if (sctlr & SCTLR_TE) 304 + *vcpu_cpsr(vcpu) |= PSR_T_BIT; 305 + if (sctlr & SCTLR_EE) 306 + *vcpu_cpsr(vcpu) |= PSR_E_BIT; 307 + 308 + /* Note: These now point to the mode banked copies */ 309 + *vcpu_spsr(vcpu) = cpsr; 310 + } 311 + 278 312 /** 279 313 * kvm_inject_undefined - inject an undefined exception into the guest 280 314 * @vcpu: The VCPU to receive the undefined exception ··· 320 286 */ 321 287 void kvm_inject_undefined(struct kvm_vcpu *vcpu) 322 288 { 323 - unsigned long new_lr_value; 324 - unsigned long new_spsr_value; 325 289 unsigned long cpsr = *vcpu_cpsr(vcpu); 326 - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; 327 290 bool is_thumb = (cpsr & PSR_T_BIT); 328 291 u32 vect_offset = 4; 329 292 u32 return_offset = (is_thumb) ? 
2 : 4; 330 293 331 - new_spsr_value = cpsr; 332 - new_lr_value = *vcpu_pc(vcpu) - return_offset; 333 - 334 - *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | UND_MODE; 335 - *vcpu_cpsr(vcpu) |= PSR_I_BIT; 336 - *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); 337 - 338 - if (sctlr & SCTLR_TE) 339 - *vcpu_cpsr(vcpu) |= PSR_T_BIT; 340 - if (sctlr & SCTLR_EE) 341 - *vcpu_cpsr(vcpu) |= PSR_E_BIT; 342 - 343 - /* Note: These now point to UND banked copies */ 344 - *vcpu_spsr(vcpu) = cpsr; 345 - *vcpu_reg(vcpu, 14) = new_lr_value; 294 + kvm_update_psr(vcpu, UND_MODE); 295 + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) - return_offset; 346 296 347 297 /* Branch to exception vector */ 348 298 *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; ··· 338 320 */ 339 321 static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) 340 322 { 341 - unsigned long new_lr_value; 342 - unsigned long new_spsr_value; 343 323 unsigned long cpsr = *vcpu_cpsr(vcpu); 344 - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; 345 324 bool is_thumb = (cpsr & PSR_T_BIT); 346 325 u32 vect_offset; 347 326 u32 return_offset = (is_thumb) ? 4 : 0; 348 327 bool is_lpae; 349 328 350 - new_spsr_value = cpsr; 351 - new_lr_value = *vcpu_pc(vcpu) + return_offset; 352 - 353 - *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | ABT_MODE; 354 - *vcpu_cpsr(vcpu) |= PSR_I_BIT | PSR_A_BIT; 355 - *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); 356 - 357 - if (sctlr & SCTLR_TE) 358 - *vcpu_cpsr(vcpu) |= PSR_T_BIT; 359 - if (sctlr & SCTLR_EE) 360 - *vcpu_cpsr(vcpu) |= PSR_E_BIT; 361 - 362 - /* Note: These now point to ABT banked copies */ 363 - *vcpu_spsr(vcpu) = cpsr; 364 - *vcpu_reg(vcpu, 14) = new_lr_value; 329 + kvm_update_psr(vcpu, ABT_MODE); 330 + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; 365 331 366 332 if (is_pabt) 367 333 vect_offset = 12;
+6
arch/arm/kvm/guest.c
··· 33 33 #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } 34 34 35 35 struct kvm_stats_debugfs_item debugfs_entries[] = { 36 + VCPU_STAT(hvc_exit_stat), 37 + VCPU_STAT(wfe_exit_stat), 38 + VCPU_STAT(wfi_exit_stat), 39 + VCPU_STAT(mmio_exit_user), 40 + VCPU_STAT(mmio_exit_kernel), 41 + VCPU_STAT(exits), 36 42 { NULL } 37 43 }; 38 44
+3
arch/arm/kvm/handle_exit.c
··· 42 42 43 43 trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), 44 44 kvm_vcpu_hvc_get_imm(vcpu)); 45 + vcpu->stat.hvc_exit_stat++; 45 46 46 47 ret = kvm_psci_call(vcpu); 47 48 if (ret < 0) { ··· 90 89 { 91 90 if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { 92 91 trace_kvm_wfx(*vcpu_pc(vcpu), true); 92 + vcpu->stat.wfe_exit_stat++; 93 93 kvm_vcpu_on_spin(vcpu); 94 94 } else { 95 95 trace_kvm_wfx(*vcpu_pc(vcpu), false); 96 + vcpu->stat.wfi_exit_stat++; 96 97 kvm_vcpu_block(vcpu); 97 98 } 98 99
+3
arch/arm/kvm/mmio.c
··· 210 210 211 211 if (!ret) { 212 212 /* We handled the access successfully in the kernel. */ 213 + vcpu->stat.mmio_exit_kernel++; 213 214 kvm_handle_mmio_return(vcpu, run); 214 215 return 1; 216 + } else { 217 + vcpu->stat.mmio_exit_user++; 215 218 } 216 219 217 220 run->exit_reason = KVM_EXIT_MMIO;
+3 -3
arch/arm/kvm/mmu.c
··· 656 656 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. 657 657 * @kvm: The KVM struct pointer for the VM. 658 658 * 659 - * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can 660 - * support either full 40-bit input addresses or limited to 32-bit input 661 - * addresses). Clears the allocated pages. 659 + * Allocates only the stage-2 HW PGD level table(s) (can support either full 660 + * 40-bit input addresses or limited to 32-bit input addresses). Clears the 661 + * allocated pages. 662 662 * 663 663 * Note we don't need locking here as this is only called when the VM is 664 664 * created, which can only be done once.
+2 -1
arch/arm64/include/asm/kvm_arm.h
··· 125 125 #define VTCR_EL2_SL0_LVL1 (1 << 6) 126 126 #define VTCR_EL2_T0SZ_MASK 0x3f 127 127 #define VTCR_EL2_T0SZ_40B 24 128 + #define VTCR_EL2_VS 19 128 129 129 130 /* 130 131 * We configure the Stage-2 page tables to always restrict the IPA space to be ··· 170 169 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) 171 170 #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) 172 171 #define VTTBR_VMID_SHIFT (UL(48)) 173 - #define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) 172 + #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) 174 173 175 174 /* Hyp System Trap Register */ 176 175 #define HSTR_EL2_T(x) (1 << x)
-76
arch/arm64/include/asm/kvm_asm.h
··· 20 20 21 21 #include <asm/virt.h> 22 22 23 - /* 24 - * 0 is reserved as an invalid value. 25 - * Order *must* be kept in sync with the hyp switch code. 26 - */ 27 - #define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ 28 - #define CSSELR_EL1 2 /* Cache Size Selection Register */ 29 - #define SCTLR_EL1 3 /* System Control Register */ 30 - #define ACTLR_EL1 4 /* Auxiliary Control Register */ 31 - #define CPACR_EL1 5 /* Coprocessor Access Control */ 32 - #define TTBR0_EL1 6 /* Translation Table Base Register 0 */ 33 - #define TTBR1_EL1 7 /* Translation Table Base Register 1 */ 34 - #define TCR_EL1 8 /* Translation Control Register */ 35 - #define ESR_EL1 9 /* Exception Syndrome Register */ 36 - #define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */ 37 - #define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */ 38 - #define FAR_EL1 12 /* Fault Address Register */ 39 - #define MAIR_EL1 13 /* Memory Attribute Indirection Register */ 40 - #define VBAR_EL1 14 /* Vector Base Address Register */ 41 - #define CONTEXTIDR_EL1 15 /* Context ID Register */ 42 - #define TPIDR_EL0 16 /* Thread ID, User R/W */ 43 - #define TPIDRRO_EL0 17 /* Thread ID, User R/O */ 44 - #define TPIDR_EL1 18 /* Thread ID, Privileged */ 45 - #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ 46 - #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ 47 - #define PAR_EL1 21 /* Physical Address Register */ 48 - #define MDSCR_EL1 22 /* Monitor Debug System Control Register */ 49 - #define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ 50 - 51 - /* 32bit specific registers. 
Keep them at the end of the range */ 52 - #define DACR32_EL2 24 /* Domain Access Control Register */ 53 - #define IFSR32_EL2 25 /* Instruction Fault Status Register */ 54 - #define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ 55 - #define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ 56 - #define NR_SYS_REGS 28 57 - 58 - /* 32bit mapping */ 59 - #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ 60 - #define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ 61 - #define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ 62 - #define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ 63 - #define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ 64 - #define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ 65 - #define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ 66 - #define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ 67 - #define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ 68 - #define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. 
*/ 69 - #define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ 70 - #define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ 71 - #define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ 72 - #define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ 73 - #define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ 74 - #define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ 75 - #define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ 76 - #define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ 77 - #define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ 78 - #define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ 79 - #define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ 80 - #define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ 81 - #define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ 82 - #define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ 83 - #define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ 84 - #define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ 85 - #define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ 86 - #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ 87 - #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ 88 - 89 - #define cp14_DBGDSCRext (MDSCR_EL1 * 2) 90 - #define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) 91 - #define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) 92 - #define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) 93 - #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) 94 - #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) 95 - #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) 96 - 97 - #define NR_COPRO_REGS (NR_SYS_REGS * 2) 98 - 99 23 #define ARM_EXCEPTION_IRQ 0 100 24 #define ARM_EXCEPTION_TRAP 1 101 25
-1
arch/arm64/include/asm/kvm_emulate.h
··· 26 26 27 27 #include <asm/esr.h> 28 28 #include <asm/kvm_arm.h> 29 - #include <asm/kvm_asm.h> 30 29 #include <asm/kvm_mmio.h> 31 30 #include <asm/ptrace.h> 32 31 #include <asm/cputype.h>
+86 -1
arch/arm64/include/asm/kvm_host.h
··· 25 25 #include <linux/types.h> 26 26 #include <linux/kvm_types.h> 27 27 #include <asm/kvm.h> 28 - #include <asm/kvm_asm.h> 29 28 #include <asm/kvm_mmio.h> 30 29 31 30 #define __KVM_HAVE_ARCH_INTC_INITIALIZED ··· 83 84 u64 far_el2; /* Hyp Fault Address Register */ 84 85 u64 hpfar_el2; /* Hyp IPA Fault Address Register */ 85 86 }; 87 + 88 + /* 89 + * 0 is reserved as an invalid value. 90 + * Order should be kept in sync with the save/restore code. 91 + */ 92 + enum vcpu_sysreg { 93 + __INVALID_SYSREG__, 94 + MPIDR_EL1, /* MultiProcessor Affinity Register */ 95 + CSSELR_EL1, /* Cache Size Selection Register */ 96 + SCTLR_EL1, /* System Control Register */ 97 + ACTLR_EL1, /* Auxiliary Control Register */ 98 + CPACR_EL1, /* Coprocessor Access Control */ 99 + TTBR0_EL1, /* Translation Table Base Register 0 */ 100 + TTBR1_EL1, /* Translation Table Base Register 1 */ 101 + TCR_EL1, /* Translation Control Register */ 102 + ESR_EL1, /* Exception Syndrome Register */ 103 + AFSR0_EL1, /* Auxilary Fault Status Register 0 */ 104 + AFSR1_EL1, /* Auxilary Fault Status Register 1 */ 105 + FAR_EL1, /* Fault Address Register */ 106 + MAIR_EL1, /* Memory Attribute Indirection Register */ 107 + VBAR_EL1, /* Vector Base Address Register */ 108 + CONTEXTIDR_EL1, /* Context ID Register */ 109 + TPIDR_EL0, /* Thread ID, User R/W */ 110 + TPIDRRO_EL0, /* Thread ID, User R/O */ 111 + TPIDR_EL1, /* Thread ID, Privileged */ 112 + AMAIR_EL1, /* Aux Memory Attribute Indirection Register */ 113 + CNTKCTL_EL1, /* Timer Control Register (EL1) */ 114 + PAR_EL1, /* Physical Address Register */ 115 + MDSCR_EL1, /* Monitor Debug System Control Register */ 116 + MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ 117 + 118 + /* 32bit specific registers. 
Keep them at the end of the range */ 119 + DACR32_EL2, /* Domain Access Control Register */ 120 + IFSR32_EL2, /* Instruction Fault Status Register */ 121 + FPEXC32_EL2, /* Floating-Point Exception Control Register */ 122 + DBGVCR32_EL2, /* Debug Vector Catch Register */ 123 + 124 + NR_SYS_REGS /* Nothing after this line! */ 125 + }; 126 + 127 + /* 32bit mapping */ 128 + #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ 129 + #define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ 130 + #define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ 131 + #define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ 132 + #define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ 133 + #define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ 134 + #define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ 135 + #define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ 136 + #define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ 137 + #define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. 
*/ 138 + #define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ 139 + #define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ 140 + #define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ 141 + #define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ 142 + #define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ 143 + #define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ 144 + #define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ 145 + #define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ 146 + #define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ 147 + #define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ 148 + #define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ 149 + #define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ 150 + #define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ 151 + #define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ 152 + #define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ 153 + #define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ 154 + #define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ 155 + #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ 156 + #define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ 157 + 158 + #define cp14_DBGDSCRext (MDSCR_EL1 * 2) 159 + #define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) 160 + #define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) 161 + #define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) 162 + #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) 163 + #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) 164 + #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) 165 + 166 + #define NR_COPRO_REGS (NR_SYS_REGS * 2) 86 167 87 168 struct kvm_cpu_context { 88 169 struct kvm_regs gp_regs; ··· 276 197 u32 halt_successful_poll; 277 198 u32 halt_attempted_poll; 278 199 u32 halt_wakeup; 200 + u32 hvc_exit_stat; 201 + u64 wfe_exit_stat; 202 + u64 
wfi_exit_stat; 203 + u64 mmio_exit_user; 204 + u64 mmio_exit_kernel; 205 + u64 exits; 279 206 }; 280 207 281 208 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
-1
arch/arm64/include/asm/kvm_mmio.h
··· 19 19 #define __ARM64_KVM_MMIO_H__ 20 20 21 21 #include <linux/kvm_host.h> 22 - #include <asm/kvm_asm.h> 23 22 #include <asm/kvm_arm.h> 24 23 25 24 /*
+8 -1
arch/arm64/include/asm/kvm_mmu.h
··· 20 20 21 21 #include <asm/page.h> 22 22 #include <asm/memory.h> 23 + #include <asm/cpufeature.h> 23 24 24 25 /* 25 26 * As we only have the TTBR0_EL2 register, we cannot express ··· 159 158 #define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) 160 159 #endif 161 160 #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) 162 - #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) 163 161 164 162 #define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) 165 163 ··· 300 300 idmap_idx = hyp_idmap_start >> VA_BITS; 301 301 VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx])); 302 302 merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE); 303 + } 304 + 305 + static inline unsigned int kvm_get_vmid_bits(void) 306 + { 307 + int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1); 308 + 309 + return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; 303 310 } 304 311 305 312 #endif /* __ASSEMBLY__ */
+21
arch/arm64/include/asm/sysreg.h
··· 20 20 #ifndef __ASM_SYSREG_H 21 21 #define __ASM_SYSREG_H 22 22 23 + #include <linux/stringify.h> 24 + 23 25 #include <asm/opcodes.h> 24 26 25 27 /* ··· 210 208 211 209 #else 212 210 211 + #include <linux/types.h> 212 + 213 213 asm( 214 214 " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" 215 215 " .equ __reg_num_x\\num, \\num\n" ··· 236 232 val |= set; 237 233 asm volatile("msr sctlr_el1, %0" : : "r" (val)); 238 234 } 235 + 236 + /* 237 + * Unlike read_cpuid, calls to read_sysreg are never expected to be 238 + * optimized away or replaced with synthetic values. 239 + */ 240 + #define read_sysreg(r) ({ \ 241 + u64 __val; \ 242 + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ 243 + __val; \ 244 + }) 245 + 246 + #define write_sysreg(v, r) do { \ 247 + u64 __val = (u64)v; \ 248 + asm volatile("msr " __stringify(r) ", %0" \ 249 + : : "r" (__val)); \ 250 + } while (0) 251 + 239 252 #endif 240 253 241 254 #endif /* __ASM_SYSREG_H */
+1 -39
arch/arm64/kernel/asm-offsets.c
··· 109 109 DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); 110 110 DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); 111 111 DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); 112 - DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); 113 - DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); 114 - DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); 115 - DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); 112 + DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); 116 113 DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); 117 114 DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); 118 115 DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); 119 - DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); 120 - DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); 121 - DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); 122 - DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); 123 - DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); 124 - DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr)); 125 - DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); 126 - DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); 127 - DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); 128 116 DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); 129 - DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state)); 130 - DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); 131 - DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); 132 - DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); 133 - DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); 134 - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); 135 
- DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); 136 - DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); 137 - DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); 138 - DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); 139 - DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); 140 - DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); 141 - DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); 142 - DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); 143 - DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre)); 144 - DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); 145 - DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); 146 - DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); 147 - DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); 148 - DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); 149 - DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); 150 - DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); 151 - DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); 152 - DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); 153 - DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); 154 - DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); 155 117 #endif 156 118 #ifdef CONFIG_CPU_PM 157 119 DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));
+1 -2
arch/arm64/kvm/Makefile
··· 10 10 ARM=../../../arch/arm/kvm 11 11 12 12 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o 13 + obj-$(CONFIG_KVM_ARM_HOST) += hyp/ 13 14 14 15 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o 15 16 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o ··· 23 22 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o 24 23 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o 25 24 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o 26 - kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o 27 25 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o 28 26 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o 29 - kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o 30 27 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
+9 -1
arch/arm64/kvm/guest.c
··· 28 28 #include <asm/cputype.h> 29 29 #include <asm/uaccess.h> 30 30 #include <asm/kvm.h> 31 - #include <asm/kvm_asm.h> 32 31 #include <asm/kvm_emulate.h> 33 32 #include <asm/kvm_coproc.h> 34 33 35 34 #include "trace.h" 36 35 36 + #define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } 37 + #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } 38 + 37 39 struct kvm_stats_debugfs_item debugfs_entries[] = { 40 + VCPU_STAT(hvc_exit_stat), 41 + VCPU_STAT(wfe_exit_stat), 42 + VCPU_STAT(wfi_exit_stat), 43 + VCPU_STAT(mmio_exit_user), 44 + VCPU_STAT(mmio_exit_kernel), 45 + VCPU_STAT(exits), 38 46 { NULL } 39 47 }; 40 48
+4
arch/arm64/kvm/handle_exit.c
··· 23 23 #include <linux/kvm_host.h> 24 24 25 25 #include <asm/esr.h> 26 + #include <asm/kvm_asm.h> 26 27 #include <asm/kvm_coproc.h> 27 28 #include <asm/kvm_emulate.h> 28 29 #include <asm/kvm_mmu.h> ··· 40 39 41 40 trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0), 42 41 kvm_vcpu_hvc_get_imm(vcpu)); 42 + vcpu->stat.hvc_exit_stat++; 43 43 44 44 ret = kvm_psci_call(vcpu); 45 45 if (ret < 0) { ··· 73 71 { 74 72 if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { 75 73 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); 74 + vcpu->stat.wfe_exit_stat++; 76 75 kvm_vcpu_on_spin(vcpu); 77 76 } else { 78 77 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); 78 + vcpu->stat.wfi_exit_stat++; 79 79 kvm_vcpu_block(vcpu); 80 80 } 81 81
+9
arch/arm64/kvm/hyp-init.S
··· 94 94 */ 95 95 mrs x5, ID_AA64MMFR0_EL1 96 96 bfi x4, x5, #16, #3 97 + /* 98 + * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in 99 + * VTCR_EL2. 100 + */ 101 + mrs x5, ID_AA64MMFR1_EL1 102 + ubfx x5, x5, #5, #1 103 + lsl x5, x5, #VTCR_EL2_VS 104 + orr x4, x4, x5 105 + 97 106 msr vtcr_el2, x4 98 107 99 108 mrs x4, mair_el1
+1 -1080
arch/arm64/kvm/hyp.S
··· 17 17 18 18 #include <linux/linkage.h> 19 19 20 - #include <asm/alternative.h> 21 - #include <asm/asm-offsets.h> 22 20 #include <asm/assembler.h> 23 - #include <asm/cpufeature.h> 24 - #include <asm/debug-monitors.h> 25 - #include <asm/esr.h> 26 - #include <asm/fpsimdmacros.h> 27 - #include <asm/kvm.h> 28 - #include <asm/kvm_arm.h> 29 - #include <asm/kvm_asm.h> 30 - #include <asm/kvm_mmu.h> 31 - #include <asm/memory.h> 32 - 33 - #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) 34 - #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) 35 - #define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) 36 - #define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) 37 - 38 - .text 39 - .pushsection .hyp.text, "ax" 40 - .align PAGE_SHIFT 41 - 42 - .macro save_common_regs 43 - // x2: base address for cpu context 44 - // x3: tmp register 45 - 46 - add x3, x2, #CPU_XREG_OFFSET(19) 47 - stp x19, x20, [x3] 48 - stp x21, x22, [x3, #16] 49 - stp x23, x24, [x3, #32] 50 - stp x25, x26, [x3, #48] 51 - stp x27, x28, [x3, #64] 52 - stp x29, lr, [x3, #80] 53 - 54 - mrs x19, sp_el0 55 - mrs x20, elr_el2 // pc before entering el2 56 - mrs x21, spsr_el2 // pstate before entering el2 57 - 58 - stp x19, x20, [x3, #96] 59 - str x21, [x3, #112] 60 - 61 - mrs x22, sp_el1 62 - mrs x23, elr_el1 63 - mrs x24, spsr_el1 64 - 65 - str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] 66 - str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] 67 - str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] 68 - .endm 69 - 70 - .macro restore_common_regs 71 - // x2: base address for cpu context 72 - // x3: tmp register 73 - 74 - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] 75 - ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] 76 - ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] 77 - 78 - msr sp_el1, x22 79 - msr elr_el1, x23 80 - msr spsr_el1, x24 81 - 82 - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 83 - ldp x19, x20, [x3] 84 - ldr x21, [x3, #16] 85 - 86 - msr sp_el0, x19 87 - msr elr_el2, x20 // pc on return from el2 88 - 
msr spsr_el2, x21 // pstate on return from el2 89 - 90 - add x3, x2, #CPU_XREG_OFFSET(19) 91 - ldp x19, x20, [x3] 92 - ldp x21, x22, [x3, #16] 93 - ldp x23, x24, [x3, #32] 94 - ldp x25, x26, [x3, #48] 95 - ldp x27, x28, [x3, #64] 96 - ldp x29, lr, [x3, #80] 97 - .endm 98 - 99 - .macro save_host_regs 100 - save_common_regs 101 - .endm 102 - 103 - .macro restore_host_regs 104 - restore_common_regs 105 - .endm 106 - 107 - .macro save_fpsimd 108 - // x2: cpu context address 109 - // x3, x4: tmp regs 110 - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) 111 - fpsimd_save x3, 4 112 - .endm 113 - 114 - .macro restore_fpsimd 115 - // x2: cpu context address 116 - // x3, x4: tmp regs 117 - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) 118 - fpsimd_restore x3, 4 119 - .endm 120 - 121 - .macro save_guest_regs 122 - // x0 is the vcpu address 123 - // x1 is the return code, do not corrupt! 124 - // x2 is the cpu context 125 - // x3 is a tmp register 126 - // Guest's x0-x3 are on the stack 127 - 128 - // Compute base to save registers 129 - add x3, x2, #CPU_XREG_OFFSET(4) 130 - stp x4, x5, [x3] 131 - stp x6, x7, [x3, #16] 132 - stp x8, x9, [x3, #32] 133 - stp x10, x11, [x3, #48] 134 - stp x12, x13, [x3, #64] 135 - stp x14, x15, [x3, #80] 136 - stp x16, x17, [x3, #96] 137 - str x18, [x3, #112] 138 - 139 - pop x6, x7 // x2, x3 140 - pop x4, x5 // x0, x1 141 - 142 - add x3, x2, #CPU_XREG_OFFSET(0) 143 - stp x4, x5, [x3] 144 - stp x6, x7, [x3, #16] 145 - 146 - save_common_regs 147 - .endm 148 - 149 - .macro restore_guest_regs 150 - // x0 is the vcpu address. 
151 - // x2 is the cpu context 152 - // x3 is a tmp register 153 - 154 - // Prepare x0-x3 for later restore 155 - add x3, x2, #CPU_XREG_OFFSET(0) 156 - ldp x4, x5, [x3] 157 - ldp x6, x7, [x3, #16] 158 - push x4, x5 // Push x0-x3 on the stack 159 - push x6, x7 160 - 161 - // x4-x18 162 - ldp x4, x5, [x3, #32] 163 - ldp x6, x7, [x3, #48] 164 - ldp x8, x9, [x3, #64] 165 - ldp x10, x11, [x3, #80] 166 - ldp x12, x13, [x3, #96] 167 - ldp x14, x15, [x3, #112] 168 - ldp x16, x17, [x3, #128] 169 - ldr x18, [x3, #144] 170 - 171 - // x19-x29, lr, sp*, elr*, spsr* 172 - restore_common_regs 173 - 174 - // Last bits of the 64bit state 175 - pop x2, x3 176 - pop x0, x1 177 - 178 - // Do not touch any register after this! 179 - .endm 180 - 181 - /* 182 - * Macros to perform system register save/restore. 183 - * 184 - * Ordering here is absolutely critical, and must be kept consistent 185 - * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, 186 - * and in kvm_asm.h. 187 - * 188 - * In other words, don't touch any of these unless you know what 189 - * you are doing. 
190 - */ 191 - .macro save_sysregs 192 - // x2: base address for cpu context 193 - // x3: tmp register 194 - 195 - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) 196 - 197 - mrs x4, vmpidr_el2 198 - mrs x5, csselr_el1 199 - mrs x6, sctlr_el1 200 - mrs x7, actlr_el1 201 - mrs x8, cpacr_el1 202 - mrs x9, ttbr0_el1 203 - mrs x10, ttbr1_el1 204 - mrs x11, tcr_el1 205 - mrs x12, esr_el1 206 - mrs x13, afsr0_el1 207 - mrs x14, afsr1_el1 208 - mrs x15, far_el1 209 - mrs x16, mair_el1 210 - mrs x17, vbar_el1 211 - mrs x18, contextidr_el1 212 - mrs x19, tpidr_el0 213 - mrs x20, tpidrro_el0 214 - mrs x21, tpidr_el1 215 - mrs x22, amair_el1 216 - mrs x23, cntkctl_el1 217 - mrs x24, par_el1 218 - mrs x25, mdscr_el1 219 - 220 - stp x4, x5, [x3] 221 - stp x6, x7, [x3, #16] 222 - stp x8, x9, [x3, #32] 223 - stp x10, x11, [x3, #48] 224 - stp x12, x13, [x3, #64] 225 - stp x14, x15, [x3, #80] 226 - stp x16, x17, [x3, #96] 227 - stp x18, x19, [x3, #112] 228 - stp x20, x21, [x3, #128] 229 - stp x22, x23, [x3, #144] 230 - stp x24, x25, [x3, #160] 231 - .endm 232 - 233 - .macro save_debug type 234 - // x4: pointer to register set 235 - // x5: number of registers to skip 236 - // x6..x22 trashed 237 - 238 - adr x22, 1f 239 - add x22, x22, x5, lsl #2 240 - br x22 241 - 1: 242 - mrs x21, \type\()15_el1 243 - mrs x20, \type\()14_el1 244 - mrs x19, \type\()13_el1 245 - mrs x18, \type\()12_el1 246 - mrs x17, \type\()11_el1 247 - mrs x16, \type\()10_el1 248 - mrs x15, \type\()9_el1 249 - mrs x14, \type\()8_el1 250 - mrs x13, \type\()7_el1 251 - mrs x12, \type\()6_el1 252 - mrs x11, \type\()5_el1 253 - mrs x10, \type\()4_el1 254 - mrs x9, \type\()3_el1 255 - mrs x8, \type\()2_el1 256 - mrs x7, \type\()1_el1 257 - mrs x6, \type\()0_el1 258 - 259 - adr x22, 1f 260 - add x22, x22, x5, lsl #2 261 - br x22 262 - 1: 263 - str x21, [x4, #(15 * 8)] 264 - str x20, [x4, #(14 * 8)] 265 - str x19, [x4, #(13 * 8)] 266 - str x18, [x4, #(12 * 8)] 267 - str x17, [x4, #(11 * 8)] 268 - str x16, [x4, #(10 * 8)] 269 - 
str x15, [x4, #(9 * 8)] 270 - str x14, [x4, #(8 * 8)] 271 - str x13, [x4, #(7 * 8)] 272 - str x12, [x4, #(6 * 8)] 273 - str x11, [x4, #(5 * 8)] 274 - str x10, [x4, #(4 * 8)] 275 - str x9, [x4, #(3 * 8)] 276 - str x8, [x4, #(2 * 8)] 277 - str x7, [x4, #(1 * 8)] 278 - str x6, [x4, #(0 * 8)] 279 - .endm 280 - 281 - .macro restore_sysregs 282 - // x2: base address for cpu context 283 - // x3: tmp register 284 - 285 - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) 286 - 287 - ldp x4, x5, [x3] 288 - ldp x6, x7, [x3, #16] 289 - ldp x8, x9, [x3, #32] 290 - ldp x10, x11, [x3, #48] 291 - ldp x12, x13, [x3, #64] 292 - ldp x14, x15, [x3, #80] 293 - ldp x16, x17, [x3, #96] 294 - ldp x18, x19, [x3, #112] 295 - ldp x20, x21, [x3, #128] 296 - ldp x22, x23, [x3, #144] 297 - ldp x24, x25, [x3, #160] 298 - 299 - msr vmpidr_el2, x4 300 - msr csselr_el1, x5 301 - msr sctlr_el1, x6 302 - msr actlr_el1, x7 303 - msr cpacr_el1, x8 304 - msr ttbr0_el1, x9 305 - msr ttbr1_el1, x10 306 - msr tcr_el1, x11 307 - msr esr_el1, x12 308 - msr afsr0_el1, x13 309 - msr afsr1_el1, x14 310 - msr far_el1, x15 311 - msr mair_el1, x16 312 - msr vbar_el1, x17 313 - msr contextidr_el1, x18 314 - msr tpidr_el0, x19 315 - msr tpidrro_el0, x20 316 - msr tpidr_el1, x21 317 - msr amair_el1, x22 318 - msr cntkctl_el1, x23 319 - msr par_el1, x24 320 - msr mdscr_el1, x25 321 - .endm 322 - 323 - .macro restore_debug type 324 - // x4: pointer to register set 325 - // x5: number of registers to skip 326 - // x6..x22 trashed 327 - 328 - adr x22, 1f 329 - add x22, x22, x5, lsl #2 330 - br x22 331 - 1: 332 - ldr x21, [x4, #(15 * 8)] 333 - ldr x20, [x4, #(14 * 8)] 334 - ldr x19, [x4, #(13 * 8)] 335 - ldr x18, [x4, #(12 * 8)] 336 - ldr x17, [x4, #(11 * 8)] 337 - ldr x16, [x4, #(10 * 8)] 338 - ldr x15, [x4, #(9 * 8)] 339 - ldr x14, [x4, #(8 * 8)] 340 - ldr x13, [x4, #(7 * 8)] 341 - ldr x12, [x4, #(6 * 8)] 342 - ldr x11, [x4, #(5 * 8)] 343 - ldr x10, [x4, #(4 * 8)] 344 - ldr x9, [x4, #(3 * 8)] 345 - ldr x8, [x4, #(2 * 8)] 346 - 
ldr x7, [x4, #(1 * 8)] 347 - ldr x6, [x4, #(0 * 8)] 348 - 349 - adr x22, 1f 350 - add x22, x22, x5, lsl #2 351 - br x22 352 - 1: 353 - msr \type\()15_el1, x21 354 - msr \type\()14_el1, x20 355 - msr \type\()13_el1, x19 356 - msr \type\()12_el1, x18 357 - msr \type\()11_el1, x17 358 - msr \type\()10_el1, x16 359 - msr \type\()9_el1, x15 360 - msr \type\()8_el1, x14 361 - msr \type\()7_el1, x13 362 - msr \type\()6_el1, x12 363 - msr \type\()5_el1, x11 364 - msr \type\()4_el1, x10 365 - msr \type\()3_el1, x9 366 - msr \type\()2_el1, x8 367 - msr \type\()1_el1, x7 368 - msr \type\()0_el1, x6 369 - .endm 370 - 371 - .macro skip_32bit_state tmp, target 372 - // Skip 32bit state if not needed 373 - mrs \tmp, hcr_el2 374 - tbnz \tmp, #HCR_RW_SHIFT, \target 375 - .endm 376 - 377 - .macro skip_tee_state tmp, target 378 - // Skip ThumbEE state if not needed 379 - mrs \tmp, id_pfr0_el1 380 - tbz \tmp, #12, \target 381 - .endm 382 - 383 - .macro skip_debug_state tmp, target 384 - ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] 385 - tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target 386 - .endm 387 - 388 - /* 389 - * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled) 390 - */ 391 - .macro skip_fpsimd_state tmp, target 392 - mrs \tmp, cptr_el2 393 - tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target 394 - .endm 395 - 396 - .macro compute_debug_state target 397 - // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY 398 - // is set, we do a full save/restore cycle and disable trapping. 
399 - add x25, x0, #VCPU_CONTEXT 400 - 401 - // Check the state of MDSCR_EL1 402 - ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] 403 - and x26, x25, #DBG_MDSCR_KDE 404 - and x25, x25, #DBG_MDSCR_MDE 405 - adds xzr, x25, x26 406 - b.eq 9998f // Nothing to see there 407 - 408 - // If any interesting bits was set, we must set the flag 409 - mov x26, #KVM_ARM64_DEBUG_DIRTY 410 - str x26, [x0, #VCPU_DEBUG_FLAGS] 411 - b 9999f // Don't skip restore 412 - 413 - 9998: 414 - // Otherwise load the flags from memory in case we recently 415 - // trapped 416 - skip_debug_state x25, \target 417 - 9999: 418 - .endm 419 - 420 - .macro save_guest_32bit_state 421 - skip_32bit_state x3, 1f 422 - 423 - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) 424 - mrs x4, spsr_abt 425 - mrs x5, spsr_und 426 - mrs x6, spsr_irq 427 - mrs x7, spsr_fiq 428 - stp x4, x5, [x3] 429 - stp x6, x7, [x3, #16] 430 - 431 - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) 432 - mrs x4, dacr32_el2 433 - mrs x5, ifsr32_el2 434 - stp x4, x5, [x3] 435 - 436 - skip_fpsimd_state x8, 2f 437 - mrs x6, fpexc32_el2 438 - str x6, [x3, #16] 439 - 2: 440 - skip_debug_state x8, 1f 441 - mrs x7, dbgvcr32_el2 442 - str x7, [x3, #24] 443 - 1: 444 - .endm 445 - 446 - .macro restore_guest_32bit_state 447 - skip_32bit_state x3, 1f 448 - 449 - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) 450 - ldp x4, x5, [x3] 451 - ldp x6, x7, [x3, #16] 452 - msr spsr_abt, x4 453 - msr spsr_und, x5 454 - msr spsr_irq, x6 455 - msr spsr_fiq, x7 456 - 457 - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) 458 - ldp x4, x5, [x3] 459 - msr dacr32_el2, x4 460 - msr ifsr32_el2, x5 461 - 462 - skip_debug_state x8, 1f 463 - ldr x7, [x3, #24] 464 - msr dbgvcr32_el2, x7 465 - 1: 466 - .endm 467 - 468 - .macro activate_traps 469 - ldr x2, [x0, #VCPU_HCR_EL2] 470 - 471 - /* 472 - * We are about to set CPTR_EL2.TFP to trap all floating point 473 - * register accesses to EL2, however, the ARM ARM clearly states that 474 - * traps are only taken to EL2 if the operation would 
not otherwise 475 - * trap to EL1. Therefore, always make sure that for 32-bit guests, 476 - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. 477 - */ 478 - tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state 479 - mov x3, #(1 << 30) 480 - msr fpexc32_el2, x3 481 - isb 482 - 99: 483 - msr hcr_el2, x2 484 - mov x2, #CPTR_EL2_TTA 485 - orr x2, x2, #CPTR_EL2_TFP 486 - msr cptr_el2, x2 487 - 488 - mov x2, #(1 << 15) // Trap CP15 Cr=15 489 - msr hstr_el2, x2 490 - 491 - // Monitor Debug Config - see kvm_arm_setup_debug() 492 - ldr x2, [x0, #VCPU_MDCR_EL2] 493 - msr mdcr_el2, x2 494 - .endm 495 - 496 - .macro deactivate_traps 497 - mov x2, #HCR_RW 498 - msr hcr_el2, x2 499 - msr hstr_el2, xzr 500 - 501 - mrs x2, mdcr_el2 502 - and x2, x2, #MDCR_EL2_HPMN_MASK 503 - msr mdcr_el2, x2 504 - .endm 505 - 506 - .macro activate_vm 507 - ldr x1, [x0, #VCPU_KVM] 508 - kern_hyp_va x1 509 - ldr x2, [x1, #KVM_VTTBR] 510 - msr vttbr_el2, x2 511 - .endm 512 - 513 - .macro deactivate_vm 514 - msr vttbr_el2, xzr 515 - .endm 516 - 517 - /* 518 - * Call into the vgic backend for state saving 519 - */ 520 - .macro save_vgic_state 521 - alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF 522 - bl __save_vgic_v2_state 523 - alternative_else 524 - bl __save_vgic_v3_state 525 - alternative_endif 526 - mrs x24, hcr_el2 527 - mov x25, #HCR_INT_OVERRIDE 528 - neg x25, x25 529 - and x24, x24, x25 530 - msr hcr_el2, x24 531 - .endm 532 - 533 - /* 534 - * Call into the vgic backend for state restoring 535 - */ 536 - .macro restore_vgic_state 537 - mrs x24, hcr_el2 538 - ldr x25, [x0, #VCPU_IRQ_LINES] 539 - orr x24, x24, #HCR_INT_OVERRIDE 540 - orr x24, x24, x25 541 - msr hcr_el2, x24 542 - alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF 543 - bl __restore_vgic_v2_state 544 - alternative_else 545 - bl __restore_vgic_v3_state 546 - alternative_endif 547 - .endm 548 - 549 - .macro save_timer_state 550 - // x0: vcpu pointer 551 - ldr x2, [x0, #VCPU_KVM] 552 - kern_hyp_va x2 553 - ldr 
w3, [x2, #KVM_TIMER_ENABLED] 554 - cbz w3, 1f 555 - 556 - mrs x3, cntv_ctl_el0 557 - and x3, x3, #3 558 - str w3, [x0, #VCPU_TIMER_CNTV_CTL] 559 - 560 - isb 561 - 562 - mrs x3, cntv_cval_el0 563 - str x3, [x0, #VCPU_TIMER_CNTV_CVAL] 564 - 565 - 1: 566 - // Disable the virtual timer 567 - msr cntv_ctl_el0, xzr 568 - 569 - // Allow physical timer/counter access for the host 570 - mrs x2, cnthctl_el2 571 - orr x2, x2, #3 572 - msr cnthctl_el2, x2 573 - 574 - // Clear cntvoff for the host 575 - msr cntvoff_el2, xzr 576 - .endm 577 - 578 - .macro restore_timer_state 579 - // x0: vcpu pointer 580 - // Disallow physical timer access for the guest 581 - // Physical counter access is allowed 582 - mrs x2, cnthctl_el2 583 - orr x2, x2, #1 584 - bic x2, x2, #2 585 - msr cnthctl_el2, x2 586 - 587 - ldr x2, [x0, #VCPU_KVM] 588 - kern_hyp_va x2 589 - ldr w3, [x2, #KVM_TIMER_ENABLED] 590 - cbz w3, 1f 591 - 592 - ldr x3, [x2, #KVM_TIMER_CNTVOFF] 593 - msr cntvoff_el2, x3 594 - ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] 595 - msr cntv_cval_el0, x2 596 - isb 597 - 598 - ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] 599 - and x2, x2, #3 600 - msr cntv_ctl_el0, x2 601 - 1: 602 - .endm 603 - 604 - __save_sysregs: 605 - save_sysregs 606 - ret 607 - 608 - __restore_sysregs: 609 - restore_sysregs 610 - ret 611 - 612 - /* Save debug state */ 613 - __save_debug: 614 - // x2: ptr to CPU context 615 - // x3: ptr to debug reg struct 616 - // x4/x5/x6-22/x24-26: trashed 617 - 618 - mrs x26, id_aa64dfr0_el1 619 - ubfx x24, x26, #12, #4 // Extract BRPs 620 - ubfx x25, x26, #20, #4 // Extract WRPs 621 - mov w26, #15 622 - sub w24, w26, w24 // How many BPs to skip 623 - sub w25, w26, w25 // How many WPs to skip 624 - 625 - mov x5, x24 626 - add x4, x3, #DEBUG_BCR 627 - save_debug dbgbcr 628 - add x4, x3, #DEBUG_BVR 629 - save_debug dbgbvr 630 - 631 - mov x5, x25 632 - add x4, x3, #DEBUG_WCR 633 - save_debug dbgwcr 634 - add x4, x3, #DEBUG_WVR 635 - save_debug dbgwvr 636 - 637 - mrs x21, mdccint_el1 638 - str x21, 
[x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] 639 - ret 640 - 641 - /* Restore debug state */ 642 - __restore_debug: 643 - // x2: ptr to CPU context 644 - // x3: ptr to debug reg struct 645 - // x4/x5/x6-22/x24-26: trashed 646 - 647 - mrs x26, id_aa64dfr0_el1 648 - ubfx x24, x26, #12, #4 // Extract BRPs 649 - ubfx x25, x26, #20, #4 // Extract WRPs 650 - mov w26, #15 651 - sub w24, w26, w24 // How many BPs to skip 652 - sub w25, w26, w25 // How many WPs to skip 653 - 654 - mov x5, x24 655 - add x4, x3, #DEBUG_BCR 656 - restore_debug dbgbcr 657 - add x4, x3, #DEBUG_BVR 658 - restore_debug dbgbvr 659 - 660 - mov x5, x25 661 - add x4, x3, #DEBUG_WCR 662 - restore_debug dbgwcr 663 - add x4, x3, #DEBUG_WVR 664 - restore_debug dbgwvr 665 - 666 - ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] 667 - msr mdccint_el1, x21 668 - 669 - ret 670 - 671 - __save_fpsimd: 672 - skip_fpsimd_state x3, 1f 673 - save_fpsimd 674 - 1: ret 675 - 676 - __restore_fpsimd: 677 - skip_fpsimd_state x3, 1f 678 - restore_fpsimd 679 - 1: ret 680 - 681 - switch_to_guest_fpsimd: 682 - push x4, lr 683 - 684 - mrs x2, cptr_el2 685 - bic x2, x2, #CPTR_EL2_TFP 686 - msr cptr_el2, x2 687 - isb 688 - 689 - mrs x0, tpidr_el2 690 - 691 - ldr x2, [x0, #VCPU_HOST_CONTEXT] 692 - kern_hyp_va x2 693 - bl __save_fpsimd 694 - 695 - add x2, x0, #VCPU_CONTEXT 696 - bl __restore_fpsimd 697 - 698 - skip_32bit_state x3, 1f 699 - ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] 700 - msr fpexc32_el2, x4 701 - 1: 702 - pop x4, lr 703 - pop x2, x3 704 - pop x0, x1 705 - 706 - eret 707 - 708 - /* 709 - * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); 710 - * 711 - * This is the world switch. The first half of the function 712 - * deals with entering the guest, and anything from __kvm_vcpu_return 713 - * to the end of the function deals with reentering the host. 714 - * On the enter path, only x0 (vcpu pointer) must be preserved until 715 - * the last moment. 
On the exit path, x0 (vcpu pointer) and x1 (exception 716 - * code) must both be preserved until the epilogue. 717 - * In both cases, x2 points to the CPU context we're saving/restoring from/to. 718 - */ 719 - ENTRY(__kvm_vcpu_run) 720 - kern_hyp_va x0 721 - msr tpidr_el2, x0 // Save the vcpu register 722 - 723 - // Host context 724 - ldr x2, [x0, #VCPU_HOST_CONTEXT] 725 - kern_hyp_va x2 726 - 727 - save_host_regs 728 - bl __save_sysregs 729 - 730 - compute_debug_state 1f 731 - add x3, x0, #VCPU_HOST_DEBUG_STATE 732 - bl __save_debug 733 - 1: 734 - activate_traps 735 - activate_vm 736 - 737 - restore_vgic_state 738 - restore_timer_state 739 - 740 - // Guest context 741 - add x2, x0, #VCPU_CONTEXT 742 - 743 - // We must restore the 32-bit state before the sysregs, thanks 744 - // to Cortex-A57 erratum #852523. 745 - restore_guest_32bit_state 746 - bl __restore_sysregs 747 - 748 - skip_debug_state x3, 1f 749 - ldr x3, [x0, #VCPU_DEBUG_PTR] 750 - kern_hyp_va x3 751 - bl __restore_debug 752 - 1: 753 - restore_guest_regs 754 - 755 - // That's it, no more messing around. 756 - eret 757 - 758 - __kvm_vcpu_return: 759 - // Assume x0 is the vcpu pointer, x1 the return code 760 - // Guest's x0-x3 are on the stack 761 - 762 - // Guest context 763 - add x2, x0, #VCPU_CONTEXT 764 - 765 - save_guest_regs 766 - bl __save_fpsimd 767 - bl __save_sysregs 768 - 769 - skip_debug_state x3, 1f 770 - ldr x3, [x0, #VCPU_DEBUG_PTR] 771 - kern_hyp_va x3 772 - bl __save_debug 773 - 1: 774 - save_guest_32bit_state 775 - 776 - save_timer_state 777 - save_vgic_state 778 - 779 - deactivate_traps 780 - deactivate_vm 781 - 782 - // Host context 783 - ldr x2, [x0, #VCPU_HOST_CONTEXT] 784 - kern_hyp_va x2 785 - 786 - bl __restore_sysregs 787 - bl __restore_fpsimd 788 - /* Clear FPSIMD and Trace trapping */ 789 - msr cptr_el2, xzr 790 - 791 - skip_debug_state x3, 1f 792 - // Clear the dirty flag for the next run, as all the state has 793 - // already been saved. 
Note that we nuke the whole 64bit word. 794 - // If we ever add more flags, we'll have to be more careful... 795 - str xzr, [x0, #VCPU_DEBUG_FLAGS] 796 - add x3, x0, #VCPU_HOST_DEBUG_STATE 797 - bl __restore_debug 798 - 1: 799 - restore_host_regs 800 - 801 - mov x0, x1 802 - ret 803 - END(__kvm_vcpu_run) 804 - 805 - // void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); 806 - ENTRY(__kvm_tlb_flush_vmid_ipa) 807 - dsb ishst 808 - 809 - kern_hyp_va x0 810 - ldr x2, [x0, #KVM_VTTBR] 811 - msr vttbr_el2, x2 812 - isb 813 - 814 - /* 815 - * We could do so much better if we had the VA as well. 816 - * Instead, we invalidate Stage-2 for this IPA, and the 817 - * whole of Stage-1. Weep... 818 - */ 819 - lsr x1, x1, #12 820 - tlbi ipas2e1is, x1 821 - /* 822 - * We have to ensure completion of the invalidation at Stage-2, 823 - * since a table walk on another CPU could refill a TLB with a 824 - * complete (S1 + S2) walk based on the old Stage-2 mapping if 825 - * the Stage-1 invalidation happened first. 826 - */ 827 - dsb ish 828 - tlbi vmalle1is 829 - dsb ish 830 - isb 831 - 832 - msr vttbr_el2, xzr 833 - ret 834 - ENDPROC(__kvm_tlb_flush_vmid_ipa) 835 - 836 - /** 837 - * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs 838 - * @struct kvm *kvm - pointer to kvm structure 839 - * 840 - * Invalidates all Stage 1 and 2 TLB entries for current VMID. 
841 - */ 842 - ENTRY(__kvm_tlb_flush_vmid) 843 - dsb ishst 844 - 845 - kern_hyp_va x0 846 - ldr x2, [x0, #KVM_VTTBR] 847 - msr vttbr_el2, x2 848 - isb 849 - 850 - tlbi vmalls12e1is 851 - dsb ish 852 - isb 853 - 854 - msr vttbr_el2, xzr 855 - ret 856 - ENDPROC(__kvm_tlb_flush_vmid) 857 - 858 - ENTRY(__kvm_flush_vm_context) 859 - dsb ishst 860 - tlbi alle1is 861 - ic ialluis 862 - dsb ish 863 - ret 864 - ENDPROC(__kvm_flush_vm_context) 865 - 866 - __kvm_hyp_panic: 867 - // Stash PAR_EL1 before corrupting it in __restore_sysregs 868 - mrs x0, par_el1 869 - push x0, xzr 870 - 871 - // Guess the context by looking at VTTBR: 872 - // If zero, then we're already a host. 873 - // Otherwise restore a minimal host context before panicing. 874 - mrs x0, vttbr_el2 875 - cbz x0, 1f 876 - 877 - mrs x0, tpidr_el2 878 - 879 - deactivate_traps 880 - deactivate_vm 881 - 882 - ldr x2, [x0, #VCPU_HOST_CONTEXT] 883 - kern_hyp_va x2 884 - 885 - bl __restore_sysregs 886 - 887 - /* 888 - * Make sure we have a valid host stack, and don't leave junk in the 889 - * frame pointer that will give us a misleading host stack unwinding. 
890 - */ 891 - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] 892 - msr sp_el1, x22 893 - mov x29, xzr 894 - 895 - 1: adr x0, __hyp_panic_str 896 - adr x1, 2f 897 - ldp x2, x3, [x1] 898 - sub x0, x0, x2 899 - add x0, x0, x3 900 - mrs x1, spsr_el2 901 - mrs x2, elr_el2 902 - mrs x3, esr_el2 903 - mrs x4, far_el2 904 - mrs x5, hpfar_el2 905 - pop x6, xzr // active context PAR_EL1 906 - mrs x7, tpidr_el2 907 - 908 - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ 909 - PSR_MODE_EL1h) 910 - msr spsr_el2, lr 911 - ldr lr, =panic 912 - msr elr_el2, lr 913 - eret 914 - 915 - .align 3 916 - 2: .quad HYP_PAGE_OFFSET 917 - .quad PAGE_OFFSET 918 - ENDPROC(__kvm_hyp_panic) 919 - 920 - __hyp_panic_str: 921 - .ascii "HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0" 922 - 923 - .align 2 924 21 925 22 /* 926 23 * u64 kvm_call_hyp(void *hypfn, ...); ··· 31 934 * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the 32 935 * function pointer can be passed). The function being called must be mapped 33 936 * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are 34 - * passed in r0 and r1. 937 + * passed in x0. 
35 938 * 36 939 * A function pointer with a value of 0 has a special meaning, and is 37 940 * used to implement __hyp_get_vectors in the same way as in ··· 41 944 hvc #0 42 945 ret 43 946 ENDPROC(kvm_call_hyp) 44 - 45 - .macro invalid_vector label, target 46 - .align 2 47 - \label: 48 - b \target 49 - ENDPROC(\label) 50 - .endm 51 - 52 - /* None of these should ever happen */ 53 - invalid_vector el2t_sync_invalid, __kvm_hyp_panic 54 - invalid_vector el2t_irq_invalid, __kvm_hyp_panic 55 - invalid_vector el2t_fiq_invalid, __kvm_hyp_panic 56 - invalid_vector el2t_error_invalid, __kvm_hyp_panic 57 - invalid_vector el2h_sync_invalid, __kvm_hyp_panic 58 - invalid_vector el2h_irq_invalid, __kvm_hyp_panic 59 - invalid_vector el2h_fiq_invalid, __kvm_hyp_panic 60 - invalid_vector el2h_error_invalid, __kvm_hyp_panic 61 - invalid_vector el1_sync_invalid, __kvm_hyp_panic 62 - invalid_vector el1_irq_invalid, __kvm_hyp_panic 63 - invalid_vector el1_fiq_invalid, __kvm_hyp_panic 64 - invalid_vector el1_error_invalid, __kvm_hyp_panic 65 - 66 - el1_sync: // Guest trapped into EL2 67 - push x0, x1 68 - push x2, x3 69 - 70 - mrs x1, esr_el2 71 - lsr x2, x1, #ESR_ELx_EC_SHIFT 72 - 73 - cmp x2, #ESR_ELx_EC_HVC64 74 - b.ne el1_trap 75 - 76 - mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest 77 - cbnz x3, el1_trap // called HVC 78 - 79 - /* Here, we're pretty sure the host called HVC. */ 80 - pop x2, x3 81 - pop x0, x1 82 - 83 - /* Check for __hyp_get_vectors */ 84 - cbnz x0, 1f 85 - mrs x0, vbar_el2 86 - b 2f 87 - 88 - 1: push lr, xzr 89 - 90 - /* 91 - * Compute the function address in EL2, and shuffle the parameters. 
92 - */ 93 - kern_hyp_va x0 94 - mov lr, x0 95 - mov x0, x1 96 - mov x1, x2 97 - mov x2, x3 98 - blr lr 99 - 100 - pop lr, xzr 101 - 2: eret 102 - 103 - el1_trap: 104 - /* 105 - * x1: ESR 106 - * x2: ESR_EC 107 - */ 108 - 109 - /* Guest accessed VFP/SIMD registers, save host, restore Guest */ 110 - cmp x2, #ESR_ELx_EC_FP_ASIMD 111 - b.eq switch_to_guest_fpsimd 112 - 113 - cmp x2, #ESR_ELx_EC_DABT_LOW 114 - mov x0, #ESR_ELx_EC_IABT_LOW 115 - ccmp x2, x0, #4, ne 116 - b.ne 1f // Not an abort we care about 117 - 118 - /* This is an abort. Check for permission fault */ 119 - alternative_if_not ARM64_WORKAROUND_834220 120 - and x2, x1, #ESR_ELx_FSC_TYPE 121 - cmp x2, #FSC_PERM 122 - b.ne 1f // Not a permission fault 123 - alternative_else 124 - nop // Use the permission fault path to 125 - nop // check for a valid S1 translation, 126 - nop // regardless of the ESR value. 127 - alternative_endif 128 - 129 - /* 130 - * Check for Stage-1 page table walk, which is guaranteed 131 - * to give a valid HPFAR_EL2. 132 - */ 133 - tbnz x1, #7, 1f // S1PTW is set 134 - 135 - /* Preserve PAR_EL1 */ 136 - mrs x3, par_el1 137 - push x3, xzr 138 - 139 - /* 140 - * Permission fault, HPFAR_EL2 is invalid. 141 - * Resolve the IPA the hard way using the guest VA. 142 - * Stage-1 translation already validated the memory access rights. 143 - * As such, we can use the EL1 translation regime, and don't have 144 - * to distinguish between EL0 and EL1 access. 
145 - */ 146 - mrs x2, far_el2 147 - at s1e1r, x2 148 - isb 149 - 150 - /* Read result */ 151 - mrs x3, par_el1 152 - pop x0, xzr // Restore PAR_EL1 from the stack 153 - msr par_el1, x0 154 - tbnz x3, #0, 3f // Bail out if we failed the translation 155 - ubfx x3, x3, #12, #36 // Extract IPA 156 - lsl x3, x3, #4 // and present it like HPFAR 157 - b 2f 158 - 159 - 1: mrs x3, hpfar_el2 160 - mrs x2, far_el2 161 - 162 - 2: mrs x0, tpidr_el2 163 - str w1, [x0, #VCPU_ESR_EL2] 164 - str x2, [x0, #VCPU_FAR_EL2] 165 - str x3, [x0, #VCPU_HPFAR_EL2] 166 - 167 - mov x1, #ARM_EXCEPTION_TRAP 168 - b __kvm_vcpu_return 169 - 170 - /* 171 - * Translation failed. Just return to the guest and 172 - * let it fault again. Another CPU is probably playing 173 - * behind our back. 174 - */ 175 - 3: pop x2, x3 176 - pop x0, x1 177 - 178 - eret 179 - 180 - el1_irq: 181 - push x0, x1 182 - push x2, x3 183 - mrs x0, tpidr_el2 184 - mov x1, #ARM_EXCEPTION_IRQ 185 - b __kvm_vcpu_return 186 - 187 - .ltorg 188 - 189 - .align 11 190 - 191 - ENTRY(__kvm_hyp_vector) 192 - ventry el2t_sync_invalid // Synchronous EL2t 193 - ventry el2t_irq_invalid // IRQ EL2t 194 - ventry el2t_fiq_invalid // FIQ EL2t 195 - ventry el2t_error_invalid // Error EL2t 196 - 197 - ventry el2h_sync_invalid // Synchronous EL2h 198 - ventry el2h_irq_invalid // IRQ EL2h 199 - ventry el2h_fiq_invalid // FIQ EL2h 200 - ventry el2h_error_invalid // Error EL2h 201 - 202 - ventry el1_sync // Synchronous 64-bit EL1 203 - ventry el1_irq // IRQ 64-bit EL1 204 - ventry el1_fiq_invalid // FIQ 64-bit EL1 205 - ventry el1_error_invalid // Error 64-bit EL1 206 - 207 - ventry el1_sync // Synchronous 32-bit EL1 208 - ventry el1_irq // IRQ 32-bit EL1 209 - ventry el1_fiq_invalid // FIQ 32-bit EL1 210 - ventry el1_error_invalid // Error 32-bit EL1 211 - ENDPROC(__kvm_hyp_vector) 212 - 213 - 214 - ENTRY(__kvm_get_mdcr_el2) 215 - mrs x0, mdcr_el2 216 - ret 217 - ENDPROC(__kvm_get_mdcr_el2) 218 - 219 - .popsection
+14
arch/arm64/kvm/hyp/Makefile
··· 1 + # 2 + # Makefile for Kernel-based Virtual Machine module, HYP part 3 + # 4 + 5 + obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o 6 + obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o 7 + obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o 8 + obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o 9 + obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o 10 + obj-$(CONFIG_KVM_ARM_HOST) += entry.o 11 + obj-$(CONFIG_KVM_ARM_HOST) += switch.o 12 + obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o 13 + obj-$(CONFIG_KVM_ARM_HOST) += tlb.o 14 + obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
+140
arch/arm64/kvm/hyp/debug-sr.c
··· 1 + /* 2 + * Copyright (C) 2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include <linux/compiler.h> 19 + #include <linux/kvm_host.h> 20 + 21 + #include <asm/kvm_asm.h> 22 + #include <asm/kvm_mmu.h> 23 + 24 + #include "hyp.h" 25 + 26 + #define read_debug(r,n) read_sysreg(r##n##_el1) 27 + #define write_debug(v,r,n) write_sysreg(v, r##n##_el1) 28 + 29 + #define save_debug(ptr,reg,nr) \ 30 + switch (nr) { \ 31 + case 15: ptr[15] = read_debug(reg, 15); \ 32 + case 14: ptr[14] = read_debug(reg, 14); \ 33 + case 13: ptr[13] = read_debug(reg, 13); \ 34 + case 12: ptr[12] = read_debug(reg, 12); \ 35 + case 11: ptr[11] = read_debug(reg, 11); \ 36 + case 10: ptr[10] = read_debug(reg, 10); \ 37 + case 9: ptr[9] = read_debug(reg, 9); \ 38 + case 8: ptr[8] = read_debug(reg, 8); \ 39 + case 7: ptr[7] = read_debug(reg, 7); \ 40 + case 6: ptr[6] = read_debug(reg, 6); \ 41 + case 5: ptr[5] = read_debug(reg, 5); \ 42 + case 4: ptr[4] = read_debug(reg, 4); \ 43 + case 3: ptr[3] = read_debug(reg, 3); \ 44 + case 2: ptr[2] = read_debug(reg, 2); \ 45 + case 1: ptr[1] = read_debug(reg, 1); \ 46 + default: ptr[0] = read_debug(reg, 0); \ 47 + } 48 + 49 + #define restore_debug(ptr,reg,nr) \ 50 + switch (nr) { \ 51 + case 15: write_debug(ptr[15], reg, 15); \ 52 + case 14: write_debug(ptr[14], reg, 14); \ 53 + case 13: write_debug(ptr[13], 
reg, 13); \ 54 + case 12: write_debug(ptr[12], reg, 12); \ 55 + case 11: write_debug(ptr[11], reg, 11); \ 56 + case 10: write_debug(ptr[10], reg, 10); \ 57 + case 9: write_debug(ptr[9], reg, 9); \ 58 + case 8: write_debug(ptr[8], reg, 8); \ 59 + case 7: write_debug(ptr[7], reg, 7); \ 60 + case 6: write_debug(ptr[6], reg, 6); \ 61 + case 5: write_debug(ptr[5], reg, 5); \ 62 + case 4: write_debug(ptr[4], reg, 4); \ 63 + case 3: write_debug(ptr[3], reg, 3); \ 64 + case 2: write_debug(ptr[2], reg, 2); \ 65 + case 1: write_debug(ptr[1], reg, 1); \ 66 + default: write_debug(ptr[0], reg, 0); \ 67 + } 68 + 69 + void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, 70 + struct kvm_guest_debug_arch *dbg, 71 + struct kvm_cpu_context *ctxt) 72 + { 73 + u64 aa64dfr0; 74 + int brps, wrps; 75 + 76 + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) 77 + return; 78 + 79 + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); 80 + brps = (aa64dfr0 >> 12) & 0xf; 81 + wrps = (aa64dfr0 >> 20) & 0xf; 82 + 83 + save_debug(dbg->dbg_bcr, dbgbcr, brps); 84 + save_debug(dbg->dbg_bvr, dbgbvr, brps); 85 + save_debug(dbg->dbg_wcr, dbgwcr, wrps); 86 + save_debug(dbg->dbg_wvr, dbgwvr, wrps); 87 + 88 + ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); 89 + } 90 + 91 + void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, 92 + struct kvm_guest_debug_arch *dbg, 93 + struct kvm_cpu_context *ctxt) 94 + { 95 + u64 aa64dfr0; 96 + int brps, wrps; 97 + 98 + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) 99 + return; 100 + 101 + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); 102 + 103 + brps = (aa64dfr0 >> 12) & 0xf; 104 + wrps = (aa64dfr0 >> 20) & 0xf; 105 + 106 + restore_debug(dbg->dbg_bcr, dbgbcr, brps); 107 + restore_debug(dbg->dbg_bvr, dbgbvr, brps); 108 + restore_debug(dbg->dbg_wcr, dbgwcr, wrps); 109 + restore_debug(dbg->dbg_wvr, dbgwvr, wrps); 110 + 111 + write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); 112 + } 113 + 114 + void __hyp_text __debug_cond_save_host_state(struct 
kvm_vcpu *vcpu) 115 + { 116 + /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform 117 + * a full save/restore cycle. */ 118 + if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) || 119 + (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE)) 120 + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; 121 + 122 + __debug_save_state(vcpu, &vcpu->arch.host_debug_state, 123 + kern_hyp_va(vcpu->arch.host_cpu_context)); 124 + } 125 + 126 + void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) 127 + { 128 + __debug_restore_state(vcpu, &vcpu->arch.host_debug_state, 129 + kern_hyp_va(vcpu->arch.host_cpu_context)); 130 + 131 + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) 132 + vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; 133 + } 134 + 135 + static u32 __hyp_text __debug_read_mdcr_el2(void) 136 + { 137 + return read_sysreg(mdcr_el2); 138 + } 139 + 140 + __alias(__debug_read_mdcr_el2) u32 __kvm_get_mdcr_el2(void);
+160
arch/arm64/kvm/hyp/entry.S
··· 1 + /* 2 + * Copyright (C) 2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include <linux/linkage.h> 19 + 20 + #include <asm/asm-offsets.h> 21 + #include <asm/assembler.h> 22 + #include <asm/fpsimdmacros.h> 23 + #include <asm/kvm.h> 24 + #include <asm/kvm_arm.h> 25 + #include <asm/kvm_asm.h> 26 + #include <asm/kvm_mmu.h> 27 + 28 + #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) 29 + #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) 30 + 31 + .text 32 + .pushsection .hyp.text, "ax" 33 + 34 + .macro save_callee_saved_regs ctxt 35 + stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] 36 + stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] 37 + stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] 38 + stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] 39 + stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] 40 + stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] 41 + .endm 42 + 43 + .macro restore_callee_saved_regs ctxt 44 + ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] 45 + ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] 46 + ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] 47 + ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] 48 + ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] 49 + ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] 50 + .endm 51 + 52 + /* 53 + * u64 __guest_enter(struct kvm_vcpu *vcpu, 54 + * struct kvm_cpu_context *host_ctxt); 55 + */ 56 + 
ENTRY(__guest_enter) 57 + // x0: vcpu 58 + // x1: host/guest context 59 + // x2-x18: clobbered by macros 60 + 61 + // Store the host regs 62 + save_callee_saved_regs x1 63 + 64 + // Preserve vcpu & host_ctxt for use at exit time 65 + stp x0, x1, [sp, #-16]! 66 + 67 + add x1, x0, #VCPU_CONTEXT 68 + 69 + // Prepare x0-x1 for later restore by pushing them onto the stack 70 + ldp x2, x3, [x1, #CPU_XREG_OFFSET(0)] 71 + stp x2, x3, [sp, #-16]! 72 + 73 + // x2-x18 74 + ldp x2, x3, [x1, #CPU_XREG_OFFSET(2)] 75 + ldp x4, x5, [x1, #CPU_XREG_OFFSET(4)] 76 + ldp x6, x7, [x1, #CPU_XREG_OFFSET(6)] 77 + ldp x8, x9, [x1, #CPU_XREG_OFFSET(8)] 78 + ldp x10, x11, [x1, #CPU_XREG_OFFSET(10)] 79 + ldp x12, x13, [x1, #CPU_XREG_OFFSET(12)] 80 + ldp x14, x15, [x1, #CPU_XREG_OFFSET(14)] 81 + ldp x16, x17, [x1, #CPU_XREG_OFFSET(16)] 82 + ldr x18, [x1, #CPU_XREG_OFFSET(18)] 83 + 84 + // x19-x29, lr 85 + restore_callee_saved_regs x1 86 + 87 + // Last bits of the 64bit state 88 + ldp x0, x1, [sp], #16 89 + 90 + // Do not touch any register after this! 
91 + eret 92 + ENDPROC(__guest_enter) 93 + 94 + ENTRY(__guest_exit) 95 + // x0: vcpu 96 + // x1: return code 97 + // x2-x3: free 98 + // x4-x29,lr: vcpu regs 99 + // vcpu x0-x3 on the stack 100 + 101 + add x2, x0, #VCPU_CONTEXT 102 + 103 + stp x4, x5, [x2, #CPU_XREG_OFFSET(4)] 104 + stp x6, x7, [x2, #CPU_XREG_OFFSET(6)] 105 + stp x8, x9, [x2, #CPU_XREG_OFFSET(8)] 106 + stp x10, x11, [x2, #CPU_XREG_OFFSET(10)] 107 + stp x12, x13, [x2, #CPU_XREG_OFFSET(12)] 108 + stp x14, x15, [x2, #CPU_XREG_OFFSET(14)] 109 + stp x16, x17, [x2, #CPU_XREG_OFFSET(16)] 110 + str x18, [x2, #CPU_XREG_OFFSET(18)] 111 + 112 + ldp x6, x7, [sp], #16 // x2, x3 113 + ldp x4, x5, [sp], #16 // x0, x1 114 + 115 + stp x4, x5, [x2, #CPU_XREG_OFFSET(0)] 116 + stp x6, x7, [x2, #CPU_XREG_OFFSET(2)] 117 + 118 + save_callee_saved_regs x2 119 + 120 + // Restore vcpu & host_ctxt from the stack 121 + // (preserving return code in x1) 122 + ldp x0, x2, [sp], #16 123 + // Now restore the host regs 124 + restore_callee_saved_regs x2 125 + 126 + mov x0, x1 127 + ret 128 + ENDPROC(__guest_exit) 129 + 130 + ENTRY(__fpsimd_guest_restore) 131 + stp x4, lr, [sp, #-16]! 132 + 133 + mrs x2, cptr_el2 134 + bic x2, x2, #CPTR_EL2_TFP 135 + msr cptr_el2, x2 136 + isb 137 + 138 + mrs x3, tpidr_el2 139 + 140 + ldr x0, [x3, #VCPU_HOST_CONTEXT] 141 + kern_hyp_va x0 142 + add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) 143 + bl __fpsimd_save_state 144 + 145 + add x2, x3, #VCPU_CONTEXT 146 + add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) 147 + bl __fpsimd_restore_state 148 + 149 + // Skip restoring fpexc32 for AArch64 guests 150 + mrs x1, hcr_el2 151 + tbnz x1, #HCR_RW_SHIFT, 1f 152 + ldr x4, [x3, #VCPU_FPEXC32_EL2] 153 + msr fpexc32_el2, x4 154 + 1: 155 + ldp x4, lr, [sp], #16 156 + ldp x2, x3, [sp], #16 157 + ldp x0, x1, [sp], #16 158 + 159 + eret 160 + ENDPROC(__fpsimd_guest_restore)
+33
arch/arm64/kvm/hyp/fpsimd.S
··· 1 + /* 2 + * Copyright (C) 2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include <linux/linkage.h> 19 + 20 + #include <asm/fpsimdmacros.h> 21 + 22 + .text 23 + .pushsection .hyp.text, "ax" 24 + 25 + ENTRY(__fpsimd_save_state) 26 + fpsimd_save x0, 1 27 + ret 28 + ENDPROC(__fpsimd_save_state) 29 + 30 + ENTRY(__fpsimd_restore_state) 31 + fpsimd_restore x0, 1 32 + ret 33 + ENDPROC(__fpsimd_restore_state)
+212
arch/arm64/kvm/hyp/hyp-entry.S
··· 1 + /* 2 + * Copyright (C) 2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include <linux/linkage.h> 19 + 20 + #include <asm/alternative.h> 21 + #include <asm/assembler.h> 22 + #include <asm/asm-offsets.h> 23 + #include <asm/cpufeature.h> 24 + #include <asm/kvm_arm.h> 25 + #include <asm/kvm_asm.h> 26 + #include <asm/kvm_mmu.h> 27 + 28 + .text 29 + .pushsection .hyp.text, "ax" 30 + 31 + .macro save_x0_to_x3 32 + stp x0, x1, [sp, #-16]! 33 + stp x2, x3, [sp, #-16]! 34 + .endm 35 + 36 + .macro restore_x0_to_x3 37 + ldp x2, x3, [sp], #16 38 + ldp x0, x1, [sp], #16 39 + .endm 40 + 41 + el1_sync: // Guest trapped into EL2 42 + save_x0_to_x3 43 + 44 + mrs x1, esr_el2 45 + lsr x2, x1, #ESR_ELx_EC_SHIFT 46 + 47 + cmp x2, #ESR_ELx_EC_HVC64 48 + b.ne el1_trap 49 + 50 + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest 51 + cbnz x3, el1_trap // called HVC 52 + 53 + /* Here, we're pretty sure the host called HVC. */ 54 + restore_x0_to_x3 55 + 56 + /* Check for __hyp_get_vectors */ 57 + cbnz x0, 1f 58 + mrs x0, vbar_el2 59 + b 2f 60 + 61 + 1: stp lr, xzr, [sp, #-16]! 62 + 63 + /* 64 + * Compute the function address in EL2, and shuffle the parameters. 
65 + */ 66 + kern_hyp_va x0 67 + mov lr, x0 68 + mov x0, x1 69 + mov x1, x2 70 + mov x2, x3 71 + blr lr 72 + 73 + ldp lr, xzr, [sp], #16 74 + 2: eret 75 + 76 + el1_trap: 77 + /* 78 + * x1: ESR 79 + * x2: ESR_EC 80 + */ 81 + 82 + /* Guest accessed VFP/SIMD registers, save host, restore Guest */ 83 + cmp x2, #ESR_ELx_EC_FP_ASIMD 84 + b.eq __fpsimd_guest_restore 85 + 86 + cmp x2, #ESR_ELx_EC_DABT_LOW 87 + mov x0, #ESR_ELx_EC_IABT_LOW 88 + ccmp x2, x0, #4, ne 89 + b.ne 1f // Not an abort we care about 90 + 91 + /* This is an abort. Check for permission fault */ 92 + alternative_if_not ARM64_WORKAROUND_834220 93 + and x2, x1, #ESR_ELx_FSC_TYPE 94 + cmp x2, #FSC_PERM 95 + b.ne 1f // Not a permission fault 96 + alternative_else 97 + nop // Use the permission fault path to 98 + nop // check for a valid S1 translation, 99 + nop // regardless of the ESR value. 100 + alternative_endif 101 + 102 + /* 103 + * Check for Stage-1 page table walk, which is guaranteed 104 + * to give a valid HPFAR_EL2. 105 + */ 106 + tbnz x1, #7, 1f // S1PTW is set 107 + 108 + /* Preserve PAR_EL1 */ 109 + mrs x3, par_el1 110 + stp x3, xzr, [sp, #-16]! 111 + 112 + /* 113 + * Permission fault, HPFAR_EL2 is invalid. 114 + * Resolve the IPA the hard way using the guest VA. 115 + * Stage-1 translation already validated the memory access rights. 116 + * As such, we can use the EL1 translation regime, and don't have 117 + * to distinguish between EL0 and EL1 access. 
118 + */ 119 + mrs x2, far_el2 120 + at s1e1r, x2 121 + isb 122 + 123 + /* Read result */ 124 + mrs x3, par_el1 125 + ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack 126 + msr par_el1, x0 127 + tbnz x3, #0, 3f // Bail out if we failed the translation 128 + ubfx x3, x3, #12, #36 // Extract IPA 129 + lsl x3, x3, #4 // and present it like HPFAR 130 + b 2f 131 + 132 + 1: mrs x3, hpfar_el2 133 + mrs x2, far_el2 134 + 135 + 2: mrs x0, tpidr_el2 136 + str w1, [x0, #VCPU_ESR_EL2] 137 + str x2, [x0, #VCPU_FAR_EL2] 138 + str x3, [x0, #VCPU_HPFAR_EL2] 139 + 140 + mov x1, #ARM_EXCEPTION_TRAP 141 + b __guest_exit 142 + 143 + /* 144 + * Translation failed. Just return to the guest and 145 + * let it fault again. Another CPU is probably playing 146 + * behind our back. 147 + */ 148 + 3: restore_x0_to_x3 149 + 150 + eret 151 + 152 + el1_irq: 153 + save_x0_to_x3 154 + mrs x0, tpidr_el2 155 + mov x1, #ARM_EXCEPTION_IRQ 156 + b __guest_exit 157 + 158 + ENTRY(__hyp_do_panic) 159 + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ 160 + PSR_MODE_EL1h) 161 + msr spsr_el2, lr 162 + ldr lr, =panic 163 + msr elr_el2, lr 164 + eret 165 + ENDPROC(__hyp_do_panic) 166 + 167 + .macro invalid_vector label, target = __hyp_panic 168 + .align 2 169 + \label: 170 + b \target 171 + ENDPROC(\label) 172 + .endm 173 + 174 + /* None of these should ever happen */ 175 + invalid_vector el2t_sync_invalid 176 + invalid_vector el2t_irq_invalid 177 + invalid_vector el2t_fiq_invalid 178 + invalid_vector el2t_error_invalid 179 + invalid_vector el2h_sync_invalid 180 + invalid_vector el2h_irq_invalid 181 + invalid_vector el2h_fiq_invalid 182 + invalid_vector el2h_error_invalid 183 + invalid_vector el1_sync_invalid 184 + invalid_vector el1_irq_invalid 185 + invalid_vector el1_fiq_invalid 186 + invalid_vector el1_error_invalid 187 + 188 + .ltorg 189 + 190 + .align 11 191 + 192 + ENTRY(__kvm_hyp_vector) 193 + ventry el2t_sync_invalid // Synchronous EL2t 194 + ventry el2t_irq_invalid // IRQ EL2t 195 
+ ventry el2t_fiq_invalid // FIQ EL2t 196 + ventry el2t_error_invalid // Error EL2t 197 + 198 + ventry el2h_sync_invalid // Synchronous EL2h 199 + ventry el2h_irq_invalid // IRQ EL2h 200 + ventry el2h_fiq_invalid // FIQ EL2h 201 + ventry el2h_error_invalid // Error EL2h 202 + 203 + ventry el1_sync // Synchronous 64-bit EL1 204 + ventry el1_irq // IRQ 64-bit EL1 205 + ventry el1_fiq_invalid // FIQ 64-bit EL1 206 + ventry el1_error_invalid // Error 64-bit EL1 207 + 208 + ventry el1_sync // Synchronous 32-bit EL1 209 + ventry el1_irq // IRQ 32-bit EL1 210 + ventry el1_fiq_invalid // FIQ 32-bit EL1 211 + ventry el1_error_invalid // Error 32-bit EL1 212 + ENDPROC(__kvm_hyp_vector)
+90
arch/arm64/kvm/hyp/hyp.h
··· 1 + /* 2 + * Copyright (C) 2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #ifndef __ARM64_KVM_HYP_H__ 19 + #define __ARM64_KVM_HYP_H__ 20 + 21 + #include <linux/compiler.h> 22 + #include <linux/kvm_host.h> 23 + #include <asm/kvm_mmu.h> 24 + #include <asm/sysreg.h> 25 + 26 + #define __hyp_text __section(.hyp.text) notrace 27 + 28 + #define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK) 29 + #define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ 30 + + PAGE_OFFSET) 31 + 32 + /** 33 + * hyp_alternate_select - Generates patchable code sequences that are 34 + * used to switch between two implementations of a function, depending 35 + * on the availability of a feature. 
36 + * 37 + * @fname: a symbol name that will be defined as a function returning a 38 + * function pointer whose type will match @orig and @alt 39 + * @orig: A pointer to the default function, as returned by @fname when 40 + * @cond doesn't hold 41 + * @alt: A pointer to the alternate function, as returned by @fname 42 + * when @cond holds 43 + * @cond: a CPU feature (as described in asm/cpufeature.h) 44 + */ 45 + #define hyp_alternate_select(fname, orig, alt, cond) \ 46 + typeof(orig) * __hyp_text fname(void) \ 47 + { \ 48 + typeof(alt) *val = orig; \ 49 + asm volatile(ALTERNATIVE("nop \n", \ 50 + "mov %0, %1 \n", \ 51 + cond) \ 52 + : "+r" (val) : "r" (alt)); \ 53 + return val; \ 54 + } 55 + 56 + void __vgic_v2_save_state(struct kvm_vcpu *vcpu); 57 + void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); 58 + 59 + void __vgic_v3_save_state(struct kvm_vcpu *vcpu); 60 + void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); 61 + 62 + void __timer_save_state(struct kvm_vcpu *vcpu); 63 + void __timer_restore_state(struct kvm_vcpu *vcpu); 64 + 65 + void __sysreg_save_state(struct kvm_cpu_context *ctxt); 66 + void __sysreg_restore_state(struct kvm_cpu_context *ctxt); 67 + void __sysreg32_save_state(struct kvm_vcpu *vcpu); 68 + void __sysreg32_restore_state(struct kvm_vcpu *vcpu); 69 + 70 + void __debug_save_state(struct kvm_vcpu *vcpu, 71 + struct kvm_guest_debug_arch *dbg, 72 + struct kvm_cpu_context *ctxt); 73 + void __debug_restore_state(struct kvm_vcpu *vcpu, 74 + struct kvm_guest_debug_arch *dbg, 75 + struct kvm_cpu_context *ctxt); 76 + void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); 77 + void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); 78 + 79 + void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); 80 + void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); 81 + static inline bool __fpsimd_enabled(void) 82 + { 83 + return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); 84 + } 85 + 86 + u64 __guest_enter(struct kvm_vcpu *vcpu, struct 
kvm_cpu_context *host_ctxt); 87 + void __noreturn __hyp_do_panic(unsigned long, ...); 88 + 89 + #endif /* __ARM64_KVM_HYP_H__ */ 90 +
+175
arch/arm64/kvm/hyp/switch.c
··· 1 + /* 2 + * Copyright (C) 2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include "hyp.h" 19 + 20 + static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) 21 + { 22 + u64 val; 23 + 24 + /* 25 + * We are about to set CPTR_EL2.TFP to trap all floating point 26 + * register accesses to EL2, however, the ARM ARM clearly states that 27 + * traps are only taken to EL2 if the operation would not otherwise 28 + * trap to EL1. Therefore, always make sure that for 32-bit guests, 29 + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. 
30 + */ 31 + val = vcpu->arch.hcr_el2; 32 + if (!(val & HCR_RW)) { 33 + write_sysreg(1 << 30, fpexc32_el2); 34 + isb(); 35 + } 36 + write_sysreg(val, hcr_el2); 37 + /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ 38 + write_sysreg(1 << 15, hstr_el2); 39 + write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2); 40 + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); 41 + } 42 + 43 + static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) 44 + { 45 + write_sysreg(HCR_RW, hcr_el2); 46 + write_sysreg(0, hstr_el2); 47 + write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); 48 + write_sysreg(0, cptr_el2); 49 + } 50 + 51 + static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) 52 + { 53 + struct kvm *kvm = kern_hyp_va(vcpu->kvm); 54 + write_sysreg(kvm->arch.vttbr, vttbr_el2); 55 + } 56 + 57 + static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) 58 + { 59 + write_sysreg(0, vttbr_el2); 60 + } 61 + 62 + static hyp_alternate_select(__vgic_call_save_state, 63 + __vgic_v2_save_state, __vgic_v3_save_state, 64 + ARM64_HAS_SYSREG_GIC_CPUIF); 65 + 66 + static hyp_alternate_select(__vgic_call_restore_state, 67 + __vgic_v2_restore_state, __vgic_v3_restore_state, 68 + ARM64_HAS_SYSREG_GIC_CPUIF); 69 + 70 + static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) 71 + { 72 + __vgic_call_save_state()(vcpu); 73 + write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2); 74 + } 75 + 76 + static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) 77 + { 78 + u64 val; 79 + 80 + val = read_sysreg(hcr_el2); 81 + val |= HCR_INT_OVERRIDE; 82 + val |= vcpu->arch.irq_lines; 83 + write_sysreg(val, hcr_el2); 84 + 85 + __vgic_call_restore_state()(vcpu); 86 + } 87 + 88 + static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) 89 + { 90 + struct kvm_cpu_context *host_ctxt; 91 + struct kvm_cpu_context *guest_ctxt; 92 + bool fp_enabled; 93 + u64 exit_code; 94 + 95 + vcpu = kern_hyp_va(vcpu); 96 + write_sysreg(vcpu, tpidr_el2); 97 + 98 + host_ctxt = 
kern_hyp_va(vcpu->arch.host_cpu_context); 99 + guest_ctxt = &vcpu->arch.ctxt; 100 + 101 + __sysreg_save_state(host_ctxt); 102 + __debug_cond_save_host_state(vcpu); 103 + 104 + __activate_traps(vcpu); 105 + __activate_vm(vcpu); 106 + 107 + __vgic_restore_state(vcpu); 108 + __timer_restore_state(vcpu); 109 + 110 + /* 111 + * We must restore the 32-bit state before the sysregs, thanks 112 + * to Cortex-A57 erratum #852523. 113 + */ 114 + __sysreg32_restore_state(vcpu); 115 + __sysreg_restore_state(guest_ctxt); 116 + __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); 117 + 118 + /* Jump in the fire! */ 119 + exit_code = __guest_enter(vcpu, host_ctxt); 120 + /* And we're baaack! */ 121 + 122 + fp_enabled = __fpsimd_enabled(); 123 + 124 + __sysreg_save_state(guest_ctxt); 125 + __sysreg32_save_state(vcpu); 126 + __timer_save_state(vcpu); 127 + __vgic_save_state(vcpu); 128 + 129 + __deactivate_traps(vcpu); 130 + __deactivate_vm(vcpu); 131 + 132 + __sysreg_restore_state(host_ctxt); 133 + 134 + if (fp_enabled) { 135 + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); 136 + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); 137 + } 138 + 139 + __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); 140 + __debug_cond_restore_host_state(vcpu); 141 + 142 + return exit_code; 143 + } 144 + 145 + __alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); 146 + 147 + static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; 148 + 149 + void __hyp_text __noreturn __hyp_panic(void) 150 + { 151 + unsigned long str_va = (unsigned long)__hyp_panic_string; 152 + u64 spsr = read_sysreg(spsr_el2); 153 + u64 elr = read_sysreg(elr_el2); 154 + u64 par = read_sysreg(par_el1); 155 + 156 + if (read_sysreg(vttbr_el2)) { 157 + struct kvm_vcpu *vcpu; 158 + struct kvm_cpu_context *host_ctxt; 159 + 160 + vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); 161 + host_ctxt = 
kern_hyp_va(vcpu->arch.host_cpu_context); 162 + __deactivate_traps(vcpu); 163 + __deactivate_vm(vcpu); 164 + __sysreg_restore_state(host_ctxt); 165 + } 166 + 167 + /* Call panic for real */ 168 + __hyp_do_panic(hyp_kern_va(str_va), 169 + spsr, elr, 170 + read_sysreg(esr_el2), read_sysreg(far_el2), 171 + read_sysreg(hpfar_el2), par, 172 + (void *)read_sysreg(tpidr_el2)); 173 + 174 + unreachable(); 175 + }
+138
arch/arm64/kvm/hyp/sysreg-sr.c
··· 1 + /* 2 + * Copyright (C) 2012-2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include <linux/compiler.h> 19 + #include <linux/kvm_host.h> 20 + 21 + #include <asm/kvm_asm.h> 22 + #include <asm/kvm_mmu.h> 23 + 24 + #include "hyp.h" 25 + 26 + /* ctxt is already in the HYP VA space */ 27 + void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) 28 + { 29 + ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); 30 + ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); 31 + ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1); 32 + ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); 33 + ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1); 34 + ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1); 35 + ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1); 36 + ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1); 37 + ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1); 38 + ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1); 39 + ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1); 40 + ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1); 41 + ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1); 42 + ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1); 43 + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1); 44 + ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); 45 + ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); 46 
+ ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); 47 + ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1); 48 + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1); 49 + ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); 50 + ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); 51 + 52 + ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); 53 + ctxt->gp_regs.regs.pc = read_sysreg(elr_el2); 54 + ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2); 55 + ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); 56 + ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1); 57 + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1); 58 + } 59 + 60 + void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) 61 + { 62 + write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); 63 + write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); 64 + write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1); 65 + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); 66 + write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1); 67 + write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1); 68 + write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1); 69 + write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1); 70 + write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1); 71 + write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1); 72 + write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1); 73 + write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1); 74 + write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1); 75 + write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1); 76 + write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1); 77 + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); 78 + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); 79 + write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); 80 + write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1); 81 + write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1); 82 + write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); 83 + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); 84 + 85 + write_sysreg(ctxt->gp_regs.regs.sp, 
sp_el0); 86 + write_sysreg(ctxt->gp_regs.regs.pc, elr_el2); 87 + write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2); 88 + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); 89 + write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); 90 + write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); 91 + } 92 + 93 + void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) 94 + { 95 + u64 *spsr, *sysreg; 96 + 97 + if (read_sysreg(hcr_el2) & HCR_RW) 98 + return; 99 + 100 + spsr = vcpu->arch.ctxt.gp_regs.spsr; 101 + sysreg = vcpu->arch.ctxt.sys_regs; 102 + 103 + spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); 104 + spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); 105 + spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); 106 + spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); 107 + 108 + sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); 109 + sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); 110 + 111 + if (__fpsimd_enabled()) 112 + sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); 113 + 114 + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) 115 + sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); 116 + } 117 + 118 + void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) 119 + { 120 + u64 *spsr, *sysreg; 121 + 122 + if (read_sysreg(hcr_el2) & HCR_RW) 123 + return; 124 + 125 + spsr = vcpu->arch.ctxt.gp_regs.spsr; 126 + sysreg = vcpu->arch.ctxt.sys_regs; 127 + 128 + write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); 129 + write_sysreg(spsr[KVM_SPSR_UND], spsr_und); 130 + write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); 131 + write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); 132 + 133 + write_sysreg(sysreg[DACR32_EL2], dacr32_el2); 134 + write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); 135 + 136 + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) 137 + write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); 138 + }
+71
arch/arm64/kvm/hyp/timer-sr.c
··· 1 + /* 2 + * Copyright (C) 2012-2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include <clocksource/arm_arch_timer.h> 19 + #include <linux/compiler.h> 20 + #include <linux/kvm_host.h> 21 + 22 + #include <asm/kvm_mmu.h> 23 + 24 + #include "hyp.h" 25 + 26 + /* vcpu is already in the HYP VA space */ 27 + void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) 28 + { 29 + struct kvm *kvm = kern_hyp_va(vcpu->kvm); 30 + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 31 + u64 val; 32 + 33 + if (kvm->arch.timer.enabled) { 34 + timer->cntv_ctl = read_sysreg(cntv_ctl_el0); 35 + timer->cntv_cval = read_sysreg(cntv_cval_el0); 36 + } 37 + 38 + /* Disable the virtual timer */ 39 + write_sysreg(0, cntv_ctl_el0); 40 + 41 + /* Allow physical timer/counter access for the host */ 42 + val = read_sysreg(cnthctl_el2); 43 + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; 44 + write_sysreg(val, cnthctl_el2); 45 + 46 + /* Clear cntvoff for the host */ 47 + write_sysreg(0, cntvoff_el2); 48 + } 49 + 50 + void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) 51 + { 52 + struct kvm *kvm = kern_hyp_va(vcpu->kvm); 53 + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 54 + u64 val; 55 + 56 + /* 57 + * Disallow physical timer access for the guest 58 + * Physical counter access is allowed 59 + */ 60 + val = 
read_sysreg(cnthctl_el2); 61 + val &= ~CNTHCTL_EL1PCEN; 62 + val |= CNTHCTL_EL1PCTEN; 63 + write_sysreg(val, cnthctl_el2); 64 + 65 + if (kvm->arch.timer.enabled) { 66 + write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); 67 + write_sysreg(timer->cntv_cval, cntv_cval_el0); 68 + isb(); 69 + write_sysreg(timer->cntv_ctl, cntv_ctl_el0); 70 + } 71 + }
+80
arch/arm64/kvm/hyp/tlb.c
··· 1 + /* 2 + * Copyright (C) 2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include "hyp.h" 19 + 20 + static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 21 + { 22 + dsb(ishst); 23 + 24 + /* Switch to requested VMID */ 25 + kvm = kern_hyp_va(kvm); 26 + write_sysreg(kvm->arch.vttbr, vttbr_el2); 27 + isb(); 28 + 29 + /* 30 + * We could do so much better if we had the VA as well. 31 + * Instead, we invalidate Stage-2 for this IPA, and the 32 + * whole of Stage-1. Weep... 33 + */ 34 + ipa >>= 12; 35 + asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa)); 36 + 37 + /* 38 + * We have to ensure completion of the invalidation at Stage-2, 39 + * since a table walk on another CPU could refill a TLB with a 40 + * complete (S1 + S2) walk based on the old Stage-2 mapping if 41 + * the Stage-1 invalidation happened first. 
42 + */ 43 + dsb(ish); 44 + asm volatile("tlbi vmalle1is" : : ); 45 + dsb(ish); 46 + isb(); 47 + 48 + write_sysreg(0, vttbr_el2); 49 + } 50 + 51 + __alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, 52 + phys_addr_t ipa); 53 + 54 + static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) 55 + { 56 + dsb(ishst); 57 + 58 + /* Switch to requested VMID */ 59 + kvm = kern_hyp_va(kvm); 60 + write_sysreg(kvm->arch.vttbr, vttbr_el2); 61 + isb(); 62 + 63 + asm volatile("tlbi vmalls12e1is" : : ); 64 + dsb(ish); 65 + isb(); 66 + 67 + write_sysreg(0, vttbr_el2); 68 + } 69 + 70 + __alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm); 71 + 72 + static void __hyp_text __tlb_flush_vm_context(void) 73 + { 74 + dsb(ishst); 75 + asm volatile("tlbi alle1is \n" 76 + "ic ialluis ": : ); 77 + dsb(ish); 78 + } 79 + 80 + __alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void);
+84
arch/arm64/kvm/hyp/vgic-v2-sr.c
··· 1 + /* 2 + * Copyright (C) 2012-2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include <linux/compiler.h> 19 + #include <linux/irqchip/arm-gic.h> 20 + #include <linux/kvm_host.h> 21 + 22 + #include <asm/kvm_mmu.h> 23 + 24 + #include "hyp.h" 25 + 26 + /* vcpu is already in the HYP VA space */ 27 + void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) 28 + { 29 + struct kvm *kvm = kern_hyp_va(vcpu->kvm); 30 + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 31 + struct vgic_dist *vgic = &kvm->arch.vgic; 32 + void __iomem *base = kern_hyp_va(vgic->vctrl_base); 33 + u32 eisr0, eisr1, elrsr0, elrsr1; 34 + int i, nr_lr; 35 + 36 + if (!base) 37 + return; 38 + 39 + nr_lr = vcpu->arch.vgic_cpu.nr_lr; 40 + cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); 41 + cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); 42 + eisr0 = readl_relaxed(base + GICH_EISR0); 43 + elrsr0 = readl_relaxed(base + GICH_ELRSR0); 44 + if (unlikely(nr_lr > 32)) { 45 + eisr1 = readl_relaxed(base + GICH_EISR1); 46 + elrsr1 = readl_relaxed(base + GICH_ELRSR1); 47 + } else { 48 + eisr1 = elrsr1 = 0; 49 + } 50 + #ifdef CONFIG_CPU_BIG_ENDIAN 51 + cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; 52 + cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; 53 + #else 54 + cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; 55 + 
cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; 56 + #endif 57 + cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); 58 + 59 + writel_relaxed(0, base + GICH_HCR); 60 + 61 + for (i = 0; i < nr_lr; i++) 62 + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); 63 + } 64 + 65 + /* vcpu is already in the HYP VA space */ 66 + void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) 67 + { 68 + struct kvm *kvm = kern_hyp_va(vcpu->kvm); 69 + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 70 + struct vgic_dist *vgic = &kvm->arch.vgic; 71 + void __iomem *base = kern_hyp_va(vgic->vctrl_base); 72 + int i, nr_lr; 73 + 74 + if (!base) 75 + return; 76 + 77 + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); 78 + writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); 79 + writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); 80 + 81 + nr_lr = vcpu->arch.vgic_cpu.nr_lr; 82 + for (i = 0; i < nr_lr; i++) 83 + writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); 84 + }
+228
arch/arm64/kvm/hyp/vgic-v3-sr.c
··· 1 + /* 2 + * Copyright (C) 2012-2015 - ARM Ltd 3 + * Author: Marc Zyngier <marc.zyngier@arm.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify 6 + * it under the terms of the GNU General Public License version 2 as 7 + * published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 + */ 17 + 18 + #include <linux/compiler.h> 19 + #include <linux/irqchip/arm-gic-v3.h> 20 + #include <linux/kvm_host.h> 21 + 22 + #include <asm/kvm_mmu.h> 23 + 24 + #include "hyp.h" 25 + 26 + #define vtr_to_max_lr_idx(v) ((v) & 0xf) 27 + #define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) 28 + 29 + #define read_gicreg(r) \ 30 + ({ \ 31 + u64 reg; \ 32 + asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \ 33 + reg; \ 34 + }) 35 + 36 + #define write_gicreg(v,r) \ 37 + do { \ 38 + u64 __val = (v); \ 39 + asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ 40 + } while (0) 41 + 42 + /* vcpu is already in the HYP VA space */ 43 + void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) 44 + { 45 + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 46 + u64 val; 47 + u32 max_lr_idx, nr_pri_bits; 48 + 49 + /* 50 + * Make sure stores to the GIC via the memory mapped interface 51 + * are now visible to the system register interface. 
52 + */ 53 + dsb(st); 54 + 55 + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); 56 + cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); 57 + cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); 58 + cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); 59 + 60 + write_gicreg(0, ICH_HCR_EL2); 61 + val = read_gicreg(ICH_VTR_EL2); 62 + max_lr_idx = vtr_to_max_lr_idx(val); 63 + nr_pri_bits = vtr_to_nr_pri_bits(val); 64 + 65 + switch (max_lr_idx) { 66 + case 15: 67 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2); 68 + case 14: 69 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2); 70 + case 13: 71 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2); 72 + case 12: 73 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2); 74 + case 11: 75 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2); 76 + case 10: 77 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2); 78 + case 9: 79 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2); 80 + case 8: 81 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2); 82 + case 7: 83 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2); 84 + case 6: 85 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2); 86 + case 5: 87 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2); 88 + case 4: 89 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2); 90 + case 3: 91 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2); 92 + case 2: 93 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2); 94 + case 1: 95 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2); 96 + case 0: 97 + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2); 98 + } 99 + 100 + switch (nr_pri_bits) { 101 + case 7: 102 + cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); 103 + cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); 104 + case 6: 105 + cpu_if->vgic_ap0r[1] = 
read_gicreg(ICH_AP0R1_EL2); 106 + default: 107 + cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); 108 + } 109 + 110 + switch (nr_pri_bits) { 111 + case 7: 112 + cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); 113 + cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); 114 + case 6: 115 + cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); 116 + default: 117 + cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); 118 + } 119 + 120 + val = read_gicreg(ICC_SRE_EL2); 121 + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); 122 + isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ 123 + write_gicreg(1, ICC_SRE_EL1); 124 + } 125 + 126 + void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) 127 + { 128 + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 129 + u64 val; 130 + u32 max_lr_idx, nr_pri_bits; 131 + 132 + /* 133 + * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a 134 + * Group0 interrupt (as generated in GICv2 mode) to be 135 + * delivered as a FIQ to the guest, with potentially fatal 136 + * consequences. So we must make sure that ICC_SRE_EL1 has 137 + * been actually programmed with the value we want before 138 + * starting to mess with the rest of the GIC. 
139 + */ 140 + write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); 141 + isb(); 142 + 143 + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); 144 + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); 145 + 146 + val = read_gicreg(ICH_VTR_EL2); 147 + max_lr_idx = vtr_to_max_lr_idx(val); 148 + nr_pri_bits = vtr_to_nr_pri_bits(val); 149 + 150 + switch (nr_pri_bits) { 151 + case 7: 152 + write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); 153 + write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); 154 + case 6: 155 + write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); 156 + default: 157 + write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); 158 + } 159 + 160 + switch (nr_pri_bits) { 161 + case 7: 162 + write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); 163 + write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); 164 + case 6: 165 + write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); 166 + default: 167 + write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); 168 + } 169 + 170 + switch (max_lr_idx) { 171 + case 15: 172 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); 173 + case 14: 174 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2); 175 + case 13: 176 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2); 177 + case 12: 178 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2); 179 + case 11: 180 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2); 181 + case 10: 182 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2); 183 + case 9: 184 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2); 185 + case 8: 186 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2); 187 + case 7: 188 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2); 189 + case 6: 190 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2); 191 + case 5: 192 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2); 193 + case 4: 194 + 
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2); 195 + case 3: 196 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2); 197 + case 2: 198 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2); 199 + case 1: 200 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2); 201 + case 0: 202 + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2); 203 + } 204 + 205 + /* 206 + * Ensures that the above will have reached the 207 + * (re)distributors. This ensure the guest will read the 208 + * correct values from the memory-mapped interface. 209 + */ 210 + isb(); 211 + dsb(sy); 212 + 213 + /* 214 + * Prevent the guest from touching the GIC system registers if 215 + * SRE isn't enabled for GICv3 emulation. 216 + */ 217 + if (!cpu_if->vgic_sre) { 218 + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, 219 + ICC_SRE_EL2); 220 + } 221 + } 222 + 223 + static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) 224 + { 225 + return read_gicreg(ICH_VTR_EL2); 226 + } 227 + 228 + __alias(__vgic_v3_read_ich_vtr_el2) u64 __vgic_v3_get_ich_vtr_el2(void);
+30 -29
arch/arm64/kvm/sys_regs.c
··· 29 29 #include <asm/debug-monitors.h> 30 30 #include <asm/esr.h> 31 31 #include <asm/kvm_arm.h> 32 + #include <asm/kvm_asm.h> 32 33 #include <asm/kvm_coproc.h> 33 34 #include <asm/kvm_emulate.h> 34 35 #include <asm/kvm_host.h> ··· 220 219 * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the 221 220 * hyp.S code switches between host and guest values in future. 222 221 */ 223 - static inline void reg_to_dbg(struct kvm_vcpu *vcpu, 224 - struct sys_reg_params *p, 225 - u64 *dbg_reg) 222 + static void reg_to_dbg(struct kvm_vcpu *vcpu, 223 + struct sys_reg_params *p, 224 + u64 *dbg_reg) 226 225 { 227 226 u64 val = p->regval; 228 227 ··· 235 234 vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; 236 235 } 237 236 238 - static inline void dbg_to_reg(struct kvm_vcpu *vcpu, 239 - struct sys_reg_params *p, 240 - u64 *dbg_reg) 237 + static void dbg_to_reg(struct kvm_vcpu *vcpu, 238 + struct sys_reg_params *p, 239 + u64 *dbg_reg) 241 240 { 242 241 p->regval = *dbg_reg; 243 242 if (p->is_32bit) 244 243 p->regval &= 0xffffffffUL; 245 244 } 246 245 247 - static inline bool trap_bvr(struct kvm_vcpu *vcpu, 248 - struct sys_reg_params *p, 249 - const struct sys_reg_desc *rd) 246 + static bool trap_bvr(struct kvm_vcpu *vcpu, 247 + struct sys_reg_params *p, 248 + const struct sys_reg_desc *rd) 250 249 { 251 250 u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; 252 251 ··· 280 279 return 0; 281 280 } 282 281 283 - static inline void reset_bvr(struct kvm_vcpu *vcpu, 284 - const struct sys_reg_desc *rd) 282 + static void reset_bvr(struct kvm_vcpu *vcpu, 283 + const struct sys_reg_desc *rd) 285 284 { 286 285 vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; 287 286 } 288 287 289 - static inline bool trap_bcr(struct kvm_vcpu *vcpu, 290 - struct sys_reg_params *p, 291 - const struct sys_reg_desc *rd) 288 + static bool trap_bcr(struct kvm_vcpu *vcpu, 289 + struct sys_reg_params *p, 290 + const struct sys_reg_desc *rd) 292 291 { 293 292 u64 *dbg_reg = 
&vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; 294 293 ··· 323 322 return 0; 324 323 } 325 324 326 - static inline void reset_bcr(struct kvm_vcpu *vcpu, 327 - const struct sys_reg_desc *rd) 325 + static void reset_bcr(struct kvm_vcpu *vcpu, 326 + const struct sys_reg_desc *rd) 328 327 { 329 328 vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; 330 329 } 331 330 332 - static inline bool trap_wvr(struct kvm_vcpu *vcpu, 333 - struct sys_reg_params *p, 334 - const struct sys_reg_desc *rd) 331 + static bool trap_wvr(struct kvm_vcpu *vcpu, 332 + struct sys_reg_params *p, 333 + const struct sys_reg_desc *rd) 335 334 { 336 335 u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; 337 336 ··· 366 365 return 0; 367 366 } 368 367 369 - static inline void reset_wvr(struct kvm_vcpu *vcpu, 370 - const struct sys_reg_desc *rd) 368 + static void reset_wvr(struct kvm_vcpu *vcpu, 369 + const struct sys_reg_desc *rd) 371 370 { 372 371 vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; 373 372 } 374 373 375 - static inline bool trap_wcr(struct kvm_vcpu *vcpu, 376 - struct sys_reg_params *p, 377 - const struct sys_reg_desc *rd) 374 + static bool trap_wcr(struct kvm_vcpu *vcpu, 375 + struct sys_reg_params *p, 376 + const struct sys_reg_desc *rd) 378 377 { 379 378 u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; 380 379 ··· 408 407 return 0; 409 408 } 410 409 411 - static inline void reset_wcr(struct kvm_vcpu *vcpu, 412 - const struct sys_reg_desc *rd) 410 + static void reset_wcr(struct kvm_vcpu *vcpu, 411 + const struct sys_reg_desc *rd) 413 412 { 414 413 vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; 415 414 } ··· 723 722 * system is in. 
724 723 */ 725 724 726 - static inline bool trap_xvr(struct kvm_vcpu *vcpu, 727 - struct sys_reg_params *p, 728 - const struct sys_reg_desc *rd) 725 + static bool trap_xvr(struct kvm_vcpu *vcpu, 726 + struct sys_reg_params *p, 727 + const struct sys_reg_desc *rd) 729 728 { 730 729 u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; 731 730
-134
arch/arm64/kvm/vgic-v2-switch.S
··· 1 - /* 2 - * Copyright (C) 2012,2013 - ARM Ltd 3 - * Author: Marc Zyngier <marc.zyngier@arm.com> 4 - * 5 - * This program is free software; you can redistribute it and/or modify 6 - * it under the terms of the GNU General Public License version 2 as 7 - * published by the Free Software Foundation. 8 - * 9 - * This program is distributed in the hope that it will be useful, 10 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 - * GNU General Public License for more details. 13 - * 14 - * You should have received a copy of the GNU General Public License 15 - * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 - */ 17 - 18 - #include <linux/linkage.h> 19 - #include <linux/irqchip/arm-gic.h> 20 - 21 - #include <asm/assembler.h> 22 - #include <asm/memory.h> 23 - #include <asm/asm-offsets.h> 24 - #include <asm/kvm.h> 25 - #include <asm/kvm_asm.h> 26 - #include <asm/kvm_arm.h> 27 - #include <asm/kvm_mmu.h> 28 - 29 - .text 30 - .pushsection .hyp.text, "ax" 31 - 32 - /* 33 - * Save the VGIC CPU state into memory 34 - * x0: Register pointing to VCPU struct 35 - * Do not corrupt x1!!! 
36 - */ 37 - ENTRY(__save_vgic_v2_state) 38 - __save_vgic_v2_state: 39 - /* Get VGIC VCTRL base into x2 */ 40 - ldr x2, [x0, #VCPU_KVM] 41 - kern_hyp_va x2 42 - ldr x2, [x2, #KVM_VGIC_VCTRL] 43 - kern_hyp_va x2 44 - cbz x2, 2f // disabled 45 - 46 - /* Compute the address of struct vgic_cpu */ 47 - add x3, x0, #VCPU_VGIC_CPU 48 - 49 - /* Save all interesting registers */ 50 - ldr w5, [x2, #GICH_VMCR] 51 - ldr w6, [x2, #GICH_MISR] 52 - ldr w7, [x2, #GICH_EISR0] 53 - ldr w8, [x2, #GICH_EISR1] 54 - ldr w9, [x2, #GICH_ELRSR0] 55 - ldr w10, [x2, #GICH_ELRSR1] 56 - ldr w11, [x2, #GICH_APR] 57 - CPU_BE( rev w5, w5 ) 58 - CPU_BE( rev w6, w6 ) 59 - CPU_BE( rev w7, w7 ) 60 - CPU_BE( rev w8, w8 ) 61 - CPU_BE( rev w9, w9 ) 62 - CPU_BE( rev w10, w10 ) 63 - CPU_BE( rev w11, w11 ) 64 - 65 - str w5, [x3, #VGIC_V2_CPU_VMCR] 66 - str w6, [x3, #VGIC_V2_CPU_MISR] 67 - CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] ) 68 - CPU_LE( str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] ) 69 - CPU_LE( str w9, [x3, #VGIC_V2_CPU_ELRSR] ) 70 - CPU_LE( str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) 71 - CPU_BE( str w7, [x3, #(VGIC_V2_CPU_EISR + 4)] ) 72 - CPU_BE( str w8, [x3, #VGIC_V2_CPU_EISR] ) 73 - CPU_BE( str w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) 74 - CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] ) 75 - str w11, [x3, #VGIC_V2_CPU_APR] 76 - 77 - /* Clear GICH_HCR */ 78 - str wzr, [x2, #GICH_HCR] 79 - 80 - /* Save list registers */ 81 - add x2, x2, #GICH_LR0 82 - ldr w4, [x3, #VGIC_CPU_NR_LR] 83 - add x3, x3, #VGIC_V2_CPU_LR 84 - 1: ldr w5, [x2], #4 85 - CPU_BE( rev w5, w5 ) 86 - str w5, [x3], #4 87 - sub w4, w4, #1 88 - cbnz w4, 1b 89 - 2: 90 - ret 91 - ENDPROC(__save_vgic_v2_state) 92 - 93 - /* 94 - * Restore the VGIC CPU state from memory 95 - * x0: Register pointing to VCPU struct 96 - */ 97 - ENTRY(__restore_vgic_v2_state) 98 - __restore_vgic_v2_state: 99 - /* Get VGIC VCTRL base into x2 */ 100 - ldr x2, [x0, #VCPU_KVM] 101 - kern_hyp_va x2 102 - ldr x2, [x2, #KVM_VGIC_VCTRL] 103 - kern_hyp_va x2 104 - cbz x2, 2f // 
disabled 105 - 106 - /* Compute the address of struct vgic_cpu */ 107 - add x3, x0, #VCPU_VGIC_CPU 108 - 109 - /* We only restore a minimal set of registers */ 110 - ldr w4, [x3, #VGIC_V2_CPU_HCR] 111 - ldr w5, [x3, #VGIC_V2_CPU_VMCR] 112 - ldr w6, [x3, #VGIC_V2_CPU_APR] 113 - CPU_BE( rev w4, w4 ) 114 - CPU_BE( rev w5, w5 ) 115 - CPU_BE( rev w6, w6 ) 116 - 117 - str w4, [x2, #GICH_HCR] 118 - str w5, [x2, #GICH_VMCR] 119 - str w6, [x2, #GICH_APR] 120 - 121 - /* Restore list registers */ 122 - add x2, x2, #GICH_LR0 123 - ldr w4, [x3, #VGIC_CPU_NR_LR] 124 - add x3, x3, #VGIC_V2_CPU_LR 125 - 1: ldr w5, [x3], #4 126 - CPU_BE( rev w5, w5 ) 127 - str w5, [x2], #4 128 - sub w4, w4, #1 129 - cbnz w4, 1b 130 - 2: 131 - ret 132 - ENDPROC(__restore_vgic_v2_state) 133 - 134 - .popsection
-269
arch/arm64/kvm/vgic-v3-switch.S
··· 1 - /* 2 - * Copyright (C) 2012,2013 - ARM Ltd 3 - * Author: Marc Zyngier <marc.zyngier@arm.com> 4 - * 5 - * This program is free software; you can redistribute it and/or modify 6 - * it under the terms of the GNU General Public License version 2 as 7 - * published by the Free Software Foundation. 8 - * 9 - * This program is distributed in the hope that it will be useful, 10 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 - * GNU General Public License for more details. 13 - * 14 - * You should have received a copy of the GNU General Public License 15 - * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 - */ 17 - 18 - #include <linux/linkage.h> 19 - #include <linux/irqchip/arm-gic-v3.h> 20 - 21 - #include <asm/assembler.h> 22 - #include <asm/memory.h> 23 - #include <asm/asm-offsets.h> 24 - #include <asm/kvm.h> 25 - #include <asm/kvm_asm.h> 26 - #include <asm/kvm_arm.h> 27 - 28 - .text 29 - .pushsection .hyp.text, "ax" 30 - 31 - /* 32 - * We store LRs in reverse order to let the CPU deal with streaming 33 - * access. Use this macro to make it look saner... 34 - */ 35 - #define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) 36 - 37 - /* 38 - * Save the VGIC CPU state into memory 39 - * x0: Register pointing to VCPU struct 40 - * Do not corrupt x1!!! 
41 - */ 42 - .macro save_vgic_v3_state 43 - // Compute the address of struct vgic_cpu 44 - add x3, x0, #VCPU_VGIC_CPU 45 - 46 - // Make sure stores to the GIC via the memory mapped interface 47 - // are now visible to the system register interface 48 - dsb st 49 - 50 - // Save all interesting registers 51 - mrs_s x5, ICH_VMCR_EL2 52 - mrs_s x6, ICH_MISR_EL2 53 - mrs_s x7, ICH_EISR_EL2 54 - mrs_s x8, ICH_ELSR_EL2 55 - 56 - str w5, [x3, #VGIC_V3_CPU_VMCR] 57 - str w6, [x3, #VGIC_V3_CPU_MISR] 58 - str w7, [x3, #VGIC_V3_CPU_EISR] 59 - str w8, [x3, #VGIC_V3_CPU_ELRSR] 60 - 61 - msr_s ICH_HCR_EL2, xzr 62 - 63 - mrs_s x21, ICH_VTR_EL2 64 - mvn w22, w21 65 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 66 - 67 - adr x24, 1f 68 - add x24, x24, x23 69 - br x24 70 - 71 - 1: 72 - mrs_s x20, ICH_LR15_EL2 73 - mrs_s x19, ICH_LR14_EL2 74 - mrs_s x18, ICH_LR13_EL2 75 - mrs_s x17, ICH_LR12_EL2 76 - mrs_s x16, ICH_LR11_EL2 77 - mrs_s x15, ICH_LR10_EL2 78 - mrs_s x14, ICH_LR9_EL2 79 - mrs_s x13, ICH_LR8_EL2 80 - mrs_s x12, ICH_LR7_EL2 81 - mrs_s x11, ICH_LR6_EL2 82 - mrs_s x10, ICH_LR5_EL2 83 - mrs_s x9, ICH_LR4_EL2 84 - mrs_s x8, ICH_LR3_EL2 85 - mrs_s x7, ICH_LR2_EL2 86 - mrs_s x6, ICH_LR1_EL2 87 - mrs_s x5, ICH_LR0_EL2 88 - 89 - adr x24, 1f 90 - add x24, x24, x23 91 - br x24 92 - 93 - 1: 94 - str x20, [x3, #LR_OFFSET(15)] 95 - str x19, [x3, #LR_OFFSET(14)] 96 - str x18, [x3, #LR_OFFSET(13)] 97 - str x17, [x3, #LR_OFFSET(12)] 98 - str x16, [x3, #LR_OFFSET(11)] 99 - str x15, [x3, #LR_OFFSET(10)] 100 - str x14, [x3, #LR_OFFSET(9)] 101 - str x13, [x3, #LR_OFFSET(8)] 102 - str x12, [x3, #LR_OFFSET(7)] 103 - str x11, [x3, #LR_OFFSET(6)] 104 - str x10, [x3, #LR_OFFSET(5)] 105 - str x9, [x3, #LR_OFFSET(4)] 106 - str x8, [x3, #LR_OFFSET(3)] 107 - str x7, [x3, #LR_OFFSET(2)] 108 - str x6, [x3, #LR_OFFSET(1)] 109 - str x5, [x3, #LR_OFFSET(0)] 110 - 111 - tbnz w21, #29, 6f // 6 bits 112 - tbz w21, #30, 5f // 5 bits 113 - // 7 bits 114 - mrs_s x20, ICH_AP0R3_EL2 115 - str w20, [x3, 
#(VGIC_V3_CPU_AP0R + 3*4)] 116 - mrs_s x19, ICH_AP0R2_EL2 117 - str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] 118 - 6: mrs_s x18, ICH_AP0R1_EL2 119 - str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] 120 - 5: mrs_s x17, ICH_AP0R0_EL2 121 - str w17, [x3, #VGIC_V3_CPU_AP0R] 122 - 123 - tbnz w21, #29, 6f // 6 bits 124 - tbz w21, #30, 5f // 5 bits 125 - // 7 bits 126 - mrs_s x20, ICH_AP1R3_EL2 127 - str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] 128 - mrs_s x19, ICH_AP1R2_EL2 129 - str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] 130 - 6: mrs_s x18, ICH_AP1R1_EL2 131 - str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] 132 - 5: mrs_s x17, ICH_AP1R0_EL2 133 - str w17, [x3, #VGIC_V3_CPU_AP1R] 134 - 135 - // Restore SRE_EL1 access and re-enable SRE at EL1. 136 - mrs_s x5, ICC_SRE_EL2 137 - orr x5, x5, #ICC_SRE_EL2_ENABLE 138 - msr_s ICC_SRE_EL2, x5 139 - isb 140 - mov x5, #1 141 - msr_s ICC_SRE_EL1, x5 142 - .endm 143 - 144 - /* 145 - * Restore the VGIC CPU state from memory 146 - * x0: Register pointing to VCPU struct 147 - */ 148 - .macro restore_vgic_v3_state 149 - // Compute the address of struct vgic_cpu 150 - add x3, x0, #VCPU_VGIC_CPU 151 - 152 - // Restore all interesting registers 153 - ldr w4, [x3, #VGIC_V3_CPU_HCR] 154 - ldr w5, [x3, #VGIC_V3_CPU_VMCR] 155 - ldr w25, [x3, #VGIC_V3_CPU_SRE] 156 - 157 - msr_s ICC_SRE_EL1, x25 158 - 159 - // make sure SRE is valid before writing the other registers 160 - isb 161 - 162 - msr_s ICH_HCR_EL2, x4 163 - msr_s ICH_VMCR_EL2, x5 164 - 165 - mrs_s x21, ICH_VTR_EL2 166 - 167 - tbnz w21, #29, 6f // 6 bits 168 - tbz w21, #30, 5f // 5 bits 169 - // 7 bits 170 - ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] 171 - msr_s ICH_AP1R3_EL2, x20 172 - ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] 173 - msr_s ICH_AP1R2_EL2, x19 174 - 6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] 175 - msr_s ICH_AP1R1_EL2, x18 176 - 5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] 177 - msr_s ICH_AP1R0_EL2, x17 178 - 179 - tbnz w21, #29, 6f // 6 bits 180 - tbz w21, #30, 5f // 5 bits 181 - // 7 bits 182 - ldr w20, 
[x3, #(VGIC_V3_CPU_AP0R + 3*4)] 183 - msr_s ICH_AP0R3_EL2, x20 184 - ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] 185 - msr_s ICH_AP0R2_EL2, x19 186 - 6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] 187 - msr_s ICH_AP0R1_EL2, x18 188 - 5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] 189 - msr_s ICH_AP0R0_EL2, x17 190 - 191 - and w22, w21, #0xf 192 - mvn w22, w21 193 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 194 - 195 - adr x24, 1f 196 - add x24, x24, x23 197 - br x24 198 - 199 - 1: 200 - ldr x20, [x3, #LR_OFFSET(15)] 201 - ldr x19, [x3, #LR_OFFSET(14)] 202 - ldr x18, [x3, #LR_OFFSET(13)] 203 - ldr x17, [x3, #LR_OFFSET(12)] 204 - ldr x16, [x3, #LR_OFFSET(11)] 205 - ldr x15, [x3, #LR_OFFSET(10)] 206 - ldr x14, [x3, #LR_OFFSET(9)] 207 - ldr x13, [x3, #LR_OFFSET(8)] 208 - ldr x12, [x3, #LR_OFFSET(7)] 209 - ldr x11, [x3, #LR_OFFSET(6)] 210 - ldr x10, [x3, #LR_OFFSET(5)] 211 - ldr x9, [x3, #LR_OFFSET(4)] 212 - ldr x8, [x3, #LR_OFFSET(3)] 213 - ldr x7, [x3, #LR_OFFSET(2)] 214 - ldr x6, [x3, #LR_OFFSET(1)] 215 - ldr x5, [x3, #LR_OFFSET(0)] 216 - 217 - adr x24, 1f 218 - add x24, x24, x23 219 - br x24 220 - 221 - 1: 222 - msr_s ICH_LR15_EL2, x20 223 - msr_s ICH_LR14_EL2, x19 224 - msr_s ICH_LR13_EL2, x18 225 - msr_s ICH_LR12_EL2, x17 226 - msr_s ICH_LR11_EL2, x16 227 - msr_s ICH_LR10_EL2, x15 228 - msr_s ICH_LR9_EL2, x14 229 - msr_s ICH_LR8_EL2, x13 230 - msr_s ICH_LR7_EL2, x12 231 - msr_s ICH_LR6_EL2, x11 232 - msr_s ICH_LR5_EL2, x10 233 - msr_s ICH_LR4_EL2, x9 234 - msr_s ICH_LR3_EL2, x8 235 - msr_s ICH_LR2_EL2, x7 236 - msr_s ICH_LR1_EL2, x6 237 - msr_s ICH_LR0_EL2, x5 238 - 239 - // Ensure that the above will have reached the 240 - // (re)distributors. This ensure the guest will read 241 - // the correct values from the memory-mapped interface. 
242 - isb 243 - dsb sy 244 - 245 - // Prevent the guest from touching the GIC system registers 246 - // if SRE isn't enabled for GICv3 emulation 247 - cbnz x25, 1f 248 - mrs_s x5, ICC_SRE_EL2 249 - and x5, x5, #~ICC_SRE_EL2_ENABLE 250 - msr_s ICC_SRE_EL2, x5 251 - 1: 252 - .endm 253 - 254 - ENTRY(__save_vgic_v3_state) 255 - save_vgic_v3_state 256 - ret 257 - ENDPROC(__save_vgic_v3_state) 258 - 259 - ENTRY(__restore_vgic_v3_state) 260 - restore_vgic_v3_state 261 - ret 262 - ENDPROC(__restore_vgic_v3_state) 263 - 264 - ENTRY(__vgic_v3_get_ich_vtr_el2) 265 - mrs_s x0, ICH_VTR_EL2 266 - ret 267 - ENDPROC(__vgic_v3_get_ich_vtr_el2) 268 - 269 - .popsection
+4
arch/powerpc/include/asm/kvm_host.h
··· 50 50 #define KVM_NR_IRQCHIPS 1 51 51 #define KVM_IRQCHIP_NUM_PINS 256 52 52 53 + /* PPC-specific vcpu->requests bit members */ 54 + #define KVM_REQ_WATCHDOG 8 55 + #define KVM_REQ_EPR_EXIT 9 56 + 53 57 #include <linux/mmu_notifier.h> 54 58 55 59 #define KVM_ARCH_WANT_MMU_NOTIFIER
+2 -8
arch/powerpc/kvm/book3s_hv.c
··· 314 314 315 315 static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) 316 316 { 317 - int r; 318 - struct kvm_vcpu *v, *ret = NULL; 317 + struct kvm_vcpu *ret; 319 318 320 319 mutex_lock(&kvm->lock); 321 - kvm_for_each_vcpu(r, v, kvm) { 322 - if (v->vcpu_id == id) { 323 - ret = v; 324 - break; 325 - } 326 - } 320 + ret = kvm_get_vcpu_by_id(kvm, id); 327 321 mutex_unlock(&kvm->lock); 328 322 return ret; 329 323 }
+2 -2
arch/powerpc/kvm/book3s_pr.c
··· 512 512 put_page(hpage); 513 513 } 514 514 515 - static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) 515 + static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) 516 516 { 517 517 ulong mp_pa = vcpu->arch.magic_page_pa; 518 518 ··· 521 521 522 522 gpa &= ~0xFFFULL; 523 523 if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { 524 - return 1; 524 + return true; 525 525 } 526 526 527 527 return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);
+7
arch/s390/include/asm/elf.h
··· 104 104 #define HWCAP_S390_TE 1024 105 105 #define HWCAP_S390_VXRS 2048 106 106 107 + /* Internal bits, not exposed via elf */ 108 + #define HWCAP_INT_SIE 1UL 109 + 107 110 /* 108 111 * These are used to set parameters in the core dumps. 109 112 */ ··· 171 168 172 169 extern unsigned long elf_hwcap; 173 170 #define ELF_HWCAP (elf_hwcap) 171 + 172 + /* Internal hardware capabilities, not exposed via elf */ 173 + 174 + extern unsigned long int_hwcap; 174 175 175 176 /* This yields a string that ld.so will use to load implementation 176 177 specific libraries for optimization. This is more specific in
+50 -7
arch/s390/include/asm/kvm_host.h
··· 25 25 #include <asm/fpu/api.h> 26 26 #include <asm/isc.h> 27 27 28 - #define KVM_MAX_VCPUS 64 28 + #define KVM_S390_BSCA_CPU_SLOTS 64 29 + #define KVM_S390_ESCA_CPU_SLOTS 248 30 + #define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS 29 31 #define KVM_USER_MEM_SLOTS 32 30 32 31 33 /* ··· 39 37 #define KVM_IRQCHIP_NUM_PINS 4096 40 38 #define KVM_HALT_POLL_NS_DEFAULT 0 41 39 40 + /* s390-specific vcpu->requests bit members */ 41 + #define KVM_REQ_ENABLE_IBS 8 42 + #define KVM_REQ_DISABLE_IBS 9 43 + 42 44 #define SIGP_CTRL_C 0x80 43 45 #define SIGP_CTRL_SCN_MASK 0x3f 44 46 45 - struct sca_entry { 47 + union bsca_sigp_ctrl { 48 + __u8 value; 49 + struct { 50 + __u8 c : 1; 51 + __u8 r : 1; 52 + __u8 scn : 6; 53 + }; 54 + } __packed; 55 + 56 + union esca_sigp_ctrl { 57 + __u16 value; 58 + struct { 59 + __u8 c : 1; 60 + __u8 reserved: 7; 61 + __u8 scn; 62 + }; 63 + } __packed; 64 + 65 + struct esca_entry { 66 + union esca_sigp_ctrl sigp_ctrl; 67 + __u16 reserved1[3]; 68 + __u64 sda; 69 + __u64 reserved2[6]; 70 + } __packed; 71 + 72 + struct bsca_entry { 46 73 __u8 reserved0; 47 - __u8 sigp_ctrl; 74 + union bsca_sigp_ctrl sigp_ctrl; 48 75 __u16 reserved[3]; 49 76 __u64 sda; 50 77 __u64 reserved2[2]; ··· 88 57 }; 89 58 }; 90 59 91 - struct sca_block { 60 + struct bsca_block { 92 61 union ipte_control ipte_control; 93 62 __u64 reserved[5]; 94 63 __u64 mcn; 95 64 __u64 reserved2; 96 - struct sca_entry cpu[64]; 65 + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; 97 66 } __attribute__((packed)); 67 + 68 + struct esca_block { 69 + union ipte_control ipte_control; 70 + __u64 reserved1[7]; 71 + __u64 mcn[4]; 72 + __u64 reserved2[20]; 73 + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; 74 + } __packed; 98 75 99 76 #define CPUSTAT_STOPPED 0x80000000 100 77 #define CPUSTAT_WAIT 0x10000000 ··· 221 182 __u64 pp; /* 0x01de */ 222 183 __u8 reserved1e6[2]; /* 0x01e6 */ 223 184 __u64 itdba; /* 0x01e8 */ 224 - __u8 reserved1f0[16]; /* 0x01f0 */ 185 + __u64 riccbd; /* 0x01f0 */ 186 + __u8 
reserved1f8[8]; /* 0x01f8 */ 225 187 } __attribute__((packed)); 226 188 227 189 struct kvm_s390_itdb { ··· 625 585 }; 626 586 627 587 struct kvm_arch{ 628 - struct sca_block *sca; 588 + void *sca; 589 + int use_esca; 590 + rwlock_t sca_lock; 629 591 debug_info_t *dbf; 630 592 struct kvm_s390_float_interrupt float_int; 631 593 struct kvm_device *flic; 632 594 struct gmap *gmap; 595 + unsigned long mem_limit; 633 596 int css_support; 634 597 int use_irqchip; 635 598 int use_cmma;
+7 -1
arch/s390/include/asm/sclp.h
··· 29 29 30 30 struct sclp_core_entry { 31 31 u8 core_id; 32 - u8 reserved0[2]; 32 + u8 reserved0; 33 + u8 : 4; 34 + u8 sief2 : 1; 35 + u8 : 3; 33 36 u8 : 3; 34 37 u8 siif : 1; 35 38 u8 sigpif : 1; ··· 56 53 unsigned char has_sigpif : 1; 57 54 unsigned char has_core_type : 1; 58 55 unsigned char has_sprp : 1; 56 + unsigned char has_hvs : 1; 57 + unsigned char has_esca : 1; 58 + unsigned char has_sief2 : 1; 59 59 unsigned int ibc; 60 60 unsigned int mtid; 61 61 unsigned int mtid_cp;
+5
arch/s390/include/uapi/asm/kvm.h
··· 66 66 #define KVM_S390_VM_MEM_CLR_CMMA 1 67 67 #define KVM_S390_VM_MEM_LIMIT_SIZE 2 68 68 69 + #define KVM_S390_NO_MEM_LIMIT U64_MAX 70 + 69 71 /* kvm attributes for KVM_S390_VM_TOD */ 70 72 #define KVM_S390_VM_TOD_LOW 0 71 73 #define KVM_S390_VM_TOD_HIGH 1 ··· 153 151 #define KVM_SYNC_ARCH0 (1UL << 4) 154 152 #define KVM_SYNC_PFAULT (1UL << 5) 155 153 #define KVM_SYNC_VRS (1UL << 6) 154 + #define KVM_SYNC_RICCB (1UL << 7) 156 155 /* definition of registers in kvm_run */ 157 156 struct kvm_sync_regs { 158 157 __u64 prefix; /* prefix register */ ··· 171 168 __u64 vrs[32][2]; /* vector registers */ 172 169 __u8 reserved[512]; /* for future vector expansion */ 173 170 __u32 fpc; /* only valid with vector registers */ 171 + __u8 padding[52]; /* riccb needs to be 64byte aligned */ 172 + __u8 riccb[64]; /* runtime instrumentation controls block */ 174 173 }; 175 174 176 175 #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
+6
arch/s390/kernel/processor.c
··· 61 61 "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", 62 62 "edat", "etf3eh", "highgprs", "te", "vx" 63 63 }; 64 + static const char * const int_hwcap_str[] = { 65 + "sie" 66 + }; 64 67 unsigned long n = (unsigned long) v - 1; 65 68 int i; 66 69 ··· 78 75 for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) 79 76 if (hwcap_str[i] && (elf_hwcap & (1UL << i))) 80 77 seq_printf(m, "%s ", hwcap_str[i]); 78 + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) 79 + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) 80 + seq_printf(m, "%s ", int_hwcap_str[i]); 81 81 seq_puts(m, "\n"); 82 82 show_cacheinfo(m); 83 83 }
+9
arch/s390/kernel/setup.c
··· 80 80 unsigned long elf_hwcap __read_mostly = 0; 81 81 char elf_platform[ELF_PLATFORM_SIZE]; 82 82 83 + unsigned long int_hwcap = 0; 84 + 83 85 int __initdata memory_end_set; 84 86 unsigned long __initdata memory_end; 85 87 unsigned long __initdata max_physmem_end; ··· 795 793 strcpy(elf_platform, "z13"); 796 794 break; 797 795 } 796 + 797 + /* 798 + * Virtualization support HWCAP_INT_SIE is bit 0. 799 + */ 800 + if (sclp.has_sief2) 801 + int_hwcap |= HWCAP_INT_SIE; 802 + 798 803 return 0; 799 804 } 800 805 arch_initcall(setup_hwcaps);
+3 -8
arch/s390/kvm/diag.c
··· 155 155 156 156 static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) 157 157 { 158 - struct kvm *kvm = vcpu->kvm; 159 158 struct kvm_vcpu *tcpu; 160 159 int tid; 161 - int i; 162 160 163 161 tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; 164 162 vcpu->stat.diagnose_9c++; ··· 165 167 if (tid == vcpu->vcpu_id) 166 168 return 0; 167 169 168 - kvm_for_each_vcpu(i, tcpu, kvm) 169 - if (tcpu->vcpu_id == tid) { 170 - kvm_vcpu_yield_to(tcpu); 171 - break; 172 - } 173 - 170 + tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); 171 + if (tcpu) 172 + kvm_vcpu_yield_to(tcpu); 174 173 return 0; 175 174 } 176 175
+27 -11
arch/s390/kvm/gaccess.c
··· 259 259 260 260 int ipte_lock_held(struct kvm_vcpu *vcpu) 261 261 { 262 - union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; 262 + if (vcpu->arch.sie_block->eca & 1) { 263 + int rc; 263 264 264 - if (vcpu->arch.sie_block->eca & 1) 265 - return ic->kh != 0; 265 + read_lock(&vcpu->kvm->arch.sca_lock); 266 + rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; 267 + read_unlock(&vcpu->kvm->arch.sca_lock); 268 + return rc; 269 + } 266 270 return vcpu->kvm->arch.ipte_lock_count != 0; 267 271 } 268 272 ··· 278 274 vcpu->kvm->arch.ipte_lock_count++; 279 275 if (vcpu->kvm->arch.ipte_lock_count > 1) 280 276 goto out; 281 - ic = &vcpu->kvm->arch.sca->ipte_control; 277 + retry: 278 + read_lock(&vcpu->kvm->arch.sca_lock); 279 + ic = kvm_s390_get_ipte_control(vcpu->kvm); 282 280 do { 283 281 old = READ_ONCE(*ic); 284 - while (old.k) { 282 + if (old.k) { 283 + read_unlock(&vcpu->kvm->arch.sca_lock); 285 284 cond_resched(); 286 - old = READ_ONCE(*ic); 285 + goto retry; 287 286 } 288 287 new = old; 289 288 new.k = 1; 290 289 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 290 + read_unlock(&vcpu->kvm->arch.sca_lock); 291 291 out: 292 292 mutex_unlock(&vcpu->kvm->arch.ipte_mutex); 293 293 } ··· 304 296 vcpu->kvm->arch.ipte_lock_count--; 305 297 if (vcpu->kvm->arch.ipte_lock_count) 306 298 goto out; 307 - ic = &vcpu->kvm->arch.sca->ipte_control; 299 + read_lock(&vcpu->kvm->arch.sca_lock); 300 + ic = kvm_s390_get_ipte_control(vcpu->kvm); 308 301 do { 309 302 old = READ_ONCE(*ic); 310 303 new = old; 311 304 new.k = 0; 312 305 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 306 + read_unlock(&vcpu->kvm->arch.sca_lock); 313 307 wake_up(&vcpu->kvm->arch.ipte_wq); 314 308 out: 315 309 mutex_unlock(&vcpu->kvm->arch.ipte_mutex); ··· 321 311 { 322 312 union ipte_control old, new, *ic; 323 313 324 - ic = &vcpu->kvm->arch.sca->ipte_control; 314 + retry: 315 + read_lock(&vcpu->kvm->arch.sca_lock); 316 + ic = kvm_s390_get_ipte_control(vcpu->kvm); 325 317 do { 326 
318 old = READ_ONCE(*ic); 327 - while (old.kg) { 319 + if (old.kg) { 320 + read_unlock(&vcpu->kvm->arch.sca_lock); 328 321 cond_resched(); 329 - old = READ_ONCE(*ic); 322 + goto retry; 330 323 } 331 324 new = old; 332 325 new.k = 1; 333 326 new.kh++; 334 327 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 328 + read_unlock(&vcpu->kvm->arch.sca_lock); 335 329 } 336 330 337 331 static void ipte_unlock_siif(struct kvm_vcpu *vcpu) 338 332 { 339 333 union ipte_control old, new, *ic; 340 334 341 - ic = &vcpu->kvm->arch.sca->ipte_control; 335 + read_lock(&vcpu->kvm->arch.sca_lock); 336 + ic = kvm_s390_get_ipte_control(vcpu->kvm); 342 337 do { 343 338 old = READ_ONCE(*ic); 344 339 new = old; ··· 351 336 if (!new.kh) 352 337 new.k = 0; 353 338 } while (cmpxchg(&ic->val, old.val, new.val) != old.val); 339 + read_unlock(&vcpu->kvm->arch.sca_lock); 354 340 if (!new.kh) 355 341 wake_up(&vcpu->kvm->arch.ipte_wq); 356 342 }
+3 -4
arch/s390/kvm/intercept.c
··· 54 54 static int handle_noop(struct kvm_vcpu *vcpu) 55 55 { 56 56 switch (vcpu->arch.sie_block->icptcode) { 57 - case 0x0: 58 - vcpu->stat.exit_null++; 59 - break; 60 57 case 0x10: 61 58 vcpu->stat.exit_external_request++; 62 59 break; ··· 335 338 336 339 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) 337 340 { 341 + if (kvm_is_ucontrol(vcpu->kvm)) 342 + return -EOPNOTSUPP; 343 + 338 344 switch (vcpu->arch.sie_block->icptcode) { 339 - case 0x00: 340 345 case 0x10: 341 346 case 0x18: 342 347 return handle_noop(vcpu);
+106 -27
arch/s390/kvm/interrupt.c
··· 34 34 #define PFAULT_DONE 0x0680 35 35 #define VIRTIO_PARAM 0x0d00 36 36 37 + /* handle external calls via sigp interpretation facility */ 38 + static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) 39 + { 40 + int c, scn; 41 + 42 + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) 43 + return 0; 44 + 45 + read_lock(&vcpu->kvm->arch.sca_lock); 46 + if (vcpu->kvm->arch.use_esca) { 47 + struct esca_block *sca = vcpu->kvm->arch.sca; 48 + union esca_sigp_ctrl sigp_ctrl = 49 + sca->cpu[vcpu->vcpu_id].sigp_ctrl; 50 + 51 + c = sigp_ctrl.c; 52 + scn = sigp_ctrl.scn; 53 + } else { 54 + struct bsca_block *sca = vcpu->kvm->arch.sca; 55 + union bsca_sigp_ctrl sigp_ctrl = 56 + sca->cpu[vcpu->vcpu_id].sigp_ctrl; 57 + 58 + c = sigp_ctrl.c; 59 + scn = sigp_ctrl.scn; 60 + } 61 + read_unlock(&vcpu->kvm->arch.sca_lock); 62 + 63 + if (src_id) 64 + *src_id = scn; 65 + 66 + return c; 67 + } 68 + 69 + static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) 70 + { 71 + int expect, rc; 72 + 73 + read_lock(&vcpu->kvm->arch.sca_lock); 74 + if (vcpu->kvm->arch.use_esca) { 75 + struct esca_block *sca = vcpu->kvm->arch.sca; 76 + union esca_sigp_ctrl *sigp_ctrl = 77 + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); 78 + union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; 79 + 80 + new_val.scn = src_id; 81 + new_val.c = 1; 82 + old_val.c = 0; 83 + 84 + expect = old_val.value; 85 + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); 86 + } else { 87 + struct bsca_block *sca = vcpu->kvm->arch.sca; 88 + union bsca_sigp_ctrl *sigp_ctrl = 89 + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); 90 + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; 91 + 92 + new_val.scn = src_id; 93 + new_val.c = 1; 94 + old_val.c = 0; 95 + 96 + expect = old_val.value; 97 + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); 98 + } 99 + read_unlock(&vcpu->kvm->arch.sca_lock); 100 + 101 + if (rc != expect) { 102 + /* another external call is pending */ 103 
+ return -EBUSY; 104 + } 105 + atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); 106 + return 0; 107 + } 108 + 109 + static void sca_clear_ext_call(struct kvm_vcpu *vcpu) 110 + { 111 + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 112 + int rc, expect; 113 + 114 + atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); 115 + read_lock(&vcpu->kvm->arch.sca_lock); 116 + if (vcpu->kvm->arch.use_esca) { 117 + struct esca_block *sca = vcpu->kvm->arch.sca; 118 + union esca_sigp_ctrl *sigp_ctrl = 119 + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); 120 + union esca_sigp_ctrl old = *sigp_ctrl; 121 + 122 + expect = old.value; 123 + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); 124 + } else { 125 + struct bsca_block *sca = vcpu->kvm->arch.sca; 126 + union bsca_sigp_ctrl *sigp_ctrl = 127 + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); 128 + union bsca_sigp_ctrl old = *sigp_ctrl; 129 + 130 + expect = old.value; 131 + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); 132 + } 133 + read_unlock(&vcpu->kvm->arch.sca_lock); 134 + WARN_ON(rc != expect); /* cannot clear? 
*/ 135 + } 136 + 37 137 int psw_extint_disabled(struct kvm_vcpu *vcpu) 38 138 { 39 139 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); ··· 892 792 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) 893 793 { 894 794 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 895 - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; 896 795 897 796 if (!sclp.has_sigpif) 898 797 return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); 899 798 900 - return (sigp_ctrl & SIGP_CTRL_C) && 901 - (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); 799 + return sca_ext_call_pending(vcpu, NULL); 902 800 } 903 801 904 802 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) ··· 1007 909 memset(&li->irq, 0, sizeof(li->irq)); 1008 910 spin_unlock(&li->lock); 1009 911 1010 - /* clear pending external calls set by sigp interpretation facility */ 1011 - atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); 1012 - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; 912 + sca_clear_ext_call(vcpu); 1013 913 } 1014 914 1015 915 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) ··· 1099 1003 return 0; 1100 1004 } 1101 1005 1102 - static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) 1103 - { 1104 - unsigned char new_val, old_val; 1105 - uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; 1106 - 1107 - new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); 1108 - old_val = *sigp_ctrl & ~SIGP_CTRL_C; 1109 - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { 1110 - /* another external call is pending */ 1111 - return -EBUSY; 1112 - } 1113 - atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); 1114 - return 0; 1115 - } 1116 - 1117 1006 static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) 1118 1007 { 1119 1008 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; ··· 1115 1034 return -EINVAL; 1116 1035 1117 1036 if 
(sclp.has_sigpif) 1118 - return __inject_extcall_sigpif(vcpu, src_id); 1037 + return sca_inject_ext_call(vcpu, src_id); 1119 1038 1120 1039 if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) 1121 1040 return -EBUSY; ··· 2284 2203 2285 2204 int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) 2286 2205 { 2287 - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; 2206 + int scn; 2288 2207 unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; 2289 2208 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 2290 2209 unsigned long pending_irqs; ··· 2324 2243 } 2325 2244 } 2326 2245 2327 - if ((sigp_ctrl & SIGP_CTRL_C) && 2328 - (atomic_read(&vcpu->arch.sie_block->cpuflags) & 2329 - CPUSTAT_ECALL_PEND)) { 2246 + if (sca_ext_call_pending(vcpu, &scn)) { 2330 2247 if (n + sizeof(irq) > len) 2331 2248 return -ENOBUFS; 2332 2249 memset(&irq, 0, sizeof(irq)); 2333 2250 irq.type = KVM_S390_INT_EXTERNAL_CALL; 2334 - irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; 2251 + irq.u.extcall.code = scn; 2335 2252 if (copy_to_user(&buf[n], &irq, sizeof(irq))) 2336 2253 return -EFAULT; 2337 2254 n += sizeof(irq);
+219 -81
arch/s390/kvm/kvm-s390.c
··· 246 246 break; 247 247 case KVM_CAP_NR_VCPUS: 248 248 case KVM_CAP_MAX_VCPUS: 249 - r = KVM_MAX_VCPUS; 249 + r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS 250 + : KVM_S390_BSCA_CPU_SLOTS; 250 251 break; 251 252 case KVM_CAP_NR_MEMSLOTS: 252 253 r = KVM_USER_MEM_SLOTS; ··· 257 256 break; 258 257 case KVM_CAP_S390_VECTOR_REGISTERS: 259 258 r = MACHINE_HAS_VX; 259 + break; 260 + case KVM_CAP_S390_RI: 261 + r = test_facility(64); 260 262 break; 261 263 default: 262 264 r = 0; ··· 287 283 } 288 284 289 285 /* Section: vm related */ 286 + static void sca_del_vcpu(struct kvm_vcpu *vcpu); 287 + 290 288 /* 291 289 * Get (and clear) the dirty memory log for a memory slot. 292 290 */ ··· 361 355 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 362 356 r ? "(not available)" : "(success)"); 363 357 break; 358 + case KVM_CAP_S390_RI: 359 + r = -EINVAL; 360 + mutex_lock(&kvm->lock); 361 + if (atomic_read(&kvm->online_vcpus)) { 362 + r = -EBUSY; 363 + } else if (test_facility(64)) { 364 + set_kvm_facility(kvm->arch.model.fac->mask, 64); 365 + set_kvm_facility(kvm->arch.model.fac->list, 64); 366 + r = 0; 367 + } 368 + mutex_unlock(&kvm->lock); 369 + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", 370 + r ? 
"(not available)" : "(success)"); 371 + break; 364 372 case KVM_CAP_S390_USER_STSI: 365 373 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); 366 374 kvm->arch.user_stsi = 1; ··· 395 375 case KVM_S390_VM_MEM_LIMIT_SIZE: 396 376 ret = 0; 397 377 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", 398 - kvm->arch.gmap->asce_end); 399 - if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) 378 + kvm->arch.mem_limit); 379 + if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) 400 380 ret = -EFAULT; 401 381 break; 402 382 default: ··· 448 428 if (get_user(new_limit, (u64 __user *)attr->addr)) 449 429 return -EFAULT; 450 430 451 - if (new_limit > kvm->arch.gmap->asce_end) 431 + if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && 432 + new_limit > kvm->arch.mem_limit) 452 433 return -E2BIG; 434 + 435 + if (!new_limit) 436 + return -EINVAL; 437 + 438 + /* gmap_alloc takes last usable address */ 439 + if (new_limit != KVM_S390_NO_MEM_LIMIT) 440 + new_limit -= 1; 453 441 454 442 ret = -EBUSY; 455 443 mutex_lock(&kvm->lock); ··· 475 447 } 476 448 } 477 449 mutex_unlock(&kvm->lock); 478 - VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); 450 + VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); 451 + VM_EVENT(kvm, 3, "New guest asce: 0x%pK", 452 + (void *) kvm->arch.gmap->asce); 479 453 break; 480 454 } 481 455 default: ··· 1054 1024 u8 config[128]; 1055 1025 int cc; 1056 1026 1057 - if (test_facility(2) && test_facility(12)) { 1027 + if (test_facility(12)) { 1058 1028 cc = kvm_s390_query_ap_config(config); 1059 1029 1060 1030 if (cc) ··· 1105 1075 return 0; 1106 1076 } 1107 1077 1078 + static void sca_dispose(struct kvm *kvm) 1079 + { 1080 + if (kvm->arch.use_esca) 1081 + free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); 1082 + else 1083 + free_page((unsigned long)(kvm->arch.sca)); 1084 + kvm->arch.sca = NULL; 1085 + } 1086 + 1108 1087 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 1109 1088 { 1110 1089 
int i, rc; ··· 1137 1098 1138 1099 rc = -ENOMEM; 1139 1100 1140 - kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); 1101 + kvm->arch.use_esca = 0; /* start with basic SCA */ 1102 + rwlock_init(&kvm->arch.sca_lock); 1103 + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); 1141 1104 if (!kvm->arch.sca) 1142 1105 goto out_err; 1143 1106 spin_lock(&kvm_lock); 1144 1107 sca_offset += 16; 1145 - if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) 1108 + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) 1146 1109 sca_offset = 0; 1147 - kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); 1110 + kvm->arch.sca = (struct bsca_block *) 1111 + ((char *) kvm->arch.sca + sca_offset); 1148 1112 spin_unlock(&kvm_lock); 1149 1113 1150 1114 sprintf(debug_name, "kvm-%u", current->pid); ··· 1199 1157 1200 1158 if (type & KVM_VM_S390_UCONTROL) { 1201 1159 kvm->arch.gmap = NULL; 1160 + kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; 1202 1161 } else { 1203 - kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); 1162 + if (sclp.hamax == U64_MAX) 1163 + kvm->arch.mem_limit = TASK_MAX_SIZE; 1164 + else 1165 + kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, 1166 + sclp.hamax + 1); 1167 + kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); 1204 1168 if (!kvm->arch.gmap) 1205 1169 goto out_err; 1206 1170 kvm->arch.gmap->private = kvm; ··· 1218 1170 kvm->arch.epoch = 0; 1219 1171 1220 1172 spin_lock_init(&kvm->arch.start_stop_lock); 1221 - KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); 1173 + KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); 1222 1174 1223 1175 return 0; 1224 1176 out_err: 1225 1177 kfree(kvm->arch.crypto.crycb); 1226 1178 free_page((unsigned long)kvm->arch.model.fac); 1227 1179 debug_unregister(kvm->arch.dbf); 1228 - free_page((unsigned long)(kvm->arch.sca)); 1180 + sca_dispose(kvm); 1229 1181 KVM_EVENT(3, "creation of vm failed: %d", rc); 1230 1182 return 
rc; 1231 1183 } ··· 1236 1188 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 1237 1189 kvm_s390_clear_local_irqs(vcpu); 1238 1190 kvm_clear_async_pf_completion_queue(vcpu); 1239 - if (!kvm_is_ucontrol(vcpu->kvm)) { 1240 - clear_bit(63 - vcpu->vcpu_id, 1241 - (unsigned long *) &vcpu->kvm->arch.sca->mcn); 1242 - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == 1243 - (__u64) vcpu->arch.sie_block) 1244 - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; 1245 - } 1246 - smp_mb(); 1191 + if (!kvm_is_ucontrol(vcpu->kvm)) 1192 + sca_del_vcpu(vcpu); 1247 1193 1248 1194 if (kvm_is_ucontrol(vcpu->kvm)) 1249 1195 gmap_free(vcpu->arch.gmap); ··· 1270 1228 { 1271 1229 kvm_free_vcpus(kvm); 1272 1230 free_page((unsigned long)kvm->arch.model.fac); 1273 - free_page((unsigned long)(kvm->arch.sca)); 1231 + sca_dispose(kvm); 1274 1232 debug_unregister(kvm->arch.dbf); 1275 1233 kfree(kvm->arch.crypto.crycb); 1276 1234 if (!kvm_is_ucontrol(kvm)) 1277 1235 gmap_free(kvm->arch.gmap); 1278 1236 kvm_s390_destroy_adapters(kvm); 1279 1237 kvm_s390_clear_float_irqs(kvm); 1280 - KVM_EVENT(3, "vm 0x%p destroyed", kvm); 1238 + KVM_EVENT(3, "vm 0x%pK destroyed", kvm); 1281 1239 } 1282 1240 1283 1241 /* Section: vcpu related */ ··· 1291 1249 return 0; 1292 1250 } 1293 1251 1252 + static void sca_del_vcpu(struct kvm_vcpu *vcpu) 1253 + { 1254 + read_lock(&vcpu->kvm->arch.sca_lock); 1255 + if (vcpu->kvm->arch.use_esca) { 1256 + struct esca_block *sca = vcpu->kvm->arch.sca; 1257 + 1258 + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 1259 + sca->cpu[vcpu->vcpu_id].sda = 0; 1260 + } else { 1261 + struct bsca_block *sca = vcpu->kvm->arch.sca; 1262 + 1263 + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 1264 + sca->cpu[vcpu->vcpu_id].sda = 0; 1265 + } 1266 + read_unlock(&vcpu->kvm->arch.sca_lock); 1267 + } 1268 + 1269 + static void sca_add_vcpu(struct kvm_vcpu *vcpu) 1270 + { 1271 + read_lock(&vcpu->kvm->arch.sca_lock); 1272 + if (vcpu->kvm->arch.use_esca) { 1273 + struct esca_block 
*sca = vcpu->kvm->arch.sca; 1274 + 1275 + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 1276 + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 1277 + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; 1278 + vcpu->arch.sie_block->ecb2 |= 0x04U; 1279 + set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); 1280 + } else { 1281 + struct bsca_block *sca = vcpu->kvm->arch.sca; 1282 + 1283 + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; 1284 + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); 1285 + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; 1286 + set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); 1287 + } 1288 + read_unlock(&vcpu->kvm->arch.sca_lock); 1289 + } 1290 + 1291 + /* Basic SCA to Extended SCA data copy routines */ 1292 + static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) 1293 + { 1294 + d->sda = s->sda; 1295 + d->sigp_ctrl.c = s->sigp_ctrl.c; 1296 + d->sigp_ctrl.scn = s->sigp_ctrl.scn; 1297 + } 1298 + 1299 + static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) 1300 + { 1301 + int i; 1302 + 1303 + d->ipte_control = s->ipte_control; 1304 + d->mcn[0] = s->mcn; 1305 + for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) 1306 + sca_copy_entry(&d->cpu[i], &s->cpu[i]); 1307 + } 1308 + 1309 + static int sca_switch_to_extended(struct kvm *kvm) 1310 + { 1311 + struct bsca_block *old_sca = kvm->arch.sca; 1312 + struct esca_block *new_sca; 1313 + struct kvm_vcpu *vcpu; 1314 + unsigned int vcpu_idx; 1315 + u32 scaol, scaoh; 1316 + 1317 + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); 1318 + if (!new_sca) 1319 + return -ENOMEM; 1320 + 1321 + scaoh = (u32)((u64)(new_sca) >> 32); 1322 + scaol = (u32)(u64)(new_sca) & ~0x3fU; 1323 + 1324 + kvm_s390_vcpu_block_all(kvm); 1325 + write_lock(&kvm->arch.sca_lock); 1326 + 1327 + sca_copy_b_to_e(new_sca, old_sca); 1328 + 1329 + kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { 1330 + vcpu->arch.sie_block->scaoh = 
scaoh; 1331 + vcpu->arch.sie_block->scaol = scaol; 1332 + vcpu->arch.sie_block->ecb2 |= 0x04U; 1333 + } 1334 + kvm->arch.sca = new_sca; 1335 + kvm->arch.use_esca = 1; 1336 + 1337 + write_unlock(&kvm->arch.sca_lock); 1338 + kvm_s390_vcpu_unblock_all(kvm); 1339 + 1340 + free_page((unsigned long)old_sca); 1341 + 1342 + VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", 1343 + old_sca, kvm->arch.sca); 1344 + return 0; 1345 + } 1346 + 1347 + static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) 1348 + { 1349 + int rc; 1350 + 1351 + if (id < KVM_S390_BSCA_CPU_SLOTS) 1352 + return true; 1353 + if (!sclp.has_esca) 1354 + return false; 1355 + 1356 + mutex_lock(&kvm->lock); 1357 + rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm); 1358 + mutex_unlock(&kvm->lock); 1359 + 1360 + return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; 1361 + } 1362 + 1294 1363 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 1295 1364 { 1296 1365 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; ··· 1412 1259 KVM_SYNC_CRS | 1413 1260 KVM_SYNC_ARCH0 | 1414 1261 KVM_SYNC_PFAULT; 1262 + if (test_kvm_facility(vcpu->kvm, 64)) 1263 + vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; 1415 1264 if (test_kvm_facility(vcpu->kvm, 129)) 1416 1265 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; 1417 1266 ··· 1524 1369 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; 1525 1370 preempt_enable(); 1526 1371 mutex_unlock(&vcpu->kvm->lock); 1527 - if (!kvm_is_ucontrol(vcpu->kvm)) 1372 + if (!kvm_is_ucontrol(vcpu->kvm)) { 1528 1373 vcpu->arch.gmap = vcpu->kvm->arch.gmap; 1374 + sca_add_vcpu(vcpu); 1375 + } 1376 + 1529 1377 } 1530 1378 1531 1379 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) ··· 1597 1439 vcpu->arch.sie_block->eca |= 1; 1598 1440 if (sclp.has_sigpif) 1599 1441 vcpu->arch.sie_block->eca |= 0x10000000U; 1442 + if (test_kvm_facility(vcpu->kvm, 64)) 1443 + vcpu->arch.sie_block->ecb3 |= 0x01; 1600 1444 if (test_kvm_facility(vcpu->kvm, 129)) { 1601 1445 vcpu->arch.sie_block->eca 
|= 0x00020000; 1602 1446 vcpu->arch.sie_block->ecd |= 0x20000000; 1603 1447 } 1448 + vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; 1604 1449 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; 1605 1450 1606 1451 if (vcpu->kvm->arch.use_cmma) { ··· 1626 1465 struct sie_page *sie_page; 1627 1466 int rc = -EINVAL; 1628 1467 1629 - if (id >= KVM_MAX_VCPUS) 1468 + if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) 1630 1469 goto out; 1631 1470 1632 1471 rc = -ENOMEM; ··· 1643 1482 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; 1644 1483 1645 1484 vcpu->arch.sie_block->icpua = id; 1646 - if (!kvm_is_ucontrol(kvm)) { 1647 - if (!kvm->arch.sca) { 1648 - WARN_ON_ONCE(1); 1649 - goto out_free_cpu; 1650 - } 1651 - if (!kvm->arch.sca->cpu[id].sda) 1652 - kvm->arch.sca->cpu[id].sda = 1653 - (__u64) vcpu->arch.sie_block; 1654 - vcpu->arch.sie_block->scaoh = 1655 - (__u32)(((__u64)kvm->arch.sca) >> 32); 1656 - vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; 1657 - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); 1658 - } 1659 - 1660 1485 spin_lock_init(&vcpu->arch.local_int.lock); 1661 1486 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 1662 1487 vcpu->arch.local_int.wq = &vcpu->wq; ··· 1656 1509 */ 1657 1510 vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, 1658 1511 GFP_KERNEL); 1659 - if (!vcpu->arch.guest_fpregs.fprs) { 1660 - rc = -ENOMEM; 1512 + if (!vcpu->arch.guest_fpregs.fprs) 1661 1513 goto out_free_sie_block; 1662 - } 1663 1514 1664 1515 rc = kvm_vcpu_init(vcpu, kvm, id); 1665 1516 if (rc) 1666 1517 goto out_free_sie_block; 1667 - VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, 1518 + VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, 1668 1519 vcpu->arch.sie_block); 1669 1520 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); 1670 1521 ··· 2158 2013 */ 2159 2014 kvm_check_async_pf_completion(vcpu); 2160 2015 2161 
- memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); 2016 + vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; 2017 + vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; 2162 2018 2163 2019 if (need_resched()) 2164 2020 schedule(); ··· 2217 2071 2218 2072 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 2219 2073 { 2220 - int rc = -1; 2221 - 2222 2074 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 2223 2075 vcpu->arch.sie_block->icptcode); 2224 2076 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); ··· 2224 2080 if (guestdbg_enabled(vcpu)) 2225 2081 kvm_s390_restore_guest_per_regs(vcpu); 2226 2082 2227 - if (exit_reason >= 0) { 2228 - rc = 0; 2083 + vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; 2084 + vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; 2085 + 2086 + if (vcpu->arch.sie_block->icptcode > 0) { 2087 + int rc = kvm_handle_sie_intercept(vcpu); 2088 + 2089 + if (rc != -EOPNOTSUPP) 2090 + return rc; 2091 + vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; 2092 + vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 2093 + vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 2094 + vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 2095 + return -EREMOTE; 2096 + } else if (exit_reason != -EFAULT) { 2097 + vcpu->stat.exit_null++; 2098 + return 0; 2229 2099 } else if (kvm_is_ucontrol(vcpu->kvm)) { 2230 2100 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 2231 2101 vcpu->run->s390_ucontrol.trans_exc_code = 2232 2102 current->thread.gmap_addr; 2233 2103 vcpu->run->s390_ucontrol.pgm_code = 0x10; 2234 - rc = -EREMOTE; 2235 - 2104 + return -EREMOTE; 2236 2105 } else if (current->thread.gmap_pfault) { 2237 2106 trace_kvm_s390_major_guest_pfault(vcpu); 2238 2107 current->thread.gmap_pfault = 0; 2239 - if (kvm_arch_setup_async_pf(vcpu)) { 2240 - rc = 0; 2241 - } else { 2242 - gpa_t gpa = current->thread.gmap_addr; 2243 - rc = kvm_arch_fault_in_page(vcpu, gpa, 1); 2244 - } 2108 + if 
(kvm_arch_setup_async_pf(vcpu)) 2109 + return 0; 2110 + return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); 2245 2111 } 2246 - 2247 - if (rc == -1) 2248 - rc = vcpu_post_run_fault_in_sie(vcpu); 2249 - 2250 - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); 2251 - 2252 - if (rc == 0) { 2253 - if (kvm_is_ucontrol(vcpu->kvm)) 2254 - /* Don't exit for host interrupts. */ 2255 - rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; 2256 - else 2257 - rc = kvm_handle_sie_intercept(vcpu); 2258 - } 2259 - 2260 - return rc; 2112 + return vcpu_post_run_fault_in_sie(vcpu); 2261 2113 } 2262 2114 2263 2115 static int __vcpu_run(struct kvm_vcpu *vcpu) ··· 2373 2233 rc = 0; 2374 2234 } 2375 2235 2376 - if (rc == -EOPNOTSUPP) { 2377 - /* intercept cannot be handled in-kernel, prepare kvm-run */ 2378 - kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; 2379 - kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; 2380 - kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; 2381 - kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; 2382 - rc = 0; 2383 - } 2384 - 2385 2236 if (rc == -EREMOTE) { 2386 - /* intercept was handled, but userspace support is needed 2387 - * kvm_run has been prepared by the handler */ 2237 + /* userspace support is needed, kvm_run has been prepared */ 2388 2238 rc = 0; 2389 2239 } 2390 2240 ··· 2866 2736 if (mem->memory_size & 0xffffful) 2867 2737 return -EINVAL; 2868 2738 2739 + if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) 2740 + return -EINVAL; 2741 + 2869 2742 return 0; 2870 2743 } 2871 2744 ··· 2900 2767 2901 2768 static int __init kvm_s390_init(void) 2902 2769 { 2770 + if (!sclp.has_sief2) { 2771 + pr_info("SIE not available\n"); 2772 + return -ENODEV; 2773 + } 2774 + 2903 2775 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 2904 2776 } 2905 2777
+7
arch/s390/kvm/kvm-s390.h
··· 340 340 void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); 341 341 void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); 342 342 343 + /* support for Basic/Extended SCA handling */ 344 + static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) 345 + { 346 + struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */ 347 + 348 + return &sca->ipte_control; 349 + } 343 350 #endif
+3 -3
arch/s390/kvm/trace-s390.h
··· 55 55 __entry->sie_block = sie_block; 56 56 ), 57 57 58 - TP_printk("create cpu %d at %p, sie block at %p", __entry->id, 59 - __entry->vcpu, __entry->sie_block) 58 + TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", 59 + __entry->id, __entry->vcpu, __entry->sie_block) 60 60 ); 61 61 62 62 TRACE_EVENT(kvm_s390_destroy_vcpu, ··· 254 254 __entry->kvm = kvm; 255 255 ), 256 256 257 - TP_printk("enabling channel I/O support (kvm @ %p)\n", 257 + TP_printk("enabling channel I/O support (kvm @ %pK)\n", 258 258 __entry->kvm) 259 259 ); 260 260
+2 -2
arch/s390/mm/pgtable.c
··· 133 133 /** 134 134 * gmap_alloc - allocate a guest address space 135 135 * @mm: pointer to the parent mm_struct 136 - * @limit: maximum size of the gmap address space 136 + * @limit: maximum address of the gmap address space 137 137 * 138 138 * Returns a guest address space structure. 139 139 */ ··· 402 402 if ((from | to | len) & (PMD_SIZE - 1)) 403 403 return -EINVAL; 404 404 if (len == 0 || from + len < from || to + len < to || 405 - from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) 405 + from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) 406 406 return -EINVAL; 407 407 408 408 flush = 0;
+70 -5
arch/x86/include/asm/kvm_host.h
··· 25 25 #include <linux/pvclock_gtod.h> 26 26 #include <linux/clocksource.h> 27 27 #include <linux/irqbypass.h> 28 + #include <linux/hyperv.h> 28 29 29 30 #include <asm/pvclock-abi.h> 30 31 #include <asm/desc.h> ··· 45 44 #define KVM_HALT_POLL_NS_DEFAULT 500000 46 45 47 46 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS 47 + 48 + /* x86-specific vcpu->requests bit members */ 49 + #define KVM_REQ_MIGRATE_TIMER 8 50 + #define KVM_REQ_REPORT_TPR_ACCESS 9 51 + #define KVM_REQ_TRIPLE_FAULT 10 52 + #define KVM_REQ_MMU_SYNC 11 53 + #define KVM_REQ_CLOCK_UPDATE 12 54 + #define KVM_REQ_DEACTIVATE_FPU 13 55 + #define KVM_REQ_EVENT 14 56 + #define KVM_REQ_APF_HALT 15 57 + #define KVM_REQ_STEAL_UPDATE 16 58 + #define KVM_REQ_NMI 17 59 + #define KVM_REQ_PMU 18 60 + #define KVM_REQ_PMI 19 61 + #define KVM_REQ_SMI 20 62 + #define KVM_REQ_MASTERCLOCK_UPDATE 21 63 + #define KVM_REQ_MCLOCK_INPROGRESS 22 64 + #define KVM_REQ_SCAN_IOAPIC 23 65 + #define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 66 + #define KVM_REQ_APIC_PAGE_RELOAD 25 67 + #define KVM_REQ_HV_CRASH 26 68 + #define KVM_REQ_IOAPIC_EOI_EXIT 27 69 + #define KVM_REQ_HV_RESET 28 70 + #define KVM_REQ_HV_EXIT 29 71 + #define KVM_REQ_HV_STIMER 30 48 72 49 73 #define CR0_RESERVED_BITS \ 50 74 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ ··· 239 213 }; 240 214 }; 241 215 216 + struct kvm_rmap_head { 217 + unsigned long val; 218 + }; 219 + 242 220 struct kvm_mmu_page { 243 221 struct list_head link; 244 222 struct hlist_node hash_link; ··· 260 230 bool unsync; 261 231 int root_count; /* Currently serving as active root */ 262 232 unsigned int unsync_children; 263 - unsigned long parent_ptes; /* Reverse mapping for parent_pte */ 233 + struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ 264 234 265 235 /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. 
*/ 266 236 unsigned long mmu_valid_gen; ··· 404 374 struct list_head head; 405 375 }; 406 376 377 + /* Hyper-V SynIC timer */ 378 + struct kvm_vcpu_hv_stimer { 379 + struct hrtimer timer; 380 + int index; 381 + u64 config; 382 + u64 count; 383 + u64 exp_time; 384 + struct hv_message msg; 385 + bool msg_pending; 386 + }; 387 + 388 + /* Hyper-V synthetic interrupt controller (SynIC)*/ 389 + struct kvm_vcpu_hv_synic { 390 + u64 version; 391 + u64 control; 392 + u64 msg_page; 393 + u64 evt_page; 394 + atomic64_t sint[HV_SYNIC_SINT_COUNT]; 395 + atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT]; 396 + DECLARE_BITMAP(auto_eoi_bitmap, 256); 397 + DECLARE_BITMAP(vec_bitmap, 256); 398 + bool active; 399 + }; 400 + 407 401 /* Hyper-V per vcpu emulation context */ 408 402 struct kvm_vcpu_hv { 409 403 u64 hv_vapic; 410 404 s64 runtime_offset; 405 + struct kvm_vcpu_hv_synic synic; 406 + struct kvm_hyperv_exit exit; 407 + struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; 408 + DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); 411 409 }; 412 410 413 411 struct kvm_vcpu_arch { ··· 458 400 u64 efer; 459 401 u64 apic_base; 460 402 struct kvm_lapic *apic; /* kernel irqchip context */ 461 - u64 eoi_exit_bitmap[4]; 403 + bool apicv_active; 404 + DECLARE_BITMAP(ioapic_handled_vectors, 256); 462 405 unsigned long apic_attention; 463 406 int32_t apic_arb_prio; 464 407 int mp_state; ··· 648 589 }; 649 590 650 591 struct kvm_arch_memory_slot { 651 - unsigned long *rmap[KVM_NR_PAGE_SIZES]; 592 + struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; 652 593 struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; 653 594 }; 654 595 ··· 890 831 void (*enable_nmi_window)(struct kvm_vcpu *vcpu); 891 832 void (*enable_irq_window)(struct kvm_vcpu *vcpu); 892 833 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); 893 - int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); 834 + bool (*get_enable_apicv)(void); 835 + void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); 894 836 
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); 895 837 void (*hwapic_isr_update)(struct kvm *kvm, int isr); 896 - void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu); 838 + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); 897 839 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); 898 840 void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); 899 841 void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); ··· 1146 1086 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, 1147 1087 struct x86_exception *exception); 1148 1088 1089 + void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); 1090 + 1149 1091 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 1150 1092 1151 1093 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, ··· 1292 1230 1293 1231 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); 1294 1232 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); 1233 + 1234 + void kvm_make_mclock_inprogress_request(struct kvm *kvm); 1235 + void kvm_make_scan_ioapic_request(struct kvm *kvm); 1295 1236 1296 1237 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 1297 1238 struct kvm_async_pf *work);
+92
arch/x86/include/uapi/asm/hyperv.h
··· 269 269 #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) 270 270 #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) 271 271 272 + #define HV_SYNIC_STIMER_COUNT (4) 273 + 274 + /* Define synthetic interrupt controller message constants. */ 275 + #define HV_MESSAGE_SIZE (256) 276 + #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) 277 + #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) 278 + 279 + /* Define hypervisor message types. */ 280 + enum hv_message_type { 281 + HVMSG_NONE = 0x00000000, 282 + 283 + /* Memory access messages. */ 284 + HVMSG_UNMAPPED_GPA = 0x80000000, 285 + HVMSG_GPA_INTERCEPT = 0x80000001, 286 + 287 + /* Timer notification messages. */ 288 + HVMSG_TIMER_EXPIRED = 0x80000010, 289 + 290 + /* Error messages. */ 291 + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, 292 + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, 293 + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, 294 + 295 + /* Trace buffer complete messages. */ 296 + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, 297 + 298 + /* Platform-specific processor intercept messages. */ 299 + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, 300 + HVMSG_X64_MSR_INTERCEPT = 0x80010001, 301 + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, 302 + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, 303 + HVMSG_X64_APIC_EOI = 0x80010004, 304 + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 305 + }; 306 + 307 + /* Define synthetic interrupt controller message flags. */ 308 + union hv_message_flags { 309 + __u8 asu8; 310 + struct { 311 + __u8 msg_pending:1; 312 + __u8 reserved:7; 313 + }; 314 + }; 315 + 316 + /* Define port identifier type. */ 317 + union hv_port_id { 318 + __u32 asu32; 319 + struct { 320 + __u32 id:24; 321 + __u32 reserved:8; 322 + } u; 323 + }; 324 + 325 + /* Define synthetic interrupt controller message header. 
*/ 326 + struct hv_message_header { 327 + __u32 message_type; 328 + __u8 payload_size; 329 + union hv_message_flags message_flags; 330 + __u8 reserved[2]; 331 + union { 332 + __u64 sender; 333 + union hv_port_id port; 334 + }; 335 + }; 336 + 337 + /* Define synthetic interrupt controller message format. */ 338 + struct hv_message { 339 + struct hv_message_header header; 340 + union { 341 + __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; 342 + } u; 343 + }; 344 + 345 + /* Define the synthetic interrupt message page layout. */ 346 + struct hv_message_page { 347 + struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; 348 + }; 349 + 350 + /* Define timer message payload structure. */ 351 + struct hv_timer_message_payload { 352 + __u32 timer_index; 353 + __u32 reserved; 354 + __u64 expiration_time; /* When the timer expired */ 355 + __u64 delivery_time; /* When the message was delivered */ 356 + }; 357 + 358 + #define HV_STIMER_ENABLE (1ULL << 0) 359 + #define HV_STIMER_PERIODIC (1ULL << 1) 360 + #define HV_STIMER_LAZY (1ULL << 2) 361 + #define HV_STIMER_AUTOENABLE (1ULL << 3) 362 + #define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) 363 + 272 364 #endif
+704 -4
arch/x86/kvm/hyperv.c
··· 23 23 24 24 #include "x86.h" 25 25 #include "lapic.h" 26 + #include "ioapic.h" 26 27 #include "hyperv.h" 27 28 28 29 #include <linux/kvm_host.h> 30 + #include <linux/highmem.h> 31 + #include <asm/apicdef.h> 29 32 #include <trace/events/kvm.h> 30 33 31 34 #include "trace.h" 35 + 36 + static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) 37 + { 38 + return atomic64_read(&synic->sint[sint]); 39 + } 40 + 41 + static inline int synic_get_sint_vector(u64 sint_value) 42 + { 43 + if (sint_value & HV_SYNIC_SINT_MASKED) 44 + return -1; 45 + return sint_value & HV_SYNIC_SINT_VECTOR_MASK; 46 + } 47 + 48 + static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic, 49 + int vector) 50 + { 51 + int i; 52 + 53 + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 54 + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) 55 + return true; 56 + } 57 + return false; 58 + } 59 + 60 + static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, 61 + int vector) 62 + { 63 + int i; 64 + u64 sint_value; 65 + 66 + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 67 + sint_value = synic_read_sint(synic, i); 68 + if (synic_get_sint_vector(sint_value) == vector && 69 + sint_value & HV_SYNIC_SINT_AUTO_EOI) 70 + return true; 71 + } 72 + return false; 73 + } 74 + 75 + static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, 76 + u64 data, bool host) 77 + { 78 + int vector; 79 + 80 + vector = data & HV_SYNIC_SINT_VECTOR_MASK; 81 + if (vector < 16 && !host) 82 + return 1; 83 + /* 84 + * Guest may configure multiple SINTs to use the same vector, so 85 + * we maintain a bitmap of vectors handled by synic, and a 86 + * bitmap of vectors with auto-eoi behavior. The bitmaps are 87 + * updated here, and atomically queried on fast paths. 
88 + */ 89 + 90 + atomic64_set(&synic->sint[sint], data); 91 + 92 + if (synic_has_vector_connected(synic, vector)) 93 + __set_bit(vector, synic->vec_bitmap); 94 + else 95 + __clear_bit(vector, synic->vec_bitmap); 96 + 97 + if (synic_has_vector_auto_eoi(synic, vector)) 98 + __set_bit(vector, synic->auto_eoi_bitmap); 99 + else 100 + __clear_bit(vector, synic->auto_eoi_bitmap); 101 + 102 + /* Load SynIC vectors into EOI exit bitmap */ 103 + kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); 104 + return 0; 105 + } 106 + 107 + static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) 108 + { 109 + struct kvm_vcpu *vcpu; 110 + struct kvm_vcpu_hv_synic *synic; 111 + 112 + if (vcpu_id >= atomic_read(&kvm->online_vcpus)) 113 + return NULL; 114 + vcpu = kvm_get_vcpu(kvm, vcpu_id); 115 + if (!vcpu) 116 + return NULL; 117 + synic = vcpu_to_synic(vcpu); 118 + return (synic->active) ? synic : NULL; 119 + } 120 + 121 + static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic, 122 + u32 sint) 123 + { 124 + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 125 + struct page *page; 126 + gpa_t gpa; 127 + struct hv_message *msg; 128 + struct hv_message_page *msg_page; 129 + 130 + gpa = synic->msg_page & PAGE_MASK; 131 + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); 132 + if (is_error_page(page)) { 133 + vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n", 134 + gpa); 135 + return; 136 + } 137 + msg_page = kmap_atomic(page); 138 + 139 + msg = &msg_page->sint_message[sint]; 140 + msg->header.message_flags.msg_pending = 0; 141 + 142 + kunmap_atomic(msg_page); 143 + kvm_release_page_dirty(page); 144 + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); 145 + } 146 + 147 + static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) 148 + { 149 + struct kvm *kvm = vcpu->kvm; 150 + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 151 + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 152 + struct kvm_vcpu_hv_stimer 
*stimer; 153 + int gsi, idx, stimers_pending; 154 + 155 + trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint); 156 + 157 + if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) 158 + synic_clear_sint_msg_pending(synic, sint); 159 + 160 + /* Try to deliver pending Hyper-V SynIC timers messages */ 161 + stimers_pending = 0; 162 + for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { 163 + stimer = &hv_vcpu->stimer[idx]; 164 + if (stimer->msg_pending && 165 + (stimer->config & HV_STIMER_ENABLE) && 166 + HV_STIMER_SINT(stimer->config) == sint) { 167 + set_bit(stimer->index, 168 + hv_vcpu->stimer_pending_bitmap); 169 + stimers_pending++; 170 + } 171 + } 172 + if (stimers_pending) 173 + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); 174 + 175 + idx = srcu_read_lock(&kvm->irq_srcu); 176 + gsi = atomic_read(&synic->sint_to_gsi[sint]); 177 + if (gsi != -1) 178 + kvm_notify_acked_gsi(kvm, gsi); 179 + srcu_read_unlock(&kvm->irq_srcu, idx); 180 + } 181 + 182 + static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) 183 + { 184 + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 185 + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; 186 + 187 + hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; 188 + hv_vcpu->exit.u.synic.msr = msr; 189 + hv_vcpu->exit.u.synic.control = synic->control; 190 + hv_vcpu->exit.u.synic.evt_page = synic->evt_page; 191 + hv_vcpu->exit.u.synic.msg_page = synic->msg_page; 192 + 193 + kvm_make_request(KVM_REQ_HV_EXIT, vcpu); 194 + } 195 + 196 + static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, 197 + u32 msr, u64 data, bool host) 198 + { 199 + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 200 + int ret; 201 + 202 + if (!synic->active) 203 + return 1; 204 + 205 + trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); 206 + 207 + ret = 0; 208 + switch (msr) { 209 + case HV_X64_MSR_SCONTROL: 210 + synic->control = data; 211 + if (!host) 212 + synic_exit(synic, msr); 213 + break; 214 + case HV_X64_MSR_SVERSION: 215 + if (!host) { 216 + ret = 1; 217 + break; 
218 + } 219 + synic->version = data; 220 + break; 221 + case HV_X64_MSR_SIEFP: 222 + if (data & HV_SYNIC_SIEFP_ENABLE) 223 + if (kvm_clear_guest(vcpu->kvm, 224 + data & PAGE_MASK, PAGE_SIZE)) { 225 + ret = 1; 226 + break; 227 + } 228 + synic->evt_page = data; 229 + if (!host) 230 + synic_exit(synic, msr); 231 + break; 232 + case HV_X64_MSR_SIMP: 233 + if (data & HV_SYNIC_SIMP_ENABLE) 234 + if (kvm_clear_guest(vcpu->kvm, 235 + data & PAGE_MASK, PAGE_SIZE)) { 236 + ret = 1; 237 + break; 238 + } 239 + synic->msg_page = data; 240 + if (!host) 241 + synic_exit(synic, msr); 242 + break; 243 + case HV_X64_MSR_EOM: { 244 + int i; 245 + 246 + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 247 + kvm_hv_notify_acked_sint(vcpu, i); 248 + break; 249 + } 250 + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 251 + ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host); 252 + break; 253 + default: 254 + ret = 1; 255 + break; 256 + } 257 + return ret; 258 + } 259 + 260 + static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata) 261 + { 262 + int ret; 263 + 264 + if (!synic->active) 265 + return 1; 266 + 267 + ret = 0; 268 + switch (msr) { 269 + case HV_X64_MSR_SCONTROL: 270 + *pdata = synic->control; 271 + break; 272 + case HV_X64_MSR_SVERSION: 273 + *pdata = synic->version; 274 + break; 275 + case HV_X64_MSR_SIEFP: 276 + *pdata = synic->evt_page; 277 + break; 278 + case HV_X64_MSR_SIMP: 279 + *pdata = synic->msg_page; 280 + break; 281 + case HV_X64_MSR_EOM: 282 + *pdata = 0; 283 + break; 284 + case HV_X64_MSR_SINT0 ... 
HV_X64_MSR_SINT15: 285 + *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); 286 + break; 287 + default: 288 + ret = 1; 289 + break; 290 + } 291 + return ret; 292 + } 293 + 294 + int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) 295 + { 296 + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 297 + struct kvm_lapic_irq irq; 298 + int ret, vector; 299 + 300 + if (sint >= ARRAY_SIZE(synic->sint)) 301 + return -EINVAL; 302 + 303 + vector = synic_get_sint_vector(synic_read_sint(synic, sint)); 304 + if (vector < 0) 305 + return -ENOENT; 306 + 307 + memset(&irq, 0, sizeof(irq)); 308 + irq.dest_id = kvm_apic_id(vcpu->arch.apic); 309 + irq.dest_mode = APIC_DEST_PHYSICAL; 310 + irq.delivery_mode = APIC_DM_FIXED; 311 + irq.vector = vector; 312 + irq.level = 1; 313 + 314 + ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); 315 + trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); 316 + return ret; 317 + } 318 + 319 + int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint) 320 + { 321 + struct kvm_vcpu_hv_synic *synic; 322 + 323 + synic = synic_get(kvm, vcpu_id); 324 + if (!synic) 325 + return -EINVAL; 326 + 327 + return synic_set_irq(synic, sint); 328 + } 329 + 330 + void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) 331 + { 332 + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); 333 + int i; 334 + 335 + trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector); 336 + 337 + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) 338 + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) 339 + kvm_hv_notify_acked_sint(vcpu, i); 340 + } 341 + 342 + static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi) 343 + { 344 + struct kvm_vcpu_hv_synic *synic; 345 + 346 + synic = synic_get(kvm, vcpu_id); 347 + if (!synic) 348 + return -EINVAL; 349 + 350 + if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) 351 + return -EINVAL; 352 + 353 + atomic_set(&synic->sint_to_gsi[sint], gsi); 354 + return 0; 355 + } 356 + 
357 + void kvm_hv_irq_routing_update(struct kvm *kvm) 358 + { 359 + struct kvm_irq_routing_table *irq_rt; 360 + struct kvm_kernel_irq_routing_entry *e; 361 + u32 gsi; 362 + 363 + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, 364 + lockdep_is_held(&kvm->irq_lock)); 365 + 366 + for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { 367 + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { 368 + if (e->type == KVM_IRQ_ROUTING_HV_SINT) 369 + kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, 370 + e->hv_sint.sint, gsi); 371 + } 372 + } 373 + } 374 + 375 + static void synic_init(struct kvm_vcpu_hv_synic *synic) 376 + { 377 + int i; 378 + 379 + memset(synic, 0, sizeof(*synic)); 380 + synic->version = HV_SYNIC_VERSION_1; 381 + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { 382 + atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); 383 + atomic_set(&synic->sint_to_gsi[i], -1); 384 + } 385 + } 386 + 387 + static u64 get_time_ref_counter(struct kvm *kvm) 388 + { 389 + return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); 390 + } 391 + 392 + static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, 393 + bool vcpu_kick) 394 + { 395 + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 396 + 397 + set_bit(stimer->index, 398 + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 399 + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); 400 + if (vcpu_kick) 401 + kvm_vcpu_kick(vcpu); 402 + } 403 + 404 + static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) 405 + { 406 + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 407 + 408 + trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id, 409 + stimer->index); 410 + 411 + hrtimer_cancel(&stimer->timer); 412 + clear_bit(stimer->index, 413 + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); 414 + stimer->msg_pending = false; 415 + stimer->exp_time = 0; 416 + } 417 + 418 + static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) 419 + { 420 + struct kvm_vcpu_hv_stimer *stimer; 421 + 422 + stimer = 
container_of(timer, struct kvm_vcpu_hv_stimer, timer); 423 + trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id, 424 + stimer->index); 425 + stimer_mark_pending(stimer, true); 426 + 427 + return HRTIMER_NORESTART; 428 + } 429 + 430 + /* 431 + * stimer_start() assumptions: 432 + * a) stimer->count is not equal to 0 433 + * b) stimer->config has HV_STIMER_ENABLE flag 434 + */ 435 + static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) 436 + { 437 + u64 time_now; 438 + ktime_t ktime_now; 439 + 440 + time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); 441 + ktime_now = ktime_get(); 442 + 443 + if (stimer->config & HV_STIMER_PERIODIC) { 444 + if (stimer->exp_time) { 445 + if (time_now >= stimer->exp_time) { 446 + u64 remainder; 447 + 448 + div64_u64_rem(time_now - stimer->exp_time, 449 + stimer->count, &remainder); 450 + stimer->exp_time = 451 + time_now + (stimer->count - remainder); 452 + } 453 + } else 454 + stimer->exp_time = time_now + stimer->count; 455 + 456 + trace_kvm_hv_stimer_start_periodic( 457 + stimer_to_vcpu(stimer)->vcpu_id, 458 + stimer->index, 459 + time_now, stimer->exp_time); 460 + 461 + hrtimer_start(&stimer->timer, 462 + ktime_add_ns(ktime_now, 463 + 100 * (stimer->exp_time - time_now)), 464 + HRTIMER_MODE_ABS); 465 + return 0; 466 + } 467 + stimer->exp_time = stimer->count; 468 + if (time_now >= stimer->count) { 469 + /* 470 + * Expire timer according to Hypervisor Top-Level Functional 471 + * specification v4(15.3.1): 472 + * "If a one shot is enabled and the specified count is in 473 + * the past, it will expire immediately." 
474 + */ 475 + stimer_mark_pending(stimer, false); 476 + return 0; 477 + } 478 + 479 + trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id, 480 + stimer->index, 481 + time_now, stimer->count); 482 + 483 + hrtimer_start(&stimer->timer, 484 + ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), 485 + HRTIMER_MODE_ABS); 486 + return 0; 487 + } 488 + 489 + static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, 490 + bool host) 491 + { 492 + trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id, 493 + stimer->index, config, host); 494 + 495 + stimer_cleanup(stimer); 496 + if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0) 497 + config &= ~HV_STIMER_ENABLE; 498 + stimer->config = config; 499 + stimer_mark_pending(stimer, false); 500 + return 0; 501 + } 502 + 503 + static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, 504 + bool host) 505 + { 506 + trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, 507 + stimer->index, count, host); 508 + 509 + stimer_cleanup(stimer); 510 + stimer->count = count; 511 + if (stimer->count == 0) 512 + stimer->config &= ~HV_STIMER_ENABLE; 513 + else if (stimer->config & HV_STIMER_AUTOENABLE) 514 + stimer->config |= HV_STIMER_ENABLE; 515 + stimer_mark_pending(stimer, false); 516 + return 0; 517 + } 518 + 519 + static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) 520 + { 521 + *pconfig = stimer->config; 522 + return 0; 523 + } 524 + 525 + static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount) 526 + { 527 + *pcount = stimer->count; 528 + return 0; 529 + } 530 + 531 + static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, 532 + struct hv_message *src_msg) 533 + { 534 + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); 535 + struct page *page; 536 + gpa_t gpa; 537 + struct hv_message *dst_msg; 538 + int r; 539 + struct hv_message_page *msg_page; 540 + 541 + if (!(synic->msg_page & 
HV_SYNIC_SIMP_ENABLE)) 542 + return -ENOENT; 543 + 544 + gpa = synic->msg_page & PAGE_MASK; 545 + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); 546 + if (is_error_page(page)) 547 + return -EFAULT; 548 + 549 + msg_page = kmap_atomic(page); 550 + dst_msg = &msg_page->sint_message[sint]; 551 + if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE, 552 + src_msg->header.message_type) != HVMSG_NONE) { 553 + dst_msg->header.message_flags.msg_pending = 1; 554 + r = -EAGAIN; 555 + } else { 556 + memcpy(&dst_msg->u.payload, &src_msg->u.payload, 557 + src_msg->header.payload_size); 558 + dst_msg->header.message_type = src_msg->header.message_type; 559 + dst_msg->header.payload_size = src_msg->header.payload_size; 560 + r = synic_set_irq(synic, sint); 561 + if (r >= 1) 562 + r = 0; 563 + else if (r == 0) 564 + r = -EFAULT; 565 + } 566 + kunmap_atomic(msg_page); 567 + kvm_release_page_dirty(page); 568 + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); 569 + return r; 570 + } 571 + 572 + static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) 573 + { 574 + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); 575 + struct hv_message *msg = &stimer->msg; 576 + struct hv_timer_message_payload *payload = 577 + (struct hv_timer_message_payload *)&msg->u.payload; 578 + 579 + payload->expiration_time = stimer->exp_time; 580 + payload->delivery_time = get_time_ref_counter(vcpu->kvm); 581 + return synic_deliver_msg(vcpu_to_synic(vcpu), 582 + HV_STIMER_SINT(stimer->config), msg); 583 + } 584 + 585 + static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) 586 + { 587 + int r; 588 + 589 + stimer->msg_pending = true; 590 + r = stimer_send_msg(stimer); 591 + trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id, 592 + stimer->index, r); 593 + if (!r) { 594 + stimer->msg_pending = false; 595 + if (!(stimer->config & HV_STIMER_PERIODIC)) 596 + stimer->config &= ~HV_STIMER_ENABLE; 597 + } 598 + } 599 + 600 + void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) 
601 + { 602 + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 603 + struct kvm_vcpu_hv_stimer *stimer; 604 + u64 time_now, exp_time; 605 + int i; 606 + 607 + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 608 + if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { 609 + stimer = &hv_vcpu->stimer[i]; 610 + if (stimer->config & HV_STIMER_ENABLE) { 611 + exp_time = stimer->exp_time; 612 + 613 + if (exp_time) { 614 + time_now = 615 + get_time_ref_counter(vcpu->kvm); 616 + if (time_now >= exp_time) 617 + stimer_expiration(stimer); 618 + } 619 + 620 + if ((stimer->config & HV_STIMER_ENABLE) && 621 + stimer->count) 622 + stimer_start(stimer); 623 + else 624 + stimer_cleanup(stimer); 625 + } 626 + } 627 + } 628 + 629 + void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) 630 + { 631 + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 632 + int i; 633 + 634 + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 635 + stimer_cleanup(&hv_vcpu->stimer[i]); 636 + } 637 + 638 + static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) 639 + { 640 + struct hv_message *msg = &stimer->msg; 641 + struct hv_timer_message_payload *payload = 642 + (struct hv_timer_message_payload *)&msg->u.payload; 643 + 644 + memset(&msg->header, 0, sizeof(msg->header)); 645 + msg->header.message_type = HVMSG_TIMER_EXPIRED; 646 + msg->header.payload_size = sizeof(*payload); 647 + 648 + payload->timer_index = stimer->index; 649 + payload->expiration_time = 0; 650 + payload->delivery_time = 0; 651 + } 652 + 653 + static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) 654 + { 655 + memset(stimer, 0, sizeof(*stimer)); 656 + stimer->index = timer_index; 657 + hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 658 + stimer->timer.function = stimer_timer_callback; 659 + stimer_prepare_msg(stimer); 660 + } 661 + 662 + void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) 663 + { 664 + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); 665 + int i; 666 + 667 + 
synic_init(&hv_vcpu->synic); 668 + 669 + bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); 670 + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) 671 + stimer_init(&hv_vcpu->stimer[i], i); 672 + } 673 + 674 + int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) 675 + { 676 + /* 677 + * Hyper-V SynIC auto EOI SINT's are 678 + * not compatible with APICV, so deactivate APICV 679 + */ 680 + kvm_vcpu_deactivate_apicv(vcpu); 681 + vcpu_to_synic(vcpu)->active = true; 682 + return 0; 683 + } 32 684 33 685 static bool kvm_hv_msr_partition_wide(u32 msr) 34 686 { ··· 878 226 return 1; 879 227 hv->runtime_offset = data - current_task_runtime_100ns(); 880 228 break; 229 + case HV_X64_MSR_SCONTROL: 230 + case HV_X64_MSR_SVERSION: 231 + case HV_X64_MSR_SIEFP: 232 + case HV_X64_MSR_SIMP: 233 + case HV_X64_MSR_EOM: 234 + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 235 + return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); 236 + case HV_X64_MSR_STIMER0_CONFIG: 237 + case HV_X64_MSR_STIMER1_CONFIG: 238 + case HV_X64_MSR_STIMER2_CONFIG: 239 + case HV_X64_MSR_STIMER3_CONFIG: { 240 + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; 241 + 242 + return stimer_set_config(vcpu_to_stimer(vcpu, timer_index), 243 + data, host); 244 + } 245 + case HV_X64_MSR_STIMER0_COUNT: 246 + case HV_X64_MSR_STIMER1_COUNT: 247 + case HV_X64_MSR_STIMER2_COUNT: 248 + case HV_X64_MSR_STIMER3_COUNT: { 249 + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; 250 + 251 + return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), 252 + data, host); 253 + } 881 254 default: 882 255 vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", 883 256 msr, data); ··· 925 248 case HV_X64_MSR_HYPERCALL: 926 249 data = hv->hv_hypercall; 927 250 break; 928 - case HV_X64_MSR_TIME_REF_COUNT: { 929 - data = 930 - div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); 251 + case HV_X64_MSR_TIME_REF_COUNT: 252 + data = get_time_ref_counter(kvm); 931 253 break; 932 - } 933 254 case 
HV_X64_MSR_REFERENCE_TSC: 934 255 data = hv->hv_tsc_page; 935 256 break; ··· 979 304 case HV_X64_MSR_VP_RUNTIME: 980 305 data = current_task_runtime_100ns() + hv->runtime_offset; 981 306 break; 307 + case HV_X64_MSR_SCONTROL: 308 + case HV_X64_MSR_SVERSION: 309 + case HV_X64_MSR_SIEFP: 310 + case HV_X64_MSR_SIMP: 311 + case HV_X64_MSR_EOM: 312 + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: 313 + return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); 314 + case HV_X64_MSR_STIMER0_CONFIG: 315 + case HV_X64_MSR_STIMER1_CONFIG: 316 + case HV_X64_MSR_STIMER2_CONFIG: 317 + case HV_X64_MSR_STIMER3_CONFIG: { 318 + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; 319 + 320 + return stimer_get_config(vcpu_to_stimer(vcpu, timer_index), 321 + pdata); 322 + } 323 + case HV_X64_MSR_STIMER0_COUNT: 324 + case HV_X64_MSR_STIMER1_COUNT: 325 + case HV_X64_MSR_STIMER2_COUNT: 326 + case HV_X64_MSR_STIMER3_COUNT: { 327 + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; 328 + 329 + return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), 330 + pdata); 331 + } 982 332 default: 983 333 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 984 334 return 1;
+55
arch/x86/kvm/hyperv.h
··· 24 24 #ifndef __ARCH_X86_KVM_HYPERV_H__ 25 25 #define __ARCH_X86_KVM_HYPERV_H__ 26 26 27 + static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu) 28 + { 29 + return &vcpu->arch.hyperv; 30 + } 31 + 32 + static inline struct kvm_vcpu *hv_vcpu_to_vcpu(struct kvm_vcpu_hv *hv_vcpu) 33 + { 34 + struct kvm_vcpu_arch *arch; 35 + 36 + arch = container_of(hv_vcpu, struct kvm_vcpu_arch, hyperv); 37 + return container_of(arch, struct kvm_vcpu, arch); 38 + } 39 + 40 + static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) 41 + { 42 + return &vcpu->arch.hyperv.synic; 43 + } 44 + 45 + static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) 46 + { 47 + return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic)); 48 + } 49 + 27 50 int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); 28 51 int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); 52 + 29 53 bool kvm_hv_hypercall_enabled(struct kvm *kvm); 30 54 int kvm_hv_hypercall(struct kvm_vcpu *vcpu); 55 + 56 + void kvm_hv_irq_routing_update(struct kvm *kvm); 57 + int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); 58 + void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); 59 + int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); 60 + 61 + void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); 62 + void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); 63 + 64 + static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu, 65 + int timer_index) 66 + { 67 + return &vcpu_to_hv_vcpu(vcpu)->stimer[timer_index]; 68 + } 69 + 70 + static inline struct kvm_vcpu *stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer) 71 + { 72 + struct kvm_vcpu_hv *hv_vcpu; 73 + 74 + hv_vcpu = container_of(stimer - stimer->index, struct kvm_vcpu_hv, 75 + stimer[0]); 76 + return hv_vcpu_to_vcpu(hv_vcpu); 77 + } 78 + 79 + static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) 80 + { 81 + return 
!bitmap_empty(vcpu->arch.hyperv.stimer_pending_bitmap, 82 + HV_SYNIC_STIMER_COUNT); 83 + } 84 + 85 + void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); 31 86 32 87 #endif
+2 -2
arch/x86/kvm/ioapic.c
··· 233 233 } 234 234 235 235 236 - void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 236 + void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) 237 237 { 238 238 struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; 239 239 union kvm_ioapic_redirect_entry *e; ··· 250 250 (e->fields.trig_mode == IOAPIC_EDGE_TRIG && 251 251 kvm_apic_pending_eoi(vcpu, e->fields.vector))) 252 252 __set_bit(e->fields.vector, 253 - (unsigned long *)eoi_exit_bitmap); 253 + ioapic_handled_vectors); 254 254 } 255 255 } 256 256 spin_unlock(&ioapic->lock);
+4 -3
arch/x86/kvm/ioapic.h
··· 121 121 struct kvm_lapic_irq *irq, unsigned long *dest_map); 122 122 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); 123 123 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); 124 - void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); 125 - void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); 126 - 124 + void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, 125 + ulong *ioapic_handled_vectors); 126 + void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, 127 + ulong *ioapic_handled_vectors); 127 128 #endif
+1 -1
arch/x86/kvm/irq.c
··· 76 76 if (kvm_cpu_has_extint(v)) 77 77 return 1; 78 78 79 - if (kvm_vcpu_apic_vid_enabled(v)) 79 + if (kvm_vcpu_apicv_active(v)) 80 80 return 0; 81 81 82 82 return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
+38 -3
arch/x86/kvm/irq_comm.c
··· 33 33 34 34 #include "lapic.h" 35 35 36 + #include "hyperv.h" 37 + 36 38 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, 37 39 struct kvm *kvm, int irq_source_id, int level, 38 40 bool line_status) ··· 221 219 srcu_read_unlock(&kvm->irq_srcu, idx); 222 220 } 223 221 222 + static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, 223 + struct kvm *kvm, int irq_source_id, int level, 224 + bool line_status) 225 + { 226 + if (!level) 227 + return -1; 228 + 229 + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); 230 + } 231 + 224 232 int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, 225 233 const struct kvm_irq_routing_entry *ue) 226 234 { ··· 268 256 e->msi.address_lo = ue->u.msi.address_lo; 269 257 e->msi.address_hi = ue->u.msi.address_hi; 270 258 e->msi.data = ue->u.msi.data; 259 + break; 260 + case KVM_IRQ_ROUTING_HV_SINT: 261 + e->set = kvm_hv_set_sint; 262 + e->hv_sint.vcpu = ue->u.hv_sint.vcpu; 263 + e->hv_sint.sint = ue->u.hv_sint.sint; 271 264 break; 272 265 default: 273 266 goto out; ··· 349 332 return kvm_set_irq_routing(kvm, empty_routing, 0, 0); 350 333 } 351 334 352 - void kvm_arch_irq_routing_update(struct kvm *kvm) 335 + void kvm_arch_post_irq_routing_update(struct kvm *kvm) 353 336 { 354 337 if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) 355 338 return; 356 339 kvm_make_scan_ioapic_request(kvm); 357 340 } 358 341 359 - void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 342 + void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, 343 + ulong *ioapic_handled_vectors) 360 344 { 361 345 struct kvm *kvm = vcpu->kvm; 362 346 struct kvm_kernel_irq_routing_entry *entry; ··· 387 369 u32 vector = entry->msi.data & 0xff; 388 370 389 371 __set_bit(vector, 390 - (unsigned long *) eoi_exit_bitmap); 372 + ioapic_handled_vectors); 391 373 } 392 374 } 393 375 } 394 376 srcu_read_unlock(&kvm->irq_srcu, idx); 377 + } 378 + 379 + int kvm_arch_set_irq(struct 
kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, 380 + int irq_source_id, int level, bool line_status) 381 + { 382 + switch (irq->type) { 383 + case KVM_IRQ_ROUTING_HV_SINT: 384 + return kvm_hv_set_sint(irq, kvm, irq_source_id, level, 385 + line_status); 386 + default: 387 + return -EWOULDBLOCK; 388 + } 389 + } 390 + 391 + void kvm_arch_irq_routing_update(struct kvm *kvm) 392 + { 393 + kvm_hv_irq_routing_update(kvm); 395 394 }
+23 -17
arch/x86/kvm/lapic.c
··· 41 41 #include "trace.h" 42 42 #include "x86.h" 43 43 #include "cpuid.h" 44 + #include "hyperv.h" 44 45 45 46 #ifndef CONFIG_X86_64 46 47 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) ··· 128 127 #define LINT_MASK \ 129 128 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ 130 129 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) 131 - 132 - static inline int kvm_apic_id(struct kvm_lapic *apic) 133 - { 134 - return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; 135 - } 136 130 137 131 /* The logical map is definitely wrong if we have multiple 138 132 * modes at the same time. (Physical map is always right.) ··· 375 379 if (!apic->irr_pending) 376 380 return -1; 377 381 378 - kvm_x86_ops->sync_pir_to_irr(apic->vcpu); 382 + if (apic->vcpu->arch.apicv_active) 383 + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); 379 384 result = apic_search_irr(apic); 380 385 ASSERT(result == -1 || result >= 16); 381 386 ··· 389 392 390 393 vcpu = apic->vcpu; 391 394 392 - if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) { 395 + if (unlikely(vcpu->arch.apicv_active)) { 393 396 /* try to update RVI */ 394 397 apic_clear_vector(vec, apic->regs + APIC_IRR); 395 398 kvm_make_request(KVM_REQ_EVENT, vcpu); ··· 415 418 * because the processor can modify ISR under the hood. Instead 416 419 * just set SVI. 417 420 */ 418 - if (unlikely(kvm_x86_ops->hwapic_isr_update)) 421 + if (unlikely(vcpu->arch.apicv_active)) 419 422 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); 420 423 else { 421 424 ++apic->isr_count; ··· 463 466 * on the other hand isr_count and highest_isr_cache are unused 464 467 * and must be left alone. 
465 468 */ 466 - if (unlikely(kvm_x86_ops->hwapic_isr_update)) 469 + if (unlikely(vcpu->arch.apicv_active)) 467 470 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, 468 471 apic_find_highest_isr(apic)); 469 472 else { ··· 849 852 apic_clear_vector(vector, apic->regs + APIC_TMR); 850 853 } 851 854 852 - if (kvm_x86_ops->deliver_posted_interrupt) 855 + if (vcpu->arch.apicv_active) 853 856 kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); 854 857 else { 855 858 apic_set_irr(vector, apic); ··· 929 932 930 933 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) 931 934 { 932 - return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap); 935 + return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); 933 936 } 934 937 935 938 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) ··· 970 973 971 974 apic_clear_isr(vector, apic); 972 975 apic_update_ppr(apic); 976 + 977 + if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap)) 978 + kvm_hv_synic_send_eoi(apic->vcpu, vector); 973 979 974 980 kvm_ioapic_send_eoi(apic, vector); 975 981 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); ··· 1225 1225 int vec = reg & APIC_VECTOR_MASK; 1226 1226 void *bitmap = apic->regs + APIC_ISR; 1227 1227 1228 - if (kvm_x86_ops->deliver_posted_interrupt) 1228 + if (vcpu->arch.apicv_active) 1229 1229 bitmap = apic->regs + APIC_IRR; 1230 1230 1231 1231 if (apic_test_vector(vec, bitmap)) ··· 1693 1693 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 1694 1694 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 1695 1695 } 1696 - apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu); 1697 - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; 1696 + apic->irr_pending = vcpu->arch.apicv_active; 1697 + apic->isr_count = vcpu->arch.apicv_active ? 
1 : 0; 1698 1698 apic->highest_isr_cache = -1; 1699 1699 update_divide_count(apic); 1700 1700 atomic_set(&apic->lapic_timer.pending, 0); ··· 1883 1883 apic_set_isr(vector, apic); 1884 1884 apic_update_ppr(apic); 1885 1885 apic_clear_irr(vector, apic); 1886 + 1887 + if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { 1888 + apic_clear_isr(vector, apic); 1889 + apic_update_ppr(apic); 1890 + } 1891 + 1886 1892 return vector; 1887 1893 } 1888 1894 ··· 1912 1906 update_divide_count(apic); 1913 1907 start_apic_timer(apic); 1914 1908 apic->irr_pending = true; 1915 - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1909 + apic->isr_count = vcpu->arch.apicv_active ? 1916 1910 1 : count_vectors(apic->regs + APIC_ISR); 1917 1911 apic->highest_isr_cache = -1; 1918 - if (kvm_x86_ops->hwapic_irr_update) 1912 + if (vcpu->arch.apicv_active) { 1919 1913 kvm_x86_ops->hwapic_irr_update(vcpu, 1920 1914 apic_find_highest_irr(apic)); 1921 - if (unlikely(kvm_x86_ops->hwapic_isr_update)) 1922 1915 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, 1923 1916 apic_find_highest_isr(apic)); 1917 + } 1924 1918 kvm_make_request(KVM_REQ_EVENT, vcpu); 1925 1919 if (ioapic_in_kernel(vcpu->kvm)) 1926 1920 kvm_rtc_eoi_tracking_restore_one(vcpu);
+7 -2
arch/x86/kvm/lapic.h
··· 143 143 return apic->vcpu->arch.apic_base & X2APIC_ENABLE; 144 144 } 145 145 146 - static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu) 146 + static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) 147 147 { 148 - return kvm_x86_ops->cpu_uses_apicv(vcpu); 148 + return vcpu->arch.apic && vcpu->arch.apicv_active; 149 149 } 150 150 151 151 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) ··· 162 162 static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) 163 163 { 164 164 return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); 165 + } 166 + 167 + static inline int kvm_apic_id(struct kvm_lapic *apic) 168 + { 169 + return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; 165 170 } 166 171 167 172 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
+183 -214
arch/x86/kvm/mmu.c
··· 311 311 return pte & PT_PAGE_SIZE_MASK; 312 312 } 313 313 314 - static int is_rmap_spte(u64 pte) 315 - { 316 - return is_shadow_present_pte(pte); 317 - } 318 - 319 314 static int is_last_spte(u64 pte, int level) 320 315 { 321 316 if (level == PT_PAGE_TABLE_LEVEL) ··· 535 540 u64 old_spte = *sptep; 536 541 bool ret = false; 537 542 538 - WARN_ON(!is_rmap_spte(new_spte)); 543 + WARN_ON(!is_shadow_present_pte(new_spte)); 539 544 540 545 if (!is_shadow_present_pte(old_spte)) { 541 546 mmu_spte_set(sptep, new_spte); ··· 590 595 else 591 596 old_spte = __update_clear_spte_slow(sptep, 0ull); 592 597 593 - if (!is_rmap_spte(old_spte)) 598 + if (!is_shadow_present_pte(old_spte)) 594 599 return 0; 595 600 596 601 pfn = spte_to_pfn(old_spte); ··· 904 909 } 905 910 906 911 /* 907 - * Pte mapping structures: 912 + * About rmap_head encoding: 908 913 * 909 - * If pte_list bit zero is zero, then pte_list point to the spte. 910 - * 911 - * If pte_list bit zero is one, (then pte_list & ~1) points to a struct 914 + * If the bit zero of rmap_head->val is clear, then it points to the only spte 915 + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct 912 916 * pte_list_desc containing more mappings. 913 - * 914 - * Returns the number of pte entries before the spte was added or zero if 915 - * the spte was not added. 916 - * 917 + */ 918 + 919 + /* 920 + * Returns the number of pointers in the rmap chain, not counting the new one. 
917 921 */ 918 922 static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, 919 - unsigned long *pte_list) 923 + struct kvm_rmap_head *rmap_head) 920 924 { 921 925 struct pte_list_desc *desc; 922 926 int i, count = 0; 923 927 924 - if (!*pte_list) { 928 + if (!rmap_head->val) { 925 929 rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); 926 - *pte_list = (unsigned long)spte; 927 - } else if (!(*pte_list & 1)) { 930 + rmap_head->val = (unsigned long)spte; 931 + } else if (!(rmap_head->val & 1)) { 928 932 rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); 929 933 desc = mmu_alloc_pte_list_desc(vcpu); 930 - desc->sptes[0] = (u64 *)*pte_list; 934 + desc->sptes[0] = (u64 *)rmap_head->val; 931 935 desc->sptes[1] = spte; 932 - *pte_list = (unsigned long)desc | 1; 936 + rmap_head->val = (unsigned long)desc | 1; 933 937 ++count; 934 938 } else { 935 939 rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); 936 - desc = (struct pte_list_desc *)(*pte_list & ~1ul); 940 + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); 937 941 while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { 938 942 desc = desc->more; 939 943 count += PTE_LIST_EXT; ··· 949 955 } 950 956 951 957 static void 952 - pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, 953 - int i, struct pte_list_desc *prev_desc) 958 + pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, 959 + struct pte_list_desc *desc, int i, 960 + struct pte_list_desc *prev_desc) 954 961 { 955 962 int j; 956 963 ··· 962 967 if (j != 0) 963 968 return; 964 969 if (!prev_desc && !desc->more) 965 - *pte_list = (unsigned long)desc->sptes[0]; 970 + rmap_head->val = (unsigned long)desc->sptes[0]; 966 971 else 967 972 if (prev_desc) 968 973 prev_desc->more = desc->more; 969 974 else 970 - *pte_list = (unsigned long)desc->more | 1; 975 + rmap_head->val = (unsigned long)desc->more | 1; 971 976 mmu_free_pte_list_desc(desc); 972 977 } 973 978 974 - static void pte_list_remove(u64 
*spte, unsigned long *pte_list) 979 + static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) 975 980 { 976 981 struct pte_list_desc *desc; 977 982 struct pte_list_desc *prev_desc; 978 983 int i; 979 984 980 - if (!*pte_list) { 985 + if (!rmap_head->val) { 981 986 printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte); 982 987 BUG(); 983 - } else if (!(*pte_list & 1)) { 988 + } else if (!(rmap_head->val & 1)) { 984 989 rmap_printk("pte_list_remove: %p 1->0\n", spte); 985 - if ((u64 *)*pte_list != spte) { 990 + if ((u64 *)rmap_head->val != spte) { 986 991 printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte); 987 992 BUG(); 988 993 } 989 - *pte_list = 0; 994 + rmap_head->val = 0; 990 995 } else { 991 996 rmap_printk("pte_list_remove: %p many->many\n", spte); 992 - desc = (struct pte_list_desc *)(*pte_list & ~1ul); 997 + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); 993 998 prev_desc = NULL; 994 999 while (desc) { 995 - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) 1000 + for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { 996 1001 if (desc->sptes[i] == spte) { 997 - pte_list_desc_remove_entry(pte_list, 998 - desc, i, 999 - prev_desc); 1002 + pte_list_desc_remove_entry(rmap_head, 1003 + desc, i, prev_desc); 1000 1004 return; 1001 1005 } 1006 + } 1002 1007 prev_desc = desc; 1003 1008 desc = desc->more; 1004 1009 } ··· 1007 1012 } 1008 1013 } 1009 1014 1010 - typedef void (*pte_list_walk_fn) (u64 *spte); 1011 - static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) 1012 - { 1013 - struct pte_list_desc *desc; 1014 - int i; 1015 - 1016 - if (!*pte_list) 1017 - return; 1018 - 1019 - if (!(*pte_list & 1)) 1020 - return fn((u64 *)*pte_list); 1021 - 1022 - desc = (struct pte_list_desc *)(*pte_list & ~1ul); 1023 - while (desc) { 1024 - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) 1025 - fn(desc->sptes[i]); 1026 - desc = desc->more; 1027 - } 1028 - } 1029 - 1030 - static unsigned long *__gfn_to_rmap(gfn_t gfn, int 
level, 1031 - struct kvm_memory_slot *slot) 1015 + static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, 1016 + struct kvm_memory_slot *slot) 1032 1017 { 1033 1018 unsigned long idx; 1034 1019 ··· 1016 1041 return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; 1017 1042 } 1018 1043 1019 - /* 1020 - * Take gfn and return the reverse mapping to it. 1021 - */ 1022 - static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp) 1044 + static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, 1045 + struct kvm_mmu_page *sp) 1023 1046 { 1024 1047 struct kvm_memslots *slots; 1025 1048 struct kvm_memory_slot *slot; ··· 1038 1065 static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) 1039 1066 { 1040 1067 struct kvm_mmu_page *sp; 1041 - unsigned long *rmapp; 1068 + struct kvm_rmap_head *rmap_head; 1042 1069 1043 1070 sp = page_header(__pa(spte)); 1044 1071 kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); 1045 - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); 1046 - return pte_list_add(vcpu, spte, rmapp); 1072 + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); 1073 + return pte_list_add(vcpu, spte, rmap_head); 1047 1074 } 1048 1075 1049 1076 static void rmap_remove(struct kvm *kvm, u64 *spte) 1050 1077 { 1051 1078 struct kvm_mmu_page *sp; 1052 1079 gfn_t gfn; 1053 - unsigned long *rmapp; 1080 + struct kvm_rmap_head *rmap_head; 1054 1081 1055 1082 sp = page_header(__pa(spte)); 1056 1083 gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); 1057 - rmapp = gfn_to_rmap(kvm, gfn, sp); 1058 - pte_list_remove(spte, rmapp); 1084 + rmap_head = gfn_to_rmap(kvm, gfn, sp); 1085 + pte_list_remove(spte, rmap_head); 1059 1086 } 1060 1087 1061 1088 /* ··· 1075 1102 * 1076 1103 * Returns sptep if found, NULL otherwise. 
1077 1104 */ 1078 - static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter) 1105 + static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, 1106 + struct rmap_iterator *iter) 1079 1107 { 1080 - if (!rmap) 1108 + u64 *sptep; 1109 + 1110 + if (!rmap_head->val) 1081 1111 return NULL; 1082 1112 1083 - if (!(rmap & 1)) { 1113 + if (!(rmap_head->val & 1)) { 1084 1114 iter->desc = NULL; 1085 - return (u64 *)rmap; 1115 + sptep = (u64 *)rmap_head->val; 1116 + goto out; 1086 1117 } 1087 1118 1088 - iter->desc = (struct pte_list_desc *)(rmap & ~1ul); 1119 + iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); 1089 1120 iter->pos = 0; 1090 - return iter->desc->sptes[iter->pos]; 1121 + sptep = iter->desc->sptes[iter->pos]; 1122 + out: 1123 + BUG_ON(!is_shadow_present_pte(*sptep)); 1124 + return sptep; 1091 1125 } 1092 1126 1093 1127 /* ··· 1104 1124 */ 1105 1125 static u64 *rmap_get_next(struct rmap_iterator *iter) 1106 1126 { 1127 + u64 *sptep; 1128 + 1107 1129 if (iter->desc) { 1108 1130 if (iter->pos < PTE_LIST_EXT - 1) { 1109 - u64 *sptep; 1110 - 1111 1131 ++iter->pos; 1112 1132 sptep = iter->desc->sptes[iter->pos]; 1113 1133 if (sptep) 1114 - return sptep; 1134 + goto out; 1115 1135 } 1116 1136 1117 1137 iter->desc = iter->desc->more; ··· 1119 1139 if (iter->desc) { 1120 1140 iter->pos = 0; 1121 1141 /* desc->sptes[0] cannot be NULL */ 1122 - return iter->desc->sptes[iter->pos]; 1142 + sptep = iter->desc->sptes[iter->pos]; 1143 + goto out; 1123 1144 } 1124 1145 } 1125 1146 1126 1147 return NULL; 1148 + out: 1149 + BUG_ON(!is_shadow_present_pte(*sptep)); 1150 + return sptep; 1127 1151 } 1128 1152 1129 - #define for_each_rmap_spte(_rmap_, _iter_, _spte_) \ 1130 - for (_spte_ = rmap_get_first(*_rmap_, _iter_); \ 1131 - _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ 1132 - _spte_ = rmap_get_next(_iter_)) 1153 + #define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \ 1154 + for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \ 1155 
+ _spte_; _spte_ = rmap_get_next(_iter_)) 1133 1156 1134 1157 static void drop_spte(struct kvm *kvm, u64 *sptep) 1135 1158 { ··· 1190 1207 return mmu_spte_update(sptep, spte); 1191 1208 } 1192 1209 1193 - static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, 1210 + static bool __rmap_write_protect(struct kvm *kvm, 1211 + struct kvm_rmap_head *rmap_head, 1194 1212 bool pt_protect) 1195 1213 { 1196 1214 u64 *sptep; 1197 1215 struct rmap_iterator iter; 1198 1216 bool flush = false; 1199 1217 1200 - for_each_rmap_spte(rmapp, &iter, sptep) 1218 + for_each_rmap_spte(rmap_head, &iter, sptep) 1201 1219 flush |= spte_write_protect(kvm, sptep, pt_protect); 1202 1220 1203 1221 return flush; ··· 1215 1231 return mmu_spte_update(sptep, spte); 1216 1232 } 1217 1233 1218 - static bool __rmap_clear_dirty(struct kvm *kvm, unsigned long *rmapp) 1234 + static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) 1219 1235 { 1220 1236 u64 *sptep; 1221 1237 struct rmap_iterator iter; 1222 1238 bool flush = false; 1223 1239 1224 - for_each_rmap_spte(rmapp, &iter, sptep) 1240 + for_each_rmap_spte(rmap_head, &iter, sptep) 1225 1241 flush |= spte_clear_dirty(kvm, sptep); 1226 1242 1227 1243 return flush; ··· 1238 1254 return mmu_spte_update(sptep, spte); 1239 1255 } 1240 1256 1241 - static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp) 1257 + static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) 1242 1258 { 1243 1259 u64 *sptep; 1244 1260 struct rmap_iterator iter; 1245 1261 bool flush = false; 1246 1262 1247 - for_each_rmap_spte(rmapp, &iter, sptep) 1263 + for_each_rmap_spte(rmap_head, &iter, sptep) 1248 1264 flush |= spte_set_dirty(kvm, sptep); 1249 1265 1250 1266 return flush; ··· 1264 1280 struct kvm_memory_slot *slot, 1265 1281 gfn_t gfn_offset, unsigned long mask) 1266 1282 { 1267 - unsigned long *rmapp; 1283 + struct kvm_rmap_head *rmap_head; 1268 1284 1269 1285 while (mask) { 1270 - rmapp = 
__gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), 1271 - PT_PAGE_TABLE_LEVEL, slot); 1272 - __rmap_write_protect(kvm, rmapp, false); 1286 + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), 1287 + PT_PAGE_TABLE_LEVEL, slot); 1288 + __rmap_write_protect(kvm, rmap_head, false); 1273 1289 1274 1290 /* clear the first set bit */ 1275 1291 mask &= mask - 1; ··· 1289 1305 struct kvm_memory_slot *slot, 1290 1306 gfn_t gfn_offset, unsigned long mask) 1291 1307 { 1292 - unsigned long *rmapp; 1308 + struct kvm_rmap_head *rmap_head; 1293 1309 1294 1310 while (mask) { 1295 - rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), 1296 - PT_PAGE_TABLE_LEVEL, slot); 1297 - __rmap_clear_dirty(kvm, rmapp); 1311 + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), 1312 + PT_PAGE_TABLE_LEVEL, slot); 1313 + __rmap_clear_dirty(kvm, rmap_head); 1298 1314 1299 1315 /* clear the first set bit */ 1300 1316 mask &= mask - 1; ··· 1326 1342 static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) 1327 1343 { 1328 1344 struct kvm_memory_slot *slot; 1329 - unsigned long *rmapp; 1345 + struct kvm_rmap_head *rmap_head; 1330 1346 int i; 1331 1347 bool write_protected = false; 1332 1348 1333 1349 slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); 1334 1350 1335 1351 for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { 1336 - rmapp = __gfn_to_rmap(gfn, i, slot); 1337 - write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true); 1352 + rmap_head = __gfn_to_rmap(gfn, i, slot); 1353 + write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true); 1338 1354 } 1339 1355 1340 1356 return write_protected; 1341 1357 } 1342 1358 1343 - static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) 1359 + static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) 1344 1360 { 1345 1361 u64 *sptep; 1346 1362 struct rmap_iterator iter; 1347 1363 bool flush = false; 1348 1364 1349 - while ((sptep = rmap_get_first(*rmapp, 
&iter))) { 1350 - BUG_ON(!(*sptep & PT_PRESENT_MASK)); 1365 + while ((sptep = rmap_get_first(rmap_head, &iter))) { 1351 1366 rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); 1352 1367 1353 1368 drop_spte(kvm, sptep); ··· 1356 1373 return flush; 1357 1374 } 1358 1375 1359 - static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, 1376 + static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, 1360 1377 struct kvm_memory_slot *slot, gfn_t gfn, int level, 1361 1378 unsigned long data) 1362 1379 { 1363 - return kvm_zap_rmapp(kvm, rmapp); 1380 + return kvm_zap_rmapp(kvm, rmap_head); 1364 1381 } 1365 1382 1366 - static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, 1383 + static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, 1367 1384 struct kvm_memory_slot *slot, gfn_t gfn, int level, 1368 1385 unsigned long data) 1369 1386 { ··· 1378 1395 new_pfn = pte_pfn(*ptep); 1379 1396 1380 1397 restart: 1381 - for_each_rmap_spte(rmapp, &iter, sptep) { 1398 + for_each_rmap_spte(rmap_head, &iter, sptep) { 1382 1399 rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", 1383 1400 sptep, *sptep, gfn, level); 1384 1401 ··· 1416 1433 1417 1434 /* output fields. */ 1418 1435 gfn_t gfn; 1419 - unsigned long *rmap; 1436 + struct kvm_rmap_head *rmap; 1420 1437 int level; 1421 1438 1422 1439 /* private field. 
*/ 1423 - unsigned long *end_rmap; 1440 + struct kvm_rmap_head *end_rmap; 1424 1441 }; 1425 1442 1426 1443 static void ··· 1479 1496 unsigned long end, 1480 1497 unsigned long data, 1481 1498 int (*handler)(struct kvm *kvm, 1482 - unsigned long *rmapp, 1499 + struct kvm_rmap_head *rmap_head, 1483 1500 struct kvm_memory_slot *slot, 1484 1501 gfn_t gfn, 1485 1502 int level, ··· 1523 1540 1524 1541 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, 1525 1542 unsigned long data, 1526 - int (*handler)(struct kvm *kvm, unsigned long *rmapp, 1543 + int (*handler)(struct kvm *kvm, 1544 + struct kvm_rmap_head *rmap_head, 1527 1545 struct kvm_memory_slot *slot, 1528 1546 gfn_t gfn, int level, 1529 1547 unsigned long data)) ··· 1547 1563 kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); 1548 1564 } 1549 1565 1550 - static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 1566 + static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, 1551 1567 struct kvm_memory_slot *slot, gfn_t gfn, int level, 1552 1568 unsigned long data) 1553 1569 { ··· 1557 1573 1558 1574 BUG_ON(!shadow_accessed_mask); 1559 1575 1560 - for_each_rmap_spte(rmapp, &iter, sptep) 1576 + for_each_rmap_spte(rmap_head, &iter, sptep) { 1561 1577 if (*sptep & shadow_accessed_mask) { 1562 1578 young = 1; 1563 1579 clear_bit((ffs(shadow_accessed_mask) - 1), 1564 1580 (unsigned long *)sptep); 1565 1581 } 1582 + } 1566 1583 1567 1584 trace_kvm_age_page(gfn, level, slot, young); 1568 1585 return young; 1569 1586 } 1570 1587 1571 - static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 1588 + static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, 1572 1589 struct kvm_memory_slot *slot, gfn_t gfn, 1573 1590 int level, unsigned long data) 1574 1591 { ··· 1585 1600 if (!shadow_accessed_mask) 1586 1601 goto out; 1587 1602 1588 - for_each_rmap_spte(rmapp, &iter, sptep) 1603 + for_each_rmap_spte(rmap_head, &iter, sptep) { 1589 1604 if 
(*sptep & shadow_accessed_mask) { 1590 1605 young = 1; 1591 1606 break; 1592 1607 } 1608 + } 1593 1609 out: 1594 1610 return young; 1595 1611 } ··· 1599 1613 1600 1614 static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) 1601 1615 { 1602 - unsigned long *rmapp; 1616 + struct kvm_rmap_head *rmap_head; 1603 1617 struct kvm_mmu_page *sp; 1604 1618 1605 1619 sp = page_header(__pa(spte)); 1606 1620 1607 - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); 1621 + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); 1608 1622 1609 - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); 1623 + kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0); 1610 1624 kvm_flush_remote_tlbs(vcpu->kvm); 1611 1625 } 1612 1626 ··· 1706 1720 mmu_spte_clear_no_track(parent_pte); 1707 1721 } 1708 1722 1709 - static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, 1710 - u64 *parent_pte, int direct) 1723 + static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct) 1711 1724 { 1712 1725 struct kvm_mmu_page *sp; 1713 1726 ··· 1722 1737 * this feature. See the comments in kvm_zap_obsolete_pages(). 
1723 1738 */ 1724 1739 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 1725 - sp->parent_ptes = 0; 1726 - mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1727 1740 kvm_mod_used_mmu_pages(vcpu->kvm, +1); 1728 1741 return sp; 1729 1742 } ··· 1729 1746 static void mark_unsync(u64 *spte); 1730 1747 static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) 1731 1748 { 1732 - pte_list_walk(&sp->parent_ptes, mark_unsync); 1749 + u64 *sptep; 1750 + struct rmap_iterator iter; 1751 + 1752 + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { 1753 + mark_unsync(sptep); 1754 + } 1733 1755 } 1734 1756 1735 1757 static void mark_unsync(u64 *spte) ··· 1794 1806 return (pvec->nr == KVM_PAGE_ARRAY_NR); 1795 1807 } 1796 1808 1809 + static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx) 1810 + { 1811 + --sp->unsync_children; 1812 + WARN_ON((int)sp->unsync_children < 0); 1813 + __clear_bit(idx, sp->unsync_child_bitmap); 1814 + } 1815 + 1797 1816 static int __mmu_unsync_walk(struct kvm_mmu_page *sp, 1798 1817 struct kvm_mmu_pages *pvec) 1799 1818 { ··· 1810 1815 struct kvm_mmu_page *child; 1811 1816 u64 ent = sp->spt[i]; 1812 1817 1813 - if (!is_shadow_present_pte(ent) || is_large_pte(ent)) 1814 - goto clear_child_bitmap; 1818 + if (!is_shadow_present_pte(ent) || is_large_pte(ent)) { 1819 + clear_unsync_child_bit(sp, i); 1820 + continue; 1821 + } 1815 1822 1816 1823 child = page_header(ent & PT64_BASE_ADDR_MASK); 1817 1824 ··· 1822 1825 return -ENOSPC; 1823 1826 1824 1827 ret = __mmu_unsync_walk(child, pvec); 1825 - if (!ret) 1826 - goto clear_child_bitmap; 1827 - else if (ret > 0) 1828 + if (!ret) { 1829 + clear_unsync_child_bit(sp, i); 1830 + continue; 1831 + } else if (ret > 0) { 1828 1832 nr_unsync_leaf += ret; 1829 - else 1833 + } else 1830 1834 return ret; 1831 1835 } else if (child->unsync) { 1832 1836 nr_unsync_leaf++; 1833 1837 if (mmu_pages_add(pvec, child, i)) 1834 1838 return -ENOSPC; 1835 1839 } else 1836 - goto clear_child_bitmap; 1837 
- 1838 - continue; 1839 - 1840 - clear_child_bitmap: 1841 - __clear_bit(i, sp->unsync_child_bitmap); 1842 - sp->unsync_children--; 1843 - WARN_ON((int)sp->unsync_children < 0); 1840 + clear_unsync_child_bit(sp, i); 1844 1841 } 1845 - 1846 1842 1847 1843 return nr_unsync_leaf; 1848 1844 } ··· 1999 2009 if (!sp) 2000 2010 return; 2001 2011 2002 - --sp->unsync_children; 2003 - WARN_ON((int)sp->unsync_children < 0); 2004 - __clear_bit(idx, sp->unsync_child_bitmap); 2012 + clear_unsync_child_bit(sp, idx); 2005 2013 level++; 2006 2014 } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); 2007 2015 } ··· 2041 2053 } 2042 2054 } 2043 2055 2044 - static void init_shadow_page_table(struct kvm_mmu_page *sp) 2045 - { 2046 - int i; 2047 - 2048 - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) 2049 - sp->spt[i] = 0ull; 2050 - } 2051 - 2052 2056 static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) 2053 2057 { 2054 2058 sp->write_flooding_count = 0; ··· 2063 2083 gva_t gaddr, 2064 2084 unsigned level, 2065 2085 int direct, 2066 - unsigned access, 2067 - u64 *parent_pte) 2086 + unsigned access) 2068 2087 { 2069 2088 union kvm_mmu_page_role role; 2070 2089 unsigned quadrant; ··· 2095 2116 if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) 2096 2117 break; 2097 2118 2098 - mmu_page_add_parent_pte(vcpu, sp, parent_pte); 2099 - if (sp->unsync_children) { 2119 + if (sp->unsync_children) 2100 2120 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); 2101 - kvm_mmu_mark_parents_unsync(sp); 2102 - } else if (sp->unsync) 2103 - kvm_mmu_mark_parents_unsync(sp); 2104 2121 2105 2122 __clear_sp_write_flooding_count(sp); 2106 2123 trace_kvm_mmu_get_page(sp, false); 2107 2124 return sp; 2108 2125 } 2126 + 2109 2127 ++vcpu->kvm->stat.mmu_cache_miss; 2110 - sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); 2111 - if (!sp) 2112 - return sp; 2128 + 2129 + sp = kvm_mmu_alloc_page(vcpu, direct); 2130 + 2113 2131 sp->gfn = gfn; 2114 2132 sp->role = role; 2115 2133 hlist_add_head(&sp->hash_link, 
··· 2120 2144 account_shadowed(vcpu->kvm, sp); 2121 2145 } 2122 2146 sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; 2123 - init_shadow_page_table(sp); 2147 + clear_page(sp->spt); 2124 2148 trace_kvm_mmu_get_page(sp, true); 2125 2149 return sp; 2126 2150 } ··· 2174 2198 return __shadow_walk_next(iterator, *iterator->sptep); 2175 2199 } 2176 2200 2177 - static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) 2201 + static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, 2202 + struct kvm_mmu_page *sp) 2178 2203 { 2179 2204 u64 spte; 2180 2205 ··· 2183 2206 VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); 2184 2207 2185 2208 spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | 2186 - shadow_user_mask | shadow_x_mask; 2187 - 2188 - if (accessed) 2189 - spte |= shadow_accessed_mask; 2209 + shadow_user_mask | shadow_x_mask | shadow_accessed_mask; 2190 2210 2191 2211 mmu_spte_set(sptep, spte); 2212 + 2213 + mmu_page_add_parent_pte(vcpu, sp, sptep); 2214 + 2215 + if (sp->unsync_children || sp->unsync) 2216 + mark_unsync(sptep); 2192 2217 } 2193 2218 2194 2219 static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, ··· 2249 2270 mmu_page_zap_pte(kvm, sp, sp->spt + i); 2250 2271 } 2251 2272 2252 - static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) 2253 - { 2254 - mmu_page_remove_parent_pte(sp, parent_pte); 2255 - } 2256 - 2257 2273 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) 2258 2274 { 2259 2275 u64 *sptep; 2260 2276 struct rmap_iterator iter; 2261 2277 2262 - while ((sptep = rmap_get_first(sp->parent_ptes, &iter))) 2278 + while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) 2263 2279 drop_parent_pte(sp, sptep); 2264 2280 } 2265 2281 ··· 2538 2564 return ret; 2539 2565 } 2540 2566 2541 - static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, 2542 - unsigned pte_access, int write_fault, int *emulate, 2543 - int level, gfn_t gfn, pfn_t pfn, bool speculative, 2544 
- bool host_writable) 2567 + static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, 2568 + int write_fault, int level, gfn_t gfn, pfn_t pfn, 2569 + bool speculative, bool host_writable) 2545 2570 { 2546 2571 int was_rmapped = 0; 2547 2572 int rmap_count; 2573 + bool emulate = false; 2548 2574 2549 2575 pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, 2550 2576 *sptep, write_fault, gfn); 2551 2577 2552 - if (is_rmap_spte(*sptep)) { 2578 + if (is_shadow_present_pte(*sptep)) { 2553 2579 /* 2554 2580 * If we overwrite a PTE page pointer with a 2MB PMD, unlink 2555 2581 * the parent of the now unreachable PTE. ··· 2574 2600 if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative, 2575 2601 true, host_writable)) { 2576 2602 if (write_fault) 2577 - *emulate = 1; 2603 + emulate = true; 2578 2604 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 2579 2605 } 2580 2606 2581 - if (unlikely(is_mmio_spte(*sptep) && emulate)) 2582 - *emulate = 1; 2607 + if (unlikely(is_mmio_spte(*sptep))) 2608 + emulate = true; 2583 2609 2584 2610 pgprintk("%s: setting spte %llx\n", __func__, *sptep); 2585 2611 pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", ··· 2598 2624 } 2599 2625 2600 2626 kvm_release_pfn_clean(pfn); 2627 + 2628 + return emulate; 2601 2629 } 2602 2630 2603 2631 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, ··· 2634 2658 return -1; 2635 2659 2636 2660 for (i = 0; i < ret; i++, gfn++, start++) 2637 - mmu_set_spte(vcpu, start, access, 0, NULL, 2638 - sp->role.level, gfn, page_to_pfn(pages[i]), 2639 - true, true); 2661 + mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, 2662 + page_to_pfn(pages[i]), true, true); 2640 2663 2641 2664 return 0; 2642 2665 } ··· 2683 2708 __direct_pte_prefetch(vcpu, sp, sptep); 2684 2709 } 2685 2710 2686 - static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, 2687 - int map_writable, int level, gfn_t gfn, pfn_t pfn, 2688 - bool prefault) 2711 + 
static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, 2712 + int level, gfn_t gfn, pfn_t pfn, bool prefault) 2689 2713 { 2690 2714 struct kvm_shadow_walk_iterator iterator; 2691 2715 struct kvm_mmu_page *sp; ··· 2696 2722 2697 2723 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { 2698 2724 if (iterator.level == level) { 2699 - mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, 2700 - write, &emulate, level, gfn, pfn, 2701 - prefault, map_writable); 2725 + emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, 2726 + write, level, gfn, pfn, prefault, 2727 + map_writable); 2702 2728 direct_pte_prefetch(vcpu, iterator.sptep); 2703 2729 ++vcpu->stat.pf_fixed; 2704 2730 break; ··· 2711 2737 base_addr &= PT64_LVL_ADDR_MASK(iterator.level); 2712 2738 pseudo_gfn = base_addr >> PAGE_SHIFT; 2713 2739 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, 2714 - iterator.level - 1, 2715 - 1, ACC_ALL, iterator.sptep); 2740 + iterator.level - 1, 1, ACC_ALL); 2716 2741 2717 - link_shadow_page(iterator.sptep, sp, true); 2742 + link_shadow_page(vcpu, iterator.sptep, sp); 2718 2743 } 2719 2744 } 2720 2745 return emulate; ··· 2892 2919 * If the mapping has been changed, let the vcpu fault on the 2893 2920 * same address again. 
2894 2921 */ 2895 - if (!is_rmap_spte(spte)) { 2922 + if (!is_shadow_present_pte(spte)) { 2896 2923 ret = true; 2897 2924 goto exit; 2898 2925 } ··· 2991 3018 make_mmu_pages_available(vcpu); 2992 3019 if (likely(!force_pt_level)) 2993 3020 transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); 2994 - r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, 2995 - prefault); 3021 + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); 2996 3022 spin_unlock(&vcpu->kvm->mmu_lock); 2997 - 2998 3023 2999 3024 return r; 3000 3025 ··· 3068 3097 if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { 3069 3098 spin_lock(&vcpu->kvm->mmu_lock); 3070 3099 make_mmu_pages_available(vcpu); 3071 - sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 3072 - 1, ACC_ALL, NULL); 3100 + sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); 3073 3101 ++sp->root_count; 3074 3102 spin_unlock(&vcpu->kvm->mmu_lock); 3075 3103 vcpu->arch.mmu.root_hpa = __pa(sp->spt); ··· 3080 3110 spin_lock(&vcpu->kvm->mmu_lock); 3081 3111 make_mmu_pages_available(vcpu); 3082 3112 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), 3083 - i << 30, 3084 - PT32_ROOT_LEVEL, 1, ACC_ALL, 3085 - NULL); 3113 + i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); 3086 3114 root = __pa(sp->spt); 3087 3115 ++sp->root_count; 3088 3116 spin_unlock(&vcpu->kvm->mmu_lock); ··· 3117 3149 spin_lock(&vcpu->kvm->mmu_lock); 3118 3150 make_mmu_pages_available(vcpu); 3119 3151 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, 3120 - 0, ACC_ALL, NULL); 3152 + 0, ACC_ALL); 3121 3153 root = __pa(sp->spt); 3122 3154 ++sp->root_count; 3123 3155 spin_unlock(&vcpu->kvm->mmu_lock); ··· 3150 3182 } 3151 3183 spin_lock(&vcpu->kvm->mmu_lock); 3152 3184 make_mmu_pages_available(vcpu); 3153 - sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 3154 - PT32_ROOT_LEVEL, 0, 3155 - ACC_ALL, NULL); 3185 + sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 3186 + 0, ACC_ALL); 3156 3187 root = __pa(sp->spt); 
3157 3188 ++sp->root_count; 3158 3189 spin_unlock(&vcpu->kvm->mmu_lock); ··· 3498 3531 make_mmu_pages_available(vcpu); 3499 3532 if (likely(!force_pt_level)) 3500 3533 transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); 3501 - r = __direct_map(vcpu, gpa, write, map_writable, 3502 - level, gfn, pfn, prefault); 3534 + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); 3503 3535 spin_unlock(&vcpu->kvm->mmu_lock); 3504 3536 3505 3537 return r; ··· 4024 4058 g_context->inject_page_fault = kvm_inject_page_fault; 4025 4059 4026 4060 /* 4027 - * Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. The 4028 - * translation of l2_gpa to l1_gpa addresses is done using the 4029 - * arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa 4030 - * functions between mmu and nested_mmu are swapped. 4061 + * Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using 4062 + * L1's nested page tables (e.g. EPT12). The nested translation 4063 + * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using 4064 + * L2's page tables as the first level of translation and L1's 4065 + * nested page tables as the second level of translation. Basically 4066 + * the gva_to_gpa functions between mmu and nested_mmu are swapped. 4031 4067 */ 4032 4068 if (!is_paging(vcpu)) { 4033 4069 g_context->nx = false; ··· 4463 4495 } 4464 4496 4465 4497 /* The return value indicates if tlb flush on all vcpus is needed. */ 4466 - typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap); 4498 + typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); 4467 4499 4468 4500 /* The caller should hold mmu-lock before calling this function. 
*/ 4469 4501 static bool ··· 4557 4589 spin_unlock(&kvm->mmu_lock); 4558 4590 } 4559 4591 4560 - static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp) 4592 + static bool slot_rmap_write_protect(struct kvm *kvm, 4593 + struct kvm_rmap_head *rmap_head) 4561 4594 { 4562 - return __rmap_write_protect(kvm, rmapp, false); 4595 + return __rmap_write_protect(kvm, rmap_head, false); 4563 4596 } 4564 4597 4565 4598 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, ··· 4596 4627 } 4597 4628 4598 4629 static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, 4599 - unsigned long *rmapp) 4630 + struct kvm_rmap_head *rmap_head) 4600 4631 { 4601 4632 u64 *sptep; 4602 4633 struct rmap_iterator iter; ··· 4605 4636 struct kvm_mmu_page *sp; 4606 4637 4607 4638 restart: 4608 - for_each_rmap_spte(rmapp, &iter, sptep) { 4639 + for_each_rmap_spte(rmap_head, &iter, sptep) { 4609 4640 sp = page_header(__pa(sptep)); 4610 4641 pfn = spte_to_pfn(*sptep); 4611 4642
+8 -7
arch/x86/kvm/mmu_audit.c
··· 129 129 static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) 130 130 { 131 131 static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); 132 - unsigned long *rmapp; 132 + struct kvm_rmap_head *rmap_head; 133 133 struct kvm_mmu_page *rev_sp; 134 134 struct kvm_memslots *slots; 135 135 struct kvm_memory_slot *slot; ··· 150 150 return; 151 151 } 152 152 153 - rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot); 154 - if (!*rmapp) { 153 + rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot); 154 + if (!rmap_head->val) { 155 155 if (!__ratelimit(&ratelimit_state)) 156 156 return; 157 157 audit_printk(kvm, "no rmap for writable spte %llx\n", ··· 183 183 return; 184 184 185 185 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { 186 - if (!is_rmap_spte(sp->spt[i])) 186 + if (!is_shadow_present_pte(sp->spt[i])) 187 187 continue; 188 188 189 189 inspect_spte_has_rmap(kvm, sp->spt + i); ··· 192 192 193 193 static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) 194 194 { 195 - unsigned long *rmapp; 195 + struct kvm_rmap_head *rmap_head; 196 196 u64 *sptep; 197 197 struct rmap_iterator iter; 198 198 struct kvm_memslots *slots; ··· 203 203 204 204 slots = kvm_memslots_for_spte_role(kvm, sp->role); 205 205 slot = __gfn_to_memslot(slots, sp->gfn); 206 - rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); 206 + rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); 207 207 208 - for_each_rmap_spte(rmapp, &iter, sptep) 208 + for_each_rmap_spte(rmap_head, &iter, sptep) { 209 209 if (is_writable_pte(*sptep)) 210 210 audit_printk(kvm, "shadow page has writable " 211 211 "mappings: gfn %llx role %x\n", 212 212 sp->gfn, sp->role.word); 213 + } 213 214 } 214 215 215 216 static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+9 -11
arch/x86/kvm/paging_tmpl.h
··· 475 475 * we call mmu_set_spte() with host_writable = true because 476 476 * pte_prefetch_gfn_to_pfn always gets a writable pfn. 477 477 */ 478 - mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, 479 - gfn, pfn, true, true); 478 + mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, 479 + true, true); 480 480 481 481 return true; 482 482 } ··· 556 556 struct kvm_mmu_page *sp = NULL; 557 557 struct kvm_shadow_walk_iterator it; 558 558 unsigned direct_access, access = gw->pt_access; 559 - int top_level, emulate = 0; 559 + int top_level, emulate; 560 560 561 561 direct_access = gw->pte_access; 562 562 ··· 587 587 if (!is_shadow_present_pte(*it.sptep)) { 588 588 table_gfn = gw->table_gfn[it.level - 2]; 589 589 sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, 590 - false, access, it.sptep); 590 + false, access); 591 591 } 592 592 593 593 /* ··· 598 598 goto out_gpte_changed; 599 599 600 600 if (sp) 601 - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); 601 + link_shadow_page(vcpu, it.sptep, sp); 602 602 } 603 603 604 604 for (; ··· 617 617 direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); 618 618 619 619 sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, 620 - true, direct_access, it.sptep); 621 - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); 620 + true, direct_access); 621 + link_shadow_page(vcpu, it.sptep, sp); 622 622 } 623 623 624 624 clear_sp_write_flooding_count(it.sptep); 625 - mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate, 626 - it.level, gw->gfn, pfn, prefault, map_writable); 625 + emulate = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, 626 + it.level, gw->gfn, pfn, prefault, map_writable); 627 627 FNAME(pte_prefetch)(vcpu, gw, it.sptep); 628 628 629 629 return emulate; 630 630 631 631 out_gpte_changed: 632 - if (sp) 633 - kvm_mmu_put_page(sp, it.sptep); 634 632 kvm_release_pfn_clean(pfn); 635 633 return 0; 636 634 }
+32 -5
arch/x86/kvm/svm.c
··· 86 86 MSR_FS_BASE, 87 87 #endif 88 88 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 89 + MSR_TSC_AUX, 89 90 }; 90 91 91 92 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) ··· 136 135 uint64_t asid_generation; 137 136 uint64_t sysenter_esp; 138 137 uint64_t sysenter_eip; 138 + uint64_t tsc_aux; 139 139 140 140 u64 next_rip; 141 141 ··· 1240 1238 wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); 1241 1239 } 1242 1240 } 1241 + /* This assumes that the kernel never uses MSR_TSC_AUX */ 1242 + if (static_cpu_has(X86_FEATURE_RDTSCP)) 1243 + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); 1243 1244 } 1244 1245 1245 1246 static void svm_vcpu_put(struct kvm_vcpu *vcpu) ··· 3029 3024 case MSR_IA32_SYSENTER_ESP: 3030 3025 msr_info->data = svm->sysenter_esp; 3031 3026 break; 3027 + case MSR_TSC_AUX: 3028 + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) 3029 + return 1; 3030 + msr_info->data = svm->tsc_aux; 3031 + break; 3032 3032 /* 3033 3033 * Nobody will change the following 5 values in the VMCB so we can 3034 3034 * safely return them on rdmsr. They will always be 0 until LBRV is ··· 3171 3161 case MSR_IA32_SYSENTER_ESP: 3172 3162 svm->sysenter_esp = data; 3173 3163 svm->vmcb->save.sysenter_esp = data; 3164 + break; 3165 + case MSR_TSC_AUX: 3166 + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) 3167 + return 1; 3168 + 3169 + /* 3170 + * This is rare, so we update the MSR here instead of using 3171 + * direct_access_msrs. Doing that would require a rdmsr in 3172 + * svm_vcpu_put. 
3173 + */ 3174 + svm->tsc_aux = data; 3175 + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); 3174 3176 break; 3175 3177 case MSR_IA32_DEBUGCTLMSR: 3176 3178 if (!boot_cpu_has(X86_FEATURE_LBRV)) { ··· 3600 3578 return; 3601 3579 } 3602 3580 3603 - static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) 3581 + static bool svm_get_enable_apicv(void) 3604 3582 { 3605 - return 0; 3583 + return false; 3606 3584 } 3607 3585 3608 - static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu) 3586 + static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) 3587 + { 3588 + } 3589 + 3590 + static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 3609 3591 { 3610 3592 return; 3611 3593 } ··· 4080 4054 4081 4055 static bool svm_rdtscp_supported(void) 4082 4056 { 4083 - return false; 4057 + return boot_cpu_has(X86_FEATURE_RDTSCP); 4084 4058 } 4085 4059 4086 4060 static bool svm_invpcid_supported(void) ··· 4371 4345 .enable_irq_window = enable_irq_window, 4372 4346 .update_cr8_intercept = update_cr8_intercept, 4373 4347 .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, 4374 - .cpu_uses_apicv = svm_cpu_uses_apicv, 4348 + .get_enable_apicv = svm_get_enable_apicv, 4349 + .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, 4375 4350 .load_eoi_exitmap = svm_load_eoi_exitmap, 4376 4351 .sync_pir_to_irr = svm_sync_pir_to_irr, 4377 4352
+264 -1
arch/x86/kvm/trace.h
··· 268 268 #define kvm_trace_sym_exc \ 269 269 EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ 270 270 EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ 271 - EXS(MF), EXS(MC) 271 + EXS(MF), EXS(AC), EXS(MC) 272 272 273 273 /* 274 274 * Tracepoint for kvm interrupt injection: ··· 1023 1023 __entry->gsi, 1024 1024 __entry->gvec, 1025 1025 __entry->pi_desc_addr) 1026 + ); 1027 + 1028 + /* 1029 + * Tracepoint for kvm_hv_notify_acked_sint. 1030 + */ 1031 + TRACE_EVENT(kvm_hv_notify_acked_sint, 1032 + TP_PROTO(int vcpu_id, u32 sint), 1033 + TP_ARGS(vcpu_id, sint), 1034 + 1035 + TP_STRUCT__entry( 1036 + __field(int, vcpu_id) 1037 + __field(u32, sint) 1038 + ), 1039 + 1040 + TP_fast_assign( 1041 + __entry->vcpu_id = vcpu_id; 1042 + __entry->sint = sint; 1043 + ), 1044 + 1045 + TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint) 1046 + ); 1047 + 1048 + /* 1049 + * Tracepoint for synic_set_irq. 1050 + */ 1051 + TRACE_EVENT(kvm_hv_synic_set_irq, 1052 + TP_PROTO(int vcpu_id, u32 sint, int vector, int ret), 1053 + TP_ARGS(vcpu_id, sint, vector, ret), 1054 + 1055 + TP_STRUCT__entry( 1056 + __field(int, vcpu_id) 1057 + __field(u32, sint) 1058 + __field(int, vector) 1059 + __field(int, ret) 1060 + ), 1061 + 1062 + TP_fast_assign( 1063 + __entry->vcpu_id = vcpu_id; 1064 + __entry->sint = sint; 1065 + __entry->vector = vector; 1066 + __entry->ret = ret; 1067 + ), 1068 + 1069 + TP_printk("vcpu_id %d sint %u vector %d ret %d", 1070 + __entry->vcpu_id, __entry->sint, __entry->vector, 1071 + __entry->ret) 1072 + ); 1073 + 1074 + /* 1075 + * Tracepoint for kvm_hv_synic_send_eoi. 
1076 + */ 1077 + TRACE_EVENT(kvm_hv_synic_send_eoi, 1078 + TP_PROTO(int vcpu_id, int vector), 1079 + TP_ARGS(vcpu_id, vector), 1080 + 1081 + TP_STRUCT__entry( 1082 + __field(int, vcpu_id) 1083 + __field(u32, sint) 1084 + __field(int, vector) 1085 + __field(int, ret) 1086 + ), 1087 + 1088 + TP_fast_assign( 1089 + __entry->vcpu_id = vcpu_id; 1090 + __entry->vector = vector; 1091 + ), 1092 + 1093 + TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector) 1094 + ); 1095 + 1096 + /* 1097 + * Tracepoint for synic_set_msr. 1098 + */ 1099 + TRACE_EVENT(kvm_hv_synic_set_msr, 1100 + TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host), 1101 + TP_ARGS(vcpu_id, msr, data, host), 1102 + 1103 + TP_STRUCT__entry( 1104 + __field(int, vcpu_id) 1105 + __field(u32, msr) 1106 + __field(u64, data) 1107 + __field(bool, host) 1108 + ), 1109 + 1110 + TP_fast_assign( 1111 + __entry->vcpu_id = vcpu_id; 1112 + __entry->msr = msr; 1113 + __entry->data = data; 1114 + __entry->host = host 1115 + ), 1116 + 1117 + TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d", 1118 + __entry->vcpu_id, __entry->msr, __entry->data, __entry->host) 1119 + ); 1120 + 1121 + /* 1122 + * Tracepoint for stimer_set_config. 1123 + */ 1124 + TRACE_EVENT(kvm_hv_stimer_set_config, 1125 + TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host), 1126 + TP_ARGS(vcpu_id, timer_index, config, host), 1127 + 1128 + TP_STRUCT__entry( 1129 + __field(int, vcpu_id) 1130 + __field(int, timer_index) 1131 + __field(u64, config) 1132 + __field(bool, host) 1133 + ), 1134 + 1135 + TP_fast_assign( 1136 + __entry->vcpu_id = vcpu_id; 1137 + __entry->timer_index = timer_index; 1138 + __entry->config = config; 1139 + __entry->host = host; 1140 + ), 1141 + 1142 + TP_printk("vcpu_id %d timer %d config 0x%llx host %d", 1143 + __entry->vcpu_id, __entry->timer_index, __entry->config, 1144 + __entry->host) 1145 + ); 1146 + 1147 + /* 1148 + * Tracepoint for stimer_set_count. 
1149 + */ 1150 + TRACE_EVENT(kvm_hv_stimer_set_count, 1151 + TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host), 1152 + TP_ARGS(vcpu_id, timer_index, count, host), 1153 + 1154 + TP_STRUCT__entry( 1155 + __field(int, vcpu_id) 1156 + __field(int, timer_index) 1157 + __field(u64, count) 1158 + __field(bool, host) 1159 + ), 1160 + 1161 + TP_fast_assign( 1162 + __entry->vcpu_id = vcpu_id; 1163 + __entry->timer_index = timer_index; 1164 + __entry->count = count; 1165 + __entry->host = host; 1166 + ), 1167 + 1168 + TP_printk("vcpu_id %d timer %d count %llu host %d", 1169 + __entry->vcpu_id, __entry->timer_index, __entry->count, 1170 + __entry->host) 1171 + ); 1172 + 1173 + /* 1174 + * Tracepoint for stimer_start(periodic timer case). 1175 + */ 1176 + TRACE_EVENT(kvm_hv_stimer_start_periodic, 1177 + TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time), 1178 + TP_ARGS(vcpu_id, timer_index, time_now, exp_time), 1179 + 1180 + TP_STRUCT__entry( 1181 + __field(int, vcpu_id) 1182 + __field(int, timer_index) 1183 + __field(u64, time_now) 1184 + __field(u64, exp_time) 1185 + ), 1186 + 1187 + TP_fast_assign( 1188 + __entry->vcpu_id = vcpu_id; 1189 + __entry->timer_index = timer_index; 1190 + __entry->time_now = time_now; 1191 + __entry->exp_time = exp_time; 1192 + ), 1193 + 1194 + TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu", 1195 + __entry->vcpu_id, __entry->timer_index, __entry->time_now, 1196 + __entry->exp_time) 1197 + ); 1198 + 1199 + /* 1200 + * Tracepoint for stimer_start(one-shot timer case). 
1201 + */ 1202 + TRACE_EVENT(kvm_hv_stimer_start_one_shot, 1203 + TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count), 1204 + TP_ARGS(vcpu_id, timer_index, time_now, count), 1205 + 1206 + TP_STRUCT__entry( 1207 + __field(int, vcpu_id) 1208 + __field(int, timer_index) 1209 + __field(u64, time_now) 1210 + __field(u64, count) 1211 + ), 1212 + 1213 + TP_fast_assign( 1214 + __entry->vcpu_id = vcpu_id; 1215 + __entry->timer_index = timer_index; 1216 + __entry->time_now = time_now; 1217 + __entry->count = count; 1218 + ), 1219 + 1220 + TP_printk("vcpu_id %d timer %d time_now %llu count %llu", 1221 + __entry->vcpu_id, __entry->timer_index, __entry->time_now, 1222 + __entry->count) 1223 + ); 1224 + 1225 + /* 1226 + * Tracepoint for stimer_timer_callback. 1227 + */ 1228 + TRACE_EVENT(kvm_hv_stimer_callback, 1229 + TP_PROTO(int vcpu_id, int timer_index), 1230 + TP_ARGS(vcpu_id, timer_index), 1231 + 1232 + TP_STRUCT__entry( 1233 + __field(int, vcpu_id) 1234 + __field(int, timer_index) 1235 + ), 1236 + 1237 + TP_fast_assign( 1238 + __entry->vcpu_id = vcpu_id; 1239 + __entry->timer_index = timer_index; 1240 + ), 1241 + 1242 + TP_printk("vcpu_id %d timer %d", 1243 + __entry->vcpu_id, __entry->timer_index) 1244 + ); 1245 + 1246 + /* 1247 + * Tracepoint for stimer_expiration. 1248 + */ 1249 + TRACE_EVENT(kvm_hv_stimer_expiration, 1250 + TP_PROTO(int vcpu_id, int timer_index, int msg_send_result), 1251 + TP_ARGS(vcpu_id, timer_index, msg_send_result), 1252 + 1253 + TP_STRUCT__entry( 1254 + __field(int, vcpu_id) 1255 + __field(int, timer_index) 1256 + __field(int, msg_send_result) 1257 + ), 1258 + 1259 + TP_fast_assign( 1260 + __entry->vcpu_id = vcpu_id; 1261 + __entry->timer_index = timer_index; 1262 + __entry->msg_send_result = msg_send_result; 1263 + ), 1264 + 1265 + TP_printk("vcpu_id %d timer %d msg send result %d", 1266 + __entry->vcpu_id, __entry->timer_index, 1267 + __entry->msg_send_result) 1268 + ); 1269 + 1270 + /* 1271 + * Tracepoint for stimer_cleanup. 
1272 + */ 1273 + TRACE_EVENT(kvm_hv_stimer_cleanup, 1274 + TP_PROTO(int vcpu_id, int timer_index), 1275 + TP_ARGS(vcpu_id, timer_index), 1276 + 1277 + TP_STRUCT__entry( 1278 + __field(int, vcpu_id) 1279 + __field(int, timer_index) 1280 + ), 1281 + 1282 + TP_fast_assign( 1283 + __entry->vcpu_id = vcpu_id; 1284 + __entry->timer_index = timer_index; 1285 + ), 1286 + 1287 + TP_printk("vcpu_id %d timer %d", 1288 + __entry->vcpu_id, __entry->timer_index) 1026 1289 ); 1027 1290 1028 1291 #endif /* _TRACE_KVM_H */
+131 -71
arch/x86/kvm/vmx.c
··· 19 19 #include "irq.h" 20 20 #include "mmu.h" 21 21 #include "cpuid.h" 22 + #include "lapic.h" 22 23 23 24 #include <linux/kvm_host.h> 24 25 #include <linux/module.h> ··· 863 862 static void kvm_cpu_vmxoff(void); 864 863 static bool vmx_mpx_supported(void); 865 864 static bool vmx_xsaves_supported(void); 866 - static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); 867 865 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); 868 866 static void vmx_set_segment(struct kvm_vcpu *vcpu, 869 867 struct kvm_segment *var, int seg); ··· 870 870 struct kvm_segment *var, int seg); 871 871 static bool guest_state_valid(struct kvm_vcpu *vcpu); 872 872 static u32 vmx_segment_access_rights(struct kvm_segment *var); 873 - static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); 874 873 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); 875 874 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); 876 875 static int alloc_identity_pagetable(struct kvm *kvm); ··· 1447 1448 } 1448 1449 } 1449 1450 1450 - static __always_inline unsigned long vmcs_readl(unsigned long field) 1451 + static __always_inline void vmcs_check16(unsigned long field) 1452 + { 1453 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, 1454 + "16-bit accessor invalid for 64-bit field"); 1455 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, 1456 + "16-bit accessor invalid for 64-bit high field"); 1457 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, 1458 + "16-bit accessor invalid for 32-bit high field"); 1459 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, 1460 + "16-bit accessor invalid for natural width field"); 1461 + } 1462 + 1463 + static __always_inline void vmcs_check32(unsigned long field) 1464 + { 1465 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, 1466 + "32-bit accessor invalid for 16-bit field"); 1467 + 
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, 1468 + "32-bit accessor invalid for natural width field"); 1469 + } 1470 + 1471 + static __always_inline void vmcs_check64(unsigned long field) 1472 + { 1473 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, 1474 + "64-bit accessor invalid for 16-bit field"); 1475 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, 1476 + "64-bit accessor invalid for 64-bit high field"); 1477 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, 1478 + "64-bit accessor invalid for 32-bit field"); 1479 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, 1480 + "64-bit accessor invalid for natural width field"); 1481 + } 1482 + 1483 + static __always_inline void vmcs_checkl(unsigned long field) 1484 + { 1485 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, 1486 + "Natural width accessor invalid for 16-bit field"); 1487 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, 1488 + "Natural width accessor invalid for 64-bit field"); 1489 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, 1490 + "Natural width accessor invalid for 64-bit high field"); 1491 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, 1492 + "Natural width accessor invalid for 32-bit field"); 1493 + } 1494 + 1495 + static __always_inline unsigned long __vmcs_readl(unsigned long field) 1451 1496 { 1452 1497 unsigned long value; 1453 1498 ··· 1502 1459 1503 1460 static __always_inline u16 vmcs_read16(unsigned long field) 1504 1461 { 1505 - return vmcs_readl(field); 1462 + vmcs_check16(field); 1463 + return __vmcs_readl(field); 1506 1464 } 1507 1465 1508 1466 static __always_inline u32 vmcs_read32(unsigned long field) 1509 1467 { 1510 - return vmcs_readl(field); 1468 + vmcs_check32(field); 1469 + return __vmcs_readl(field); 1511 
1470 } 1512 1471 1513 1472 static __always_inline u64 vmcs_read64(unsigned long field) 1514 1473 { 1474 + vmcs_check64(field); 1515 1475 #ifdef CONFIG_X86_64 1516 - return vmcs_readl(field); 1476 + return __vmcs_readl(field); 1517 1477 #else 1518 - return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32); 1478 + return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32); 1519 1479 #endif 1480 + } 1481 + 1482 + static __always_inline unsigned long vmcs_readl(unsigned long field) 1483 + { 1484 + vmcs_checkl(field); 1485 + return __vmcs_readl(field); 1520 1486 } 1521 1487 1522 1488 static noinline void vmwrite_error(unsigned long field, unsigned long value) ··· 1535 1483 dump_stack(); 1536 1484 } 1537 1485 1538 - static void vmcs_writel(unsigned long field, unsigned long value) 1486 + static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) 1539 1487 { 1540 1488 u8 error; 1541 1489 ··· 1545 1493 vmwrite_error(field, value); 1546 1494 } 1547 1495 1548 - static void vmcs_write16(unsigned long field, u16 value) 1496 + static __always_inline void vmcs_write16(unsigned long field, u16 value) 1549 1497 { 1550 - vmcs_writel(field, value); 1498 + vmcs_check16(field); 1499 + __vmcs_writel(field, value); 1551 1500 } 1552 1501 1553 - static void vmcs_write32(unsigned long field, u32 value) 1502 + static __always_inline void vmcs_write32(unsigned long field, u32 value) 1554 1503 { 1555 - vmcs_writel(field, value); 1504 + vmcs_check32(field); 1505 + __vmcs_writel(field, value); 1556 1506 } 1557 1507 1558 - static void vmcs_write64(unsigned long field, u64 value) 1508 + static __always_inline void vmcs_write64(unsigned long field, u64 value) 1559 1509 { 1560 - vmcs_writel(field, value); 1510 + vmcs_check64(field); 1511 + __vmcs_writel(field, value); 1561 1512 #ifndef CONFIG_X86_64 1562 1513 asm volatile (""); 1563 - vmcs_writel(field+1, value >> 32); 1514 + __vmcs_writel(field+1, value >> 32); 1564 1515 #endif 1565 1516 } 1566 1517 1567 - static 
void vmcs_clear_bits(unsigned long field, u32 mask) 1518 + static __always_inline void vmcs_writel(unsigned long field, unsigned long value) 1568 1519 { 1569 - vmcs_writel(field, vmcs_readl(field) & ~mask); 1520 + vmcs_checkl(field); 1521 + __vmcs_writel(field, value); 1570 1522 } 1571 1523 1572 - static void vmcs_set_bits(unsigned long field, u32 mask) 1524 + static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) 1573 1525 { 1574 - vmcs_writel(field, vmcs_readl(field) | mask); 1526 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, 1527 + "vmcs_clear_bits does not support 64-bit fields"); 1528 + __vmcs_writel(field, __vmcs_readl(field) & ~mask); 1529 + } 1530 + 1531 + static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) 1532 + { 1533 + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, 1534 + "vmcs_set_bits does not support 64-bit fields"); 1535 + __vmcs_writel(field, __vmcs_readl(field) | mask); 1575 1536 } 1576 1537 1577 1538 static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) ··· 2563 2498 vmx->nested.nested_vmx_pinbased_ctls_high |= 2564 2499 PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | 2565 2500 PIN_BASED_VMX_PREEMPTION_TIMER; 2566 - if (vmx_cpu_uses_apicv(&vmx->vcpu)) 2501 + if (kvm_vcpu_apicv_active(&vmx->vcpu)) 2567 2502 vmx->nested.nested_vmx_pinbased_ctls_high |= 2568 2503 PIN_BASED_POSTED_INTR; 2569 2504 ··· 4527 4462 msr, MSR_TYPE_W); 4528 4463 } 4529 4464 4530 - static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) 4465 + static bool vmx_get_enable_apicv(void) 4531 4466 { 4532 - return enable_apicv && lapic_in_kernel(vcpu); 4467 + return enable_apicv; 4533 4468 } 4534 4469 4535 4470 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) ··· 4651 4586 kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); 4652 4587 } 4653 4588 4654 - static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) 4655 - { 4656 - return; 4657 - } 4658 - 4659 
4589 /* 4660 4590 * Set up the vmcs's constant host-state fields, i.e., host-state fields that 4661 4591 * will not change in the lifetime of the guest. ··· 4720 4660 { 4721 4661 u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; 4722 4662 4723 - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) 4663 + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) 4724 4664 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; 4725 4665 return pin_based_exec_ctrl; 4666 + } 4667 + 4668 + static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) 4669 + { 4670 + struct vcpu_vmx *vmx = to_vmx(vcpu); 4671 + 4672 + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); 4726 4673 } 4727 4674 4728 4675 static u32 vmx_exec_control(struct vcpu_vmx *vmx) ··· 4770 4703 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; 4771 4704 if (!ple_gap) 4772 4705 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; 4773 - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) 4706 + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) 4774 4707 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | 4775 4708 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); 4776 4709 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; ··· 4834 4767 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, 4835 4768 vmx_secondary_exec_control(vmx)); 4836 4769 4837 - if (vmx_cpu_uses_apicv(&vmx->vcpu)) { 4770 + if (kvm_vcpu_apicv_active(&vmx->vcpu)) { 4838 4771 vmcs_write64(EOI_EXIT_BITMAP0, 0); 4839 4772 vmcs_write64(EOI_EXIT_BITMAP1, 0); 4840 4773 vmcs_write64(EOI_EXIT_BITMAP2, 0); ··· 4842 4775 4843 4776 vmcs_write16(GUEST_INTR_STATUS, 0); 4844 4777 4845 - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); 4778 + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); 4846 4779 vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); 4847 4780 } 4848 4781 ··· 4934 4867 4935 4868 seg_setup(VCPU_SREG_CS); 4936 4869 vmcs_write16(GUEST_CS_SELECTOR, 0xf000); 4937 - vmcs_write32(GUEST_CS_BASE, 0xffff0000); 4870 + vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); 4938 4871 4939 4872 
seg_setup(VCPU_SREG_DS); 4940 4873 seg_setup(VCPU_SREG_ES); ··· 4970 4903 4971 4904 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); 4972 4905 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); 4973 - vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); 4906 + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); 4974 4907 4975 4908 setup_msrs(vmx); 4976 4909 ··· 4986 4919 4987 4920 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); 4988 4921 4989 - if (vmx_cpu_uses_apicv(vcpu)) 4922 + if (kvm_vcpu_apicv_active(vcpu)) 4990 4923 memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); 4991 4924 4992 4925 if (vmx->vpid != 0) ··· 6268 6201 kvm_has_tsc_control = true; 6269 6202 kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; 6270 6203 kvm_tsc_scaling_ratio_frac_bits = 48; 6271 - } 6272 - 6273 - if (enable_apicv) 6274 - kvm_x86_ops->update_cr8_intercept = NULL; 6275 - else { 6276 - kvm_x86_ops->hwapic_irr_update = NULL; 6277 - kvm_x86_ops->hwapic_isr_update = NULL; 6278 - kvm_x86_ops->deliver_posted_interrupt = NULL; 6279 - kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; 6280 6204 } 6281 6205 6282 6206 vmx_disable_intercept_for_msr(MSR_FS_BASE, false); ··· 7959 7901 u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); 7960 7902 u32 secondary_exec_control = 0; 7961 7903 unsigned long cr4 = vmcs_readl(GUEST_CR4); 7962 - u64 efer = vmcs_readl(GUEST_IA32_EFER); 7904 + u64 efer = vmcs_read64(GUEST_IA32_EFER); 7963 7905 int i, n; 7964 7906 7965 7907 if (cpu_has_secondary_exec_ctrls()) ··· 7975 7917 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && 7976 7918 (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) 7977 7919 { 7978 - pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n", 7979 - vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1)); 7980 - pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n", 7981 - vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3)); 7920 + pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", 7921 + vmcs_read64(GUEST_PDPTR0), 
vmcs_read64(GUEST_PDPTR1)); 7922 + pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", 7923 + vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); 7982 7924 } 7983 7925 pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", 7984 7926 vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); ··· 7999 7941 vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); 8000 7942 if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || 8001 7943 (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) 8002 - pr_err("EFER = 0x%016llx PAT = 0x%016lx\n", 8003 - efer, vmcs_readl(GUEST_IA32_PAT)); 8004 - pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n", 8005 - vmcs_readl(GUEST_IA32_DEBUGCTL), 7944 + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", 7945 + efer, vmcs_read64(GUEST_IA32_PAT)); 7946 + pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", 7947 + vmcs_read64(GUEST_IA32_DEBUGCTL), 8006 7948 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); 8007 7949 if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) 8008 - pr_err("PerfGlobCtl = 0x%016lx\n", 8009 - vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL)); 7950 + pr_err("PerfGlobCtl = 0x%016llx\n", 7951 + vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); 8010 7952 if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) 8011 - pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS)); 7953 + pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); 8012 7954 pr_err("Interruptibility = %08x ActivityState = %08x\n", 8013 7955 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), 8014 7956 vmcs_read32(GUEST_ACTIVITY_STATE)); ··· 8037 7979 vmcs_read32(HOST_IA32_SYSENTER_CS), 8038 7980 vmcs_readl(HOST_IA32_SYSENTER_EIP)); 8039 7981 if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) 8040 - pr_err("EFER = 0x%016lx PAT = 0x%016lx\n", 8041 - vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT)); 7982 + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", 7983 + vmcs_read64(HOST_IA32_EFER), 7984 + vmcs_read64(HOST_IA32_PAT)); 8042 7985 if (vmexit_ctl & 
VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) 8043 - pr_err("PerfGlobCtl = 0x%016lx\n", 8044 - vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL)); 7986 + pr_err("PerfGlobCtl = 0x%016llx\n", 7987 + vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); 8045 7988 8046 7989 pr_err("*** Control State ***\n"); 8047 7990 pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", ··· 8065 8006 pr_err("IDTVectoring: info=%08x errcode=%08x\n", 8066 8007 vmcs_read32(IDT_VECTORING_INFO_FIELD), 8067 8008 vmcs_read32(IDT_VECTORING_ERROR_CODE)); 8068 - pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); 8009 + pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); 8069 8010 if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) 8070 - pr_err("TSC Multiplier = 0x%016lx\n", 8071 - vmcs_readl(TSC_MULTIPLIER)); 8011 + pr_err("TSC Multiplier = 0x%016llx\n", 8012 + vmcs_read64(TSC_MULTIPLIER)); 8072 8013 if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) 8073 8014 pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); 8074 8015 if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) 8075 8016 pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); 8076 8017 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) 8077 - pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER)); 8018 + pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); 8078 8019 n = vmcs_read32(CR3_TARGET_COUNT); 8079 8020 for (i = 0; i + 1 < n; i += 4) 8080 8021 pr_err("CR3 target%u=%016lx target%u=%016lx\n", ··· 8213 8154 * apicv 8214 8155 */ 8215 8156 if (!cpu_has_vmx_virtualize_x2apic_mode() || 8216 - !vmx_cpu_uses_apicv(vcpu)) 8157 + !kvm_vcpu_apicv_active(vcpu)) 8217 8158 return; 8218 8159 8219 8160 if (!cpu_need_tpr_shadow(vcpu)) ··· 8318 8259 } 8319 8260 } 8320 8261 8321 - static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu) 8262 + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 8322 8263 { 8323 - u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap; 8324 - if 
(!vmx_cpu_uses_apicv(vcpu)) 8264 + if (!kvm_vcpu_apicv_active(vcpu)) 8325 8265 return; 8326 8266 8327 8267 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); ··· 8990 8932 best->ebx &= ~bit(X86_FEATURE_INVPCID); 8991 8933 } 8992 8934 8993 - vmcs_set_secondary_exec_control(secondary_exec_ctl); 8935 + if (cpu_has_secondary_exec_ctrls()) 8936 + vmcs_set_secondary_exec_control(secondary_exec_ctl); 8994 8937 8995 8938 if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { 8996 8939 if (guest_cpuid_has_pcommit(vcpu)) ··· 9567 9508 */ 9568 9509 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; 9569 9510 vmx->nested.pi_pending = false; 9570 - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); 9511 + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); 9571 9512 vmcs_write64(POSTED_INTR_DESC_ADDR, 9572 9513 page_to_phys(vmx->nested.pi_desc_page) + 9573 9514 (unsigned long)(vmcs12->posted_intr_desc_addr & ··· 10228 10169 * Additionally, restore L2's PDPTR to vmcs12. 10229 10170 */ 10230 10171 if (enable_ept) { 10231 - vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3); 10172 + vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); 10232 10173 vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); 10233 10174 vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); 10234 10175 vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); ··· 10864 10805 .update_cr8_intercept = update_cr8_intercept, 10865 10806 .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, 10866 10807 .set_apic_access_page_addr = vmx_set_apic_access_page_addr, 10867 - .cpu_uses_apicv = vmx_cpu_uses_apicv, 10808 + .get_enable_apicv = vmx_get_enable_apicv, 10809 + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, 10868 10810 .load_eoi_exitmap = vmx_load_eoi_exitmap, 10869 10811 .hwapic_irr_update = vmx_hwapic_irr_update, 10870 10812 .hwapic_isr_update = vmx_hwapic_isr_update,
+96 -14
arch/x86/kvm/x86.c
··· 951 951 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 952 952 #endif 953 953 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, 954 - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS 954 + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, 955 955 }; 956 956 957 957 static unsigned num_msrs_to_save; ··· 966 966 HV_X64_MSR_RESET, 967 967 HV_X64_MSR_VP_INDEX, 968 968 HV_X64_MSR_VP_RUNTIME, 969 + HV_X64_MSR_SCONTROL, 970 + HV_X64_MSR_STIMER0_CONFIG, 969 971 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, 970 972 MSR_KVM_PV_EOI_EN, 971 973 ··· 1169 1167 1170 1168 ++version; 1171 1169 1172 - kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 1170 + if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version))) 1171 + return; 1173 1172 1174 1173 /* 1175 1174 * The guest calculates current wall clock time by adding ··· 1684 1681 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode, 1685 1682 vcpus_matched); 1686 1683 #endif 1684 + } 1685 + 1686 + void kvm_make_mclock_inprogress_request(struct kvm *kvm) 1687 + { 1688 + kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); 1687 1689 } 1688 1690 1689 1691 static void kvm_gen_update_masterclock(struct kvm *kvm) ··· 2206 2198 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: 2207 2199 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 2208 2200 case HV_X64_MSR_CRASH_CTL: 2201 + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: 2209 2202 return kvm_hv_set_msr_common(vcpu, msr, data, 2210 2203 msr_info->host_initiated); 2211 2204 case MSR_IA32_BBL_CR_CTL3: ··· 2411 2402 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: 2412 2403 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: 2413 2404 case HV_X64_MSR_CRASH_CTL: 2405 + case HV_X64_MSR_STIMER0_CONFIG ... 
HV_X64_MSR_STIMER3_COUNT: 2414 2406 return kvm_hv_get_msr_common(vcpu, 2415 2407 msr_info->index, &msr_info->data); 2416 2408 break; ··· 2551 2541 case KVM_CAP_HYPERV: 2552 2542 case KVM_CAP_HYPERV_VAPIC: 2553 2543 case KVM_CAP_HYPERV_SPIN: 2544 + case KVM_CAP_HYPERV_SYNIC: 2554 2545 case KVM_CAP_PCI_SEGMENT: 2555 2546 case KVM_CAP_DEBUGREGS: 2556 2547 case KVM_CAP_X86_ROBUST_SINGLESTEP: ··· 2704 2693 return kvm_arch_has_noncoherent_dma(vcpu->kvm); 2705 2694 } 2706 2695 2696 + static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) 2697 + { 2698 + set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); 2699 + } 2700 + 2707 2701 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 2708 2702 { 2709 2703 /* Address WBINVD may be executed by guest */ ··· 2764 2748 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 2765 2749 struct kvm_lapic_state *s) 2766 2750 { 2767 - kvm_x86_ops->sync_pir_to_irr(vcpu); 2751 + if (vcpu->arch.apicv_active) 2752 + kvm_x86_ops->sync_pir_to_irr(vcpu); 2753 + 2768 2754 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); 2769 2755 2770 2756 return 0; ··· 3209 3191 return 0; 3210 3192 } 3211 3193 3194 + static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, 3195 + struct kvm_enable_cap *cap) 3196 + { 3197 + if (cap->flags) 3198 + return -EINVAL; 3199 + 3200 + switch (cap->cap) { 3201 + case KVM_CAP_HYPERV_SYNIC: 3202 + return kvm_hv_activate_synic(vcpu); 3203 + default: 3204 + return -EINVAL; 3205 + } 3206 + } 3207 + 3212 3208 long kvm_arch_vcpu_ioctl(struct file *filp, 3213 3209 unsigned int ioctl, unsigned long arg) 3214 3210 { ··· 3486 3454 case KVM_KVMCLOCK_CTRL: { 3487 3455 r = kvm_set_guest_paused(vcpu); 3488 3456 goto out; 3457 + } 3458 + case KVM_ENABLE_CAP: { 3459 + struct kvm_enable_cap cap; 3460 + 3461 + r = -EFAULT; 3462 + if (copy_from_user(&cap, argp, sizeof(cap))) 3463 + goto out; 3464 + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); 3465 + break; 3489 3466 } 3490 3467 default: 3491 3468 r = -EINVAL; ··· 4047 4006 
4048 4007 /* 4049 4008 * Even MSRs that are valid in the host may not be exposed 4050 - * to the guests in some cases. We could work around this 4051 - * in VMX with the generic MSR save/load machinery, but it 4052 - * is not really worthwhile since it will really only 4053 - * happen with nested virtualization. 4009 + * to the guests in some cases. 4054 4010 */ 4055 4011 switch (msrs_to_save[i]) { 4056 4012 case MSR_IA32_BNDCFGS: 4057 4013 if (!kvm_x86_ops->mpx_supported()) 4014 + continue; 4015 + break; 4016 + case MSR_TSC_AUX: 4017 + if (!kvm_x86_ops->rdtscp_supported()) 4058 4018 continue; 4059 4019 break; 4060 4020 default: ··· 5914 5872 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); 5915 5873 } 5916 5874 5875 + void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) 5876 + { 5877 + vcpu->arch.apicv_active = false; 5878 + kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); 5879 + } 5880 + 5917 5881 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 5918 5882 { 5919 5883 unsigned long nr, a0, a1, a2, a3, ret; ··· 6011 5963 return; 6012 5964 6013 5965 if (!vcpu->arch.apic) 5966 + return; 5967 + 5968 + if (vcpu->arch.apicv_active) 6014 5969 return; 6015 5970 6016 5971 if (!vcpu->arch.apic->vapic_addr) ··· 6352 6301 kvm_mmu_reset_context(vcpu); 6353 6302 } 6354 6303 6304 + void kvm_make_scan_ioapic_request(struct kvm *kvm) 6305 + { 6306 + kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); 6307 + } 6308 + 6355 6309 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) 6356 6310 { 6311 + u64 eoi_exit_bitmap[4]; 6312 + 6357 6313 if (!kvm_apic_hw_enabled(vcpu->arch.apic)) 6358 6314 return; 6359 6315 6360 - memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); 6316 + bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); 6361 6317 6362 6318 if (irqchip_split(vcpu->kvm)) 6363 - kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); 6319 + kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); 6364 6320 else { 6365 - kvm_x86_ops->sync_pir_to_irr(vcpu); 6366 - 
kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); 6321 + if (vcpu->arch.apicv_active) 6322 + kvm_x86_ops->sync_pir_to_irr(vcpu); 6323 + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); 6367 6324 } 6368 - kvm_x86_ops->load_eoi_exitmap(vcpu); 6325 + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, 6326 + vcpu_to_synic(vcpu)->vec_bitmap, 256); 6327 + kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); 6369 6328 } 6370 6329 6371 6330 static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) ··· 6483 6422 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { 6484 6423 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); 6485 6424 if (test_bit(vcpu->arch.pending_ioapic_eoi, 6486 - (void *) vcpu->arch.eoi_exit_bitmap)) { 6425 + vcpu->arch.ioapic_handled_vectors)) { 6487 6426 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; 6488 6427 vcpu->run->eoi.vector = 6489 6428 vcpu->arch.pending_ioapic_eoi; ··· 6507 6446 r = 0; 6508 6447 goto out; 6509 6448 } 6449 + if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) { 6450 + vcpu->run->exit_reason = KVM_EXIT_HYPERV; 6451 + vcpu->run->hyperv = vcpu->arch.hyperv.exit; 6452 + r = 0; 6453 + goto out; 6454 + } 6455 + 6456 + /* 6457 + * KVM_REQ_HV_STIMER has to be processed after 6458 + * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers 6459 + * depend on the guest clock being up-to-date 6460 + */ 6461 + if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) 6462 + kvm_hv_process_stimers(vcpu); 6510 6463 } 6511 6464 6512 6465 /* ··· 6532 6457 * Update architecture specific hints for APIC 6533 6458 * virtual interrupt delivery. 
6534 6459 */ 6535 - if (kvm_x86_ops->hwapic_irr_update) 6460 + if (vcpu->arch.apicv_active) 6536 6461 kvm_x86_ops->hwapic_irr_update(vcpu, 6537 6462 kvm_lapic_find_highest_irr(vcpu)); 6538 6463 } ··· 7603 7528 BUG_ON(vcpu->kvm == NULL); 7604 7529 kvm = vcpu->kvm; 7605 7530 7531 + vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(); 7606 7532 vcpu->arch.pv.pv_unhalted = false; 7607 7533 vcpu->arch.emulate_ctxt.ops = &emulate_ops; 7608 7534 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) ··· 7661 7585 7662 7586 vcpu->arch.pending_external_vector = -1; 7663 7587 7588 + kvm_hv_vcpu_init(vcpu); 7589 + 7664 7590 return 0; 7665 7591 7666 7592 fail_free_mce_banks: ··· 7681 7603 { 7682 7604 int idx; 7683 7605 7606 + kvm_hv_vcpu_uninit(vcpu); 7684 7607 kvm_pmu_destroy(vcpu); 7685 7608 kfree(vcpu->arch.mce_banks); 7686 7609 kvm_free_lapic(vcpu); ··· 8074 7995 8075 7996 if (kvm_arch_interrupt_allowed(vcpu) && 8076 7997 kvm_cpu_has_interrupt(vcpu)) 7998 + return true; 7999 + 8000 + if (kvm_hv_has_stimer_pending(vcpu)) 8077 8001 return true; 8078 8002 8079 8003 return false;
+1 -87
drivers/hv/hyperv_vmbus.h
··· 63 63 /* Define version of the synthetic interrupt controller. */ 64 64 #define HV_SYNIC_VERSION (1) 65 65 66 - /* Define synthetic interrupt controller message constants. */ 67 - #define HV_MESSAGE_SIZE (256) 68 - #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) 69 - #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) 70 66 #define HV_ANY_VP (0xFFFFFFFF) 71 67 72 68 /* Define synthetic interrupt controller flag constants. */ ··· 70 74 #define HV_EVENT_FLAGS_BYTE_COUNT (256) 71 75 #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32)) 72 76 73 - /* Define hypervisor message types. */ 74 - enum hv_message_type { 75 - HVMSG_NONE = 0x00000000, 76 - 77 - /* Memory access messages. */ 78 - HVMSG_UNMAPPED_GPA = 0x80000000, 79 - HVMSG_GPA_INTERCEPT = 0x80000001, 80 - 81 - /* Timer notification messages. */ 82 - HVMSG_TIMER_EXPIRED = 0x80000010, 83 - 84 - /* Error messages. */ 85 - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, 86 - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, 87 - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, 88 - 89 - /* Trace buffer complete messages. */ 90 - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, 91 - 92 - /* Platform-specific processor intercept messages. */ 93 - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, 94 - HVMSG_X64_MSR_INTERCEPT = 0x80010001, 95 - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, 96 - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, 97 - HVMSG_X64_APIC_EOI = 0x80010004, 98 - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 99 - }; 100 - 101 - #define HV_SYNIC_STIMER_COUNT (4) 102 - 103 77 /* Define invalid partition identifier. */ 104 78 #define HV_PARTITION_ID_INVALID ((u64)0x0) 105 - 106 - /* Define port identifier type. */ 107 - union hv_port_id { 108 - u32 asu32; 109 - struct { 110 - u32 id:24; 111 - u32 reserved:8; 112 - } u ; 113 - }; 114 79 115 80 /* Define port type. */ 116 81 enum hv_port_type { ··· 120 163 }; 121 164 }; 122 165 123 - /* Define synthetic interrupt controller message flags. 
*/ 124 - union hv_message_flags { 125 - u8 asu8; 126 - struct { 127 - u8 msg_pending:1; 128 - u8 reserved:7; 129 - }; 130 - }; 131 - 132 - /* Define synthetic interrupt controller message header. */ 133 - struct hv_message_header { 134 - enum hv_message_type message_type; 135 - u8 payload_size; 136 - union hv_message_flags message_flags; 137 - u8 reserved[2]; 138 - union { 139 - u64 sender; 140 - union hv_port_id port; 141 - }; 142 - }; 143 - 144 166 /* 145 167 * Timer configuration register. 146 168 */ ··· 136 200 }; 137 201 }; 138 202 139 - 140 - /* Define timer message payload structure. */ 141 - struct hv_timer_message_payload { 142 - u32 timer_index; 143 - u32 reserved; 144 - u64 expiration_time; /* When the timer expired */ 145 - u64 delivery_time; /* When the message was delivered */ 146 - }; 147 - 148 - /* Define synthetic interrupt controller message format. */ 149 - struct hv_message { 150 - struct hv_message_header header; 151 - union { 152 - u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; 153 - } u ; 154 - }; 155 - 156 203 /* Define the number of message buffers associated with each port. */ 157 204 #define HV_PORT_MESSAGE_BUFFER_COUNT (16) 158 - 159 - /* Define the synthetic interrupt message page layout. */ 160 - struct hv_message_page { 161 - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; 162 - }; 163 205 164 206 /* Define the synthetic interrupt controller event flags format. */ 165 207 union hv_synic_event_flags { ··· 261 347 struct hv_input_post_message { 262 348 union hv_connection_id connectionid; 263 349 u32 reserved; 264 - enum hv_message_type message_type; 350 + u32 message_type; 265 351 u32 payload_size; 266 352 u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; 267 353 };
+14 -2
drivers/s390/char/sclp_early.c
··· 40 40 u8 fac85; /* 85 */ 41 41 u8 _pad_86[91 - 86]; /* 86-90 */ 42 42 u8 flags; /* 91 */ 43 - u8 _pad_92[100 - 92]; /* 92-99 */ 43 + u8 _pad_92[99 - 92]; /* 92-98 */ 44 + u8 hamaxpow; /* 99 */ 44 45 u32 rnsize2; /* 100-103 */ 45 46 u64 rnmax2; /* 104-111 */ 46 - u8 _pad_112[120 - 112]; /* 112-119 */ 47 + u8 _pad_112[116 - 112]; /* 112-115 */ 48 + u8 fac116; /* 116 */ 49 + u8 _pad_117[119 - 117]; /* 117-118 */ 50 + u8 fac119; /* 119 */ 47 51 u16 hcpua; /* 120-121 */ 48 52 u8 _pad_122[4096 - 122]; /* 122-4095 */ 49 53 } __packed __aligned(PAGE_SIZE); ··· 112 108 sclp.facilities = sccb->facilities; 113 109 sclp.has_sprp = !!(sccb->fac84 & 0x02); 114 110 sclp.has_core_type = !!(sccb->fac84 & 0x01); 111 + sclp.has_esca = !!(sccb->fac116 & 0x08); 112 + sclp.has_hvs = !!(sccb->fac119 & 0x80); 115 113 if (sccb->fac85 & 0x02) 116 114 S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; 117 115 sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; 118 116 sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; 119 117 sclp.rzm <<= 20; 120 118 sclp.ibc = sccb->ibc; 119 + 120 + if (sccb->hamaxpow && sccb->hamaxpow < 64) 121 + sclp.hamax = (1UL << sccb->hamaxpow) - 1; 122 + else 123 + sclp.hamax = U64_MAX; 121 124 122 125 if (!sccb->hcpua) { 123 126 if (MACHINE_IS_VM) ··· 142 131 continue; 143 132 sclp.has_siif = cpue->siif; 144 133 sclp.has_sigpif = cpue->sigpif; 134 + sclp.has_sief2 = cpue->sief2; 145 135 break; 146 136 } 147 137
+6
include/clocksource/arm_arch_timer.h
··· 23 23 #define ARCH_TIMER_CTRL_IT_MASK (1 << 1) 24 24 #define ARCH_TIMER_CTRL_IT_STAT (1 << 2) 25 25 26 + #define CNTHCTL_EL1PCTEN (1 << 0) 27 + #define CNTHCTL_EL1PCEN (1 << 1) 28 + #define CNTHCTL_EVNTEN (1 << 2) 29 + #define CNTHCTL_EVNTDIR (1 << 3) 30 + #define CNTHCTL_EVNTI (0xF << 4) 31 + 26 32 enum arch_timer_reg { 27 33 ARCH_TIMER_REG_CTRL, 28 34 ARCH_TIMER_REG_TVAL,
+6
include/kvm/arm_vgic.h
··· 279 279 u32 vgic_lr[VGIC_V2_MAX_LRS]; 280 280 }; 281 281 282 + /* 283 + * LRs are stored in reverse order in memory. make sure we index them 284 + * correctly. 285 + */ 286 + #define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) 287 + 282 288 struct vgic_v3_cpu_if { 283 289 #ifdef CONFIG_KVM_ARM_VGIC_V3 284 290 u32 vgic_hcr;
+24 -42
include/linux/kvm_host.h
··· 111 111 } 112 112 113 113 /* 114 - * vcpu->requests bit members 114 + * Architecture-independent vcpu->requests bit members 115 + * Bits 4-7 are reserved for more arch-independent bits. 115 116 */ 116 117 #define KVM_REQ_TLB_FLUSH 0 117 - #define KVM_REQ_MIGRATE_TIMER 1 118 - #define KVM_REQ_REPORT_TPR_ACCESS 2 119 - #define KVM_REQ_MMU_RELOAD 3 120 - #define KVM_REQ_TRIPLE_FAULT 4 121 - #define KVM_REQ_PENDING_TIMER 5 122 - #define KVM_REQ_UNHALT 6 123 - #define KVM_REQ_MMU_SYNC 7 124 - #define KVM_REQ_CLOCK_UPDATE 8 125 - #define KVM_REQ_KICK 9 126 - #define KVM_REQ_DEACTIVATE_FPU 10 127 - #define KVM_REQ_EVENT 11 128 - #define KVM_REQ_APF_HALT 12 129 - #define KVM_REQ_STEAL_UPDATE 13 130 - #define KVM_REQ_NMI 14 131 - #define KVM_REQ_PMU 15 132 - #define KVM_REQ_PMI 16 133 - #define KVM_REQ_WATCHDOG 17 134 - #define KVM_REQ_MASTERCLOCK_UPDATE 18 135 - #define KVM_REQ_MCLOCK_INPROGRESS 19 136 - #define KVM_REQ_EPR_EXIT 20 137 - #define KVM_REQ_SCAN_IOAPIC 21 138 - #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 139 - #define KVM_REQ_ENABLE_IBS 23 140 - #define KVM_REQ_DISABLE_IBS 24 141 - #define KVM_REQ_APIC_PAGE_RELOAD 25 142 - #define KVM_REQ_SMI 26 143 - #define KVM_REQ_HV_CRASH 27 144 - #define KVM_REQ_IOAPIC_EOI_EXIT 28 145 - #define KVM_REQ_HV_RESET 29 118 + #define KVM_REQ_MMU_RELOAD 1 119 + #define KVM_REQ_PENDING_TIMER 2 120 + #define KVM_REQ_UNHALT 3 146 121 147 122 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 148 123 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 ··· 293 318 u32 adapter_id; 294 319 }; 295 320 321 + struct kvm_hv_sint { 322 + u32 vcpu; 323 + u32 sint; 324 + }; 325 + 296 326 struct kvm_kernel_irq_routing_entry { 297 327 u32 gsi; 298 328 u32 type; ··· 311 331 } irqchip; 312 332 struct msi_msg msi; 313 333 struct kvm_s390_adapter_int adapter; 334 + struct kvm_hv_sint hv_sint; 314 335 }; 315 336 struct hlist_node link; 316 337 }; ··· 420 439 421 440 /* The guest did something we don't support. */ 422 441 #define vcpu_unimpl(vcpu, fmt, ...) 
\ 423 - kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) 442 + kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt, \ 443 + (vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__) 424 444 425 445 #define vcpu_debug(vcpu, fmt, ...) \ 426 446 kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) 447 + #define vcpu_err(vcpu, fmt, ...) \ 448 + kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) 427 449 428 450 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) 429 451 { ··· 449 465 struct kvm_vcpu *vcpu; 450 466 int i; 451 467 468 + if (id < 0 || id >= KVM_MAX_VCPUS) 469 + return NULL; 470 + vcpu = kvm_get_vcpu(kvm, id); 471 + if (vcpu && vcpu->vcpu_id == id) 472 + return vcpu; 452 473 kvm_for_each_vcpu(i, vcpu, kvm) 453 474 if (vcpu->vcpu_id == id) 454 475 return vcpu; ··· 473 484 474 485 #ifdef __KVM_HAVE_IOAPIC 475 486 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); 476 - void kvm_arch_irq_routing_update(struct kvm *kvm); 487 + void kvm_arch_post_irq_routing_update(struct kvm *kvm); 477 488 #else 478 489 static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) 479 490 { 480 491 } 481 - static inline void kvm_arch_irq_routing_update(struct kvm *kvm) 492 + static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) 482 493 { 483 494 } 484 495 #endif ··· 623 634 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); 624 635 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); 625 636 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); 626 - int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); 637 + bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); 627 638 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); 628 639 void mark_page_dirty(struct kvm *kvm, gfn_t gfn); 629 640 ··· 657 668 658 669 void kvm_flush_remote_tlbs(struct kvm *kvm); 659 670 void kvm_reload_remote_mmus(struct kvm *kvm); 660 - void kvm_make_mclock_inprogress_request(struct kvm *kvm); 661 - 
void kvm_make_scan_ioapic_request(struct kvm *kvm); 662 671 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); 663 672 664 673 long kvm_arch_dev_ioctl(struct file *filp, ··· 977 990 return kvm_is_error_hva(hva); 978 991 } 979 992 980 - static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) 981 - { 982 - set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); 983 - } 984 - 985 993 enum kvm_stat_kind { 986 994 KVM_STAT_VM, 987 995 KVM_STAT_VCPU, ··· 986 1004 const char *name; 987 1005 int offset; 988 1006 enum kvm_stat_kind kind; 989 - struct dentry *dentry; 990 1007 }; 991 1008 extern struct kvm_stats_debugfs_item debugfs_entries[]; 992 1009 extern struct dentry *kvm_debugfs_dir; ··· 1072 1091 { 1073 1092 } 1074 1093 #endif 1094 + void kvm_arch_irq_routing_update(struct kvm *kvm); 1075 1095 1076 1096 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 1077 1097 {
+2 -4
include/linux/kvm_para.h
··· 4 4 #include <uapi/linux/kvm_para.h> 5 5 6 6 7 - static inline int kvm_para_has_feature(unsigned int feature) 7 + static inline bool kvm_para_has_feature(unsigned int feature) 8 8 { 9 - if (kvm_arch_para_features() & (1UL << feature)) 10 - return 1; 11 - return 0; 9 + return !!(kvm_arch_para_features() & (1UL << feature)); 12 10 } 13 11 #endif /* __LINUX_KVM_PARA_H */
+26
include/uapi/linux/kvm.h
··· 154 154 __u32 flags; 155 155 __u32 reserved[9]; 156 156 }; 157 + 158 + struct kvm_hyperv_exit { 159 + #define KVM_EXIT_HYPERV_SYNIC 1 160 + __u32 type; 161 + union { 162 + struct { 163 + __u32 msr; 164 + __u64 control; 165 + __u64 evt_page; 166 + __u64 msg_page; 167 + } synic; 168 + } u; 169 + }; 170 + 157 171 #define KVM_S390_GET_SKEYS_NONE 1 158 172 #define KVM_S390_SKEYS_MAX 1048576 159 173 ··· 198 184 #define KVM_EXIT_SYSTEM_EVENT 24 199 185 #define KVM_EXIT_S390_STSI 25 200 186 #define KVM_EXIT_IOAPIC_EOI 26 187 + #define KVM_EXIT_HYPERV 27 201 188 202 189 /* For KVM_EXIT_INTERNAL_ERROR */ 203 190 /* Emulate instruction failed. */ ··· 353 338 struct { 354 339 __u8 vector; 355 340 } eoi; 341 + /* KVM_EXIT_HYPERV */ 342 + struct kvm_hyperv_exit hyperv; 356 343 /* Fix the size of the union. */ 357 344 char padding[256]; 358 345 }; ··· 848 831 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 849 832 #define KVM_CAP_SPLIT_IRQCHIP 121 850 833 #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 834 + #define KVM_CAP_HYPERV_SYNIC 123 835 + #define KVM_CAP_S390_RI 124 851 836 852 837 #ifdef KVM_CAP_IRQ_ROUTING 853 838 ··· 873 854 __u32 adapter_id; 874 855 }; 875 856 857 + struct kvm_irq_routing_hv_sint { 858 + __u32 vcpu; 859 + __u32 sint; 860 + }; 861 + 876 862 /* gsi routing entry types */ 877 863 #define KVM_IRQ_ROUTING_IRQCHIP 1 878 864 #define KVM_IRQ_ROUTING_MSI 2 879 865 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 866 + #define KVM_IRQ_ROUTING_HV_SINT 4 880 867 881 868 struct kvm_irq_routing_entry { 882 869 __u32 gsi; ··· 893 868 struct kvm_irq_routing_irqchip irqchip; 894 869 struct kvm_irq_routing_msi msi; 895 870 struct kvm_irq_routing_s390_adapter adapter; 871 + struct kvm_irq_routing_hv_sint hv_sint; 896 872 __u32 pad[8]; 897 873 } u; 898 874 };
+3 -8
virt/kvm/arm/vgic-v3.c
··· 28 28 29 29 #include <asm/kvm_emulate.h> 30 30 #include <asm/kvm_arm.h> 31 + #include <asm/kvm_asm.h> 31 32 #include <asm/kvm_mmu.h> 32 33 33 34 /* These are for GICv2 emulation only */ ··· 37 36 #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) 38 37 #define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1) 39 38 40 - /* 41 - * LRs are stored in reverse order in memory. make sure we index them 42 - * correctly. 43 - */ 44 - #define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) 45 - 46 39 static u32 ich_vtr_el2; 47 40 48 41 static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) 49 42 { 50 43 struct vgic_lr lr_desc; 51 - u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; 44 + u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)]; 52 45 53 46 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) 54 47 lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; ··· 106 111 lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; 107 112 } 108 113 109 - vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; 114 + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)] = lr_val; 110 115 111 116 if (!(lr_desc.state & LR_STATE_MASK)) 112 117 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+1 -1
virt/kvm/arm/vgic.c
··· 878 878 true); 879 879 } 880 880 881 - struct kvm_io_device_ops vgic_io_ops = { 881 + static struct kvm_io_device_ops vgic_io_ops = { 882 882 .read = vgic_handle_mmio_read, 883 883 .write = vgic_handle_mmio_write, 884 884 };
+1 -2
virt/kvm/async_pf.c
··· 57 57 58 58 void kvm_async_pf_deinit(void) 59 59 { 60 - if (async_pf_cache) 61 - kmem_cache_destroy(async_pf_cache); 60 + kmem_cache_destroy(async_pf_cache); 62 61 async_pf_cache = NULL; 63 62 } 64 63
+6 -1
virt/kvm/irqchip.c
··· 166 166 return r; 167 167 } 168 168 169 + void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm) 170 + { 171 + } 172 + 169 173 int kvm_set_irq_routing(struct kvm *kvm, 170 174 const struct kvm_irq_routing_entry *ue, 171 175 unsigned nr, ··· 223 219 old = kvm->irq_routing; 224 220 rcu_assign_pointer(kvm->irq_routing, new); 225 221 kvm_irq_routing_update(kvm); 222 + kvm_arch_irq_routing_update(kvm); 226 223 mutex_unlock(&kvm->irq_lock); 227 224 228 - kvm_arch_irq_routing_update(kvm); 225 + kvm_arch_post_irq_routing_update(kvm); 229 226 230 227 synchronize_srcu_expedited(&kvm->irq_srcu); 231 228
+12 -34
virt/kvm/kvm_main.c
··· 206 206 kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); 207 207 } 208 208 209 - void kvm_make_mclock_inprogress_request(struct kvm *kvm) 210 - { 211 - kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); 212 - } 213 - 214 - void kvm_make_scan_ioapic_request(struct kvm *kvm) 215 - { 216 - kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); 217 - } 218 - 219 209 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 220 210 { 221 211 struct page *page; ··· 1154 1164 return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn); 1155 1165 } 1156 1166 1157 - int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 1167 + bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 1158 1168 { 1159 1169 struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); 1160 1170 1161 1171 if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS || 1162 1172 memslot->flags & KVM_MEMSLOT_INVALID) 1163 - return 0; 1173 + return false; 1164 1174 1165 - return 1; 1175 + return true; 1166 1176 } 1167 1177 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); 1168 1178 ··· 2247 2257 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) 2248 2258 { 2249 2259 int r; 2250 - struct kvm_vcpu *vcpu, *v; 2260 + struct kvm_vcpu *vcpu; 2251 2261 2252 2262 if (id >= KVM_MAX_VCPUS) 2253 2263 return -EINVAL; ··· 2271 2281 r = -EINVAL; 2272 2282 goto unlock_vcpu_destroy; 2273 2283 } 2274 - 2275 - kvm_for_each_vcpu(r, v, kvm) 2276 - if (v->vcpu_id == id) { 2277 - r = -EEXIST; 2278 - goto unlock_vcpu_destroy; 2279 - } 2284 + if (kvm_get_vcpu_by_id(kvm, id)) { 2285 + r = -EEXIST; 2286 + goto unlock_vcpu_destroy; 2287 + } 2280 2288 2281 2289 BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); 2282 2290 ··· 3437 3449 goto out; 3438 3450 3439 3451 for (p = debugfs_entries; p->name; ++p) { 3440 - p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, 3441 - (void *)(long)p->offset, 3442 - stat_fops[p->kind]); 3443 - if (p->dentry == NULL) 3452 + if (!debugfs_create_file(p->name, 0444, 
kvm_debugfs_dir, 3453 + (void *)(long)p->offset, 3454 + stat_fops[p->kind])) 3444 3455 goto out_dir; 3445 3456 } 3446 3457 ··· 3449 3462 debugfs_remove_recursive(kvm_debugfs_dir); 3450 3463 out: 3451 3464 return r; 3452 - } 3453 - 3454 - static void kvm_exit_debug(void) 3455 - { 3456 - struct kvm_stats_debugfs_item *p; 3457 - 3458 - for (p = debugfs_entries; p->name; ++p) 3459 - debugfs_remove(p->dentry); 3460 - debugfs_remove(kvm_debugfs_dir); 3461 3465 } 3462 3466 3463 3467 static int kvm_suspend(void) ··· 3608 3630 3609 3631 void kvm_exit(void) 3610 3632 { 3611 - kvm_exit_debug(); 3633 + debugfs_remove_recursive(kvm_debugfs_dir); 3612 3634 misc_deregister(&kvm_dev); 3613 3635 kmem_cache_destroy(kvm_vcpu_cache); 3614 3636 kvm_async_pf_deinit();