Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:
"ARM:
- Full debug support for arm64
- Active state switching for timer interrupts
- Lazy FP/SIMD save/restore for arm64
- Generic ARMv8 target

PPC:
- Book3S: A few bug fixes
- Book3S: Allow micro-threading on POWER8

x86:
- Compiler warnings

Generic:
- Adaptive polling for guest halt"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits)
kvm: irqchip: fix memory leak
kvm: move new trace event outside #ifdef CONFIG_KVM_ASYNC_PF
KVM: trace kvm_halt_poll_ns grow/shrink
KVM: dynamic halt-polling
KVM: make halt_poll_ns per-vCPU
Silence compiler warning in arch/x86/kvm/emulate.c
kvm: compile process_smi_save_seg_64() only for x86_64
KVM: x86: avoid uninitialized variable warning
KVM: PPC: Book3S: Fix typo in top comment about locking
KVM: PPC: Book3S: Fix size of the PSPB register
KVM: PPC: Book3S HV: Exit on H_DOORBELL if HOST_IPI is set
KVM: PPC: Book3S HV: Fix race in starting secondary threads
KVM: PPC: Book3S: correct width in XER handling
KVM: PPC: Book3S HV: Fix preempted vcore stolen time calculation
KVM: PPC: Book3S HV: Fix preempted vcore list locking
KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD
KVM: PPC: Book3S HV: Fix bug in dirty page tracking
KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE
KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
KVM: PPC: Book3S HV: Make use of unused threads when running guests
...

+2650 -697
+11 -4
Documentation/virtual/kvm/api.txt
··· 2671 2671 4.87 KVM_SET_GUEST_DEBUG 2672 2672 2673 2673 Capability: KVM_CAP_SET_GUEST_DEBUG 2674 - Architectures: x86, s390, ppc 2674 + Architectures: x86, s390, ppc, arm64 2675 2675 Type: vcpu ioctl 2676 2676 Parameters: struct kvm_guest_debug (in) 2677 2677 Returns: 0 on success; -1 on error ··· 2693 2693 The top 16 bits of the control field are architecture specific control 2694 2694 flags which can include the following: 2695 2695 2696 - - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86] 2697 - - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390] 2696 + - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64] 2697 + - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64] 2698 2698 - KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86] 2699 2699 - KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86] 2700 2700 - KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390] ··· 2708 2708 2709 2709 The second part of the structure is architecture specific and 2710 2710 typically contains a set of debug registers. 2711 + 2712 + For arm64 the number of debug registers is implementation defined and 2713 + can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and 2714 + KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number 2715 + indicating the number of supported registers. 2711 2716 2712 2717 When debug events exit the main run loop with the reason 2713 2718 KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run ··· 3116 3111 where kvm expects application code to place the data for the next 3117 3112 KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array. 3118 3113 3114 + /* KVM_EXIT_DEBUG */ 3119 3115 struct { 3120 3116 struct kvm_debug_exit_arch arch; 3121 3117 } debug; 3122 3118 3123 - Unused. 3119 + If the exit_reason is KVM_EXIT_DEBUG, then a vcpu is processing a debug event 3120 + for which architecture specific information is returned. 
3124 3121 3125 3122 /* KVM_EXIT_MMIO */ 3126 3123 struct {
+5
arch/arm/include/asm/kvm_host.h
··· 231 231 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} 232 232 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} 233 233 234 + static inline void kvm_arm_init_debug(void) {} 235 + static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} 236 + static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} 237 + static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} 238 + 234 239 #endif /* __ARM_KVM_HOST_H__ */
+25 -11
arch/arm/kvm/arm.c
··· 125 125 if (ret) 126 126 goto out_free_stage2_pgd; 127 127 128 + kvm_vgic_early_init(kvm); 128 129 kvm_timer_init(kvm); 129 130 130 131 /* Mark the initial VMID generation invalid */ ··· 250 249 251 250 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 252 251 { 252 + kvm_vgic_vcpu_early_init(vcpu); 253 253 } 254 254 255 255 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) ··· 280 278 /* Set up the timer */ 281 279 kvm_timer_vcpu_init(vcpu); 282 280 281 + kvm_arm_reset_debug_ptr(vcpu); 282 + 283 283 return 0; 284 284 } 285 285 ··· 304 300 305 301 kvm_arm_set_running_vcpu(NULL); 306 302 } 307 - 308 - int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 309 - struct kvm_guest_debug *dbg) 310 - { 311 - return -EINVAL; 312 - } 313 - 314 303 315 304 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 316 305 struct kvm_mp_state *mp_state) ··· 525 528 if (vcpu->arch.pause) 526 529 vcpu_pause(vcpu); 527 530 528 - kvm_vgic_flush_hwstate(vcpu); 531 + /* 532 + * Disarming the background timer must be done in a 533 + * preemptible context, as this call may sleep. 534 + */ 529 535 kvm_timer_flush_hwstate(vcpu); 530 536 537 + /* 538 + * Preparing the interrupts to be injected also 539 + * involves poking the GIC, which must be done in a 540 + * non-preemptible context. 
541 + */ 531 542 preempt_disable(); 543 + kvm_vgic_flush_hwstate(vcpu); 544 + 532 545 local_irq_disable(); 533 546 534 547 /* ··· 551 544 552 545 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { 553 546 local_irq_enable(); 547 + kvm_vgic_sync_hwstate(vcpu); 554 548 preempt_enable(); 555 549 kvm_timer_sync_hwstate(vcpu); 556 - kvm_vgic_sync_hwstate(vcpu); 557 550 continue; 558 551 } 552 + 553 + kvm_arm_setup_debug(vcpu); 559 554 560 555 /************************************************************** 561 556 * Enter the guest ··· 572 563 /* 573 564 * Back from guest 574 565 *************************************************************/ 566 + 567 + kvm_arm_clear_debug(vcpu); 575 568 576 569 /* 577 570 * We may have taken a host interrupt in HYP mode (ie ··· 597 586 */ 598 587 kvm_guest_exit(); 599 588 trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); 589 + 590 + kvm_vgic_sync_hwstate(vcpu); 591 + 600 592 preempt_enable(); 601 593 602 - 603 594 kvm_timer_sync_hwstate(vcpu); 604 - kvm_vgic_sync_hwstate(vcpu); 605 595 606 596 ret = handle_exit(vcpu, run, ret); 607 597 } ··· 933 921 vector_ptr = (unsigned long)__kvm_hyp_vector; 934 922 935 923 __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); 924 + 925 + kvm_arm_init_debug(); 936 926 } 937 927 938 928 static int hyp_init_cpu_notify(struct notifier_block *self,
+6
arch/arm/kvm/guest.c
··· 290 290 { 291 291 return -EINVAL; 292 292 } 293 + 294 + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 295 + struct kvm_guest_debug *dbg) 296 + { 297 + return -EINVAL; 298 + }
+8 -6
arch/arm/kvm/interrupts.S
··· 361 361 @ Check syndrome register 362 362 mrc p15, 4, r1, c5, c2, 0 @ HSR 363 363 lsr r0, r1, #HSR_EC_SHIFT 364 - #ifdef CONFIG_VFPv3 365 - cmp r0, #HSR_EC_CP_0_13 366 - beq switch_to_guest_vfp 367 - #endif 368 364 cmp r0, #HSR_EC_HVC 369 365 bne guest_trap @ Not HVC instr. 370 366 ··· 374 378 cmp r2, #0 375 379 bne guest_trap @ Guest called HVC 376 380 377 - host_switch_to_hyp: 381 + /* 382 + * Getting here means host called HVC, we shift parameters and branch 383 + * to Hyp function. 384 + */ 378 385 pop {r0, r1, r2} 379 386 380 387 /* Check for __hyp_get_vectors */ ··· 408 409 409 410 @ Check if we need the fault information 410 411 lsr r1, r1, #HSR_EC_SHIFT 412 + #ifdef CONFIG_VFPv3 413 + cmp r1, #HSR_EC_CP_0_13 414 + beq switch_to_guest_vfp 415 + #endif 411 416 cmp r1, #HSR_EC_IABT 412 417 mrceq p15, 4, r2, c6, c0, 2 @ HIFAR 413 418 beq 2f ··· 480 477 */ 481 478 #ifdef CONFIG_VFPv3 482 479 switch_to_guest_vfp: 483 - load_vcpu @ Load VCPU pointer to r0 484 480 push {r3-r7} 485 481 486 482 @ NEON/VFP used. Turn on VFP access.
+1 -3
arch/arm/kvm/reset.c
··· 77 77 kvm_reset_coprocs(vcpu); 78 78 79 79 /* Reset arch_timer context */ 80 - kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); 81 - 82 - return 0; 80 + return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); 83 81 }
+14
arch/arm64/include/asm/hw_breakpoint.h
··· 16 16 #ifndef __ASM_HW_BREAKPOINT_H 17 17 #define __ASM_HW_BREAKPOINT_H 18 18 19 + #include <asm/cputype.h> 20 + 19 21 #ifdef __KERNEL__ 20 22 21 23 struct arch_hw_breakpoint_ctrl { ··· 133 131 #endif 134 132 135 133 extern struct pmu perf_ops_bp; 134 + 135 + /* Determine number of BRP registers available. */ 136 + static inline int get_num_brps(void) 137 + { 138 + return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; 139 + } 140 + 141 + /* Determine number of WRP registers available. */ 142 + static inline int get_num_wrps(void) 143 + { 144 + return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; 145 + } 136 146 137 147 #endif /* __KERNEL__ */ 138 148 #endif /* __ASM_BREAKPOINT_H */
+4 -1
arch/arm64/include/asm/kvm_arm.h
··· 171 171 #define HSTR_EL2_TTEE (1 << 16) 172 172 #define HSTR_EL2_T(x) (1 << x) 173 173 174 + /* Hyp Coprocessor Trap Register Shifts */ 175 + #define CPTR_EL2_TFP_SHIFT 10 176 + 174 177 /* Hyp Coprocessor Trap Register */ 175 178 #define CPTR_EL2_TCPAC (1 << 31) 176 179 #define CPTR_EL2_TTA (1 << 20) 177 - #define CPTR_EL2_TFP (1 << 10) 180 + #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) 178 181 179 182 /* Hyp Debug Configuration Register bits */ 180 183 #define MDCR_EL2_TDRA (1 << 11)
+10 -16
arch/arm64/include/asm/kvm_asm.h
··· 46 46 #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ 47 47 #define PAR_EL1 21 /* Physical Address Register */ 48 48 #define MDSCR_EL1 22 /* Monitor Debug System Control Register */ 49 - #define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ 50 - #define DBGBCR15_EL1 38 51 - #define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ 52 - #define DBGBVR15_EL1 54 53 - #define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ 54 - #define DBGWCR15_EL1 70 55 - #define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ 56 - #define DBGWVR15_EL1 86 57 - #define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ 49 + #define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ 58 50 59 51 /* 32bit specific registers. Keep them at the end of the range */ 60 - #define DACR32_EL2 88 /* Domain Access Control Register */ 61 - #define IFSR32_EL2 89 /* Instruction Fault Status Register */ 62 - #define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ 63 - #define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ 64 - #define TEECR32_EL1 92 /* ThumbEE Configuration Register */ 65 - #define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ 66 - #define NR_SYS_REGS 94 52 + #define DACR32_EL2 24 /* Domain Access Control Register */ 53 + #define IFSR32_EL2 25 /* Instruction Fault Status Register */ 54 + #define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ 55 + #define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ 56 + #define TEECR32_EL1 28 /* ThumbEE Configuration Register */ 57 + #define TEEHBR32_EL1 29 /* ThumbEE Handler Base Register */ 58 + #define NR_SYS_REGS 30 67 59 68 60 /* 32bit mapping */ 69 61 #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ ··· 123 131 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); 124 132 125 133 extern u64 __vgic_v3_get_ich_vtr_el2(void); 134 + 135 + extern u32 __kvm_get_mdcr_el2(void); 126 136 127 
137 #endif 128 138
+36 -6
arch/arm64/include/asm/kvm_host.h
··· 103 103 104 104 /* HYP configuration */ 105 105 u64 hcr_el2; 106 + u32 mdcr_el2; 106 107 107 108 /* Exception Information */ 108 109 struct kvm_vcpu_fault_info fault; 109 110 110 - /* Debug state */ 111 + /* Guest debug state */ 111 112 u64 debug_flags; 113 + 114 + /* 115 + * We maintain more than a single set of debug registers to support 116 + * debugging the guest from the host and to maintain separate host and 117 + * guest state during world switches. vcpu_debug_state are the debug 118 + * registers of the vcpu as the guest sees them. host_debug_state are 119 + * the host registers which are saved and restored during 120 + * world switches. external_debug_state contains the debug 121 + * values we want to debug the guest. This is set via the 122 + * KVM_SET_GUEST_DEBUG ioctl. 123 + * 124 + * debug_ptr points to the set of debug registers that should be loaded 125 + * onto the hardware when running the guest. 126 + */ 127 + struct kvm_guest_debug_arch *debug_ptr; 128 + struct kvm_guest_debug_arch vcpu_debug_state; 129 + struct kvm_guest_debug_arch external_debug_state; 112 130 113 131 /* Pointer to host CPU context */ 114 132 kvm_cpu_context_t *host_cpu_context; 133 + struct kvm_guest_debug_arch host_debug_state; 115 134 116 135 /* VGIC state */ 117 136 struct vgic_cpu vgic_cpu; ··· 140 121 * Anything that is not used directly from assembly code goes 141 122 * here. 142 123 */ 124 + 125 + /* 126 + * Guest registers we preserve during guest debugging. 127 + * 128 + * These shadow registers are updated by the kvm_handle_sys_reg 129 + * trap handler if the guest accesses or updates them while we 130 + * are using guest debug. 
131 + */ 132 + struct { 133 + u32 mdscr_el1; 134 + } guest_debug_preserved; 143 135 144 136 /* Don't run the guest */ 145 137 bool pause; ··· 246 216 hyp_stack_ptr, vector_ptr); 247 217 } 248 218 249 - struct vgic_sr_vectors { 250 - void *save_vgic; 251 - void *restore_vgic; 252 - }; 253 - 254 219 static inline void kvm_arch_hardware_disable(void) {} 255 220 static inline void kvm_arch_hardware_unsetup(void) {} 256 221 static inline void kvm_arch_sync_events(struct kvm *kvm) {} 257 222 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} 258 223 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} 224 + 225 + void kvm_arm_init_debug(void); 226 + void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); 227 + void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); 228 + void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); 259 229 260 230 #endif /* __ARM64_KVM_HOST_H__ */
+35 -2
arch/arm64/include/uapi/asm/kvm.h
··· 53 53 struct user_fpsimd_state fp_regs; 54 54 }; 55 55 56 - /* Supported Processor Types */ 56 + /* 57 + * Supported CPU Targets - Adding a new target type is not recommended, 58 + * unless there are some special registers not supported by the 59 + * generic v8 sysreg table. 60 + */ 57 61 #define KVM_ARM_TARGET_AEM_V8 0 58 62 #define KVM_ARM_TARGET_FOUNDATION_V8 1 59 63 #define KVM_ARM_TARGET_CORTEX_A57 2 60 64 #define KVM_ARM_TARGET_XGENE_POTENZA 3 61 65 #define KVM_ARM_TARGET_CORTEX_A53 4 66 + /* Generic ARM v8 target */ 67 + #define KVM_ARM_TARGET_GENERIC_V8 5 62 68 63 - #define KVM_ARM_NUM_TARGETS 5 69 + #define KVM_ARM_NUM_TARGETS 6 64 70 65 71 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ 66 72 #define KVM_ARM_DEVICE_TYPE_SHIFT 0 ··· 106 100 struct kvm_fpu { 107 101 }; 108 102 103 + /* 104 + * See v8 ARM ARM D7.3: Debug Registers 105 + * 106 + * The architectural limit is 16 debug registers of each type although 107 + * in practice there are usually fewer (see ID_AA64DFR0_EL1). 108 + * 109 + * Although the control registers are architecturally defined as 32 110 + * bits wide we use a 64 bit structure here to keep parity with 111 + * KVM_GET/SET_ONE_REG behaviour which treats all system registers as 112 + * 64 bit values. It also allows for the possibility of the 113 + * architecture expanding the control registers without having to 114 + * change the userspace ABI. 
115 + */ 116 + #define KVM_ARM_MAX_DBG_REGS 16 109 117 struct kvm_guest_debug_arch { 118 + __u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS]; 119 + __u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS]; 120 + __u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS]; 121 + __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS]; 110 122 }; 111 123 112 124 struct kvm_debug_exit_arch { 125 + __u32 hsr; 126 + __u64 far; /* used for watchpoints */ 113 127 }; 128 + 129 + /* 130 + * Architecture specific defines for kvm_guest_debug->control 131 + */ 132 + 133 + #define KVM_GUESTDBG_USE_SW_BP (1 << 16) 134 + #define KVM_GUESTDBG_USE_HW (1 << 17) 114 135 115 136 struct kvm_sync_regs { 116 137 };
+7 -2
arch/arm64/kernel/asm-offsets.c
··· 116 116 DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); 117 117 DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); 118 118 DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); 119 + DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); 120 + DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); 121 + DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); 122 + DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); 123 + DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr)); 119 124 DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); 125 + DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); 120 126 DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); 121 127 DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); 128 + DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state)); 122 129 DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); 123 130 DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); 124 131 DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); 125 132 DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); 126 133 DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); 127 134 DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); 128 - DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic)); 129 - DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic)); 130 135 DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); 131 136 DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); 132 137 DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
-12
arch/arm64/kernel/hw_breakpoint.c
··· 48 48 static int core_num_brps; 49 49 static int core_num_wrps; 50 50 51 - /* Determine number of BRP registers available. */ 52 - static int get_num_brps(void) 53 - { 54 - return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; 55 - } 56 - 57 - /* Determine number of WRP registers available. */ 58 - static int get_num_wrps(void) 59 - { 60 - return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; 61 - } 62 - 63 51 int hw_breakpoint_slots(int type) 64 52 { 65 53 /*
+1 -1
arch/arm64/kvm/Makefile
··· 17 17 18 18 kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o 19 19 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o 20 - kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o 20 + kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o 21 21 22 22 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o 23 23 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
+217
arch/arm64/kvm/debug.c
··· 1 + /* 2 + * Debug and Guest Debug support 3 + * 4 + * Copyright (C) 2015 - Linaro Ltd 5 + * Author: Alex Bennée <alex.bennee@linaro.org> 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + * 11 + * This program is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 18 + */ 19 + 20 + #include <linux/kvm_host.h> 21 + #include <linux/hw_breakpoint.h> 22 + 23 + #include <asm/debug-monitors.h> 24 + #include <asm/kvm_asm.h> 25 + #include <asm/kvm_arm.h> 26 + #include <asm/kvm_emulate.h> 27 + 28 + #include "trace.h" 29 + 30 + /* These are the bits of MDSCR_EL1 we may manipulate */ 31 + #define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \ 32 + DBG_MDSCR_KDE | \ 33 + DBG_MDSCR_MDE) 34 + 35 + static DEFINE_PER_CPU(u32, mdcr_el2); 36 + 37 + /** 38 + * save/restore_guest_debug_regs 39 + * 40 + * For some debug operations we need to tweak some guest registers. As 41 + * a result we need to save the state of those registers before we 42 + * make those modifications. 43 + * 44 + * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled 45 + * after we have restored the preserved value to the main context. 
46 + */ 47 + static void save_guest_debug_regs(struct kvm_vcpu *vcpu) 48 + { 49 + vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1); 50 + 51 + trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", 52 + vcpu->arch.guest_debug_preserved.mdscr_el1); 53 + } 54 + 55 + static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) 56 + { 57 + vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1; 58 + 59 + trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", 60 + vcpu_sys_reg(vcpu, MDSCR_EL1)); 61 + } 62 + 63 + /** 64 + * kvm_arm_init_debug - grab what we need for debug 65 + * 66 + * Currently the sole task of this function is to retrieve the initial 67 + * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has 68 + * presumably been set-up by some knowledgeable bootcode. 69 + * 70 + * It is called once per-cpu during CPU hyp initialisation. 71 + */ 72 + 73 + void kvm_arm_init_debug(void) 74 + { 75 + __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2)); 76 + } 77 + 78 + /** 79 + * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state 80 + */ 81 + 82 + void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) 83 + { 84 + vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state; 85 + } 86 + 87 + /** 88 + * kvm_arm_setup_debug - set up debug related stuff 89 + * 90 + * @vcpu: the vcpu pointer 91 + * 92 + * This is called before each entry into the hypervisor to setup any 93 + * debug related registers. Currently this just ensures we will trap 94 + * access to: 95 + * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR) 96 + * - Debug ROM Address (MDCR_EL2_TDRA) 97 + * - OS related registers (MDCR_EL2_TDOSA) 98 + * 99 + * Additionally, KVM only traps guest accesses to the debug registers if 100 + * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY 101 + * flag on vcpu->arch.debug_flags). 
Since the guest must not interfere 102 + * with the hardware state when debugging the guest, we must ensure that 103 + * trapping is enabled whenever we are debugging the guest using the 104 + * debug registers. 105 + */ 106 + 107 + void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) 108 + { 109 + bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY); 110 + 111 + trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); 112 + 113 + vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK; 114 + vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | 115 + MDCR_EL2_TPMCR | 116 + MDCR_EL2_TDRA | 117 + MDCR_EL2_TDOSA); 118 + 119 + /* Is Guest debugging in effect? */ 120 + if (vcpu->guest_debug) { 121 + /* Route all software debug exceptions to EL2 */ 122 + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE; 123 + 124 + /* Save guest debug state */ 125 + save_guest_debug_regs(vcpu); 126 + 127 + /* 128 + * Single Step (ARM ARM D2.12.3 The software step state 129 + * machine) 130 + * 131 + * If we are doing Single Step we need to manipulate 132 + * the guest's MDSCR_EL1.SS and PSTATE.SS. Once the 133 + * step has occurred the hypervisor will trap the 134 + * debug exception and we return to userspace. 135 + * 136 + * If the guest attempts to single step its userspace 137 + * we would have to deal with a trapped exception 138 + * while in the guest kernel. Because this would be 139 + * hard to unwind we suppress the guest's ability to 140 + * do so by masking MDSCR_EL1.SS. 141 + * 142 + * This confuses guest debuggers which use 143 + * single-step behind the scenes but everything 144 + * returns to normal once the host is no longer 145 + * debugging the system. 
146 + */ 147 + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 148 + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; 149 + vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS; 150 + } else { 151 + vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS; 152 + } 153 + 154 + trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu)); 155 + 156 + /* 157 + * HW Breakpoints and watchpoints 158 + * 159 + * We simply switch the debug_ptr to point to our new 160 + * external_debug_state which has been populated by the 161 + * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY 162 + * mechanism ensures the registers are updated on the 163 + * world switch. 164 + */ 165 + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { 166 + /* Enable breakpoints/watchpoints */ 167 + vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE; 168 + 169 + vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; 170 + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; 171 + trap_debug = true; 172 + 173 + trace_kvm_arm_set_regset("BKPTS", get_num_brps(), 174 + &vcpu->arch.debug_ptr->dbg_bcr[0], 175 + &vcpu->arch.debug_ptr->dbg_bvr[0]); 176 + 177 + trace_kvm_arm_set_regset("WAPTS", get_num_wrps(), 178 + &vcpu->arch.debug_ptr->dbg_wcr[0], 179 + &vcpu->arch.debug_ptr->dbg_wvr[0]); 180 + } 181 + } 182 + 183 + BUG_ON(!vcpu->guest_debug && 184 + vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state); 185 + 186 + /* Trap debug register access */ 187 + if (trap_debug) 188 + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; 189 + 190 + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); 191 + trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1)); 192 + } 193 + 194 + void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) 195 + { 196 + trace_kvm_arm_clear_debug(vcpu->guest_debug); 197 + 198 + if (vcpu->guest_debug) { 199 + restore_guest_debug_regs(vcpu); 200 + 201 + /* 202 + * If we were using HW debug we need to restore the 203 + * debug_ptr to the guest debug state. 
204 + */ 205 + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { 206 + kvm_arm_reset_debug_ptr(vcpu); 207 + 208 + trace_kvm_arm_set_regset("BKPTS", get_num_brps(), 209 + &vcpu->arch.debug_ptr->dbg_bcr[0], 210 + &vcpu->arch.debug_ptr->dbg_bvr[0]); 211 + 212 + trace_kvm_arm_set_regset("WAPTS", get_num_wrps(), 213 + &vcpu->arch.debug_ptr->dbg_wcr[0], 214 + &vcpu->arch.debug_ptr->dbg_wvr[0]); 215 + } 216 + } 217 + }
+42 -1
arch/arm64/kvm/guest.c
··· 32 32 #include <asm/kvm_emulate.h> 33 33 #include <asm/kvm_coproc.h> 34 34 35 + #include "trace.h" 36 + 35 37 struct kvm_stats_debugfs_item debugfs_entries[] = { 36 38 { NULL } 37 39 }; ··· 295 293 break; 296 294 }; 297 295 298 - return -EINVAL; 296 + /* Return a default generic target */ 297 + return KVM_ARM_TARGET_GENERIC_V8; 299 298 } 300 299 301 300 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) ··· 333 330 struct kvm_translation *tr) 334 331 { 335 332 return -EINVAL; 333 + } 334 + 335 + #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ 336 + KVM_GUESTDBG_USE_SW_BP | \ 337 + KVM_GUESTDBG_USE_HW | \ 338 + KVM_GUESTDBG_SINGLESTEP) 339 + 340 + /** 341 + * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging 342 + * @vcpu: the vcpu pointer 343 + * @dbg: the ioctl data buffer 344 + * 345 + * This sets up and enables the VM for guest debugging. Userspace 346 + * passes in a control flag to enable different debug types and 347 + * potentially other architecture specific information in the rest of 348 + * the structure. 349 + */ 350 + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 351 + struct kvm_guest_debug *dbg) 352 + { 353 + trace_kvm_set_guest_debug(vcpu, dbg->control); 354 + 355 + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) 356 + return -EINVAL; 357 + 358 + if (dbg->control & KVM_GUESTDBG_ENABLE) { 359 + vcpu->guest_debug = dbg->control; 360 + 361 + /* Hardware assisted Break and Watch points */ 362 + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { 363 + vcpu->arch.external_debug_state = dbg->arch; 364 + } 365 + 366 + } else { 367 + /* If not enabled clear all flags */ 368 + vcpu->guest_debug = 0; 369 + } 370 + return 0; 336 371 }
+44
arch/arm64/kvm/handle_exit.c
··· 82 82 return 1; 83 83 } 84 84 85 + /** 86 + * kvm_handle_guest_debug - handle a debug exception instruction 87 + * 88 + * @vcpu: the vcpu pointer 89 + * @run: access to the kvm_run structure for results 90 + * 91 + * We route all debug exceptions through the same handler. If both the 92 + * guest and host are using the same debug facilities it will be up to 93 + * userspace to re-inject the correct exception for guest delivery. 94 + * 95 + * @return: 0 (while setting run->exit_reason), -1 for error 96 + */ 97 + static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) 98 + { 99 + u32 hsr = kvm_vcpu_get_hsr(vcpu); 100 + int ret = 0; 101 + 102 + run->exit_reason = KVM_EXIT_DEBUG; 103 + run->debug.arch.hsr = hsr; 104 + 105 + switch (hsr >> ESR_ELx_EC_SHIFT) { 106 + case ESR_ELx_EC_WATCHPT_LOW: 107 + run->debug.arch.far = vcpu->arch.fault.far_el2; 108 + /* fall through */ 109 + case ESR_ELx_EC_SOFTSTP_LOW: 110 + case ESR_ELx_EC_BREAKPT_LOW: 111 + case ESR_ELx_EC_BKPT32: 112 + case ESR_ELx_EC_BRK64: 113 + break; 114 + default: 115 + kvm_err("%s: un-handled case hsr: %#08x\n", 116 + __func__, (unsigned int) hsr); 117 + ret = -1; 118 + break; 119 + } 120 + 121 + return ret; 122 + } 123 + 85 124 static exit_handle_fn arm_exit_handlers[] = { 86 125 [ESR_ELx_EC_WFx] = kvm_handle_wfx, 87 126 [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, ··· 135 96 [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg, 136 97 [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort, 137 98 [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort, 99 + [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug, 100 + [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug, 101 + [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug, 102 + [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug, 103 + [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug, 138 104 }; 139 105 140 106 static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+215 -396
arch/arm64/kvm/hyp.S
··· 230 230 stp x24, x25, [x3, #160] 231 231 .endm 232 232 233 - .macro save_debug 234 - // x2: base address for cpu context 235 - // x3: tmp register 233 + .macro save_debug type 234 + // x4: pointer to register set 235 + // x5: number of registers to skip 236 + // x6..x22 trashed 236 237 237 - mrs x26, id_aa64dfr0_el1 238 - ubfx x24, x26, #12, #4 // Extract BRPs 239 - ubfx x25, x26, #20, #4 // Extract WRPs 240 - mov w26, #15 241 - sub w24, w26, w24 // How many BPs to skip 242 - sub w25, w26, w25 // How many WPs to skip 243 - 244 - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) 245 - 246 - adr x26, 1f 247 - add x26, x26, x24, lsl #2 248 - br x26 238 + adr x22, 1f 239 + add x22, x22, x5, lsl #2 240 + br x22 249 241 1: 250 - mrs x20, dbgbcr15_el1 251 - mrs x19, dbgbcr14_el1 252 - mrs x18, dbgbcr13_el1 253 - mrs x17, dbgbcr12_el1 254 - mrs x16, dbgbcr11_el1 255 - mrs x15, dbgbcr10_el1 256 - mrs x14, dbgbcr9_el1 257 - mrs x13, dbgbcr8_el1 258 - mrs x12, dbgbcr7_el1 259 - mrs x11, dbgbcr6_el1 260 - mrs x10, dbgbcr5_el1 261 - mrs x9, dbgbcr4_el1 262 - mrs x8, dbgbcr3_el1 263 - mrs x7, dbgbcr2_el1 264 - mrs x6, dbgbcr1_el1 265 - mrs x5, dbgbcr0_el1 242 + mrs x21, \type\()15_el1 243 + mrs x20, \type\()14_el1 244 + mrs x19, \type\()13_el1 245 + mrs x18, \type\()12_el1 246 + mrs x17, \type\()11_el1 247 + mrs x16, \type\()10_el1 248 + mrs x15, \type\()9_el1 249 + mrs x14, \type\()8_el1 250 + mrs x13, \type\()7_el1 251 + mrs x12, \type\()6_el1 252 + mrs x11, \type\()5_el1 253 + mrs x10, \type\()4_el1 254 + mrs x9, \type\()3_el1 255 + mrs x8, \type\()2_el1 256 + mrs x7, \type\()1_el1 257 + mrs x6, \type\()0_el1 266 258 267 - adr x26, 1f 268 - add x26, x26, x24, lsl #2 269 - br x26 270 - 259 + adr x22, 1f 260 + add x22, x22, x5, lsl #2 261 + br x22 271 262 1: 272 - str x20, [x3, #(15 * 8)] 273 - str x19, [x3, #(14 * 8)] 274 - str x18, [x3, #(13 * 8)] 275 - str x17, [x3, #(12 * 8)] 276 - str x16, [x3, #(11 * 8)] 277 - str x15, [x3, #(10 * 8)] 278 - str x14, [x3, #(9 * 8)] 279 - str 
x13, [x3, #(8 * 8)] 280 - str x12, [x3, #(7 * 8)] 281 - str x11, [x3, #(6 * 8)] 282 - str x10, [x3, #(5 * 8)] 283 - str x9, [x3, #(4 * 8)] 284 - str x8, [x3, #(3 * 8)] 285 - str x7, [x3, #(2 * 8)] 286 - str x6, [x3, #(1 * 8)] 287 - str x5, [x3, #(0 * 8)] 288 - 289 - add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) 290 - 291 - adr x26, 1f 292 - add x26, x26, x24, lsl #2 293 - br x26 294 - 1: 295 - mrs x20, dbgbvr15_el1 296 - mrs x19, dbgbvr14_el1 297 - mrs x18, dbgbvr13_el1 298 - mrs x17, dbgbvr12_el1 299 - mrs x16, dbgbvr11_el1 300 - mrs x15, dbgbvr10_el1 301 - mrs x14, dbgbvr9_el1 302 - mrs x13, dbgbvr8_el1 303 - mrs x12, dbgbvr7_el1 304 - mrs x11, dbgbvr6_el1 305 - mrs x10, dbgbvr5_el1 306 - mrs x9, dbgbvr4_el1 307 - mrs x8, dbgbvr3_el1 308 - mrs x7, dbgbvr2_el1 309 - mrs x6, dbgbvr1_el1 310 - mrs x5, dbgbvr0_el1 311 - 312 - adr x26, 1f 313 - add x26, x26, x24, lsl #2 314 - br x26 315 - 316 - 1: 317 - str x20, [x3, #(15 * 8)] 318 - str x19, [x3, #(14 * 8)] 319 - str x18, [x3, #(13 * 8)] 320 - str x17, [x3, #(12 * 8)] 321 - str x16, [x3, #(11 * 8)] 322 - str x15, [x3, #(10 * 8)] 323 - str x14, [x3, #(9 * 8)] 324 - str x13, [x3, #(8 * 8)] 325 - str x12, [x3, #(7 * 8)] 326 - str x11, [x3, #(6 * 8)] 327 - str x10, [x3, #(5 * 8)] 328 - str x9, [x3, #(4 * 8)] 329 - str x8, [x3, #(3 * 8)] 330 - str x7, [x3, #(2 * 8)] 331 - str x6, [x3, #(1 * 8)] 332 - str x5, [x3, #(0 * 8)] 333 - 334 - add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) 335 - 336 - adr x26, 1f 337 - add x26, x26, x25, lsl #2 338 - br x26 339 - 1: 340 - mrs x20, dbgwcr15_el1 341 - mrs x19, dbgwcr14_el1 342 - mrs x18, dbgwcr13_el1 343 - mrs x17, dbgwcr12_el1 344 - mrs x16, dbgwcr11_el1 345 - mrs x15, dbgwcr10_el1 346 - mrs x14, dbgwcr9_el1 347 - mrs x13, dbgwcr8_el1 348 - mrs x12, dbgwcr7_el1 349 - mrs x11, dbgwcr6_el1 350 - mrs x10, dbgwcr5_el1 351 - mrs x9, dbgwcr4_el1 352 - mrs x8, dbgwcr3_el1 353 - mrs x7, dbgwcr2_el1 354 - mrs x6, dbgwcr1_el1 355 - mrs x5, dbgwcr0_el1 356 - 357 - adr x26, 1f 358 - add x26, x26, 
x25, lsl #2 359 - br x26 360 - 361 - 1: 362 - str x20, [x3, #(15 * 8)] 363 - str x19, [x3, #(14 * 8)] 364 - str x18, [x3, #(13 * 8)] 365 - str x17, [x3, #(12 * 8)] 366 - str x16, [x3, #(11 * 8)] 367 - str x15, [x3, #(10 * 8)] 368 - str x14, [x3, #(9 * 8)] 369 - str x13, [x3, #(8 * 8)] 370 - str x12, [x3, #(7 * 8)] 371 - str x11, [x3, #(6 * 8)] 372 - str x10, [x3, #(5 * 8)] 373 - str x9, [x3, #(4 * 8)] 374 - str x8, [x3, #(3 * 8)] 375 - str x7, [x3, #(2 * 8)] 376 - str x6, [x3, #(1 * 8)] 377 - str x5, [x3, #(0 * 8)] 378 - 379 - add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) 380 - 381 - adr x26, 1f 382 - add x26, x26, x25, lsl #2 383 - br x26 384 - 1: 385 - mrs x20, dbgwvr15_el1 386 - mrs x19, dbgwvr14_el1 387 - mrs x18, dbgwvr13_el1 388 - mrs x17, dbgwvr12_el1 389 - mrs x16, dbgwvr11_el1 390 - mrs x15, dbgwvr10_el1 391 - mrs x14, dbgwvr9_el1 392 - mrs x13, dbgwvr8_el1 393 - mrs x12, dbgwvr7_el1 394 - mrs x11, dbgwvr6_el1 395 - mrs x10, dbgwvr5_el1 396 - mrs x9, dbgwvr4_el1 397 - mrs x8, dbgwvr3_el1 398 - mrs x7, dbgwvr2_el1 399 - mrs x6, dbgwvr1_el1 400 - mrs x5, dbgwvr0_el1 401 - 402 - adr x26, 1f 403 - add x26, x26, x25, lsl #2 404 - br x26 405 - 406 - 1: 407 - str x20, [x3, #(15 * 8)] 408 - str x19, [x3, #(14 * 8)] 409 - str x18, [x3, #(13 * 8)] 410 - str x17, [x3, #(12 * 8)] 411 - str x16, [x3, #(11 * 8)] 412 - str x15, [x3, #(10 * 8)] 413 - str x14, [x3, #(9 * 8)] 414 - str x13, [x3, #(8 * 8)] 415 - str x12, [x3, #(7 * 8)] 416 - str x11, [x3, #(6 * 8)] 417 - str x10, [x3, #(5 * 8)] 418 - str x9, [x3, #(4 * 8)] 419 - str x8, [x3, #(3 * 8)] 420 - str x7, [x3, #(2 * 8)] 421 - str x6, [x3, #(1 * 8)] 422 - str x5, [x3, #(0 * 8)] 423 - 424 - mrs x21, mdccint_el1 425 - str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] 263 + str x21, [x4, #(15 * 8)] 264 + str x20, [x4, #(14 * 8)] 265 + str x19, [x4, #(13 * 8)] 266 + str x18, [x4, #(12 * 8)] 267 + str x17, [x4, #(11 * 8)] 268 + str x16, [x4, #(10 * 8)] 269 + str x15, [x4, #(9 * 8)] 270 + str x14, [x4, #(8 * 8)] 271 + str x13, 
[x4, #(7 * 8)] 272 + str x12, [x4, #(6 * 8)] 273 + str x11, [x4, #(5 * 8)] 274 + str x10, [x4, #(4 * 8)] 275 + str x9, [x4, #(3 * 8)] 276 + str x8, [x4, #(2 * 8)] 277 + str x7, [x4, #(1 * 8)] 278 + str x6, [x4, #(0 * 8)] 426 279 .endm 427 280 428 281 .macro restore_sysregs ··· 320 467 msr mdscr_el1, x25 321 468 .endm 322 469 323 - .macro restore_debug 324 - // x2: base address for cpu context 325 - // x3: tmp register 470 + .macro restore_debug type 471 + // x4: pointer to register set 472 + // x5: number of registers to skip 473 + // x6..x22 trashed 326 474 327 - mrs x26, id_aa64dfr0_el1 328 - ubfx x24, x26, #12, #4 // Extract BRPs 329 - ubfx x25, x26, #20, #4 // Extract WRPs 330 - mov w26, #15 331 - sub w24, w26, w24 // How many BPs to skip 332 - sub w25, w26, w25 // How many WPs to skip 333 - 334 - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) 335 - 336 - adr x26, 1f 337 - add x26, x26, x24, lsl #2 338 - br x26 475 + adr x22, 1f 476 + add x22, x22, x5, lsl #2 477 + br x22 339 478 1: 340 - ldr x20, [x3, #(15 * 8)] 341 - ldr x19, [x3, #(14 * 8)] 342 - ldr x18, [x3, #(13 * 8)] 343 - ldr x17, [x3, #(12 * 8)] 344 - ldr x16, [x3, #(11 * 8)] 345 - ldr x15, [x3, #(10 * 8)] 346 - ldr x14, [x3, #(9 * 8)] 347 - ldr x13, [x3, #(8 * 8)] 348 - ldr x12, [x3, #(7 * 8)] 349 - ldr x11, [x3, #(6 * 8)] 350 - ldr x10, [x3, #(5 * 8)] 351 - ldr x9, [x3, #(4 * 8)] 352 - ldr x8, [x3, #(3 * 8)] 353 - ldr x7, [x3, #(2 * 8)] 354 - ldr x6, [x3, #(1 * 8)] 355 - ldr x5, [x3, #(0 * 8)] 479 + ldr x21, [x4, #(15 * 8)] 480 + ldr x20, [x4, #(14 * 8)] 481 + ldr x19, [x4, #(13 * 8)] 482 + ldr x18, [x4, #(12 * 8)] 483 + ldr x17, [x4, #(11 * 8)] 484 + ldr x16, [x4, #(10 * 8)] 485 + ldr x15, [x4, #(9 * 8)] 486 + ldr x14, [x4, #(8 * 8)] 487 + ldr x13, [x4, #(7 * 8)] 488 + ldr x12, [x4, #(6 * 8)] 489 + ldr x11, [x4, #(5 * 8)] 490 + ldr x10, [x4, #(4 * 8)] 491 + ldr x9, [x4, #(3 * 8)] 492 + ldr x8, [x4, #(2 * 8)] 493 + ldr x7, [x4, #(1 * 8)] 494 + ldr x6, [x4, #(0 * 8)] 356 495 357 - adr x26, 1f 358 - add 
x26, x26, x24, lsl #2 359 - br x26 496 + adr x22, 1f 497 + add x22, x22, x5, lsl #2 498 + br x22 360 499 1: 361 - msr dbgbcr15_el1, x20 362 - msr dbgbcr14_el1, x19 363 - msr dbgbcr13_el1, x18 364 - msr dbgbcr12_el1, x17 365 - msr dbgbcr11_el1, x16 366 - msr dbgbcr10_el1, x15 367 - msr dbgbcr9_el1, x14 368 - msr dbgbcr8_el1, x13 369 - msr dbgbcr7_el1, x12 370 - msr dbgbcr6_el1, x11 371 - msr dbgbcr5_el1, x10 372 - msr dbgbcr4_el1, x9 373 - msr dbgbcr3_el1, x8 374 - msr dbgbcr2_el1, x7 375 - msr dbgbcr1_el1, x6 376 - msr dbgbcr0_el1, x5 377 - 378 - add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) 379 - 380 - adr x26, 1f 381 - add x26, x26, x24, lsl #2 382 - br x26 383 - 1: 384 - ldr x20, [x3, #(15 * 8)] 385 - ldr x19, [x3, #(14 * 8)] 386 - ldr x18, [x3, #(13 * 8)] 387 - ldr x17, [x3, #(12 * 8)] 388 - ldr x16, [x3, #(11 * 8)] 389 - ldr x15, [x3, #(10 * 8)] 390 - ldr x14, [x3, #(9 * 8)] 391 - ldr x13, [x3, #(8 * 8)] 392 - ldr x12, [x3, #(7 * 8)] 393 - ldr x11, [x3, #(6 * 8)] 394 - ldr x10, [x3, #(5 * 8)] 395 - ldr x9, [x3, #(4 * 8)] 396 - ldr x8, [x3, #(3 * 8)] 397 - ldr x7, [x3, #(2 * 8)] 398 - ldr x6, [x3, #(1 * 8)] 399 - ldr x5, [x3, #(0 * 8)] 400 - 401 - adr x26, 1f 402 - add x26, x26, x24, lsl #2 403 - br x26 404 - 1: 405 - msr dbgbvr15_el1, x20 406 - msr dbgbvr14_el1, x19 407 - msr dbgbvr13_el1, x18 408 - msr dbgbvr12_el1, x17 409 - msr dbgbvr11_el1, x16 410 - msr dbgbvr10_el1, x15 411 - msr dbgbvr9_el1, x14 412 - msr dbgbvr8_el1, x13 413 - msr dbgbvr7_el1, x12 414 - msr dbgbvr6_el1, x11 415 - msr dbgbvr5_el1, x10 416 - msr dbgbvr4_el1, x9 417 - msr dbgbvr3_el1, x8 418 - msr dbgbvr2_el1, x7 419 - msr dbgbvr1_el1, x6 420 - msr dbgbvr0_el1, x5 421 - 422 - add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) 423 - 424 - adr x26, 1f 425 - add x26, x26, x25, lsl #2 426 - br x26 427 - 1: 428 - ldr x20, [x3, #(15 * 8)] 429 - ldr x19, [x3, #(14 * 8)] 430 - ldr x18, [x3, #(13 * 8)] 431 - ldr x17, [x3, #(12 * 8)] 432 - ldr x16, [x3, #(11 * 8)] 433 - ldr x15, [x3, #(10 * 8)] 434 - ldr 
x14, [x3, #(9 * 8)] 435 - ldr x13, [x3, #(8 * 8)] 436 - ldr x12, [x3, #(7 * 8)] 437 - ldr x11, [x3, #(6 * 8)] 438 - ldr x10, [x3, #(5 * 8)] 439 - ldr x9, [x3, #(4 * 8)] 440 - ldr x8, [x3, #(3 * 8)] 441 - ldr x7, [x3, #(2 * 8)] 442 - ldr x6, [x3, #(1 * 8)] 443 - ldr x5, [x3, #(0 * 8)] 444 - 445 - adr x26, 1f 446 - add x26, x26, x25, lsl #2 447 - br x26 448 - 1: 449 - msr dbgwcr15_el1, x20 450 - msr dbgwcr14_el1, x19 451 - msr dbgwcr13_el1, x18 452 - msr dbgwcr12_el1, x17 453 - msr dbgwcr11_el1, x16 454 - msr dbgwcr10_el1, x15 455 - msr dbgwcr9_el1, x14 456 - msr dbgwcr8_el1, x13 457 - msr dbgwcr7_el1, x12 458 - msr dbgwcr6_el1, x11 459 - msr dbgwcr5_el1, x10 460 - msr dbgwcr4_el1, x9 461 - msr dbgwcr3_el1, x8 462 - msr dbgwcr2_el1, x7 463 - msr dbgwcr1_el1, x6 464 - msr dbgwcr0_el1, x5 465 - 466 - add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) 467 - 468 - adr x26, 1f 469 - add x26, x26, x25, lsl #2 470 - br x26 471 - 1: 472 - ldr x20, [x3, #(15 * 8)] 473 - ldr x19, [x3, #(14 * 8)] 474 - ldr x18, [x3, #(13 * 8)] 475 - ldr x17, [x3, #(12 * 8)] 476 - ldr x16, [x3, #(11 * 8)] 477 - ldr x15, [x3, #(10 * 8)] 478 - ldr x14, [x3, #(9 * 8)] 479 - ldr x13, [x3, #(8 * 8)] 480 - ldr x12, [x3, #(7 * 8)] 481 - ldr x11, [x3, #(6 * 8)] 482 - ldr x10, [x3, #(5 * 8)] 483 - ldr x9, [x3, #(4 * 8)] 484 - ldr x8, [x3, #(3 * 8)] 485 - ldr x7, [x3, #(2 * 8)] 486 - ldr x6, [x3, #(1 * 8)] 487 - ldr x5, [x3, #(0 * 8)] 488 - 489 - adr x26, 1f 490 - add x26, x26, x25, lsl #2 491 - br x26 492 - 1: 493 - msr dbgwvr15_el1, x20 494 - msr dbgwvr14_el1, x19 495 - msr dbgwvr13_el1, x18 496 - msr dbgwvr12_el1, x17 497 - msr dbgwvr11_el1, x16 498 - msr dbgwvr10_el1, x15 499 - msr dbgwvr9_el1, x14 500 - msr dbgwvr8_el1, x13 501 - msr dbgwvr7_el1, x12 502 - msr dbgwvr6_el1, x11 503 - msr dbgwvr5_el1, x10 504 - msr dbgwvr4_el1, x9 505 - msr dbgwvr3_el1, x8 506 - msr dbgwvr2_el1, x7 507 - msr dbgwvr1_el1, x6 508 - msr dbgwvr0_el1, x5 509 - 510 - ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] 511 - msr 
mdccint_el1, x21 500 + msr \type\()15_el1, x21 501 + msr \type\()14_el1, x20 502 + msr \type\()13_el1, x19 503 + msr \type\()12_el1, x18 504 + msr \type\()11_el1, x17 505 + msr \type\()10_el1, x16 506 + msr \type\()9_el1, x15 507 + msr \type\()8_el1, x14 508 + msr \type\()7_el1, x13 509 + msr \type\()6_el1, x12 510 + msr \type\()5_el1, x11 511 + msr \type\()4_el1, x10 512 + msr \type\()3_el1, x9 513 + msr \type\()2_el1, x8 514 + msr \type\()1_el1, x7 515 + msr \type\()0_el1, x6 512 516 .endm 513 517 514 518 .macro skip_32bit_state tmp, target ··· 383 673 .macro skip_debug_state tmp, target 384 674 ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] 385 675 tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target 676 + .endm 677 + 678 + /* 679 + * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled) 680 + */ 681 + .macro skip_fpsimd_state tmp, target 682 + mrs \tmp, cptr_el2 683 + tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target 386 684 .endm 387 685 388 686 .macro compute_debug_state target ··· 431 713 add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) 432 714 mrs x4, dacr32_el2 433 715 mrs x5, ifsr32_el2 434 - mrs x6, fpexc32_el2 435 716 stp x4, x5, [x3] 436 - str x6, [x3, #16] 437 717 718 + skip_fpsimd_state x8, 3f 719 + mrs x6, fpexc32_el2 720 + str x6, [x3, #16] 721 + 3: 438 722 skip_debug_state x8, 2f 439 723 mrs x7, dbgvcr32_el2 440 724 str x7, [x3, #24] ··· 463 743 464 744 add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) 465 745 ldp x4, x5, [x3] 466 - ldr x6, [x3, #16] 467 746 msr dacr32_el2, x4 468 747 msr ifsr32_el2, x5 469 - msr fpexc32_el2, x6 470 748 471 749 skip_debug_state x8, 2f 472 750 ldr x7, [x3, #24] ··· 481 763 482 764 .macro activate_traps 483 765 ldr x2, [x0, #VCPU_HCR_EL2] 766 + 767 + /* 768 + * We are about to set CPTR_EL2.TFP to trap all floating point 769 + * register accesses to EL2, however, the ARM ARM clearly states that 770 + * traps are only taken to EL2 if the operation would not otherwise 771 + * trap to EL1. 
Therefore, always make sure that for 32-bit guests, 772 + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. 773 + */ 774 + tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state 775 + mov x3, #(1 << 30) 776 + msr fpexc32_el2, x3 777 + isb 778 + 99: 484 779 msr hcr_el2, x2 485 780 mov x2, #CPTR_EL2_TTA 781 + orr x2, x2, #CPTR_EL2_TFP 486 782 msr cptr_el2, x2 487 783 488 784 mov x2, #(1 << 15) // Trap CP15 Cr=15 489 785 msr hstr_el2, x2 490 786 491 - mrs x2, mdcr_el2 492 - and x2, x2, #MDCR_EL2_HPMN_MASK 493 - orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) 494 - orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA) 495 - 496 - // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap 497 - // if not dirty. 498 - ldr x3, [x0, #VCPU_DEBUG_FLAGS] 499 - tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f 500 - orr x2, x2, #MDCR_EL2_TDA 501 - 1: 787 + // Monitor Debug Config - see kvm_arm_setup_debug() 788 + ldr x2, [x0, #VCPU_MDCR_EL2] 502 789 msr mdcr_el2, x2 503 790 .endm 504 791 505 792 .macro deactivate_traps 506 793 mov x2, #HCR_RW 507 794 msr hcr_el2, x2 508 - msr cptr_el2, xzr 509 795 msr hstr_el2, xzr 510 796 511 797 mrs x2, mdcr_el2 ··· 622 900 restore_sysregs 623 901 ret 624 902 903 + /* Save debug state */ 625 904 __save_debug: 626 - save_debug 905 + // x2: ptr to CPU context 906 + // x3: ptr to debug reg struct 907 + // x4/x5/x6-22/x24-26: trashed 908 + 909 + mrs x26, id_aa64dfr0_el1 910 + ubfx x24, x26, #12, #4 // Extract BRPs 911 + ubfx x25, x26, #20, #4 // Extract WRPs 912 + mov w26, #15 913 + sub w24, w26, w24 // How many BPs to skip 914 + sub w25, w26, w25 // How many WPs to skip 915 + 916 + mov x5, x24 917 + add x4, x3, #DEBUG_BCR 918 + save_debug dbgbcr 919 + add x4, x3, #DEBUG_BVR 920 + save_debug dbgbvr 921 + 922 + mov x5, x25 923 + add x4, x3, #DEBUG_WCR 924 + save_debug dbgwcr 925 + add x4, x3, #DEBUG_WVR 926 + save_debug dbgwvr 927 + 928 + mrs x21, mdccint_el1 929 + str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] 627 930 ret 628 931 932 + /* 
Restore debug state */ 629 933 __restore_debug: 630 - restore_debug 934 + // x2: ptr to CPU context 935 + // x3: ptr to debug reg struct 936 + // x4/x5/x6-22/x24-26: trashed 937 + 938 + mrs x26, id_aa64dfr0_el1 939 + ubfx x24, x26, #12, #4 // Extract BRPs 940 + ubfx x25, x26, #20, #4 // Extract WRPs 941 + mov w26, #15 942 + sub w24, w26, w24 // How many BPs to skip 943 + sub w25, w26, w25 // How many WPs to skip 944 + 945 + mov x5, x24 946 + add x4, x3, #DEBUG_BCR 947 + restore_debug dbgbcr 948 + add x4, x3, #DEBUG_BVR 949 + restore_debug dbgbvr 950 + 951 + mov x5, x25 952 + add x4, x3, #DEBUG_WCR 953 + restore_debug dbgwcr 954 + add x4, x3, #DEBUG_WVR 955 + restore_debug dbgwvr 956 + 957 + ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] 958 + msr mdccint_el1, x21 959 + 631 960 ret 632 961 633 962 __save_fpsimd: 963 + skip_fpsimd_state x3, 1f 634 964 save_fpsimd 635 - ret 965 + 1: ret 636 966 637 967 __restore_fpsimd: 968 + skip_fpsimd_state x3, 1f 638 969 restore_fpsimd 639 - ret 970 + 1: ret 971 + 972 + switch_to_guest_fpsimd: 973 + push x4, lr 974 + 975 + mrs x2, cptr_el2 976 + bic x2, x2, #CPTR_EL2_TFP 977 + msr cptr_el2, x2 978 + isb 979 + 980 + mrs x0, tpidr_el2 981 + 982 + ldr x2, [x0, #VCPU_HOST_CONTEXT] 983 + kern_hyp_va x2 984 + bl __save_fpsimd 985 + 986 + add x2, x0, #VCPU_CONTEXT 987 + bl __restore_fpsimd 988 + 989 + skip_32bit_state x3, 1f 990 + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] 991 + msr fpexc32_el2, x4 992 + 1: 993 + pop x4, lr 994 + pop x2, x3 995 + pop x0, x1 996 + 997 + eret 640 998 641 999 /* 642 1000 * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); ··· 738 936 kern_hyp_va x2 739 937 740 938 save_host_regs 741 - bl __save_fpsimd 742 939 bl __save_sysregs 743 940 744 941 compute_debug_state 1f 942 + add x3, x0, #VCPU_HOST_DEBUG_STATE 745 943 bl __save_debug 746 944 1: 747 945 activate_traps ··· 754 952 add x2, x0, #VCPU_CONTEXT 755 953 756 954 bl __restore_sysregs 757 - bl __restore_fpsimd 758 955 759 956 skip_debug_state x3, 1f 957 + ldr 
x3, [x0, #VCPU_DEBUG_PTR] 958 + kern_hyp_va x3 760 959 bl __restore_debug 761 960 1: 762 961 restore_guest_32bit_state ··· 778 975 bl __save_sysregs 779 976 780 977 skip_debug_state x3, 1f 978 + ldr x3, [x0, #VCPU_DEBUG_PTR] 979 + kern_hyp_va x3 781 980 bl __save_debug 782 981 1: 783 982 save_guest_32bit_state ··· 796 991 797 992 bl __restore_sysregs 798 993 bl __restore_fpsimd 994 + /* Clear FPSIMD and Trace trapping */ 995 + msr cptr_el2, xzr 799 996 800 997 skip_debug_state x3, 1f 801 998 // Clear the dirty flag for the next run, as all the state has 802 999 // already been saved. Note that we nuke the whole 64bit word. 803 1000 // If we ever add more flags, we'll have to be more careful... 804 1001 str xzr, [x0, #VCPU_DEBUG_FLAGS] 1002 + add x3, x0, #VCPU_HOST_DEBUG_STATE 805 1003 bl __restore_debug 806 1004 1: 807 1005 restore_host_regs ··· 1007 1199 * x1: ESR 1008 1200 * x2: ESR_EC 1009 1201 */ 1202 + 1203 + /* Guest accessed VFP/SIMD registers, save host, restore Guest */ 1204 + cmp x2, #ESR_ELx_EC_FP_ASIMD 1205 + b.eq switch_to_guest_fpsimd 1206 + 1010 1207 cmp x2, #ESR_ELx_EC_DABT_LOW 1011 1208 mov x0, #ESR_ELx_EC_IABT_LOW 1012 1209 ccmp x2, x0, #4, ne ··· 1105 1292 ventry el1_fiq_invalid // FIQ 32-bit EL1 1106 1293 ventry el1_error_invalid // Error 32-bit EL1 1107 1294 ENDPROC(__kvm_hyp_vector) 1295 + 1296 + 1297 + ENTRY(__kvm_get_mdcr_el2) 1298 + mrs x0, mdcr_el2 1299 + ret 1300 + ENDPROC(__kvm_get_mdcr_el2) 1108 1301 1109 1302 .popsection
+17 -3
arch/arm64/kvm/reset.c
··· 22 22 #include <linux/errno.h> 23 23 #include <linux/kvm_host.h> 24 24 #include <linux/kvm.h> 25 + #include <linux/hw_breakpoint.h> 25 26 26 27 #include <kvm/arm_arch_timer.h> 27 28 ··· 57 56 return !!(pfr0 & 0x20); 58 57 } 59 58 59 + /** 60 + * kvm_arch_dev_ioctl_check_extension 61 + * 62 + * We currently assume that the number of HW registers is uniform 63 + * across all CPUs (see cpuinfo_sanity_check). 64 + */ 60 65 int kvm_arch_dev_ioctl_check_extension(long ext) 61 66 { 62 67 int r; ··· 70 63 switch (ext) { 71 64 case KVM_CAP_ARM_EL1_32BIT: 72 65 r = cpu_has_32bit_el1(); 66 + break; 67 + case KVM_CAP_GUEST_DEBUG_HW_BPS: 68 + r = get_num_brps(); 69 + break; 70 + case KVM_CAP_GUEST_DEBUG_HW_WPS: 71 + r = get_num_wrps(); 72 + break; 73 + case KVM_CAP_SET_GUEST_DEBUG: 74 + r = 1; 73 75 break; 74 76 default: 75 77 r = 0; ··· 121 105 kvm_reset_sys_regs(vcpu); 122 106 123 107 /* Reset timer */ 124 - kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); 125 - 126 - return 0; 108 + return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); 127 109 }
+268 -21
arch/arm64/kvm/sys_regs.c
··· 38 38 39 39 #include "sys_regs.h" 40 40 41 + #include "trace.h" 42 + 41 43 /* 42 44 * All of this file is extremly similar to the ARM coproc.c, but the 43 45 * types are different. My gut feeling is that it should be pretty ··· 210 208 *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); 211 209 } 212 210 211 + trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt)); 212 + 213 213 return true; 214 + } 215 + 216 + /* 217 + * reg_to_dbg/dbg_to_reg 218 + * 219 + * A 32 bit write to a debug register leaves the top bits alone 220 + * A 32 bit read from a debug register only returns the bottom bits 221 + * 222 + * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the 223 + * hyp.S code switches between host and guest values in future. 224 + */ 225 + static inline void reg_to_dbg(struct kvm_vcpu *vcpu, 226 + const struct sys_reg_params *p, 227 + u64 *dbg_reg) 228 + { 229 + u64 val = *vcpu_reg(vcpu, p->Rt); 230 + 231 + if (p->is_32bit) { 232 + val &= 0xffffffffUL; 233 + val |= ((*dbg_reg >> 32) << 32); 234 + } 235 + 236 + *dbg_reg = val; 237 + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; 238 + } 239 + 240 + static inline void dbg_to_reg(struct kvm_vcpu *vcpu, 241 + const struct sys_reg_params *p, 242 + u64 *dbg_reg) 243 + { 244 + u64 val = *dbg_reg; 245 + 246 + if (p->is_32bit) 247 + val &= 0xffffffffUL; 248 + 249 + *vcpu_reg(vcpu, p->Rt) = val; 250 + } 251 + 252 + static inline bool trap_bvr(struct kvm_vcpu *vcpu, 253 + const struct sys_reg_params *p, 254 + const struct sys_reg_desc *rd) 255 + { 256 + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; 257 + 258 + if (p->is_write) 259 + reg_to_dbg(vcpu, p, dbg_reg); 260 + else 261 + dbg_to_reg(vcpu, p, dbg_reg); 262 + 263 + trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); 264 + 265 + return true; 266 + } 267 + 268 + static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 269 + const struct kvm_one_reg *reg, void __user *uaddr) 270 + { 271 + __u64 *r = 
&vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; 272 + 273 + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) 274 + return -EFAULT; 275 + return 0; 276 + } 277 + 278 + static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 279 + const struct kvm_one_reg *reg, void __user *uaddr) 280 + { 281 + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; 282 + 283 + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) 284 + return -EFAULT; 285 + return 0; 286 + } 287 + 288 + static inline void reset_bvr(struct kvm_vcpu *vcpu, 289 + const struct sys_reg_desc *rd) 290 + { 291 + vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; 292 + } 293 + 294 + static inline bool trap_bcr(struct kvm_vcpu *vcpu, 295 + const struct sys_reg_params *p, 296 + const struct sys_reg_desc *rd) 297 + { 298 + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; 299 + 300 + if (p->is_write) 301 + reg_to_dbg(vcpu, p, dbg_reg); 302 + else 303 + dbg_to_reg(vcpu, p, dbg_reg); 304 + 305 + trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); 306 + 307 + return true; 308 + } 309 + 310 + static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 311 + const struct kvm_one_reg *reg, void __user *uaddr) 312 + { 313 + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; 314 + 315 + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) 316 + return -EFAULT; 317 + 318 + return 0; 319 + } 320 + 321 + static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 322 + const struct kvm_one_reg *reg, void __user *uaddr) 323 + { 324 + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; 325 + 326 + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) 327 + return -EFAULT; 328 + return 0; 329 + } 330 + 331 + static inline void reset_bcr(struct kvm_vcpu *vcpu, 332 + const struct sys_reg_desc *rd) 333 + { 334 + vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; 335 + } 336 + 337 + static inline bool trap_wvr(struct kvm_vcpu 
*vcpu, 338 + const struct sys_reg_params *p, 339 + const struct sys_reg_desc *rd) 340 + { 341 + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; 342 + 343 + if (p->is_write) 344 + reg_to_dbg(vcpu, p, dbg_reg); 345 + else 346 + dbg_to_reg(vcpu, p, dbg_reg); 347 + 348 + trace_trap_reg(__func__, rd->reg, p->is_write, 349 + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); 350 + 351 + return true; 352 + } 353 + 354 + static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 355 + const struct kvm_one_reg *reg, void __user *uaddr) 356 + { 357 + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; 358 + 359 + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) 360 + return -EFAULT; 361 + return 0; 362 + } 363 + 364 + static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 365 + const struct kvm_one_reg *reg, void __user *uaddr) 366 + { 367 + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; 368 + 369 + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) 370 + return -EFAULT; 371 + return 0; 372 + } 373 + 374 + static inline void reset_wvr(struct kvm_vcpu *vcpu, 375 + const struct sys_reg_desc *rd) 376 + { 377 + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; 378 + } 379 + 380 + static inline bool trap_wcr(struct kvm_vcpu *vcpu, 381 + const struct sys_reg_params *p, 382 + const struct sys_reg_desc *rd) 383 + { 384 + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; 385 + 386 + if (p->is_write) 387 + reg_to_dbg(vcpu, p, dbg_reg); 388 + else 389 + dbg_to_reg(vcpu, p, dbg_reg); 390 + 391 + trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); 392 + 393 + return true; 394 + } 395 + 396 + static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 397 + const struct kvm_one_reg *reg, void __user *uaddr) 398 + { 399 + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; 400 + 401 + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) 402 + return -EFAULT; 403 + return 
0; 404 + } 405 + 406 + static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 407 + const struct kvm_one_reg *reg, void __user *uaddr) 408 + { 409 + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; 410 + 411 + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) 412 + return -EFAULT; 413 + return 0; 414 + } 415 + 416 + static inline void reset_wcr(struct kvm_vcpu *vcpu, 417 + const struct sys_reg_desc *rd) 418 + { 419 + vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; 214 420 } 215 421 216 422 static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) ··· 450 240 #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ 451 241 /* DBGBVRn_EL1 */ \ 452 242 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ 453 - trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ 243 + trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ 454 244 /* DBGBCRn_EL1 */ \ 455 245 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ 456 - trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ 246 + trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ 457 247 /* DBGWVRn_EL1 */ \ 458 248 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ 459 - trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \ 249 + trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ 460 250 /* DBGWCRn_EL1 */ \ 461 251 { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ 462 - trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 } 252 + trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } 463 253 464 254 /* 465 255 * Architected system registers. 
··· 726 516 return true; 727 517 } 728 518 729 - #define DBG_BCR_BVR_WCR_WVR(n) \ 730 - /* DBGBVRn */ \ 731 - { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \ 732 - NULL, (cp14_DBGBVR0 + (n) * 2) }, \ 733 - /* DBGBCRn */ \ 734 - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \ 735 - NULL, (cp14_DBGBCR0 + (n) * 2) }, \ 736 - /* DBGWVRn */ \ 737 - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \ 738 - NULL, (cp14_DBGWVR0 + (n) * 2) }, \ 739 - /* DBGWCRn */ \ 740 - { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \ 741 - NULL, (cp14_DBGWCR0 + (n) * 2) } 519 + /* AArch32 debug register mappings 520 + * 521 + * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0] 522 + * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32] 523 + * 524 + * All control registers and watchpoint value registers are mapped to 525 + * the lower 32 bits of their AArch64 equivalents. We share the trap 526 + * handlers with the above AArch64 code which checks what mode the 527 + * system is in. 528 + */ 742 529 743 - #define DBGBXVR(n) \ 744 - { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \ 745 - NULL, cp14_DBGBXVR0 + n * 2 } 530 + static inline bool trap_xvr(struct kvm_vcpu *vcpu, 531 + const struct sys_reg_params *p, 532 + const struct sys_reg_desc *rd) 533 + { 534 + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; 535 + 536 + if (p->is_write) { 537 + u64 val = *dbg_reg; 538 + 539 + val &= 0xffffffffUL; 540 + val |= *vcpu_reg(vcpu, p->Rt) << 32; 541 + *dbg_reg = val; 542 + 543 + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; 544 + } else { 545 + *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32; 546 + } 547 + 548 + trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); 549 + 550 + return true; 551 + } 552 + 553 + #define DBG_BCR_BVR_WCR_WVR(n) \ 554 + /* DBGBVRn */ \ 555 + { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ 556 + /* DBGBCRn */ \ 557 + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ 558 + /* DBGWVRn */ \ 559 + { Op1( 0), CRn( 
0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ 560 + /* DBGWCRn */ \ 561 + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } 562 + 563 + #define DBGBXVR(n) \ 564 + { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n } 746 565 747 566 /* 748 567 * Trapped cp14 registers. We generally ignore most of the external 749 568 * debug, on the principle that they don't really make sense to a 750 - * guest. Revisit this one day, whould this principle change. 569 + * guest. Revisit this one day, would this principle change. 751 570 */ 752 571 static const struct sys_reg_desc cp14_regs[] = { 753 572 /* DBGIDR */ ··· 1238 999 struct sys_reg_params params; 1239 1000 unsigned long esr = kvm_vcpu_get_hsr(vcpu); 1240 1001 1002 + trace_kvm_handle_sys_reg(esr); 1003 + 1241 1004 params.is_aarch32 = false; 1242 1005 params.is_32bit = false; 1243 1006 params.Op0 = (esr >> 20) & 3; ··· 1544 1303 if (!r) 1545 1304 return get_invariant_sys_reg(reg->id, uaddr); 1546 1305 1306 + if (r->get_user) 1307 + return (r->get_user)(vcpu, r, reg, uaddr); 1308 + 1547 1309 return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); 1548 1310 } 1549 1311 ··· 1564 1320 r = index_to_sys_reg_desc(vcpu, reg->id); 1565 1321 if (!r) 1566 1322 return set_invariant_sys_reg(reg->id, uaddr); 1323 + 1324 + if (r->set_user) 1325 + return (r->set_user)(vcpu, r, reg, uaddr); 1567 1326 1568 1327 return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); 1569 1328 }
+6
arch/arm64/kvm/sys_regs.h
··· 55 55 56 56 /* Value (usually reset value) */ 57 57 u64 val; 58 + 59 + /* Custom get/set_user functions, fallback to generic if NULL */ 60 + int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 61 + const struct kvm_one_reg *reg, void __user *uaddr); 62 + int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 63 + const struct kvm_one_reg *reg, void __user *uaddr); 58 64 }; 59 65 60 66 static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+2
arch/arm64/kvm/sys_regs_generic_v8.c
··· 94 94 &genericv8_target_table); 95 95 kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, 96 96 &genericv8_target_table); 97 + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8, 98 + &genericv8_target_table); 97 99 98 100 return 0; 99 101 }
+123
arch/arm64/kvm/trace.h
··· 44 44 __entry->vcpu_pc, __entry->r0, __entry->imm) 45 45 ); 46 46 47 + TRACE_EVENT(kvm_arm_setup_debug, 48 + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), 49 + TP_ARGS(vcpu, guest_debug), 50 + 51 + TP_STRUCT__entry( 52 + __field(struct kvm_vcpu *, vcpu) 53 + __field(__u32, guest_debug) 54 + ), 55 + 56 + TP_fast_assign( 57 + __entry->vcpu = vcpu; 58 + __entry->guest_debug = guest_debug; 59 + ), 60 + 61 + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) 62 + ); 63 + 64 + TRACE_EVENT(kvm_arm_clear_debug, 65 + TP_PROTO(__u32 guest_debug), 66 + TP_ARGS(guest_debug), 67 + 68 + TP_STRUCT__entry( 69 + __field(__u32, guest_debug) 70 + ), 71 + 72 + TP_fast_assign( 73 + __entry->guest_debug = guest_debug; 74 + ), 75 + 76 + TP_printk("flags: 0x%08x", __entry->guest_debug) 77 + ); 78 + 79 + TRACE_EVENT(kvm_arm_set_dreg32, 80 + TP_PROTO(const char *name, __u32 value), 81 + TP_ARGS(name, value), 82 + 83 + TP_STRUCT__entry( 84 + __field(const char *, name) 85 + __field(__u32, value) 86 + ), 87 + 88 + TP_fast_assign( 89 + __entry->name = name; 90 + __entry->value = value; 91 + ), 92 + 93 + TP_printk("%s: 0x%08x", __entry->name, __entry->value) 94 + ); 95 + 96 + TRACE_EVENT(kvm_arm_set_regset, 97 + TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), 98 + TP_ARGS(type, len, control, value), 99 + TP_STRUCT__entry( 100 + __field(const char *, name) 101 + __field(int, len) 102 + __array(u64, ctrls, 16) 103 + __array(u64, values, 16) 104 + ), 105 + TP_fast_assign( 106 + __entry->name = type; 107 + __entry->len = len; 108 + memcpy(__entry->ctrls, control, len << 3); 109 + memcpy(__entry->values, value, len << 3); 110 + ), 111 + TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name, 112 + __print_array(__entry->ctrls, __entry->len, sizeof(__u64)), 113 + __print_array(__entry->values, __entry->len, sizeof(__u64))) 114 + ); 115 + 116 + TRACE_EVENT(trap_reg, 117 + TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value), 118 
+ TP_ARGS(fn, reg, is_write, write_value), 119 + 120 + TP_STRUCT__entry( 121 + __field(const char *, fn) 122 + __field(int, reg) 123 + __field(bool, is_write) 124 + __field(u64, write_value) 125 + ), 126 + 127 + TP_fast_assign( 128 + __entry->fn = fn; 129 + __entry->reg = reg; 130 + __entry->is_write = is_write; 131 + __entry->write_value = write_value; 132 + ), 133 + 134 + TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) 135 + ); 136 + 137 + TRACE_EVENT(kvm_handle_sys_reg, 138 + TP_PROTO(unsigned long hsr), 139 + TP_ARGS(hsr), 140 + 141 + TP_STRUCT__entry( 142 + __field(unsigned long, hsr) 143 + ), 144 + 145 + TP_fast_assign( 146 + __entry->hsr = hsr; 147 + ), 148 + 149 + TP_printk("HSR 0x%08lx", __entry->hsr) 150 + ); 151 + 152 + TRACE_EVENT(kvm_set_guest_debug, 153 + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), 154 + TP_ARGS(vcpu, guest_debug), 155 + 156 + TP_STRUCT__entry( 157 + __field(struct kvm_vcpu *, vcpu) 158 + __field(__u32, guest_debug) 159 + ), 160 + 161 + TP_fast_assign( 162 + __entry->vcpu = vcpu; 163 + __entry->guest_debug = guest_debug; 164 + ), 165 + 166 + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) 167 + ); 168 + 169 + 47 170 #endif /* _TRACE_ARM64_KVM_H */ 48 171 49 172 #undef TRACE_INCLUDE_PATH
+3 -2
arch/powerpc/include/asm/kvm_book3s.h
··· 158 158 bool *writable); 159 159 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 160 160 unsigned long *rmap, long pte_index, int realmode); 161 + extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize); 161 162 extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, 162 163 unsigned long pte_index); 163 164 void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, ··· 226 225 return vcpu->arch.cr; 227 226 } 228 227 229 - static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 228 + static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) 230 229 { 231 230 vcpu->arch.xer = val; 232 231 } 233 232 234 - static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 233 + static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu) 235 234 { 236 235 return vcpu->arch.xer; 237 236 }
+21 -1
arch/powerpc/include/asm/kvm_book3s_asm.h
··· 25 25 #define XICS_MFRR 0xc 26 26 #define XICS_IPI 2 /* interrupt source # for IPIs */ 27 27 28 + /* Maximum number of threads per physical core */ 29 + #define MAX_SMT_THREADS 8 30 + 31 + /* Maximum number of subcores per physical core */ 32 + #define MAX_SUBCORES 4 33 + 28 34 #ifdef __ASSEMBLY__ 29 35 30 36 #ifdef CONFIG_KVM_BOOK3S_HANDLER ··· 71 65 72 66 #else /*__ASSEMBLY__ */ 73 67 68 + struct kvmppc_vcore; 69 + 70 + /* Struct used for coordinating micro-threading (split-core) mode changes */ 71 + struct kvm_split_mode { 72 + unsigned long rpr; 73 + unsigned long pmmar; 74 + unsigned long ldbar; 75 + u8 subcore_size; 76 + u8 do_nap; 77 + u8 napped[MAX_SMT_THREADS]; 78 + struct kvmppc_vcore *master_vcs[MAX_SUBCORES]; 79 + }; 80 + 74 81 /* 75 82 * This struct goes in the PACA on 64-bit processors. It is used 76 83 * to store host state that needs to be saved when we enter a guest ··· 119 100 u64 host_spurr; 120 101 u64 host_dscr; 121 102 u64 dec_expires; 103 + struct kvm_split_mode *kvm_split_mode; 122 104 #endif 123 105 #ifdef CONFIG_PPC_BOOK3S_64 124 106 u64 cfar; ··· 132 112 bool in_use; 133 113 ulong gpr[14]; 134 114 u32 cr; 135 - u32 xer; 115 + ulong xer; 136 116 ulong ctr; 137 117 ulong lr; 138 118 ulong pc;
+2 -2
arch/powerpc/include/asm/kvm_booke.h
··· 54 54 return vcpu->arch.cr; 55 55 } 56 56 57 - static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 57 + static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) 58 58 { 59 59 vcpu->arch.xer = val; 60 60 } 61 61 62 - static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 62 + static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu) 63 63 { 64 64 return vcpu->arch.xer; 65 65 }
+20 -6
arch/powerpc/include/asm/kvm_host.h
··· 205 205 */ 206 206 #define KVMPPC_RMAP_LOCK_BIT 63 207 207 #define KVMPPC_RMAP_RC_SHIFT 32 208 + #define KVMPPC_RMAP_CHG_SHIFT 48 208 209 #define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT) 209 210 #define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT) 211 + #define KVMPPC_RMAP_CHG_ORDER (0x3ful << KVMPPC_RMAP_CHG_SHIFT) 210 212 #define KVMPPC_RMAP_PRESENT 0x100000000ul 211 213 #define KVMPPC_RMAP_INDEX 0xfffffffful 212 214 ··· 280 278 u16 last_cpu; 281 279 u8 vcore_state; 282 280 u8 in_guest; 281 + struct kvmppc_vcore *master_vcore; 283 282 struct list_head runnable_threads; 283 + struct list_head preempt_list; 284 284 spinlock_t lock; 285 285 wait_queue_head_t wq; 286 286 spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ ··· 304 300 #define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8) 305 301 #define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0) 306 302 307 - /* Values for vcore_state */ 303 + /* This bit is used when a vcore exit is triggered from outside the vcore */ 304 + #define VCORE_EXIT_REQ 0x10000 305 + 306 + /* 307 + * Values for vcore_state. 308 + * Note that these are arranged such that lower values 309 + * (< VCORE_SLEEPING) don't require stolen time accounting 310 + * on load/unload, and higher values do. 
311 + */ 308 312 #define VCORE_INACTIVE 0 309 - #define VCORE_SLEEPING 1 310 - #define VCORE_PREEMPT 2 311 - #define VCORE_RUNNING 3 312 - #define VCORE_EXITING 4 313 + #define VCORE_PREEMPT 1 314 + #define VCORE_PIGGYBACK 2 315 + #define VCORE_SLEEPING 3 316 + #define VCORE_RUNNING 4 317 + #define VCORE_EXITING 5 313 318 314 319 /* 315 320 * Struct used to manage memory for a virtual processor area ··· 486 473 ulong ciabr; 487 474 ulong cfar; 488 475 ulong ppr; 489 - ulong pspb; 476 + u32 pspb; 490 477 ulong fscr; 491 478 ulong shadow_fscr; 492 479 ulong ebbhr; ··· 632 619 int trap; 633 620 int state; 634 621 int ptid; 622 + int thread_cpu; 635 623 bool timer_running; 636 624 wait_queue_head_t cpu_run; 637 625
+1 -1
arch/powerpc/include/asm/ppc-opcode.h
··· 287 287 288 288 /* POWER8 Micro Partition Prefetch (MPP) parameters */ 289 289 /* Address mask is common for LOGMPP instruction and MPPR SPR */ 290 - #define PPC_MPPE_ADDRESS_MASK 0xffffffffc000 290 + #define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL 291 291 292 292 /* Bits 60 and 61 of MPP SPR should be set to one of the following */ 293 293 /* Aborting the fetch is indeed setting 00 in the table size bits */
+9
arch/powerpc/kernel/asm-offsets.c
··· 511 511 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); 512 512 DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); 513 513 DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst)); 514 + DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu)); 515 + DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu)); 514 516 #endif 515 517 #ifdef CONFIG_PPC_BOOK3S 516 518 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); ··· 675 673 HSTATE_FIELD(HSTATE_DSCR, host_dscr); 676 674 HSTATE_FIELD(HSTATE_DABR, dabr); 677 675 HSTATE_FIELD(HSTATE_DECEXP, dec_expires); 676 + HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); 678 677 DEFINE(IPI_PRIORITY, IPI_PRIORITY); 678 + DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr)); 679 + DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar)); 680 + DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar)); 681 + DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size)); 682 + DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap)); 683 + DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped)); 679 684 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 680 685 681 686 #ifdef CONFIG_PPC_BOOK3S_64
+4 -4
arch/powerpc/kvm/Kconfig
··· 74 74 If unsure, say N. 75 75 76 76 config KVM_BOOK3S_64_HV 77 - tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" 77 + tristate "KVM for POWER7 and later using hypervisor mode in host" 78 78 depends on KVM_BOOK3S_64 && PPC_POWERNV 79 79 select KVM_BOOK3S_HV_POSSIBLE 80 80 select MMU_NOTIFIER 81 81 select CMA 82 82 ---help--- 83 83 Support running unmodified book3s_64 guest kernels in 84 - virtual machines on POWER7 and PPC970 processors that have 84 + virtual machines on POWER7 and newer processors that have 85 85 hypervisor mode available to the host. 86 86 87 87 If you say Y here, KVM will use the hardware virtualization ··· 89 89 guest operating systems will run at full hardware speed 90 90 using supervisor and user modes. However, this also means 91 91 that KVM is not usable under PowerVM (pHyp), is only usable 92 - on POWER7 (or later) processors and PPC970-family processors, 93 - and cannot emulate a different processor from the host processor. 92 + on POWER7 or later processors, and cannot emulate a 93 + different processor from the host processor. 94 94 95 95 If unsure, say N. 96 96
+2 -1
arch/powerpc/kvm/book3s.c
··· 240 240 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); 241 241 } 242 242 243 - int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) 243 + static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, 244 + unsigned int priority) 244 245 { 245 246 int deliver = 1; 246 247 int vec = 0;
+1
arch/powerpc/kvm/book3s_32_mmu_host.c
··· 26 26 #include <asm/machdep.h> 27 27 #include <asm/mmu_context.h> 28 28 #include <asm/hw_irq.h> 29 + #include "book3s.h" 29 30 30 31 /* #define DEBUG_MMU */ 31 32 /* #define DEBUG_SR */
+1
arch/powerpc/kvm/book3s_64_mmu_host.c
··· 28 28 #include <asm/mmu_context.h> 29 29 #include <asm/hw_irq.h> 30 30 #include "trace_pr.h" 31 + #include "book3s.h" 31 32 32 33 #define PTE_SIZE 12 33 34
+7 -1
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 761 761 /* Harvest R and C */ 762 762 rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); 763 763 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; 764 + if (rcbits & HPTE_R_C) 765 + kvmppc_update_rmap_change(rmapp, psize); 764 766 if (rcbits & ~rev[i].guest_rpte) { 765 767 rev[i].guest_rpte = ptel | rcbits; 766 768 note_hpte_modification(kvm, &rev[i]); ··· 929 927 retry: 930 928 lock_rmap(rmapp); 931 929 if (*rmapp & KVMPPC_RMAP_CHANGED) { 932 - *rmapp &= ~KVMPPC_RMAP_CHANGED; 930 + long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER) 931 + >> KVMPPC_RMAP_CHG_SHIFT; 932 + *rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER); 933 933 npages_dirty = 1; 934 + if (change_order > PAGE_SHIFT) 935 + npages_dirty = 1ul << (change_order - PAGE_SHIFT); 934 936 } 935 937 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 936 938 unlock_rmap(rmapp);
+1
arch/powerpc/kvm/book3s_emulate.c
··· 23 23 #include <asm/reg.h> 24 24 #include <asm/switch_to.h> 25 25 #include <asm/time.h> 26 + #include "book3s.h" 26 27 27 28 #define OP_19_XOP_RFID 18 28 29 #define OP_19_XOP_RFI 50
+582 -82
arch/powerpc/kvm/book3s_hv.c
··· 81 81 #define MPP_BUFFER_ORDER 3 82 82 #endif 83 83 84 + static int dynamic_mt_modes = 6; 85 + module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR); 86 + MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)"); 87 + static int target_smt_mode; 88 + module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); 89 + MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); 84 90 85 91 static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 86 92 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); ··· 120 114 121 115 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) 122 116 { 123 - int cpu = vcpu->cpu; 117 + int cpu; 124 118 wait_queue_head_t *wqp; 125 119 126 120 wqp = kvm_arch_vcpu_wq(vcpu); ··· 129 123 ++vcpu->stat.halt_wakeup; 130 124 } 131 125 132 - if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid)) 126 + if (kvmppc_ipi_thread(vcpu->arch.thread_cpu)) 133 127 return; 134 128 135 129 /* CPU points to the first thread of the core */ 130 + cpu = vcpu->cpu; 136 131 if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu)) 137 132 smp_send_reschedule(cpu); 138 133 } ··· 171 164 * they should never fail.) 
172 165 */ 173 166 167 + static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc) 168 + { 169 + unsigned long flags; 170 + 171 + spin_lock_irqsave(&vc->stoltb_lock, flags); 172 + vc->preempt_tb = mftb(); 173 + spin_unlock_irqrestore(&vc->stoltb_lock, flags); 174 + } 175 + 176 + static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc) 177 + { 178 + unsigned long flags; 179 + 180 + spin_lock_irqsave(&vc->stoltb_lock, flags); 181 + if (vc->preempt_tb != TB_NIL) { 182 + vc->stolen_tb += mftb() - vc->preempt_tb; 183 + vc->preempt_tb = TB_NIL; 184 + } 185 + spin_unlock_irqrestore(&vc->stoltb_lock, flags); 186 + } 187 + 174 188 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) 175 189 { 176 190 struct kvmppc_vcore *vc = vcpu->arch.vcore; ··· 203 175 * vcpu, and once it is set to this vcpu, only this task 204 176 * ever sets it to NULL. 205 177 */ 206 - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { 207 - spin_lock_irqsave(&vc->stoltb_lock, flags); 208 - if (vc->preempt_tb != TB_NIL) { 209 - vc->stolen_tb += mftb() - vc->preempt_tb; 210 - vc->preempt_tb = TB_NIL; 211 - } 212 - spin_unlock_irqrestore(&vc->stoltb_lock, flags); 213 - } 178 + if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) 179 + kvmppc_core_end_stolen(vc); 180 + 214 181 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 215 182 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && 216 183 vcpu->arch.busy_preempt != TB_NIL) { ··· 220 197 struct kvmppc_vcore *vc = vcpu->arch.vcore; 221 198 unsigned long flags; 222 199 223 - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { 224 - spin_lock_irqsave(&vc->stoltb_lock, flags); 225 - vc->preempt_tb = mftb(); 226 - spin_unlock_irqrestore(&vc->stoltb_lock, flags); 227 - } 200 + if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) 201 + kvmppc_core_start_stolen(vc); 202 + 228 203 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 229 204 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) 230 205 
vcpu->arch.busy_preempt = mftb(); ··· 235 214 kvmppc_end_cede(vcpu); 236 215 } 237 216 238 - void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) 217 + static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) 239 218 { 240 219 vcpu->arch.pvr = pvr; 241 220 } 242 221 243 - int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) 222 + static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) 244 223 { 245 224 unsigned long pcr = 0; 246 225 struct kvmppc_vcore *vc = vcpu->arch.vcore; ··· 280 259 return 0; 281 260 } 282 261 283 - void kvmppc_dump_regs(struct kvm_vcpu *vcpu) 262 + static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) 284 263 { 285 264 int r; 286 265 ··· 313 292 vcpu->arch.last_inst); 314 293 } 315 294 316 - struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) 295 + static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) 317 296 { 318 297 int r; 319 298 struct kvm_vcpu *v, *ret = NULL; ··· 662 641 663 642 spin_lock(&vcore->lock); 664 643 if (target->arch.state == KVMPPC_VCPU_RUNNABLE && 665 - vcore->vcore_state != VCORE_INACTIVE) 644 + vcore->vcore_state != VCORE_INACTIVE && 645 + vcore->runner) 666 646 target = vcore->runner; 667 647 spin_unlock(&vcore->lock); 668 648 ··· 1453 1431 vcore->lpcr = kvm->arch.lpcr; 1454 1432 vcore->first_vcpuid = core * threads_per_subcore; 1455 1433 vcore->kvm = kvm; 1434 + INIT_LIST_HEAD(&vcore->preempt_list); 1456 1435 1457 1436 vcore->mpp_buffer_is_valid = false; 1458 1437 ··· 1678 1655 spin_unlock(&vcore->lock); 1679 1656 vcpu->arch.vcore = vcore; 1680 1657 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; 1658 + vcpu->arch.thread_cpu = -1; 1681 1659 1682 1660 vcpu->arch.cpu_type = KVM_CPU_3S_64; 1683 1661 kvmppc_sanity_check(vcpu); ··· 1773 1749 1774 1750 /* Ensure the thread won't go into the kernel if it wakes */ 1775 1751 tpaca->kvm_hstate.kvm_vcpu = NULL; 1752 + tpaca->kvm_hstate.kvm_vcore = NULL; 1776 1753 tpaca->kvm_hstate.napping = 0; 1777 1754 smp_wmb(); 
1778 1755 tpaca->kvm_hstate.hwthread_req = 1; ··· 1805 1780 tpaca = &paca[cpu]; 1806 1781 tpaca->kvm_hstate.hwthread_req = 0; 1807 1782 tpaca->kvm_hstate.kvm_vcpu = NULL; 1783 + tpaca->kvm_hstate.kvm_vcore = NULL; 1784 + tpaca->kvm_hstate.kvm_split_mode = NULL; 1808 1785 } 1809 1786 1810 - static void kvmppc_start_thread(struct kvm_vcpu *vcpu) 1787 + static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) 1811 1788 { 1812 1789 int cpu; 1813 1790 struct paca_struct *tpaca; 1814 - struct kvmppc_vcore *vc = vcpu->arch.vcore; 1791 + struct kvmppc_vcore *mvc = vc->master_vcore; 1815 1792 1816 - if (vcpu->arch.timer_running) { 1817 - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 1818 - vcpu->arch.timer_running = 0; 1793 + cpu = vc->pcpu; 1794 + if (vcpu) { 1795 + if (vcpu->arch.timer_running) { 1796 + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 1797 + vcpu->arch.timer_running = 0; 1798 + } 1799 + cpu += vcpu->arch.ptid; 1800 + vcpu->cpu = mvc->pcpu; 1801 + vcpu->arch.thread_cpu = cpu; 1819 1802 } 1820 - cpu = vc->pcpu + vcpu->arch.ptid; 1821 1803 tpaca = &paca[cpu]; 1822 - tpaca->kvm_hstate.kvm_vcore = vc; 1823 - tpaca->kvm_hstate.ptid = vcpu->arch.ptid; 1824 - vcpu->cpu = vc->pcpu; 1825 - /* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */ 1826 - smp_wmb(); 1827 1804 tpaca->kvm_hstate.kvm_vcpu = vcpu; 1805 + tpaca->kvm_hstate.ptid = cpu - mvc->pcpu; 1806 + /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */ 1807 + smp_wmb(); 1808 + tpaca->kvm_hstate.kvm_vcore = mvc; 1828 1809 if (cpu != smp_processor_id()) 1829 1810 kvmppc_ipi_thread(cpu); 1830 1811 } ··· 1843 1812 for (loops = 0; loops < 1000000; ++loops) { 1844 1813 /* 1845 1814 * Check if all threads are finished. 1846 - * We set the vcpu pointer when starting a thread 1815 + * We set the vcore pointer when starting a thread 1847 1816 * and the thread clears it when finished, so we look 1848 - * for any threads that still have a non-NULL vcpu ptr. 
1817 + * for any threads that still have a non-NULL vcore ptr. 1849 1818 */ 1850 1819 for (i = 1; i < threads_per_subcore; ++i) 1851 - if (paca[cpu + i].kvm_hstate.kvm_vcpu) 1820 + if (paca[cpu + i].kvm_hstate.kvm_vcore) 1852 1821 break; 1853 1822 if (i == threads_per_subcore) { 1854 1823 HMT_medium(); ··· 1858 1827 } 1859 1828 HMT_medium(); 1860 1829 for (i = 1; i < threads_per_subcore; ++i) 1861 - if (paca[cpu + i].kvm_hstate.kvm_vcpu) 1830 + if (paca[cpu + i].kvm_hstate.kvm_vcore) 1862 1831 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); 1863 1832 } 1864 1833 ··· 1921 1890 mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); 1922 1891 } 1923 1892 1893 + /* 1894 + * A list of virtual cores for each physical CPU. 1895 + * These are vcores that could run but their runner VCPU tasks are 1896 + * (or may be) preempted. 1897 + */ 1898 + struct preempted_vcore_list { 1899 + struct list_head list; 1900 + spinlock_t lock; 1901 + }; 1902 + 1903 + static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores); 1904 + 1905 + static void init_vcore_lists(void) 1906 + { 1907 + int cpu; 1908 + 1909 + for_each_possible_cpu(cpu) { 1910 + struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu); 1911 + spin_lock_init(&lp->lock); 1912 + INIT_LIST_HEAD(&lp->list); 1913 + } 1914 + } 1915 + 1916 + static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) 1917 + { 1918 + struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores); 1919 + 1920 + vc->vcore_state = VCORE_PREEMPT; 1921 + vc->pcpu = smp_processor_id(); 1922 + if (vc->num_threads < threads_per_subcore) { 1923 + spin_lock(&lp->lock); 1924 + list_add_tail(&vc->preempt_list, &lp->list); 1925 + spin_unlock(&lp->lock); 1926 + } 1927 + 1928 + /* Start accumulating stolen time */ 1929 + kvmppc_core_start_stolen(vc); 1930 + } 1931 + 1932 + static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc) 1933 + { 1934 + struct preempted_vcore_list *lp; 1935 + 1936 + kvmppc_core_end_stolen(vc); 1937 + if 
(!list_empty(&vc->preempt_list)) { 1938 + lp = &per_cpu(preempted_vcores, vc->pcpu); 1939 + spin_lock(&lp->lock); 1940 + list_del_init(&vc->preempt_list); 1941 + spin_unlock(&lp->lock); 1942 + } 1943 + vc->vcore_state = VCORE_INACTIVE; 1944 + } 1945 + 1946 + /* 1947 + * This stores information about the virtual cores currently 1948 + * assigned to a physical core. 1949 + */ 1950 + struct core_info { 1951 + int n_subcores; 1952 + int max_subcore_threads; 1953 + int total_threads; 1954 + int subcore_threads[MAX_SUBCORES]; 1955 + struct kvm *subcore_vm[MAX_SUBCORES]; 1956 + struct list_head vcs[MAX_SUBCORES]; 1957 + }; 1958 + 1959 + /* 1960 + * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7 1961 + * respectively in 2-way micro-threading (split-core) mode. 1962 + */ 1963 + static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 }; 1964 + 1965 + static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc) 1966 + { 1967 + int sub; 1968 + 1969 + memset(cip, 0, sizeof(*cip)); 1970 + cip->n_subcores = 1; 1971 + cip->max_subcore_threads = vc->num_threads; 1972 + cip->total_threads = vc->num_threads; 1973 + cip->subcore_threads[0] = vc->num_threads; 1974 + cip->subcore_vm[0] = vc->kvm; 1975 + for (sub = 0; sub < MAX_SUBCORES; ++sub) 1976 + INIT_LIST_HEAD(&cip->vcs[sub]); 1977 + list_add_tail(&vc->preempt_list, &cip->vcs[0]); 1978 + } 1979 + 1980 + static bool subcore_config_ok(int n_subcores, int n_threads) 1981 + { 1982 + /* Can only dynamically split if unsplit to begin with */ 1983 + if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS) 1984 + return false; 1985 + if (n_subcores > MAX_SUBCORES) 1986 + return false; 1987 + if (n_subcores > 1) { 1988 + if (!(dynamic_mt_modes & 2)) 1989 + n_subcores = 4; 1990 + if (n_subcores > 2 && !(dynamic_mt_modes & 4)) 1991 + return false; 1992 + } 1993 + 1994 + return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS; 1995 + } 1996 + 1997 + static void init_master_vcore(struct 
kvmppc_vcore *vc) 1998 + { 1999 + vc->master_vcore = vc; 2000 + vc->entry_exit_map = 0; 2001 + vc->in_guest = 0; 2002 + vc->napping_threads = 0; 2003 + vc->conferring_threads = 0; 2004 + } 2005 + 2006 + /* 2007 + * See if the existing subcores can be split into 3 (or fewer) subcores 2008 + * of at most two threads each, so we can fit in another vcore. This 2009 + * assumes there are at most two subcores and at most 6 threads in total. 2010 + */ 2011 + static bool can_split_piggybacked_subcores(struct core_info *cip) 2012 + { 2013 + int sub, new_sub; 2014 + int large_sub = -1; 2015 + int thr; 2016 + int n_subcores = cip->n_subcores; 2017 + struct kvmppc_vcore *vc, *vcnext; 2018 + struct kvmppc_vcore *master_vc = NULL; 2019 + 2020 + for (sub = 0; sub < cip->n_subcores; ++sub) { 2021 + if (cip->subcore_threads[sub] <= 2) 2022 + continue; 2023 + if (large_sub >= 0) 2024 + return false; 2025 + large_sub = sub; 2026 + vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, 2027 + preempt_list); 2028 + if (vc->num_threads > 2) 2029 + return false; 2030 + n_subcores += (cip->subcore_threads[sub] - 1) >> 1; 2031 + } 2032 + if (n_subcores > 3 || large_sub < 0) 2033 + return false; 2034 + 2035 + /* 2036 + * Seems feasible, so go through and move vcores to new subcores. 2037 + * Note that when we have two or more vcores in one subcore, 2038 + * all those vcores must have only one thread each. 
2039 + */ 2040 + new_sub = cip->n_subcores; 2041 + thr = 0; 2042 + sub = large_sub; 2043 + list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) { 2044 + if (thr >= 2) { 2045 + list_del(&vc->preempt_list); 2046 + list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]); 2047 + /* vc->num_threads must be 1 */ 2048 + if (++cip->subcore_threads[new_sub] == 1) { 2049 + cip->subcore_vm[new_sub] = vc->kvm; 2050 + init_master_vcore(vc); 2051 + master_vc = vc; 2052 + ++cip->n_subcores; 2053 + } else { 2054 + vc->master_vcore = master_vc; 2055 + ++new_sub; 2056 + } 2057 + } 2058 + thr += vc->num_threads; 2059 + } 2060 + cip->subcore_threads[large_sub] = 2; 2061 + cip->max_subcore_threads = 2; 2062 + 2063 + return true; 2064 + } 2065 + 2066 + static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) 2067 + { 2068 + int n_threads = vc->num_threads; 2069 + int sub; 2070 + 2071 + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) 2072 + return false; 2073 + 2074 + if (n_threads < cip->max_subcore_threads) 2075 + n_threads = cip->max_subcore_threads; 2076 + if (subcore_config_ok(cip->n_subcores + 1, n_threads)) { 2077 + cip->max_subcore_threads = n_threads; 2078 + } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 && 2079 + vc->num_threads <= 2) { 2080 + /* 2081 + * We may be able to fit another subcore in by 2082 + * splitting an existing subcore with 3 or 4 2083 + * threads into two 2-thread subcores, or one 2084 + * with 5 or 6 threads into three subcores. 2085 + * We can only do this if those subcores have 2086 + * piggybacked virtual cores. 
2087 + */ 2088 + if (!can_split_piggybacked_subcores(cip)) 2089 + return false; 2090 + } else { 2091 + return false; 2092 + } 2093 + 2094 + sub = cip->n_subcores; 2095 + ++cip->n_subcores; 2096 + cip->total_threads += vc->num_threads; 2097 + cip->subcore_threads[sub] = vc->num_threads; 2098 + cip->subcore_vm[sub] = vc->kvm; 2099 + init_master_vcore(vc); 2100 + list_del(&vc->preempt_list); 2101 + list_add_tail(&vc->preempt_list, &cip->vcs[sub]); 2102 + 2103 + return true; 2104 + } 2105 + 2106 + static bool can_piggyback_subcore(struct kvmppc_vcore *pvc, 2107 + struct core_info *cip, int sub) 2108 + { 2109 + struct kvmppc_vcore *vc; 2110 + int n_thr; 2111 + 2112 + vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, 2113 + preempt_list); 2114 + 2115 + /* require same VM and same per-core reg values */ 2116 + if (pvc->kvm != vc->kvm || 2117 + pvc->tb_offset != vc->tb_offset || 2118 + pvc->pcr != vc->pcr || 2119 + pvc->lpcr != vc->lpcr) 2120 + return false; 2121 + 2122 + /* P8 guest with > 1 thread per core would see wrong TIR value */ 2123 + if (cpu_has_feature(CPU_FTR_ARCH_207S) && 2124 + (vc->num_threads > 1 || pvc->num_threads > 1)) 2125 + return false; 2126 + 2127 + n_thr = cip->subcore_threads[sub] + pvc->num_threads; 2128 + if (n_thr > cip->max_subcore_threads) { 2129 + if (!subcore_config_ok(cip->n_subcores, n_thr)) 2130 + return false; 2131 + cip->max_subcore_threads = n_thr; 2132 + } 2133 + 2134 + cip->total_threads += pvc->num_threads; 2135 + cip->subcore_threads[sub] = n_thr; 2136 + pvc->master_vcore = vc; 2137 + list_del(&pvc->preempt_list); 2138 + list_add_tail(&pvc->preempt_list, &cip->vcs[sub]); 2139 + 2140 + return true; 2141 + } 2142 + 2143 + /* 2144 + * Work out whether it is possible to piggyback the execution of 2145 + * vcore *pvc onto the execution of the other vcores described in *cip. 
2146 + */ 2147 + static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip, 2148 + int target_threads) 2149 + { 2150 + int sub; 2151 + 2152 + if (cip->total_threads + pvc->num_threads > target_threads) 2153 + return false; 2154 + for (sub = 0; sub < cip->n_subcores; ++sub) 2155 + if (cip->subcore_threads[sub] && 2156 + can_piggyback_subcore(pvc, cip, sub)) 2157 + return true; 2158 + 2159 + if (can_dynamic_split(pvc, cip)) 2160 + return true; 2161 + 2162 + return false; 2163 + } 2164 + 1924 2165 static void prepare_threads(struct kvmppc_vcore *vc) 1925 2166 { 1926 2167 struct kvm_vcpu *vcpu, *vnext; ··· 2212 1909 } 2213 1910 } 2214 1911 2215 - static void post_guest_process(struct kvmppc_vcore *vc) 1912 + static void collect_piggybacks(struct core_info *cip, int target_threads) 2216 1913 { 1914 + struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores); 1915 + struct kvmppc_vcore *pvc, *vcnext; 1916 + 1917 + spin_lock(&lp->lock); 1918 + list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) { 1919 + if (!spin_trylock(&pvc->lock)) 1920 + continue; 1921 + prepare_threads(pvc); 1922 + if (!pvc->n_runnable) { 1923 + list_del_init(&pvc->preempt_list); 1924 + if (pvc->runner == NULL) { 1925 + pvc->vcore_state = VCORE_INACTIVE; 1926 + kvmppc_core_end_stolen(pvc); 1927 + } 1928 + spin_unlock(&pvc->lock); 1929 + continue; 1930 + } 1931 + if (!can_piggyback(pvc, cip, target_threads)) { 1932 + spin_unlock(&pvc->lock); 1933 + continue; 1934 + } 1935 + kvmppc_core_end_stolen(pvc); 1936 + pvc->vcore_state = VCORE_PIGGYBACK; 1937 + if (cip->total_threads >= target_threads) 1938 + break; 1939 + } 1940 + spin_unlock(&lp->lock); 1941 + } 1942 + 1943 + static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) 1944 + { 1945 + int still_running = 0; 2217 1946 u64 now; 2218 1947 long ret; 2219 1948 struct kvm_vcpu *vcpu, *vnext; 2220 1949 1950 + spin_lock(&vc->lock); 2221 1951 now = get_tb(); 2222 1952 list_for_each_entry_safe(vcpu, vnext, 
&vc->runnable_threads, 2223 1953 arch.run_list) { ··· 2269 1933 vcpu->arch.ret = ret; 2270 1934 vcpu->arch.trap = 0; 2271 1935 2272 - if (vcpu->arch.ceded) { 2273 - if (!is_kvmppc_resume_guest(ret)) 2274 - kvmppc_end_cede(vcpu); 2275 - else 1936 + if (is_kvmppc_resume_guest(vcpu->arch.ret)) { 1937 + if (vcpu->arch.pending_exceptions) 1938 + kvmppc_core_prepare_to_enter(vcpu); 1939 + if (vcpu->arch.ceded) 2276 1940 kvmppc_set_timer(vcpu); 2277 - } 2278 - if (!is_kvmppc_resume_guest(vcpu->arch.ret)) { 1941 + else 1942 + ++still_running; 1943 + } else { 2279 1944 kvmppc_remove_runnable(vc, vcpu); 2280 1945 wake_up(&vcpu->arch.cpu_run); 2281 1946 } 2282 1947 } 1948 + list_del_init(&vc->preempt_list); 1949 + if (!is_master) { 1950 + if (still_running > 0) { 1951 + kvmppc_vcore_preempt(vc); 1952 + } else if (vc->runner) { 1953 + vc->vcore_state = VCORE_PREEMPT; 1954 + kvmppc_core_start_stolen(vc); 1955 + } else { 1956 + vc->vcore_state = VCORE_INACTIVE; 1957 + } 1958 + if (vc->n_runnable > 0 && vc->runner == NULL) { 1959 + /* make sure there's a candidate runner awake */ 1960 + vcpu = list_first_entry(&vc->runnable_threads, 1961 + struct kvm_vcpu, arch.run_list); 1962 + wake_up(&vcpu->arch.cpu_run); 1963 + } 1964 + } 1965 + spin_unlock(&vc->lock); 2283 1966 } 2284 1967 2285 1968 /* ··· 2310 1955 struct kvm_vcpu *vcpu, *vnext; 2311 1956 int i; 2312 1957 int srcu_idx; 1958 + struct core_info core_info; 1959 + struct kvmppc_vcore *pvc, *vcnext; 1960 + struct kvm_split_mode split_info, *sip; 1961 + int split, subcore_size, active; 1962 + int sub; 1963 + bool thr0_done; 1964 + unsigned long cmd_bit, stat_bit; 1965 + int pcpu, thr; 1966 + int target_threads; 2313 1967 2314 1968 /* 2315 1969 * Remove from the list any threads that have a signal pending ··· 2333 1969 /* 2334 1970 * Initialize *vc. 
2335 1971 */ 2336 - vc->entry_exit_map = 0; 1972 + init_master_vcore(vc); 2337 1973 vc->preempt_tb = TB_NIL; 2338 - vc->in_guest = 0; 2339 - vc->napping_threads = 0; 2340 - vc->conferring_threads = 0; 2341 1974 2342 1975 /* 2343 1976 * Make sure we are running on primary threads, and that secondary ··· 2352 1991 goto out; 2353 1992 } 2354 1993 1994 + /* 1995 + * See if we could run any other vcores on the physical core 1996 + * along with this one. 1997 + */ 1998 + init_core_info(&core_info, vc); 1999 + pcpu = smp_processor_id(); 2000 + target_threads = threads_per_subcore; 2001 + if (target_smt_mode && target_smt_mode < target_threads) 2002 + target_threads = target_smt_mode; 2003 + if (vc->num_threads < target_threads) 2004 + collect_piggybacks(&core_info, target_threads); 2355 2005 2356 - vc->pcpu = smp_processor_id(); 2357 - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 2358 - kvmppc_start_thread(vcpu); 2359 - kvmppc_create_dtl_entry(vcpu, vc); 2360 - trace_kvm_guest_enter(vcpu); 2006 + /* Decide on micro-threading (split-core) mode */ 2007 + subcore_size = threads_per_subcore; 2008 + cmd_bit = stat_bit = 0; 2009 + split = core_info.n_subcores; 2010 + sip = NULL; 2011 + if (split > 1) { 2012 + /* threads_per_subcore must be MAX_SMT_THREADS (8) here */ 2013 + if (split == 2 && (dynamic_mt_modes & 2)) { 2014 + cmd_bit = HID0_POWER8_1TO2LPAR; 2015 + stat_bit = HID0_POWER8_2LPARMODE; 2016 + } else { 2017 + split = 4; 2018 + cmd_bit = HID0_POWER8_1TO4LPAR; 2019 + stat_bit = HID0_POWER8_4LPARMODE; 2020 + } 2021 + subcore_size = MAX_SMT_THREADS / split; 2022 + sip = &split_info; 2023 + memset(&split_info, 0, sizeof(split_info)); 2024 + split_info.rpr = mfspr(SPRN_RPR); 2025 + split_info.pmmar = mfspr(SPRN_PMMAR); 2026 + split_info.ldbar = mfspr(SPRN_LDBAR); 2027 + split_info.subcore_size = subcore_size; 2028 + for (sub = 0; sub < core_info.n_subcores; ++sub) 2029 + split_info.master_vcs[sub] = 2030 + list_first_entry(&core_info.vcs[sub], 2031 + 
struct kvmppc_vcore, preempt_list); 2032 + /* order writes to split_info before kvm_split_mode pointer */ 2033 + smp_wmb(); 2034 + } 2035 + pcpu = smp_processor_id(); 2036 + for (thr = 0; thr < threads_per_subcore; ++thr) 2037 + paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; 2038 + 2039 + /* Initiate micro-threading (split-core) if required */ 2040 + if (cmd_bit) { 2041 + unsigned long hid0 = mfspr(SPRN_HID0); 2042 + 2043 + hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS; 2044 + mb(); 2045 + mtspr(SPRN_HID0, hid0); 2046 + isync(); 2047 + for (;;) { 2048 + hid0 = mfspr(SPRN_HID0); 2049 + if (hid0 & stat_bit) 2050 + break; 2051 + cpu_relax(); 2052 + } 2361 2053 } 2362 2054 2363 - /* Set this explicitly in case thread 0 doesn't have a vcpu */ 2364 - get_paca()->kvm_hstate.kvm_vcore = vc; 2365 - get_paca()->kvm_hstate.ptid = 0; 2055 + /* Start all the threads */ 2056 + active = 0; 2057 + for (sub = 0; sub < core_info.n_subcores; ++sub) { 2058 + thr = subcore_thread_map[sub]; 2059 + thr0_done = false; 2060 + active |= 1 << thr; 2061 + list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) { 2062 + pvc->pcpu = pcpu + thr; 2063 + list_for_each_entry(vcpu, &pvc->runnable_threads, 2064 + arch.run_list) { 2065 + kvmppc_start_thread(vcpu, pvc); 2066 + kvmppc_create_dtl_entry(vcpu, pvc); 2067 + trace_kvm_guest_enter(vcpu); 2068 + if (!vcpu->arch.ptid) 2069 + thr0_done = true; 2070 + active |= 1 << (thr + vcpu->arch.ptid); 2071 + } 2072 + /* 2073 + * We need to start the first thread of each subcore 2074 + * even if it doesn't have a vcpu. 2075 + */ 2076 + if (pvc->master_vcore == pvc && !thr0_done) 2077 + kvmppc_start_thread(NULL, pvc); 2078 + thr += pvc->num_threads; 2079 + } 2080 + } 2081 + 2082 + /* 2083 + * Ensure that split_info.do_nap is set after setting 2084 + * the vcore pointer in the PACA of the secondaries. 
2085 + */ 2086 + smp_mb(); 2087 + if (cmd_bit) 2088 + split_info.do_nap = 1; /* ask secondaries to nap when done */ 2089 + 2090 + /* 2091 + * When doing micro-threading, poke the inactive threads as well. 2092 + * This gets them to the nap instruction after kvm_do_nap, 2093 + * which reduces the time taken to unsplit later. 2094 + */ 2095 + if (split > 1) 2096 + for (thr = 1; thr < threads_per_subcore; ++thr) 2097 + if (!(active & (1 << thr))) 2098 + kvmppc_ipi_thread(pcpu + thr); 2366 2099 2367 2100 vc->vcore_state = VCORE_RUNNING; 2368 2101 preempt_disable(); 2369 2102 2370 2103 trace_kvmppc_run_core(vc, 0); 2371 2104 2372 - spin_unlock(&vc->lock); 2105 + for (sub = 0; sub < core_info.n_subcores; ++sub) 2106 + list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) 2107 + spin_unlock(&pvc->lock); 2373 2108 2374 2109 kvm_guest_enter(); 2375 2110 ··· 2476 2019 2477 2020 __kvmppc_vcore_entry(); 2478 2021 2479 - spin_lock(&vc->lock); 2480 - 2481 2022 if (vc->mpp_buffer) 2482 2023 kvmppc_start_saving_l2_cache(vc); 2483 2024 2484 - /* disable sending of IPIs on virtual external irqs */ 2485 - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 2486 - vcpu->cpu = -1; 2487 - /* wait for secondary threads to finish writing their state to memory */ 2488 - kvmppc_wait_for_nap(); 2489 - for (i = 0; i < threads_per_subcore; ++i) 2490 - kvmppc_release_hwthread(vc->pcpu + i); 2025 + srcu_read_unlock(&vc->kvm->srcu, srcu_idx); 2026 + 2027 + spin_lock(&vc->lock); 2491 2028 /* prevent other vcpu threads from doing kvmppc_start_thread() now */ 2492 2029 vc->vcore_state = VCORE_EXITING; 2493 - spin_unlock(&vc->lock); 2494 2030 2495 - srcu_read_unlock(&vc->kvm->srcu, srcu_idx); 2031 + /* wait for secondary threads to finish writing their state to memory */ 2032 + kvmppc_wait_for_nap(); 2033 + 2034 + /* Return to whole-core mode if we split the core earlier */ 2035 + if (split > 1) { 2036 + unsigned long hid0 = mfspr(SPRN_HID0); 2037 + unsigned long loops = 0; 2038 + 
2039 + hid0 &= ~HID0_POWER8_DYNLPARDIS; 2040 + stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE; 2041 + mb(); 2042 + mtspr(SPRN_HID0, hid0); 2043 + isync(); 2044 + for (;;) { 2045 + hid0 = mfspr(SPRN_HID0); 2046 + if (!(hid0 & stat_bit)) 2047 + break; 2048 + cpu_relax(); 2049 + ++loops; 2050 + } 2051 + split_info.do_nap = 0; 2052 + } 2053 + 2054 + /* Let secondaries go back to the offline loop */ 2055 + for (i = 0; i < threads_per_subcore; ++i) { 2056 + kvmppc_release_hwthread(pcpu + i); 2057 + if (sip && sip->napped[i]) 2058 + kvmppc_ipi_thread(pcpu + i); 2059 + } 2060 + 2061 + spin_unlock(&vc->lock); 2496 2062 2497 2063 /* make sure updates to secondary vcpu structs are visible now */ 2498 2064 smp_mb(); 2499 2065 kvm_guest_exit(); 2500 2066 2501 - preempt_enable(); 2067 + for (sub = 0; sub < core_info.n_subcores; ++sub) 2068 + list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub], 2069 + preempt_list) 2070 + post_guest_process(pvc, pvc == vc); 2502 2071 2503 2072 spin_lock(&vc->lock); 2504 - post_guest_process(vc); 2073 + preempt_enable(); 2505 2074 2506 2075 out: 2507 2076 vc->vcore_state = VCORE_INACTIVE; ··· 2538 2055 * Wait for some other vcpu thread to execute us, and 2539 2056 * wake us up when we need to handle something in the host. 2540 2057 */ 2541 - static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) 2058 + static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, 2059 + struct kvm_vcpu *vcpu, int wait_state) 2542 2060 { 2543 2061 DEFINE_WAIT(wait); 2544 2062 2545 2063 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); 2546 - if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) 2064 + if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { 2065 + spin_unlock(&vc->lock); 2547 2066 schedule(); 2067 + spin_lock(&vc->lock); 2068 + } 2548 2069 finish_wait(&vcpu->arch.cpu_run, &wait); 2549 2070 } 2550 2071 ··· 2624 2137 * this thread straight away and have it join in. 
2625 2138 */ 2626 2139 if (!signal_pending(current)) { 2627 - if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) { 2140 + if (vc->vcore_state == VCORE_PIGGYBACK) { 2141 + struct kvmppc_vcore *mvc = vc->master_vcore; 2142 + if (spin_trylock(&mvc->lock)) { 2143 + if (mvc->vcore_state == VCORE_RUNNING && 2144 + !VCORE_IS_EXITING(mvc)) { 2145 + kvmppc_create_dtl_entry(vcpu, vc); 2146 + kvmppc_start_thread(vcpu, vc); 2147 + trace_kvm_guest_enter(vcpu); 2148 + } 2149 + spin_unlock(&mvc->lock); 2150 + } 2151 + } else if (vc->vcore_state == VCORE_RUNNING && 2152 + !VCORE_IS_EXITING(vc)) { 2628 2153 kvmppc_create_dtl_entry(vcpu, vc); 2629 - kvmppc_start_thread(vcpu); 2154 + kvmppc_start_thread(vcpu, vc); 2630 2155 trace_kvm_guest_enter(vcpu); 2631 2156 } else if (vc->vcore_state == VCORE_SLEEPING) { 2632 2157 wake_up(&vc->wq); ··· 2648 2149 2649 2150 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && 2650 2151 !signal_pending(current)) { 2152 + if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL) 2153 + kvmppc_vcore_end_preempt(vc); 2154 + 2651 2155 if (vc->vcore_state != VCORE_INACTIVE) { 2652 - spin_unlock(&vc->lock); 2653 - kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE); 2654 - spin_lock(&vc->lock); 2156 + kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE); 2655 2157 continue; 2656 2158 } 2657 2159 list_for_each_entry_safe(v, vn, &vc->runnable_threads, ··· 2679 2179 if (n_ceded == vc->n_runnable) { 2680 2180 kvmppc_vcore_blocked(vc); 2681 2181 } else if (need_resched()) { 2682 - vc->vcore_state = VCORE_PREEMPT; 2182 + kvmppc_vcore_preempt(vc); 2683 2183 /* Let something else run */ 2684 2184 cond_resched_lock(&vc->lock); 2685 - vc->vcore_state = VCORE_INACTIVE; 2185 + if (vc->vcore_state == VCORE_PREEMPT) 2186 + kvmppc_vcore_end_preempt(vc); 2686 2187 } else { 2687 2188 kvmppc_run_core(vc); 2688 2189 } ··· 2692 2191 2693 2192 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && 2694 2193 (vc->vcore_state == VCORE_RUNNING || 2695 - vc->vcore_state == 
VCORE_EXITING)) { 2696 - spin_unlock(&vc->lock); 2697 - kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); 2698 - spin_lock(&vc->lock); 2699 - } 2194 + vc->vcore_state == VCORE_EXITING)) 2195 + kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE); 2700 2196 2701 2197 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { 2702 2198 kvmppc_remove_runnable(vc, vcpu); ··· 3252 2754 kvmppc_hv_ops = &kvm_ops_hv; 3253 2755 3254 2756 init_default_hcalls(); 2757 + 2758 + init_vcore_lists(); 3255 2759 3256 2760 r = kvmppc_mmu_hv_init(); 3257 2761 return r;
+28 -4
arch/powerpc/kvm/book3s_hv_builtin.c
··· 110 110 long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, 111 111 unsigned int yield_count) 112 112 { 113 - struct kvmppc_vcore *vc = vcpu->arch.vcore; 113 + struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore; 114 + int ptid = local_paca->kvm_hstate.ptid; 114 115 int threads_running; 115 116 int threads_ceded; 116 117 int threads_conferring; 117 118 u64 stop = get_tb() + 10 * tb_ticks_per_usec; 118 119 int rv = H_SUCCESS; /* => don't yield */ 119 120 120 - set_bit(vcpu->arch.ptid, &vc->conferring_threads); 121 + set_bit(ptid, &vc->conferring_threads); 121 122 while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) { 122 123 threads_running = VCORE_ENTRY_MAP(vc); 123 124 threads_ceded = vc->napping_threads; ··· 128 127 break; 129 128 } 130 129 } 131 - clear_bit(vcpu->arch.ptid, &vc->conferring_threads); 130 + clear_bit(ptid, &vc->conferring_threads); 132 131 return rv; 133 132 } 134 133 ··· 239 238 { 240 239 struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore; 241 240 int ptid = local_paca->kvm_hstate.ptid; 242 - int me, ee; 241 + struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode; 242 + int me, ee, i; 243 243 244 244 /* Set our bit in the threads-exiting-guest map in the 0xff00 245 245 bits of vcore->entry_exit_map */ ··· 260 258 */ 261 259 if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER) 262 260 kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid)); 261 + 262 + /* 263 + * If we are doing dynamic micro-threading, interrupt the other 264 + * subcores to pull them out of their guests too. 265 + */ 266 + if (!sip) 267 + return; 268 + 269 + for (i = 0; i < MAX_SUBCORES; ++i) { 270 + vc = sip->master_vcs[i]; 271 + if (!vc) 272 + break; 273 + do { 274 + ee = vc->entry_exit_map; 275 + /* Already asked to exit? */ 276 + if ((ee >> 8) != 0) 277 + break; 278 + } while (cmpxchg(&vc->entry_exit_map, ee, 279 + ee | VCORE_EXIT_REQ) != ee); 280 + if ((ee >> 8) == 0) 281 + kvmhv_interrupt_vcore(vc, ee); 282 + } 263 283 }
+148 -13
arch/powerpc/kvm/book3s_hv_rm_mmu.c
··· 12 12 #include <linux/kvm_host.h> 13 13 #include <linux/hugetlb.h> 14 14 #include <linux/module.h> 15 + #include <linux/log2.h> 15 16 16 17 #include <asm/tlbflush.h> 17 18 #include <asm/kvm_ppc.h> ··· 98 97 } 99 98 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 100 99 100 + /* Update the changed page order field of an rmap entry */ 101 + void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize) 102 + { 103 + unsigned long order; 104 + 105 + if (!psize) 106 + return; 107 + order = ilog2(psize); 108 + order <<= KVMPPC_RMAP_CHG_SHIFT; 109 + if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER)) 110 + *rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order; 111 + } 112 + EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change); 113 + 114 + /* Returns a pointer to the revmap entry for the page mapped by a HPTE */ 115 + static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, 116 + unsigned long hpte_gr) 117 + { 118 + struct kvm_memory_slot *memslot; 119 + unsigned long *rmap; 120 + unsigned long gfn; 121 + 122 + gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr)); 123 + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 124 + if (!memslot) 125 + return NULL; 126 + 127 + rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); 128 + return rmap; 129 + } 130 + 101 131 /* Remove this HPTE from the chain for a real page */ 102 132 static void remove_revmap_chain(struct kvm *kvm, long pte_index, 103 133 struct revmap_entry *rev, 104 134 unsigned long hpte_v, unsigned long hpte_r) 105 135 { 106 136 struct revmap_entry *next, *prev; 107 - unsigned long gfn, ptel, head; 108 - struct kvm_memory_slot *memslot; 137 + unsigned long ptel, head; 109 138 unsigned long *rmap; 110 139 unsigned long rcbits; 111 140 112 141 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); 113 142 ptel = rev->guest_rpte |= rcbits; 114 - gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); 115 - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 116 - if (!memslot) 143 + rmap = 
revmap_for_hpte(kvm, hpte_v, ptel); 144 + if (!rmap) 117 145 return; 118 - 119 - rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); 120 146 lock_rmap(rmap); 121 147 122 148 head = *rmap & KVMPPC_RMAP_INDEX; ··· 159 131 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; 160 132 } 161 133 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; 134 + if (rcbits & HPTE_R_C) 135 + kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r)); 162 136 unlock_rmap(rmap); 163 137 } 164 138 ··· 451 421 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 452 422 v = pte & ~HPTE_V_HVLOCK; 453 423 if (v & HPTE_V_VALID) { 454 - u64 pte1; 455 - 456 - pte1 = be64_to_cpu(hpte[1]); 457 424 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); 458 - rb = compute_tlbie_rb(v, pte1, pte_index); 425 + rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); 459 426 do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); 460 - /* Read PTE low word after tlbie to get final R/C values */ 461 - remove_revmap_chain(kvm, pte_index, rev, v, pte1); 427 + /* 428 + * The reference (R) and change (C) bits in a HPT 429 + * entry can be set by hardware at any time up until 430 + * the HPTE is invalidated and the TLB invalidation 431 + * sequence has completed. This means that when 432 + * removing a HPTE, we need to re-read the HPTE after 433 + * the invalidation sequence has completed in order to 434 + * obtain reliable values of R and C. 
435 + */ 436 + remove_revmap_chain(kvm, pte_index, rev, v, 437 + be64_to_cpu(hpte[1])); 462 438 } 463 439 r = rev->guest_rpte & ~HPTE_GR_RESERVED; 464 440 note_hpte_modification(kvm, rev); ··· 689 653 vcpu->arch.gpr[5 + i * 2] = r; 690 654 } 691 655 return H_SUCCESS; 656 + } 657 + 658 + long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, 659 + unsigned long pte_index) 660 + { 661 + struct kvm *kvm = vcpu->kvm; 662 + __be64 *hpte; 663 + unsigned long v, r, gr; 664 + struct revmap_entry *rev; 665 + unsigned long *rmap; 666 + long ret = H_NOT_FOUND; 667 + 668 + if (pte_index >= kvm->arch.hpt_npte) 669 + return H_PARAMETER; 670 + 671 + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 672 + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); 673 + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 674 + cpu_relax(); 675 + v = be64_to_cpu(hpte[0]); 676 + r = be64_to_cpu(hpte[1]); 677 + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 678 + goto out; 679 + 680 + gr = rev->guest_rpte; 681 + if (rev->guest_rpte & HPTE_R_R) { 682 + rev->guest_rpte &= ~HPTE_R_R; 683 + note_hpte_modification(kvm, rev); 684 + } 685 + if (v & HPTE_V_VALID) { 686 + gr |= r & (HPTE_R_R | HPTE_R_C); 687 + if (r & HPTE_R_R) { 688 + kvmppc_clear_ref_hpte(kvm, hpte, pte_index); 689 + rmap = revmap_for_hpte(kvm, v, gr); 690 + if (rmap) { 691 + lock_rmap(rmap); 692 + *rmap |= KVMPPC_RMAP_REFERENCED; 693 + unlock_rmap(rmap); 694 + } 695 + } 696 + } 697 + vcpu->arch.gpr[4] = gr; 698 + ret = H_SUCCESS; 699 + out: 700 + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 701 + return ret; 702 + } 703 + 704 + long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, 705 + unsigned long pte_index) 706 + { 707 + struct kvm *kvm = vcpu->kvm; 708 + __be64 *hpte; 709 + unsigned long v, r, gr; 710 + struct revmap_entry *rev; 711 + unsigned long *rmap; 712 + long ret = H_NOT_FOUND; 713 + 714 + if (pte_index >= kvm->arch.hpt_npte) 715 + return H_PARAMETER; 716 + 717 + rev = 
real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 718 + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); 719 + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 720 + cpu_relax(); 721 + v = be64_to_cpu(hpte[0]); 722 + r = be64_to_cpu(hpte[1]); 723 + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 724 + goto out; 725 + 726 + gr = rev->guest_rpte; 727 + if (gr & HPTE_R_C) { 728 + rev->guest_rpte &= ~HPTE_R_C; 729 + note_hpte_modification(kvm, rev); 730 + } 731 + if (v & HPTE_V_VALID) { 732 + /* need to make it temporarily absent so C is stable */ 733 + hpte[0] |= cpu_to_be64(HPTE_V_ABSENT); 734 + kvmppc_invalidate_hpte(kvm, hpte, pte_index); 735 + r = be64_to_cpu(hpte[1]); 736 + gr |= r & (HPTE_R_R | HPTE_R_C); 737 + if (r & HPTE_R_C) { 738 + unsigned long psize = hpte_page_size(v, r); 739 + hpte[1] = cpu_to_be64(r & ~HPTE_R_C); 740 + eieio(); 741 + rmap = revmap_for_hpte(kvm, v, gr); 742 + if (rmap) { 743 + lock_rmap(rmap); 744 + *rmap |= KVMPPC_RMAP_CHANGED; 745 + kvmppc_update_rmap_change(rmap, psize); 746 + unlock_rmap(rmap); 747 + } 748 + } 749 + } 750 + vcpu->arch.gpr[4] = gr; 751 + ret = H_SUCCESS; 752 + out: 753 + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 754 + return ret; 692 755 } 693 756 694 757 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
+1 -3
arch/powerpc/kvm/book3s_hv_rm_xics.c
··· 67 67 } 68 68 69 69 /* Check if the core is loaded, if not, too hard */ 70 - cpu = vcpu->cpu; 70 + cpu = vcpu->arch.thread_cpu; 71 71 if (cpu < 0 || cpu >= nr_cpu_ids) { 72 72 this_icp->rm_action |= XICS_RM_KICK_VCPU; 73 73 this_icp->rm_kick_target = vcpu; 74 74 return; 75 75 } 76 - /* In SMT cpu will always point to thread 0, we adjust it */ 77 - cpu += vcpu->arch.ptid; 78 76 79 77 smp_mb(); 80 78 kvmhv_rm_send_ipi(cpu);
+117 -20
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 128 128 subf r4, r4, r3 129 129 mtspr SPRN_DEC, r4 130 130 131 + /* hwthread_req may have got set by cede or no vcpu, so clear it */ 132 + li r0, 0 133 + stb r0, HSTATE_HWTHREAD_REQ(r13) 134 + 131 135 /* 132 136 * For external and machine check interrupts, we need 133 137 * to call the Linux handler to process the interrupt. ··· 219 215 ld r5, HSTATE_KVM_VCORE(r13) 220 216 li r0, 0 221 217 stb r0, HSTATE_NAPPING(r13) 222 - stb r0, HSTATE_HWTHREAD_REQ(r13) 223 218 224 219 /* check the wake reason */ 225 220 bl kvmppc_check_wake_reason ··· 318 315 cmpdi r3, 0 319 316 bge kvm_no_guest 320 317 321 - /* get vcpu pointer, NULL if we have no vcpu to run */ 322 - ld r4,HSTATE_KVM_VCPU(r13) 323 - cmpdi r4,0 324 - /* if we have no vcpu to run, go back to sleep */ 318 + /* get vcore pointer, NULL if we have nothing to run */ 319 + ld r5,HSTATE_KVM_VCORE(r13) 320 + cmpdi r5,0 321 + /* if we have no vcore to run, go back to sleep */ 325 322 beq kvm_no_guest 326 323 327 324 kvm_secondary_got_guest: ··· 330 327 ld r6, PACA_DSCR_DEFAULT(r13) 331 328 std r6, HSTATE_DSCR(r13) 332 329 333 - /* Order load of vcore, ptid etc. 
after load of vcpu */ 330 + /* On thread 0 of a subcore, set HDEC to max */ 331 + lbz r4, HSTATE_PTID(r13) 332 + cmpwi r4, 0 333 + bne 63f 334 + lis r6, 0x7fff 335 + ori r6, r6, 0xffff 336 + mtspr SPRN_HDEC, r6 337 + /* and set per-LPAR registers, if doing dynamic micro-threading */ 338 + ld r6, HSTATE_SPLIT_MODE(r13) 339 + cmpdi r6, 0 340 + beq 63f 341 + ld r0, KVM_SPLIT_RPR(r6) 342 + mtspr SPRN_RPR, r0 343 + ld r0, KVM_SPLIT_PMMAR(r6) 344 + mtspr SPRN_PMMAR, r0 345 + ld r0, KVM_SPLIT_LDBAR(r6) 346 + mtspr SPRN_LDBAR, r0 347 + isync 348 + 63: 349 + /* Order load of vcpu after load of vcore */ 334 350 lwsync 351 + ld r4, HSTATE_KVM_VCPU(r13) 335 352 bl kvmppc_hv_entry 336 353 337 354 /* Back from the guest, go back to nap */ 338 - /* Clear our vcpu pointer so we don't come back in early */ 355 + /* Clear our vcpu and vcore pointers so we don't come back in early */ 339 356 li r0, 0 357 + std r0, HSTATE_KVM_VCPU(r13) 340 358 /* 341 - * Once we clear HSTATE_KVM_VCPU(r13), the code in 359 + * Once we clear HSTATE_KVM_VCORE(r13), the code in 342 360 * kvmppc_run_core() is going to assume that all our vcpu 343 361 * state is visible in memory. This lwsync makes sure 344 362 * that that is true. 345 363 */ 346 364 lwsync 347 - std r0, HSTATE_KVM_VCPU(r13) 365 + std r0, HSTATE_KVM_VCORE(r13) 348 366 349 367 /* 350 368 * At this point we have finished executing in the guest. 
··· 398 374 b power7_wakeup_loss 399 375 400 376 53: HMT_LOW 401 - ld r4, HSTATE_KVM_VCPU(r13) 402 - cmpdi r4, 0 377 + ld r5, HSTATE_KVM_VCORE(r13) 378 + cmpdi r5, 0 379 + bne 60f 380 + ld r3, HSTATE_SPLIT_MODE(r13) 381 + cmpdi r3, 0 382 + beq kvm_no_guest 383 + lbz r0, KVM_SPLIT_DO_NAP(r3) 384 + cmpwi r0, 0 403 385 beq kvm_no_guest 404 386 HMT_MEDIUM 387 + b kvm_unsplit_nap 388 + 60: HMT_MEDIUM 405 389 b kvm_secondary_got_guest 406 390 407 391 54: li r0, KVM_HWTHREAD_IN_KVM 408 392 stb r0, HSTATE_HWTHREAD_STATE(r13) 393 + b kvm_no_guest 394 + 395 + /* 396 + * Here the primary thread is trying to return the core to 397 + * whole-core mode, so we need to nap. 398 + */ 399 + kvm_unsplit_nap: 400 + /* 401 + * Ensure that secondary doesn't nap when it has 402 + * its vcore pointer set. 403 + */ 404 + sync /* matches smp_mb() before setting split_info.do_nap */ 405 + ld r0, HSTATE_KVM_VCORE(r13) 406 + cmpdi r0, 0 407 + bne kvm_no_guest 408 + /* clear any pending message */ 409 + BEGIN_FTR_SECTION 410 + lis r6, (PPC_DBELL_SERVER << (63-36))@h 411 + PPC_MSGCLR(6) 412 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 413 + /* Set kvm_split_mode.napped[tid] = 1 */ 414 + ld r3, HSTATE_SPLIT_MODE(r13) 415 + li r0, 1 416 + lhz r4, PACAPACAINDEX(r13) 417 + clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */ 418 + addi r4, r4, KVM_SPLIT_NAPPED 419 + stbx r0, r3, r4 420 + /* Check the do_nap flag again after setting napped[] */ 421 + sync 422 + lbz r0, KVM_SPLIT_DO_NAP(r3) 423 + cmpwi r0, 0 424 + beq 57f 425 + li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 426 + mfspr r4, SPRN_LPCR 427 + rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) 428 + mtspr SPRN_LPCR, r4 429 + isync 430 + std r0, HSTATE_SCRATCH0(r13) 431 + ptesync 432 + ld r0, HSTATE_SCRATCH0(r13) 433 + 1: cmpd r0, r0 434 + bne 1b 435 + nap 436 + b . 
437 + 438 + 57: li r0, 0 439 + stbx r0, r3, r4 409 440 b kvm_no_guest 410 441 411 442 /****************************************************************************** ··· 933 854 cmpwi r0, 0 934 855 bne 21f 935 856 HMT_LOW 936 - 20: lbz r0, VCORE_IN_GUEST(r5) 857 + 20: lwz r3, VCORE_ENTRY_EXIT(r5) 858 + cmpwi r3, 0x100 859 + bge no_switch_exit 860 + lbz r0, VCORE_IN_GUEST(r5) 937 861 cmpwi r0, 0 938 862 beq 20b 939 863 HMT_MEDIUM ··· 952 870 blt hdec_soon 953 871 954 872 ld r6, VCPU_CTR(r4) 955 - lwz r7, VCPU_XER(r4) 873 + ld r7, VCPU_XER(r4) 956 874 957 875 mtctr r6 958 876 mtxer r7 ··· 1067 985 #endif 1068 986 11: b kvmhv_switch_to_host 1069 987 988 + no_switch_exit: 989 + HMT_MEDIUM 990 + li r12, 0 991 + b 12f 1070 992 hdec_soon: 1071 993 li r12, BOOK3S_INTERRUPT_HV_DECREMENTER 1072 - stw r12, VCPU_TRAP(r4) 994 + 12: stw r12, VCPU_TRAP(r4) 1073 995 mr r9, r4 1074 996 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1075 997 addi r3, r4, VCPU_TB_RMEXIT ··· 1189 1103 mfctr r3 1190 1104 mfxer r4 1191 1105 std r3, VCPU_CTR(r9) 1192 - stw r4, VCPU_XER(r9) 1106 + std r4, VCPU_XER(r9) 1193 1107 1194 1108 /* If this is a page table miss then see if it's theirs or ours */ 1195 1109 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE ··· 1213 1127 cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL 1214 1128 bne 3f 1215 1129 lbz r0, HSTATE_HOST_IPI(r13) 1130 + cmpwi r0, 0 1216 1131 beq 4f 1217 1132 b guest_exit_cont 1218 1133 3: ··· 1262 1175 nop 1263 1176 ld r9, HSTATE_KVM_VCPU(r13) 1264 1177 lwz r12, VCPU_TRAP(r9) 1178 + 1179 + /* Stop others sending VCPU interrupts to this physical CPU */ 1180 + li r0, -1 1181 + stw r0, VCPU_CPU(r9) 1182 + stw r0, VCPU_THREAD_CPU(r9) 1265 1183 1266 1184 /* Save guest CTRL register, set runlatch to 1 */ 1267 1185 mfspr r6,SPRN_CTRLF ··· 1632 1540 1633 1541 /* Primary thread waits for all the secondaries to exit guest */ 1634 1542 15: lwz r3,VCORE_ENTRY_EXIT(r5) 1635 - srwi r0,r3,8 1543 + rlwinm r0,r3,32-8,0xff 1636 1544 clrldi r3,r3,56 1637 1545 cmpw r3,r0 1638 1546 
bne 15b 1639 1547 isync 1548 + 1549 + /* Did we actually switch to the guest at all? */ 1550 + lbz r6, VCORE_IN_GUEST(r5) 1551 + cmpwi r6, 0 1552 + beq 19f 1640 1553 1641 1554 /* Primary thread switches back to host partition */ 1642 1555 ld r6,KVM_HOST_SDR1(r4) ··· 1686 1589 18: 1687 1590 /* Signal secondary CPUs to continue */ 1688 1591 stb r0,VCORE_IN_GUEST(r5) 1689 - lis r8,0x7fff /* MAX_INT@h */ 1592 + 19: lis r8,0x7fff /* MAX_INT@h */ 1690 1593 mtspr SPRN_HDEC,r8 1691 1594 1692 1595 16: ld r8,KVM_HOST_LPCR(r4) ··· 1772 1675 bl kvmppc_msr_interrupt 1773 1676 fast_interrupt_c_return: 1774 1677 6: ld r7, VCPU_CTR(r9) 1775 - lwz r8, VCPU_XER(r9) 1678 + ld r8, VCPU_XER(r9) 1776 1679 mtctr r7 1777 1680 mtxer r8 1778 1681 mr r4, r9 ··· 1913 1816 .long DOTSYM(kvmppc_h_remove) - hcall_real_table 1914 1817 .long DOTSYM(kvmppc_h_enter) - hcall_real_table 1915 1818 .long DOTSYM(kvmppc_h_read) - hcall_real_table 1916 - .long 0 /* 0x10 - H_CLEAR_MOD */ 1917 - .long 0 /* 0x14 - H_CLEAR_REF */ 1819 + .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table 1820 + .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table 1918 1821 .long DOTSYM(kvmppc_h_protect) - hcall_real_table 1919 1822 .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table 1920 1823 .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
+1 -1
arch/powerpc/kvm/book3s_paired_singles.c
··· 352 352 return kvmppc_get_field(inst, msb + 32, lsb + 32); 353 353 } 354 354 355 - bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) 355 + static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) 356 356 { 357 357 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) 358 358 return false;
+2 -2
arch/powerpc/kvm/book3s_segment.S
··· 123 123 PPC_LL r8, SVCPU_CTR(r3) 124 124 PPC_LL r9, SVCPU_LR(r3) 125 125 lwz r10, SVCPU_CR(r3) 126 - lwz r11, SVCPU_XER(r3) 126 + PPC_LL r11, SVCPU_XER(r3) 127 127 128 128 mtctr r8 129 129 mtlr r9 ··· 237 237 mfctr r8 238 238 mflr r9 239 239 240 - stw r5, SVCPU_XER(r13) 240 + PPC_STL r5, SVCPU_XER(r13) 241 241 PPC_STL r6, SVCPU_FAULT_DAR(r13) 242 242 stw r7, SVCPU_FAULT_DSISR(r13) 243 243 PPC_STL r8, SVCPU_CTR(r13)
+1 -1
arch/powerpc/kvm/book3s_xics.c
··· 41 41 * ======= 42 42 * 43 43 * Each ICS has a spin lock protecting the information about the IRQ 44 - * sources and avoiding simultaneous deliveries if the same interrupt. 44 + * sources and avoiding simultaneous deliveries of the same interrupt. 45 45 * 46 46 * ICP operations are done via a single compare & swap transaction 47 47 * (most ICP state fits in the union kvmppc_icp_state)
+1
arch/powerpc/kvm/booke.c
··· 933 933 #endif 934 934 break; 935 935 case BOOKE_INTERRUPT_CRITICAL: 936 + kvmppc_fill_pt_regs(&regs); 936 937 unknown_exception(&regs); 937 938 break; 938 939 case BOOKE_INTERRUPT_DEBUG:
+1 -1
arch/powerpc/kvm/e500_mmu.c
··· 377 377 | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); 378 378 vcpu->arch.shared->mas1 = 379 379 (vcpu->arch.shared->mas6 & MAS6_SPID0) 380 - | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) 380 + | ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0) 381 381 | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); 382 382 vcpu->arch.shared->mas2 &= MAS2_EPN; 383 383 vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
+1 -1
arch/powerpc/kvm/powerpc.c
··· 660 660 return kvmppc_core_pending_dec(vcpu); 661 661 } 662 662 663 - enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) 663 + static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) 664 664 { 665 665 struct kvm_vcpu *vcpu; 666 666
+1 -1
arch/x86/kvm/emulate.c
··· 650 650 u16 sel; 651 651 652 652 la = seg_base(ctxt, addr.seg) + addr.ea; 653 + *linear = la; 653 654 *max_size = 0; 654 655 switch (mode) { 655 656 case X86EMUL_MODE_PROT64: ··· 694 693 } 695 694 if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) 696 695 return emulate_gp(ctxt, 0); 697 - *linear = la; 698 696 return X86EMUL_CONTINUE; 699 697 bad: 700 698 if (addr.seg == VCPU_SREG_SS)
+4 -3
arch/x86/kvm/mmu.c
··· 3309 3309 3310 3310 walk_shadow_page_lockless_begin(vcpu); 3311 3311 3312 - for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level; 3312 + for (shadow_walk_init(&iterator, vcpu, addr), 3313 + leaf = root = iterator.level; 3313 3314 shadow_walk_okay(&iterator); 3314 3315 __shadow_walk_next(&iterator, spte)) { 3315 - leaf = iterator.level; 3316 3316 spte = mmu_spte_get_lockless(iterator.sptep); 3317 3317 3318 3318 sptes[leaf - 1] = spte; 3319 + leaf--; 3319 3320 3320 3321 if (!is_shadow_present_pte(spte)) 3321 3322 break; ··· 3330 3329 if (reserved) { 3331 3330 pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", 3332 3331 __func__, addr); 3333 - while (root >= leaf) { 3332 + while (root > leaf) { 3334 3333 pr_err("------ spte 0x%llx level %d.\n", 3335 3334 sptes[root - 1], root); 3336 3335 root--;
+2
arch/x86/kvm/x86.c
··· 5943 5943 put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg)); 5944 5944 } 5945 5945 5946 + #ifdef CONFIG_X86_64 5946 5947 static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) 5947 5948 { 5948 5949 struct kvm_segment seg; ··· 5959 5958 put_smstate(u32, buf, offset + 4, seg.limit); 5960 5959 put_smstate(u64, buf, offset + 8, seg.base); 5961 5960 } 5961 + #endif 5962 5962 5963 5963 static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf) 5964 5964 {
+5 -2
include/kvm/arm_arch_timer.h
··· 52 52 53 53 /* Timer IRQ */ 54 54 const struct kvm_irq_level *irq; 55 + 56 + /* VGIC mapping */ 57 + struct irq_phys_map *map; 55 58 }; 56 59 57 60 int kvm_timer_hyp_init(void); 58 61 void kvm_timer_enable(struct kvm *kvm); 59 62 void kvm_timer_init(struct kvm *kvm); 60 - void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 61 - const struct kvm_irq_level *irq); 63 + int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 64 + const struct kvm_irq_level *irq); 62 65 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); 63 66 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); 64 67 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
+36 -3
include/kvm/arm_vgic.h
··· 95 95 #define LR_STATE_ACTIVE (1 << 1) 96 96 #define LR_STATE_MASK (3 << 0) 97 97 #define LR_EOI_INT (1 << 2) 98 + #define LR_HW (1 << 3) 98 99 99 100 struct vgic_lr { 100 - u16 irq; 101 - u8 source; 102 - u8 state; 101 + unsigned irq:10; 102 + union { 103 + unsigned hwirq:10; 104 + unsigned source:3; 105 + }; 106 + unsigned state:4; 103 107 }; 104 108 105 109 struct vgic_vmcr { ··· 157 153 const struct vgic_io_range *reg_ranges; 158 154 struct kvm_vcpu *redist_vcpu; 159 155 struct kvm_io_device dev; 156 + }; 157 + 158 + struct irq_phys_map { 159 + u32 virt_irq; 160 + u32 phys_irq; 161 + u32 irq; 162 + bool active; 163 + }; 164 + 165 + struct irq_phys_map_entry { 166 + struct list_head entry; 167 + struct rcu_head rcu; 168 + struct irq_phys_map map; 160 169 }; 161 170 162 171 struct vgic_dist { ··· 269 252 struct vgic_vm_ops vm_ops; 270 253 struct vgic_io_device dist_iodev; 271 254 struct vgic_io_device *redist_iodevs; 255 + 256 + /* Virtual irq to hwirq mapping */ 257 + spinlock_t irq_phys_map_lock; 258 + struct list_head irq_phys_map_list; 272 259 }; 273 260 274 261 struct vgic_v2_cpu_if { ··· 324 303 struct vgic_v2_cpu_if vgic_v2; 325 304 struct vgic_v3_cpu_if vgic_v3; 326 305 }; 306 + 307 + /* Protected by the distributor's irq_phys_map_lock */ 308 + struct list_head irq_phys_map_list; 327 309 }; 328 310 329 311 #define LR_EMPTY 0xff ··· 341 317 int kvm_vgic_hyp_init(void); 342 318 int kvm_vgic_map_resources(struct kvm *kvm); 343 319 int kvm_vgic_get_max_vcpus(void); 320 + void kvm_vgic_early_init(struct kvm *kvm); 344 321 int kvm_vgic_create(struct kvm *kvm, u32 type); 345 322 void kvm_vgic_destroy(struct kvm *kvm); 323 + void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); 346 324 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); 347 325 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); 348 326 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); 349 327 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 350 328 bool level); 329 + 
int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, 330 + struct irq_phys_map *map, bool level); 351 331 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); 352 332 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); 353 333 int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); 334 + struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, 335 + int virt_irq, int irq); 336 + int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); 337 + bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); 338 + void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); 354 339 355 340 #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) 356 341 #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
+3
include/linux/irqchip/arm-gic-v3.h
··· 270 270 271 271 #define ICH_LR_EOI (1UL << 41) 272 272 #define ICH_LR_GROUP (1UL << 60) 273 + #define ICH_LR_HW (1UL << 61) 273 274 #define ICH_LR_STATE (3UL << 62) 274 275 #define ICH_LR_PENDING_BIT (1UL << 62) 275 276 #define ICH_LR_ACTIVE_BIT (1UL << 63) 277 + #define ICH_LR_PHYS_ID_SHIFT 32 278 + #define ICH_LR_PHYS_ID_MASK (0x3ffUL << ICH_LR_PHYS_ID_SHIFT) 276 279 277 280 #define ICH_MISR_EOI (1 << 0) 278 281 #define ICH_MISR_U (1 << 1)
+2 -1
include/linux/irqchip/arm-gic.h
··· 75 75 76 76 #define GICH_LR_VIRTUALID (0x3ff << 0) 77 77 #define GICH_LR_PHYSID_CPUID_SHIFT (10) 78 - #define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT) 78 + #define GICH_LR_PHYSID_CPUID (0x3ff << GICH_LR_PHYSID_CPUID_SHIFT) 79 79 #define GICH_LR_STATE (3 << 28) 80 80 #define GICH_LR_PENDING_BIT (1 << 28) 81 81 #define GICH_LR_ACTIVE_BIT (1 << 29) 82 82 #define GICH_LR_EOI (1 << 19) 83 + #define GICH_LR_HW (1U << 31) 83 84 84 85 #define GICH_VMCR_CTRL_SHIFT 0 85 86 #define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
+1
include/linux/kvm_host.h
··· 242 242 int sigset_active; 243 243 sigset_t sigset; 244 244 struct kvm_vcpu_stat stat; 245 + unsigned int halt_poll_ns; 245 246 246 247 #ifdef CONFIG_HAS_IOMEM 247 248 int mmio_needed;
+30
include/trace/events/kvm.h
··· 358 358 359 359 #endif 360 360 361 + TRACE_EVENT(kvm_halt_poll_ns, 362 + TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), 363 + TP_ARGS(grow, vcpu_id, new, old), 364 + 365 + TP_STRUCT__entry( 366 + __field(bool, grow) 367 + __field(unsigned int, vcpu_id) 368 + __field(int, new) 369 + __field(int, old) 370 + ), 371 + 372 + TP_fast_assign( 373 + __entry->grow = grow; 374 + __entry->vcpu_id = vcpu_id; 375 + __entry->new = new; 376 + __entry->old = old; 377 + ), 378 + 379 + TP_printk("vcpu %u: halt_poll_ns %d (%s %d)", 380 + __entry->vcpu_id, 381 + __entry->new, 382 + __entry->grow ? "grow" : "shrink", 383 + __entry->old) 384 + ); 385 + 386 + #define trace_kvm_halt_poll_ns_grow(vcpu_id, new, old) \ 387 + trace_kvm_halt_poll_ns(true, vcpu_id, new, old) 388 + #define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \ 389 + trace_kvm_halt_poll_ns(false, vcpu_id, new, old) 390 + 361 391 #endif /* _TRACE_KVM_MAIN_H */ 362 392 363 393 /* This part must be outside protection */
+5
include/uapi/linux/kvm.h
··· 237 237 __u32 count; 238 238 __u64 data_offset; /* relative to kvm_run start */ 239 239 } io; 240 + /* KVM_EXIT_DEBUG */ 240 241 struct { 241 242 struct kvm_debug_exit_arch arch; 242 243 } debug; ··· 286 285 __u32 data; 287 286 __u8 is_write; 288 287 } dcr; 288 + /* KVM_EXIT_INTERNAL_ERROR */ 289 289 struct { 290 290 __u32 suberror; 291 291 /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ ··· 297 295 struct { 298 296 __u64 gprs[32]; 299 297 } osi; 298 + /* KVM_EXIT_PAPR_HCALL */ 300 299 struct { 301 300 __u64 nr; 302 301 __u64 ret; ··· 822 819 #define KVM_CAP_DISABLE_QUIRKS 116 823 820 #define KVM_CAP_X86_SMM 117 824 821 #define KVM_CAP_MULTI_ADDRESS_SPACE 118 822 + #define KVM_CAP_GUEST_DEBUG_HW_BPS 119 823 + #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 825 824 826 825 #ifdef KVM_CAP_IRQ_ROUTING 827 826
+22 -7
virt/kvm/arm/arch_timer.c
··· 64 64 int ret; 65 65 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 66 66 67 - timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; 68 - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 69 - timer->irq->irq, 70 - timer->irq->level); 67 + kvm_vgic_set_phys_irq_active(timer->map, true); 68 + ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, 69 + timer->map, 70 + timer->irq->level); 71 71 WARN_ON(ret); 72 72 } 73 73 ··· 117 117 cycle_t cval, now; 118 118 119 119 if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || 120 - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) 120 + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) || 121 + kvm_vgic_get_phys_irq_active(timer->map)) 121 122 return false; 122 123 123 124 cval = timer->cntv_cval; ··· 185 184 timer_arm(timer, ns); 186 185 } 187 186 188 - void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 189 - const struct kvm_irq_level *irq) 187 + int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, 188 + const struct kvm_irq_level *irq) 190 189 { 191 190 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 191 + struct irq_phys_map *map; 192 192 193 193 /* 194 194 * The vcpu timer irq number cannot be determined in ··· 198 196 * vcpu timer irq number when the vcpu is reset. 199 197 */ 200 198 timer->irq = irq; 199 + 200 + /* 201 + * Tell the VGIC that the virtual interrupt is tied to a 202 + * physical interrupt. We do that once per VCPU. 203 + */ 204 + map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq); 205 + if (WARN_ON(IS_ERR(map))) 206 + return PTR_ERR(map); 207 + 208 + timer->map = map; 209 + return 0; 201 210 } 202 211 203 212 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) ··· 348 335 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 349 336 350 337 timer_disarm(timer); 338 + if (timer->map) 339 + kvm_vgic_unmap_phys_irq(vcpu, timer->map); 351 340 } 352 341 353 342 void kvm_timer_enable(struct kvm *kvm)
+15 -1
virt/kvm/arm/vgic-v2.c
··· 48 48 lr_desc.state |= LR_STATE_ACTIVE; 49 49 if (val & GICH_LR_EOI) 50 50 lr_desc.state |= LR_EOI_INT; 51 + if (val & GICH_LR_HW) { 52 + lr_desc.state |= LR_HW; 53 + lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT; 54 + } 51 55 52 56 return lr_desc; 53 57 } ··· 59 55 static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, 60 56 struct vgic_lr lr_desc) 61 57 { 62 - u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq; 58 + u32 lr_val; 59 + 60 + lr_val = lr_desc.irq; 63 61 64 62 if (lr_desc.state & LR_STATE_PENDING) 65 63 lr_val |= GICH_LR_PENDING_BIT; ··· 69 63 lr_val |= GICH_LR_ACTIVE_BIT; 70 64 if (lr_desc.state & LR_EOI_INT) 71 65 lr_val |= GICH_LR_EOI; 66 + 67 + if (lr_desc.state & LR_HW) { 68 + lr_val |= GICH_LR_HW; 69 + lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT; 70 + } 71 + 72 + if (lr_desc.irq < VGIC_NR_SGIS) 73 + lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); 72 74 73 75 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; 74 76 }
+18 -3
virt/kvm/arm/vgic-v3.c
··· 67 67 lr_desc.state |= LR_STATE_ACTIVE; 68 68 if (val & ICH_LR_EOI) 69 69 lr_desc.state |= LR_EOI_INT; 70 + if (val & ICH_LR_HW) { 71 + lr_desc.state |= LR_HW; 72 + lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0); 73 + } 70 74 71 75 return lr_desc; 72 76 } ··· 88 84 * Eventually we want to make this configurable, so we may revisit 89 85 * this in the future. 90 86 */ 91 - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) 87 + switch (vcpu->kvm->arch.vgic.vgic_model) { 88 + case KVM_DEV_TYPE_ARM_VGIC_V3: 92 89 lr_val |= ICH_LR_GROUP; 93 - else 94 - lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT; 90 + break; 91 + case KVM_DEV_TYPE_ARM_VGIC_V2: 92 + if (lr_desc.irq < VGIC_NR_SGIS) 93 + lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT; 94 + break; 95 + default: 96 + BUG(); 97 + } 95 98 96 99 if (lr_desc.state & LR_STATE_PENDING) 97 100 lr_val |= ICH_LR_PENDING_BIT; ··· 106 95 lr_val |= ICH_LR_ACTIVE_BIT; 107 96 if (lr_desc.state & LR_EOI_INT) 108 97 lr_val |= ICH_LR_EOI; 98 + if (lr_desc.state & LR_HW) { 99 + lr_val |= ICH_LR_HW; 100 + lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; 101 + } 109 102 110 103 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; 111 104 }
+390 -39
virt/kvm/arm/vgic.c
··· 24 24 #include <linux/of.h> 25 25 #include <linux/of_address.h> 26 26 #include <linux/of_irq.h> 27 + #include <linux/rculist.h> 27 28 #include <linux/uaccess.h> 28 29 29 30 #include <asm/kvm_emulate.h> ··· 75 74 * cause the interrupt to become inactive in such a situation. 76 75 * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become 77 76 * inactive as long as the external input line is held high. 77 + * 78 + * 79 + * Initialization rules: there are multiple stages to the vgic 80 + * initialization, both for the distributor and the CPU interfaces. 81 + * 82 + * Distributor: 83 + * 84 + * - kvm_vgic_early_init(): initialization of static data that doesn't 85 + * depend on any sizing information or emulation type. No allocation 86 + * is allowed there. 87 + * 88 + * - vgic_init(): allocation and initialization of the generic data 89 + * structures that depend on sizing information (number of CPUs, 90 + * number of interrupts). Also initializes the vcpu specific data 91 + * structures. Can be executed lazily for GICv2. 92 + * [to be renamed to kvm_vgic_init??] 93 + * 94 + * CPU Interface: 95 + * 96 + * - kvm_vgic_vcpu_early_init(): initialization of static data that 97 + * doesn't depend on any sizing information or emulation type. No 98 + * allocation is allowed there. 
78 99 */ 79 100 80 101 #include "vgic.h" ··· 105 82 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); 106 83 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); 107 84 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); 85 + static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, 86 + int virt_irq); 108 87 109 88 static const struct vgic_ops *vgic_ops; 110 89 static const struct vgic_params *vgic; ··· 400 375 401 376 static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) 402 377 { 403 - return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); 378 + return !vgic_irq_is_queued(vcpu, irq); 404 379 } 405 380 406 381 /** ··· 1140 1115 if (!vgic_irq_is_edge(vcpu, irq)) 1141 1116 vlr.state |= LR_EOI_INT; 1142 1117 1118 + if (vlr.irq >= VGIC_NR_SGIS) { 1119 + struct irq_phys_map *map; 1120 + map = vgic_irq_map_search(vcpu, irq); 1121 + 1122 + /* 1123 + * If we have a mapping, and the virtual interrupt is 1124 + * being injected, then we must set the state to 1125 + * active in the physical world. Otherwise the 1126 + * physical interrupt will fire and the guest will 1127 + * exit before processing the virtual interrupt. 1128 + */ 1129 + if (map) { 1130 + int ret; 1131 + 1132 + BUG_ON(!map->active); 1133 + vlr.hwirq = map->phys_irq; 1134 + vlr.state |= LR_HW; 1135 + vlr.state &= ~LR_EOI_INT; 1136 + 1137 + ret = irq_set_irqchip_state(map->irq, 1138 + IRQCHIP_STATE_ACTIVE, 1139 + true); 1140 + WARN_ON(ret); 1141 + 1142 + /* 1143 + * Make sure we're not going to sample this 1144 + * again, as a HW-backed interrupt cannot be 1145 + * in the PENDING_ACTIVE stage. 1146 + */ 1147 + vgic_irq_set_queued(vcpu, irq); 1148 + } 1149 + } 1150 + 1143 1151 vgic_set_lr(vcpu, lr_nr, vlr); 1144 1152 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); 1145 1153 } ··· 1397 1339 return level_pending; 1398 1340 } 1399 1341 1342 + /* 1343 + * Save the physical active state, and reset it to inactive. 
1344 + * 1345 + * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. 1346 + */ 1347 + static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) 1348 + { 1349 + struct irq_phys_map *map; 1350 + int ret; 1351 + 1352 + if (!(vlr.state & LR_HW)) 1353 + return 0; 1354 + 1355 + map = vgic_irq_map_search(vcpu, vlr.irq); 1356 + BUG_ON(!map || !map->active); 1357 + 1358 + ret = irq_get_irqchip_state(map->irq, 1359 + IRQCHIP_STATE_ACTIVE, 1360 + &map->active); 1361 + 1362 + WARN_ON(ret); 1363 + 1364 + if (map->active) { 1365 + ret = irq_set_irqchip_state(map->irq, 1366 + IRQCHIP_STATE_ACTIVE, 1367 + false); 1368 + WARN_ON(ret); 1369 + return 0; 1370 + } 1371 + 1372 + return 1; 1373 + } 1374 + 1400 1375 /* Sync back the VGIC state after a guest run */ 1401 1376 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) 1402 1377 { ··· 1444 1353 elrsr = vgic_get_elrsr(vcpu); 1445 1354 elrsr_ptr = u64_to_bitmask(&elrsr); 1446 1355 1447 - /* Clear mappings for empty LRs */ 1448 - for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { 1356 + /* Deal with HW interrupts, and clear mappings for empty LRs */ 1357 + for (lr = 0; lr < vgic->nr_lr; lr++) { 1449 1358 struct vgic_lr vlr; 1450 1359 1451 - if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) 1360 + if (!test_bit(lr, vgic_cpu->lr_used)) 1452 1361 continue; 1453 1362 1454 1363 vlr = vgic_get_lr(vcpu, lr); 1364 + if (vgic_sync_hwirq(vcpu, vlr)) { 1365 + /* 1366 + * So this is a HW interrupt that the guest 1367 + * EOI-ed. Clean the LR state and allow the 1368 + * interrupt to be sampled again. 
1369 + */ 1370 + vlr.state = 0; 1371 + vlr.hwirq = 0; 1372 + vgic_set_lr(vcpu, lr, vlr); 1373 + vgic_irq_clear_queued(vcpu, vlr.irq); 1374 + set_bit(lr, elrsr_ptr); 1375 + } 1376 + 1377 + if (!test_bit(lr, elrsr_ptr)) 1378 + continue; 1379 + 1380 + clear_bit(lr, vgic_cpu->lr_used); 1455 1381 1456 1382 BUG_ON(vlr.irq >= dist->nr_irqs); 1457 1383 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; ··· 1555 1447 } 1556 1448 1557 1449 static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, 1558 - unsigned int irq_num, bool level) 1450 + struct irq_phys_map *map, 1451 + unsigned int irq_num, bool level) 1559 1452 { 1560 1453 struct vgic_dist *dist = &kvm->arch.vgic; 1561 1454 struct kvm_vcpu *vcpu; 1562 1455 int edge_triggered, level_triggered; 1563 1456 int enabled; 1564 1457 bool ret = true, can_inject = true; 1458 + 1459 + if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) 1460 + return -EINVAL; 1565 1461 1566 1462 spin_lock(&dist->lock); 1567 1463 ··· 1629 1517 out: 1630 1518 spin_unlock(&dist->lock); 1631 1519 1632 - return ret ? cpuid : -EINVAL; 1520 + if (ret) { 1521 + /* kick the specified vcpu */ 1522 + kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid)); 1523 + } 1524 + 1525 + return 0; 1633 1526 } 1634 1527 1635 - /** 1636 - * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic 1637 - * @kvm: The VM structure pointer 1638 - * @cpuid: The CPU for PPIs 1639 - * @irq_num: The IRQ number that is assigned to the device 1640 - * @level: Edge-triggered: true: to trigger the interrupt 1641 - * false: to ignore the call 1642 - * Level-sensitive true: activates an interrupt 1643 - * false: deactivates an interrupt 1644 - * 1645 - * The GIC is not concerned with devices being active-LOW or active-HIGH for 1646 - * level-sensitive interrupts. You can think of the level parameter as 1 1647 - * being HIGH and 0 being LOW and all devices being active-HIGH. 
1648 - */ 1649 - int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 1650 - bool level) 1528 + static int vgic_lazy_init(struct kvm *kvm) 1651 1529 { 1652 1530 int ret = 0; 1653 - int vcpu_id; 1654 1531 1655 1532 if (unlikely(!vgic_initialized(kvm))) { 1656 1533 /* ··· 1648 1547 * be explicitly initialized once setup with the respective 1649 1548 * KVM device call. 1650 1549 */ 1651 - if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) { 1652 - ret = -EBUSY; 1653 - goto out; 1654 - } 1550 + if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) 1551 + return -EBUSY; 1552 + 1655 1553 mutex_lock(&kvm->lock); 1656 1554 ret = vgic_init(kvm); 1657 1555 mutex_unlock(&kvm->lock); 1658 - 1659 - if (ret) 1660 - goto out; 1661 1556 } 1662 1557 1663 - if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) 1558 + return ret; 1559 + } 1560 + 1561 + /** 1562 + * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic 1563 + * @kvm: The VM structure pointer 1564 + * @cpuid: The CPU for PPIs 1565 + * @irq_num: The IRQ number that is assigned to the device. This IRQ 1566 + * must not be mapped to a HW interrupt. 1567 + * @level: Edge-triggered: true: to trigger the interrupt 1568 + * false: to ignore the call 1569 + * Level-sensitive true: raise the input signal 1570 + * false: lower the input signal 1571 + * 1572 + * The GIC is not concerned with devices being active-LOW or active-HIGH for 1573 + * level-sensitive interrupts. You can think of the level parameter as 1 1574 + * being HIGH and 0 being LOW and all devices being active-HIGH. 
1575 + */ 1576 + int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 1577 + bool level) 1578 + { 1579 + struct irq_phys_map *map; 1580 + int ret; 1581 + 1582 + ret = vgic_lazy_init(kvm); 1583 + if (ret) 1584 + return ret; 1585 + 1586 + map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num); 1587 + if (map) 1664 1588 return -EINVAL; 1665 1589 1666 - vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level); 1667 - if (vcpu_id >= 0) { 1668 - /* kick the specified vcpu */ 1669 - kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id)); 1670 - } 1590 + return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level); 1591 + } 1671 1592 1672 - out: 1673 - return ret; 1593 + /** 1594 + * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic 1595 + * @kvm: The VM structure pointer 1596 + * @cpuid: The CPU for PPIs 1597 + * @map: Pointer to a irq_phys_map structure describing the mapping 1598 + * @level: Edge-triggered: true: to trigger the interrupt 1599 + * false: to ignore the call 1600 + * Level-sensitive true: raise the input signal 1601 + * false: lower the input signal 1602 + * 1603 + * The GIC is not concerned with devices being active-LOW or active-HIGH for 1604 + * level-sensitive interrupts. You can think of the level parameter as 1 1605 + * being HIGH and 0 being LOW and all devices being active-HIGH. 
1606 + */ 1607 + int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, 1608 + struct irq_phys_map *map, bool level) 1609 + { 1610 + int ret; 1611 + 1612 + ret = vgic_lazy_init(kvm); 1613 + if (ret) 1614 + return ret; 1615 + 1616 + return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level); 1674 1617 } 1675 1618 1676 1619 static irqreturn_t vgic_maintenance_handler(int irq, void *data) ··· 1728 1583 return IRQ_HANDLED; 1729 1584 } 1730 1585 1586 + static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu, 1587 + int virt_irq) 1588 + { 1589 + if (virt_irq < VGIC_NR_PRIVATE_IRQS) 1590 + return &vcpu->arch.vgic_cpu.irq_phys_map_list; 1591 + else 1592 + return &vcpu->kvm->arch.vgic.irq_phys_map_list; 1593 + } 1594 + 1595 + /** 1596 + * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ 1597 + * @vcpu: The VCPU pointer 1598 + * @virt_irq: The virtual irq number 1599 + * @irq: The Linux IRQ number 1600 + * 1601 + * Establish a mapping between a guest visible irq (@virt_irq) and a 1602 + * Linux irq (@irq). On injection, @virt_irq will be associated with 1603 + * the physical interrupt represented by @irq. This mapping can be 1604 + * established multiple times as long as the parameters are the same. 
1605 + * 1606 + * Returns a valid pointer on success, and an error pointer otherwise 1607 + */ 1608 + struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, 1609 + int virt_irq, int irq) 1610 + { 1611 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1612 + struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); 1613 + struct irq_phys_map *map; 1614 + struct irq_phys_map_entry *entry; 1615 + struct irq_desc *desc; 1616 + struct irq_data *data; 1617 + int phys_irq; 1618 + 1619 + desc = irq_to_desc(irq); 1620 + if (!desc) { 1621 + kvm_err("%s: no interrupt descriptor\n", __func__); 1622 + return ERR_PTR(-EINVAL); 1623 + } 1624 + 1625 + data = irq_desc_get_irq_data(desc); 1626 + while (data->parent_data) 1627 + data = data->parent_data; 1628 + 1629 + phys_irq = data->hwirq; 1630 + 1631 + /* Create a new mapping */ 1632 + entry = kzalloc(sizeof(*entry), GFP_KERNEL); 1633 + if (!entry) 1634 + return ERR_PTR(-ENOMEM); 1635 + 1636 + spin_lock(&dist->irq_phys_map_lock); 1637 + 1638 + /* Try to match an existing mapping */ 1639 + map = vgic_irq_map_search(vcpu, virt_irq); 1640 + if (map) { 1641 + /* Make sure this mapping matches */ 1642 + if (map->phys_irq != phys_irq || 1643 + map->irq != irq) 1644 + map = ERR_PTR(-EINVAL); 1645 + 1646 + /* Found an existing, valid mapping */ 1647 + goto out; 1648 + } 1649 + 1650 + map = &entry->map; 1651 + map->virt_irq = virt_irq; 1652 + map->phys_irq = phys_irq; 1653 + map->irq = irq; 1654 + 1655 + list_add_tail_rcu(&entry->entry, root); 1656 + 1657 + out: 1658 + spin_unlock(&dist->irq_phys_map_lock); 1659 + /* If we've found a hit in the existing list, free the useless 1660 + * entry */ 1661 + if (IS_ERR(map) || map != &entry->map) 1662 + kfree(entry); 1663 + return map; 1664 + } 1665 + 1666 + static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, 1667 + int virt_irq) 1668 + { 1669 + struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); 1670 + struct irq_phys_map_entry *entry; 
1671 + struct irq_phys_map *map; 1672 + 1673 + rcu_read_lock(); 1674 + 1675 + list_for_each_entry_rcu(entry, root, entry) { 1676 + map = &entry->map; 1677 + if (map->virt_irq == virt_irq) { 1678 + rcu_read_unlock(); 1679 + return map; 1680 + } 1681 + } 1682 + 1683 + rcu_read_unlock(); 1684 + 1685 + return NULL; 1686 + } 1687 + 1688 + static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) 1689 + { 1690 + struct irq_phys_map_entry *entry; 1691 + 1692 + entry = container_of(rcu, struct irq_phys_map_entry, rcu); 1693 + kfree(entry); 1694 + } 1695 + 1696 + /** 1697 + * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ 1698 + * 1699 + * Return the logical active state of a mapped interrupt. This doesn't 1700 + * necessarily reflect the current HW state. 1701 + */ 1702 + bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map) 1703 + { 1704 + BUG_ON(!map); 1705 + return map->active; 1706 + } 1707 + 1708 + /** 1709 + * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ 1710 + * 1711 + * Set the logical active state of a mapped interrupt. This doesn't 1712 + * immediately affect the HW state. 1713 + */ 1714 + void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active) 1715 + { 1716 + BUG_ON(!map); 1717 + map->active = active; 1718 + } 1719 + 1720 + /** 1721 + * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping 1722 + * @vcpu: The VCPU pointer 1723 + * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq 1724 + * 1725 + * Remove an existing mapping between virtual and physical interrupts. 
1726 + */ 1727 + int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map) 1728 + { 1729 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1730 + struct irq_phys_map_entry *entry; 1731 + struct list_head *root; 1732 + 1733 + if (!map) 1734 + return -EINVAL; 1735 + 1736 + root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq); 1737 + 1738 + spin_lock(&dist->irq_phys_map_lock); 1739 + 1740 + list_for_each_entry(entry, root, entry) { 1741 + if (&entry->map == map) { 1742 + list_del_rcu(&entry->entry); 1743 + call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); 1744 + break; 1745 + } 1746 + } 1747 + 1748 + spin_unlock(&dist->irq_phys_map_lock); 1749 + 1750 + return 0; 1751 + } 1752 + 1753 + static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root) 1754 + { 1755 + struct vgic_dist *dist = &kvm->arch.vgic; 1756 + struct irq_phys_map_entry *entry; 1757 + 1758 + spin_lock(&dist->irq_phys_map_lock); 1759 + 1760 + list_for_each_entry(entry, root, entry) { 1761 + list_del_rcu(&entry->entry); 1762 + call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); 1763 + } 1764 + 1765 + spin_unlock(&dist->irq_phys_map_lock); 1766 + } 1767 + 1731 1768 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) 1732 1769 { 1733 1770 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; ··· 1918 1591 kfree(vgic_cpu->active_shared); 1919 1592 kfree(vgic_cpu->pend_act_shared); 1920 1593 kfree(vgic_cpu->vgic_irq_lr_map); 1594 + vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); 1921 1595 vgic_cpu->pending_shared = NULL; 1922 1596 vgic_cpu->active_shared = NULL; 1923 1597 vgic_cpu->pend_act_shared = NULL; ··· 1953 1625 vgic_cpu->nr_lr = vgic->nr_lr; 1954 1626 1955 1627 return 0; 1628 + } 1629 + 1630 + /** 1631 + * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage 1632 + * 1633 + * No memory allocation should be performed here, only static init. 
1634 + */ 1635 + void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) 1636 + { 1637 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1638 + INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list); 1956 1639 } 1957 1640 1958 1641 /** ··· 2003 1664 kfree(dist->irq_spi_target); 2004 1665 kfree(dist->irq_pending_on_cpu); 2005 1666 kfree(dist->irq_active_on_cpu); 1667 + vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list); 2006 1668 dist->irq_sgi_sources = NULL; 2007 1669 dist->irq_spi_cpu = NULL; 2008 1670 dist->irq_spi_target = NULL; ··· 2127 1787 return 0; 2128 1788 } 2129 1789 1790 + /** 1791 + * kvm_vgic_early_init - Earliest possible vgic initialization stage 1792 + * 1793 + * No memory allocation should be performed here, only static init. 1794 + */ 1795 + void kvm_vgic_early_init(struct kvm *kvm) 1796 + { 1797 + spin_lock_init(&kvm->arch.vgic.lock); 1798 + spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock); 1799 + INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list); 1800 + } 1801 + 2130 1802 int kvm_vgic_create(struct kvm *kvm, u32 type) 2131 1803 { 2132 1804 int i, vcpu_lock_idx = -1, ret; ··· 2184 1832 if (ret) 2185 1833 goto out_unlock; 2186 1834 2187 - spin_lock_init(&kvm->arch.vgic.lock); 2188 1835 kvm->arch.vgic.in_kernel = true; 2189 1836 kvm->arch.vgic.vgic_model = type; 2190 1837 kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
+6 -2
virt/kvm/irqchip.c
··· 213 213 goto out; 214 214 215 215 r = -EINVAL; 216 - if (ue->flags) 216 + if (ue->flags) { 217 + kfree(e); 217 218 goto out; 219 + } 218 220 r = setup_routing_entry(new, e, ue); 219 - if (r) 221 + if (r) { 222 + kfree(e); 220 223 goto out; 224 + } 221 225 ++ue; 222 226 } 223 227
+58 -4
virt/kvm/kvm_main.c
··· 66 66 MODULE_AUTHOR("Qumranet"); 67 67 MODULE_LICENSE("GPL"); 68 68 69 - static unsigned int halt_poll_ns; 69 + /* halt polling only reduces halt latency by 5-7 us, 500us is enough */ 70 + static unsigned int halt_poll_ns = 500000; 70 71 module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); 72 + 73 + /* Default doubles per-vcpu halt_poll_ns. */ 74 + static unsigned int halt_poll_ns_grow = 2; 75 + module_param(halt_poll_ns_grow, uint, S_IRUGO); 76 + 77 + /* Default resets per-vcpu halt_poll_ns. */ 78 + static unsigned int halt_poll_ns_shrink; 79 + module_param(halt_poll_ns_shrink, uint, S_IRUGO); 71 80 72 81 /* 73 82 * Ordering of locks: ··· 226 217 vcpu->kvm = kvm; 227 218 vcpu->vcpu_id = id; 228 219 vcpu->pid = NULL; 220 + vcpu->halt_poll_ns = 0; 229 221 init_waitqueue_head(&vcpu->wq); 230 222 kvm_async_pf_vcpu_init(vcpu); 231 223 ··· 1916 1906 } 1917 1907 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); 1918 1908 1909 + static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) 1910 + { 1911 + int old, val; 1912 + 1913 + old = val = vcpu->halt_poll_ns; 1914 + /* 10us base */ 1915 + if (val == 0 && halt_poll_ns_grow) 1916 + val = 10000; 1917 + else 1918 + val *= halt_poll_ns_grow; 1919 + 1920 + vcpu->halt_poll_ns = val; 1921 + trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); 1922 + } 1923 + 1924 + static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) 1925 + { 1926 + int old, val; 1927 + 1928 + old = val = vcpu->halt_poll_ns; 1929 + if (halt_poll_ns_shrink == 0) 1930 + val = 0; 1931 + else 1932 + val /= halt_poll_ns_shrink; 1933 + 1934 + vcpu->halt_poll_ns = val; 1935 + trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); 1936 + } 1937 + 1919 1938 static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) 1920 1939 { 1921 1940 if (kvm_arch_vcpu_runnable(vcpu)) { ··· 1967 1928 ktime_t start, cur; 1968 1929 DEFINE_WAIT(wait); 1969 1930 bool waited = false; 1931 + u64 block_ns; 1970 1932 1971 1933 start = cur = ktime_get(); 1972 - if (halt_poll_ns) { 1973 - ktime_t stop 
= ktime_add_ns(ktime_get(), halt_poll_ns); 1934 + if (vcpu->halt_poll_ns) { 1935 + ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); 1974 1936 1975 1937 do { 1976 1938 /* ··· 2000 1960 cur = ktime_get(); 2001 1961 2002 1962 out: 2003 - trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited); 1963 + block_ns = ktime_to_ns(cur) - ktime_to_ns(start); 1964 + 1965 + if (halt_poll_ns) { 1966 + if (block_ns <= vcpu->halt_poll_ns) 1967 + ; 1968 + /* we had a long block, shrink polling */ 1969 + else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns) 1970 + shrink_halt_poll_ns(vcpu); 1971 + /* we had a short halt and our poll time is too small */ 1972 + else if (vcpu->halt_poll_ns < halt_poll_ns && 1973 + block_ns < halt_poll_ns) 1974 + grow_halt_poll_ns(vcpu); 1975 + } 1976 + 1977 + trace_kvm_vcpu_wakeup(block_ns, waited); 2004 1978 } 2005 1979 EXPORT_SYMBOL_GPL(kvm_vcpu_block); 2006 1980