Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block

Since commit 328e56647944 ("KVM: arm/arm64: vgic: Defer
touching GICH_VMCR to vcpu_load/put"), we leave ICH_VMCR_EL2 (or
its GICv2 equivalent) loaded as long as we can, only syncing it
back when we're scheduled out.

There is a small snag with that though: kvm_vgic_vcpu_pending_irq(),
which is indirectly called from kvm_vcpu_check_block(), needs to
evaluate the guest's view of ICC_PMR_EL1. At the point where we
call kvm_vcpu_check_block(), the vcpu is still loaded, and whatever
changes to PMR are not visible in memory until we do a vcpu_put().

Things go really south if the guest does the following:

mov x0, #0 // or any small value masking interrupts
msr ICC_PMR_EL1, x0

[vcpu preempted, then rescheduled, VMCR sampled]

mov x0, #0xff // allow all interrupts
msr ICC_PMR_EL1, x0
wfi // traps to EL2, so sampling of VMCR

[interrupt arrives just after WFI]

Here, the hypervisor's view of PMR is zero, while the guest has enabled
its interrupts. kvm_vgic_vcpu_pending_irq() will then say that no
interrupts are pending (despite an interrupt being received) and we'll
block for no reason. If the guest doesn't have a periodic interrupt
firing once it has blocked, it will stay there forever.

To avoid this unfortunate situation, let's resync VMCR from
kvm_arch_vcpu_blocking(), ensuring that a following kvm_vcpu_check_block()
will observe the latest value of PMR.

This has been found by booting an arm64 Linux guest with the pseudo NMI
feature, and thus using interrupt priorities to mask interrupts instead
of the usual PSTATE masking.

Cc: stable@vger.kernel.org # 4.12
Fixes: 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put")
Signed-off-by: Marc Zyngier <maz@kernel.org>

+39 -2
+1
include/kvm/arm_vgic.h
··· 350 350 351 351 void kvm_vgic_load(struct kvm_vcpu *vcpu); 352 352 void kvm_vgic_put(struct kvm_vcpu *vcpu); 353 + void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu); 353 354 354 355 #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) 355 356 #define vgic_initialized(k) ((k)->arch.vgic.initialized)
+11
virt/kvm/arm/arm.c
··· 323 323 324 324 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) 325 325 { 326 + /* 327 + * If we're about to block (most likely because we've just hit a 328 + * WFI), we need to sync back the state of the GIC CPU interface 329 + * so that we have the latest PMR and group enables. This ensures 330 + * that kvm_arch_vcpu_runnable has up-to-date data to decide 331 + * whether we have pending interrupts. 332 + */ 333 + preempt_disable(); 334 + kvm_vgic_vmcr_sync(vcpu); 335 + preempt_enable(); 336 + 326 337 kvm_vgic_v4_enable_doorbell(vcpu); 327 338 }
+8 -1
virt/kvm/arm/vgic/vgic-v2.c
··· 484 484 kvm_vgic_global_state.vctrl_base + GICH_APR); 485 485 } 486 486 487 - void vgic_v2_put(struct kvm_vcpu *vcpu) 487 + void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) 488 488 { 489 489 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 490 490 491 491 cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); 492 + } 493 + 494 + void vgic_v2_put(struct kvm_vcpu *vcpu) 495 + { 496 + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 497 + 498 + vgic_v2_vmcr_sync(vcpu); 492 499 cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); 493 500 }
+6 -1
virt/kvm/arm/vgic/vgic-v3.c
··· 662 662 __vgic_v3_activate_traps(vcpu); 663 663 } 664 664 665 - void vgic_v3_put(struct kvm_vcpu *vcpu) 665 + void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) 666 666 { 667 667 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 668 668 669 669 if (likely(cpu_if->vgic_sre)) 670 670 cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); 671 + } 672 + 673 + void vgic_v3_put(struct kvm_vcpu *vcpu) 674 + { 675 + vgic_v3_vmcr_sync(vcpu); 671 676 672 677 kvm_call_hyp(__vgic_v3_save_aprs, vcpu); 673 678
+11
virt/kvm/arm/vgic/vgic.c
··· 919 919 vgic_v3_put(vcpu); 920 920 } 921 921 922 + void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) 923 + { 924 + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 925 + return; 926 + 927 + if (kvm_vgic_global_state.type == VGIC_V2) 928 + vgic_v2_vmcr_sync(vcpu); 929 + else 930 + vgic_v3_vmcr_sync(vcpu); 931 + } 932 + 922 933 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) 923 934 { 924 935 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+2
virt/kvm/arm/vgic/vgic.h
··· 193 193 void vgic_v2_init_lrs(void); 194 194 void vgic_v2_load(struct kvm_vcpu *vcpu); 195 195 void vgic_v2_put(struct kvm_vcpu *vcpu); 196 + void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); 196 197 197 198 void vgic_v2_save_state(struct kvm_vcpu *vcpu); 198 199 void vgic_v2_restore_state(struct kvm_vcpu *vcpu); ··· 224 223 225 224 void vgic_v3_load(struct kvm_vcpu *vcpu); 226 225 void vgic_v3_put(struct kvm_vcpu *vcpu); 226 + void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); 227 227 228 228 bool vgic_has_its(struct kvm *kvm); 229 229 int kvm_vgic_register_its_device(void);