Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: arm64: GICv3: nv: Resync LRs/VMCR/HCR early for better MI emulation

The current approach to nested GICv3 support is to not do anything
while L2 is running, wait for a transition from L2 to L1 to resync
LRs, VMCR and HCR, and only then evaluate the state to decide
whether to generate a maintenance interrupt.

This doesn't provide a good quality of emulation, and it would be
far preferable to find out early that we need to perform a switch.

Move the LRs/VMCR and HCR resync into vgic_v3_sync_nested(), so
that we have most of the state available. As we are turning the vgic
off at this stage to avoid a screaming host MI, add a new helper
vgic_v3_flush_nested() that switches the vgic on again. The MI can
then be directly injected as required.

Tested-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Link: https://msgid.link/20251120172540.2267180-35-maz@kernel.org
Signed-off-by: Oliver Upton <oupton@kernel.org>

authored by

Marc Zyngier and committed by
Oliver Upton
eb33ffa2 84792050

+46 -33
+1
arch/arm64/include/asm/kvm_hyp.h
··· 77 77 int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); 78 78 79 79 u64 __gic_v3_get_lr(unsigned int lr); 80 + void __gic_v3_set_lr(u64 val, int lr); 80 81 81 82 void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); 82 83 void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
+1 -1
arch/arm64/kvm/hyp/vgic-v3-sr.c
··· 60 60 unreachable(); 61 61 } 62 62 63 - static void __gic_v3_set_lr(u64 val, int lr) 63 + void __gic_v3_set_lr(u64 val, int lr) 64 64 { 65 65 switch (lr & 0xf) { 66 66 case 0:
+39 -30
arch/arm64/kvm/vgic/vgic-v3-nested.c
··· 70 70 * - on L2 put: perform the inverse transformation, so that the result of L2 71 71 * running becomes visible to L1 in the VNCR-accessible registers. 72 72 * 73 - * - there is nothing to do on L2 entry, as everything will have happened 74 - * on load. However, this is the point where we detect that an interrupt 75 - * targeting L1 and prepare the grand switcheroo. 73 + * - there is nothing to do on L2 entry apart from enabling the vgic, as 74 + * everything will have happened on load. However, this is the point where 75 + * we detect that an interrupt targeting L1 and prepare the grand 76 + * switcheroo. 76 77 * 77 - * - on L2 exit: emulate the HW bit, and deactivate corresponding the L1 78 - * interrupt. The L0 active state will be cleared by the HW if the L1 79 - * interrupt was itself backed by a HW interrupt. 78 + * - on L2 exit: resync the LRs and VMCR, emulate the HW bit, and deactivate 79 + * corresponding the L1 interrupt. The L0 active state will be cleared by 80 + * the HW if the L1 interrupt was itself backed by a HW interrupt. 
80 81 * 81 82 * Maintenance Interrupt (MI) management: 82 83 * ··· 266 265 s_cpu_if->used_lrs = hweight16(shadow_if->lr_map); 267 266 } 268 267 268 + void vgic_v3_flush_nested(struct kvm_vcpu *vcpu) 269 + { 270 + u64 val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); 271 + 272 + write_sysreg_s(val | vgic_ich_hcr_trap_bits(), SYS_ICH_HCR_EL2); 273 + } 274 + 269 275 void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) 270 276 { 271 277 struct shadow_if *shadow_if = get_shadow_if(); 272 278 int i; 273 279 274 280 for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { 275 - u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); 281 + u64 val, host_lr, lr; 276 282 struct vgic_irq *irq; 283 + 284 + host_lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); 285 + 286 + /* Propagate the new LR state */ 287 + lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); 288 + val = lr & ~ICH_LR_STATE; 289 + val |= host_lr & ICH_LR_STATE; 290 + __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); 277 291 278 292 if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE)) 279 293 continue; ··· 302 286 if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */ 303 287 continue; 304 288 305 - lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); 306 - if (!(lr & ICH_LR_STATE)) 289 + if (!(host_lr & ICH_LR_STATE)) 307 290 irq->active = false; 308 291 309 292 vgic_put_irq(vcpu->kvm, irq); 310 293 } 294 + 295 + /* We need these to be synchronised to generate the MI */ 296 + __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, read_sysreg_s(SYS_ICH_VMCR_EL2)); 297 + __vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, &=, ~ICH_HCR_EL2_EOIcount); 298 + __vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, |=, read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_EOIcount); 299 + 300 + write_sysreg_s(0, SYS_ICH_HCR_EL2); 301 + isb(); 302 + 303 + vgic_v3_nested_update_mi(vcpu); 311 304 } 312 305 313 306 static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu, ··· 349 324 __vgic_v3_restore_vmcr_aprs(cpu_if); 350 325 __vgic_v3_activate_traps(cpu_if); 351 326 352 - 
__vgic_v3_restore_state(cpu_if); 327 + for (int i = 0; i < cpu_if->used_lrs; i++) 328 + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); 353 329 354 330 /* 355 331 * Propagate the number of used LRs for the benefit of the HYP ··· 363 337 { 364 338 struct shadow_if *shadow_if = get_shadow_if(); 365 339 struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif; 366 - u64 val; 367 340 int i; 368 341 369 342 __vgic_v3_save_aprs(s_cpu_if); 370 - __vgic_v3_deactivate_traps(s_cpu_if); 371 - __vgic_v3_save_state(s_cpu_if); 372 - 373 - /* 374 - * Translate the shadow state HW fields back to the virtual ones 375 - * before copying the shadow struct back to the nested one. 376 - */ 377 - val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); 378 - val &= ~ICH_HCR_EL2_EOIcount_MASK; 379 - val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK); 380 - __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val); 381 - __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr); 382 343 383 344 for (i = 0; i < 4; i++) { 384 345 __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]); 385 346 __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]); 386 347 } 387 348 388 - for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { 389 - val = __vcpu_sys_reg(vcpu, ICH_LRN(i)); 349 + for (i = 0; i < s_cpu_if->used_lrs; i++) 350 + __gic_v3_set_lr(0, i); 390 351 391 - val &= ~ICH_LR_STATE; 392 - val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE; 393 - 394 - __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); 395 - } 352 + __vgic_v3_deactivate_traps(s_cpu_if); 396 353 397 354 vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0; 398 355 }
+4 -2
arch/arm64/kvm/vgic/vgic.c
··· 1049 1049 * abort the entry procedure and inject the exception at the 1050 1050 * beginning of the run loop. 1051 1051 * 1052 - * - Otherwise, do exactly *NOTHING*. The guest state is 1053 - * already loaded, and we can carry on with running it. 1052 + * - Otherwise, do exactly *NOTHING* apart from enabling the virtual 1053 + * CPU interface. The guest state is already loaded, and we can 1054 + * carry on with running it. 1054 1055 * 1055 1056 * If we have NV, but are not in a nested state, compute the 1056 1057 * maintenance interrupt state, as it may fire. ··· 1060 1059 if (kvm_vgic_vcpu_pending_irq(vcpu)) 1061 1060 kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu); 1062 1061 1062 + vgic_v3_flush_nested(vcpu); 1063 1063 return; 1064 1064 } 1065 1065
+1
arch/arm64/kvm/vgic/vgic.h
··· 445 445 return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP); 446 446 } 447 447 448 + void vgic_v3_flush_nested(struct kvm_vcpu *vcpu); 448 449 void vgic_v3_sync_nested(struct kvm_vcpu *vcpu); 449 450 void vgic_v3_load_nested(struct kvm_vcpu *vcpu); 450 451 void vgic_v3_put_nested(struct kvm_vcpu *vcpu);