Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'kvm-arm64/vgic-lr-overflow' into kvmarm/next

* kvm-arm64/vgic-lr-overflow: (50 commits)
: Support for VGIC LR overflows, courtesy of Marc Zyngier
:
: Address deficiencies in KVM's GIC emulation when a vCPU has more active
: IRQs than can be represented in the VGIC list registers. Sort the AP
: list to prioritize inactive and pending IRQs, potentially spilling
: active IRQs outside of the LRs.
:
: Handle deactivation of IRQs outside of the LRs for both EOImode=0/1,
: which involves special consideration for SPIs being deactivated from a
: different vCPU than the one that acked it.
KVM: arm64: Convert ICH_HCR_EL2_TDIR cap to EARLY_LOCAL_CPU_FEATURE
KVM: arm64: selftests: vgic_irq: Add timer deactivation test
KVM: arm64: selftests: vgic_irq: Add Group-0 enable test
KVM: arm64: selftests: vgic_irq: Add asymmetric SPI deactivation test
KVM: arm64: selftests: vgic_irq: Perform EOImode==1 deactivation in ack order
KVM: arm64: selftests: vgic_irq: Remove LR-bound limitation
KVM: arm64: selftests: vgic_irq: Exclude timer-controlled interrupts
KVM: arm64: selftests: vgic_irq: Change configuration before enabling interrupt
KVM: arm64: selftests: vgic_irq: Fix GUEST_ASSERT_IAR_EMPTY() helper
KVM: arm64: selftests: gic_v3: Disable Group-0 interrupts by default
KVM: arm64: selftests: gic_v3: Add irq group setting helper
KVM: arm64: GICv2: Always trap GICV_DIR register
KVM: arm64: GICv2: Handle deactivation via GICV_DIR traps
KVM: arm64: GICv2: Handle LR overflow when EOImode==0
KVM: arm64: GICv3: Force exit to sync ICH_HCR_EL2.En
KVM: arm64: GICv3: nv: Plug L1 LR sync into deactivation primitive
KVM: arm64: GICv3: nv: Resync LRs/VMCR/HCR early for better MI emulation
KVM: arm64: GICv3: Avoid broadcast kick on CPUs lacking TDIR
KVM: arm64: GICv3: Handle in-LR deactivation when possible
KVM: arm64: GICv3: Add SPI tracking to handle asymmetric deactivation
...

Signed-off-by: Oliver Upton <oupton@kernel.org>

+1360 -427
+1 -1
arch/arm64/include/asm/kvm_asm.h
··· 79 79 __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, 80 80 __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, 81 81 __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, 82 - __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, 82 + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, 83 83 __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, 84 84 __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm, 85 85 __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
+1
arch/arm64/include/asm/kvm_host.h
··· 54 54 #define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8) 55 55 #define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9) 56 56 #define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10) 57 + #define KVM_REQ_VGIC_PROCESS_UPDATE KVM_ARCH_REQ(11) 57 58 58 59 #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ 59 60 KVM_DIRTY_LOG_INITIALLY_SET)
+2 -1
arch/arm64/include/asm/kvm_hyp.h
··· 77 77 int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); 78 78 79 79 u64 __gic_v3_get_lr(unsigned int lr); 80 + void __gic_v3_set_lr(u64 val, int lr); 80 81 81 82 void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); 82 83 void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); 83 84 void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); 84 85 void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); 85 - void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); 86 + void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); 86 87 void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); 87 88 int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); 88 89
+6 -1
arch/arm64/include/asm/virt.h
··· 40 40 */ 41 41 #define HVC_FINALISE_EL2 3 42 42 43 + /* 44 + * HVC_GET_ICH_VTR_EL2 - Retrieve the ICH_VTR_EL2 value 45 + */ 46 + #define HVC_GET_ICH_VTR_EL2 4 47 + 43 48 /* Max number of HYP stub hypercalls */ 44 - #define HVC_STUB_HCALL_NR 4 49 + #define HVC_STUB_HCALL_NR 5 45 50 46 51 /* Error returned when an invalid stub number is passed into x0 */ 47 52 #define HVC_STUB_ERR 0xbadca11
+52
arch/arm64/kernel/cpufeature.c
··· 2303 2303 } 2304 2304 #endif 2305 2305 2306 + static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry, 2307 + int scope) 2308 + { 2309 + static const struct midr_range has_vgic_v3[] = { 2310 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), 2311 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), 2312 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO), 2313 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), 2314 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), 2315 + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), 2316 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD), 2317 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE), 2318 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO), 2319 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO), 2320 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX), 2321 + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX), 2322 + {}, 2323 + }; 2324 + struct arm_smccc_res res = {}; 2325 + 2326 + BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF); 2327 + BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY); 2328 + if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) && 2329 + !is_midr_in_range_list(has_vgic_v3)) 2330 + return false; 2331 + 2332 + if (!is_hyp_mode_available()) 2333 + return false; 2334 + 2335 + if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY)) 2336 + return true; 2337 + 2338 + if (is_kernel_in_hyp_mode()) 2339 + res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2); 2340 + else 2341 + arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res); 2342 + 2343 + if (res.a0 == HVC_STUB_ERR) 2344 + return false; 2345 + 2346 + return res.a1 & ICH_VTR_EL2_TDS; 2347 + } 2348 + 2306 2349 #ifdef CONFIG_ARM64_BTI 2307 2350 static void bti_enable(const struct arm64_cpu_capabilities *__unused) 2308 2351 { ··· 2857 2814 .matches = has_gic_prio_relaxed_sync, 2858 2815 }, 2859 2816 #endif 2817 + { 2818 + /* 2819 + * Depends on having GICv3 2820 + */ 2821 + .desc = "ICV_DIR_EL1 trapping", 2822 + .capability = ARM64_HAS_ICH_HCR_EL2_TDIR, 2823 + .type = 
ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, 2824 + .matches = can_trap_icv_dir_el1, 2825 + }, 2860 2826 #ifdef CONFIG_ARM64_E0PD 2861 2827 { 2862 2828 .desc = "E0PD",
+5
arch/arm64/kernel/hyp-stub.S
··· 54 54 1: cmp x0, #HVC_FINALISE_EL2 55 55 b.eq __finalise_el2 56 56 57 + cmp x0, #HVC_GET_ICH_VTR_EL2 58 + b.ne 2f 59 + mrs_s x1, SYS_ICH_VTR_EL2 60 + b 9f 61 + 57 62 2: cmp x0, #HVC_SOFT_RESTART 58 63 b.ne 3f 59 64 mov x0, x2
+1
arch/arm64/kernel/image-vars.h
··· 91 91 KVM_NVHE_ALIAS(spectre_bhb_patch_wa3); 92 92 KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb); 93 93 KVM_NVHE_ALIAS(alt_cb_patch_nops); 94 + KVM_NVHE_ALIAS(kvm_compute_ich_hcr_trap_bits); 94 95 95 96 /* Global kernel state accessed by nVHE hyp code. */ 96 97 KVM_NVHE_ALIAS(kvm_vgic_global_state);
+5 -2
arch/arm64/kvm/arm.c
··· 664 664 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 665 665 { 666 666 if (is_protected_kvm_enabled()) { 667 - kvm_call_hyp(__vgic_v3_save_vmcr_aprs, 668 - &vcpu->arch.vgic_cpu.vgic_v3); 667 + kvm_call_hyp(__vgic_v3_save_aprs, &vcpu->arch.vgic_cpu.vgic_v3); 669 668 kvm_call_hyp_nvhe(__pkvm_vcpu_put); 670 669 } 671 670 ··· 1045 1046 * that a VCPU sees new virtual interrupts. 1046 1047 */ 1047 1048 kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); 1049 + 1050 + /* Process interrupts deactivated through a trap */ 1051 + if (kvm_check_request(KVM_REQ_VGIC_PROCESS_UPDATE, vcpu)) 1052 + kvm_vgic_process_async_update(vcpu); 1048 1053 1049 1054 if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) 1050 1055 kvm_update_stolen_time(vcpu);
+4 -3
arch/arm64/kvm/hyp/nvhe/hyp-main.c
··· 157 157 host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; 158 158 159 159 host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; 160 + host_cpu_if->vgic_vmcr = hyp_cpu_if->vgic_vmcr; 160 161 for (i = 0; i < hyp_cpu_if->used_lrs; ++i) 161 162 host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i]; 162 163 } ··· 465 464 __vgic_v3_init_lrs(); 466 465 } 467 466 468 - static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt) 467 + static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) 469 468 { 470 469 DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); 471 470 472 - __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if)); 471 + __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); 473 472 } 474 473 475 474 static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt) ··· 617 616 HANDLE_FUNC(__kvm_tlb_flush_vmid_range), 618 617 HANDLE_FUNC(__kvm_flush_cpu_context), 619 618 HANDLE_FUNC(__kvm_timer_set_cntvoff), 620 - HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), 619 + HANDLE_FUNC(__vgic_v3_save_aprs), 621 620 HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), 622 621 HANDLE_FUNC(__pkvm_reserve_vm), 623 622 HANDLE_FUNC(__pkvm_unreserve_vm),
+3
arch/arm64/kvm/hyp/nvhe/pkvm.c
··· 337 337 /* CTR_EL0 is always under host control, even for protected VMs. */ 338 338 hyp_vm->kvm.arch.ctr_el0 = host_kvm->arch.ctr_el0; 339 339 340 + /* Preserve the vgic model so that GICv3 emulation works */ 341 + hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model; 342 + 340 343 if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags)) 341 344 set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); 342 345
+5
arch/arm64/kvm/hyp/nvhe/sys_regs.c
··· 444 444 445 445 /* Scalable Vector Registers are restricted. */ 446 446 447 + HOST_HANDLED(SYS_ICC_PMR_EL1), 448 + 447 449 RAZ_WI(SYS_ERRIDR_EL1), 448 450 RAZ_WI(SYS_ERRSELR_EL1), 449 451 RAZ_WI(SYS_ERXFR_EL1), ··· 459 457 460 458 /* Limited Ordering Regions Registers are restricted. */ 461 459 460 + HOST_HANDLED(SYS_ICC_DIR_EL1), 461 + HOST_HANDLED(SYS_ICC_RPR_EL1), 462 462 HOST_HANDLED(SYS_ICC_SGI1R_EL1), 463 463 HOST_HANDLED(SYS_ICC_ASGI1R_EL1), 464 464 HOST_HANDLED(SYS_ICC_SGI0R_EL1), 465 + HOST_HANDLED(SYS_ICC_CTLR_EL1), 465 466 { SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, }, 466 467 467 468 HOST_HANDLED(SYS_CCSIDR_EL1),
+4
arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
··· 63 63 return -1; 64 64 } 65 65 66 + /* Handle deactivation as a normal exit */ 67 + if ((fault_ipa - vgic->vgic_cpu_base) >= GIC_CPU_DEACTIVATE) 68 + return 0; 69 + 66 70 rd = kvm_vcpu_dabt_get_rd(vcpu); 67 71 addr = kvm_vgic_global_state.vcpu_hyp_va; 68 72 addr += fault_ipa - vgic->vgic_cpu_base;
+63 -33
arch/arm64/kvm/hyp/vgic-v3-sr.c
··· 14 14 #include <asm/kvm_hyp.h> 15 15 #include <asm/kvm_mmu.h> 16 16 17 + #include "../../vgic/vgic.h" 18 + 17 19 #define vtr_to_max_lr_idx(v) ((v) & 0xf) 18 20 #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) 19 21 #define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) ··· 60 58 unreachable(); 61 59 } 62 60 63 - static void __gic_v3_set_lr(u64 val, int lr) 61 + void __gic_v3_set_lr(u64 val, int lr) 64 62 { 65 63 switch (lr & 0xf) { 66 64 case 0: ··· 198 196 return val; 199 197 } 200 198 199 + static u64 compute_ich_hcr(struct vgic_v3_cpu_if *cpu_if) 200 + { 201 + return cpu_if->vgic_hcr | vgic_ich_hcr_trap_bits(); 202 + } 203 + 201 204 void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) 202 205 { 203 206 u64 used_lrs = cpu_if->used_lrs; ··· 219 212 } 220 213 } 221 214 222 - if (used_lrs || cpu_if->its_vpe.its_vm) { 215 + if (used_lrs) { 223 216 int i; 224 217 u32 elrsr; 225 218 226 219 elrsr = read_gicreg(ICH_ELRSR_EL2); 227 - 228 - write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EL2_En, ICH_HCR_EL2); 229 220 230 221 for (i = 0; i < used_lrs; i++) { 231 222 if (elrsr & (1 << i)) ··· 234 229 __gic_v3_set_lr(0, i); 235 230 } 236 231 } 232 + 233 + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); 234 + 235 + if (cpu_if->vgic_hcr & ICH_HCR_EL2_LRENPIE) { 236 + u64 val = read_gicreg(ICH_HCR_EL2); 237 + cpu_if->vgic_hcr &= ~ICH_HCR_EL2_EOIcount; 238 + cpu_if->vgic_hcr |= val & ICH_HCR_EL2_EOIcount; 239 + } 240 + 241 + write_gicreg(0, ICH_HCR_EL2); 242 + 243 + /* 244 + * Hack alert: On NV, this results in a trap so that the above write 245 + * actually takes effect... No synchronisation is necessary, as we 246 + * only care about the effects when this traps. 
247 + */ 248 + read_gicreg(ICH_MISR_EL2); 237 249 } 238 250 239 251 void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) ··· 258 236 u64 used_lrs = cpu_if->used_lrs; 259 237 int i; 260 238 261 - if (used_lrs || cpu_if->its_vpe.its_vm) { 262 - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); 239 + write_gicreg(compute_ich_hcr(cpu_if), ICH_HCR_EL2); 263 240 264 - for (i = 0; i < used_lrs; i++) 265 - __gic_v3_set_lr(cpu_if->vgic_lr[i], i); 266 - } 241 + for (i = 0; i < used_lrs; i++) 242 + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); 267 243 268 244 /* 269 245 * Ensure that writes to the LRs, and on non-VHE systems ensure that ··· 327 307 } 328 308 329 309 /* 330 - * If we need to trap system registers, we must write 331 - * ICH_HCR_EL2 anyway, even if no interrupts are being 332 - * injected. Note that this also applies if we don't expect 333 - * any system register access (no vgic at all). 310 + * If we need to trap system registers, we must write ICH_HCR_EL2 311 + * anyway, even if no interrupts are being injected. Note that this 312 + * also applies if we don't expect any system register access (no 313 + * vgic at all). In any case, no need to provide MI configuration. 
334 314 */ 335 315 if (static_branch_unlikely(&vgic_v3_cpuif_trap) || 336 316 cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre) 337 - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); 317 + write_gicreg(vgic_ich_hcr_trap_bits() | ICH_HCR_EL2_En, ICH_HCR_EL2); 338 318 } 339 319 340 320 void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) 341 321 { 342 322 u64 val; 343 - 344 - if (!cpu_if->vgic_sre) { 345 - cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); 346 - } 347 323 348 324 /* Only restore SRE if the host implements the GICv2 interface */ 349 325 if (static_branch_unlikely(&vgic_v3_has_v2_compat)) { ··· 362 346 write_gicreg(0, ICH_HCR_EL2); 363 347 } 364 348 365 - static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) 349 + void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) 366 350 { 367 351 u64 val; 368 352 u32 nr_pre_bits; ··· 521 505 static void __vgic_v3_write_vmcr(u32 vmcr) 522 506 { 523 507 write_gicreg(vmcr, ICH_VMCR_EL2); 524 - } 525 - 526 - void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) 527 - { 528 - __vgic_v3_save_aprs(cpu_if); 529 - if (cpu_if->vgic_sre) 530 - cpu_if->vgic_vmcr = __vgic_v3_read_vmcr(); 531 508 } 532 509 533 510 void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) ··· 799 790 write_gicreg(hcr, ICH_HCR_EL2); 800 791 } 801 792 802 - static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 793 + static int ___vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 803 794 { 804 795 u32 vid = vcpu_get_reg(vcpu, rt); 805 796 u64 lr_val; ··· 807 798 808 799 /* EOImode == 0, nothing to be done here */ 809 800 if (!(vmcr & ICH_VMCR_EOIM_MASK)) 810 - return; 801 + return 1; 811 802 812 803 /* No deactivate to be performed on an LPI */ 813 804 if (vid >= VGIC_MIN_LPI) 814 - return; 805 + return 1; 815 806 816 807 lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); 817 - if (lr == -1) { 818 - __vgic_v3_bump_eoicount(); 819 - return; 808 + if (lr != -1) { 809 + __vgic_v3_clear_active_lr(lr, 
lr_val); 810 + return 1; 820 811 } 821 812 822 - __vgic_v3_clear_active_lr(lr, lr_val); 813 + return 0; 814 + } 815 + 816 + static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 817 + { 818 + if (!___vgic_v3_write_dir(vcpu, vmcr, rt)) 819 + __vgic_v3_bump_eoicount(); 823 820 } 824 821 825 822 static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) ··· 1260 1245 case SYS_ICC_DIR_EL1: 1261 1246 if (unlikely(is_read)) 1262 1247 return 0; 1248 + /* 1249 + * Full exit if required to handle overflow deactivation, 1250 + * unless we can emulate it in the LRs (likely the majority 1251 + * of the cases). 1252 + */ 1253 + if (vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr & ICH_HCR_EL2_TDIR) { 1254 + int ret; 1255 + 1256 + ret = ___vgic_v3_write_dir(vcpu, __vgic_v3_read_vmcr(), 1257 + kvm_vcpu_sys_get_rt(vcpu)); 1258 + if (ret) 1259 + __kvm_skip_instr(vcpu); 1260 + 1261 + return ret; 1262 + } 1263 1263 fn = __vgic_v3_write_dir; 1264 1264 break; 1265 1265 case SYS_ICC_RPR_EL1:
+17 -2
arch/arm64/kvm/sys_regs.c
··· 666 666 return true; 667 667 } 668 668 669 + static bool access_gic_dir(struct kvm_vcpu *vcpu, 670 + struct sys_reg_params *p, 671 + const struct sys_reg_desc *r) 672 + { 673 + if (!kvm_has_gicv3(vcpu->kvm)) 674 + return undef_access(vcpu, p, r); 675 + 676 + if (!p->is_write) 677 + return undef_access(vcpu, p, r); 678 + 679 + vgic_v3_deactivate(vcpu, p->regval); 680 + 681 + return true; 682 + } 683 + 669 684 static bool trap_raz_wi(struct kvm_vcpu *vcpu, 670 685 struct sys_reg_params *p, 671 686 const struct sys_reg_desc *r) ··· 3385 3370 { SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access }, 3386 3371 { SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access }, 3387 3372 { SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access }, 3388 - { SYS_DESC(SYS_ICC_DIR_EL1), undef_access }, 3373 + { SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir }, 3389 3374 { SYS_DESC(SYS_ICC_RPR_EL1), undef_access }, 3390 3375 { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, 3391 3376 { SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi }, ··· 4510 4495 { CP15_SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access }, 4511 4496 { CP15_SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access }, 4512 4497 { CP15_SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access }, 4513 - { CP15_SYS_DESC(SYS_ICC_DIR_EL1), undef_access }, 4498 + { CP15_SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir }, 4514 4499 { CP15_SYS_DESC(SYS_ICC_RPR_EL1), undef_access }, 4515 4500 { CP15_SYS_DESC(SYS_ICC_IAR1_EL1), undef_access }, 4516 4501 { CP15_SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },
+5 -4
arch/arm64/kvm/vgic/vgic-init.c
··· 188 188 struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); 189 189 int i; 190 190 191 + dist->active_spis = (atomic_t)ATOMIC_INIT(0); 191 192 dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); 192 193 if (!dist->spis) 193 194 return -ENOMEM; ··· 354 353 return ret; 355 354 } 356 355 357 - static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) 356 + static void kvm_vgic_vcpu_reset(struct kvm_vcpu *vcpu) 358 357 { 359 358 if (kvm_vgic_global_state.type == VGIC_V2) 360 - vgic_v2_enable(vcpu); 359 + vgic_v2_reset(vcpu); 361 360 else 362 - vgic_v3_enable(vcpu); 361 + vgic_v3_reset(vcpu); 363 362 } 364 363 365 364 /* ··· 406 405 } 407 406 408 407 kvm_for_each_vcpu(idx, vcpu, kvm) 409 - kvm_vgic_vcpu_enable(vcpu); 408 + kvm_vgic_vcpu_reset(vcpu); 410 409 411 410 ret = kvm_vgic_setup_default_irq_routing(kvm); 412 411 if (ret)
+24
arch/arm64/kvm/vgic/vgic-mmio-v2.c
··· 359 359 vgic_set_vmcr(vcpu, &vmcr); 360 360 } 361 361 362 + static void vgic_mmio_write_dir(struct kvm_vcpu *vcpu, 363 + gpa_t addr, unsigned int len, 364 + unsigned long val) 365 + { 366 + if (kvm_vgic_global_state.type == VGIC_V2) 367 + vgic_v2_deactivate(vcpu, val); 368 + else 369 + vgic_v3_deactivate(vcpu, val); 370 + } 371 + 362 372 static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu, 363 373 gpa_t addr, unsigned int len) 364 374 { ··· 492 482 REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, 493 483 vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, 494 484 VGIC_ACCESS_32bit), 485 + REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_CPU_DEACTIVATE, 486 + vgic_mmio_read_raz, vgic_mmio_write_dir, 487 + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 488 + 4, VGIC_ACCESS_32bit), 495 489 }; 496 490 497 491 unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) ··· 506 492 kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); 507 493 508 494 return SZ_4K; 495 + } 496 + 497 + unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev) 498 + { 499 + dev->regions = vgic_v2_cpu_registers; 500 + dev->nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); 501 + 502 + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); 503 + 504 + return KVM_VGIC_V2_CPU_SIZE; 509 505 } 510 506 511 507 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
+1
arch/arm64/kvm/vgic/vgic-mmio.h
··· 213 213 const u32 val); 214 214 215 215 unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); 216 + unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev); 216 217 217 218 unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); 218 219
+222 -73
arch/arm64/kvm/vgic/vgic-v2.c
··· 9 9 #include <kvm/arm_vgic.h> 10 10 #include <asm/kvm_mmu.h> 11 11 12 + #include "vgic-mmio.h" 12 13 #include "vgic.h" 13 14 14 15 static inline void vgic_v2_write_lr(int lr, u32 val) ··· 27 26 vgic_v2_write_lr(i, 0); 28 27 } 29 28 30 - void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) 29 + void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu, 30 + struct ap_list_summary *als) 31 31 { 32 32 struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; 33 33 34 - cpuif->vgic_hcr |= GICH_HCR_UIE; 34 + cpuif->vgic_hcr = GICH_HCR_EN; 35 + 36 + if (irqs_pending_outside_lrs(als)) 37 + cpuif->vgic_hcr |= GICH_HCR_NPIE; 38 + if (irqs_active_outside_lrs(als)) 39 + cpuif->vgic_hcr |= GICH_HCR_LRENPIE; 40 + if (irqs_outside_lrs(als)) 41 + cpuif->vgic_hcr |= GICH_HCR_UIE; 42 + 43 + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP0_MASK) ? 44 + GICH_HCR_VGrp0DIE : GICH_HCR_VGrp0EIE; 45 + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP1_MASK) ? 46 + GICH_HCR_VGrp1DIE : GICH_HCR_VGrp1EIE; 35 47 } 36 48 37 49 static bool lr_signals_eoi_mi(u32 lr_val) ··· 53 39 !(lr_val & GICH_LR_HW); 54 40 } 55 41 56 - /* 57 - * transfer the content of the LRs back into the corresponding ap_list: 58 - * - active bit is transferred as is 59 - * - pending bit is 60 - * - transferred as is in case of edge sensitive IRQs 61 - * - set to the line-level (resample time) for level sensitive IRQs 62 - */ 63 - void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) 42 + static void vgic_v2_fold_lr(struct kvm_vcpu *vcpu, u32 val) 64 43 { 65 - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 66 - struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; 67 - int lr; 44 + u32 cpuid, intid = val & GICH_LR_VIRTUALID; 45 + struct vgic_irq *irq; 46 + bool deactivated; 68 47 69 - DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 48 + /* Extract the source vCPU id from the LR */ 49 + cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val) & 7; 70 50 71 - cpuif->vgic_hcr &= ~GICH_HCR_UIE; 51 + /* Notify fds when the guest 
EOI'ed a level-triggered SPI */ 52 + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) 53 + kvm_notify_acked_irq(vcpu->kvm, 0, 54 + intid - VGIC_NR_PRIVATE_IRQS); 72 55 73 - for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) { 74 - u32 val = cpuif->vgic_lr[lr]; 75 - u32 cpuid, intid = val & GICH_LR_VIRTUALID; 76 - struct vgic_irq *irq; 77 - bool deactivated; 56 + irq = vgic_get_vcpu_irq(vcpu, intid); 78 57 79 - /* Extract the source vCPU id from the LR */ 80 - cpuid = val & GICH_LR_PHYSID_CPUID; 81 - cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; 82 - cpuid &= 7; 83 - 84 - /* Notify fds when the guest EOI'ed a level-triggered SPI */ 85 - if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) 86 - kvm_notify_acked_irq(vcpu->kvm, 0, 87 - intid - VGIC_NR_PRIVATE_IRQS); 88 - 89 - irq = vgic_get_vcpu_irq(vcpu, intid); 90 - 91 - raw_spin_lock(&irq->irq_lock); 92 - 58 + scoped_guard(raw_spinlock, &irq->irq_lock) { 93 59 /* Always preserve the active bit, note deactivation */ 94 60 deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT); 95 61 irq->active = !!(val & GICH_LR_ACTIVE_BIT); ··· 95 101 /* Handle resampling for mapped interrupts if required */ 96 102 vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT); 97 103 98 - raw_spin_unlock(&irq->irq_lock); 99 - vgic_put_irq(vcpu->kvm, irq); 104 + irq->on_lr = false; 105 + } 106 + 107 + vgic_put_irq(vcpu->kvm, irq); 108 + } 109 + 110 + static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq); 111 + 112 + /* 113 + * transfer the content of the LRs back into the corresponding ap_list: 114 + * - active bit is transferred as is 115 + * - pending bit is 116 + * - transferred as is in case of edge sensitive IRQs 117 + * - set to the line-level (resample time) for level sensitive IRQs 118 + */ 119 + void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) 120 + { 121 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 122 + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; 123 + u32 
eoicount = FIELD_GET(GICH_HCR_EOICOUNT, cpuif->vgic_hcr); 124 + struct vgic_irq *irq; 125 + 126 + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 127 + 128 + for (int lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) 129 + vgic_v2_fold_lr(vcpu, cpuif->vgic_lr[lr]); 130 + 131 + /* See the GICv3 equivalent for the EOIcount handling rationale */ 132 + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 133 + u32 lr; 134 + 135 + if (!eoicount) { 136 + break; 137 + } else { 138 + guard(raw_spinlock)(&irq->irq_lock); 139 + 140 + if (!(likely(vgic_target_oracle(irq) == vcpu) && 141 + irq->active)) 142 + continue; 143 + 144 + lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT; 145 + } 146 + 147 + if (lr & GICH_LR_HW) 148 + writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr), 149 + kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE); 150 + vgic_v2_fold_lr(vcpu, lr); 151 + eoicount--; 100 152 } 101 153 102 154 cpuif->used_lrs = 0; 103 155 } 104 156 105 - /* 106 - * Populates the particular LR with the state of a given IRQ: 107 - * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq 108 - * - for a level sensitive IRQ the pending state value is unchanged; 109 - * it is dictated directly by the input level 110 - * 111 - * If @irq describes an SGI with multiple sources, we choose the 112 - * lowest-numbered source VCPU and clear that bit in the source bitmap. 113 - * 114 - * The irq_lock must be held by the caller. 
115 - */ 116 - void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 157 + void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val) 158 + { 159 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 160 + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; 161 + struct kvm_vcpu *target_vcpu = NULL; 162 + bool mmio = false; 163 + struct vgic_irq *irq; 164 + unsigned long flags; 165 + u64 lr = 0; 166 + u8 cpuid; 167 + 168 + /* Snapshot CPUID, and remove it from the INTID */ 169 + cpuid = FIELD_GET(GENMASK_ULL(12, 10), val); 170 + val &= ~GENMASK_ULL(12, 10); 171 + 172 + /* We only deal with DIR when EOIMode==1 */ 173 + if (!(cpuif->vgic_vmcr & GICH_VMCR_EOI_MODE_MASK)) 174 + return; 175 + 176 + /* Make sure we're in the same context as LR handling */ 177 + local_irq_save(flags); 178 + 179 + irq = vgic_get_vcpu_irq(vcpu, val); 180 + if (WARN_ON_ONCE(!irq)) 181 + goto out; 182 + 183 + /* See the corresponding v3 code for the rationale */ 184 + scoped_guard(raw_spinlock, &irq->irq_lock) { 185 + target_vcpu = irq->vcpu; 186 + 187 + /* Not on any ap_list? */ 188 + if (!target_vcpu) 189 + goto put; 190 + 191 + /* 192 + * Urgh. We're deactivating something that we cannot 193 + * observe yet... Big hammer time. 194 + */ 195 + if (irq->on_lr) { 196 + mmio = true; 197 + goto put; 198 + } 199 + 200 + /* SGI: check that the cpuid matches */ 201 + if (val < VGIC_NR_SGIS && irq->active_source != cpuid) { 202 + target_vcpu = NULL; 203 + goto put; 204 + } 205 + 206 + /* (with a Dalek voice) DEACTIVATE!!!! 
*/ 207 + lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT; 208 + } 209 + 210 + if (lr & GICH_LR_HW) 211 + writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr), 212 + kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE); 213 + 214 + vgic_v2_fold_lr(vcpu, lr); 215 + 216 + put: 217 + vgic_put_irq(vcpu->kvm, irq); 218 + 219 + out: 220 + local_irq_restore(flags); 221 + 222 + if (mmio) 223 + vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32)); 224 + 225 + /* Force the ap_list to be pruned */ 226 + if (target_vcpu) 227 + kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu); 228 + } 229 + 230 + static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq) 117 231 { 118 232 u32 val = irq->intid; 119 233 bool allow_pending = true; 234 + 235 + WARN_ON(irq->on_lr); 120 236 121 237 if (irq->active) { 122 238 val |= GICH_LR_ACTIVE_BIT; ··· 267 163 if (allow_pending && irq_is_pending(irq)) { 268 164 val |= GICH_LR_PENDING_BIT; 269 165 166 + if (vgic_irq_is_sgi(irq->intid)) { 167 + u32 src = ffs(irq->source); 168 + 169 + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", 170 + irq->intid)) 171 + return 0; 172 + 173 + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 174 + if (irq->source & ~BIT(src - 1)) 175 + val |= GICH_LR_EOI; 176 + } 177 + } 178 + 179 + /* The GICv2 LR only holds five bits of priority. */ 180 + val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; 181 + 182 + return val; 183 + } 184 + 185 + /* 186 + * Populates the particular LR with the state of a given IRQ: 187 + * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq 188 + * - for a level sensitive IRQ the pending state value is unchanged; 189 + * it is dictated directly by the input level 190 + * 191 + * If @irq describes an SGI with multiple sources, we choose the 192 + * lowest-numbered source VCPU and clear that bit in the source bitmap. 193 + * 194 + * The irq_lock must be held by the caller. 
195 + */ 196 + void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 197 + { 198 + u32 val = vgic_v2_compute_lr(vcpu, irq); 199 + 200 + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; 201 + 202 + if (val & GICH_LR_PENDING_BIT) { 270 203 if (irq->config == VGIC_CONFIG_EDGE) 271 204 irq->pending_latch = false; 272 205 273 206 if (vgic_irq_is_sgi(irq->intid)) { 274 207 u32 src = ffs(irq->source); 275 208 276 - if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", 277 - irq->intid)) 278 - return; 279 - 280 - val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 281 - irq->source &= ~(1 << (src - 1)); 282 - if (irq->source) { 209 + irq->source &= ~BIT(src - 1); 210 + if (irq->source) 283 211 irq->pending_latch = true; 284 - val |= GICH_LR_EOI; 285 - } 286 212 } 287 213 } 288 214 ··· 328 194 /* The GICv2 LR only holds five bits of priority. */ 329 195 val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; 330 196 331 - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; 197 + irq->on_lr = true; 332 198 } 333 199 334 200 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) ··· 391 257 GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT; 392 258 } 393 259 394 - void vgic_v2_enable(struct kvm_vcpu *vcpu) 260 + void vgic_v2_reset(struct kvm_vcpu *vcpu) 395 261 { 396 262 /* 397 263 * By forcing VMCR to zero, the GIC will restore the binary ··· 399 265 * anyway. 400 266 */ 401 267 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; 402 - 403 - /* Get the show on the road... 
*/ 404 - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; 405 268 } 406 269 407 270 /* check for overlapping regions and for regions crossing the end of memory */ ··· 420 289 int vgic_v2_map_resources(struct kvm *kvm) 421 290 { 422 291 struct vgic_dist *dist = &kvm->arch.vgic; 292 + unsigned int len; 423 293 int ret = 0; 424 294 425 295 if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || ··· 444 312 return ret; 445 313 } 446 314 315 + len = vgic_v2_init_cpuif_iodev(&dist->cpuif_iodev); 316 + dist->cpuif_iodev.base_addr = dist->vgic_cpu_base; 317 + dist->cpuif_iodev.iodev_type = IODEV_CPUIF; 318 + dist->cpuif_iodev.redist_vcpu = NULL; 319 + 320 + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist->vgic_cpu_base, 321 + len, &dist->cpuif_iodev.dev); 322 + if (ret) 323 + return ret; 324 + 447 325 if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { 448 326 ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, 449 327 kvm_vgic_global_state.vcpu_base, 450 - KVM_VGIC_V2_CPU_SIZE, true); 328 + KVM_VGIC_V2_CPU_SIZE - SZ_4K, true); 451 329 if (ret) { 452 330 kvm_err("Unable to remap VGIC CPU to VCPU\n"); 453 331 return ret; ··· 527 385 528 386 kvm_vgic_global_state.can_emulate_gicv2 = true; 529 387 kvm_vgic_global_state.vcpu_base = info->vcpu.start; 388 + kvm_vgic_global_state.gicc_base = info->gicc_base; 530 389 kvm_vgic_global_state.type = VGIC_V2; 531 390 kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; 532 391 ··· 566 423 567 424 void vgic_v2_save_state(struct kvm_vcpu *vcpu) 568 425 { 426 + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 569 427 void __iomem *base = kvm_vgic_global_state.vctrl_base; 570 428 u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; 571 429 572 430 if (!base) 573 431 return; 574 432 575 - if (used_lrs) { 433 + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); 434 + 435 + if (used_lrs) 576 436 save_lrs(vcpu, base); 577 - writel_relaxed(0, base + GICH_HCR); 437 + 438 + if (cpu_if->vgic_hcr & 
GICH_HCR_LRENPIE) { 439 + u32 val = readl_relaxed(base + GICH_HCR); 440 + 441 + cpu_if->vgic_hcr &= ~GICH_HCR_EOICOUNT; 442 + cpu_if->vgic_hcr |= val & GICH_HCR_EOICOUNT; 578 443 } 444 + 445 + writel_relaxed(0, base + GICH_HCR); 579 446 } 580 447 581 448 void vgic_v2_restore_state(struct kvm_vcpu *vcpu) ··· 598 445 if (!base) 599 446 return; 600 447 601 - if (used_lrs) { 602 - writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); 603 - for (i = 0; i < used_lrs; i++) { 604 - writel_relaxed(cpu_if->vgic_lr[i], 605 - base + GICH_LR0 + (i * 4)); 606 - } 607 - } 448 + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); 449 + 450 + for (i = 0; i < used_lrs; i++) 451 + writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); 608 452 } 609 453 610 454 void vgic_v2_load(struct kvm_vcpu *vcpu) ··· 618 468 { 619 469 struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; 620 470 621 - cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); 622 471 cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); 623 472 }
+53 -53
arch/arm64/kvm/vgic/vgic-v3-nested.c
··· 70 70 * - on L2 put: perform the inverse transformation, so that the result of L2 71 71 * running becomes visible to L1 in the VNCR-accessible registers. 72 72 * 73 - * - there is nothing to do on L2 entry, as everything will have happened 74 - * on load. However, this is the point where we detect that an interrupt 75 - * targeting L1 and prepare the grand switcheroo. 73 + * - there is nothing to do on L2 entry apart from enabling the vgic, as 74 + * everything will have happened on load. However, this is the point where 75 + * we detect that an interrupt targeting L1 and prepare the grand 76 + * switcheroo. 76 77 * 77 - * - on L2 exit: emulate the HW bit, and deactivate corresponding the L1 78 - * interrupt. The L0 active state will be cleared by the HW if the L1 79 - * interrupt was itself backed by a HW interrupt. 78 + * - on L2 exit: resync the LRs and VMCR, emulate the HW bit, and deactivate 79 + * corresponding the L1 interrupt. The L0 active state will be cleared by 80 + * the HW if the L1 interrupt was itself backed by a HW interrupt. 80 81 * 81 82 * Maintenance Interrupt (MI) management: 82 83 * ··· 94 93 * 95 94 * - because most of the ICH_*_EL2 registers live in the VNCR page, the 96 95 * quality of emulation is poor: L1 can setup the vgic so that an MI would 97 - * immediately fire, and not observe anything until the next exit. Trying 98 - * to read ICH_MISR_EL2 would do the trick, for example. 96 + * immediately fire, and not observe anything until the next exit. 97 + * Similarly, a pending MI is not immediately disabled by clearing 98 + * ICH_HCR_EL2.En. Trying to read ICH_MISR_EL2 would do the trick, for 99 + * example. 
99 100 * 100 101 * System register emulation: 101 102 * ··· 268 265 s_cpu_if->used_lrs = hweight16(shadow_if->lr_map); 269 266 } 270 267 268 + void vgic_v3_flush_nested(struct kvm_vcpu *vcpu) 269 + { 270 + u64 val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); 271 + 272 + write_sysreg_s(val | vgic_ich_hcr_trap_bits(), SYS_ICH_HCR_EL2); 273 + } 274 + 271 275 void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) 272 276 { 273 277 struct shadow_if *shadow_if = get_shadow_if(); 274 278 int i; 275 279 276 280 for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { 277 - u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); 278 - struct vgic_irq *irq; 281 + u64 val, host_lr, lr; 279 282 280 - if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE)) 283 + host_lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); 284 + 285 + /* Propagate the new LR state */ 286 + lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); 287 + val = lr & ~ICH_LR_STATE; 288 + val |= host_lr & ICH_LR_STATE; 289 + __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); 290 + 291 + /* 292 + * Deactivation of a HW interrupt: the LR must have the HW 293 + * bit set, have been in a non-invalid state before the run, 294 + * and now be in an invalid state. If any of that doesn't 295 + * hold, we're done with this LR. 296 + */ 297 + if (!((lr & ICH_LR_HW) && (lr & ICH_LR_STATE) && 298 + !(host_lr & ICH_LR_STATE))) 281 299 continue; 282 300 283 301 /* ··· 306 282 * need to emulate the HW effect between the guest hypervisor 307 283 * and the nested guest. 
308 284 */ 309 - irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); 310 - if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */ 311 - continue; 312 - 313 - lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); 314 - if (!(lr & ICH_LR_STATE)) 315 - irq->active = false; 316 - 317 - vgic_put_irq(vcpu->kvm, irq); 285 + vgic_v3_deactivate(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); 318 286 } 287 + 288 + /* We need these to be synchronised to generate the MI */ 289 + __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, read_sysreg_s(SYS_ICH_VMCR_EL2)); 290 + __vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, &=, ~ICH_HCR_EL2_EOIcount); 291 + __vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, |=, read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_EOIcount); 292 + 293 + write_sysreg_s(0, SYS_ICH_HCR_EL2); 294 + isb(); 295 + 296 + vgic_v3_nested_update_mi(vcpu); 319 297 } 320 298 321 299 static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu, 322 300 struct vgic_v3_cpu_if *s_cpu_if) 323 301 { 324 302 struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3; 325 - u64 val = 0; 326 303 int i; 327 304 328 - /* 329 - * If we're on a system with a broken vgic that requires 330 - * trapping, propagate the trapping requirements. 331 - * 332 - * Ah, the smell of rotten fruits... 
333 - */ 334 - if (static_branch_unlikely(&vgic_v3_cpuif_trap)) 335 - val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | 336 - ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR); 337 - s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val; 305 + s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); 338 306 s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2); 339 307 s_cpu_if->vgic_sre = host_if->vgic_sre; 340 308 ··· 350 334 __vgic_v3_restore_vmcr_aprs(cpu_if); 351 335 __vgic_v3_activate_traps(cpu_if); 352 336 353 - __vgic_v3_restore_state(cpu_if); 337 + for (int i = 0; i < cpu_if->used_lrs; i++) 338 + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); 354 339 355 340 /* 356 341 * Propagate the number of used LRs for the benefit of the HYP ··· 364 347 { 365 348 struct shadow_if *shadow_if = get_shadow_if(); 366 349 struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif; 367 - u64 val; 368 350 int i; 369 351 370 - __vgic_v3_save_vmcr_aprs(s_cpu_if); 371 - __vgic_v3_deactivate_traps(s_cpu_if); 372 - __vgic_v3_save_state(s_cpu_if); 373 - 374 - /* 375 - * Translate the shadow state HW fields back to the virtual ones 376 - * before copying the shadow struct back to the nested one. 
377 - */ 378 - val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); 379 - val &= ~ICH_HCR_EL2_EOIcount_MASK; 380 - val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK); 381 - __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val); 382 - __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr); 352 + __vgic_v3_save_aprs(s_cpu_if); 383 353 384 354 for (i = 0; i < 4; i++) { 385 355 __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]); 386 356 __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]); 387 357 } 388 358 389 - for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { 390 - val = __vcpu_sys_reg(vcpu, ICH_LRN(i)); 359 + for (i = 0; i < s_cpu_if->used_lrs; i++) 360 + __gic_v3_set_lr(0, i); 391 361 392 - val &= ~ICH_LR_STATE; 393 - val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE; 394 - 395 - __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); 396 - } 362 + __vgic_v3_deactivate_traps(s_cpu_if); 397 363 398 364 vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0; 399 365 }
+330 -100
arch/arm64/kvm/vgic/vgic-v3.c
··· 12 12 #include <asm/kvm_mmu.h> 13 13 #include <asm/kvm_asm.h> 14 14 15 + #include "vgic-mmio.h" 15 16 #include "vgic.h" 16 17 17 18 static bool group0_trap; ··· 21 20 static bool dir_trap; 22 21 static bool gicv4_enable; 23 22 24 - void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) 23 + void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, 24 + struct ap_list_summary *als) 25 25 { 26 26 struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; 27 27 28 - cpuif->vgic_hcr |= ICH_HCR_EL2_UIE; 28 + if (!irqchip_in_kernel(vcpu->kvm)) 29 + return; 30 + 31 + cpuif->vgic_hcr = ICH_HCR_EL2_En; 32 + 33 + if (irqs_pending_outside_lrs(als)) 34 + cpuif->vgic_hcr |= ICH_HCR_EL2_NPIE; 35 + if (irqs_active_outside_lrs(als)) 36 + cpuif->vgic_hcr |= ICH_HCR_EL2_LRENPIE; 37 + if (irqs_outside_lrs(als)) 38 + cpuif->vgic_hcr |= ICH_HCR_EL2_UIE; 39 + 40 + if (!als->nr_sgi) 41 + cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount; 42 + 43 + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG0_MASK) ? 44 + ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE; 45 + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG1_MASK) ? 46 + ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE; 47 + 48 + /* 49 + * Dealing with EOImode=1 is a massive source of headache. Not 50 + * only do we need to track that we have active interrupts 51 + * outside of the LRs and force DIR to be trapped, we also 52 + * need to deal with SPIs that can be deactivated on another 53 + * CPU. 54 + * 55 + * On systems that do not implement TDIR, force the bit in the 56 + * shadow state anyway to avoid IPI-ing on these poor sods. 57 + * 58 + * Note that we set the trap irrespective of EOIMode, as that 59 + * can change behind our back without any warning... 
60 + */ 61 + if (!cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) || 62 + irqs_active_outside_lrs(als) || 63 + atomic_read(&vcpu->kvm->arch.vgic.active_spis)) 64 + cpuif->vgic_hcr |= ICH_HCR_EL2_TDIR; 29 65 } 30 66 31 67 static bool lr_signals_eoi_mi(u64 lr_val) ··· 71 33 !(lr_val & ICH_LR_HW); 72 34 } 73 35 74 - void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) 36 + static void vgic_v3_fold_lr(struct kvm_vcpu *vcpu, u64 val) 75 37 { 76 - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 77 - struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; 78 - u32 model = vcpu->kvm->arch.vgic.vgic_model; 79 - int lr; 38 + struct vgic_irq *irq; 39 + bool is_v2_sgi = false; 40 + bool deactivated; 41 + u32 intid; 80 42 81 - DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 43 + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { 44 + intid = val & ICH_LR_VIRTUAL_ID_MASK; 45 + } else { 46 + intid = val & GICH_LR_VIRTUALID; 47 + is_v2_sgi = vgic_irq_is_sgi(intid); 48 + } 82 49 83 - cpuif->vgic_hcr &= ~ICH_HCR_EL2_UIE; 50 + irq = vgic_get_vcpu_irq(vcpu, intid); 51 + if (!irq) /* An LPI could have been unmapped. */ 52 + return; 84 53 85 - for (lr = 0; lr < cpuif->used_lrs; lr++) { 86 - u64 val = cpuif->vgic_lr[lr]; 87 - u32 intid, cpuid; 88 - struct vgic_irq *irq; 89 - bool is_v2_sgi = false; 90 - bool deactivated; 91 - 92 - cpuid = val & GICH_LR_PHYSID_CPUID; 93 - cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; 94 - 95 - if (model == KVM_DEV_TYPE_ARM_VGIC_V3) { 96 - intid = val & ICH_LR_VIRTUAL_ID_MASK; 97 - } else { 98 - intid = val & GICH_LR_VIRTUALID; 99 - is_v2_sgi = vgic_irq_is_sgi(intid); 100 - } 101 - 102 - /* Notify fds when the guest EOI'ed a level-triggered IRQ */ 103 - if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) 104 - kvm_notify_acked_irq(vcpu->kvm, 0, 105 - intid - VGIC_NR_PRIVATE_IRQS); 106 - 107 - irq = vgic_get_vcpu_irq(vcpu, intid); 108 - if (!irq) /* An LPI could have been unmapped. 
*/ 109 - continue; 110 - 111 - raw_spin_lock(&irq->irq_lock); 112 - 113 - /* Always preserve the active bit, note deactivation */ 54 + scoped_guard(raw_spinlock, &irq->irq_lock) { 55 + /* Always preserve the active bit for !LPIs, note deactivation */ 56 + if (irq->intid >= VGIC_MIN_LPI) 57 + val &= ~ICH_LR_ACTIVE_BIT; 114 58 deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT); 115 59 irq->active = !!(val & ICH_LR_ACTIVE_BIT); 116 60 117 - if (irq->active && is_v2_sgi) 118 - irq->active_source = cpuid; 119 - 120 61 /* Edge is the only case where we preserve the pending bit */ 121 62 if (irq->config == VGIC_CONFIG_EDGE && 122 - (val & ICH_LR_PENDING_BIT)) { 63 + (val & ICH_LR_PENDING_BIT)) 123 64 irq->pending_latch = true; 124 - 125 - if (is_v2_sgi) 126 - irq->source |= (1 << cpuid); 127 - } 128 65 129 66 /* 130 67 * Clear soft pending state when level irqs have been acked. ··· 107 94 if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) 108 95 irq->pending_latch = false; 109 96 97 + if (is_v2_sgi) { 98 + u8 cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val); 99 + 100 + if (irq->active) 101 + irq->active_source = cpuid; 102 + 103 + if (val & ICH_LR_PENDING_BIT) 104 + irq->source |= BIT(cpuid); 105 + } 106 + 110 107 /* Handle resampling for mapped interrupts if required */ 111 108 vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT); 112 109 113 - raw_spin_unlock(&irq->irq_lock); 114 - vgic_put_irq(vcpu->kvm, irq); 110 + irq->on_lr = false; 111 + } 112 + 113 + /* Notify fds when the guest EOI'ed a level-triggered SPI, and drop the refcount */ 114 + if (deactivated && lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) { 115 + kvm_notify_acked_irq(vcpu->kvm, 0, 116 + intid - VGIC_NR_PRIVATE_IRQS); 117 + atomic_dec_if_positive(&vcpu->kvm->arch.vgic.active_spis); 118 + } 119 + 120 + vgic_put_irq(vcpu->kvm, irq); 121 + } 122 + 123 + static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq); 124 + 125 + static void 
vgic_v3_deactivate_phys(u32 intid) 126 + { 127 + if (cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) 128 + gic_insn(intid | FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, 1), CDDI); 129 + else 130 + gic_write_dir(intid); 131 + } 132 + 133 + void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) 134 + { 135 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 136 + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; 137 + u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr); 138 + struct vgic_irq *irq; 139 + 140 + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 141 + 142 + for (int lr = 0; lr < cpuif->used_lrs; lr++) 143 + vgic_v3_fold_lr(vcpu, cpuif->vgic_lr[lr]); 144 + 145 + /* 146 + * EOIMode=0: use EOIcount to emulate deactivation. We are 147 + * guaranteed to deactivate in reverse order of the activation, so 148 + * just pick one active interrupt after the other in the ap_list, 149 + * and replay the deactivation as if the CPU was doing it. We also 150 + * rely on priority drop to have taken place, and the list to be 151 + * sorted by priority. 152 + */ 153 + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 154 + u64 lr; 155 + 156 + /* 157 + * I would have loved to write this using a scoped_guard(), 158 + * but using 'continue' here is a total train wreck. 
159 + */ 160 + if (!eoicount) { 161 + break; 162 + } else { 163 + guard(raw_spinlock)(&irq->irq_lock); 164 + 165 + if (!(likely(vgic_target_oracle(irq) == vcpu) && 166 + irq->active)) 167 + continue; 168 + 169 + lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT; 170 + } 171 + 172 + if (lr & ICH_LR_HW) 173 + vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); 174 + 175 + vgic_v3_fold_lr(vcpu, lr); 176 + eoicount--; 115 177 } 116 178 117 179 cpuif->used_lrs = 0; 118 180 } 119 181 182 + void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val) 183 + { 184 + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 185 + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; 186 + u32 model = vcpu->kvm->arch.vgic.vgic_model; 187 + struct kvm_vcpu *target_vcpu = NULL; 188 + bool mmio = false, is_v2_sgi; 189 + struct vgic_irq *irq; 190 + unsigned long flags; 191 + u64 lr = 0; 192 + u8 cpuid; 193 + 194 + /* Snapshot CPUID, and remove it from the INTID */ 195 + cpuid = FIELD_GET(GENMASK_ULL(12, 10), val); 196 + val &= ~GENMASK_ULL(12, 10); 197 + 198 + is_v2_sgi = (model == KVM_DEV_TYPE_ARM_VGIC_V2 && 199 + val < VGIC_NR_SGIS); 200 + 201 + /* 202 + * We only deal with DIR when EOIMode==1, and only for SGI, 203 + * PPI or SPI. 204 + */ 205 + if (!(cpuif->vgic_vmcr & ICH_VMCR_EOIM_MASK) || 206 + val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS) 207 + return; 208 + 209 + /* Make sure we're in the same context as LR handling */ 210 + local_irq_save(flags); 211 + 212 + irq = vgic_get_vcpu_irq(vcpu, val); 213 + if (WARN_ON_ONCE(!irq)) 214 + goto out; 215 + 216 + /* 217 + * EOIMode=1: we must rely on traps to handle deactivate of 218 + * overflowing interrupts, as there is no ordering guarantee and 219 + * EOIcount isn't being incremented. Priority drop will have taken 220 + * place, as ICV_EOIxR_EL1 only affects the APRs and not the LRs. 
221 + * 222 + * Three possibities: 223 + * 224 + * - The irq is not queued on any CPU, and there is nothing to 225 + * do, 226 + * 227 + * - Or the irq is in an LR, meaning that its state is not 228 + * directly observable. Treat it bluntly by making it as if 229 + * this was a write to GICD_ICACTIVER, which will force an 230 + * exit on all vcpus. If it hurts, don't do that. 231 + * 232 + * - Or the irq is active, but not in an LR, and we can 233 + * directly deactivate it by building a pseudo-LR, fold it, 234 + * and queue a request to prune the resulting ap_list, 235 + * 236 + * Special care must be taken to match the source CPUID when 237 + * deactivating a GICv2 SGI. 238 + */ 239 + scoped_guard(raw_spinlock, &irq->irq_lock) { 240 + target_vcpu = irq->vcpu; 241 + 242 + /* Not on any ap_list? */ 243 + if (!target_vcpu) 244 + goto put; 245 + 246 + /* 247 + * Urgh. We're deactivating something that we cannot 248 + * observe yet... Big hammer time. 249 + */ 250 + if (irq->on_lr) { 251 + mmio = true; 252 + goto put; 253 + } 254 + 255 + /* GICv2 SGI: check that the cpuid matches */ 256 + if (is_v2_sgi && irq->active_source != cpuid) { 257 + target_vcpu = NULL; 258 + goto put; 259 + } 260 + 261 + /* (with a Dalek voice) DEACTIVATE!!!! 
*/ 262 + lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT; 263 + } 264 + 265 + if (lr & ICH_LR_HW) 266 + vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); 267 + 268 + vgic_v3_fold_lr(vcpu, lr); 269 + 270 + put: 271 + vgic_put_irq(vcpu->kvm, irq); 272 + 273 + out: 274 + local_irq_restore(flags); 275 + 276 + if (mmio) 277 + vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32)); 278 + 279 + /* Force the ap_list to be pruned */ 280 + if (target_vcpu) 281 + kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu); 282 + } 283 + 120 284 /* Requires the irq to be locked already */ 121 - void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 285 + static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq) 122 286 { 123 287 u32 model = vcpu->kvm->arch.vgic.vgic_model; 124 288 u64 val = irq->intid; 125 289 bool allow_pending = true, is_v2_sgi; 290 + 291 + WARN_ON(irq->on_lr); 126 292 127 293 is_v2_sgi = (vgic_irq_is_sgi(irq->intid) && 128 294 model == KVM_DEV_TYPE_ARM_VGIC_V2); ··· 342 150 if (allow_pending && irq_is_pending(irq)) { 343 151 val |= ICH_LR_PENDING_BIT; 344 152 153 + if (is_v2_sgi) { 154 + u32 src = ffs(irq->source); 155 + 156 + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", 157 + irq->intid)) 158 + return 0; 159 + 160 + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 161 + if (irq->source & ~BIT(src - 1)) 162 + val |= ICH_LR_EOI; 163 + } 164 + } 165 + 166 + if (irq->group) 167 + val |= ICH_LR_GROUP; 168 + 169 + val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; 170 + 171 + return val; 172 + } 173 + 174 + void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) 175 + { 176 + u32 model = vcpu->kvm->arch.vgic.vgic_model; 177 + u64 val = vgic_v3_compute_lr(vcpu, irq); 178 + 179 + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; 180 + 181 + if (val & ICH_LR_PENDING_BIT) { 345 182 if (irq->config == VGIC_CONFIG_EDGE) 346 183 irq->pending_latch = false; 347 184 ··· 378 
157 model == KVM_DEV_TYPE_ARM_VGIC_V2) { 379 158 u32 src = ffs(irq->source); 380 159 381 - if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", 382 - irq->intid)) 383 - return; 384 - 385 - val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; 386 - irq->source &= ~(1 << (src - 1)); 387 - if (irq->source) { 160 + irq->source &= ~BIT(src - 1); 161 + if (irq->source) 388 162 irq->pending_latch = true; 389 - val |= ICH_LR_EOI; 390 - } 391 163 } 392 164 } 393 165 ··· 393 179 if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) 394 180 irq->line_level = false; 395 181 396 - if (irq->group) 397 - val |= ICH_LR_GROUP; 398 - 399 - val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; 400 - 401 - vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; 182 + irq->on_lr = true; 402 183 } 403 184 404 185 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) ··· 467 258 GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \ 468 259 GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)) 469 260 470 - void vgic_v3_enable(struct kvm_vcpu *vcpu) 261 + void vgic_v3_reset(struct kvm_vcpu *vcpu) 471 262 { 472 263 struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; 473 264 ··· 497 288 kvm_vgic_global_state.ich_vtr_el2); 498 289 vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits, 499 290 kvm_vgic_global_state.ich_vtr_el2) + 1; 500 - 501 - /* Get the show on the road... 
*/ 502 - vgic_v3->vgic_hcr = ICH_HCR_EL2_En; 503 291 } 504 292 505 293 void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) ··· 507 301 return; 508 302 509 303 /* Hide GICv3 sysreg if necessary */ 510 - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { 304 + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) 511 305 vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | 512 306 ICH_HCR_EL2_TC); 513 - return; 514 - } 515 - 516 - if (group0_trap) 517 - vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL0; 518 - if (group1_trap) 519 - vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL1; 520 - if (common_trap) 521 - vgic_v3->vgic_hcr |= ICH_HCR_EL2_TC; 522 - if (dir_trap) 523 - vgic_v3->vgic_hcr |= ICH_HCR_EL2_TDIR; 524 307 } 525 308 526 309 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) ··· 830 635 831 636 static bool vgic_v3_broken_seis(void) 832 637 { 833 - return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_SEIS) && 834 - is_midr_in_range_list(broken_seis)); 638 + return (is_kernel_in_hyp_mode() && 639 + is_midr_in_range_list(broken_seis) && 640 + (read_sysreg_s(SYS_ICH_VTR_EL2) & ICH_VTR_EL2_SEIS)); 641 + } 642 + 643 + void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt, 644 + __le32 *origptr, __le32 *updptr, 645 + int nr_inst) 646 + { 647 + u32 insn, oinsn, rd; 648 + u64 hcr = 0; 649 + 650 + if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_30115)) { 651 + group0_trap = true; 652 + group1_trap = true; 653 + } 654 + 655 + if (vgic_v3_broken_seis()) { 656 + /* We know that these machines have ICH_HCR_EL2.TDIR */ 657 + group0_trap = true; 658 + group1_trap = true; 659 + dir_trap = true; 660 + } 661 + 662 + if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR)) 663 + common_trap = true; 664 + 665 + if (group0_trap) 666 + hcr |= ICH_HCR_EL2_TALL0; 667 + if (group1_trap) 668 + hcr |= ICH_HCR_EL2_TALL1; 669 + if (common_trap) 670 + hcr |= ICH_HCR_EL2_TC; 671 + if (dir_trap) 672 + hcr |= ICH_HCR_EL2_TDIR; 673 + 674 + /* Compute target 
register */ 675 + oinsn = le32_to_cpu(*origptr); 676 + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn); 677 + 678 + /* movz rd, #(val & 0xffff) */ 679 + insn = aarch64_insn_gen_movewide(rd, 680 + (u16)hcr, 681 + 0, 682 + AARCH64_INSN_VARIANT_64BIT, 683 + AARCH64_INSN_MOVEWIDE_ZERO); 684 + *updptr = cpu_to_le32(insn); 835 685 } 836 686 837 687 /** ··· 890 650 { 891 651 u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); 892 652 bool has_v2; 653 + u64 traps; 893 654 int ret; 894 655 895 656 has_v2 = ich_vtr_el2 >> 63; ··· 949 708 if (has_v2) 950 709 static_branch_enable(&vgic_v3_has_v2_compat); 951 710 952 - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) { 953 - group0_trap = true; 954 - group1_trap = true; 955 - } 956 - 957 711 if (vgic_v3_broken_seis()) { 958 712 kvm_info("GICv3 with broken locally generated SEI\n"); 959 - 960 713 kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS; 961 - group0_trap = true; 962 - group1_trap = true; 963 - if (ich_vtr_el2 & ICH_VTR_EL2_TDS) 964 - dir_trap = true; 965 - else 966 - common_trap = true; 967 714 } 968 715 969 - if (group0_trap || group1_trap || common_trap | dir_trap) { 716 + traps = vgic_ich_hcr_trap_bits(); 717 + if (traps) { 970 718 kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", 971 - group0_trap ? "G0" : "", 972 - group1_trap ? "G1" : "", 973 - common_trap ? "C" : "", 974 - dir_trap ? "D" : ""); 719 + (traps & ICH_HCR_EL2_TALL0) ? "G0" : "", 720 + (traps & ICH_HCR_EL2_TALL1) ? "G1" : "", 721 + (traps & ICH_HCR_EL2_TC) ? "C" : "", 722 + (traps & ICH_HCR_EL2_TDIR) ? "D" : ""); 975 723 static_branch_enable(&vgic_v3_cpuif_trap); 976 724 } 977 725 ··· 1000 770 } 1001 771 1002 772 if (likely(!is_protected_kvm_enabled())) 1003 - kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); 773 + kvm_call_hyp(__vgic_v3_save_aprs, cpu_if); 1004 774 WARN_ON(vgic_v4_put(vcpu)); 1005 775 1006 776 if (has_vhe())
+4 -1
arch/arm64/kvm/vgic/vgic-v4.c
··· 163 163 struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); 164 164 struct irq_desc *desc; 165 165 unsigned long flags; 166 + bool pending; 166 167 int ret; 167 168 168 169 raw_spin_lock_irqsave(&irq->irq_lock, flags); ··· 174 173 irq->hw = false; 175 174 ret = irq_get_irqchip_state(irq->host_irq, 176 175 IRQCHIP_STATE_PENDING, 177 - &irq->pending_latch); 176 + &pending); 178 177 WARN_ON(ret); 178 + 179 + irq->pending_latch = pending; 179 180 180 181 desc = irq_to_desc(irq->host_irq); 181 182 irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
+189 -115
arch/arm64/kvm/vgic/vgic.c
··· 237 237 * 238 238 * Requires the IRQ lock to be held. 239 239 */ 240 - static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) 240 + struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) 241 241 { 242 242 lockdep_assert_held(&irq->irq_lock); 243 243 ··· 265 265 return NULL; 266 266 } 267 267 268 + struct vgic_sort_info { 269 + struct kvm_vcpu *vcpu; 270 + struct vgic_vmcr vmcr; 271 + }; 272 + 268 273 /* 269 274 * The order of items in the ap_lists defines how we'll pack things in LRs as 270 275 * well, the first items in the list being the first things populated in the 271 276 * LRs. 272 277 * 273 - * A hard rule is that active interrupts can never be pushed out of the LRs 274 - * (and therefore take priority) since we cannot reliably trap on deactivation 275 - * of IRQs and therefore they have to be present in the LRs. 276 - * 278 + * Pending, non-active interrupts must be placed at the head of the list. 277 279 * Otherwise things should be sorted by the priority field and the GIC 278 280 * hardware support will take care of preemption of priority groups etc. 281 + * Interrupts that are not deliverable should be at the end of the list. 279 282 * 280 283 * Return negative if "a" sorts before "b", 0 to preserve order, and positive 281 284 * to sort "b" before "a". 
··· 288 285 { 289 286 struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list); 290 287 struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list); 288 + struct vgic_sort_info *info = priv; 289 + struct kvm_vcpu *vcpu = info->vcpu; 291 290 bool penda, pendb; 292 291 int ret; 293 292 ··· 303 298 raw_spin_lock(&irqa->irq_lock); 304 299 raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); 305 300 306 - if (irqa->active || irqb->active) { 307 - ret = (int)irqb->active - (int)irqa->active; 301 + /* Undeliverable interrupts should be last */ 302 + ret = (int)(vgic_target_oracle(irqb) == vcpu) - (int)(vgic_target_oracle(irqa) == vcpu); 303 + if (ret) 308 304 goto out; 309 - } 310 305 311 - penda = irqa->enabled && irq_is_pending(irqa); 312 - pendb = irqb->enabled && irq_is_pending(irqb); 313 - 314 - if (!penda || !pendb) { 315 - ret = (int)pendb - (int)penda; 306 + /* Same thing for interrupts targeting a disabled group */ 307 + ret = (int)(irqb->group ? info->vmcr.grpen1 : info->vmcr.grpen0); 308 + ret -= (int)(irqa->group ? 
info->vmcr.grpen1 : info->vmcr.grpen0); 309 + if (ret) 316 310 goto out; 317 - } 318 311 319 - /* Both pending and enabled, sort by priority */ 320 - ret = irqa->priority - irqb->priority; 312 + penda = irqa->enabled && irq_is_pending(irqa) && !irqa->active; 313 + pendb = irqb->enabled && irq_is_pending(irqb) && !irqb->active; 314 + 315 + ret = (int)pendb - (int)penda; 316 + if (ret) 317 + goto out; 318 + 319 + /* Both pending and enabled, sort by priority (lower number first) */ 320 + ret = (int)irqa->priority - (int)irqb->priority; 321 + if (ret) 322 + goto out; 323 + 324 + /* Finally, HW bit active interrupts have priority over non-HW ones */ 325 + ret = (int)irqb->hw - (int)irqa->hw; 326 + 321 327 out: 322 328 raw_spin_unlock(&irqb->irq_lock); 323 329 raw_spin_unlock(&irqa->irq_lock); ··· 339 323 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) 340 324 { 341 325 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 326 + struct vgic_sort_info info = { .vcpu = vcpu, }; 342 327 343 328 lockdep_assert_held(&vgic_cpu->ap_list_lock); 344 329 345 - list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); 330 + vgic_get_vmcr(vcpu, &info.vmcr); 331 + list_sort(&info, &vgic_cpu->ap_list_head, vgic_irq_cmp); 346 332 } 347 333 348 334 /* ··· 367 349 return false; 368 350 } 369 351 352 + static bool vgic_model_needs_bcst_kick(struct kvm *kvm) 353 + { 354 + /* 355 + * A GICv3 (or GICv3-like) system exposing a GICv3 to the guest 356 + * needs a broadcast kick to set TDIR globally. 357 + * 358 + * For systems that do not have TDIR (ARM's own v8.0 CPUs), the 359 + * shadow TDIR bit is always set, and so is the register's TC bit, 360 + * so no need to kick the CPUs. 361 + */ 362 + return (cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) && 363 + kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3); 364 + } 365 + 370 366 /* 371 367 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list. 
372 368 * Do the queuing if necessary, taking the right locks in the right order. ··· 393 361 unsigned long flags) __releases(&irq->irq_lock) 394 362 { 395 363 struct kvm_vcpu *vcpu; 364 + bool bcast; 396 365 397 366 lockdep_assert_held(&irq->irq_lock); 398 367 ··· 468 435 list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); 469 436 irq->vcpu = vcpu; 470 437 438 + /* A new SPI may result in deactivation trapping on all vcpus */ 439 + bcast = (vgic_model_needs_bcst_kick(vcpu->kvm) && 440 + vgic_valid_spi(vcpu->kvm, irq->intid) && 441 + atomic_fetch_inc(&vcpu->kvm->arch.vgic.active_spis) == 0); 442 + 471 443 raw_spin_unlock(&irq->irq_lock); 472 444 raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); 473 445 474 - kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 475 - kvm_vcpu_kick(vcpu); 446 + if (!bcast) { 447 + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 448 + kvm_vcpu_kick(vcpu); 449 + } else { 450 + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_IRQ_PENDING); 451 + } 476 452 477 453 return true; 478 454 } ··· 833 791 vgic_v3_clear_lr(vcpu, lr); 834 792 } 835 793 836 - static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) 837 - { 838 - if (kvm_vgic_global_state.type == VGIC_V2) 839 - vgic_v2_set_underflow(vcpu); 840 - else 841 - vgic_v3_set_underflow(vcpu); 842 - } 843 - 844 - /* Requires the ap_list_lock to be held. 
*/ 845 - static int compute_ap_list_depth(struct kvm_vcpu *vcpu, 846 - bool *multi_sgi) 794 + static void summarize_ap_list(struct kvm_vcpu *vcpu, 795 + struct ap_list_summary *als) 847 796 { 848 797 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 849 798 struct vgic_irq *irq; 850 - int count = 0; 851 - 852 - *multi_sgi = false; 853 799 854 800 lockdep_assert_held(&vgic_cpu->ap_list_lock); 855 801 802 + *als = (typeof(*als)){}; 803 + 856 804 list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 857 - int w; 805 + guard(raw_spinlock)(&irq->irq_lock); 858 806 859 - raw_spin_lock(&irq->irq_lock); 860 - /* GICv2 SGIs can count for more than one... */ 861 - w = vgic_irq_get_lr_count(irq); 862 - raw_spin_unlock(&irq->irq_lock); 807 + if (unlikely(vgic_target_oracle(irq) != vcpu)) 808 + continue; 863 809 864 - count += w; 865 - *multi_sgi |= (w > 1); 810 + if (!irq->active) 811 + als->nr_pend++; 812 + else 813 + als->nr_act++; 814 + 815 + if (irq->intid < VGIC_NR_SGIS) 816 + als->nr_sgi++; 866 817 } 867 - return count; 868 818 } 869 819 870 - /* Requires the VCPU's ap_list_lock to be held. */ 820 + /* 821 + * Dealing with LR overflow is close to black magic -- dress accordingly. 822 + * 823 + * We have to present an almost infinite number of interrupts through a very 824 + * limited number of registers. Therefore crucial decisions must be made to 825 + * ensure we feed the most relevant interrupts into the LRs, and yet have 826 + * some facilities to let the guest interact with those that are not there. 827 + * 828 + * All considerations below are in the context of interrupts targeting a 829 + * single vcpu with non-idle state (either pending, active, or both), 830 + * colloquially called the ap_list: 831 + * 832 + * - Pending interrupts must have priority over active interrupts. This also 833 + * excludes pending+active interrupts. 
This ensures that a guest can 834 + * perform priority drops on any number of interrupts, and yet be 835 + * presented the next pending one. 836 + * 837 + * - Deactivation of interrupts outside of the LRs must be tracked by using 838 + * either the EOIcount-driven maintenance interrupt, and sometimes by 839 + * trapping the DIR register. 840 + * 841 + * - For EOImode=0, a non-zero EOIcount means walking the ap_list past the 842 + * point that made it into the LRs, and deactivate interrupts that would 843 + * have made it onto the LRs if we had the space. 844 + * 845 + * - The MI-generation bits must be used to try and force an exit when the 846 + * guest has done enough changes to the LRs that we want to reevaluate the 847 + * situation: 848 + * 849 + * - if the total number of pending interrupts exceeds the number of 850 + * LR, NPIE must be set in order to exit once no pending interrupts 851 + * are present in the LRs, allowing us to populate the next batch. 852 + * 853 + * - if there are active interrupts outside of the LRs, then LRENPIE 854 + * must be set so that we exit on deactivation of one of these, and 855 + * work out which one is to be deactivated. Note that this is not 856 + * enough to deal with EOImode=1, see below. 857 + * 858 + * - if the overall number of interrupts exceeds the number of LRs, 859 + * then UIE must be set to allow refilling of the LRs once the 860 + * majority of them has been processed. 861 + * 862 + * - as usual, MI triggers are only an optimisation, since we cannot 863 + * rely on the MI being delivered in timely manner... 864 + * 865 + * - EOImode=1 creates some additional problems: 866 + * 867 + * - deactivation can happen in any order, and we cannot rely on 868 + * EOImode=0's coupling of priority-drop and deactivation which 869 + * imposes strict reverse Ack order. This means that DIR must 870 + * trap if we have active interrupts outside of the LRs. 
871 + * 872 + * - deactivation of SPIs can occur on any CPU, while the SPI is only 873 + * present in the ap_list of the CPU that actually ack-ed it. In that 874 + * case, EOIcount doesn't provide enough information, and we must 875 + * resort to trapping DIR even if we don't overflow the LRs. Bonus 876 + * point for not trapping DIR when no SPIs are pending or active in 877 + * the whole VM. 878 + * 879 + * - LPIs do not suffer the same problem as SPIs on deactivation, as we 880 + * have to essentially discard the active state, see below. 881 + * 882 + * - Virtual LPIs have an active state (surprise!), which gets removed on 883 + * priority drop (EOI). However, EOIcount doesn't get bumped when the LPI 884 + * is not present in the LR (surprise again!). Special care must therefore 885 + * be taken to remove the active state from any activated LPI when exiting 886 + * from the guest. This is in a way no different from what happens on the 887 + * physical side. We still rely on the running priority to have been 888 + * removed from the APRs, irrespective of the LPI being present in the LRs 889 + * or not. 890 + * 891 + * - Virtual SGIs directly injected via GICv4.1 must not affect EOIcount, as 892 + * they are not managed in SW and don't have a true active state. So only 893 + * set vSGIEOICount when no SGIs are in the ap_list. 894 + * 895 + * - GICv2 SGIs with multiple sources are injected one source at a time, as 896 + * if they were made pending sequentially. This may mean that we don't 897 + * always present the HPPI if other interrupts with lower priority are 898 + * pending in the LRs. Big deal. 
899 + */ 871 900 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) 872 901 { 873 902 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 903 + struct ap_list_summary als; 874 904 struct vgic_irq *irq; 875 - int count; 876 - bool multi_sgi; 877 - u8 prio = 0xff; 878 - int i = 0; 905 + int count = 0; 879 906 880 907 lockdep_assert_held(&vgic_cpu->ap_list_lock); 881 908 882 - count = compute_ap_list_depth(vcpu, &multi_sgi); 883 - if (count > kvm_vgic_global_state.nr_lr || multi_sgi) 909 + summarize_ap_list(vcpu, &als); 910 + 911 + if (irqs_outside_lrs(&als)) 884 912 vgic_sort_ap_list(vcpu); 885 913 886 - count = 0; 887 - 888 914 list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { 889 - raw_spin_lock(&irq->irq_lock); 915 + scoped_guard(raw_spinlock, &irq->irq_lock) { 916 + if (likely(vgic_target_oracle(irq) == vcpu)) { 917 + vgic_populate_lr(vcpu, irq, count++); 918 + } 919 + } 890 920 891 - /* 892 - * If we have multi-SGIs in the pipeline, we need to 893 - * guarantee that they are all seen before any IRQ of 894 - * lower priority. In that case, we need to filter out 895 - * these interrupts by exiting early. This is easy as 896 - * the AP list has been sorted already. 
897 - */ 898 - if (multi_sgi && irq->priority > prio) { 899 - raw_spin_unlock(&irq->irq_lock); 921 + if (count == kvm_vgic_global_state.nr_lr) 900 922 break; 901 - } 902 - 903 - if (likely(vgic_target_oracle(irq) == vcpu)) { 904 - vgic_populate_lr(vcpu, irq, count++); 905 - 906 - if (irq->source) 907 - prio = irq->priority; 908 - } 909 - 910 - raw_spin_unlock(&irq->irq_lock); 911 - 912 - if (count == kvm_vgic_global_state.nr_lr) { 913 - if (!list_is_last(&irq->ap_list, 914 - &vgic_cpu->ap_list_head)) 915 - vgic_set_underflow(vcpu); 916 - break; 917 - } 918 923 } 919 924 920 925 /* Nuke remaining LRs */ 921 - for (i = count ; i < kvm_vgic_global_state.nr_lr; i++) 926 + for (int i = count ; i < kvm_vgic_global_state.nr_lr; i++) 922 927 vgic_clear_lr(vcpu, i); 923 928 924 - if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) 929 + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { 925 930 vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count; 926 - else 931 + vgic_v2_configure_hcr(vcpu, &als); 932 + } else { 927 933 vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count; 934 + vgic_v3_configure_hcr(vcpu, &als); 935 + } 928 936 } 929 937 930 938 static inline bool can_access_vgic_from_kernel(void) ··· 998 906 /* Sync back the hardware VGIC state into our emulation after a guest's run. 
*/ 999 907 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) 1000 908 { 1001 - int used_lrs; 1002 - 1003 909 /* If nesting, emulate the HW effect from L0 to L1 */ 1004 910 if (vgic_state_is_nested(vcpu)) { 1005 911 vgic_v3_sync_nested(vcpu); ··· 1007 917 if (vcpu_has_nv(vcpu)) 1008 918 vgic_v3_nested_update_mi(vcpu); 1009 919 1010 - /* An empty ap_list_head implies used_lrs == 0 */ 1011 - if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) 1012 - return; 1013 - 1014 920 if (can_access_vgic_from_kernel()) 1015 921 vgic_save_state(vcpu); 1016 922 1017 - if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) 1018 - used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; 1019 - else 1020 - used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; 1021 - 1022 - if (used_lrs) 1023 - vgic_fold_lr_state(vcpu); 923 + vgic_fold_lr_state(vcpu); 1024 924 vgic_prune_ap_list(vcpu); 925 + } 926 + 927 + /* Sync interrupts that were deactivated through a DIR trap */ 928 + void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu) 929 + { 930 + unsigned long flags; 931 + 932 + /* Make sure we're in the same context as LR handling */ 933 + local_irq_save(flags); 934 + vgic_prune_ap_list(vcpu); 935 + local_irq_restore(flags); 1025 936 } 1026 937 1027 938 static inline void vgic_restore_state(struct kvm_vcpu *vcpu) ··· 1049 958 * abort the entry procedure and inject the exception at the 1050 959 * beginning of the run loop. 1051 960 * 1052 - * - Otherwise, do exactly *NOTHING*. The guest state is 1053 - * already loaded, and we can carry on with running it. 961 + * - Otherwise, do exactly *NOTHING* apart from enabling the virtual 962 + * CPU interface. The guest state is already loaded, and we can 963 + * carry on with running it. 1054 964 * 1055 965 * If we have NV, but are not in a nested state, compute the 1056 966 * maintenance interrupt state, as it may fire. 
··· 1060 968 if (kvm_vgic_vcpu_pending_irq(vcpu)) 1061 969 kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu); 1062 970 971 + vgic_v3_flush_nested(vcpu); 1063 972 return; 1064 973 } 1065 974 1066 975 if (vcpu_has_nv(vcpu)) 1067 976 vgic_v3_nested_update_mi(vcpu); 1068 977 1069 - /* 1070 - * If there are no virtual interrupts active or pending for this 1071 - * VCPU, then there is no work to do and we can bail out without 1072 - * taking any lock. There is a potential race with someone injecting 1073 - * interrupts to the VCPU, but it is a benign race as the VCPU will 1074 - * either observe the new interrupt before or after doing this check, 1075 - * and introducing additional synchronization mechanism doesn't change 1076 - * this. 1077 - * 1078 - * Note that we still need to go through the whole thing if anything 1079 - * can be directly injected (GICv4). 1080 - */ 1081 - if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) && 1082 - !vgic_supports_direct_irqs(vcpu->kvm)) 1083 - return; 1084 - 1085 978 DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); 1086 979 1087 - if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) { 1088 - raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); 980 + scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock) 1089 981 vgic_flush_lr_state(vcpu); 1090 - raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); 1091 - } 1092 982 1093 983 if (can_access_vgic_from_kernel()) 1094 984 vgic_restore_state(vcpu);
+39 -4
arch/arm64/kvm/vgic/vgic.h
··· 164 164 return ret; 165 165 } 166 166 167 + void kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt, 168 + __le32 *origptr, __le32 *updptr, int nr_inst); 169 + 170 + static inline u64 vgic_ich_hcr_trap_bits(void) 171 + { 172 + u64 hcr; 173 + 174 + /* All the traps are in the bottom 16bits */ 175 + asm volatile(ALTERNATIVE_CB("movz %0, #0\n", 176 + ARM64_ALWAYS_SYSTEM, 177 + kvm_compute_ich_hcr_trap_bits) 178 + : "=r" (hcr)); 179 + 180 + return hcr; 181 + } 182 + 167 183 /* 168 184 * This struct provides an intermediate representation of the fields contained 169 185 * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC ··· 236 220 u32 event_id; 237 221 }; 238 222 223 + struct ap_list_summary { 224 + unsigned int nr_pend; /* purely pending, not active */ 225 + unsigned int nr_act; /* active, or active+pending */ 226 + unsigned int nr_sgi; /* any SGI */ 227 + }; 228 + 229 + #define irqs_outside_lrs(s) \ 230 + (((s)->nr_pend + (s)->nr_act) > kvm_vgic_global_state.nr_lr) 231 + 232 + #define irqs_pending_outside_lrs(s) \ 233 + ((s)->nr_pend > kvm_vgic_global_state.nr_lr) 234 + 235 + #define irqs_active_outside_lrs(s) \ 236 + ((s)->nr_act && irqs_outside_lrs(s)) 237 + 239 238 int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, 240 239 struct vgic_reg_attr *reg_attr); 241 240 int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, ··· 261 230 struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid); 262 231 struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid); 263 232 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); 233 + struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq); 264 234 bool vgic_get_phys_line_level(struct vgic_irq *irq); 265 235 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); 266 236 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); ··· 277 245 278 246 void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); 279 247 void 
vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); 248 + void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val); 280 249 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); 281 - void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); 250 + void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als); 282 251 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); 283 252 int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, 284 253 int offset, u32 *val); ··· 287 254 int offset, u32 *val); 288 255 void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 289 256 void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 290 - void vgic_v2_enable(struct kvm_vcpu *vcpu); 257 + void vgic_v2_reset(struct kvm_vcpu *vcpu); 291 258 int vgic_v2_probe(const struct gic_kvm_info *info); 292 259 int vgic_v2_map_resources(struct kvm *kvm); 293 260 int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, ··· 319 286 void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); 320 287 void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); 321 288 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); 322 - void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); 289 + void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val); 290 + void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als); 323 291 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 324 292 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); 325 - void vgic_v3_enable(struct kvm_vcpu *vcpu); 293 + void vgic_v3_reset(struct kvm_vcpu *vcpu); 326 294 int vgic_v3_probe(const struct gic_kvm_info *info); 327 295 int vgic_v3_map_resources(struct kvm *kvm); 328 296 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); ··· 446 412 return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP); 447 413 } 448 414 415 + void 
vgic_v3_flush_nested(struct kvm_vcpu *vcpu); 449 416 void vgic_v3_sync_nested(struct kvm_vcpu *vcpu); 450 417 void vgic_v3_load_nested(struct kvm_vcpu *vcpu); 451 418 void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
+1
arch/arm64/tools/cpucaps
··· 40 40 HAS_GICV5_LEGACY 41 41 HAS_GIC_PRIO_MASKING 42 42 HAS_GIC_PRIO_RELAXED_SYNC 43 + HAS_ICH_HCR_EL2_TDIR 43 44 HAS_HCR_NV1 44 45 HAS_HCX 45 46 HAS_LDAPR
+5 -2
drivers/irqchip/irq-apple-aic.c
··· 411 411 if (is_kernel_in_hyp_mode() && 412 412 (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) && 413 413 read_sysreg_s(SYS_ICH_MISR_EL2) != 0) { 414 + u64 val; 415 + 414 416 generic_handle_domain_irq(aic_irqc->hw_domain, 415 417 AIC_FIQ_HWIRQ(AIC_VGIC_MI)); 416 418 417 419 if (unlikely((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) && 418 - read_sysreg_s(SYS_ICH_MISR_EL2))) { 419 - pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n"); 420 + (val = read_sysreg_s(SYS_ICH_MISR_EL2)))) { 421 + pr_err_ratelimited("vGIC IRQ fired and not handled by KVM (MISR=%llx), disabling.\n", 422 + val); 420 423 sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0); 421 424 } 422 425 }
+3
drivers/irqchip/irq-gic.c
··· 1459 1459 if (ret) 1460 1460 return; 1461 1461 1462 + gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base; 1463 + 1462 1464 if (static_branch_likely(&supports_deactivate_key)) 1463 1465 vgic_set_kvm_info(&gic_v2_kvm_info); 1464 1466 } ··· 1622 1620 return; 1623 1621 1624 1622 gic_v2_kvm_info.maint_irq = irq; 1623 + gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base; 1625 1624 1626 1625 vgic_set_kvm_info(&gic_v2_kvm_info); 1627 1626 }
+19 -10
include/kvm/arm_vgic.h
··· 59 59 /* virtual control interface mapping, HYP VA */ 60 60 void __iomem *vctrl_hyp; 61 61 62 + /* Physical CPU interface, kernel VA */ 63 + void __iomem *gicc_base; 64 + 62 65 /* Number of implemented list registers */ 63 66 int nr_lr; 64 67 ··· 123 120 124 121 struct vgic_irq { 125 122 raw_spinlock_t irq_lock; /* Protects the content of the struct */ 123 + u32 intid; /* Guest visible INTID */ 126 124 struct rcu_head rcu; 127 125 struct list_head ap_list; 128 126 ··· 138 134 * affinity reg (v3). 139 135 */ 140 136 141 - u32 intid; /* Guest visible INTID */ 142 - bool line_level; /* Level only */ 143 - bool pending_latch; /* The pending latch state used to calculate 144 - * the pending state for both level 145 - * and edge triggered IRQs. */ 146 - bool active; 147 - bool pending_release; /* Used for LPIs only, unreferenced IRQ 137 + bool pending_release:1; /* Used for LPIs only, unreferenced IRQ 148 138 * pending a release */ 149 139 150 - bool enabled; 151 - bool hw; /* Tied to HW IRQ */ 140 + bool pending_latch:1; /* The pending latch state used to calculate 141 + * the pending state for both level 142 + * and edge triggered IRQs. */ 143 + enum vgic_irq_config config:1; /* Level or edge */ 144 + bool line_level:1; /* Level only */ 145 + bool enabled:1; 146 + bool active:1; 147 + bool hw:1; /* Tied to HW IRQ */ 148 + bool on_lr:1; /* Present in a CPU LR */ 152 149 refcount_t refcount; /* Used for LPIs */ 153 150 u32 hwintid; /* HW INTID number */ 154 151 unsigned int host_irq; /* linux irq corresponding to hwintid */ ··· 161 156 u8 active_source; /* GICv2 SGIs only */ 162 157 u8 priority; 163 158 u8 group; /* 0 == group 0, 1 == group 1 */ 164 - enum vgic_irq_config config; /* Level or edge */ 165 159 166 160 struct irq_ops *ops; 167 161 ··· 263 259 /* The GIC maintenance IRQ for nested hypervisors. 
*/ 264 260 u32 mi_intid; 265 261 262 + /* Track the number of in-flight active SPIs */ 263 + atomic_t active_spis; 264 + 266 265 /* base addresses in guest physical address space: */ 267 266 gpa_t vgic_dist_base; /* distributor */ 268 267 union { ··· 287 280 struct vgic_irq *spis; 288 281 289 282 struct vgic_io_device dist_iodev; 283 + struct vgic_io_device cpuif_iodev; 290 284 291 285 bool has_its; 292 286 bool table_write_in_progress; ··· 425 417 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); 426 418 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); 427 419 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid); 420 + void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu); 428 421 429 422 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1); 430 423
+6
include/linux/irqchip/arm-gic.h
··· 86 86 87 87 #define GICH_HCR_EN (1 << 0) 88 88 #define GICH_HCR_UIE (1 << 1) 89 + #define GICH_HCR_LRENPIE (1 << 2) 89 90 #define GICH_HCR_NPIE (1 << 3) 91 + #define GICH_HCR_VGrp0EIE (1 << 4) 92 + #define GICH_HCR_VGrp0DIE (1 << 5) 93 + #define GICH_HCR_VGrp1EIE (1 << 6) 94 + #define GICH_HCR_VGrp1DIE (1 << 7) 95 + #define GICH_HCR_EOICOUNT GENMASK(31, 27) 90 96 91 97 #define GICH_LR_VIRTUALID (0x3ff << 0) 92 98 #define GICH_LR_PHYSID_CPUID_SHIFT (10)
+2
include/linux/irqchip/arm-vgic-info.h
··· 24 24 enum gic_type type; 25 25 /* Virtual CPU interface */ 26 26 struct resource vcpu; 27 + /* GICv2 GICC VA */ 28 + void __iomem *gicc_base; 27 29 /* Interrupt number */ 28 30 unsigned int maint_irq; 29 31 /* No interrupt mask, no need to use the above field */
+263 -22
tools/testing/selftests/kvm/arm64/vgic_irq.c
··· 29 29 bool level_sensitive; /* 1 is level, 0 is edge */ 30 30 int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */ 31 31 bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */ 32 + uint32_t shared_data; 32 33 }; 33 34 34 35 /* ··· 206 205 do { \ 207 206 uint32_t _intid; \ 208 207 _intid = gic_get_and_ack_irq(); \ 209 - GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \ 208 + GUEST_ASSERT(_intid == IAR_SPURIOUS); \ 210 209 } while (0) 211 210 212 211 #define CAT_HELPER(a, b) a ## b ··· 360 359 * interrupts for the whole test. 361 360 */ 362 361 static void test_inject_preemption(struct test_args *args, 363 - uint32_t first_intid, int num, 364 - kvm_inject_cmd cmd) 362 + uint32_t first_intid, int num, 363 + const unsigned long *exclude, 364 + kvm_inject_cmd cmd) 365 365 { 366 366 uint32_t intid, prio, step = KVM_PRIO_STEPS; 367 367 int i; ··· 381 379 for (i = 0; i < num; i++) { 382 380 uint32_t tmp; 383 381 intid = i + first_intid; 382 + 383 + if (exclude && test_bit(i, exclude)) 384 + continue; 385 + 384 386 KVM_INJECT(cmd, intid); 385 387 /* Each successive IRQ will preempt the previous one. */ 386 388 tmp = wait_for_and_activate_irq(); ··· 396 390 /* finish handling the IRQs starting with the highest priority one. 
*/ 397 391 for (i = 0; i < num; i++) { 398 392 intid = num - i - 1 + first_intid; 393 + 394 + if (exclude && test_bit(intid - first_intid, exclude)) 395 + continue; 396 + 399 397 gic_set_eoi(intid); 400 - if (args->eoi_split) 401 - gic_set_dir(intid); 398 + } 399 + 400 + if (args->eoi_split) { 401 + for (i = 0; i < num; i++) { 402 + intid = i + first_intid; 403 + 404 + if (exclude && test_bit(i, exclude)) 405 + continue; 406 + 407 + if (args->eoi_split) 408 + gic_set_dir(intid); 409 + } 402 410 } 403 411 404 412 local_irq_enable(); 405 413 406 - for (i = 0; i < num; i++) 414 + for (i = 0; i < num; i++) { 415 + if (exclude && test_bit(i, exclude)) 416 + continue; 417 + 407 418 GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); 419 + } 408 420 GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); 409 421 GUEST_ASSERT_IAR_EMPTY(); 410 422 ··· 460 436 461 437 static void test_preemption(struct test_args *args, struct kvm_inject_desc *f) 462 438 { 463 - /* 464 - * Test up to 4 levels of preemption. The reason is that KVM doesn't 465 - * currently implement the ability to have more than the number-of-LRs 466 - * number of concurrently active IRQs. The number of LRs implemented is 467 - * IMPLEMENTATION DEFINED, however, it seems that most implement 4. 
468 - */ 439 + /* Timer PPIs cannot be injected from userspace */ 440 + static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) | 441 + BIT(30 - MIN_PPI) | 442 + BIT(28 - MIN_PPI) | 443 + BIT(26 - MIN_PPI)); 444 + 469 445 if (f->sgi) 470 - test_inject_preemption(args, MIN_SGI, 4, f->cmd); 446 + test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd); 471 447 472 448 if (f->ppi) 473 - test_inject_preemption(args, MIN_PPI, 4, f->cmd); 449 + test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd); 474 450 475 451 if (f->spi) 476 - test_inject_preemption(args, MIN_SPI, 4, f->cmd); 452 + test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd); 477 453 } 478 454 479 455 static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f) 480 456 { 481 - /* Test up to 4 active IRQs. Same reason as in test_preemption. */ 482 457 if (f->sgi) 483 - guest_restore_active(args, MIN_SGI, 4, f->cmd); 458 + guest_restore_active(args, MIN_SGI, 16, f->cmd); 484 459 485 460 if (f->ppi) 486 - guest_restore_active(args, MIN_PPI, 4, f->cmd); 461 + guest_restore_active(args, MIN_PPI, 16, f->cmd); 487 462 488 463 if (f->spi) 489 - guest_restore_active(args, MIN_SPI, 4, f->cmd); 464 + guest_restore_active(args, MIN_SPI, 31, f->cmd); 490 465 } 491 466 492 467 static void guest_code(struct test_args *args) ··· 496 473 497 474 gic_init(GIC_V3, 1); 498 475 499 - for (i = 0; i < nr_irqs; i++) 500 - gic_irq_enable(i); 501 - 502 476 for (i = MIN_SPI; i < nr_irqs; i++) 503 477 gic_irq_set_config(i, !level_sensitive); 478 + 479 + for (i = 0; i < nr_irqs; i++) 480 + gic_irq_enable(i); 504 481 505 482 gic_set_eoi_split(args->eoi_split); 506 483 ··· 802 779 kvm_vm_free(vm); 803 780 } 804 781 782 + static void guest_code_asym_dir(struct test_args *args, int cpuid) 783 + { 784 + gic_init(GIC_V3, 2); 785 + 786 + gic_set_eoi_split(1); 787 + gic_set_priority_mask(CPU_PRIO_MASK); 788 + 789 + if (cpuid == 0) { 790 + uint32_t intid; 791 + 792 + local_irq_disable(); 793 + 794 + 
gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO); 795 + gic_irq_enable(MIN_SPI); 796 + gic_irq_set_pending(MIN_SPI); 797 + 798 + intid = wait_for_and_activate_irq(); 799 + GUEST_ASSERT_EQ(intid, MIN_SPI); 800 + 801 + gic_set_eoi(intid); 802 + isb(); 803 + 804 + WRITE_ONCE(args->shared_data, MIN_SPI); 805 + dsb(ishst); 806 + 807 + do { 808 + dsb(ishld); 809 + } while (READ_ONCE(args->shared_data) == MIN_SPI); 810 + GUEST_ASSERT(!gic_irq_get_active(MIN_SPI)); 811 + } else { 812 + do { 813 + dsb(ishld); 814 + } while (READ_ONCE(args->shared_data) != MIN_SPI); 815 + 816 + gic_set_dir(MIN_SPI); 817 + isb(); 818 + 819 + WRITE_ONCE(args->shared_data, 0); 820 + dsb(ishst); 821 + } 822 + 823 + GUEST_DONE(); 824 + } 825 + 826 + static void guest_code_group_en(struct test_args *args, int cpuid) 827 + { 828 + uint32_t intid; 829 + 830 + gic_init(GIC_V3, 2); 831 + 832 + gic_set_eoi_split(0); 833 + gic_set_priority_mask(CPU_PRIO_MASK); 834 + /* SGI0 is G0, which is disabled */ 835 + gic_irq_set_group(0, 0); 836 + 837 + /* Configure all SGIs with decreasing priority */ 838 + for (intid = 0; intid < MIN_PPI; intid++) { 839 + gic_set_priority(intid, (intid + 1) * 8); 840 + gic_irq_enable(intid); 841 + gic_irq_set_pending(intid); 842 + } 843 + 844 + /* Ack and EOI all G1 interrupts */ 845 + for (int i = 1; i < MIN_PPI; i++) { 846 + intid = wait_for_and_activate_irq(); 847 + 848 + GUEST_ASSERT(intid < MIN_PPI); 849 + gic_set_eoi(intid); 850 + isb(); 851 + } 852 + 853 + /* 854 + * Check that SGI0 is still pending, inactive, and that we cannot 855 + * ack anything. 
856 + */ 857 + GUEST_ASSERT(gic_irq_get_pending(0)); 858 + GUEST_ASSERT(!gic_irq_get_active(0)); 859 + GUEST_ASSERT_IAR_EMPTY(); 860 + GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS); 861 + 862 + /* Open the G0 gates, and verify we can ack SGI0 */ 863 + write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1); 864 + isb(); 865 + 866 + do { 867 + intid = read_sysreg_s(SYS_ICC_IAR0_EL1); 868 + } while (intid == IAR_SPURIOUS); 869 + 870 + GUEST_ASSERT(intid == 0); 871 + GUEST_DONE(); 872 + } 873 + 874 + static void guest_code_timer_spi(struct test_args *args, int cpuid) 875 + { 876 + uint32_t intid; 877 + u64 val; 878 + 879 + gic_init(GIC_V3, 2); 880 + 881 + gic_set_eoi_split(1); 882 + gic_set_priority_mask(CPU_PRIO_MASK); 883 + 884 + /* Add a pending SPI so that KVM starts trapping DIR */ 885 + gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO); 886 + gic_irq_set_pending(MIN_SPI + cpuid); 887 + 888 + /* Configure the timer with a higher priority, make it pending */ 889 + gic_set_priority(27, IRQ_DEFAULT_PRIO - 8); 890 + 891 + isb(); 892 + val = read_sysreg(cntvct_el0); 893 + write_sysreg(val, cntv_cval_el0); 894 + write_sysreg(1, cntv_ctl_el0); 895 + isb(); 896 + 897 + GUEST_ASSERT(gic_irq_get_pending(27)); 898 + 899 + /* Enable both interrupts */ 900 + gic_irq_enable(MIN_SPI + cpuid); 901 + gic_irq_enable(27); 902 + 903 + /* The timer must fire */ 904 + intid = wait_for_and_activate_irq(); 905 + GUEST_ASSERT(intid == 27); 906 + 907 + /* Check that we can deassert it */ 908 + write_sysreg(0, cntv_ctl_el0); 909 + isb(); 910 + 911 + GUEST_ASSERT(!gic_irq_get_pending(27)); 912 + 913 + /* 914 + * Priority drop, deactivation -- we expect that the host 915 + * deactivation will have been effective 916 + */ 917 + gic_set_eoi(27); 918 + gic_set_dir(27); 919 + 920 + GUEST_ASSERT(!gic_irq_get_active(27)); 921 + 922 + /* Do it one more time */ 923 + isb(); 924 + val = read_sysreg(cntvct_el0); 925 + write_sysreg(val, cntv_cval_el0); 926 + write_sysreg(1, cntv_ctl_el0); 927 + isb(); 
928 + 929 + GUEST_ASSERT(gic_irq_get_pending(27)); 930 + 931 + /* The timer must fire again */ 932 + intid = wait_for_and_activate_irq(); 933 + GUEST_ASSERT(intid == 27); 934 + 935 + GUEST_DONE(); 936 + } 937 + 938 + static void *test_vcpu_run(void *arg) 939 + { 940 + struct kvm_vcpu *vcpu = arg; 941 + struct ucall uc; 942 + 943 + while (1) { 944 + vcpu_run(vcpu); 945 + 946 + switch (get_ucall(vcpu, &uc)) { 947 + case UCALL_ABORT: 948 + REPORT_GUEST_ASSERT(uc); 949 + break; 950 + case UCALL_DONE: 951 + return NULL; 952 + default: 953 + TEST_FAIL("Unknown ucall %lu", uc.cmd); 954 + } 955 + } 956 + 957 + return NULL; 958 + } 959 + 960 + static void test_vgic_two_cpus(void *gcode) 961 + { 962 + pthread_t thr[2]; 963 + struct kvm_vcpu *vcpus[2]; 964 + struct test_args args = {}; 965 + struct kvm_vm *vm; 966 + vm_vaddr_t args_gva; 967 + int gic_fd, ret; 968 + 969 + vm = vm_create_with_vcpus(2, gcode, vcpus); 970 + 971 + vm_init_descriptor_tables(vm); 972 + vcpu_init_descriptor_tables(vcpus[0]); 973 + vcpu_init_descriptor_tables(vcpus[1]); 974 + 975 + /* Setup the guest args page (so it gets the args). 
*/ 976 + args_gva = vm_vaddr_alloc_page(vm); 977 + memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); 978 + vcpu_args_set(vcpus[0], 2, args_gva, 0); 979 + vcpu_args_set(vcpus[1], 2, args_gva, 1); 980 + 981 + gic_fd = vgic_v3_setup(vm, 2, 64); 982 + 983 + ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]); 984 + if (ret) 985 + TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret); 986 + ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]); 987 + if (ret) 988 + TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret); 989 + 990 + pthread_join(thr[0], NULL); 991 + pthread_join(thr[1], NULL); 992 + 993 + close(gic_fd); 994 + kvm_vm_free(vm); 995 + } 996 + 805 997 static void help(const char *name) 806 998 { 807 999 printf( ··· 1073 835 test_vgic(nr_irqs, false /* level */, true /* eoi_split */); 1074 836 test_vgic(nr_irqs, true /* level */, false /* eoi_split */); 1075 837 test_vgic(nr_irqs, true /* level */, true /* eoi_split */); 838 + test_vgic_two_cpus(guest_code_asym_dir); 839 + test_vgic_two_cpus(guest_code_group_en); 840 + test_vgic_two_cpus(guest_code_timer_spi); 1076 841 } else { 1077 842 test_vgic(nr_irqs, level_sensitive, eoi_split); 1078 843 }
+1
tools/testing/selftests/kvm/include/arm64/gic.h
··· 57 57 void gic_irq_clear_pending(unsigned int intid); 58 58 bool gic_irq_get_pending(unsigned int intid); 59 59 void gic_irq_set_config(unsigned int intid, bool is_edge); 60 + void gic_irq_set_group(unsigned int intid, bool group); 60 61 61 62 void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, 62 63 vm_paddr_t pend_table);
+6
tools/testing/selftests/kvm/lib/arm64/gic.c
··· 155 155 GUEST_ASSERT(gic_common_ops); 156 156 gic_common_ops->gic_irq_set_config(intid, is_edge); 157 157 } 158 + 159 + void gic_irq_set_group(unsigned int intid, bool group) 160 + { 161 + GUEST_ASSERT(gic_common_ops); 162 + gic_common_ops->gic_irq_set_group(intid, group); 163 + }
+1
tools/testing/selftests/kvm/lib/arm64/gic_private.h
··· 25 25 void (*gic_irq_clear_pending)(uint32_t intid); 26 26 bool (*gic_irq_get_pending)(uint32_t intid); 27 27 void (*gic_irq_set_config)(uint32_t intid, bool is_edge); 28 + void (*gic_irq_set_group)(uint32_t intid, bool group); 28 29 }; 29 30 30 31 extern const struct gic_common_ops gicv3_ops;
+17
tools/testing/selftests/kvm/lib/arm64/gic_v3.c
··· 293 293 } 294 294 } 295 295 296 + static void gicv3_set_group(uint32_t intid, bool grp) 297 + { 298 + uint32_t cpu_or_dist; 299 + uint32_t val; 300 + 301 + cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid(); 302 + val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4); 303 + if (grp) 304 + val |= BIT(intid % 32); 305 + else 306 + val &= ~BIT(intid % 32); 307 + gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val); 308 + } 309 + 296 310 static void gicv3_cpu_init(unsigned int cpu) 297 311 { 298 312 volatile void *sgi_base; ··· 347 333 /* Set a default priority threshold */ 348 334 write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); 349 335 336 + /* Disable Group-0 interrupts */ 337 + write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1); 350 338 /* Enable non-secure Group-1 interrupts */ 351 339 write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); 352 340 } ··· 421 405 .gic_irq_clear_pending = gicv3_irq_clear_pending, 422 406 .gic_irq_get_pending = gicv3_irq_get_pending, 423 407 .gic_irq_set_config = gicv3_irq_set_config, 408 + .gic_irq_set_group = gicv3_set_group, 424 409 }; 425 410 426 411 void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,