Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: PPC: Book3S: Simplify external interrupt handling

Currently we use two bits in the vcpu pending_exceptions bitmap to
indicate that an external interrupt is pending for the guest, one
for "one-shot" interrupts that are cleared when delivered, and one
for interrupts that persist until cleared by an explicit action of
the OS (e.g. an acknowledge to an interrupt controller). The
BOOK3S_IRQPRIO_EXTERNAL bit is used for one-shot interrupt requests
and BOOK3S_IRQPRIO_EXTERNAL_LEVEL is used for persisting interrupts.

In practice BOOK3S_IRQPRIO_EXTERNAL never gets used, because our
Book3S platforms generally, and pseries in particular, expect
external interrupt requests to persist until they are acknowledged
at the interrupt controller. That, combined with the confusion
introduced by having two bits for what is essentially the same thing,
makes it attractive to simplify things by using only one bit. This
patch does that.

With this patch there is only BOOK3S_IRQPRIO_EXTERNAL, and by default
it has the semantics of a persisting interrupt. In order to avoid
breaking the ABI, we introduce a new "external_oneshot" flag which
preserves the behaviour of the KVM_INTERRUPT ioctl with the
KVM_INTERRUPT_SET argument.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Authored by Paul Mackerras and committed by Michael Ellerman
d24ea8a7 e7b17d50

+44 -29
+1 -3
arch/powerpc/include/asm/kvm_asm.h
··· 84 84 #define BOOK3S_INTERRUPT_INST_STORAGE 0x400 85 85 #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 86 86 #define BOOK3S_INTERRUPT_EXTERNAL 0x500 87 - #define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501 88 87 #define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502 89 88 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600 90 89 #define BOOK3S_INTERRUPT_PROGRAM 0x700 ··· 133 134 #define BOOK3S_IRQPRIO_EXTERNAL 14 134 135 #define BOOK3S_IRQPRIO_DECREMENTER 15 135 136 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16 136 - #define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 17 137 - #define BOOK3S_IRQPRIO_MAX 18 137 + #define BOOK3S_IRQPRIO_MAX 17 138 138 139 139 #define BOOK3S_HFLAG_DCBZ32 0x1 140 140 #define BOOK3S_HFLAG_SLB 0x2
+1
arch/powerpc/include/asm/kvm_host.h
··· 707 707 u8 hcall_needed; 708 708 u8 epr_flags; /* KVMPPC_EPR_xxx */ 709 709 u8 epr_needed; 710 + u8 external_oneshot; /* clear external irq after delivery */ 710 711 711 712 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ 712 713
+33 -10
arch/powerpc/kvm/book3s.c
··· 150 150 case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; 151 151 case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; 152 152 case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; 153 - case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break; 154 153 case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; 155 154 case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; 156 155 case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; ··· 235 236 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 236 237 struct kvm_interrupt *irq) 237 238 { 238 - unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL; 239 + /* 240 + * This case (KVM_INTERRUPT_SET) should never actually arise for 241 + * a pseries guest (because pseries guests expect their interrupt 242 + * controllers to continue asserting an external interrupt request 243 + * until it is acknowledged at the interrupt controller), but is 244 + * included to avoid ABI breakage and potentially for other 245 + * sorts of guest. 246 + * 247 + * There is a subtlety here: HV KVM does not test the 248 + * external_oneshot flag in the code that synthesizes 249 + * external interrupts for the guest just before entering 250 + * the guest. That is OK even if userspace did do a 251 + * KVM_INTERRUPT_SET on a pseries guest vcpu, because the 252 + * caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick() 253 + * which ends up doing a smp_send_reschedule(), which will 254 + * pull the guest all the way out to the host, meaning that 255 + * we will call kvmppc_core_prepare_to_enter() before entering 256 + * the guest again, and that will handle the external_oneshot 257 + * flag correctly. 
258 + */ 259 + if (irq->irq == KVM_INTERRUPT_SET) 260 + vcpu->arch.external_oneshot = 1; 239 261 240 - if (irq->irq == KVM_INTERRUPT_SET_LEVEL) 241 - vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; 242 - 243 - kvmppc_book3s_queue_irqprio(vcpu, vec); 262 + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 244 263 } 245 264 246 265 void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) 247 266 { 248 267 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 249 - kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 250 268 } 251 269 252 270 void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, ··· 294 278 vec = BOOK3S_INTERRUPT_DECREMENTER; 295 279 break; 296 280 case BOOK3S_IRQPRIO_EXTERNAL: 297 - case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: 298 281 deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; 299 282 vec = BOOK3S_INTERRUPT_EXTERNAL; 300 283 break; ··· 367 352 case BOOK3S_IRQPRIO_DECREMENTER: 368 353 /* DEC interrupts get cleared by mtdec */ 369 354 return false; 370 - case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: 371 - /* External interrupts get cleared by userspace */ 355 + case BOOK3S_IRQPRIO_EXTERNAL: 356 + /* 357 + * External interrupts get cleared by userspace 358 + * except when set by the KVM_INTERRUPT ioctl with 359 + * KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL). 360 + */ 361 + if (vcpu->arch.external_oneshot) { 362 + vcpu->arch.external_oneshot = 0; 363 + return true; 364 + } 372 365 return false; 373 366 } 374 367
+2 -3
arch/powerpc/kvm/book3s_hv_rm_xics.c
··· 136 136 137 137 /* Mark the target VCPU as having an interrupt pending */ 138 138 vcpu->stat.queue_intr++; 139 - set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); 139 + set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); 140 140 141 141 /* Kick self ? Just set MER and return */ 142 142 if (vcpu == this_vcpu) { ··· 170 170 static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu) 171 171 { 172 172 /* Note: Only called on self ! */ 173 - clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 174 - &vcpu->arch.pending_exceptions); 173 + clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); 175 174 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER); 176 175 } 177 176
+2 -2
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 1122 1122 1123 1123 /* Check if we can deliver an external or decrementer interrupt now */ 1124 1124 ld r0, VCPU_PENDING_EXC(r4) 1125 - rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 1125 + rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL, 63 1126 1126 cmpdi cr1, r0, 0 1127 1127 andi. r8, r11, MSR_EE 1128 1128 mfspr r8, SPRN_LPCR 1129 - /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ 1129 + /* Insert EXTERNAL bit into LPCR at the MER bit position */ 1130 1130 rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH 1131 1131 mtspr SPRN_LPCR, r8 1132 1132 isync
-1
arch/powerpc/kvm/book3s_pr.c
··· 1246 1246 r = RESUME_GUEST; 1247 1247 break; 1248 1248 case BOOK3S_INTERRUPT_EXTERNAL: 1249 - case BOOK3S_INTERRUPT_EXTERNAL_LEVEL: 1250 1249 case BOOK3S_INTERRUPT_EXTERNAL_HV: 1251 1250 case BOOK3S_INTERRUPT_H_VIRT: 1252 1251 vcpu->stat.ext_intr_exits++;
+4 -7
arch/powerpc/kvm/book3s_xics.c
··· 310 310 */ 311 311 if (new.out_ee) { 312 312 kvmppc_book3s_queue_irqprio(icp->vcpu, 313 - BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 313 + BOOK3S_INTERRUPT_EXTERNAL); 314 314 if (!change_self) 315 315 kvmppc_fast_vcpu_kick(icp->vcpu); 316 316 } ··· 593 593 u32 xirr; 594 594 595 595 /* First, remove EE from the processor */ 596 - kvmppc_book3s_dequeue_irqprio(icp->vcpu, 597 - BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 596 + kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL); 598 597 599 598 /* 600 599 * ICP State: Accept_Interrupt ··· 753 754 * We can remove EE from the current processor, the update 754 755 * transaction will set it again if needed 755 756 */ 756 - kvmppc_book3s_dequeue_irqprio(icp->vcpu, 757 - BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 757 + kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL); 758 758 759 759 do { 760 760 old_state = new_state = READ_ONCE(icp->state); ··· 1165 1167 * Deassert the CPU interrupt request. 1166 1168 * icp_try_update will reassert it if necessary. 1167 1169 */ 1168 - kvmppc_book3s_dequeue_irqprio(icp->vcpu, 1169 - BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 1170 + kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL); 1170 1171 1171 1172 /* 1172 1173 * Note that if we displace an interrupt from old_state.xisr,
+1 -1
arch/powerpc/kvm/book3s_xive_template.c
··· 285 285 * set by pull or an escalation interrupts). 286 286 */ 287 287 if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions)) 288 - clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 288 + clear_bit(BOOK3S_IRQPRIO_EXTERNAL, 289 289 &vcpu->arch.pending_exceptions); 290 290 291 291 pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
-1
arch/powerpc/kvm/trace_book3s.h
··· 14 14 {0x400, "INST_STORAGE"}, \ 15 15 {0x480, "INST_SEGMENT"}, \ 16 16 {0x500, "EXTERNAL"}, \ 17 - {0x501, "EXTERNAL_LEVEL"}, \ 18 17 {0x502, "EXTERNAL_HV"}, \ 19 18 {0x600, "ALIGNMENT"}, \ 20 19 {0x700, "PROGRAM"}, \
-1
tools/perf/arch/powerpc/util/book3s_hv_exits.h
··· 15 15 {0x400, "INST_STORAGE"}, \ 16 16 {0x480, "INST_SEGMENT"}, \ 17 17 {0x500, "EXTERNAL"}, \ 18 - {0x501, "EXTERNAL_LEVEL"}, \ 19 18 {0x502, "EXTERNAL_HV"}, \ 20 19 {0x600, "ALIGNMENT"}, \ 21 20 {0x700, "PROGRAM"}, \