Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: PPC: Book3S HV P9: Remove subcore HMI handling

On POWER9 and newer, rather than the complex HMI synchronisation and
subcore state, have each thread un-apply the guest TB offset before
calling into the early HMI handler.

This allows the subcore state to be avoided, including subcore enter
/ exit guest, which includes an expensive divide that shows up
slightly in profiles.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20211123095231.1036501-54-npiggin@gmail.com

authored by

Nicholas Piggin and committed by
Michael Ellerman
9c5a432a 6398326b

+67 -9
+1
arch/powerpc/include/asm/kvm_ppc.h
··· 759 759 void kvmppc_subcore_enter_guest(void); 760 760 void kvmppc_subcore_exit_guest(void); 761 761 long kvmppc_realmode_hmi_handler(void); 762 + long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu); 762 763 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 763 764 long pte_index, unsigned long pteh, unsigned long ptel); 764 765 long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+5 -7
arch/powerpc/kvm/book3s_hv.c
··· 4033 4033 4034 4034 vcpu->arch.ceded = 0; 4035 4035 4036 - kvmppc_subcore_enter_guest(); 4037 - 4038 4036 vcpu_vpa_increment_dispatch(vcpu); 4039 4037 4040 4038 if (kvmhv_on_pseries()) { ··· 4084 4086 } 4085 4087 4086 4088 vcpu_vpa_increment_dispatch(vcpu); 4087 - 4088 - kvmppc_subcore_exit_guest(); 4089 4089 4090 4090 return trap; 4091 4091 } ··· 6098 6102 if (r) 6099 6103 return r; 6100 6104 6101 - r = kvm_init_subcore_bitmap(); 6102 - if (r) 6103 - return r; 6105 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 6106 + r = kvm_init_subcore_bitmap(); 6107 + if (r) 6108 + return r; 6109 + } 6104 6110 6105 6111 /* 6106 6112 * We need a way of accessing the XICS interrupt controller,
+6 -1
arch/powerpc/kvm/book3s_hv_hmi.c
··· 20 20 21 21 /* 22 22 * NULL bitmap pointer indicates that KVM module hasn't 23 - * been loaded yet and hence no guests are running. 23 + * been loaded yet and hence no guests are running, or that we are running 24 + * on a POWER9 or newer CPU. 25 + * 24 26 * If no KVM is in use, no need to co-ordinate among threads 25 27 * as all of them will always be in host and no one is going 26 28 * to modify TB other than the opal hmi handler. 29 + * 30 + * POWER9 and newer don't need this synchronisation. 31 + * 27 32 * Hence, just return from here. 28 33 */ 29 34 if (!local_paca->sibling_subcore_state)
+1 -1
arch/powerpc/kvm/book3s_hv_p9_entry.c
··· 1013 1013 kvmppc_realmode_machine_check(vcpu); 1014 1014 1015 1015 } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) { 1016 - kvmppc_realmode_hmi_handler(); 1016 + kvmppc_p9_realmode_hmi_handler(vcpu); 1017 1017 1018 1018 } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) { 1019 1019 vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+54
arch/powerpc/kvm/book3s_hv_ras.c
··· 136 136 vcpu->arch.mce_evt = mce_evt; 137 137 } 138 138 139 + 140 + long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu) 141 + { 142 + struct kvmppc_vcore *vc = vcpu->arch.vcore; 143 + long ret = 0; 144 + 145 + /* 146 + * Unapply and clear the offset first. That way, if the TB was not 147 + * resynced then it will remain in host-offset, and if it was resynced 148 + * then it is brought into host-offset. Then the tb offset is 149 + * re-applied before continuing with the KVM exit. 150 + * 151 + * This way, we don't need to actually know whether or not OPAL resynced 152 + * the timebase or do any of the complicated dance that the P7/8 153 + * path requires. 154 + */ 155 + if (vc->tb_offset_applied) { 156 + u64 new_tb = mftb() - vc->tb_offset_applied; 157 + mtspr(SPRN_TBU40, new_tb); 158 + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { 159 + new_tb += 0x1000000; 160 + mtspr(SPRN_TBU40, new_tb); 161 + } 162 + vc->tb_offset_applied = 0; 163 + } 164 + 165 + local_paca->hmi_irqs++; 166 + 167 + if (hmi_handle_debugtrig(NULL) >= 0) { 168 + ret = 1; 169 + goto out; 170 + } 171 + 172 + if (ppc_md.hmi_exception_early) 173 + ppc_md.hmi_exception_early(NULL); 174 + 175 + out: 176 + if (vc->tb_offset) { 177 + u64 new_tb = mftb() + vc->tb_offset; 178 + mtspr(SPRN_TBU40, new_tb); 179 + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { 180 + new_tb += 0x1000000; 181 + mtspr(SPRN_TBU40, new_tb); 182 + } 183 + vc->tb_offset_applied = vc->tb_offset; 184 + } 185 + 186 + return ret; 187 + } 188 + 189 + /* 190 + * The following subcore HMI handling is all only for pre-POWER9 CPUs. 191 + */ 192 + 139 193 /* Check if dynamic split is in force and return subcore size accordingly. */ 140 194 static inline int kvmppc_cur_subcore_size(void) 141 195 {