Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Paolo Bonzini:
"PPC:
- Improvements and bugfixes for secure VM support, giving reduced
startup time and memory hotplug support.

- Locking fixes in nested KVM code

- Increase number of guests supported by HV KVM to 4094

- Preliminary POWER10 support

ARM:
- Split the VHE and nVHE hypervisor code bases, build the EL2 code
separately, allowing for the VHE code to now be built with
instrumentation

- Level-based TLB invalidation support

- Restructure of the vcpu register storage to accommodate the NV code

- Pointer Authentication available for guests on nVHE hosts

- Simplification of the system register table parsing

- MMU cleanups and fixes

- A number of post-32bit cleanups and other fixes

MIPS:
- compilation fixes

x86:
- bugfixes

- support for the SERIALIZE instruction"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (70 commits)
KVM: MIPS/VZ: Fix build error caused by 'kvm_run' cleanup
x86/kvm/hyper-v: Synic default SCONTROL MSR needs to be enabled
MIPS: KVM: Convert a fallthrough comment to fallthrough
MIPS: VZ: Only include loongson_regs.h for CPU_LOONGSON64
x86: Expose SERIALIZE for supported cpuid
KVM: x86: Don't attempt to load PDPTRs when 64-bit mode is enabled
KVM: arm64: Move S1PTW S2 fault logic out of io_mem_abort()
KVM: arm64: Don't skip cache maintenance for read-only memslots
KVM: arm64: Handle data and instruction external aborts the same way
KVM: arm64: Rename kvm_vcpu_dabt_isextabt()
KVM: arm: Add trace name for ARM_NISV
KVM: arm64: Ensure that all nVHE hyp code is in .hyp.text
KVM: arm64: Substitute RANDOMIZE_BASE for HARDEN_EL2_VECTORS
KVM: arm64: Make nVHE ASLR conditional on RANDOMIZE_BASE
KVM: PPC: Book3S HV: Rework secure mem slot dropping
KVM: PPC: Book3S HV: Move kvmppc_svm_page_out up
KVM: PPC: Book3S HV: Migrate hot plugged memory
KVM: PPC: Book3S HV: In H_SVM_INIT_DONE, migrate remaining normal-GFNs to secure-GFNs
KVM: PPC: Book3S HV: Track the state GFNs associated with secure VMs
KVM: PPC: Book3S HV: Disable page merging in H_SVM_INIT_START
...

+3480 -2637
+3
Documentation/powerpc/ultravisor.rst
··· 895 895 One of the following values: 896 896 897 897 * H_SUCCESS on success. 898 + * H_STATE if the VM is not in a position to switch to secure. 898 899 899 900 Description 900 901 ~~~~~~~~~~~ ··· 934 933 * H_UNSUPPORTED if called from the wrong context (e.g. 935 934 from an SVM or before an H_SVM_INIT_START 936 935 hypercall). 936 + * H_STATE if the hypervisor could not successfully 937 + transition the VM to Secure VM. 937 938 938 939 Description 939 940 ~~~~~~~~~~~
+1 -19
arch/arm64/Kconfig
··· 1182 1182 1183 1183 If unsure, say Y. 1184 1184 1185 - config HARDEN_EL2_VECTORS 1186 - bool "Harden EL2 vector mapping against system register leak" if EXPERT 1187 - default y 1188 - help 1189 - Speculation attacks against some high-performance processors can 1190 - be used to leak privileged information such as the vector base 1191 - register, resulting in a potential defeat of the EL2 layout 1192 - randomization. 1193 - 1194 - This config option will map the vectors to a fixed location, 1195 - independent of the EL2 code mapping, so that revealing VBAR_EL2 1196 - to an attacker does not give away any extra information. This 1197 - only gets enabled on affected CPUs. 1198 - 1199 - If unsure, say Y. 1200 - 1201 1185 config ARM64_SSBD 1202 1186 bool "Speculative Store Bypass Disable" if EXPERT 1203 1187 default y ··· 1504 1520 config ARM64_PTR_AUTH 1505 1521 bool "Enable support for pointer authentication" 1506 1522 default y 1507 - depends on !KVM || ARM64_VHE 1508 1523 depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC 1509 1524 # Modern compilers insert a .note.gnu.property section note for PAC 1510 1525 # which is only understood by binutils starting with version 2.33.1. ··· 1530 1547 1531 1548 The feature is detected at runtime. If the feature is not present in 1532 1549 hardware it will not be advertised to userspace/KVM guest nor will it 1533 - be enabled. However, KVM guest also require VHE mode and hence 1534 - CONFIG_ARM64_VHE=y option to use this feature. 1550 + be enabled. 1535 1551 1536 1552 If the feature is present on the boot CPU but not on a late CPU, then 1537 1553 the late CPU will be parked. Also, if the boot CPU does not have
+61 -14
arch/arm64/include/asm/kvm_asm.h
··· 42 42 43 43 #include <linux/mm.h> 44 44 45 - /* Translate a kernel address of @sym into its equivalent linear mapping */ 46 - #define kvm_ksym_ref(sym) \ 45 + /* 46 + * Translate name of a symbol defined in nVHE hyp to the name seen 47 + * by kernel proper. All nVHE symbols are prefixed by the build system 48 + * to avoid clashes with the VHE variants. 49 + */ 50 + #define kvm_nvhe_sym(sym) __kvm_nvhe_##sym 51 + 52 + #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] 53 + #define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] 54 + 55 + /* 56 + * Define a pair of symbols sharing the same name but one defined in 57 + * VHE and the other in nVHE hyp implementations. 58 + */ 59 + #define DECLARE_KVM_HYP_SYM(sym) \ 60 + DECLARE_KVM_VHE_SYM(sym); \ 61 + DECLARE_KVM_NVHE_SYM(sym) 62 + 63 + #define CHOOSE_VHE_SYM(sym) sym 64 + #define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) 65 + 66 + #ifndef __KVM_NVHE_HYPERVISOR__ 67 + /* 68 + * BIG FAT WARNINGS: 69 + * 70 + * - Don't be tempted to change the following is_kernel_in_hyp_mode() 71 + * to has_vhe(). has_vhe() is implemented as a *final* capability, 72 + * while this is used early at boot time, when the capabilities are 73 + * not final yet.... 74 + * 75 + * - Don't let the nVHE hypervisor have access to this, as it will 76 + * pick the *wrong* symbol (yes, it runs at EL2...). 77 + */ 78 + #define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() ? 
CHOOSE_VHE_SYM(sym) \ 79 + : CHOOSE_NVHE_SYM(sym)) 80 + #else 81 + /* The nVHE hypervisor shouldn't even try to access anything */ 82 + extern void *__nvhe_undefined_symbol; 83 + #define CHOOSE_HYP_SYM(sym) __nvhe_undefined_symbol 84 + #endif 85 + 86 + /* Translate a kernel address @ptr into its equivalent linear mapping */ 87 + #define kvm_ksym_ref(ptr) \ 47 88 ({ \ 48 - void *val = &sym; \ 89 + void *val = (ptr); \ 49 90 if (!is_kernel_in_hyp_mode()) \ 50 - val = lm_alias(&sym); \ 91 + val = lm_alias((ptr)); \ 51 92 val; \ 52 93 }) 94 + #define kvm_ksym_ref_nvhe(sym) kvm_ksym_ref(kvm_nvhe_sym(sym)) 53 95 54 96 struct kvm; 55 97 struct kvm_vcpu; 98 + struct kvm_s2_mmu; 56 99 57 - extern char __kvm_hyp_init[]; 58 - extern char __kvm_hyp_init_end[]; 100 + DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); 101 + DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); 102 + #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) 103 + #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) 59 104 60 - extern char __kvm_hyp_vector[]; 105 + #ifdef CONFIG_KVM_INDIRECT_VECTORS 106 + extern atomic_t arm64_el2_vector_last_slot; 107 + DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); 108 + #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) 109 + #endif 61 110 62 111 extern void __kvm_flush_vm_context(void); 63 - extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); 64 - extern void __kvm_tlb_flush_vmid(struct kvm *kvm); 65 - extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); 112 + extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, 113 + int level); 114 + extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); 115 + extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu); 66 116 67 117 extern void __kvm_timer_set_cntvoff(u64 cntvoff); 68 118 69 - extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); 70 - 71 - extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); 119 + extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); 72 120 73 121 
extern void __kvm_enable_ssbs(void); 74 122 ··· 191 143 .macro get_vcpu_ptr vcpu, ctxt 192 144 get_host_ctxt \ctxt, \vcpu 193 145 ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] 194 - kern_hyp_va \vcpu 195 146 .endm 196 147 197 148 #endif
-8
arch/arm64/include/asm/kvm_coproc.h
··· 19 19 size_t num; 20 20 }; 21 21 22 - struct kvm_sys_reg_target_table { 23 - struct kvm_sys_reg_table table64; 24 - struct kvm_sys_reg_table table32; 25 - }; 26 - 27 - void kvm_register_target_sys_reg_table(unsigned int target, 28 - struct kvm_sys_reg_target_table *table); 29 - 30 22 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); 31 23 int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); 32 24 int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
+27 -48
arch/arm64/include/asm/kvm_emulate.h
··· 124 124 125 125 static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) 126 126 { 127 - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; 128 - } 129 - 130 - static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) 131 - { 132 - return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; 133 - } 134 - 135 - static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) 136 - { 137 - if (vcpu->arch.sysregs_loaded_on_cpu) 138 - return read_sysreg_el1(SYS_ELR); 139 - else 140 - return *__vcpu_elr_el1(vcpu); 141 - } 142 - 143 - static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) 144 - { 145 - if (vcpu->arch.sysregs_loaded_on_cpu) 146 - write_sysreg_el1(v, SYS_ELR); 147 - else 148 - *__vcpu_elr_el1(vcpu) = v; 127 + return (unsigned long *)&vcpu_gp_regs(vcpu)->pc; 149 128 } 150 129 151 130 static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) 152 131 { 153 - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; 132 + return (unsigned long *)&vcpu_gp_regs(vcpu)->pstate; 154 133 } 155 134 156 135 static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) ··· 158 179 static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, 159 180 u8 reg_num) 160 181 { 161 - return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; 182 + return (reg_num == 31) ? 
0 : vcpu_gp_regs(vcpu)->regs[reg_num]; 162 183 } 163 184 164 185 static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, 165 186 unsigned long val) 166 187 { 167 188 if (reg_num != 31) 168 - vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; 189 + vcpu_gp_regs(vcpu)->regs[reg_num] = val; 169 190 } 170 191 171 192 static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) ··· 176 197 if (vcpu->arch.sysregs_loaded_on_cpu) 177 198 return read_sysreg_el1(SYS_SPSR); 178 199 else 179 - return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; 200 + return __vcpu_sys_reg(vcpu, SPSR_EL1); 180 201 } 181 202 182 203 static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) ··· 189 210 if (vcpu->arch.sysregs_loaded_on_cpu) 190 211 write_sysreg_el1(v, SYS_SPSR); 191 212 else 192 - vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; 213 + __vcpu_sys_reg(vcpu, SPSR_EL1) = v; 193 214 } 194 215 195 216 /* ··· 238 259 return mode != PSR_MODE_EL0t; 239 260 } 240 261 241 - static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) 262 + static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu) 242 263 { 243 264 return vcpu->arch.fault.esr_el2; 244 265 } 245 266 246 267 static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) 247 268 { 248 - u32 esr = kvm_vcpu_get_hsr(vcpu); 269 + u32 esr = kvm_vcpu_get_esr(vcpu); 249 270 250 271 if (esr & ESR_ELx_CV) 251 272 return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; ··· 270 291 271 292 static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) 272 293 { 273 - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; 294 + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_xVC_IMM_MASK; 274 295 } 275 296 276 297 static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) 277 298 { 278 - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); 299 + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_ISV); 279 300 } 280 301 281 302 static inline unsigned long 
kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) 282 303 { 283 - return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); 304 + return kvm_vcpu_get_esr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); 284 305 } 285 306 286 307 static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) 287 308 { 288 - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); 309 + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SSE); 289 310 } 290 311 291 312 static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) 292 313 { 293 - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); 314 + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SF); 294 315 } 295 316 296 317 static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) 297 318 { 298 - return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; 319 + return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; 299 320 } 300 321 301 322 static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) 302 323 { 303 - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); 324 + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW); 304 325 } 305 326 306 327 static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) 307 328 { 308 - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || 329 + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR) || 309 330 kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ 310 331 } 311 332 312 333 static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) 313 334 { 314 - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); 335 + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_CM); 315 336 } 316 337 317 338 static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) 318 339 { 319 - return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); 340 + return 1 << ((kvm_vcpu_get_esr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); 320 341 } 321 342 322 343 /* This one is not specific to Data 
Abort */ 323 344 static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) 324 345 { 325 - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL); 346 + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_IL); 326 347 } 327 348 328 349 static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) 329 350 { 330 - return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); 351 + return ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); 331 352 } 332 353 333 354 static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) ··· 337 358 338 359 static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) 339 360 { 340 - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; 361 + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; 341 362 } 342 363 343 364 static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) 344 365 { 345 - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; 366 + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; 346 367 } 347 368 348 - static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) 369 + static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) 349 370 { 350 371 switch (kvm_vcpu_trap_get_fault(vcpu)) { 351 372 case FSC_SEA: ··· 366 387 367 388 static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) 368 389 { 369 - u32 esr = kvm_vcpu_get_hsr(vcpu); 390 + u32 esr = kvm_vcpu_get_esr(vcpu); 370 391 return ESR_ELx_SYS64_ISS_RT(esr); 371 392 } 372 393 ··· 495 516 * Skip an instruction which has been emulated at hyp while most guest sysregs 496 517 * are live. 
497 518 */ 498 - static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) 519 + static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) 499 520 { 500 521 *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); 501 - vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); 522 + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); 502 523 503 524 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); 504 525 505 - write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR); 526 + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); 506 527 write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); 507 528 } 508 529
+73 -21
arch/arm64/include/asm/kvm_host.h
··· 66 66 u32 vmid; 67 67 }; 68 68 69 - struct kvm_arch { 69 + struct kvm_s2_mmu { 70 70 struct kvm_vmid vmid; 71 71 72 - /* stage2 entry level table */ 73 - pgd_t *pgd; 74 - phys_addr_t pgd_phys; 75 - 76 - /* VTCR_EL2 value for this VM */ 77 - u64 vtcr; 72 + /* 73 + * stage2 entry level table 74 + * 75 + * Two kvm_s2_mmu structures in the same VM can point to the same 76 + * pgd here. This happens when running a guest using a 77 + * translation regime that isn't affected by its own stage-2 78 + * translation, such as a non-VHE hypervisor running at vEL2, or 79 + * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the 80 + * canonical stage-2 page tables. 81 + */ 82 + pgd_t *pgd; 83 + phys_addr_t pgd_phys; 78 84 79 85 /* The last vcpu id that ran on each physical CPU */ 80 86 int __percpu *last_vcpu_ran; 87 + 88 + struct kvm *kvm; 89 + }; 90 + 91 + struct kvm_arch { 92 + struct kvm_s2_mmu mmu; 93 + 94 + /* VTCR_EL2 value for this VM */ 95 + u64 vtcr; 81 96 82 97 /* The maximum number of vCPUs depends on the used GIC model */ 83 98 int max_vcpus; ··· 174 159 APGAKEYLO_EL1, 175 160 APGAKEYHI_EL1, 176 161 162 + ELR_EL1, 163 + SP_EL1, 164 + SPSR_EL1, 165 + 166 + CNTVOFF_EL2, 167 + CNTV_CVAL_EL0, 168 + CNTV_CTL_EL0, 169 + CNTP_CVAL_EL0, 170 + CNTP_CTL_EL0, 171 + 177 172 /* 32bit specific registers. 
Keep them at the end of the range */ 178 173 DACR32_EL2, /* Domain Access Control Register */ 179 174 IFSR32_EL2, /* Instruction Fault Status Register */ ··· 235 210 #define NR_COPRO_REGS (NR_SYS_REGS * 2) 236 211 237 212 struct kvm_cpu_context { 238 - struct kvm_regs gp_regs; 213 + struct user_pt_regs regs; /* sp = sp_el0 */ 214 + 215 + u64 spsr_abt; 216 + u64 spsr_und; 217 + u64 spsr_irq; 218 + u64 spsr_fiq; 219 + 220 + struct user_fpsimd_state fp_regs; 221 + 239 222 union { 240 223 u64 sys_regs[NR_SYS_REGS]; 241 224 u32 copro[NR_COPRO_REGS]; ··· 275 242 struct kvm_cpu_context ctxt; 276 243 void *sve_state; 277 244 unsigned int sve_max_vl; 245 + 246 + /* Stage 2 paging state used by the hardware on next switch */ 247 + struct kvm_s2_mmu *hw_mmu; 278 248 279 249 /* HYP configuration */ 280 250 u64 hcr_el2; ··· 363 327 struct vcpu_reset_state reset_state; 364 328 365 329 /* True when deferrable sysregs are loaded on the physical CPU, 366 - * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ 330 + * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */ 367 331 bool sysregs_loaded_on_cpu; 368 332 369 333 /* Guest PV state */ ··· 414 378 #define vcpu_has_ptrauth(vcpu) false 415 379 #endif 416 380 417 - #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) 381 + #define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) 418 382 419 383 /* 420 - * Only use __vcpu_sys_reg if you know you want the memory backed version of a 421 - * register, and not the one most recently accessed by a running VCPU. For 422 - * example, for userspace access or for system registers that are never context 423 - * switched, but only emulated. 384 + * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the 385 + * memory backed version of a register, and not the one most recently 386 + * accessed by a running VCPU. For example, for userspace access or 387 + * for system registers that are never context switched, but only 388 + * emulated. 
424 389 */ 425 - #define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) 390 + #define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)]) 391 + 392 + #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) 393 + 394 + #define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) 426 395 427 396 u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); 428 397 void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); ··· 483 442 484 443 u64 __kvm_call_hyp(void *hypfn, ...); 485 444 445 + #define kvm_call_hyp_nvhe(f, ...) \ 446 + do { \ 447 + DECLARE_KVM_NVHE_SYM(f); \ 448 + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ 449 + } while(0) 450 + 451 + #define kvm_call_hyp_nvhe_ret(f, ...) \ 452 + ({ \ 453 + DECLARE_KVM_NVHE_SYM(f); \ 454 + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ 455 + }) 456 + 486 457 /* 487 458 * The couple of isb() below are there to guarantee the same behaviour 488 459 * on VHE as on !VHE, where the eret to EL1 acts as a context ··· 506 453 f(__VA_ARGS__); \ 507 454 isb(); \ 508 455 } else { \ 509 - __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ 456 + kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ 510 457 } \ 511 458 } while(0) 512 459 ··· 518 465 ret = f(__VA_ARGS__); \ 519 466 isb(); \ 520 467 } else { \ 521 - ret = __kvm_call_hyp(kvm_ksym_ref(f), \ 522 - ##__VA_ARGS__); \ 468 + ret = kvm_call_hyp_nvhe_ret(f, ##__VA_ARGS__); \ 523 469 } \ 524 470 \ 525 471 ret; \ ··· 570 518 static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) 571 519 { 572 520 /* The host's MPIDR is immutable, so let's set it up at boot time */ 573 - cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); 521 + ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); 574 522 } 575 523 576 524 static inline bool kvm_arch_requires_vhe(void) ··· 671 619 } 672 620 } 673 621 674 - void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); 675 - void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); 622 + void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); 
623 + void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); 676 624 677 625 int kvm_set_ipa_limit(void); 678 626
+11 -4
arch/arm64/include/asm/kvm_hyp.h
··· 12 12 #include <asm/alternative.h> 13 13 #include <asm/sysreg.h> 14 14 15 - #define __hyp_text __section(.hyp.text) notrace __noscs 16 - 17 15 #define read_sysreg_elx(r,nvh,vh) \ 18 16 ({ \ 19 17 u64 reg; \ ··· 61 63 void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); 62 64 int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); 63 65 66 + #ifdef __KVM_NVHE_HYPERVISOR__ 64 67 void __timer_enable_traps(struct kvm_vcpu *vcpu); 65 68 void __timer_disable_traps(struct kvm_vcpu *vcpu); 69 + #endif 66 70 71 + #ifdef __KVM_NVHE_HYPERVISOR__ 67 72 void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); 68 73 void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); 74 + #else 69 75 void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); 70 76 void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); 71 77 void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); 72 78 void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); 73 - void __sysreg32_save_state(struct kvm_vcpu *vcpu); 74 - void __sysreg32_restore_state(struct kvm_vcpu *vcpu); 79 + #endif 75 80 76 81 void __debug_switch_to_guest(struct kvm_vcpu *vcpu); 77 82 void __debug_switch_to_host(struct kvm_vcpu *vcpu); ··· 82 81 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); 83 82 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); 84 83 84 + #ifndef __KVM_NVHE_HYPERVISOR__ 85 85 void activate_traps_vhe_load(struct kvm_vcpu *vcpu); 86 86 void deactivate_traps_vhe_put(void); 87 + #endif 87 88 88 89 u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); 90 + 91 + void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt); 92 + #ifdef __KVM_NVHE_HYPERVISOR__ 89 93 void __noreturn __hyp_do_panic(unsigned long, ...); 94 + #endif 90 95 91 96 #endif /* __ARM64_KVM_HYP_H__ */ 92 97
+8 -8
arch/arm64/include/asm/kvm_mmu.h
··· 134 134 void free_hyp_pgds(void); 135 135 136 136 void stage2_unmap_vm(struct kvm *kvm); 137 - int kvm_alloc_stage2_pgd(struct kvm *kvm); 138 - void kvm_free_stage2_pgd(struct kvm *kvm); 137 + int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); 138 + void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); 139 139 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, 140 140 phys_addr_t pa, unsigned long size, bool writable); 141 141 ··· 577 577 return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); 578 578 } 579 579 580 - static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) 580 + static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) 581 581 { 582 - struct kvm_vmid *vmid = &kvm->arch.vmid; 582 + struct kvm_vmid *vmid = &mmu->vmid; 583 583 u64 vmid_field, baddr; 584 584 u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; 585 585 586 - baddr = kvm->arch.pgd_phys; 586 + baddr = mmu->pgd_phys; 587 587 vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; 588 588 return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; 589 589 } ··· 592 592 * Must be called from hyp code running at EL2 with an updated VTTBR 593 593 * and interrupts disabled. 594 594 */ 595 - static __always_inline void __load_guest_stage2(struct kvm *kvm) 595 + static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) 596 596 { 597 - write_sysreg(kvm->arch.vtcr, vtcr_el2); 598 - write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); 597 + write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2); 598 + write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); 599 599 600 600 /* 601 601 * ARM errata 1165522 and 1530923 require the actual execution of the
+13 -21
arch/arm64/include/asm/kvm_ptrauth.h
··· 61 61 62 62 /* 63 63 * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will 64 - * check for the presence of one of the cpufeature flag 65 - * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and 64 + * check for the presence ARM64_HAS_ADDRESS_AUTH, which is defined as 65 + * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and 66 66 * then proceed ahead with the save/restore of Pointer Authentication 67 - * key registers. 67 + * key registers if enabled for the guest. 68 68 */ 69 69 .macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3 70 - alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH 71 - b 1000f 70 + alternative_if_not ARM64_HAS_ADDRESS_AUTH 71 + b .L__skip_switch\@ 72 72 alternative_else_nop_endif 73 - alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF 74 - b 1001f 75 - alternative_else_nop_endif 76 - 1000: 77 - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] 73 + mrs \reg1, hcr_el2 78 74 and \reg1, \reg1, #(HCR_API | HCR_APK) 79 - cbz \reg1, 1001f 75 + cbz \reg1, .L__skip_switch\@ 80 76 add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 81 77 ptrauth_restore_state \reg1, \reg2, \reg3 82 - 1001: 78 + .L__skip_switch\@: 83 79 .endm 84 80 85 81 .macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 86 - alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH 87 - b 2000f 82 + alternative_if_not ARM64_HAS_ADDRESS_AUTH 83 + b .L__skip_switch\@ 88 84 alternative_else_nop_endif 89 - alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF 90 - b 2001f 91 - alternative_else_nop_endif 92 - 2000: 93 - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] 85 + mrs \reg1, hcr_el2 94 86 and \reg1, \reg1, #(HCR_API | HCR_APK) 95 - cbz \reg1, 2001f 87 + cbz \reg1, .L__skip_switch\@ 96 88 add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 97 89 ptrauth_save_state \reg1, \reg2, \reg3 98 90 add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1 99 91 ptrauth_restore_state \reg1, \reg2, \reg3 100 92 isb 101 - 2001: 93 + .L__skip_switch\@: 102 94 .endm 103 95 104 96 #else /* 
!CONFIG_ARM64_PTR_AUTH */
-7
arch/arm64/include/asm/mmu.h
··· 45 45 bp_hardening_cb_t fn; 46 46 }; 47 47 48 - #if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ 49 - defined(CONFIG_HARDEN_EL2_VECTORS)) 50 - 51 - extern char __bp_harden_hyp_vecs[]; 52 - extern atomic_t arm64_el2_vector_last_slot; 53 - #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */ 54 - 55 48 #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR 56 49 DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); 57 50
+10 -3
arch/arm64/include/asm/virt.h
··· 85 85 86 86 static __always_inline bool has_vhe(void) 87 87 { 88 - if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN)) 88 + /* 89 + * The following macros are defined for code specic to VHE/nVHE. 90 + * If has_vhe() is inlined into those compilation units, it can 91 + * be determined statically. Otherwise fall back to caps. 92 + */ 93 + if (__is_defined(__KVM_VHE_HYPERVISOR__)) 89 94 return true; 90 - 91 - return false; 95 + else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) 96 + return false; 97 + else 98 + return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); 92 99 } 93 100 94 101 #endif /* __ASSEMBLY__ */
+1 -2
arch/arm64/kernel/asm-offsets.c
··· 102 102 DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); 103 103 DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); 104 104 DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); 105 - DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); 105 + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); 106 106 DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1])); 107 107 DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1])); 108 108 DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1])); 109 109 DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1])); 110 110 DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); 111 - DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); 112 111 DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); 113 112 DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); 114 113 #endif
+2 -2
arch/arm64/kernel/cpu_errata.c
··· 632 632 return is_midr_in_range(midr, &range) && has_dic; 633 633 } 634 634 635 - #if defined(CONFIG_HARDEN_EL2_VECTORS) 635 + #ifdef CONFIG_RANDOMIZE_BASE 636 636 637 637 static const struct midr_range ca57_a72[] = { 638 638 MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), ··· 891 891 .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, 892 892 .matches = check_branch_predictor, 893 893 }, 894 - #ifdef CONFIG_HARDEN_EL2_VECTORS 894 + #ifdef CONFIG_RANDOMIZE_BASE 895 895 { 896 896 .desc = "EL2 vector hardening", 897 897 .capability = ARM64_HARDEN_EL2_VECTORS,
+54
arch/arm64/kernel/image-vars.h
··· 51 51 52 52 #endif 53 53 54 + #ifdef CONFIG_KVM 55 + 56 + /* 57 + * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, to 58 + * separate it from the kernel proper. The following symbols are legally 59 + * accessed by it, therefore provide aliases to make them linkable. 60 + * Do not include symbols which may not be safely accessed under hypervisor 61 + * memory mappings. 62 + */ 63 + 64 + #define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym; 65 + 66 + /* Alternative callbacks for init-time patching of nVHE hyp code. */ 67 + KVM_NVHE_ALIAS(arm64_enable_wa2_handling); 68 + KVM_NVHE_ALIAS(kvm_patch_vector_branch); 69 + KVM_NVHE_ALIAS(kvm_update_va_mask); 70 + 71 + /* Global kernel state accessed by nVHE hyp code. */ 72 + KVM_NVHE_ALIAS(arm64_ssbd_callback_required); 73 + KVM_NVHE_ALIAS(kvm_host_data); 74 + KVM_NVHE_ALIAS(kvm_vgic_global_state); 75 + 76 + /* Kernel constant needed to compute idmap addresses. */ 77 + KVM_NVHE_ALIAS(kimage_voffset); 78 + 79 + /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */ 80 + KVM_NVHE_ALIAS(__hyp_panic_string); 81 + KVM_NVHE_ALIAS(panic); 82 + 83 + /* Vectors installed by hyp-init on reset HVC. */ 84 + KVM_NVHE_ALIAS(__hyp_stub_vectors); 85 + 86 + /* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */ 87 + KVM_NVHE_ALIAS(idmap_t0sz); 88 + 89 + /* Kernel symbol used by icache_is_vpipt(). */ 90 + KVM_NVHE_ALIAS(__icache_flags); 91 + 92 + /* Kernel symbols needed for cpus_have_final/const_caps checks. */ 93 + KVM_NVHE_ALIAS(arm64_const_caps_ready); 94 + KVM_NVHE_ALIAS(cpu_hwcap_keys); 95 + KVM_NVHE_ALIAS(cpu_hwcaps); 96 + 97 + /* Static keys which are set if a vGIC trap should be handled in hyp. */ 98 + KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); 99 + KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); 100 + 101 + /* Static key checked in pmr_sync(). 
*/ 102 + #ifdef CONFIG_ARM64_PSEUDO_NMI 103 + KVM_NVHE_ALIAS(gic_pmr_sync); 104 + #endif 105 + 106 + #endif /* CONFIG_KVM */ 107 + 54 108 #endif /* __ARM64_KERNEL_IMAGE_VARS_H */
+1 -1
arch/arm64/kvm/Kconfig
··· 58 58 virtual machines. 59 59 60 60 config KVM_INDIRECT_VECTORS 61 - def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS 61 + def_bool HARDEN_BRANCH_PREDICTOR || RANDOMIZE_BASE 62 62 63 63 endif # KVM 64 64
+2 -2
arch/arm64/kvm/Makefile
··· 13 13 kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ 14 14 $(KVM)/vfio.o $(KVM)/irqchip.o \ 15 15 arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ 16 - inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \ 17 - guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \ 16 + inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \ 17 + guest.o debug.o reset.o sys_regs.o \ 18 18 vgic-sys-reg-v3.o fpsimd.o pmu.o \ 19 19 aarch32.o arch_timer.o \ 20 20 vgic/vgic.o vgic/vgic-init.o \
+123 -34
arch/arm64/kvm/arch_timer.c
··· 51 51 struct arch_timer_context *timer, 52 52 enum kvm_arch_timer_regs treg); 53 53 54 + u32 timer_get_ctl(struct arch_timer_context *ctxt) 55 + { 56 + struct kvm_vcpu *vcpu = ctxt->vcpu; 57 + 58 + switch(arch_timer_ctx_index(ctxt)) { 59 + case TIMER_VTIMER: 60 + return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); 61 + case TIMER_PTIMER: 62 + return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); 63 + default: 64 + WARN_ON(1); 65 + return 0; 66 + } 67 + } 68 + 69 + u64 timer_get_cval(struct arch_timer_context *ctxt) 70 + { 71 + struct kvm_vcpu *vcpu = ctxt->vcpu; 72 + 73 + switch(arch_timer_ctx_index(ctxt)) { 74 + case TIMER_VTIMER: 75 + return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); 76 + case TIMER_PTIMER: 77 + return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); 78 + default: 79 + WARN_ON(1); 80 + return 0; 81 + } 82 + } 83 + 84 + static u64 timer_get_offset(struct arch_timer_context *ctxt) 85 + { 86 + struct kvm_vcpu *vcpu = ctxt->vcpu; 87 + 88 + switch(arch_timer_ctx_index(ctxt)) { 89 + case TIMER_VTIMER: 90 + return __vcpu_sys_reg(vcpu, CNTVOFF_EL2); 91 + default: 92 + return 0; 93 + } 94 + } 95 + 96 + static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) 97 + { 98 + struct kvm_vcpu *vcpu = ctxt->vcpu; 99 + 100 + switch(arch_timer_ctx_index(ctxt)) { 101 + case TIMER_VTIMER: 102 + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl; 103 + break; 104 + case TIMER_PTIMER: 105 + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; 106 + break; 107 + default: 108 + WARN_ON(1); 109 + } 110 + } 111 + 112 + static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) 113 + { 114 + struct kvm_vcpu *vcpu = ctxt->vcpu; 115 + 116 + switch(arch_timer_ctx_index(ctxt)) { 117 + case TIMER_VTIMER: 118 + __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval; 119 + break; 120 + case TIMER_PTIMER: 121 + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; 122 + break; 123 + default: 124 + WARN_ON(1); 125 + } 126 + } 127 + 128 + static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) 129 + { 130 + struct 
kvm_vcpu *vcpu = ctxt->vcpu; 131 + 132 + switch(arch_timer_ctx_index(ctxt)) { 133 + case TIMER_VTIMER: 134 + __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset; 135 + break; 136 + default: 137 + WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); 138 + } 139 + } 140 + 54 141 u64 kvm_phys_timer_read(void) 55 142 { 56 143 return timecounter->cc->read(timecounter->cc); ··· 211 124 { 212 125 u64 cval, now; 213 126 214 - cval = timer_ctx->cnt_cval; 215 - now = kvm_phys_timer_read() - timer_ctx->cntvoff; 127 + cval = timer_get_cval(timer_ctx); 128 + now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); 216 129 217 130 if (now < cval) { 218 131 u64 ns; ··· 231 144 { 232 145 WARN_ON(timer_ctx && timer_ctx->loaded); 233 146 return timer_ctx && 234 - !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && 235 - (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); 147 + ((timer_get_ctl(timer_ctx) & 148 + (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE); 236 149 } 237 150 238 151 /* ··· 343 256 if (!kvm_timer_irq_can_fire(timer_ctx)) 344 257 return false; 345 258 346 - cval = timer_ctx->cnt_cval; 347 - now = kvm_phys_timer_read() - timer_ctx->cntvoff; 259 + cval = timer_get_cval(timer_ctx); 260 + now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); 348 261 349 262 return cval <= now; 350 263 } ··· 437 350 438 351 switch (index) { 439 352 case TIMER_VTIMER: 440 - ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); 441 - ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL); 353 + timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); 354 + timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); 442 355 443 356 /* Disable the timer */ 444 357 write_sysreg_el0(0, SYS_CNTV_CTL); ··· 446 359 447 360 break; 448 361 case TIMER_PTIMER: 449 - ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); 450 - ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL); 362 + timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); 363 + timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL)); 451 364 452 365 /* 
Disable the timer */ 453 366 write_sysreg_el0(0, SYS_CNTP_CTL); ··· 516 429 517 430 switch (index) { 518 431 case TIMER_VTIMER: 519 - write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL); 432 + write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); 520 433 isb(); 521 - write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL); 434 + write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); 522 435 break; 523 436 case TIMER_PTIMER: 524 - write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL); 437 + write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); 525 438 isb(); 526 - write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL); 439 + write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); 527 440 break; 528 441 case NR_KVM_TIMERS: 529 442 BUG(); ··· 615 528 kvm_timer_vcpu_load_nogic(vcpu); 616 529 } 617 530 618 - set_cntvoff(map.direct_vtimer->cntvoff); 531 + set_cntvoff(timer_get_offset(map.direct_vtimer)); 619 532 620 533 kvm_timer_unblocking(vcpu); 621 534 ··· 702 615 } 703 616 } 704 617 705 - void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) 618 + void kvm_timer_sync_user(struct kvm_vcpu *vcpu) 706 619 { 707 620 struct arch_timer_cpu *timer = vcpu_timer(vcpu); 708 621 ··· 726 639 * resets the timer to be disabled and unmasked and is compliant with 727 640 * the ARMv7 architecture. 728 641 */ 729 - vcpu_vtimer(vcpu)->cnt_ctl = 0; 730 - vcpu_ptimer(vcpu)->cnt_ctl = 0; 642 + timer_set_ctl(vcpu_vtimer(vcpu), 0); 643 + timer_set_ctl(vcpu_ptimer(vcpu), 0); 731 644 732 645 if (timer->enabled) { 733 646 kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); ··· 755 668 756 669 mutex_lock(&kvm->lock); 757 670 kvm_for_each_vcpu(i, tmp, kvm) 758 - vcpu_vtimer(tmp)->cntvoff = cntvoff; 671 + timer_set_offset(vcpu_vtimer(tmp), cntvoff); 759 672 760 673 /* 761 674 * When called from the vcpu create path, the CPU being created is not 762 675 * included in the loop above, so we just set it here as well. 
763 676 */ 764 - vcpu_vtimer(vcpu)->cntvoff = cntvoff; 677 + timer_set_offset(vcpu_vtimer(vcpu), cntvoff); 765 678 mutex_unlock(&kvm->lock); 766 679 } 767 680 ··· 771 684 struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); 772 685 struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); 773 686 687 + vtimer->vcpu = vcpu; 688 + ptimer->vcpu = vcpu; 689 + 774 690 /* Synchronize cntvoff across all vtimers of a VM. */ 775 691 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); 776 - ptimer->cntvoff = 0; 692 + timer_set_offset(ptimer, 0); 777 693 778 694 hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); 779 695 timer->bg_timer.function = kvm_bg_timer_expire; ··· 794 704 795 705 vtimer->host_timer_irq_flags = host_vtimer_irq_flags; 796 706 ptimer->host_timer_irq_flags = host_ptimer_irq_flags; 797 - 798 - vtimer->vcpu = vcpu; 799 - ptimer->vcpu = vcpu; 800 707 } 801 708 802 709 static void kvm_timer_init_interrupt(void *info) ··· 843 756 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit 844 757 * regardless of ENABLE bit for our implementation convenience. 
845 758 */ 759 + u32 ctl = timer_get_ctl(timer); 760 + 846 761 if (!kvm_timer_compute_delta(timer)) 847 - return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT; 848 - else 849 - return timer->cnt_ctl; 762 + ctl |= ARCH_TIMER_CTRL_IT_STAT; 763 + 764 + return ctl; 850 765 } 851 766 852 767 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) ··· 884 795 885 796 switch (treg) { 886 797 case TIMER_REG_TVAL: 887 - val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; 888 - val &= lower_32_bits(val); 798 + val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer); 799 + val = lower_32_bits(val); 889 800 break; 890 801 891 802 case TIMER_REG_CTL: ··· 893 804 break; 894 805 895 806 case TIMER_REG_CVAL: 896 - val = timer->cnt_cval; 807 + val = timer_get_cval(timer); 897 808 break; 898 809 899 810 case TIMER_REG_CNT: 900 - val = kvm_phys_timer_read() - timer->cntvoff; 811 + val = kvm_phys_timer_read() - timer_get_offset(timer); 901 812 break; 902 813 903 814 default: ··· 931 842 { 932 843 switch (treg) { 933 844 case TIMER_REG_TVAL: 934 - timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; 845 + timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val); 935 846 break; 936 847 937 848 case TIMER_REG_CTL: 938 - timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT; 849 + timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT); 939 850 break; 940 851 941 852 case TIMER_REG_CVAL: 942 - timer->cnt_cval = val; 853 + timer_set_cval(timer, val); 943 854 break; 944 855 945 856 default:
+21 -36
arch/arm64/kvm/arm.c
··· 106 106 */ 107 107 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 108 108 { 109 - int ret, cpu; 109 + int ret; 110 110 111 111 ret = kvm_arm_setup_stage2(kvm, type); 112 112 if (ret) 113 113 return ret; 114 114 115 - kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); 116 - if (!kvm->arch.last_vcpu_ran) 117 - return -ENOMEM; 118 - 119 - for_each_possible_cpu(cpu) 120 - *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1; 121 - 122 - ret = kvm_alloc_stage2_pgd(kvm); 115 + ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); 123 116 if (ret) 124 - goto out_fail_alloc; 117 + return ret; 125 118 126 119 ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); 127 120 if (ret) ··· 122 129 123 130 kvm_vgic_early_init(kvm); 124 131 125 - /* Mark the initial VMID generation invalid */ 126 - kvm->arch.vmid.vmid_gen = 0; 127 - 128 132 /* The maximum number of VCPUs is limited by the host's GIC model */ 129 133 kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); 130 134 131 135 return ret; 132 136 out_free_stage2_pgd: 133 - kvm_free_stage2_pgd(kvm); 134 - out_fail_alloc: 135 - free_percpu(kvm->arch.last_vcpu_ran); 136 - kvm->arch.last_vcpu_ran = NULL; 137 + kvm_free_stage2_pgd(&kvm->arch.mmu); 137 138 return ret; 138 139 } 139 140 ··· 146 159 int i; 147 160 148 161 kvm_vgic_destroy(kvm); 149 - 150 - free_percpu(kvm->arch.last_vcpu_ran); 151 - kvm->arch.last_vcpu_ran = NULL; 152 162 153 163 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 154 164 if (kvm->vcpus[i]) { ··· 265 281 266 282 kvm_arm_pvtime_vcpu_init(&vcpu->arch); 267 283 284 + vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; 285 + 268 286 err = kvm_vgic_vcpu_init(vcpu); 269 287 if (err) 270 288 return err; ··· 322 336 323 337 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 324 338 { 339 + struct kvm_s2_mmu *mmu; 325 340 int *last_ran; 326 341 327 - last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); 342 + mmu = vcpu->arch.hw_mmu; 343 + last_ran = this_cpu_ptr(mmu->last_vcpu_ran); 328 344 329 345 /* 
330 346 * We might get preempted before the vCPU actually runs, but 331 347 * over-invalidation doesn't affect correctness. 332 348 */ 333 349 if (*last_ran != vcpu->vcpu_id) { 334 - kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu); 350 + kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu); 335 351 *last_ran = vcpu->vcpu_id; 336 352 } 337 353 ··· 341 353 342 354 kvm_vgic_load(vcpu); 343 355 kvm_timer_vcpu_load(vcpu); 344 - kvm_vcpu_load_sysregs(vcpu); 356 + if (has_vhe()) 357 + kvm_vcpu_load_sysregs_vhe(vcpu); 345 358 kvm_arch_vcpu_load_fp(vcpu); 346 359 kvm_vcpu_pmu_restore_guest(vcpu); 347 360 if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) ··· 360 371 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 361 372 { 362 373 kvm_arch_vcpu_put_fp(vcpu); 363 - kvm_vcpu_put_sysregs(vcpu); 374 + if (has_vhe()) 375 + kvm_vcpu_put_sysregs_vhe(vcpu); 364 376 kvm_timer_vcpu_put(vcpu); 365 377 kvm_vgic_put(vcpu); 366 378 kvm_vcpu_pmu_restore_host(vcpu); ··· 458 468 459 469 /** 460 470 * update_vmid - Update the vmid with a valid VMID for the current generation 461 - * @kvm: The guest that struct vmid belongs to 462 471 * @vmid: The stage-2 VMID information struct 463 472 */ 464 473 static void update_vmid(struct kvm_vmid *vmid) ··· 669 680 */ 670 681 cond_resched(); 671 682 672 - update_vmid(&vcpu->kvm->arch.vmid); 683 + update_vmid(&vcpu->arch.hw_mmu->vmid); 673 684 674 685 check_vcpu_requests(vcpu); 675 686 ··· 718 729 */ 719 730 smp_store_mb(vcpu->mode, IN_GUEST_MODE); 720 731 721 - if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || 732 + if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || 722 733 kvm_request_pending(vcpu)) { 723 734 vcpu->mode = OUTSIDE_GUEST_MODE; 724 735 isb(); /* Ensure work in x_flush_hwstate is committed */ 725 736 kvm_pmu_sync_hwstate(vcpu); 726 737 if (static_branch_unlikely(&userspace_irqchip_in_use)) 727 - kvm_timer_sync_hwstate(vcpu); 738 + kvm_timer_sync_user(vcpu); 728 739 kvm_vgic_sync_hwstate(vcpu); 729 740 local_irq_enable(); 730 741 
preempt_enable(); ··· 739 750 trace_kvm_entry(*vcpu_pc(vcpu)); 740 751 guest_enter_irqoff(); 741 752 742 - if (has_vhe()) { 743 - ret = kvm_vcpu_run_vhe(vcpu); 744 - } else { 745 - ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); 746 - } 753 + ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); 747 754 748 755 vcpu->mode = OUTSIDE_GUEST_MODE; 749 756 vcpu->stat.exits++; ··· 769 784 * timer virtual interrupt state. 770 785 */ 771 786 if (static_branch_unlikely(&userspace_irqchip_in_use)) 772 - kvm_timer_sync_hwstate(vcpu); 787 + kvm_timer_sync_user(vcpu); 773 788 774 789 kvm_arch_vcpu_ctxsync_fp(vcpu); 775 790 ··· 1272 1287 * so that we can use adr_l to access per-cpu variables in EL2. 1273 1288 */ 1274 1289 tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) - 1275 - (unsigned long)kvm_ksym_ref(kvm_host_data)); 1290 + (unsigned long)kvm_ksym_ref(&kvm_host_data)); 1276 1291 1277 1292 pgd_ptr = kvm_mmu_get_httbr(); 1278 1293 hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; ··· 1293 1308 */ 1294 1309 if (this_cpu_has_cap(ARM64_SSBS) && 1295 1310 arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { 1296 - kvm_call_hyp(__kvm_enable_ssbs); 1311 + kvm_call_hyp_nvhe(__kvm_enable_ssbs); 1297 1312 } 1298 1313 } 1299 1314
+2 -4
arch/arm64/kvm/fpsimd.c
··· 85 85 WARN_ON_ONCE(!irqs_disabled()); 86 86 87 87 if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { 88 - fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs, 88 + fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, 89 89 vcpu->arch.sve_state, 90 90 vcpu->arch.sve_max_vl); 91 91 ··· 109 109 local_irq_save(flags); 110 110 111 111 if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { 112 - u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; 113 - 114 112 fpsimd_save_and_flush_cpu_state(); 115 113 116 114 if (guest_has_sve) 117 - *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); 115 + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12); 118 116 } else if (host_has_sve) { 119 117 /* 120 118 * The FPSIMD/SVE state in the CPU has not been touched, and we
+65 -14
arch/arm64/kvm/guest.c
··· 101 101 return size; 102 102 } 103 103 104 - static int validate_core_offset(const struct kvm_vcpu *vcpu, 105 - const struct kvm_one_reg *reg) 104 + static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 106 105 { 107 106 u64 off = core_reg_offset_from_id(reg->id); 108 107 int size = core_reg_size_from_offset(vcpu, off); 109 108 110 109 if (size < 0) 111 - return -EINVAL; 110 + return NULL; 112 111 113 112 if (KVM_REG_SIZE(reg->id) != size) 114 - return -EINVAL; 113 + return NULL; 115 114 116 - return 0; 115 + switch (off) { 116 + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... 117 + KVM_REG_ARM_CORE_REG(regs.regs[30]): 118 + off -= KVM_REG_ARM_CORE_REG(regs.regs[0]); 119 + off /= 2; 120 + return &vcpu->arch.ctxt.regs.regs[off]; 121 + 122 + case KVM_REG_ARM_CORE_REG(regs.sp): 123 + return &vcpu->arch.ctxt.regs.sp; 124 + 125 + case KVM_REG_ARM_CORE_REG(regs.pc): 126 + return &vcpu->arch.ctxt.regs.pc; 127 + 128 + case KVM_REG_ARM_CORE_REG(regs.pstate): 129 + return &vcpu->arch.ctxt.regs.pstate; 130 + 131 + case KVM_REG_ARM_CORE_REG(sp_el1): 132 + return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1); 133 + 134 + case KVM_REG_ARM_CORE_REG(elr_el1): 135 + return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1); 136 + 137 + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]): 138 + return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1); 139 + 140 + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]): 141 + return &vcpu->arch.ctxt.spsr_abt; 142 + 143 + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]): 144 + return &vcpu->arch.ctxt.spsr_und; 145 + 146 + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]): 147 + return &vcpu->arch.ctxt.spsr_irq; 148 + 149 + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]): 150 + return &vcpu->arch.ctxt.spsr_fiq; 151 + 152 + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... 
153 + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): 154 + off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]); 155 + off /= 4; 156 + return &vcpu->arch.ctxt.fp_regs.vregs[off]; 157 + 158 + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): 159 + return &vcpu->arch.ctxt.fp_regs.fpsr; 160 + 161 + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): 162 + return &vcpu->arch.ctxt.fp_regs.fpcr; 163 + 164 + default: 165 + return NULL; 166 + } 117 167 } 118 168 119 169 static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) ··· 175 125 * off the index in the "array". 176 126 */ 177 127 __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; 178 - struct kvm_regs *regs = vcpu_gp_regs(vcpu); 179 - int nr_regs = sizeof(*regs) / sizeof(__u32); 128 + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); 129 + void *addr; 180 130 u32 off; 181 131 182 132 /* Our ID is an index into the kvm_regs struct. */ ··· 185 135 (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) 186 136 return -ENOENT; 187 137 188 - if (validate_core_offset(vcpu, reg)) 138 + addr = core_reg_addr(vcpu, reg); 139 + if (!addr) 189 140 return -EINVAL; 190 141 191 - if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) 142 + if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id))) 192 143 return -EFAULT; 193 144 194 145 return 0; ··· 198 147 static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) 199 148 { 200 149 __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; 201 - struct kvm_regs *regs = vcpu_gp_regs(vcpu); 202 - int nr_regs = sizeof(*regs) / sizeof(__u32); 150 + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); 203 151 __uint128_t tmp; 204 - void *valp = &tmp; 152 + void *valp = &tmp, *addr; 205 153 u64 off; 206 154 int err = 0; 207 155 ··· 210 160 (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) 211 161 return -ENOENT; 212 162 213 - if (validate_core_offset(vcpu, reg)) 163 + addr = core_reg_addr(vcpu, reg); 164 + if (!addr) 214 165 
return -EINVAL; 215 166 216 167 if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) ··· 249 198 } 250 199 } 251 200 252 - memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); 201 + memcpy(addr, valp, KVM_REG_SIZE(reg->id)); 253 202 254 203 if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { 255 204 int i;
+16 -16
arch/arm64/kvm/handle_exit.c
··· 89 89 */ 90 90 static int kvm_handle_wfx(struct kvm_vcpu *vcpu) 91 91 { 92 - if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { 92 + if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) { 93 93 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); 94 94 vcpu->stat.wfe_exit_stat++; 95 95 kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); ··· 119 119 static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) 120 120 { 121 121 struct kvm_run *run = vcpu->run; 122 - u32 hsr = kvm_vcpu_get_hsr(vcpu); 122 + u32 esr = kvm_vcpu_get_esr(vcpu); 123 123 int ret = 0; 124 124 125 125 run->exit_reason = KVM_EXIT_DEBUG; 126 - run->debug.arch.hsr = hsr; 126 + run->debug.arch.hsr = esr; 127 127 128 - switch (ESR_ELx_EC(hsr)) { 128 + switch (ESR_ELx_EC(esr)) { 129 129 case ESR_ELx_EC_WATCHPT_LOW: 130 130 run->debug.arch.far = vcpu->arch.fault.far_el2; 131 131 /* fall through */ ··· 135 135 case ESR_ELx_EC_BRK64: 136 136 break; 137 137 default: 138 - kvm_err("%s: un-handled case hsr: %#08x\n", 139 - __func__, (unsigned int) hsr); 138 + kvm_err("%s: un-handled case esr: %#08x\n", 139 + __func__, (unsigned int) esr); 140 140 ret = -1; 141 141 break; 142 142 } ··· 146 146 147 147 static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu) 148 148 { 149 - u32 hsr = kvm_vcpu_get_hsr(vcpu); 149 + u32 esr = kvm_vcpu_get_esr(vcpu); 150 150 151 - kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", 152 - hsr, esr_get_class_string(hsr)); 151 + kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n", 152 + esr, esr_get_class_string(esr)); 153 153 154 154 kvm_inject_undefined(vcpu); 155 155 return 1; ··· 200 200 201 201 static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) 202 202 { 203 - u32 hsr = kvm_vcpu_get_hsr(vcpu); 204 - u8 hsr_ec = ESR_ELx_EC(hsr); 203 + u32 esr = kvm_vcpu_get_esr(vcpu); 204 + u8 esr_ec = ESR_ELx_EC(esr); 205 205 206 - return arm_exit_handlers[hsr_ec]; 206 + return arm_exit_handlers[esr_ec]; 207 207 } 208 208 209 209 /* ··· 242 242 struct kvm_run *run = vcpu->run; 
243 243 244 244 if (ARM_SERROR_PENDING(exception_index)) { 245 - u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); 245 + u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); 246 246 247 247 /* 248 248 * HVC/SMC already have an adjusted PC, which we need 249 249 * to correct in order to return to after having 250 250 * injected the SError. 251 251 */ 252 - if (hsr_ec == ESR_ELx_EC_HVC32 || hsr_ec == ESR_ELx_EC_HVC64 || 253 - hsr_ec == ESR_ELx_EC_SMC32 || hsr_ec == ESR_ELx_EC_SMC64) { 252 + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 || 253 + esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) { 254 254 u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2; 255 255 *vcpu_pc(vcpu) -= adj; 256 256 } ··· 307 307 exception_index = ARM_EXCEPTION_CODE(exception_index); 308 308 309 309 if (exception_index == ARM_EXCEPTION_EL1_SERROR) 310 - kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu)); 310 + kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); 311 311 }
+5
arch/arm64/kvm/hyp-init.S arch/arm64/kvm/hyp/nvhe/hyp-init.S
··· 105 105 */ 106 106 mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) 107 107 CPU_BE( orr x4, x4, #SCTLR_ELx_EE) 108 + alternative_if ARM64_HAS_ADDRESS_AUTH 109 + mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ 110 + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) 111 + orr x4, x4, x5 112 + alternative_else_nop_endif 108 113 msr sctlr_el2, x4 109 114 isb 110 115
+8 -14
arch/arm64/kvm/hyp/Makefile
··· 3 3 # Makefile for Kernel-based Virtual Machine module, HYP part 4 4 # 5 5 6 - ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ 7 - $(DISABLE_STACKLEAK_PLUGIN) 6 + incdir := $(srctree)/$(src)/include 7 + subdir-asflags-y := -I$(incdir) 8 + subdir-ccflags-y := -I$(incdir) \ 9 + -fno-stack-protector \ 10 + -DDISABLE_BRANCH_PROFILING \ 11 + $(DISABLE_STACKLEAK_PLUGIN) 8 12 9 - obj-$(CONFIG_KVM) += hyp.o 10 - 11 - hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ 12 - debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o 13 - 14 - # KVM code is run at a different exception code with a different map, so 15 - # compiler instrumentation that inserts callbacks or checks into the code may 16 - # cause crashes. Just disable it. 17 - GCOV_PROFILE := n 18 - KASAN_SANITIZE := n 19 - UBSAN_SANITIZE := n 20 - KCOV_INSTRUMENT := n 13 + obj-$(CONFIG_KVM) += vhe/ nvhe/ 14 + obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o
+4 -4
arch/arm64/kvm/hyp/aarch32.c
··· 44 44 /* 45 45 * Check if a trapped instruction should have been executed or not. 46 46 */ 47 - bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) 47 + bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) 48 48 { 49 49 unsigned long cpsr; 50 50 u32 cpsr_cond; 51 51 int cond; 52 52 53 53 /* Top two bits non-zero? Unconditional. */ 54 - if (kvm_vcpu_get_hsr(vcpu) >> 30) 54 + if (kvm_vcpu_get_esr(vcpu) >> 30) 55 55 return true; 56 56 57 57 /* Is condition field valid? */ ··· 93 93 * 94 94 * IT[7:0] -> CPSR[26:25],CPSR[15:10] 95 95 */ 96 - static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) 96 + static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) 97 97 { 98 98 unsigned long itbits, cond; 99 99 unsigned long cpsr = *vcpu_cpsr(vcpu); ··· 123 123 * kvm_skip_instr - skip a trapped instruction and proceed to the next 124 124 * @vcpu: The vcpu pointer 125 125 */ 126 - void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) 126 + void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) 127 127 { 128 128 u32 pc = *vcpu_pc(vcpu); 129 129 bool is_thumb;
+16 -72
arch/arm64/kvm/hyp/debug-sr.c arch/arm64/kvm/hyp/include/hyp/debug-sr.h
··· 4 4 * Author: Marc Zyngier <marc.zyngier@arm.com> 5 5 */ 6 6 7 + #ifndef __ARM64_KVM_HYP_DEBUG_SR_H__ 8 + #define __ARM64_KVM_HYP_DEBUG_SR_H__ 9 + 7 10 #include <linux/compiler.h> 8 11 #include <linux/kvm_host.h> 9 12 ··· 88 85 default: write_debug(ptr[0], reg, 0); \ 89 86 } 90 87 91 - static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) 92 - { 93 - u64 reg; 94 - 95 - /* Clear pmscr in case of early return */ 96 - *pmscr_el1 = 0; 97 - 98 - /* SPE present on this CPU? */ 99 - if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), 100 - ID_AA64DFR0_PMSVER_SHIFT)) 101 - return; 102 - 103 - /* Yes; is it owned by EL3? */ 104 - reg = read_sysreg_s(SYS_PMBIDR_EL1); 105 - if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) 106 - return; 107 - 108 - /* No; is the host actually using the thing? */ 109 - reg = read_sysreg_s(SYS_PMBLIMITR_EL1); 110 - if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) 111 - return; 112 - 113 - /* Yes; save the control register and disable data generation */ 114 - *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); 115 - write_sysreg_s(0, SYS_PMSCR_EL1); 116 - isb(); 117 - 118 - /* Now drain all buffered data to memory */ 119 - psb_csync(); 120 - dsb(nsh); 121 - } 122 - 123 - static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1) 124 - { 125 - if (!pmscr_el1) 126 - return; 127 - 128 - /* The host page table is installed, but not yet synchronised */ 129 - isb(); 130 - 131 - /* Re-enable data generation */ 132 - write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); 133 - } 134 - 135 - static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, 136 - struct kvm_guest_debug_arch *dbg, 137 - struct kvm_cpu_context *ctxt) 88 + static void __debug_save_state(struct kvm_guest_debug_arch *dbg, 89 + struct kvm_cpu_context *ctxt) 138 90 { 139 91 u64 aa64dfr0; 140 92 int brps, wrps; ··· 103 145 save_debug(dbg->dbg_wcr, dbgwcr, wrps); 104 146 save_debug(dbg->dbg_wvr, dbgwvr, wrps); 105 147 106 - ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); 148 
+ ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1); 107 149 } 108 150 109 - static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, 110 - struct kvm_guest_debug_arch *dbg, 111 - struct kvm_cpu_context *ctxt) 151 + static void __debug_restore_state(struct kvm_guest_debug_arch *dbg, 152 + struct kvm_cpu_context *ctxt) 112 153 { 113 154 u64 aa64dfr0; 114 155 int brps, wrps; ··· 122 165 restore_debug(dbg->dbg_wcr, dbgwcr, wrps); 123 166 restore_debug(dbg->dbg_wvr, dbgwvr, wrps); 124 167 125 - write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); 168 + write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1); 126 169 } 127 170 128 - void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) 171 + static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) 129 172 { 130 173 struct kvm_cpu_context *host_ctxt; 131 174 struct kvm_cpu_context *guest_ctxt; 132 175 struct kvm_guest_debug_arch *host_dbg; 133 176 struct kvm_guest_debug_arch *guest_dbg; 134 - 135 - /* 136 - * Non-VHE: Disable and flush SPE data generation 137 - * VHE: The vcpu can run, but it can't hide. 
138 - */ 139 - if (!has_vhe()) 140 - __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); 141 177 142 178 if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) 143 179 return; ··· 140 190 host_dbg = &vcpu->arch.host_debug_state.regs; 141 191 guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); 142 192 143 - __debug_save_state(vcpu, host_dbg, host_ctxt); 144 - __debug_restore_state(vcpu, guest_dbg, guest_ctxt); 193 + __debug_save_state(host_dbg, host_ctxt); 194 + __debug_restore_state(guest_dbg, guest_ctxt); 145 195 } 146 196 147 - void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) 197 + static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) 148 198 { 149 199 struct kvm_cpu_context *host_ctxt; 150 200 struct kvm_cpu_context *guest_ctxt; 151 201 struct kvm_guest_debug_arch *host_dbg; 152 202 struct kvm_guest_debug_arch *guest_dbg; 153 - 154 - if (!has_vhe()) 155 - __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); 156 203 157 204 if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) 158 205 return; ··· 159 212 host_dbg = &vcpu->arch.host_debug_state.regs; 160 213 guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); 161 214 162 - __debug_save_state(vcpu, guest_dbg, guest_ctxt); 163 - __debug_restore_state(vcpu, host_dbg, host_ctxt); 215 + __debug_save_state(guest_dbg, guest_ctxt); 216 + __debug_restore_state(host_dbg, host_ctxt); 164 217 165 218 vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; 166 219 } 167 220 168 - u32 __hyp_text __kvm_get_mdcr_el2(void) 169 - { 170 - return read_sysreg(mdcr_el2); 171 - } 221 + #endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
+1 -3
arch/arm64/kvm/hyp/entry.S
··· 16 16 #include <asm/kvm_mmu.h> 17 17 #include <asm/kvm_ptrauth.h> 18 18 19 - #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) 20 - #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) 19 + #define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) 21 20 #define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8) 22 21 23 22 .text 24 - .pushsection .hyp.text, "ax" 25 23 26 24 /* 27 25 * We treat x18 as callee-saved as the host may use it as a platform
-1
arch/arm64/kvm/hyp/fpsimd.S
··· 9 9 #include <asm/fpsimdmacros.h> 10 10 11 11 .text 12 - .pushsection .hyp.text, "ax" 13 12 14 13 SYM_FUNC_START(__fpsimd_save_state) 15 14 fpsimd_save x0, 1
+4 -17
arch/arm64/kvm/hyp/hyp-entry.S
··· 16 16 #include <asm/mmu.h> 17 17 18 18 .text 19 - .pushsection .hyp.text, "ax" 20 19 21 20 .macro do_el2_call 22 21 /* ··· 39 40 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne 40 41 b.ne el1_trap 41 42 43 + #ifdef __KVM_NVHE_HYPERVISOR__ 42 44 mrs x1, vttbr_el2 // If vttbr is valid, the guest 43 45 cbnz x1, el1_hvc_guest // called HVC 44 46 ··· 74 74 75 75 eret 76 76 sb 77 + #endif /* __KVM_NVHE_HYPERVISOR__ */ 77 78 78 79 el1_hvc_guest: 79 80 /* ··· 181 180 eret 182 181 sb 183 182 183 + #ifdef __KVM_NVHE_HYPERVISOR__ 184 184 SYM_FUNC_START(__hyp_do_panic) 185 185 mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ 186 186 PSR_MODE_EL1h) ··· 191 189 eret 192 190 sb 193 191 SYM_FUNC_END(__hyp_do_panic) 192 + #endif 194 193 195 194 SYM_CODE_START(__hyp_panic) 196 195 get_host_ctxt x0, x1 ··· 321 318 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ 322 319 .org 1b 323 320 SYM_CODE_END(__bp_harden_hyp_vecs) 324 - 325 - .popsection 326 - 327 - SYM_CODE_START(__smccc_workaround_1_smc) 328 - esb 329 - sub sp, sp, #(8 * 4) 330 - stp x2, x3, [sp, #(8 * 0)] 331 - stp x0, x1, [sp, #(8 * 2)] 332 - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 333 - smc #0 334 - ldp x2, x3, [sp, #(8 * 0)] 335 - ldp x0, x1, [sp, #(8 * 2)] 336 - add sp, sp, #(8 * 4) 337 - 1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ 338 - .org 1b 339 - SYM_CODE_END(__smccc_workaround_1_smc) 340 321 #endif
+511
arch/arm64/kvm/hyp/include/hyp/switch.h
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #ifndef __ARM64_KVM_HYP_SWITCH_H__ 8 + #define __ARM64_KVM_HYP_SWITCH_H__ 9 + 10 + #include <linux/arm-smccc.h> 11 + #include <linux/kvm_host.h> 12 + #include <linux/types.h> 13 + #include <linux/jump_label.h> 14 + #include <uapi/linux/psci.h> 15 + 16 + #include <kvm/arm_psci.h> 17 + 18 + #include <asm/barrier.h> 19 + #include <asm/cpufeature.h> 20 + #include <asm/kprobes.h> 21 + #include <asm/kvm_asm.h> 22 + #include <asm/kvm_emulate.h> 23 + #include <asm/kvm_hyp.h> 24 + #include <asm/kvm_mmu.h> 25 + #include <asm/fpsimd.h> 26 + #include <asm/debug-monitors.h> 27 + #include <asm/processor.h> 28 + #include <asm/thread_info.h> 29 + 30 + extern const char __hyp_panic_string[]; 31 + 32 + /* Check whether the FP regs were dirtied while in the host-side run loop: */ 33 + static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) 34 + { 35 + /* 36 + * When the system doesn't support FP/SIMD, we cannot rely on 37 + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an 38 + * abort on the very first access to FP and thus we should never 39 + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always 40 + * trap the accesses. 
41 + */ 42 + if (!system_supports_fpsimd() || 43 + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) 44 + vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | 45 + KVM_ARM64_FP_HOST); 46 + 47 + return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); 48 + } 49 + 50 + /* Save the 32-bit only FPSIMD system register state */ 51 + static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) 52 + { 53 + if (!vcpu_el1_is_32bit(vcpu)) 54 + return; 55 + 56 + __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); 57 + } 58 + 59 + static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) 60 + { 61 + /* 62 + * We are about to set CPTR_EL2.TFP to trap all floating point 63 + * register accesses to EL2, however, the ARM ARM clearly states that 64 + * traps are only taken to EL2 if the operation would not otherwise 65 + * trap to EL1. Therefore, always make sure that for 32-bit guests, 66 + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. 67 + * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to 68 + * it will cause an exception. 69 + */ 70 + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { 71 + write_sysreg(1 << 30, fpexc32_el2); 72 + isb(); 73 + } 74 + } 75 + 76 + static inline void __activate_traps_common(struct kvm_vcpu *vcpu) 77 + { 78 + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ 79 + write_sysreg(1 << 15, hstr_el2); 80 + 81 + /* 82 + * Make sure we trap PMU access from EL0 to EL2. Also sanitize 83 + * PMSELR_EL0 to make sure it never contains the cycle 84 + * counter, which could make a PMXEVCNTR_EL0 access UNDEF at 85 + * EL1 instead of being trapped to EL2. 
86 + */ 87 + write_sysreg(0, pmselr_el0); 88 + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); 89 + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); 90 + } 91 + 92 + static inline void __deactivate_traps_common(void) 93 + { 94 + write_sysreg(0, hstr_el2); 95 + write_sysreg(0, pmuserenr_el0); 96 + } 97 + 98 + static inline void ___activate_traps(struct kvm_vcpu *vcpu) 99 + { 100 + u64 hcr = vcpu->arch.hcr_el2; 101 + 102 + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) 103 + hcr |= HCR_TVM; 104 + 105 + write_sysreg(hcr, hcr_el2); 106 + 107 + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) 108 + write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); 109 + } 110 + 111 + static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) 112 + { 113 + /* 114 + * If we pended a virtual abort, preserve it until it gets 115 + * cleared. See D1.14.3 (Virtual Interrupts) for details, but 116 + * the crucial bit is "On taking a vSError interrupt, 117 + * HCR_EL2.VSE is cleared to 0." 118 + */ 119 + if (vcpu->arch.hcr_el2 & HCR_VSE) { 120 + vcpu->arch.hcr_el2 &= ~HCR_VSE; 121 + vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; 122 + } 123 + } 124 + 125 + static inline void __activate_vm(struct kvm_s2_mmu *mmu) 126 + { 127 + __load_guest_stage2(mmu); 128 + } 129 + 130 + static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) 131 + { 132 + u64 par, tmp; 133 + 134 + /* 135 + * Resolve the IPA the hard way using the guest VA. 136 + * 137 + * Stage-1 translation already validated the memory access 138 + * rights. As such, we can use the EL1 translation regime, and 139 + * don't have to distinguish between EL0 and EL1 access. 140 + * 141 + * We do need to save/restore PAR_EL1 though, as we haven't 142 + * saved the guest context yet, and we may return early... 
143 + */ 144 + par = read_sysreg(par_el1); 145 + asm volatile("at s1e1r, %0" : : "r" (far)); 146 + isb(); 147 + 148 + tmp = read_sysreg(par_el1); 149 + write_sysreg(par, par_el1); 150 + 151 + if (unlikely(tmp & SYS_PAR_EL1_F)) 152 + return false; /* Translation failed, back to guest */ 153 + 154 + /* Convert PAR to HPFAR format */ 155 + *hpfar = PAR_TO_HPFAR(tmp); 156 + return true; 157 + } 158 + 159 + static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) 160 + { 161 + u8 ec; 162 + u64 esr; 163 + u64 hpfar, far; 164 + 165 + esr = vcpu->arch.fault.esr_el2; 166 + ec = ESR_ELx_EC(esr); 167 + 168 + if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) 169 + return true; 170 + 171 + far = read_sysreg_el2(SYS_FAR); 172 + 173 + /* 174 + * The HPFAR can be invalid if the stage 2 fault did not 175 + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW 176 + * bit is clear) and one of the two following cases are true: 177 + * 1. The fault was due to a permission fault 178 + * 2. The processor carries errata 834220 179 + * 180 + * Therefore, for all non S1PTW faults where we either have a 181 + * permission fault or the errata workaround is enabled, we 182 + * resolve the IPA using the AT instruction. 
183 + */ 184 + if (!(esr & ESR_ELx_S1PTW) && 185 + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || 186 + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { 187 + if (!__translate_far_to_hpfar(far, &hpfar)) 188 + return false; 189 + } else { 190 + hpfar = read_sysreg(hpfar_el2); 191 + } 192 + 193 + vcpu->arch.fault.far_el2 = far; 194 + vcpu->arch.fault.hpfar_el2 = hpfar; 195 + return true; 196 + } 197 + 198 + /* Check for an FPSIMD/SVE trap and handle as appropriate */ 199 + static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) 200 + { 201 + bool vhe, sve_guest, sve_host; 202 + u8 esr_ec; 203 + 204 + if (!system_supports_fpsimd()) 205 + return false; 206 + 207 + /* 208 + * Currently system_supports_sve() currently implies has_vhe(), 209 + * so the check is redundant. However, has_vhe() can be determined 210 + * statically and helps the compiler remove dead code. 211 + */ 212 + if (has_vhe() && system_supports_sve()) { 213 + sve_guest = vcpu_has_sve(vcpu); 214 + sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; 215 + vhe = true; 216 + } else { 217 + sve_guest = false; 218 + sve_host = false; 219 + vhe = has_vhe(); 220 + } 221 + 222 + esr_ec = kvm_vcpu_trap_get_class(vcpu); 223 + if (esr_ec != ESR_ELx_EC_FP_ASIMD && 224 + esr_ec != ESR_ELx_EC_SVE) 225 + return false; 226 + 227 + /* Don't handle SVE traps for non-SVE vcpus here: */ 228 + if (!sve_guest) 229 + if (esr_ec != ESR_ELx_EC_FP_ASIMD) 230 + return false; 231 + 232 + /* Valid trap. Switch the context: */ 233 + 234 + if (vhe) { 235 + u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; 236 + 237 + if (sve_guest) 238 + reg |= CPACR_EL1_ZEN; 239 + 240 + write_sysreg(reg, cpacr_el1); 241 + } else { 242 + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, 243 + cptr_el2); 244 + } 245 + 246 + isb(); 247 + 248 + if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { 249 + /* 250 + * In the SVE case, VHE is assumed: it is enforced by 251 + * Kconfig and kvm_arch_init(). 
252 + */ 253 + if (sve_host) { 254 + struct thread_struct *thread = container_of( 255 + vcpu->arch.host_fpsimd_state, 256 + struct thread_struct, uw.fpsimd_state); 257 + 258 + sve_save_state(sve_pffr(thread), 259 + &vcpu->arch.host_fpsimd_state->fpsr); 260 + } else { 261 + __fpsimd_save_state(vcpu->arch.host_fpsimd_state); 262 + } 263 + 264 + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; 265 + } 266 + 267 + if (sve_guest) { 268 + sve_load_state(vcpu_sve_pffr(vcpu), 269 + &vcpu->arch.ctxt.fp_regs.fpsr, 270 + sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); 271 + write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12); 272 + } else { 273 + __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); 274 + } 275 + 276 + /* Skip restoring fpexc32 for AArch64 guests */ 277 + if (!(read_sysreg(hcr_el2) & HCR_RW)) 278 + write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); 279 + 280 + vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; 281 + 282 + return true; 283 + } 284 + 285 + static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) 286 + { 287 + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); 288 + int rt = kvm_vcpu_sys_get_rt(vcpu); 289 + u64 val = vcpu_get_reg(vcpu, rt); 290 + 291 + /* 292 + * The normal sysreg handling code expects to see the traps, 293 + * let's not do anything here. 
294 + */ 295 + if (vcpu->arch.hcr_el2 & HCR_TVM) 296 + return false; 297 + 298 + switch (sysreg) { 299 + case SYS_SCTLR_EL1: 300 + write_sysreg_el1(val, SYS_SCTLR); 301 + break; 302 + case SYS_TTBR0_EL1: 303 + write_sysreg_el1(val, SYS_TTBR0); 304 + break; 305 + case SYS_TTBR1_EL1: 306 + write_sysreg_el1(val, SYS_TTBR1); 307 + break; 308 + case SYS_TCR_EL1: 309 + write_sysreg_el1(val, SYS_TCR); 310 + break; 311 + case SYS_ESR_EL1: 312 + write_sysreg_el1(val, SYS_ESR); 313 + break; 314 + case SYS_FAR_EL1: 315 + write_sysreg_el1(val, SYS_FAR); 316 + break; 317 + case SYS_AFSR0_EL1: 318 + write_sysreg_el1(val, SYS_AFSR0); 319 + break; 320 + case SYS_AFSR1_EL1: 321 + write_sysreg_el1(val, SYS_AFSR1); 322 + break; 323 + case SYS_MAIR_EL1: 324 + write_sysreg_el1(val, SYS_MAIR); 325 + break; 326 + case SYS_AMAIR_EL1: 327 + write_sysreg_el1(val, SYS_AMAIR); 328 + break; 329 + case SYS_CONTEXTIDR_EL1: 330 + write_sysreg_el1(val, SYS_CONTEXTIDR); 331 + break; 332 + default: 333 + return false; 334 + } 335 + 336 + __kvm_skip_instr(vcpu); 337 + return true; 338 + } 339 + 340 + static inline bool esr_is_ptrauth_trap(u32 esr) 341 + { 342 + u32 ec = ESR_ELx_EC(esr); 343 + 344 + if (ec == ESR_ELx_EC_PAC) 345 + return true; 346 + 347 + if (ec != ESR_ELx_EC_SYS64) 348 + return false; 349 + 350 + switch (esr_sys64_to_sysreg(esr)) { 351 + case SYS_APIAKEYLO_EL1: 352 + case SYS_APIAKEYHI_EL1: 353 + case SYS_APIBKEYLO_EL1: 354 + case SYS_APIBKEYHI_EL1: 355 + case SYS_APDAKEYLO_EL1: 356 + case SYS_APDAKEYHI_EL1: 357 + case SYS_APDBKEYLO_EL1: 358 + case SYS_APDBKEYHI_EL1: 359 + case SYS_APGAKEYLO_EL1: 360 + case SYS_APGAKEYHI_EL1: 361 + return true; 362 + } 363 + 364 + return false; 365 + } 366 + 367 + #define __ptrauth_save_key(ctxt, key) \ 368 + do { \ 369 + u64 __val; \ 370 + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ 371 + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ 372 + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ 373 + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; 
\ 374 + } while(0) 375 + 376 + static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) 377 + { 378 + struct kvm_cpu_context *ctxt; 379 + u64 val; 380 + 381 + if (!vcpu_has_ptrauth(vcpu) || 382 + !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) 383 + return false; 384 + 385 + ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 386 + __ptrauth_save_key(ctxt, APIA); 387 + __ptrauth_save_key(ctxt, APIB); 388 + __ptrauth_save_key(ctxt, APDA); 389 + __ptrauth_save_key(ctxt, APDB); 390 + __ptrauth_save_key(ctxt, APGA); 391 + 392 + vcpu_ptrauth_enable(vcpu); 393 + 394 + val = read_sysreg(hcr_el2); 395 + val |= (HCR_API | HCR_APK); 396 + write_sysreg(val, hcr_el2); 397 + 398 + return true; 399 + } 400 + 401 + /* 402 + * Return true when we were able to fixup the guest exit and should return to 403 + * the guest, false when we should restore the host state and return to the 404 + * main run loop. 405 + */ 406 + static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) 407 + { 408 + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) 409 + vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); 410 + 411 + /* 412 + * We're using the raw exception code in order to only process 413 + * the trap if no SError is pending. We will come back to the 414 + * same PC once the SError has been injected, and replay the 415 + * trapping instruction. 416 + */ 417 + if (*exit_code != ARM_EXCEPTION_TRAP) 418 + goto exit; 419 + 420 + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && 421 + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && 422 + handle_tx2_tvm(vcpu)) 423 + return true; 424 + 425 + /* 426 + * We trap the first access to the FP/SIMD to save the host context 427 + * and restore the guest context lazily. 428 + * If FP/SIMD is not implemented, handle the trap and inject an 429 + * undefined instruction exception to the guest. 430 + * Similarly for trapped SVE accesses. 
431 + */ 432 + if (__hyp_handle_fpsimd(vcpu)) 433 + return true; 434 + 435 + if (__hyp_handle_ptrauth(vcpu)) 436 + return true; 437 + 438 + if (!__populate_fault_info(vcpu)) 439 + return true; 440 + 441 + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { 442 + bool valid; 443 + 444 + valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && 445 + kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && 446 + kvm_vcpu_dabt_isvalid(vcpu) && 447 + !kvm_vcpu_abt_issea(vcpu) && 448 + !kvm_vcpu_dabt_iss1tw(vcpu); 449 + 450 + if (valid) { 451 + int ret = __vgic_v2_perform_cpuif_access(vcpu); 452 + 453 + if (ret == 1) 454 + return true; 455 + 456 + /* Promote an illegal access to an SError.*/ 457 + if (ret == -1) 458 + *exit_code = ARM_EXCEPTION_EL1_SERROR; 459 + 460 + goto exit; 461 + } 462 + } 463 + 464 + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && 465 + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || 466 + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { 467 + int ret = __vgic_v3_perform_cpuif_access(vcpu); 468 + 469 + if (ret == 1) 470 + return true; 471 + } 472 + 473 + exit: 474 + /* Return to the host kernel and handle the exit */ 475 + return false; 476 + } 477 + 478 + static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu) 479 + { 480 + if (!cpus_have_final_cap(ARM64_SSBD)) 481 + return false; 482 + 483 + return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); 484 + } 485 + 486 + static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) 487 + { 488 + #ifdef CONFIG_ARM64_SSBD 489 + /* 490 + * The host runs with the workaround always present. If the 491 + * guest wants it disabled, so be it... 
492 + */ 493 + if (__needs_ssbd_off(vcpu) && 494 + __hyp_this_cpu_read(arm64_ssbd_callback_required)) 495 + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); 496 + #endif 497 + } 498 + 499 + static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) 500 + { 501 + #ifdef CONFIG_ARM64_SSBD 502 + /* 503 + * If the guest has disabled the workaround, bring it back on. 504 + */ 505 + if (__needs_ssbd_off(vcpu) && 506 + __hyp_this_cpu_read(arm64_ssbd_callback_required)) 507 + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); 508 + #endif 509 + } 510 + 511 + #endif /* __ARM64_KVM_HYP_SWITCH_H__ */
+193
arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2012-2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #ifndef __ARM64_KVM_HYP_SYSREG_SR_H__ 8 + #define __ARM64_KVM_HYP_SYSREG_SR_H__ 9 + 10 + #include <linux/compiler.h> 11 + #include <linux/kvm_host.h> 12 + 13 + #include <asm/kprobes.h> 14 + #include <asm/kvm_asm.h> 15 + #include <asm/kvm_emulate.h> 16 + #include <asm/kvm_hyp.h> 17 + 18 + static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) 19 + { 20 + ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); 21 + } 22 + 23 + static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) 24 + { 25 + ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); 26 + ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); 27 + } 28 + 29 + static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) 30 + { 31 + ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); 32 + ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); 33 + ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR); 34 + ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); 35 + ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); 36 + ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); 37 + ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); 38 + ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); 39 + ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); 40 + ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR); 41 + ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR); 42 + ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR); 43 + ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); 44 + ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); 45 + ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); 46 + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); 47 + ctxt_sys_reg(ctxt, TPIDR_EL1) = 
read_sysreg(tpidr_el1); 48 + 49 + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); 50 + ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); 51 + ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); 52 + } 53 + 54 + static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) 55 + { 56 + ctxt->regs.pc = read_sysreg_el2(SYS_ELR); 57 + ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); 58 + 59 + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) 60 + ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); 61 + } 62 + 63 + static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) 64 + { 65 + write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); 66 + } 67 + 68 + static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) 69 + { 70 + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); 71 + write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); 72 + } 73 + 74 + static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) 75 + { 76 + write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); 77 + write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); 78 + 79 + if (has_vhe() || 80 + !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 81 + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); 82 + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); 83 + } else if (!ctxt->__hyp_running_vcpu) { 84 + /* 85 + * Must only be done for guest registers, hence the context 86 + * test. We're coming from the host, so SCTLR.M is already 87 + * set. Pairs with nVHE's __activate_traps(). 
88 + */ 89 + write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) | 90 + TCR_EPD1_MASK | TCR_EPD0_MASK), 91 + SYS_TCR); 92 + isb(); 93 + } 94 + 95 + write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); 96 + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); 97 + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); 98 + write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); 99 + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); 100 + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); 101 + write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR); 102 + write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR); 103 + write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR); 104 + write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); 105 + write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); 106 + write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); 107 + write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); 108 + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); 109 + 110 + if (!has_vhe() && 111 + cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && 112 + ctxt->__hyp_running_vcpu) { 113 + /* 114 + * Must only be done for host registers, hence the context 115 + * test. Pairs with nVHE's __deactivate_traps(). 116 + */ 117 + isb(); 118 + /* 119 + * At this stage, and thanks to the above isb(), S2 is 120 + * deconfigured and disabled. We can now restore the host's 121 + * S1 configuration: SCTLR, and only then TCR. 
122 + */ 123 + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); 124 + isb(); 125 + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); 126 + } 127 + 128 + write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); 129 + write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); 130 + write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); 131 + } 132 + 133 + static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) 134 + { 135 + u64 pstate = ctxt->regs.pstate; 136 + u64 mode = pstate & PSR_AA32_MODE_MASK; 137 + 138 + /* 139 + * Safety check to ensure we're setting the CPU up to enter the guest 140 + * in a less privileged mode. 141 + * 142 + * If we are attempting a return to EL2 or higher in AArch64 state, 143 + * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that 144 + * we'll take an illegal exception state exception immediately after 145 + * the ERET to the guest. Attempts to return to AArch32 Hyp will 146 + * result in an illegal exception return because EL2's execution state 147 + * is determined by SCR_EL3.RW. 
148 + */ 149 + if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) 150 + pstate = PSR_MODE_EL2h | PSR_IL_BIT; 151 + 152 + write_sysreg_el2(ctxt->regs.pc, SYS_ELR); 153 + write_sysreg_el2(pstate, SYS_SPSR); 154 + 155 + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) 156 + write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); 157 + } 158 + 159 + static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) 160 + { 161 + if (!vcpu_el1_is_32bit(vcpu)) 162 + return; 163 + 164 + vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt); 165 + vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und); 166 + vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); 167 + vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); 168 + 169 + __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); 170 + __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); 171 + 172 + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) 173 + __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); 174 + } 175 + 176 + static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) 177 + { 178 + if (!vcpu_el1_is_32bit(vcpu)) 179 + return; 180 + 181 + write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt); 182 + write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und); 183 + write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq); 184 + write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq); 185 + 186 + write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); 187 + write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); 188 + 189 + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) 190 + write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); 191 + } 192 + 193 + #endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */
+62
arch/arm64/kvm/hyp/nvhe/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + # 3 + # Makefile for Kernel-based Virtual Machine module, HYP/nVHE part 4 + # 5 + 6 + asflags-y := -D__KVM_NVHE_HYPERVISOR__ 7 + ccflags-y := -D__KVM_NVHE_HYPERVISOR__ 8 + 9 + obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o 10 + obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ 11 + ../fpsimd.o ../hyp-entry.o 12 + 13 + obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) 14 + extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) 15 + 16 + $(obj)/%.hyp.tmp.o: $(src)/%.c FORCE 17 + $(call if_changed_rule,cc_o_c) 18 + $(obj)/%.hyp.tmp.o: $(src)/%.S FORCE 19 + $(call if_changed_rule,as_o_S) 20 + $(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE 21 + $(call if_changed,hypcopy) 22 + 23 + # Disable reordering functions by GCC (enabled at -O2). 24 + # This pass puts functions into '.text.*' sections to aid the linker 25 + # in optimizing ELF layout. See HYPCOPY comment below for more info. 26 + ccflags-y += $(call cc-option,-fno-reorder-functions) 27 + 28 + # The HYPCOPY command uses `objcopy` to prefix all ELF symbol names 29 + # and relevant ELF section names to avoid clashes with VHE code/data. 30 + # 31 + # Hyp code is assumed to be in the '.text' section of the input object 32 + # files (with the exception of specialized sections such as 33 + # '.hyp.idmap.text'). This assumption may be broken by a compiler that 34 + # divides code into sections like '.text.unlikely' so as to optimize 35 + # ELF layout. HYPCOPY checks that no such sections exist in the input 36 + # using `objdump`, otherwise they would be linked together with other 37 + # kernel code and not memory-mapped correctly at runtime. 
38 + quiet_cmd_hypcopy = HYPCOPY $@ 39 + cmd_hypcopy = \ 40 + if $(OBJDUMP) -h $< | grep -F '.text.'; then \ 41 + echo "$@: function reordering not supported in nVHE hyp code" >&2; \ 42 + /bin/false; \ 43 + fi; \ 44 + $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \ 45 + --rename-section=.text=.hyp.text \ 46 + $< $@ 47 + 48 + # Remove ftrace and Shadow Call Stack CFLAGS. 49 + # This is equivalent to the 'notrace' and '__noscs' annotations. 50 + KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) 51 + 52 + # KVM nVHE code is run at a different exception code with a different map, so 53 + # compiler instrumentation that inserts callbacks or checks into the code may 54 + # cause crashes. Just disable it. 55 + GCOV_PROFILE := n 56 + KASAN_SANITIZE := n 57 + UBSAN_SANITIZE := n 58 + KCOV_INSTRUMENT := n 59 + 60 + # Skip objtool checking for this directory because nVHE code is compiled with 61 + # non-standard build rules. 62 + OBJECT_FILES_NON_STANDARD := y
+77
arch/arm64/kvm/hyp/nvhe/debug-sr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #include <hyp/debug-sr.h> 8 + 9 + #include <linux/compiler.h> 10 + #include <linux/kvm_host.h> 11 + 12 + #include <asm/debug-monitors.h> 13 + #include <asm/kvm_asm.h> 14 + #include <asm/kvm_hyp.h> 15 + #include <asm/kvm_mmu.h> 16 + 17 + static void __debug_save_spe(u64 *pmscr_el1) 18 + { 19 + u64 reg; 20 + 21 + /* Clear pmscr in case of early return */ 22 + *pmscr_el1 = 0; 23 + 24 + /* SPE present on this CPU? */ 25 + if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), 26 + ID_AA64DFR0_PMSVER_SHIFT)) 27 + return; 28 + 29 + /* Yes; is it owned by EL3? */ 30 + reg = read_sysreg_s(SYS_PMBIDR_EL1); 31 + if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) 32 + return; 33 + 34 + /* No; is the host actually using the thing? */ 35 + reg = read_sysreg_s(SYS_PMBLIMITR_EL1); 36 + if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) 37 + return; 38 + 39 + /* Yes; save the control register and disable data generation */ 40 + *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); 41 + write_sysreg_s(0, SYS_PMSCR_EL1); 42 + isb(); 43 + 44 + /* Now drain all buffered data to memory */ 45 + psb_csync(); 46 + dsb(nsh); 47 + } 48 + 49 + static void __debug_restore_spe(u64 pmscr_el1) 50 + { 51 + if (!pmscr_el1) 52 + return; 53 + 54 + /* The host page table is installed, but not yet synchronised */ 55 + isb(); 56 + 57 + /* Re-enable data generation */ 58 + write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); 59 + } 60 + 61 + void __debug_switch_to_guest(struct kvm_vcpu *vcpu) 62 + { 63 + /* Disable and flush SPE data generation */ 64 + __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); 65 + __debug_switch_to_guest_common(vcpu); 66 + } 67 + 68 + void __debug_switch_to_host(struct kvm_vcpu *vcpu) 69 + { 70 + __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); 71 + __debug_switch_to_host_common(vcpu); 72 + } 73 + 74 + u32 
__kvm_get_mdcr_el2(void) 75 + { 76 + return read_sysreg(mdcr_el2); 77 + }
+272
arch/arm64/kvm/hyp/nvhe/switch.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #include <hyp/switch.h> 8 + #include <hyp/sysreg-sr.h> 9 + 10 + #include <linux/arm-smccc.h> 11 + #include <linux/kvm_host.h> 12 + #include <linux/types.h> 13 + #include <linux/jump_label.h> 14 + #include <uapi/linux/psci.h> 15 + 16 + #include <kvm/arm_psci.h> 17 + 18 + #include <asm/barrier.h> 19 + #include <asm/cpufeature.h> 20 + #include <asm/kprobes.h> 21 + #include <asm/kvm_asm.h> 22 + #include <asm/kvm_emulate.h> 23 + #include <asm/kvm_hyp.h> 24 + #include <asm/kvm_mmu.h> 25 + #include <asm/fpsimd.h> 26 + #include <asm/debug-monitors.h> 27 + #include <asm/processor.h> 28 + #include <asm/thread_info.h> 29 + 30 + static void __activate_traps(struct kvm_vcpu *vcpu) 31 + { 32 + u64 val; 33 + 34 + ___activate_traps(vcpu); 35 + __activate_traps_common(vcpu); 36 + 37 + val = CPTR_EL2_DEFAULT; 38 + val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; 39 + if (!update_fp_enabled(vcpu)) { 40 + val |= CPTR_EL2_TFP; 41 + __activate_traps_fpsimd32(vcpu); 42 + } 43 + 44 + write_sysreg(val, cptr_el2); 45 + 46 + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 47 + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; 48 + 49 + isb(); 50 + /* 51 + * At this stage, and thanks to the above isb(), S2 is 52 + * configured and enabled. We can now restore the guest's S1 53 + * configuration: SCTLR, and only then TCR. 
54 + */ 55 + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); 56 + isb(); 57 + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); 58 + } 59 + } 60 + 61 + static void __deactivate_traps(struct kvm_vcpu *vcpu) 62 + { 63 + u64 mdcr_el2; 64 + 65 + ___deactivate_traps(vcpu); 66 + 67 + mdcr_el2 = read_sysreg(mdcr_el2); 68 + 69 + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 70 + u64 val; 71 + 72 + /* 73 + * Set the TCR and SCTLR registers in the exact opposite 74 + * sequence as __activate_traps (first prevent walks, 75 + * then force the MMU on). A generous sprinkling of isb() 76 + * ensure that things happen in this exact order. 77 + */ 78 + val = read_sysreg_el1(SYS_TCR); 79 + write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); 80 + isb(); 81 + val = read_sysreg_el1(SYS_SCTLR); 82 + write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); 83 + isb(); 84 + } 85 + 86 + __deactivate_traps_common(); 87 + 88 + mdcr_el2 &= MDCR_EL2_HPMN_MASK; 89 + mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; 90 + 91 + write_sysreg(mdcr_el2, mdcr_el2); 92 + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); 93 + write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); 94 + } 95 + 96 + static void __deactivate_vm(struct kvm_vcpu *vcpu) 97 + { 98 + write_sysreg(0, vttbr_el2); 99 + } 100 + 101 + /* Save VGICv3 state on non-VHE systems */ 102 + static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) 103 + { 104 + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { 105 + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); 106 + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); 107 + } 108 + } 109 + 110 + /* Restore VGICv3 state on non_VEH systems */ 111 + static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) 112 + { 113 + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { 114 + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); 115 + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); 116 + } 117 + } 118 + 119 + /** 120 + * 
Disable host events, enable guest events 121 + */ 122 + static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) 123 + { 124 + struct kvm_host_data *host; 125 + struct kvm_pmu_events *pmu; 126 + 127 + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); 128 + pmu = &host->pmu_events; 129 + 130 + if (pmu->events_host) 131 + write_sysreg(pmu->events_host, pmcntenclr_el0); 132 + 133 + if (pmu->events_guest) 134 + write_sysreg(pmu->events_guest, pmcntenset_el0); 135 + 136 + return (pmu->events_host || pmu->events_guest); 137 + } 138 + 139 + /** 140 + * Disable guest events, enable host events 141 + */ 142 + static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) 143 + { 144 + struct kvm_host_data *host; 145 + struct kvm_pmu_events *pmu; 146 + 147 + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); 148 + pmu = &host->pmu_events; 149 + 150 + if (pmu->events_guest) 151 + write_sysreg(pmu->events_guest, pmcntenclr_el0); 152 + 153 + if (pmu->events_host) 154 + write_sysreg(pmu->events_host, pmcntenset_el0); 155 + } 156 + 157 + /* Switch to the guest for legacy non-VHE systems */ 158 + int __kvm_vcpu_run(struct kvm_vcpu *vcpu) 159 + { 160 + struct kvm_cpu_context *host_ctxt; 161 + struct kvm_cpu_context *guest_ctxt; 162 + bool pmu_switch_needed; 163 + u64 exit_code; 164 + 165 + /* 166 + * Having IRQs masked via PMR when entering the guest means the GIC 167 + * will not signal the CPU of interrupts of lower priority, and the 168 + * only way to get out will be via guest exceptions. 169 + * Naturally, we want to avoid this. 
170 + */ 171 + if (system_uses_irq_prio_masking()) { 172 + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); 173 + pmr_sync(); 174 + } 175 + 176 + vcpu = kern_hyp_va(vcpu); 177 + 178 + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 179 + host_ctxt->__hyp_running_vcpu = vcpu; 180 + guest_ctxt = &vcpu->arch.ctxt; 181 + 182 + pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); 183 + 184 + __sysreg_save_state_nvhe(host_ctxt); 185 + 186 + /* 187 + * We must restore the 32-bit state before the sysregs, thanks 188 + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). 189 + * 190 + * Also, and in order to be able to deal with erratum #1319537 (A57) 191 + * and #1319367 (A72), we must ensure that all VM-related sysreg are 192 + * restored before we enable S2 translation. 193 + */ 194 + __sysreg32_restore_state(vcpu); 195 + __sysreg_restore_state_nvhe(guest_ctxt); 196 + 197 + __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu)); 198 + __activate_traps(vcpu); 199 + 200 + __hyp_vgic_restore_state(vcpu); 201 + __timer_enable_traps(vcpu); 202 + 203 + __debug_switch_to_guest(vcpu); 204 + 205 + __set_guest_arch_workaround_state(vcpu); 206 + 207 + do { 208 + /* Jump in the fire! */ 209 + exit_code = __guest_enter(vcpu, host_ctxt); 210 + 211 + /* And we're baaack! */ 212 + } while (fixup_guest_exit(vcpu, &exit_code)); 213 + 214 + __set_host_arch_workaround_state(vcpu); 215 + 216 + __sysreg_save_state_nvhe(guest_ctxt); 217 + __sysreg32_save_state(vcpu); 218 + __timer_disable_traps(vcpu); 219 + __hyp_vgic_save_state(vcpu); 220 + 221 + __deactivate_traps(vcpu); 222 + __deactivate_vm(vcpu); 223 + 224 + __sysreg_restore_state_nvhe(host_ctxt); 225 + 226 + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) 227 + __fpsimd_save_fpexc32(vcpu); 228 + 229 + /* 230 + * This must come after restoring the host sysregs, since a non-VHE 231 + * system may enable SPE here and make use of the TTBRs. 
232 + */ 233 + __debug_switch_to_host(vcpu); 234 + 235 + if (pmu_switch_needed) 236 + __pmu_switch_to_host(host_ctxt); 237 + 238 + /* Returning to host will clear PSR.I, remask PMR if needed */ 239 + if (system_uses_irq_prio_masking()) 240 + gic_write_pmr(GIC_PRIO_IRQOFF); 241 + 242 + return exit_code; 243 + } 244 + 245 + void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) 246 + { 247 + u64 spsr = read_sysreg_el2(SYS_SPSR); 248 + u64 elr = read_sysreg_el2(SYS_ELR); 249 + u64 par = read_sysreg(par_el1); 250 + struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu; 251 + unsigned long str_va; 252 + 253 + if (read_sysreg(vttbr_el2)) { 254 + __timer_disable_traps(vcpu); 255 + __deactivate_traps(vcpu); 256 + __deactivate_vm(vcpu); 257 + __sysreg_restore_state_nvhe(host_ctxt); 258 + } 259 + 260 + /* 261 + * Force the panic string to be loaded from the literal pool, 262 + * making sure it is a kernel address and not a PC-relative 263 + * reference. 264 + */ 265 + asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); 266 + 267 + __hyp_do_panic(str_va, 268 + spsr, elr, 269 + read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), 270 + read_sysreg(hpfar_el2), par, vcpu); 271 + unreachable(); 272 + }
+46
arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2012-2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #include <hyp/sysreg-sr.h> 8 + 9 + #include <linux/compiler.h> 10 + #include <linux/kvm_host.h> 11 + 12 + #include <asm/kprobes.h> 13 + #include <asm/kvm_asm.h> 14 + #include <asm/kvm_emulate.h> 15 + #include <asm/kvm_hyp.h> 16 + 17 + /* 18 + * Non-VHE: Both host and guest must save everything. 19 + */ 20 + 21 + void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) 22 + { 23 + __sysreg_save_el1_state(ctxt); 24 + __sysreg_save_common_state(ctxt); 25 + __sysreg_save_user_state(ctxt); 26 + __sysreg_save_el2_return_state(ctxt); 27 + } 28 + 29 + void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) 30 + { 31 + __sysreg_restore_el1_state(ctxt); 32 + __sysreg_restore_common_state(ctxt); 33 + __sysreg_restore_user_state(ctxt); 34 + __sysreg_restore_el2_return_state(ctxt); 35 + } 36 + 37 + void __kvm_enable_ssbs(void) 38 + { 39 + u64 tmp; 40 + 41 + asm volatile( 42 + "mrs %0, sctlr_el2\n" 43 + "orr %0, %0, %1\n" 44 + "msr sctlr_el2, %0" 45 + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); 46 + }
+154
arch/arm64/kvm/hyp/nvhe/tlb.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #include <asm/kvm_hyp.h> 8 + #include <asm/kvm_mmu.h> 9 + #include <asm/tlbflush.h> 10 + 11 + struct tlb_inv_context { 12 + u64 tcr; 13 + }; 14 + 15 + static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, 16 + struct tlb_inv_context *cxt) 17 + { 18 + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 19 + u64 val; 20 + 21 + /* 22 + * For CPUs that are affected by ARM 1319367, we need to 23 + * avoid a host Stage-1 walk while we have the guest's 24 + * VMID set in the VTTBR in order to invalidate TLBs. 25 + * We're guaranteed that the S1 MMU is enabled, so we can 26 + * simply set the EPD bits to avoid any further TLB fill. 27 + */ 28 + val = cxt->tcr = read_sysreg_el1(SYS_TCR); 29 + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; 30 + write_sysreg_el1(val, SYS_TCR); 31 + isb(); 32 + } 33 + 34 + __load_guest_stage2(mmu); 35 + } 36 + 37 + static void __tlb_switch_to_host(struct tlb_inv_context *cxt) 38 + { 39 + write_sysreg(0, vttbr_el2); 40 + 41 + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 42 + /* Ensure write of the host VMID */ 43 + isb(); 44 + /* Restore the host's TCR_EL1 */ 45 + write_sysreg_el1(cxt->tcr, SYS_TCR); 46 + } 47 + } 48 + 49 + void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, 50 + phys_addr_t ipa, int level) 51 + { 52 + struct tlb_inv_context cxt; 53 + 54 + dsb(ishst); 55 + 56 + /* Switch to requested VMID */ 57 + mmu = kern_hyp_va(mmu); 58 + __tlb_switch_to_guest(mmu, &cxt); 59 + 60 + /* 61 + * We could do so much better if we had the VA as well. 62 + * Instead, we invalidate Stage-2 for this IPA, and the 63 + * whole of Stage-1. Weep... 
64 + */ 65 + ipa >>= 12; 66 + __tlbi_level(ipas2e1is, ipa, level); 67 + 68 + /* 69 + * We have to ensure completion of the invalidation at Stage-2, 70 + * since a table walk on another CPU could refill a TLB with a 71 + * complete (S1 + S2) walk based on the old Stage-2 mapping if 72 + * the Stage-1 invalidation happened first. 73 + */ 74 + dsb(ish); 75 + __tlbi(vmalle1is); 76 + dsb(ish); 77 + isb(); 78 + 79 + /* 80 + * If the host is running at EL1 and we have a VPIPT I-cache, 81 + * then we must perform I-cache maintenance at EL2 in order for 82 + * it to have an effect on the guest. Since the guest cannot hit 83 + * I-cache lines allocated with a different VMID, we don't need 84 + * to worry about junk out of guest reset (we nuke the I-cache on 85 + * VMID rollover), but we do need to be careful when remapping 86 + * executable pages for the same guest. This can happen when KSM 87 + * takes a CoW fault on an executable page, copies the page into 88 + * a page that was previously mapped in the guest and then needs 89 + * to invalidate the guest view of the I-cache for that page 90 + * from EL1. To solve this, we invalidate the entire I-cache when 91 + * unmapping a page from a guest if we have a VPIPT I-cache but 92 + * the host is running at EL1. As above, we could do better if 93 + * we had the VA. 94 + * 95 + * The moral of this story is: if you have a VPIPT I-cache, then 96 + * you should be running with VHE enabled. 
97 + */ 98 + if (icache_is_vpipt()) 99 + __flush_icache_all(); 100 + 101 + __tlb_switch_to_host(&cxt); 102 + } 103 + 104 + void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) 105 + { 106 + struct tlb_inv_context cxt; 107 + 108 + dsb(ishst); 109 + 110 + /* Switch to requested VMID */ 111 + mmu = kern_hyp_va(mmu); 112 + __tlb_switch_to_guest(mmu, &cxt); 113 + 114 + __tlbi(vmalls12e1is); 115 + dsb(ish); 116 + isb(); 117 + 118 + __tlb_switch_to_host(&cxt); 119 + } 120 + 121 + void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) 122 + { 123 + struct tlb_inv_context cxt; 124 + 125 + /* Switch to requested VMID */ 126 + mmu = kern_hyp_va(mmu); 127 + __tlb_switch_to_guest(mmu, &cxt); 128 + 129 + __tlbi(vmalle1); 130 + dsb(nsh); 131 + isb(); 132 + 133 + __tlb_switch_to_host(&cxt); 134 + } 135 + 136 + void __kvm_flush_vm_context(void) 137 + { 138 + dsb(ishst); 139 + __tlbi(alle1is); 140 + 141 + /* 142 + * VIPT and PIPT caches are not affected by VMID, so no maintenance 143 + * is necessary across a VMID rollover. 144 + * 145 + * VPIPT caches constrain lookup and maintenance to the active VMID, 146 + * so we need to invalidate lines with a stale VMID to avoid an ABA 147 + * race after multiple rollovers. 148 + * 149 + */ 150 + if (icache_is_vpipt()) 151 + asm volatile("ic ialluis"); 152 + 153 + dsb(ish); 154 + }
+32
arch/arm64/kvm/hyp/smccc_wa.S
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2015-2018 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/arm-smccc.h>
#include <linux/linkage.h>

#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>

/*
 * This is not executed directly and is instead copied into the vectors
 * by install_bp_hardening_cb().
 */
	.data
	.pushsection	.rodata
	.global		__smccc_workaround_1_smc
SYM_DATA_START(__smccc_workaround_1_smc)
	esb
	sub	sp, sp, #(8 * 4)
	stp	x2, x3, [sp, #(8 * 0)]
	stp	x0, x1, [sp, #(8 * 2)]
	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_1
	smc	#0
	ldp	x2, x3, [sp, #(8 * 0)]
	ldp	x0, x1, [sp, #(8 * 2)]
	add	sp, sp, #(8 * 4)
	/*
	 * The .org pair asserts the sequence is exactly
	 * __SMCCC_WORKAROUND_1_SMC_SZ bytes, erroring out at
	 * assembly time if it ever grows or shrinks.
	 */
1:	.org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ
	.org 1b
SYM_DATA_END(__smccc_workaround_1_smc)
-936
arch/arm64/kvm/hyp/switch.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * Copyright (C) 2015 - ARM Ltd 4 - * Author: Marc Zyngier <marc.zyngier@arm.com> 5 - */ 6 - 7 - #include <linux/arm-smccc.h> 8 - #include <linux/kvm_host.h> 9 - #include <linux/types.h> 10 - #include <linux/jump_label.h> 11 - #include <uapi/linux/psci.h> 12 - 13 - #include <kvm/arm_psci.h> 14 - 15 - #include <asm/barrier.h> 16 - #include <asm/cpufeature.h> 17 - #include <asm/kprobes.h> 18 - #include <asm/kvm_asm.h> 19 - #include <asm/kvm_emulate.h> 20 - #include <asm/kvm_hyp.h> 21 - #include <asm/kvm_mmu.h> 22 - #include <asm/fpsimd.h> 23 - #include <asm/debug-monitors.h> 24 - #include <asm/processor.h> 25 - #include <asm/thread_info.h> 26 - 27 - /* Check whether the FP regs were dirtied while in the host-side run loop: */ 28 - static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) 29 - { 30 - /* 31 - * When the system doesn't support FP/SIMD, we cannot rely on 32 - * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an 33 - * abort on the very first access to FP and thus we should never 34 - * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always 35 - * trap the accesses. 
36 - */ 37 - if (!system_supports_fpsimd() || 38 - vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) 39 - vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | 40 - KVM_ARM64_FP_HOST); 41 - 42 - return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); 43 - } 44 - 45 - /* Save the 32-bit only FPSIMD system register state */ 46 - static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) 47 - { 48 - if (!vcpu_el1_is_32bit(vcpu)) 49 - return; 50 - 51 - vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); 52 - } 53 - 54 - static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) 55 - { 56 - /* 57 - * We are about to set CPTR_EL2.TFP to trap all floating point 58 - * register accesses to EL2, however, the ARM ARM clearly states that 59 - * traps are only taken to EL2 if the operation would not otherwise 60 - * trap to EL1. Therefore, always make sure that for 32-bit guests, 61 - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. 62 - * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to 63 - * it will cause an exception. 64 - */ 65 - if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { 66 - write_sysreg(1 << 30, fpexc32_el2); 67 - isb(); 68 - } 69 - } 70 - 71 - static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) 72 - { 73 - /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ 74 - write_sysreg(1 << 15, hstr_el2); 75 - 76 - /* 77 - * Make sure we trap PMU access from EL0 to EL2. Also sanitize 78 - * PMSELR_EL0 to make sure it never contains the cycle 79 - * counter, which could make a PMXEVCNTR_EL0 access UNDEF at 80 - * EL1 instead of being trapped to EL2. 
81 - */ 82 - write_sysreg(0, pmselr_el0); 83 - write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); 84 - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); 85 - } 86 - 87 - static void __hyp_text __deactivate_traps_common(void) 88 - { 89 - write_sysreg(0, hstr_el2); 90 - write_sysreg(0, pmuserenr_el0); 91 - } 92 - 93 - static void activate_traps_vhe(struct kvm_vcpu *vcpu) 94 - { 95 - u64 val; 96 - 97 - val = read_sysreg(cpacr_el1); 98 - val |= CPACR_EL1_TTA; 99 - val &= ~CPACR_EL1_ZEN; 100 - 101 - /* 102 - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to 103 - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, 104 - * except for some missing controls, such as TAM. 105 - * In this case, CPTR_EL2.TAM has the same position with or without 106 - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM 107 - * shift value for trapping the AMU accesses. 108 - */ 109 - 110 - val |= CPTR_EL2_TAM; 111 - 112 - if (update_fp_enabled(vcpu)) { 113 - if (vcpu_has_sve(vcpu)) 114 - val |= CPACR_EL1_ZEN; 115 - } else { 116 - val &= ~CPACR_EL1_FPEN; 117 - __activate_traps_fpsimd32(vcpu); 118 - } 119 - 120 - write_sysreg(val, cpacr_el1); 121 - 122 - write_sysreg(kvm_get_hyp_vector(), vbar_el1); 123 - } 124 - NOKPROBE_SYMBOL(activate_traps_vhe); 125 - 126 - static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) 127 - { 128 - u64 val; 129 - 130 - __activate_traps_common(vcpu); 131 - 132 - val = CPTR_EL2_DEFAULT; 133 - val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; 134 - if (!update_fp_enabled(vcpu)) { 135 - val |= CPTR_EL2_TFP; 136 - __activate_traps_fpsimd32(vcpu); 137 - } 138 - 139 - write_sysreg(val, cptr_el2); 140 - 141 - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 142 - struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; 143 - 144 - isb(); 145 - /* 146 - * At this stage, and thanks to the above isb(), S2 is 147 - * configured and enabled. We can now restore the guest's S1 148 - * configuration: SCTLR, and only then TCR. 
149 - */ 150 - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); 151 - isb(); 152 - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); 153 - } 154 - } 155 - 156 - static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) 157 - { 158 - u64 hcr = vcpu->arch.hcr_el2; 159 - 160 - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) 161 - hcr |= HCR_TVM; 162 - 163 - write_sysreg(hcr, hcr_el2); 164 - 165 - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) 166 - write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); 167 - 168 - if (has_vhe()) 169 - activate_traps_vhe(vcpu); 170 - else 171 - __activate_traps_nvhe(vcpu); 172 - } 173 - 174 - static void deactivate_traps_vhe(void) 175 - { 176 - extern char vectors[]; /* kernel exception vectors */ 177 - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); 178 - 179 - /* 180 - * ARM errata 1165522 and 1530923 require the actual execution of the 181 - * above before we can switch to the EL2/EL0 translation regime used by 182 - * the host. 183 - */ 184 - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); 185 - 186 - write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); 187 - write_sysreg(vectors, vbar_el1); 188 - } 189 - NOKPROBE_SYMBOL(deactivate_traps_vhe); 190 - 191 - static void __hyp_text __deactivate_traps_nvhe(void) 192 - { 193 - u64 mdcr_el2 = read_sysreg(mdcr_el2); 194 - 195 - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 196 - u64 val; 197 - 198 - /* 199 - * Set the TCR and SCTLR registers in the exact opposite 200 - * sequence as __activate_traps_nvhe (first prevent walks, 201 - * then force the MMU on). A generous sprinkling of isb() 202 - * ensure that things happen in this exact order. 
203 - */ 204 - val = read_sysreg_el1(SYS_TCR); 205 - write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); 206 - isb(); 207 - val = read_sysreg_el1(SYS_SCTLR); 208 - write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); 209 - isb(); 210 - } 211 - 212 - __deactivate_traps_common(); 213 - 214 - mdcr_el2 &= MDCR_EL2_HPMN_MASK; 215 - mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; 216 - 217 - write_sysreg(mdcr_el2, mdcr_el2); 218 - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); 219 - write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); 220 - } 221 - 222 - static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) 223 - { 224 - /* 225 - * If we pended a virtual abort, preserve it until it gets 226 - * cleared. See D1.14.3 (Virtual Interrupts) for details, but 227 - * the crucial bit is "On taking a vSError interrupt, 228 - * HCR_EL2.VSE is cleared to 0." 229 - */ 230 - if (vcpu->arch.hcr_el2 & HCR_VSE) { 231 - vcpu->arch.hcr_el2 &= ~HCR_VSE; 232 - vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; 233 - } 234 - 235 - if (has_vhe()) 236 - deactivate_traps_vhe(); 237 - else 238 - __deactivate_traps_nvhe(); 239 - } 240 - 241 - void activate_traps_vhe_load(struct kvm_vcpu *vcpu) 242 - { 243 - __activate_traps_common(vcpu); 244 - } 245 - 246 - void deactivate_traps_vhe_put(void) 247 - { 248 - u64 mdcr_el2 = read_sysreg(mdcr_el2); 249 - 250 - mdcr_el2 &= MDCR_EL2_HPMN_MASK | 251 - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | 252 - MDCR_EL2_TPMS; 253 - 254 - write_sysreg(mdcr_el2, mdcr_el2); 255 - 256 - __deactivate_traps_common(); 257 - } 258 - 259 - static void __hyp_text __activate_vm(struct kvm *kvm) 260 - { 261 - __load_guest_stage2(kvm); 262 - } 263 - 264 - static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) 265 - { 266 - write_sysreg(0, vttbr_el2); 267 - } 268 - 269 - /* Save VGICv3 state on non-VHE systems */ 270 - static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) 271 - { 272 - if 
(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { 273 - __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); 274 - __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); 275 - } 276 - } 277 - 278 - /* Restore VGICv3 state on non_VEH systems */ 279 - static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) 280 - { 281 - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { 282 - __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); 283 - __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); 284 - } 285 - } 286 - 287 - static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) 288 - { 289 - u64 par, tmp; 290 - 291 - /* 292 - * Resolve the IPA the hard way using the guest VA. 293 - * 294 - * Stage-1 translation already validated the memory access 295 - * rights. As such, we can use the EL1 translation regime, and 296 - * don't have to distinguish between EL0 and EL1 access. 297 - * 298 - * We do need to save/restore PAR_EL1 though, as we haven't 299 - * saved the guest context yet, and we may return early... 
300 - */ 301 - par = read_sysreg(par_el1); 302 - asm volatile("at s1e1r, %0" : : "r" (far)); 303 - isb(); 304 - 305 - tmp = read_sysreg(par_el1); 306 - write_sysreg(par, par_el1); 307 - 308 - if (unlikely(tmp & SYS_PAR_EL1_F)) 309 - return false; /* Translation failed, back to guest */ 310 - 311 - /* Convert PAR to HPFAR format */ 312 - *hpfar = PAR_TO_HPFAR(tmp); 313 - return true; 314 - } 315 - 316 - static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) 317 - { 318 - u8 ec; 319 - u64 esr; 320 - u64 hpfar, far; 321 - 322 - esr = vcpu->arch.fault.esr_el2; 323 - ec = ESR_ELx_EC(esr); 324 - 325 - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) 326 - return true; 327 - 328 - far = read_sysreg_el2(SYS_FAR); 329 - 330 - /* 331 - * The HPFAR can be invalid if the stage 2 fault did not 332 - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW 333 - * bit is clear) and one of the two following cases are true: 334 - * 1. The fault was due to a permission fault 335 - * 2. The processor carries errata 834220 336 - * 337 - * Therefore, for all non S1PTW faults where we either have a 338 - * permission fault or the errata workaround is enabled, we 339 - * resolve the IPA using the AT instruction. 
340 - */ 341 - if (!(esr & ESR_ELx_S1PTW) && 342 - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || 343 - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { 344 - if (!__translate_far_to_hpfar(far, &hpfar)) 345 - return false; 346 - } else { 347 - hpfar = read_sysreg(hpfar_el2); 348 - } 349 - 350 - vcpu->arch.fault.far_el2 = far; 351 - vcpu->arch.fault.hpfar_el2 = hpfar; 352 - return true; 353 - } 354 - 355 - /* Check for an FPSIMD/SVE trap and handle as appropriate */ 356 - static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) 357 - { 358 - bool vhe, sve_guest, sve_host; 359 - u8 hsr_ec; 360 - 361 - if (!system_supports_fpsimd()) 362 - return false; 363 - 364 - if (system_supports_sve()) { 365 - sve_guest = vcpu_has_sve(vcpu); 366 - sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; 367 - vhe = true; 368 - } else { 369 - sve_guest = false; 370 - sve_host = false; 371 - vhe = has_vhe(); 372 - } 373 - 374 - hsr_ec = kvm_vcpu_trap_get_class(vcpu); 375 - if (hsr_ec != ESR_ELx_EC_FP_ASIMD && 376 - hsr_ec != ESR_ELx_EC_SVE) 377 - return false; 378 - 379 - /* Don't handle SVE traps for non-SVE vcpus here: */ 380 - if (!sve_guest) 381 - if (hsr_ec != ESR_ELx_EC_FP_ASIMD) 382 - return false; 383 - 384 - /* Valid trap. Switch the context: */ 385 - 386 - if (vhe) { 387 - u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; 388 - 389 - if (sve_guest) 390 - reg |= CPACR_EL1_ZEN; 391 - 392 - write_sysreg(reg, cpacr_el1); 393 - } else { 394 - write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, 395 - cptr_el2); 396 - } 397 - 398 - isb(); 399 - 400 - if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { 401 - /* 402 - * In the SVE case, VHE is assumed: it is enforced by 403 - * Kconfig and kvm_arch_init(). 
404 - */ 405 - if (sve_host) { 406 - struct thread_struct *thread = container_of( 407 - vcpu->arch.host_fpsimd_state, 408 - struct thread_struct, uw.fpsimd_state); 409 - 410 - sve_save_state(sve_pffr(thread), 411 - &vcpu->arch.host_fpsimd_state->fpsr); 412 - } else { 413 - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); 414 - } 415 - 416 - vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; 417 - } 418 - 419 - if (sve_guest) { 420 - sve_load_state(vcpu_sve_pffr(vcpu), 421 - &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, 422 - sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); 423 - write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); 424 - } else { 425 - __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); 426 - } 427 - 428 - /* Skip restoring fpexc32 for AArch64 guests */ 429 - if (!(read_sysreg(hcr_el2) & HCR_RW)) 430 - write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], 431 - fpexc32_el2); 432 - 433 - vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; 434 - 435 - return true; 436 - } 437 - 438 - static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) 439 - { 440 - u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); 441 - int rt = kvm_vcpu_sys_get_rt(vcpu); 442 - u64 val = vcpu_get_reg(vcpu, rt); 443 - 444 - /* 445 - * The normal sysreg handling code expects to see the traps, 446 - * let's not do anything here. 
447 - */ 448 - if (vcpu->arch.hcr_el2 & HCR_TVM) 449 - return false; 450 - 451 - switch (sysreg) { 452 - case SYS_SCTLR_EL1: 453 - write_sysreg_el1(val, SYS_SCTLR); 454 - break; 455 - case SYS_TTBR0_EL1: 456 - write_sysreg_el1(val, SYS_TTBR0); 457 - break; 458 - case SYS_TTBR1_EL1: 459 - write_sysreg_el1(val, SYS_TTBR1); 460 - break; 461 - case SYS_TCR_EL1: 462 - write_sysreg_el1(val, SYS_TCR); 463 - break; 464 - case SYS_ESR_EL1: 465 - write_sysreg_el1(val, SYS_ESR); 466 - break; 467 - case SYS_FAR_EL1: 468 - write_sysreg_el1(val, SYS_FAR); 469 - break; 470 - case SYS_AFSR0_EL1: 471 - write_sysreg_el1(val, SYS_AFSR0); 472 - break; 473 - case SYS_AFSR1_EL1: 474 - write_sysreg_el1(val, SYS_AFSR1); 475 - break; 476 - case SYS_MAIR_EL1: 477 - write_sysreg_el1(val, SYS_MAIR); 478 - break; 479 - case SYS_AMAIR_EL1: 480 - write_sysreg_el1(val, SYS_AMAIR); 481 - break; 482 - case SYS_CONTEXTIDR_EL1: 483 - write_sysreg_el1(val, SYS_CONTEXTIDR); 484 - break; 485 - default: 486 - return false; 487 - } 488 - 489 - __kvm_skip_instr(vcpu); 490 - return true; 491 - } 492 - 493 - static bool __hyp_text esr_is_ptrauth_trap(u32 esr) 494 - { 495 - u32 ec = ESR_ELx_EC(esr); 496 - 497 - if (ec == ESR_ELx_EC_PAC) 498 - return true; 499 - 500 - if (ec != ESR_ELx_EC_SYS64) 501 - return false; 502 - 503 - switch (esr_sys64_to_sysreg(esr)) { 504 - case SYS_APIAKEYLO_EL1: 505 - case SYS_APIAKEYHI_EL1: 506 - case SYS_APIBKEYLO_EL1: 507 - case SYS_APIBKEYHI_EL1: 508 - case SYS_APDAKEYLO_EL1: 509 - case SYS_APDAKEYHI_EL1: 510 - case SYS_APDBKEYLO_EL1: 511 - case SYS_APDBKEYHI_EL1: 512 - case SYS_APGAKEYLO_EL1: 513 - case SYS_APGAKEYHI_EL1: 514 - return true; 515 - } 516 - 517 - return false; 518 - } 519 - 520 - #define __ptrauth_save_key(regs, key) \ 521 - ({ \ 522 - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ 523 - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ 524 - }) 525 - 526 - static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) 
527 - { 528 - struct kvm_cpu_context *ctxt; 529 - u64 val; 530 - 531 - if (!vcpu_has_ptrauth(vcpu) || 532 - !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu))) 533 - return false; 534 - 535 - ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 536 - __ptrauth_save_key(ctxt->sys_regs, APIA); 537 - __ptrauth_save_key(ctxt->sys_regs, APIB); 538 - __ptrauth_save_key(ctxt->sys_regs, APDA); 539 - __ptrauth_save_key(ctxt->sys_regs, APDB); 540 - __ptrauth_save_key(ctxt->sys_regs, APGA); 541 - 542 - vcpu_ptrauth_enable(vcpu); 543 - 544 - val = read_sysreg(hcr_el2); 545 - val |= (HCR_API | HCR_APK); 546 - write_sysreg(val, hcr_el2); 547 - 548 - return true; 549 - } 550 - 551 - /* 552 - * Return true when we were able to fixup the guest exit and should return to 553 - * the guest, false when we should restore the host state and return to the 554 - * main run loop. 555 - */ 556 - static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) 557 - { 558 - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) 559 - vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); 560 - 561 - /* 562 - * We're using the raw exception code in order to only process 563 - * the trap if no SError is pending. We will come back to the 564 - * same PC once the SError has been injected, and replay the 565 - * trapping instruction. 566 - */ 567 - if (*exit_code != ARM_EXCEPTION_TRAP) 568 - goto exit; 569 - 570 - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && 571 - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && 572 - handle_tx2_tvm(vcpu)) 573 - return true; 574 - 575 - /* 576 - * We trap the first access to the FP/SIMD to save the host context 577 - * and restore the guest context lazily. 578 - * If FP/SIMD is not implemented, handle the trap and inject an 579 - * undefined instruction exception to the guest. 580 - * Similarly for trapped SVE accesses. 
581 - */ 582 - if (__hyp_handle_fpsimd(vcpu)) 583 - return true; 584 - 585 - if (__hyp_handle_ptrauth(vcpu)) 586 - return true; 587 - 588 - if (!__populate_fault_info(vcpu)) 589 - return true; 590 - 591 - if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { 592 - bool valid; 593 - 594 - valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && 595 - kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && 596 - kvm_vcpu_dabt_isvalid(vcpu) && 597 - !kvm_vcpu_dabt_isextabt(vcpu) && 598 - !kvm_vcpu_dabt_iss1tw(vcpu); 599 - 600 - if (valid) { 601 - int ret = __vgic_v2_perform_cpuif_access(vcpu); 602 - 603 - if (ret == 1) 604 - return true; 605 - 606 - /* Promote an illegal access to an SError.*/ 607 - if (ret == -1) 608 - *exit_code = ARM_EXCEPTION_EL1_SERROR; 609 - 610 - goto exit; 611 - } 612 - } 613 - 614 - if (static_branch_unlikely(&vgic_v3_cpuif_trap) && 615 - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || 616 - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { 617 - int ret = __vgic_v3_perform_cpuif_access(vcpu); 618 - 619 - if (ret == 1) 620 - return true; 621 - } 622 - 623 - exit: 624 - /* Return to the host kernel and handle the exit */ 625 - return false; 626 - } 627 - 628 - static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) 629 - { 630 - if (!cpus_have_final_cap(ARM64_SSBD)) 631 - return false; 632 - 633 - return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); 634 - } 635 - 636 - static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) 637 - { 638 - #ifdef CONFIG_ARM64_SSBD 639 - /* 640 - * The host runs with the workaround always present. If the 641 - * guest wants it disabled, so be it... 
642 - */ 643 - if (__needs_ssbd_off(vcpu) && 644 - __hyp_this_cpu_read(arm64_ssbd_callback_required)) 645 - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); 646 - #endif 647 - } 648 - 649 - static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) 650 - { 651 - #ifdef CONFIG_ARM64_SSBD 652 - /* 653 - * If the guest has disabled the workaround, bring it back on. 654 - */ 655 - if (__needs_ssbd_off(vcpu) && 656 - __hyp_this_cpu_read(arm64_ssbd_callback_required)) 657 - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); 658 - #endif 659 - } 660 - 661 - /** 662 - * Disable host events, enable guest events 663 - */ 664 - static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) 665 - { 666 - struct kvm_host_data *host; 667 - struct kvm_pmu_events *pmu; 668 - 669 - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); 670 - pmu = &host->pmu_events; 671 - 672 - if (pmu->events_host) 673 - write_sysreg(pmu->events_host, pmcntenclr_el0); 674 - 675 - if (pmu->events_guest) 676 - write_sysreg(pmu->events_guest, pmcntenset_el0); 677 - 678 - return (pmu->events_host || pmu->events_guest); 679 - } 680 - 681 - /** 682 - * Disable guest events, enable host events 683 - */ 684 - static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) 685 - { 686 - struct kvm_host_data *host; 687 - struct kvm_pmu_events *pmu; 688 - 689 - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); 690 - pmu = &host->pmu_events; 691 - 692 - if (pmu->events_guest) 693 - write_sysreg(pmu->events_guest, pmcntenclr_el0); 694 - 695 - if (pmu->events_host) 696 - write_sysreg(pmu->events_host, pmcntenset_el0); 697 - } 698 - 699 - /* Switch to the guest for VHE systems running in EL2 */ 700 - static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) 701 - { 702 - struct kvm_cpu_context *host_ctxt; 703 - struct kvm_cpu_context *guest_ctxt; 704 - u64 exit_code; 705 - 706 - host_ctxt = 
&__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 707 - host_ctxt->__hyp_running_vcpu = vcpu; 708 - guest_ctxt = &vcpu->arch.ctxt; 709 - 710 - sysreg_save_host_state_vhe(host_ctxt); 711 - 712 - /* 713 - * ARM erratum 1165522 requires us to configure both stage 1 and 714 - * stage 2 translation for the guest context before we clear 715 - * HCR_EL2.TGE. 716 - * 717 - * We have already configured the guest's stage 1 translation in 718 - * kvm_vcpu_load_sysregs above. We must now call __activate_vm 719 - * before __activate_traps, because __activate_vm configures 720 - * stage 2 translation, and __activate_traps clear HCR_EL2.TGE 721 - * (among other things). 722 - */ 723 - __activate_vm(vcpu->kvm); 724 - __activate_traps(vcpu); 725 - 726 - sysreg_restore_guest_state_vhe(guest_ctxt); 727 - __debug_switch_to_guest(vcpu); 728 - 729 - __set_guest_arch_workaround_state(vcpu); 730 - 731 - do { 732 - /* Jump in the fire! */ 733 - exit_code = __guest_enter(vcpu, host_ctxt); 734 - 735 - /* And we're baaack! */ 736 - } while (fixup_guest_exit(vcpu, &exit_code)); 737 - 738 - __set_host_arch_workaround_state(vcpu); 739 - 740 - sysreg_save_guest_state_vhe(guest_ctxt); 741 - 742 - __deactivate_traps(vcpu); 743 - 744 - sysreg_restore_host_state_vhe(host_ctxt); 745 - 746 - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) 747 - __fpsimd_save_fpexc32(vcpu); 748 - 749 - __debug_switch_to_host(vcpu); 750 - 751 - return exit_code; 752 - } 753 - NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); 754 - 755 - int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) 756 - { 757 - int ret; 758 - 759 - local_daif_mask(); 760 - 761 - /* 762 - * Having IRQs masked via PMR when entering the guest means the GIC 763 - * will not signal the CPU of interrupts of lower priority, and the 764 - * only way to get out will be via guest exceptions. 765 - * Naturally, we want to avoid this. 
766 - * 767 - * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a 768 - * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. 769 - */ 770 - pmr_sync(); 771 - 772 - ret = __kvm_vcpu_run_vhe(vcpu); 773 - 774 - /* 775 - * local_daif_restore() takes care to properly restore PSTATE.DAIF 776 - * and the GIC PMR if the host is using IRQ priorities. 777 - */ 778 - local_daif_restore(DAIF_PROCCTX_NOIRQ); 779 - 780 - /* 781 - * When we exit from the guest we change a number of CPU configuration 782 - * parameters, such as traps. Make sure these changes take effect 783 - * before running the host or additional guests. 784 - */ 785 - isb(); 786 - 787 - return ret; 788 - } 789 - 790 - /* Switch to the guest for legacy non-VHE systems */ 791 - int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) 792 - { 793 - struct kvm_cpu_context *host_ctxt; 794 - struct kvm_cpu_context *guest_ctxt; 795 - bool pmu_switch_needed; 796 - u64 exit_code; 797 - 798 - /* 799 - * Having IRQs masked via PMR when entering the guest means the GIC 800 - * will not signal the CPU of interrupts of lower priority, and the 801 - * only way to get out will be via guest exceptions. 802 - * Naturally, we want to avoid this. 803 - */ 804 - if (system_uses_irq_prio_masking()) { 805 - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); 806 - pmr_sync(); 807 - } 808 - 809 - vcpu = kern_hyp_va(vcpu); 810 - 811 - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 812 - host_ctxt->__hyp_running_vcpu = vcpu; 813 - guest_ctxt = &vcpu->arch.ctxt; 814 - 815 - pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); 816 - 817 - __sysreg_save_state_nvhe(host_ctxt); 818 - 819 - /* 820 - * We must restore the 32-bit state before the sysregs, thanks 821 - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). 
822 - * 823 - * Also, and in order to be able to deal with erratum #1319537 (A57) 824 - * and #1319367 (A72), we must ensure that all VM-related sysreg are 825 - * restored before we enable S2 translation. 826 - */ 827 - __sysreg32_restore_state(vcpu); 828 - __sysreg_restore_state_nvhe(guest_ctxt); 829 - 830 - __activate_vm(kern_hyp_va(vcpu->kvm)); 831 - __activate_traps(vcpu); 832 - 833 - __hyp_vgic_restore_state(vcpu); 834 - __timer_enable_traps(vcpu); 835 - 836 - __debug_switch_to_guest(vcpu); 837 - 838 - __set_guest_arch_workaround_state(vcpu); 839 - 840 - do { 841 - /* Jump in the fire! */ 842 - exit_code = __guest_enter(vcpu, host_ctxt); 843 - 844 - /* And we're baaack! */ 845 - } while (fixup_guest_exit(vcpu, &exit_code)); 846 - 847 - __set_host_arch_workaround_state(vcpu); 848 - 849 - __sysreg_save_state_nvhe(guest_ctxt); 850 - __sysreg32_save_state(vcpu); 851 - __timer_disable_traps(vcpu); 852 - __hyp_vgic_save_state(vcpu); 853 - 854 - __deactivate_traps(vcpu); 855 - __deactivate_vm(vcpu); 856 - 857 - __sysreg_restore_state_nvhe(host_ctxt); 858 - 859 - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) 860 - __fpsimd_save_fpexc32(vcpu); 861 - 862 - /* 863 - * This must come after restoring the host sysregs, since a non-VHE 864 - * system may enable SPE here and make use of the TTBRs. 
865 - */ 866 - __debug_switch_to_host(vcpu); 867 - 868 - if (pmu_switch_needed) 869 - __pmu_switch_to_host(host_ctxt); 870 - 871 - /* Returning to host will clear PSR.I, remask PMR if needed */ 872 - if (system_uses_irq_prio_masking()) 873 - gic_write_pmr(GIC_PRIO_IRQOFF); 874 - 875 - return exit_code; 876 - } 877 - 878 - static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; 879 - 880 - static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, 881 - struct kvm_cpu_context *__host_ctxt) 882 - { 883 - struct kvm_vcpu *vcpu; 884 - unsigned long str_va; 885 - 886 - vcpu = __host_ctxt->__hyp_running_vcpu; 887 - 888 - if (read_sysreg(vttbr_el2)) { 889 - __timer_disable_traps(vcpu); 890 - __deactivate_traps(vcpu); 891 - __deactivate_vm(vcpu); 892 - __sysreg_restore_state_nvhe(__host_ctxt); 893 - } 894 - 895 - /* 896 - * Force the panic string to be loaded from the literal pool, 897 - * making sure it is a kernel address and not a PC-relative 898 - * reference. 
899 - */ 900 - asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); 901 - 902 - __hyp_do_panic(str_va, 903 - spsr, elr, 904 - read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), 905 - read_sysreg(hpfar_el2), par, vcpu); 906 - } 907 - 908 - static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, 909 - struct kvm_cpu_context *host_ctxt) 910 - { 911 - struct kvm_vcpu *vcpu; 912 - vcpu = host_ctxt->__hyp_running_vcpu; 913 - 914 - __deactivate_traps(vcpu); 915 - sysreg_restore_host_state_vhe(host_ctxt); 916 - 917 - panic(__hyp_panic_string, 918 - spsr, elr, 919 - read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), 920 - read_sysreg(hpfar_el2), par, vcpu); 921 - } 922 - NOKPROBE_SYMBOL(__hyp_call_panic_vhe); 923 - 924 - void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) 925 - { 926 - u64 spsr = read_sysreg_el2(SYS_SPSR); 927 - u64 elr = read_sysreg_el2(SYS_ELR); 928 - u64 par = read_sysreg(par_el1); 929 - 930 - if (!has_vhe()) 931 - __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt); 932 - else 933 - __hyp_call_panic_vhe(spsr, elr, par, host_ctxt); 934 - 935 - unreachable(); 936 - }
-333
arch/arm64/kvm/hyp/sysreg-sr.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * Copyright (C) 2012-2015 - ARM Ltd 4 - * Author: Marc Zyngier <marc.zyngier@arm.com> 5 - */ 6 - 7 - #include <linux/compiler.h> 8 - #include <linux/kvm_host.h> 9 - 10 - #include <asm/kprobes.h> 11 - #include <asm/kvm_asm.h> 12 - #include <asm/kvm_emulate.h> 13 - #include <asm/kvm_hyp.h> 14 - 15 - /* 16 - * Non-VHE: Both host and guest must save everything. 17 - * 18 - * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and 19 - * pstate, which are handled as part of the el2 return state) on every 20 - * switch (sp_el0 is being dealt with in the assembly code). 21 - * tpidr_el0 and tpidrro_el0 only need to be switched when going 22 - * to host userspace or a different VCPU. EL1 registers only need to be 23 - * switched when potentially going to run a different VCPU. The latter two 24 - * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. 25 - */ 26 - 27 - static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) 28 - { 29 - ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); 30 - } 31 - 32 - static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) 33 - { 34 - ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); 35 - ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); 36 - } 37 - 38 - static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) 39 - { 40 - ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); 41 - ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); 42 - ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); 43 - ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); 44 - ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); 45 - ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); 46 - ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); 47 - ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); 48 - ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); 49 - ctxt->sys_regs[FAR_EL1] = 
read_sysreg_el1(SYS_FAR); 50 - ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); 51 - ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); 52 - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); 53 - ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); 54 - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); 55 - ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); 56 - ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); 57 - 58 - ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); 59 - ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); 60 - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); 61 - } 62 - 63 - static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) 64 - { 65 - ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); 66 - ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); 67 - 68 - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) 69 - ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); 70 - } 71 - 72 - void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) 73 - { 74 - __sysreg_save_el1_state(ctxt); 75 - __sysreg_save_common_state(ctxt); 76 - __sysreg_save_user_state(ctxt); 77 - __sysreg_save_el2_return_state(ctxt); 78 - } 79 - 80 - void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) 81 - { 82 - __sysreg_save_common_state(ctxt); 83 - } 84 - NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); 85 - 86 - void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) 87 - { 88 - __sysreg_save_common_state(ctxt); 89 - __sysreg_save_el2_return_state(ctxt); 90 - } 91 - NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); 92 - 93 - static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) 94 - { 95 - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); 96 - } 97 - 98 - static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) 99 - { 100 - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); 101 - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); 
102 - } 103 - 104 - static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) 105 - { 106 - write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); 107 - write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); 108 - 109 - if (has_vhe() || 110 - !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 111 - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); 112 - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); 113 - } else if (!ctxt->__hyp_running_vcpu) { 114 - /* 115 - * Must only be done for guest registers, hence the context 116 - * test. We're coming from the host, so SCTLR.M is already 117 - * set. Pairs with __activate_traps_nvhe(). 118 - */ 119 - write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | 120 - TCR_EPD1_MASK | TCR_EPD0_MASK), 121 - SYS_TCR); 122 - isb(); 123 - } 124 - 125 - write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); 126 - write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); 127 - write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); 128 - write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); 129 - write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); 130 - write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); 131 - write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); 132 - write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); 133 - write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); 134 - write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); 135 - write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); 136 - write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); 137 - write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); 138 - write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); 139 - 140 - if (!has_vhe() && 141 - cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && 142 - ctxt->__hyp_running_vcpu) { 143 - /* 144 - * Must only be done for host registers, hence the context 145 - * test. Pairs with __deactivate_traps_nvhe(). 
146 - */ 147 - isb(); 148 - /* 149 - * At this stage, and thanks to the above isb(), S2 is 150 - * deconfigured and disabled. We can now restore the host's 151 - * S1 configuration: SCTLR, and only then TCR. 152 - */ 153 - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); 154 - isb(); 155 - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); 156 - } 157 - 158 - write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); 159 - write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); 160 - write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); 161 - } 162 - 163 - static void __hyp_text 164 - __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) 165 - { 166 - u64 pstate = ctxt->gp_regs.regs.pstate; 167 - u64 mode = pstate & PSR_AA32_MODE_MASK; 168 - 169 - /* 170 - * Safety check to ensure we're setting the CPU up to enter the guest 171 - * in a less privileged mode. 172 - * 173 - * If we are attempting a return to EL2 or higher in AArch64 state, 174 - * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that 175 - * we'll take an illegal exception state exception immediately after 176 - * the ERET to the guest. Attempts to return to AArch32 Hyp will 177 - * result in an illegal exception return because EL2's execution state 178 - * is determined by SCR_EL3.RW. 
179 - */ 180 - if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) 181 - pstate = PSR_MODE_EL2h | PSR_IL_BIT; 182 - 183 - write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); 184 - write_sysreg_el2(pstate, SYS_SPSR); 185 - 186 - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) 187 - write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); 188 - } 189 - 190 - void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) 191 - { 192 - __sysreg_restore_el1_state(ctxt); 193 - __sysreg_restore_common_state(ctxt); 194 - __sysreg_restore_user_state(ctxt); 195 - __sysreg_restore_el2_return_state(ctxt); 196 - } 197 - 198 - void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) 199 - { 200 - __sysreg_restore_common_state(ctxt); 201 - } 202 - NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); 203 - 204 - void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) 205 - { 206 - __sysreg_restore_common_state(ctxt); 207 - __sysreg_restore_el2_return_state(ctxt); 208 - } 209 - NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); 210 - 211 - void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) 212 - { 213 - u64 *spsr, *sysreg; 214 - 215 - if (!vcpu_el1_is_32bit(vcpu)) 216 - return; 217 - 218 - spsr = vcpu->arch.ctxt.gp_regs.spsr; 219 - sysreg = vcpu->arch.ctxt.sys_regs; 220 - 221 - spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); 222 - spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); 223 - spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); 224 - spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); 225 - 226 - sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); 227 - sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); 228 - 229 - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) 230 - sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); 231 - } 232 - 233 - void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) 234 - { 235 - u64 *spsr, *sysreg; 236 - 237 - if (!vcpu_el1_is_32bit(vcpu)) 238 - return; 239 - 240 - spsr = vcpu->arch.ctxt.gp_regs.spsr; 241 - sysreg = 
vcpu->arch.ctxt.sys_regs; 242 - 243 - write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); 244 - write_sysreg(spsr[KVM_SPSR_UND], spsr_und); 245 - write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); 246 - write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); 247 - 248 - write_sysreg(sysreg[DACR32_EL2], dacr32_el2); 249 - write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); 250 - 251 - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) 252 - write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); 253 - } 254 - 255 - /** 256 - * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU 257 - * 258 - * @vcpu: The VCPU pointer 259 - * 260 - * Load system registers that do not affect the host's execution, for 261 - * example EL1 system registers on a VHE system where the host kernel 262 - * runs at EL2. This function is called from KVM's vcpu_load() function 263 - * and loading system register state early avoids having to load them on 264 - * every entry to the VM. 265 - */ 266 - void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) 267 - { 268 - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; 269 - struct kvm_cpu_context *host_ctxt; 270 - 271 - if (!has_vhe()) 272 - return; 273 - 274 - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 275 - __sysreg_save_user_state(host_ctxt); 276 - 277 - /* 278 - * Load guest EL1 and user state 279 - * 280 - * We must restore the 32-bit state before the sysregs, thanks 281 - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). 
282 - */ 283 - __sysreg32_restore_state(vcpu); 284 - __sysreg_restore_user_state(guest_ctxt); 285 - __sysreg_restore_el1_state(guest_ctxt); 286 - 287 - vcpu->arch.sysregs_loaded_on_cpu = true; 288 - 289 - activate_traps_vhe_load(vcpu); 290 - } 291 - 292 - /** 293 - * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU 294 - * 295 - * @vcpu: The VCPU pointer 296 - * 297 - * Save guest system registers that do not affect the host's execution, for 298 - * example EL1 system registers on a VHE system where the host kernel 299 - * runs at EL2. This function is called from KVM's vcpu_put() function 300 - * and deferring saving system register state until we're no longer running the 301 - * VCPU avoids having to save them on every exit from the VM. 302 - */ 303 - void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) 304 - { 305 - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; 306 - struct kvm_cpu_context *host_ctxt; 307 - 308 - if (!has_vhe()) 309 - return; 310 - 311 - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 312 - deactivate_traps_vhe_put(); 313 - 314 - __sysreg_save_el1_state(guest_ctxt); 315 - __sysreg_save_user_state(guest_ctxt); 316 - __sysreg32_save_state(vcpu); 317 - 318 - /* Restore host user state */ 319 - __sysreg_restore_user_state(host_ctxt); 320 - 321 - vcpu->arch.sysregs_loaded_on_cpu = false; 322 - } 323 - 324 - void __hyp_text __kvm_enable_ssbs(void) 325 - { 326 - u64 tmp; 327 - 328 - asm volatile( 329 - "mrs %0, sctlr_el2\n" 330 - "orr %0, %0, %1\n" 331 - "msr sctlr_el2, %0" 332 - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); 333 - }
+3 -3
arch/arm64/kvm/hyp/timer-sr.c arch/arm64/kvm/hyp/nvhe/timer-sr.c
··· 10 10 11 11 #include <asm/kvm_hyp.h> 12 12 13 - void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) 13 + void __kvm_timer_set_cntvoff(u64 cntvoff) 14 14 { 15 15 write_sysreg(cntvoff, cntvoff_el2); 16 16 } ··· 19 19 * Should only be called on non-VHE systems. 20 20 * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). 21 21 */ 22 - void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) 22 + void __timer_disable_traps(struct kvm_vcpu *vcpu) 23 23 { 24 24 u64 val; 25 25 ··· 33 33 * Should only be called on non-VHE systems. 34 34 * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). 35 35 */ 36 - void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) 36 + void __timer_enable_traps(struct kvm_vcpu *vcpu) 37 37 { 38 38 u64 val; 39 39
-242
arch/arm64/kvm/hyp/tlb.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * Copyright (C) 2015 - ARM Ltd 4 - * Author: Marc Zyngier <marc.zyngier@arm.com> 5 - */ 6 - 7 - #include <linux/irqflags.h> 8 - 9 - #include <asm/kvm_hyp.h> 10 - #include <asm/kvm_mmu.h> 11 - #include <asm/tlbflush.h> 12 - 13 - struct tlb_inv_context { 14 - unsigned long flags; 15 - u64 tcr; 16 - u64 sctlr; 17 - }; 18 - 19 - static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, 20 - struct tlb_inv_context *cxt) 21 - { 22 - u64 val; 23 - 24 - local_irq_save(cxt->flags); 25 - 26 - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 27 - /* 28 - * For CPUs that are affected by ARM errata 1165522 or 1530923, 29 - * we cannot trust stage-1 to be in a correct state at that 30 - * point. Since we do not want to force a full load of the 31 - * vcpu state, we prevent the EL1 page-table walker to 32 - * allocate new TLBs. This is done by setting the EPD bits 33 - * in the TCR_EL1 register. We also need to prevent it to 34 - * allocate IPA->PA walks, so we enable the S1 MMU... 35 - */ 36 - val = cxt->tcr = read_sysreg_el1(SYS_TCR); 37 - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; 38 - write_sysreg_el1(val, SYS_TCR); 39 - val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); 40 - val |= SCTLR_ELx_M; 41 - write_sysreg_el1(val, SYS_SCTLR); 42 - } 43 - 44 - /* 45 - * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and 46 - * most TLB operations target EL2/EL0. In order to affect the 47 - * guest TLBs (EL1/EL0), we need to change one of these two 48 - * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so 49 - * let's flip TGE before executing the TLB operation. 50 - * 51 - * ARM erratum 1165522 requires some special handling (again), 52 - * as we need to make sure both stages of translation are in 53 - * place before clearing TGE. __load_guest_stage2() already 54 - * has an ISB in order to deal with this. 
55 - */ 56 - __load_guest_stage2(kvm); 57 - val = read_sysreg(hcr_el2); 58 - val &= ~HCR_TGE; 59 - write_sysreg(val, hcr_el2); 60 - isb(); 61 - } 62 - 63 - static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, 64 - struct tlb_inv_context *cxt) 65 - { 66 - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 67 - u64 val; 68 - 69 - /* 70 - * For CPUs that are affected by ARM 1319367, we need to 71 - * avoid a host Stage-1 walk while we have the guest's 72 - * VMID set in the VTTBR in order to invalidate TLBs. 73 - * We're guaranteed that the S1 MMU is enabled, so we can 74 - * simply set the EPD bits to avoid any further TLB fill. 75 - */ 76 - val = cxt->tcr = read_sysreg_el1(SYS_TCR); 77 - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; 78 - write_sysreg_el1(val, SYS_TCR); 79 - isb(); 80 - } 81 - 82 - /* __load_guest_stage2() includes an ISB for the workaround. */ 83 - __load_guest_stage2(kvm); 84 - asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); 85 - } 86 - 87 - static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, 88 - struct tlb_inv_context *cxt) 89 - { 90 - if (has_vhe()) 91 - __tlb_switch_to_guest_vhe(kvm, cxt); 92 - else 93 - __tlb_switch_to_guest_nvhe(kvm, cxt); 94 - } 95 - 96 - static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, 97 - struct tlb_inv_context *cxt) 98 - { 99 - /* 100 - * We're done with the TLB operation, let's restore the host's 101 - * view of HCR_EL2. 
102 - */ 103 - write_sysreg(0, vttbr_el2); 104 - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); 105 - isb(); 106 - 107 - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 108 - /* Restore the registers to what they were */ 109 - write_sysreg_el1(cxt->tcr, SYS_TCR); 110 - write_sysreg_el1(cxt->sctlr, SYS_SCTLR); 111 - } 112 - 113 - local_irq_restore(cxt->flags); 114 - } 115 - 116 - static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, 117 - struct tlb_inv_context *cxt) 118 - { 119 - write_sysreg(0, vttbr_el2); 120 - 121 - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 122 - /* Ensure write of the host VMID */ 123 - isb(); 124 - /* Restore the host's TCR_EL1 */ 125 - write_sysreg_el1(cxt->tcr, SYS_TCR); 126 - } 127 - } 128 - 129 - static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, 130 - struct tlb_inv_context *cxt) 131 - { 132 - if (has_vhe()) 133 - __tlb_switch_to_host_vhe(kvm, cxt); 134 - else 135 - __tlb_switch_to_host_nvhe(kvm, cxt); 136 - } 137 - 138 - void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 139 - { 140 - struct tlb_inv_context cxt; 141 - 142 - dsb(ishst); 143 - 144 - /* Switch to requested VMID */ 145 - kvm = kern_hyp_va(kvm); 146 - __tlb_switch_to_guest(kvm, &cxt); 147 - 148 - /* 149 - * We could do so much better if we had the VA as well. 150 - * Instead, we invalidate Stage-2 for this IPA, and the 151 - * whole of Stage-1. Weep... 152 - */ 153 - ipa >>= 12; 154 - __tlbi(ipas2e1is, ipa); 155 - 156 - /* 157 - * We have to ensure completion of the invalidation at Stage-2, 158 - * since a table walk on another CPU could refill a TLB with a 159 - * complete (S1 + S2) walk based on the old Stage-2 mapping if 160 - * the Stage-1 invalidation happened first. 
161 - */ 162 - dsb(ish); 163 - __tlbi(vmalle1is); 164 - dsb(ish); 165 - isb(); 166 - 167 - /* 168 - * If the host is running at EL1 and we have a VPIPT I-cache, 169 - * then we must perform I-cache maintenance at EL2 in order for 170 - * it to have an effect on the guest. Since the guest cannot hit 171 - * I-cache lines allocated with a different VMID, we don't need 172 - * to worry about junk out of guest reset (we nuke the I-cache on 173 - * VMID rollover), but we do need to be careful when remapping 174 - * executable pages for the same guest. This can happen when KSM 175 - * takes a CoW fault on an executable page, copies the page into 176 - * a page that was previously mapped in the guest and then needs 177 - * to invalidate the guest view of the I-cache for that page 178 - * from EL1. To solve this, we invalidate the entire I-cache when 179 - * unmapping a page from a guest if we have a VPIPT I-cache but 180 - * the host is running at EL1. As above, we could do better if 181 - * we had the VA. 182 - * 183 - * The moral of this story is: if you have a VPIPT I-cache, then 184 - * you should be running with VHE enabled. 
185 - */ 186 - if (!has_vhe() && icache_is_vpipt()) 187 - __flush_icache_all(); 188 - 189 - __tlb_switch_to_host(kvm, &cxt); 190 - } 191 - 192 - void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) 193 - { 194 - struct tlb_inv_context cxt; 195 - 196 - dsb(ishst); 197 - 198 - /* Switch to requested VMID */ 199 - kvm = kern_hyp_va(kvm); 200 - __tlb_switch_to_guest(kvm, &cxt); 201 - 202 - __tlbi(vmalls12e1is); 203 - dsb(ish); 204 - isb(); 205 - 206 - __tlb_switch_to_host(kvm, &cxt); 207 - } 208 - 209 - void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) 210 - { 211 - struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); 212 - struct tlb_inv_context cxt; 213 - 214 - /* Switch to requested VMID */ 215 - __tlb_switch_to_guest(kvm, &cxt); 216 - 217 - __tlbi(vmalle1); 218 - dsb(nsh); 219 - isb(); 220 - 221 - __tlb_switch_to_host(kvm, &cxt); 222 - } 223 - 224 - void __hyp_text __kvm_flush_vm_context(void) 225 - { 226 - dsb(ishst); 227 - __tlbi(alle1is); 228 - 229 - /* 230 - * VIPT and PIPT caches are not affected by VMID, so no maintenance 231 - * is necessary across a VMID rollover. 232 - * 233 - * VPIPT caches constrain lookup and maintenance to the active VMID, 234 - * so we need to invalidate lines with a stale VMID to avoid an ABA 235 - * race after multiple rollovers. 236 - * 237 - */ 238 - if (icache_is_vpipt()) 239 - asm volatile("ic ialluis"); 240 - 241 - dsb(ish); 242 - }
+2 -2
arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
··· 13 13 #include <asm/kvm_hyp.h> 14 14 #include <asm/kvm_mmu.h> 15 15 16 - static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) 16 + static bool __is_be(struct kvm_vcpu *vcpu) 17 17 { 18 18 if (vcpu_mode_is_32bit(vcpu)) 19 19 return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); ··· 32 32 * 0: Not a GICV access 33 33 * -1: Illegal GICV access successfully performed 34 34 */ 35 - int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) 35 + int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) 36 36 { 37 37 struct kvm *kvm = kern_hyp_va(vcpu->kvm); 38 38 struct vgic_dist *vgic = &kvm->arch.vgic;
+60 -74
arch/arm64/kvm/hyp/vgic-v3-sr.c
··· 16 16 #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) 17 17 #define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) 18 18 19 - static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) 19 + static u64 __gic_v3_get_lr(unsigned int lr) 20 20 { 21 21 switch (lr & 0xf) { 22 22 case 0: ··· 56 56 unreachable(); 57 57 } 58 58 59 - static void __hyp_text __gic_v3_set_lr(u64 val, int lr) 59 + static void __gic_v3_set_lr(u64 val, int lr) 60 60 { 61 61 switch (lr & 0xf) { 62 62 case 0: ··· 110 110 } 111 111 } 112 112 113 - static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) 113 + static void __vgic_v3_write_ap0rn(u32 val, int n) 114 114 { 115 115 switch (n) { 116 116 case 0: ··· 128 128 } 129 129 } 130 130 131 - static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) 131 + static void __vgic_v3_write_ap1rn(u32 val, int n) 132 132 { 133 133 switch (n) { 134 134 case 0: ··· 146 146 } 147 147 } 148 148 149 - static u32 __hyp_text __vgic_v3_read_ap0rn(int n) 149 + static u32 __vgic_v3_read_ap0rn(int n) 150 150 { 151 151 u32 val; 152 152 ··· 170 170 return val; 171 171 } 172 172 173 - static u32 __hyp_text __vgic_v3_read_ap1rn(int n) 173 + static u32 __vgic_v3_read_ap1rn(int n) 174 174 { 175 175 u32 val; 176 176 ··· 194 194 return val; 195 195 } 196 196 197 - void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) 197 + void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) 198 198 { 199 199 u64 used_lrs = cpu_if->used_lrs; 200 200 ··· 229 229 } 230 230 } 231 231 232 - void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) 232 + void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) 233 233 { 234 234 u64 used_lrs = cpu_if->used_lrs; 235 235 int i; ··· 255 255 } 256 256 } 257 257 258 - void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) 258 + void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) 259 259 { 260 260 /* 261 261 * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. 
This causes a ··· 302 302 write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); 303 303 } 304 304 305 - void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) 305 + void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) 306 306 { 307 307 u64 val; 308 308 ··· 328 328 write_gicreg(0, ICH_HCR_EL2); 329 329 } 330 330 331 - void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) 331 + void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) 332 332 { 333 333 u64 val; 334 334 u32 nr_pre_bits; ··· 361 361 } 362 362 } 363 363 364 - void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) 364 + void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) 365 365 { 366 366 u64 val; 367 367 u32 nr_pre_bits; ··· 394 394 } 395 395 } 396 396 397 - void __hyp_text __vgic_v3_init_lrs(void) 397 + void __vgic_v3_init_lrs(void) 398 398 { 399 399 int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); 400 400 int i; ··· 403 403 __gic_v3_set_lr(0, i); 404 404 } 405 405 406 - u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) 406 + u64 __vgic_v3_get_ich_vtr_el2(void) 407 407 { 408 408 return read_gicreg(ICH_VTR_EL2); 409 409 } 410 410 411 - u64 __hyp_text __vgic_v3_read_vmcr(void) 411 + u64 __vgic_v3_read_vmcr(void) 412 412 { 413 413 return read_gicreg(ICH_VMCR_EL2); 414 414 } 415 415 416 - void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) 416 + void __vgic_v3_write_vmcr(u32 vmcr) 417 417 { 418 418 write_gicreg(vmcr, ICH_VMCR_EL2); 419 419 } 420 420 421 - static int __hyp_text __vgic_v3_bpr_min(void) 421 + static int __vgic_v3_bpr_min(void) 422 422 { 423 423 /* See Pseudocode for VPriorityGroup */ 424 424 return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); 425 425 } 426 426 427 - static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) 427 + static int __vgic_v3_get_group(struct kvm_vcpu *vcpu) 428 428 { 429 - u32 esr = kvm_vcpu_get_hsr(vcpu); 429 + u32 esr = kvm_vcpu_get_esr(vcpu); 430 430 u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> 
ESR_ELx_SYS64_ISS_CRM_SHIFT; 431 431 432 432 return crm != 8; ··· 434 434 435 435 #define GICv3_IDLE_PRIORITY 0xff 436 436 437 - static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, 438 - u32 vmcr, 439 - u64 *lr_val) 437 + static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr, 438 + u64 *lr_val) 440 439 { 441 440 unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; 442 441 u8 priority = GICv3_IDLE_PRIORITY; ··· 473 474 return lr; 474 475 } 475 476 476 - static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, 477 - int intid, u64 *lr_val) 477 + static int __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid, 478 + u64 *lr_val) 478 479 { 479 480 unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; 480 481 int i; ··· 493 494 return -1; 494 495 } 495 496 496 - static int __hyp_text __vgic_v3_get_highest_active_priority(void) 497 + static int __vgic_v3_get_highest_active_priority(void) 497 498 { 498 499 u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); 499 500 u32 hap = 0; ··· 525 526 return GICv3_IDLE_PRIORITY; 526 527 } 527 528 528 - static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) 529 + static unsigned int __vgic_v3_get_bpr0(u32 vmcr) 529 530 { 530 531 return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; 531 532 } 532 533 533 - static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) 534 + static unsigned int __vgic_v3_get_bpr1(u32 vmcr) 534 535 { 535 536 unsigned int bpr; 536 537 ··· 549 550 * Convert a priority to a preemption level, taking the relevant BPR 550 551 * into account by zeroing the sub-priority bits. 551 552 */ 552 - static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) 553 + static u8 __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) 553 554 { 554 555 unsigned int bpr; 555 556 ··· 567 568 * matter what the guest does with its BPR, we can always set/get the 568 569 * same value of a priority. 
569 570 */ 570 - static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) 571 + static void __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) 571 572 { 572 573 u8 pre, ap; 573 574 u32 val; ··· 586 587 } 587 588 } 588 589 589 - static int __hyp_text __vgic_v3_clear_highest_active_priority(void) 590 + static int __vgic_v3_clear_highest_active_priority(void) 590 591 { 591 592 u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); 592 593 u32 hap = 0; ··· 624 625 return GICv3_IDLE_PRIORITY; 625 626 } 626 627 627 - static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 628 + static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 628 629 { 629 630 u64 lr_val; 630 631 u8 lr_prio, pmr; ··· 660 661 vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); 661 662 } 662 663 663 - static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) 664 + static void __vgic_v3_clear_active_lr(int lr, u64 lr_val) 664 665 { 665 666 lr_val &= ~ICH_LR_ACTIVE_BIT; 666 667 if (lr_val & ICH_LR_HW) { ··· 673 674 __gic_v3_set_lr(lr_val, lr); 674 675 } 675 676 676 - static void __hyp_text __vgic_v3_bump_eoicount(void) 677 + static void __vgic_v3_bump_eoicount(void) 677 678 { 678 679 u32 hcr; 679 680 ··· 682 683 write_gicreg(hcr, ICH_HCR_EL2); 683 684 } 684 685 685 - static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, 686 - u32 vmcr, int rt) 686 + static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 687 687 { 688 688 u32 vid = vcpu_get_reg(vcpu, rt); 689 689 u64 lr_val; ··· 705 707 __vgic_v3_clear_active_lr(lr, lr_val); 706 708 } 707 709 708 - static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 710 + static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 709 711 { 710 712 u32 vid = vcpu_get_reg(vcpu, rt); 711 713 u64 lr_val; ··· 742 744 __vgic_v3_clear_active_lr(lr, lr_val); 743 745 } 744 746 745 - static void __hyp_text 
__vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 747 + static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 746 748 { 747 749 vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); 748 750 } 749 751 750 - static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 752 + static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 751 753 { 752 754 vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); 753 755 } 754 756 755 - static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 757 + static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 756 758 { 757 759 u64 val = vcpu_get_reg(vcpu, rt); 758 760 ··· 764 766 __vgic_v3_write_vmcr(vmcr); 765 767 } 766 768 767 - static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 769 + static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 768 770 { 769 771 u64 val = vcpu_get_reg(vcpu, rt); 770 772 ··· 776 778 __vgic_v3_write_vmcr(vmcr); 777 779 } 778 780 779 - static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 781 + static void __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 780 782 { 781 783 vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); 782 784 } 783 785 784 - static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 786 + static void __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 785 787 { 786 788 vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); 787 789 } 788 790 789 - static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 791 + static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 790 792 { 791 793 u64 val = vcpu_get_reg(vcpu, rt); 792 794 u8 bpr_min = __vgic_v3_bpr_min() - 1; ··· 803 805 __vgic_v3_write_vmcr(vmcr); 804 806 } 805 807 806 - static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu 
*vcpu, u32 vmcr, int rt) 808 + static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 807 809 { 808 810 u64 val = vcpu_get_reg(vcpu, rt); 809 811 u8 bpr_min = __vgic_v3_bpr_min(); ··· 823 825 __vgic_v3_write_vmcr(vmcr); 824 826 } 825 827 826 - static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) 828 + static void __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) 827 829 { 828 830 u32 val; 829 831 ··· 835 837 vcpu_set_reg(vcpu, rt, val); 836 838 } 837 839 838 - static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) 840 + static void __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) 839 841 { 840 842 u32 val = vcpu_get_reg(vcpu, rt); 841 843 ··· 845 847 __vgic_v3_write_ap1rn(val, n); 846 848 } 847 849 848 - static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, 850 + static void __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, 849 851 u32 vmcr, int rt) 850 852 { 851 853 __vgic_v3_read_apxrn(vcpu, rt, 0); 852 854 } 853 855 854 - static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, 856 + static void __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, 855 857 u32 vmcr, int rt) 856 858 { 857 859 __vgic_v3_read_apxrn(vcpu, rt, 1); 858 860 } 859 861 860 - static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, 861 - u32 vmcr, int rt) 862 + static void __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 862 863 { 863 864 __vgic_v3_read_apxrn(vcpu, rt, 2); 864 865 } 865 866 866 - static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, 867 - u32 vmcr, int rt) 867 + static void __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 868 868 { 869 869 __vgic_v3_read_apxrn(vcpu, rt, 3); 870 870 } 871 871 872 - static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, 873 - u32 vmcr, int rt) 872 + static void __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 874 873 { 875 874 __vgic_v3_write_apxrn(vcpu, rt, 0); 
876 875 } 877 876 878 - static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, 879 - u32 vmcr, int rt) 877 + static void __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 880 878 { 881 879 __vgic_v3_write_apxrn(vcpu, rt, 1); 882 880 } 883 881 884 - static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, 885 - u32 vmcr, int rt) 882 + static void __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 886 883 { 887 884 __vgic_v3_write_apxrn(vcpu, rt, 2); 888 885 } 889 886 890 - static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, 891 - u32 vmcr, int rt) 887 + static void __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 892 888 { 893 889 __vgic_v3_write_apxrn(vcpu, rt, 3); 894 890 } 895 891 896 - static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, 897 - u32 vmcr, int rt) 892 + static void __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 898 893 { 899 894 u64 lr_val; 900 895 int lr, lr_grp, grp; ··· 906 915 vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); 907 916 } 908 917 909 - static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, 910 - u32 vmcr, int rt) 918 + static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 911 919 { 912 920 vmcr &= ICH_VMCR_PMR_MASK; 913 921 vmcr >>= ICH_VMCR_PMR_SHIFT; 914 922 vcpu_set_reg(vcpu, rt, vmcr); 915 923 } 916 924 917 - static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, 918 - u32 vmcr, int rt) 925 + static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 919 926 { 920 927 u32 val = vcpu_get_reg(vcpu, rt); 921 928 ··· 925 936 write_gicreg(vmcr, ICH_VMCR_EL2); 926 937 } 927 938 928 - static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, 929 - u32 vmcr, int rt) 939 + static void __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 930 940 { 931 941 u32 val = __vgic_v3_get_highest_active_priority(); 932 942 vcpu_set_reg(vcpu, rt, val); 933 943 } 934 
944 935 - static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, 936 - u32 vmcr, int rt) 945 + static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 937 946 { 938 947 u32 vtr, val; 939 948 ··· 952 965 vcpu_set_reg(vcpu, rt, val); 953 966 } 954 967 955 - static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, 956 - u32 vmcr, int rt) 968 + static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) 957 969 { 958 970 u32 val = vcpu_get_reg(vcpu, rt); 959 971 ··· 969 983 write_gicreg(vmcr, ICH_VMCR_EL2); 970 984 } 971 985 972 - int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) 986 + int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) 973 987 { 974 988 int rt; 975 989 u32 esr; ··· 978 992 bool is_read; 979 993 u32 sysreg; 980 994 981 - esr = kvm_vcpu_get_hsr(vcpu); 995 + esr = kvm_vcpu_get_esr(vcpu); 982 996 if (vcpu_mode_is_32bit(vcpu)) { 983 997 if (!kvm_condition_valid(vcpu)) { 984 998 __kvm_skip_instr(vcpu);
+11
arch/arm64/kvm/hyp/vhe/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + # 3 + # Makefile for Kernel-based Virtual Machine module, HYP/nVHE part 4 + # 5 + 6 + asflags-y := -D__KVM_VHE_HYPERVISOR__ 7 + ccflags-y := -D__KVM_VHE_HYPERVISOR__ 8 + 9 + obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o 10 + obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ 11 + ../fpsimd.o ../hyp-entry.o
+26
arch/arm64/kvm/hyp/vhe/debug-sr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #include <hyp/debug-sr.h> 8 + 9 + #include <linux/kvm_host.h> 10 + 11 + #include <asm/kvm_hyp.h> 12 + 13 + void __debug_switch_to_guest(struct kvm_vcpu *vcpu) 14 + { 15 + __debug_switch_to_guest_common(vcpu); 16 + } 17 + 18 + void __debug_switch_to_host(struct kvm_vcpu *vcpu) 19 + { 20 + __debug_switch_to_host_common(vcpu); 21 + } 22 + 23 + u32 __kvm_get_mdcr_el2(void) 24 + { 25 + return read_sysreg(mdcr_el2); 26 + }
+219
arch/arm64/kvm/hyp/vhe/switch.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #include <hyp/switch.h> 8 + 9 + #include <linux/arm-smccc.h> 10 + #include <linux/kvm_host.h> 11 + #include <linux/types.h> 12 + #include <linux/jump_label.h> 13 + #include <uapi/linux/psci.h> 14 + 15 + #include <kvm/arm_psci.h> 16 + 17 + #include <asm/barrier.h> 18 + #include <asm/cpufeature.h> 19 + #include <asm/kprobes.h> 20 + #include <asm/kvm_asm.h> 21 + #include <asm/kvm_emulate.h> 22 + #include <asm/kvm_hyp.h> 23 + #include <asm/kvm_mmu.h> 24 + #include <asm/fpsimd.h> 25 + #include <asm/debug-monitors.h> 26 + #include <asm/processor.h> 27 + #include <asm/thread_info.h> 28 + 29 + const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; 30 + 31 + static void __activate_traps(struct kvm_vcpu *vcpu) 32 + { 33 + u64 val; 34 + 35 + ___activate_traps(vcpu); 36 + 37 + val = read_sysreg(cpacr_el1); 38 + val |= CPACR_EL1_TTA; 39 + val &= ~CPACR_EL1_ZEN; 40 + 41 + /* 42 + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to 43 + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, 44 + * except for some missing controls, such as TAM. 45 + * In this case, CPTR_EL2.TAM has the same position with or without 46 + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM 47 + * shift value for trapping the AMU accesses. 
48 + */ 49 + 50 + val |= CPTR_EL2_TAM; 51 + 52 + if (update_fp_enabled(vcpu)) { 53 + if (vcpu_has_sve(vcpu)) 54 + val |= CPACR_EL1_ZEN; 55 + } else { 56 + val &= ~CPACR_EL1_FPEN; 57 + __activate_traps_fpsimd32(vcpu); 58 + } 59 + 60 + write_sysreg(val, cpacr_el1); 61 + 62 + write_sysreg(kvm_get_hyp_vector(), vbar_el1); 63 + } 64 + NOKPROBE_SYMBOL(__activate_traps); 65 + 66 + static void __deactivate_traps(struct kvm_vcpu *vcpu) 67 + { 68 + extern char vectors[]; /* kernel exception vectors */ 69 + 70 + ___deactivate_traps(vcpu); 71 + 72 + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); 73 + 74 + /* 75 + * ARM errata 1165522 and 1530923 require the actual execution of the 76 + * above before we can switch to the EL2/EL0 translation regime used by 77 + * the host. 78 + */ 79 + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); 80 + 81 + write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); 82 + write_sysreg(vectors, vbar_el1); 83 + } 84 + NOKPROBE_SYMBOL(__deactivate_traps); 85 + 86 + void activate_traps_vhe_load(struct kvm_vcpu *vcpu) 87 + { 88 + __activate_traps_common(vcpu); 89 + } 90 + 91 + void deactivate_traps_vhe_put(void) 92 + { 93 + u64 mdcr_el2 = read_sysreg(mdcr_el2); 94 + 95 + mdcr_el2 &= MDCR_EL2_HPMN_MASK | 96 + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | 97 + MDCR_EL2_TPMS; 98 + 99 + write_sysreg(mdcr_el2, mdcr_el2); 100 + 101 + __deactivate_traps_common(); 102 + } 103 + 104 + /* Switch to the guest for VHE systems running in EL2 */ 105 + static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) 106 + { 107 + struct kvm_cpu_context *host_ctxt; 108 + struct kvm_cpu_context *guest_ctxt; 109 + u64 exit_code; 110 + 111 + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 112 + host_ctxt->__hyp_running_vcpu = vcpu; 113 + guest_ctxt = &vcpu->arch.ctxt; 114 + 115 + sysreg_save_host_state_vhe(host_ctxt); 116 + 117 + /* 118 + * ARM erratum 1165522 requires us to configure both stage 1 and 119 + * stage 2 translation for the guest context before we clear 120 + 
* HCR_EL2.TGE. 121 + * 122 + * We have already configured the guest's stage 1 translation in 123 + * kvm_vcpu_load_sysregs_vhe above. We must now call __activate_vm 124 + * before __activate_traps, because __activate_vm configures 125 + * stage 2 translation, and __activate_traps clear HCR_EL2.TGE 126 + * (among other things). 127 + */ 128 + __activate_vm(vcpu->arch.hw_mmu); 129 + __activate_traps(vcpu); 130 + 131 + sysreg_restore_guest_state_vhe(guest_ctxt); 132 + __debug_switch_to_guest(vcpu); 133 + 134 + __set_guest_arch_workaround_state(vcpu); 135 + 136 + do { 137 + /* Jump in the fire! */ 138 + exit_code = __guest_enter(vcpu, host_ctxt); 139 + 140 + /* And we're baaack! */ 141 + } while (fixup_guest_exit(vcpu, &exit_code)); 142 + 143 + __set_host_arch_workaround_state(vcpu); 144 + 145 + sysreg_save_guest_state_vhe(guest_ctxt); 146 + 147 + __deactivate_traps(vcpu); 148 + 149 + sysreg_restore_host_state_vhe(host_ctxt); 150 + 151 + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) 152 + __fpsimd_save_fpexc32(vcpu); 153 + 154 + __debug_switch_to_host(vcpu); 155 + 156 + return exit_code; 157 + } 158 + NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); 159 + 160 + int __kvm_vcpu_run(struct kvm_vcpu *vcpu) 161 + { 162 + int ret; 163 + 164 + local_daif_mask(); 165 + 166 + /* 167 + * Having IRQs masked via PMR when entering the guest means the GIC 168 + * will not signal the CPU of interrupts of lower priority, and the 169 + * only way to get out will be via guest exceptions. 170 + * Naturally, we want to avoid this. 171 + * 172 + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a 173 + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. 174 + */ 175 + pmr_sync(); 176 + 177 + ret = __kvm_vcpu_run_vhe(vcpu); 178 + 179 + /* 180 + * local_daif_restore() takes care to properly restore PSTATE.DAIF 181 + * and the GIC PMR if the host is using IRQ priorities. 
182 + */ 183 + local_daif_restore(DAIF_PROCCTX_NOIRQ); 184 + 185 + /* 186 + * When we exit from the guest we change a number of CPU configuration 187 + * parameters, such as traps. Make sure these changes take effect 188 + * before running the host or additional guests. 189 + */ 190 + isb(); 191 + 192 + return ret; 193 + } 194 + 195 + static void __hyp_call_panic(u64 spsr, u64 elr, u64 par, 196 + struct kvm_cpu_context *host_ctxt) 197 + { 198 + struct kvm_vcpu *vcpu; 199 + vcpu = host_ctxt->__hyp_running_vcpu; 200 + 201 + __deactivate_traps(vcpu); 202 + sysreg_restore_host_state_vhe(host_ctxt); 203 + 204 + panic(__hyp_panic_string, 205 + spsr, elr, 206 + read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), 207 + read_sysreg(hpfar_el2), par, vcpu); 208 + } 209 + NOKPROBE_SYMBOL(__hyp_call_panic); 210 + 211 + void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) 212 + { 213 + u64 spsr = read_sysreg_el2(SYS_SPSR); 214 + u64 elr = read_sysreg_el2(SYS_ELR); 215 + u64 par = read_sysreg(par_el1); 216 + 217 + __hyp_call_panic(spsr, elr, par, host_ctxt); 218 + unreachable(); 219 + }
+114
arch/arm64/kvm/hyp/vhe/sysreg-sr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2012-2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #include <hyp/sysreg-sr.h> 8 + 9 + #include <linux/compiler.h> 10 + #include <linux/kvm_host.h> 11 + 12 + #include <asm/kprobes.h> 13 + #include <asm/kvm_asm.h> 14 + #include <asm/kvm_emulate.h> 15 + #include <asm/kvm_hyp.h> 16 + 17 + /* 18 + * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and 19 + * pstate, which are handled as part of the el2 return state) on every 20 + * switch (sp_el0 is being dealt with in the assembly code). 21 + * tpidr_el0 and tpidrro_el0 only need to be switched when going 22 + * to host userspace or a different VCPU. EL1 registers only need to be 23 + * switched when potentially going to run a different VCPU. The latter two 24 + * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. 25 + */ 26 + 27 + void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) 28 + { 29 + __sysreg_save_common_state(ctxt); 30 + } 31 + NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); 32 + 33 + void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) 34 + { 35 + __sysreg_save_common_state(ctxt); 36 + __sysreg_save_el2_return_state(ctxt); 37 + } 38 + NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); 39 + 40 + void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) 41 + { 42 + __sysreg_restore_common_state(ctxt); 43 + } 44 + NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); 45 + 46 + void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) 47 + { 48 + __sysreg_restore_common_state(ctxt); 49 + __sysreg_restore_el2_return_state(ctxt); 50 + } 51 + NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); 52 + 53 + /** 54 + * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU 55 + * 56 + * @vcpu: The VCPU pointer 57 + * 58 + * Load system registers that do not affect the host's execution, for 59 + * example EL1 system registers on a VHE 
system where the host kernel 60 + * runs at EL2. This function is called from KVM's vcpu_load() function 61 + * and loading system register state early avoids having to load them on 62 + * every entry to the VM. 63 + */ 64 + void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) 65 + { 66 + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; 67 + struct kvm_cpu_context *host_ctxt; 68 + 69 + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 70 + __sysreg_save_user_state(host_ctxt); 71 + 72 + /* 73 + * Load guest EL1 and user state 74 + * 75 + * We must restore the 32-bit state before the sysregs, thanks 76 + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). 77 + */ 78 + __sysreg32_restore_state(vcpu); 79 + __sysreg_restore_user_state(guest_ctxt); 80 + __sysreg_restore_el1_state(guest_ctxt); 81 + 82 + vcpu->arch.sysregs_loaded_on_cpu = true; 83 + 84 + activate_traps_vhe_load(vcpu); 85 + } 86 + 87 + /** 88 + * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU 89 + * 90 + * @vcpu: The VCPU pointer 91 + * 92 + * Save guest system registers that do not affect the host's execution, for 93 + * example EL1 system registers on a VHE system where the host kernel 94 + * runs at EL2. This function is called from KVM's vcpu_put() function 95 + * and deferring saving system register state until we're no longer running the 96 + * VCPU avoids having to save them on every exit from the VM. 97 + */ 98 + void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) 99 + { 100 + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; 101 + struct kvm_cpu_context *host_ctxt; 102 + 103 + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; 104 + deactivate_traps_vhe_put(); 105 + 106 + __sysreg_save_el1_state(guest_ctxt); 107 + __sysreg_save_user_state(guest_ctxt); 108 + __sysreg32_save_state(vcpu); 109 + 110 + /* Restore host user state */ 111 + __sysreg_restore_user_state(host_ctxt); 112 + 113 + vcpu->arch.sysregs_loaded_on_cpu = false; 114 + }
+12
arch/arm64/kvm/hyp/vhe/timer-sr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2012-2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #include <asm/kvm_hyp.h> 8 + 9 + void __kvm_timer_set_cntvoff(u64 cntvoff) 10 + { 11 + write_sysreg(cntvoff, cntvoff_el2); 12 + }
+162
arch/arm64/kvm/hyp/vhe/tlb.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2015 - ARM Ltd 4 + * Author: Marc Zyngier <marc.zyngier@arm.com> 5 + */ 6 + 7 + #include <linux/irqflags.h> 8 + 9 + #include <asm/kvm_hyp.h> 10 + #include <asm/kvm_mmu.h> 11 + #include <asm/tlbflush.h> 12 + 13 + struct tlb_inv_context { 14 + unsigned long flags; 15 + u64 tcr; 16 + u64 sctlr; 17 + }; 18 + 19 + static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, 20 + struct tlb_inv_context *cxt) 21 + { 22 + u64 val; 23 + 24 + local_irq_save(cxt->flags); 25 + 26 + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 27 + /* 28 + * For CPUs that are affected by ARM errata 1165522 or 1530923, 29 + * we cannot trust stage-1 to be in a correct state at that 30 + * point. Since we do not want to force a full load of the 31 + * vcpu state, we prevent the EL1 page-table walker to 32 + * allocate new TLBs. This is done by setting the EPD bits 33 + * in the TCR_EL1 register. We also need to prevent it to 34 + * allocate IPA->PA walks, so we enable the S1 MMU... 35 + */ 36 + val = cxt->tcr = read_sysreg_el1(SYS_TCR); 37 + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; 38 + write_sysreg_el1(val, SYS_TCR); 39 + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); 40 + val |= SCTLR_ELx_M; 41 + write_sysreg_el1(val, SYS_SCTLR); 42 + } 43 + 44 + /* 45 + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and 46 + * most TLB operations target EL2/EL0. In order to affect the 47 + * guest TLBs (EL1/EL0), we need to change one of these two 48 + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so 49 + * let's flip TGE before executing the TLB operation. 50 + * 51 + * ARM erratum 1165522 requires some special handling (again), 52 + * as we need to make sure both stages of translation are in 53 + * place before clearing TGE. __load_guest_stage2() already 54 + * has an ISB in order to deal with this. 
55 + */ 56 + __load_guest_stage2(mmu); 57 + val = read_sysreg(hcr_el2); 58 + val &= ~HCR_TGE; 59 + write_sysreg(val, hcr_el2); 60 + isb(); 61 + } 62 + 63 + static void __tlb_switch_to_host(struct tlb_inv_context *cxt) 64 + { 65 + /* 66 + * We're done with the TLB operation, let's restore the host's 67 + * view of HCR_EL2. 68 + */ 69 + write_sysreg(0, vttbr_el2); 70 + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); 71 + isb(); 72 + 73 + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { 74 + /* Restore the registers to what they were */ 75 + write_sysreg_el1(cxt->tcr, SYS_TCR); 76 + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); 77 + } 78 + 79 + local_irq_restore(cxt->flags); 80 + } 81 + 82 + void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, 83 + phys_addr_t ipa, int level) 84 + { 85 + struct tlb_inv_context cxt; 86 + 87 + dsb(ishst); 88 + 89 + /* Switch to requested VMID */ 90 + __tlb_switch_to_guest(mmu, &cxt); 91 + 92 + /* 93 + * We could do so much better if we had the VA as well. 94 + * Instead, we invalidate Stage-2 for this IPA, and the 95 + * whole of Stage-1. Weep... 96 + */ 97 + ipa >>= 12; 98 + __tlbi_level(ipas2e1is, ipa, level); 99 + 100 + /* 101 + * We have to ensure completion of the invalidation at Stage-2, 102 + * since a table walk on another CPU could refill a TLB with a 103 + * complete (S1 + S2) walk based on the old Stage-2 mapping if 104 + * the Stage-1 invalidation happened first. 
105 + */ 106 + dsb(ish); 107 + __tlbi(vmalle1is); 108 + dsb(ish); 109 + isb(); 110 + 111 + __tlb_switch_to_host(&cxt); 112 + } 113 + 114 + void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) 115 + { 116 + struct tlb_inv_context cxt; 117 + 118 + dsb(ishst); 119 + 120 + /* Switch to requested VMID */ 121 + __tlb_switch_to_guest(mmu, &cxt); 122 + 123 + __tlbi(vmalls12e1is); 124 + dsb(ish); 125 + isb(); 126 + 127 + __tlb_switch_to_host(&cxt); 128 + } 129 + 130 + void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) 131 + { 132 + struct tlb_inv_context cxt; 133 + 134 + /* Switch to requested VMID */ 135 + __tlb_switch_to_guest(mmu, &cxt); 136 + 137 + __tlbi(vmalle1); 138 + dsb(nsh); 139 + isb(); 140 + 141 + __tlb_switch_to_host(&cxt); 142 + } 143 + 144 + void __kvm_flush_vm_context(void) 145 + { 146 + dsb(ishst); 147 + __tlbi(alle1is); 148 + 149 + /* 150 + * VIPT and PIPT caches are not affected by VMID, so no maintenance 151 + * is necessary across a VMID rollover. 152 + * 153 + * VPIPT caches constrain lookup and maintenance to the active VMID, 154 + * so we need to invalidate lines with a stale VMID to avoid an ABA 155 + * race after multiple rollovers. 156 + * 157 + */ 158 + if (icache_is_vpipt()) 159 + asm volatile("ic ialluis"); 160 + 161 + dsb(ish); 162 + }
+1 -1
arch/arm64/kvm/inject_fault.c
··· 64 64 case PSR_MODE_EL1h: 65 65 vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); 66 66 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); 67 - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); 67 + vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); 68 68 break; 69 69 default: 70 70 /* Don't do that */
-6
arch/arm64/kvm/mmio.c
··· 146 146 return -ENOSYS; 147 147 } 148 148 149 - /* Page table accesses IO mem: tell guest to fix its TTBR */ 150 - if (kvm_vcpu_dabt_iss1tw(vcpu)) { 151 - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); 152 - return 1; 153 - } 154 - 155 149 /* 156 150 * Prepare MMIO operation. First decode the syndrome data we get 157 151 * from the CPU. Then try if some in-kernel emulation feels
+178 -133
arch/arm64/kvm/mmu.c
··· 55 55 */ 56 56 void kvm_flush_remote_tlbs(struct kvm *kvm) 57 57 { 58 - kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); 58 + kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); 59 59 } 60 60 61 - static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 61 + static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, 62 + int level) 62 63 { 63 - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); 64 + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, level); 64 65 } 65 66 66 67 /* ··· 91 90 92 91 /** 93 92 * stage2_dissolve_pmd() - clear and flush huge PMD entry 94 - * @kvm: pointer to kvm structure. 93 + * @mmu: pointer to mmu structure to operate on 95 94 * @addr: IPA 96 95 * @pmd: pmd pointer for IPA 97 96 * 98 97 * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. 99 98 */ 100 - static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) 99 + static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t *pmd) 101 100 { 102 101 if (!pmd_thp_or_huge(*pmd)) 103 102 return; 104 103 105 104 pmd_clear(pmd); 106 - kvm_tlb_flush_vmid_ipa(kvm, addr); 105 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); 107 106 put_page(virt_to_page(pmd)); 108 107 } 109 108 110 109 /** 111 110 * stage2_dissolve_pud() - clear and flush huge PUD entry 112 - * @kvm: pointer to kvm structure. 111 + * @mmu: pointer to mmu structure to operate on 113 112 * @addr: IPA 114 113 * @pud: pud pointer for IPA 115 114 * 116 115 * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. 
117 116 */ 118 - static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) 117 + static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t *pudp) 119 118 { 119 + struct kvm *kvm = mmu->kvm; 120 + 120 121 if (!stage2_pud_huge(kvm, *pudp)) 121 122 return; 122 123 123 124 stage2_pud_clear(kvm, pudp); 124 - kvm_tlb_flush_vmid_ipa(kvm, addr); 125 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); 125 126 put_page(virt_to_page(pudp)); 126 127 } 127 128 128 - static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) 129 + static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr) 129 130 { 131 + struct kvm *kvm = mmu->kvm; 130 132 p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL); 131 133 stage2_pgd_clear(kvm, pgd); 132 - kvm_tlb_flush_vmid_ipa(kvm, addr); 134 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); 133 135 stage2_p4d_free(kvm, p4d_table); 134 136 put_page(virt_to_page(pgd)); 135 137 } 136 138 137 - static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr) 139 + static void clear_stage2_p4d_entry(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr) 138 140 { 141 + struct kvm *kvm = mmu->kvm; 139 142 pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0); 140 143 stage2_p4d_clear(kvm, p4d); 141 - kvm_tlb_flush_vmid_ipa(kvm, addr); 144 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); 142 145 stage2_pud_free(kvm, pud_table); 143 146 put_page(virt_to_page(p4d)); 144 147 } 145 148 146 - static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) 149 + static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr) 147 150 { 151 + struct kvm *kvm = mmu->kvm; 148 152 pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); 153 + 149 154 VM_BUG_ON(stage2_pud_huge(kvm, *pud)); 150 155 stage2_pud_clear(kvm, pud); 151 - kvm_tlb_flush_vmid_ipa(kvm, addr); 156 + 
kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); 152 157 stage2_pmd_free(kvm, pmd_table); 153 158 put_page(virt_to_page(pud)); 154 159 } 155 160 156 - static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) 161 + static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr) 157 162 { 158 163 pte_t *pte_table = pte_offset_kernel(pmd, 0); 159 164 VM_BUG_ON(pmd_thp_or_huge(*pmd)); 160 165 pmd_clear(pmd); 161 - kvm_tlb_flush_vmid_ipa(kvm, addr); 166 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); 162 167 free_page((unsigned long)pte_table); 163 168 put_page(virt_to_page(pmd)); 164 169 } ··· 230 223 * we then fully enforce cacheability of RAM, no matter what the guest 231 224 * does. 232 225 */ 233 - static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, 226 + static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, 234 227 phys_addr_t addr, phys_addr_t end) 235 228 { 236 229 phys_addr_t start_addr = addr; ··· 242 235 pte_t old_pte = *pte; 243 236 244 237 kvm_set_pte(pte, __pte(0)); 245 - kvm_tlb_flush_vmid_ipa(kvm, addr); 238 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); 246 239 247 240 /* No need to invalidate the cache for device mappings */ 248 241 if (!kvm_is_device_pfn(pte_pfn(old_pte))) ··· 252 245 } 253 246 } while (pte++, addr += PAGE_SIZE, addr != end); 254 247 255 - if (stage2_pte_table_empty(kvm, start_pte)) 256 - clear_stage2_pmd_entry(kvm, pmd, start_addr); 248 + if (stage2_pte_table_empty(mmu->kvm, start_pte)) 249 + clear_stage2_pmd_entry(mmu, pmd, start_addr); 257 250 } 258 251 259 - static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, 252 + static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, 260 253 phys_addr_t addr, phys_addr_t end) 261 254 { 255 + struct kvm *kvm = mmu->kvm; 262 256 phys_addr_t next, start_addr = addr; 263 257 pmd_t *pmd, *start_pmd; 264 258 ··· 271 263 pmd_t old_pmd = *pmd; 272 264 273 265 pmd_clear(pmd); 274 - 
kvm_tlb_flush_vmid_ipa(kvm, addr); 266 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); 275 267 276 268 kvm_flush_dcache_pmd(old_pmd); 277 269 278 270 put_page(virt_to_page(pmd)); 279 271 } else { 280 - unmap_stage2_ptes(kvm, pmd, addr, next); 272 + unmap_stage2_ptes(mmu, pmd, addr, next); 281 273 } 282 274 } 283 275 } while (pmd++, addr = next, addr != end); 284 276 285 277 if (stage2_pmd_table_empty(kvm, start_pmd)) 286 - clear_stage2_pud_entry(kvm, pud, start_addr); 278 + clear_stage2_pud_entry(mmu, pud, start_addr); 287 279 } 288 280 289 - static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d, 281 + static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, 290 282 phys_addr_t addr, phys_addr_t end) 291 283 { 284 + struct kvm *kvm = mmu->kvm; 292 285 phys_addr_t next, start_addr = addr; 293 286 pud_t *pud, *start_pud; 294 287 ··· 301 292 pud_t old_pud = *pud; 302 293 303 294 stage2_pud_clear(kvm, pud); 304 - kvm_tlb_flush_vmid_ipa(kvm, addr); 295 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); 305 296 kvm_flush_dcache_pud(old_pud); 306 297 put_page(virt_to_page(pud)); 307 298 } else { 308 - unmap_stage2_pmds(kvm, pud, addr, next); 299 + unmap_stage2_pmds(mmu, pud, addr, next); 309 300 } 310 301 } 311 302 } while (pud++, addr = next, addr != end); 312 303 313 304 if (stage2_pud_table_empty(kvm, start_pud)) 314 - clear_stage2_p4d_entry(kvm, p4d, start_addr); 305 + clear_stage2_p4d_entry(mmu, p4d, start_addr); 315 306 } 316 307 317 - static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, 308 + static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, 318 309 phys_addr_t addr, phys_addr_t end) 319 310 { 311 + struct kvm *kvm = mmu->kvm; 320 312 phys_addr_t next, start_addr = addr; 321 313 p4d_t *p4d, *start_p4d; 322 314 ··· 325 315 do { 326 316 next = stage2_p4d_addr_end(kvm, addr, end); 327 317 if (!stage2_p4d_none(kvm, *p4d)) 328 - unmap_stage2_puds(kvm, p4d, addr, next); 318 + unmap_stage2_puds(mmu, p4d, addr, next); 329 319 } while 
(p4d++, addr = next, addr != end); 330 320 331 321 if (stage2_p4d_table_empty(kvm, start_p4d)) 332 - clear_stage2_pgd_entry(kvm, pgd, start_addr); 322 + clear_stage2_pgd_entry(mmu, pgd, start_addr); 333 323 } 334 324 335 325 /** ··· 343 333 * destroying the VM), otherwise another faulting VCPU may come in and mess 344 334 * with things behind our backs. 345 335 */ 346 - static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) 336 + static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) 347 337 { 338 + struct kvm *kvm = mmu->kvm; 348 339 pgd_t *pgd; 349 340 phys_addr_t addr = start, end = start + size; 350 341 phys_addr_t next; ··· 353 342 assert_spin_locked(&kvm->mmu_lock); 354 343 WARN_ON(size & ~PAGE_MASK); 355 344 356 - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); 345 + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); 357 346 do { 358 347 /* 359 348 * Make sure the page table is still active, as another thread 360 349 * could have possibly freed the page table, while we released 361 350 * the lock. 362 351 */ 363 - if (!READ_ONCE(kvm->arch.pgd)) 352 + if (!READ_ONCE(mmu->pgd)) 364 353 break; 365 354 next = stage2_pgd_addr_end(kvm, addr, end); 366 355 if (!stage2_pgd_none(kvm, *pgd)) 367 - unmap_stage2_p4ds(kvm, pgd, addr, next); 356 + unmap_stage2_p4ds(mmu, pgd, addr, next); 368 357 /* 369 358 * If the range is too large, release the kvm->mmu_lock 370 359 * to prevent starvation and lockup detector warnings. 
··· 374 363 } while (pgd++, addr = next, addr != end); 375 364 } 376 365 377 - static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, 366 + static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, 378 367 phys_addr_t addr, phys_addr_t end) 379 368 { 380 369 pte_t *pte; ··· 386 375 } while (pte++, addr += PAGE_SIZE, addr != end); 387 376 } 388 377 389 - static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, 378 + static void stage2_flush_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, 390 379 phys_addr_t addr, phys_addr_t end) 391 380 { 381 + struct kvm *kvm = mmu->kvm; 392 382 pmd_t *pmd; 393 383 phys_addr_t next; 394 384 ··· 400 388 if (pmd_thp_or_huge(*pmd)) 401 389 kvm_flush_dcache_pmd(*pmd); 402 390 else 403 - stage2_flush_ptes(kvm, pmd, addr, next); 391 + stage2_flush_ptes(mmu, pmd, addr, next); 404 392 } 405 393 } while (pmd++, addr = next, addr != end); 406 394 } 407 395 408 - static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d, 396 + static void stage2_flush_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, 409 397 phys_addr_t addr, phys_addr_t end) 410 398 { 399 + struct kvm *kvm = mmu->kvm; 411 400 pud_t *pud; 412 401 phys_addr_t next; 413 402 ··· 419 406 if (stage2_pud_huge(kvm, *pud)) 420 407 kvm_flush_dcache_pud(*pud); 421 408 else 422 - stage2_flush_pmds(kvm, pud, addr, next); 409 + stage2_flush_pmds(mmu, pud, addr, next); 423 410 } 424 411 } while (pud++, addr = next, addr != end); 425 412 } 426 413 427 - static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd, 414 + static void stage2_flush_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, 428 415 phys_addr_t addr, phys_addr_t end) 429 416 { 417 + struct kvm *kvm = mmu->kvm; 430 418 p4d_t *p4d; 431 419 phys_addr_t next; 432 420 ··· 435 421 do { 436 422 next = stage2_p4d_addr_end(kvm, addr, end); 437 423 if (!stage2_p4d_none(kvm, *p4d)) 438 - stage2_flush_puds(kvm, p4d, addr, next); 424 + stage2_flush_puds(mmu, p4d, addr, next); 439 425 } while (p4d++, addr = next, addr != end); 440 426 } 441 427 442 
428 static void stage2_flush_memslot(struct kvm *kvm, 443 429 struct kvm_memory_slot *memslot) 444 430 { 431 + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; 445 432 phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; 446 433 phys_addr_t end = addr + PAGE_SIZE * memslot->npages; 447 434 phys_addr_t next; 448 435 pgd_t *pgd; 449 436 450 - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); 437 + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); 451 438 do { 452 439 next = stage2_pgd_addr_end(kvm, addr, end); 453 440 if (!stage2_pgd_none(kvm, *pgd)) 454 - stage2_flush_p4ds(kvm, pgd, addr, next); 441 + stage2_flush_p4ds(mmu, pgd, addr, next); 455 442 456 443 if (next != end) 457 444 cond_resched_lock(&kvm->mmu_lock); ··· 979 964 } 980 965 981 966 /** 982 - * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. 983 - * @kvm: The KVM struct pointer for the VM. 967 + * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure 968 + * @kvm: The pointer to the KVM structure 969 + * @mmu: The pointer to the s2 MMU structure 984 970 * 985 971 * Allocates only the stage-2 HW PGD level table(s) of size defined by 986 - * stage2_pgd_size(kvm). 972 + * stage2_pgd_size(mmu->kvm). 987 973 * 988 974 * Note we don't need locking here as this is only called when the VM is 989 975 * created, which can only be done once. 
990 976 */ 991 - int kvm_alloc_stage2_pgd(struct kvm *kvm) 977 + int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) 992 978 { 993 979 phys_addr_t pgd_phys; 994 980 pgd_t *pgd; 981 + int cpu; 995 982 996 - if (kvm->arch.pgd != NULL) { 983 + if (mmu->pgd != NULL) { 997 984 kvm_err("kvm_arch already initialized?\n"); 998 985 return -EINVAL; 999 986 } ··· 1009 992 if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) 1010 993 return -EINVAL; 1011 994 1012 - kvm->arch.pgd = pgd; 1013 - kvm->arch.pgd_phys = pgd_phys; 995 + mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); 996 + if (!mmu->last_vcpu_ran) { 997 + free_pages_exact(pgd, stage2_pgd_size(kvm)); 998 + return -ENOMEM; 999 + } 1000 + 1001 + for_each_possible_cpu(cpu) 1002 + *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1; 1003 + 1004 + mmu->kvm = kvm; 1005 + mmu->pgd = pgd; 1006 + mmu->pgd_phys = pgd_phys; 1007 + mmu->vmid.vmid_gen = 0; 1008 + 1014 1009 return 0; 1015 1010 } 1016 1011 ··· 1061 1032 1062 1033 if (!(vma->vm_flags & VM_PFNMAP)) { 1063 1034 gpa_t gpa = addr + (vm_start - memslot->userspace_addr); 1064 - unmap_stage2_range(kvm, gpa, vm_end - vm_start); 1035 + unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start); 1065 1036 } 1066 1037 hva = vm_end; 1067 1038 } while (hva < reg_end); ··· 1093 1064 srcu_read_unlock(&kvm->srcu, idx); 1094 1065 } 1095 1066 1096 - /** 1097 - * kvm_free_stage2_pgd - free all stage-2 tables 1098 - * @kvm: The KVM struct pointer for the VM. 1099 - * 1100 - * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all 1101 - * underlying level-2 and level-3 tables before freeing the actual level-1 table 1102 - * and setting the struct pointer to NULL. 
1103 - */ 1104 - void kvm_free_stage2_pgd(struct kvm *kvm) 1067 + void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) 1105 1068 { 1069 + struct kvm *kvm = mmu->kvm; 1106 1070 void *pgd = NULL; 1107 1071 1108 1072 spin_lock(&kvm->mmu_lock); 1109 - if (kvm->arch.pgd) { 1110 - unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); 1111 - pgd = READ_ONCE(kvm->arch.pgd); 1112 - kvm->arch.pgd = NULL; 1113 - kvm->arch.pgd_phys = 0; 1073 + if (mmu->pgd) { 1074 + unmap_stage2_range(mmu, 0, kvm_phys_size(kvm)); 1075 + pgd = READ_ONCE(mmu->pgd); 1076 + mmu->pgd = NULL; 1114 1077 } 1115 1078 spin_unlock(&kvm->mmu_lock); 1116 1079 1117 1080 /* Free the HW pgd, one page at a time */ 1118 - if (pgd) 1081 + if (pgd) { 1119 1082 free_pages_exact(pgd, stage2_pgd_size(kvm)); 1083 + free_percpu(mmu->last_vcpu_ran); 1084 + } 1120 1085 } 1121 1086 1122 - static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 1087 + static p4d_t *stage2_get_p4d(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, 1123 1088 phys_addr_t addr) 1124 1089 { 1090 + struct kvm *kvm = mmu->kvm; 1125 1091 pgd_t *pgd; 1126 1092 p4d_t *p4d; 1127 1093 1128 - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); 1094 + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); 1129 1095 if (stage2_pgd_none(kvm, *pgd)) { 1130 1096 if (!cache) 1131 1097 return NULL; ··· 1132 1108 return stage2_p4d_offset(kvm, pgd, addr); 1133 1109 } 1134 1110 1135 - static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 1111 + static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, 1136 1112 phys_addr_t addr) 1137 1113 { 1114 + struct kvm *kvm = mmu->kvm; 1138 1115 p4d_t *p4d; 1139 1116 pud_t *pud; 1140 1117 1141 - p4d = stage2_get_p4d(kvm, cache, addr); 1118 + p4d = stage2_get_p4d(mmu, cache, addr); 1142 1119 if (stage2_p4d_none(kvm, *p4d)) { 1143 1120 if (!cache) 1144 1121 return NULL; ··· 1151 1126 return stage2_pud_offset(kvm, p4d, addr); 1152 1127 } 1153 1128 1154 
- static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 1129 + static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, 1155 1130 phys_addr_t addr) 1156 1131 { 1132 + struct kvm *kvm = mmu->kvm; 1157 1133 pud_t *pud; 1158 1134 pmd_t *pmd; 1159 1135 1160 - pud = stage2_get_pud(kvm, cache, addr); 1136 + pud = stage2_get_pud(mmu, cache, addr); 1161 1137 if (!pud || stage2_pud_huge(kvm, *pud)) 1162 1138 return NULL; 1163 1139 ··· 1173 1147 return stage2_pmd_offset(kvm, pud, addr); 1174 1148 } 1175 1149 1176 - static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache 1177 - *cache, phys_addr_t addr, const pmd_t *new_pmd) 1150 + static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu, 1151 + struct kvm_mmu_memory_cache *cache, 1152 + phys_addr_t addr, const pmd_t *new_pmd) 1178 1153 { 1179 1154 pmd_t *pmd, old_pmd; 1180 1155 1181 1156 retry: 1182 - pmd = stage2_get_pmd(kvm, cache, addr); 1157 + pmd = stage2_get_pmd(mmu, cache, addr); 1183 1158 VM_BUG_ON(!pmd); 1184 1159 1185 1160 old_pmd = *pmd; ··· 1213 1186 * get handled accordingly. 
1214 1187 */ 1215 1188 if (!pmd_thp_or_huge(old_pmd)) { 1216 - unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); 1189 + unmap_stage2_range(mmu, addr & S2_PMD_MASK, S2_PMD_SIZE); 1217 1190 goto retry; 1218 1191 } 1219 1192 /* ··· 1229 1202 */ 1230 1203 WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); 1231 1204 pmd_clear(pmd); 1232 - kvm_tlb_flush_vmid_ipa(kvm, addr); 1205 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); 1233 1206 } else { 1234 1207 get_page(virt_to_page(pmd)); 1235 1208 } ··· 1238 1211 return 0; 1239 1212 } 1240 1213 1241 - static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 1214 + static int stage2_set_pud_huge(struct kvm_s2_mmu *mmu, 1215 + struct kvm_mmu_memory_cache *cache, 1242 1216 phys_addr_t addr, const pud_t *new_pudp) 1243 1217 { 1218 + struct kvm *kvm = mmu->kvm; 1244 1219 pud_t *pudp, old_pud; 1245 1220 1246 1221 retry: 1247 - pudp = stage2_get_pud(kvm, cache, addr); 1222 + pudp = stage2_get_pud(mmu, cache, addr); 1248 1223 VM_BUG_ON(!pudp); 1249 1224 1250 1225 old_pud = *pudp; ··· 1265 1236 * the range for this block and retry. 1266 1237 */ 1267 1238 if (!stage2_pud_huge(kvm, old_pud)) { 1268 - unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); 1239 + unmap_stage2_range(mmu, addr & S2_PUD_MASK, S2_PUD_SIZE); 1269 1240 goto retry; 1270 1241 } 1271 1242 1272 1243 WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); 1273 1244 stage2_pud_clear(kvm, pudp); 1274 - kvm_tlb_flush_vmid_ipa(kvm, addr); 1245 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); 1275 1246 } else { 1276 1247 get_page(virt_to_page(pudp)); 1277 1248 } ··· 1286 1257 * leaf-entry is returned in the appropriate level variable - pudpp, 1287 1258 * pmdpp, ptepp. 
1288 1259 */ 1289 - static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, 1260 + static bool stage2_get_leaf_entry(struct kvm_s2_mmu *mmu, phys_addr_t addr, 1290 1261 pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp) 1291 1262 { 1263 + struct kvm *kvm = mmu->kvm; 1292 1264 pud_t *pudp; 1293 1265 pmd_t *pmdp; 1294 1266 pte_t *ptep; ··· 1298 1268 *pmdpp = NULL; 1299 1269 *ptepp = NULL; 1300 1270 1301 - pudp = stage2_get_pud(kvm, NULL, addr); 1271 + pudp = stage2_get_pud(mmu, NULL, addr); 1302 1272 if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp)) 1303 1273 return false; 1304 1274 ··· 1324 1294 return true; 1325 1295 } 1326 1296 1327 - static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz) 1297 + static bool stage2_is_exec(struct kvm_s2_mmu *mmu, phys_addr_t addr, unsigned long sz) 1328 1298 { 1329 1299 pud_t *pudp; 1330 1300 pmd_t *pmdp; 1331 1301 pte_t *ptep; 1332 1302 bool found; 1333 1303 1334 - found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep); 1304 + found = stage2_get_leaf_entry(mmu, addr, &pudp, &pmdp, &ptep); 1335 1305 if (!found) 1336 1306 return false; 1337 1307 ··· 1343 1313 return sz == PAGE_SIZE && kvm_s2pte_exec(ptep); 1344 1314 } 1345 1315 1346 - static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 1316 + static int stage2_set_pte(struct kvm_s2_mmu *mmu, 1317 + struct kvm_mmu_memory_cache *cache, 1347 1318 phys_addr_t addr, const pte_t *new_pte, 1348 1319 unsigned long flags) 1349 1320 { 1321 + struct kvm *kvm = mmu->kvm; 1350 1322 pud_t *pud; 1351 1323 pmd_t *pmd; 1352 1324 pte_t *pte, old_pte; ··· 1358 1326 VM_BUG_ON(logging_active && !cache); 1359 1327 1360 1328 /* Create stage-2 page table mapping - Levels 0 and 1 */ 1361 - pud = stage2_get_pud(kvm, cache, addr); 1329 + pud = stage2_get_pud(mmu, cache, addr); 1362 1330 if (!pud) { 1363 1331 /* 1364 1332 * Ignore calls from kvm_set_spte_hva for unallocated ··· 1372 1340 * on to allocate page. 
1373 1341 */ 1374 1342 if (logging_active) 1375 - stage2_dissolve_pud(kvm, addr, pud); 1343 + stage2_dissolve_pud(mmu, addr, pud); 1376 1344 1377 1345 if (stage2_pud_none(kvm, *pud)) { 1378 1346 if (!cache) ··· 1396 1364 * allocate page. 1397 1365 */ 1398 1366 if (logging_active) 1399 - stage2_dissolve_pmd(kvm, addr, pmd); 1367 + stage2_dissolve_pmd(mmu, addr, pmd); 1400 1368 1401 1369 /* Create stage-2 page mappings - Level 2 */ 1402 1370 if (pmd_none(*pmd)) { ··· 1420 1388 return 0; 1421 1389 1422 1390 kvm_set_pte(pte, __pte(0)); 1423 - kvm_tlb_flush_vmid_ipa(kvm, addr); 1391 + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); 1424 1392 } else { 1425 1393 get_page(virt_to_page(pte)); 1426 1394 } ··· 1485 1453 if (ret) 1486 1454 goto out; 1487 1455 spin_lock(&kvm->mmu_lock); 1488 - ret = stage2_set_pte(kvm, &cache, addr, &pte, 1489 - KVM_S2PTE_FLAG_IS_IOMAP); 1456 + ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte, 1457 + KVM_S2PTE_FLAG_IS_IOMAP); 1490 1458 spin_unlock(&kvm->mmu_lock); 1491 1459 if (ret) 1492 1460 goto out; ··· 1525 1493 * @addr: range start address 1526 1494 * @end: range end address 1527 1495 */ 1528 - static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, 1496 + static void stage2_wp_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, 1529 1497 phys_addr_t addr, phys_addr_t end) 1530 1498 { 1499 + struct kvm *kvm = mmu->kvm; 1531 1500 pmd_t *pmd; 1532 1501 phys_addr_t next; 1533 1502 ··· 1549 1516 1550 1517 /** 1551 1518 * stage2_wp_puds - write protect P4D range 1552 - * @pgd: pointer to pgd entry 1519 + * @p4d: pointer to p4d entry 1553 1520 * @addr: range start address 1554 1521 * @end: range end address 1555 1522 */ 1556 - static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, 1523 + static void stage2_wp_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, 1557 1524 phys_addr_t addr, phys_addr_t end) 1558 1525 { 1526 + struct kvm *kvm = mmu->kvm; 1559 1527 pud_t *pud; 1560 1528 phys_addr_t next; 1561 1529 ··· 1568 1534 if (!kvm_s2pud_readonly(pud)) 1569 
1535 kvm_set_s2pud_readonly(pud); 1570 1536 } else { 1571 - stage2_wp_pmds(kvm, pud, addr, next); 1537 + stage2_wp_pmds(mmu, pud, addr, next); 1572 1538 } 1573 1539 } 1574 1540 } while (pud++, addr = next, addr != end); ··· 1580 1546 * @addr: range start address 1581 1547 * @end: range end address 1582 1548 */ 1583 - static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, 1549 + static void stage2_wp_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, 1584 1550 phys_addr_t addr, phys_addr_t end) 1585 1551 { 1552 + struct kvm *kvm = mmu->kvm; 1586 1553 p4d_t *p4d; 1587 1554 phys_addr_t next; 1588 1555 ··· 1591 1556 do { 1592 1557 next = stage2_p4d_addr_end(kvm, addr, end); 1593 1558 if (!stage2_p4d_none(kvm, *p4d)) 1594 - stage2_wp_puds(kvm, p4d, addr, next); 1559 + stage2_wp_puds(mmu, p4d, addr, next); 1595 1560 } while (p4d++, addr = next, addr != end); 1596 1561 } 1597 1562 ··· 1601 1566 * @addr: Start address of range 1602 1567 * @end: End address of range 1603 1568 */ 1604 - static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) 1569 + static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) 1605 1570 { 1571 + struct kvm *kvm = mmu->kvm; 1606 1572 pgd_t *pgd; 1607 1573 phys_addr_t next; 1608 1574 1609 - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); 1575 + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); 1610 1576 do { 1611 1577 /* 1612 1578 * Release kvm_mmu_lock periodically if the memory region is ··· 1619 1583 * the lock. 
1620 1584 */ 1621 1585 cond_resched_lock(&kvm->mmu_lock); 1622 - if (!READ_ONCE(kvm->arch.pgd)) 1586 + if (!READ_ONCE(mmu->pgd)) 1623 1587 break; 1624 1588 next = stage2_pgd_addr_end(kvm, addr, end); 1625 1589 if (stage2_pgd_present(kvm, *pgd)) 1626 - stage2_wp_p4ds(kvm, pgd, addr, next); 1590 + stage2_wp_p4ds(mmu, pgd, addr, next); 1627 1591 } while (pgd++, addr = next, addr != end); 1628 1592 } 1629 1593 ··· 1653 1617 end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; 1654 1618 1655 1619 spin_lock(&kvm->mmu_lock); 1656 - stage2_wp_range(kvm, start, end); 1620 + stage2_wp_range(&kvm->arch.mmu, start, end); 1657 1621 spin_unlock(&kvm->mmu_lock); 1658 1622 kvm_flush_remote_tlbs(kvm); 1659 1623 } ··· 1677 1641 phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; 1678 1642 phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; 1679 1643 1680 - stage2_wp_range(kvm, start, end); 1644 + stage2_wp_range(&kvm->arch.mmu, start, end); 1681 1645 } 1682 1646 1683 1647 /* ··· 1840 1804 pgprot_t mem_type = PAGE_S2; 1841 1805 bool logging_active = memslot_is_logging(memslot); 1842 1806 unsigned long vma_pagesize, flags = 0; 1807 + struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu; 1843 1808 1844 1809 write_fault = kvm_is_write_fault(vcpu); 1845 1810 exec_fault = kvm_vcpu_trap_is_iabt(vcpu); ··· 1962 1925 */ 1963 1926 needs_exec = exec_fault || 1964 1927 (fault_status == FSC_PERM && 1965 - stage2_is_exec(kvm, fault_ipa, vma_pagesize)); 1928 + stage2_is_exec(mmu, fault_ipa, vma_pagesize)); 1966 1929 1967 1930 if (vma_pagesize == PUD_SIZE) { 1968 1931 pud_t new_pud = kvm_pfn_pud(pfn, mem_type); ··· 1974 1937 if (needs_exec) 1975 1938 new_pud = kvm_s2pud_mkexec(new_pud); 1976 1939 1977 - ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud); 1940 + ret = stage2_set_pud_huge(mmu, memcache, fault_ipa, &new_pud); 1978 1941 } else if (vma_pagesize == PMD_SIZE) { 1979 1942 pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type); 1980 1943 ··· 1986 1949 if (needs_exec) 1987 1950 
new_pmd = kvm_s2pmd_mkexec(new_pmd); 1988 1951 1989 - ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); 1952 + ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd); 1990 1953 } else { 1991 1954 pte_t new_pte = kvm_pfn_pte(pfn, mem_type); 1992 1955 ··· 1998 1961 if (needs_exec) 1999 1962 new_pte = kvm_s2pte_mkexec(new_pte); 2000 1963 2001 - ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); 1964 + ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags); 2002 1965 } 2003 1966 2004 1967 out_unlock: ··· 2027 1990 2028 1991 spin_lock(&vcpu->kvm->mmu_lock); 2029 1992 2030 - if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte)) 1993 + if (!stage2_get_leaf_entry(vcpu->arch.hw_mmu, fault_ipa, &pud, &pmd, &pte)) 2031 1994 goto out; 2032 1995 2033 1996 if (pud) { /* HugeTLB */ ··· 2077 2040 is_iabt = kvm_vcpu_trap_is_iabt(vcpu); 2078 2041 2079 2042 /* Synchronous External Abort? */ 2080 - if (kvm_vcpu_dabt_isextabt(vcpu)) { 2043 + if (kvm_vcpu_abt_issea(vcpu)) { 2081 2044 /* 2082 2045 * For RAS the host kernel may handle this abort. 2083 2046 * There is no need to pass the error into the guest. 2084 2047 */ 2085 - if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) 2086 - return 1; 2087 - 2088 - if (unlikely(!is_iabt)) { 2048 + if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) 2089 2049 kvm_inject_vabt(vcpu); 2090 - return 1; 2091 - } 2050 + 2051 + return 1; 2092 2052 } 2093 2053 2094 - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), 2054 + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), 2095 2055 kvm_vcpu_get_hfar(vcpu), fault_ipa); 2096 2056 2097 2057 /* Check the stage-2 fault is trans. 
fault or write fault */ ··· 2097 2063 kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", 2098 2064 kvm_vcpu_trap_get_class(vcpu), 2099 2065 (unsigned long)kvm_vcpu_trap_get_fault(vcpu), 2100 - (unsigned long)kvm_vcpu_get_hsr(vcpu)); 2066 + (unsigned long)kvm_vcpu_get_esr(vcpu)); 2101 2067 return -EFAULT; 2102 2068 } 2103 2069 ··· 2108 2074 hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); 2109 2075 write_fault = kvm_is_write_fault(vcpu); 2110 2076 if (kvm_is_error_hva(hva) || (write_fault && !writable)) { 2077 + /* 2078 + * The guest has put either its instructions or its page-tables 2079 + * somewhere it shouldn't have. Userspace won't be able to do 2080 + * anything about this (there's no syndrome for a start), so 2081 + * re-inject the abort back into the guest. 2082 + */ 2111 2083 if (is_iabt) { 2112 - /* Prefetch Abort on I/O address */ 2113 2084 ret = -ENOEXEC; 2114 2085 goto out; 2086 + } 2087 + 2088 + if (kvm_vcpu_dabt_iss1tw(vcpu)) { 2089 + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); 2090 + ret = 1; 2091 + goto out_unlock; 2115 2092 } 2116 2093 2117 2094 /* ··· 2135 2090 * So let's assume that the guest is just being 2136 2091 * cautious, and skip the instruction. 
2137 2092 */ 2138 - if (kvm_vcpu_dabt_is_cm(vcpu)) { 2093 + if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { 2139 2094 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); 2140 2095 ret = 1; 2141 2096 goto out_unlock; ··· 2208 2163 2209 2164 static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) 2210 2165 { 2211 - unmap_stage2_range(kvm, gpa, size); 2166 + unmap_stage2_range(&kvm->arch.mmu, gpa, size); 2212 2167 return 0; 2213 2168 } 2214 2169 2215 2170 int kvm_unmap_hva_range(struct kvm *kvm, 2216 2171 unsigned long start, unsigned long end) 2217 2172 { 2218 - if (!kvm->arch.pgd) 2173 + if (!kvm->arch.mmu.pgd) 2219 2174 return 0; 2220 2175 2221 2176 trace_kvm_unmap_hva_range(start, end); ··· 2235 2190 * therefore stage2_set_pte() never needs to clear out a huge PMD 2236 2191 * through this calling path. 2237 2192 */ 2238 - stage2_set_pte(kvm, NULL, gpa, pte, 0); 2193 + stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0); 2239 2194 return 0; 2240 2195 } 2241 2196 ··· 2246 2201 kvm_pfn_t pfn = pte_pfn(pte); 2247 2202 pte_t stage2_pte; 2248 2203 2249 - if (!kvm->arch.pgd) 2204 + if (!kvm->arch.mmu.pgd) 2250 2205 return 0; 2251 2206 2252 2207 trace_kvm_set_spte_hva(hva); ··· 2269 2224 pte_t *pte; 2270 2225 2271 2226 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); 2272 - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) 2227 + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) 2273 2228 return 0; 2274 2229 2275 2230 if (pud) ··· 2287 2242 pte_t *pte; 2288 2243 2289 2244 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); 2290 - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) 2245 + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) 2291 2246 return 0; 2292 2247 2293 2248 if (pud) ··· 2300 2255 2301 2256 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) 2302 2257 { 2303 - if (!kvm->arch.pgd) 2258 + if (!kvm->arch.mmu.pgd) 2304 2259 
return 0; 2305 2260 trace_kvm_age_hva(start, end); 2306 2261 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); ··· 2308 2263 2309 2264 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 2310 2265 { 2311 - if (!kvm->arch.pgd) 2266 + if (!kvm->arch.mmu.pgd) 2312 2267 return 0; 2313 2268 trace_kvm_test_age_hva(hva); 2314 2269 return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, ··· 2521 2476 2522 2477 spin_lock(&kvm->mmu_lock); 2523 2478 if (ret) 2524 - unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); 2479 + unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); 2525 2480 else 2526 2481 stage2_flush_memslot(kvm, memslot); 2527 2482 spin_unlock(&kvm->mmu_lock); ··· 2540 2495 2541 2496 void kvm_arch_flush_shadow_all(struct kvm *kvm) 2542 2497 { 2543 - kvm_free_stage2_pgd(kvm); 2498 + kvm_free_stage2_pgd(&kvm->arch.mmu); 2544 2499 } 2545 2500 2546 2501 void kvm_arch_flush_shadow_memslot(struct kvm *kvm, ··· 2550 2505 phys_addr_t size = slot->npages << PAGE_SHIFT; 2551 2506 2552 2507 spin_lock(&kvm->mmu_lock); 2553 - unmap_stage2_range(kvm, gpa, size); 2508 + unmap_stage2_range(&kvm->arch.mmu, gpa, size); 2554 2509 spin_unlock(&kvm->mmu_lock); 2555 2510 } 2556 2511
+33 -4
arch/arm64/kvm/regmap.c
··· 100 100 */ 101 101 unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) 102 102 { 103 - unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; 103 + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs; 104 104 unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; 105 105 106 106 switch (mode) { ··· 147 147 { 148 148 int spsr_idx = vcpu_spsr32_mode(vcpu); 149 149 150 - if (!vcpu->arch.sysregs_loaded_on_cpu) 151 - return vcpu_gp_regs(vcpu)->spsr[spsr_idx]; 150 + if (!vcpu->arch.sysregs_loaded_on_cpu) { 151 + switch (spsr_idx) { 152 + case KVM_SPSR_SVC: 153 + return __vcpu_sys_reg(vcpu, SPSR_EL1); 154 + case KVM_SPSR_ABT: 155 + return vcpu->arch.ctxt.spsr_abt; 156 + case KVM_SPSR_UND: 157 + return vcpu->arch.ctxt.spsr_und; 158 + case KVM_SPSR_IRQ: 159 + return vcpu->arch.ctxt.spsr_irq; 160 + case KVM_SPSR_FIQ: 161 + return vcpu->arch.ctxt.spsr_fiq; 162 + } 163 + } 152 164 153 165 switch (spsr_idx) { 154 166 case KVM_SPSR_SVC: ··· 183 171 int spsr_idx = vcpu_spsr32_mode(vcpu); 184 172 185 173 if (!vcpu->arch.sysregs_loaded_on_cpu) { 186 - vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v; 174 + switch (spsr_idx) { 175 + case KVM_SPSR_SVC: 176 + __vcpu_sys_reg(vcpu, SPSR_EL1) = v; 177 + break; 178 + case KVM_SPSR_ABT: 179 + vcpu->arch.ctxt.spsr_abt = v; 180 + break; 181 + case KVM_SPSR_UND: 182 + vcpu->arch.ctxt.spsr_und = v; 183 + break; 184 + case KVM_SPSR_IRQ: 185 + vcpu->arch.ctxt.spsr_irq = v; 186 + break; 187 + case KVM_SPSR_FIQ: 188 + vcpu->arch.ctxt.spsr_fiq = v; 189 + break; 190 + } 191 + 187 192 return; 188 193 } 189 194
+11 -12
arch/arm64/kvm/reset.c
··· 42 42 #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ 43 43 PSR_AA32_I_BIT | PSR_AA32_F_BIT) 44 44 45 + static bool system_has_full_ptr_auth(void) 46 + { 47 + return system_supports_address_auth() && system_supports_generic_auth(); 48 + } 49 + 45 50 /** 46 51 * kvm_arch_vm_ioctl_check_extension 47 52 * ··· 85 80 break; 86 81 case KVM_CAP_ARM_PTRAUTH_ADDRESS: 87 82 case KVM_CAP_ARM_PTRAUTH_GENERIC: 88 - r = has_vhe() && system_supports_address_auth() && 89 - system_supports_generic_auth(); 83 + r = system_has_full_ptr_auth(); 90 84 break; 91 85 default: 92 86 r = 0; ··· 209 205 210 206 static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) 211 207 { 212 - /* Support ptrauth only if the system supports these capabilities. */ 213 - if (!has_vhe()) 214 - return -EINVAL; 215 - 216 - if (!system_supports_address_auth() || 217 - !system_supports_generic_auth()) 218 - return -EINVAL; 219 208 /* 220 209 * For now make sure that both address/generic pointer authentication 221 - * features are requested by the userspace together. 210 + * features are requested by the userspace together and the system 211 + * supports these capabilities. 222 212 */ 223 213 if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || 224 - !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) 214 + !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) || 215 + !system_has_full_ptr_auth()) 225 216 return -EINVAL; 226 217 227 218 vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; ··· 291 292 292 293 /* Reset core registers */ 293 294 memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu))); 294 - vcpu_gp_regs(vcpu)->regs.pstate = pstate; 295 + vcpu_gp_regs(vcpu)->pstate = pstate; 295 296 296 297 /* Reset system registers */ 297 298 kvm_reset_sys_regs(vcpu);
+71 -136
arch/arm64/kvm/sys_regs.c
··· 94 94 case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; 95 95 case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; 96 96 case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; 97 + case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; 97 98 case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; 98 99 case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; 99 100 case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; ··· 134 133 case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; 135 134 case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; 136 135 case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; 136 + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; 137 137 case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; 138 138 case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; 139 139 case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; ··· 241 239 vcpu_write_sys_reg(vcpu, val, reg); 242 240 243 241 kvm_toggle_cache(vcpu, was_enabled); 242 + return true; 243 + } 244 + 245 + static bool access_actlr(struct kvm_vcpu *vcpu, 246 + struct sys_reg_params *p, 247 + const struct sys_reg_desc *r) 248 + { 249 + if (p->is_write) 250 + return ignore_write(vcpu, p); 251 + 252 + p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); 253 + 254 + if (p->is_aarch32) { 255 + if (r->Op2 & 2) 256 + p->regval = upper_32_bits(p->regval); 257 + else 258 + p->regval = lower_32_bits(p->regval); 259 + } 260 + 244 261 return true; 245 262 } 246 263 ··· 634 613 { 635 614 u64 amair = read_sysreg(amair_el1); 636 615 vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1); 616 + } 617 + 618 + static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) 619 + { 620 + u64 actlr = read_sysreg(actlr_el1); 621 + vcpu_write_sys_reg(vcpu, actlr, ACTLR_EL1); 637 622 } 638 623 639 624 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) ··· 1545 1518 ID_UNALLOCATED(7,7), 1546 1519 1547 1520 { 
SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, 1521 + { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, 1548 1522 { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, 1549 1523 { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, 1550 1524 { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, ··· 1985 1957 static const struct sys_reg_desc cp15_regs[] = { 1986 1958 { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, 1987 1959 { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, 1960 + { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr }, 1961 + { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr }, 1988 1962 { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, 1989 1963 { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, 1990 1964 { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, ··· 2139 2109 return 0; 2140 2110 } 2141 2111 2142 - /* Target specific emulation tables */ 2143 - static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; 2144 - 2145 - void kvm_register_target_sys_reg_table(unsigned int target, 2146 - struct kvm_sys_reg_target_table *table) 2147 - { 2148 - if (check_sysreg_table(table->table64.table, table->table64.num, false) || 2149 - check_sysreg_table(table->table32.table, table->table32.num, true)) 2150 - return; 2151 - 2152 - target_tables[target] = table; 2153 - } 2154 - 2155 - /* Get specific register table for this target. 
*/ 2156 - static const struct sys_reg_desc *get_target_table(unsigned target, 2157 - bool mode_is_64, 2158 - size_t *num) 2159 - { 2160 - struct kvm_sys_reg_target_table *table; 2161 - 2162 - table = target_tables[target]; 2163 - if (mode_is_64) { 2164 - *num = table->table64.num; 2165 - return table->table64.table; 2166 - } else { 2167 - *num = table->table32.num; 2168 - return table->table32.table; 2169 - } 2170 - } 2171 - 2172 2112 static int match_sys_reg(const void *key, const void *elt) 2173 2113 { 2174 2114 const unsigned long pval = (unsigned long)key; ··· 2220 2220 static void unhandled_cp_access(struct kvm_vcpu *vcpu, 2221 2221 struct sys_reg_params *params) 2222 2222 { 2223 - u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); 2223 + u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); 2224 2224 int cp = -1; 2225 2225 2226 - switch(hsr_ec) { 2226 + switch (esr_ec) { 2227 2227 case ESR_ELx_EC_CP15_32: 2228 2228 case ESR_ELx_EC_CP15_64: 2229 2229 cp = 15; ··· 2249 2249 */ 2250 2250 static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, 2251 2251 const struct sys_reg_desc *global, 2252 - size_t nr_global, 2253 - const struct sys_reg_desc *target_specific, 2254 - size_t nr_specific) 2252 + size_t nr_global) 2255 2253 { 2256 2254 struct sys_reg_params params; 2257 - u32 hsr = kvm_vcpu_get_hsr(vcpu); 2255 + u32 esr = kvm_vcpu_get_esr(vcpu); 2258 2256 int Rt = kvm_vcpu_sys_get_rt(vcpu); 2259 - int Rt2 = (hsr >> 10) & 0x1f; 2257 + int Rt2 = (esr >> 10) & 0x1f; 2260 2258 2261 2259 params.is_aarch32 = true; 2262 2260 params.is_32bit = false; 2263 - params.CRm = (hsr >> 1) & 0xf; 2264 - params.is_write = ((hsr & 1) == 0); 2261 + params.CRm = (esr >> 1) & 0xf; 2262 + params.is_write = ((esr & 1) == 0); 2265 2263 2266 2264 params.Op0 = 0; 2267 - params.Op1 = (hsr >> 16) & 0xf; 2265 + params.Op1 = (esr >> 16) & 0xf; 2268 2266 params.Op2 = 0; 2269 2267 params.CRn = 0; 2270 2268 ··· 2276 2278 } 2277 2279 2278 2280 /* 2279 - * Try to emulate the coprocessor access using the target 2280 - * 
specific table first, and using the global table afterwards. 2281 - * If either of the tables contains a handler, handle the 2281 + * If the table contains a handler, handle the 2282 2282 * potential register operation in the case of a read and return 2283 2283 * with success. 2284 2284 */ 2285 - if (!emulate_cp(vcpu, &params, target_specific, nr_specific) || 2286 - !emulate_cp(vcpu, &params, global, nr_global)) { 2285 + if (!emulate_cp(vcpu, &params, global, nr_global)) { 2287 2286 /* Split up the value between registers for the read side */ 2288 2287 if (!params.is_write) { 2289 2288 vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); ··· 2301 2306 */ 2302 2307 static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, 2303 2308 const struct sys_reg_desc *global, 2304 - size_t nr_global, 2305 - const struct sys_reg_desc *target_specific, 2306 - size_t nr_specific) 2309 + size_t nr_global) 2307 2310 { 2308 2311 struct sys_reg_params params; 2309 - u32 hsr = kvm_vcpu_get_hsr(vcpu); 2312 + u32 esr = kvm_vcpu_get_esr(vcpu); 2310 2313 int Rt = kvm_vcpu_sys_get_rt(vcpu); 2311 2314 2312 2315 params.is_aarch32 = true; 2313 2316 params.is_32bit = true; 2314 - params.CRm = (hsr >> 1) & 0xf; 2317 + params.CRm = (esr >> 1) & 0xf; 2315 2318 params.regval = vcpu_get_reg(vcpu, Rt); 2316 - params.is_write = ((hsr & 1) == 0); 2317 - params.CRn = (hsr >> 10) & 0xf; 2319 + params.is_write = ((esr & 1) == 0); 2320 + params.CRn = (esr >> 10) & 0xf; 2318 2321 params.Op0 = 0; 2319 - params.Op1 = (hsr >> 14) & 0x7; 2320 - params.Op2 = (hsr >> 17) & 0x7; 2322 + params.Op1 = (esr >> 14) & 0x7; 2323 + params.Op2 = (esr >> 17) & 0x7; 2321 2324 2322 - if (!emulate_cp(vcpu, &params, target_specific, nr_specific) || 2323 - !emulate_cp(vcpu, &params, global, nr_global)) { 2325 + if (!emulate_cp(vcpu, &params, global, nr_global)) { 2324 2326 if (!params.is_write) 2325 2327 vcpu_set_reg(vcpu, Rt, params.regval); 2326 2328 return 1; ··· 2329 2337 2330 2338 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu) 
2331 2339 { 2332 - const struct sys_reg_desc *target_specific; 2333 - size_t num; 2334 - 2335 - target_specific = get_target_table(vcpu->arch.target, false, &num); 2336 - return kvm_handle_cp_64(vcpu, 2337 - cp15_64_regs, ARRAY_SIZE(cp15_64_regs), 2338 - target_specific, num); 2340 + return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs)); 2339 2341 } 2340 2342 2341 2343 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu) 2342 2344 { 2343 - const struct sys_reg_desc *target_specific; 2344 - size_t num; 2345 - 2346 - target_specific = get_target_table(vcpu->arch.target, false, &num); 2347 - return kvm_handle_cp_32(vcpu, 2348 - cp15_regs, ARRAY_SIZE(cp15_regs), 2349 - target_specific, num); 2345 + return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); 2350 2346 } 2351 2347 2352 2348 int kvm_handle_cp14_64(struct kvm_vcpu *vcpu) 2353 2349 { 2354 - return kvm_handle_cp_64(vcpu, 2355 - cp14_64_regs, ARRAY_SIZE(cp14_64_regs), 2356 - NULL, 0); 2350 + return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs)); 2357 2351 } 2358 2352 2359 2353 int kvm_handle_cp14_32(struct kvm_vcpu *vcpu) 2360 2354 { 2361 - return kvm_handle_cp_32(vcpu, 2362 - cp14_regs, ARRAY_SIZE(cp14_regs), 2363 - NULL, 0); 2355 + return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs)); 2364 2356 } 2365 2357 2366 2358 static bool is_imp_def_sys_reg(struct sys_reg_params *params) ··· 2356 2380 static int emulate_sys_reg(struct kvm_vcpu *vcpu, 2357 2381 struct sys_reg_params *params) 2358 2382 { 2359 - size_t num; 2360 - const struct sys_reg_desc *table, *r; 2383 + const struct sys_reg_desc *r; 2361 2384 2362 - table = get_target_table(vcpu->arch.target, true, &num); 2363 - 2364 - /* Search target-specific then generic table. 
*/ 2365 - r = find_reg(params, table, num); 2366 - if (!r) 2367 - r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); 2385 + r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); 2368 2386 2369 2387 if (likely(r)) { 2370 2388 perform_access(vcpu, params, r); ··· 2373 2403 return 1; 2374 2404 } 2375 2405 2376 - static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, 2377 - const struct sys_reg_desc *table, size_t num) 2406 + /** 2407 + * kvm_reset_sys_regs - sets system registers to reset value 2408 + * @vcpu: The VCPU pointer 2409 + * 2410 + * This function finds the right table above and sets the registers on the 2411 + * virtual CPU struct to their architecturally defined reset values. 2412 + */ 2413 + void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) 2378 2414 { 2379 2415 unsigned long i; 2380 2416 2381 - for (i = 0; i < num; i++) 2382 - if (table[i].reset) 2383 - table[i].reset(vcpu, &table[i]); 2417 + for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) 2418 + if (sys_reg_descs[i].reset) 2419 + sys_reg_descs[i].reset(vcpu, &sys_reg_descs[i]); 2384 2420 } 2385 2421 2386 2422 /** ··· 2396 2420 int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) 2397 2421 { 2398 2422 struct sys_reg_params params; 2399 - unsigned long esr = kvm_vcpu_get_hsr(vcpu); 2423 + unsigned long esr = kvm_vcpu_get_esr(vcpu); 2400 2424 int Rt = kvm_vcpu_sys_get_rt(vcpu); 2401 2425 int ret; 2402 2426 ··· 2467 2491 static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, 2468 2492 u64 id) 2469 2493 { 2470 - size_t num; 2471 - const struct sys_reg_desc *table, *r; 2494 + const struct sys_reg_desc *r; 2472 2495 struct sys_reg_params params; 2473 2496 2474 2497 /* We only do sys_reg for now. 
*/ ··· 2477 2502 if (!index_to_params(id, &params)) 2478 2503 return NULL; 2479 2504 2480 - table = get_target_table(vcpu->arch.target, true, &num); 2481 - r = find_reg(&params, table, num); 2482 - if (!r) 2483 - r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); 2505 + r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); 2484 2506 2485 2507 /* Not saved in the sys_reg array and not otherwise accessible? */ 2486 2508 if (r && !(r->reg || r->get_user)) ··· 2777 2805 /* Assumed ordered tables, see kvm_sys_reg_table_init. */ 2778 2806 static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) 2779 2807 { 2780 - const struct sys_reg_desc *i1, *i2, *end1, *end2; 2808 + const struct sys_reg_desc *i2, *end2; 2781 2809 unsigned int total = 0; 2782 - size_t num; 2783 2810 int err; 2784 2811 2785 - /* We check for duplicates here, to allow arch-specific overrides. */ 2786 - i1 = get_target_table(vcpu->arch.target, true, &num); 2787 - end1 = i1 + num; 2788 2812 i2 = sys_reg_descs; 2789 2813 end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); 2790 2814 2791 - BUG_ON(i1 == end1 || i2 == end2); 2792 - 2793 - /* Walk carefully, as both tables may refer to the same register. */ 2794 - while (i1 || i2) { 2795 - int cmp = cmp_sys_reg(i1, i2); 2796 - /* target-specific overrides generic entry. */ 2797 - if (cmp <= 0) 2798 - err = walk_one_sys_reg(vcpu, i1, &uind, &total); 2799 - else 2800 - err = walk_one_sys_reg(vcpu, i2, &uind, &total); 2801 - 2815 + while (i2 != end2) { 2816 + err = walk_one_sys_reg(vcpu, i2++, &uind, &total); 2802 2817 if (err) 2803 2818 return err; 2804 - 2805 - if (cmp <= 0 && ++i1 == end1) 2806 - i1 = NULL; 2807 - if (cmp >= 0 && ++i2 == end2) 2808 - i2 = NULL; 2809 2819 } 2810 2820 return total; 2811 2821 } ··· 2853 2899 break; 2854 2900 /* Clear all higher bits. 
*/ 2855 2901 cache_levels &= (1 << (i*3))-1; 2856 - } 2857 - 2858 - /** 2859 - * kvm_reset_sys_regs - sets system registers to reset value 2860 - * @vcpu: The VCPU pointer 2861 - * 2862 - * This function finds the right table above and sets the registers on the 2863 - * virtual CPU struct to their architecturally defined reset values. 2864 - */ 2865 - void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) 2866 - { 2867 - size_t num; 2868 - const struct sys_reg_desc *table; 2869 - 2870 - /* Generic chip reset first (so target could override). */ 2871 - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); 2872 - 2873 - table = get_target_table(vcpu->arch.target, true, &num); 2874 - reset_sys_reg_descs(vcpu, table, num); 2875 2902 }
-96
arch/arm64/kvm/sys_regs_generic_v8.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * Copyright (C) 2012,2013 - ARM Ltd 4 - * Author: Marc Zyngier <marc.zyngier@arm.com> 5 - * 6 - * Based on arch/arm/kvm/coproc_a15.c: 7 - * Copyright (C) 2012 - Virtual Open Systems and Columbia University 8 - * Authors: Rusty Russell <rusty@rustcorp.au> 9 - * Christoffer Dall <c.dall@virtualopensystems.com> 10 - */ 11 - #include <linux/kvm_host.h> 12 - #include <asm/cputype.h> 13 - #include <asm/kvm_arm.h> 14 - #include <asm/kvm_asm.h> 15 - #include <asm/kvm_emulate.h> 16 - #include <asm/kvm_coproc.h> 17 - #include <asm/sysreg.h> 18 - #include <linux/init.h> 19 - 20 - #include "sys_regs.h" 21 - 22 - static bool access_actlr(struct kvm_vcpu *vcpu, 23 - struct sys_reg_params *p, 24 - const struct sys_reg_desc *r) 25 - { 26 - if (p->is_write) 27 - return ignore_write(vcpu, p); 28 - 29 - p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); 30 - 31 - if (p->is_aarch32) { 32 - if (r->Op2 & 2) 33 - p->regval = upper_32_bits(p->regval); 34 - else 35 - p->regval = lower_32_bits(p->regval); 36 - } 37 - 38 - return true; 39 - } 40 - 41 - static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) 42 - { 43 - __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); 44 - } 45 - 46 - /* 47 - * Implementation specific sys-reg registers. 
48 - * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 49 - */ 50 - static const struct sys_reg_desc genericv8_sys_regs[] = { 51 - { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, 52 - }; 53 - 54 - static const struct sys_reg_desc genericv8_cp15_regs[] = { 55 - /* ACTLR */ 56 - { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), 57 - access_actlr }, 58 - { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b011), 59 - access_actlr }, 60 - }; 61 - 62 - static struct kvm_sys_reg_target_table genericv8_target_table = { 63 - .table64 = { 64 - .table = genericv8_sys_regs, 65 - .num = ARRAY_SIZE(genericv8_sys_regs), 66 - }, 67 - .table32 = { 68 - .table = genericv8_cp15_regs, 69 - .num = ARRAY_SIZE(genericv8_cp15_regs), 70 - }, 71 - }; 72 - 73 - static int __init sys_reg_genericv8_init(void) 74 - { 75 - unsigned int i; 76 - 77 - for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++) 78 - BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1], 79 - &genericv8_sys_regs[i]) >= 0); 80 - 81 - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8, 82 - &genericv8_target_table); 83 - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, 84 - &genericv8_target_table); 85 - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53, 86 - &genericv8_target_table); 87 - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, 88 - &genericv8_target_table); 89 - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, 90 - &genericv8_target_table); 91 - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8, 92 - &genericv8_target_table); 93 - 94 - return 0; 95 - } 96 - late_initcall(sys_reg_genericv8_init);
+4 -4
arch/arm64/kvm/trace_arm.h
··· 301 301 ), 302 302 303 303 TP_fast_assign( 304 - __entry->ctl = ctx->cnt_ctl; 305 - __entry->cval = ctx->cnt_cval; 304 + __entry->ctl = timer_get_ctl(ctx); 305 + __entry->cval = timer_get_cval(ctx); 306 306 __entry->timer_idx = arch_timer_ctx_index(ctx); 307 307 ), 308 308 ··· 323 323 ), 324 324 325 325 TP_fast_assign( 326 - __entry->ctl = ctx->cnt_ctl; 327 - __entry->cval = ctx->cnt_cval; 326 + __entry->ctl = timer_get_ctl(ctx); 327 + __entry->cval = timer_get_cval(ctx); 328 328 __entry->timer_idx = arch_timer_ctx_index(ctx); 329 329 ), 330 330
+1 -1
arch/arm64/kvm/va_layout.c
··· 48 48 va_mask = GENMASK_ULL(tag_lsb - 1, 0); 49 49 tag_val = hyp_va_msb; 50 50 51 - if (tag_lsb != (vabits_actual - 1)) { 51 + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (vabits_actual - 1)) { 52 52 /* We have some free bits to insert a random tag. */ 53 53 tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); 54 54 }
+19 -5
arch/arm64/kvm/vgic/vgic-irqfd.c
··· 100 100 101 101 /** 102 102 * kvm_arch_set_irq_inatomic: fast-path for irqfd injection 103 - * 104 - * Currently only direct MSI injection is supported. 105 103 */ 106 104 int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, 107 105 struct kvm *kvm, int irq_source_id, int level, 108 106 bool line_status) 109 107 { 110 - if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) { 108 + if (!level) 109 + return -EWOULDBLOCK; 110 + 111 + switch (e->type) { 112 + case KVM_IRQ_ROUTING_MSI: { 111 113 struct kvm_msi msi; 112 114 115 + if (!vgic_has_its(kvm)) 116 + break; 117 + 113 118 kvm_populate_msi(e, &msi); 114 - if (!vgic_its_inject_cached_translation(kvm, &msi)) 115 - return 0; 119 + return vgic_its_inject_cached_translation(kvm, &msi); 120 + } 121 + 122 + case KVM_IRQ_ROUTING_IRQCHIP: 123 + /* 124 + * Injecting SPIs is always possible in atomic context 125 + * as long as the damn vgic is initialized. 126 + */ 127 + if (unlikely(!vgic_initialized(kvm))) 128 + break; 129 + return vgic_irqfd_set_irq(e, kvm, irq_source_id, 1, line_status); 116 130 } 117 131 118 132 return -EWOULDBLOCK;
+1 -2
arch/arm64/kvm/vgic/vgic-its.c
··· 757 757 758 758 db = (u64)msi->address_hi << 32 | msi->address_lo; 759 759 irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); 760 - 761 760 if (!irq) 762 - return -1; 761 + return -EWOULDBLOCK; 763 762 764 763 raw_spin_lock_irqsave(&irq->irq_lock, flags); 765 764 irq->pending_latch = true;
+1 -1
arch/arm64/kvm/vgic/vgic-mmio-v3.c
··· 389 389 case GIC_BASER_CACHE_nC: 390 390 return field; 391 391 default: 392 - return GIC_BASER_CACHE_nC; 392 + return GIC_BASER_CACHE_SameAsInner; 393 393 } 394 394 } 395 395
+1 -1
arch/mips/kvm/emulate.c
··· 1935 1935 1936 1936 case lwu_op: 1937 1937 vcpu->mmio_needed = 1; /* unsigned */ 1938 - /* fall through */ 1938 + fallthrough; 1939 1939 #endif 1940 1940 case lw_op: 1941 1941 run->mmio.len = 4;
+3 -2
arch/mips/kvm/vz.c
··· 29 29 #include <linux/kvm_host.h> 30 30 31 31 #include "interrupt.h" 32 + #ifdef CONFIG_CPU_LOONGSON64 32 33 #include "loongson_regs.h" 34 + #endif 33 35 34 36 #include "trace.h" 35 37 ··· 1144 1142 #ifdef CONFIG_CPU_LOONGSON64 1145 1143 static enum emulation_result kvm_vz_gpsi_lwc2(union mips_instruction inst, 1146 1144 u32 *opc, u32 cause, 1147 - struct kvm_run *run, 1148 1145 struct kvm_vcpu *vcpu) 1149 1146 { 1150 1147 unsigned int rs, rd; ··· 1241 1240 #endif 1242 1241 #ifdef CONFIG_CPU_LOONGSON64 1243 1242 case lwc2_op: 1244 - er = kvm_vz_gpsi_lwc2(inst, opc, cause, run, vcpu); 1243 + er = kvm_vz_gpsi_lwc2(inst, opc, cause, vcpu); 1245 1244 break; 1246 1245 #endif 1247 1246 case spec3_op:
+14
arch/powerpc/include/asm/kvm_book3s_uvmem.h
··· 23 23 unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm); 24 24 void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, 25 25 struct kvm *kvm, bool skip_page_out); 26 + int kvmppc_uvmem_memslot_create(struct kvm *kvm, 27 + const struct kvm_memory_slot *new); 28 + void kvmppc_uvmem_memslot_delete(struct kvm *kvm, 29 + const struct kvm_memory_slot *old); 26 30 #else 27 31 static inline int kvmppc_uvmem_init(void) 28 32 { ··· 86 82 static inline void 87 83 kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, 88 84 struct kvm *kvm, bool skip_page_out) { } 85 + 86 + static inline int kvmppc_uvmem_memslot_create(struct kvm *kvm, 87 + const struct kvm_memory_slot *new) 88 + { 89 + return H_UNSUPPORTED; 90 + } 91 + 92 + static inline void kvmppc_uvmem_memslot_delete(struct kvm *kvm, 93 + const struct kvm_memory_slot *old) { } 94 + 89 95 #endif /* CONFIG_PPC_UV */ 90 96 #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
+1 -1
arch/powerpc/include/asm/kvm_ppc.h
··· 59 59 }; 60 60 61 61 extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu); 62 - extern int __kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); 62 + extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu); 63 63 extern void kvmppc_handler_highmem(void); 64 64 65 65 extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
+3 -1
arch/powerpc/include/asm/reg.h
··· 474 474 #ifndef SPRN_LPID 475 475 #define SPRN_LPID 0x13F /* Logical Partition Identifier */ 476 476 #endif 477 - #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ 477 + #define LPID_RSVD_POWER7 0x3ff /* Reserved LPID for partn switching */ 478 + #define LPID_RSVD 0xfff /* Reserved LPID for partn switching */ 478 479 #define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ 479 480 #define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ 480 481 #define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ ··· 1363 1362 #define PVR_ARCH_206p 0x0f100003 1364 1363 #define PVR_ARCH_207 0x0f000004 1365 1364 #define PVR_ARCH_300 0x0f000005 1365 + #define PVR_ARCH_31 0x0f000006 1366 1366 1367 1367 /* Macros for setting and retrieving special purpose registers */ 1368 1368 #ifndef __ASSEMBLY__
+6 -2
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 260 260 if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) 261 261 return -EINVAL; 262 262 263 - /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ 264 263 host_lpid = 0; 265 264 if (cpu_has_feature(CPU_FTR_HVMODE)) 266 265 host_lpid = mfspr(SPRN_LPID); 267 - rsvd_lpid = LPID_RSVD; 266 + 267 + /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */ 268 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) 269 + rsvd_lpid = LPID_RSVD; 270 + else 271 + rsvd_lpid = LPID_RSVD_POWER7; 268 272 269 273 kvmppc_init_lpid(rsvd_lpid + 1); 270 274
+4
arch/powerpc/kvm/book3s_64_mmu_radix.c
··· 161 161 return -EINVAL; 162 162 /* Read the entry from guest memory */ 163 163 addr = base + (index * sizeof(rpte)); 164 + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 164 165 ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte)); 166 + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 165 167 if (ret) { 166 168 if (pte_ret_p) 167 169 *pte_ret_p = addr; ··· 239 237 240 238 /* Read the table to find the root of the radix tree */ 241 239 ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry)); 240 + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 242 241 ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry)); 242 + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 243 243 if (ret) 244 244 return ret; 245 245
+16 -10
arch/powerpc/kvm/book3s_hv.c
··· 343 343 vcpu->arch.pvr = pvr; 344 344 } 345 345 346 + /* Dummy value used in computing PCR value below */ 347 + #define PCR_ARCH_31 (PCR_ARCH_300 << 1) 348 + 346 349 static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) 347 350 { 348 351 unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; 349 352 struct kvmppc_vcore *vc = vcpu->arch.vcore; 350 353 351 354 /* We can (emulate) our own architecture version and anything older */ 352 - if (cpu_has_feature(CPU_FTR_ARCH_300)) 355 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 356 + host_pcr_bit = PCR_ARCH_31; 357 + else if (cpu_has_feature(CPU_FTR_ARCH_300)) 353 358 host_pcr_bit = PCR_ARCH_300; 354 359 else if (cpu_has_feature(CPU_FTR_ARCH_207S)) 355 360 host_pcr_bit = PCR_ARCH_207; ··· 379 374 break; 380 375 case PVR_ARCH_300: 381 376 guest_pcr_bit = PCR_ARCH_300; 377 + break; 378 + case PVR_ARCH_31: 379 + guest_pcr_bit = PCR_ARCH_31; 382 380 break; 383 381 default: 384 382 return -EINVAL; ··· 2363 2355 * to trap and then we emulate them. 2364 2356 */ 2365 2357 vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB | 2366 - HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP; 2358 + HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; 2367 2359 if (cpu_has_feature(CPU_FTR_HVMODE)) { 2368 2360 vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); 2369 2361 if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) ··· 4560 4552 4561 4553 switch (change) { 4562 4554 case KVM_MR_CREATE: 4563 - if (kvmppc_uvmem_slot_init(kvm, new)) 4564 - return; 4565 - uv_register_mem_slot(kvm->arch.lpid, 4566 - new->base_gfn << PAGE_SHIFT, 4567 - new->npages * PAGE_SIZE, 4568 - 0, new->id); 4555 + /* 4556 + * @TODO kvmppc_uvmem_memslot_create() can fail and 4557 + * return error. Fix this. 
4558 + */ 4559 + kvmppc_uvmem_memslot_create(kvm, new); 4569 4560 break; 4570 4561 case KVM_MR_DELETE: 4571 - uv_unregister_mem_slot(kvm->arch.lpid, old->id); 4572 - kvmppc_uvmem_slot_free(kvm, old); 4562 + kvmppc_uvmem_memslot_delete(kvm, old); 4573 4563 break; 4574 4564 default: 4575 4565 /* TODO: Handle KVM_MR_MOVE */
+19 -11
arch/powerpc/kvm/book3s_hv_nested.c
··· 233 233 234 234 /* copy parameters in */ 235 235 hv_ptr = kvmppc_get_gpr(vcpu, 4); 236 + regs_ptr = kvmppc_get_gpr(vcpu, 5); 237 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 236 238 err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, 237 - sizeof(struct hv_guest_state)); 239 + sizeof(struct hv_guest_state)) || 240 + kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, 241 + sizeof(struct pt_regs)); 242 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 238 243 if (err) 239 244 return H_PARAMETER; 245 + 240 246 if (kvmppc_need_byteswap(vcpu)) 241 247 byteswap_hv_regs(&l2_hv); 242 248 if (l2_hv.version != HV_GUEST_STATE_VERSION) 243 249 return H_P2; 244 250 245 - regs_ptr = kvmppc_get_gpr(vcpu, 5); 246 - err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, 247 - sizeof(struct pt_regs)); 248 - if (err) 249 - return H_PARAMETER; 250 251 if (kvmppc_need_byteswap(vcpu)) 251 252 byteswap_pt_regs(&l2_regs); 252 253 if (l2_hv.vcpu_token >= NR_CPUS) ··· 324 323 byteswap_hv_regs(&l2_hv); 325 324 byteswap_pt_regs(&l2_regs); 326 325 } 326 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 327 327 err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv, 328 - sizeof(struct hv_guest_state)); 329 - if (err) 330 - return H_AUTHORITY; 331 - err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, 328 + sizeof(struct hv_guest_state)) || 329 + kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, 332 330 sizeof(struct pt_regs)); 331 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 333 332 if (err) 334 333 return H_AUTHORITY; 335 334 ··· 509 508 goto not_found; 510 509 511 510 /* Write what was loaded into our buffer back to the L1 guest */ 511 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 512 512 rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n); 513 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 513 514 if (rc) 514 515 goto not_found; 515 516 } else { 516 517 /* Load the data to be stored from the L1 guest into our buf */ 518 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 517 
519 rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n); 520 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 518 521 if (rc) 519 522 goto not_found; 520 523 ··· 553 548 554 549 ret = -EFAULT; 555 550 ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4); 556 - if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) 551 + if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) { 552 + int srcu_idx = srcu_read_lock(&kvm->srcu); 557 553 ret = kvm_read_guest(kvm, ptbl_addr, 558 554 &ptbl_entry, sizeof(ptbl_entry)); 555 + srcu_read_unlock(&kvm->srcu, srcu_idx); 556 + } 559 557 if (ret) { 560 558 gp->l1_gr_to_hr = 0; 561 559 gp->process_table = 0;
+533 -171
arch/powerpc/kvm/book3s_hv_uvmem.c
··· 93 93 #include <asm/ultravisor.h> 94 94 #include <asm/mman.h> 95 95 #include <asm/kvm_ppc.h> 96 + #include <asm/kvm_book3s_uvmem.h> 96 97 97 98 static struct dev_pagemap kvmppc_uvmem_pgmap; 98 99 static unsigned long *kvmppc_uvmem_bitmap; 99 100 static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock); 100 101 101 - #define KVMPPC_UVMEM_PFN (1UL << 63) 102 + /* 103 + * States of a GFN 104 + * --------------- 105 + * The GFN can be in one of the following states. 106 + * 107 + * (a) Secure - The GFN is secure. The GFN is associated with 108 + * a Secure VM, the contents of the GFN is not accessible 109 + * to the Hypervisor. This GFN can be backed by a secure-PFN, 110 + * or can be backed by a normal-PFN with contents encrypted. 111 + * The former is true when the GFN is paged-in into the 112 + * ultravisor. The latter is true when the GFN is paged-out 113 + * of the ultravisor. 114 + * 115 + * (b) Shared - The GFN is shared. The GFN is associated with a 116 + * a secure VM. The contents of the GFN is accessible to 117 + * Hypervisor. This GFN is backed by a normal-PFN and its 118 + * content is un-encrypted. 119 + * 120 + * (c) Normal - The GFN is a normal. The GFN is associated with 121 + * a normal VM. The contents of the GFN is accesible to 122 + * the Hypervisor. Its content is never encrypted. 123 + * 124 + * States of a VM. 125 + * --------------- 126 + * 127 + * Normal VM: A VM whose contents are always accessible to 128 + * the hypervisor. All its GFNs are normal-GFNs. 129 + * 130 + * Secure VM: A VM whose contents are not accessible to the 131 + * hypervisor without the VM's consent. Its GFNs are 132 + * either Shared-GFN or Secure-GFNs. 133 + * 134 + * Transient VM: A Normal VM that is transitioning to secure VM. 135 + * The transition starts on successful return of 136 + * H_SVM_INIT_START, and ends on successful return 137 + * of H_SVM_INIT_DONE. 
This transient VM, can have GFNs 138 + * in any of the three states; i.e Secure-GFN, Shared-GFN, 139 + * and Normal-GFN. The VM never executes in this state 140 + * in supervisor-mode. 141 + * 142 + * Memory slot State. 143 + * ----------------------------- 144 + * The state of a memory slot mirrors the state of the 145 + * VM the memory slot is associated with. 146 + * 147 + * VM State transition. 148 + * -------------------- 149 + * 150 + * A VM always starts in Normal Mode. 151 + * 152 + * H_SVM_INIT_START moves the VM into transient state. During this 153 + * time the Ultravisor may request some of its GFNs to be shared or 154 + * secured. So its GFNs can be in one of the three GFN states. 155 + * 156 + * H_SVM_INIT_DONE moves the VM entirely from transient state to 157 + * secure-state. At this point any left-over normal-GFNs are 158 + * transitioned to Secure-GFN. 159 + * 160 + * H_SVM_INIT_ABORT moves the transient VM back to normal VM. 161 + * All its GFNs are moved to Normal-GFNs. 162 + * 163 + * UV_TERMINATE transitions the secure-VM back to normal-VM. All 164 + * the secure-GFN and shared-GFNs are tranistioned to normal-GFN 165 + * Note: The contents of the normal-GFN is undefined at this point. 166 + * 167 + * GFN state implementation: 168 + * ------------------------- 169 + * 170 + * Secure GFN is associated with a secure-PFN; also called uvmem_pfn, 171 + * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag 172 + * set, and contains the value of the secure-PFN. 173 + * It is associated with a normal-PFN; also called mem_pfn, when 174 + * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set. 175 + * The value of the normal-PFN is not tracked. 176 + * 177 + * Shared GFN is associated with a normal-PFN. Its pfn[] has 178 + * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN 179 + * is not tracked. 180 + * 181 + * Normal GFN is associated with normal-PFN. Its pfn[] has 182 + * no flag set. 
The value of the normal-PFN is not tracked. 183 + * 184 + * Life cycle of a GFN 185 + * -------------------- 186 + * 187 + * -------------------------------------------------------------- 188 + * | | Share | Unshare | SVM |H_SVM_INIT_DONE| 189 + * | |operation |operation | abort/ | | 190 + * | | | | terminate | | 191 + * ------------------------------------------------------------- 192 + * | | | | | | 193 + * | Secure | Shared | Secure |Normal |Secure | 194 + * | | | | | | 195 + * | Shared | Shared | Secure |Normal |Shared | 196 + * | | | | | | 197 + * | Normal | Shared | Secure |Normal |Secure | 198 + * -------------------------------------------------------------- 199 + * 200 + * Life cycle of a VM 201 + * -------------------- 202 + * 203 + * -------------------------------------------------------------------- 204 + * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ | 205 + * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE | 206 + * | | | | | | | 207 + * --------- ---------------------------------------------------------- 208 + * | | | | | | | 209 + * | Normal | Normal | Transient|Error |Error |Normal | 210 + * | | | | | | | 211 + * | Secure | Error | Error |Error |Error |Normal | 212 + * | | | | | | | 213 + * |Transient| N/A | Error |Secure |Normal |Normal | 214 + * -------------------------------------------------------------------- 215 + */ 216 + 217 + #define KVMPPC_GFN_UVMEM_PFN (1UL << 63) 218 + #define KVMPPC_GFN_MEM_PFN (1UL << 62) 219 + #define KVMPPC_GFN_SHARED (1UL << 61) 220 + #define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN) 221 + #define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED) 222 + #define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK) 102 223 103 224 struct kvmppc_uvmem_slot { 104 225 struct list_head list; ··· 227 106 unsigned long base_pfn; 228 107 unsigned long *pfns; 229 108 }; 230 - 231 109 struct kvmppc_uvmem_page_pvt { 232 110 struct kvm *kvm; 233 111 unsigned long gpa; 234 112 bool skip_page_out; 113 + 
bool remove_gfn; 235 114 }; 236 115 237 116 bool kvmppc_uvmem_available(void) ··· 284 163 mutex_unlock(&kvm->arch.uvmem_lock); 285 164 } 286 165 287 - static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, 288 - struct kvm *kvm) 166 + static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm, 167 + unsigned long flag, unsigned long uvmem_pfn) 289 168 { 290 169 struct kvmppc_uvmem_slot *p; 291 170 ··· 293 172 if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { 294 173 unsigned long index = gfn - p->base_pfn; 295 174 296 - p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN; 175 + if (flag == KVMPPC_GFN_UVMEM_PFN) 176 + p->pfns[index] = uvmem_pfn | flag; 177 + else 178 + p->pfns[index] = flag; 297 179 return; 298 180 } 299 181 } 300 182 } 301 183 302 - static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm) 184 + /* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */ 185 + static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn, 186 + unsigned long uvmem_pfn, struct kvm *kvm) 303 187 { 304 - struct kvmppc_uvmem_slot *p; 305 - 306 - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { 307 - if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { 308 - p->pfns[gfn - p->base_pfn] = 0; 309 - return; 310 - } 311 - } 188 + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn); 312 189 } 313 190 191 + /* mark the GFN as secure-GFN associated with a memory-PFN. */ 192 + static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm) 193 + { 194 + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0); 195 + } 196 + 197 + /* mark the GFN as a shared GFN. */ 198 + static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm) 199 + { 200 + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0); 201 + } 202 + 203 + /* mark the GFN as a non-existent GFN. 
*/ 204 + static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm) 205 + { 206 + kvmppc_mark_gfn(gfn, kvm, 0, 0); 207 + } 208 + 209 + /* return true, if the GFN is a secure-GFN backed by a secure-PFN */ 314 210 static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, 315 211 unsigned long *uvmem_pfn) 316 212 { ··· 337 199 if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { 338 200 unsigned long index = gfn - p->base_pfn; 339 201 340 - if (p->pfns[index] & KVMPPC_UVMEM_PFN) { 202 + if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) { 341 203 if (uvmem_pfn) 342 204 *uvmem_pfn = p->pfns[index] & 343 - ~KVMPPC_UVMEM_PFN; 205 + KVMPPC_GFN_PFN_MASK; 344 206 return true; 345 207 } else 346 208 return false; ··· 349 211 return false; 350 212 } 351 213 214 + /* 215 + * starting from *gfn search for the next available GFN that is not yet 216 + * transitioned to a secure GFN. return the value of that GFN in *gfn. If a 217 + * GFN is found, return true, else return false 218 + * 219 + * Must be called with kvm->arch.uvmem_lock held. 220 + */ 221 + static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, 222 + struct kvm *kvm, unsigned long *gfn) 223 + { 224 + struct kvmppc_uvmem_slot *p; 225 + bool ret = false; 226 + unsigned long i; 227 + 228 + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) 229 + if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) 230 + break; 231 + if (!p) 232 + return ret; 233 + /* 234 + * The code below assumes, one to one correspondence between 235 + * kvmppc_uvmem_slot and memslot. 
236 + */ 237 + for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) { 238 + unsigned long index = i - p->base_pfn; 239 + 240 + if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) { 241 + *gfn = i; 242 + ret = true; 243 + break; 244 + } 245 + } 246 + return ret; 247 + } 248 + 249 + static int kvmppc_memslot_page_merge(struct kvm *kvm, 250 + const struct kvm_memory_slot *memslot, bool merge) 251 + { 252 + unsigned long gfn = memslot->base_gfn; 253 + unsigned long end, start = gfn_to_hva(kvm, gfn); 254 + int ret = 0; 255 + struct vm_area_struct *vma; 256 + int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE; 257 + 258 + if (kvm_is_error_hva(start)) 259 + return H_STATE; 260 + 261 + end = start + (memslot->npages << PAGE_SHIFT); 262 + 263 + mmap_write_lock(kvm->mm); 264 + do { 265 + vma = find_vma_intersection(kvm->mm, start, end); 266 + if (!vma) { 267 + ret = H_STATE; 268 + break; 269 + } 270 + ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, 271 + merge_flag, &vma->vm_flags); 272 + if (ret) { 273 + ret = H_STATE; 274 + break; 275 + } 276 + start = vma->vm_end; 277 + } while (end > vma->vm_end); 278 + 279 + mmap_write_unlock(kvm->mm); 280 + return ret; 281 + } 282 + 283 + static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm, 284 + const struct kvm_memory_slot *memslot) 285 + { 286 + uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); 287 + kvmppc_uvmem_slot_free(kvm, memslot); 288 + kvmppc_memslot_page_merge(kvm, memslot, true); 289 + } 290 + 291 + static int __kvmppc_uvmem_memslot_create(struct kvm *kvm, 292 + const struct kvm_memory_slot *memslot) 293 + { 294 + int ret = H_PARAMETER; 295 + 296 + if (kvmppc_memslot_page_merge(kvm, memslot, false)) 297 + return ret; 298 + 299 + if (kvmppc_uvmem_slot_init(kvm, memslot)) 300 + goto out1; 301 + 302 + ret = uv_register_mem_slot(kvm->arch.lpid, 303 + memslot->base_gfn << PAGE_SHIFT, 304 + memslot->npages * PAGE_SIZE, 305 + 0, memslot->id); 306 + if (ret < 0) { 307 + ret = H_PARAMETER; 308 + goto out; 309 + } 310 + 
return 0; 311 + out: 312 + kvmppc_uvmem_slot_free(kvm, memslot); 313 + out1: 314 + kvmppc_memslot_page_merge(kvm, memslot, true); 315 + return ret; 316 + } 317 + 352 318 unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) 353 319 { 354 320 struct kvm_memslots *slots; 355 - struct kvm_memory_slot *memslot; 321 + struct kvm_memory_slot *memslot, *m; 356 322 int ret = H_SUCCESS; 357 323 int srcu_idx; 358 324 ··· 474 232 return H_AUTHORITY; 475 233 476 234 srcu_idx = srcu_read_lock(&kvm->srcu); 235 + 236 + /* register the memslot */ 477 237 slots = kvm_memslots(kvm); 478 238 kvm_for_each_memslot(memslot, slots) { 479 - if (kvmppc_uvmem_slot_init(kvm, memslot)) { 480 - ret = H_PARAMETER; 481 - goto out; 482 - } 483 - ret = uv_register_mem_slot(kvm->arch.lpid, 484 - memslot->base_gfn << PAGE_SHIFT, 485 - memslot->npages * PAGE_SIZE, 486 - 0, memslot->id); 487 - if (ret < 0) { 488 - kvmppc_uvmem_slot_free(kvm, memslot); 489 - ret = H_PARAMETER; 490 - goto out; 239 + ret = __kvmppc_uvmem_memslot_create(kvm, memslot); 240 + if (ret) 241 + break; 242 + } 243 + 244 + if (ret) { 245 + slots = kvm_memslots(kvm); 246 + kvm_for_each_memslot(m, slots) { 247 + if (m == memslot) 248 + break; 249 + __kvmppc_uvmem_memslot_delete(kvm, memslot); 491 250 } 492 251 } 493 - out: 252 + 494 253 srcu_read_unlock(&kvm->srcu, srcu_idx); 495 254 return ret; 496 255 } 497 256 498 - unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) 257 + /* 258 + * Provision a new page on HV side and copy over the contents 259 + * from secure memory using UV_PAGE_OUT uvcall. 260 + * Caller must held kvm->arch.uvmem_lock. 
261 + */ 262 + static int __kvmppc_svm_page_out(struct vm_area_struct *vma, 263 + unsigned long start, 264 + unsigned long end, unsigned long page_shift, 265 + struct kvm *kvm, unsigned long gpa) 499 266 { 500 - if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 501 - return H_UNSUPPORTED; 267 + unsigned long src_pfn, dst_pfn = 0; 268 + struct migrate_vma mig; 269 + struct page *dpage, *spage; 270 + struct kvmppc_uvmem_page_pvt *pvt; 271 + unsigned long pfn; 272 + int ret = U_SUCCESS; 502 273 503 - kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; 504 - pr_info("LPID %d went secure\n", kvm->arch.lpid); 505 - return H_SUCCESS; 274 + memset(&mig, 0, sizeof(mig)); 275 + mig.vma = vma; 276 + mig.start = start; 277 + mig.end = end; 278 + mig.src = &src_pfn; 279 + mig.dst = &dst_pfn; 280 + mig.pgmap_owner = &kvmppc_uvmem_pgmap; 281 + mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; 282 + 283 + /* The requested page is already paged-out, nothing to do */ 284 + if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) 285 + return ret; 286 + 287 + ret = migrate_vma_setup(&mig); 288 + if (ret) 289 + return -1; 290 + 291 + spage = migrate_pfn_to_page(*mig.src); 292 + if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) 293 + goto out_finalize; 294 + 295 + if (!is_zone_device_page(spage)) 296 + goto out_finalize; 297 + 298 + dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); 299 + if (!dpage) { 300 + ret = -1; 301 + goto out_finalize; 302 + } 303 + 304 + lock_page(dpage); 305 + pvt = spage->zone_device_data; 306 + pfn = page_to_pfn(dpage); 307 + 308 + /* 309 + * This function is used in two cases: 310 + * - When HV touches a secure page, for which we do UV_PAGE_OUT 311 + * - When a secure page is converted to shared page, we *get* 312 + * the page to essentially unmap the device page. In this 313 + * case we skip page-out. 
314 + */ 315 + if (!pvt->skip_page_out) 316 + ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, 317 + gpa, 0, page_shift); 318 + 319 + if (ret == U_SUCCESS) 320 + *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; 321 + else { 322 + unlock_page(dpage); 323 + __free_page(dpage); 324 + goto out_finalize; 325 + } 326 + 327 + migrate_vma_pages(&mig); 328 + 329 + out_finalize: 330 + migrate_vma_finalize(&mig); 331 + return ret; 332 + } 333 + 334 + static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, 335 + unsigned long start, unsigned long end, 336 + unsigned long page_shift, 337 + struct kvm *kvm, unsigned long gpa) 338 + { 339 + int ret; 340 + 341 + mutex_lock(&kvm->arch.uvmem_lock); 342 + ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa); 343 + mutex_unlock(&kvm->arch.uvmem_lock); 344 + 345 + return ret; 506 346 } 507 347 508 348 /* ··· 595 271 * fault on them, do fault time migration to replace the device PTEs in 596 272 * QEMU page table with normal PTEs from newly allocated pages. 
597 273 */ 598 - void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, 274 + void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot, 599 275 struct kvm *kvm, bool skip_page_out) 600 276 { 601 277 int i; 602 278 struct kvmppc_uvmem_page_pvt *pvt; 603 - unsigned long pfn, uvmem_pfn; 604 - unsigned long gfn = free->base_gfn; 279 + struct page *uvmem_page; 280 + struct vm_area_struct *vma = NULL; 281 + unsigned long uvmem_pfn, gfn; 282 + unsigned long addr; 605 283 606 - for (i = free->npages; i; --i, ++gfn) { 607 - struct page *uvmem_page; 284 + mmap_read_lock(kvm->mm); 608 285 609 - mutex_lock(&kvm->arch.uvmem_lock); 610 - if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { 611 - mutex_unlock(&kvm->arch.uvmem_lock); 612 - continue; 286 + addr = slot->userspace_addr; 287 + 288 + gfn = slot->base_gfn; 289 + for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) { 290 + 291 + /* Fetch the VMA if addr is not in the latest fetched one */ 292 + if (!vma || addr >= vma->vm_end) { 293 + vma = find_vma_intersection(kvm->mm, addr, addr+1); 294 + if (!vma) { 295 + pr_err("Can't find VMA for gfn:0x%lx\n", gfn); 296 + break; 297 + } 613 298 } 614 299 615 - uvmem_page = pfn_to_page(uvmem_pfn); 616 - pvt = uvmem_page->zone_device_data; 617 - pvt->skip_page_out = skip_page_out; 618 - mutex_unlock(&kvm->arch.uvmem_lock); 300 + mutex_lock(&kvm->arch.uvmem_lock); 619 301 620 - pfn = gfn_to_pfn(kvm, gfn); 621 - if (is_error_noslot_pfn(pfn)) 622 - continue; 623 - kvm_release_pfn_clean(pfn); 302 + if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { 303 + uvmem_page = pfn_to_page(uvmem_pfn); 304 + pvt = uvmem_page->zone_device_data; 305 + pvt->skip_page_out = skip_page_out; 306 + pvt->remove_gfn = true; 307 + 308 + if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE, 309 + PAGE_SHIFT, kvm, pvt->gpa)) 310 + pr_err("Can't page out gpa:0x%lx addr:0x%lx\n", 311 + pvt->gpa, addr); 312 + } else { 313 + /* Remove the shared flag if any */ 314 + kvmppc_gfn_remove(gfn, 
kvm); 315 + } 316 + 317 + mutex_unlock(&kvm->arch.uvmem_lock); 624 318 } 319 + 320 + mmap_read_unlock(kvm->mm); 625 321 } 626 322 627 323 unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) ··· 704 360 goto out_clear; 705 361 706 362 uvmem_pfn = bit + pfn_first; 707 - kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); 363 + kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); 708 364 709 365 pvt->gpa = gpa; 710 366 pvt->kvm = kvm; ··· 723 379 } 724 380 725 381 /* 726 - * Alloc a PFN from private device memory pool and copy page from normal 727 - * memory to secure memory using UV_PAGE_IN uvcall. 382 + * Alloc a PFN from private device memory pool. If @pagein is true, 383 + * copy page from normal memory to secure memory using UV_PAGE_IN uvcall. 728 384 */ 729 - static int 730 - kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, 731 - unsigned long end, unsigned long gpa, struct kvm *kvm, 732 - unsigned long page_shift, bool *downgrade) 385 + static int kvmppc_svm_page_in(struct vm_area_struct *vma, 386 + unsigned long start, 387 + unsigned long end, unsigned long gpa, struct kvm *kvm, 388 + unsigned long page_shift, 389 + bool pagein) 733 390 { 734 391 unsigned long src_pfn, dst_pfn = 0; 735 392 struct migrate_vma mig; ··· 747 402 mig.dst = &dst_pfn; 748 403 mig.flags = MIGRATE_VMA_SELECT_SYSTEM; 749 404 750 - /* 751 - * We come here with mmap_lock write lock held just for 752 - * ksm_madvise(), otherwise we only need read mmap_lock. 753 - * Hence downgrade to read lock once ksm_madvise() is done. 
754 - */ 755 - ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, 756 - MADV_UNMERGEABLE, &vma->vm_flags); 757 - mmap_write_downgrade(kvm->mm); 758 - *downgrade = true; 759 - if (ret) 760 - return ret; 761 - 762 405 ret = migrate_vma_setup(&mig); 763 406 if (ret) 764 407 return ret; ··· 762 429 goto out_finalize; 763 430 } 764 431 765 - pfn = *mig.src >> MIGRATE_PFN_SHIFT; 766 - spage = migrate_pfn_to_page(*mig.src); 767 - if (spage) 768 - uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, 769 - page_shift); 432 + if (pagein) { 433 + pfn = *mig.src >> MIGRATE_PFN_SHIFT; 434 + spage = migrate_pfn_to_page(*mig.src); 435 + if (spage) { 436 + ret = uv_page_in(kvm->arch.lpid, pfn << page_shift, 437 + gpa, 0, page_shift); 438 + if (ret) 439 + goto out_finalize; 440 + } 441 + } 770 442 771 443 *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; 772 444 migrate_vma_pages(&mig); 773 445 out_finalize: 774 446 migrate_vma_finalize(&mig); 447 + return ret; 448 + } 449 + 450 + static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm, 451 + const struct kvm_memory_slot *memslot) 452 + { 453 + unsigned long gfn = memslot->base_gfn; 454 + struct vm_area_struct *vma; 455 + unsigned long start, end; 456 + int ret = 0; 457 + 458 + mmap_read_lock(kvm->mm); 459 + mutex_lock(&kvm->arch.uvmem_lock); 460 + while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) { 461 + ret = H_STATE; 462 + start = gfn_to_hva(kvm, gfn); 463 + if (kvm_is_error_hva(start)) 464 + break; 465 + 466 + end = start + (1UL << PAGE_SHIFT); 467 + vma = find_vma_intersection(kvm->mm, start, end); 468 + if (!vma || vma->vm_start > start || vma->vm_end < end) 469 + break; 470 + 471 + ret = kvmppc_svm_page_in(vma, start, end, 472 + (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false); 473 + if (ret) { 474 + ret = H_STATE; 475 + break; 476 + } 477 + 478 + /* relinquish the cpu if needed */ 479 + cond_resched(); 480 + } 481 + mutex_unlock(&kvm->arch.uvmem_lock); 482 + mmap_read_unlock(kvm->mm); 483 + return ret; 
484 + } 485 + 486 + unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) 487 + { 488 + struct kvm_memslots *slots; 489 + struct kvm_memory_slot *memslot; 490 + int srcu_idx; 491 + long ret = H_SUCCESS; 492 + 493 + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) 494 + return H_UNSUPPORTED; 495 + 496 + /* migrate any unmoved normal pfn to device pfns*/ 497 + srcu_idx = srcu_read_lock(&kvm->srcu); 498 + slots = kvm_memslots(kvm); 499 + kvm_for_each_memslot(memslot, slots) { 500 + ret = kvmppc_uv_migrate_mem_slot(kvm, memslot); 501 + if (ret) { 502 + /* 503 + * The pages will remain transitioned. 504 + * Its the callers responsibility to 505 + * terminate the VM, which will undo 506 + * all state of the VM. Till then 507 + * this VM is in a erroneous state. 508 + * Its KVMPPC_SECURE_INIT_DONE will 509 + * remain unset. 510 + */ 511 + ret = H_STATE; 512 + goto out; 513 + } 514 + } 515 + 516 + kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; 517 + pr_info("LPID %d went secure\n", kvm->arch.lpid); 518 + 519 + out: 520 + srcu_read_unlock(&kvm->srcu, srcu_idx); 775 521 return ret; 776 522 } 777 523 ··· 863 451 * In the former case, uses dev_pagemap_ops.migrate_to_ram handler 864 452 * to unmap the device page from QEMU's page tables. 865 453 */ 866 - static unsigned long 867 - kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) 454 + static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa, 455 + unsigned long page_shift) 868 456 { 869 457 870 458 int ret = H_PARAMETER; ··· 881 469 uvmem_page = pfn_to_page(uvmem_pfn); 882 470 pvt = uvmem_page->zone_device_data; 883 471 pvt->skip_page_out = true; 472 + /* 473 + * do not drop the GFN. It is a valid GFN 474 + * that is transitioned to a shared GFN. 
475 + */ 476 + pvt->remove_gfn = false; 884 477 } 885 478 886 479 retry: ··· 899 482 uvmem_page = pfn_to_page(uvmem_pfn); 900 483 pvt = uvmem_page->zone_device_data; 901 484 pvt->skip_page_out = true; 485 + pvt->remove_gfn = false; /* it continues to be a valid GFN */ 902 486 kvm_release_pfn_clean(pfn); 903 487 goto retry; 904 488 } 905 489 906 - if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift)) 490 + if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, 491 + page_shift)) { 492 + kvmppc_gfn_shared(gfn, kvm); 907 493 ret = H_SUCCESS; 494 + } 908 495 kvm_release_pfn_clean(pfn); 909 496 mutex_unlock(&kvm->arch.uvmem_lock); 910 497 out: ··· 922 501 * H_PAGE_IN_SHARED flag makes the page shared which means that the same 923 502 * memory in is visible from both UV and HV. 924 503 */ 925 - unsigned long 926 - kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, 927 - unsigned long flags, unsigned long page_shift) 504 + unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, 505 + unsigned long flags, 506 + unsigned long page_shift) 928 507 { 929 - bool downgrade = false; 930 508 unsigned long start, end; 931 509 struct vm_area_struct *vma; 932 510 int srcu_idx; ··· 946 526 947 527 ret = H_PARAMETER; 948 528 srcu_idx = srcu_read_lock(&kvm->srcu); 949 - mmap_write_lock(kvm->mm); 529 + mmap_read_lock(kvm->mm); 950 530 951 531 start = gfn_to_hva(kvm, gfn); 952 532 if (kvm_is_error_hva(start)) ··· 962 542 if (!vma || vma->vm_start > start || vma->vm_end < end) 963 543 goto out_unlock; 964 544 965 - if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, 966 - &downgrade)) 967 - ret = H_SUCCESS; 545 + if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, 546 + true)) 547 + goto out_unlock; 548 + 549 + ret = H_SUCCESS; 550 + 968 551 out_unlock: 969 552 mutex_unlock(&kvm->arch.uvmem_lock); 970 553 out: 971 - if (downgrade) 972 - mmap_read_unlock(kvm->mm); 973 - else 974 - mmap_write_unlock(kvm->mm); 554 + 
mmap_read_unlock(kvm->mm); 975 555 srcu_read_unlock(&kvm->srcu, srcu_idx); 976 556 return ret; 977 557 } 978 558 979 - /* 980 - * Provision a new page on HV side and copy over the contents 981 - * from secure memory using UV_PAGE_OUT uvcall. 982 - */ 983 - static int 984 - kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, 985 - unsigned long end, unsigned long page_shift, 986 - struct kvm *kvm, unsigned long gpa) 987 - { 988 - unsigned long src_pfn, dst_pfn = 0; 989 - struct migrate_vma mig; 990 - struct page *dpage, *spage; 991 - struct kvmppc_uvmem_page_pvt *pvt; 992 - unsigned long pfn; 993 - int ret = U_SUCCESS; 994 - 995 - memset(&mig, 0, sizeof(mig)); 996 - mig.vma = vma; 997 - mig.start = start; 998 - mig.end = end; 999 - mig.src = &src_pfn; 1000 - mig.dst = &dst_pfn; 1001 - mig.pgmap_owner = &kvmppc_uvmem_pgmap; 1002 - mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; 1003 - 1004 - mutex_lock(&kvm->arch.uvmem_lock); 1005 - /* The requested page is already paged-out, nothing to do */ 1006 - if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) 1007 - goto out; 1008 - 1009 - ret = migrate_vma_setup(&mig); 1010 - if (ret) 1011 - goto out; 1012 - 1013 - spage = migrate_pfn_to_page(*mig.src); 1014 - if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) 1015 - goto out_finalize; 1016 - 1017 - if (!is_zone_device_page(spage)) 1018 - goto out_finalize; 1019 - 1020 - dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); 1021 - if (!dpage) { 1022 - ret = -1; 1023 - goto out_finalize; 1024 - } 1025 - 1026 - lock_page(dpage); 1027 - pvt = spage->zone_device_data; 1028 - pfn = page_to_pfn(dpage); 1029 - 1030 - /* 1031 - * This function is used in two cases: 1032 - * - When HV touches a secure page, for which we do UV_PAGE_OUT 1033 - * - When a secure page is converted to shared page, we *get* 1034 - * the page to essentially unmap the device page. In this 1035 - * case we skip page-out. 
1036 - */ 1037 - if (!pvt->skip_page_out) 1038 - ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, 1039 - gpa, 0, page_shift); 1040 - 1041 - if (ret == U_SUCCESS) 1042 - *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; 1043 - else { 1044 - unlock_page(dpage); 1045 - __free_page(dpage); 1046 - goto out_finalize; 1047 - } 1048 - 1049 - migrate_vma_pages(&mig); 1050 - out_finalize: 1051 - migrate_vma_finalize(&mig); 1052 - out: 1053 - mutex_unlock(&kvm->arch.uvmem_lock); 1054 - return ret; 1055 - } 1056 559 1057 560 /* 1058 561 * Fault handler callback that gets called when HV touches any page that ··· 1000 657 /* 1001 658 * Release the device PFN back to the pool 1002 659 * 1003 - * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT. 660 + * Gets called when secure GFN tranistions from a secure-PFN 661 + * to a normal PFN during H_SVM_PAGE_OUT. 1004 662 * Gets called with kvm->arch.uvmem_lock held. 1005 663 */ 1006 664 static void kvmppc_uvmem_page_free(struct page *page) ··· 1016 672 1017 673 pvt = page->zone_device_data; 1018 674 page->zone_device_data = NULL; 1019 - kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); 675 + if (pvt->remove_gfn) 676 + kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); 677 + else 678 + kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm); 1020 679 kfree(pvt); 1021 680 } 1022 681 ··· 1089 742 kvm_release_pfn_clean(pfn); 1090 743 mutex_unlock(&kvm->arch.uvmem_lock); 1091 744 return (ret == U_SUCCESS) ? 
RESUME_GUEST : -EFAULT; 745 + } 746 + 747 + int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new) 748 + { 749 + int ret = __kvmppc_uvmem_memslot_create(kvm, new); 750 + 751 + if (!ret) 752 + ret = kvmppc_uv_migrate_mem_slot(kvm, new); 753 + 754 + return ret; 755 + } 756 + 757 + void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old) 758 + { 759 + __kvmppc_uvmem_memslot_delete(kvm, old); 1092 760 } 1093 761 1094 762 static u64 kvmppc_get_secmem_size(void)
+27 -29
arch/powerpc/kvm/book3s_interrupts.S
··· 55 55 ****************************************************************************/ 56 56 57 57 /* Registers: 58 - * r3: kvm_run pointer 59 - * r4: vcpu pointer 58 + * r3: vcpu pointer 60 59 */ 61 60 _GLOBAL(__kvmppc_vcpu_run) 62 61 ··· 67 68 /* Save host state to the stack */ 68 69 PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) 69 70 70 - /* Save r3 (kvm_run) and r4 (vcpu) */ 71 - SAVE_2GPRS(3, r1) 71 + /* Save r3 (vcpu) */ 72 + SAVE_GPR(3, r1) 72 73 73 74 /* Save non-volatile registers (r14 - r31) */ 74 75 SAVE_NVGPRS(r1) ··· 81 82 PPC_STL r0, _LINK(r1) 82 83 83 84 /* Load non-volatile guest state from the vcpu */ 84 - VCPU_LOAD_NVGPRS(r4) 85 + VCPU_LOAD_NVGPRS(r3) 85 86 86 87 kvm_start_lightweight: 87 88 /* Copy registers into shadow vcpu so we can access them in real mode */ 88 - mr r3, r4 89 89 bl FUNC(kvmppc_copy_to_svcpu) 90 90 nop 91 - REST_GPR(4, r1) 91 + REST_GPR(3, r1) 92 92 93 93 #ifdef CONFIG_PPC_BOOK3S_64 94 94 /* Get the dcbz32 flag */ 95 - PPC_LL r3, VCPU_HFLAGS(r4) 96 - rldicl r3, r3, 0, 63 /* r3 &= 1 */ 97 - stb r3, HSTATE_RESTORE_HID5(r13) 95 + PPC_LL r0, VCPU_HFLAGS(r3) 96 + rldicl r0, r0, 0, 63 /* r3 &= 1 */ 97 + stb r0, HSTATE_RESTORE_HID5(r13) 98 98 99 99 /* Load up guest SPRG3 value, since it's user readable */ 100 - lwz r3, VCPU_SHAREDBE(r4) 101 - cmpwi r3, 0 102 - ld r5, VCPU_SHARED(r4) 100 + lbz r4, VCPU_SHAREDBE(r3) 101 + cmpwi r4, 0 102 + ld r5, VCPU_SHARED(r3) 103 103 beq sprg3_little_endian 104 104 sprg3_big_endian: 105 105 #ifdef __BIG_ENDIAN__ 106 - ld r3, VCPU_SHARED_SPRG3(r5) 106 + ld r4, VCPU_SHARED_SPRG3(r5) 107 107 #else 108 108 addi r5, r5, VCPU_SHARED_SPRG3 109 - ldbrx r3, 0, r5 109 + ldbrx r4, 0, r5 110 110 #endif 111 111 b after_sprg3_load 112 112 sprg3_little_endian: 113 113 #ifdef __LITTLE_ENDIAN__ 114 - ld r3, VCPU_SHARED_SPRG3(r5) 114 + ld r4, VCPU_SHARED_SPRG3(r5) 115 115 #else 116 116 addi r5, r5, VCPU_SHARED_SPRG3 117 - ldbrx r3, 0, r5 117 + ldbrx r4, 0, r5 118 118 #endif 119 119 120 120 after_sprg3_load: 121 - mtspr 
SPRN_SPRG3, r3 121 + mtspr SPRN_SPRG3, r4 122 122 #endif /* CONFIG_PPC_BOOK3S_64 */ 123 123 124 - PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ 124 + PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */ 125 125 126 126 /* Jump to segment patching handler and into our guest */ 127 127 bl FUNC(kvmppc_entry_trampoline) ··· 144 146 * 145 147 */ 146 148 147 - PPC_LL r3, GPR4(r1) /* vcpu pointer */ 149 + PPC_LL r3, GPR3(r1) /* vcpu pointer */ 148 150 149 151 /* 150 152 * kvmppc_copy_from_svcpu can clobber volatile registers, save ··· 167 169 #endif /* CONFIG_PPC_BOOK3S_64 */ 168 170 169 171 /* R7 = vcpu */ 170 - PPC_LL r7, GPR4(r1) 172 + PPC_LL r7, GPR3(r1) 171 173 172 174 PPC_STL r14, VCPU_GPR(R14)(r7) 173 175 PPC_STL r15, VCPU_GPR(R15)(r7) ··· 188 190 PPC_STL r30, VCPU_GPR(R30)(r7) 189 191 PPC_STL r31, VCPU_GPR(R31)(r7) 190 192 191 - /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ 192 - lwz r5, VCPU_TRAP(r7) 193 + /* Pass the exit number as 2nd argument to kvmppc_handle_exit */ 194 + lwz r4, VCPU_TRAP(r7) 193 195 194 - /* Restore r3 (kvm_run) and r4 (vcpu) */ 195 - REST_2GPRS(3, r1) 196 + /* Restore r3 (vcpu) */ 197 + REST_GPR(3, r1) 196 198 bl FUNC(kvmppc_handle_exit_pr) 197 199 198 200 /* If RESUME_GUEST, get back in the loop */ ··· 221 223 PPC_LL r4, _LINK(r1) 222 224 PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) 223 225 224 - /* Load vcpu and cpu_run */ 225 - REST_2GPRS(3, r1) 226 + /* Load vcpu */ 227 + REST_GPR(3, r1) 226 228 227 229 /* Load non-volatile guest state from the vcpu */ 228 - VCPU_LOAD_NVGPRS(r4) 230 + VCPU_LOAD_NVGPRS(r3) 229 231 230 232 /* Jump back into the beginning of this function */ 231 233 b kvm_start_lightweight ··· 233 235 kvm_loop_lightweight: 234 236 235 237 /* We'll need the vcpu pointer */ 236 - REST_GPR(4, r1) 238 + REST_GPR(3, r1) 237 239 238 240 /* Jump back into the beginning of this function */ 239 241 b kvm_start_lightweight
+4 -5
arch/powerpc/kvm/book3s_pr.c
··· 1151 1151 return r; 1152 1152 } 1153 1153 1154 - int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 1155 - unsigned int exit_nr) 1154 + int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr) 1156 1155 { 1156 + struct kvm_run *run = vcpu->run; 1157 1157 int r = RESUME_HOST; 1158 1158 int s; 1159 1159 ··· 1826 1826 1827 1827 static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) 1828 1828 { 1829 - struct kvm_run *run = vcpu->run; 1830 1829 int ret; 1831 1830 1832 1831 /* Check if we can run the vcpu at all */ 1833 1832 if (!vcpu->arch.sane) { 1834 - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 1833 + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 1835 1834 ret = -EINVAL; 1836 1835 goto out; 1837 1836 } ··· 1857 1858 1858 1859 kvmppc_fix_ee_before_entry(); 1859 1860 1860 - ret = __kvmppc_vcpu_run(run, vcpu); 1861 + ret = __kvmppc_vcpu_run(vcpu); 1861 1862 1862 1863 kvmppc_clear_debug(vcpu); 1863 1864
+2
arch/powerpc/kvm/book3s_rtas.c
··· 229 229 */ 230 230 args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM; 231 231 232 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 232 233 rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); 234 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 233 235 if (rc) 234 236 goto fail; 235 237
+4 -5
arch/powerpc/kvm/booke.c
··· 731 731 732 732 int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) 733 733 { 734 - struct kvm_run *run = vcpu->run; 735 734 int ret, s; 736 735 struct debug_reg debug; 737 736 738 737 if (!vcpu->arch.sane) { 739 - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 738 + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 740 739 return -EINVAL; 741 740 } 742 741 ··· 777 778 vcpu->arch.pgdir = vcpu->kvm->mm->pgd; 778 779 kvmppc_fix_ee_before_entry(); 779 780 780 - ret = __kvmppc_vcpu_run(run, vcpu); 781 + ret = __kvmppc_vcpu_run(vcpu); 781 782 782 783 /* No need for guest_exit. It's done in handle_exit. 783 784 We also get here with interrupts enabled. */ ··· 981 982 * 982 983 * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) 983 984 */ 984 - int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 985 - unsigned int exit_nr) 985 + int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) 986 986 { 987 + struct kvm_run *run = vcpu->run; 987 988 int r = RESUME_HOST; 988 989 int s; 989 990 int idx;
+4 -5
arch/powerpc/kvm/booke_interrupts.S
··· 237 237 /* Switch to kernel stack and jump to handler. */ 238 238 LOAD_REG_ADDR(r3, kvmppc_handle_exit) 239 239 mtctr r3 240 - lwz r3, HOST_RUN(r1) 240 + mr r3, r4 241 241 lwz r2, HOST_R2(r1) 242 242 mr r14, r4 /* Save vcpu pointer. */ 243 243 ··· 337 337 338 338 339 339 /* Registers: 340 - * r3: kvm_run pointer 341 - * r4: vcpu pointer 340 + * r3: vcpu pointer 342 341 */ 343 342 _GLOBAL(__kvmppc_vcpu_run) 344 343 stwu r1, -HOST_STACK_SIZE(r1) 345 - stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ 344 + stw r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ 346 345 347 346 /* Save host state to stack. */ 348 - stw r3, HOST_RUN(r1) 347 + mr r4, r3 349 348 mflr r3 350 349 stw r3, HOST_STACK_LR(r1) 351 350 mfcr r5
+5 -5
arch/powerpc/kvm/bookehv_interrupts.S
··· 434 434 #endif 435 435 436 436 /* Switch to kernel stack and jump to handler. */ 437 - PPC_LL r3, HOST_RUN(r1) 437 + mr r3, r4 438 438 mr r5, r14 /* intno */ 439 439 mr r14, r4 /* Save vcpu pointer. */ 440 + mr r4, r5 440 441 bl kvmppc_handle_exit 441 442 442 443 /* Restore vcpu pointer and the nonvolatiles we used. */ ··· 526 525 blr 527 526 528 527 /* Registers: 529 - * r3: kvm_run pointer 530 - * r4: vcpu pointer 528 + * r3: vcpu pointer 531 529 */ 532 530 _GLOBAL(__kvmppc_vcpu_run) 533 531 stwu r1, -HOST_STACK_SIZE(r1) 534 - PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ 532 + PPC_STL r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ 535 533 536 534 /* Save host state to stack. */ 537 - PPC_STL r3, HOST_RUN(r1) 535 + mr r4, r3 538 536 mflr r3 539 537 mfcr r5 540 538 PPC_STL r3, HOST_STACK_LR(r1)
+4 -1
arch/powerpc/kvm/powerpc.c
··· 403 403 return EMULATE_DONE; 404 404 } 405 405 406 - if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) 406 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 407 + rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size); 408 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 409 + if (rc) 407 410 return EMULATE_DO_MMIO; 408 411 409 412 return EMULATE_DONE;
+2 -1
arch/x86/kvm/cpuid.c
··· 370 370 kvm_cpu_cap_mask(CPUID_7_EDX, 371 371 F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | 372 372 F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | 373 - F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) 373 + F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | 374 + F(SERIALIZE) 374 375 ); 375 376 376 377 /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
+1
arch/x86/kvm/hyperv.c
··· 900 900 kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); 901 901 synic->active = true; 902 902 synic->dont_zero_synic_pages = dont_zero_synic_pages; 903 + synic->control = HV_SYNIC_CONTROL_ENABLE; 903 904 return 0; 904 905 } 905 906
+12 -12
arch/x86/kvm/x86.c
··· 820 820 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) 821 821 return 1; 822 822 823 - if (cr0 & X86_CR0_PG) { 824 823 #ifdef CONFIG_X86_64 825 - if (!is_paging(vcpu) && (vcpu->arch.efer & EFER_LME)) { 826 - int cs_db, cs_l; 824 + if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) && 825 + (cr0 & X86_CR0_PG)) { 826 + int cs_db, cs_l; 827 827 828 - if (!is_pae(vcpu)) 829 - return 1; 830 - kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 831 - if (cs_l) 832 - return 1; 833 - } else 834 - #endif 835 - if (is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) && 836 - !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) 828 + if (!is_pae(vcpu)) 829 + return 1; 830 + kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 831 + if (cs_l) 837 832 return 1; 838 833 } 834 + #endif 835 + if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) && 836 + is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) && 837 + !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) 838 + return 1; 839 839 840 840 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) 841 841 return 1;
+5 -8
include/kvm/arm_arch_timer.h
··· 26 26 struct arch_timer_context { 27 27 struct kvm_vcpu *vcpu; 28 28 29 - /* Registers: control register, timer value */ 30 - u32 cnt_ctl; 31 - u64 cnt_cval; 32 - 33 29 /* Timer IRQ */ 34 30 struct kvm_irq_level irq; 35 - 36 - /* Virtual offset */ 37 - u64 cntvoff; 38 31 39 32 /* Emulated Timer (may be unused) */ 40 33 struct hrtimer hrtimer; ··· 64 71 int kvm_timer_enable(struct kvm_vcpu *vcpu); 65 72 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); 66 73 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); 67 - void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); 74 + void kvm_timer_sync_user(struct kvm_vcpu *vcpu); 68 75 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); 69 76 void kvm_timer_update_run(struct kvm_vcpu *vcpu); 70 77 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); ··· 101 108 enum kvm_arch_timers tmr, 102 109 enum kvm_arch_timer_regs treg, 103 110 u64 val); 111 + 112 + /* Needed for tracing */ 113 + u32 timer_get_ctl(struct arch_timer_context *ctxt); 114 + u64 timer_get_cval(struct arch_timer_context *ctxt); 104 115 105 116 #endif
+1 -1
include/trace/events/kvm.h
··· 17 17 ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \ 18 18 ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH), ERSN(EPR),\ 19 19 ERSN(SYSTEM_EVENT), ERSN(S390_STSI), ERSN(IOAPIC_EOI), \ 20 - ERSN(HYPERV) 20 + ERSN(HYPERV), ERSN(ARM_NISV) 21 21 22 22 TRACE_EVENT(kvm_userspace_exit, 23 23 TP_PROTO(__u32 reason, int errno),
+1
scripts/kallsyms.c
··· 109 109 ".LASANPC", /* s390 kasan local symbols */ 110 110 "__crc_", /* modversions */ 111 111 "__efistub_", /* arm64 EFI stub namespace */ 112 + "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ 112 113 NULL 113 114 }; 114 115