Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull arm64 kvm fixes from Paolo Bonzini:

- Ensure early return semantics are preserved for pKVM fault handlers

- Fix case where the kernel runs with the guest's PAN value when
  CONFIG_ARM64_PAN is not set

- Make stage-1 walks that set the access flag respect the write
  permission of the underlying stage-2, when enabled

- Propagate computed FGT values to the pKVM view of the vCPU at
  vcpu_load()

- Correctly program the PXN and UXN execute-never bits in hVHE's stage-1
  page tables

- Check that the VM is actually using a vGICv3 before accessing the
  GICv3 CPU interface

- Delete some unused code

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: arm64: Invert KVM_PGTABLE_WALK_HANDLE_FAULT to fix pKVM walkers
KVM: arm64: Don't blindly set PSTATE.PAN on guest exit
KVM: arm64: nv: Respect stage-2 write permission when setting stage-1 AF
KVM: arm64: Remove unused vcpu_{clear,set}_wfx_traps()
KVM: arm64: Remove unused parameter in synchronize_vcpu_pstate()
KVM: arm64: Remove extra argument for __pkvm_host_{share,unshare}_hyp()
KVM: arm64: Inject UNDEF for a register trap without accessor
KVM: arm64: Copy FGT traps to unprotected pKVM VCPU on VCPU load
KVM: arm64: Fix EL2 S1 XN handling for hVHE setups
KVM: arm64: gic: Check for vGICv3 when clearing TWI

17 files changed, 73 insertions(+), 38 deletions(-)

arch/arm64/include/asm/kvm_asm.h  (+2)

···
 			       __le32 *origptr, __le32 *updptr, int nr_inst);
 void kvm_compute_final_ctr_el0(struct alt_instr *alt,
 			       __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_pan_patch_el2_entry(struct alt_instr *alt,
+			     __le32 *origptr, __le32 *updptr, int nr_inst);
 void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt,
 					      u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar);

arch/arm64/include/asm/kvm_emulate.h  (-16)

···
 	return (unsigned long *)&vcpu->arch.hcr_el2;
 }
 
-static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.hcr_el2 &= ~HCR_TWE;
-	if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
-	    vcpu->kvm->arch.vgic.nassgireq)
-		vcpu->arch.hcr_el2 &= ~HCR_TWI;
-	else
-		vcpu->arch.hcr_el2 |= HCR_TWI;
-}
-
-static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.hcr_el2 |= HCR_TWE;
-	vcpu->arch.hcr_el2 |= HCR_TWI;
-}
-
 static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.vsesr_el2;

arch/arm64/include/asm/kvm_pgtable.h  (+12 -4)

···
 
 #define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)
 
-#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)
+#define __KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)
+#define __KVM_PTE_LEAF_ATTR_HI_S1_UXN	BIT(54)
+#define __KVM_PTE_LEAF_ATTR_HI_S1_PXN	BIT(53)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_XN				\
+	({ cpus_have_final_cap(ARM64_KVM_HVHE) ?		\
+		(__KVM_PTE_LEAF_ATTR_HI_S1_UXN |		\
+		 __KVM_PTE_LEAF_ATTR_HI_S1_PXN) :		\
+		__KVM_PTE_LEAF_ATTR_HI_S1_XN; })
 
 #define KVM_PTE_LEAF_ATTR_HI_S2_XN	GENMASK(54, 53)
···
  *					children.
  * @KVM_PGTABLE_WALK_SHARED:		Indicates the page-tables may be shared
  *					with other software walkers.
- * @KVM_PGTABLE_WALK_HANDLE_FAULT:	Indicates the page-table walk was
- *					invoked from a fault handler.
+ * @KVM_PGTABLE_WALK_IGNORE_EAGAIN:	Don't terminate the walk early if
+ *					the walker returns -EAGAIN.
  * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI:	Visit and update table entries
  *					without Break-before-make's
  *					TLB invalidation.
···
 	KVM_PGTABLE_WALK_TABLE_PRE		= BIT(1),
 	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
 	KVM_PGTABLE_WALK_SHARED			= BIT(3),
-	KVM_PGTABLE_WALK_HANDLE_FAULT		= BIT(4),
+	KVM_PGTABLE_WALK_IGNORE_EAGAIN		= BIT(4),
 	KVM_PGTABLE_WALK_SKIP_BBM_TLBI		= BIT(5),
 	KVM_PGTABLE_WALK_SKIP_CMO		= BIT(6),
 };
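
The point of the hVHE XN change is that the attribute macro now resolves to
the right execute-never encoding for whichever EL2 translation regime is in
use, so existing callers pick up the fix for free: under hVHE the EL2&0
regime uses the EL1&0-style layout with separate UXN (bit 54) and PXN
(bit 53) bits, while classic nVHE EL2 has a single XN bit. A hedged sketch
of the consumer side (modelled on the hyp stage-1 attribute helper, which is
not part of this diff; prot and attr are the usual local variables there):

	/*
	 * Illustrative only: building a non-executable EL2 stage-1 mapping.
	 * Under hVHE the macro expands to UXN | PXN (bits 54 and 53); under
	 * classic nVHE EL2 it is the single XN bit (bit 54), matching the
	 * old behaviour.
	 */
	if (!(prot & KVM_PGTABLE_PROT_X))
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;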

arch/arm64/include/asm/sysreg.h  (+2 -1)

···
  */
 #define pstate_field(op1, op2)	((op1) << Op1_shift | (op2) << Op2_shift)
 #define PSTATE_Imm_shift	CRm_shift
-#define SET_PSTATE(x, r)	__emit_inst(0xd500401f | PSTATE_ ## r | ((!!x) << PSTATE_Imm_shift))
+#define ENCODE_PSTATE(x, r)	(0xd500401f | PSTATE_ ## r | ((!!x) << PSTATE_Imm_shift))
+#define SET_PSTATE(x, r)	__emit_inst(ENCODE_PSTATE(x, r))
 
 #define PSTATE_PAN		pstate_field(0, 4)
 #define PSTATE_UAO		pstate_field(0, 3)
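
Splitting ENCODE_PSTATE() out of SET_PSTATE() lets C code obtain the raw
MSR-immediate encoding instead of emitting the instruction inline, which is
what the new alternative callback relies on. A minimal usage sketch (the
full callback appears in the va_layout.c hunk further down):

	/* Patch a placeholder nop with "msr pan, #1" at alternative-patching time. */
	*updptr = cpu_to_le32(ENCODE_PSTATE(1, PAN));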

arch/arm64/kernel/image-vars.h  (+1)

···
 KVM_NVHE_ALIAS(kvm_update_va_mask);
 KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
 KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0);
+KVM_NVHE_ALIAS(kvm_pan_patch_el2_entry);
 KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter);
 KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
 KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);

arch/arm64/kvm/arm.c  (+1)

···
 		return kvm_wfi_trap_policy == KVM_WFX_NOTRAP;
 
 	return single_task_running() &&
+	       vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
 	       (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
 		vcpu->kvm->arch.vgic.nassgireq);
 }
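
The extra condition matters because its_vpe.vlpi_count and nassgireq live in
GICv3-only state: on a VM using the GICv2 model they carry no meaning and
must not influence the WFI trap decision. A hypothetical helper expressing
the same guard (example_vgic_is_v3() is illustrative, not from the patch):

	static bool example_vgic_is_v3(struct kvm *kvm)
	{
		/* Only a GICv3 distributor model has vLPI/nASSGI state worth consulting. */
		return kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3;
	}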

arch/arm64/kvm/at.c  (+6 -2)

···
 			  struct s1_walk_result *wr, u64 va)
 {
 	u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
+	struct kvm_s2_trans s2_trans = {};
 	int level, stride, ret;
 
 	level = wi->sl;
···
 	ipa = baddr | index;
 
 	if (wi->s2) {
-		struct kvm_s2_trans s2_trans = {};
-
 		ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
 		if (ret) {
 			fail_s1_walk(wr,
···
 		new_desc |= PTE_AF;
 
 	if (new_desc != desc) {
+		if (wi->s2 && !kvm_s2_trans_writable(&s2_trans)) {
+			fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), true);
+			return -EPERM;
+		}
+
 		ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
 		if (ret)
 			return ret;
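
The rule being enforced here: setting the access flag means writing the
stage-1 descriptor back to guest memory, and with a nested stage-2 in play
that write is only legitimate if stage-2 grants write access to the page
holding the descriptor. Restated as a hedged fragment (the real hunk also
reports a stage-2 permission fault via fail_s1_walk() before bailing out):

	/* Descriptor update (AF set) is a write; stage-2 must allow it. */
	if (wi->s2 && !kvm_s2_trans_writable(&s2_trans))
		return -EPERM;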

arch/arm64/kvm/hyp/entry.S  (+3 -1)

···
 
 	add	x1, x1, #VCPU_CONTEXT
 
-	ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_pan_patch_el2_entry
+	nop
+alternative_cb_end
 
 	// Store the guest regs x2 and x3
 	stp	x2, x3, [x1, #CPU_XREG_OFFSET(2)]

arch/arm64/kvm/hyp/include/hyp/switch.h  (+1 -1)

···
 	return false;
 }
 
-static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu)
 {
 	/*
 	 * Check for the conditions of Cortex-A510's #2077057. When these occur

arch/arm64/kvm/hyp/nvhe/hyp-main.c  (+3)

···
 		/* Propagate WFx trapping flags */
 		hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI);
 		hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI);
+	} else {
+		memcpy(&hyp_vcpu->vcpu.arch.fgt, hyp_vcpu->host_vcpu->arch.fgt,
+		       sizeof(hyp_vcpu->vcpu.arch.fgt));
 	}
 }

arch/arm64/kvm/hyp/nvhe/pkvm.c  (-1)

···
 
 	/* Trust the host for non-protected vcpu features. */
 	vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2;
-	memcpy(vcpu->arch.fgt, host_vcpu->arch.fgt, sizeof(vcpu->arch.fgt));
 	return 0;
 }

arch/arm64/kvm/hyp/nvhe/switch.c  (+1 -1)

···
 {
 	const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
 
-	synchronize_vcpu_pstate(vcpu, exit_code);
+	synchronize_vcpu_pstate(vcpu);
 
 	/*
 	 * Some guests (e.g., protected VMs) are not be allowed to run in

arch/arm64/kvm/hyp/pgtable.c  (+3 -2)

···
 	 * page table walk.
 	 */
 	if (r == -EAGAIN)
-		return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT);
+		return walker->flags & KVM_PGTABLE_WALK_IGNORE_EAGAIN;
 
 	return !r;
 }
···
 {
 	return stage2_update_leaf_attrs(pgt, addr, size, 0,
 					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
-					NULL, NULL, 0);
+					NULL, NULL,
+					KVM_PGTABLE_WALK_IGNORE_EAGAIN);
 }
 
 void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
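
With the flag inverted, the default for every walker is now "stop and
propagate -EAGAIN", and only walks that explicitly pass
KVM_PGTABLE_WALK_IGNORE_EAGAIN keep going; this is what restores the early
return semantics the pKVM fault handlers depend on. A hedged caller-side
sketch (example_walker_cb() and example_walk() are illustrative, not kernel
code; the walker types and kvm_pgtable_walk() are the existing API):

	static int example_walker_cb(const struct kvm_pgtable_visit_ctx *ctx,
				     enum kvm_pgtable_walk_flags visit)
	{
		/* A visitor that may bail out with -EAGAIN, e.g. on contention. */
		return ctx->old ? 0 : -EAGAIN;
	}

	static int example_walk(struct kvm_pgtable *pgt, u64 addr, u64 size)
	{
		struct kvm_pgtable_walker walker = {
			.cb	= example_walker_cb,
			.flags	= KVM_PGTABLE_WALK_LEAF |
				  /* Opt in: treat -EAGAIN as "keep walking". */
				  KVM_PGTABLE_WALK_IGNORE_EAGAIN,
		};

		return kvm_pgtable_walk(pgt, addr, size, &walker);
	}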

arch/arm64/kvm/hyp/vhe/switch.c  (+1 -1)

···
 
 static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
-	synchronize_vcpu_pstate(vcpu, exit_code);
+	synchronize_vcpu_pstate(vcpu);
 
 	/*
 	 * If we were in HYP context on entry, adjust the PSTATE view

arch/arm64/kvm/mmu.c  (+5 -7)

···
 	this->count = 1;
 	rb_link_node(&this->node, parent, node);
 	rb_insert_color(&this->node, &hyp_shared_pfns);
-	ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1);
+	ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn);
 unlock:
 	mutex_unlock(&hyp_shared_pfns_lock);
···
 
 	rb_erase(&this->node, &hyp_shared_pfns);
 	kfree(this);
-	ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1);
+	ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn);
 unlock:
 	mutex_unlock(&hyp_shared_pfns_lock);
···
 		*prot &= ~KVM_PGTABLE_PROT_PX;
 }
 
-#define KVM_PGTABLE_WALK_MEMABORT_FLAGS	(KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED)
-
 static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		      struct kvm_s2_trans *nested,
 		      struct kvm_memory_slot *memslot, bool is_perm)
 {
 	bool write_fault, exec_fault, writable;
-	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS;
+	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
 	struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
 	unsigned long mmu_seq;
···
 	struct kvm_pgtable *pgt;
 	struct page *page;
 	vm_flags_t vm_flags;
-	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS;
+	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
 
 	if (fault_is_perm)
 		fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
···
 /* Resolve the access fault by making the page young again. */
 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 {
-	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;
+	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
 	struct kvm_s2_mmu *mmu;
 
 	trace_kvm_access_fault(fault_ipa);

arch/arm64/kvm/sys_regs.c  (+4 -1)

···
 	 * that we don't know how to handle. This certainly qualifies
 	 * as a gross bug that should be fixed right away.
 	 */
-	BUG_ON(!r->access);
+	if (!r->access) {
+		bad_trap(vcpu, params, r, "register access");
+		return;
+	}
 
 	/* Skip instruction if instructed so */
 	if (likely(r->access(vcpu, params, r)))
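
For a trapped register without an accessor, the guest now pays the price
rather than the host: the handler bails out after flagging the access
instead of BUG()ing. A hedged sketch of the resulting behaviour (bad_trap()'s
exact body is not part of this diff; kvm_inject_undefined() is the existing
injection helper):

	if (!r->access) {
		/*
		 * Assumed effect of bad_trap(): warn about the unhandled
		 * encoding, then make the guest take an UNDEF instead of
		 * bringing down the host.
		 */
		kvm_inject_undefined(vcpu);
		return;
	}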

arch/arm64/kvm/va_layout.c  (+28)

···
 	generate_mov_q(read_sanitised_ftr_reg(SYS_CTR_EL0),
 		       origptr, updptr, nr_inst);
 }
+
+void kvm_pan_patch_el2_entry(struct alt_instr *alt,
+			     __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+	/*
+	 * If we're running at EL1 without hVHE, then SCTLR_EL2.SPAN means
+	 * nothing to us (it is RES1), and we don't need to set PSTATE.PAN
+	 * to anything useful.
+	 */
+	if (!is_kernel_in_hyp_mode() && !cpus_have_cap(ARM64_KVM_HVHE))
+		return;
+
+	/*
+	 * Leap of faith: at this point, we must be running VHE one way or
+	 * another, and FEAT_PAN is required to be implemented. If KVM
+	 * explodes at runtime because your system does not abide by this
+	 * requirement, call your favourite HW vendor, they have screwed up.
+	 *
+	 * We don't expect hVHE to access any userspace mapping, so always
+	 * set PSTATE.PAN on enty. Same thing if we have PAN enabled on an
+	 * EL2 kernel. Only force it to 0 if we have not configured PAN in
+	 * the kernel (and you know this is really silly).
+	 */
+	if (cpus_have_cap(ARM64_KVM_HVHE) || IS_ENABLED(CONFIG_ARM64_PAN))
+		*updptr = cpu_to_le32(ENCODE_PSTATE(1, PAN));
+	else
+		*updptr = cpu_to_le32(ENCODE_PSTATE(0, PAN));
+}