Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: arm64: Don't translate FAR if invalid/unsafe

If an SEA occurred during the faulting page table walk, don't re-walk
the page tables, as doing so could take a fatal exception in the hyp.
Additionally, check that FAR_EL2 is valid for SEAs not taken on a page
table walk, as the architecture doesn't guarantee that it contains the
fault VA.

Finally, fix up the rest of the abort path by checking for SEAs early
and bugging the VM if we get further along with an UNKNOWN fault IPA.

Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20250402201725.2963645-4-oliver.upton@linux.dev
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>

+73 -18
+22
arch/arm64/include/asm/esr.h
··· 482 482 (esr == ESR_ELx_FSC_ADDRSZ_L(-1)); 483 483 } 484 484 485 + static inline bool esr_fsc_is_sea_ttw(unsigned long esr) 486 + { 487 + esr = esr & ESR_ELx_FSC; 488 + 489 + return (esr == ESR_ELx_FSC_SEA_TTW(3)) || 490 + (esr == ESR_ELx_FSC_SEA_TTW(2)) || 491 + (esr == ESR_ELx_FSC_SEA_TTW(1)) || 492 + (esr == ESR_ELx_FSC_SEA_TTW(0)) || 493 + (esr == ESR_ELx_FSC_SEA_TTW(-1)); 494 + } 495 + 496 + static inline bool esr_fsc_is_secc_ttw(unsigned long esr) 497 + { 498 + esr = esr & ESR_ELx_FSC; 499 + 500 + return (esr == ESR_ELx_FSC_SECC_TTW(3)) || 501 + (esr == ESR_ELx_FSC_SECC_TTW(2)) || 502 + (esr == ESR_ELx_FSC_SECC_TTW(1)) || 503 + (esr == ESR_ELx_FSC_SECC_TTW(0)) || 504 + (esr == ESR_ELx_FSC_SECC_TTW(-1)); 505 + } 506 + 485 507 /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ 486 508 static inline bool esr_iss_is_eretax(unsigned long esr) 487 509 {
+3
arch/arm64/include/asm/kvm_emulate.h
··· 307 307 { 308 308 u64 hpfar = vcpu->arch.fault.hpfar_el2; 309 309 310 + if (unlikely(!(hpfar & HPFAR_EL2_NS))) 311 + return INVALID_GPA; 312 + 310 313 return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12; 311 314 } 312 315
+1 -1
arch/arm64/include/asm/kvm_ras.h
··· 14 14 * Was this synchronous external abort a RAS notification? 15 15 * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. 16 16 */ 17 - static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr) 17 + static inline int kvm_handle_guest_sea(void) 18 18 { 19 19 /* apei_claim_sea(NULL) expects to mask interrupts itself */ 20 20 lockdep_assert_irqs_enabled();
+21 -5
arch/arm64/kvm/hyp/include/hyp/fault.h
··· 12 12 #include <asm/kvm_hyp.h> 13 13 #include <asm/kvm_mmu.h> 14 14 15 + static inline bool __fault_safe_to_translate(u64 esr) 16 + { 17 + u64 fsc = esr & ESR_ELx_FSC; 18 + 19 + if (esr_fsc_is_sea_ttw(esr) || esr_fsc_is_secc_ttw(esr)) 20 + return false; 21 + 22 + return !(fsc == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV)); 23 + } 24 + 15 25 static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) 16 26 { 17 27 int ret; ··· 81 71 82 72 static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) 83 73 { 84 - u64 hpfar, far; 74 + u64 hpfar; 85 75 86 - far = read_sysreg_el2(SYS_FAR); 76 + fault->far_el2 = read_sysreg_el2(SYS_FAR); 77 + fault->hpfar_el2 = 0; 87 78 88 79 if (__hpfar_valid(esr)) 89 80 hpfar = read_sysreg(hpfar_el2); 90 - else if (!__translate_far_to_hpfar(far, &hpfar)) 81 + else if (unlikely(!__fault_safe_to_translate(esr))) 82 + return true; 83 + else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar)) 91 84 return false; 92 85 93 - fault->far_el2 = far; 94 - fault->hpfar_el2 = hpfar; 86 + /* 87 + * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid 88 + * HPFAR value. 89 + */ 90 + fault->hpfar_el2 = hpfar | HPFAR_EL2_NS; 95 91 return true; 96 92 } 97 93
+7
arch/arm64/kvm/hyp/nvhe/mem_protect.c
··· 578 578 return; 579 579 } 580 580 581 + 582 + /* 583 + * Yikes, we couldn't resolve the fault IPA. This should reinject an 584 + * abort into the host when we figure out how to do that. 585 + */ 586 + BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS)); 581 587 addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; 588 + 582 589 ret = host_stage2_idmap(addr); 583 590 BUG_ON(ret && ret != -EAGAIN); 584 591 }
+19 -12
arch/arm64/kvm/mmu.c
··· 1794 1794 gfn_t gfn; 1795 1795 int ret, idx; 1796 1796 1797 + /* Synchronous External Abort? */ 1798 + if (kvm_vcpu_abt_issea(vcpu)) { 1799 + /* 1800 + * For RAS the host kernel may handle this abort. 1801 + * There is no need to pass the error into the guest. 1802 + */ 1803 + if (kvm_handle_guest_sea()) 1804 + kvm_inject_vabt(vcpu); 1805 + 1806 + return 1; 1807 + } 1808 + 1797 1809 esr = kvm_vcpu_get_esr(vcpu); 1798 1810 1811 + /* 1812 + * The fault IPA should be reliable at this point as we're not dealing 1813 + * with an SEA. 1814 + */ 1799 1815 ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); 1816 + if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm)) 1817 + return -EFAULT; 1818 + 1800 1819 is_iabt = kvm_vcpu_trap_is_iabt(vcpu); 1801 1820 1802 1821 if (esr_fsc_is_translation_fault(esr)) { ··· 1835 1816 kvm_inject_dabt(vcpu, fault_ipa); 1836 1817 return 1; 1837 1818 } 1838 - } 1839 - 1840 - /* Synchronous External Abort? */ 1841 - if (kvm_vcpu_abt_issea(vcpu)) { 1842 - /* 1843 - * For RAS the host kernel may handle this abort. 1844 - * There is no need to pass the error into the guest. 1845 - */ 1846 - if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) 1847 - kvm_inject_vabt(vcpu); 1848 - 1849 - return 1; 1850 1819 } 1851 1820 1852 1821 trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),