Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull KVM/arm64 updates from Will Deacon:
"New features:

- Support for non-protected guests in protected mode, achieving near
feature parity with the non-protected mode

- Support for the EL2 timers as part of the ongoing NV support

- Allow control of hardware tracing for nVHE/hVHE

Improvements, fixes and cleanups:

- Massive cleanup of the debug infrastructure, making it a bit less
awkward and definitely easier to maintain. This should pave the way
for further optimisations

- Complete rewrite of pKVM's fixed-feature infrastructure, aligning
it with the rest of KVM and making the code easier to follow

- Large simplification of pKVM's memory protection infrastructure

- Better handling of RES0/RES1 fields for memory-backed system
registers

- Add a workaround for Qualcomm's Snapdragon X CPUs, which suffer
from a pretty nasty timer bug

- Small collection of cleanups and low-impact fixes"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (87 commits)
arm64/sysreg: Get rid of TRFCR_ELx SysregFields
KVM: arm64: nv: Fix doc header layout for timers
KVM: arm64: nv: Apply RESx settings to sysreg reset values
KVM: arm64: nv: Always evaluate HCR_EL2 using sanitising accessors
KVM: arm64: Fix selftests after sysreg field name update
coresight: Pass guest TRFCR value to KVM
KVM: arm64: Support trace filtering for guests
KVM: arm64: coresight: Give TRBE enabled state to KVM
coresight: trbe: Remove redundant disable call
arm64/sysreg/tools: Move TRFCR definitions to sysreg
tools: arm64: Update sysreg.h header files
KVM: arm64: Drop pkvm_mem_transition for host/hyp donations
KVM: arm64: Drop pkvm_mem_transition for host/hyp sharing
KVM: arm64: Drop pkvm_mem_transition for FF-A
KVM: arm64: Explicitly handle BRBE traps as UNDEFINED
KVM: arm64: vgic: Use str_enabled_disabled() in vgic_v3_probe()
arm64: kvm: Introduce nvhe stack size constants
KVM: arm64: Fix nVHE stacktrace VA bits mask
KVM: arm64: Fix FEAT_MTE in pKVM
Documentation: Update the behaviour of "kvm-arm.mode"
...

+3213 -2319
+10 -6
Documentation/admin-guide/kernel-parameters.txt
··· 2824 2824 nvhe: Standard nVHE-based mode, without support for 2825 2825 protected guests. 2826 2826 2827 - protected: nVHE-based mode with support for guests whose 2828 - state is kept private from the host. 2827 + protected: Mode with support for guests whose state is 2828 + kept private from the host, using VHE or 2829 + nVHE depending on HW support. 2829 2830 2830 2831 nested: VHE-based mode with support for nested 2831 - virtualization. Requires at least ARMv8.3 2832 - hardware. 2832 + virtualization. Requires at least ARMv8.4 2833 + hardware (with FEAT_NV2). 2833 2834 2834 2835 Defaults to VHE/nVHE based on hardware support. Setting 2835 2836 mode to "protected" will disable kexec and hibernation 2836 - for the host. "nested" is experimental and should be 2837 - used with extreme caution. 2837 + for the host. To force nVHE on VHE hardware, add 2838 + "arm64_sw.hvhe=0 id_aa64mmfr1.vh=0" to the 2839 + command-line. 2840 + "nested" is experimental and should be used with 2841 + extreme caution. 2838 2842 2839 2843 kvm-arm.vgic_v3_group0_trap= 2840 2844 [KVM,ARM,EARLY] Trap guest accesses to GICv3 group-0
+8 -6
Documentation/virt/kvm/devices/vcpu.rst
··· 142 142 143 143 :Architectures: ARM64 144 144 145 - 2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER 146 - ----------------------------------------------------------------------------- 145 + 2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_{VTIMER,PTIMER,HVTIMER,HPTIMER} 146 + ----------------------------------------------------------------------- 147 147 148 148 :Parameters: in kvm_device_attr.addr the address for the timer interrupt is a 149 149 pointer to an int ··· 159 159 in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the 160 160 attribute overrides the default values (see below). 161 161 162 - ============================= ========================================== 163 - KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27) 164 - KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30) 165 - ============================= ========================================== 162 + ============================== ========================================== 163 + KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27) 164 + KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30) 165 + KVM_ARM_VCPU_TIMER_IRQ_HVTIMER The EL2 virtual timer intid (default: 28) 166 + KVM_ARM_VCPU_TIMER_IRQ_HPTIMER The EL2 physical timer intid (default: 26) 167 + ============================== ========================================== 166 168 167 169 Setting the same PPI for different timers will prevent the VCPUs from running. 168 170 Setting the interrupt number on a VCPU configures all VCPUs created at that
+2
arch/arm64/include/asm/cputype.h
··· 122 122 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 123 123 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 124 124 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 125 + #define QCOM_CPU_PART_ORYON_X1 0x001 125 126 126 127 #define NVIDIA_CPU_PART_DENVER 0x003 127 128 #define NVIDIA_CPU_PART_CARMEL 0x004 ··· 199 198 #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) 200 199 #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) 201 200 #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) 201 + #define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1) 202 202 #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) 203 203 #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) 204 204 #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
+1 -1
arch/arm64/include/asm/kvm_arm.h
··· 300 300 #define CPTR_EL2_TSM (1 << 12) 301 301 #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) 302 302 #define CPTR_EL2_TZ (1 << 8) 303 - #define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ 303 + #define CPTR_NVHE_EL2_RES1 (BIT(13) | BIT(9) | GENMASK(7, 0)) 304 304 #define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \ 305 305 GENMASK(29, 21) | \ 306 306 GENMASK(19, 14) | \
+10 -4
arch/arm64/include/asm/kvm_asm.h
··· 53 53 enum __kvm_host_smccc_func { 54 54 /* Hypercalls available only prior to pKVM finalisation */ 55 55 /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ 56 - __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, 57 - __KVM_HOST_SMCCC_FUNC___pkvm_init, 56 + __KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, 58 57 __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, 59 58 __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, 60 59 __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, ··· 64 65 /* Hypercalls available after pKVM finalisation */ 65 66 __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, 66 67 __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp, 68 + __KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest, 69 + __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest, 70 + __KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest, 71 + __KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest, 72 + __KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest, 73 + __KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest, 67 74 __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, 68 75 __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run, 69 76 __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, ··· 84 79 __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, 85 80 __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, 86 81 __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, 82 + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, 83 + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, 84 + __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, 87 85 }; 88 86 89 87 #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] ··· 254 246 255 247 extern u64 __vgic_v3_get_gic_config(void); 256 248 extern void __vgic_v3_init_lrs(void); 257 - 258 - extern u64 __kvm_get_mdcr_el2(void); 259 249 260 250 #define __KVM_EXTABLE(from, to) \ 261 251 " .pushsection __kvm_ex_table, \"a\"\n" \
+28 -39
arch/arm64/include/asm/kvm_emulate.h
··· 184 184 return vcpu_is_el2_ctxt(&vcpu->arch.ctxt); 185 185 } 186 186 187 - static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt) 188 - { 189 - return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) || 190 - (ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H)); 191 - } 192 - 193 187 static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu) 194 188 { 195 - return __vcpu_el2_e2h_is_set(&vcpu->arch.ctxt); 196 - } 197 - 198 - static inline bool __vcpu_el2_tge_is_set(const struct kvm_cpu_context *ctxt) 199 - { 200 - return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_TGE; 189 + return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) || 190 + (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_E2H)); 201 191 } 202 192 203 193 static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu) 204 194 { 205 - return __vcpu_el2_tge_is_set(&vcpu->arch.ctxt); 195 + return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE; 206 196 } 207 197 208 - static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt) 198 + static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) 209 199 { 200 + bool e2h, tge; 201 + u64 hcr; 202 + 203 + if (!vcpu_has_nv(vcpu)) 204 + return false; 205 + 206 + hcr = __vcpu_sys_reg(vcpu, HCR_EL2); 207 + 208 + e2h = (hcr & HCR_E2H); 209 + tge = (hcr & HCR_TGE); 210 + 210 211 /* 211 212 * We are in a hypervisor context if the vcpu mode is EL2 or 212 213 * E2H and TGE bits are set. The latter means we are in the user space ··· 216 215 * Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the 217 216 * rest of the KVM code, and will result in a misbehaving guest. 
218 217 */ 219 - return vcpu_is_el2_ctxt(ctxt) || 220 - (__vcpu_el2_e2h_is_set(ctxt) && __vcpu_el2_tge_is_set(ctxt)) || 221 - __vcpu_el2_tge_is_set(ctxt); 222 - } 223 - 224 - static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu) 225 - { 226 - return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt); 218 + return vcpu_is_el2(vcpu) || (e2h && tge) || tge; 227 219 } 228 220 229 221 static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu) ··· 613 619 write_sysreg(val, cptr_el2); 614 620 } 615 621 616 - static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) 622 + /* Resets the value of cptr_el2 when returning to the host. */ 623 + static __always_inline void __kvm_reset_cptr_el2(struct kvm *kvm) 617 624 { 618 625 u64 val; 619 626 ··· 625 630 } else if (has_hvhe()) { 626 631 val = CPACR_EL1_FPEN; 627 632 628 - if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) 633 + if (!kvm_has_sve(kvm) || !guest_owns_fp_regs()) 629 634 val |= CPACR_EL1_ZEN; 630 635 if (cpus_have_final_cap(ARM64_SME)) 631 636 val |= CPACR_EL1_SMEN; 632 637 } else { 633 638 val = CPTR_NVHE_EL2_RES1; 634 639 635 - if (vcpu_has_sve(vcpu) && guest_owns_fp_regs()) 640 + if (kvm_has_sve(kvm) && guest_owns_fp_regs()) 636 641 val |= CPTR_EL2_TZ; 637 - if (cpus_have_final_cap(ARM64_SME)) 638 - val &= ~CPTR_EL2_TSM; 642 + if (!cpus_have_final_cap(ARM64_SME)) 643 + val |= CPTR_EL2_TSM; 639 644 } 640 - 641 - return val; 642 - } 643 - 644 - static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu) 645 - { 646 - u64 val = kvm_get_reset_cptr_el2(vcpu); 647 645 648 646 kvm_write_cptr_el2(val); 649 647 } 648 + 649 + #ifdef __KVM_NVHE_HYPERVISOR__ 650 + #define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2(kern_hyp_va((v)->kvm)) 651 + #else 652 + #define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2((v)->kvm) 653 + #endif 650 654 651 655 /* 652 656 * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE ··· 690 696 static inline bool 
guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu) 691 697 { 692 698 return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN); 693 - } 694 - 695 - static inline void kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) 696 - { 697 - vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH); 698 699 } 699 700 #endif /* __ARM64_KVM_EMULATE_H__ */
+80 -54
arch/arm64/include/asm/kvm_host.h
··· 85 85 struct kvm_hyp_memcache { 86 86 phys_addr_t head; 87 87 unsigned long nr_pages; 88 + struct pkvm_mapping *mapping; /* only used from EL1 */ 88 89 }; 89 90 90 91 static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, ··· 332 331 #define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7 333 332 /* Fine-Grained UNDEF initialised */ 334 333 #define KVM_ARCH_FLAG_FGU_INITIALIZED 8 334 + /* SVE exposed to guest */ 335 + #define KVM_ARCH_FLAG_GUEST_HAS_SVE 9 335 336 unsigned long flags; 336 337 337 338 /* VM-wide vCPU feature set */ ··· 493 490 VBAR_EL2, /* Vector Base Address Register (EL2) */ 494 491 RVBAR_EL2, /* Reset Vector Base Address Register */ 495 492 CONTEXTIDR_EL2, /* Context ID Register (EL2) */ 496 - CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ 497 493 SP_EL2, /* EL2 Stack Pointer */ 498 494 CNTHP_CTL_EL2, 499 495 CNTHP_CVAL_EL2, ··· 503 501 MARKER(__SANITISED_REG_START__), 504 502 TCR2_EL2, /* Extended Translation Control Register (EL2) */ 505 503 MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ 504 + CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ 506 505 507 506 /* Any VNCR-capable reg goes after this point */ 508 507 MARKER(__VNCR_START__), ··· 613 610 * field. 614 611 */ 615 612 struct kvm_host_data { 613 + #define KVM_HOST_DATA_FLAG_HAS_SPE 0 614 + #define KVM_HOST_DATA_FLAG_HAS_TRBE 1 615 + #define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED 2 616 + #define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED 3 617 + #define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4 618 + #define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5 619 + unsigned long flags; 620 + 616 621 struct kvm_cpu_context host_ctxt; 617 622 618 623 /* ··· 653 642 * host_debug_state contains the host registers which are 654 643 * saved and restored during world switches. 
655 644 */ 656 - struct { 645 + struct { 657 646 /* {Break,watch}point registers */ 658 647 struct kvm_guest_debug_arch regs; 659 648 /* Statistical profiling extension */ ··· 663 652 /* Values of trap registers for the host before guest entry. */ 664 653 u64 mdcr_el2; 665 654 } host_debug_state; 655 + 656 + /* Guest trace filter value */ 657 + u64 trfcr_while_in_guest; 658 + 659 + /* Number of programmable event counters (PMCR_EL0.N) for this CPU */ 660 + unsigned int nr_event_counters; 661 + 662 + /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */ 663 + unsigned int debug_brps; 664 + unsigned int debug_wrps; 666 665 }; 667 666 668 667 struct kvm_host_psci_config { ··· 729 708 u64 hcr_el2; 730 709 u64 hcrx_el2; 731 710 u64 mdcr_el2; 732 - u64 cptr_el2; 733 711 734 712 /* Exception Information */ 735 713 struct kvm_vcpu_fault_info fault; ··· 759 739 * 760 740 * external_debug_state contains the debug values we want to debug the 761 741 * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl. 762 - * 763 - * debug_ptr points to the set of debug registers that should be loaded 764 - * onto the hardware when running the guest. 765 742 */ 766 - struct kvm_guest_debug_arch *debug_ptr; 767 743 struct kvm_guest_debug_arch vcpu_debug_state; 768 744 struct kvm_guest_debug_arch external_debug_state; 745 + u64 external_mdscr_el1; 746 + 747 + enum { 748 + VCPU_DEBUG_FREE, 749 + VCPU_DEBUG_HOST_OWNED, 750 + VCPU_DEBUG_GUEST_OWNED, 751 + } debug_owner; 769 752 770 753 /* VGIC state */ 771 754 struct vgic_cpu vgic_cpu; 772 755 struct arch_timer_cpu timer_cpu; 773 756 struct kvm_pmu pmu; 774 - 775 - /* 776 - * Guest registers we preserve during guest debugging. 777 - * 778 - * These shadow registers are updated by the kvm_handle_sys_reg 779 - * trap handler if the guest accesses or updates them while we 780 - * are using guest debug. 
781 - */ 782 - struct { 783 - u32 mdscr_el1; 784 - bool pstate_ss; 785 - } guest_debug_preserved; 786 757 787 758 /* vcpu power state */ 788 759 struct kvm_mp_state mp_state; ··· 781 770 782 771 /* Cache some mmu pages needed inside spinlock regions */ 783 772 struct kvm_mmu_memory_cache mmu_page_cache; 773 + 774 + /* Pages to top-up the pKVM/EL2 guest pool */ 775 + struct kvm_hyp_memcache pkvm_memcache; 784 776 785 777 /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ 786 778 u64 vsesr_el2; ··· 877 863 #define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__) 878 864 #define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__) 879 865 880 - /* SVE exposed to guest */ 881 - #define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0)) 866 + /* KVM_ARM_VCPU_INIT completed */ 867 + #define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0)) 882 868 /* SVE config completed */ 883 869 #define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1)) 884 - /* PTRAUTH exposed to guest */ 885 - #define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2)) 886 - /* KVM_ARM_VCPU_INIT completed */ 887 - #define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3)) 888 870 889 871 /* Exception pending */ 890 872 #define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0)) ··· 916 906 #define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5) 917 907 #define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6) 918 908 #define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7) 919 - /* Guest debug is live */ 920 - #define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4)) 921 - /* Save SPE context if active */ 922 - #define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5)) 923 - /* Save TRBE context if active */ 924 - #define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6)) 925 909 926 - /* SVE enabled for host EL0 */ 927 - #define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0)) 928 - /* SME enabled for EL0 */ 929 - #define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1)) 
930 910 /* Physical CPU not in supported_cpus */ 931 - #define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2)) 911 + #define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(0)) 932 912 /* WFIT instruction trapped */ 933 - #define IN_WFIT __vcpu_single_flag(sflags, BIT(3)) 913 + #define IN_WFIT __vcpu_single_flag(sflags, BIT(1)) 934 914 /* vcpu system registers loaded on physical CPU */ 935 - #define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) 936 - /* Software step state is Active-pending */ 937 - #define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5)) 915 + #define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(2)) 916 + /* Software step state is Active-pending for external debug */ 917 + #define HOST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(3)) 918 + /* Software step state is Active pending for guest debug */ 919 + #define GUEST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(4)) 938 920 /* PMUSERENR for the guest EL0 is on physical CPU */ 939 - #define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6)) 921 + #define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(5)) 940 922 /* WFI instruction trapped */ 941 - #define IN_WFI __vcpu_single_flag(sflags, BIT(7)) 923 + #define IN_WFI __vcpu_single_flag(sflags, BIT(6)) 942 924 943 925 944 926 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ ··· 961 959 KVM_GUESTDBG_USE_HW | \ 962 960 KVM_GUESTDBG_SINGLESTEP) 963 961 964 - #define vcpu_has_sve(vcpu) (system_supports_sve() && \ 965 - vcpu_get_flag(vcpu, GUEST_HAS_SVE)) 962 + #define kvm_has_sve(kvm) (system_supports_sve() && \ 963 + test_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &(kvm)->arch.flags)) 964 + 965 + #ifdef __KVM_NVHE_HYPERVISOR__ 966 + #define vcpu_has_sve(vcpu) kvm_has_sve(kern_hyp_va((vcpu)->kvm)) 967 + #else 968 + #define vcpu_has_sve(vcpu) kvm_has_sve((vcpu)->kvm) 969 + #endif 966 970 967 971 #ifdef CONFIG_ARM64_PTR_AUTH 968 972 #define vcpu_has_ptrauth(vcpu) \ 969 973 ((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) 
|| \ 970 974 cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \ 971 - vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH)) 975 + (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || \ 976 + vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC))) 972 977 #else 973 978 #define vcpu_has_ptrauth(vcpu) false 974 979 #endif ··· 1316 1307 &this_cpu_ptr_hyp_sym(kvm_host_data)->f) 1317 1308 #endif 1318 1309 1310 + #define host_data_test_flag(flag) \ 1311 + (test_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))) 1312 + #define host_data_set_flag(flag) \ 1313 + set_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)) 1314 + #define host_data_clear_flag(flag) \ 1315 + clear_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)) 1316 + 1319 1317 /* Check whether the FP regs are owned by the guest */ 1320 1318 static inline bool guest_owns_fp_regs(void) 1321 1319 { ··· 1348 1332 1349 1333 static inline void kvm_arch_sync_events(struct kvm *kvm) {} 1350 1334 1351 - void kvm_arm_init_debug(void); 1352 - void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu); 1353 - void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); 1354 - void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); 1355 - void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); 1335 + void kvm_init_host_debug_data(void); 1336 + void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu); 1337 + void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu); 1338 + void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu); 1339 + void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val); 1356 1340 1357 1341 #define kvm_vcpu_os_lock_enabled(vcpu) \ 1358 1342 (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK)) 1343 + 1344 + #define kvm_debug_regs_in_use(vcpu) \ 1345 + ((vcpu)->arch.debug_owner != VCPU_DEBUG_FREE) 1346 + #define kvm_host_owns_debug_regs(vcpu) \ 1347 + ((vcpu)->arch.debug_owner == VCPU_DEBUG_HOST_OWNED) 1348 + #define kvm_guest_owns_debug_regs(vcpu) \ 1349 + ((vcpu)->arch.debug_owner == VCPU_DEBUG_GUEST_OWNED) 1359 1350 1360 1351 int 
kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, 1361 1352 struct kvm_device_attr *attr); ··· 1390 1367 return (!has_vhe() && attr->exclude_host); 1391 1368 } 1392 1369 1393 - /* Flags for host debug state */ 1394 - void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu); 1395 - void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); 1396 - 1397 1370 #ifdef CONFIG_KVM 1398 1371 void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr); 1399 1372 void kvm_clr_pmu_events(u64 clr); 1400 1373 bool kvm_set_pmuserenr(u64 val); 1374 + void kvm_enable_trbe(void); 1375 + void kvm_disable_trbe(void); 1376 + void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest); 1401 1377 #else 1402 1378 static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {} 1403 1379 static inline void kvm_clr_pmu_events(u64 clr) {} ··· 1404 1382 { 1405 1383 return false; 1406 1384 } 1385 + static inline void kvm_enable_trbe(void) {} 1386 + static inline void kvm_disable_trbe(void) {} 1387 + static inline void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) {} 1407 1388 #endif 1408 1389 1409 1390 void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu); ··· 1447 1422 return test_bit(feature, ka->vcpu_features); 1448 1423 } 1449 1424 1425 + #define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f)) 1450 1426 #define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f)) 1451 1427 1452 1428 #define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED)
+18
arch/arm64/include/asm/kvm_mmu.h
··· 139 139 140 140 #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) 141 141 142 + extern u32 __hyp_va_bits; 143 + 142 144 /* 143 145 * We currently support using a VM-specified IPA size. For backward 144 146 * compatibility, the default IPA size is fixed to 40bits. ··· 353 351 * *any* of its fields. 354 352 */ 355 353 return &kvm->arch.mmu != mmu; 354 + } 355 + 356 + static inline void kvm_fault_lock(struct kvm *kvm) 357 + { 358 + if (is_protected_kvm_enabled()) 359 + write_lock(&kvm->mmu_lock); 360 + else 361 + read_lock(&kvm->mmu_lock); 362 + } 363 + 364 + static inline void kvm_fault_unlock(struct kvm *kvm) 365 + { 366 + if (is_protected_kvm_enabled()) 367 + write_unlock(&kvm->mmu_lock); 368 + else 369 + read_unlock(&kvm->mmu_lock); 356 370 } 357 371 358 372 #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+2 -1
arch/arm64/include/asm/kvm_nested.h
··· 64 64 } 65 65 66 66 extern bool forward_smc_trap(struct kvm_vcpu *vcpu); 67 + extern bool forward_debug_exception(struct kvm_vcpu *vcpu); 67 68 extern void kvm_init_nested(struct kvm *kvm); 68 69 extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu); 69 70 extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu); ··· 187 186 return true; 188 187 } 189 188 190 - int kvm_init_nv_sysregs(struct kvm *kvm); 189 + int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu); 191 190 192 191 #ifdef CONFIG_ARM64_PTR_AUTH 193 192 bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
+24 -12
arch/arm64/include/asm/kvm_pgtable.h
··· 412 412 * be used instead of block mappings. 413 413 */ 414 414 struct kvm_pgtable { 415 - u32 ia_bits; 416 - s8 start_level; 417 - kvm_pteref_t pgd; 418 - struct kvm_pgtable_mm_ops *mm_ops; 415 + union { 416 + struct rb_root pkvm_mappings; 417 + struct { 418 + u32 ia_bits; 419 + s8 start_level; 420 + kvm_pteref_t pgd; 421 + struct kvm_pgtable_mm_ops *mm_ops; 419 422 420 - /* Stage-2 only */ 421 - struct kvm_s2_mmu *mmu; 422 - enum kvm_pgtable_stage2_flags flags; 423 - kvm_pgtable_force_pte_cb_t force_pte_cb; 423 + /* Stage-2 only */ 424 + enum kvm_pgtable_stage2_flags flags; 425 + kvm_pgtable_force_pte_cb_t force_pte_cb; 426 + }; 427 + }; 428 + struct kvm_s2_mmu *mmu; 424 429 }; 425 430 426 431 /** ··· 531 526 enum kvm_pgtable_stage2_flags flags, 532 527 kvm_pgtable_force_pte_cb_t force_pte_cb); 533 528 534 - #define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \ 535 - __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL) 529 + static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, 530 + struct kvm_pgtable_mm_ops *mm_ops) 531 + { 532 + return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL); 533 + } 536 534 537 535 /** 538 536 * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. ··· 677 669 * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry. 678 670 * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 679 671 * @addr: Intermediate physical address to identify the page-table entry. 672 + * @flags: Flags to control the page-table walk (ex. a shared walk) 680 673 * 681 674 * The offset of @addr within a page is ignored. 682 675 * 683 676 * If there is a valid, leaf page-table entry used to translate @addr, then 684 677 * set the access flag in that entry. 
685 678 */ 686 - void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr); 679 + void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, 680 + enum kvm_pgtable_walk_flags flags); 687 681 688 682 /** 689 683 * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access ··· 715 705 * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 716 706 * @addr: Intermediate physical address to identify the page-table entry. 717 707 * @prot: Additional permissions to grant for the mapping. 708 + * @flags: Flags to control the page-table walk (ex. a shared walk) 718 709 * 719 710 * The offset of @addr within a page is ignored. 720 711 * ··· 728 717 * Return: 0 on success, negative error code on failure. 729 718 */ 730 719 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, 731 - enum kvm_pgtable_prot prot); 720 + enum kvm_pgtable_prot prot, 721 + enum kvm_pgtable_walk_flags flags); 732 722 733 723 /** 734 724 * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
+51
arch/arm64/include/asm/kvm_pkvm.h
··· 20 20 int pkvm_create_hyp_vm(struct kvm *kvm); 21 21 void pkvm_destroy_hyp_vm(struct kvm *kvm); 22 22 23 + /* 24 + * This functions as an allow-list of protected VM capabilities. 25 + * Features not explicitly allowed by this function are denied. 26 + */ 27 + static inline bool kvm_pvm_ext_allowed(long ext) 28 + { 29 + switch (ext) { 30 + case KVM_CAP_IRQCHIP: 31 + case KVM_CAP_ARM_PSCI: 32 + case KVM_CAP_ARM_PSCI_0_2: 33 + case KVM_CAP_NR_VCPUS: 34 + case KVM_CAP_MAX_VCPUS: 35 + case KVM_CAP_MAX_VCPU_ID: 36 + case KVM_CAP_MSI_DEVID: 37 + case KVM_CAP_ARM_VM_IPA_SIZE: 38 + case KVM_CAP_ARM_PMU_V3: 39 + case KVM_CAP_ARM_SVE: 40 + case KVM_CAP_ARM_PTRAUTH_ADDRESS: 41 + case KVM_CAP_ARM_PTRAUTH_GENERIC: 42 + return true; 43 + default: 44 + return false; 45 + } 46 + } 47 + 23 48 extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; 24 49 extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); 25 50 ··· 162 137 SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl))); 163 138 } 164 139 140 + struct pkvm_mapping { 141 + struct rb_node node; 142 + u64 gfn; 143 + u64 pfn; 144 + }; 145 + 146 + int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, 147 + struct kvm_pgtable_mm_ops *mm_ops); 148 + void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); 149 + int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, 150 + enum kvm_pgtable_prot prot, void *mc, 151 + enum kvm_pgtable_walk_flags flags); 152 + int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); 153 + int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); 154 + int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); 155 + bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold); 156 + int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, 157 + enum kvm_pgtable_walk_flags flags); 158 + void 
pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, 159 + enum kvm_pgtable_walk_flags flags); 160 + int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, 161 + struct kvm_mmu_memory_cache *mc); 162 + void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level); 163 + kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, 164 + enum kvm_pgtable_prot prot, void *mc, 165 + bool force_pte); 165 166 #endif /* __ARM64_KVM_PKVM_H__ */
+4 -1
arch/arm64/include/asm/memory.h
··· 145 145 146 146 #define OVERFLOW_STACK_SIZE SZ_4K 147 147 148 + #define NVHE_STACK_SHIFT PAGE_SHIFT 149 + #define NVHE_STACK_SIZE (UL(1) << NVHE_STACK_SHIFT) 150 + 148 151 /* 149 152 * With the minimum frame size of [x29, x30], exactly half the combined 150 153 * sizes of the hyp and overflow stacks is the maximum size needed to 151 154 * save the unwinded stacktrace; plus an additional entry to delimit the 152 155 * end. 153 156 */ 154 - #define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long)) 157 + #define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + NVHE_STACK_SIZE) / 2 + sizeof(long)) 155 158 156 159 /* 157 160 * Alignment of kernel segments (e.g. .text, .data).
+1 -1
arch/arm64/include/asm/stacktrace/nvhe.h
··· 47 47 48 48 DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); 49 49 DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info); 50 - DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); 50 + DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base); 51 51 52 52 void kvm_nvhe_dump_backtrace(unsigned long hyp_offset); 53 53
+4 -12
arch/arm64/include/asm/sysreg.h
··· 283 283 #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) 284 284 #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) 285 285 286 - #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) 287 - 288 286 #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) 289 287 290 288 #define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0) ··· 475 477 #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) 476 478 477 479 #define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1) 480 + #define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2) 478 481 #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) 479 482 #define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) 480 483 ··· 483 484 #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) 484 485 #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) 485 486 487 + #define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0) 486 488 #define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1) 487 489 #define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2) 488 490 489 491 #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) 490 492 #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) 491 493 #define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) 494 + #define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0) 492 495 #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) 493 496 #define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0) 497 + #define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0) 494 498 495 499 #define __PMEV_op2(n) ((n) & 0x7) 496 500 #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) ··· 521 519 #define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) 522 520 #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) 523 521 524 - #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) 525 522 #define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0) 526 523 #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) 527 524 #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) ··· 983 982 984 983 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ 985 984 #define SYS_MPIDR_SAFE_VAL (BIT(31)) 986 - 987 - #define TRFCR_ELx_TS_SHIFT 5 988 - #define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT) 989 - #define 
TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT) 990 - #define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT) 991 - #define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT) 992 - #define TRFCR_EL2_CX BIT(3) 993 - #define TRFCR_ELx_ExTRE BIT(1) 994 - #define TRFCR_ELx_E0TRE BIT(0) 995 985 996 986 /* GIC Hypervisor interface registers */ 997 987 /* ICH_MISR_EL2 bit definitions */
+8
arch/arm64/kernel/cpu_errata.c
··· 787 787 }, 788 788 #endif 789 789 { 790 + .desc = "Broken CNTVOFF_EL2", 791 + .capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF, 792 + ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) { 793 + MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1), 794 + {} 795 + })), 796 + }, 797 + { 790 798 } 791 799 };
+3
arch/arm64/kernel/image-vars.h
··· 105 105 KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); 106 106 KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); 107 107 108 + /* Static key which is set if CNTVOFF_EL2 is unusable */ 109 + KVM_NVHE_ALIAS(broken_cntvoff_key); 110 + 108 111 /* EL2 exception handling */ 109 112 KVM_NVHE_ALIAS(__start___kvm_ex_table); 110 113 KVM_NVHE_ALIAS(__stop___kvm_ex_table);
+159 -20
arch/arm64/kvm/arch_timer.c
··· 30 30 static u32 host_ptimer_irq_flags; 31 31 32 32 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); 33 + DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key); 33 34 34 35 static const u8 default_ppi[] = { 35 36 [TIMER_PTIMER] = 30, ··· 100 99 WARN_ON(1); 101 100 return 0; 102 101 } 103 - } 104 - 105 - static u64 timer_get_offset(struct arch_timer_context *ctxt) 106 - { 107 - u64 offset = 0; 108 - 109 - if (!ctxt) 110 - return 0; 111 - 112 - if (ctxt->offset.vm_offset) 113 - offset += *ctxt->offset.vm_offset; 114 - if (ctxt->offset.vcpu_offset) 115 - offset += *ctxt->offset.vcpu_offset; 116 - 117 - return offset; 118 102 } 119 103 120 104 static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) ··· 427 441 regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; 428 442 } 429 443 444 + static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) 445 + { 446 + /* 447 + * Paper over NV2 brokenness by publishing the interrupt status 448 + * bit. This still results in a poor quality of emulation (guest 449 + * writes will have no effect until the next exit). 450 + * 451 + * But hey, it's fast, right? 
452 + */ 453 + if (is_hyp_ctxt(ctx->vcpu) && 454 + (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) { 455 + unsigned long val = timer_get_ctl(ctx); 456 + __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level); 457 + timer_set_ctl(ctx, val); 458 + } 459 + } 460 + 430 461 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, 431 462 struct arch_timer_context *timer_ctx) 432 463 { 433 464 int ret; 465 + 466 + kvm_timer_update_status(timer_ctx, new_level); 434 467 435 468 timer_ctx->irq.level = new_level; 436 469 trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx), ··· 475 470 kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); 476 471 return; 477 472 } 473 + 474 + kvm_timer_update_status(ctx, should_fire); 478 475 479 476 /* 480 477 * If the timer can fire now, we don't need to have a soft timer ··· 520 513 case TIMER_VTIMER: 521 514 case TIMER_HVTIMER: 522 515 timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); 523 - timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); 516 + cval = read_sysreg_el0(SYS_CNTV_CVAL); 517 + 518 + if (has_broken_cntvoff()) 519 + cval -= timer_get_offset(ctx); 520 + 521 + timer_set_cval(ctx, cval); 524 522 525 523 /* Disable the timer */ 526 524 write_sysreg_el0(0, SYS_CNTV_CTL); ··· 630 618 631 619 case TIMER_VTIMER: 632 620 case TIMER_HVTIMER: 633 - set_cntvoff(timer_get_offset(ctx)); 634 - write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); 621 + cval = timer_get_cval(ctx); 622 + offset = timer_get_offset(ctx); 623 + if (has_broken_cntvoff()) { 624 + set_cntvoff(0); 625 + cval += offset; 626 + } else { 627 + set_cntvoff(offset); 628 + } 629 + write_sysreg_el0(cval, SYS_CNTV_CVAL); 635 630 isb(); 636 631 write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); 637 632 break; ··· 781 762 782 763 static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) 783 764 { 784 - bool tpt, tpc; 765 + bool tvt, tpt, tvc, tpc, tvt02, tpt02; 785 766 u64 clr, set; 786 767 787 768 /* ··· 796 777 * 
within this function, reality kicks in and we start adding 797 778 * traps based on emulation requirements. 798 779 */ 799 - tpt = tpc = false; 780 + tvt = tpt = tvc = tpc = false; 781 + tvt02 = tpt02 = false; 782 + 783 + /* 784 + * NV2 badly breaks the timer semantics by redirecting accesses to 785 + * the EL1 timer state to memory, so let's call ECV to the rescue if 786 + * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses. 787 + * 788 + * The treatment slightly varies depending whether we run a nVHE or 789 + * VHE guest: nVHE will use the _EL0 registers directly, while VHE 790 + * will use the _EL02 accessors. This translates in different trap 791 + * bits. 792 + * 793 + * None of the trapping is required when running in non-HYP context, 794 + * unless required by the L1 hypervisor settings once we advertise 795 + * ECV+NV in the guest, or that we need trapping for other reasons. 796 + */ 797 + if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) { 798 + if (vcpu_el2_e2h_is_set(vcpu)) 799 + tvt02 = tpt02 = true; 800 + else 801 + tvt = tpt = true; 802 + } 800 803 801 804 /* 802 805 * We have two possibility to deal with a physical offset: ··· 834 793 tpt = tpc = true; 835 794 836 795 /* 796 + * For the poor sods that could not correctly substract one value 797 + * from another, trap the full virtual timer and counter. 798 + */ 799 + if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer)) 800 + tvt = tvc = true; 801 + 802 + /* 837 803 * Apply the enable bits that the guest hypervisor has requested for 838 804 * its own guest. We can only add traps that wouldn't have been set 839 805 * above. 806 + * Implementation choices: we do not support NV when E2H=0 in the 807 + * guest, and we don't support configuration where E2H is writable 808 + * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but 809 + * not both). This simplifies the handling of the EL1NV* bits. 
840 810 */ 841 811 if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { 842 812 u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); ··· 858 806 859 807 tpt |= !(val & (CNTHCTL_EL1PCEN << 10)); 860 808 tpc |= !(val & (CNTHCTL_EL1PCTEN << 10)); 809 + 810 + tpt02 |= (val & CNTHCTL_EL1NVPCT); 811 + tvt02 |= (val & CNTHCTL_EL1NVVCT); 861 812 } 862 813 863 814 /* ··· 872 817 873 818 assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr); 874 819 assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr); 820 + assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set); 821 + assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set); 822 + assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set); 823 + assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set); 875 824 876 825 /* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */ 877 826 sysreg_clear_set(cnthctl_el2, clr, set); ··· 962 903 963 904 if (kvm_vcpu_is_blocking(vcpu)) 964 905 kvm_timer_blocking(vcpu); 906 + } 907 + 908 + void kvm_timer_sync_nested(struct kvm_vcpu *vcpu) 909 + { 910 + /* 911 + * When NV2 is on, guest hypervisors have their EL1 timer register 912 + * accesses redirected to the VNCR page. Any guest action taken on 913 + * the timer is postponed until the next exit, leading to a very 914 + * poor quality of emulation. 915 + * 916 + * This is an unmitigated disaster, only papered over by FEAT_ECV, 917 + * which allows trapping of the timer registers even with NV2. 918 + * Still, this is still worse than FEAT_NV on its own. Meh. 919 + */ 920 + if (!vcpu_el2_e2h_is_set(vcpu)) { 921 + if (cpus_have_final_cap(ARM64_HAS_ECV)) 922 + return; 923 + 924 + /* 925 + * A non-VHE guest hypervisor doesn't have any direct access 926 + * to its timers: the EL2 registers trap (and the HW is 927 + * fully emulated), while the EL0 registers access memory 928 + * despite the access being notionally direct. Boo. 
929 + * 930 + * We update the hardware timer registers with the 931 + * latest value written by the guest to the VNCR page 932 + * and let the hardware take care of the rest. 933 + */ 934 + write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL); 935 + write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL); 936 + write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL); 937 + write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL); 938 + } else { 939 + /* 940 + * For a VHE guest hypervisor, the EL2 state is directly 941 + * stored in the host EL1 timers, while the emulated EL0 942 + * state is stored in the VNCR page. The latter could have 943 + * been updated behind our back, and we must reset the 944 + * emulation of the timers. 945 + */ 946 + struct timer_map map; 947 + get_timer_map(vcpu, &map); 948 + 949 + soft_timer_cancel(&map.emul_vtimer->hrtimer); 950 + soft_timer_cancel(&map.emul_ptimer->hrtimer); 951 + timer_emulate(map.emul_vtimer); 952 + timer_emulate(map.emul_ptimer); 953 + } 965 954 } 966 955 967 956 /* ··· 1470 1363 return 0; 1471 1364 } 1472 1365 1366 + static void kvm_timer_handle_errata(void) 1367 + { 1368 + u64 mmfr0, mmfr1, mmfr4; 1369 + 1370 + /* 1371 + * CNTVOFF_EL2 is broken on some implementations. For those, we trap 1372 + * all virtual timer/counter accesses, requiring FEAT_ECV. 1373 + * 1374 + * However, a hypervisor supporting nesting is likely to mitigate the 1375 + * erratum at L0, and not require other levels to mitigate it (which 1376 + * would otherwise be a terrible performance sink due to trap 1377 + * amplification). 1378 + * 1379 + * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0, 1380 + * and that NV is likely not to (because of limitations of the 1381 + * architecture), only enable the workaround when FEAT_VHE and 1382 + * FEAT_E2H0 are both detected. Time will tell if this actually holds. 
1383 + */ 1384 + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); 1385 + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); 1386 + mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1); 1387 + if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) && 1388 + !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) && 1389 + SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) && 1390 + (has_vhe() || has_hvhe()) && 1391 + cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) { 1392 + static_branch_enable(&broken_cntvoff_key); 1393 + kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n"); 1394 + } 1395 + } 1396 + 1473 1397 int __init kvm_timer_hyp_init(bool has_gic) 1474 1398 { 1475 1399 struct arch_timer_kvm_info *info; ··· 1569 1431 goto out_free_vtimer_irq; 1570 1432 } 1571 1433 1434 + kvm_timer_handle_errata(); 1572 1435 return 0; 1573 1436 1574 1437 out_free_ptimer_irq:
+38 -48
arch/arm64/kvm/arm.c
··· 61 61 62 62 DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); 63 63 64 - DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); 64 + DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base); 65 65 DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); 66 66 67 67 DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); ··· 80 80 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; 81 81 } 82 82 83 - /* 84 - * This functions as an allow-list of protected VM capabilities. 85 - * Features not explicitly allowed by this function are denied. 86 - */ 87 - static bool pkvm_ext_allowed(struct kvm *kvm, long ext) 88 - { 89 - switch (ext) { 90 - case KVM_CAP_IRQCHIP: 91 - case KVM_CAP_ARM_PSCI: 92 - case KVM_CAP_ARM_PSCI_0_2: 93 - case KVM_CAP_NR_VCPUS: 94 - case KVM_CAP_MAX_VCPUS: 95 - case KVM_CAP_MAX_VCPU_ID: 96 - case KVM_CAP_MSI_DEVID: 97 - case KVM_CAP_ARM_VM_IPA_SIZE: 98 - case KVM_CAP_ARM_PMU_V3: 99 - case KVM_CAP_ARM_SVE: 100 - case KVM_CAP_ARM_PTRAUTH_ADDRESS: 101 - case KVM_CAP_ARM_PTRAUTH_GENERIC: 102 - return true; 103 - default: 104 - return false; 105 - } 106 - } 107 - 108 83 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, 109 84 struct kvm_enable_cap *cap) 110 85 { ··· 88 113 if (cap->flags) 89 114 return -EINVAL; 90 115 91 - if (kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, cap->cap)) 116 + if (kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(cap->cap)) 92 117 return -EINVAL; 93 118 94 119 switch (cap->cap) { ··· 286 311 { 287 312 int r; 288 313 289 - if (kvm && kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, ext)) 314 + if (kvm && kvm_vm_is_protected(kvm) && !kvm_pvm_ext_allowed(ext)) 290 315 return 0; 291 316 292 317 switch (ext) { ··· 451 476 452 477 kvm_pmu_vcpu_init(vcpu); 453 478 454 - kvm_arm_reset_debug_ptr(vcpu); 455 - 456 479 kvm_arm_pvtime_vcpu_init(&vcpu->arch); 457 480 458 481 vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; ··· 475 502 476 503 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 477 504 { 478 - 
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); 505 + if (!is_protected_kvm_enabled()) 506 + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); 507 + else 508 + free_hyp_memcache(&vcpu->arch.pkvm_memcache); 479 509 kvm_timer_vcpu_terminate(vcpu); 480 510 kvm_pmu_vcpu_destroy(vcpu); 481 511 kvm_vgic_vcpu_destroy(vcpu); ··· 550 574 struct kvm_s2_mmu *mmu; 551 575 int *last_ran; 552 576 577 + if (is_protected_kvm_enabled()) 578 + goto nommu; 579 + 553 580 if (vcpu_has_nv(vcpu)) 554 581 kvm_vcpu_load_hw_mmu(vcpu); 555 582 ··· 573 594 *last_ran = vcpu->vcpu_idx; 574 595 } 575 596 597 + nommu: 576 598 vcpu->cpu = cpu; 577 599 578 600 kvm_vgic_load(vcpu); 579 601 kvm_timer_vcpu_load(vcpu); 602 + kvm_vcpu_load_debug(vcpu); 580 603 if (has_vhe()) 581 604 kvm_vcpu_load_vhe(vcpu); 582 605 kvm_arch_vcpu_load_fp(vcpu); ··· 598 617 599 618 vcpu_set_pauth_traps(vcpu); 600 619 601 - kvm_arch_vcpu_load_debug_state_flags(vcpu); 620 + if (is_protected_kvm_enabled()) { 621 + kvm_call_hyp_nvhe(__pkvm_vcpu_load, 622 + vcpu->kvm->arch.pkvm.handle, 623 + vcpu->vcpu_idx, vcpu->arch.hcr_el2); 624 + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, 625 + &vcpu->arch.vgic_cpu.vgic_v3); 626 + } 602 627 603 628 if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) 604 629 vcpu_set_on_unsupported_cpu(vcpu); ··· 612 625 613 626 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 614 627 { 615 - kvm_arch_vcpu_put_debug_state_flags(vcpu); 628 + if (is_protected_kvm_enabled()) { 629 + kvm_call_hyp(__vgic_v3_save_vmcr_aprs, 630 + &vcpu->arch.vgic_cpu.vgic_v3); 631 + kvm_call_hyp_nvhe(__pkvm_vcpu_put); 632 + } 633 + 634 + kvm_vcpu_put_debug(vcpu); 616 635 kvm_arch_vcpu_put_fp(vcpu); 617 636 if (has_vhe()) 618 637 kvm_vcpu_put_vhe(vcpu); ··· 800 807 return 0; 801 808 802 809 kvm_init_mpidr_data(kvm); 803 - 804 - kvm_arm_vcpu_init_debug(vcpu); 805 810 806 811 if (likely(irqchip_in_kernel(kvm))) { 807 812 /* ··· 1178 1187 continue; 1179 1188 } 1180 1189 1181 - kvm_arm_setup_debug(vcpu); 1182 1190 
kvm_arch_vcpu_ctxflush_fp(vcpu); 1183 1191 1184 1192 /************************************************************** ··· 1193 1203 /* 1194 1204 * Back from guest 1195 1205 *************************************************************/ 1196 - 1197 - kvm_arm_clear_debug(vcpu); 1198 1206 1199 1207 /* 1200 1208 * We must sync the PMU state before the vgic state so ··· 1215 1227 */ 1216 1228 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 1217 1229 kvm_timer_sync_user(vcpu); 1230 + 1231 + if (is_hyp_ctxt(vcpu)) 1232 + kvm_timer_sync_nested(vcpu); 1218 1233 1219 1234 kvm_arch_vcpu_ctxsync_fp(vcpu); 1220 1235 ··· 1562 1571 } 1563 1572 1564 1573 vcpu_reset_hcr(vcpu); 1565 - vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu); 1566 1574 1567 1575 /* 1568 1576 * Handle the "start in power-off" case. ··· 2099 2109 static void cpu_hyp_init_context(void) 2100 2110 { 2101 2111 kvm_init_host_cpu_context(host_data_ptr(host_ctxt)); 2112 + kvm_init_host_debug_data(); 2102 2113 2103 2114 if (!is_kernel_in_hyp_mode()) 2104 2115 cpu_init_hyp_mode(); ··· 2108 2117 static void cpu_hyp_init_features(void) 2109 2118 { 2110 2119 cpu_set_hyp_vector(); 2111 - kvm_arm_init_debug(); 2112 2120 2113 2121 if (is_kernel_in_hyp_mode()) 2114 2122 kvm_timer_init_vhe(); ··· 2329 2339 2330 2340 free_hyp_pgds(); 2331 2341 for_each_possible_cpu(cpu) { 2332 - free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); 2342 + free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT); 2333 2343 free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); 2334 2344 2335 2345 if (free_sve) { ··· 2517 2527 * Allocate stack pages for Hypervisor-mode 2518 2528 */ 2519 2529 for_each_possible_cpu(cpu) { 2520 - unsigned long stack_page; 2530 + unsigned long stack_base; 2521 2531 2522 - stack_page = __get_free_page(GFP_KERNEL); 2523 - if (!stack_page) { 2532 + stack_base = __get_free_pages(GFP_KERNEL, NVHE_STACK_SHIFT - PAGE_SHIFT); 2533 + if (!stack_base) { 2524 2534 err = -ENOMEM; 2525 2535 
goto out_err; 2526 2536 } 2527 2537 2528 - per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page; 2538 + per_cpu(kvm_arm_hyp_stack_base, cpu) = stack_base; 2529 2539 } 2530 2540 2531 2541 /* ··· 2594 2604 */ 2595 2605 for_each_possible_cpu(cpu) { 2596 2606 struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); 2597 - char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); 2607 + char *stack_base = (char *)per_cpu(kvm_arm_hyp_stack_base, cpu); 2598 2608 2599 - err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va); 2609 + err = create_hyp_stack(__pa(stack_base), &params->stack_hyp_va); 2600 2610 if (err) { 2601 2611 kvm_err("Cannot map hyp stack\n"); 2602 2612 goto out_err; ··· 2608 2618 * __hyp_pa() won't do the right thing there, since the stack 2609 2619 * has been mapped in the flexible private VA space. 2610 2620 */ 2611 - params->stack_pa = __pa(stack_page); 2621 + params->stack_pa = __pa(stack_base); 2612 2622 } 2613 2623 2614 2624 for_each_possible_cpu(cpu) {
+198 -278
arch/arm64/kvm/debug.c
··· 3 3 * Debug and Guest Debug support 4 4 * 5 5 * Copyright (C) 2015 - Linaro Ltd 6 - * Author: Alex Bennée <alex.bennee@linaro.org> 6 + * Authors: Alex Bennée <alex.bennee@linaro.org> 7 + * Oliver Upton <oliver.upton@linux.dev> 7 8 */ 8 9 9 10 #include <linux/kvm_host.h> ··· 14 13 #include <asm/kvm_asm.h> 15 14 #include <asm/kvm_arm.h> 16 15 #include <asm/kvm_emulate.h> 17 - 18 - #include "trace.h" 19 - 20 - /* These are the bits of MDSCR_EL1 we may manipulate */ 21 - #define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \ 22 - DBG_MDSCR_KDE | \ 23 - DBG_MDSCR_MDE) 24 - 25 - static DEFINE_PER_CPU(u64, mdcr_el2); 26 - 27 - /* 28 - * save/restore_guest_debug_regs 29 - * 30 - * For some debug operations we need to tweak some guest registers. As 31 - * a result we need to save the state of those registers before we 32 - * make those modifications. 33 - * 34 - * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled 35 - * after we have restored the preserved value to the main context. 36 - * 37 - * When single-step is enabled by userspace, we tweak PSTATE.SS on every 38 - * guest entry. Preserve PSTATE.SS so we can restore the original value 39 - * for the vcpu after the single-step is disabled. 
40 - */ 41 - static void save_guest_debug_regs(struct kvm_vcpu *vcpu) 42 - { 43 - u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1); 44 - 45 - vcpu->arch.guest_debug_preserved.mdscr_el1 = val; 46 - 47 - trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", 48 - vcpu->arch.guest_debug_preserved.mdscr_el1); 49 - 50 - vcpu->arch.guest_debug_preserved.pstate_ss = 51 - (*vcpu_cpsr(vcpu) & DBG_SPSR_SS); 52 - } 53 - 54 - static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) 55 - { 56 - u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1; 57 - 58 - vcpu_write_sys_reg(vcpu, val, MDSCR_EL1); 59 - 60 - trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", 61 - vcpu_read_sys_reg(vcpu, MDSCR_EL1)); 62 - 63 - if (vcpu->arch.guest_debug_preserved.pstate_ss) 64 - *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; 65 - else 66 - *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; 67 - } 68 - 69 - /** 70 - * kvm_arm_init_debug - grab what we need for debug 71 - * 72 - * Currently the sole task of this function is to retrieve the initial 73 - * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has 74 - * presumably been set-up by some knowledgeable bootcode. 75 - * 76 - * It is called once per-cpu during CPU hyp initialisation. 
77 - */ 78 - 79 - void kvm_arm_init_debug(void) 80 - { 81 - __this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2)); 82 - } 83 16 84 17 /** 85 18 * kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value ··· 30 95 */ 31 96 static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu) 32 97 { 98 + preempt_disable(); 99 + 33 100 /* 34 101 * This also clears MDCR_EL2_E2PB_MASK and MDCR_EL2_E2TB_MASK 35 102 * to disable guest access to the profiling and trace buffers 36 103 */ 37 - vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK; 104 + vcpu->arch.mdcr_el2 = FIELD_PREP(MDCR_EL2_HPMN, 105 + *host_data_ptr(nr_event_counters)); 38 106 vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | 39 107 MDCR_EL2_TPMS | 40 108 MDCR_EL2_TTRF | ··· 51 113 vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE; 52 114 53 115 /* 54 - * Trap debug register access when one of the following is true: 55 - * - Userspace is using the hardware to debug the guest 56 - * (KVM_GUESTDBG_USE_HW is set). 57 - * - The guest is not using debug (DEBUG_DIRTY clear). 58 - * - The guest has enabled the OS Lock (debug exceptions are blocked). 116 + * Trap debug registers if the guest doesn't have ownership of them. 59 117 */ 60 - if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) || 61 - !vcpu_get_flag(vcpu, DEBUG_DIRTY) || 62 - kvm_vcpu_os_lock_enabled(vcpu)) 118 + if (!kvm_guest_owns_debug_regs(vcpu)) 63 119 vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; 64 120 65 - trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); 66 - } 121 + /* Write MDCR_EL2 directly if we're already at EL2 */ 122 + if (has_vhe()) 123 + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); 67 124 68 - /** 69 - * kvm_arm_vcpu_init_debug - setup vcpu debug traps 70 - * 71 - * @vcpu: the vcpu pointer 72 - * 73 - * Set vcpu initial mdcr_el2 value. 
74 - */ 75 - void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu) 76 - { 77 - preempt_disable(); 78 - kvm_arm_setup_mdcr_el2(vcpu); 79 125 preempt_enable(); 80 126 } 81 127 82 - /** 83 - * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state 84 - * @vcpu: the vcpu pointer 85 - */ 86 - 87 - void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) 128 + void kvm_init_host_debug_data(void) 88 129 { 89 - vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state; 90 - } 130 + u64 dfr0 = read_sysreg(id_aa64dfr0_el1); 91 131 92 - /** 93 - * kvm_arm_setup_debug - set up debug related stuff 94 - * 95 - * @vcpu: the vcpu pointer 96 - * 97 - * This is called before each entry into the hypervisor to setup any 98 - * debug related registers. 99 - * 100 - * Additionally, KVM only traps guest accesses to the debug registers if 101 - * the guest is not actively using them (see the DEBUG_DIRTY 102 - * flag on vcpu->arch.iflags). Since the guest must not interfere 103 - * with the hardware state when debugging the guest, we must ensure that 104 - * trapping is enabled whenever we are debugging the guest using the 105 - * debug registers. 106 - */ 132 + if (cpuid_feature_extract_signed_field(dfr0, ID_AA64DFR0_EL1_PMUVer_SHIFT) > 0) 133 + *host_data_ptr(nr_event_counters) = FIELD_GET(ARMV8_PMU_PMCR_N, 134 + read_sysreg(pmcr_el0)); 107 135 108 - void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) 109 - { 110 - unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2; 136 + *host_data_ptr(debug_brps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0); 137 + *host_data_ptr(debug_wrps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0); 111 138 112 - trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); 113 - 114 - kvm_arm_setup_mdcr_el2(vcpu); 115 - 116 - /* Check if we need to use the debug registers. 
*/ 117 - if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { 118 - /* Save guest debug state */ 119 - save_guest_debug_regs(vcpu); 120 - 121 - /* 122 - * Single Step (ARM ARM D2.12.3 The software step state 123 - * machine) 124 - * 125 - * If we are doing Single Step we need to manipulate 126 - * the guest's MDSCR_EL1.SS and PSTATE.SS. Once the 127 - * step has occurred the hypervisor will trap the 128 - * debug exception and we return to userspace. 129 - * 130 - * If the guest attempts to single step its userspace 131 - * we would have to deal with a trapped exception 132 - * while in the guest kernel. Because this would be 133 - * hard to unwind we suppress the guest's ability to 134 - * do so by masking MDSCR_EL.SS. 135 - * 136 - * This confuses guest debuggers which use 137 - * single-step behind the scenes but everything 138 - * returns to normal once the host is no longer 139 - * debugging the system. 140 - */ 141 - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 142 - /* 143 - * If the software step state at the last guest exit 144 - * was Active-pending, we don't set DBG_SPSR_SS so 145 - * that the state is maintained (to not run another 146 - * single-step until the pending Software Step 147 - * exception is taken). 148 - */ 149 - if (!vcpu_get_flag(vcpu, DBG_SS_ACTIVE_PENDING)) 150 - *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; 151 - else 152 - *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; 153 - 154 - mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); 155 - mdscr |= DBG_MDSCR_SS; 156 - vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); 157 - } else { 158 - mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); 159 - mdscr &= ~DBG_MDSCR_SS; 160 - vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); 161 - } 162 - 163 - trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu)); 164 - 165 - /* 166 - * HW Breakpoints and watchpoints 167 - * 168 - * We simply switch the debug_ptr to point to our new 169 - * external_debug_state which has been populated by the 170 - * debug ioctl. 
The existing DEBUG_DIRTY mechanism ensures 171 - * the registers are updated on the world switch. 172 - */ 173 - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { 174 - /* Enable breakpoints/watchpoints */ 175 - mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); 176 - mdscr |= DBG_MDSCR_MDE; 177 - vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); 178 - 179 - vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; 180 - vcpu_set_flag(vcpu, DEBUG_DIRTY); 181 - 182 - trace_kvm_arm_set_regset("BKPTS", get_num_brps(), 183 - &vcpu->arch.debug_ptr->dbg_bcr[0], 184 - &vcpu->arch.debug_ptr->dbg_bvr[0]); 185 - 186 - trace_kvm_arm_set_regset("WAPTS", get_num_wrps(), 187 - &vcpu->arch.debug_ptr->dbg_wcr[0], 188 - &vcpu->arch.debug_ptr->dbg_wvr[0]); 189 - 190 - /* 191 - * The OS Lock blocks debug exceptions in all ELs when it is 192 - * enabled. If the guest has enabled the OS Lock, constrain its 193 - * effects to the guest. Emulate the behavior by clearing 194 - * MDSCR_EL1.MDE. In so doing, we ensure that host debug 195 - * exceptions are unaffected by guest configuration of the OS 196 - * Lock. 197 - */ 198 - } else if (kvm_vcpu_os_lock_enabled(vcpu)) { 199 - mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); 200 - mdscr &= ~DBG_MDSCR_MDE; 201 - vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); 202 - } 203 - } 204 - 205 - BUG_ON(!vcpu->guest_debug && 206 - vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state); 207 - 208 - /* If KDE or MDE are set, perform a full save/restore cycle. 
*/ 209 - if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) 210 - vcpu_set_flag(vcpu, DEBUG_DIRTY); 211 - 212 - /* Write mdcr_el2 changes since vcpu_load on VHE systems */ 213 - if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2) 214 - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); 215 - 216 - trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); 217 - } 218 - 219 - void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) 220 - { 221 - trace_kvm_arm_clear_debug(vcpu->guest_debug); 222 - 223 - /* 224 - * Restore the guest's debug registers if we were using them. 225 - */ 226 - if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { 227 - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 228 - if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)) 229 - /* 230 - * Mark the vcpu as ACTIVE_PENDING 231 - * until Software Step exception is taken. 232 - */ 233 - vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING); 234 - } 235 - 236 - restore_guest_debug_regs(vcpu); 237 - 238 - /* 239 - * If we were using HW debug we need to restore the 240 - * debug_ptr to the guest debug state. 241 - */ 242 - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { 243 - kvm_arm_reset_debug_ptr(vcpu); 244 - 245 - trace_kvm_arm_set_regset("BKPTS", get_num_brps(), 246 - &vcpu->arch.debug_ptr->dbg_bcr[0], 247 - &vcpu->arch.debug_ptr->dbg_bvr[0]); 248 - 249 - trace_kvm_arm_set_regset("WAPTS", get_num_wrps(), 250 - &vcpu->arch.debug_ptr->dbg_wcr[0], 251 - &vcpu->arch.debug_ptr->dbg_wvr[0]); 252 - } 253 - } 254 - } 255 - 256 - void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) 257 - { 258 - u64 dfr0; 259 - 260 - /* For VHE, there is nothing to do */ 261 139 if (has_vhe()) 262 140 return; 263 141 264 - dfr0 = read_sysreg(id_aa64dfr0_el1); 265 - /* 266 - * If SPE is present on this CPU and is available at current EL, 267 - * we may need to check if the host state needs to be saved. 
268 - */ 269 142 if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && 270 - !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(PMBIDR_EL1_P_SHIFT))) 271 - vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE); 143 + !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P)) 144 + host_data_set_flag(HAS_SPE); 272 145 273 - /* Check if we have TRBE implemented and available at the host */ 274 - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) && 275 - !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P)) 276 - vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE); 146 + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT)) { 147 + /* Force disable trace in protected mode in case of no TRBE */ 148 + if (is_protected_kvm_enabled()) 149 + host_data_set_flag(EL1_TRACING_CONFIGURED); 150 + 151 + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) && 152 + !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P)) 153 + host_data_set_flag(HAS_TRBE); 154 + } 277 155 } 278 156 279 - void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu) 157 + /* 158 + * Configures the 'external' MDSCR_EL1 value for the guest, i.e. when the host 159 + * has taken over MDSCR_EL1. 160 + * 161 + * - Userspace is single-stepping the guest, and MDSCR_EL1.SS is forced to 1. 162 + * 163 + * - Userspace is using the breakpoint/watchpoint registers to debug the 164 + * guest, and MDSCR_EL1.MDE is forced to 1. 165 + * 166 + * - The guest has enabled the OS Lock, and KVM is forcing MDSCR_EL1.MDE to 0, 167 + * masking all debug exceptions affected by the OS Lock. 168 + */ 169 + static void setup_external_mdscr(struct kvm_vcpu *vcpu) 280 170 { 281 - vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE); 282 - vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE); 171 + /* 172 + * Use the guest's MDSCR_EL1 as a starting point, since there are 173 + * several other features controlled by MDSCR_EL1 that are not relevant 174 + * to the host. 
175 + * 176 + * Clear the bits that KVM may use which also satisfies emulation of 177 + * the OS Lock as MDSCR_EL1.MDE is cleared. 178 + */ 179 + u64 mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1) & ~(MDSCR_EL1_SS | 180 + MDSCR_EL1_MDE | 181 + MDSCR_EL1_KDE); 182 + 183 + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 184 + mdscr |= MDSCR_EL1_SS; 185 + 186 + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) 187 + mdscr |= MDSCR_EL1_MDE | MDSCR_EL1_KDE; 188 + 189 + vcpu->arch.external_mdscr_el1 = mdscr; 283 190 } 191 + 192 + void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu) 193 + { 194 + u64 mdscr; 195 + 196 + /* Must be called before kvm_vcpu_load_vhe() */ 197 + KVM_BUG_ON(vcpu_get_flag(vcpu, SYSREGS_ON_CPU), vcpu->kvm); 198 + 199 + /* 200 + * Determine which of the possible debug states we're in: 201 + * 202 + * - VCPU_DEBUG_HOST_OWNED: KVM has taken ownership of the guest's 203 + * breakpoint/watchpoint registers, or needs to use MDSCR_EL1 to do 204 + * software step or emulate the effects of the OS Lock being enabled. 205 + * 206 + * - VCPU_DEBUG_GUEST_OWNED: The guest has debug exceptions enabled, and 207 + * the breakpoint/watchpoint registers need to be loaded eagerly. 208 + * 209 + * - VCPU_DEBUG_FREE: Neither of the above apply, no breakpoint/watchpoint 210 + * context needs to be loaded on the CPU. 211 + */ 212 + if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { 213 + vcpu->arch.debug_owner = VCPU_DEBUG_HOST_OWNED; 214 + setup_external_mdscr(vcpu); 215 + 216 + /* 217 + * Steal the guest's single-step state machine if userspace wants 218 + * single-step the guest. 
219 + */ 220 + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 221 + if (*vcpu_cpsr(vcpu) & DBG_SPSR_SS) 222 + vcpu_clear_flag(vcpu, GUEST_SS_ACTIVE_PENDING); 223 + else 224 + vcpu_set_flag(vcpu, GUEST_SS_ACTIVE_PENDING); 225 + 226 + if (!vcpu_get_flag(vcpu, HOST_SS_ACTIVE_PENDING)) 227 + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; 228 + else 229 + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; 230 + } 231 + } else { 232 + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); 233 + 234 + if (mdscr & (MDSCR_EL1_KDE | MDSCR_EL1_MDE)) 235 + vcpu->arch.debug_owner = VCPU_DEBUG_GUEST_OWNED; 236 + else 237 + vcpu->arch.debug_owner = VCPU_DEBUG_FREE; 238 + } 239 + 240 + kvm_arm_setup_mdcr_el2(vcpu); 241 + } 242 + 243 + void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu) 244 + { 245 + if (likely(!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) 246 + return; 247 + 248 + /* 249 + * Save the host's software step state and restore the guest's before 250 + * potentially returning to userspace. 251 + */ 252 + if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)) 253 + vcpu_set_flag(vcpu, HOST_SS_ACTIVE_PENDING); 254 + else 255 + vcpu_clear_flag(vcpu, HOST_SS_ACTIVE_PENDING); 256 + 257 + if (vcpu_get_flag(vcpu, GUEST_SS_ACTIVE_PENDING)) 258 + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; 259 + else 260 + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; 261 + } 262 + 263 + /* 264 + * Updates ownership of the debug registers after a trapped guest access to a 265 + * breakpoint/watchpoint register. Host ownership of the debug registers is of 266 + * strictly higher priority, and it is the responsibility of the VMM to emulate 267 + * guest debug exceptions in this configuration. 
268 + */ 269 + void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu) 270 + { 271 + if (kvm_host_owns_debug_regs(vcpu)) 272 + return; 273 + 274 + vcpu->arch.debug_owner = VCPU_DEBUG_GUEST_OWNED; 275 + kvm_arm_setup_mdcr_el2(vcpu); 276 + } 277 + 278 + void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val) 279 + { 280 + if (val & OSLAR_EL1_OSLK) 281 + __vcpu_sys_reg(vcpu, OSLSR_EL1) |= OSLSR_EL1_OSLK; 282 + else 283 + __vcpu_sys_reg(vcpu, OSLSR_EL1) &= ~OSLSR_EL1_OSLK; 284 + 285 + preempt_disable(); 286 + kvm_arch_vcpu_put(vcpu); 287 + kvm_arch_vcpu_load(vcpu, smp_processor_id()); 288 + preempt_enable(); 289 + } 290 + 291 + void kvm_enable_trbe(void) 292 + { 293 + if (has_vhe() || is_protected_kvm_enabled() || 294 + WARN_ON_ONCE(preemptible())) 295 + return; 296 + 297 + host_data_set_flag(TRBE_ENABLED); 298 + } 299 + EXPORT_SYMBOL_GPL(kvm_enable_trbe); 300 + 301 + void kvm_disable_trbe(void) 302 + { 303 + if (has_vhe() || is_protected_kvm_enabled() || 304 + WARN_ON_ONCE(preemptible())) 305 + return; 306 + 307 + host_data_clear_flag(TRBE_ENABLED); 308 + } 309 + EXPORT_SYMBOL_GPL(kvm_disable_trbe); 310 + 311 + void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) 312 + { 313 + if (is_protected_kvm_enabled() || WARN_ON_ONCE(preemptible())) 314 + return; 315 + 316 + if (has_vhe()) { 317 + write_sysreg_s(trfcr_while_in_guest, SYS_TRFCR_EL12); 318 + return; 319 + } 320 + 321 + *host_data_ptr(trfcr_while_in_guest) = trfcr_while_in_guest; 322 + if (read_sysreg_s(SYS_TRFCR_EL1) != trfcr_while_in_guest) 323 + host_data_set_flag(EL1_TRACING_CONFIGURED); 324 + else 325 + host_data_clear_flag(EL1_TRACING_CONFIGURED); 326 + } 327 + EXPORT_SYMBOL_GPL(kvm_tracing_set_el1_configuration);
+75 -6
arch/arm64/kvm/emulate-nested.c
··· 89 89 CGT_HCRX_EnFPM, 90 90 CGT_HCRX_TCR2En, 91 91 92 + CGT_CNTHCTL_EL1TVT, 93 + CGT_CNTHCTL_EL1TVCT, 94 + 92 95 CGT_ICH_HCR_TC, 93 96 CGT_ICH_HCR_TALL0, 94 97 CGT_ICH_HCR_TALL1, ··· 127 124 __COMPLEX_CONDITIONS__, 128 125 CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__, 129 126 CGT_CNTHCTL_EL1PTEN, 127 + CGT_CNTHCTL_EL1NVPCT, 128 + CGT_CNTHCTL_EL1NVVCT, 130 129 131 130 CGT_CPTR_TTA, 132 131 CGT_MDCR_HPMN, ··· 398 393 .mask = HCRX_EL2_TCR2En, 399 394 .behaviour = BEHAVE_FORWARD_RW, 400 395 }, 396 + [CGT_CNTHCTL_EL1TVT] = { 397 + .index = CNTHCTL_EL2, 398 + .value = CNTHCTL_EL1TVT, 399 + .mask = CNTHCTL_EL1TVT, 400 + .behaviour = BEHAVE_FORWARD_RW, 401 + }, 402 + [CGT_CNTHCTL_EL1TVCT] = { 403 + .index = CNTHCTL_EL2, 404 + .value = CNTHCTL_EL1TVCT, 405 + .mask = CNTHCTL_EL1TVCT, 406 + .behaviour = BEHAVE_FORWARD_READ, 407 + }, 401 408 [CGT_ICH_HCR_TC] = { 402 409 .index = ICH_HCR_EL2, 403 410 .value = ICH_HCR_TC, ··· 504 487 return BEHAVE_FORWARD_RW; 505 488 } 506 489 490 + static bool is_nested_nv2_guest(struct kvm_vcpu *vcpu) 491 + { 492 + u64 val; 493 + 494 + val = __vcpu_sys_reg(vcpu, HCR_EL2); 495 + return ((val & (HCR_E2H | HCR_TGE | HCR_NV2 | HCR_NV1 | HCR_NV)) == (HCR_E2H | HCR_NV2 | HCR_NV)); 496 + } 497 + 498 + static enum trap_behaviour check_cnthctl_el1nvpct(struct kvm_vcpu *vcpu) 499 + { 500 + if (!is_nested_nv2_guest(vcpu) || 501 + !(__vcpu_sys_reg(vcpu, CNTHCTL_EL2) & CNTHCTL_EL1NVPCT)) 502 + return BEHAVE_HANDLE_LOCALLY; 503 + 504 + return BEHAVE_FORWARD_RW; 505 + } 506 + 507 + static enum trap_behaviour check_cnthctl_el1nvvct(struct kvm_vcpu *vcpu) 508 + { 509 + if (!is_nested_nv2_guest(vcpu) || 510 + !(__vcpu_sys_reg(vcpu, CNTHCTL_EL2) & CNTHCTL_EL1NVVCT)) 511 + return BEHAVE_HANDLE_LOCALLY; 512 + 513 + return BEHAVE_FORWARD_RW; 514 + } 515 + 507 516 static enum trap_behaviour check_cptr_tta(struct kvm_vcpu *vcpu) 508 517 { 509 518 u64 val = __vcpu_sys_reg(vcpu, CPTR_EL2); ··· 577 534 static const complex_condition_check ccc[] = { 578 535 
CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten), 579 536 CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten), 537 + CCC(CGT_CNTHCTL_EL1NVPCT, check_cnthctl_el1nvpct), 538 + CCC(CGT_CNTHCTL_EL1NVVCT, check_cnthctl_el1nvvct), 580 539 CCC(CGT_CPTR_TTA, check_cptr_tta), 581 540 CCC(CGT_MDCR_HPMN, check_mdcr_hpmn), 582 541 }; ··· 895 850 SYS_CNTHP_CVAL_EL2, CGT_HCR_NV), 896 851 SR_RANGE_TRAP(SYS_CNTHV_TVAL_EL2, 897 852 SYS_CNTHV_CVAL_EL2, CGT_HCR_NV), 898 - /* All _EL02, _EL12 registers */ 853 + /* All _EL02, _EL12 registers up to CNTKCTL_EL12*/ 899 854 SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0), 900 855 sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV), 901 856 SR_RANGE_TRAP(sys_reg(3, 5, 12, 0, 0), 902 - sys_reg(3, 5, 14, 15, 7), CGT_HCR_NV), 857 + sys_reg(3, 5, 14, 1, 0), CGT_HCR_NV), 858 + SR_TRAP(SYS_CNTP_CTL_EL02, CGT_CNTHCTL_EL1NVPCT), 859 + SR_TRAP(SYS_CNTP_CVAL_EL02, CGT_CNTHCTL_EL1NVPCT), 860 + SR_TRAP(SYS_CNTV_CTL_EL02, CGT_CNTHCTL_EL1NVVCT), 861 + SR_TRAP(SYS_CNTV_CVAL_EL02, CGT_CNTHCTL_EL1NVVCT), 903 862 SR_TRAP(OP_AT_S1E2R, CGT_HCR_NV), 904 863 SR_TRAP(OP_AT_S1E2W, CGT_HCR_NV), 905 864 SR_TRAP(OP_AT_S12E1R, CGT_HCR_NV), ··· 1233 1184 SR_TRAP(SYS_CNTP_CTL_EL0, CGT_CNTHCTL_EL1PTEN), 1234 1185 SR_TRAP(SYS_CNTPCT_EL0, CGT_CNTHCTL_EL1PCTEN), 1235 1186 SR_TRAP(SYS_CNTPCTSS_EL0, CGT_CNTHCTL_EL1PCTEN), 1187 + SR_TRAP(SYS_CNTV_TVAL_EL0, CGT_CNTHCTL_EL1TVT), 1188 + SR_TRAP(SYS_CNTV_CVAL_EL0, CGT_CNTHCTL_EL1TVT), 1189 + SR_TRAP(SYS_CNTV_CTL_EL0, CGT_CNTHCTL_EL1TVT), 1190 + SR_TRAP(SYS_CNTVCT_EL0, CGT_CNTHCTL_EL1TVCT), 1191 + SR_TRAP(SYS_CNTVCTSS_EL0, CGT_CNTHCTL_EL1TVCT), 1236 1192 SR_TRAP(SYS_FPMR, CGT_HCRX_EnFPM), 1237 1193 /* 1238 1194 * IMPDEF choice: ··· 2399 2345 return true; 2400 2346 } 2401 2347 2402 - static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit) 2348 + static bool __forward_traps(struct kvm_vcpu *vcpu, unsigned int reg, u64 control_bit) 2403 2349 { 2404 2350 bool control_bit_set; 2405 2351 2406 2352 if (!vcpu_has_nv(vcpu)) 2407 2353 return false; 2408 2354 
2409 - control_bit_set = __vcpu_sys_reg(vcpu, HCR_EL2) & control_bit; 2355 + control_bit_set = __vcpu_sys_reg(vcpu, reg) & control_bit; 2410 2356 if (!is_hyp_ctxt(vcpu) && control_bit_set) { 2411 2357 kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); 2412 2358 return true; ··· 2414 2360 return false; 2415 2361 } 2416 2362 2363 + static bool forward_hcr_traps(struct kvm_vcpu *vcpu, u64 control_bit) 2364 + { 2365 + return __forward_traps(vcpu, HCR_EL2, control_bit); 2366 + } 2367 + 2417 2368 bool forward_smc_trap(struct kvm_vcpu *vcpu) 2418 2369 { 2419 - return forward_traps(vcpu, HCR_TSC); 2370 + return forward_hcr_traps(vcpu, HCR_TSC); 2371 + } 2372 + 2373 + static bool forward_mdcr_traps(struct kvm_vcpu *vcpu, u64 control_bit) 2374 + { 2375 + return __forward_traps(vcpu, MDCR_EL2, control_bit); 2376 + } 2377 + 2378 + bool forward_debug_exception(struct kvm_vcpu *vcpu) 2379 + { 2380 + return forward_mdcr_traps(vcpu, MDCR_EL2_TDE); 2420 2381 } 2421 2382 2422 2383 static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr) ··· 2475 2406 * Forward this trap to the virtual EL2 if the virtual 2476 2407 * HCR_EL2.NV bit is set and this is coming from !EL2. 2477 2408 */ 2478 - if (forward_traps(vcpu, HCR_NV)) 2409 + if (forward_hcr_traps(vcpu, HCR_NV)) 2479 2410 return; 2480 2411 2481 2412 spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
+6 -6
arch/arm64/kvm/fpsimd.c
··· 65 65 *host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state); 66 66 *host_data_ptr(fpmr_ptr) = kern_hyp_va(&current->thread.uw.fpmr); 67 67 68 - vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); 68 + host_data_clear_flag(HOST_SVE_ENABLED); 69 69 if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) 70 - vcpu_set_flag(vcpu, HOST_SVE_ENABLED); 70 + host_data_set_flag(HOST_SVE_ENABLED); 71 71 72 72 if (system_supports_sme()) { 73 - vcpu_clear_flag(vcpu, HOST_SME_ENABLED); 73 + host_data_clear_flag(HOST_SME_ENABLED); 74 74 if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) 75 - vcpu_set_flag(vcpu, HOST_SME_ENABLED); 75 + host_data_set_flag(HOST_SME_ENABLED); 76 76 77 77 /* 78 78 * If PSTATE.SM is enabled then save any pending FP ··· 168 168 */ 169 169 if (has_vhe() && system_supports_sme()) { 170 170 /* Also restore EL0 state seen on entry */ 171 - if (vcpu_get_flag(vcpu, HOST_SME_ENABLED)) 171 + if (host_data_test_flag(HOST_SME_ENABLED)) 172 172 sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_SMEN); 173 173 else 174 174 sysreg_clear_set(CPACR_EL1, ··· 227 227 * for EL0. To avoid spurious traps, restore the trap state 228 228 * seen by kvm_arch_vcpu_load_fp(): 229 229 */ 230 - if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED)) 230 + if (host_data_test_flag(HOST_SVE_ENABLED)) 231 231 sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN); 232 232 else 233 233 sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
+12 -19
arch/arm64/kvm/guest.c
··· 917 917 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 918 918 struct kvm_guest_debug *dbg) 919 919 { 920 - int ret = 0; 921 - 922 920 trace_kvm_set_guest_debug(vcpu, dbg->control); 923 921 924 - if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) { 925 - ret = -EINVAL; 926 - goto out; 927 - } 922 + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) 923 + return -EINVAL; 928 924 929 - if (dbg->control & KVM_GUESTDBG_ENABLE) { 930 - vcpu->guest_debug = dbg->control; 931 - 932 - /* Hardware assisted Break and Watch points */ 933 - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { 934 - vcpu->arch.external_debug_state = dbg->arch; 935 - } 936 - 937 - } else { 938 - /* If not enabled clear all flags */ 925 + if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { 939 926 vcpu->guest_debug = 0; 940 - vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING); 927 + vcpu_clear_flag(vcpu, HOST_SS_ACTIVE_PENDING); 928 + return 0; 941 929 } 942 930 943 - out: 944 - return ret; 931 + vcpu->guest_debug = dbg->control; 932 + 933 + /* Hardware assisted Break and Watch points */ 934 + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) 935 + vcpu->arch.external_debug_state = dbg->arch; 936 + 937 + return 0; 945 938 } 946 939 947 940 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+4 -1
arch/arm64/kvm/handle_exit.c
··· 183 183 struct kvm_run *run = vcpu->run; 184 184 u64 esr = kvm_vcpu_get_esr(vcpu); 185 185 186 + if (!vcpu->guest_debug && forward_debug_exception(vcpu)) 187 + return 1; 188 + 186 189 run->exit_reason = KVM_EXIT_DEBUG; 187 190 run->debug.arch.hsr = lower_32_bits(esr); 188 191 run->debug.arch.hsr_high = upper_32_bits(esr); ··· 196 193 run->debug.arch.far = vcpu->arch.fault.far_el2; 197 194 break; 198 195 case ESR_ELx_EC_SOFTSTP_LOW: 199 - vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING); 196 + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; 200 197 break; 201 198 } 202 199
+23 -19
arch/arm64/kvm/hyp/include/hyp/debug-sr.h
··· 88 88 default: write_debug(ptr[0], reg, 0); \ 89 89 } 90 90 91 + static struct kvm_guest_debug_arch *__vcpu_debug_regs(struct kvm_vcpu *vcpu) 92 + { 93 + switch (vcpu->arch.debug_owner) { 94 + case VCPU_DEBUG_FREE: 95 + WARN_ON_ONCE(1); 96 + fallthrough; 97 + case VCPU_DEBUG_GUEST_OWNED: 98 + return &vcpu->arch.vcpu_debug_state; 99 + case VCPU_DEBUG_HOST_OWNED: 100 + return &vcpu->arch.external_debug_state; 101 + } 102 + 103 + return NULL; 104 + } 105 + 91 106 static void __debug_save_state(struct kvm_guest_debug_arch *dbg, 92 107 struct kvm_cpu_context *ctxt) 93 108 { 94 - u64 aa64dfr0; 95 - int brps, wrps; 96 - 97 - aa64dfr0 = read_sysreg(id_aa64dfr0_el1); 98 - brps = (aa64dfr0 >> 12) & 0xf; 99 - wrps = (aa64dfr0 >> 20) & 0xf; 109 + int brps = *host_data_ptr(debug_brps); 110 + int wrps = *host_data_ptr(debug_wrps); 100 111 101 112 save_debug(dbg->dbg_bcr, dbgbcr, brps); 102 113 save_debug(dbg->dbg_bvr, dbgbvr, brps); ··· 120 109 static void __debug_restore_state(struct kvm_guest_debug_arch *dbg, 121 110 struct kvm_cpu_context *ctxt) 122 111 { 123 - u64 aa64dfr0; 124 - int brps, wrps; 125 - 126 - aa64dfr0 = read_sysreg(id_aa64dfr0_el1); 127 - 128 - brps = (aa64dfr0 >> 12) & 0xf; 129 - wrps = (aa64dfr0 >> 20) & 0xf; 112 + int brps = *host_data_ptr(debug_brps); 113 + int wrps = *host_data_ptr(debug_wrps); 130 114 131 115 restore_debug(dbg->dbg_bcr, dbgbcr, brps); 132 116 restore_debug(dbg->dbg_bvr, dbgbvr, brps); ··· 138 132 struct kvm_guest_debug_arch *host_dbg; 139 133 struct kvm_guest_debug_arch *guest_dbg; 140 134 141 - if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) 135 + if (!kvm_debug_regs_in_use(vcpu)) 142 136 return; 143 137 144 138 host_ctxt = host_data_ptr(host_ctxt); 145 139 guest_ctxt = &vcpu->arch.ctxt; 146 140 host_dbg = host_data_ptr(host_debug_state.regs); 147 - guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); 141 + guest_dbg = __vcpu_debug_regs(vcpu); 148 142 149 143 __debug_save_state(host_dbg, host_ctxt); 150 144 __debug_restore_state(guest_dbg, 
guest_ctxt); ··· 157 151 struct kvm_guest_debug_arch *host_dbg; 158 152 struct kvm_guest_debug_arch *guest_dbg; 159 153 160 - if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) 154 + if (!kvm_debug_regs_in_use(vcpu)) 161 155 return; 162 156 163 157 host_ctxt = host_data_ptr(host_ctxt); 164 158 guest_ctxt = &vcpu->arch.ctxt; 165 159 host_dbg = host_data_ptr(host_debug_state.regs); 166 - guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); 160 + guest_dbg = __vcpu_debug_regs(vcpu); 167 161 168 162 __debug_save_state(guest_dbg, guest_ctxt); 169 163 __debug_restore_state(host_dbg, host_ctxt); 170 - 171 - vcpu_clear_flag(vcpu, DEBUG_DIRTY); 172 164 } 173 165 174 166 #endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
+26 -13
arch/arm64/kvm/hyp/include/hyp/switch.h
··· 501 501 return true; 502 502 } 503 503 504 - static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) 504 + static inline u64 compute_counter_value(struct arch_timer_context *ctxt) 505 + { 506 + return arch_timer_read_cntpct_el0() - timer_get_offset(ctxt); 507 + } 508 + 509 + static bool kvm_handle_cntxct(struct kvm_vcpu *vcpu) 505 510 { 506 511 struct arch_timer_context *ctxt; 507 512 u32 sysreg; ··· 516 511 * We only get here for 64bit guests, 32bit guests will hit 517 512 * the long and winding road all the way to the standard 518 513 * handling. Yes, it sucks to be irrelevant. 514 + * 515 + * Also, we only deal with non-hypervisor context here (either 516 + * an EL1 guest, or a non-HYP context of an EL2 guest). 519 517 */ 518 + if (is_hyp_ctxt(vcpu)) 519 + return false; 520 + 520 521 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); 521 522 522 523 switch (sysreg) { 523 524 case SYS_CNTPCT_EL0: 524 525 case SYS_CNTPCTSS_EL0: 525 526 if (vcpu_has_nv(vcpu)) { 526 - if (is_hyp_ctxt(vcpu)) { 527 - ctxt = vcpu_hptimer(vcpu); 528 - break; 529 - } 530 - 531 527 /* Check for guest hypervisor trapping */ 532 528 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); 533 529 if (!vcpu_el2_e2h_is_set(vcpu)) ··· 540 534 541 535 ctxt = vcpu_ptimer(vcpu); 542 536 break; 537 + case SYS_CNTVCT_EL0: 538 + case SYS_CNTVCTSS_EL0: 539 + if (vcpu_has_nv(vcpu)) { 540 + /* Check for guest hypervisor trapping */ 541 + val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); 542 + 543 + if (val & CNTHCTL_EL1TVCT) 544 + return false; 545 + } 546 + 547 + ctxt = vcpu_vtimer(vcpu); 548 + break; 543 549 default: 544 550 return false; 545 551 } 546 552 547 - val = arch_timer_read_cntpct_el0(); 548 - 549 - if (ctxt->offset.vm_offset) 550 - val -= *kern_hyp_va(ctxt->offset.vm_offset); 551 - if (ctxt->offset.vcpu_offset) 552 - val -= *kern_hyp_va(ctxt->offset.vcpu_offset); 553 + val = compute_counter_value(ctxt); 553 554 554 555 vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val); 555 556 __kvm_skip_instr(vcpu); ··· 
601 588 __vgic_v3_perform_cpuif_access(vcpu) == 1) 602 589 return true; 603 590 604 - if (kvm_hyp_handle_cntpct(vcpu)) 591 + if (kvm_handle_cntxct(vcpu)) 605 592 return true; 606 593 607 594 return false;
+29 -14
arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
··· 18 18 19 19 static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt); 20 20 21 + static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) 22 + { 23 + struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; 24 + 25 + if (!vcpu) 26 + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); 27 + 28 + return vcpu; 29 + } 30 + 31 + static inline bool ctxt_is_guest(struct kvm_cpu_context *ctxt) 32 + { 33 + return host_data_ptr(host_ctxt) != ctxt; 34 + } 35 + 36 + static inline u64 *ctxt_mdscr_el1(struct kvm_cpu_context *ctxt) 37 + { 38 + struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt); 39 + 40 + if (ctxt_is_guest(ctxt) && kvm_host_owns_debug_regs(vcpu)) 41 + return &vcpu->arch.external_mdscr_el1; 42 + 43 + return &ctxt_sys_reg(ctxt, MDSCR_EL1); 44 + } 45 + 21 46 static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) 22 47 { 23 - ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); 48 + *ctxt_mdscr_el1(ctxt) = read_sysreg(mdscr_el1); 24 49 25 50 // POR_EL0 can affect uaccess, so must be saved/restored early. 26 51 if (ctxt_has_s1poe(ctxt)) ··· 56 31 { 57 32 ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); 58 33 ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); 59 - } 60 - 61 - static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt) 62 - { 63 - struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; 64 - 65 - if (!vcpu) 66 - vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); 67 - 68 - return vcpu; 69 34 } 70 35 71 36 static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) ··· 154 139 155 140 static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) 156 141 { 157 - write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); 142 + write_sysreg(*ctxt_mdscr_el1(ctxt), mdscr_el1); 158 143 159 144 // POR_EL0 can affect uaccess, so must be saved/restored early. 
160 145 if (ctxt_has_s1poe(ctxt)) ··· 298 283 __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); 299 284 __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); 300 285 301 - if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY)) 286 + if (has_vhe() || kvm_debug_regs_in_use(vcpu)) 302 287 __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); 303 288 } 304 289 ··· 315 300 write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); 316 301 write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); 317 302 318 - if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY)) 303 + if (has_vhe() || kvm_debug_regs_in_use(vcpu)) 319 304 write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); 320 305 } 321 306
-223
arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0-only */ 2 - /* 3 - * Copyright (C) 2021 Google LLC 4 - * Author: Fuad Tabba <tabba@google.com> 5 - */ 6 - 7 - #ifndef __ARM64_KVM_FIXED_CONFIG_H__ 8 - #define __ARM64_KVM_FIXED_CONFIG_H__ 9 - 10 - #include <asm/sysreg.h> 11 - 12 - /* 13 - * This file contains definitions for features to be allowed or restricted for 14 - * guest virtual machines, depending on the mode KVM is running in and on the 15 - * type of guest that is running. 16 - * 17 - * The ALLOW masks represent a bitmask of feature fields that are allowed 18 - * without any restrictions as long as they are supported by the system. 19 - * 20 - * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for 21 - * features that are restricted to support at most the specified feature. 22 - * 23 - * If a feature field is not present in either, than it is not supported. 24 - * 25 - * The approach taken for protected VMs is to allow features that are: 26 - * - Needed by common Linux distributions (e.g., floating point) 27 - * - Trivial to support, e.g., supporting the feature does not introduce or 28 - * require tracking of additional state in KVM 29 - * - Cannot be trapped or prevent the guest from using anyway 30 - */ 31 - 32 - /* 33 - * Allow for protected VMs: 34 - * - Floating-point and Advanced SIMD 35 - * - Data Independent Timing 36 - * - Spectre/Meltdown Mitigation 37 - */ 38 - #define PVM_ID_AA64PFR0_ALLOW (\ 39 - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \ 40 - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \ 41 - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \ 42 - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \ 43 - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) \ 44 - ) 45 - 46 - /* 47 - * Restrict to the following *unsigned* features for protected VMs: 48 - * - AArch64 guests only (no support for AArch32 guests): 49 - * AArch32 adds complexity in trap handling, emulation, condition codes, 50 - * etc... 
51 - * - RAS (v1) 52 - * Supported by KVM 53 - */ 54 - #define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ 55 - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP) | \ 56 - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP) | \ 57 - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP) | \ 58 - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP) | \ 59 - SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, RAS, IMP) \ 60 - ) 61 - 62 - /* 63 - * Allow for protected VMs: 64 - * - Branch Target Identification 65 - * - Speculative Store Bypassing 66 - */ 67 - #define PVM_ID_AA64PFR1_ALLOW (\ 68 - ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \ 69 - ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \ 70 - ) 71 - 72 - #define PVM_ID_AA64PFR2_ALLOW 0ULL 73 - 74 - /* 75 - * Allow for protected VMs: 76 - * - Mixed-endian 77 - * - Distinction between Secure and Non-secure Memory 78 - * - Mixed-endian at EL0 only 79 - * - Non-context synchronizing exception entry and exit 80 - */ 81 - #define PVM_ID_AA64MMFR0_ALLOW (\ 82 - ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \ 83 - ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \ 84 - ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \ 85 - ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \ 86 - ) 87 - 88 - /* 89 - * Restrict to the following *unsigned* features for protected VMs: 90 - * - 40-bit IPA 91 - * - 16-bit ASID 92 - */ 93 - #define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ 94 - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \ 95 - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \ 96 - ) 97 - 98 - /* 99 - * Allow for protected VMs: 100 - * - Hardware translation table updates to Access flag and Dirty state 101 - * - Number of VMID bits from CPU 102 - * - Hierarchical Permission Disables 103 - * - Privileged Access Never 104 - * - SError interrupt exceptions from speculative reads 105 - * - Enhanced Translation Synchronization 106 - * - Control for cache maintenance permission 107 - */ 108 - #define 
PVM_ID_AA64MMFR1_ALLOW (\ 109 - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \ 110 - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \ 111 - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \ 112 - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \ 113 - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \ 114 - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \ 115 - ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \ 116 - ) 117 - 118 - /* 119 - * Allow for protected VMs: 120 - * - Common not Private translations 121 - * - User Access Override 122 - * - IESB bit in the SCTLR_ELx registers 123 - * - Unaligned single-copy atomicity and atomic functions 124 - * - ESR_ELx.EC value on an exception by read access to feature ID space 125 - * - TTL field in address operations. 126 - * - Break-before-make sequences when changing translation block size 127 - * - E0PDx mechanism 128 - */ 129 - #define PVM_ID_AA64MMFR2_ALLOW (\ 130 - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \ 131 - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \ 132 - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \ 133 - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \ 134 - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \ 135 - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \ 136 - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \ 137 - ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \ 138 - ) 139 - 140 - #define PVM_ID_AA64MMFR3_ALLOW (0ULL) 141 - 142 - /* 143 - * No support for Scalable Vectors for protected VMs: 144 - * Requires additional support from KVM, e.g., context-switching and 145 - * trapping at EL2 146 - */ 147 - #define PVM_ID_AA64ZFR0_ALLOW (0ULL) 148 - 149 - /* 150 - * No support for debug, including breakpoints, and watchpoints for protected 151 - * VMs: 152 - * The Arm architecture mandates support for at least the Armv8 debug 153 - * architecture, which would include at least 2 hardware breakpoints and 154 - * watchpoints. Providing that support to protected guests adds 155 - * considerable state and complexity. 
Therefore, the reserved value of 0 is 156 - * used for debug-related fields. 157 - */ 158 - #define PVM_ID_AA64DFR0_ALLOW (0ULL) 159 - #define PVM_ID_AA64DFR1_ALLOW (0ULL) 160 - 161 - /* 162 - * No support for implementation defined features. 163 - */ 164 - #define PVM_ID_AA64AFR0_ALLOW (0ULL) 165 - #define PVM_ID_AA64AFR1_ALLOW (0ULL) 166 - 167 - /* 168 - * No restrictions on instructions implemented in AArch64. 169 - */ 170 - #define PVM_ID_AA64ISAR0_ALLOW (\ 171 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \ 172 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \ 173 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \ 174 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \ 175 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \ 176 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \ 177 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \ 178 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \ 179 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \ 180 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \ 181 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \ 182 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \ 183 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \ 184 - ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \ 185 - ) 186 - 187 - /* Restrict pointer authentication to the basic version. 
*/ 188 - #define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\ 189 - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \ 190 - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \ 191 - ) 192 - 193 - #define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\ 194 - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \ 195 - ) 196 - 197 - #define PVM_ID_AA64ISAR1_ALLOW (\ 198 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \ 199 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \ 200 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \ 201 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \ 202 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \ 203 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \ 204 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \ 205 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \ 206 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \ 207 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \ 208 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \ 209 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \ 210 - ) 211 - 212 - #define PVM_ID_AA64ISAR2_ALLOW (\ 213 - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \ 214 - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \ 215 - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \ 216 - ) 217 - 218 - u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); 219 - bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); 220 - bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); 221 - int kvm_check_pvm_sysreg_table(void); 222 - 223 - #endif /* __ARM64_KVM_FIXED_CONFIG_H__ */
+3 -3
arch/arm64/kvm/hyp/include/nvhe/gfp.h
··· 7 7 #include <nvhe/memory.h> 8 8 #include <nvhe/spinlock.h> 9 9 10 - #define HYP_NO_ORDER USHRT_MAX 10 + #define HYP_NO_ORDER ((u8)(~0)) 11 11 12 12 struct hyp_pool { 13 13 /* ··· 19 19 struct list_head free_area[NR_PAGE_ORDERS]; 20 20 phys_addr_t range_start; 21 21 phys_addr_t range_end; 22 - unsigned short max_order; 22 + u8 max_order; 23 23 }; 24 24 25 25 /* Allocation */ 26 - void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); 26 + void *hyp_alloc_pages(struct hyp_pool *pool, u8 order); 27 27 void hyp_split_page(struct hyp_page *page); 28 28 void hyp_get_page(struct hyp_pool *pool, void *addr); 29 29 void hyp_put_page(struct hyp_pool *pool, void *addr);
+8 -31
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
··· 11 11 #include <asm/kvm_mmu.h> 12 12 #include <asm/kvm_pgtable.h> 13 13 #include <asm/virt.h> 14 + #include <nvhe/memory.h> 14 15 #include <nvhe/pkvm.h> 15 16 #include <nvhe/spinlock.h> 16 - 17 - /* 18 - * SW bits 0-1 are reserved to track the memory ownership state of each page: 19 - * 00: The page is owned exclusively by the page-table owner. 20 - * 01: The page is owned by the page-table owner, but is shared 21 - * with another entity. 22 - * 10: The page is shared with, but not owned by the page-table owner. 23 - * 11: Reserved for future use (lending). 24 - */ 25 - enum pkvm_page_state { 26 - PKVM_PAGE_OWNED = 0ULL, 27 - PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, 28 - PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, 29 - __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 | 30 - KVM_PGTABLE_PROT_SW1, 31 - 32 - /* Meta-states which aren't encoded directly in the PTE's SW bits */ 33 - PKVM_NOPAGE, 34 - }; 35 - 36 - #define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) 37 - static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, 38 - enum pkvm_page_state state) 39 - { 40 - return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state; 41 - } 42 - 43 - static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) 44 - { 45 - return prot & PKVM_PAGE_STATE_PROT_MASK; 46 - } 47 17 48 18 struct host_mmu { 49 19 struct kvm_arch arch; ··· 39 69 int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages); 40 70 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages); 41 71 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages); 72 + int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu, 73 + enum kvm_pgtable_prot prot); 74 + int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm); 75 + int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot); 76 + int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm); 77 + int __pkvm_host_test_clear_young_guest(u64 gfn, 
bool mkold, struct pkvm_hyp_vm *vm); 78 + int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu); 42 79 43 80 bool addr_is_memory(phys_addr_t phys); 44 81 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
+47 -3
arch/arm64/kvm/hyp/include/nvhe/memory.h
··· 7 7 8 8 #include <linux/types.h> 9 9 10 + /* 11 + * Bits 0-1 are reserved to track the memory ownership state of each page: 12 + * 00: The page is owned exclusively by the page-table owner. 13 + * 01: The page is owned by the page-table owner, but is shared 14 + * with another entity. 15 + * 10: The page is shared with, but not owned by the page-table owner. 16 + * 11: Reserved for future use (lending). 17 + */ 18 + enum pkvm_page_state { 19 + PKVM_PAGE_OWNED = 0ULL, 20 + PKVM_PAGE_SHARED_OWNED = BIT(0), 21 + PKVM_PAGE_SHARED_BORROWED = BIT(1), 22 + __PKVM_PAGE_RESERVED = BIT(0) | BIT(1), 23 + 24 + /* Meta-states which aren't encoded directly in the PTE's SW bits */ 25 + PKVM_NOPAGE = BIT(2), 26 + }; 27 + #define PKVM_PAGE_META_STATES_MASK (~__PKVM_PAGE_RESERVED) 28 + 29 + #define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) 30 + static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, 31 + enum pkvm_page_state state) 32 + { 33 + prot &= ~PKVM_PAGE_STATE_PROT_MASK; 34 + prot |= FIELD_PREP(PKVM_PAGE_STATE_PROT_MASK, state); 35 + return prot; 36 + } 37 + 38 + static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) 39 + { 40 + return FIELD_GET(PKVM_PAGE_STATE_PROT_MASK, prot); 41 + } 42 + 10 43 struct hyp_page { 11 - unsigned short refcount; 12 - unsigned short order; 44 + u16 refcount; 45 + u8 order; 46 + 47 + /* Host (non-meta) state. Guarded by the host stage-2 lock. 
*/ 48 + enum pkvm_page_state host_state : 8; 49 + 50 + u32 host_share_guest_count; 13 51 }; 14 52 15 53 extern u64 __hyp_vmemmap; ··· 67 29 68 30 #define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) 69 31 #define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT)) 70 - #define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)]) 32 + 33 + static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys) 34 + { 35 + BUILD_BUG_ON(sizeof(struct hyp_page) != sizeof(u64)); 36 + return &hyp_vmemmap[hyp_phys_to_pfn(phys)]; 37 + } 38 + 71 39 #define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt)) 72 40 #define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt)) 73 41
+23
arch/arm64/kvm/hyp/include/nvhe/pkvm.h
··· 20 20 21 21 /* Backpointer to the host's (untrusted) vCPU instance. */ 22 22 struct kvm_vcpu *host_vcpu; 23 + 24 + /* 25 + * If this hyp vCPU is loaded, then this is a backpointer to the 26 + * per-cpu pointer tracking us. Otherwise, NULL if not loaded. 27 + */ 28 + struct pkvm_hyp_vcpu **loaded_hyp_vcpu; 23 29 }; 24 30 25 31 /* ··· 53 47 struct pkvm_hyp_vcpu *vcpus[]; 54 48 }; 55 49 50 + extern hyp_spinlock_t vm_table_lock; 51 + 56 52 static inline struct pkvm_hyp_vm * 57 53 pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) 58 54 { ··· 64 56 static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu) 65 57 { 66 58 return vcpu_is_protected(&hyp_vcpu->vcpu); 59 + } 60 + 61 + static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm) 62 + { 63 + return kvm_vm_is_protected(&hyp_vm->kvm); 67 64 } 68 65 69 66 void pkvm_hyp_vm_table_init(void *tbl); ··· 82 69 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, 83 70 unsigned int vcpu_idx); 84 71 void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu); 72 + struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void); 73 + 74 + struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle); 75 + struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle); 76 + void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm); 77 + 78 + bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); 79 + bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); 80 + void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu); 81 + int kvm_check_pvm_sysreg_table(void); 85 82 86 83 #endif /* __ARM64_KVM_NVHE_PKVM_H__ */
+41 -33
arch/arm64/kvm/hyp/nvhe/debug-sr.c
··· 51 51 write_sysreg_el1(pmscr_el1, SYS_PMSCR); 52 52 } 53 53 54 - static void __debug_save_trace(u64 *trfcr_el1) 54 + static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr) 55 55 { 56 - *trfcr_el1 = 0; 57 - 58 - /* Check if the TRBE is enabled */ 59 - if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E)) 60 - return; 61 - /* 62 - * Prohibit trace generation while we are in guest. 63 - * Since access to TRFCR_EL1 is trapped, the guest can't 64 - * modify the filtering set by the host. 65 - */ 66 - *trfcr_el1 = read_sysreg_el1(SYS_TRFCR); 67 - write_sysreg_el1(0, SYS_TRFCR); 68 - isb(); 69 - /* Drain the trace buffer to memory */ 70 - tsb_csync(); 56 + *saved_trfcr = read_sysreg_el1(SYS_TRFCR); 57 + write_sysreg_el1(new_trfcr, SYS_TRFCR); 71 58 } 72 59 73 - static void __debug_restore_trace(u64 trfcr_el1) 60 + static bool __trace_needs_drain(void) 74 61 { 75 - if (!trfcr_el1) 76 - return; 62 + if (is_protected_kvm_enabled() && host_data_test_flag(HAS_TRBE)) 63 + return read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E; 77 64 78 - /* Restore trace filter controls */ 79 - write_sysreg_el1(trfcr_el1, SYS_TRFCR); 65 + return host_data_test_flag(TRBE_ENABLED); 66 + } 67 + 68 + static bool __trace_needs_switch(void) 69 + { 70 + return host_data_test_flag(TRBE_ENABLED) || 71 + host_data_test_flag(EL1_TRACING_CONFIGURED); 72 + } 73 + 74 + static void __trace_switch_to_guest(void) 75 + { 76 + /* Unsupported with TRBE so disable */ 77 + if (host_data_test_flag(TRBE_ENABLED)) 78 + *host_data_ptr(trfcr_while_in_guest) = 0; 79 + 80 + __trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1), 81 + *host_data_ptr(trfcr_while_in_guest)); 82 + 83 + if (__trace_needs_drain()) { 84 + isb(); 85 + tsb_csync(); 86 + } 87 + } 88 + 89 + static void __trace_switch_to_host(void) 90 + { 91 + __trace_do_switch(host_data_ptr(trfcr_while_in_guest), 92 + *host_data_ptr(host_debug_state.trfcr_el1)); 80 93 } 81 94 82 95 void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) 83 
96 { 84 97 /* Disable and flush SPE data generation */ 85 - if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) 98 + if (host_data_test_flag(HAS_SPE)) 86 99 __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); 87 - /* Disable and flush Self-Hosted Trace generation */ 88 - if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) 89 - __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1)); 100 + 101 + if (__trace_needs_switch()) 102 + __trace_switch_to_guest(); 90 103 } 91 104 92 105 void __debug_switch_to_guest(struct kvm_vcpu *vcpu) ··· 109 96 110 97 void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) 111 98 { 112 - if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) 99 + if (host_data_test_flag(HAS_SPE)) 113 100 __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); 114 - if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) 115 - __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1)); 101 + if (__trace_needs_switch()) 102 + __trace_switch_to_host(); 116 103 } 117 104 118 105 void __debug_switch_to_host(struct kvm_vcpu *vcpu) 119 106 { 120 107 __debug_switch_to_host_common(vcpu); 121 - } 122 - 123 - u64 __kvm_get_mdcr_el2(void) 124 - { 125 - return read_sysreg(mdcr_el2); 126 108 }
+2 -2
arch/arm64/kvm/hyp/nvhe/host.S
··· 188 188 189 189 /* 190 190 * Test whether the SP has overflowed, without corrupting a GPR. 191 - * nVHE hypervisor stacks are aligned so that the PAGE_SHIFT bit 191 + * nVHE hypervisor stacks are aligned so that the NVHE_STACK_SHIFT bit 192 192 * of SP should always be 1. 193 193 */ 194 194 add sp, sp, x0 // sp' = sp + x0 195 195 sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp 196 - tbz x0, #PAGE_SHIFT, .L__hyp_sp_overflow\@ 196 + tbz x0, #NVHE_STACK_SHIFT, .L__hyp_sp_overflow\@ 197 197 sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 198 198 sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp 199 199
+190 -19
arch/arm64/kvm/hyp/nvhe/hyp-main.c
··· 103 103 /* Limit guest vector length to the maximum supported by the host. */ 104 104 hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl); 105 105 106 - hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; 107 - 108 106 hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; 109 107 hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE); 110 108 hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) & 111 109 (HCR_TWI | HCR_TWE); 112 110 113 111 hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; 114 - 115 - hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); 116 112 117 113 hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; 118 114 ··· 137 141 host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i]; 138 142 } 139 143 144 + static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt) 145 + { 146 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 147 + DECLARE_REG(unsigned int, vcpu_idx, host_ctxt, 2); 148 + DECLARE_REG(u64, hcr_el2, host_ctxt, 3); 149 + struct pkvm_hyp_vcpu *hyp_vcpu; 150 + 151 + if (!is_protected_kvm_enabled()) 152 + return; 153 + 154 + hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx); 155 + if (!hyp_vcpu) 156 + return; 157 + 158 + if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) { 159 + /* Propagate WFx trapping flags */ 160 + hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI); 161 + hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI); 162 + } 163 + } 164 + 165 + static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt) 166 + { 167 + struct pkvm_hyp_vcpu *hyp_vcpu; 168 + 169 + if (!is_protected_kvm_enabled()) 170 + return; 171 + 172 + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 173 + if (hyp_vcpu) 174 + pkvm_put_hyp_vcpu(hyp_vcpu); 175 + } 176 + 140 177 static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) 141 178 { 142 179 DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1); 143 180 int ret; 144 181 145 - host_vcpu = 
kern_hyp_va(host_vcpu); 146 - 147 182 if (unlikely(is_protected_kvm_enabled())) { 148 - struct pkvm_hyp_vcpu *hyp_vcpu; 149 - struct kvm *host_kvm; 183 + struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 150 184 151 185 /* 152 186 * KVM (and pKVM) doesn't support SME guests for now, and ··· 189 163 goto out; 190 164 } 191 165 192 - host_kvm = kern_hyp_va(host_vcpu->kvm); 193 - hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle, 194 - host_vcpu->vcpu_idx); 195 166 if (!hyp_vcpu) { 196 167 ret = -EINVAL; 197 168 goto out; ··· 199 176 ret = __kvm_vcpu_run(&hyp_vcpu->vcpu); 200 177 201 178 sync_hyp_vcpu(hyp_vcpu); 202 - pkvm_put_hyp_vcpu(hyp_vcpu); 203 179 } else { 204 180 /* The host is fully trusted, run its vCPU directly. */ 205 - ret = __kvm_vcpu_run(host_vcpu); 181 + ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu)); 206 182 } 183 + out: 184 + cpu_reg(host_ctxt, 1) = ret; 185 + } 207 186 187 + static int pkvm_refill_memcache(struct pkvm_hyp_vcpu *hyp_vcpu) 188 + { 189 + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; 190 + 191 + return refill_memcache(&hyp_vcpu->vcpu.arch.pkvm_memcache, 192 + host_vcpu->arch.pkvm_memcache.nr_pages, 193 + &host_vcpu->arch.pkvm_memcache); 194 + } 195 + 196 + static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt) 197 + { 198 + DECLARE_REG(u64, pfn, host_ctxt, 1); 199 + DECLARE_REG(u64, gfn, host_ctxt, 2); 200 + DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3); 201 + struct pkvm_hyp_vcpu *hyp_vcpu; 202 + int ret = -EINVAL; 203 + 204 + if (!is_protected_kvm_enabled()) 205 + goto out; 206 + 207 + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 208 + if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) 209 + goto out; 210 + 211 + ret = pkvm_refill_memcache(hyp_vcpu); 212 + if (ret) 213 + goto out; 214 + 215 + ret = __pkvm_host_share_guest(pfn, gfn, hyp_vcpu, prot); 216 + out: 217 + cpu_reg(host_ctxt, 1) = ret; 218 + } 219 + 220 + static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context 
*host_ctxt) 221 + { 222 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 223 + DECLARE_REG(u64, gfn, host_ctxt, 2); 224 + struct pkvm_hyp_vm *hyp_vm; 225 + int ret = -EINVAL; 226 + 227 + if (!is_protected_kvm_enabled()) 228 + goto out; 229 + 230 + hyp_vm = get_np_pkvm_hyp_vm(handle); 231 + if (!hyp_vm) 232 + goto out; 233 + 234 + ret = __pkvm_host_unshare_guest(gfn, hyp_vm); 235 + put_pkvm_hyp_vm(hyp_vm); 236 + out: 237 + cpu_reg(host_ctxt, 1) = ret; 238 + } 239 + 240 + static void handle___pkvm_host_relax_perms_guest(struct kvm_cpu_context *host_ctxt) 241 + { 242 + DECLARE_REG(u64, gfn, host_ctxt, 1); 243 + DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 2); 244 + struct pkvm_hyp_vcpu *hyp_vcpu; 245 + int ret = -EINVAL; 246 + 247 + if (!is_protected_kvm_enabled()) 248 + goto out; 249 + 250 + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 251 + if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) 252 + goto out; 253 + 254 + ret = __pkvm_host_relax_perms_guest(gfn, hyp_vcpu, prot); 255 + out: 256 + cpu_reg(host_ctxt, 1) = ret; 257 + } 258 + 259 + static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt) 260 + { 261 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 262 + DECLARE_REG(u64, gfn, host_ctxt, 2); 263 + struct pkvm_hyp_vm *hyp_vm; 264 + int ret = -EINVAL; 265 + 266 + if (!is_protected_kvm_enabled()) 267 + goto out; 268 + 269 + hyp_vm = get_np_pkvm_hyp_vm(handle); 270 + if (!hyp_vm) 271 + goto out; 272 + 273 + ret = __pkvm_host_wrprotect_guest(gfn, hyp_vm); 274 + put_pkvm_hyp_vm(hyp_vm); 275 + out: 276 + cpu_reg(host_ctxt, 1) = ret; 277 + } 278 + 279 + static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *host_ctxt) 280 + { 281 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 282 + DECLARE_REG(u64, gfn, host_ctxt, 2); 283 + DECLARE_REG(bool, mkold, host_ctxt, 3); 284 + struct pkvm_hyp_vm *hyp_vm; 285 + int ret = -EINVAL; 286 + 287 + if (!is_protected_kvm_enabled()) 288 + goto out; 289 + 290 + hyp_vm = 
get_np_pkvm_hyp_vm(handle); 291 + if (!hyp_vm) 292 + goto out; 293 + 294 + ret = __pkvm_host_test_clear_young_guest(gfn, mkold, hyp_vm); 295 + put_pkvm_hyp_vm(hyp_vm); 296 + out: 297 + cpu_reg(host_ctxt, 1) = ret; 298 + } 299 + 300 + static void handle___pkvm_host_mkyoung_guest(struct kvm_cpu_context *host_ctxt) 301 + { 302 + DECLARE_REG(u64, gfn, host_ctxt, 1); 303 + struct pkvm_hyp_vcpu *hyp_vcpu; 304 + int ret = -EINVAL; 305 + 306 + if (!is_protected_kvm_enabled()) 307 + goto out; 308 + 309 + hyp_vcpu = pkvm_get_loaded_hyp_vcpu(); 310 + if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu)) 311 + goto out; 312 + 313 + ret = __pkvm_host_mkyoung_guest(gfn, hyp_vcpu); 208 314 out: 209 315 cpu_reg(host_ctxt, 1) = ret; 210 316 } ··· 385 233 __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); 386 234 } 387 235 236 + static void handle___pkvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) 237 + { 238 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 239 + struct pkvm_hyp_vm *hyp_vm; 240 + 241 + if (!is_protected_kvm_enabled()) 242 + return; 243 + 244 + hyp_vm = get_np_pkvm_hyp_vm(handle); 245 + if (!hyp_vm) 246 + return; 247 + 248 + __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu); 249 + put_pkvm_hyp_vm(hyp_vm); 250 + } 251 + 388 252 static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt) 389 253 { 390 254 DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); ··· 430 262 static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) 431 263 { 432 264 __vgic_v3_init_lrs(); 433 - } 434 - 435 - static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) 436 - { 437 - cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); 438 265 } 439 266 440 267 static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt) ··· 547 384 548 385 static const hcall_t host_hcall[] = { 549 386 /* ___kvm_hyp_init */ 550 - HANDLE_FUNC(__kvm_get_mdcr_el2), 551 387 HANDLE_FUNC(__pkvm_init), 552 388 HANDLE_FUNC(__pkvm_create_private_mapping), 553 389 
HANDLE_FUNC(__pkvm_cpu_set_vector), ··· 557 395 558 396 HANDLE_FUNC(__pkvm_host_share_hyp), 559 397 HANDLE_FUNC(__pkvm_host_unshare_hyp), 398 + HANDLE_FUNC(__pkvm_host_share_guest), 399 + HANDLE_FUNC(__pkvm_host_unshare_guest), 400 + HANDLE_FUNC(__pkvm_host_relax_perms_guest), 401 + HANDLE_FUNC(__pkvm_host_wrprotect_guest), 402 + HANDLE_FUNC(__pkvm_host_test_clear_young_guest), 403 + HANDLE_FUNC(__pkvm_host_mkyoung_guest), 560 404 HANDLE_FUNC(__kvm_adjust_pc), 561 405 HANDLE_FUNC(__kvm_vcpu_run), 562 406 HANDLE_FUNC(__kvm_flush_vm_context), ··· 577 409 HANDLE_FUNC(__pkvm_init_vm), 578 410 HANDLE_FUNC(__pkvm_init_vcpu), 579 411 HANDLE_FUNC(__pkvm_teardown_vm), 412 + HANDLE_FUNC(__pkvm_vcpu_load), 413 + HANDLE_FUNC(__pkvm_vcpu_put), 414 + HANDLE_FUNC(__pkvm_tlb_flush_vmid), 580 415 }; 581 416 582 417 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
+356 -584
arch/arm64/kvm/hyp/nvhe/mem_protect.c
··· 201 201 202 202 memset(addr, 0, PAGE_SIZE); 203 203 p = hyp_virt_to_page(addr); 204 - memset(p, 0, sizeof(*p)); 205 204 p->refcount = 1; 205 + p->order = 0; 206 206 207 207 return addr; 208 208 } ··· 268 268 269 269 void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) 270 270 { 271 + struct hyp_page *page; 271 272 void *addr; 272 273 273 274 /* Dump all pgtable pages in the hyp_pool */ ··· 280 279 /* Drain the hyp_pool into the memcache */ 281 280 addr = hyp_alloc_pages(&vm->pool, 0); 282 281 while (addr) { 283 - memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page)); 282 + page = hyp_virt_to_page(addr); 283 + page->refcount = 0; 284 + page->order = 0; 284 285 push_hyp_memcache(mc, addr, hyp_virt_to_phys); 285 286 WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1)); 286 287 addr = hyp_alloc_pages(&vm->pool, 0); ··· 385 382 return !!find_mem_range(phys, &range); 386 383 } 387 384 388 - static bool addr_is_allowed_memory(phys_addr_t phys) 385 + static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) 386 + { 387 + return range->start <= addr && addr < range->end; 388 + } 389 + 390 + static int check_range_allowed_memory(u64 start, u64 end) 389 391 { 390 392 struct memblock_region *reg; 391 393 struct kvm_mem_range range; 392 394 393 - reg = find_mem_range(phys, &range); 395 + /* 396 + * Callers can't check the state of a range that overlaps memory and 397 + * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range. 
398 + */ 399 + reg = find_mem_range(start, &range); 400 + if (!is_in_mem_range(end - 1, &range)) 401 + return -EINVAL; 394 402 395 - return reg && !(reg->flags & MEMBLOCK_NOMAP); 396 - } 403 + if (!reg || reg->flags & MEMBLOCK_NOMAP) 404 + return -EPERM; 397 405 398 - static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) 399 - { 400 - return range->start <= addr && addr < range->end; 406 + return 0; 401 407 } 402 408 403 409 static bool range_is_memory(u64 start, u64 end) ··· 466 454 if (kvm_pte_valid(pte)) 467 455 return -EAGAIN; 468 456 469 - if (pte) 457 + if (pte) { 458 + WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE); 470 459 return -EPERM; 460 + } 471 461 472 462 do { 473 463 u64 granule = kvm_granule_size(level); ··· 491 477 return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); 492 478 } 493 479 480 + static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state) 481 + { 482 + phys_addr_t end = addr + size; 483 + 484 + for (; addr < end; addr += PAGE_SIZE) 485 + hyp_phys_to_page(addr)->host_state = state; 486 + } 487 + 494 488 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) 495 489 { 496 - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, 497 - addr, size, &host_s2_pool, owner_id); 490 + int ret; 491 + 492 + if (!addr_is_memory(addr)) 493 + return -EPERM; 494 + 495 + ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, 496 + addr, size, &host_s2_pool, owner_id); 497 + if (ret) 498 + return ret; 499 + 500 + /* Don't forget to update the vmemmap tracking for the host */ 501 + if (owner_id == PKVM_ID_HOST) 502 + __host_update_page_state(addr, size, PKVM_PAGE_OWNED); 503 + else 504 + __host_update_page_state(addr, size, PKVM_NOPAGE); 505 + 506 + return 0; 498 507 } 499 508 500 509 static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) ··· 583 546 BUG_ON(ret && ret != -EAGAIN); 584 
547 } 585 548 586 - struct pkvm_mem_transition { 587 - u64 nr_pages; 588 - 589 - struct { 590 - enum pkvm_component_id id; 591 - /* Address in the initiator's address space */ 592 - u64 addr; 593 - 594 - union { 595 - struct { 596 - /* Address in the completer's address space */ 597 - u64 completer_addr; 598 - } host; 599 - struct { 600 - u64 completer_addr; 601 - } hyp; 602 - }; 603 - } initiator; 604 - 605 - struct { 606 - enum pkvm_component_id id; 607 - } completer; 608 - }; 609 - 610 - struct pkvm_mem_share { 611 - const struct pkvm_mem_transition tx; 612 - const enum kvm_pgtable_prot completer_prot; 613 - }; 614 - 615 - struct pkvm_mem_donation { 616 - const struct pkvm_mem_transition tx; 617 - }; 618 - 619 549 struct check_walk_data { 620 550 enum pkvm_page_state desired; 621 551 enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr); ··· 608 604 return kvm_pgtable_walk(pgt, addr, size, &walker); 609 605 } 610 606 611 - static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr) 612 - { 613 - if (!addr_is_allowed_memory(addr)) 614 - return PKVM_NOPAGE; 615 - 616 - if (!kvm_pte_valid(pte) && pte) 617 - return PKVM_NOPAGE; 618 - 619 - return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); 620 - } 621 - 622 607 static int __host_check_page_state_range(u64 addr, u64 size, 623 608 enum pkvm_page_state state) 624 609 { 625 - struct check_walk_data d = { 626 - .desired = state, 627 - .get_page_state = host_get_page_state, 628 - }; 610 + u64 end = addr + size; 611 + int ret; 612 + 613 + ret = check_range_allowed_memory(addr, end); 614 + if (ret) 615 + return ret; 629 616 630 617 hyp_assert_lock_held(&host_mmu.lock); 631 - return check_page_state_range(&host_mmu.pgt, addr, size, &d); 618 + for (; addr < end; addr += PAGE_SIZE) { 619 + if (hyp_phys_to_page(addr)->host_state != state) 620 + return -EPERM; 621 + } 622 + 623 + return 0; 632 624 } 633 625 634 626 static int __host_set_page_state_range(u64 addr, u64 size, 635 627 enum pkvm_page_state 
state) 636 628 { 637 - enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state); 629 + if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) { 630 + int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT); 638 631 639 - return host_stage2_idmap_locked(addr, size, prot); 640 - } 632 + if (ret) 633 + return ret; 634 + } 641 635 642 - static int host_request_owned_transition(u64 *completer_addr, 643 - const struct pkvm_mem_transition *tx) 644 - { 645 - u64 size = tx->nr_pages * PAGE_SIZE; 646 - u64 addr = tx->initiator.addr; 636 + __host_update_page_state(addr, size, state); 647 637 648 - *completer_addr = tx->initiator.host.completer_addr; 649 - return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED); 650 - } 651 - 652 - static int host_request_unshare(u64 *completer_addr, 653 - const struct pkvm_mem_transition *tx) 654 - { 655 - u64 size = tx->nr_pages * PAGE_SIZE; 656 - u64 addr = tx->initiator.addr; 657 - 658 - *completer_addr = tx->initiator.host.completer_addr; 659 - return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); 660 - } 661 - 662 - static int host_initiate_share(u64 *completer_addr, 663 - const struct pkvm_mem_transition *tx) 664 - { 665 - u64 size = tx->nr_pages * PAGE_SIZE; 666 - u64 addr = tx->initiator.addr; 667 - 668 - *completer_addr = tx->initiator.host.completer_addr; 669 - return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); 670 - } 671 - 672 - static int host_initiate_unshare(u64 *completer_addr, 673 - const struct pkvm_mem_transition *tx) 674 - { 675 - u64 size = tx->nr_pages * PAGE_SIZE; 676 - u64 addr = tx->initiator.addr; 677 - 678 - *completer_addr = tx->initiator.host.completer_addr; 679 - return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED); 680 - } 681 - 682 - static int host_initiate_donation(u64 *completer_addr, 683 - const struct pkvm_mem_transition *tx) 684 - { 685 - u8 owner_id = tx->completer.id; 686 - u64 size = tx->nr_pages * PAGE_SIZE; 687 - 688 
- *completer_addr = tx->initiator.host.completer_addr; 689 - return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id); 690 - } 691 - 692 - static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) 693 - { 694 - return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || 695 - tx->initiator.id != PKVM_ID_HYP); 696 - } 697 - 698 - static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx, 699 - enum pkvm_page_state state) 700 - { 701 - u64 size = tx->nr_pages * PAGE_SIZE; 702 - 703 - if (__host_ack_skip_pgtable_check(tx)) 704 - return 0; 705 - 706 - return __host_check_page_state_range(addr, size, state); 707 - } 708 - 709 - static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) 710 - { 711 - return __host_ack_transition(addr, tx, PKVM_NOPAGE); 712 - } 713 - 714 - static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx) 715 - { 716 - u64 size = tx->nr_pages * PAGE_SIZE; 717 - u8 host_id = tx->completer.id; 718 - 719 - return host_stage2_set_owner_locked(addr, size, host_id); 638 + return 0; 720 639 } 721 640 722 641 static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr) ··· 662 735 return check_page_state_range(&pkvm_pgtable, addr, size, &d); 663 736 } 664 737 665 - static int hyp_request_donation(u64 *completer_addr, 666 - const struct pkvm_mem_transition *tx) 738 + static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr) 667 739 { 668 - u64 size = tx->nr_pages * PAGE_SIZE; 669 - u64 addr = tx->initiator.addr; 740 + if (!kvm_pte_valid(pte)) 741 + return PKVM_NOPAGE; 670 742 671 - *completer_addr = tx->initiator.hyp.completer_addr; 672 - return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED); 743 + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); 673 744 } 674 745 675 - static int hyp_initiate_donation(u64 *completer_addr, 676 - const struct pkvm_mem_transition *tx) 746 + static int __guest_check_page_state_range(struct 
pkvm_hyp_vcpu *vcpu, u64 addr, 747 + u64 size, enum pkvm_page_state state) 677 748 { 678 - u64 size = tx->nr_pages * PAGE_SIZE; 679 - int ret; 749 + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); 750 + struct check_walk_data d = { 751 + .desired = state, 752 + .get_page_state = guest_get_page_state, 753 + }; 680 754 681 - *completer_addr = tx->initiator.hyp.completer_addr; 682 - ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size); 683 - return (ret != size) ? -EFAULT : 0; 684 - } 685 - 686 - static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) 687 - { 688 - return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || 689 - tx->initiator.id != PKVM_ID_HOST); 690 - } 691 - 692 - static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx, 693 - enum kvm_pgtable_prot perms) 694 - { 695 - u64 size = tx->nr_pages * PAGE_SIZE; 696 - 697 - if (perms != PAGE_HYP) 698 - return -EPERM; 699 - 700 - if (__hyp_ack_skip_pgtable_check(tx)) 701 - return 0; 702 - 703 - return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); 704 - } 705 - 706 - static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) 707 - { 708 - u64 size = tx->nr_pages * PAGE_SIZE; 709 - 710 - if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr)) 711 - return -EBUSY; 712 - 713 - return __hyp_check_page_state_range(addr, size, 714 - PKVM_PAGE_SHARED_BORROWED); 715 - } 716 - 717 - static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) 718 - { 719 - u64 size = tx->nr_pages * PAGE_SIZE; 720 - 721 - if (__hyp_ack_skip_pgtable_check(tx)) 722 - return 0; 723 - 724 - return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); 725 - } 726 - 727 - static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, 728 - enum kvm_pgtable_prot perms) 729 - { 730 - void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); 731 - enum kvm_pgtable_prot prot; 732 - 733 - prot = pkvm_mkstate(perms, 
PKVM_PAGE_SHARED_BORROWED); 734 - return pkvm_create_mappings_locked(start, end, prot); 735 - } 736 - 737 - static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx) 738 - { 739 - u64 size = tx->nr_pages * PAGE_SIZE; 740 - int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size); 741 - 742 - return (ret != size) ? -EFAULT : 0; 743 - } 744 - 745 - static int hyp_complete_donation(u64 addr, 746 - const struct pkvm_mem_transition *tx) 747 - { 748 - void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); 749 - enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); 750 - 751 - return pkvm_create_mappings_locked(start, end, prot); 752 - } 753 - 754 - static int check_share(struct pkvm_mem_share *share) 755 - { 756 - const struct pkvm_mem_transition *tx = &share->tx; 757 - u64 completer_addr; 758 - int ret; 759 - 760 - switch (tx->initiator.id) { 761 - case PKVM_ID_HOST: 762 - ret = host_request_owned_transition(&completer_addr, tx); 763 - break; 764 - default: 765 - ret = -EINVAL; 766 - } 767 - 768 - if (ret) 769 - return ret; 770 - 771 - switch (tx->completer.id) { 772 - case PKVM_ID_HYP: 773 - ret = hyp_ack_share(completer_addr, tx, share->completer_prot); 774 - break; 775 - case PKVM_ID_FFA: 776 - /* 777 - * We only check the host; the secure side will check the other 778 - * end when we forward the FFA call. 
779 - */ 780 - ret = 0; 781 - break; 782 - default: 783 - ret = -EINVAL; 784 - } 785 - 786 - return ret; 787 - } 788 - 789 - static int __do_share(struct pkvm_mem_share *share) 790 - { 791 - const struct pkvm_mem_transition *tx = &share->tx; 792 - u64 completer_addr; 793 - int ret; 794 - 795 - switch (tx->initiator.id) { 796 - case PKVM_ID_HOST: 797 - ret = host_initiate_share(&completer_addr, tx); 798 - break; 799 - default: 800 - ret = -EINVAL; 801 - } 802 - 803 - if (ret) 804 - return ret; 805 - 806 - switch (tx->completer.id) { 807 - case PKVM_ID_HYP: 808 - ret = hyp_complete_share(completer_addr, tx, share->completer_prot); 809 - break; 810 - case PKVM_ID_FFA: 811 - /* 812 - * We're not responsible for any secure page-tables, so there's 813 - * nothing to do here. 814 - */ 815 - ret = 0; 816 - break; 817 - default: 818 - ret = -EINVAL; 819 - } 820 - 821 - return ret; 822 - } 823 - 824 - /* 825 - * do_share(): 826 - * 827 - * The page owner grants access to another component with a given set 828 - * of permissions. 
829 - * 830 - * Initiator: OWNED => SHARED_OWNED 831 - * Completer: NOPAGE => SHARED_BORROWED 832 - */ 833 - static int do_share(struct pkvm_mem_share *share) 834 - { 835 - int ret; 836 - 837 - ret = check_share(share); 838 - if (ret) 839 - return ret; 840 - 841 - return WARN_ON(__do_share(share)); 842 - } 843 - 844 - static int check_unshare(struct pkvm_mem_share *share) 845 - { 846 - const struct pkvm_mem_transition *tx = &share->tx; 847 - u64 completer_addr; 848 - int ret; 849 - 850 - switch (tx->initiator.id) { 851 - case PKVM_ID_HOST: 852 - ret = host_request_unshare(&completer_addr, tx); 853 - break; 854 - default: 855 - ret = -EINVAL; 856 - } 857 - 858 - if (ret) 859 - return ret; 860 - 861 - switch (tx->completer.id) { 862 - case PKVM_ID_HYP: 863 - ret = hyp_ack_unshare(completer_addr, tx); 864 - break; 865 - case PKVM_ID_FFA: 866 - /* See check_share() */ 867 - ret = 0; 868 - break; 869 - default: 870 - ret = -EINVAL; 871 - } 872 - 873 - return ret; 874 - } 875 - 876 - static int __do_unshare(struct pkvm_mem_share *share) 877 - { 878 - const struct pkvm_mem_transition *tx = &share->tx; 879 - u64 completer_addr; 880 - int ret; 881 - 882 - switch (tx->initiator.id) { 883 - case PKVM_ID_HOST: 884 - ret = host_initiate_unshare(&completer_addr, tx); 885 - break; 886 - default: 887 - ret = -EINVAL; 888 - } 889 - 890 - if (ret) 891 - return ret; 892 - 893 - switch (tx->completer.id) { 894 - case PKVM_ID_HYP: 895 - ret = hyp_complete_unshare(completer_addr, tx); 896 - break; 897 - case PKVM_ID_FFA: 898 - /* See __do_share() */ 899 - ret = 0; 900 - break; 901 - default: 902 - ret = -EINVAL; 903 - } 904 - 905 - return ret; 906 - } 907 - 908 - /* 909 - * do_unshare(): 910 - * 911 - * The page owner revokes access from another component for a range of 912 - * pages which were previously shared using do_share(). 
913 - * 914 - * Initiator: SHARED_OWNED => OWNED 915 - * Completer: SHARED_BORROWED => NOPAGE 916 - */ 917 - static int do_unshare(struct pkvm_mem_share *share) 918 - { 919 - int ret; 920 - 921 - ret = check_unshare(share); 922 - if (ret) 923 - return ret; 924 - 925 - return WARN_ON(__do_unshare(share)); 926 - } 927 - 928 - static int check_donation(struct pkvm_mem_donation *donation) 929 - { 930 - const struct pkvm_mem_transition *tx = &donation->tx; 931 - u64 completer_addr; 932 - int ret; 933 - 934 - switch (tx->initiator.id) { 935 - case PKVM_ID_HOST: 936 - ret = host_request_owned_transition(&completer_addr, tx); 937 - break; 938 - case PKVM_ID_HYP: 939 - ret = hyp_request_donation(&completer_addr, tx); 940 - break; 941 - default: 942 - ret = -EINVAL; 943 - } 944 - 945 - if (ret) 946 - return ret; 947 - 948 - switch (tx->completer.id) { 949 - case PKVM_ID_HOST: 950 - ret = host_ack_donation(completer_addr, tx); 951 - break; 952 - case PKVM_ID_HYP: 953 - ret = hyp_ack_donation(completer_addr, tx); 954 - break; 955 - default: 956 - ret = -EINVAL; 957 - } 958 - 959 - return ret; 960 - } 961 - 962 - static int __do_donate(struct pkvm_mem_donation *donation) 963 - { 964 - const struct pkvm_mem_transition *tx = &donation->tx; 965 - u64 completer_addr; 966 - int ret; 967 - 968 - switch (tx->initiator.id) { 969 - case PKVM_ID_HOST: 970 - ret = host_initiate_donation(&completer_addr, tx); 971 - break; 972 - case PKVM_ID_HYP: 973 - ret = hyp_initiate_donation(&completer_addr, tx); 974 - break; 975 - default: 976 - ret = -EINVAL; 977 - } 978 - 979 - if (ret) 980 - return ret; 981 - 982 - switch (tx->completer.id) { 983 - case PKVM_ID_HOST: 984 - ret = host_complete_donation(completer_addr, tx); 985 - break; 986 - case PKVM_ID_HYP: 987 - ret = hyp_complete_donation(completer_addr, tx); 988 - break; 989 - default: 990 - ret = -EINVAL; 991 - } 992 - 993 - return ret; 994 - } 995 - 996 - /* 997 - * do_donate(): 998 - * 999 - * The page owner transfers ownership to another 
component, losing access 1000 - * as a consequence. 1001 - * 1002 - * Initiator: OWNED => NOPAGE 1003 - * Completer: NOPAGE => OWNED 1004 - */ 1005 - static int do_donate(struct pkvm_mem_donation *donation) 1006 - { 1007 - int ret; 1008 - 1009 - ret = check_donation(donation); 1010 - if (ret) 1011 - return ret; 1012 - 1013 - return WARN_ON(__do_donate(donation)); 755 + hyp_assert_lock_held(&vm->lock); 756 + return check_page_state_range(&vm->pgt, addr, size, &d); 1014 757 } 1015 758 1016 759 int __pkvm_host_share_hyp(u64 pfn) 1017 760 { 761 + u64 phys = hyp_pfn_to_phys(pfn); 762 + void *virt = __hyp_va(phys); 763 + enum kvm_pgtable_prot prot; 764 + u64 size = PAGE_SIZE; 1018 765 int ret; 1019 - u64 host_addr = hyp_pfn_to_phys(pfn); 1020 - u64 hyp_addr = (u64)__hyp_va(host_addr); 1021 - struct pkvm_mem_share share = { 1022 - .tx = { 1023 - .nr_pages = 1, 1024 - .initiator = { 1025 - .id = PKVM_ID_HOST, 1026 - .addr = host_addr, 1027 - .host = { 1028 - .completer_addr = hyp_addr, 1029 - }, 1030 - }, 1031 - .completer = { 1032 - .id = PKVM_ID_HYP, 1033 - }, 1034 - }, 1035 - .completer_prot = PAGE_HYP, 1036 - }; 1037 766 1038 767 host_lock_component(); 1039 768 hyp_lock_component(); 1040 769 1041 - ret = do_share(&share); 770 + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); 771 + if (ret) 772 + goto unlock; 773 + if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { 774 + ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE); 775 + if (ret) 776 + goto unlock; 777 + } 1042 778 779 + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); 780 + WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot)); 781 + WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED)); 782 + 783 + unlock: 1043 784 hyp_unlock_component(); 1044 785 host_unlock_component(); 1045 786 ··· 716 1121 717 1122 int __pkvm_host_unshare_hyp(u64 pfn) 718 1123 { 1124 + u64 phys = hyp_pfn_to_phys(pfn); 1125 + u64 virt = (u64)__hyp_va(phys); 1126 + u64 size = 
PAGE_SIZE; 719 1127 int ret; 720 - u64 host_addr = hyp_pfn_to_phys(pfn); 721 - u64 hyp_addr = (u64)__hyp_va(host_addr); 722 - struct pkvm_mem_share share = { 723 - .tx = { 724 - .nr_pages = 1, 725 - .initiator = { 726 - .id = PKVM_ID_HOST, 727 - .addr = host_addr, 728 - .host = { 729 - .completer_addr = hyp_addr, 730 - }, 731 - }, 732 - .completer = { 733 - .id = PKVM_ID_HYP, 734 - }, 735 - }, 736 - .completer_prot = PAGE_HYP, 737 - }; 738 1128 739 1129 host_lock_component(); 740 1130 hyp_lock_component(); 741 1131 742 - ret = do_unshare(&share); 1132 + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); 1133 + if (ret) 1134 + goto unlock; 1135 + ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED); 1136 + if (ret) 1137 + goto unlock; 1138 + if (hyp_page_count((void *)virt)) { 1139 + ret = -EBUSY; 1140 + goto unlock; 1141 + } 743 1142 1143 + WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size); 1144 + WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED)); 1145 + 1146 + unlock: 744 1147 hyp_unlock_component(); 745 1148 host_unlock_component(); 746 1149 ··· 747 1154 748 1155 int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) 749 1156 { 1157 + u64 phys = hyp_pfn_to_phys(pfn); 1158 + u64 size = PAGE_SIZE * nr_pages; 1159 + void *virt = __hyp_va(phys); 1160 + enum kvm_pgtable_prot prot; 750 1161 int ret; 751 - u64 host_addr = hyp_pfn_to_phys(pfn); 752 - u64 hyp_addr = (u64)__hyp_va(host_addr); 753 - struct pkvm_mem_donation donation = { 754 - .tx = { 755 - .nr_pages = nr_pages, 756 - .initiator = { 757 - .id = PKVM_ID_HOST, 758 - .addr = host_addr, 759 - .host = { 760 - .completer_addr = hyp_addr, 761 - }, 762 - }, 763 - .completer = { 764 - .id = PKVM_ID_HYP, 765 - }, 766 - }, 767 - }; 768 1162 769 1163 host_lock_component(); 770 1164 hyp_lock_component(); 771 1165 772 - ret = do_donate(&donation); 1166 + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); 1167 + if (ret) 1168 + goto 
unlock; 1169 + if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { 1170 + ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE); 1171 + if (ret) 1172 + goto unlock; 1173 + } 773 1174 1175 + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); 1176 + WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot)); 1177 + WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP)); 1178 + 1179 + unlock: 774 1180 hyp_unlock_component(); 775 1181 host_unlock_component(); 776 1182 ··· 778 1186 779 1187 int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) 780 1188 { 1189 + u64 phys = hyp_pfn_to_phys(pfn); 1190 + u64 size = PAGE_SIZE * nr_pages; 1191 + u64 virt = (u64)__hyp_va(phys); 781 1192 int ret; 782 - u64 host_addr = hyp_pfn_to_phys(pfn); 783 - u64 hyp_addr = (u64)__hyp_va(host_addr); 784 - struct pkvm_mem_donation donation = { 785 - .tx = { 786 - .nr_pages = nr_pages, 787 - .initiator = { 788 - .id = PKVM_ID_HYP, 789 - .addr = hyp_addr, 790 - .hyp = { 791 - .completer_addr = host_addr, 792 - }, 793 - }, 794 - .completer = { 795 - .id = PKVM_ID_HOST, 796 - }, 797 - }, 798 - }; 799 1193 800 1194 host_lock_component(); 801 1195 hyp_lock_component(); 802 1196 803 - ret = do_donate(&donation); 1197 + ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED); 1198 + if (ret) 1199 + goto unlock; 1200 + if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { 1201 + ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE); 1202 + if (ret) 1203 + goto unlock; 1204 + } 804 1205 1206 + WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size); 1207 + WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST)); 1208 + 1209 + unlock: 805 1210 hyp_unlock_component(); 806 1211 host_unlock_component(); 807 1212 ··· 852 1263 853 1264 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages) 854 1265 { 1266 + u64 phys = hyp_pfn_to_phys(pfn); 1267 + u64 size = PAGE_SIZE * nr_pages; 855 1268 int ret; 856 - struct pkvm_mem_share share = { 857 - .tx = { 858 - .nr_pages = nr_pages, 859 - 
.initiator = { 860 - .id = PKVM_ID_HOST, 861 - .addr = hyp_pfn_to_phys(pfn), 862 - }, 863 - .completer = { 864 - .id = PKVM_ID_FFA, 865 - }, 866 - }, 867 - }; 868 1269 869 1270 host_lock_component(); 870 - ret = do_share(&share); 1271 + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); 1272 + if (!ret) 1273 + ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); 871 1274 host_unlock_component(); 872 1275 873 1276 return ret; ··· 867 1286 868 1287 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) 869 1288 { 1289 + u64 phys = hyp_pfn_to_phys(pfn); 1290 + u64 size = PAGE_SIZE * nr_pages; 870 1291 int ret; 871 - struct pkvm_mem_share share = { 872 - .tx = { 873 - .nr_pages = nr_pages, 874 - .initiator = { 875 - .id = PKVM_ID_HOST, 876 - .addr = hyp_pfn_to_phys(pfn), 877 - }, 878 - .completer = { 879 - .id = PKVM_ID_FFA, 880 - }, 881 - }, 882 - }; 883 1292 884 1293 host_lock_component(); 885 - ret = do_unshare(&share); 1294 + ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); 1295 + if (!ret) 1296 + ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED); 1297 + host_unlock_component(); 1298 + 1299 + return ret; 1300 + } 1301 + 1302 + int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu, 1303 + enum kvm_pgtable_prot prot) 1304 + { 1305 + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); 1306 + u64 phys = hyp_pfn_to_phys(pfn); 1307 + u64 ipa = hyp_pfn_to_phys(gfn); 1308 + struct hyp_page *page; 1309 + int ret; 1310 + 1311 + if (prot & ~KVM_PGTABLE_PROT_RWX) 1312 + return -EINVAL; 1313 + 1314 + ret = check_range_allowed_memory(phys, phys + PAGE_SIZE); 1315 + if (ret) 1316 + return ret; 1317 + 1318 + host_lock_component(); 1319 + guest_lock_component(vm); 1320 + 1321 + ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE); 1322 + if (ret) 1323 + goto unlock; 1324 + 1325 + page = hyp_phys_to_page(phys); 1326 + switch (page->host_state) { 1327 + case PKVM_PAGE_OWNED: 
1328 + WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED)); 1329 + break; 1330 + case PKVM_PAGE_SHARED_OWNED: 1331 + if (page->host_share_guest_count) 1332 + break; 1333 + /* Only host to np-guest multi-sharing is tolerated */ 1334 + WARN_ON(1); 1335 + fallthrough; 1336 + default: 1337 + ret = -EPERM; 1338 + goto unlock; 1339 + } 1340 + 1341 + WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys, 1342 + pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED), 1343 + &vcpu->vcpu.arch.pkvm_memcache, 0)); 1344 + page->host_share_guest_count++; 1345 + 1346 + unlock: 1347 + guest_unlock_component(vm); 1348 + host_unlock_component(); 1349 + 1350 + return ret; 1351 + } 1352 + 1353 + static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa) 1354 + { 1355 + enum pkvm_page_state state; 1356 + struct hyp_page *page; 1357 + kvm_pte_t pte; 1358 + u64 phys; 1359 + s8 level; 1360 + int ret; 1361 + 1362 + ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); 1363 + if (ret) 1364 + return ret; 1365 + if (level != KVM_PGTABLE_LAST_LEVEL) 1366 + return -E2BIG; 1367 + if (!kvm_pte_valid(pte)) 1368 + return -ENOENT; 1369 + 1370 + state = guest_get_page_state(pte, ipa); 1371 + if (state != PKVM_PAGE_SHARED_BORROWED) 1372 + return -EPERM; 1373 + 1374 + phys = kvm_pte_to_phys(pte); 1375 + ret = check_range_allowed_memory(phys, phys + PAGE_SIZE); 1376 + if (WARN_ON(ret)) 1377 + return ret; 1378 + 1379 + page = hyp_phys_to_page(phys); 1380 + if (page->host_state != PKVM_PAGE_SHARED_OWNED) 1381 + return -EPERM; 1382 + if (WARN_ON(!page->host_share_guest_count)) 1383 + return -EINVAL; 1384 + 1385 + *__phys = phys; 1386 + 1387 + return 0; 1388 + } 1389 + 1390 + int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm) 1391 + { 1392 + u64 ipa = hyp_pfn_to_phys(gfn); 1393 + struct hyp_page *page; 1394 + u64 phys; 1395 + int ret; 1396 + 1397 + host_lock_component(); 1398 + guest_lock_component(vm); 1399 + 1400 + ret = 
__check_host_shared_guest(vm, &phys, ipa); 1401 + if (ret) 1402 + goto unlock; 1403 + 1404 + ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE); 1405 + if (ret) 1406 + goto unlock; 1407 + 1408 + page = hyp_phys_to_page(phys); 1409 + page->host_share_guest_count--; 1410 + if (!page->host_share_guest_count) 1411 + WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED)); 1412 + 1413 + unlock: 1414 + guest_unlock_component(vm); 1415 + host_unlock_component(); 1416 + 1417 + return ret; 1418 + } 1419 + 1420 + int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) 1421 + { 1422 + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); 1423 + u64 ipa = hyp_pfn_to_phys(gfn); 1424 + u64 phys; 1425 + int ret; 1426 + 1427 + if (prot & ~KVM_PGTABLE_PROT_RWX) 1428 + return -EINVAL; 1429 + 1430 + host_lock_component(); 1431 + guest_lock_component(vm); 1432 + 1433 + ret = __check_host_shared_guest(vm, &phys, ipa); 1434 + if (!ret) 1435 + ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); 1436 + 1437 + guest_unlock_component(vm); 1438 + host_unlock_component(); 1439 + 1440 + return ret; 1441 + } 1442 + 1443 + int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm) 1444 + { 1445 + u64 ipa = hyp_pfn_to_phys(gfn); 1446 + u64 phys; 1447 + int ret; 1448 + 1449 + host_lock_component(); 1450 + guest_lock_component(vm); 1451 + 1452 + ret = __check_host_shared_guest(vm, &phys, ipa); 1453 + if (!ret) 1454 + ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE); 1455 + 1456 + guest_unlock_component(vm); 1457 + host_unlock_component(); 1458 + 1459 + return ret; 1460 + } 1461 + 1462 + int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm) 1463 + { 1464 + u64 ipa = hyp_pfn_to_phys(gfn); 1465 + u64 phys; 1466 + int ret; 1467 + 1468 + host_lock_component(); 1469 + guest_lock_component(vm); 1470 + 1471 + ret = __check_host_shared_guest(vm, &phys, ipa); 1472 + if (!ret) 1473 + ret 
= kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold); 1474 + 1475 + guest_unlock_component(vm); 1476 + host_unlock_component(); 1477 + 1478 + return ret; 1479 + } 1480 + 1481 + int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) 1482 + { 1483 + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); 1484 + u64 ipa = hyp_pfn_to_phys(gfn); 1485 + u64 phys; 1486 + int ret; 1487 + 1488 + host_lock_component(); 1489 + guest_lock_component(vm); 1490 + 1491 + ret = __check_host_shared_guest(vm, &phys, ipa); 1492 + if (!ret) 1493 + kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); 1494 + 1495 + guest_unlock_component(vm); 886 1496 host_unlock_component(); 887 1497 888 1498 return ret;
+6 -6
arch/arm64/kvm/hyp/nvhe/mm.c
··· 360 360 361 361 prev_base = __io_map_base; 362 362 /* 363 - * Efficient stack verification using the PAGE_SHIFT bit implies 363 + * Efficient stack verification using the NVHE_STACK_SHIFT bit implies 364 364 * an alignment of our allocation on the order of the size. 365 365 */ 366 - size = PAGE_SIZE * 2; 366 + size = NVHE_STACK_SIZE * 2; 367 367 addr = ALIGN(__io_map_base, size); 368 368 369 369 ret = __pkvm_alloc_private_va_range(addr, size); ··· 373 373 * at the higher address and leave the lower guard page 374 374 * unbacked. 375 375 * 376 - * Any valid stack address now has the PAGE_SHIFT bit as 1 376 + * Any valid stack address now has the NVHE_STACK_SHIFT bit as 1 377 377 * and addresses corresponding to the guard page have the 378 - * PAGE_SHIFT bit as 0 - this is used for overflow detection. 378 + * NVHE_STACK_SHIFT bit as 0 - this is used for overflow detection. 379 379 */ 380 - ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE, 381 - PAGE_SIZE, phys, PAGE_HYP); 380 + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + NVHE_STACK_SIZE, 381 + NVHE_STACK_SIZE, phys, PAGE_HYP); 382 382 if (ret) 383 383 __io_map_base = prev_base; 384 384 }
+7 -7
arch/arm64/kvm/hyp/nvhe/page_alloc.c
··· 32 32 */ 33 33 static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, 34 34 struct hyp_page *p, 35 - unsigned short order) 35 + u8 order) 36 36 { 37 37 phys_addr_t addr = hyp_page_to_phys(p); 38 38 ··· 51 51 /* Find a buddy page currently available for allocation */ 52 52 static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, 53 53 struct hyp_page *p, 54 - unsigned short order) 54 + u8 order) 55 55 { 56 56 struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); 57 57 ··· 94 94 struct hyp_page *p) 95 95 { 96 96 phys_addr_t phys = hyp_page_to_phys(p); 97 - unsigned short order = p->order; 97 + u8 order = p->order; 98 98 struct hyp_page *buddy; 99 99 100 100 memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); ··· 129 129 130 130 static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, 131 131 struct hyp_page *p, 132 - unsigned short order) 132 + u8 order) 133 133 { 134 134 struct hyp_page *buddy; 135 135 ··· 183 183 184 184 void hyp_split_page(struct hyp_page *p) 185 185 { 186 - unsigned short order = p->order; 186 + u8 order = p->order; 187 187 unsigned int i; 188 188 189 189 p->order = 0; ··· 195 195 } 196 196 } 197 197 198 - void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) 198 + void *hyp_alloc_pages(struct hyp_pool *pool, u8 order) 199 199 { 200 - unsigned short i = order; 201 200 struct hyp_page *p; 201 + u8 i = order; 202 202 203 203 hyp_spin_lock(&pool->lock); 204 204
+205 -231
arch/arm64/kvm/hyp/nvhe/pkvm.c
··· 9 9 10 10 #include <asm/kvm_emulate.h> 11 11 12 - #include <nvhe/fixed_config.h> 13 12 #include <nvhe/mem_protect.h> 14 13 #include <nvhe/memory.h> 15 14 #include <nvhe/pkvm.h> ··· 23 24 unsigned int kvm_host_sve_max_vl; 24 25 25 26 /* 26 - * Set trap register values based on features in ID_AA64PFR0. 27 + * The currently loaded hyp vCPU for each physical CPU. Used only when 28 + * protected KVM is enabled, but for both protected and non-protected VMs. 27 29 */ 28 - static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) 29 - { 30 - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1); 31 - u64 hcr_set = HCR_RW; 32 - u64 hcr_clear = 0; 33 - u64 cptr_set = 0; 34 - u64 cptr_clear = 0; 35 - 36 - /* Protected KVM does not support AArch32 guests. */ 37 - BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), 38 - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL0_IMP); 39 - BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), 40 - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL1_IMP); 41 - 42 - /* 43 - * Linux guests assume support for floating-point and Advanced SIMD. Do 44 - * not change the trapping behavior for these from the KVM default. 
45 - */ 46 - BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP), 47 - PVM_ID_AA64PFR0_ALLOW)); 48 - BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD), 49 - PVM_ID_AA64PFR0_ALLOW)); 50 - 51 - if (has_hvhe()) 52 - hcr_set |= HCR_E2H; 53 - 54 - /* Trap RAS unless all current versions are supported */ 55 - if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) < 56 - ID_AA64PFR0_EL1_RAS_V1P1) { 57 - hcr_set |= HCR_TERR | HCR_TEA; 58 - hcr_clear |= HCR_FIEN; 59 - } 60 - 61 - /* Trap AMU */ 62 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) { 63 - hcr_clear |= HCR_AMVOFFEN; 64 - cptr_set |= CPTR_EL2_TAM; 65 - } 66 - 67 - /* Trap SVE */ 68 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) { 69 - if (has_hvhe()) 70 - cptr_clear |= CPACR_EL1_ZEN; 71 - else 72 - cptr_set |= CPTR_EL2_TZ; 73 - } 74 - 75 - vcpu->arch.hcr_el2 |= hcr_set; 76 - vcpu->arch.hcr_el2 &= ~hcr_clear; 77 - vcpu->arch.cptr_el2 |= cptr_set; 78 - vcpu->arch.cptr_el2 &= ~cptr_clear; 79 - } 80 - 81 - /* 82 - * Set trap register values based on features in ID_AA64PFR1. 83 - */ 84 - static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) 85 - { 86 - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1); 87 - u64 hcr_set = 0; 88 - u64 hcr_clear = 0; 89 - 90 - /* Memory Tagging: Trap and Treat as Untagged if not supported. */ 91 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) { 92 - hcr_set |= HCR_TID5; 93 - hcr_clear |= HCR_DCT | HCR_ATA; 94 - } 95 - 96 - vcpu->arch.hcr_el2 |= hcr_set; 97 - vcpu->arch.hcr_el2 &= ~hcr_clear; 98 - } 99 - 100 - /* 101 - * Set trap register values based on features in ID_AA64DFR0. 
102 - */ 103 - static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) 104 - { 105 - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1); 106 - u64 mdcr_set = 0; 107 - u64 mdcr_clear = 0; 108 - u64 cptr_set = 0; 109 - 110 - /* Trap/constrain PMU */ 111 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) { 112 - mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR; 113 - mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME | 114 - MDCR_EL2_HPMN_MASK; 115 - } 116 - 117 - /* Trap Debug */ 118 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids)) 119 - mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE; 120 - 121 - /* Trap OS Double Lock */ 122 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids)) 123 - mdcr_set |= MDCR_EL2_TDOSA; 124 - 125 - /* Trap SPE */ 126 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) { 127 - mdcr_set |= MDCR_EL2_TPMS; 128 - mdcr_clear |= MDCR_EL2_E2PB_MASK; 129 - } 130 - 131 - /* Trap Trace Filter */ 132 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids)) 133 - mdcr_set |= MDCR_EL2_TTRF; 134 - 135 - /* Trap Trace */ 136 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) { 137 - if (has_hvhe()) 138 - cptr_set |= CPACR_EL1_TTA; 139 - else 140 - cptr_set |= CPTR_EL2_TTA; 141 - } 142 - 143 - /* Trap External Trace */ 144 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids)) 145 - mdcr_clear |= MDCR_EL2_E2TB_MASK; 146 - 147 - vcpu->arch.mdcr_el2 |= mdcr_set; 148 - vcpu->arch.mdcr_el2 &= ~mdcr_clear; 149 - vcpu->arch.cptr_el2 |= cptr_set; 150 - } 151 - 152 - /* 153 - * Set trap register values based on features in ID_AA64MMFR0. 
154 - */ 155 - static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) 156 - { 157 - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1); 158 - u64 mdcr_set = 0; 159 - 160 - /* Trap Debug Communications Channel registers */ 161 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids)) 162 - mdcr_set |= MDCR_EL2_TDCC; 163 - 164 - vcpu->arch.mdcr_el2 |= mdcr_set; 165 - } 166 - 167 - /* 168 - * Set trap register values based on features in ID_AA64MMFR1. 169 - */ 170 - static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu) 171 - { 172 - const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1); 173 - u64 hcr_set = 0; 174 - 175 - /* Trap LOR */ 176 - if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids)) 177 - hcr_set |= HCR_TLOR; 178 - 179 - vcpu->arch.hcr_el2 |= hcr_set; 180 - } 181 - 182 - /* 183 - * Set baseline trap register values. 184 - */ 185 - static void pvm_init_trap_regs(struct kvm_vcpu *vcpu) 186 - { 187 - const u64 hcr_trap_feat_regs = HCR_TID3; 188 - const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1; 189 - 190 - /* 191 - * Always trap: 192 - * - Feature id registers: to control features exposed to guests 193 - * - Implementation-defined features 194 - */ 195 - vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef; 196 - 197 - /* Clear res0 and set res1 bits to trap potential new features. 
*/ 198 - vcpu->arch.hcr_el2 &= ~(HCR_RES0); 199 - vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0); 200 - if (!has_hvhe()) { 201 - vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1; 202 - vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0); 203 - } 204 - } 30 + static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu); 205 31 206 32 static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu) 207 33 { ··· 53 229 54 230 if (vcpu_has_ptrauth(vcpu)) 55 231 vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); 232 + 233 + if (kvm_has_mte(vcpu->kvm)) 234 + vcpu->arch.hcr_el2 |= HCR_ATA; 235 + } 236 + 237 + static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu) 238 + { 239 + struct kvm *kvm = vcpu->kvm; 240 + u64 val = vcpu->arch.hcr_el2; 241 + 242 + /* No support for AArch32. */ 243 + val |= HCR_RW; 244 + 245 + /* 246 + * Always trap: 247 + * - Feature id registers: to control features exposed to guests 248 + * - Implementation-defined features 249 + */ 250 + val |= HCR_TACR | HCR_TIDCP | HCR_TID3 | HCR_TID1; 251 + 252 + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) { 253 + val |= HCR_TERR | HCR_TEA; 254 + val &= ~(HCR_FIEN); 255 + } 256 + 257 + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP)) 258 + val &= ~(HCR_AMVOFFEN); 259 + 260 + if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) { 261 + val |= HCR_TID5; 262 + val &= ~(HCR_DCT | HCR_ATA); 263 + } 264 + 265 + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP)) 266 + val |= HCR_TLOR; 267 + 268 + vcpu->arch.hcr_el2 = val; 269 + } 270 + 271 + static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu) 272 + { 273 + struct kvm *kvm = vcpu->kvm; 274 + u64 val = vcpu->arch.mdcr_el2; 275 + 276 + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) { 277 + val |= MDCR_EL2_TPM | MDCR_EL2_TPMCR; 278 + val &= ~(MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK); 279 + } 280 + 281 + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, IMP)) 282 + val |= MDCR_EL2_TDRA | MDCR_EL2_TDA; 283 + 284 + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP)) 285 + val 
|= MDCR_EL2_TDOSA; 286 + 287 + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) { 288 + val |= MDCR_EL2_TPMS; 289 + val &= ~MDCR_EL2_E2PB_MASK; 290 + } 291 + 292 + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP)) 293 + val |= MDCR_EL2_TTRF; 294 + 295 + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP)) 296 + val |= MDCR_EL2_E2TB_MASK; 297 + 298 + /* Trap Debug Communications Channel registers */ 299 + if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP)) 300 + val |= MDCR_EL2_TDCC; 301 + 302 + vcpu->arch.mdcr_el2 = val; 303 + } 304 + 305 + /* 306 + * Check that cpu features that are neither trapped nor supported are not 307 + * enabled for protected VMs. 308 + */ 309 + static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu) 310 + { 311 + struct kvm *kvm = vcpu->kvm; 312 + 313 + /* Protected KVM does not support AArch32 guests. */ 314 + if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) || 315 + kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32)) 316 + return -EINVAL; 317 + 318 + /* 319 + * Linux guests assume support for floating-point and Advanced SIMD. Do 320 + * not change the trapping behavior for these from the KVM default. 321 + */ 322 + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, FP, IMP) || 323 + !kvm_has_feat(kvm, ID_AA64PFR0_EL1, AdvSIMD, IMP)) 324 + return -EINVAL; 325 + 326 + /* No SME support in KVM right now. Check to catch if it changes. */ 327 + if (kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP)) 328 + return -EINVAL; 329 + 330 + return 0; 56 331 } 57 332 58 333 /* 59 334 * Initialize trap register values in protected mode. 
60 335 */ 61 - static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) 336 + static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) 62 337 { 63 - vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu); 338 + struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; 339 + int ret; 340 + 64 341 vcpu->arch.mdcr_el2 = 0; 65 342 66 343 pkvm_vcpu_reset_hcr(vcpu); 67 344 68 - if ((!vcpu_is_protected(vcpu))) 69 - return; 345 + if ((!pkvm_hyp_vcpu_is_protected(hyp_vcpu))) 346 + return 0; 70 347 71 - pvm_init_trap_regs(vcpu); 72 - pvm_init_traps_aa64pfr0(vcpu); 73 - pvm_init_traps_aa64pfr1(vcpu); 74 - pvm_init_traps_aa64dfr0(vcpu); 75 - pvm_init_traps_aa64mmfr0(vcpu); 76 - pvm_init_traps_aa64mmfr1(vcpu); 348 + ret = pkvm_check_pvm_cpu_features(vcpu); 349 + if (ret) 350 + return ret; 351 + 352 + pvm_init_traps_hcr(vcpu); 353 + pvm_init_traps_mdcr(vcpu); 354 + 355 + return 0; 77 356 } 78 357 79 358 /* ··· 197 270 198 271 /* 199 272 * Spinlock for protecting state related to the VM table. Protects writes 200 - * to 'vm_table' and 'nr_table_entries' as well as reads and writes to 201 - * 'last_hyp_vcpu_lookup'. 273 + * to 'vm_table', 'nr_table_entries', and other per-vm state on initialization. 274 + * Also protects reads and writes to 'last_hyp_vcpu_lookup'. 202 275 */ 203 - static DEFINE_HYP_SPINLOCK(vm_table_lock); 276 + DEFINE_HYP_SPINLOCK(vm_table_lock); 204 277 205 278 /* 206 279 * The table of VM entries for protected VMs in hyp. ··· 233 306 struct pkvm_hyp_vcpu *hyp_vcpu = NULL; 234 307 struct pkvm_hyp_vm *hyp_vm; 235 308 309 + /* Cannot load a new vcpu without putting the old one first. */ 310 + if (__this_cpu_read(loaded_hyp_vcpu)) 311 + return NULL; 312 + 236 313 hyp_spin_lock(&vm_table_lock); 237 314 hyp_vm = get_vm_by_handle(handle); 238 315 if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx) 239 316 goto unlock; 240 317 241 318 hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; 319 + 320 + /* Ensure vcpu isn't loaded on more than one cpu simultaneously. 
*/ 321 + if (unlikely(hyp_vcpu->loaded_hyp_vcpu)) { 322 + hyp_vcpu = NULL; 323 + goto unlock; 324 + } 325 + 326 + hyp_vcpu->loaded_hyp_vcpu = this_cpu_ptr(&loaded_hyp_vcpu); 242 327 hyp_page_ref_inc(hyp_virt_to_page(hyp_vm)); 243 328 unlock: 244 329 hyp_spin_unlock(&vm_table_lock); 330 + 331 + if (hyp_vcpu) 332 + __this_cpu_write(loaded_hyp_vcpu, hyp_vcpu); 245 333 return hyp_vcpu; 246 334 } 247 335 ··· 265 323 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu); 266 324 267 325 hyp_spin_lock(&vm_table_lock); 326 + hyp_vcpu->loaded_hyp_vcpu = NULL; 327 + __this_cpu_write(loaded_hyp_vcpu, NULL); 268 328 hyp_page_ref_dec(hyp_virt_to_page(hyp_vm)); 269 329 hyp_spin_unlock(&vm_table_lock); 330 + } 331 + 332 + struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void) 333 + { 334 + return __this_cpu_read(loaded_hyp_vcpu); 335 + 336 + } 337 + 338 + struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle) 339 + { 340 + struct pkvm_hyp_vm *hyp_vm; 341 + 342 + hyp_spin_lock(&vm_table_lock); 343 + hyp_vm = get_vm_by_handle(handle); 344 + if (hyp_vm) 345 + hyp_page_ref_inc(hyp_virt_to_page(hyp_vm)); 346 + hyp_spin_unlock(&vm_table_lock); 347 + 348 + return hyp_vm; 349 + } 350 + 351 + void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm) 352 + { 353 + hyp_spin_lock(&vm_table_lock); 354 + hyp_page_ref_dec(hyp_virt_to_page(hyp_vm)); 355 + hyp_spin_unlock(&vm_table_lock); 356 + } 357 + 358 + struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle) 359 + { 360 + struct pkvm_hyp_vm *hyp_vm = get_pkvm_hyp_vm(handle); 361 + 362 + if (hyp_vm && pkvm_hyp_vm_is_protected(hyp_vm)) { 363 + put_pkvm_hyp_vm(hyp_vm); 364 + hyp_vm = NULL; 365 + } 366 + 367 + return hyp_vm; 270 368 } 271 369 272 370 static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm) 273 371 { 274 372 struct kvm *kvm = &hyp_vm->kvm; 373 + unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags); 275 374 DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES); 375 + 376 + 
if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags)) 377 + set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); 276 378 277 379 /* No restrictions for non-protected VMs. */ 278 380 if (!kvm_vm_is_protected(kvm)) { 381 + hyp_vm->kvm.arch.flags = host_arch_flags; 382 + 279 383 bitmap_copy(kvm->arch.vcpu_features, 280 384 host_kvm->arch.vcpu_features, 281 385 KVM_VCPU_MAX_FEATURES); ··· 330 342 331 343 bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES); 332 344 333 - /* 334 - * For protected VMs, always allow: 335 - * - CPU starting in poweroff state 336 - * - PSCI v0.2 337 - */ 338 - set_bit(KVM_ARM_VCPU_POWER_OFF, allowed_features); 339 345 set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features); 340 346 341 - /* 342 - * Check if remaining features are allowed: 343 - * - Performance Monitoring 344 - * - Scalable Vectors 345 - * - Pointer Authentication 346 - */ 347 - if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), PVM_ID_AA64DFR0_ALLOW)) 347 + if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PMU_V3)) 348 348 set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features); 349 349 350 - if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), PVM_ID_AA64PFR0_ALLOW)) 351 - set_bit(KVM_ARM_VCPU_SVE, allowed_features); 352 - 353 - if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED) && 354 - FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED)) 350 + if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_ADDRESS)) 355 351 set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features); 356 352 357 - if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI), PVM_ID_AA64ISAR1_ALLOW) && 358 - FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA), PVM_ID_AA64ISAR1_ALLOW)) 353 + if (kvm_pvm_ext_allowed(KVM_CAP_ARM_PTRAUTH_GENERIC)) 359 354 set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features); 355 + 356 + if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE)) { 357 + set_bit(KVM_ARM_VCPU_SVE, allowed_features); 358 + kvm->arch.flags |= host_arch_flags 
& BIT(KVM_ARCH_FLAG_GUEST_HAS_SVE); 359 + } 360 360 361 361 bitmap_and(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features, 362 362 allowed_features, KVM_VCPU_MAX_FEATURES); 363 - } 364 - 365 - static void pkvm_vcpu_init_ptrauth(struct pkvm_hyp_vcpu *hyp_vcpu) 366 - { 367 - struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; 368 - 369 - if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || 370 - vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)) { 371 - kvm_vcpu_enable_ptrauth(vcpu); 372 - } else { 373 - vcpu_clear_flag(&hyp_vcpu->vcpu, GUEST_HAS_PTRAUTH); 374 - } 375 363 } 376 364 377 365 static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu) ··· 372 408 hyp_vm->kvm.created_vcpus = nr_vcpus; 373 409 hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; 374 410 hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled); 411 + hyp_vm->kvm.arch.flags = 0; 375 412 pkvm_init_features_from_host(hyp_vm, host_kvm); 376 413 } 377 414 ··· 380 415 { 381 416 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; 382 417 383 - if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) { 384 - vcpu_clear_flag(vcpu, GUEST_HAS_SVE); 418 + if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) 385 419 vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED); 386 - } 387 420 } 388 421 389 422 static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, ··· 409 446 hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags); 410 447 hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; 411 448 449 + if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) 450 + kvm_init_pvm_id_regs(&hyp_vcpu->vcpu); 451 + 452 + ret = pkvm_vcpu_init_traps(hyp_vcpu); 453 + if (ret) 454 + goto done; 455 + 412 456 pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu); 413 - pkvm_vcpu_init_ptrauth(hyp_vcpu); 414 - pkvm_vcpu_init_traps(&hyp_vcpu->vcpu); 415 457 done: 416 458 if (ret) 417 459 unpin_host_vcpu(host_vcpu); ··· 661 693 return ret; 662 694 } 663 695 664 - hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu); 665 - 666 696 return 
0; 667 697 } 668 698 ··· 712 746 /* Push the metadata pages to the teardown memcache */ 713 747 for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) { 714 748 struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx]; 749 + struct kvm_hyp_memcache *vcpu_mc = &hyp_vcpu->vcpu.arch.pkvm_memcache; 750 + 751 + while (vcpu_mc->nr_pages) { 752 + void *addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt); 753 + 754 + push_hyp_memcache(mc, addr, hyp_virt_to_phys); 755 + unmap_donated_memory_noclear(addr, PAGE_SIZE); 756 + } 715 757 716 758 teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu)); 717 759 }
+3 -5
arch/arm64/kvm/hyp/nvhe/setup.c
··· 12 12 13 13 #include <nvhe/early_alloc.h> 14 14 #include <nvhe/ffa.h> 15 - #include <nvhe/fixed_config.h> 16 15 #include <nvhe/gfp.h> 17 16 #include <nvhe/memory.h> 18 17 #include <nvhe/mem_protect.h> ··· 179 180 static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, 180 181 enum kvm_pgtable_walk_flags visit) 181 182 { 182 - enum kvm_pgtable_prot prot; 183 183 enum pkvm_page_state state; 184 184 phys_addr_t phys; 185 185 ··· 201 203 case PKVM_PAGE_OWNED: 202 204 return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); 203 205 case PKVM_PAGE_SHARED_OWNED: 204 - prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); 206 + hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED; 205 207 break; 206 208 case PKVM_PAGE_SHARED_BORROWED: 207 - prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); 209 + hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED; 208 210 break; 209 211 default: 210 212 return -EINVAL; 211 213 } 212 214 213 - return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); 215 + return 0; 214 216 } 215 217 216 218 static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx,
+2 -2
arch/arm64/kvm/hyp/nvhe/stacktrace.c
··· 28 28 struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info); 29 29 struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); 30 30 31 - stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE); 31 + stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - NVHE_STACK_SIZE); 32 32 stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack); 33 33 stacktrace_info->fp = fp; 34 34 stacktrace_info->pc = pc; ··· 54 54 { 55 55 struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); 56 56 unsigned long high = params->stack_hyp_va; 57 - unsigned long low = high - PAGE_SIZE; 57 + unsigned long low = high - NVHE_STACK_SIZE; 58 58 59 59 return (struct stack_info) { 60 60 .low = low,
+32 -20
arch/arm64/kvm/hyp/nvhe/switch.c
··· 26 26 #include <asm/debug-monitors.h> 27 27 #include <asm/processor.h> 28 28 29 - #include <nvhe/fixed_config.h> 30 29 #include <nvhe/mem_protect.h> 31 30 32 31 /* Non-VHE specific context */ ··· 35 36 36 37 extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); 37 38 38 - static void __activate_traps(struct kvm_vcpu *vcpu) 39 + static void __activate_cptr_traps(struct kvm_vcpu *vcpu) 39 40 { 40 - u64 val; 41 + u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ 41 42 42 - ___activate_traps(vcpu, vcpu->arch.hcr_el2); 43 - __activate_traps_common(vcpu); 43 + if (has_hvhe()) { 44 + val |= CPACR_EL1_TTA; 44 45 45 - val = vcpu->arch.cptr_el2; 46 - val |= CPTR_EL2_TAM; /* Same bit irrespective of E2H */ 47 - val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA; 48 - if (cpus_have_final_cap(ARM64_SME)) { 49 - if (has_hvhe()) 50 - val &= ~CPACR_EL1_SMEN; 51 - else 52 - val |= CPTR_EL2_TSM; 46 + if (guest_owns_fp_regs()) { 47 + val |= CPACR_EL1_FPEN; 48 + if (vcpu_has_sve(vcpu)) 49 + val |= CPACR_EL1_ZEN; 50 + } 51 + } else { 52 + val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; 53 + 54 + /* 55 + * Always trap SME since it's not supported in KVM. 56 + * TSM is RES1 if SME isn't implemented. 
57 + */ 58 + val |= CPTR_EL2_TSM; 59 + 60 + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) 61 + val |= CPTR_EL2_TZ; 62 + 63 + if (!guest_owns_fp_regs()) 64 + val |= CPTR_EL2_TFP; 53 65 } 54 66 55 - if (!guest_owns_fp_regs()) { 56 - if (has_hvhe()) 57 - val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN); 58 - else 59 - val |= CPTR_EL2_TFP | CPTR_EL2_TZ; 60 - 67 + if (!guest_owns_fp_regs()) 61 68 __activate_traps_fpsimd32(vcpu); 62 - } 63 69 64 70 kvm_write_cptr_el2(val); 71 + } 72 + 73 + static void __activate_traps(struct kvm_vcpu *vcpu) 74 + { 75 + ___activate_traps(vcpu, vcpu->arch.hcr_el2); 76 + __activate_traps_common(vcpu); 77 + __activate_cptr_traps(vcpu); 78 + 65 79 write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); 66 80 67 81 if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
+250 -200
arch/arm64/kvm/hyp/nvhe/sys_regs.c
··· 11 11 12 12 #include <hyp/adjust_pc.h> 13 13 14 - #include <nvhe/fixed_config.h> 14 + #include <nvhe/pkvm.h> 15 15 16 16 #include "../../sys_regs.h" 17 17 ··· 27 27 u64 id_aa64mmfr1_el1_sys_val; 28 28 u64 id_aa64mmfr2_el1_sys_val; 29 29 u64 id_aa64smfr0_el1_sys_val; 30 + 31 + struct pvm_ftr_bits { 32 + bool sign; 33 + u8 shift; 34 + u8 width; 35 + u8 max_val; 36 + bool (*vm_supported)(const struct kvm *kvm); 37 + }; 38 + 39 + #define __MAX_FEAT_FUNC(id, fld, max, func, sgn) \ 40 + { \ 41 + .sign = sgn, \ 42 + .shift = id##_##fld##_SHIFT, \ 43 + .width = id##_##fld##_WIDTH, \ 44 + .max_val = id##_##fld##_##max, \ 45 + .vm_supported = func, \ 46 + } 47 + 48 + #define MAX_FEAT_FUNC(id, fld, max, func) \ 49 + __MAX_FEAT_FUNC(id, fld, max, func, id##_##fld##_SIGNED) 50 + 51 + #define MAX_FEAT(id, fld, max) \ 52 + MAX_FEAT_FUNC(id, fld, max, NULL) 53 + 54 + #define MAX_FEAT_ENUM(id, fld, max) \ 55 + __MAX_FEAT_FUNC(id, fld, max, NULL, false) 56 + 57 + #define FEAT_END { .width = 0, } 58 + 59 + static bool vm_has_ptrauth(const struct kvm *kvm) 60 + { 61 + if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH)) 62 + return false; 63 + 64 + return (cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || 65 + cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && 66 + kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC); 67 + } 68 + 69 + static bool vm_has_sve(const struct kvm *kvm) 70 + { 71 + return system_supports_sve() && kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_SVE); 72 + } 73 + 74 + /* 75 + * Definitions for features to be allowed or restricted for protected guests. 76 + * 77 + * Each field in the masks represents the highest supported value for the 78 + * feature. If a feature field is not present, it is not supported. Moreover, 79 + * these are used to generate the guest's view of the feature registers. 
80 + * 81 + * The approach for protected VMs is to at least support features that are: 82 + * - Needed by common Linux distributions (e.g., floating point) 83 + * - Trivial to support, e.g., supporting the feature does not introduce or 84 + * require tracking of additional state in KVM 85 + * - Cannot be trapped or prevent the guest from using anyway 86 + */ 87 + 88 + static const struct pvm_ftr_bits pvmid_aa64pfr0[] = { 89 + MAX_FEAT(ID_AA64PFR0_EL1, EL0, IMP), 90 + MAX_FEAT(ID_AA64PFR0_EL1, EL1, IMP), 91 + MAX_FEAT(ID_AA64PFR0_EL1, EL2, IMP), 92 + MAX_FEAT(ID_AA64PFR0_EL1, EL3, IMP), 93 + MAX_FEAT(ID_AA64PFR0_EL1, FP, FP16), 94 + MAX_FEAT(ID_AA64PFR0_EL1, AdvSIMD, FP16), 95 + MAX_FEAT(ID_AA64PFR0_EL1, GIC, IMP), 96 + MAX_FEAT_FUNC(ID_AA64PFR0_EL1, SVE, IMP, vm_has_sve), 97 + MAX_FEAT(ID_AA64PFR0_EL1, RAS, IMP), 98 + MAX_FEAT(ID_AA64PFR0_EL1, DIT, IMP), 99 + MAX_FEAT(ID_AA64PFR0_EL1, CSV2, IMP), 100 + MAX_FEAT(ID_AA64PFR0_EL1, CSV3, IMP), 101 + FEAT_END 102 + }; 103 + 104 + static const struct pvm_ftr_bits pvmid_aa64pfr1[] = { 105 + MAX_FEAT(ID_AA64PFR1_EL1, BT, IMP), 106 + MAX_FEAT(ID_AA64PFR1_EL1, SSBS, SSBS2), 107 + MAX_FEAT_ENUM(ID_AA64PFR1_EL1, MTE_frac, NI), 108 + FEAT_END 109 + }; 110 + 111 + static const struct pvm_ftr_bits pvmid_aa64mmfr0[] = { 112 + MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, PARANGE, 40), 113 + MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, ASIDBITS, 16), 114 + MAX_FEAT(ID_AA64MMFR0_EL1, BIGEND, IMP), 115 + MAX_FEAT(ID_AA64MMFR0_EL1, SNSMEM, IMP), 116 + MAX_FEAT(ID_AA64MMFR0_EL1, BIGENDEL0, IMP), 117 + MAX_FEAT(ID_AA64MMFR0_EL1, EXS, IMP), 118 + FEAT_END 119 + }; 120 + 121 + static const struct pvm_ftr_bits pvmid_aa64mmfr1[] = { 122 + MAX_FEAT(ID_AA64MMFR1_EL1, HAFDBS, DBM), 123 + MAX_FEAT_ENUM(ID_AA64MMFR1_EL1, VMIDBits, 16), 124 + MAX_FEAT(ID_AA64MMFR1_EL1, HPDS, HPDS2), 125 + MAX_FEAT(ID_AA64MMFR1_EL1, PAN, PAN3), 126 + MAX_FEAT(ID_AA64MMFR1_EL1, SpecSEI, IMP), 127 + MAX_FEAT(ID_AA64MMFR1_EL1, ETS, IMP), 128 + MAX_FEAT(ID_AA64MMFR1_EL1, CMOW, IMP), 129 + 
FEAT_END 130 + }; 131 + 132 + static const struct pvm_ftr_bits pvmid_aa64mmfr2[] = { 133 + MAX_FEAT(ID_AA64MMFR2_EL1, CnP, IMP), 134 + MAX_FEAT(ID_AA64MMFR2_EL1, UAO, IMP), 135 + MAX_FEAT(ID_AA64MMFR2_EL1, IESB, IMP), 136 + MAX_FEAT(ID_AA64MMFR2_EL1, AT, IMP), 137 + MAX_FEAT_ENUM(ID_AA64MMFR2_EL1, IDS, 0x18), 138 + MAX_FEAT(ID_AA64MMFR2_EL1, TTL, IMP), 139 + MAX_FEAT(ID_AA64MMFR2_EL1, BBM, 2), 140 + MAX_FEAT(ID_AA64MMFR2_EL1, E0PD, IMP), 141 + FEAT_END 142 + }; 143 + 144 + static const struct pvm_ftr_bits pvmid_aa64isar1[] = { 145 + MAX_FEAT(ID_AA64ISAR1_EL1, DPB, DPB2), 146 + MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, APA, PAuth, vm_has_ptrauth), 147 + MAX_FEAT_FUNC(ID_AA64ISAR1_EL1, API, PAuth, vm_has_ptrauth), 148 + MAX_FEAT(ID_AA64ISAR1_EL1, JSCVT, IMP), 149 + MAX_FEAT(ID_AA64ISAR1_EL1, FCMA, IMP), 150 + MAX_FEAT(ID_AA64ISAR1_EL1, LRCPC, LRCPC3), 151 + MAX_FEAT(ID_AA64ISAR1_EL1, GPA, IMP), 152 + MAX_FEAT(ID_AA64ISAR1_EL1, GPI, IMP), 153 + MAX_FEAT(ID_AA64ISAR1_EL1, FRINTTS, IMP), 154 + MAX_FEAT(ID_AA64ISAR1_EL1, SB, IMP), 155 + MAX_FEAT(ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX), 156 + MAX_FEAT(ID_AA64ISAR1_EL1, BF16, EBF16), 157 + MAX_FEAT(ID_AA64ISAR1_EL1, DGH, IMP), 158 + MAX_FEAT(ID_AA64ISAR1_EL1, I8MM, IMP), 159 + FEAT_END 160 + }; 161 + 162 + static const struct pvm_ftr_bits pvmid_aa64isar2[] = { 163 + MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, GPA3, IMP, vm_has_ptrauth), 164 + MAX_FEAT_FUNC(ID_AA64ISAR2_EL1, APA3, PAuth, vm_has_ptrauth), 165 + MAX_FEAT(ID_AA64ISAR2_EL1, ATS1A, IMP), 166 + FEAT_END 167 + }; 168 + 169 + /* 170 + * None of the features in ID_AA64DFR0_EL1 nor ID_AA64MMFR4_EL1 are supported. 171 + * However, both have Not-Implemented values that are non-zero. Define them 172 + * so they can be used when getting the value of these registers. 
173 + */ 174 + #define ID_AA64DFR0_EL1_NONZERO_NI \ 175 + ( \ 176 + SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI) | \ 177 + SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, MTPMU, NI) \ 178 + ) 179 + 180 + #define ID_AA64MMFR4_EL1_NONZERO_NI \ 181 + SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, E2H0, NI) 182 + 183 + /* 184 + * Returns the value of the feature registers based on the system register 185 + * value, the vcpu support for the revelant features, and the additional 186 + * restrictions for protected VMs. 187 + */ 188 + static u64 get_restricted_features(const struct kvm_vcpu *vcpu, 189 + u64 sys_reg_val, 190 + const struct pvm_ftr_bits restrictions[]) 191 + { 192 + u64 val = 0UL; 193 + int i; 194 + 195 + for (i = 0; restrictions[i].width != 0; i++) { 196 + bool (*vm_supported)(const struct kvm *) = restrictions[i].vm_supported; 197 + bool sign = restrictions[i].sign; 198 + int shift = restrictions[i].shift; 199 + int width = restrictions[i].width; 200 + u64 min_signed = (1UL << width) - 1UL; 201 + u64 sign_bit = 1UL << (width - 1); 202 + u64 mask = GENMASK_ULL(width + shift - 1, shift); 203 + u64 sys_val = (sys_reg_val & mask) >> shift; 204 + u64 pvm_max = restrictions[i].max_val; 205 + 206 + if (vm_supported && !vm_supported(vcpu->kvm)) 207 + val |= (sign ? 
min_signed : 0) << shift; 208 + else if (sign && (sys_val >= sign_bit || pvm_max >= sign_bit)) 209 + val |= max(sys_val, pvm_max) << shift; 210 + else 211 + val |= min(sys_val, pvm_max) << shift; 212 + } 213 + 214 + return val; 215 + } 216 + 217 + static u64 pvm_calc_id_reg(const struct kvm_vcpu *vcpu, u32 id) 218 + { 219 + switch (id) { 220 + case SYS_ID_AA64PFR0_EL1: 221 + return get_restricted_features(vcpu, id_aa64pfr0_el1_sys_val, pvmid_aa64pfr0); 222 + case SYS_ID_AA64PFR1_EL1: 223 + return get_restricted_features(vcpu, id_aa64pfr1_el1_sys_val, pvmid_aa64pfr1); 224 + case SYS_ID_AA64ISAR0_EL1: 225 + return id_aa64isar0_el1_sys_val; 226 + case SYS_ID_AA64ISAR1_EL1: 227 + return get_restricted_features(vcpu, id_aa64isar1_el1_sys_val, pvmid_aa64isar1); 228 + case SYS_ID_AA64ISAR2_EL1: 229 + return get_restricted_features(vcpu, id_aa64isar2_el1_sys_val, pvmid_aa64isar2); 230 + case SYS_ID_AA64MMFR0_EL1: 231 + return get_restricted_features(vcpu, id_aa64mmfr0_el1_sys_val, pvmid_aa64mmfr0); 232 + case SYS_ID_AA64MMFR1_EL1: 233 + return get_restricted_features(vcpu, id_aa64mmfr1_el1_sys_val, pvmid_aa64mmfr1); 234 + case SYS_ID_AA64MMFR2_EL1: 235 + return get_restricted_features(vcpu, id_aa64mmfr2_el1_sys_val, pvmid_aa64mmfr2); 236 + case SYS_ID_AA64DFR0_EL1: 237 + return ID_AA64DFR0_EL1_NONZERO_NI; 238 + case SYS_ID_AA64MMFR4_EL1: 239 + return ID_AA64MMFR4_EL1_NONZERO_NI; 240 + default: 241 + /* Unhandled ID register, RAZ */ 242 + return 0; 243 + } 244 + } 30 245 31 246 /* 32 247 * Inject an unknown/undefined exception to an AArch64 guest while most of its ··· 264 49 write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); 265 50 } 266 51 267 - /* 268 - * Returns the restricted features values of the feature register based on the 269 - * limitations in restrict_fields. 270 - * A feature id field value of 0b0000 does not impose any restrictions. 271 - * Note: Use only for unsigned feature field values. 
272 - */ 273 - static u64 get_restricted_features_unsigned(u64 sys_reg_val, 274 - u64 restrict_fields) 275 - { 276 - u64 value = 0UL; 277 - u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); 278 - 279 - /* 280 - * According to the Arm Architecture Reference Manual, feature fields 281 - * use increasing values to indicate increases in functionality. 282 - * Iterate over the restricted feature fields and calculate the minimum 283 - * unsigned value between the one supported by the system, and what the 284 - * value is being restricted to. 285 - */ 286 - while (sys_reg_val && restrict_fields) { 287 - value |= min(sys_reg_val & mask, restrict_fields & mask); 288 - sys_reg_val &= ~mask; 289 - restrict_fields &= ~mask; 290 - mask <<= ARM64_FEATURE_FIELD_BITS; 291 - } 292 - 293 - return value; 294 - } 295 - 296 - /* 297 - * Functions that return the value of feature id registers for protected VMs 298 - * based on allowed features, system features, and KVM support. 299 - */ 300 - 301 - static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) 302 - { 303 - u64 set_mask = 0; 304 - u64 allow_mask = PVM_ID_AA64PFR0_ALLOW; 305 - 306 - set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val, 307 - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED); 308 - 309 - return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask; 310 - } 311 - 312 - static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) 313 - { 314 - const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); 315 - u64 allow_mask = PVM_ID_AA64PFR1_ALLOW; 316 - 317 - if (!kvm_has_mte(kvm)) 318 - allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); 319 - 320 - return id_aa64pfr1_el1_sys_val & allow_mask; 321 - } 322 - 323 - static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) 324 - { 325 - /* 326 - * No support for Scalable Vectors, therefore, hyp has no sanitized 327 - * copy of the feature id register. 
328 - */ 329 - BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL); 330 - return 0; 331 - } 332 - 333 - static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) 334 - { 335 - /* 336 - * No support for debug, including breakpoints, and watchpoints, 337 - * therefore, pKVM has no sanitized copy of the feature id register. 338 - */ 339 - BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL); 340 - return 0; 341 - } 342 - 343 - static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) 344 - { 345 - /* 346 - * No support for debug, therefore, hyp has no sanitized copy of the 347 - * feature id register. 348 - */ 349 - BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL); 350 - return 0; 351 - } 352 - 353 - static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) 354 - { 355 - /* 356 - * No support for implementation defined features, therefore, hyp has no 357 - * sanitized copy of the feature id register. 358 - */ 359 - BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL); 360 - return 0; 361 - } 362 - 363 - static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) 364 - { 365 - /* 366 - * No support for implementation defined features, therefore, hyp has no 367 - * sanitized copy of the feature id register. 
368 - */ 369 - BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL); 370 - return 0; 371 - } 372 - 373 - static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu) 374 - { 375 - return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW; 376 - } 377 - 378 - static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) 379 - { 380 - u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; 381 - 382 - if (!vcpu_has_ptrauth(vcpu)) 383 - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | 384 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | 385 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | 386 - ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI)); 387 - 388 - return id_aa64isar1_el1_sys_val & allow_mask; 389 - } 390 - 391 - static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu) 392 - { 393 - u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW; 394 - 395 - if (!vcpu_has_ptrauth(vcpu)) 396 - allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | 397 - ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); 398 - 399 - return id_aa64isar2_el1_sys_val & allow_mask; 400 - } 401 - 402 - static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) 403 - { 404 - u64 set_mask; 405 - 406 - set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val, 407 - PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED); 408 - 409 - return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask; 410 - } 411 - 412 - static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu) 413 - { 414 - return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW; 415 - } 416 - 417 - static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu) 418 - { 419 - return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW; 420 - } 421 - 422 - /* Read a sanitized cpufeature ID register by its encoding */ 423 - u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) 424 - { 425 - switch (id) { 426 - case SYS_ID_AA64PFR0_EL1: 427 - return get_pvm_id_aa64pfr0(vcpu); 428 - case SYS_ID_AA64PFR1_EL1: 429 - return get_pvm_id_aa64pfr1(vcpu); 430 - case SYS_ID_AA64ZFR0_EL1: 
431 - return get_pvm_id_aa64zfr0(vcpu); 432 - case SYS_ID_AA64DFR0_EL1: 433 - return get_pvm_id_aa64dfr0(vcpu); 434 - case SYS_ID_AA64DFR1_EL1: 435 - return get_pvm_id_aa64dfr1(vcpu); 436 - case SYS_ID_AA64AFR0_EL1: 437 - return get_pvm_id_aa64afr0(vcpu); 438 - case SYS_ID_AA64AFR1_EL1: 439 - return get_pvm_id_aa64afr1(vcpu); 440 - case SYS_ID_AA64ISAR0_EL1: 441 - return get_pvm_id_aa64isar0(vcpu); 442 - case SYS_ID_AA64ISAR1_EL1: 443 - return get_pvm_id_aa64isar1(vcpu); 444 - case SYS_ID_AA64ISAR2_EL1: 445 - return get_pvm_id_aa64isar2(vcpu); 446 - case SYS_ID_AA64MMFR0_EL1: 447 - return get_pvm_id_aa64mmfr0(vcpu); 448 - case SYS_ID_AA64MMFR1_EL1: 449 - return get_pvm_id_aa64mmfr1(vcpu); 450 - case SYS_ID_AA64MMFR2_EL1: 451 - return get_pvm_id_aa64mmfr2(vcpu); 452 - default: 453 - /* Unhandled ID register, RAZ */ 454 - return 0; 455 - } 456 - } 457 - 458 52 static u64 read_id_reg(const struct kvm_vcpu *vcpu, 459 53 struct sys_reg_desc const *r) 460 54 { 461 - return pvm_read_id_reg(vcpu, reg_to_encoding(r)); 55 + struct kvm *kvm = vcpu->kvm; 56 + u32 reg = reg_to_encoding(r); 57 + 58 + if (WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))) 59 + return 0; 60 + 61 + if (reg >= sys_reg(3, 0, 0, 1, 0) && reg <= sys_reg(3, 0, 0, 7, 7)) 62 + return kvm->arch.id_regs[IDREG_IDX(reg)]; 63 + 64 + return 0; 462 65 } 463 66 464 67 /* Handler to RAZ/WI sysregs */ ··· 303 270 inject_undef64(vcpu); 304 271 return false; 305 272 } 306 - 307 - /* 308 - * No support for AArch32 guests, therefore, pKVM has no sanitized copy 309 - * of AArch32 feature id registers. 310 - */ 311 - BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), 312 - PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_EL1_IMP); 313 273 314 274 return pvm_access_raz_wi(vcpu, p, r); 315 275 } ··· 473 447 474 448 /* Performance Monitoring Registers are restricted. */ 475 449 }; 450 + 451 + /* 452 + * Initializes feature registers for protected vms. 
453 + */ 454 + void kvm_init_pvm_id_regs(struct kvm_vcpu *vcpu) 455 + { 456 + struct kvm *kvm = vcpu->kvm; 457 + struct kvm_arch *ka = &kvm->arch; 458 + u32 r; 459 + 460 + hyp_assert_lock_held(&vm_table_lock); 461 + 462 + if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)) 463 + return; 464 + 465 + /* 466 + * Initialize only AArch64 id registers since AArch32 isn't supported 467 + * for protected VMs. 468 + */ 469 + for (r = sys_reg(3, 0, 0, 4, 0); r <= sys_reg(3, 0, 0, 7, 7); r += sys_reg(0, 0, 0, 0, 1)) 470 + ka->id_regs[IDREG_IDX(r)] = pvm_calc_id_reg(vcpu, r); 471 + 472 + set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); 473 + } 476 474 477 475 /* 478 476 * Checks that the sysreg table is unique and in-order.
+12 -4
arch/arm64/kvm/hyp/nvhe/timer-sr.c
··· 22 22 */ 23 23 void __timer_disable_traps(struct kvm_vcpu *vcpu) 24 24 { 25 - u64 val, shift = 0; 25 + u64 set, clr, shift = 0; 26 26 27 27 if (has_hvhe()) 28 28 shift = 10; 29 29 30 30 /* Allow physical timer/counter access for the host */ 31 - val = read_sysreg(cnthctl_el2); 32 - val |= (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift; 33 - write_sysreg(val, cnthctl_el2); 31 + set = (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift; 32 + clr = CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT; 33 + 34 + sysreg_clear_set(cnthctl_el2, clr, set); 34 35 } 35 36 36 37 /* ··· 58 57 clr <<= 10; 59 58 set <<= 10; 60 59 } 60 + 61 + /* 62 + * Trap the virtual counter/timer if we have a broken cntvoff 63 + * implementation. 64 + */ 65 + if (has_broken_cntvoff()) 66 + set |= CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT; 61 67 62 68 sysreg_clear_set(cnthctl_el2, clr, set); 63 69 }
+5 -8
arch/arm64/kvm/hyp/pgtable.c
··· 1232 1232 NULL, NULL, 0); 1233 1233 } 1234 1234 1235 - void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) 1235 + void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, 1236 + enum kvm_pgtable_walk_flags flags) 1236 1237 { 1237 1238 int ret; 1238 1239 1239 1240 ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, 1240 - NULL, NULL, 1241 - KVM_PGTABLE_WALK_HANDLE_FAULT | 1242 - KVM_PGTABLE_WALK_SHARED); 1241 + NULL, NULL, flags); 1243 1242 if (!ret) 1244 1243 dsb(ishst); 1245 1244 } ··· 1294 1295 } 1295 1296 1296 1297 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, 1297 - enum kvm_pgtable_prot prot) 1298 + enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags) 1298 1299 { 1299 1300 int ret; 1300 1301 s8 level; ··· 1312 1313 if (prot & KVM_PGTABLE_PROT_X) 1313 1314 clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; 1314 1315 1315 - ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, 1316 - KVM_PGTABLE_WALK_HANDLE_FAULT | 1317 - KVM_PGTABLE_WALK_SHARED); 1316 + ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags); 1318 1317 if (!ret || ret == -EAGAIN) 1319 1318 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level); 1320 1319 return ret;
-5
arch/arm64/kvm/hyp/vhe/debug-sr.c
··· 19 19 { 20 20 __debug_switch_to_host_common(vcpu); 21 21 } 22 - 23 - u64 __kvm_get_mdcr_el2(void) 24 - { 25 - return read_sysreg(mdcr_el2); 26 - }
+107
arch/arm64/kvm/hyp/vhe/switch.c
··· 256 256 host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL; 257 257 } 258 258 259 + static u64 compute_emulated_cntx_ctl_el0(struct kvm_vcpu *vcpu, 260 + enum vcpu_sysreg reg) 261 + { 262 + unsigned long ctl; 263 + u64 cval, cnt; 264 + bool stat; 265 + 266 + switch (reg) { 267 + case CNTP_CTL_EL0: 268 + cval = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); 269 + ctl = __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); 270 + cnt = compute_counter_value(vcpu_ptimer(vcpu)); 271 + break; 272 + case CNTV_CTL_EL0: 273 + cval = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); 274 + ctl = __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); 275 + cnt = compute_counter_value(vcpu_vtimer(vcpu)); 276 + break; 277 + default: 278 + BUG(); 279 + } 280 + 281 + stat = cval <= cnt; 282 + __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &ctl, stat); 283 + 284 + return ctl; 285 + } 286 + 287 + static bool kvm_hyp_handle_timer(struct kvm_vcpu *vcpu, u64 *exit_code) 288 + { 289 + u64 esr, val; 290 + 291 + /* 292 + * Having FEAT_ECV allows for a better quality of timer emulation. 293 + * However, this comes at a huge cost in terms of traps. Try and 294 + * satisfy the reads from guest's hypervisor context without 295 + * returning to the kernel if we can. 
296 + */ 297 + if (!is_hyp_ctxt(vcpu)) 298 + return false; 299 + 300 + esr = kvm_vcpu_get_esr(vcpu); 301 + if ((esr & ESR_ELx_SYS64_ISS_DIR_MASK) != ESR_ELx_SYS64_ISS_DIR_READ) 302 + return false; 303 + 304 + switch (esr_sys64_to_sysreg(esr)) { 305 + case SYS_CNTP_CTL_EL02: 306 + val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0); 307 + break; 308 + case SYS_CNTP_CTL_EL0: 309 + if (vcpu_el2_e2h_is_set(vcpu)) 310 + val = read_sysreg_el0(SYS_CNTP_CTL); 311 + else 312 + val = compute_emulated_cntx_ctl_el0(vcpu, CNTP_CTL_EL0); 313 + break; 314 + case SYS_CNTP_CVAL_EL02: 315 + val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); 316 + break; 317 + case SYS_CNTP_CVAL_EL0: 318 + if (vcpu_el2_e2h_is_set(vcpu)) { 319 + val = read_sysreg_el0(SYS_CNTP_CVAL); 320 + 321 + if (!has_cntpoff()) 322 + val -= timer_get_offset(vcpu_hptimer(vcpu)); 323 + } else { 324 + val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); 325 + } 326 + break; 327 + case SYS_CNTPCT_EL0: 328 + case SYS_CNTPCTSS_EL0: 329 + val = compute_counter_value(vcpu_hptimer(vcpu)); 330 + break; 331 + case SYS_CNTV_CTL_EL02: 332 + val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0); 333 + break; 334 + case SYS_CNTV_CTL_EL0: 335 + if (vcpu_el2_e2h_is_set(vcpu)) 336 + val = read_sysreg_el0(SYS_CNTV_CTL); 337 + else 338 + val = compute_emulated_cntx_ctl_el0(vcpu, CNTV_CTL_EL0); 339 + break; 340 + case SYS_CNTV_CVAL_EL02: 341 + val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); 342 + break; 343 + case SYS_CNTV_CVAL_EL0: 344 + if (vcpu_el2_e2h_is_set(vcpu)) 345 + val = read_sysreg_el0(SYS_CNTV_CVAL); 346 + else 347 + val = __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); 348 + break; 349 + case SYS_CNTVCT_EL0: 350 + case SYS_CNTVCTSS_EL0: 351 + val = compute_counter_value(vcpu_hvtimer(vcpu)); 352 + break; 353 + default: 354 + return false; 355 + } 356 + 357 + vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val); 358 + __kvm_skip_instr(vcpu); 359 + 360 + return true; 361 + } 362 + 259 363 static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 
*exit_code) 260 364 { 261 365 u64 esr = kvm_vcpu_get_esr(vcpu); ··· 511 407 static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code) 512 408 { 513 409 if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code)) 410 + return true; 411 + 412 + if (kvm_hyp_handle_timer(vcpu, exit_code)) 514 413 return true; 515 414 516 415 if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code))
+2 -2
arch/arm64/kvm/hyp/vhe/sysreg-sr.c
··· 216 216 __sysreg32_restore_state(vcpu); 217 217 __sysreg_restore_user_state(guest_ctxt); 218 218 219 - if (unlikely(__is_hyp_ctxt(guest_ctxt))) { 219 + if (unlikely(is_hyp_ctxt(vcpu))) { 220 220 __sysreg_restore_vel2_state(vcpu); 221 221 } else { 222 222 if (vcpu_has_nv(vcpu)) { ··· 260 260 261 261 host_ctxt = host_data_ptr(host_ctxt); 262 262 263 - if (unlikely(__is_hyp_ctxt(guest_ctxt))) 263 + if (unlikely(is_hyp_ctxt(vcpu))) 264 264 __sysreg_save_vel2_state(vcpu); 265 265 else 266 266 __sysreg_save_el1_state(guest_ctxt);
+70 -38
arch/arm64/kvm/mmu.c
··· 15 15 #include <asm/kvm_arm.h> 16 16 #include <asm/kvm_mmu.h> 17 17 #include <asm/kvm_pgtable.h> 18 + #include <asm/kvm_pkvm.h> 18 19 #include <asm/kvm_ras.h> 19 20 #include <asm/kvm_asm.h> 20 21 #include <asm/kvm_emulate.h> ··· 30 29 static unsigned long __ro_after_init hyp_idmap_end; 31 30 static phys_addr_t __ro_after_init hyp_idmap_vector; 32 31 32 + u32 __ro_after_init __hyp_va_bits; 33 + 33 34 static unsigned long __ro_after_init io_map_base; 35 + 36 + #define KVM_PGT_FN(fn) (!is_protected_kvm_enabled() ? fn : p ## fn) 34 37 35 38 static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end, 36 39 phys_addr_t size) ··· 152 147 return -EINVAL; 153 148 154 149 next = __stage2_range_addr_end(addr, end, chunk_size); 155 - ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache); 150 + ret = KVM_PGT_FN(kvm_pgtable_stage2_split)(pgt, addr, next - addr, cache); 156 151 if (ret) 157 152 break; 158 153 } while (addr = next, addr != end); ··· 173 168 */ 174 169 int kvm_arch_flush_remote_tlbs(struct kvm *kvm) 175 170 { 176 - kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); 171 + if (is_protected_kvm_enabled()) 172 + kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle); 173 + else 174 + kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); 177 175 return 0; 178 176 } 179 177 180 178 int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, 181 179 gfn_t gfn, u64 nr_pages) 182 180 { 183 - kvm_tlb_flush_vmid_range(&kvm->arch.mmu, 184 - gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); 181 + u64 size = nr_pages << PAGE_SHIFT; 182 + u64 addr = gfn << PAGE_SHIFT; 183 + 184 + if (is_protected_kvm_enabled()) 185 + kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle); 186 + else 187 + kvm_tlb_flush_vmid_range(&kvm->arch.mmu, addr, size); 185 188 return 0; 186 189 } 187 190 ··· 238 225 void *pgtable = page_to_virt(page); 239 226 s8 level = page_private(page); 240 227 241 - kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level); 228 
+ KVM_PGT_FN(kvm_pgtable_stage2_free_unlinked)(&kvm_s2_mm_ops, pgtable, level); 242 229 } 243 230 244 231 static void stage2_free_unlinked_table(void *addr, s8 level) ··· 337 324 338 325 lockdep_assert_held_write(&kvm->mmu_lock); 339 326 WARN_ON(size & ~PAGE_MASK); 340 - WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap, 327 + WARN_ON(stage2_apply_range(mmu, start, end, KVM_PGT_FN(kvm_pgtable_stage2_unmap), 341 328 may_block)); 342 329 } 343 330 ··· 349 336 350 337 void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) 351 338 { 352 - stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_flush); 339 + stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_flush)); 353 340 } 354 341 355 342 static void stage2_flush_memslot(struct kvm *kvm, ··· 717 704 718 705 mutex_lock(&kvm_hyp_pgd_mutex); 719 706 /* 720 - * Efficient stack verification using the PAGE_SHIFT bit implies 707 + * Efficient stack verification using the NVHE_STACK_SHIFT bit implies 721 708 * an alignment of our allocation on the order of the size. 722 709 */ 723 - size = PAGE_SIZE * 2; 710 + size = NVHE_STACK_SIZE * 2; 724 711 base = ALIGN_DOWN(io_map_base - size, size); 725 712 726 713 ret = __hyp_alloc_private_va_range(base); ··· 737 724 * at the higher address and leave the lower guard page 738 725 * unbacked. 739 726 * 740 - * Any valid stack address now has the PAGE_SHIFT bit as 1 727 + * Any valid stack address now has the NVHE_STACK_SHIFT bit as 1 741 728 * and addresses corresponding to the guard page have the 742 - * PAGE_SHIFT bit as 0 - this is used for overflow detection. 729 + * NVHE_STACK_SHIFT bit as 0 - this is used for overflow detection. 
743 730 */ 744 - ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr, 745 - PAGE_HYP); 731 + ret = __create_hyp_mappings(base + NVHE_STACK_SIZE, NVHE_STACK_SIZE, 732 + phys_addr, PAGE_HYP); 746 733 if (ret) 747 734 kvm_err("Cannot map hyp stack\n"); 748 735 ··· 955 942 return -ENOMEM; 956 943 957 944 mmu->arch = &kvm->arch; 958 - err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops); 945 + err = KVM_PGT_FN(kvm_pgtable_stage2_init)(pgt, mmu, &kvm_s2_mm_ops); 959 946 if (err) 960 947 goto out_free_pgtable; 948 + 949 + mmu->pgt = pgt; 950 + if (is_protected_kvm_enabled()) 951 + return 0; 961 952 962 953 mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); 963 954 if (!mmu->last_vcpu_ran) { ··· 976 959 mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT; 977 960 mmu->split_page_cache.gfp_zero = __GFP_ZERO; 978 961 979 - mmu->pgt = pgt; 980 962 mmu->pgd_phys = __pa(pgt->pgd); 981 963 982 964 if (kvm_is_nested_s2_mmu(kvm, mmu)) ··· 984 968 return 0; 985 969 986 970 out_destroy_pgtable: 987 - kvm_pgtable_stage2_destroy(pgt); 971 + KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); 988 972 out_free_pgtable: 989 973 kfree(pgt); 990 974 return err; ··· 1081 1065 write_unlock(&kvm->mmu_lock); 1082 1066 1083 1067 if (pgt) { 1084 - kvm_pgtable_stage2_destroy(pgt); 1068 + KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); 1085 1069 kfree(pgt); 1086 1070 } 1087 1071 } ··· 1098 1082 1099 1083 void free_hyp_memcache(struct kvm_hyp_memcache *mc) 1100 1084 { 1101 - if (is_protected_kvm_enabled()) 1102 - __free_hyp_memcache(mc, hyp_mc_free_fn, 1103 - kvm_host_va, NULL); 1085 + if (!is_protected_kvm_enabled()) 1086 + return; 1087 + 1088 + kfree(mc->mapping); 1089 + __free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, NULL); 1104 1090 } 1105 1091 1106 1092 int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages) 1107 1093 { 1108 1094 if (!is_protected_kvm_enabled()) 1109 1095 return 0; 1096 + 1097 + if (!mc->mapping) { 1098 + 
mc->mapping = kzalloc(sizeof(struct pkvm_mapping), GFP_KERNEL_ACCOUNT); 1099 + if (!mc->mapping) 1100 + return -ENOMEM; 1101 + } 1110 1102 1111 1103 return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn, 1112 1104 kvm_host_pa, NULL); ··· 1154 1130 break; 1155 1131 1156 1132 write_lock(&kvm->mmu_lock); 1157 - ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, 1158 - &cache, 0); 1133 + ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, addr, PAGE_SIZE, 1134 + pa, prot, &cache, 0); 1159 1135 write_unlock(&kvm->mmu_lock); 1160 1136 if (ret) 1161 1137 break; ··· 1175 1151 */ 1176 1152 void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) 1177 1153 { 1178 - stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect); 1154 + stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_wrprotect)); 1179 1155 } 1180 1156 1181 1157 /** ··· 1466 1442 unsigned long mmu_seq; 1467 1443 phys_addr_t ipa = fault_ipa; 1468 1444 struct kvm *kvm = vcpu->kvm; 1469 - struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; 1470 1445 struct vm_area_struct *vma; 1471 1446 short vma_shift; 1447 + void *memcache; 1472 1448 gfn_t gfn; 1473 1449 kvm_pfn_t pfn; 1474 1450 bool logging_active = memslot_is_logging(memslot); ··· 1476 1452 enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; 1477 1453 struct kvm_pgtable *pgt; 1478 1454 struct page *page; 1455 + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; 1479 1456 1480 1457 if (fault_is_perm) 1481 1458 fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu); ··· 1496 1471 * and a write fault needs to collapse a block entry into a table. 
1497 1472 */ 1498 1473 if (!fault_is_perm || (logging_active && write_fault)) { 1499 - ret = kvm_mmu_topup_memory_cache(memcache, 1500 - kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); 1474 + int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); 1475 + 1476 + if (!is_protected_kvm_enabled()) { 1477 + memcache = &vcpu->arch.mmu_page_cache; 1478 + ret = kvm_mmu_topup_memory_cache(memcache, min_pages); 1479 + } else { 1480 + memcache = &vcpu->arch.pkvm_memcache; 1481 + ret = topup_hyp_memcache(memcache, min_pages); 1482 + } 1501 1483 if (ret) 1502 1484 return ret; 1503 1485 } ··· 1525 1493 * logging_active is guaranteed to never be true for VM_PFNMAP 1526 1494 * memslots. 1527 1495 */ 1528 - if (logging_active) { 1496 + if (logging_active || is_protected_kvm_enabled()) { 1529 1497 force_pte = true; 1530 1498 vma_shift = PAGE_SHIFT; 1531 1499 } else { ··· 1665 1633 prot |= kvm_encode_nested_level(nested); 1666 1634 } 1667 1635 1668 - read_lock(&kvm->mmu_lock); 1636 + kvm_fault_lock(kvm); 1669 1637 pgt = vcpu->arch.hw_mmu->pgt; 1670 1638 if (mmu_invalidate_retry(kvm, mmu_seq)) { 1671 1639 ret = -EAGAIN; ··· 1727 1695 * PTE, which will be preserved. 
1728 1696 */ 1729 1697 prot &= ~KVM_NV_GUEST_MAP_SZ; 1730 - ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); 1698 + ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, fault_ipa, prot, flags); 1731 1699 } else { 1732 - ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, 1700 + ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, vma_pagesize, 1733 1701 __pfn_to_phys(pfn), prot, 1734 - memcache, 1735 - KVM_PGTABLE_WALK_HANDLE_FAULT | 1736 - KVM_PGTABLE_WALK_SHARED); 1702 + memcache, flags); 1737 1703 } 1738 1704 1739 1705 out_unlock: 1740 1706 kvm_release_faultin_page(kvm, page, !!ret, writable); 1741 - read_unlock(&kvm->mmu_lock); 1707 + kvm_fault_unlock(kvm); 1742 1708 1743 1709 /* Mark the page dirty only if the fault is handled successfully */ 1744 1710 if (writable && !ret) ··· 1748 1718 /* Resolve the access fault by making the page young again. */ 1749 1719 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) 1750 1720 { 1721 + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; 1751 1722 struct kvm_s2_mmu *mmu; 1752 1723 1753 1724 trace_kvm_access_fault(fault_ipa); 1754 1725 1755 1726 read_lock(&vcpu->kvm->mmu_lock); 1756 1727 mmu = vcpu->arch.hw_mmu; 1757 - kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa); 1728 + KVM_PGT_FN(kvm_pgtable_stage2_mkyoung)(mmu->pgt, fault_ipa, flags); 1758 1729 read_unlock(&vcpu->kvm->mmu_lock); 1759 1730 } 1760 1731 ··· 1795 1764 } 1796 1765 1797 1766 /* Falls between the IPA range and the PARange? 
*/ 1798 - if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) { 1767 + if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { 1799 1768 fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); 1800 1769 1801 1770 if (is_iabt) ··· 1961 1930 if (!kvm->arch.mmu.pgt) 1962 1931 return false; 1963 1932 1964 - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, 1933 + return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt, 1965 1934 range->start << PAGE_SHIFT, 1966 1935 size, true); 1967 1936 /* ··· 1977 1946 if (!kvm->arch.mmu.pgt) 1978 1947 return false; 1979 1948 1980 - return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, 1949 + return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt, 1981 1950 range->start << PAGE_SHIFT, 1982 1951 size, false); 1983 1952 } ··· 2087 2056 goto out_destroy_pgtable; 2088 2057 2089 2058 io_map_base = hyp_idmap_start; 2059 + __hyp_va_bits = *hyp_va_bits; 2090 2060 return 0; 2091 2061 2092 2062 out_destroy_pgtable:
+30 -8
arch/arm64/kvm/nested.c
··· 830 830 NV_FTR(PFR0, RAS) | 831 831 NV_FTR(PFR0, EL3) | 832 832 NV_FTR(PFR0, EL2) | 833 - NV_FTR(PFR0, EL1)); 834 - /* 64bit EL1/EL2/EL3 only */ 833 + NV_FTR(PFR0, EL1) | 834 + NV_FTR(PFR0, EL0)); 835 + /* 64bit only at any EL */ 836 + val |= FIELD_PREP(NV_FTR(PFR0, EL0), 0b0001); 835 837 val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001); 836 838 val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001); 837 839 val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001); ··· 965 963 kvm->arch.sysreg_masks->mask[i].res1 = res1; 966 964 } 967 965 968 - int kvm_init_nv_sysregs(struct kvm *kvm) 966 + int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) 969 967 { 968 + struct kvm *kvm = vcpu->kvm; 970 969 u64 res0, res1; 971 970 972 971 lockdep_assert_held(&kvm->arch.config_lock); 973 972 974 973 if (kvm->arch.sysreg_masks) 975 - return 0; 974 + goto out; 976 975 977 976 kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)), 978 977 GFP_KERNEL_ACCOUNT); ··· 1024 1021 res0 |= HCR_NV2; 1025 1022 if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, IMP)) 1026 1023 res0 |= (HCR_AT | HCR_NV1 | HCR_NV); 1027 - if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) && 1028 - __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC))) 1024 + if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) && 1025 + kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC))) 1029 1026 res0 |= (HCR_API | HCR_APK); 1030 1027 if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TME, IMP)) 1031 1028 res0 |= BIT(39); ··· 1081 1078 1082 1079 /* HFG[RW]TR_EL2 */ 1083 1080 res0 = res1 = 0; 1084 - if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) && 1085 - __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC))) 1081 + if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) && 1082 + kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC))) 1086 1083 res0 |= (HFGxTR_EL2_APDAKey | HFGxTR_EL2_APDBKey | 1087 1084 HFGxTR_EL2_APGAKey | HFGxTR_EL2_APIAKey | 1088 1085 HFGxTR_EL2_APIBKey); ··· 
1273 1270 if (!kvm_has_feat(kvm, ID_AA64DFR2_EL1, STEP, IMP)) 1274 1271 res0 |= MDCR_EL2_EnSTEPOP; 1275 1272 set_sysreg_masks(kvm, MDCR_EL2, res0, res1); 1273 + 1274 + /* CNTHCTL_EL2 */ 1275 + res0 = GENMASK(63, 20); 1276 + res1 = 0; 1277 + if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RME, IMP)) 1278 + res0 |= CNTHCTL_CNTPMASK | CNTHCTL_CNTVMASK; 1279 + if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, CNTPOFF)) { 1280 + res0 |= CNTHCTL_ECV; 1281 + if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, ECV, IMP)) 1282 + res0 |= (CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT | 1283 + CNTHCTL_EL1NVPCT | CNTHCTL_EL1NVVCT); 1284 + } 1285 + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP)) 1286 + res0 |= GENMASK(11, 8); 1287 + set_sysreg_masks(kvm, CNTHCTL_EL2, res0, res1); 1288 + 1289 + out: 1290 + for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++) 1291 + (void)__vcpu_sys_reg(vcpu, sr); 1276 1292 1277 1293 return 0; 1278 1294 }
+201
arch/arm64/kvm/pkvm.c
··· 7 7 #include <linux/init.h> 8 8 #include <linux/kmemleak.h> 9 9 #include <linux/kvm_host.h> 10 + #include <asm/kvm_mmu.h> 10 11 #include <linux/memblock.h> 11 12 #include <linux/mutex.h> 12 13 #include <linux/sort.h> ··· 269 268 return ret; 270 269 } 271 270 device_initcall_sync(finalize_pkvm); 271 + 272 + static int cmp_mappings(struct rb_node *node, const struct rb_node *parent) 273 + { 274 + struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node); 275 + struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node); 276 + 277 + if (a->gfn < b->gfn) 278 + return -1; 279 + if (a->gfn > b->gfn) 280 + return 1; 281 + return 0; 282 + } 283 + 284 + static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn) 285 + { 286 + struct rb_node *node = root->rb_node, *prev = NULL; 287 + struct pkvm_mapping *mapping; 288 + 289 + while (node) { 290 + mapping = rb_entry(node, struct pkvm_mapping, node); 291 + if (mapping->gfn == gfn) 292 + return node; 293 + prev = node; 294 + node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right; 295 + } 296 + 297 + return prev; 298 + } 299 + 300 + /* 301 + * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing 302 + * of __map inline. 
303 + */ 304 + #define for_each_mapping_in_range_safe(__pgt, __start, __end, __map) \ 305 + for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings, \ 306 + ((__start) >> PAGE_SHIFT)); \ 307 + __tmp && ({ \ 308 + __map = rb_entry(__tmp, struct pkvm_mapping, node); \ 309 + __tmp = rb_next(__tmp); \ 310 + true; \ 311 + }); \ 312 + ) \ 313 + if (__map->gfn < ((__start) >> PAGE_SHIFT)) \ 314 + continue; \ 315 + else if (__map->gfn >= ((__end) >> PAGE_SHIFT)) \ 316 + break; \ 317 + else 318 + 319 + int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, 320 + struct kvm_pgtable_mm_ops *mm_ops) 321 + { 322 + pgt->pkvm_mappings = RB_ROOT; 323 + pgt->mmu = mmu; 324 + 325 + return 0; 326 + } 327 + 328 + void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) 329 + { 330 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 331 + pkvm_handle_t handle = kvm->arch.pkvm.handle; 332 + struct pkvm_mapping *mapping; 333 + struct rb_node *node; 334 + 335 + if (!handle) 336 + return; 337 + 338 + node = rb_first(&pgt->pkvm_mappings); 339 + while (node) { 340 + mapping = rb_entry(node, struct pkvm_mapping, node); 341 + kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn); 342 + node = rb_next(node); 343 + rb_erase(&mapping->node, &pgt->pkvm_mappings); 344 + kfree(mapping); 345 + } 346 + } 347 + 348 + int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, 349 + u64 phys, enum kvm_pgtable_prot prot, 350 + void *mc, enum kvm_pgtable_walk_flags flags) 351 + { 352 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 353 + struct pkvm_mapping *mapping = NULL; 354 + struct kvm_hyp_memcache *cache = mc; 355 + u64 gfn = addr >> PAGE_SHIFT; 356 + u64 pfn = phys >> PAGE_SHIFT; 357 + int ret; 358 + 359 + if (size != PAGE_SIZE) 360 + return -EINVAL; 361 + 362 + lockdep_assert_held_write(&kvm->mmu_lock); 363 + ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot); 364 + if (ret) { 365 + /* Is the gfn already mapped due 
to a racing vCPU? */ 366 + if (ret == -EPERM) 367 + return -EAGAIN; 368 + } 369 + 370 + swap(mapping, cache->mapping); 371 + mapping->gfn = gfn; 372 + mapping->pfn = pfn; 373 + WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings)); 374 + 375 + return ret; 376 + } 377 + 378 + int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) 379 + { 380 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 381 + pkvm_handle_t handle = kvm->arch.pkvm.handle; 382 + struct pkvm_mapping *mapping; 383 + int ret = 0; 384 + 385 + lockdep_assert_held_write(&kvm->mmu_lock); 386 + for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) { 387 + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn); 388 + if (WARN_ON(ret)) 389 + break; 390 + rb_erase(&mapping->node, &pgt->pkvm_mappings); 391 + kfree(mapping); 392 + } 393 + 394 + return ret; 395 + } 396 + 397 + int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) 398 + { 399 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 400 + pkvm_handle_t handle = kvm->arch.pkvm.handle; 401 + struct pkvm_mapping *mapping; 402 + int ret = 0; 403 + 404 + lockdep_assert_held(&kvm->mmu_lock); 405 + for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) { 406 + ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn); 407 + if (WARN_ON(ret)) 408 + break; 409 + } 410 + 411 + return ret; 412 + } 413 + 414 + int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) 415 + { 416 + struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu); 417 + struct pkvm_mapping *mapping; 418 + 419 + lockdep_assert_held(&kvm->mmu_lock); 420 + for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) 421 + __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE); 422 + 423 + return 0; 424 + } 425 + 426 + bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold) 427 + { 428 + struct kvm *kvm = 
kvm_s2_mmu_to_kvm(pgt->mmu); 429 + pkvm_handle_t handle = kvm->arch.pkvm.handle; 430 + struct pkvm_mapping *mapping; 431 + bool young = false; 432 + 433 + lockdep_assert_held(&kvm->mmu_lock); 434 + for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) 435 + young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn, 436 + mkold); 437 + 438 + return young; 439 + } 440 + 441 + int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, 442 + enum kvm_pgtable_walk_flags flags) 443 + { 444 + return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot); 445 + } 446 + 447 + void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, 448 + enum kvm_pgtable_walk_flags flags) 449 + { 450 + WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT)); 451 + } 452 + 453 + void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) 454 + { 455 + WARN_ON_ONCE(1); 456 + } 457 + 458 + kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level, 459 + enum kvm_pgtable_prot prot, void *mc, bool force_pte) 460 + { 461 + WARN_ON_ONCE(1); 462 + return NULL; 463 + } 464 + 465 + int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, 466 + struct kvm_mmu_memory_cache *mc) 467 + { 468 + WARN_ON_ONCE(1); 469 + return -EINVAL; 470 + }
+1 -5
arch/arm64/kvm/reset.c
··· 85 85 * KVM_REG_ARM64_SVE_VLS. Allocation is deferred until 86 86 * kvm_arm_vcpu_finalize(), which freezes the configuration. 87 87 */ 88 - vcpu_set_flag(vcpu, GUEST_HAS_SVE); 88 + set_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &vcpu->kvm->arch.flags); 89 89 } 90 90 91 91 /* ··· 210 210 } else { 211 211 kvm_vcpu_reset_sve(vcpu); 212 212 } 213 - 214 - if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || 215 - vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)) 216 - kvm_vcpu_enable_ptrauth(vcpu); 217 213 218 214 if (vcpu_el1_is_32bit(vcpu)) 219 215 pstate = VCPU_RESET_PSTATE_SVC;
+5 -4
arch/arm64/kvm/stacktrace.c
··· 19 19 #include <linux/kvm.h> 20 20 #include <linux/kvm_host.h> 21 21 22 + #include <asm/kvm_mmu.h> 22 23 #include <asm/stacktrace/nvhe.h> 23 24 24 25 static struct stack_info stackinfo_get_overflow(void) ··· 51 50 struct kvm_nvhe_stacktrace_info *stacktrace_info 52 51 = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); 53 52 unsigned long low = (unsigned long)stacktrace_info->stack_base; 54 - unsigned long high = low + PAGE_SIZE; 53 + unsigned long high = low + NVHE_STACK_SIZE; 55 54 56 55 return (struct stack_info) { 57 56 .low = low, ··· 61 60 62 61 static struct stack_info stackinfo_get_hyp_kern_va(void) 63 62 { 64 - unsigned long low = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page); 65 - unsigned long high = low + PAGE_SIZE; 63 + unsigned long low = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_base); 64 + unsigned long high = low + NVHE_STACK_SIZE; 66 65 67 66 return (struct stack_info) { 68 67 .low = low, ··· 146 145 */ 147 146 static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where) 148 147 { 149 - unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0); 148 + unsigned long va_mask = GENMASK_ULL(__hyp_va_bits - 1, 0); 150 149 unsigned long hyp_offset = (unsigned long)arg; 151 150 152 151 /* Mask tags and convert to kern addr */
+237 -188
arch/arm64/kvm/sys_regs.c
··· 570 570 struct sys_reg_params *p, 571 571 const struct sys_reg_desc *r) 572 572 { 573 - u64 oslsr; 574 - 575 573 if (!p->is_write) 576 574 return read_from_write_only(vcpu, p, r); 577 575 578 - /* Forward the OSLK bit to OSLSR */ 579 - oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~OSLSR_EL1_OSLK; 580 - if (p->regval & OSLAR_EL1_OSLK) 581 - oslsr |= OSLSR_EL1_OSLK; 582 - 583 - __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr; 576 + kvm_debug_handle_oslar(vcpu, p->regval); 584 577 return true; 585 578 } 586 579 ··· 614 621 } 615 622 } 616 623 617 - /* 618 - * We want to avoid world-switching all the DBG registers all the 619 - * time: 620 - * 621 - * - If we've touched any debug register, it is likely that we're 622 - * going to touch more of them. It then makes sense to disable the 623 - * traps and start doing the save/restore dance 624 - * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is 625 - * then mandatory to save/restore the registers, as the guest 626 - * depends on them. 627 - * 628 - * For this, we use a DIRTY bit, indicating the guest has modified the 629 - * debug registers, used as follow: 630 - * 631 - * On guest entry: 632 - * - If the dirty bit is set (because we're coming back from trapping), 633 - * disable the traps, save host registers, restore guest registers. 634 - * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), 635 - * set the dirty bit, disable the traps, save host registers, 636 - * restore guest registers. 637 - * - Otherwise, enable the traps 638 - * 639 - * On guest exit: 640 - * - If the dirty bit is set, save guest registers, restore host 641 - * registers and clear the dirty bit. This ensure that the host can 642 - * now use the debug registers. 
643 - */ 644 624 static bool trap_debug_regs(struct kvm_vcpu *vcpu, 645 625 struct sys_reg_params *p, 646 626 const struct sys_reg_desc *r) 647 627 { 648 628 access_rw(vcpu, p, r); 649 - if (p->is_write) 650 - vcpu_set_flag(vcpu, DEBUG_DIRTY); 651 629 652 - trace_trap_reg(__func__, r->reg, p->is_write, p->regval); 653 - 630 + kvm_debug_set_guest_ownership(vcpu); 654 631 return true; 655 632 } 656 633 ··· 629 666 * 630 667 * A 32 bit write to a debug register leave top bits alone 631 668 * A 32 bit read from a debug register only returns the bottom bits 632 - * 633 - * All writes will set the DEBUG_DIRTY flag to ensure the hyp code 634 - * switches between host and guest values in future. 635 669 */ 636 670 static void reg_to_dbg(struct kvm_vcpu *vcpu, 637 671 struct sys_reg_params *p, ··· 643 683 val &= ~mask; 644 684 val |= (p->regval & (mask >> shift)) << shift; 645 685 *dbg_reg = val; 646 - 647 - vcpu_set_flag(vcpu, DEBUG_DIRTY); 648 686 } 649 687 650 688 static void dbg_to_reg(struct kvm_vcpu *vcpu, ··· 656 698 p->regval = (*dbg_reg & mask) >> shift; 657 699 } 658 700 659 - static bool trap_bvr(struct kvm_vcpu *vcpu, 660 - struct sys_reg_params *p, 661 - const struct sys_reg_desc *rd) 701 + static u64 *demux_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) 662 702 { 663 - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; 703 + struct kvm_guest_debug_arch *dbg = &vcpu->arch.vcpu_debug_state; 704 + 705 + switch (rd->Op2) { 706 + case 0b100: 707 + return &dbg->dbg_bvr[rd->CRm]; 708 + case 0b101: 709 + return &dbg->dbg_bcr[rd->CRm]; 710 + case 0b110: 711 + return &dbg->dbg_wvr[rd->CRm]; 712 + case 0b111: 713 + return &dbg->dbg_wcr[rd->CRm]; 714 + default: 715 + KVM_BUG_ON(1, vcpu->kvm); 716 + return NULL; 717 + } 718 + } 719 + 720 + static bool trap_dbg_wb_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, 721 + const struct sys_reg_desc *rd) 722 + { 723 + u64 *reg = demux_wb_reg(vcpu, rd); 724 + 725 + if (!reg) 726 + return false; 664 727 
665 728 if (p->is_write) 666 - reg_to_dbg(vcpu, p, rd, dbg_reg); 729 + reg_to_dbg(vcpu, p, rd, reg); 667 730 else 668 - dbg_to_reg(vcpu, p, rd, dbg_reg); 731 + dbg_to_reg(vcpu, p, rd, reg); 669 732 670 - trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); 671 - 733 + kvm_debug_set_guest_ownership(vcpu); 672 734 return true; 673 735 } 674 736 675 - static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 676 - u64 val) 737 + static int set_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 738 + u64 val) 677 739 { 678 - vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = val; 740 + u64 *reg = demux_wb_reg(vcpu, rd); 741 + 742 + if (!reg) 743 + return -EINVAL; 744 + 745 + *reg = val; 679 746 return 0; 680 747 } 681 748 682 - static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 683 - u64 *val) 749 + static int get_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 750 + u64 *val) 684 751 { 685 - *val = vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; 752 + u64 *reg = demux_wb_reg(vcpu, rd); 753 + 754 + if (!reg) 755 + return -EINVAL; 756 + 757 + *val = *reg; 686 758 return 0; 687 759 } 688 760 689 - static u64 reset_bvr(struct kvm_vcpu *vcpu, 690 - const struct sys_reg_desc *rd) 761 + static u64 reset_dbg_wb_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) 691 762 { 692 - vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val; 693 - return rd->val; 694 - } 763 + u64 *reg = demux_wb_reg(vcpu, rd); 695 764 696 - static bool trap_bcr(struct kvm_vcpu *vcpu, 697 - struct sys_reg_params *p, 698 - const struct sys_reg_desc *rd) 699 - { 700 - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; 765 + /* 766 + * Bail early if we couldn't find storage for the register, the 767 + * KVM_BUG_ON() in demux_wb_reg() will prevent this VM from ever 768 + * being run. 
769 + */ 770 + if (!reg) 771 + return 0; 701 772 702 - if (p->is_write) 703 - reg_to_dbg(vcpu, p, rd, dbg_reg); 704 - else 705 - dbg_to_reg(vcpu, p, rd, dbg_reg); 706 - 707 - trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); 708 - 709 - return true; 710 - } 711 - 712 - static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 713 - u64 val) 714 - { 715 - vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = val; 716 - return 0; 717 - } 718 - 719 - static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 720 - u64 *val) 721 - { 722 - *val = vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; 723 - return 0; 724 - } 725 - 726 - static u64 reset_bcr(struct kvm_vcpu *vcpu, 727 - const struct sys_reg_desc *rd) 728 - { 729 - vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val; 730 - return rd->val; 731 - } 732 - 733 - static bool trap_wvr(struct kvm_vcpu *vcpu, 734 - struct sys_reg_params *p, 735 - const struct sys_reg_desc *rd) 736 - { 737 - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; 738 - 739 - if (p->is_write) 740 - reg_to_dbg(vcpu, p, rd, dbg_reg); 741 - else 742 - dbg_to_reg(vcpu, p, rd, dbg_reg); 743 - 744 - trace_trap_reg(__func__, rd->CRm, p->is_write, 745 - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]); 746 - 747 - return true; 748 - } 749 - 750 - static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 751 - u64 val) 752 - { 753 - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = val; 754 - return 0; 755 - } 756 - 757 - static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 758 - u64 *val) 759 - { 760 - *val = vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; 761 - return 0; 762 - } 763 - 764 - static u64 reset_wvr(struct kvm_vcpu *vcpu, 765 - const struct sys_reg_desc *rd) 766 - { 767 - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val; 768 - return rd->val; 769 - } 770 - 771 - static bool trap_wcr(struct kvm_vcpu *vcpu, 772 - struct sys_reg_params *p, 773 - const struct 
sys_reg_desc *rd) 774 - { 775 - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; 776 - 777 - if (p->is_write) 778 - reg_to_dbg(vcpu, p, rd, dbg_reg); 779 - else 780 - dbg_to_reg(vcpu, p, rd, dbg_reg); 781 - 782 - trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); 783 - 784 - return true; 785 - } 786 - 787 - static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 788 - u64 val) 789 - { 790 - vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = val; 791 - return 0; 792 - } 793 - 794 - static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 795 - u64 *val) 796 - { 797 - *val = vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; 798 - return 0; 799 - } 800 - 801 - static u64 reset_wcr(struct kvm_vcpu *vcpu, 802 - const struct sys_reg_desc *rd) 803 - { 804 - vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val; 773 + *reg = rd->val; 805 774 return rd->val; 806 775 } 807 776 ··· 1235 1350 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ 1236 1351 #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ 1237 1352 { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ 1238 - trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \ 1353 + trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \ 1354 + get_dbg_wb_reg, set_dbg_wb_reg }, \ 1239 1355 { SYS_DESC(SYS_DBGBCRn_EL1(n)), \ 1240 - trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \ 1356 + trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \ 1357 + get_dbg_wb_reg, set_dbg_wb_reg }, \ 1241 1358 { SYS_DESC(SYS_DBGWVRn_EL1(n)), \ 1242 - trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \ 1359 + trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \ 1360 + get_dbg_wb_reg, set_dbg_wb_reg }, \ 1243 1361 { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ 1244 - trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } 1362 + trap_dbg_wb_reg, reset_dbg_wb_reg, 0, 0, \ 1363 + get_dbg_wb_reg, set_dbg_wb_reg } 1245 1364 1246 1365 #define PMU_SYS_REG(name) \ 1247 1366 SYS_DESC(SYS_##name), .reset = reset_pmu_reg, \ ··· 1299 1410 1300 1411 switch (reg) { 1301 1412 case 
SYS_CNTP_TVAL_EL0: 1413 + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) 1414 + tmr = TIMER_HPTIMER; 1415 + else 1416 + tmr = TIMER_PTIMER; 1417 + treg = TIMER_REG_TVAL; 1418 + break; 1419 + 1420 + case SYS_CNTV_TVAL_EL0: 1421 + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) 1422 + tmr = TIMER_HVTIMER; 1423 + else 1424 + tmr = TIMER_VTIMER; 1425 + treg = TIMER_REG_TVAL; 1426 + break; 1427 + 1302 1428 case SYS_AARCH32_CNTP_TVAL: 1429 + case SYS_CNTP_TVAL_EL02: 1303 1430 tmr = TIMER_PTIMER; 1304 1431 treg = TIMER_REG_TVAL; 1305 1432 break; 1433 + 1434 + case SYS_CNTV_TVAL_EL02: 1435 + tmr = TIMER_VTIMER; 1436 + treg = TIMER_REG_TVAL; 1437 + break; 1438 + 1439 + case SYS_CNTHP_TVAL_EL2: 1440 + tmr = TIMER_HPTIMER; 1441 + treg = TIMER_REG_TVAL; 1442 + break; 1443 + 1444 + case SYS_CNTHV_TVAL_EL2: 1445 + tmr = TIMER_HVTIMER; 1446 + treg = TIMER_REG_TVAL; 1447 + break; 1448 + 1306 1449 case SYS_CNTP_CTL_EL0: 1450 + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) 1451 + tmr = TIMER_HPTIMER; 1452 + else 1453 + tmr = TIMER_PTIMER; 1454 + treg = TIMER_REG_CTL; 1455 + break; 1456 + 1457 + case SYS_CNTV_CTL_EL0: 1458 + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) 1459 + tmr = TIMER_HVTIMER; 1460 + else 1461 + tmr = TIMER_VTIMER; 1462 + treg = TIMER_REG_CTL; 1463 + break; 1464 + 1307 1465 case SYS_AARCH32_CNTP_CTL: 1466 + case SYS_CNTP_CTL_EL02: 1308 1467 tmr = TIMER_PTIMER; 1309 1468 treg = TIMER_REG_CTL; 1310 1469 break; 1470 + 1471 + case SYS_CNTV_CTL_EL02: 1472 + tmr = TIMER_VTIMER; 1473 + treg = TIMER_REG_CTL; 1474 + break; 1475 + 1476 + case SYS_CNTHP_CTL_EL2: 1477 + tmr = TIMER_HPTIMER; 1478 + treg = TIMER_REG_CTL; 1479 + break; 1480 + 1481 + case SYS_CNTHV_CTL_EL2: 1482 + tmr = TIMER_HVTIMER; 1483 + treg = TIMER_REG_CTL; 1484 + break; 1485 + 1311 1486 case SYS_CNTP_CVAL_EL0: 1487 + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) 1488 + tmr = TIMER_HPTIMER; 1489 + else 1490 + tmr = TIMER_PTIMER; 1491 + treg = TIMER_REG_CVAL; 1492 + break; 1493 + 
1494 + case SYS_CNTV_CVAL_EL0: 1495 + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) 1496 + tmr = TIMER_HVTIMER; 1497 + else 1498 + tmr = TIMER_VTIMER; 1499 + treg = TIMER_REG_CVAL; 1500 + break; 1501 + 1312 1502 case SYS_AARCH32_CNTP_CVAL: 1503 + case SYS_CNTP_CVAL_EL02: 1313 1504 tmr = TIMER_PTIMER; 1314 1505 treg = TIMER_REG_CVAL; 1315 1506 break; 1507 + 1508 + case SYS_CNTV_CVAL_EL02: 1509 + tmr = TIMER_VTIMER; 1510 + treg = TIMER_REG_CVAL; 1511 + break; 1512 + 1513 + case SYS_CNTHP_CVAL_EL2: 1514 + tmr = TIMER_HPTIMER; 1515 + treg = TIMER_REG_CVAL; 1516 + break; 1517 + 1518 + case SYS_CNTHV_CVAL_EL2: 1519 + tmr = TIMER_HVTIMER; 1520 + treg = TIMER_REG_CVAL; 1521 + break; 1522 + 1316 1523 case SYS_CNTPCT_EL0: 1317 1524 case SYS_CNTPCTSS_EL0: 1525 + if (is_hyp_ctxt(vcpu)) 1526 + tmr = TIMER_HPTIMER; 1527 + else 1528 + tmr = TIMER_PTIMER; 1529 + treg = TIMER_REG_CNT; 1530 + break; 1531 + 1318 1532 case SYS_AARCH32_CNTPCT: 1533 + case SYS_AARCH32_CNTPCTSS: 1319 1534 tmr = TIMER_PTIMER; 1320 1535 treg = TIMER_REG_CNT; 1321 1536 break; 1537 + 1538 + case SYS_CNTVCT_EL0: 1539 + case SYS_CNTVCTSS_EL0: 1540 + if (is_hyp_ctxt(vcpu)) 1541 + tmr = TIMER_HVTIMER; 1542 + else 1543 + tmr = TIMER_VTIMER; 1544 + treg = TIMER_REG_CNT; 1545 + break; 1546 + 1547 + case SYS_AARCH32_CNTVCT: 1548 + case SYS_AARCH32_CNTVCTSS: 1549 + tmr = TIMER_VTIMER; 1550 + treg = TIMER_REG_CNT; 1551 + break; 1552 + 1322 1553 default: 1323 1554 print_sys_reg_msg(p, "%s", "Unhandled trapped timer register"); 1324 1555 return undef_access(vcpu, p, r); ··· 1608 1599 if (!vcpu_has_ptrauth(vcpu)) 1609 1600 val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | 1610 1601 ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); 1611 - if (!cpus_have_final_cap(ARM64_HAS_WFXT)) 1602 + if (!cpus_have_final_cap(ARM64_HAS_WFXT) || 1603 + has_broken_cntvoff()) 1612 1604 val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); 1613 1605 break; 1614 1606 case SYS_ID_AA64ISAR3_EL1: ··· 1816 1806 1817 1807 /* Hide SPE from guests 
*/ 1818 1808 val &= ~ID_AA64DFR0_EL1_PMSVer_MASK; 1809 + 1810 + /* Hide BRBE from guests */ 1811 + val &= ~ID_AA64DFR0_EL1_BRBE_MASK; 1819 1812 1820 1813 return val; 1821 1814 } ··· 2937 2924 AMU_AMEVTYPER1_EL0(15), 2938 2925 2939 2926 { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, 2927 + { SYS_DESC(SYS_CNTVCT_EL0), access_arch_timer }, 2940 2928 { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, 2929 + { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, 2941 2930 { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, 2942 2931 { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, 2943 2932 { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, 2933 + 2934 + { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer }, 2935 + { SYS_DESC(SYS_CNTV_CTL_EL0), access_arch_timer }, 2936 + { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer }, 2944 2937 2945 2938 /* PMEVCNTRn_EL0 */ 2946 2939 PMU_PMEVCNTR_EL0(0), ··· 3099 3080 3100 3081 EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0), 3101 3082 EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), 3083 + { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer }, 3084 + EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), 3085 + EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), 3086 + 3087 + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer }, 3088 + EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0), 3089 + EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0), 3102 3090 3103 3091 { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, 3092 + 3093 + { SYS_DESC(SYS_CNTP_TVAL_EL02), access_arch_timer }, 3094 + { SYS_DESC(SYS_CNTP_CTL_EL02), access_arch_timer }, 3095 + { SYS_DESC(SYS_CNTP_CVAL_EL02), access_arch_timer }, 3096 + 3097 + { SYS_DESC(SYS_CNTV_TVAL_EL02), access_arch_timer }, 3098 + { SYS_DESC(SYS_CNTV_CTL_EL02), access_arch_timer }, 3099 + { SYS_DESC(SYS_CNTV_CVAL_EL02), access_arch_timer }, 3104 3100 3105 3101 EL2_REG(SP_EL2, NULL, reset_unknown, 0), 3106 3102 }; ··· 3628 3594 * None of the other registers share their location, so treat 
them as 3629 3595 * if they were 64bit. 3630 3596 */ 3631 - #define DBG_BCR_BVR_WCR_WVR(n) \ 3632 - /* DBGBVRn */ \ 3633 - { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ 3634 - /* DBGBCRn */ \ 3635 - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ 3636 - /* DBGWVRn */ \ 3637 - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ 3638 - /* DBGWCRn */ \ 3639 - { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } 3597 + #define DBG_BCR_BVR_WCR_WVR(n) \ 3598 + /* DBGBVRn */ \ 3599 + { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), \ 3600 + trap_dbg_wb_reg, NULL, n }, \ 3601 + /* DBGBCRn */ \ 3602 + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_dbg_wb_reg, NULL, n }, \ 3603 + /* DBGWVRn */ \ 3604 + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_dbg_wb_reg, NULL, n }, \ 3605 + /* DBGWCRn */ \ 3606 + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_dbg_wb_reg, NULL, n } 3640 3607 3641 - #define DBGBXVR(n) \ 3642 - { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n } 3608 + #define DBGBXVR(n) \ 3609 + { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), \ 3610 + trap_dbg_wb_reg, NULL, n } 3643 3611 3644 3612 /* 3645 3613 * Trapped cp14 registers. 
We generally ignore most of the external ··· 3938 3902 { SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer }, 3939 3903 { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, 3940 3904 { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ 3905 + { SYS_DESC(SYS_AARCH32_CNTVCT), access_arch_timer }, 3941 3906 { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ 3942 3907 { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, 3943 3908 { SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer }, 3909 + { SYS_DESC(SYS_AARCH32_CNTVCTSS), access_arch_timer }, 3944 3910 }; 3945 3911 3946 3912 static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, ··· 4457 4419 reset_vcpu_ftr_id_reg(vcpu, r); 4458 4420 else 4459 4421 r->reset(vcpu, r); 4422 + 4423 + if (r->reg >= __SANITISED_REG_START__ && r->reg < NR_SYS_REGS) 4424 + (void)__vcpu_sys_reg(vcpu, r->reg); 4460 4425 } 4461 4426 4462 4427 set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); ··· 5036 4995 kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 | 5037 4996 HAFGRTR_EL2_RES1); 5038 4997 4998 + if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP)) { 4999 + kvm->arch.fgu[HDFGRTR_GROUP] |= (HDFGRTR_EL2_nBRBDATA | 5000 + HDFGRTR_EL2_nBRBCTL | 5001 + HDFGRTR_EL2_nBRBIDR); 5002 + kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_nBRBINJ | 5003 + HFGITR_EL2_nBRBIALL); 5004 + } 5005 + 5039 5006 set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags); 5040 5007 out: 5041 5008 mutex_unlock(&kvm->arch.config_lock); ··· 5071 5022 } 5072 5023 5073 5024 if (vcpu_has_nv(vcpu)) { 5074 - int ret = kvm_init_nv_sysregs(kvm); 5025 + int ret = kvm_init_nv_sysregs(vcpu); 5075 5026 if (ret) 5076 5027 return ret; 5077 5028 }
-75
arch/arm64/kvm/trace_handle_exit.h
··· 46 46 __entry->vcpu_pc, __entry->r0, __entry->imm) 47 47 ); 48 48 49 - TRACE_EVENT(kvm_arm_setup_debug, 50 - TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), 51 - TP_ARGS(vcpu, guest_debug), 52 - 53 - TP_STRUCT__entry( 54 - __field(struct kvm_vcpu *, vcpu) 55 - __field(__u32, guest_debug) 56 - ), 57 - 58 - TP_fast_assign( 59 - __entry->vcpu = vcpu; 60 - __entry->guest_debug = guest_debug; 61 - ), 62 - 63 - TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) 64 - ); 65 - 66 - TRACE_EVENT(kvm_arm_clear_debug, 67 - TP_PROTO(__u32 guest_debug), 68 - TP_ARGS(guest_debug), 69 - 70 - TP_STRUCT__entry( 71 - __field(__u32, guest_debug) 72 - ), 73 - 74 - TP_fast_assign( 75 - __entry->guest_debug = guest_debug; 76 - ), 77 - 78 - TP_printk("flags: 0x%08x", __entry->guest_debug) 79 - ); 80 - 81 49 /* 82 50 * The dreg32 name is a leftover from a distant past. This will really 83 51 * output a 64bit value... ··· 65 97 ), 66 98 67 99 TP_printk("%s: 0x%llx", __entry->name, __entry->value) 68 - ); 69 - 70 - TRACE_DEFINE_SIZEOF(__u64); 71 - 72 - TRACE_EVENT(kvm_arm_set_regset, 73 - TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), 74 - TP_ARGS(type, len, control, value), 75 - TP_STRUCT__entry( 76 - __field(const char *, name) 77 - __field(int, len) 78 - __array(u64, ctrls, 16) 79 - __array(u64, values, 16) 80 - ), 81 - TP_fast_assign( 82 - __entry->name = type; 83 - __entry->len = len; 84 - memcpy(__entry->ctrls, control, len << 3); 85 - memcpy(__entry->values, value, len << 3); 86 - ), 87 - TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name, 88 - __print_array(__entry->ctrls, __entry->len, sizeof(__u64)), 89 - __print_array(__entry->values, __entry->len, sizeof(__u64))) 90 - ); 91 - 92 - TRACE_EVENT(trap_reg, 93 - TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value), 94 - TP_ARGS(fn, reg, is_write, write_value), 95 - 96 - TP_STRUCT__entry( 97 - __field(const char *, fn) 98 - __field(int, reg) 99 - __field(bool, 
is_write) 100 - __field(u64, write_value) 101 - ), 102 - 103 - TP_fast_assign( 104 - __entry->fn = fn; 105 - __entry->reg = reg; 106 - __entry->is_write = is_write; 107 - __entry->write_value = write_value; 108 - ), 109 - 110 - TP_printk("%s %s reg %d (0x%016llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) 111 100 ); 112 101 113 102 TRACE_EVENT(kvm_handle_sys_reg,
+7 -4
arch/arm64/kvm/vgic/vgic-v3.c
··· 6 6 #include <linux/kstrtox.h> 7 7 #include <linux/kvm.h> 8 8 #include <linux/kvm_host.h> 9 + #include <linux/string_choices.h> 9 10 #include <kvm/arm_vgic.h> 10 11 #include <asm/kvm_hyp.h> 11 12 #include <asm/kvm_mmu.h> ··· 664 663 if (info->has_v4) { 665 664 kvm_vgic_global_state.has_gicv4 = gicv4_enable; 666 665 kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable; 667 - kvm_info("GICv4%s support %sabled\n", 666 + kvm_info("GICv4%s support %s\n", 668 667 kvm_vgic_global_state.has_gicv4_1 ? ".1" : "", 669 - gicv4_enable ? "en" : "dis"); 668 + str_enabled_disabled(gicv4_enable)); 670 669 } 671 670 672 671 kvm_vgic_global_state.vcpu_base = 0; ··· 735 734 { 736 735 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 737 736 738 - kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); 737 + if (likely(!is_protected_kvm_enabled())) 738 + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); 739 739 740 740 if (has_vhe()) 741 741 __vgic_v3_activate_traps(cpu_if); ··· 748 746 { 749 747 struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; 750 748 751 - kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); 749 + if (likely(!is_protected_kvm_enabled())) 750 + kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); 752 751 WARN_ON(vgic_v4_put(vcpu)); 753 752 754 753 if (has_vhe())
+1
arch/arm64/tools/cpucaps
··· 105 105 WORKAROUND_DEVICE_LOAD_ACQUIRE 106 106 WORKAROUND_NVIDIA_CARMEL_CNP 107 107 WORKAROUND_QCOM_FALKOR_E1003 108 + WORKAROUND_QCOM_ORYON_CNTVOFF 108 109 WORKAROUND_REPEAT_TLBI 109 110 WORKAROUND_SPECULATIVE_AT 110 111 WORKAROUND_SPECULATIVE_SSBS
+32
arch/arm64/tools/sysreg
··· 2064 2064 Res0 15:0 2065 2065 EndSysreg 2066 2066 2067 + Sysreg TRFCR_EL1 3 0 1 2 1 2068 + Res0 63:7 2069 + UnsignedEnum 6:5 TS 2070 + 0b0001 VIRTUAL 2071 + 0b0010 GUEST_PHYSICAL 2072 + 0b0011 PHYSICAL 2073 + EndEnum 2074 + Res0 4:2 2075 + Field 1 ExTRE 2076 + Field 0 E0TRE 2077 + EndSysregFields 2078 + 2067 2079 Sysreg SMPRI_EL1 3 0 1 2 4 2068 2080 Res0 63:4 2069 2081 Field 3:0 PRIORITY ··· 2625 2613 Field 0 ICIALLUIS 2626 2614 EndSysreg 2627 2615 2616 + Sysreg TRFCR_EL2 3 4 1 2 1 2617 + Res0 63:7 2618 + UnsignedEnum 6:5 TS 2619 + 0b0000 USE_TRFCR_EL1_TS 2620 + 0b0001 VIRTUAL 2621 + 0b0010 GUEST_PHYSICAL 2622 + 0b0011 PHYSICAL 2623 + EndEnum 2624 + Res0 4 2625 + Field 3 CX 2626 + Res0 2 2627 + Field 1 E2TRE 2628 + Field 0 E0HTRE 2629 + EndSysreg 2630 + 2631 + 2628 2632 Sysreg HDFGRTR_EL2 3 4 3 1 4 2629 2633 Field 63 PMBIDR_EL1 2630 2634 Field 62 nPMSNEVFR_EL1 ··· 3049 3021 3050 3022 Sysreg ZCR_EL12 3 5 1 2 0 3051 3023 Mapping ZCR_EL1 3024 + EndSysreg 3025 + 3026 + Sysreg TRFCR_EL12 3 5 1 2 1 3027 + Mapping TRFCR_EL1 3052 3028 EndSysreg 3053 3029 3054 3030 Sysreg SMCR_EL12 3 5 1 2 6
+45 -10
drivers/hwtracing/coresight/coresight-etm4x-core.c
··· 6 6 #include <linux/acpi.h> 7 7 #include <linux/bitops.h> 8 8 #include <linux/kernel.h> 9 + #include <linux/kvm_host.h> 9 10 #include <linux/moduleparam.h> 10 11 #include <linux/init.h> 11 12 #include <linux/types.h> ··· 269 268 */ 270 269 static void etm4x_prohibit_trace(struct etmv4_drvdata *drvdata) 271 270 { 271 + u64 trfcr; 272 + 272 273 /* If the CPU doesn't support FEAT_TRF, nothing to do */ 273 274 if (!drvdata->trfcr) 274 275 return; 275 - cpu_prohibit_trace(); 276 + 277 + trfcr = drvdata->trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE); 278 + 279 + write_trfcr(trfcr); 280 + kvm_tracing_set_el1_configuration(trfcr); 281 + } 282 + 283 + static u64 etm4x_get_kern_user_filter(struct etmv4_drvdata *drvdata) 284 + { 285 + u64 trfcr = drvdata->trfcr; 286 + 287 + if (drvdata->config.mode & ETM_MODE_EXCL_KERN) 288 + trfcr &= ~TRFCR_EL1_ExTRE; 289 + if (drvdata->config.mode & ETM_MODE_EXCL_USER) 290 + trfcr &= ~TRFCR_EL1_E0TRE; 291 + 292 + return trfcr; 276 293 } 277 294 278 295 /* ··· 305 286 */ 306 287 static void etm4x_allow_trace(struct etmv4_drvdata *drvdata) 307 288 { 308 - u64 trfcr = drvdata->trfcr; 289 + u64 trfcr, guest_trfcr; 309 290 310 291 /* If the CPU doesn't support FEAT_TRF, nothing to do */ 311 - if (!trfcr) 292 + if (!drvdata->trfcr) 312 293 return; 313 294 314 - if (drvdata->config.mode & ETM_MODE_EXCL_KERN) 315 - trfcr &= ~TRFCR_ELx_ExTRE; 316 - if (drvdata->config.mode & ETM_MODE_EXCL_USER) 317 - trfcr &= ~TRFCR_ELx_E0TRE; 295 + if (drvdata->config.mode & ETM_MODE_EXCL_HOST) 296 + trfcr = drvdata->trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE); 297 + else 298 + trfcr = etm4x_get_kern_user_filter(drvdata); 318 299 319 300 write_trfcr(trfcr); 301 + 302 + /* Set filters for guests and pass to KVM */ 303 + if (drvdata->config.mode & ETM_MODE_EXCL_GUEST) 304 + guest_trfcr = drvdata->trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE); 305 + else 306 + guest_trfcr = etm4x_get_kern_user_filter(drvdata); 307 + 308 + /* TRFCR_EL1 doesn't have CX so mask it 
out. */ 309 + guest_trfcr &= ~TRFCR_EL2_CX; 310 + kvm_tracing_set_el1_configuration(guest_trfcr); 320 311 } 321 312 322 313 #ifdef CONFIG_ETM4X_IMPDEF_FEATURE ··· 683 654 684 655 if (attr->exclude_user) 685 656 config->mode = ETM_MODE_EXCL_USER; 657 + 658 + if (attr->exclude_host) 659 + config->mode |= ETM_MODE_EXCL_HOST; 660 + 661 + if (attr->exclude_guest) 662 + config->mode |= ETM_MODE_EXCL_GUEST; 686 663 687 664 /* Always start from the default config */ 688 665 etm4_set_default_config(config); ··· 1176 1141 * tracing at the kernel EL and EL0, forcing to use the 1177 1142 * virtual time as the timestamp. 1178 1143 */ 1179 - trfcr = (TRFCR_ELx_TS_VIRTUAL | 1180 - TRFCR_ELx_ExTRE | 1181 - TRFCR_ELx_E0TRE); 1144 + trfcr = (TRFCR_EL1_TS_VIRTUAL | 1145 + TRFCR_EL1_ExTRE | 1146 + TRFCR_EL1_E0TRE); 1182 1147 1183 1148 /* If we are running at EL2, allow tracing the CONTEXTIDR_EL2. */ 1184 1149 if (is_kernel_in_hyp_mode())
+5 -5
drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
··· 2319 2319 goto out; 2320 2320 } 2321 2321 2322 - switch (drvdata->trfcr & TRFCR_ELx_TS_MASK) { 2323 - case TRFCR_ELx_TS_VIRTUAL: 2324 - case TRFCR_ELx_TS_GUEST_PHYSICAL: 2325 - case TRFCR_ELx_TS_PHYSICAL: 2326 - val = FIELD_GET(TRFCR_ELx_TS_MASK, drvdata->trfcr); 2322 + switch (drvdata->trfcr & TRFCR_EL1_TS_MASK) { 2323 + case TRFCR_EL1_TS_VIRTUAL: 2324 + case TRFCR_EL1_TS_GUEST_PHYSICAL: 2325 + case TRFCR_EL1_TS_PHYSICAL: 2326 + val = FIELD_GET(TRFCR_EL1_TS_MASK, drvdata->trfcr); 2327 2327 break; 2328 2328 default: 2329 2329 val = -1;
+1 -1
drivers/hwtracing/coresight/coresight-etm4x.h
··· 817 817 * @s_ex_level: Secure ELs where tracing is supported. 818 818 */ 819 819 struct etmv4_config { 820 - u32 mode; 820 + u64 mode; 821 821 u32 pe_sel; 822 822 u32 cfg; 823 823 u32 eventctrl0;
+3
drivers/hwtracing/coresight/coresight-priv.h
··· 42 42 43 43 #define ETM_MODE_EXCL_KERN BIT(30) 44 44 #define ETM_MODE_EXCL_USER BIT(31) 45 + #define ETM_MODE_EXCL_HOST BIT(32) 46 + #define ETM_MODE_EXCL_GUEST BIT(33) 47 + 45 48 struct cs_pair_attribute { 46 49 struct device_attribute attr; 47 50 u32 lo_off;
-9
drivers/hwtracing/coresight/coresight-self-hosted-trace.h
··· 21 21 isb(); 22 22 } 23 23 24 - static inline u64 cpu_prohibit_trace(void) 25 - { 26 - u64 trfcr = read_trfcr(); 27 - 28 - /* Prohibit tracing at EL0 & the kernel EL */ 29 - write_trfcr(trfcr & ~(TRFCR_ELx_ExTRE | TRFCR_ELx_E0TRE)); 30 - /* Return the original value of the TRFCR */ 31 - return trfcr; 32 - } 33 24 #endif /* __CORESIGHT_SELF_HOSTED_TRACE_H */
+14 -1
drivers/hwtracing/coresight/coresight-trbe.c
··· 17 17 18 18 #include <asm/barrier.h> 19 19 #include <asm/cpufeature.h> 20 + #include <linux/kvm_host.h> 20 21 #include <linux/vmalloc.h> 21 22 22 23 #include "coresight-self-hosted-trace.h" ··· 222 221 */ 223 222 trblimitr |= TRBLIMITR_EL1_E; 224 223 write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1); 224 + kvm_enable_trbe(); 225 225 226 226 /* Synchronize the TRBE enable event */ 227 227 isb(); ··· 241 239 */ 242 240 trblimitr &= ~TRBLIMITR_EL1_E; 243 241 write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1); 242 + kvm_disable_trbe(); 244 243 245 244 if (trbe_needs_drain_after_disable(cpudata)) 246 245 trbe_drain_buffer(); ··· 256 253 257 254 static void trbe_reset_local(struct trbe_cpudata *cpudata) 258 255 { 259 - trbe_drain_and_disable_local(cpudata); 260 256 write_sysreg_s(0, SYS_TRBLIMITR_EL1); 257 + trbe_drain_buffer(); 261 258 write_sysreg_s(0, SYS_TRBPTR_EL1); 262 259 write_sysreg_s(0, SYS_TRBBASER_EL1); 263 260 write_sysreg_s(0, SYS_TRBSR_EL1); ··· 1111 1108 return false; 1112 1109 1113 1110 return true; 1111 + } 1112 + 1113 + static u64 cpu_prohibit_trace(void) 1114 + { 1115 + u64 trfcr = read_trfcr(); 1116 + 1117 + /* Prohibit tracing at EL0 & the kernel EL */ 1118 + write_trfcr(trfcr & ~(TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE)); 1119 + /* Return the original value of the TRFCR */ 1120 + return trfcr; 1114 1121 } 1115 1122 1116 1123 static irqreturn_t arm_trbe_irq_handler(int irq, void *dev)
+6
include/clocksource/arm_arch_timer.h
··· 22 22 #define CNTHCTL_EVNTDIR (1 << 3) 23 23 #define CNTHCTL_EVNTI (0xF << 4) 24 24 #define CNTHCTL_ECV (1 << 12) 25 + #define CNTHCTL_EL1TVT (1 << 13) 26 + #define CNTHCTL_EL1TVCT (1 << 14) 27 + #define CNTHCTL_EL1NVPCT (1 << 15) 28 + #define CNTHCTL_EL1NVVCT (1 << 16) 29 + #define CNTHCTL_CNTVMASK (1 << 18) 30 + #define CNTHCTL_CNTPMASK (1 << 19) 25 31 26 32 enum arch_timer_reg { 27 33 ARCH_TIMER_REG_CTRL,
+23
include/kvm/arm_arch_timer.h
··· 98 98 int kvm_timer_enable(struct kvm_vcpu *vcpu); 99 99 void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); 100 100 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); 101 + void kvm_timer_sync_nested(struct kvm_vcpu *vcpu); 101 102 void kvm_timer_sync_user(struct kvm_vcpu *vcpu); 102 103 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); 103 104 void kvm_timer_update_run(struct kvm_vcpu *vcpu); ··· 151 150 /* CNTKCTL_EL1 valid bits as of DDI0487J.a */ 152 151 #define CNTKCTL_VALID_BITS (BIT(17) | GENMASK_ULL(9, 0)) 153 152 153 + DECLARE_STATIC_KEY_FALSE(broken_cntvoff_key); 154 + 155 + static inline bool has_broken_cntvoff(void) 156 + { 157 + return static_branch_unlikely(&broken_cntvoff_key); 158 + } 159 + 154 160 static inline bool has_cntpoff(void) 155 161 { 156 162 return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); 163 + } 164 + 165 + static inline u64 timer_get_offset(struct arch_timer_context *ctxt) 166 + { 167 + u64 offset = 0; 168 + 169 + if (!ctxt) 170 + return 0; 171 + 172 + if (ctxt->offset.vm_offset) 173 + offset += *ctxt->offset.vm_offset; 174 + if (ctxt->offset.vcpu_offset) 175 + offset += *ctxt->offset.vcpu_offset; 176 + 177 + return offset; 157 178 } 158 179 159 180 #endif
+390 -20
tools/arch/arm64/include/asm/sysreg.h
··· 11 11 12 12 #include <linux/bits.h> 13 13 #include <linux/stringify.h> 14 + #include <linux/kasan-tags.h> 14 15 15 16 #include <asm/gpr-num.h> 16 17 ··· 109 108 #define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x)) 110 109 #define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x)) 111 110 111 + /* Register-based PAN access, for save/restore purposes */ 112 + #define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3) 113 + 112 114 #define __SYS_BARRIER_INSN(CRm, op2, Rt) \ 113 115 __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) 114 116 ··· 126 122 #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) 127 123 #define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4) 128 124 #define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6) 125 + 126 + #define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0) 127 + #define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0) 128 + #define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1) 129 + 130 + #define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1) 131 + #define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3) 132 + #define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5) 133 + 134 + #define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1) 135 + #define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3) 136 + #define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5) 137 + 138 + #define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1) 139 + 140 + #define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1) 141 + #define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3) 142 + #define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5) 143 + 144 + #define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1) 145 + #define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3) 146 + #define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5) 147 + 148 + #define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1) 149 + #define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3) 150 + #define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5) 151 + 152 + /* Data cache zero operations */ 153 + #define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1) 154 + #define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3) 155 + #define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4) 129 156 130 157 /* 131 158 * Automatically 
generated definitions for system registers, the ··· 197 162 #define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) 198 163 #define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) 199 164 165 + #define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0)) 166 + #define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0) 167 + #define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1)) 168 + #define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1) 169 + #define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2)) 170 + #define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2) 171 + #define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2) 172 + 173 + #define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0) 174 + #define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1) 175 + #define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0) 176 + 177 + #define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3) 178 + #define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3))) 179 + #define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3))) 180 + #define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6) 181 + #define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0) 182 + #define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0) 183 + #define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0) 184 + #define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2) 185 + #define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2) 186 + #define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0) 187 + #define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6) 188 + #define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6) 189 + #define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5) 190 + #define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5) 191 + #define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5) 192 + #define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0) 193 + #define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6) 194 + #define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7) 195 + #define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0) 196 + #define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0) 197 + #define 
SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4) 198 + #define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7) 199 + #define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6) 200 + #define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6) 201 + #define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6) 202 + #define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6) 203 + #define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7) 204 + #define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7) 205 + #define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7) 206 + #define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7) 207 + #define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7) 208 + #define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7) 209 + #define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7) 210 + #define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6) 211 + #define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6) 212 + #define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7) 213 + #define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1) 214 + #define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4) 215 + #define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0) 216 + #define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1) 217 + #define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4))) 218 + #define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0) 219 + #define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4) 220 + #define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4) 221 + #define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4) 222 + #define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2) 223 + #define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2) 224 + #define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3) 225 + #define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0) 226 + #define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0) 227 + #define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0) 228 + #define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1) 229 + #define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0) 230 + #define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2) 231 + #define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2) 232 + #define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2) 233 + #define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2) 234 + #define SYS_TRCVMIDCCTLR0 
sys_reg(2, 1, 3, 2, 2) 235 + #define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2) 236 + #define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1) 237 + 238 + /* ETM */ 239 + #define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4) 240 + 241 + #define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0) 242 + 200 243 #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) 201 244 #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) 202 245 #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) ··· 282 169 #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) 283 170 #define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) 284 171 #define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) 285 - 286 - #define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) 287 172 288 173 #define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) 289 174 ··· 313 202 #define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1) 314 203 #define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2) 315 204 #define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3) 205 + #define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4) 206 + #define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5) 207 + #define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6) 316 208 #define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0) 317 209 #define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1) 210 + #define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2) 211 + #define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3) 318 212 #define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) 319 213 #define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) 320 214 321 215 #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) 322 216 323 217 #define SYS_PAR_EL1_F BIT(0) 218 + /* When PAR_EL1.F == 1 */ 324 219 #define SYS_PAR_EL1_FST GENMASK(6, 1) 220 + #define SYS_PAR_EL1_PTW BIT(8) 221 + #define SYS_PAR_EL1_S BIT(9) 222 + #define SYS_PAR_EL1_AssuredOnly BIT(12) 223 + #define SYS_PAR_EL1_TopLevel BIT(13) 224 + #define SYS_PAR_EL1_Overlay BIT(14) 225 + #define SYS_PAR_EL1_DirtyBit BIT(15) 226 + #define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48) 227 + #define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16)) 228 + #define SYS_PAR_EL1_RES1 BIT(11) 229 + /* When 
PAR_EL1.F == 0 */ 230 + #define SYS_PAR_EL1_SH GENMASK_ULL(8, 7) 231 + #define SYS_PAR_EL1_NS BIT(9) 232 + #define SYS_PAR_EL1_F0_IMPDEF BIT(10) 233 + #define SYS_PAR_EL1_NSE BIT(11) 234 + #define SYS_PAR_EL1_PA GENMASK_ULL(51, 12) 235 + #define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56) 236 + #define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52)) 325 237 326 238 /*** Statistical Profiling Extension ***/ 327 239 #define PMSEVFR_EL1_RES0_IMP \ ··· 408 274 #define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) 409 275 #define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) 410 276 277 + #define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5) 278 + 411 279 #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) 412 280 413 281 #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) ··· 422 286 #define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) 423 287 #define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3) 424 288 #define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4) 425 - #define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5) 426 289 #define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6) 427 290 #define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7) 428 291 #define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0) ··· 504 369 505 370 #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) 506 371 #define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) 372 + #define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3) 507 373 #define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) 508 374 #define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) 509 375 #define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) ··· 517 381 #define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) 518 382 #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) 519 383 520 - #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) 521 - #define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) 522 - #define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) 384 + #define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0) 523 385 #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) 524 386 #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) 525 387 #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) 526 388 #define SYS_SP_EL1 
sys_reg(3, 4, 4, 1, 0) 389 + #define SYS_SPSR_irq sys_reg(3, 4, 4, 3, 0) 390 + #define SYS_SPSR_abt sys_reg(3, 4, 4, 3, 1) 391 + #define SYS_SPSR_und sys_reg(3, 4, 4, 3, 2) 392 + #define SYS_SPSR_fiq sys_reg(3, 4, 4, 3, 3) 527 393 #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) 528 394 #define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0) 529 395 #define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1) ··· 587 449 588 450 #define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1) 589 451 #define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2) 452 + #define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7) 453 + 454 + #define __AMEV_op2(m) (m & 0x7) 455 + #define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3)) 456 + #define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m)) 457 + #define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m) 458 + #define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m)) 459 + #define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m) 590 460 591 461 #define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3) 592 462 #define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) 463 + #define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0) 464 + #define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1) 465 + #define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2) 466 + #define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0) 467 + #define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1) 468 + #define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2) 593 469 594 470 /* VHE encodings for architectural EL0/1 system registers */ 471 + #define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) 595 472 #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) 473 + #define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) 474 + #define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3) 475 + #define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) 476 + #define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1) 477 + #define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) 596 478 #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) 597 479 #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) 598 
480 #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) 481 + #define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3) 599 482 #define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) 600 483 #define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) 601 484 #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) 602 485 #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) 603 486 #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) 604 487 #define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) 488 + #define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) 489 + #define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0) 605 490 #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) 606 491 #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) 607 492 #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) 493 + #define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) 494 + #define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7) 608 495 #define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) 609 496 #define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) 610 497 #define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) ··· 639 476 #define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) 640 477 641 478 #define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0) 479 + 480 + /* AT instructions */ 481 + #define AT_Op0 1 482 + #define AT_CRn 7 483 + 484 + #define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0) 485 + #define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1) 486 + #define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2) 487 + #define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3) 488 + #define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0) 489 + #define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1) 490 + #define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2) 491 + #define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0) 492 + #define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1) 493 + #define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4) 494 + #define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5) 495 + #define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6) 496 + #define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7) 497 + #define OP_AT_S1E2A 
sys_insn(AT_Op0, 4, AT_CRn, 9, 2) 498 + 499 + /* TLBI instructions */ 500 + #define TLBI_Op0 1 501 + 502 + #define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */ 503 + #define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */ 504 + 505 + #define TLBI_CRn_XS 8 /* Extra Slow (the common one) */ 506 + #define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses)*/ 507 + 508 + #define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */ 509 + #define TLBI_CRm_nROS 1 /* non-Range, Outer-Sharable */ 510 + #define TLBI_CRm_RIS 2 /* Range, Inner-Sharable */ 511 + #define TLBI_CRm_nRIS 3 /* non-Range, Inner-Sharable */ 512 + #define TLBI_CRm_IPAONS 4 /* S2 Outer and Non-Shareable */ 513 + #define TLBI_CRm_ROS 5 /* Range, Outer-Sharable */ 514 + #define TLBI_CRm_RNS 6 /* Range, Non-Sharable */ 515 + #define TLBI_CRm_nRNS 7 /* non-Range, Non-Sharable */ 516 + 517 + #define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0) 518 + #define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1) 519 + #define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2) 520 + #define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3) 521 + #define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5) 522 + #define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7) 523 + #define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1) 524 + #define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3) 525 + #define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5) 526 + #define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7) 527 + #define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0) 528 + #define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1) 529 + #define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2) 530 + #define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3) 531 + #define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5) 532 + #define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7) 533 + #define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1) 534 + #define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3) 535 + #define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5) 536 + #define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7) 537 + #define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1) 
538 + #define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3) 539 + #define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5) 540 + #define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7) 541 + #define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0) 542 + #define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1) 543 + #define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2) 544 + #define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3) 545 + #define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5) 546 + #define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7) 547 + #define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0) 548 + #define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1) 549 + #define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2) 550 + #define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3) 551 + #define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5) 552 + #define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7) 553 + #define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1) 554 + #define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3) 555 + #define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5) 556 + #define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7) 557 + #define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0) 558 + #define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1) 559 + #define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2) 560 + #define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3) 561 + #define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5) 562 + #define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7) 563 + #define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1) 564 + #define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3) 565 + #define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5) 566 + #define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7) 567 + #define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1) 568 + #define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3) 569 + #define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5) 570 + #define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7) 571 + #define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0) 572 + #define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1) 573 + #define OP_TLBI_ASIDE1NXS 
sys_insn(1, 0, 9, 7, 2) 574 + #define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3) 575 + #define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5) 576 + #define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7) 577 + #define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1) 578 + #define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2) 579 + #define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5) 580 + #define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6) 581 + #define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0) 582 + #define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1) 583 + #define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4) 584 + #define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5) 585 + #define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6) 586 + #define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1) 587 + #define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5) 588 + #define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0) 589 + #define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1) 590 + #define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4) 591 + #define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5) 592 + #define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6) 593 + #define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0) 594 + #define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1) 595 + #define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2) 596 + #define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3) 597 + #define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4) 598 + #define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5) 599 + #define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6) 600 + #define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7) 601 + #define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1) 602 + #define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5) 603 + #define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1) 604 + #define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5) 605 + #define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0) 606 + #define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1) 607 + #define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4) 608 + #define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5) 609 + #define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 
6) 610 + #define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1) 611 + #define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2) 612 + #define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5) 613 + #define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6) 614 + #define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0) 615 + #define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1) 616 + #define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4) 617 + #define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5) 618 + #define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6) 619 + #define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1) 620 + #define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5) 621 + #define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0) 622 + #define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1) 623 + #define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4) 624 + #define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5) 625 + #define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6) 626 + #define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0) 627 + #define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1) 628 + #define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2) 629 + #define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3) 630 + #define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4) 631 + #define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5) 632 + #define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6) 633 + #define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7) 634 + #define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1) 635 + #define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5) 636 + #define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1) 637 + #define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5) 638 + #define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0) 639 + #define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1) 640 + #define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4) 641 + #define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5) 642 + #define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6) 643 + 644 + /* Misc instructions */ 645 + #define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4) 
646 + #define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5) 647 + #define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6) 648 + #define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0) 649 + 650 + #define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4) 651 + #define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5) 652 + #define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4) 653 + #define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) 654 + #define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6) 655 + #define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) 642 656 643 657 /* Common SCTLR_ELx flags. */ 644 658 #define SCTLR_ELx_ENTP2 (BIT(60)) ··· 895 555 /* Position the attr at the correct index */ 896 556 #define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) 897 557 898 - /* id_aa64pfr0 */ 899 - #define ID_AA64PFR0_EL1_ELx_64BIT_ONLY 0x1 900 - #define ID_AA64PFR0_EL1_ELx_32BIT_64BIT 0x2 901 - 902 558 /* id_aa64mmfr0 */ 903 559 #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0 560 + #define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT 904 561 #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7 905 562 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0 906 563 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7 907 564 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1 565 + #define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT 908 566 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf 909 567 910 568 #define ARM64_MIN_PARANGE_BITS 32 ··· 910 572 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0 911 573 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1 912 574 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2 575 + #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3 913 576 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7 914 577 915 578 #ifdef CONFIG_ARM64_PA_BITS_52 ··· 921 582 922 583 #if defined(CONFIG_ARM64_4K_PAGES) 923 584 #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT 585 + #define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT 924 586 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN 
ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 925 587 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 926 588 #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT 927 589 #elif defined(CONFIG_ARM64_16K_PAGES) 928 590 #define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT 591 + #define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT 929 592 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 930 593 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 931 594 #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT ··· 951 610 #define SYS_GCR_EL1_RRND (BIT(16)) 952 611 #define SYS_GCR_EL1_EXCL_MASK 0xffffUL 953 612 613 + #ifdef CONFIG_KASAN_HW_TAGS 614 + /* 615 + * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it 616 + * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF. 617 + */ 618 + #define __MTE_TAG_MIN (KASAN_TAG_MIN & 0xf) 619 + #define __MTE_TAG_MAX (KASAN_TAG_MAX & 0xf) 620 + #define __MTE_TAG_INCL GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN) 621 + #define KERNEL_GCR_EL1_EXCL (SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL) 622 + #else 623 + #define KERNEL_GCR_EL1_EXCL SYS_GCR_EL1_EXCL_MASK 624 + #endif 625 + 954 626 #define KERNEL_GCR_EL1 (SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL) 955 627 956 628 /* RGSR_EL1 Definitions */ ··· 979 625 980 626 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ 981 627 #define SYS_MPIDR_SAFE_VAL (BIT(31)) 982 - 983 - #define TRFCR_ELx_TS_SHIFT 5 984 - #define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT) 985 - #define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT) 986 - #define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT) 987 - #define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT) 988 - #define TRFCR_EL2_CX BIT(3) 989 - #define TRFCR_ELx_ExTRE BIT(1) 990 - #define TRFCR_ELx_E0TRE BIT(0) 991 628 992 629 
/* GIC Hypervisor interface registers */ 993 630 /* ICH_MISR_EL2 bit definitions */ ··· 1061 716 1062 717 #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) 1063 718 719 + /* 720 + * Permission Overlay Extension (POE) permission encodings. 721 + */ 722 + #define POE_NONE UL(0x0) 723 + #define POE_R UL(0x1) 724 + #define POE_X UL(0x2) 725 + #define POE_RX UL(0x3) 726 + #define POE_W UL(0x4) 727 + #define POE_RW UL(0x5) 728 + #define POE_XW UL(0x6) 729 + #define POE_RXW UL(0x7) 730 + #define POE_MASK UL(0xf) 731 + 732 + /* Initial value for Permission Overlay Extension for EL0 */ 733 + #define POR_EL0_INIT POE_RXW 734 + 1064 735 #define ARM64_FEATURE_FIELD_BITS 4 1065 736 1066 737 /* Defined for compatibility only, do not add new users. */ ··· 1150 789 /* 1151 790 * For registers without architectural names, or simply unsupported by 1152 791 * GAS. 792 + * 793 + * __check_r forces warnings to be generated by the compiler when 794 + * evaluating r which wouldn't normally happen due to being passed to 795 + * the assembler via __stringify(r). 
1153 796 */ 1154 797 #define read_sysreg_s(r) ({ \ 1155 798 u64 __val; \ 799 + u32 __maybe_unused __check_r = (u32)(r); \ 1156 800 asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ 1157 801 __val; \ 1158 802 }) 1159 803 1160 804 #define write_sysreg_s(v, r) do { \ 1161 805 u64 __val = (u64)(v); \ 806 + u32 __maybe_unused __check_r = (u32)(r); \ 1162 807 asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ 1163 808 } while (0) 1164 809 ··· 1194 827 par; \ 1195 828 }) 1196 829 830 + #define SYS_FIELD_VALUE(reg, field, val) reg##_##field##_##val 831 + 1197 832 #define SYS_FIELD_GET(reg, field, val) \ 1198 833 FIELD_GET(reg##_##field##_MASK, val) 1199 834 ··· 1203 834 FIELD_PREP(reg##_##field##_MASK, val) 1204 835 1205 836 #define SYS_FIELD_PREP_ENUM(reg, field, val) \ 1206 - FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) 837 + FIELD_PREP(reg##_##field##_MASK, \ 838 + SYS_FIELD_VALUE(reg, field, val)) 1207 839 1208 840 #endif 1209 841
+15
tools/include/linux/kasan-tags.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_KASAN_TAGS_H 3 + #define _LINUX_KASAN_TAGS_H 4 + 5 + #define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */ 6 + #define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */ 7 + #define KASAN_TAG_MAX 0xFD /* maximum value for random tags */ 8 + 9 + #ifdef CONFIG_KASAN_HW_TAGS 10 + #define KASAN_TAG_MIN 0xF0 /* minimum value for random tags */ 11 + #else 12 + #define KASAN_TAG_MIN 0x00 /* minimum value for random tags */ 13 + #endif 14 + 15 + #endif /* LINUX_KASAN_TAGS_H */
+1 -1
tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
··· 147 147 val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); 148 148 149 149 el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); 150 - return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY; 150 + return el0 == ID_AA64PFR0_EL1_EL0_IMP; 151 151 } 152 152 153 153 int main(void)
+1 -1
tools/testing/selftests/kvm/arm64/set_id_regs.c
··· 666 666 /* Check for AARCH64 only system */ 667 667 val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); 668 668 el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); 669 - aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY); 669 + aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP); 670 670 671 671 ksft_print_header(); 672 672