Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvmarm-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for 6.18

- Add support for FF-A 1.2 as the secure memory conduit for pKVM,
allowing more registers to be used as part of the message payload.

- Change the way pKVM allocates its VM handles, making sure that the
privileged hypervisor is never tricked into using uninitialised
data.

- Speed up MMIO range registration by avoiding unnecessary RCU
synchronisation, which results in VMs starting much quicker.

- Add the dump of the instruction stream when panicking in the EL2
payload, just like the rest of the kernel has always done. This will
hopefully help debugging non-VHE setups.

- Add 52-bit PA support to the stage-1 page-table walker, and make use
of it to populate the fault level reported to the guest on failing
to translate a stage-1 walk.

- Add NV support to the GICv3-on-GICv5 emulation code, ensuring
feature parity for guests, irrespective of the host platform.

- Fix some really ugly architecture problems when dealing with debug
in a nested VM. This has some bad performance impacts, but is at
least correct.

- Add enough infrastructure to be able to disable EL2 features and
give effective values to the EL2 control registers. This then allows
a bunch of features to be turned off, which helps cross-host
migration.

- Large rework of the selftest infrastructure to allow most tests to
transparently run at EL2. This is the first step towards enabling
NV testing.

- Various fixes and improvements all over the map, including one BE
fix, just in time for the removal of the feature.

+1696 -688
+2
arch/arm64/include/asm/kvm_asm.h
··· 81 81 __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, 82 82 __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, 83 83 __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, 84 + __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm, 85 + __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm, 84 86 __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, 85 87 __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, 86 88 __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+28 -6
arch/arm64/include/asm/kvm_emulate.h
··· 220 220 221 221 static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu) 222 222 { 223 + /* 224 + * DDI0487L.b Known Issue D22105 225 + * 226 + * When executing at EL2 and HCR_EL2.{E2H,TGE} = {1, 0} it is 227 + * IMPLEMENTATION DEFINED whether the effective value of HCR_EL2.AMO 228 + * is the value programmed or 1. 229 + * 230 + * Make the implementation choice of treating the effective value as 1 as 231 + * we cannot subsequently catch changes to TGE or AMO that would 232 + * otherwise lead to the SError becoming deliverable. 233 + */ 234 + if (vcpu_is_el2(vcpu) && vcpu_el2_e2h_is_set(vcpu) && !vcpu_el2_tge_is_set(vcpu)) 235 + return true; 236 + 223 237 return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO; 224 238 } 225 239 ··· 525 511 if (vcpu_mode_is_32bit(vcpu)) { 526 512 *vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT; 527 513 } else { 528 - u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); 514 + enum vcpu_sysreg r; 515 + u64 sctlr; 516 + 517 + r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1; 518 + 519 + sctlr = vcpu_read_sys_reg(vcpu, r); 529 520 sctlr |= SCTLR_ELx_EE; 530 - vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1); 521 + vcpu_write_sys_reg(vcpu, sctlr, r); 531 522 } 532 523 } 533 524 534 525 static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) 535 526 { 527 + enum vcpu_sysreg r; 528 + u64 bit; 529 + 536 530 if (vcpu_mode_is_32bit(vcpu)) 537 531 return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT); 538 532 539 - if (vcpu_mode_priv(vcpu)) 540 - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE); 541 - else 542 - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E); 533 + r = is_hyp_ctxt(vcpu) ? SCTLR_EL2 : SCTLR_EL1; 534 + bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E; 535 + 536 + return vcpu_read_sys_reg(vcpu, r) & bit; 543 537 } 544 538 545 539 static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+3 -2
arch/arm64/include/asm/kvm_host.h
··· 252 252 pkvm_handle_t handle; 253 253 struct kvm_hyp_memcache teardown_mc; 254 254 struct kvm_hyp_memcache stage2_teardown_mc; 255 - bool enabled; 255 + bool is_protected; 256 + bool is_created; 256 257 }; 257 258 258 259 struct kvm_mpidr_data { ··· 1443 1442 1444 1443 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE 1445 1444 1446 - #define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled) 1445 + #define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.is_protected) 1447 1446 1448 1447 #define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm) 1449 1448
+25 -2
arch/arm64/include/asm/kvm_nested.h
··· 83 83 extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu); 84 84 extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu); 85 85 86 + extern void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu); 87 + 86 88 struct kvm_s2_trans { 87 89 phys_addr_t output; 88 90 unsigned long block_size; ··· 267 265 return base; 268 266 } 269 267 270 - static inline unsigned int ps_to_output_size(unsigned int ps) 268 + static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit) 271 269 { 272 270 switch (ps) { 273 271 case 0: return 32; ··· 275 273 case 2: return 40; 276 274 case 3: return 42; 277 275 case 4: return 44; 278 - case 5: 276 + case 5: return 48; 277 + case 6: if (pa52bit) 278 + return 52; 279 + fallthrough; 279 280 default: 280 281 return 48; 281 282 } ··· 290 285 TR_EL2, 291 286 }; 292 287 288 + struct s1_walk_info; 289 + 290 + struct s1_walk_context { 291 + struct s1_walk_info *wi; 292 + u64 table_ipa; 293 + int level; 294 + }; 295 + 296 + struct s1_walk_filter { 297 + int (*fn)(struct s1_walk_context *, void *); 298 + void *priv; 299 + }; 300 + 293 301 struct s1_walk_info { 302 + struct s1_walk_filter *filter; 294 303 u64 baddr; 295 304 enum trans_regime regime; 296 305 unsigned int max_oa_bits; 297 306 unsigned int pgshift; 298 307 unsigned int txsz; 299 308 int sl; 309 + u8 sh; 300 310 bool as_el0; 301 311 bool hpd; 302 312 bool e0poe; ··· 319 299 bool pan; 320 300 bool be; 321 301 bool s2; 302 + bool pa52bit; 322 303 }; 323 304 324 305 struct s1_walk_result { ··· 355 334 356 335 int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, 357 336 struct s1_walk_result *wr, u64 va); 337 + int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, 338 + int *level); 358 339 359 340 /* VNCR management */ 360 341 int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
+1
arch/arm64/include/asm/kvm_pkvm.h
··· 18 18 19 19 int pkvm_init_host_vm(struct kvm *kvm); 20 20 int pkvm_create_hyp_vm(struct kvm *kvm); 21 + bool pkvm_hyp_vm_is_created(struct kvm *kvm); 21 22 void pkvm_destroy_hyp_vm(struct kvm *kvm); 22 23 int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu); 23 24
+1
arch/arm64/include/asm/traps.h
··· 36 36 int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr); 37 37 38 38 int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); 39 + void dump_kernel_instr(unsigned long kaddr); 39 40 40 41 /* 41 42 * Move regs->pc to next instruction and do necessary setup before it
+2
arch/arm64/include/asm/vncr_mapping.h
··· 94 94 #define VNCR_PMSICR_EL1 0x838 95 95 #define VNCR_PMSIRR_EL1 0x840 96 96 #define VNCR_PMSLATFR_EL1 0x848 97 + #define VNCR_PMSNEVFR_EL1 0x850 98 + #define VNCR_PMSDSFR_EL1 0x858 97 99 #define VNCR_TRFCR_EL1 0x880 98 100 #define VNCR_MPAM1_EL1 0x900 99 101 #define VNCR_MPAMHCR_EL2 0x930
+15
arch/arm64/kernel/cpufeature.c
··· 2539 2539 return idr & MPAMIDR_EL1_HAS_HCR; 2540 2540 } 2541 2541 2542 + static bool 2543 + test_has_gicv5_legacy(const struct arm64_cpu_capabilities *entry, int scope) 2544 + { 2545 + if (!this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF)) 2546 + return false; 2547 + 2548 + return !!(read_sysreg_s(SYS_ICC_IDR0_EL1) & ICC_IDR0_EL1_GCIE_LEGACY); 2549 + } 2550 + 2542 2551 static const struct arm64_cpu_capabilities arm64_features[] = { 2543 2552 { 2544 2553 .capability = ARM64_ALWAYS_BOOT, ··· 3164 3155 .capability = ARM64_HAS_GICV5_CPUIF, 3165 3156 .matches = has_cpuid_feature, 3166 3157 ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP) 3158 + }, 3159 + { 3160 + .desc = "GICv5 Legacy vCPU interface", 3161 + .type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE, 3162 + .capability = ARM64_HAS_GICV5_LEGACY, 3163 + .matches = test_has_gicv5_legacy, 3167 3164 }, 3168 3165 {}, 3169 3166 };
+3
arch/arm64/kernel/image-vars.h
··· 105 105 KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); 106 106 KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); 107 107 108 + /* Static key indicating whether GICv3 has GICv2 compatibility */ 109 + KVM_NVHE_ALIAS(vgic_v3_has_v2_compat); 110 + 108 111 /* Static key which is set if CNTVOFF_EL2 is unusable */ 109 112 KVM_NVHE_ALIAS(broken_cntvoff_key); 110 113
+9 -6
arch/arm64/kernel/traps.c
··· 149 149 150 150 int show_unhandled_signals = 0; 151 151 152 - static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) 152 + void dump_kernel_instr(unsigned long kaddr) 153 153 { 154 - unsigned long addr = instruction_pointer(regs); 155 154 char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; 156 155 int i; 157 156 158 - if (user_mode(regs)) 157 + if (!is_ttbr1_addr(kaddr)) 159 158 return; 160 159 161 160 for (i = -4; i < 1; i++) { 162 161 unsigned int val, bad; 163 162 164 - bad = aarch64_insn_read(&((u32 *)addr)[i], &val); 163 + bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val); 165 164 166 165 if (!bad) 167 166 p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val); ··· 168 169 p += sprintf(p, i == 0 ? "(????????) " : "???????? "); 169 170 } 170 171 171 - printk("%sCode: %s\n", lvl, str); 172 + printk(KERN_EMERG "Code: %s\n", str); 172 173 } 173 174 174 175 #define S_SMP " SMP" ··· 177 178 { 178 179 static int die_counter; 179 180 int ret; 181 + unsigned long addr = instruction_pointer(regs); 180 182 181 183 pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n", 182 184 str, err, ++die_counter); ··· 190 190 print_modules(); 191 191 show_regs(regs); 192 192 193 - dump_kernel_instr(KERN_EMERG, regs); 193 + if (user_mode(regs)) 194 + return ret; 195 + 196 + dump_kernel_instr(addr); 194 197 195 198 return ret; 196 199 }
+13 -6
arch/arm64/kvm/arm.c
··· 170 170 if (ret) 171 171 return ret; 172 172 173 - ret = pkvm_init_host_vm(kvm); 174 - if (ret) 175 - goto err_unshare_kvm; 176 - 177 173 if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) { 178 174 ret = -ENOMEM; 179 175 goto err_unshare_kvm; ··· 179 183 ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type); 180 184 if (ret) 181 185 goto err_free_cpumask; 186 + 187 + if (is_protected_kvm_enabled()) { 188 + /* 189 + * If any failures occur after this is successful, make sure to 190 + * call __pkvm_unreserve_vm to unreserve the VM in hyp. 191 + */ 192 + ret = pkvm_init_host_vm(kvm); 193 + if (ret) 194 + goto err_free_cpumask; 195 + } 182 196 183 197 kvm_vgic_early_init(kvm); 184 198 ··· 2323 2317 } 2324 2318 2325 2319 if (kvm_mode == KVM_MODE_NV && 2326 - !(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) { 2327 - kvm_err("NV support requires GICv3, giving up\n"); 2320 + !(vgic_present && (kvm_vgic_global_state.type == VGIC_V3 || 2321 + kvm_vgic_global_state.has_gcie_v3_compat))) { 2322 + kvm_err("NV support requires GICv3 or GICv5 with legacy support, giving up\n"); 2328 2323 err = -EINVAL; 2329 2324 goto out; 2330 2325 }
+270 -106
arch/arm64/kvm/at.c
··· 28 28 /* Return true if the IPA is out of the OA range */ 29 29 static bool check_output_size(u64 ipa, struct s1_walk_info *wi) 30 30 { 31 + if (wi->pa52bit) 32 + return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits)); 31 33 return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits)); 34 + } 35 + 36 + static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr) 37 + { 38 + switch (BIT(wi->pgshift)) { 39 + case SZ_64K: 40 + default: /* IMPDEF: treat any other value as 64k */ 41 + if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52)) 42 + return false; 43 + return ((wi->regime == TR_EL2 ? 44 + FIELD_GET(TCR_EL2_PS_MASK, tcr) : 45 + FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110); 46 + case SZ_16K: 47 + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT)) 48 + return false; 49 + break; 50 + case SZ_4K: 51 + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT)) 52 + return false; 53 + break; 54 + } 55 + 56 + return (tcr & (wi->regime == TR_EL2 ? 
TCR_EL2_DS : TCR_DS)); 57 + } 58 + 59 + static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc) 60 + { 61 + u64 addr; 62 + 63 + if (!wi->pa52bit) 64 + return desc & GENMASK_ULL(47, wi->pgshift); 65 + 66 + switch (BIT(wi->pgshift)) { 67 + case SZ_4K: 68 + case SZ_16K: 69 + addr = desc & GENMASK_ULL(49, wi->pgshift); 70 + addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50; 71 + break; 72 + case SZ_64K: 73 + default: /* IMPDEF: treat any other value as 64k */ 74 + addr = desc & GENMASK_ULL(47, wi->pgshift); 75 + addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48; 76 + break; 77 + } 78 + 79 + return addr; 32 80 } 33 81 34 82 /* Return the translation regime that applies to an AT instruction */ ··· 98 50 } 99 51 } 100 52 53 + static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime) 54 + { 55 + if (regime == TR_EL10) { 56 + if (vcpu_has_nv(vcpu) && 57 + !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) 58 + return 0; 59 + 60 + return vcpu_read_sys_reg(vcpu, TCR2_EL1); 61 + } 62 + 63 + return vcpu_read_sys_reg(vcpu, TCR2_EL2); 64 + } 65 + 101 66 static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) 102 67 { 103 68 if (!kvm_has_s1pie(vcpu->kvm)) 104 69 return false; 105 70 106 - switch (regime) { 107 - case TR_EL2: 108 - case TR_EL20: 109 - return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE; 110 - case TR_EL10: 111 - return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) && 112 - (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE); 113 - default: 114 - BUG(); 115 - } 71 + /* Abuse TCR2_EL1_PIE and use it for EL2 as well */ 72 + return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE; 116 73 } 117 74 118 75 static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi) ··· 129 76 return; 130 77 } 131 78 132 - switch (wi->regime) { 133 - case TR_EL2: 134 - case TR_EL20: 135 - val = vcpu_read_sys_reg(vcpu, TCR2_EL2); 136 - wi->poe = val & TCR2_EL2_POE; 137 - wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE); 
138 - break; 139 - case TR_EL10: 140 - if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) { 141 - wi->poe = wi->e0poe = false; 142 - return; 143 - } 79 + val = effective_tcr2(vcpu, wi->regime); 144 80 145 - val = __vcpu_sys_reg(vcpu, TCR2_EL1); 146 - wi->poe = val & TCR2_EL1_POE; 147 - wi->e0poe = val & TCR2_EL1_E0POE; 148 - } 81 + /* Abuse TCR2_EL1_* for EL2 */ 82 + wi->poe = val & TCR2_EL1_POE; 83 + wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE); 149 84 } 150 85 151 86 static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, ··· 143 102 unsigned int stride, x; 144 103 bool va55, tbi, lva; 145 104 146 - hcr = __vcpu_sys_reg(vcpu, HCR_EL2); 147 - 148 105 va55 = va & BIT(55); 149 106 150 - if (wi->regime == TR_EL2 && va55) 151 - goto addrsz; 152 - 153 - wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); 107 + if (vcpu_has_nv(vcpu)) { 108 + hcr = __vcpu_sys_reg(vcpu, HCR_EL2); 109 + wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); 110 + } else { 111 + WARN_ON_ONCE(wi->regime != TR_EL10); 112 + wi->s2 = false; 113 + hcr = 0; 114 + } 154 115 155 116 switch (wi->regime) { 156 117 case TR_EL10: ··· 174 131 BUG(); 175 132 } 176 133 134 + /* Someone was silly enough to encode TG0/TG1 differently */ 135 + if (va55 && wi->regime != TR_EL2) { 136 + wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); 137 + tg = FIELD_GET(TCR_TG1_MASK, tcr); 138 + 139 + switch (tg << TCR_TG1_SHIFT) { 140 + case TCR_TG1_4K: 141 + wi->pgshift = 12; break; 142 + case TCR_TG1_16K: 143 + wi->pgshift = 14; break; 144 + case TCR_TG1_64K: 145 + default: /* IMPDEF: treat any other value as 64k */ 146 + wi->pgshift = 16; break; 147 + } 148 + } else { 149 + wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); 150 + tg = FIELD_GET(TCR_TG0_MASK, tcr); 151 + 152 + switch (tg << TCR_TG0_SHIFT) { 153 + case TCR_TG0_4K: 154 + wi->pgshift = 12; break; 155 + case TCR_TG0_16K: 156 + wi->pgshift = 14; break; 157 + case TCR_TG0_64K: 158 + default: /* IMPDEF: treat any other value as 
64k */ 159 + wi->pgshift = 16; break; 160 + } 161 + } 162 + 163 + wi->pa52bit = has_52bit_pa(vcpu, wi, tcr); 164 + 165 + ia_bits = get_ia_size(wi); 166 + 167 + /* AArch64.S1StartLevel() */ 168 + stride = wi->pgshift - 3; 169 + wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); 170 + 171 + if (wi->regime == TR_EL2 && va55) 172 + goto addrsz; 173 + 177 174 tbi = (wi->regime == TR_EL2 ? 178 175 FIELD_GET(TCR_EL2_TBI, tcr) : 179 176 (va55 ? ··· 222 139 223 140 if (!tbi && (u64)sign_extend64(va, 55) != va) 224 141 goto addrsz; 142 + 143 + wi->sh = (wi->regime == TR_EL2 ? 144 + FIELD_GET(TCR_EL2_SH0_MASK, tcr) : 145 + (va55 ? 146 + FIELD_GET(TCR_SH1_MASK, tcr) : 147 + FIELD_GET(TCR_SH0_MASK, tcr))); 225 148 226 149 va = (u64)sign_extend64(va, 55); 227 150 ··· 283 194 /* R_BVXDG */ 284 195 wi->hpd |= (wi->poe || wi->e0poe); 285 196 286 - /* Someone was silly enough to encode TG0/TG1 differently */ 287 - if (va55) { 288 - wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); 289 - tg = FIELD_GET(TCR_TG1_MASK, tcr); 290 - 291 - switch (tg << TCR_TG1_SHIFT) { 292 - case TCR_TG1_4K: 293 - wi->pgshift = 12; break; 294 - case TCR_TG1_16K: 295 - wi->pgshift = 14; break; 296 - case TCR_TG1_64K: 297 - default: /* IMPDEF: treat any other value as 64k */ 298 - wi->pgshift = 16; break; 299 - } 300 - } else { 301 - wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); 302 - tg = FIELD_GET(TCR_TG0_MASK, tcr); 303 - 304 - switch (tg << TCR_TG0_SHIFT) { 305 - case TCR_TG0_4K: 306 - wi->pgshift = 12; break; 307 - case TCR_TG0_16K: 308 - wi->pgshift = 14; break; 309 - case TCR_TG0_64K: 310 - default: /* IMPDEF: treat any other value as 64k */ 311 - wi->pgshift = 16; break; 312 - } 313 - } 314 - 315 197 /* R_PLCGL, R_YXNYW */ 316 198 if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) { 317 199 if (wi->txsz > 39) 318 - goto transfault_l0; 200 + goto transfault; 319 201 } else { 320 202 if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47)) 321 - goto transfault_l0; 203 + goto 
transfault; 322 204 } 323 205 324 206 /* R_GTJBY, R_SXWGM */ 325 207 switch (BIT(wi->pgshift)) { 326 208 case SZ_4K: 327 - lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT); 328 - lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); 329 - break; 330 209 case SZ_16K: 331 - lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT); 332 - lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); 210 + lva = wi->pa52bit; 333 211 break; 334 212 case SZ_64K: 335 213 lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52); ··· 304 248 } 305 249 306 250 if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16)) 307 - goto transfault_l0; 308 - 309 - ia_bits = get_ia_size(wi); 251 + goto transfault; 310 252 311 253 /* R_YYVYV, I_THCZK */ 312 254 if ((!va55 && va > GENMASK(ia_bits - 1, 0)) || 313 255 (va55 && va < GENMASK(63, ia_bits))) 314 - goto transfault_l0; 256 + goto transfault; 315 257 316 258 /* I_ZFSYQ */ 317 259 if (wi->regime != TR_EL2 && 318 260 (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK))) 319 - goto transfault_l0; 261 + goto transfault; 320 262 321 263 /* R_BNDVG and following statements */ 322 264 if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) && 323 265 wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0))) 324 - goto transfault_l0; 325 - 326 - /* AArch64.S1StartLevel() */ 327 - stride = wi->pgshift - 3; 328 - wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); 266 + goto transfault; 329 267 330 268 ps = (wi->regime == TR_EL2 ? 
331 269 FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr)); 332 270 333 - wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps)); 271 + wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit)); 334 272 335 273 /* Compute minimal alignment */ 336 274 x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift); 337 275 338 276 wi->baddr = ttbr & TTBRx_EL1_BADDR; 277 + if (wi->pa52bit) { 278 + /* 279 + * Force the alignment on 64 bytes for top-level tables 280 + * smaller than 8 entries, since TTBR.BADDR[5:2] are used to 281 + * store bits [51:48] of the first level of lookup. 282 + */ 283 + x = max(x, 6); 284 + 285 + wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48; 286 + } 339 287 340 288 /* R_VPBBF */ 341 289 if (check_output_size(wi->baddr, wi)) ··· 349 289 350 290 return 0; 351 291 352 - addrsz: /* Address Size Fault level 0 */ 292 + addrsz: 293 + /* 294 + * Address Size Fault level 0 to indicate it comes from TTBR. 295 + * yes, this is an oddity. 
296 + */ 353 297 fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false); 354 298 return -EFAULT; 355 299 356 - transfault_l0: /* Translation Fault level 0 */ 357 - fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false); 300 + transfault: 301 + /* Translation Fault on start level */ 302 + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false); 358 303 return -EFAULT; 359 304 } 360 305 ··· 404 339 ipa = kvm_s2_trans_output(&s2_trans); 405 340 } 406 341 342 + if (wi->filter) { 343 + ret = wi->filter->fn(&(struct s1_walk_context) 344 + { 345 + .wi = wi, 346 + .table_ipa = baddr, 347 + .level = level, 348 + }, wi->filter->priv); 349 + if (ret) 350 + return ret; 351 + } 352 + 407 353 ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); 408 354 if (ret) { 409 355 fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false); ··· 445 369 wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc); 446 370 } 447 371 448 - baddr = desc & GENMASK_ULL(47, wi->pgshift); 372 + baddr = desc_to_oa(wi, desc); 449 373 450 374 /* Check for out-of-range OA */ 451 375 if (check_output_size(baddr, wi)) ··· 462 386 463 387 switch (BIT(wi->pgshift)) { 464 388 case SZ_4K: 465 - valid_block = level == 1 || level == 2; 389 + valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0); 466 390 break; 467 391 case SZ_16K: 468 392 case SZ_64K: 469 - valid_block = level == 2; 393 + valid_block = level == 2 || (wi->pa52bit && level == 1); 470 394 break; 471 395 } 472 396 ··· 474 398 goto transfault; 475 399 } 476 400 477 - if (check_output_size(desc & GENMASK(47, va_bottom), wi)) 401 + baddr = desc_to_oa(wi, desc); 402 + if (check_output_size(baddr & GENMASK(52, va_bottom), wi)) 478 403 goto addrsz; 479 404 480 405 if (!(desc & PTE_AF)) { ··· 488 411 wr->failed = false; 489 412 wr->level = level; 490 413 wr->desc = desc; 491 - wr->pa = desc & GENMASK(47, va_bottom); 414 + wr->pa = baddr & GENMASK(52, va_bottom); 492 415 wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0); 493 416 494 417 wr->nG = (wi->regime != TR_EL2) && 
(desc & PTE_NG); ··· 717 640 #define ATTR_OSH 0b10 718 641 #define ATTR_ISH 0b11 719 642 720 - static u8 compute_sh(u8 attr, u64 desc) 643 + static u8 compute_final_sh(u8 attr, u8 sh) 721 644 { 722 - u8 sh; 723 - 724 645 /* Any form of device, as well as NC has SH[1:0]=0b10 */ 725 646 if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) 726 647 return ATTR_OSH; 727 648 728 - sh = FIELD_GET(PTE_SHARED, desc); 729 649 if (sh == ATTR_RSV) /* Reserved, mapped to NSH */ 730 650 sh = ATTR_NSH; 731 651 732 652 return sh; 653 + } 654 + 655 + static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr, 656 + u8 attr) 657 + { 658 + u8 sh; 659 + 660 + /* 661 + * non-52bit and LPA have their basic shareability described in the 662 + * descriptor. LPA2 gets it from the corresponding field in TCR, 663 + * conveniently recorded in the walk info. 664 + */ 665 + if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K) 666 + sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc); 667 + else 668 + sh = wi->sh; 669 + 670 + return compute_final_sh(attr, sh); 733 671 } 734 672 735 673 static u8 combine_sh(u8 s1_sh, u8 s2_sh) ··· 760 668 static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, 761 669 struct kvm_s2_trans *tr) 762 670 { 763 - u8 s1_parattr, s2_memattr, final_attr; 671 + u8 s1_parattr, s2_memattr, final_attr, s2_sh; 764 672 u64 par; 765 673 766 674 /* If S2 has failed to translate, report the damage */ ··· 833 741 !MEMATTR_IS_DEVICE(final_attr)) 834 742 final_attr = MEMATTR(NC, NC); 835 743 744 + s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc); 745 + 836 746 par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); 837 747 par |= tr->output & GENMASK(47, 12); 838 748 par |= FIELD_PREP(SYS_PAR_EL1_SH, 839 749 combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par), 840 - compute_sh(final_attr, tr->desc))); 750 + compute_final_sh(final_attr, s2_sh))); 841 751 842 752 return par; 843 753 } 844 754 845 - static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result 
*wr, 846 - enum trans_regime regime) 755 + static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, 756 + struct s1_walk_result *wr) 847 757 { 848 758 u64 par; 849 759 ··· 858 764 } else if (wr->level == S1_MMU_DISABLED) { 859 765 /* MMU off or HCR_EL2.DC == 1 */ 860 766 par = SYS_PAR_EL1_NSE; 861 - par |= wr->pa & GENMASK_ULL(47, 12); 767 + par |= wr->pa & SYS_PAR_EL1_PA; 862 768 863 - if (regime == TR_EL10 && 769 + if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) && 864 770 (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) { 865 771 par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 866 772 MEMATTR(WbRaWa, WbRaWa)); ··· 875 781 876 782 par = SYS_PAR_EL1_NSE; 877 783 878 - mair = (regime == TR_EL10 ? 784 + mair = (wi->regime == TR_EL10 ? 879 785 vcpu_read_sys_reg(vcpu, MAIR_EL1) : 880 786 vcpu_read_sys_reg(vcpu, MAIR_EL2)); 881 787 882 788 mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8; 883 789 mair &= 0xff; 884 790 885 - sctlr = (regime == TR_EL10 ? 791 + sctlr = (wi->regime == TR_EL10 ? 
886 792 vcpu_read_sys_reg(vcpu, SCTLR_EL1) : 887 793 vcpu_read_sys_reg(vcpu, SCTLR_EL2)); 888 794 ··· 891 797 mair = MEMATTR(NC, NC); 892 798 893 799 par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair); 894 - par |= wr->pa & GENMASK_ULL(47, 12); 800 + par |= wr->pa & SYS_PAR_EL1_PA; 895 801 896 - sh = compute_sh(mair, wr->desc); 802 + sh = compute_s1_sh(wi, wr, mair); 897 803 par |= FIELD_PREP(SYS_PAR_EL1_SH, sh); 898 804 } 899 805 ··· 967 873 wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN); 968 874 break; 969 875 case TR_EL10: 970 - wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN); 876 + wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN); 971 877 break; 972 878 } 973 879 ··· 1280 1186 fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false); 1281 1187 1282 1188 compute_par: 1283 - return compute_par_s1(vcpu, &wr, wi.regime); 1189 + return compute_par_s1(vcpu, &wi, &wr); 1284 1190 } 1285 1191 1286 1192 /* ··· 1296 1202 { 1297 1203 struct mmu_config config; 1298 1204 struct kvm_s2_mmu *mmu; 1299 - bool fail; 1205 + bool fail, mmu_cs; 1300 1206 u64 par; 1301 1207 1302 1208 par = SYS_PAR_EL1_F; ··· 1312 1218 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already 1313 1219 * the right one (as we trapped from vEL2). If not, save the 1314 1220 * full MMU context. 1221 + * 1222 + * We are also guaranteed to be in the correct context if 1223 + * we're not in a nested VM. 
1315 1224 */ 1316 - if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) 1225 + mmu_cs = (vcpu_has_nv(vcpu) && 1226 + !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))); 1227 + if (!mmu_cs) 1317 1228 goto skip_mmu_switch; 1318 1229 1319 1230 /* ··· 1386 1287 1387 1288 write_sysreg_hcr(HCR_HOST_VHE_FLAGS); 1388 1289 1389 - if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))) 1290 + if (mmu_cs) 1390 1291 __mmu_config_restore(&config); 1391 1292 1392 1293 return par; ··· 1568 1469 } 1569 1470 1570 1471 return 0; 1472 + } 1473 + 1474 + struct desc_match { 1475 + u64 ipa; 1476 + int level; 1477 + }; 1478 + 1479 + static int match_s1_desc(struct s1_walk_context *ctxt, void *priv) 1480 + { 1481 + struct desc_match *dm = priv; 1482 + u64 ipa = dm->ipa; 1483 + 1484 + /* Use S1 granule alignment */ 1485 + ipa &= GENMASK(51, ctxt->wi->pgshift); 1486 + 1487 + /* Not the IPA we're looking for? Continue. */ 1488 + if (ipa != ctxt->table_ipa) 1489 + return 0; 1490 + 1491 + /* Note the level and interrupt the walk */ 1492 + dm->level = ctxt->level; 1493 + return -EINTR; 1494 + } 1495 + 1496 + int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) 1497 + { 1498 + struct desc_match dm = { 1499 + .ipa = ipa, 1500 + }; 1501 + struct s1_walk_info wi = { 1502 + .filter = &(struct s1_walk_filter){ 1503 + .fn = match_s1_desc, 1504 + .priv = &dm, 1505 + }, 1506 + .regime = TR_EL10, 1507 + .as_el0 = false, 1508 + .pan = false, 1509 + }; 1510 + struct s1_walk_result wr = {}; 1511 + int ret; 1512 + 1513 + ret = setup_s1_walk(vcpu, &wi, &wr, va); 1514 + if (ret) 1515 + return ret; 1516 + 1517 + /* We really expect the S1 MMU to be on here... 
*/ 1518 + if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) { 1519 + *level = 0; 1520 + return 0; 1521 + } 1522 + 1523 + /* Walk the guest's PT, looking for a match along the way */ 1524 + ret = walk_s1(vcpu, &wi, &wr, va); 1525 + switch (ret) { 1526 + case -EINTR: 1527 + /* We interrupted the walk on a match, return the level */ 1528 + *level = dm.level; 1529 + return 0; 1530 + case 0: 1531 + /* The walk completed, we failed to find the entry */ 1532 + return -ENOENT; 1533 + default: 1534 + /* Any other error... */ 1535 + return ret; 1536 + } 1571 1537 }
+227 -131
arch/arm64/kvm/config.c
··· 7 7 #include <linux/kvm_host.h> 8 8 #include <asm/sysreg.h> 9 9 10 + /* 11 + * Describes the dependencies between a set of bits (or the negation 12 + * of a set of RES0 bits) and a feature. The flags indicate how the 13 + * data is interpreted. 14 + */ 10 15 struct reg_bits_to_feat_map { 11 - u64 bits; 16 + union { 17 + u64 bits; 18 + u64 *res0p; 19 + }; 12 20 13 21 #define NEVER_FGU BIT(0) /* Can trap, but never UNDEF */ 14 22 #define CALL_FUNC BIT(1) /* Needs to evaluate tons of crap */ 15 23 #define FIXED_VALUE BIT(2) /* RAZ/WI or RAO/WI in KVM */ 24 + #define RES0_POINTER BIT(3) /* Pointer to RES0 value instead of bits */ 25 + 16 26 unsigned long flags; 17 27 18 28 union { ··· 38 28 }; 39 29 }; 40 30 41 - #define __NEEDS_FEAT_3(m, f, id, fld, lim) \ 31 + /* 32 + * Describes the dependencies for a given register: 33 + * 34 + * @feat_map describes the dependency for the whole register. If the 35 + * features the register depends on are not present, the whole 36 + * register is effectively RES0. 37 + * 38 + * @bit_feat_map describes the dependencies for a set of bits in that 39 + * register. If the features these bits depend on are not present, the 40 + * bits are effectively RES0. 
41 + */ 42 + struct reg_feat_map_desc { 43 + const char *name; 44 + const struct reg_bits_to_feat_map feat_map; 45 + const struct reg_bits_to_feat_map *bit_feat_map; 46 + const unsigned int bit_feat_map_sz; 47 + }; 48 + 49 + #define __NEEDS_FEAT_3(m, f, w, id, fld, lim) \ 42 50 { \ 43 - .bits = (m), \ 51 + .w = (m), \ 44 52 .flags = (f), \ 45 53 .regidx = IDREG_IDX(SYS_ ## id), \ 46 54 .shift = id ##_## fld ## _SHIFT, \ ··· 67 39 .lo_lim = id ##_## fld ##_## lim \ 68 40 } 69 41 70 - #define __NEEDS_FEAT_2(m, f, fun, dummy) \ 42 + #define __NEEDS_FEAT_2(m, f, w, fun, dummy) \ 71 43 { \ 72 - .bits = (m), \ 44 + .w = (m), \ 73 45 .flags = (f) | CALL_FUNC, \ 74 46 .fval = (fun), \ 75 47 } 76 48 77 - #define __NEEDS_FEAT_1(m, f, fun) \ 49 + #define __NEEDS_FEAT_1(m, f, w, fun) \ 78 50 { \ 79 - .bits = (m), \ 51 + .w = (m), \ 80 52 .flags = (f) | CALL_FUNC, \ 81 53 .match = (fun), \ 82 54 } 83 55 56 + #define __NEEDS_FEAT_FLAG(m, f, w, ...) \ 57 + CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, w, __VA_ARGS__) 58 + 84 59 #define NEEDS_FEAT_FLAG(m, f, ...) \ 85 - CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, __VA_ARGS__) 60 + __NEEDS_FEAT_FLAG(m, f, bits, __VA_ARGS__) 86 61 87 62 #define NEEDS_FEAT_FIXED(m, ...) \ 88 - NEEDS_FEAT_FLAG(m, FIXED_VALUE, __VA_ARGS__, 0) 63 + __NEEDS_FEAT_FLAG(m, FIXED_VALUE, bits, __VA_ARGS__, 0) 89 64 65 + #define NEEDS_FEAT_RES0(p, ...) \ 66 + __NEEDS_FEAT_FLAG(p, RES0_POINTER, res0p, __VA_ARGS__) 67 + 68 + /* 69 + * Declare the dependency between a set of bits and a set of features, 70 + * generating a struct reg_bit_to_feat_map. 71 + */ 90 72 #define NEEDS_FEAT(m, ...) NEEDS_FEAT_FLAG(m, 0, __VA_ARGS__) 73 + 74 + /* 75 + * Declare the dependency between a non-FGT register, a set of 76 + * feature, and the set of individual bits it contains. This generates 77 + * a struct reg_feat_map_desc. 
78 + */ 79 + #define DECLARE_FEAT_MAP(n, r, m, f) \ 80 + struct reg_feat_map_desc n = { \ 81 + .name = #r, \ 82 + .feat_map = NEEDS_FEAT(~r##_RES0, f), \ 83 + .bit_feat_map = m, \ 84 + .bit_feat_map_sz = ARRAY_SIZE(m), \ 85 + } 86 + 87 + /* 88 + * Specialised version of the above for FGT registers that have their 89 + * RES0 masks described as struct fgt_masks. 90 + */ 91 + #define DECLARE_FEAT_MAP_FGT(n, msk, m, f) \ 92 + struct reg_feat_map_desc n = { \ 93 + .name = #msk, \ 94 + .feat_map = NEEDS_FEAT_RES0(&msk.res0, f),\ 95 + .bit_feat_map = m, \ 96 + .bit_feat_map_sz = ARRAY_SIZE(m), \ 97 + } 91 98 92 99 #define FEAT_SPE ID_AA64DFR0_EL1, PMSVer, IMP 93 100 #define FEAT_SPE_FnE ID_AA64DFR0_EL1, PMSVer, V1P2 ··· 136 73 #define FEAT_AA32EL0 ID_AA64PFR0_EL1, EL0, AARCH32 137 74 #define FEAT_AA32EL1 ID_AA64PFR0_EL1, EL1, AARCH32 138 75 #define FEAT_AA64EL1 ID_AA64PFR0_EL1, EL1, IMP 76 + #define FEAT_AA64EL2 ID_AA64PFR0_EL1, EL2, IMP 139 77 #define FEAT_AA64EL3 ID_AA64PFR0_EL1, EL3, IMP 140 78 #define FEAT_AIE ID_AA64MMFR3_EL1, AIE, IMP 141 79 #define FEAT_S2POE ID_AA64MMFR3_EL1, S2POE, IMP ··· 195 131 #define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP 196 132 #define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP 197 133 #define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP 198 - #define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP 199 134 #define FEAT_CPA2 ID_AA64ISAR3_EL1, CPA, CPA2 200 135 #define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP 201 136 #define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP ··· 206 143 #define FEAT_LSMAOC ID_AA64MMFR2_EL1, LSM, IMP 207 144 #define FEAT_MixedEnd ID_AA64MMFR0_EL1, BIGEND, IMP 208 145 #define FEAT_MixedEndEL0 ID_AA64MMFR0_EL1, BIGENDEL0, IMP 209 - #define FEAT_MTE2 ID_AA64PFR1_EL1, MTE, MTE2 210 146 #define FEAT_MTE_ASYNC ID_AA64PFR1_EL1, MTE_frac, ASYNC 211 147 #define FEAT_MTE_STORE_ONLY ID_AA64PFR2_EL1, MTESTOREONLY, IMP 212 148 #define FEAT_PAN ID_AA64MMFR1_EL1, PAN, IMP ··· 213 151 #define FEAT_SSBS ID_AA64PFR1_EL1, SSBS, IMP 214 152 #define 
FEAT_TIDCP1 ID_AA64MMFR1_EL1, TIDCP1, IMP 215 153 #define FEAT_FGT ID_AA64MMFR0_EL1, FGT, IMP 154 + #define FEAT_FGT2 ID_AA64MMFR0_EL1, FGT, FGT2 216 155 #define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP 156 + #define FEAT_HCX ID_AA64MMFR1_EL1, HCX, IMP 217 157 218 158 static bool not_feat_aa64el3(struct kvm *kvm) 219 159 { ··· 461 397 NEVER_FGU, FEAT_AA64EL1), 462 398 }; 463 399 400 + 401 + static const DECLARE_FEAT_MAP_FGT(hfgrtr_desc, hfgrtr_masks, 402 + hfgrtr_feat_map, FEAT_FGT); 403 + 464 404 static const struct reg_bits_to_feat_map hfgwtr_feat_map[] = { 465 405 NEEDS_FEAT(HFGWTR_EL2_nAMAIR2_EL1 | 466 406 HFGWTR_EL2_nMAIR2_EL1, ··· 528 460 HFGWTR_EL2_AFSR0_EL1, 529 461 NEVER_FGU, FEAT_AA64EL1), 530 462 }; 463 + 464 + static const DECLARE_FEAT_MAP_FGT(hfgwtr_desc, hfgwtr_masks, 465 + hfgwtr_feat_map, FEAT_FGT); 531 466 532 467 static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = { 533 468 NEEDS_FEAT(HDFGRTR_EL2_PMBIDR_EL1 | ··· 599 528 NEVER_FGU, FEAT_AA64EL1) 600 529 }; 601 530 531 + static const DECLARE_FEAT_MAP_FGT(hdfgrtr_desc, hdfgrtr_masks, 532 + hdfgrtr_feat_map, FEAT_FGT); 533 + 602 534 static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = { 603 535 NEEDS_FEAT(HDFGWTR_EL2_PMSLATFR_EL1 | 604 536 HDFGWTR_EL2_PMSIRR_EL1 | ··· 662 588 NEEDS_FEAT(HDFGWTR_EL2_TRFCR_EL1, FEAT_TRF), 663 589 }; 664 590 591 + static const DECLARE_FEAT_MAP_FGT(hdfgwtr_desc, hdfgwtr_masks, 592 + hdfgwtr_feat_map, FEAT_FGT); 665 593 666 594 static const struct reg_bits_to_feat_map hfgitr_feat_map[] = { 667 595 NEEDS_FEAT(HFGITR_EL2_PSBCSYNC, FEAT_SPEv1p5), ··· 738 662 NEVER_FGU, FEAT_AA64EL1), 739 663 }; 740 664 665 + static const DECLARE_FEAT_MAP_FGT(hfgitr_desc, hfgitr_masks, 666 + hfgitr_feat_map, FEAT_FGT); 667 + 741 668 static const struct reg_bits_to_feat_map hafgrtr_feat_map[] = { 742 669 NEEDS_FEAT(HAFGRTR_EL2_AMEVTYPER115_EL0 | 743 670 HAFGRTR_EL2_AMEVTYPER114_EL0 | ··· 783 704 FEAT_AMUv1), 784 705 }; 785 706 707 + static const 
DECLARE_FEAT_MAP_FGT(hafgrtr_desc, hafgrtr_masks, 708 + hafgrtr_feat_map, FEAT_FGT); 709 + 786 710 static const struct reg_bits_to_feat_map hfgitr2_feat_map[] = { 787 711 NEEDS_FEAT(HFGITR2_EL2_nDCCIVAPS, FEAT_PoPS), 788 712 NEEDS_FEAT(HFGITR2_EL2_TSBCSYNC, FEAT_TRBEv1p1) 789 713 }; 714 + 715 + static const DECLARE_FEAT_MAP_FGT(hfgitr2_desc, hfgitr2_masks, 716 + hfgitr2_feat_map, FEAT_FGT2); 790 717 791 718 static const struct reg_bits_to_feat_map hfgrtr2_feat_map[] = { 792 719 NEEDS_FEAT(HFGRTR2_EL2_nPFAR_EL1, FEAT_PFAR), ··· 813 728 NEEDS_FEAT(HFGRTR2_EL2_nRCWSMASK_EL1, FEAT_THE), 814 729 }; 815 730 731 + static const DECLARE_FEAT_MAP_FGT(hfgrtr2_desc, hfgrtr2_masks, 732 + hfgrtr2_feat_map, FEAT_FGT2); 733 + 816 734 static const struct reg_bits_to_feat_map hfgwtr2_feat_map[] = { 817 735 NEEDS_FEAT(HFGWTR2_EL2_nPFAR_EL1, FEAT_PFAR), 818 736 NEEDS_FEAT(HFGWTR2_EL2_nACTLRALIAS_EL1 | ··· 833 745 FEAT_SRMASK), 834 746 NEEDS_FEAT(HFGWTR2_EL2_nRCWSMASK_EL1, FEAT_THE), 835 747 }; 748 + 749 + static const DECLARE_FEAT_MAP_FGT(hfgwtr2_desc, hfgwtr2_masks, 750 + hfgwtr2_feat_map, FEAT_FGT2); 836 751 837 752 static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = { 838 753 NEEDS_FEAT(HDFGRTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9), ··· 867 776 NEEDS_FEAT(HDFGRTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam), 868 777 }; 869 778 779 + static const DECLARE_FEAT_MAP_FGT(hdfgrtr2_desc, hdfgrtr2_masks, 780 + hdfgrtr2_feat_map, FEAT_FGT2); 781 + 870 782 static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = { 871 783 NEEDS_FEAT(HDFGWTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9), 872 784 NEEDS_FEAT(HDFGWTR2_EL2_nPMECR_EL1, feat_ebep_pmuv3_ss), ··· 897 803 NEEDS_FEAT(HDFGWTR2_EL2_nMDSTEPOP_EL1, FEAT_STEP2), 898 804 NEEDS_FEAT(HDFGWTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam), 899 805 }; 806 + 807 + static const DECLARE_FEAT_MAP_FGT(hdfgwtr2_desc, hdfgwtr2_masks, 808 + hdfgwtr2_feat_map, FEAT_FGT2); 809 + 900 810 901 811 static const struct reg_bits_to_feat_map hcrx_feat_map[] = { 902 812 
NEEDS_FEAT(HCRX_EL2_PACMEn, feat_pauth_lr), ··· 930 832 NEEDS_FEAT(HCRX_EL2_EnALS, FEAT_LS64), 931 833 NEEDS_FEAT(HCRX_EL2_EnAS0, FEAT_LS64_ACCDATA), 932 834 }; 835 + 836 + 837 + static const DECLARE_FEAT_MAP(hcrx_desc, __HCRX_EL2, 838 + hcrx_feat_map, FEAT_HCX); 933 839 934 840 static const struct reg_bits_to_feat_map hcr_feat_map[] = { 935 841 NEEDS_FEAT(HCR_EL2_TID0, FEAT_AA32EL0), ··· 1006 904 NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h), 1007 905 }; 1008 906 907 + static const DECLARE_FEAT_MAP(hcr_desc, HCR_EL2, 908 + hcr_feat_map, FEAT_AA64EL2); 909 + 1009 910 static const struct reg_bits_to_feat_map sctlr2_feat_map[] = { 1010 911 NEEDS_FEAT(SCTLR2_EL1_NMEA | 1011 912 SCTLR2_EL1_EASE, ··· 1025 920 SCTLR2_EL1_CPTM0, 1026 921 FEAT_CPA2), 1027 922 }; 923 + 924 + static const DECLARE_FEAT_MAP(sctlr2_desc, SCTLR2_EL1, 925 + sctlr2_feat_map, FEAT_SCTLR2); 1028 926 1029 927 static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = { 1030 928 NEEDS_FEAT(TCR2_EL2_FNG1 | ··· 1050 942 FEAT_S1POE), 1051 943 NEEDS_FEAT(TCR2_EL2_PIE, FEAT_S1PIE), 1052 944 }; 945 + 946 + static const DECLARE_FEAT_MAP(tcr2_el2_desc, TCR2_EL2, 947 + tcr2_el2_feat_map, FEAT_TCR2); 1053 948 1054 949 static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = { 1055 950 NEEDS_FEAT(SCTLR_EL1_CP15BEN | ··· 1128 1017 FEAT_AA64EL1), 1129 1018 }; 1130 1019 1020 + static const DECLARE_FEAT_MAP(sctlr_el1_desc, SCTLR_EL1, 1021 + sctlr_el1_feat_map, FEAT_AA64EL1); 1022 + 1131 1023 static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = { 1132 1024 NEEDS_FEAT(MDCR_EL2_EBWE, FEAT_Debugv8p9), 1133 1025 NEEDS_FEAT(MDCR_EL2_TDOSA, FEAT_DoubleLock), ··· 1162 1048 FEAT_AA64EL1), 1163 1049 }; 1164 1050 1051 + static const DECLARE_FEAT_MAP(mdcr_el2_desc, MDCR_EL2, 1052 + mdcr_el2_feat_map, FEAT_AA64EL2); 1053 + 1165 1054 static void __init check_feat_map(const struct reg_bits_to_feat_map *map, 1166 1055 int map_size, u64 res0, const char *str) 1167 1056 { ··· 1178 1061 str, mask ^ ~res0); 1179 
1062 } 1180 1063 1064 + static u64 reg_feat_map_bits(const struct reg_bits_to_feat_map *map) 1065 + { 1066 + return map->flags & RES0_POINTER ? ~(*map->res0p) : map->bits; 1067 + } 1068 + 1069 + static void __init check_reg_desc(const struct reg_feat_map_desc *r) 1070 + { 1071 + check_feat_map(r->bit_feat_map, r->bit_feat_map_sz, 1072 + ~reg_feat_map_bits(&r->feat_map), r->name); 1073 + } 1074 + 1181 1075 void __init check_feature_map(void) 1182 1076 { 1183 - check_feat_map(hfgrtr_feat_map, ARRAY_SIZE(hfgrtr_feat_map), 1184 - hfgrtr_masks.res0, hfgrtr_masks.str); 1185 - check_feat_map(hfgwtr_feat_map, ARRAY_SIZE(hfgwtr_feat_map), 1186 - hfgwtr_masks.res0, hfgwtr_masks.str); 1187 - check_feat_map(hfgitr_feat_map, ARRAY_SIZE(hfgitr_feat_map), 1188 - hfgitr_masks.res0, hfgitr_masks.str); 1189 - check_feat_map(hdfgrtr_feat_map, ARRAY_SIZE(hdfgrtr_feat_map), 1190 - hdfgrtr_masks.res0, hdfgrtr_masks.str); 1191 - check_feat_map(hdfgwtr_feat_map, ARRAY_SIZE(hdfgwtr_feat_map), 1192 - hdfgwtr_masks.res0, hdfgwtr_masks.str); 1193 - check_feat_map(hafgrtr_feat_map, ARRAY_SIZE(hafgrtr_feat_map), 1194 - hafgrtr_masks.res0, hafgrtr_masks.str); 1195 - check_feat_map(hcrx_feat_map, ARRAY_SIZE(hcrx_feat_map), 1196 - __HCRX_EL2_RES0, "HCRX_EL2"); 1197 - check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map), 1198 - HCR_EL2_RES0, "HCR_EL2"); 1199 - check_feat_map(sctlr2_feat_map, ARRAY_SIZE(sctlr2_feat_map), 1200 - SCTLR2_EL1_RES0, "SCTLR2_EL1"); 1201 - check_feat_map(tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map), 1202 - TCR2_EL2_RES0, "TCR2_EL2"); 1203 - check_feat_map(sctlr_el1_feat_map, ARRAY_SIZE(sctlr_el1_feat_map), 1204 - SCTLR_EL1_RES0, "SCTLR_EL1"); 1205 - check_feat_map(mdcr_el2_feat_map, ARRAY_SIZE(mdcr_el2_feat_map), 1206 - MDCR_EL2_RES0, "MDCR_EL2"); 1077 + check_reg_desc(&hfgrtr_desc); 1078 + check_reg_desc(&hfgwtr_desc); 1079 + check_reg_desc(&hfgitr_desc); 1080 + check_reg_desc(&hdfgrtr_desc); 1081 + check_reg_desc(&hdfgwtr_desc); 1082 + check_reg_desc(&hafgrtr_desc); 
1083 + check_reg_desc(&hfgrtr2_desc); 1084 + check_reg_desc(&hfgwtr2_desc); 1085 + check_reg_desc(&hfgitr2_desc); 1086 + check_reg_desc(&hdfgrtr2_desc); 1087 + check_reg_desc(&hdfgwtr2_desc); 1088 + check_reg_desc(&hcrx_desc); 1089 + check_reg_desc(&hcr_desc); 1090 + check_reg_desc(&sctlr2_desc); 1091 + check_reg_desc(&tcr2_el2_desc); 1092 + check_reg_desc(&sctlr_el1_desc); 1093 + check_reg_desc(&mdcr_el2_desc); 1207 1094 } 1208 1095 1209 1096 static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map) ··· 1250 1129 match = idreg_feat_match(kvm, &map[i]); 1251 1130 1252 1131 if (!match || (map[i].flags & FIXED_VALUE)) 1253 - val |= map[i].bits; 1132 + val |= reg_feat_map_bits(&map[i]); 1254 1133 } 1255 1134 1256 1135 return val; ··· 1266 1145 require, exclude | FIXED_VALUE); 1267 1146 } 1268 1147 1269 - static u64 compute_fixed_bits(struct kvm *kvm, 1270 - const struct reg_bits_to_feat_map *map, 1271 - int map_size, 1272 - u64 *fixed_bits, 1273 - unsigned long require, 1274 - unsigned long exclude) 1148 + static u64 compute_reg_res0_bits(struct kvm *kvm, 1149 + const struct reg_feat_map_desc *r, 1150 + unsigned long require, unsigned long exclude) 1151 + 1275 1152 { 1276 - return __compute_fixed_bits(kvm, map, map_size, fixed_bits, 1277 - require | FIXED_VALUE, exclude); 1153 + u64 res0; 1154 + 1155 + res0 = compute_res0_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, 1156 + require, exclude); 1157 + 1158 + /* 1159 + * If computing FGUs, don't take RES0 or register existence 1160 + * into account -- we're not computing bits for the register 1161 + * itself. 
1162 + */ 1163 + if (!(exclude & NEVER_FGU)) { 1164 + res0 |= compute_res0_bits(kvm, &r->feat_map, 1, require, exclude); 1165 + res0 |= ~reg_feat_map_bits(&r->feat_map); 1166 + } 1167 + 1168 + return res0; 1169 + } 1170 + 1171 + static u64 compute_reg_fixed_bits(struct kvm *kvm, 1172 + const struct reg_feat_map_desc *r, 1173 + u64 *fixed_bits, unsigned long require, 1174 + unsigned long exclude) 1175 + { 1176 + return __compute_fixed_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz, 1177 + fixed_bits, require | FIXED_VALUE, exclude); 1278 1178 } 1279 1179 1280 1180 void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt) ··· 1304 1162 1305 1163 switch (fgt) { 1306 1164 case HFGRTR_GROUP: 1307 - val |= compute_res0_bits(kvm, hfgrtr_feat_map, 1308 - ARRAY_SIZE(hfgrtr_feat_map), 1309 - 0, NEVER_FGU); 1310 - val |= compute_res0_bits(kvm, hfgwtr_feat_map, 1311 - ARRAY_SIZE(hfgwtr_feat_map), 1312 - 0, NEVER_FGU); 1165 + val |= compute_reg_res0_bits(kvm, &hfgrtr_desc, 1166 + 0, NEVER_FGU); 1167 + val |= compute_reg_res0_bits(kvm, &hfgwtr_desc, 1168 + 0, NEVER_FGU); 1313 1169 break; 1314 1170 case HFGITR_GROUP: 1315 - val |= compute_res0_bits(kvm, hfgitr_feat_map, 1316 - ARRAY_SIZE(hfgitr_feat_map), 1317 - 0, NEVER_FGU); 1171 + val |= compute_reg_res0_bits(kvm, &hfgitr_desc, 1172 + 0, NEVER_FGU); 1318 1173 break; 1319 1174 case HDFGRTR_GROUP: 1320 - val |= compute_res0_bits(kvm, hdfgrtr_feat_map, 1321 - ARRAY_SIZE(hdfgrtr_feat_map), 1322 - 0, NEVER_FGU); 1323 - val |= compute_res0_bits(kvm, hdfgwtr_feat_map, 1324 - ARRAY_SIZE(hdfgwtr_feat_map), 1325 - 0, NEVER_FGU); 1175 + val |= compute_reg_res0_bits(kvm, &hdfgrtr_desc, 1176 + 0, NEVER_FGU); 1177 + val |= compute_reg_res0_bits(kvm, &hdfgwtr_desc, 1178 + 0, NEVER_FGU); 1326 1179 break; 1327 1180 case HAFGRTR_GROUP: 1328 - val |= compute_res0_bits(kvm, hafgrtr_feat_map, 1329 - ARRAY_SIZE(hafgrtr_feat_map), 1330 - 0, NEVER_FGU); 1181 + val |= compute_reg_res0_bits(kvm, &hafgrtr_desc, 1182 + 0, NEVER_FGU); 1331 1183 break; 1332 
1184 case HFGRTR2_GROUP: 1333 - val |= compute_res0_bits(kvm, hfgrtr2_feat_map, 1334 - ARRAY_SIZE(hfgrtr2_feat_map), 1335 - 0, NEVER_FGU); 1336 - val |= compute_res0_bits(kvm, hfgwtr2_feat_map, 1337 - ARRAY_SIZE(hfgwtr2_feat_map), 1338 - 0, NEVER_FGU); 1185 + val |= compute_reg_res0_bits(kvm, &hfgrtr2_desc, 1186 + 0, NEVER_FGU); 1187 + val |= compute_reg_res0_bits(kvm, &hfgwtr2_desc, 1188 + 0, NEVER_FGU); 1339 1189 break; 1340 1190 case HFGITR2_GROUP: 1341 - val |= compute_res0_bits(kvm, hfgitr2_feat_map, 1342 - ARRAY_SIZE(hfgitr2_feat_map), 1343 - 0, NEVER_FGU); 1191 + val |= compute_reg_res0_bits(kvm, &hfgitr2_desc, 1192 + 0, NEVER_FGU); 1344 1193 break; 1345 1194 case HDFGRTR2_GROUP: 1346 - val |= compute_res0_bits(kvm, hdfgrtr2_feat_map, 1347 - ARRAY_SIZE(hdfgrtr2_feat_map), 1348 - 0, NEVER_FGU); 1349 - val |= compute_res0_bits(kvm, hdfgwtr2_feat_map, 1350 - ARRAY_SIZE(hdfgwtr2_feat_map), 1351 - 0, NEVER_FGU); 1195 + val |= compute_reg_res0_bits(kvm, &hdfgrtr2_desc, 1196 + 0, NEVER_FGU); 1197 + val |= compute_reg_res0_bits(kvm, &hdfgwtr2_desc, 1198 + 0, NEVER_FGU); 1352 1199 break; 1353 1200 default: 1354 1201 BUG(); ··· 1352 1221 1353 1222 switch (reg) { 1354 1223 case HFGRTR_EL2: 1355 - *res0 = compute_res0_bits(kvm, hfgrtr_feat_map, 1356 - ARRAY_SIZE(hfgrtr_feat_map), 0, 0); 1357 - *res0 |= hfgrtr_masks.res0; 1224 + *res0 = compute_reg_res0_bits(kvm, &hfgrtr_desc, 0, 0); 1358 1225 *res1 = HFGRTR_EL2_RES1; 1359 1226 break; 1360 1227 case HFGWTR_EL2: 1361 - *res0 = compute_res0_bits(kvm, hfgwtr_feat_map, 1362 - ARRAY_SIZE(hfgwtr_feat_map), 0, 0); 1363 - *res0 |= hfgwtr_masks.res0; 1228 + *res0 = compute_reg_res0_bits(kvm, &hfgwtr_desc, 0, 0); 1364 1229 *res1 = HFGWTR_EL2_RES1; 1365 1230 break; 1366 1231 case HFGITR_EL2: 1367 - *res0 = compute_res0_bits(kvm, hfgitr_feat_map, 1368 - ARRAY_SIZE(hfgitr_feat_map), 0, 0); 1369 - *res0 |= hfgitr_masks.res0; 1232 + *res0 = compute_reg_res0_bits(kvm, &hfgitr_desc, 0, 0); 1370 1233 *res1 = HFGITR_EL2_RES1; 1371 1234 
break; 1372 1235 case HDFGRTR_EL2: 1373 - *res0 = compute_res0_bits(kvm, hdfgrtr_feat_map, 1374 - ARRAY_SIZE(hdfgrtr_feat_map), 0, 0); 1375 - *res0 |= hdfgrtr_masks.res0; 1236 + *res0 = compute_reg_res0_bits(kvm, &hdfgrtr_desc, 0, 0); 1376 1237 *res1 = HDFGRTR_EL2_RES1; 1377 1238 break; 1378 1239 case HDFGWTR_EL2: 1379 - *res0 = compute_res0_bits(kvm, hdfgwtr_feat_map, 1380 - ARRAY_SIZE(hdfgwtr_feat_map), 0, 0); 1381 - *res0 |= hdfgwtr_masks.res0; 1240 + *res0 = compute_reg_res0_bits(kvm, &hdfgwtr_desc, 0, 0); 1382 1241 *res1 = HDFGWTR_EL2_RES1; 1383 1242 break; 1384 1243 case HAFGRTR_EL2: 1385 - *res0 = compute_res0_bits(kvm, hafgrtr_feat_map, 1386 - ARRAY_SIZE(hafgrtr_feat_map), 0, 0); 1387 - *res0 |= hafgrtr_masks.res0; 1244 + *res0 = compute_reg_res0_bits(kvm, &hafgrtr_desc, 0, 0); 1388 1245 *res1 = HAFGRTR_EL2_RES1; 1389 1246 break; 1390 1247 case HFGRTR2_EL2: 1391 - *res0 = compute_res0_bits(kvm, hfgrtr2_feat_map, 1392 - ARRAY_SIZE(hfgrtr2_feat_map), 0, 0); 1393 - *res0 |= hfgrtr2_masks.res0; 1248 + *res0 = compute_reg_res0_bits(kvm, &hfgrtr2_desc, 0, 0); 1394 1249 *res1 = HFGRTR2_EL2_RES1; 1395 1250 break; 1396 1251 case HFGWTR2_EL2: 1397 - *res0 = compute_res0_bits(kvm, hfgwtr2_feat_map, 1398 - ARRAY_SIZE(hfgwtr2_feat_map), 0, 0); 1399 - *res0 |= hfgwtr2_masks.res0; 1252 + *res0 = compute_reg_res0_bits(kvm, &hfgwtr2_desc, 0, 0); 1400 1253 *res1 = HFGWTR2_EL2_RES1; 1401 1254 break; 1402 1255 case HFGITR2_EL2: 1403 - *res0 = compute_res0_bits(kvm, hfgitr2_feat_map, 1404 - ARRAY_SIZE(hfgitr2_feat_map), 0, 0); 1405 - *res0 |= hfgitr2_masks.res0; 1256 + *res0 = compute_reg_res0_bits(kvm, &hfgitr2_desc, 0, 0); 1406 1257 *res1 = HFGITR2_EL2_RES1; 1407 1258 break; 1408 1259 case HDFGRTR2_EL2: 1409 - *res0 = compute_res0_bits(kvm, hdfgrtr2_feat_map, 1410 - ARRAY_SIZE(hdfgrtr2_feat_map), 0, 0); 1411 - *res0 |= hdfgrtr2_masks.res0; 1260 + *res0 = compute_reg_res0_bits(kvm, &hdfgrtr2_desc, 0, 0); 1412 1261 *res1 = HDFGRTR2_EL2_RES1; 1413 1262 break; 1414 1263 case 
HDFGWTR2_EL2: 1415 - *res0 = compute_res0_bits(kvm, hdfgwtr2_feat_map, 1416 - ARRAY_SIZE(hdfgwtr2_feat_map), 0, 0); 1417 - *res0 |= hdfgwtr2_masks.res0; 1264 + *res0 = compute_reg_res0_bits(kvm, &hdfgwtr2_desc, 0, 0); 1418 1265 *res1 = HDFGWTR2_EL2_RES1; 1419 1266 break; 1420 1267 case HCRX_EL2: 1421 - *res0 = compute_res0_bits(kvm, hcrx_feat_map, 1422 - ARRAY_SIZE(hcrx_feat_map), 0, 0); 1423 - *res0 |= __HCRX_EL2_RES0; 1268 + *res0 = compute_reg_res0_bits(kvm, &hcrx_desc, 0, 0); 1424 1269 *res1 = __HCRX_EL2_RES1; 1425 1270 break; 1426 1271 case HCR_EL2: 1427 - mask = compute_fixed_bits(kvm, hcr_feat_map, 1428 - ARRAY_SIZE(hcr_feat_map), &fixed, 1429 - 0, 0); 1430 - *res0 = compute_res0_bits(kvm, hcr_feat_map, 1431 - ARRAY_SIZE(hcr_feat_map), 0, 0); 1432 - *res0 |= HCR_EL2_RES0 | (mask & ~fixed); 1272 + mask = compute_reg_fixed_bits(kvm, &hcr_desc, &fixed, 0, 0); 1273 + *res0 = compute_reg_res0_bits(kvm, &hcr_desc, 0, 0); 1274 + *res0 |= (mask & ~fixed); 1433 1275 *res1 = HCR_EL2_RES1 | (mask & fixed); 1434 1276 break; 1435 1277 case SCTLR2_EL1: 1436 1278 case SCTLR2_EL2: 1437 - *res0 = compute_res0_bits(kvm, sctlr2_feat_map, 1438 - ARRAY_SIZE(sctlr2_feat_map), 0, 0); 1439 - *res0 |= SCTLR2_EL1_RES0; 1279 + *res0 = compute_reg_res0_bits(kvm, &sctlr2_desc, 0, 0); 1440 1280 *res1 = SCTLR2_EL1_RES1; 1441 1281 break; 1442 1282 case TCR2_EL2: 1443 - *res0 = compute_res0_bits(kvm, tcr2_el2_feat_map, 1444 - ARRAY_SIZE(tcr2_el2_feat_map), 0, 0); 1445 - *res0 |= TCR2_EL2_RES0; 1283 + *res0 = compute_reg_res0_bits(kvm, &tcr2_el2_desc, 0, 0); 1446 1284 *res1 = TCR2_EL2_RES1; 1447 1285 break; 1448 1286 case SCTLR_EL1: 1449 - *res0 = compute_res0_bits(kvm, sctlr_el1_feat_map, 1450 - ARRAY_SIZE(sctlr_el1_feat_map), 0, 0); 1451 - *res0 |= SCTLR_EL1_RES0; 1287 + *res0 = compute_reg_res0_bits(kvm, &sctlr_el1_desc, 0, 0); 1452 1288 *res1 = SCTLR_EL1_RES1; 1453 1289 break; 1454 1290 case MDCR_EL2: 1455 - *res0 = compute_res0_bits(kvm, mdcr_el2_feat_map, 1456 - 
ARRAY_SIZE(mdcr_el2_feat_map), 0, 0); 1457 - *res0 |= MDCR_EL2_RES0; 1291 + *res0 = compute_reg_res0_bits(kvm, &mdcr_el2_desc, 0, 0); 1458 1292 *res1 = MDCR_EL2_RES1; 1459 1293 break; 1460 1294 default:
+14 -11
arch/arm64/kvm/debug.c
··· 56 56 if (!kvm_guest_owns_debug_regs(vcpu)) 57 57 vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; 58 58 59 + if (vcpu_has_nv(vcpu)) 60 + kvm_nested_setup_mdcr_el2(vcpu); 61 + 59 62 /* Write MDCR_EL2 directly if we're already at EL2 */ 60 63 if (has_vhe()) 61 64 write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); ··· 246 243 preempt_enable(); 247 244 } 248 245 246 + static bool skip_trbe_access(bool skip_condition) 247 + { 248 + return (WARN_ON_ONCE(preemptible()) || skip_condition || 249 + is_protected_kvm_enabled() || !is_kvm_arm_initialised()); 250 + } 251 + 249 252 void kvm_enable_trbe(void) 250 253 { 251 - if (has_vhe() || is_protected_kvm_enabled() || 252 - WARN_ON_ONCE(preemptible())) 253 - return; 254 - 255 - host_data_set_flag(TRBE_ENABLED); 254 + if (!skip_trbe_access(has_vhe())) 255 + host_data_set_flag(TRBE_ENABLED); 256 256 } 257 257 EXPORT_SYMBOL_GPL(kvm_enable_trbe); 258 258 259 259 void kvm_disable_trbe(void) 260 260 { 261 - if (has_vhe() || is_protected_kvm_enabled() || 262 - WARN_ON_ONCE(preemptible())) 263 - return; 264 - 265 - host_data_clear_flag(TRBE_ENABLED); 261 + if (!skip_trbe_access(has_vhe())) 262 + host_data_clear_flag(TRBE_ENABLED); 266 263 } 267 264 EXPORT_SYMBOL_GPL(kvm_disable_trbe); 268 265 269 266 void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) 270 267 { 271 - if (is_protected_kvm_enabled() || WARN_ON_ONCE(preemptible())) 268 + if (skip_trbe_access(false)) 272 269 return; 273 270 274 271 if (has_vhe()) {
+1
arch/arm64/kvm/emulate-nested.c
··· 1185 1185 SR_TRAP(SYS_PMSIRR_EL1, CGT_MDCR_TPMS), 1186 1186 SR_TRAP(SYS_PMSLATFR_EL1, CGT_MDCR_TPMS), 1187 1187 SR_TRAP(SYS_PMSNEVFR_EL1, CGT_MDCR_TPMS), 1188 + SR_TRAP(SYS_PMSDSFR_EL1, CGT_MDCR_TPMS), 1188 1189 SR_TRAP(SYS_TRFCR_EL1, CGT_MDCR_TTRF), 1189 1190 SR_TRAP(SYS_TRBBASER_EL1, CGT_MDCR_E2TB), 1190 1191 SR_TRAP(SYS_TRBLIMITR_EL1, CGT_MDCR_E2TB),
+3
arch/arm64/kvm/handle_exit.c
··· 559 559 /* Dump the nVHE hypervisor backtrace */ 560 560 kvm_nvhe_dump_backtrace(hyp_offset); 561 561 562 + /* Dump the faulting instruction */ 563 + dump_kernel_instr(panic_addr + kaslr_offset()); 564 + 562 565 /* 563 566 * Hyp has panicked and we're going to handle that by panicking the 564 567 * kernel. The kernel offset will be revealed in the panic so we're
+3 -1
arch/arm64/kvm/hyp/include/nvhe/pkvm.h
··· 29 29 }; 30 30 31 31 /* 32 - * Holds the relevant data for running a protected vm. 32 + * Holds the relevant data for running a vm in protected mode. 33 33 */ 34 34 struct pkvm_hyp_vm { 35 35 struct kvm kvm; ··· 67 67 68 68 void pkvm_hyp_vm_table_init(void *tbl); 69 69 70 + int __pkvm_reserve_vm(void); 71 + void __pkvm_unreserve_vm(pkvm_handle_t handle); 70 72 int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, 71 73 unsigned long pgd_hva); 72 74 int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
+2 -1
arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
··· 12 12 #include <asm/kvm_host.h> 13 13 14 14 #define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] 15 - #define DECLARE_REG(type, name, ctxt, reg) \ 15 + #define DECLARE_REG(type, name, ctxt, reg) \ 16 + __always_unused int ___check_reg_ ## reg; \ 16 17 type name = (type)cpu_reg(ctxt, (reg)) 17 18 18 19 #endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
+1
arch/arm64/kvm/hyp/nvhe/Makefile
··· 27 27 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o 28 28 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ 29 29 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o 30 + hyp-obj-y += ../../../kernel/smccc-call.o 30 31 hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o 31 32 hyp-obj-y += $(lib-objs) 32 33
+144 -73
arch/arm64/kvm/hyp/nvhe/ffa.c
··· 71 71 static bool has_version_negotiated; 72 72 static hyp_spinlock_t version_lock; 73 73 74 - static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno) 74 + static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno) 75 75 { 76 - *res = (struct arm_smccc_res) { 76 + *res = (struct arm_smccc_1_2_regs) { 77 77 .a0 = FFA_ERROR, 78 78 .a2 = ffa_errno, 79 79 }; 80 80 } 81 81 82 - static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop) 82 + static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop) 83 83 { 84 84 if (ret == FFA_RET_SUCCESS) { 85 - *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS, 86 - .a2 = prop }; 85 + *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS, 86 + .a2 = prop }; 87 87 } else { 88 88 ffa_to_smccc_error(res, ret); 89 89 } 90 90 } 91 91 92 - static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret) 92 + static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret) 93 93 { 94 94 ffa_to_smccc_res_prop(res, ret, 0); 95 95 } 96 96 97 97 static void ffa_set_retval(struct kvm_cpu_context *ctxt, 98 - struct arm_smccc_res *res) 98 + struct arm_smccc_1_2_regs *res) 99 99 { 100 100 cpu_reg(ctxt, 0) = res->a0; 101 101 cpu_reg(ctxt, 1) = res->a1; 102 102 cpu_reg(ctxt, 2) = res->a2; 103 103 cpu_reg(ctxt, 3) = res->a3; 104 + cpu_reg(ctxt, 4) = res->a4; 105 + cpu_reg(ctxt, 5) = res->a5; 106 + cpu_reg(ctxt, 6) = res->a6; 107 + cpu_reg(ctxt, 7) = res->a7; 108 + 109 + /* 110 + * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30. 111 + * 112 + * In FF-A 1.2, we cannot rely on the function ID sent by the caller to 113 + * detect 32-bit calls because the CPU cycle management interfaces (e.g. 114 + * FFA_MSG_WAIT, FFA_RUN) are 32-bit only but can have 64-bit responses. 115 + * 116 + * FFA-1.3 introduces 64-bit variants of the CPU cycle management 117 + * interfaces. 
Moreover, FF-A 1.3 clarifies that SMC32 direct requests 118 + * complete with SMC32 direct reponses which *should* allow us use the 119 + * function ID sent by the caller to determine whether to return x8-x17. 120 + * 121 + * Note that we also cannot rely on function IDs in the response. 122 + * 123 + * Given the above, assume SMC64 and send back x0-x17 unconditionally 124 + * as the passthrough code (__kvm_hyp_host_forward_smc) does the same. 125 + */ 126 + cpu_reg(ctxt, 8) = res->a8; 127 + cpu_reg(ctxt, 9) = res->a9; 128 + cpu_reg(ctxt, 10) = res->a10; 129 + cpu_reg(ctxt, 11) = res->a11; 130 + cpu_reg(ctxt, 12) = res->a12; 131 + cpu_reg(ctxt, 13) = res->a13; 132 + cpu_reg(ctxt, 14) = res->a14; 133 + cpu_reg(ctxt, 15) = res->a15; 134 + cpu_reg(ctxt, 16) = res->a16; 135 + cpu_reg(ctxt, 17) = res->a17; 104 136 } 105 137 106 138 static bool is_ffa_call(u64 func_id) ··· 145 113 146 114 static int ffa_map_hyp_buffers(u64 ffa_page_count) 147 115 { 148 - struct arm_smccc_res res; 116 + struct arm_smccc_1_2_regs res; 149 117 150 - arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP, 151 - hyp_virt_to_phys(hyp_buffers.tx), 152 - hyp_virt_to_phys(hyp_buffers.rx), 153 - ffa_page_count, 154 - 0, 0, 0, 0, 155 - &res); 118 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 119 + .a0 = FFA_FN64_RXTX_MAP, 120 + .a1 = hyp_virt_to_phys(hyp_buffers.tx), 121 + .a2 = hyp_virt_to_phys(hyp_buffers.rx), 122 + .a3 = ffa_page_count, 123 + }, &res); 156 124 157 125 return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2; 158 126 } 159 127 160 128 static int ffa_unmap_hyp_buffers(void) 161 129 { 162 - struct arm_smccc_res res; 130 + struct arm_smccc_1_2_regs res; 163 131 164 - arm_smccc_1_1_smc(FFA_RXTX_UNMAP, 165 - HOST_FFA_ID, 166 - 0, 0, 0, 0, 0, 0, 167 - &res); 132 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 133 + .a0 = FFA_RXTX_UNMAP, 134 + .a1 = HOST_FFA_ID, 135 + }, &res); 168 136 169 137 return res.a0 == FFA_SUCCESS ? 
FFA_RET_SUCCESS : res.a2; 170 138 } 171 139 172 - static void ffa_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo, 140 + static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo, 173 141 u32 handle_hi, u32 fraglen, u32 endpoint_id) 174 142 { 175 - arm_smccc_1_1_smc(FFA_MEM_FRAG_TX, 176 - handle_lo, handle_hi, fraglen, endpoint_id, 177 - 0, 0, 0, 178 - res); 143 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 144 + .a0 = FFA_MEM_FRAG_TX, 145 + .a1 = handle_lo, 146 + .a2 = handle_hi, 147 + .a3 = fraglen, 148 + .a4 = endpoint_id, 149 + }, res); 179 150 } 180 151 181 - static void ffa_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo, 152 + static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo, 182 153 u32 handle_hi, u32 fragoff) 183 154 { 184 - arm_smccc_1_1_smc(FFA_MEM_FRAG_RX, 185 - handle_lo, handle_hi, fragoff, HOST_FFA_ID, 186 - 0, 0, 0, 187 - res); 155 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 156 + .a0 = FFA_MEM_FRAG_RX, 157 + .a1 = handle_lo, 158 + .a2 = handle_hi, 159 + .a3 = fragoff, 160 + .a4 = HOST_FFA_ID, 161 + }, res); 188 162 } 189 163 190 - static void ffa_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len, 164 + static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len, 191 165 u32 fraglen) 192 166 { 193 - arm_smccc_1_1_smc(func_id, len, fraglen, 194 - 0, 0, 0, 0, 0, 195 - res); 167 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 168 + .a0 = func_id, 169 + .a1 = len, 170 + .a2 = fraglen, 171 + }, res); 196 172 } 197 173 198 - static void ffa_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo, 174 + static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo, 199 175 u32 handle_hi, u32 flags) 200 176 { 201 - arm_smccc_1_1_smc(FFA_MEM_RECLAIM, 202 - handle_lo, handle_hi, flags, 203 - 0, 0, 0, 0, 204 - res); 177 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 178 + .a0 = FFA_MEM_RECLAIM, 179 + .a1 = handle_lo, 180 + .a2 = handle_hi, 181 + .a3 = flags, 
182 + }, res); 205 183 } 206 184 207 - static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len) 185 + static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len) 208 186 { 209 - arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ, 210 - len, len, 211 - 0, 0, 0, 0, 0, 212 - res); 187 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 188 + .a0 = FFA_FN64_MEM_RETRIEVE_REQ, 189 + .a1 = len, 190 + .a2 = len, 191 + }, res); 213 192 } 214 193 215 - static void ffa_rx_release(struct arm_smccc_res *res) 194 + static void ffa_rx_release(struct arm_smccc_1_2_regs *res) 216 195 { 217 - arm_smccc_1_1_smc(FFA_RX_RELEASE, 218 - 0, 0, 219 - 0, 0, 0, 0, 0, 220 - res); 196 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 197 + .a0 = FFA_RX_RELEASE, 198 + }, res); 221 199 } 222 200 223 - static void do_ffa_rxtx_map(struct arm_smccc_res *res, 201 + static void do_ffa_rxtx_map(struct arm_smccc_1_2_regs *res, 224 202 struct kvm_cpu_context *ctxt) 225 203 { 226 204 DECLARE_REG(phys_addr_t, tx, ctxt, 1); ··· 309 267 goto out_unlock; 310 268 } 311 269 312 - static void do_ffa_rxtx_unmap(struct arm_smccc_res *res, 270 + static void do_ffa_rxtx_unmap(struct arm_smccc_1_2_regs *res, 313 271 struct kvm_cpu_context *ctxt) 314 272 { 315 273 DECLARE_REG(u32, id, ctxt, 1); ··· 410 368 return ret; 411 369 } 412 370 413 - static void do_ffa_mem_frag_tx(struct arm_smccc_res *res, 371 + static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, 414 372 struct kvm_cpu_context *ctxt) 415 373 { 416 374 DECLARE_REG(u32, handle_lo, ctxt, 1); ··· 469 427 } 470 428 471 429 static void __do_ffa_mem_xfer(const u64 func_id, 472 - struct arm_smccc_res *res, 430 + struct arm_smccc_1_2_regs *res, 473 431 struct kvm_cpu_context *ctxt) 474 432 { 475 433 DECLARE_REG(u32, len, ctxt, 1); ··· 563 521 __do_ffa_mem_xfer((fid), (res), (ctxt)); \ 564 522 } while (0); 565 523 566 - static void do_ffa_mem_reclaim(struct arm_smccc_res *res, 524 + static void do_ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, 
567 525 struct kvm_cpu_context *ctxt) 568 526 { 569 527 DECLARE_REG(u32, handle_lo, ctxt, 1); ··· 670 628 case FFA_RXTX_MAP: 671 629 case FFA_MEM_DONATE: 672 630 case FFA_MEM_RETRIEVE_REQ: 631 + /* Optional notification interfaces added in FF-A 1.1 */ 632 + case FFA_NOTIFICATION_BITMAP_CREATE: 633 + case FFA_NOTIFICATION_BITMAP_DESTROY: 634 + case FFA_NOTIFICATION_BIND: 635 + case FFA_NOTIFICATION_UNBIND: 636 + case FFA_NOTIFICATION_SET: 637 + case FFA_NOTIFICATION_GET: 638 + case FFA_NOTIFICATION_INFO_GET: 639 + /* Optional interfaces added in FF-A 1.2 */ 640 + case FFA_MSG_SEND_DIRECT_REQ2: /* Optional per 7.5.1 */ 641 + case FFA_MSG_SEND_DIRECT_RESP2: /* Optional per 7.5.1 */ 642 + case FFA_CONSOLE_LOG: /* Optional per 13.1: not in Table 13.1 */ 643 + case FFA_PARTITION_INFO_GET_REGS: /* Optional for virtual instances per 13.1 */ 673 644 return false; 674 645 } 675 646 676 647 return true; 677 648 } 678 649 679 - static bool do_ffa_features(struct arm_smccc_res *res, 650 + static bool do_ffa_features(struct arm_smccc_1_2_regs *res, 680 651 struct kvm_cpu_context *ctxt) 681 652 { 682 653 DECLARE_REG(u32, id, ctxt, 1); ··· 721 666 static int hyp_ffa_post_init(void) 722 667 { 723 668 size_t min_rxtx_sz; 724 - struct arm_smccc_res res; 669 + struct arm_smccc_1_2_regs res; 725 670 726 - arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); 671 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){ 672 + .a0 = FFA_ID_GET, 673 + }, &res); 727 674 if (res.a0 != FFA_SUCCESS) 728 675 return -EOPNOTSUPP; 729 676 730 677 if (res.a2 != HOST_FFA_ID) 731 678 return -EINVAL; 732 679 733 - arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP, 734 - 0, 0, 0, 0, 0, 0, &res); 680 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){ 681 + .a0 = FFA_FEATURES, 682 + .a1 = FFA_FN64_RXTX_MAP, 683 + }, &res); 735 684 if (res.a0 != FFA_SUCCESS) 736 685 return -EOPNOTSUPP; 737 686 738 - switch (res.a2) { 687 + switch (res.a2 & FFA_FEAT_RXTX_MIN_SZ_MASK) { 739 688 case FFA_FEAT_RXTX_MIN_SZ_4K: 740 
689 min_rxtx_sz = SZ_4K; 741 690 break; ··· 759 700 return 0; 760 701 } 761 702 762 - static void do_ffa_version(struct arm_smccc_res *res, 703 + static void do_ffa_version(struct arm_smccc_1_2_regs *res, 763 704 struct kvm_cpu_context *ctxt) 764 705 { 765 706 DECLARE_REG(u32, ffa_req_version, ctxt, 1); ··· 771 712 772 713 hyp_spin_lock(&version_lock); 773 714 if (has_version_negotiated) { 774 - res->a0 = hyp_ffa_version; 715 + if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) 716 + res->a0 = FFA_RET_NOT_SUPPORTED; 717 + else 718 + res->a0 = hyp_ffa_version; 775 719 goto unlock; 776 720 } 777 721 ··· 783 721 * first if TEE supports it. 784 722 */ 785 723 if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) { 786 - arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0, 787 - 0, 0, 0, 0, 0, 788 - res); 724 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 725 + .a0 = FFA_VERSION, 726 + .a1 = ffa_req_version, 727 + }, res); 789 728 if (res->a0 == FFA_RET_NOT_SUPPORTED) 790 729 goto unlock; 791 730 ··· 803 740 hyp_spin_unlock(&version_lock); 804 741 } 805 742 806 - static void do_ffa_part_get(struct arm_smccc_res *res, 743 + static void do_ffa_part_get(struct arm_smccc_1_2_regs *res, 807 744 struct kvm_cpu_context *ctxt) 808 745 { 809 746 DECLARE_REG(u32, uuid0, ctxt, 1); ··· 819 756 goto out_unlock; 820 757 } 821 758 822 - arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1, 823 - uuid2, uuid3, flags, 0, 0, 824 - res); 759 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 760 + .a0 = FFA_PARTITION_INFO_GET, 761 + .a1 = uuid0, 762 + .a2 = uuid1, 763 + .a3 = uuid2, 764 + .a4 = uuid3, 765 + .a5 = flags, 766 + }, res); 825 767 826 768 if (res->a0 != FFA_SUCCESS) 827 769 goto out_unlock; ··· 859 791 860 792 bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) 861 793 { 862 - struct arm_smccc_res res; 794 + struct arm_smccc_1_2_regs res; 863 795 864 796 /* 865 797 * There's no way we can tell what a 
non-standard SMC call might ··· 928 860 929 861 int hyp_ffa_init(void *pages) 930 862 { 931 - struct arm_smccc_res res; 863 + struct arm_smccc_1_2_regs res; 932 864 void *tx, *rx; 933 865 934 866 if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2) 935 867 return 0; 936 868 937 - arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res); 869 + arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) { 870 + .a0 = FFA_VERSION, 871 + .a1 = FFA_VERSION_1_2, 872 + }, &res); 938 873 if (res.a0 == FFA_RET_NOT_SUPPORTED) 939 874 return 0; 940 875 ··· 957 886 if (FFA_MAJOR_VERSION(res.a0) != 1) 958 887 return -EOPNOTSUPP; 959 888 960 - if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_1)) 889 + if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_2)) 961 890 hyp_ffa_version = res.a0; 962 891 else 963 - hyp_ffa_version = FFA_VERSION_1_1; 892 + hyp_ffa_version = FFA_VERSION_1_2; 964 893 965 894 tx = pages; 966 895 pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE;
+14
arch/arm64/kvm/hyp/nvhe/hyp-main.c
··· 546 546 cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); 547 547 } 548 548 549 + static void handle___pkvm_reserve_vm(struct kvm_cpu_context *host_ctxt) 550 + { 551 + cpu_reg(host_ctxt, 1) = __pkvm_reserve_vm(); 552 + } 553 + 554 + static void handle___pkvm_unreserve_vm(struct kvm_cpu_context *host_ctxt) 555 + { 556 + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); 557 + 558 + __pkvm_unreserve_vm(handle); 559 + } 560 + 549 561 static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt) 550 562 { 551 563 DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); ··· 618 606 HANDLE_FUNC(__kvm_timer_set_cntvoff), 619 607 HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), 620 608 HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), 609 + HANDLE_FUNC(__pkvm_reserve_vm), 610 + HANDLE_FUNC(__pkvm_unreserve_vm), 621 611 HANDLE_FUNC(__pkvm_init_vm), 622 612 HANDLE_FUNC(__pkvm_init_vcpu), 623 613 HANDLE_FUNC(__pkvm_teardown_vm),
+6 -3
arch/arm64/kvm/hyp/nvhe/mem_protect.c
··· 1010 1010 return ret; 1011 1011 if (!kvm_pte_valid(pte)) 1012 1012 return -ENOENT; 1013 - if (kvm_granule_size(level) != size) 1013 + if (size && kvm_granule_size(level) != size) 1014 1014 return -E2BIG; 1015 + 1016 + if (!size) 1017 + size = kvm_granule_size(level); 1015 1018 1016 1019 state = guest_get_page_state(pte, ipa); 1017 1020 if (state != PKVM_PAGE_SHARED_BORROWED) ··· 1103 1100 if (prot & ~KVM_PGTABLE_PROT_RWX) 1104 1101 return -EINVAL; 1105 1102 1106 - assert_host_shared_guest(vm, ipa, PAGE_SIZE); 1103 + assert_host_shared_guest(vm, ipa, 0); 1107 1104 guest_lock_component(vm); 1108 1105 ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); 1109 1106 guest_unlock_component(vm); ··· 1159 1156 if (pkvm_hyp_vm_is_protected(vm)) 1160 1157 return -EPERM; 1161 1158 1162 - assert_host_shared_guest(vm, ipa, PAGE_SIZE); 1159 + assert_host_shared_guest(vm, ipa, 0); 1163 1160 guest_lock_component(vm); 1164 1161 kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); 1165 1162 guest_unlock_component(vm);
+134 -43
arch/arm64/kvm/hyp/nvhe/pkvm.c
··· 23 23 unsigned int kvm_host_sve_max_vl; 24 24 25 25 /* 26 - * The currently loaded hyp vCPU for each physical CPU. Used only when 27 - * protected KVM is enabled, but for both protected and non-protected VMs. 26 + * The currently loaded hyp vCPU for each physical CPU. Used in protected mode 27 + * for both protected and non-protected VMs. 28 28 */ 29 29 static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu); 30 30 ··· 135 135 { 136 136 struct kvm *kvm = vcpu->kvm; 137 137 138 - /* Protected KVM does not support AArch32 guests. */ 138 + /* No AArch32 support for protected guests. */ 139 139 if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) || 140 140 kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32)) 141 141 return -EINVAL; ··· 192 192 */ 193 193 #define HANDLE_OFFSET 0x1000 194 194 195 + /* 196 + * Marks a reserved but not yet used entry in the VM table. 197 + */ 198 + #define RESERVED_ENTRY ((void *)0xa110ca7ed) 199 + 195 200 static unsigned int vm_handle_to_idx(pkvm_handle_t handle) 196 201 { 197 202 return handle - HANDLE_OFFSET; ··· 215 210 DEFINE_HYP_SPINLOCK(vm_table_lock); 216 211 217 212 /* 218 - * The table of VM entries for protected VMs in hyp. 219 - * Allocated at hyp initialization and setup. 213 + * A table that tracks all VMs in protected mode. 214 + * Allocated during hyp initialization and setup. 220 215 */ 221 216 static struct pkvm_hyp_vm **vm_table; 222 217 ··· 234 229 unsigned int idx = vm_handle_to_idx(handle); 235 230 236 231 if (unlikely(idx >= KVM_MAX_PVMS)) 232 + return NULL; 233 + 234 + /* A reserved entry doesn't represent an initialized VM. 
*/ 235 + if (unlikely(vm_table[idx] == RESERVED_ENTRY)) 237 236 return NULL; 238 237 239 238 return vm_table[idx]; ··· 410 401 } 411 402 412 403 static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, 413 - unsigned int nr_vcpus) 404 + unsigned int nr_vcpus, pkvm_handle_t handle) 414 405 { 406 + struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; 407 + int idx = vm_handle_to_idx(handle); 408 + 409 + hyp_vm->kvm.arch.pkvm.handle = handle; 410 + 415 411 hyp_vm->host_kvm = host_kvm; 416 412 hyp_vm->kvm.created_vcpus = nr_vcpus; 417 - hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr; 418 - hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled); 413 + hyp_vm->kvm.arch.pkvm.is_protected = READ_ONCE(host_kvm->arch.pkvm.is_protected); 414 + hyp_vm->kvm.arch.pkvm.is_created = true; 419 415 hyp_vm->kvm.arch.flags = 0; 420 416 pkvm_init_features_from_host(hyp_vm, host_kvm); 417 + 418 + /* VMID 0 is reserved for the host */ 419 + atomic64_set(&mmu->vmid.id, idx + 1); 420 + 421 + mmu->vtcr = host_mmu.arch.mmu.vtcr; 422 + mmu->arch = &hyp_vm->kvm.arch; 423 + mmu->pgt = &hyp_vm->pgt; 421 424 } 422 425 423 426 static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu) ··· 501 480 return ret; 502 481 } 503 482 504 - static int find_free_vm_table_entry(struct kvm *host_kvm) 483 + static int find_free_vm_table_entry(void) 505 484 { 506 485 int i; 507 486 ··· 514 493 } 515 494 516 495 /* 517 - * Allocate a VM table entry and insert a pointer to the new vm. 496 + * Reserve a VM table entry. 518 497 * 519 - * Return a unique handle to the protected VM on success, 498 + * Return a unique handle to the VM on success, 520 499 * negative error code on failure. 
521 500 */ 522 - static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, 523 - struct pkvm_hyp_vm *hyp_vm) 501 + static int allocate_vm_table_entry(void) 524 502 { 525 - struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; 526 503 int idx; 527 504 528 505 hyp_assert_lock_held(&vm_table_lock); ··· 533 514 if (unlikely(!vm_table)) 534 515 return -EINVAL; 535 516 536 - idx = find_free_vm_table_entry(host_kvm); 537 - if (idx < 0) 517 + idx = find_free_vm_table_entry(); 518 + if (unlikely(idx < 0)) 538 519 return idx; 539 520 540 - hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx); 521 + vm_table[idx] = RESERVED_ENTRY; 541 522 542 - /* VMID 0 is reserved for the host */ 543 - atomic64_set(&mmu->vmid.id, idx + 1); 523 + return idx; 524 + } 544 525 545 - mmu->arch = &hyp_vm->kvm.arch; 546 - mmu->pgt = &hyp_vm->pgt; 526 + static int __insert_vm_table_entry(pkvm_handle_t handle, 527 + struct pkvm_hyp_vm *hyp_vm) 528 + { 529 + unsigned int idx; 530 + 531 + hyp_assert_lock_held(&vm_table_lock); 532 + 533 + /* 534 + * Initializing protected state might have failed, yet a malicious 535 + * host could trigger this function. Thus, ensure that 'vm_table' 536 + * exists. 537 + */ 538 + if (unlikely(!vm_table)) 539 + return -EINVAL; 540 + 541 + idx = vm_handle_to_idx(handle); 542 + if (unlikely(idx >= KVM_MAX_PVMS)) 543 + return -EINVAL; 544 + 545 + if (unlikely(vm_table[idx] != RESERVED_ENTRY)) 546 + return -EINVAL; 547 547 548 548 vm_table[idx] = hyp_vm; 549 - return hyp_vm->kvm.arch.pkvm.handle; 549 + 550 + return 0; 551 + } 552 + 553 + /* 554 + * Insert a pointer to the initialized VM into the VM table. 555 + * 556 + * Return 0 on success, or negative error code on failure. 
557 + */ 558 + static int insert_vm_table_entry(pkvm_handle_t handle, 559 + struct pkvm_hyp_vm *hyp_vm) 560 + { 561 + int ret; 562 + 563 + hyp_spin_lock(&vm_table_lock); 564 + ret = __insert_vm_table_entry(handle, hyp_vm); 565 + hyp_spin_unlock(&vm_table_lock); 566 + 567 + return ret; 550 568 } 551 569 552 570 /* ··· 650 594 } 651 595 652 596 /* 653 - * Initialize the hypervisor copy of the protected VM state using the 654 - * memory donated by the host. 597 + * Reserves an entry in the hypervisor for a new VM in protected mode. 655 598 * 656 - * Unmaps the donated memory from the host at stage 2. 599 + * Return a unique handle to the VM on success, negative error code on failure. 600 + */ 601 + int __pkvm_reserve_vm(void) 602 + { 603 + int ret; 604 + 605 + hyp_spin_lock(&vm_table_lock); 606 + ret = allocate_vm_table_entry(); 607 + hyp_spin_unlock(&vm_table_lock); 608 + 609 + if (ret < 0) 610 + return ret; 611 + 612 + return idx_to_vm_handle(ret); 613 + } 614 + 615 + /* 616 + * Removes a reserved entry, but only if is hasn't been used yet. 617 + * Otherwise, the VM needs to be destroyed. 618 + */ 619 + void __pkvm_unreserve_vm(pkvm_handle_t handle) 620 + { 621 + unsigned int idx = vm_handle_to_idx(handle); 622 + 623 + if (unlikely(!vm_table)) 624 + return; 625 + 626 + hyp_spin_lock(&vm_table_lock); 627 + if (likely(idx < KVM_MAX_PVMS && vm_table[idx] == RESERVED_ENTRY)) 628 + remove_vm_table_entry(handle); 629 + hyp_spin_unlock(&vm_table_lock); 630 + } 631 + 632 + /* 633 + * Initialize the hypervisor copy of the VM state using host-donated memory. 634 + * 635 + * Unmap the donated memory from the host at stage 2. 657 636 * 658 637 * host_kvm: A pointer to the host's struct kvm. 659 638 * vm_hva: The host va of the area being donated for the VM state. ··· 697 606 * the VM. Must be page aligned. Its size is implied by the VM's 698 607 * VTCR. 699 608 * 700 - * Return a unique handle to the protected VM on success, 701 - * negative error code on failure. 
609 + * Return 0 success, negative error code on failure. 702 610 */ 703 611 int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, 704 612 unsigned long pgd_hva) ··· 705 615 struct pkvm_hyp_vm *hyp_vm = NULL; 706 616 size_t vm_size, pgd_size; 707 617 unsigned int nr_vcpus; 618 + pkvm_handle_t handle; 708 619 void *pgd = NULL; 709 620 int ret; 710 621 ··· 715 624 716 625 nr_vcpus = READ_ONCE(host_kvm->created_vcpus); 717 626 if (nr_vcpus < 1) { 627 + ret = -EINVAL; 628 + goto err_unpin_kvm; 629 + } 630 + 631 + handle = READ_ONCE(host_kvm->arch.pkvm.handle); 632 + if (unlikely(handle < HANDLE_OFFSET)) { 718 633 ret = -EINVAL; 719 634 goto err_unpin_kvm; 720 635 } ··· 738 641 if (!pgd) 739 642 goto err_remove_mappings; 740 643 741 - init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus); 742 - 743 - hyp_spin_lock(&vm_table_lock); 744 - ret = insert_vm_table_entry(host_kvm, hyp_vm); 745 - if (ret < 0) 746 - goto err_unlock; 644 + init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus, handle); 747 645 748 646 ret = kvm_guest_prepare_stage2(hyp_vm, pgd); 749 647 if (ret) 750 - goto err_remove_vm_table_entry; 751 - hyp_spin_unlock(&vm_table_lock); 648 + goto err_remove_mappings; 752 649 753 - return hyp_vm->kvm.arch.pkvm.handle; 650 + /* Must be called last since this publishes the VM. */ 651 + ret = insert_vm_table_entry(handle, hyp_vm); 652 + if (ret) 653 + goto err_remove_mappings; 754 654 755 - err_remove_vm_table_entry: 756 - remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle); 757 - err_unlock: 758 - hyp_spin_unlock(&vm_table_lock); 655 + return 0; 656 + 759 657 err_remove_mappings: 760 658 unmap_donated_memory(hyp_vm, vm_size); 761 659 unmap_donated_memory(pgd, pgd_size); ··· 760 668 } 761 669 762 670 /* 763 - * Initialize the hypervisor copy of the protected vCPU state using the 764 - * memory donated by the host. 671 + * Initialize the hypervisor copy of the vCPU state using host-donated memory. 765 672 * 766 - * handle: The handle for the protected vm. 
673 + * handle: The hypervisor handle for the vm. 767 674 * host_vcpu: A pointer to the corresponding host vcpu. 768 675 * vcpu_hva: The host va of the area being donated for the vcpu state. 769 676 * Must be page aligned. The size of the area must be equal to
+10 -2
arch/arm64/kvm/hyp/nvhe/setup.c
··· 192 192 enum pkvm_page_state state; 193 193 struct hyp_page *page; 194 194 phys_addr_t phys; 195 + enum kvm_pgtable_prot prot; 195 196 196 197 if (!kvm_pte_valid(ctx->old)) 197 198 return 0; ··· 211 210 * configured in the hypervisor stage-1, and make sure to propagate them 212 211 * to the hyp_vmemmap state. 213 212 */ 214 - state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old)); 213 + prot = kvm_pgtable_hyp_pte_prot(ctx->old); 214 + state = pkvm_getstate(prot); 215 215 switch (state) { 216 216 case PKVM_PAGE_OWNED: 217 217 set_hyp_state(page, PKVM_PAGE_OWNED); 218 - return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); 218 + /* hyp text is RO in the host stage-2 to be inspected on panic. */ 219 + if (prot == PAGE_HYP_EXEC) { 220 + set_host_state(page, PKVM_NOPAGE); 221 + return host_stage2_idmap_locked(phys, PAGE_SIZE, KVM_PGTABLE_PROT_R); 222 + } else { 223 + return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); 224 + } 219 225 case PKVM_PAGE_SHARED_OWNED: 220 226 set_hyp_state(page, PKVM_PAGE_SHARED_OWNED); 221 227 set_host_state(page, PKVM_PAGE_SHARED_BORROWED);
+9 -16
arch/arm64/kvm/hyp/vgic-v3-sr.c
··· 295 295 } 296 296 } 297 297 298 - /* 299 - * GICv5 BET0 FEAT_GCIE_LEGACY doesn't include ICC_SRE_EL2. This is due 300 - * to be relaxed in a future spec release, at which point this in 301 - * condition can be dropped. 302 - */ 303 - if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { 298 + /* Only disable SRE if the host implements the GICv2 interface */ 299 + if (static_branch_unlikely(&vgic_v3_has_v2_compat)) { 304 300 /* 305 301 * Prevent the guest from touching the ICC_SRE_EL1 system 306 302 * register. Note that this may not have any effect, as ··· 325 329 cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); 326 330 } 327 331 328 - /* 329 - * Can be dropped in the future when GICv5 spec is relaxed. See comment 330 - * above. 331 - */ 332 - if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) { 332 + /* Only restore SRE if the host implements the GICv2 interface */ 333 + if (static_branch_unlikely(&vgic_v3_has_v2_compat)) { 333 334 val = read_gicreg(ICC_SRE_EL2); 334 335 write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); 335 - } 336 336 337 - if (!cpu_if->vgic_sre) { 338 - /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ 339 - isb(); 340 - write_gicreg(1, ICC_SRE_EL1); 337 + if (!cpu_if->vgic_sre) { 338 + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ 339 + isb(); 340 + write_gicreg(1, ICC_SRE_EL1); 341 + } 341 342 } 342 343 343 344 /*
+7
arch/arm64/kvm/hyp/vhe/switch.c
··· 95 95 /* Force NV2 in case the guest is forgetful... */ 96 96 guest_hcr |= HCR_NV2; 97 97 } 98 + 99 + /* 100 + * Exclude the guest's TWED configuration if it hasn't set TWE 101 + * to avoid potentially delaying traps for the host. 102 + */ 103 + if (!(guest_hcr & HCR_TWE)) 104 + guest_hcr &= ~(HCR_EL2_TWEDEn | HCR_EL2_TWEDEL); 98 105 } 99 106 100 107 BUG_ON(host_data_test_flag(VCPU_IN_HYP_CONTEXT) &&
+25 -2
arch/arm64/kvm/inject_fault.c
··· 106 106 { 107 107 unsigned long cpsr = *vcpu_cpsr(vcpu); 108 108 bool is_aarch32 = vcpu_mode_is_32bit(vcpu); 109 - u64 esr = 0; 109 + u64 esr = 0, fsc; 110 + int level; 111 + 112 + /* 113 + * If injecting an abort from a failed S1PTW, rewalk the S1 PTs to 114 + * find the failing level. If we can't find it, assume the error was 115 + * transient and restart without changing the state. 116 + */ 117 + if (kvm_vcpu_abt_iss1tw(vcpu)) { 118 + u64 hpfar = kvm_vcpu_get_fault_ipa(vcpu); 119 + int ret; 120 + 121 + if (hpfar == INVALID_GPA) 122 + return; 123 + 124 + ret = __kvm_find_s1_desc_level(vcpu, addr, hpfar, &level); 125 + if (ret) 126 + return; 127 + 128 + WARN_ON_ONCE(level < -1 || level > 3); 129 + fsc = ESR_ELx_FSC_SEA_TTW(level); 130 + } else { 131 + fsc = ESR_ELx_FSC_EXTABT; 132 + } 110 133 111 134 /* This delight is brought to you by FEAT_DoubleFault2. */ 112 135 if (effective_sctlr2_ease(vcpu)) ··· 156 133 if (!is_iabt) 157 134 esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; 158 135 159 - esr |= ESR_ELx_FSC_EXTABT; 136 + esr |= fsc; 160 137 161 138 vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu)); 162 139 vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
+9 -7
arch/arm64/kvm/mmu.c
··· 1431 1431 * able to see the page's tags and therefore they must be initialised first. If 1432 1432 * PG_mte_tagged is set, tags have already been initialised. 1433 1433 * 1434 - * The race in the test/set of the PG_mte_tagged flag is handled by: 1435 - * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs 1436 - * racing to santise the same page 1437 - * - mmap_lock protects between a VM faulting a page in and the VMM performing 1438 - * an mprotect() to add VM_MTE 1434 + * Must be called with kvm->mmu_lock held to ensure the memory remains mapped 1435 + * while the tags are zeroed. 1439 1436 */ 1440 1437 static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, 1441 1438 unsigned long size) ··· 1772 1775 * cache maintenance. 1773 1776 */ 1774 1777 if (!kvm_supports_cacheable_pfnmap()) 1775 - return -EFAULT; 1778 + ret = -EFAULT; 1776 1779 } else { 1777 1780 /* 1778 1781 * If the page was identified as device early by looking at ··· 1795 1798 } 1796 1799 1797 1800 if (exec_fault && s2_force_noncacheable) 1798 - return -ENOEXEC; 1801 + ret = -ENOEXEC; 1802 + 1803 + if (ret) { 1804 + kvm_release_page_unused(page); 1805 + return ret; 1806 + } 1799 1807 1800 1808 if (nested) 1801 1809 adjust_nested_fault_perms(nested, &prot, &writable);
+61 -17
arch/arm64/kvm/nested.c
··· 349 349 wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); 350 350 /* Global limit for now, should eventually be per-VM */ 351 351 wi->max_oa_bits = min(get_kvm_ipa_limit(), 352 - ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr))); 352 + ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false)); 353 353 } 354 354 355 355 int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, ··· 1491 1491 1492 1492 case SYS_ID_AA64PFR1_EL1: 1493 1493 /* Only support BTI, SSBS, CSV2_frac */ 1494 - val &= (ID_AA64PFR1_EL1_BT | 1495 - ID_AA64PFR1_EL1_SSBS | 1496 - ID_AA64PFR1_EL1_CSV2_frac); 1494 + val &= ~(ID_AA64PFR1_EL1_PFAR | 1495 + ID_AA64PFR1_EL1_MTEX | 1496 + ID_AA64PFR1_EL1_THE | 1497 + ID_AA64PFR1_EL1_GCS | 1498 + ID_AA64PFR1_EL1_MTE_frac | 1499 + ID_AA64PFR1_EL1_NMI | 1500 + ID_AA64PFR1_EL1_SME | 1501 + ID_AA64PFR1_EL1_RES0 | 1502 + ID_AA64PFR1_EL1_MPAM_frac | 1503 + ID_AA64PFR1_EL1_MTE); 1497 1504 break; 1498 1505 1499 1506 case SYS_ID_AA64MMFR0_EL1: ··· 1553 1546 break; 1554 1547 1555 1548 case SYS_ID_AA64MMFR1_EL1: 1556 - val &= (ID_AA64MMFR1_EL1_HCX | 1557 - ID_AA64MMFR1_EL1_PAN | 1558 - ID_AA64MMFR1_EL1_LO | 1559 - ID_AA64MMFR1_EL1_HPDS | 1560 - ID_AA64MMFR1_EL1_VH | 1561 - ID_AA64MMFR1_EL1_VMIDBits); 1549 + val &= ~(ID_AA64MMFR1_EL1_CMOW | 1550 + ID_AA64MMFR1_EL1_nTLBPA | 1551 + ID_AA64MMFR1_EL1_ETS | 1552 + ID_AA64MMFR1_EL1_XNX | 1553 + ID_AA64MMFR1_EL1_HAFDBS); 1562 1554 /* FEAT_E2H0 implies no VHE */ 1563 1555 if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) 1564 1556 val &= ~ID_AA64MMFR1_EL1_VH; ··· 1599 1593 1600 1594 case SYS_ID_AA64DFR0_EL1: 1601 1595 /* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */ 1602 - val &= (ID_AA64DFR0_EL1_PMUVer | 1603 - ID_AA64DFR0_EL1_WRPs | 1604 - ID_AA64DFR0_EL1_BRPs | 1605 - ID_AA64DFR0_EL1_DebugVer| 1606 - ID_AA64DFR0_EL1_HPMN0); 1596 + val &= ~(ID_AA64DFR0_EL1_ExtTrcBuff | 1597 + ID_AA64DFR0_EL1_BRBE | 1598 + ID_AA64DFR0_EL1_MTPMU | 1599 + ID_AA64DFR0_EL1_TraceBuffer | 1600 + 
ID_AA64DFR0_EL1_TraceFilt | 1601 + ID_AA64DFR0_EL1_PMSVer | 1602 + ID_AA64DFR0_EL1_CTX_CMPs | 1603 + ID_AA64DFR0_EL1_SEBEP | 1604 + ID_AA64DFR0_EL1_PMSS | 1605 + ID_AA64DFR0_EL1_TraceVer); 1607 1606 1608 - /* Cap Debug to ARMv8.1 */ 1609 - val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, VHE); 1607 + /* 1608 + * FEAT_Debugv8p9 requires support for extended breakpoints / 1609 + * watchpoints. 1610 + */ 1611 + val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); 1610 1612 break; 1611 1613 } 1612 1614 ··· 1838 1824 */ 1839 1825 if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING))) 1840 1826 kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu)); 1827 + } 1828 + 1829 + /* 1830 + * KVM unconditionally sets most of these traps anyway but use an allowlist 1831 + * to document the guest hypervisor traps that may take precedence and guard 1832 + * against future changes to the non-nested trap configuration. 1833 + */ 1834 + #define NV_MDCR_GUEST_INCLUDE (MDCR_EL2_TDE | \ 1835 + MDCR_EL2_TDA | \ 1836 + MDCR_EL2_TDRA | \ 1837 + MDCR_EL2_TTRF | \ 1838 + MDCR_EL2_TPMS | \ 1839 + MDCR_EL2_TPM | \ 1840 + MDCR_EL2_TPMCR | \ 1841 + MDCR_EL2_TDCC | \ 1842 + MDCR_EL2_TDOSA) 1843 + 1844 + void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu) 1845 + { 1846 + u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2); 1847 + 1848 + /* 1849 + * In yet another example where FEAT_NV2 is fscking broken, accesses 1850 + * to MDSCR_EL1 are redirected to the VNCR despite having an effect 1851 + * at EL2. Use a big hammer to apply sanity. 1852 + */ 1853 + if (is_hyp_ctxt(vcpu)) 1854 + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; 1855 + else 1856 + vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); 1841 1857 }
+52 -24
arch/arm64/kvm/pkvm.c
··· 85 85 hyp_mem_base); 86 86 } 87 87 88 - static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm) 88 + static void __pkvm_destroy_hyp_vm(struct kvm *kvm) 89 89 { 90 - if (host_kvm->arch.pkvm.handle) { 90 + if (pkvm_hyp_vm_is_created(kvm)) { 91 91 WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, 92 - host_kvm->arch.pkvm.handle)); 92 + kvm->arch.pkvm.handle)); 93 + } else if (kvm->arch.pkvm.handle) { 94 + /* 95 + * The VM could have been reserved but hyp initialization has 96 + * failed. Make sure to unreserve it. 97 + */ 98 + kvm_call_hyp_nvhe(__pkvm_unreserve_vm, kvm->arch.pkvm.handle); 93 99 } 94 100 95 - host_kvm->arch.pkvm.handle = 0; 96 - free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); 97 - free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc); 101 + kvm->arch.pkvm.handle = 0; 102 + kvm->arch.pkvm.is_created = false; 103 + free_hyp_memcache(&kvm->arch.pkvm.teardown_mc); 104 + free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc); 98 105 } 99 106 100 107 static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu) ··· 136 129 * 137 130 * Return 0 on success, negative error code on failure. 138 131 */ 139 - static int __pkvm_create_hyp_vm(struct kvm *host_kvm) 132 + static int __pkvm_create_hyp_vm(struct kvm *kvm) 140 133 { 141 134 size_t pgd_sz, hyp_vm_sz; 142 135 void *pgd, *hyp_vm; 143 136 int ret; 144 137 145 - if (host_kvm->created_vcpus < 1) 138 + if (kvm->created_vcpus < 1) 146 139 return -EINVAL; 147 140 148 - pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr); 141 + pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.mmu.vtcr); 149 142 150 143 /* 151 144 * The PGD pages will be reclaimed using a hyp_memcache which implies ··· 159 152 /* Allocate memory to donate to hyp for vm and vcpu pointers. 
*/ 160 153 hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, 161 154 size_mul(sizeof(void *), 162 - host_kvm->created_vcpus))); 155 + kvm->created_vcpus))); 163 156 hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT); 164 157 if (!hyp_vm) { 165 158 ret = -ENOMEM; ··· 167 160 } 168 161 169 162 /* Donate the VM memory to hyp and let hyp initialize it. */ 170 - ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd); 171 - if (ret < 0) 163 + ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd); 164 + if (ret) 172 165 goto free_vm; 173 166 174 - host_kvm->arch.pkvm.handle = ret; 175 - host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; 167 + kvm->arch.pkvm.is_created = true; 168 + kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2; 176 169 kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE); 177 170 178 171 return 0; ··· 183 176 return ret; 184 177 } 185 178 186 - int pkvm_create_hyp_vm(struct kvm *host_kvm) 179 + bool pkvm_hyp_vm_is_created(struct kvm *kvm) 180 + { 181 + return READ_ONCE(kvm->arch.pkvm.is_created); 182 + } 183 + 184 + int pkvm_create_hyp_vm(struct kvm *kvm) 187 185 { 188 186 int ret = 0; 189 187 190 - mutex_lock(&host_kvm->arch.config_lock); 191 - if (!host_kvm->arch.pkvm.handle) 192 - ret = __pkvm_create_hyp_vm(host_kvm); 193 - mutex_unlock(&host_kvm->arch.config_lock); 188 + mutex_lock(&kvm->arch.config_lock); 189 + if (!pkvm_hyp_vm_is_created(kvm)) 190 + ret = __pkvm_create_hyp_vm(kvm); 191 + mutex_unlock(&kvm->arch.config_lock); 194 192 195 193 return ret; 196 194 } ··· 212 200 return ret; 213 201 } 214 202 215 - void pkvm_destroy_hyp_vm(struct kvm *host_kvm) 203 + void pkvm_destroy_hyp_vm(struct kvm *kvm) 216 204 { 217 - mutex_lock(&host_kvm->arch.config_lock); 218 - __pkvm_destroy_hyp_vm(host_kvm); 219 - mutex_unlock(&host_kvm->arch.config_lock); 205 + mutex_lock(&kvm->arch.config_lock); 206 + __pkvm_destroy_hyp_vm(kvm); 207 + mutex_unlock(&kvm->arch.config_lock); 220 208 } 221 209 
222 - int pkvm_init_host_vm(struct kvm *host_kvm) 210 + int pkvm_init_host_vm(struct kvm *kvm) 223 211 { 212 + int ret; 213 + 214 + if (pkvm_hyp_vm_is_created(kvm)) 215 + return -EINVAL; 216 + 217 + /* VM is already reserved, no need to proceed. */ 218 + if (kvm->arch.pkvm.handle) 219 + return 0; 220 + 221 + /* Reserve the VM in hyp and obtain a hyp handle for the VM. */ 222 + ret = kvm_call_hyp_nvhe(__pkvm_reserve_vm); 223 + if (ret < 0) 224 + return ret; 225 + 226 + kvm->arch.pkvm.handle = ret; 227 + 224 228 return 0; 225 229 } 226 230
+10 -10
arch/arm64/kvm/ptdump.c
··· 32 32 .set = " ", 33 33 .clear = "F", 34 34 }, { 35 - .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, 36 - .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, 35 + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, 36 + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, 37 37 .set = "R", 38 38 .clear = " ", 39 39 }, { 40 - .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, 41 - .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, 40 + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, 41 + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, 42 42 .set = "W", 43 43 .clear = " ", 44 44 }, { 45 - .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, 46 - .val = PTE_VALID, 47 - .set = " ", 48 - .clear = "X", 45 + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN, 46 + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN, 47 + .set = "NX", 48 + .clear = "x ", 49 49 }, { 50 - .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, 51 - .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, 50 + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF, 51 + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF, 52 52 .set = "AF", 53 53 .clear = " ", 54 54 }, {
+47 -8
arch/arm64/kvm/sys_regs.c
··· 1757 1757 val &= ~ID_AA64ISAR2_EL1_WFxT; 1758 1758 break; 1759 1759 case SYS_ID_AA64ISAR3_EL1: 1760 - val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX; 1760 + val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_LSFE | 1761 + ID_AA64ISAR3_EL1_FAMINMAX; 1761 1762 break; 1762 1763 case SYS_ID_AA64MMFR2_EL1: 1763 1764 val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK; ··· 1998 1997 return val; 1999 1998 } 2000 1999 2000 + /* 2001 + * Older versions of KVM erroneously claim support for FEAT_DoubleLock with 2002 + * NV-enabled VMs on unsupporting hardware. Silently ignore the incorrect 2003 + * value if it is consistent with the bug. 2004 + */ 2005 + static bool ignore_feat_doublelock(struct kvm_vcpu *vcpu, u64 val) 2006 + { 2007 + u8 host, user; 2008 + 2009 + if (!vcpu_has_nv(vcpu)) 2010 + return false; 2011 + 2012 + host = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock, 2013 + read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1)); 2014 + user = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock, val); 2015 + 2016 + return host == ID_AA64DFR0_EL1_DoubleLock_NI && 2017 + user == ID_AA64DFR0_EL1_DoubleLock_IMP; 2018 + } 2019 + 2001 2020 static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, 2002 2021 const struct sys_reg_desc *rd, 2003 2022 u64 val) ··· 2048 2027 */ 2049 2028 if (debugver < ID_AA64DFR0_EL1_DebugVer_IMP) 2050 2029 return -EINVAL; 2030 + 2031 + if (ignore_feat_doublelock(vcpu, val)) { 2032 + val &= ~ID_AA64DFR0_EL1_DoubleLock; 2033 + val |= SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI); 2034 + } 2051 2035 2052 2036 return set_id_reg(vcpu, rd, val); 2053 2037 } ··· 2174 2148 return set_id_reg(vcpu, rd, user_val); 2175 2149 } 2176 2150 2151 + /* 2152 + * Allow userspace to de-feature a stage-2 translation granule but prevent it 2153 + * from claiming the impossible. 
2154 + */ 2155 + #define tgran2_val_allowed(tg, safe, user) \ 2156 + ({ \ 2157 + u8 __s = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, safe); \ 2158 + u8 __u = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, user); \ 2159 + \ 2160 + __s == __u || __u == ID_AA64MMFR0_EL1_##tg##_NI; \ 2161 + }) 2162 + 2177 2163 static int set_id_aa64mmfr0_el1(struct kvm_vcpu *vcpu, 2178 2164 const struct sys_reg_desc *rd, u64 user_val) 2179 2165 { 2180 2166 u64 sanitized_val = kvm_read_sanitised_id_reg(vcpu, rd); 2181 - u64 tgran2_mask = ID_AA64MMFR0_EL1_TGRAN4_2_MASK | 2182 - ID_AA64MMFR0_EL1_TGRAN16_2_MASK | 2183 - ID_AA64MMFR0_EL1_TGRAN64_2_MASK; 2184 2167 2185 - if (vcpu_has_nv(vcpu) && 2186 - ((sanitized_val & tgran2_mask) != (user_val & tgran2_mask))) 2168 + if (!vcpu_has_nv(vcpu)) 2169 + return set_id_reg(vcpu, rd, user_val); 2170 + 2171 + if (!tgran2_val_allowed(TGRAN4_2, sanitized_val, user_val) || 2172 + !tgran2_val_allowed(TGRAN16_2, sanitized_val, user_val) || 2173 + !tgran2_val_allowed(TGRAN64_2, sanitized_val, user_val)) 2187 2174 return -EINVAL; 2188 2175 2189 2176 return set_id_reg(vcpu, rd, user_val); ··· 3180 3141 ID_AA64ISAR2_EL1_APA3 | 3181 3142 ID_AA64ISAR2_EL1_GPA3)), 3182 3143 ID_WRITABLE(ID_AA64ISAR3_EL1, (ID_AA64ISAR3_EL1_FPRCVT | 3144 + ID_AA64ISAR3_EL1_LSFE | 3183 3145 ID_AA64ISAR3_EL1_FAMINMAX)), 3184 3146 ID_UNALLOCATED(6,4), 3185 3147 ID_UNALLOCATED(6,5), ··· 3192 3152 ~(ID_AA64MMFR0_EL1_RES0 | 3193 3153 ID_AA64MMFR0_EL1_ASIDBITS)), 3194 3154 ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 | 3195 - ID_AA64MMFR1_EL1_HCX | 3196 - ID_AA64MMFR1_EL1_TWED | 3197 3155 ID_AA64MMFR1_EL1_XNX | 3198 3156 ID_AA64MMFR1_EL1_VH | 3199 3157 ID_AA64MMFR1_EL1_VMIDBits)), ··· 3276 3238 { SYS_DESC(SYS_PMBLIMITR_EL1), undef_access }, 3277 3239 { SYS_DESC(SYS_PMBPTR_EL1), undef_access }, 3278 3240 { SYS_DESC(SYS_PMBSR_EL1), undef_access }, 3241 + { SYS_DESC(SYS_PMSDSFR_EL1), undef_access }, 3279 3242 /* PMBIDR_EL1 is not trapped */ 3280 3243 3281 3244 { PMU_SYS_REG(PMINTENSET_EL1),
+3 -11
arch/arm64/kvm/vgic/vgic-init.c
··· 554 554 * Also map the virtual CPU interface into the VM. 555 555 * v2 calls vgic_init() if not already done. 556 556 * v3 and derivatives return an error if the VGIC is not initialized. 557 - * vgic_ready() returns true if this function has succeeded. 558 557 */ 559 558 int kvm_vgic_map_resources(struct kvm *kvm) 560 559 { ··· 562 563 gpa_t dist_base; 563 564 int ret = 0; 564 565 565 - if (likely(vgic_ready(kvm))) 566 + if (likely(smp_load_acquire(&dist->ready))) 566 567 return 0; 567 568 568 569 mutex_lock(&kvm->slots_lock); 569 570 mutex_lock(&kvm->arch.config_lock); 570 - if (vgic_ready(kvm)) 571 + if (dist->ready) 571 572 goto out; 572 573 573 574 if (!irqchip_in_kernel(kvm)) ··· 593 594 goto out_slots; 594 595 } 595 596 596 - /* 597 - * kvm_io_bus_register_dev() guarantees all readers see the new MMIO 598 - * registration before returning through synchronize_srcu(), which also 599 - * implies a full memory barrier. As such, marking the distributor as 600 - * 'ready' here is guaranteed to be ordered after all vCPUs having seen 601 - * a completely configured distributor. 602 - */ 603 - dist->ready = true; 597 + smp_store_release(&dist->ready, true); 604 598 goto out_slots; 605 599 out: 606 600 mutex_unlock(&kvm->arch.config_lock);
+8
arch/arm64/kvm/vgic/vgic-v3.c
··· 588 588 } 589 589 590 590 DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); 591 + DEFINE_STATIC_KEY_FALSE(vgic_v3_has_v2_compat); 591 592 592 593 static int __init early_group0_trap_cfg(char *buf) 593 594 { ··· 697 696 698 697 if (kvm_vgic_global_state.vcpu_base == 0) 699 698 kvm_info("disabling GICv2 emulation\n"); 699 + 700 + /* 701 + * Flip the static branch if the HW supports v2, even if we're 702 + * not using it (such as in protected mode). 703 + */ 704 + if (has_v2) 705 + static_branch_enable(&vgic_v3_has_v2_compat); 700 706 701 707 if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) { 702 708 group0_trap = true;
+1 -1
arch/arm64/kvm/vgic/vgic-v5.c
··· 15 15 u64 ich_vtr_el2; 16 16 int ret; 17 17 18 - if (!info->has_gcie_v3_compat) 18 + if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) 19 19 return -ENODEV; 20 20 21 21 kvm_vgic_global_state.type = VGIC_V5;
+1
arch/arm64/tools/cpucaps
··· 37 37 HAS_GENERIC_AUTH_IMP_DEF 38 38 HAS_GICV3_CPUIF 39 39 HAS_GICV5_CPUIF 40 + HAS_GICV5_LEGACY 40 41 HAS_GIC_PRIO_MASKING 41 42 HAS_GIC_PRIO_RELAXED_SYNC 42 43 HAS_HCR_NV1
+7
arch/x86/kvm/vmx/vmx.c
··· 5785 5785 if (kvm_test_request(KVM_REQ_EVENT, vcpu)) 5786 5786 return 1; 5787 5787 5788 + /* 5789 + * Ensure that any updates to kvm->buses[] observed by the 5790 + * previous instruction (emulated or otherwise) are also 5791 + * visible to the instruction KVM is about to emulate. 5792 + */ 5793 + smp_rmb(); 5794 + 5788 5795 if (!kvm_emulate_instruction(vcpu, 0)) 5789 5796 return 0; 5790 5797
-7
drivers/irqchip/irq-gic-v5.c
··· 1062 1062 #ifdef CONFIG_KVM 1063 1063 static struct gic_kvm_info gic_v5_kvm_info __initdata; 1064 1064 1065 - static bool __init gicv5_cpuif_has_gcie_legacy(void) 1066 - { 1067 - u64 idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1); 1068 - return !!FIELD_GET(ICC_IDR0_EL1_GCIE_LEGACY, idr0); 1069 - } 1070 - 1071 1065 static void __init gic_of_setup_kvm_info(struct device_node *node) 1072 1066 { 1073 1067 gic_v5_kvm_info.type = GIC_V5; 1074 - gic_v5_kvm_info.has_gcie_v3_compat = gicv5_cpuif_has_gcie_legacy(); 1075 1068 1076 1069 /* GIC Virtual CPU interface maintenance interrupt */ 1077 1070 gic_v5_kvm_info.no_maint_irq_mask = false;
+1 -1
include/kvm/arm_vgic.h
··· 378 378 379 379 extern struct static_key_false vgic_v2_cpuif_trap; 380 380 extern struct static_key_false vgic_v3_cpuif_trap; 381 + extern struct static_key_false vgic_v3_has_v2_compat; 381 382 382 383 int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr); 383 384 void kvm_vgic_early_init(struct kvm *kvm); ··· 410 409 411 410 #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) 412 411 #define vgic_initialized(k) ((k)->arch.vgic.initialized) 413 - #define vgic_ready(k) ((k)->arch.vgic.ready) 414 412 #define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \ 415 413 ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) 416 414
+1
include/linux/arm_ffa.h
··· 128 128 #define FFA_FEAT_RXTX_MIN_SZ_4K 0 129 129 #define FFA_FEAT_RXTX_MIN_SZ_64K 1 130 130 #define FFA_FEAT_RXTX_MIN_SZ_16K 2 131 + #define FFA_FEAT_RXTX_MIN_SZ_MASK GENMASK(1, 0) 131 132 132 133 /* FFA Bus/Device/Driver related */ 133 134 struct ffa_device {
-2
include/linux/irqchip/arm-vgic-info.h
··· 36 36 bool has_v4_1; 37 37 /* Deactivation impared, subpar stuff */ 38 38 bool no_hw_deactivation; 39 - /* v3 compat support (GICv5 hosts, only) */ 40 - bool has_gcie_v3_compat; 41 39 }; 42 40 43 41 #ifdef CONFIG_KVM
+8 -3
include/linux/kvm_host.h
··· 207 207 struct kvm_io_bus { 208 208 int dev_count; 209 209 int ioeventfd_count; 210 + struct rcu_head rcu; 210 211 struct kvm_io_range range[]; 211 212 }; 212 213 ··· 968 967 return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET); 969 968 } 970 969 970 + /* 971 + * Get a bus reference under the update-side lock. No long-term SRCU reader 972 + * references are permitted, to avoid stale reads vs concurrent IO 973 + * registrations. 974 + */ 971 975 static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) 972 976 { 973 - return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, 974 - lockdep_is_held(&kvm->slots_lock) || 975 - !refcount_read(&kvm->users_count)); 977 + return rcu_dereference_protected(kvm->buses[idx], 978 + lockdep_is_held(&kvm->slots_lock)); 976 979 } 977 980 978 981 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
+1
tools/testing/selftests/kvm/Makefile.kvm
··· 156 156 TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs 157 157 TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases 158 158 TEST_GEN_PROGS_arm64 += arm64/debug-exceptions 159 + TEST_GEN_PROGS_arm64 += arm64/hello_el2 159 160 TEST_GEN_PROGS_arm64 += arm64/host_sve 160 161 TEST_GEN_PROGS_arm64 += arm64/hypercalls 161 162 TEST_GEN_PROGS_arm64 += arm64/external_aborts
+4 -9
tools/testing/selftests/kvm/arm64/arch_timer.c
··· 165 165 static void test_init_timer_irq(struct kvm_vm *vm) 166 166 { 167 167 /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */ 168 - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, 169 - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); 170 - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, 171 - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); 168 + ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]); 169 + vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]); 172 170 173 171 sync_global_to_guest(vm, ptimer_irq); 174 172 sync_global_to_guest(vm, vtimer_irq); ··· 174 176 pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); 175 177 } 176 178 177 - static int gic_fd; 178 - 179 179 struct kvm_vm *test_vm_create(void) 180 180 { 181 181 struct kvm_vm *vm; 182 182 unsigned int i; 183 183 int nr_vcpus = test_args.nr_vcpus; 184 + 185 + TEST_REQUIRE(kvm_supports_vgic_v3()); 184 186 185 187 vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); 186 188 ··· 202 204 vcpu_init_descriptor_tables(vcpus[i]); 203 205 204 206 test_init_timer_irq(vm); 205 - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); 206 - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); 207 207 208 208 /* Make all the test's cmdline args visible to the guest */ 209 209 sync_global_to_guest(vm, test_args); ··· 211 215 212 216 void test_vm_cleanup(struct kvm_vm *vm) 213 217 { 214 - close(gic_fd); 215 218 kvm_vm_free(vm); 216 219 }
+4 -9
tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
··· 924 924 925 925 static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) 926 926 { 927 - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, 928 - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); 929 - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, 930 - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); 927 + ptimer_irq = vcpu_get_ptimer_irq(vcpu); 928 + vtimer_irq = vcpu_get_vtimer_irq(vcpu); 931 929 932 930 sync_global_to_guest(vm, ptimer_irq); 933 931 sync_global_to_guest(vm, vtimer_irq); 934 932 935 933 pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); 936 934 } 937 - 938 - static int gic_fd; 939 935 940 936 static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, 941 937 enum arch_timer timer) ··· 947 951 vcpu_args_set(*vcpu, 1, timer); 948 952 949 953 test_init_timer_irq(*vm, *vcpu); 950 - gic_fd = vgic_v3_setup(*vm, 1, 64); 951 - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); 952 954 953 955 sync_global_to_guest(*vm, test_args); 954 956 sync_global_to_guest(*vm, CVAL_MAX); ··· 955 961 956 962 static void test_vm_cleanup(struct kvm_vm *vm) 957 963 { 958 - close(gic_fd); 959 964 kvm_vm_free(vm); 960 965 } 961 966 ··· 1034 1041 1035 1042 /* Tell stdout not to buffer its content */ 1036 1043 setbuf(stdout, NULL); 1044 + 1045 + TEST_REQUIRE(kvm_supports_vgic_v3()); 1037 1046 1038 1047 if (!parse_args(argc, argv)) 1039 1048 exit(KSFT_SKIP);
+42
tools/testing/selftests/kvm/arm64/external_aborts.c
··· 250 250 kvm_vm_free(vm); 251 251 } 252 252 253 + static void expect_sea_s1ptw_handler(struct ex_regs *regs) 254 + { 255 + u64 esr = read_sysreg(esr_el1); 256 + 257 + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); 258 + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); 259 + GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3)); 260 + 261 + GUEST_DONE(); 262 + } 263 + 264 + static noinline void test_s1ptw_abort_guest(void) 265 + { 266 + extern char test_s1ptw_abort_insn; 267 + 268 + WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn); 269 + 270 + asm volatile("test_s1ptw_abort_insn:\n\t" 271 + "ldr x0, [%0]\n\t" 272 + : : "r" (MMIO_ADDR) : "x0", "memory"); 273 + 274 + GUEST_FAIL("Load on S1PTW abort should not retire"); 275 + } 276 + 277 + static void test_s1ptw_abort(void) 278 + { 279 + struct kvm_vcpu *vcpu; 280 + u64 *ptep, bad_pa; 281 + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest, 282 + expect_sea_s1ptw_handler); 283 + 284 + ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2); 285 + bad_pa = BIT(vm->pa_bits) - vm->page_size; 286 + 287 + *ptep &= ~GENMASK(47, 12); 288 + *ptep |= bad_pa; 289 + 290 + vcpu_run_expect_done(vcpu); 291 + kvm_vm_free(vm); 292 + } 293 + 253 294 static void test_serror_emulated_guest(void) 254 295 { 255 296 GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); ··· 368 327 test_serror_masked(); 369 328 test_serror_emulated(); 370 329 test_mmio_ease(); 330 + test_s1ptw_abort(); 371 331 }
+71
tools/testing/selftests/kvm/arm64/hello_el2.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1 4 + * 5 + * Copyright 2025 Google LLC 6 + */ 7 + #include "kvm_util.h" 8 + #include "processor.h" 9 + #include "test_util.h" 10 + #include "ucall.h" 11 + 12 + #include <asm/sysreg.h> 13 + 14 + static void guest_code(void) 15 + { 16 + u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1); 17 + u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); 18 + u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1); 19 + u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4); 20 + 21 + GUEST_ASSERT_EQ(get_current_el(), 2); 22 + GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H); 23 + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1), 24 + ID_AA64MMFR1_EL1_VH_IMP); 25 + 26 + /* 27 + * Traps of the complete ID register space are IMPDEF without FEAT_FGT, 28 + * which is really annoying to deal with in KVM describing E2H as RES1. 29 + * 30 + * If the implementation doesn't honor the trap then expect the register 31 + * to return all zeros. 
32 + */ 33 + if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP) 34 + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0), 35 + ID_AA64MMFR0_EL1_FGT_NI); 36 + else 37 + GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1); 38 + 39 + GUEST_DONE(); 40 + } 41 + 42 + int main(void) 43 + { 44 + struct kvm_vcpu_init init; 45 + struct kvm_vcpu *vcpu; 46 + struct kvm_vm *vm; 47 + struct ucall uc; 48 + 49 + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); 50 + 51 + vm = vm_create(1); 52 + 53 + kvm_get_default_vcpu_target(vm, &init); 54 + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); 55 + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); 56 + kvm_arch_vm_finalize_vcpus(vm); 57 + 58 + vcpu_run(vcpu); 59 + switch (get_ucall(vcpu, &uc)) { 60 + case UCALL_DONE: 61 + break; 62 + case UCALL_ABORT: 63 + REPORT_GUEST_ASSERT(uc); 64 + break; 65 + default: 66 + TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd); 67 + } 68 + 69 + kvm_vm_free(vm); 70 + return 0; 71 + }
+1 -1
tools/testing/selftests/kvm/arm64/hypercalls.c
··· 108 108 109 109 for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) { 110 110 memset(&res, 0, sizeof(res)); 111 - smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); 111 + do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); 112 112 113 113 switch (stage) { 114 114 case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+1 -1
tools/testing/selftests/kvm/arm64/kvm-uuid.c
··· 25 25 { 26 26 struct arm_smccc_res res = {}; 27 27 28 - smccc_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); 28 + do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); 29 29 30 30 __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && 31 31 res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 &&
+2
tools/testing/selftests/kvm/arm64/no-vgic-v3.c
··· 163 163 struct kvm_vm *vm; 164 164 uint64_t pfr0; 165 165 166 + test_disable_default_vgic(); 167 + 166 168 vm = vm_create_with_one_vcpu(&vcpu, NULL); 167 169 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); 168 170 __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0),
+7 -6
tools/testing/selftests/kvm/arm64/psci_test.c
··· 27 27 { 28 28 struct arm_smccc_res res; 29 29 30 - smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, 30 + do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, 31 31 0, 0, 0, 0, &res); 32 32 33 33 return res.a0; ··· 38 38 { 39 39 struct arm_smccc_res res; 40 40 41 - smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, 41 + do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, 42 42 0, 0, 0, 0, 0, &res); 43 43 44 44 return res.a0; ··· 48 48 { 49 49 struct arm_smccc_res res; 50 50 51 - smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, 51 + do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, 52 52 0, 0, 0, 0, 0, &res); 53 53 54 54 return res.a0; ··· 58 58 { 59 59 struct arm_smccc_res res; 60 60 61 - smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); 61 + do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); 62 62 63 63 return res.a0; 64 64 } ··· 67 67 { 68 68 struct arm_smccc_res res; 69 69 70 - smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); 70 + do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); 71 71 72 72 return res.a0; 73 73 } ··· 89 89 90 90 vm = vm_create(2); 91 91 92 - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); 92 + kvm_get_default_vcpu_target(vm, &init); 93 93 init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); 94 94 95 95 *source = aarch64_vcpu_add(vm, 0, &init, guest_code); 96 96 *target = aarch64_vcpu_add(vm, 1, &init, guest_code); 97 97 98 + kvm_arch_vm_finalize_vcpus(vm); 98 99 return vm; 99 100 } 100 101
+22 -22
tools/testing/selftests/kvm/arm64/set_id_regs.c
··· 15 15 #include "test_util.h" 16 16 #include <linux/bitfield.h> 17 17 18 - bool have_cap_arm_mte; 19 - 20 18 enum ftr_type { 21 19 FTR_EXACT, /* Use a predefined safe value */ 22 20 FTR_LOWER_SAFE, /* Smaller value is safe */ ··· 123 125 REG_FTR_END, 124 126 }; 125 127 128 + static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = { 129 + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0), 130 + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0), 131 + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0), 132 + REG_FTR_END, 133 + }; 134 + 126 135 static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { 127 136 REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0), 128 137 REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), ··· 170 165 static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = { 171 166 REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0), 172 167 REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0), 168 + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0), 173 169 REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0), 170 + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0), 174 171 REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0), 175 172 REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0), 176 173 REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0), ··· 228 221 TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1), 229 222 TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), 230 223 TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1), 224 + TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1), 231 225 TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), 232 226 TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), 233 227 TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), ··· 247 239 GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1); 248 240 GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1); 249 241 GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); 242 + GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1); 250 243 
GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); 251 244 GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); 252 245 GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); ··· 577 568 uint64_t mte_frac; 578 569 int idx, err; 579 570 580 - if (!have_cap_arm_mte) { 571 + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); 572 + mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); 573 + if (!mte) { 581 574 ksft_test_result_skip("MTE capability not supported, nothing to test\n"); 582 575 return; 583 576 } ··· 604 593 * from unsupported (0xF) to supported (0). 605 594 * 606 595 */ 607 - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); 608 - 609 - mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); 610 596 mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); 611 597 if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || 612 598 mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { ··· 758 750 ksft_test_result_pass("%s\n", __func__); 759 751 } 760 752 761 - void kvm_arch_vm_post_create(struct kvm_vm *vm) 762 - { 763 - if (vm_check_cap(vm, KVM_CAP_ARM_MTE)) { 764 - vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); 765 - have_cap_arm_mte = true; 766 - } 767 - } 768 - 769 753 int main(void) 770 754 { 771 755 struct kvm_vcpu *vcpu; 772 756 struct kvm_vm *vm; 773 757 bool aarch64_only; 774 758 uint64_t val, el0; 775 - int test_cnt; 759 + int test_cnt, i, j; 776 760 777 761 TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); 778 762 TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS)); 779 763 764 + test_wants_mte(); 765 + 780 766 vm = vm_create(1); 781 767 vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0); 782 768 vcpu = vm_vcpu_add(vm, 0, guest_code); 769 + kvm_arch_vm_finalize_vcpus(vm); 783 770 784 771 /* Check for AARCH64 only system */ 785 772 val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); ··· 783 780 784 781 ksft_print_header(); 785 782 786 - test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + 787 - ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + 788 
- ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + 789 - ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + 790 - ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + 791 - ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - 792 - ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; 783 + test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; 784 + for (i = 0; i < ARRAY_SIZE(test_regs); i++) 785 + for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++) 786 + test_cnt++; 793 787 794 788 ksft_set_plan(test_cnt); 795 789
+15 -2
tools/testing/selftests/kvm/arm64/smccc_filter.c
··· 22 22 SMC_INSN, 23 23 }; 24 24 25 + static bool test_runs_at_el2(void) 26 + { 27 + struct kvm_vm *vm = vm_create(1); 28 + struct kvm_vcpu_init init; 29 + 30 + kvm_get_default_vcpu_target(vm, &init); 31 + kvm_vm_free(vm); 32 + 33 + return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); 34 + } 35 + 25 36 #define for_each_conduit(conduit) \ 26 - for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++) 37 + for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \ 38 + conduit <= SMC_INSN; conduit++) 27 39 28 40 static void guest_main(uint32_t func_id, enum smccc_conduit conduit) 29 41 { ··· 76 64 struct kvm_vm *vm; 77 65 78 66 vm = vm_create(1); 79 - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); 67 + kvm_get_default_vcpu_target(vm, &init); 80 68 81 69 /* 82 70 * Enable in-kernel emulation of PSCI to ensure that calls are denied ··· 85 73 init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); 86 74 87 75 *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main); 76 + kvm_arch_vm_finalize_vcpus(vm); 88 77 return vm; 89 78 } 90 79
+2
tools/testing/selftests/kvm/arm64/vgic_init.c
··· 994 994 int pa_bits; 995 995 int cnt_impl = 0; 996 996 997 + test_disable_default_vgic(); 998 + 997 999 pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; 998 1000 max_phys_size = 1ULL << pa_bits; 999 1001
+3 -1
tools/testing/selftests/kvm/arm64/vgic_irq.c
··· 752 752 vcpu_args_set(vcpu, 1, args_gva); 753 753 754 754 gic_fd = vgic_v3_setup(vm, 1, nr_irqs); 755 - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); 756 755 757 756 vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, 758 757 guest_irq_handlers[args.eoi_split][args.level_sensitive]); ··· 800 801 bool level_sensitive = false; 801 802 int opt; 802 803 bool eoi_split = false; 804 + 805 + TEST_REQUIRE(kvm_supports_vgic_v3()); 806 + test_disable_default_vgic(); 803 807 804 808 while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { 805 809 switch (opt) {
+3 -5
tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
··· 27 27 28 28 static struct kvm_vm *vm; 29 29 static struct kvm_vcpu **vcpus; 30 - static int gic_fd, its_fd; 30 + static int its_fd; 31 31 32 32 static struct test_data { 33 33 bool request_vcpus_stop; ··· 214 214 215 215 static void setup_gic(void) 216 216 { 217 - gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64); 218 - __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3"); 219 - 220 217 its_fd = vgic_its_setup(vm); 221 218 } 222 219 ··· 352 355 static void destroy_vm(void) 353 356 { 354 357 close(its_fd); 355 - close(gic_fd); 356 358 kvm_vm_free(vm); 357 359 free(vcpus); 358 360 } ··· 369 373 { 370 374 u32 nr_threads; 371 375 int c; 376 + 377 + TEST_REQUIRE(kvm_supports_vgic_v3()); 372 378 373 379 while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { 374 380 switch (c) {
+35 -40
tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
··· 28 28 struct vpmu_vm { 29 29 struct kvm_vm *vm; 30 30 struct kvm_vcpu *vcpu; 31 - int gic_fd; 32 31 }; 33 32 34 33 static struct vpmu_vm vpmu_vm; ··· 42 43 static uint64_t get_pmcr_n(uint64_t pmcr) 43 44 { 44 45 return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); 45 - } 46 - 47 - static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) 48 - { 49 - u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); 50 46 } 51 47 52 48 static uint64_t get_counters_mask(uint64_t n) ··· 409 415 .attr = KVM_ARM_VCPU_PMU_V3_IRQ, 410 416 .addr = (uint64_t)&irq, 411 417 }; 412 - struct kvm_device_attr init_attr = { 413 - .group = KVM_ARM_VCPU_PMU_V3_CTRL, 414 - .attr = KVM_ARM_VCPU_PMU_V3_INIT, 415 - }; 416 418 417 419 /* The test creates the vpmu_vm multiple times. Ensure a clean state */ 418 420 memset(&vpmu_vm, 0, sizeof(vpmu_vm)); ··· 421 431 } 422 432 423 433 /* Create vCPU with PMUv3 */ 424 - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); 434 + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); 425 435 init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); 426 436 vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); 427 437 vcpu_init_descriptor_tables(vpmu_vm.vcpu); 428 - vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64); 429 - __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, 430 - "Failed to create vgic-v3, skipping"); 438 + 439 + kvm_arch_vm_finalize_vcpus(vpmu_vm.vm); 431 440 432 441 /* Make sure that PMUv3 support is indicated in the ID register */ 433 442 dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); ··· 435 446 pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, 436 447 "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); 437 448 438 - /* Initialize vPMU */ 439 449 vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr); 440 - vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr); 441 450 } 442 451 443 452 static void destroy_vpmu_vm(void) 444 453 { 445 - close(vpmu_vm.gic_fd); 446 454 kvm_vm_free(vpmu_vm.vm); 447 455 } 448 456 ··· 461 475 } 462 476 
} 463 477 464 - static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) 478 + static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail) 465 479 { 466 480 struct kvm_vcpu *vcpu; 467 - uint64_t pmcr, pmcr_orig; 481 + unsigned int prev; 482 + int ret; 468 483 469 484 create_vpmu_vm(guest_code); 470 485 vcpu = vpmu_vm.vcpu; 471 486 472 - pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); 473 - pmcr = pmcr_orig; 487 + prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0))); 474 488 475 - /* 476 - * Setting a larger value of PMCR.N should not modify the field, and 477 - * return a success. 478 - */ 479 - set_pmcr_n(&pmcr, pmcr_n); 480 - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); 481 - pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); 489 + ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, 490 + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters); 482 491 483 492 if (expect_fail) 484 - TEST_ASSERT(pmcr_orig == pmcr, 485 - "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", 486 - pmcr, pmcr_n); 493 + TEST_ASSERT(ret && errno == EINVAL, 494 + "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded", 495 + nr_counters, prev); 487 496 else 488 - TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), 489 - "Failed to update PMCR.N to %lu (received: %lu)", 490 - pmcr_n, get_pmcr_n(pmcr)); 497 + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); 498 + 499 + vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL); 491 500 } 492 501 493 502 /* ··· 497 516 498 517 pr_debug("Test with pmcr_n %lu\n", pmcr_n); 499 518 500 - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); 519 + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); 501 520 vcpu = vpmu_vm.vcpu; 502 521 503 522 /* Save the initial sp to restore them later to run the guest again */ 504 - sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1)); 
523 + sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1)); 505 524 506 525 run_vcpu(vcpu, pmcr_n); 507 526 ··· 509 528 * Reset and re-initialize the vCPU, and run the guest code again to 510 529 * check if PMCR_EL0.N is preserved. 511 530 */ 512 - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); 531 + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); 513 532 init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); 514 533 aarch64_vcpu_setup(vcpu, &init); 515 534 vcpu_init_descriptor_tables(vcpu); 516 - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp); 535 + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp); 517 536 vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); 518 537 519 538 run_vcpu(vcpu, pmcr_n); ··· 538 557 uint64_t set_reg_id, clr_reg_id, reg_val; 539 558 uint64_t valid_counters_mask, max_counters_mask; 540 559 541 - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); 560 + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); 542 561 vcpu = vpmu_vm.vcpu; 543 562 544 563 valid_counters_mask = get_counters_mask(pmcr_n); ··· 592 611 { 593 612 pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); 594 613 595 - test_create_vpmu_vm_with_pmcr_n(pmcr_n, true); 614 + test_create_vpmu_vm_with_nr_counters(pmcr_n, true); 596 615 destroy_vpmu_vm(); 597 616 } 598 617 ··· 610 629 return get_pmcr_n(pmcr); 611 630 } 612 631 632 + static bool kvm_supports_nr_counters_attr(void) 633 + { 634 + bool supported; 635 + 636 + create_vpmu_vm(NULL); 637 + supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, 638 + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS); 639 + destroy_vpmu_vm(); 640 + 641 + return supported; 642 + } 643 + 613 644 int main(void) 614 645 { 615 646 uint64_t i, pmcr_n; 616 647 617 648 TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); 649 + TEST_REQUIRE(kvm_supports_vgic_v3()); 650 + TEST_REQUIRE(kvm_supports_nr_counters_attr()); 618 651 619 652 pmcr_n = get_pmcr_n_limit(); 620 653 for (i = 0; i <= pmcr_n; i++) {
-35
tools/testing/selftests/kvm/dirty_log_perf_test.c
··· 20 20 #include "guest_modes.h" 21 21 #include "ucall_common.h" 22 22 23 - #ifdef __aarch64__ 24 - #include "arm64/vgic.h" 25 - 26 - static int gic_fd; 27 - 28 - static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) 29 - { 30 - /* 31 - * The test can still run even if hardware does not support GICv3, as it 32 - * is only an optimization to reduce guest exits. 33 - */ 34 - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); 35 - } 36 - 37 - static void arch_cleanup_vm(struct kvm_vm *vm) 38 - { 39 - if (gic_fd > 0) 40 - close(gic_fd); 41 - } 42 - 43 - #else /* __aarch64__ */ 44 - 45 - static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) 46 - { 47 - } 48 - 49 - static void arch_cleanup_vm(struct kvm_vm *vm) 50 - { 51 - } 52 - 53 - #endif 54 - 55 23 /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ 56 24 #define TEST_HOST_LOOP_N 2UL 57 25 ··· 133 165 if (dirty_log_manual_caps) 134 166 vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 135 167 dirty_log_manual_caps); 136 - 137 - arch_setup_vm(vm, nr_vcpus); 138 168 139 169 /* Start the iterations */ 140 170 iteration = 0; ··· 251 285 } 252 286 253 287 memstress_free_bitmaps(bitmaps, p->slots); 254 - arch_cleanup_vm(vm); 255 288 memstress_destroy_vm(vm); 256 289 } 257 290
+1
tools/testing/selftests/kvm/dirty_log_test.c
··· 585 585 586 586 log_mode_create_vm_done(vm); 587 587 *vcpu = vm_vcpu_add(vm, 0, guest_code); 588 + kvm_arch_vm_finalize_vcpus(vm); 588 589 return vm; 589 590 } 590 591
+6 -3
tools/testing/selftests/kvm/get-reg-list.c
··· 116 116 } 117 117 118 118 #ifdef __aarch64__ 119 - static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init) 119 + static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c, 120 + struct kvm_vcpu_init *init) 120 121 { 121 122 struct vcpu_reg_sublist *s; 123 + 124 + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init); 122 125 123 126 for_each_sublist(c, s) 124 127 if (s->capability) ··· 130 127 131 128 static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) 132 129 { 133 - struct kvm_vcpu_init init = { .target = -1, }; 130 + struct kvm_vcpu_init init; 134 131 struct kvm_vcpu *vcpu; 135 132 136 - prepare_vcpu_init(c, &init); 133 + prepare_vcpu_init(vm, c, &init); 137 134 vcpu = __vm_vcpu_add(vm, 0); 138 135 aarch64_vcpu_setup(vcpu, &init); 139 136
+24
tools/testing/selftests/kvm/include/arm64/arch_timer.h
··· 155 155 timer_set_tval(timer, msec_to_cycles(msec)); 156 156 } 157 157 158 + static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu) 159 + { 160 + u32 intid; 161 + u64 attr; 162 + 163 + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER : 164 + KVM_ARM_VCPU_TIMER_IRQ_VTIMER; 165 + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); 166 + 167 + return intid; 168 + } 169 + 170 + static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu) 171 + { 172 + u32 intid; 173 + u64 attr; 174 + 175 + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER : 176 + KVM_ARM_VCPU_TIMER_IRQ_PTIMER; 177 + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); 178 + 179 + return intid; 180 + } 181 + 158 182 #endif /* SELFTEST_KVM_ARCH_TIMER_H */
+4 -1
tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
··· 2 2 #ifndef SELFTEST_KVM_UTIL_ARCH_H 3 3 #define SELFTEST_KVM_UTIL_ARCH_H 4 4 5 - struct kvm_vm_arch {}; 5 + struct kvm_vm_arch { 6 + bool has_gic; 7 + int gic_fd; 8 + }; 6 9 7 10 #endif // SELFTEST_KVM_UTIL_ARCH_H
+74
tools/testing/selftests/kvm/include/arm64/processor.h
··· 175 175 void vm_install_sync_handler(struct kvm_vm *vm, 176 176 int vector, int ec, handler_fn handler); 177 177 178 + uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level); 178 179 uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); 179 180 180 181 static inline void cpu_relax(void) ··· 300 299 301 300 /* Execute a Wait For Interrupt instruction. */ 302 301 void wfi(void); 302 + 303 + void test_wants_mte(void); 304 + void test_disable_default_vgic(void); 305 + 306 + bool vm_supports_el2(struct kvm_vm *vm); 307 + static bool vcpu_has_el2(struct kvm_vcpu *vcpu) 308 + { 309 + return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); 310 + } 311 + 312 + #define MAPPED_EL2_SYSREG(el2, el1) \ 313 + case SYS_##el1: \ 314 + if (vcpu_has_el2(vcpu)) \ 315 + alias = SYS_##el2; \ 316 + break 317 + 318 + 319 + static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding) 320 + { 321 + u32 alias = encoding; 322 + 323 + BUILD_BUG_ON(!__builtin_constant_p(encoding)); 324 + 325 + switch (encoding) { 326 + MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1); 327 + MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1); 328 + MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1); 329 + MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1); 330 + MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1); 331 + MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1); 332 + MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1); 333 + MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1); 334 + MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1); 335 + MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1); 336 + MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1); 337 + MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1); 338 + MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1); 339 + MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1); 340 + MAPPED_EL2_SYSREG(POR_EL2, POR_EL1); 341 + MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1); 342 + MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1); 343 + MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1); 344 + MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1); 345 + MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1); 346 + 
MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1); 347 + MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1); 348 + case SYS_SP_EL1: 349 + if (!vcpu_has_el2(vcpu)) 350 + return ARM64_CORE_REG(sp_el1); 351 + 352 + alias = SYS_SP_EL2; 353 + break; 354 + default: 355 + BUILD_BUG(); 356 + } 357 + 358 + return KVM_ARM64_SYS_REG(alias); 359 + } 360 + 361 + void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init); 362 + 363 + static inline unsigned int get_current_el(void) 364 + { 365 + return (read_sysreg(CurrentEL) >> 2) & 0x3; 366 + } 367 + 368 + #define do_smccc(...) \ 369 + do { \ 370 + if (get_current_el() == 2) \ 371 + smccc_smc(__VA_ARGS__); \ 372 + else \ 373 + smccc_hvc(__VA_ARGS__); \ 374 + } while (0) 303 375 304 376 #endif /* SELFTEST_KVM_PROCESSOR_H */
+3
tools/testing/selftests/kvm/include/arm64/vgic.h
··· 16 16 ((uint64_t)(flags) << 12) | \ 17 17 index) 18 18 19 + bool kvm_supports_vgic_v3(void); 20 + int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); 21 + void __vgic_v3_init(int fd); 19 22 int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); 20 23 21 24 #define VGIC_MAX_RESERVED 1023
+6 -1
tools/testing/selftests/kvm/include/kvm_util.h
··· 64 64 #ifdef __x86_64__ 65 65 struct kvm_cpuid2 *cpuid; 66 66 #endif 67 + #ifdef __aarch64__ 68 + struct kvm_vcpu_init init; 69 + #endif 67 70 struct kvm_binary_stats stats; 68 71 struct kvm_dirty_gfn *dirty_gfns; 69 72 uint32_t fetch_index; ··· 1260 1257 */ 1261 1258 void kvm_selftest_arch_init(void); 1262 1259 1263 - void kvm_arch_vm_post_create(struct kvm_vm *vm); 1260 + void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus); 1261 + void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm); 1262 + void kvm_arch_vm_release(struct kvm_vm *vm); 1264 1263 1265 1264 bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); 1266 1265
+88 -16
tools/testing/selftests/kvm/lib/arm64/processor.c
··· 12 12 #include "kvm_util.h" 13 13 #include "processor.h" 14 14 #include "ucall_common.h" 15 + #include "vgic.h" 15 16 16 17 #include <linux/bitfield.h> 17 18 #include <linux/sizes.h> ··· 186 185 _virt_pg_map(vm, vaddr, paddr, attr_idx); 187 186 } 188 187 189 - uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) 188 + uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level) 190 189 { 191 190 uint64_t *ptep; 192 191 ··· 196 195 ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; 197 196 if (!ptep) 198 197 goto unmapped_gva; 198 + if (level == 0) 199 + return ptep; 199 200 200 201 switch (vm->pgtable_levels) { 201 202 case 4: 202 203 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; 203 204 if (!ptep) 204 205 goto unmapped_gva; 206 + if (level == 1) 207 + break; 205 208 /* fall through */ 206 209 case 3: 207 210 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; 208 211 if (!ptep) 209 212 goto unmapped_gva; 213 + if (level == 2) 214 + break; 210 215 /* fall through */ 211 216 case 2: 212 217 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; ··· 228 221 unmapped_gva: 229 222 TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); 230 223 exit(EXIT_FAILURE); 224 + } 225 + 226 + uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) 227 + { 228 + return virt_get_pte_hva_at_level(vm, gva, 3); 231 229 } 232 230 233 231 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) ··· 278 266 } 279 267 } 280 268 269 + bool vm_supports_el2(struct kvm_vm *vm) 270 + { 271 + const char *value = getenv("NV"); 272 + 273 + if (value && *value == '0') 274 + return false; 275 + 276 + return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic; 277 + } 278 + 279 + void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init) 280 + { 281 + struct kvm_vcpu_init preferred = {}; 282 + 283 + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); 284 
+ if (vm_supports_el2(vm)) 285 + preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); 286 + 287 + *init = preferred; 288 + } 289 + 281 290 void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) 282 291 { 283 292 struct kvm_vcpu_init default_init = { .target = -1, }; 284 293 struct kvm_vm *vm = vcpu->vm; 285 294 uint64_t sctlr_el1, tcr_el1, ttbr0_el1; 286 295 287 - if (!init) 296 + if (!init) { 297 + kvm_get_default_vcpu_target(vm, &default_init); 288 298 init = &default_init; 289 - 290 - if (init->target == -1) { 291 - struct kvm_vcpu_init preferred; 292 - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); 293 - init->target = preferred.target; 294 299 } 295 300 296 301 vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); 302 + vcpu->init = *init; 297 303 298 304 /* 299 305 * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 300 306 * registers, which the variable argument list macros do. 301 307 */ 302 - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); 308 + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20); 303 309 304 - sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)); 305 - tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1)); 310 + sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1)); 311 + tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1)); 306 312 307 313 /* Configure base granule size */ 308 314 switch (vm->mode) { ··· 387 357 if (use_lpa2_pte_format(vm)) 388 358 tcr_el1 |= TCR_DS; 389 359 390 - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); 391 - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); 392 - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); 393 - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); 360 + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1); 361 + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1); 362 + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, 
SYS_MAIR_EL1), DEFAULT_MAIR_EL1); 363 + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1); 394 364 vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); 365 + 366 + if (!vcpu_has_el2(vcpu)) 367 + return; 368 + 369 + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), 370 + HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H); 395 371 } 396 372 397 373 void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) ··· 431 395 432 396 aarch64_vcpu_setup(vcpu, init); 433 397 434 - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); 398 + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size); 435 399 return vcpu; 436 400 } 437 401 ··· 501 465 { 502 466 extern char vectors; 503 467 504 - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); 468 + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors); 505 469 } 506 470 507 471 void route_exception(struct ex_regs *regs, int vector) ··· 688 652 void wfi(void) 689 653 { 690 654 asm volatile("wfi"); 655 + } 656 + 657 + static bool request_mte; 658 + static bool request_vgic = true; 659 + 660 + void test_wants_mte(void) 661 + { 662 + request_mte = true; 663 + } 664 + 665 + void test_disable_default_vgic(void) 666 + { 667 + request_vgic = false; 668 + } 669 + 670 + void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) 671 + { 672 + if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE)) 673 + vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); 674 + 675 + if (request_vgic && kvm_supports_vgic_v3()) { 676 + vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64); 677 + vm->arch.has_gic = true; 678 + } 679 + } 680 + 681 + void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) 682 + { 683 + if (vm->arch.has_gic) 684 + __vgic_v3_init(vm->arch.gic_fd); 685 + } 686 + 687 + void kvm_arch_vm_release(struct kvm_vm *vm) 688 + { 689 + if (vm->arch.has_gic) 690 + close(vm->arch.gic_fd); 691 691 }
+45 -21
tools/testing/selftests/kvm/lib/arm64/vgic.c
··· 15 15 #include "gic.h" 16 16 #include "gic_v3.h" 17 17 18 + bool kvm_supports_vgic_v3(void) 19 + { 20 + struct kvm_vm *vm = vm_create_barebones(); 21 + int r; 22 + 23 + r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); 24 + kvm_vm_free(vm); 25 + 26 + return !r; 27 + } 28 + 18 29 /* 19 30 * vGIC-v3 default host setup 20 31 * ··· 41 30 * redistributor regions of the guest. Since it depends on the number of 42 31 * vCPUs for the VM, it must be called after all the vCPUs have been created. 43 32 */ 44 - int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) 33 + int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) 45 34 { 46 35 int gic_fd; 47 36 uint64_t attr; 48 - struct list_head *iter; 49 - unsigned int nr_gic_pages, nr_vcpus_created = 0; 50 - 51 - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); 52 - 53 - /* 54 - * Make sure that the caller is infact calling this 55 - * function after all the vCPUs are added. 56 - */ 57 - list_for_each(iter, &vm->vcpus) 58 - nr_vcpus_created++; 59 - TEST_ASSERT(nr_vcpus == nr_vcpus_created, 60 - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", 61 - nr_vcpus, nr_vcpus_created); 37 + unsigned int nr_gic_pages; 62 38 63 39 /* Distributor setup */ 64 40 gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); ··· 53 55 return gic_fd; 54 56 55 57 kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); 56 - 57 - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, 58 - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); 59 58 60 59 attr = GICD_BASE_GPA; 61 60 kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, ··· 68 73 KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); 69 74 virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); 70 75 71 - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, 72 - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); 73 - 74 76 return gic_fd; 77 + } 78 + 79 + void __vgic_v3_init(int fd) 80 + { 81 + 
kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL, 82 + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); 83 + } 84 + 85 + int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) 86 + { 87 + unsigned int nr_vcpus_created = 0; 88 + struct list_head *iter; 89 + int fd; 90 + 91 + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); 92 + 93 + /* 94 + * Make sure that the caller is infact calling this 95 + * function after all the vCPUs are added. 96 + */ 97 + list_for_each(iter, &vm->vcpus) 98 + nr_vcpus_created++; 99 + TEST_ASSERT(nr_vcpus == nr_vcpus_created, 100 + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", 101 + nr_vcpus, nr_vcpus_created); 102 + 103 + fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs); 104 + if (fd < 0) 105 + return fd; 106 + 107 + __vgic_v3_init(fd); 108 + return fd; 75 109 } 76 110 77 111 /* should only work for level sensitive interrupts */
+13 -2
tools/testing/selftests/kvm/lib/kvm_util.c
··· 517 517 guest_rng = new_guest_random_state(guest_random_seed); 518 518 sync_global_to_guest(vm, guest_rng); 519 519 520 - kvm_arch_vm_post_create(vm); 520 + kvm_arch_vm_post_create(vm, nr_runnable_vcpus); 521 521 522 522 return vm; 523 523 } ··· 555 555 for (i = 0; i < nr_vcpus; ++i) 556 556 vcpus[i] = vm_vcpu_add(vm, i, guest_code); 557 557 558 + kvm_arch_vm_finalize_vcpus(vm); 558 559 return vm; 559 560 } 560 561 ··· 806 805 807 806 /* Free cached stats metadata and close FD */ 808 807 kvm_stats_release(&vmp->stats); 808 + 809 + kvm_arch_vm_release(vmp); 809 810 } 810 811 811 812 static void __vm_mem_region_delete(struct kvm_vm *vm, ··· 2333 2330 TEST_FAIL("Unable to find stat '%s'", name); 2334 2331 } 2335 2332 2336 - __weak void kvm_arch_vm_post_create(struct kvm_vm *vm) 2333 + __weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) 2334 + { 2335 + } 2336 + 2337 + __weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) 2338 + { 2339 + } 2340 + 2341 + __weak void kvm_arch_vm_release(struct kvm_vm *vm) 2337 2342 { 2338 2343 } 2339 2344
+1 -1
tools/testing/selftests/kvm/lib/x86/processor.c
··· 625 625 REPORT_GUEST_ASSERT(uc); 626 626 } 627 627 628 - void kvm_arch_vm_post_create(struct kvm_vm *vm) 628 + void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) 629 629 { 630 630 int r; 631 631
+1 -1
tools/testing/selftests/kvm/s390/cmma_test.c
··· 145 145 slot0 = memslot2region(vm, 0); 146 146 ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); 147 147 148 - kvm_arch_vm_post_create(vm); 148 + kvm_arch_vm_post_create(vm, 0); 149 149 } 150 150 151 151 static struct kvm_vm *create_vm_two_memslots(void)
+1 -1
tools/testing/selftests/kvm/steal_time.c
··· 118 118 { 119 119 struct arm_smccc_res res; 120 120 121 - smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res); 121 + do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res); 122 122 return res.a0; 123 123 } 124 124
+35 -8
virt/kvm/kvm_main.c
··· 1103 1103 { 1104 1104 } 1105 1105 1106 + /* Called only on cleanup and destruction paths when there are no users. */ 1107 + static inline struct kvm_io_bus *kvm_get_bus_for_destruction(struct kvm *kvm, 1108 + enum kvm_bus idx) 1109 + { 1110 + return rcu_dereference_protected(kvm->buses[idx], 1111 + !refcount_read(&kvm->users_count)); 1112 + } 1113 + 1106 1114 static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) 1107 1115 { 1108 1116 struct kvm *kvm = kvm_arch_alloc_vm(); ··· 1236 1228 out_err_no_arch_destroy_vm: 1237 1229 WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); 1238 1230 for (i = 0; i < KVM_NR_BUSES; i++) 1239 - kfree(kvm_get_bus(kvm, i)); 1231 + kfree(kvm_get_bus_for_destruction(kvm, i)); 1240 1232 kvm_free_irq_routing(kvm); 1241 1233 out_err_no_irq_routing: 1242 1234 cleanup_srcu_struct(&kvm->irq_srcu); ··· 1284 1276 1285 1277 kvm_free_irq_routing(kvm); 1286 1278 for (i = 0; i < KVM_NR_BUSES; i++) { 1287 - struct kvm_io_bus *bus = kvm_get_bus(kvm, i); 1279 + struct kvm_io_bus *bus = kvm_get_bus_for_destruction(kvm, i); 1288 1280 1289 1281 if (bus) 1290 1282 kvm_io_bus_destroy(bus); ··· 1320 1312 kvm_free_memslots(kvm, &kvm->__memslots[i][1]); 1321 1313 } 1322 1314 cleanup_srcu_struct(&kvm->irq_srcu); 1315 + srcu_barrier(&kvm->srcu); 1323 1316 cleanup_srcu_struct(&kvm->srcu); 1324 1317 #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES 1325 1318 xa_destroy(&kvm->mem_attr_array); ··· 5854 5845 return -EOPNOTSUPP; 5855 5846 } 5856 5847 5848 + static struct kvm_io_bus *kvm_get_bus_srcu(struct kvm *kvm, enum kvm_bus idx) 5849 + { 5850 + /* 5851 + * Ensure that any updates to kvm_buses[] observed by the previous vCPU 5852 + * machine instruction are also visible to the vCPU machine instruction 5853 + * that triggered this call. 
5854 + */ 5855 + smp_mb__after_srcu_read_lock(); 5856 + 5857 + return srcu_dereference(kvm->buses[idx], &kvm->srcu); 5858 + } 5859 + 5857 5860 int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, 5858 5861 int len, const void *val) 5859 5862 { ··· 5878 5857 .len = len, 5879 5858 }; 5880 5859 5881 - bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); 5860 + bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx); 5882 5861 if (!bus) 5883 5862 return -ENOMEM; 5884 5863 r = __kvm_io_bus_write(vcpu, bus, &range, val); ··· 5897 5876 .len = len, 5898 5877 }; 5899 5878 5900 - bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); 5879 + bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx); 5901 5880 if (!bus) 5902 5881 return -ENOMEM; 5903 5882 ··· 5947 5926 .len = len, 5948 5927 }; 5949 5928 5950 - bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); 5929 + bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx); 5951 5930 if (!bus) 5952 5931 return -ENOMEM; 5953 5932 r = __kvm_io_bus_read(vcpu, bus, &range, val); 5954 5933 return r < 0 ? 
r : 0; 5955 5934 } 5956 5935 EXPORT_SYMBOL_GPL(kvm_io_bus_read); 5936 + 5937 + static void __free_bus(struct rcu_head *rcu) 5938 + { 5939 + struct kvm_io_bus *bus = container_of(rcu, struct kvm_io_bus, rcu); 5940 + 5941 + kfree(bus); 5942 + } 5957 5943 5958 5944 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 5959 5945 int len, struct kvm_io_device *dev) ··· 6000 5972 memcpy(new_bus->range + i + 1, bus->range + i, 6001 5973 (bus->dev_count - i) * sizeof(struct kvm_io_range)); 6002 5974 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 6003 - synchronize_srcu_expedited(&kvm->srcu); 6004 - kfree(bus); 5975 + call_srcu(&kvm->srcu, &bus->rcu, __free_bus); 6005 5976 6006 5977 return 0; 6007 5978 } ··· 6062 6035 6063 6036 srcu_idx = srcu_read_lock(&kvm->srcu); 6064 6037 6065 - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 6038 + bus = kvm_get_bus_srcu(kvm, bus_idx); 6066 6039 if (!bus) 6067 6040 goto out_unlock; 6068 6041