Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

+15 -2

Documentation/virt/kvm/api.rst

··· 7918 7918 (0x40000001). Otherwise, a guest may use the paravirtual features 7919 7919 regardless of what has actually been exposed through the CPUID leaf. 7920 7920 7921 - 8.29 KVM_CAP_DIRTY_LOG_RING 7922 - --------------------------- 7921 + 8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL 7922 + ---------------------------------------------------------- 7923 7923 7924 7924 :Architectures: x86 7925 7925 :Parameters: args[0] - size of the dirty log ring ··· 7977 7977 flags of a GFN have the DIRTY bit cleared, meaning that it has harvested 7978 7978 all the dirty GFNs that were available. 7979 7979 7980 + Note that on weakly ordered architectures, userspace accesses to the 7981 + ring buffer (and more specifically the 'flags' field) must be ordered, 7982 + using load-acquire/store-release accessors when available, or any 7983 + other memory barrier that will ensure this ordering. 7984 + 7980 7985 It's not necessary for userspace to harvest all the dirty GFNs at once. 7981 7986 However it must collect the dirty GFNs in sequence, i.e., the userspace 7982 7987 program cannot skip one dirty GFN to collect the one next to it. ··· 8009 8004 KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual 8010 8005 machine will switch to ring-buffer dirty page tracking and further 8011 8006 KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail. 8007 + 8008 + NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that 8009 + should be exposed by weakly ordered architectures, in order to indicate 8010 + the additional memory ordering requirements imposed on userspace when 8011 + reading the state of an entry and mutating it from DIRTY to HARVESTED. 8012 + Architectures with TSO-like ordering (such as x86) are allowed to 8013 + expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL 8014 + to userspace. 8012 8015 8013 8016 8.30 KVM_CAP_XEN_HVM 8014 8017 --------------------

+2 -1

MAINTAINERS

··· 11204 11204 R: Suzuki K Poulose <suzuki.poulose@arm.com> 11205 11205 R: Oliver Upton <oliver.upton@linux.dev> 11206 11206 L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) 11207 - L: kvmarm@lists.cs.columbia.edu (moderated for non-subscribers) 11207 + L: kvmarm@lists.linux.dev 11208 + L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers) 11208 11209 S: Maintained 11209 11210 T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git 11210 11211 F: arch/arm64/include/asm/kvm*

+4

arch/arm64/include/asm/kvm_host.h

··· 393 393 */ 394 394 struct { 395 395 u32 mdscr_el1; 396 + bool pstate_ss; 396 397 } guest_debug_preserved; 397 398 398 399 /* vcpu power state */ ··· 536 535 #define IN_WFIT __vcpu_single_flag(sflags, BIT(3)) 537 536 /* vcpu system registers loaded on physical CPU */ 538 537 #define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) 538 + /* Software step state is Active-pending */ 539 + #define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5)) 540 + 539 541 540 542 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ 541 543 #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \

+10 -5

arch/arm64/kvm/arm.c

··· 2269 2269 if (!arg) 2270 2270 return -EINVAL; 2271 2271 2272 + if (strcmp(arg, "none") == 0) { 2273 + kvm_mode = KVM_MODE_NONE; 2274 + return 0; 2275 + } 2276 + 2277 + if (!is_hyp_mode_available()) { 2278 + pr_warn_once("KVM is not available. Ignoring kvm-arm.mode\n"); 2279 + return 0; 2280 + } 2281 + 2272 2282 if (strcmp(arg, "protected") == 0) { 2273 2283 if (!is_kernel_in_hyp_mode()) 2274 2284 kvm_mode = KVM_MODE_PROTECTED; ··· 2290 2280 2291 2281 if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) { 2292 2282 kvm_mode = KVM_MODE_DEFAULT; 2293 - return 0; 2294 - } 2295 - 2296 - if (strcmp(arg, "none") == 0) { 2297 - kvm_mode = KVM_MODE_NONE; 2298 2283 return 0; 2299 2284 } 2300 2285

+33 -1

arch/arm64/kvm/debug.c

··· 32 32 * 33 33 * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled 34 34 * after we have restored the preserved value to the main context. 35 + * 36 + * When single-step is enabled by userspace, we tweak PSTATE.SS on every 37 + * guest entry. Preserve PSTATE.SS so we can restore the original value 38 + * for the vcpu after the single-step is disabled. 35 39 */ 36 40 static void save_guest_debug_regs(struct kvm_vcpu *vcpu) 37 41 { ··· 45 41 46 42 trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", 47 43 vcpu->arch.guest_debug_preserved.mdscr_el1); 44 + 45 + vcpu->arch.guest_debug_preserved.pstate_ss = 46 + (*vcpu_cpsr(vcpu) & DBG_SPSR_SS); 48 47 } 49 48 50 49 static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) ··· 58 51 59 52 trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", 60 53 vcpu_read_sys_reg(vcpu, MDSCR_EL1)); 54 + 55 + if (vcpu->arch.guest_debug_preserved.pstate_ss) 56 + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; 57 + else 58 + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; 61 59 } 62 60 63 61 /** ··· 200 188 * debugging the system. 201 189 */ 202 190 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 203 - *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; 191 + /* 192 + * If the software step state at the last guest exit 193 + * was Active-pending, we don't set DBG_SPSR_SS so 194 + * that the state is maintained (to not run another 195 + * single-step until the pending Software Step 196 + * exception is taken). 197 + */ 198 + if (!vcpu_get_flag(vcpu, DBG_SS_ACTIVE_PENDING)) 199 + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; 200 + else 201 + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; 202 + 204 203 mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); 205 204 mdscr |= DBG_MDSCR_SS; 206 205 vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); ··· 285 262 * Restore the guest's debug registers if we were using them. 
286 263 */ 287 264 if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { 265 + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 266 + if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)) 267 + /* 268 + * Mark the vcpu as ACTIVE_PENDING 269 + * until Software Step exception is taken. 270 + */ 271 + vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING); 272 + } 273 + 288 274 restore_guest_debug_regs(vcpu); 289 275 290 276 /*

+1

arch/arm64/kvm/guest.c

··· 937 937 } else { 938 938 /* If not enabled clear all flags */ 939 939 vcpu->guest_debug = 0; 940 + vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING); 940 941 } 941 942 942 943 out:

+7 -1

arch/arm64/kvm/handle_exit.c

··· 152 152 run->debug.arch.hsr_high = upper_32_bits(esr); 153 153 run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID; 154 154 155 - if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW) 155 + switch (ESR_ELx_EC(esr)) { 156 + case ESR_ELx_EC_WATCHPT_LOW: 156 157 run->debug.arch.far = vcpu->arch.fault.far_el2; 158 + break; 159 + case ESR_ELx_EC_SOFTSTP_LOW: 160 + vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING); 161 + break; 162 + } 157 163 158 164 return 0; 159 165 }

+1 -1

arch/arm64/kvm/hyp/nvhe/switch.c

··· 143 143 } 144 144 } 145 145 146 - /* Restore VGICv3 state on non_VEH systems */ 146 + /* Restore VGICv3 state on non-VHE systems */ 147 147 static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) 148 148 { 149 149 if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {

+73 -81

arch/arm64/kvm/sys_regs.c

··· 1063 1063 } 1064 1064 1065 1065 /* Read a sanitised cpufeature ID register by sys_reg_desc */ 1066 - static u64 read_id_reg(const struct kvm_vcpu *vcpu, 1067 - struct sys_reg_desc const *r, bool raz) 1066 + static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r) 1068 1067 { 1069 1068 u32 id = reg_to_encoding(r); 1070 1069 u64 val; 1071 1070 1072 - if (raz) 1071 + if (sysreg_visible_as_raz(vcpu, r)) 1073 1072 return 0; 1074 1073 1075 1074 val = read_sanitised_ftr_reg(id); ··· 1144 1145 return 0; 1145 1146 } 1146 1147 1147 - /* cpufeature ID register access trap handlers */ 1148 - 1149 - static bool __access_id_reg(struct kvm_vcpu *vcpu, 1150 - struct sys_reg_params *p, 1151 - const struct sys_reg_desc *r, 1152 - bool raz) 1148 + static unsigned int aa32_id_visibility(const struct kvm_vcpu *vcpu, 1149 + const struct sys_reg_desc *r) 1153 1150 { 1154 - if (p->is_write) 1155 - return write_to_read_only(vcpu, p, r); 1151 + /* 1152 + * AArch32 ID registers are UNKNOWN if AArch32 isn't implemented at any 1153 + * EL. Promote to RAZ/WI in order to guarantee consistency between 1154 + * systems. 
1155 + */ 1156 + if (!kvm_supports_32bit_el0()) 1157 + return REG_RAZ | REG_USER_WI; 1156 1158 1157 - p->regval = read_id_reg(vcpu, r, raz); 1158 - return true; 1159 + return id_visibility(vcpu, r); 1159 1160 } 1161 + 1162 + static unsigned int raz_visibility(const struct kvm_vcpu *vcpu, 1163 + const struct sys_reg_desc *r) 1164 + { 1165 + return REG_RAZ; 1166 + } 1167 + 1168 + /* cpufeature ID register access trap handlers */ 1160 1169 1161 1170 static bool access_id_reg(struct kvm_vcpu *vcpu, 1162 1171 struct sys_reg_params *p, 1163 1172 const struct sys_reg_desc *r) 1164 1173 { 1165 - bool raz = sysreg_visible_as_raz(vcpu, r); 1174 + if (p->is_write) 1175 + return write_to_read_only(vcpu, p, r); 1166 1176 1167 - return __access_id_reg(vcpu, p, r, raz); 1168 - } 1169 - 1170 - static bool access_raz_id_reg(struct kvm_vcpu *vcpu, 1171 - struct sys_reg_params *p, 1172 - const struct sys_reg_desc *r) 1173 - { 1174 - return __access_id_reg(vcpu, p, r, true); 1177 + p->regval = read_id_reg(vcpu, r); 1178 + return true; 1175 1179 } 1176 1180 1177 1181 /* Visibility overrides for SVE-specific control registers */ ··· 1210 1208 return -EINVAL; 1211 1209 1212 1210 /* We can only differ with CSV[23], and anything else is an error */ 1213 - val ^= read_id_reg(vcpu, rd, false); 1214 - val &= ~((0xFUL << ID_AA64PFR0_EL1_CSV2_SHIFT) | 1215 - (0xFUL << ID_AA64PFR0_EL1_CSV3_SHIFT)); 1211 + val ^= read_id_reg(vcpu, rd); 1212 + val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | 1213 + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); 1216 1214 if (val) 1217 1215 return -EINVAL; 1218 1216 ··· 1229 1227 * are stored, and for set_id_reg() we don't allow the effective value 1230 1228 * to be changed. 
1231 1229 */ 1232 - static int __get_id_reg(const struct kvm_vcpu *vcpu, 1233 - const struct sys_reg_desc *rd, u64 *val, 1234 - bool raz) 1235 - { 1236 - *val = read_id_reg(vcpu, rd, raz); 1237 - return 0; 1238 - } 1239 - 1240 - static int __set_id_reg(const struct kvm_vcpu *vcpu, 1241 - const struct sys_reg_desc *rd, u64 val, 1242 - bool raz) 1243 - { 1244 - /* This is what we mean by invariant: you can't change it. */ 1245 - if (val != read_id_reg(vcpu, rd, raz)) 1246 - return -EINVAL; 1247 - 1248 - return 0; 1249 - } 1250 - 1251 1230 static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 1252 1231 u64 *val) 1253 1232 { 1254 - bool raz = sysreg_visible_as_raz(vcpu, rd); 1255 - 1256 - return __get_id_reg(vcpu, rd, val, raz); 1233 + *val = read_id_reg(vcpu, rd); 1234 + return 0; 1257 1235 } 1258 1236 1259 1237 static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 1260 1238 u64 val) 1261 1239 { 1262 - bool raz = sysreg_visible_as_raz(vcpu, rd); 1240 + /* This is what we mean by invariant: you can't change it. 
*/ 1241 + if (val != read_id_reg(vcpu, rd)) 1242 + return -EINVAL; 1263 1243 1264 - return __set_id_reg(vcpu, rd, val, raz); 1265 - } 1266 - 1267 - static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, 1268 - u64 val) 1269 - { 1270 - return __set_id_reg(vcpu, rd, val, true); 1244 + return 0; 1271 1245 } 1272 1246 1273 1247 static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, ··· 1345 1367 .visibility = id_visibility, \ 1346 1368 } 1347 1369 1370 + /* sys_reg_desc initialiser for known cpufeature ID registers */ 1371 + #define AA32_ID_SANITISED(name) { \ 1372 + SYS_DESC(SYS_##name), \ 1373 + .access = access_id_reg, \ 1374 + .get_user = get_id_reg, \ 1375 + .set_user = set_id_reg, \ 1376 + .visibility = aa32_id_visibility, \ 1377 + } 1378 + 1348 1379 /* 1349 1380 * sys_reg_desc initialiser for architecturally unallocated cpufeature ID 1350 1381 * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 ··· 1361 1374 */ 1362 1375 #define ID_UNALLOCATED(crm, op2) { \ 1363 1376 Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ 1364 - .access = access_raz_id_reg, \ 1365 - .get_user = get_raz_reg, \ 1366 - .set_user = set_raz_id_reg, \ 1377 + .access = access_id_reg, \ 1378 + .get_user = get_id_reg, \ 1379 + .set_user = set_id_reg, \ 1380 + .visibility = raz_visibility \ 1367 1381 } 1368 1382 1369 1383 /* ··· 1374 1386 */ 1375 1387 #define ID_HIDDEN(name) { \ 1376 1388 SYS_DESC(SYS_##name), \ 1377 - .access = access_raz_id_reg, \ 1378 - .get_user = get_raz_reg, \ 1379 - .set_user = set_raz_id_reg, \ 1389 + .access = access_id_reg, \ 1390 + .get_user = get_id_reg, \ 1391 + .set_user = set_id_reg, \ 1392 + .visibility = raz_visibility, \ 1380 1393 } 1381 1394 1382 1395 /* ··· 1441 1452 1442 1453 /* AArch64 mappings of the AArch32 ID registers */ 1443 1454 /* CRm=1 */ 1444 - ID_SANITISED(ID_PFR0_EL1), 1445 - ID_SANITISED(ID_PFR1_EL1), 1446 - ID_SANITISED(ID_DFR0_EL1), 1455 + AA32_ID_SANITISED(ID_PFR0_EL1), 1456 + 
AA32_ID_SANITISED(ID_PFR1_EL1), 1457 + AA32_ID_SANITISED(ID_DFR0_EL1), 1447 1458 ID_HIDDEN(ID_AFR0_EL1), 1448 - ID_SANITISED(ID_MMFR0_EL1), 1449 - ID_SANITISED(ID_MMFR1_EL1), 1450 - ID_SANITISED(ID_MMFR2_EL1), 1451 - ID_SANITISED(ID_MMFR3_EL1), 1459 + AA32_ID_SANITISED(ID_MMFR0_EL1), 1460 + AA32_ID_SANITISED(ID_MMFR1_EL1), 1461 + AA32_ID_SANITISED(ID_MMFR2_EL1), 1462 + AA32_ID_SANITISED(ID_MMFR3_EL1), 1452 1463 1453 1464 /* CRm=2 */ 1454 - ID_SANITISED(ID_ISAR0_EL1), 1455 - ID_SANITISED(ID_ISAR1_EL1), 1456 - ID_SANITISED(ID_ISAR2_EL1), 1457 - ID_SANITISED(ID_ISAR3_EL1), 1458 - ID_SANITISED(ID_ISAR4_EL1), 1459 - ID_SANITISED(ID_ISAR5_EL1), 1460 - ID_SANITISED(ID_MMFR4_EL1), 1461 - ID_SANITISED(ID_ISAR6_EL1), 1465 + AA32_ID_SANITISED(ID_ISAR0_EL1), 1466 + AA32_ID_SANITISED(ID_ISAR1_EL1), 1467 + AA32_ID_SANITISED(ID_ISAR2_EL1), 1468 + AA32_ID_SANITISED(ID_ISAR3_EL1), 1469 + AA32_ID_SANITISED(ID_ISAR4_EL1), 1470 + AA32_ID_SANITISED(ID_ISAR5_EL1), 1471 + AA32_ID_SANITISED(ID_MMFR4_EL1), 1472 + AA32_ID_SANITISED(ID_ISAR6_EL1), 1462 1473 1463 1474 /* CRm=3 */ 1464 - ID_SANITISED(MVFR0_EL1), 1465 - ID_SANITISED(MVFR1_EL1), 1466 - ID_SANITISED(MVFR2_EL1), 1475 + AA32_ID_SANITISED(MVFR0_EL1), 1476 + AA32_ID_SANITISED(MVFR1_EL1), 1477 + AA32_ID_SANITISED(MVFR2_EL1), 1467 1478 ID_UNALLOCATED(3,3), 1468 - ID_SANITISED(ID_PFR2_EL1), 1479 + AA32_ID_SANITISED(ID_PFR2_EL1), 1469 1480 ID_HIDDEN(ID_DFR1_EL1), 1470 - ID_SANITISED(ID_MMFR5_EL1), 1481 + AA32_ID_SANITISED(ID_MMFR5_EL1), 1471 1482 ID_UNALLOCATED(3,7), 1472 1483 1473 1484 /* AArch64 ID registers */ ··· 2797 2808 r = id_to_sys_reg_desc(vcpu, reg->id, table, num); 2798 2809 if (!r) 2799 2810 return -ENOENT; 2811 + 2812 + if (sysreg_user_write_ignore(vcpu, r)) 2813 + return 0; 2800 2814 2801 2815 if (r->set_user) { 2802 2816 ret = (r->set_user)(vcpu, r, val);

+17 -7

arch/arm64/kvm/sys_regs.h

··· 86 86 87 87 #define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */ 88 88 #define REG_RAZ (1 << 1) /* RAZ from userspace and guest */ 89 + #define REG_USER_WI (1 << 2) /* WI from userspace only */ 89 90 90 91 static __printf(2, 3) 91 92 inline void print_sys_reg_msg(const struct sys_reg_params *p, ··· 137 136 __vcpu_sys_reg(vcpu, r->reg) = r->val; 138 137 } 139 138 139 + static inline unsigned int sysreg_visibility(const struct kvm_vcpu *vcpu, 140 + const struct sys_reg_desc *r) 141 + { 142 + if (likely(!r->visibility)) 143 + return 0; 144 + 145 + return r->visibility(vcpu, r); 146 + } 147 + 140 148 static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, 141 149 const struct sys_reg_desc *r) 142 150 { 143 - if (likely(!r->visibility)) 144 - return false; 145 - 146 - return r->visibility(vcpu, r) & REG_HIDDEN; 151 + return sysreg_visibility(vcpu, r) & REG_HIDDEN; 147 152 } 148 153 149 154 static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu, 150 155 const struct sys_reg_desc *r) 151 156 { 152 - if (likely(!r->visibility)) 153 - return false; 157 + return sysreg_visibility(vcpu, r) & REG_RAZ; 158 + } 154 159 155 - return r->visibility(vcpu, r) & REG_RAZ; 160 + static inline bool sysreg_user_write_ignore(const struct kvm_vcpu *vcpu, 161 + const struct sys_reg_desc *r) 162 + { 163 + return sysreg_visibility(vcpu, r) & REG_USER_WI; 156 164 } 157 165 158 166 static inline int cmp_sys_reg(const struct sys_reg_desc *i1,

+1 -1

arch/arm64/kvm/vgic/vgic-its.c

··· 406 406 struct its_ite *ite; 407 407 408 408 for_each_lpi_its(device, ite, its) { 409 - if (!ite->collection || coll != ite->collection) 409 + if (ite->collection != coll) 410 410 continue; 411 411 412 412 update_affinity_ite(kvm, ite);

+4

arch/riscv/Kconfig

··· 104 104 select HAVE_PERF_EVENTS 105 105 select HAVE_PERF_REGS 106 106 select HAVE_PERF_USER_STACK_DUMP 107 + select HAVE_POSIX_CPU_TIMERS_TASK_WORK 107 108 select HAVE_REGS_AND_STACK_ACCESS_API 108 109 select HAVE_FUNCTION_ARG_ACCESS_API 109 110 select HAVE_STACKPROTECTOR ··· 228 227 select ARCH_HAS_SYNC_DMA_FOR_CPU 229 228 select ARCH_HAS_SETUP_DMA_OPS 230 229 select DMA_DIRECT_REMAP 230 + 231 + config AS_HAS_INSN 232 + def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) 231 233 232 234 source "arch/riscv/Kconfig.socs" 233 235 source "arch/riscv/Kconfig.erratas"

+8

arch/riscv/include/asm/gpr-num.h

··· 3 3 #define __ASM_GPR_NUM_H 4 4 5 5 #ifdef __ASSEMBLY__ 6 + 7 + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 8 + .equ .L__gpr_num_x\num, \num 9 + .endr 10 + 6 11 .equ .L__gpr_num_zero, 0 7 12 .equ .L__gpr_num_ra, 1 8 13 .equ .L__gpr_num_sp, 2 ··· 44 39 #else /* __ASSEMBLY__ */ 45 40 46 41 #define __DEFINE_ASM_GPR_NUMS \ 42 + " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ 43 + " .equ .L__gpr_num_x\\num, \\num\n" \ 44 + " .endr\n" \ 47 45 " .equ .L__gpr_num_zero, 0\n" \ 48 46 " .equ .L__gpr_num_ra, 1\n" \ 49 47 " .equ .L__gpr_num_sp, 2\n" \

+4

arch/riscv/include/asm/hwcap.h

··· 58 58 RISCV_ISA_EXT_ZICBOM, 59 59 RISCV_ISA_EXT_ZIHINTPAUSE, 60 60 RISCV_ISA_EXT_SSTC, 61 + RISCV_ISA_EXT_SVINVAL, 61 62 RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, 62 63 }; 63 64 ··· 70 69 enum riscv_isa_ext_key { 71 70 RISCV_ISA_EXT_KEY_FPU, /* For 'F' and 'D' */ 72 71 RISCV_ISA_EXT_KEY_ZIHINTPAUSE, 72 + RISCV_ISA_EXT_KEY_SVINVAL, 73 73 RISCV_ISA_EXT_KEY_MAX, 74 74 }; 75 75 ··· 92 90 return RISCV_ISA_EXT_KEY_FPU; 93 91 case RISCV_ISA_EXT_ZIHINTPAUSE: 94 92 return RISCV_ISA_EXT_KEY_ZIHINTPAUSE; 93 + case RISCV_ISA_EXT_SVINVAL: 94 + return RISCV_ISA_EXT_KEY_SVINVAL; 95 95 default: 96 96 return -EINVAL; 97 97 }

+137

arch/riscv/include/asm/insn-def.h

··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + 3 + #ifndef __ASM_INSN_DEF_H 4 + #define __ASM_INSN_DEF_H 5 + 6 + #include <asm/asm.h> 7 + 8 + #define INSN_R_FUNC7_SHIFT 25 9 + #define INSN_R_RS2_SHIFT 20 10 + #define INSN_R_RS1_SHIFT 15 11 + #define INSN_R_FUNC3_SHIFT 12 12 + #define INSN_R_RD_SHIFT 7 13 + #define INSN_R_OPCODE_SHIFT 0 14 + 15 + #ifdef __ASSEMBLY__ 16 + 17 + #ifdef CONFIG_AS_HAS_INSN 18 + 19 + .macro insn_r, opcode, func3, func7, rd, rs1, rs2 20 + .insn r \opcode, \func3, \func7, \rd, \rs1, \rs2 21 + .endm 22 + 23 + #else 24 + 25 + #include <asm/gpr-num.h> 26 + 27 + .macro insn_r, opcode, func3, func7, rd, rs1, rs2 28 + .4byte ((\opcode << INSN_R_OPCODE_SHIFT) | \ 29 + (\func3 << INSN_R_FUNC3_SHIFT) | \ 30 + (\func7 << INSN_R_FUNC7_SHIFT) | \ 31 + (.L__gpr_num_\rd << INSN_R_RD_SHIFT) | \ 32 + (.L__gpr_num_\rs1 << INSN_R_RS1_SHIFT) | \ 33 + (.L__gpr_num_\rs2 << INSN_R_RS2_SHIFT)) 34 + .endm 35 + 36 + #endif 37 + 38 + #define __INSN_R(...) insn_r __VA_ARGS__ 39 + 40 + #else /* ! 
__ASSEMBLY__ */ 41 + 42 + #ifdef CONFIG_AS_HAS_INSN 43 + 44 + #define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ 45 + ".insn r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" 46 + 47 + #else 48 + 49 + #include <linux/stringify.h> 50 + #include <asm/gpr-num.h> 51 + 52 + #define DEFINE_INSN_R \ 53 + __DEFINE_ASM_GPR_NUMS \ 54 + " .macro insn_r, opcode, func3, func7, rd, rs1, rs2\n" \ 55 + " .4byte ((\\opcode << " __stringify(INSN_R_OPCODE_SHIFT) ") |" \ 56 + " (\\func3 << " __stringify(INSN_R_FUNC3_SHIFT) ") |" \ 57 + " (\\func7 << " __stringify(INSN_R_FUNC7_SHIFT) ") |" \ 58 + " (.L__gpr_num_\\rd << " __stringify(INSN_R_RD_SHIFT) ") |" \ 59 + " (.L__gpr_num_\\rs1 << " __stringify(INSN_R_RS1_SHIFT) ") |" \ 60 + " (.L__gpr_num_\\rs2 << " __stringify(INSN_R_RS2_SHIFT) "))\n" \ 61 + " .endm\n" 62 + 63 + #define UNDEFINE_INSN_R \ 64 + " .purgem insn_r\n" 65 + 66 + #define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ 67 + DEFINE_INSN_R \ 68 + "insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \ 69 + UNDEFINE_INSN_R 70 + 71 + #endif 72 + 73 + #endif /* ! 
__ASSEMBLY__ */ 74 + 75 + #define INSN_R(opcode, func3, func7, rd, rs1, rs2) \ 76 + __INSN_R(RV_##opcode, RV_##func3, RV_##func7, \ 77 + RV_##rd, RV_##rs1, RV_##rs2) 78 + 79 + #define RV_OPCODE(v) __ASM_STR(v) 80 + #define RV_FUNC3(v) __ASM_STR(v) 81 + #define RV_FUNC7(v) __ASM_STR(v) 82 + #define RV_RD(v) __ASM_STR(v) 83 + #define RV_RS1(v) __ASM_STR(v) 84 + #define RV_RS2(v) __ASM_STR(v) 85 + #define __RV_REG(v) __ASM_STR(x ## v) 86 + #define RV___RD(v) __RV_REG(v) 87 + #define RV___RS1(v) __RV_REG(v) 88 + #define RV___RS2(v) __RV_REG(v) 89 + 90 + #define RV_OPCODE_SYSTEM RV_OPCODE(115) 91 + 92 + #define HFENCE_VVMA(vaddr, asid) \ 93 + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ 94 + __RD(0), RS1(vaddr), RS2(asid)) 95 + 96 + #define HFENCE_GVMA(gaddr, vmid) \ 97 + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(49), \ 98 + __RD(0), RS1(gaddr), RS2(vmid)) 99 + 100 + #define HLVX_HU(dest, addr) \ 101 + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(50), \ 102 + RD(dest), RS1(addr), __RS2(3)) 103 + 104 + #define HLV_W(dest, addr) \ 105 + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(52), \ 106 + RD(dest), RS1(addr), __RS2(0)) 107 + 108 + #ifdef CONFIG_64BIT 109 + #define HLV_D(dest, addr) \ 110 + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(54), \ 111 + RD(dest), RS1(addr), __RS2(0)) 112 + #else 113 + #define HLV_D(dest, addr) \ 114 + __ASM_STR(.error "hlv.d requires 64-bit support") 115 + #endif 116 + 117 + #define SINVAL_VMA(vaddr, asid) \ 118 + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(11), \ 119 + __RD(0), RS1(vaddr), RS2(asid)) 120 + 121 + #define SFENCE_W_INVAL() \ 122 + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12), \ 123 + __RD(0), __RS1(0), __RS2(0)) 124 + 125 + #define SFENCE_INVAL_IR() \ 126 + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12), \ 127 + __RD(0), __RS1(0), __RS2(1)) 128 + 129 + #define HINVAL_VVMA(vaddr, asid) \ 130 + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(19), \ 131 + __RD(0), RS1(vaddr), RS2(asid)) 132 + 133 + #define HINVAL_GVMA(gaddr, vmid) \ 134 + INSN_R(OPCODE_SYSTEM, FUNC3(0), 
FUNC7(51), \ 135 + __RD(0), RS1(gaddr), RS2(vmid)) 136 + 137 + #endif /* __ASM_INSN_DEF_H */

+1

arch/riscv/include/asm/kvm_host.h

··· 67 67 u64 mmio_exit_kernel; 68 68 u64 csr_exit_user; 69 69 u64 csr_exit_kernel; 70 + u64 signal_exits; 70 71 u64 exits; 71 72 }; 72 73

+2 -2

arch/riscv/include/asm/kvm_vcpu_sbi.h

··· 11 11 12 12 #define KVM_SBI_IMPID 3 13 13 14 - #define KVM_SBI_VERSION_MAJOR 0 15 - #define KVM_SBI_VERSION_MINOR 3 14 + #define KVM_SBI_VERSION_MAJOR 1 15 + #define KVM_SBI_VERSION_MINOR 0 16 16 17 17 struct kvm_vcpu_sbi_extension { 18 18 unsigned long extid_start;

+4

arch/riscv/include/uapi/asm/kvm.h

··· 48 48 /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ 49 49 struct kvm_riscv_config { 50 50 unsigned long isa; 51 + unsigned long zicbom_block_size; 51 52 }; 52 53 53 54 /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ ··· 99 98 KVM_RISCV_ISA_EXT_M, 100 99 KVM_RISCV_ISA_EXT_SVPBMT, 101 100 KVM_RISCV_ISA_EXT_SSTC, 101 + KVM_RISCV_ISA_EXT_SVINVAL, 102 + KVM_RISCV_ISA_EXT_ZIHINTPAUSE, 103 + KVM_RISCV_ISA_EXT_ZICBOM, 102 104 KVM_RISCV_ISA_EXT_MAX, 103 105 }; 104 106

+1

arch/riscv/kernel/cpu.c

··· 93 93 static struct riscv_isa_ext_data isa_ext_arr[] = { 94 94 __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), 95 95 __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), 96 + __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), 96 97 __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), 97 98 __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), 98 99 __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),

+1

arch/riscv/kernel/cpufeature.c

··· 204 204 SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); 205 205 SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); 206 206 SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); 207 + SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); 207 208 } 208 209 #undef SET_ISA_EXT_MAP 209 210 }

+1

arch/riscv/kvm/Kconfig

··· 24 24 select PREEMPT_NOTIFIERS 25 25 select KVM_MMIO 26 26 select KVM_GENERIC_DIRTYLOG_READ_PROTECT 27 + select KVM_XFER_TO_GUEST_WORK 27 28 select HAVE_KVM_VCPU_ASYNC_IOCTL 28 29 select HAVE_KVM_EVENTFD 29 30 select SRCU

+1 -1

arch/riscv/kvm/main.c

··· 122 122 { 123 123 } 124 124 125 - static int riscv_kvm_init(void) 125 + static int __init riscv_kvm_init(void) 126 126 { 127 127 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 128 128 }

+48 -107

arch/riscv/kvm/tlb.c

··· 12 12 #include <linux/kvm_host.h> 13 13 #include <asm/cacheflush.h> 14 14 #include <asm/csr.h> 15 + #include <asm/hwcap.h> 16 + #include <asm/insn-def.h> 15 17 16 - /* 17 - * Instruction encoding of hfence.gvma is: 18 - * HFENCE.GVMA rs1, rs2 19 - * HFENCE.GVMA zero, rs2 20 - * HFENCE.GVMA rs1 21 - * HFENCE.GVMA 22 - * 23 - * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2 24 - * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2 25 - * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1 26 - * rs1==zero and rs2==zero ==> HFENCE.GVMA 27 - * 28 - * Instruction encoding of HFENCE.GVMA is: 29 - * 0110001 rs2(5) rs1(5) 000 00000 1110011 30 - */ 18 + #define has_svinval() \ 19 + static_branch_unlikely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_SVINVAL]) 31 20 32 21 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, 33 22 gpa_t gpa, gpa_t gpsz, ··· 29 40 return; 30 41 } 31 42 32 - for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) { 33 - /* 34 - * rs1 = a0 (GPA >> 2) 35 - * rs2 = a1 (VMID) 36 - * HFENCE.GVMA a0, a1 37 - * 0110001 01011 01010 000 00000 1110011 38 - */ 39 - asm volatile ("srli a0, %0, 2\n" 40 - "add a1, %1, zero\n" 41 - ".word 0x62b50073\n" 42 - :: "r" (pos), "r" (vmid) 43 - : "a0", "a1", "memory"); 43 + if (has_svinval()) { 44 + asm volatile (SFENCE_W_INVAL() ::: "memory"); 45 + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) 46 + asm volatile (HINVAL_GVMA(%0, %1) 47 + : : "r" (pos >> 2), "r" (vmid) : "memory"); 48 + asm volatile (SFENCE_INVAL_IR() ::: "memory"); 49 + } else { 50 + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) 51 + asm volatile (HFENCE_GVMA(%0, %1) 52 + : : "r" (pos >> 2), "r" (vmid) : "memory"); 44 53 } 45 54 } 46 55 47 56 void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid) 48 57 { 49 - /* 50 - * rs1 = zero 51 - * rs2 = a0 (VMID) 52 - * HFENCE.GVMA zero, a0 53 - * 0110001 01010 00000 000 00000 1110011 54 - */ 55 - asm volatile ("add a0, %0, zero\n" 56 - ".word 0x62a00073\n" 57 - :: "r" (vmid) : "a0", 
"memory"); 58 + asm volatile(HFENCE_GVMA(zero, %0) : : "r" (vmid) : "memory"); 58 59 } 59 60 60 61 void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz, ··· 57 78 return; 58 79 } 59 80 60 - for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) { 61 - /* 62 - * rs1 = a0 (GPA >> 2) 63 - * rs2 = zero 64 - * HFENCE.GVMA a0 65 - * 0110001 00000 01010 000 00000 1110011 66 - */ 67 - asm volatile ("srli a0, %0, 2\n" 68 - ".word 0x62050073\n" 69 - :: "r" (pos) : "a0", "memory"); 81 + if (has_svinval()) { 82 + asm volatile (SFENCE_W_INVAL() ::: "memory"); 83 + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) 84 + asm volatile(HINVAL_GVMA(%0, zero) 85 + : : "r" (pos >> 2) : "memory"); 86 + asm volatile (SFENCE_INVAL_IR() ::: "memory"); 87 + } else { 88 + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) 89 + asm volatile(HFENCE_GVMA(%0, zero) 90 + : : "r" (pos >> 2) : "memory"); 70 91 } 71 92 } 72 93 73 94 void kvm_riscv_local_hfence_gvma_all(void) 74 95 { 75 - /* 76 - * rs1 = zero 77 - * rs2 = zero 78 - * HFENCE.GVMA 79 - * 0110001 00000 00000 000 00000 1110011 80 - */ 81 - asm volatile (".word 0x62000073" ::: "memory"); 96 + asm volatile(HFENCE_GVMA(zero, zero) : : : "memory"); 82 97 } 83 - 84 - /* 85 - * Instruction encoding of hfence.gvma is: 86 - * HFENCE.VVMA rs1, rs2 87 - * HFENCE.VVMA zero, rs2 88 - * HFENCE.VVMA rs1 89 - * HFENCE.VVMA 90 - * 91 - * rs1!=zero and rs2!=zero ==> HFENCE.VVMA rs1, rs2 92 - * rs1==zero and rs2!=zero ==> HFENCE.VVMA zero, rs2 93 - * rs1!=zero and rs2==zero ==> HFENCE.VVMA rs1 94 - * rs1==zero and rs2==zero ==> HFENCE.VVMA 95 - * 96 - * Instruction encoding of HFENCE.VVMA is: 97 - * 0010001 rs2(5) rs1(5) 000 00000 1110011 98 - */ 99 98 100 99 void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid, 101 100 unsigned long asid, ··· 90 133 91 134 hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); 92 135 93 - for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) { 94 - /* 95 - * rs1 = a0 (GVA) 96 - * rs2 = a1 (ASID) 
97 - * HFENCE.VVMA a0, a1 98 - * 0010001 01011 01010 000 00000 1110011 99 - */ 100 - asm volatile ("add a0, %0, zero\n" 101 - "add a1, %1, zero\n" 102 - ".word 0x22b50073\n" 103 - :: "r" (pos), "r" (asid) 104 - : "a0", "a1", "memory"); 136 + if (has_svinval()) { 137 + asm volatile (SFENCE_W_INVAL() ::: "memory"); 138 + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) 139 + asm volatile(HINVAL_VVMA(%0, %1) 140 + : : "r" (pos), "r" (asid) : "memory"); 141 + asm volatile (SFENCE_INVAL_IR() ::: "memory"); 142 + } else { 143 + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) 144 + asm volatile(HFENCE_VVMA(%0, %1) 145 + : : "r" (pos), "r" (asid) : "memory"); 105 146 } 106 147 107 148 csr_write(CSR_HGATP, hgatp); ··· 112 157 113 158 hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); 114 159 115 - /* 116 - * rs1 = zero 117 - * rs2 = a0 (ASID) 118 - * HFENCE.VVMA zero, a0 119 - * 0010001 01010 00000 000 00000 1110011 120 - */ 121 - asm volatile ("add a0, %0, zero\n" 122 - ".word 0x22a00073\n" 123 - :: "r" (asid) : "a0", "memory"); 160 + asm volatile(HFENCE_VVMA(zero, %0) : : "r" (asid) : "memory"); 124 161 125 162 csr_write(CSR_HGATP, hgatp); 126 163 } ··· 130 183 131 184 hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); 132 185 133 - for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) { 134 - /* 135 - * rs1 = a0 (GVA) 136 - * rs2 = zero 137 - * HFENCE.VVMA a0 138 - * 0010001 00000 01010 000 00000 1110011 139 - */ 140 - asm volatile ("add a0, %0, zero\n" 141 - ".word 0x22050073\n" 142 - :: "r" (pos) : "a0", "memory"); 186 + if (has_svinval()) { 187 + asm volatile (SFENCE_W_INVAL() ::: "memory"); 188 + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) 189 + asm volatile(HINVAL_VVMA(%0, zero) 190 + : : "r" (pos) : "memory"); 191 + asm volatile (SFENCE_INVAL_IR() ::: "memory"); 192 + } else { 193 + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) 194 + asm volatile(HFENCE_VVMA(%0, zero) 195 + : : "r" (pos) : "memory"); 143 196 } 144 197 145 
198 csr_write(CSR_HGATP, hgatp); ··· 151 204 152 205 hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); 153 206 154 - /* 155 - * rs1 = zero 156 - * rs2 = zero 157 - * HFENCE.VVMA 158 - * 0010001 00000 00000 000 00000 1110011 159 - */ 160 - asm volatile (".word 0x22000073" ::: "memory"); 207 + asm volatile(HFENCE_VVMA(zero, zero) : : : "memory"); 161 208 162 209 csr_write(CSR_HGATP, hgatp); 163 210 }

+38 -22

arch/riscv/kvm/vcpu.c

··· 7 7 */ 8 8 9 9 #include <linux/bitops.h> 10 + #include <linux/entry-kvm.h> 10 11 #include <linux/errno.h> 11 12 #include <linux/err.h> 12 13 #include <linux/kdebug.h> ··· 19 18 #include <linux/fs.h> 20 19 #include <linux/kvm_host.h> 21 20 #include <asm/csr.h> 21 + #include <asm/cacheflush.h> 22 22 #include <asm/hwcap.h> 23 23 24 24 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { ··· 30 28 STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), 31 29 STATS_DESC_COUNTER(VCPU, csr_exit_user), 32 30 STATS_DESC_COUNTER(VCPU, csr_exit_kernel), 31 + STATS_DESC_COUNTER(VCPU, signal_exits), 33 32 STATS_DESC_COUNTER(VCPU, exits) 34 33 }; 35 34 ··· 45 42 46 43 #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) 47 44 45 + #define KVM_ISA_EXT_ARR(ext) [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext 46 + 48 47 /* Mapping between KVM ISA Extension ID & Host ISA extension ID */ 49 48 static const unsigned long kvm_isa_ext_arr[] = { 50 - RISCV_ISA_EXT_a, 51 - RISCV_ISA_EXT_c, 52 - RISCV_ISA_EXT_d, 53 - RISCV_ISA_EXT_f, 54 - RISCV_ISA_EXT_h, 55 - RISCV_ISA_EXT_i, 56 - RISCV_ISA_EXT_m, 57 - RISCV_ISA_EXT_SVPBMT, 58 - RISCV_ISA_EXT_SSTC, 49 + [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, 50 + [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, 51 + [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, 52 + [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, 53 + [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, 54 + [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, 55 + [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, 56 + 57 + KVM_ISA_EXT_ARR(SSTC), 58 + KVM_ISA_EXT_ARR(SVINVAL), 59 + KVM_ISA_EXT_ARR(SVPBMT), 60 + KVM_ISA_EXT_ARR(ZIHINTPAUSE), 61 + KVM_ISA_EXT_ARR(ZICBOM), 59 62 }; 60 63 61 64 static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) ··· 96 87 case KVM_RISCV_ISA_EXT_I: 97 88 case KVM_RISCV_ISA_EXT_M: 98 89 case KVM_RISCV_ISA_EXT_SSTC: 90 + case KVM_RISCV_ISA_EXT_SVINVAL: 91 + case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: 99 92 return false; 100 93 default: 101 94 break; ··· 265 254 case KVM_REG_RISCV_CONFIG_REG(isa): 266 255 
reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; 267 256 break; 257 + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): 258 + if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) 259 + return -EINVAL; 260 + reg_val = riscv_cbom_block_size; 261 + break; 268 262 default: 269 263 return -EINVAL; 270 264 } ··· 327 311 return -EOPNOTSUPP; 328 312 } 329 313 break; 314 + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): 315 + return -EOPNOTSUPP; 330 316 default: 331 317 return -EINVAL; 332 318 } ··· 802 784 { 803 785 u64 henvcfg = 0; 804 786 805 - if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SVPBMT)) 787 + if (riscv_isa_extension_available(isa, SVPBMT)) 806 788 henvcfg |= ENVCFG_PBMTE; 807 789 808 - if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SSTC)) 790 + if (riscv_isa_extension_available(isa, SSTC)) 809 791 henvcfg |= ENVCFG_STCE; 792 + 793 + if (riscv_isa_extension_available(isa, ZICBOM)) 794 + henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE); 795 + 810 796 csr_write(CSR_HENVCFG, henvcfg); 811 797 #ifdef CONFIG_32BIT 812 798 csr_write(CSR_HENVCFGH, henvcfg >> 32); ··· 980 958 run->exit_reason = KVM_EXIT_UNKNOWN; 981 959 while (ret > 0) { 982 960 /* Check conditions before entering the guest */ 983 - cond_resched(); 961 + ret = xfer_to_guest_mode_handle_work(vcpu); 962 + if (!ret) 963 + ret = 1; 984 964 985 965 kvm_riscv_gstage_vmid_update(vcpu); 986 966 987 967 kvm_riscv_check_vcpu_requests(vcpu); 988 968 989 969 local_irq_disable(); 990 - 991 - /* 992 - * Exit if we have a signal pending so that we can deliver 993 - * the signal to user space. 
994 - */ 995 - if (signal_pending(current)) { 996 - ret = -EINTR; 997 - run->exit_reason = KVM_EXIT_INTR; 998 - } 999 970 1000 971 /* 1001 972 * Ensure we set mode to IN_GUEST_MODE after we disable ··· 1012 997 1013 998 if (ret <= 0 || 1014 999 kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || 1015 - kvm_request_pending(vcpu)) { 1000 + kvm_request_pending(vcpu) || 1001 + xfer_to_guest_mode_work_pending()) { 1016 1002 vcpu->mode = OUTSIDE_GUEST_MODE; 1017 1003 local_irq_enable(); 1018 1004 kvm_vcpu_srcu_read_lock(vcpu);

+8 -31

arch/riscv/kvm/vcpu_exit.c

··· 8 8 9 9 #include <linux/kvm_host.h> 10 10 #include <asm/csr.h> 11 + #include <asm/insn-def.h> 11 12 12 13 static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, 13 14 struct kvm_cpu_trap *trap) ··· 63 62 { 64 63 register unsigned long taddr asm("a0") = (unsigned long)trap; 65 64 register unsigned long ttmp asm("a1"); 66 - register unsigned long val asm("t0"); 67 - register unsigned long tmp asm("t1"); 68 - register unsigned long addr asm("t2") = guest_addr; 69 - unsigned long flags; 70 - unsigned long old_stvec, old_hstatus; 65 + unsigned long flags, val, tmp, old_stvec, old_hstatus; 71 66 72 67 local_irq_save(flags); 73 68 ··· 79 82 ".option push\n" 80 83 ".option norvc\n" 81 84 "add %[ttmp], %[taddr], 0\n" 82 - /* 83 - * HLVX.HU %[val], (%[addr]) 84 - * HLVX.HU t0, (t2) 85 - * 0110010 00011 00111 100 00101 1110011 86 - */ 87 - ".word 0x6433c2f3\n" 85 + HLVX_HU(%[val], %[addr]) 88 86 "andi %[tmp], %[val], 3\n" 89 87 "addi %[tmp], %[tmp], -3\n" 90 88 "bne %[tmp], zero, 2f\n" 91 89 "addi %[addr], %[addr], 2\n" 92 - /* 93 - * HLVX.HU %[tmp], (%[addr]) 94 - * HLVX.HU t1, (t2) 95 - * 0110010 00011 00111 100 00110 1110011 96 - */ 97 - ".word 0x6433c373\n" 90 + HLVX_HU(%[tmp], %[addr]) 98 91 "sll %[tmp], %[tmp], 16\n" 99 92 "add %[val], %[val], %[tmp]\n" 100 93 "2:\n" 101 94 ".option pop" 102 95 : [val] "=&r" (val), [tmp] "=&r" (tmp), 103 96 [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp), 104 - [addr] "+&r" (addr) : : "memory"); 97 + [addr] "+&r" (guest_addr) : : "memory"); 105 98 106 99 if (trap->scause == EXC_LOAD_PAGE_FAULT) 107 100 trap->scause = EXC_INST_PAGE_FAULT; ··· 108 121 ".option norvc\n" 109 122 "add %[ttmp], %[taddr], 0\n" 110 123 #ifdef CONFIG_64BIT 111 - /* 112 - * HLV.D %[val], (%[addr]) 113 - * HLV.D t0, (t2) 114 - * 0110110 00000 00111 100 00101 1110011 115 - */ 116 - ".word 0x6c03c2f3\n" 124 + HLV_D(%[val], %[addr]) 117 125 #else 118 - /* 119 - * HLV.W %[val], (%[addr]) 120 - * HLV.W t0, (t2) 121 - * 0110100 00000 00111 100 00101 
1110011 122 - */ 123 - ".word 0x6803c2f3\n" 126 + HLV_W(%[val], %[addr]) 124 127 #endif 125 128 ".option pop" 126 129 : [val] "=&r" (val), 127 130 [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp) 128 - : [addr] "r" (addr) : "memory"); 131 + : [addr] "r" (guest_addr) : "memory"); 129 132 } 130 133 131 134 csr_write(CSR_STVEC, old_stvec);

+2

arch/riscv/mm/dma-noncoherent.c

··· 13 13 #include <asm/cacheflush.h> 14 14 15 15 unsigned int riscv_cbom_block_size; 16 + EXPORT_SYMBOL_GPL(riscv_cbom_block_size); 17 + 16 18 static bool noncoherent_supported; 17 19 18 20 void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,

+6 -6

arch/x86/include/asm/kvm_host.h

··· 1280 1280 bool tdp_mmu_enabled; 1281 1281 1282 1282 /* 1283 - * List of struct kvm_mmu_pages being used as roots. 1284 - * All struct kvm_mmu_pages in the list should have 1283 + * List of kvm_mmu_page structs being used as roots. 1284 + * All kvm_mmu_page structs in the list should have 1285 1285 * tdp_mmu_page set. 1286 1286 * 1287 1287 * For reads, this list is protected by: ··· 1300 1300 struct list_head tdp_mmu_roots; 1301 1301 1302 1302 /* 1303 - * List of struct kvmp_mmu_pages not being used as roots. 1304 - * All struct kvm_mmu_pages in the list should have 1303 + * List of kvm_mmu_page structs not being used as roots. 1304 + * All kvm_mmu_page structs in the list should have 1305 1305 * tdp_mmu_page set and a tdp_mmu_root_count of 0. 1306 1306 */ 1307 1307 struct list_head tdp_mmu_pages; ··· 1311 1311 * is held in read mode: 1312 1312 * - tdp_mmu_roots (above) 1313 1313 * - tdp_mmu_pages (above) 1314 - * - the link field of struct kvm_mmu_pages used by the TDP MMU 1314 + * - the link field of kvm_mmu_page structs used by the TDP MMU 1315 1315 * - lpage_disallowed_mmu_pages 1316 - * - the lpage_disallowed_link field of struct kvm_mmu_pages used 1316 + * - the lpage_disallowed_link field of kvm_mmu_page structs used 1317 1317 * by the TDP MMU 1318 1318 * It is acceptable, but not necessary, to acquire this lock when 1319 1319 * the thread holds the MMU lock in write mode.

+1 -1

arch/x86/include/asm/vmx.h

··· 309 309 GUEST_LDTR_AR_BYTES = 0x00004820, 310 310 GUEST_TR_AR_BYTES = 0x00004822, 311 311 GUEST_INTERRUPTIBILITY_INFO = 0x00004824, 312 - GUEST_ACTIVITY_STATE = 0X00004826, 312 + GUEST_ACTIVITY_STATE = 0x00004826, 313 313 GUEST_SYSENTER_CS = 0x0000482A, 314 314 VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, 315 315 HOST_IA32_SYSENTER_CS = 0x00004c00,

+2 -1

arch/x86/kvm/Kconfig

··· 28 28 select HAVE_KVM_IRQCHIP 29 29 select HAVE_KVM_PFNCACHE 30 30 select HAVE_KVM_IRQFD 31 - select HAVE_KVM_DIRTY_RING 31 + select HAVE_KVM_DIRTY_RING_TSO 32 + select HAVE_KVM_DIRTY_RING_ACQ_REL 32 33 select IRQ_BYPASS_MANAGER 33 34 select HAVE_KVM_IRQ_BYPASS 34 35 select HAVE_KVM_IRQ_ROUTING

+15 -5

arch/x86/kvm/pmu.c

··· 106 106 return; 107 107 108 108 if (pmc->perf_event && pmc->perf_event->attr.precise_ip) { 109 - /* Indicate PEBS overflow PMI to guest. */ 110 - skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, 111 - (unsigned long *)&pmu->global_status); 109 + if (!in_pmi) { 110 + /* 111 + * TODO: KVM is currently _choosing_ to not generate records 112 + * for emulated instructions, avoiding BUFFER_OVF PMI when 113 + * there are no records. Strictly speaking, it should be done 114 + * as well in the right context to improve sampling accuracy. 115 + */ 116 + skip_pmi = true; 117 + } else { 118 + /* Indicate PEBS overflow PMI to guest. */ 119 + skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, 120 + (unsigned long *)&pmu->global_status); 121 + } 112 122 } else { 113 123 __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); 114 124 } ··· 237 227 get_sample_period(pmc, pmc->counter))) 238 228 return false; 239 229 240 - if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) && 241 - pmc->perf_event->attr.precise_ip) 230 + if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) != 231 + (!!pmc->perf_event->attr.precise_ip)) 242 232 return false; 243 233 244 234 /* reuse perf_event to serve as pmc_reprogram_counter() does*/

+19 -98

arch/x86/kvm/svm/pmu.c

··· 23 23 PMU_TYPE_EVNTSEL, 24 24 }; 25 25 26 - enum index { 27 - INDEX_ZERO = 0, 28 - INDEX_ONE, 29 - INDEX_TWO, 30 - INDEX_THREE, 31 - INDEX_FOUR, 32 - INDEX_FIVE, 33 - INDEX_ERROR, 34 - }; 35 - 36 - static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type) 26 + static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) 37 27 { 38 - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); 28 + unsigned int num_counters = pmu->nr_arch_gp_counters; 39 29 40 - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { 41 - if (type == PMU_TYPE_COUNTER) 42 - return MSR_F15H_PERF_CTR; 43 - else 44 - return MSR_F15H_PERF_CTL; 45 - } else { 46 - if (type == PMU_TYPE_COUNTER) 47 - return MSR_K7_PERFCTR0; 48 - else 49 - return MSR_K7_EVNTSEL0; 50 - } 51 - } 30 + if (pmc_idx >= num_counters) 31 + return NULL; 52 32 53 - static enum index msr_to_index(u32 msr) 54 - { 55 - switch (msr) { 56 - case MSR_F15H_PERF_CTL0: 57 - case MSR_F15H_PERF_CTR0: 58 - case MSR_K7_EVNTSEL0: 59 - case MSR_K7_PERFCTR0: 60 - return INDEX_ZERO; 61 - case MSR_F15H_PERF_CTL1: 62 - case MSR_F15H_PERF_CTR1: 63 - case MSR_K7_EVNTSEL1: 64 - case MSR_K7_PERFCTR1: 65 - return INDEX_ONE; 66 - case MSR_F15H_PERF_CTL2: 67 - case MSR_F15H_PERF_CTR2: 68 - case MSR_K7_EVNTSEL2: 69 - case MSR_K7_PERFCTR2: 70 - return INDEX_TWO; 71 - case MSR_F15H_PERF_CTL3: 72 - case MSR_F15H_PERF_CTR3: 73 - case MSR_K7_EVNTSEL3: 74 - case MSR_K7_PERFCTR3: 75 - return INDEX_THREE; 76 - case MSR_F15H_PERF_CTL4: 77 - case MSR_F15H_PERF_CTR4: 78 - return INDEX_FOUR; 79 - case MSR_F15H_PERF_CTL5: 80 - case MSR_F15H_PERF_CTR5: 81 - return INDEX_FIVE; 82 - default: 83 - return INDEX_ERROR; 84 - } 33 + return &pmu->gp_counters[array_index_nospec(pmc_idx, num_counters)]; 85 34 } 86 35 87 36 static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, 88 37 enum pmu_type type) 89 38 { 90 39 struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); 40 + unsigned int idx; 91 41 92 42 if (!vcpu->kvm->arch.enable_pmu) 93 43 
return NULL; 94 44 95 45 switch (msr) { 96 - case MSR_F15H_PERF_CTL0: 97 - case MSR_F15H_PERF_CTL1: 98 - case MSR_F15H_PERF_CTL2: 99 - case MSR_F15H_PERF_CTL3: 100 - case MSR_F15H_PERF_CTL4: 101 - case MSR_F15H_PERF_CTL5: 46 + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: 102 47 if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) 103 48 return NULL; 104 - fallthrough; 49 + /* 50 + * Each PMU counter has a pair of CTL and CTR MSRs. CTLn 51 + * MSRs (accessed via EVNTSEL) are even, CTRn MSRs are odd. 52 + */ 53 + idx = (unsigned int)((msr - MSR_F15H_PERF_CTL0) / 2); 54 + if (!(msr & 0x1) != (type == PMU_TYPE_EVNTSEL)) 55 + return NULL; 56 + break; 105 57 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: 106 58 if (type != PMU_TYPE_EVNTSEL) 107 59 return NULL; 60 + idx = msr - MSR_K7_EVNTSEL0; 108 61 break; 109 - case MSR_F15H_PERF_CTR0: 110 - case MSR_F15H_PERF_CTR1: 111 - case MSR_F15H_PERF_CTR2: 112 - case MSR_F15H_PERF_CTR3: 113 - case MSR_F15H_PERF_CTR4: 114 - case MSR_F15H_PERF_CTR5: 115 - if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) 116 - return NULL; 117 - fallthrough; 118 62 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: 119 63 if (type != PMU_TYPE_COUNTER) 120 64 return NULL; 65 + idx = msr - MSR_K7_PERFCTR0; 121 66 break; 122 67 default: 123 68 return NULL; 124 69 } 125 70 126 - return &pmu->gp_counters[msr_to_index(msr)]; 71 + return amd_pmc_idx_to_pmc(pmu, idx); 127 72 } 128 73 129 74 static bool amd_hw_event_available(struct kvm_pmc *pmc) ··· 84 139 return true; 85 140 } 86 141 87 - static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) 88 - { 89 - unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER); 90 - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); 91 - 92 - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { 93 - /* 94 - * The idx is contiguous. The MSRs are not. The counter MSRs 95 - * are interleaved with the event select MSRs. 
96 - */ 97 - pmc_idx *= 2; 98 - } 99 - 100 - return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER); 101 - } 102 - 103 142 static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) 104 143 { 105 144 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); ··· 97 168 static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, 98 169 unsigned int idx, u64 *mask) 99 170 { 100 - struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); 101 - struct kvm_pmc *counters; 102 - 103 - idx &= ~(3u << 30); 104 - if (idx >= pmu->nr_arch_gp_counters) 105 - return NULL; 106 - counters = pmu->gp_counters; 107 - 108 - return &counters[idx]; 171 + return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30)); 109 172 } 110 173 111 174 static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)

+16 -13

arch/x86/kvm/vmx/pmu_intel.c

··· 68 68 } 69 69 } 70 70 71 - /* function is called when global control register has been updated. */ 72 - static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) 71 + static void reprogram_counters(struct kvm_pmu *pmu, u64 diff) 73 72 { 74 73 int bit; 75 - u64 diff = pmu->global_ctrl ^ data; 76 74 struct kvm_pmc *pmc; 77 - 78 - pmu->global_ctrl = data; 79 75 80 76 for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) { 81 77 pmc = intel_pmc_idx_to_pmc(pmu, bit); ··· 393 397 struct kvm_pmc *pmc; 394 398 u32 msr = msr_info->index; 395 399 u64 data = msr_info->data; 396 - u64 reserved_bits; 400 + u64 reserved_bits, diff; 397 401 398 402 switch (msr) { 399 403 case MSR_CORE_PERF_FIXED_CTR_CTRL: ··· 414 418 if (pmu->global_ctrl == data) 415 419 return 0; 416 420 if (kvm_valid_perf_global_ctrl(pmu, data)) { 417 - global_ctrl_changed(pmu, data); 421 + diff = pmu->global_ctrl ^ data; 422 + pmu->global_ctrl = data; 423 + reprogram_counters(pmu, diff); 418 424 return 0; 419 425 } 420 426 break; ··· 431 433 if (pmu->pebs_enable == data) 432 434 return 0; 433 435 if (!(data & pmu->pebs_enable_mask)) { 436 + diff = pmu->pebs_enable ^ data; 434 437 pmu->pebs_enable = data; 438 + reprogram_counters(pmu, diff); 435 439 return 0; 436 440 } 437 441 break; ··· 776 776 void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu) 777 777 { 778 778 struct kvm_pmc *pmc = NULL; 779 - int bit; 779 + int bit, hw_idx; 780 780 781 781 for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl, 782 782 X86_PMC_IDX_MAX) { 783 783 pmc = intel_pmc_idx_to_pmc(pmu, bit); 784 784 785 785 if (!pmc || !pmc_speculative_in_use(pmc) || 786 - !intel_pmc_is_enabled(pmc)) 786 + !intel_pmc_is_enabled(pmc) || !pmc->perf_event) 787 787 continue; 788 788 789 - if (pmc->perf_event && pmc->idx != pmc->perf_event->hw.idx) { 790 - pmu->host_cross_mapped_mask |= 791 - BIT_ULL(pmc->perf_event->hw.idx); 792 - } 789 + /* 790 + * A negative index indicates the event isn't mapped to a 791 + * physical counter 
in the host, e.g. due to contention. 792 + */ 793 + hw_idx = pmc->perf_event->hw.idx; 794 + if (hw_idx != pmc->idx && hw_idx > -1) 795 + pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx); 793 796 } 794 797 } 795 798

+1

include/uapi/linux/kvm.h

··· 1177 1177 #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 1178 1178 #define KVM_CAP_S390_ZPCI_OP 221 1179 1179 #define KVM_CAP_S390_CPU_TOPOLOGY 222 1180 + #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 1180 1181 1181 1182 #ifdef KVM_CAP_IRQ_ROUTING 1182 1183

+1

tools/testing/selftests/kvm/.gitignore

··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 + /aarch64/aarch32_id_regs 2 3 /aarch64/arch_timer 3 4 /aarch64/debug-exceptions 4 5 /aarch64/get-reg-list

+1

tools/testing/selftests/kvm/Makefile

··· 147 147 # Compiled outputs used by test targets 148 148 TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test 149 149 150 + TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs 150 151 TEST_GEN_PROGS_aarch64 += aarch64/arch_timer 151 152 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions 152 153 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list

+169

tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c

··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * aarch32_id_regs - Test for ID register behavior on AArch64-only systems 4 + * 5 + * Copyright (c) 2022 Google LLC. 6 + * 7 + * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ 8 + * and WI from userspace. 9 + */ 10 + 11 + #include <stdint.h> 12 + 13 + #include "kvm_util.h" 14 + #include "processor.h" 15 + #include "test_util.h" 16 + 17 + #define BAD_ID_REG_VAL 0x1badc0deul 18 + 19 + #define GUEST_ASSERT_REG_RAZ(reg) GUEST_ASSERT_EQ(read_sysreg_s(reg), 0) 20 + 21 + static void guest_main(void) 22 + { 23 + GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1); 24 + GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1); 25 + GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1); 26 + GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1); 27 + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1); 28 + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1); 29 + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1); 30 + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1); 31 + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1); 32 + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1); 33 + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1); 34 + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1); 35 + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1); 36 + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1); 37 + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1); 38 + GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1); 39 + GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1); 40 + GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1); 41 + GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1); 42 + GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3)); 43 + GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1); 44 + GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1); 45 + GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1); 46 + GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7)); 47 + 48 + GUEST_DONE(); 49 + } 50 + 51 + static void test_guest_raz(struct kvm_vcpu *vcpu) 52 + { 53 + struct ucall uc; 54 + 55 + vcpu_run(vcpu); 56 + 57 + switch (get_ucall(vcpu, &uc)) { 58 + case UCALL_ABORT: 59 + REPORT_GUEST_ASSERT(uc); 60 + break; 61 + case UCALL_DONE: 62 + break; 63 + default: 64 + TEST_FAIL("Unexpected 
ucall: %lu", uc.cmd); 65 + } 66 + } 67 + 68 + static uint64_t raz_wi_reg_ids[] = { 69 + KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1), 70 + KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1), 71 + KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1), 72 + KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1), 73 + KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1), 74 + KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1), 75 + KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1), 76 + KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1), 77 + KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1), 78 + KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1), 79 + KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1), 80 + KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1), 81 + KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1), 82 + KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1), 83 + KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1), 84 + KVM_ARM64_SYS_REG(SYS_MVFR0_EL1), 85 + KVM_ARM64_SYS_REG(SYS_MVFR1_EL1), 86 + KVM_ARM64_SYS_REG(SYS_MVFR2_EL1), 87 + KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1), 88 + KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1), 89 + }; 90 + 91 + static void test_user_raz_wi(struct kvm_vcpu *vcpu) 92 + { 93 + int i; 94 + 95 + for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) { 96 + uint64_t reg_id = raz_wi_reg_ids[i]; 97 + uint64_t val; 98 + 99 + vcpu_get_reg(vcpu, reg_id, &val); 100 + ASSERT_EQ(val, 0); 101 + 102 + /* 103 + * Expect the ioctl to succeed with no effect on the register 104 + * value. 
105 + */ 106 + vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); 107 + 108 + vcpu_get_reg(vcpu, reg_id, &val); 109 + ASSERT_EQ(val, 0); 110 + } 111 + } 112 + 113 + static uint64_t raz_invariant_reg_ids[] = { 114 + KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1), 115 + KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)), 116 + KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1), 117 + KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)), 118 + }; 119 + 120 + static void test_user_raz_invariant(struct kvm_vcpu *vcpu) 121 + { 122 + int i, r; 123 + 124 + for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) { 125 + uint64_t reg_id = raz_invariant_reg_ids[i]; 126 + uint64_t val; 127 + 128 + vcpu_get_reg(vcpu, reg_id, &val); 129 + ASSERT_EQ(val, 0); 130 + 131 + r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); 132 + TEST_ASSERT(r < 0 && errno == EINVAL, 133 + "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); 134 + 135 + vcpu_get_reg(vcpu, reg_id, &val); 136 + ASSERT_EQ(val, 0); 137 + } 138 + } 139 + 140 + 141 + 142 + static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) 143 + { 144 + uint64_t val, el0; 145 + 146 + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); 147 + 148 + el0 = (val & ARM64_FEATURE_MASK(ID_AA64PFR0_EL0)) >> ID_AA64PFR0_EL0_SHIFT; 149 + return el0 == ID_AA64PFR0_ELx_64BIT_ONLY; 150 + } 151 + 152 + int main(void) 153 + { 154 + struct kvm_vcpu *vcpu; 155 + struct kvm_vm *vm; 156 + 157 + vm = vm_create_with_one_vcpu(&vcpu, guest_main); 158 + 159 + TEST_REQUIRE(vcpu_aarch64_only(vcpu)); 160 + 161 + ucall_init(vm, NULL); 162 + 163 + test_user_raz_wi(vcpu); 164 + test_user_raz_invariant(vcpu); 165 + test_guest_raz(vcpu); 166 + 167 + ucall_uninit(vm); 168 + kvm_vm_free(vm); 169 + }

+145 -4

tools/testing/selftests/kvm/aarch64/debug-exceptions.c

··· 22 22 #define SPSR_SS (1 << 21) 23 23 24 24 extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start; 25 + extern unsigned char iter_ss_begin, iter_ss_end; 25 26 static volatile uint64_t sw_bp_addr, hw_bp_addr; 26 27 static volatile uint64_t wp_addr, wp_data_addr; 27 28 static volatile uint64_t svc_addr; ··· 239 238 svc_addr = regs->pc; 240 239 } 241 240 241 + enum single_step_op { 242 + SINGLE_STEP_ENABLE = 0, 243 + SINGLE_STEP_DISABLE = 1, 244 + }; 245 + 246 + static void guest_code_ss(int test_cnt) 247 + { 248 + uint64_t i; 249 + uint64_t bvr, wvr, w_bvr, w_wvr; 250 + 251 + for (i = 0; i < test_cnt; i++) { 252 + /* Bits [1:0] of dbg{b,w}vr are RES0 */ 253 + w_bvr = i << 2; 254 + w_wvr = i << 2; 255 + 256 + /* Enable Single Step execution */ 257 + GUEST_SYNC(SINGLE_STEP_ENABLE); 258 + 259 + /* 260 + * The userspace will veriry that the pc is as expected during 261 + * single step execution between iter_ss_begin and iter_ss_end. 262 + */ 263 + asm volatile("iter_ss_begin:nop\n"); 264 + 265 + write_sysreg(w_bvr, dbgbvr0_el1); 266 + write_sysreg(w_wvr, dbgwvr0_el1); 267 + bvr = read_sysreg(dbgbvr0_el1); 268 + wvr = read_sysreg(dbgwvr0_el1); 269 + 270 + asm volatile("iter_ss_end:\n"); 271 + 272 + /* Disable Single Step execution */ 273 + GUEST_SYNC(SINGLE_STEP_DISABLE); 274 + 275 + GUEST_ASSERT(bvr == w_bvr); 276 + GUEST_ASSERT(wvr == w_wvr); 277 + } 278 + GUEST_DONE(); 279 + } 280 + 242 281 static int debug_version(struct kvm_vcpu *vcpu) 243 282 { 244 283 uint64_t id_aa64dfr0; ··· 287 246 return id_aa64dfr0 & 0xf; 288 247 } 289 248 290 - int main(int argc, char *argv[]) 249 + static void test_guest_debug_exceptions(void) 291 250 { 292 251 struct kvm_vcpu *vcpu; 293 252 struct kvm_vm *vm; ··· 299 258 300 259 vm_init_descriptor_tables(vm); 301 260 vcpu_init_descriptor_tables(vcpu); 302 - 303 - __TEST_REQUIRE(debug_version(vcpu) >= 6, 304 - "Armv8 debug architecture not supported."); 305 261 306 262 vm_install_sync_handler(vm, 
VECTOR_SYNC_CURRENT, 307 263 ESR_EC_BRK_INS, guest_sw_bp_handler); ··· 332 294 333 295 done: 334 296 kvm_vm_free(vm); 297 + } 298 + 299 + void test_single_step_from_userspace(int test_cnt) 300 + { 301 + struct kvm_vcpu *vcpu; 302 + struct kvm_vm *vm; 303 + struct ucall uc; 304 + struct kvm_run *run; 305 + uint64_t pc, cmd; 306 + uint64_t test_pc = 0; 307 + bool ss_enable = false; 308 + struct kvm_guest_debug debug = {}; 309 + 310 + vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss); 311 + ucall_init(vm, NULL); 312 + run = vcpu->run; 313 + vcpu_args_set(vcpu, 1, test_cnt); 314 + 315 + while (1) { 316 + vcpu_run(vcpu); 317 + if (run->exit_reason != KVM_EXIT_DEBUG) { 318 + cmd = get_ucall(vcpu, &uc); 319 + if (cmd == UCALL_ABORT) { 320 + REPORT_GUEST_ASSERT(uc); 321 + /* NOT REACHED */ 322 + } else if (cmd == UCALL_DONE) { 323 + break; 324 + } 325 + 326 + TEST_ASSERT(cmd == UCALL_SYNC, 327 + "Unexpected ucall cmd 0x%lx", cmd); 328 + 329 + if (uc.args[1] == SINGLE_STEP_ENABLE) { 330 + debug.control = KVM_GUESTDBG_ENABLE | 331 + KVM_GUESTDBG_SINGLESTEP; 332 + ss_enable = true; 333 + } else { 334 + debug.control = SINGLE_STEP_DISABLE; 335 + ss_enable = false; 336 + } 337 + 338 + vcpu_guest_debug_set(vcpu, &debug); 339 + continue; 340 + } 341 + 342 + TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG"); 343 + 344 + /* Check if the current pc is expected. */ 345 + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); 346 + TEST_ASSERT(!test_pc || pc == test_pc, 347 + "Unexpected pc 0x%lx (expected 0x%lx)", 348 + pc, test_pc); 349 + 350 + /* 351 + * If the current pc is between iter_ss_bgin and 352 + * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should 353 + * be the current pc + 4. 
354 + */ 355 + if ((pc >= (uint64_t)&iter_ss_begin) && 356 + (pc < (uint64_t)&iter_ss_end)) 357 + test_pc = pc + 4; 358 + else 359 + test_pc = 0; 360 + } 361 + 362 + kvm_vm_free(vm); 363 + } 364 + 365 + static void help(char *name) 366 + { 367 + puts(""); 368 + printf("Usage: %s [-h] [-i iterations of the single step test]\n", name); 369 + puts(""); 370 + exit(0); 371 + } 372 + 373 + int main(int argc, char *argv[]) 374 + { 375 + struct kvm_vcpu *vcpu; 376 + struct kvm_vm *vm; 377 + int opt; 378 + int ss_iteration = 10000; 379 + 380 + vm = vm_create_with_one_vcpu(&vcpu, guest_code); 381 + __TEST_REQUIRE(debug_version(vcpu) >= 6, 382 + "Armv8 debug architecture not supported."); 383 + kvm_vm_free(vm); 384 + 385 + while ((opt = getopt(argc, argv, "i:")) != -1) { 386 + switch (opt) { 387 + case 'i': 388 + ss_iteration = atoi(optarg); 389 + break; 390 + case 'h': 391 + default: 392 + help(argv[0]); 393 + break; 394 + } 395 + } 396 + 397 + test_guest_debug_exceptions(); 398 + test_single_step_from_userspace(ss_iteration); 399 + 335 400 return 0; 336 401 }

+6 -4

tools/testing/selftests/kvm/aarch64/psci_test.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the 4 - * CPU_ON PSCI call matches what the caller requested. 3 + * psci_test - Tests relating to KVM's PSCI implementation. 5 4 * 6 5 * Copyright (c) 2021 Google LLC. 7 6 * 8 - * This is a regression test for a race between KVM servicing the PSCI call and 9 - * userspace reading the vCPUs registers. 7 + * This test includes: 8 + * - A regression test for a race between KVM servicing the PSCI CPU_ON call 9 + * and userspace reading the targeted vCPU's registers. 10 + * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated 11 + * KVM_SYSTEM_EVENT_SUSPEND UAPI. 10 12 */ 11 13 12 14 #define _GNU_SOURCE

+5 -3

tools/testing/selftests/kvm/dirty_log_test.c

··· 17 17 #include <linux/bitmap.h> 18 18 #include <linux/bitops.h> 19 19 #include <linux/atomic.h> 20 + #include <asm/barrier.h> 20 21 21 22 #include "kvm_util.h" 22 23 #include "test_util.h" ··· 265 264 266 265 static bool dirty_ring_supported(void) 267 266 { 268 - return kvm_has_cap(KVM_CAP_DIRTY_LOG_RING); 267 + return (kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) || 268 + kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL)); 269 269 } 270 270 271 271 static void dirty_ring_create_vm_done(struct kvm_vm *vm) ··· 281 279 282 280 static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn) 283 281 { 284 - return gfn->flags == KVM_DIRTY_GFN_F_DIRTY; 282 + return smp_load_acquire(&gfn->flags) == KVM_DIRTY_GFN_F_DIRTY; 285 283 } 286 284 287 285 static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn) 288 286 { 289 - gfn->flags = KVM_DIRTY_GFN_F_RESET; 287 + smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET); 290 288 } 291 289 292 290 static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,

+4

tools/testing/selftests/kvm/include/kvm_util_base.h

··· 175 175 176 176 int open_path_or_exit(const char *path, int flags); 177 177 int open_kvm_dev_path_or_exit(void); 178 + 179 + bool get_kvm_intel_param_bool(const char *param); 180 + bool get_kvm_amd_param_bool(const char *param); 181 + 178 182 unsigned int kvm_check_cap(long cap); 179 183 180 184 static inline bool kvm_has_cap(long cap)

+4 -2

tools/testing/selftests/kvm/include/test_util.h

··· 63 63 #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ 64 64 } while (0) 65 65 66 - #define TEST_FAIL(fmt, ...) \ 67 - TEST_ASSERT(false, fmt, ##__VA_ARGS__) 66 + #define TEST_FAIL(fmt, ...) do { \ 67 + TEST_ASSERT(false, fmt, ##__VA_ARGS__); \ 68 + __builtin_unreachable(); \ 69 + } while (0) 68 70 69 71 size_t parse_size(const char *size); 70 72

+4

tools/testing/selftests/kvm/include/x86_64/processor.h

··· 825 825 return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); 826 826 } 827 827 828 + bool kvm_is_tdp_enabled(void); 829 + 828 830 uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, 829 831 uint64_t vaddr); 830 832 void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, ··· 857 855 #define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) 858 856 859 857 void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); 858 + void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 859 + uint64_t nr_bytes, int level); 860 860 861 861 /* 862 862 * Basic CPU control in CR0

+43 -1

tools/testing/selftests/kvm/lib/kvm_util.c

··· 50 50 return _open_kvm_dev_path_or_exit(O_RDONLY); 51 51 } 52 52 53 + static bool get_module_param_bool(const char *module_name, const char *param) 54 + { 55 + const int path_size = 128; 56 + char path[path_size]; 57 + char value; 58 + ssize_t r; 59 + int fd; 60 + 61 + r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", 62 + module_name, param); 63 + TEST_ASSERT(r < path_size, 64 + "Failed to construct sysfs path in %d bytes.", path_size); 65 + 66 + fd = open_path_or_exit(path, O_RDONLY); 67 + 68 + r = read(fd, &value, 1); 69 + TEST_ASSERT(r == 1, "read(%s) failed", path); 70 + 71 + r = close(fd); 72 + TEST_ASSERT(!r, "close(%s) failed", path); 73 + 74 + if (value == 'Y') 75 + return true; 76 + else if (value == 'N') 77 + return false; 78 + 79 + TEST_FAIL("Unrecognized value '%c' for boolean module param", value); 80 + } 81 + 82 + bool get_kvm_intel_param_bool(const char *param) 83 + { 84 + return get_module_param_bool("kvm_intel", param); 85 + } 86 + 87 + bool get_kvm_amd_param_bool(const char *param) 88 + { 89 + return get_module_param_bool("kvm_amd", param); 90 + } 91 + 53 92 /* 54 93 * Capability 55 94 * ··· 121 82 122 83 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) 123 84 { 124 - vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); 85 + if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) 86 + vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); 87 + else 88 + vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); 125 89 vm->dirty_ring_size = ring_size; 126 90 } 127 91

+28 -12

tools/testing/selftests/kvm/lib/x86_64/processor.c

··· 111 111 } 112 112 } 113 113 114 + bool kvm_is_tdp_enabled(void) 115 + { 116 + if (is_intel_cpu()) 117 + return get_kvm_intel_param_bool("ept"); 118 + else 119 + return get_kvm_amd_param_bool("npt"); 120 + } 121 + 114 122 void virt_arch_pgd_alloc(struct kvm_vm *vm) 115 123 { 116 124 TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " ··· 220 212 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) 221 213 { 222 214 __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); 215 + } 216 + 217 + void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 218 + uint64_t nr_bytes, int level) 219 + { 220 + uint64_t pg_size = PG_LEVEL_SIZE(level); 221 + uint64_t nr_pages = nr_bytes / pg_size; 222 + int i; 223 + 224 + TEST_ASSERT(nr_bytes % pg_size == 0, 225 + "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx", 226 + nr_bytes, pg_size); 227 + 228 + for (i = 0; i < nr_pages; i++) { 229 + __virt_pg_map(vm, vaddr, paddr, level); 230 + 231 + vaddr += pg_size; 232 + paddr += pg_size; 233 + } 223 234 } 224 235 225 236 static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, ··· 1321 1294 /* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ 1322 1295 bool vm_is_unrestricted_guest(struct kvm_vm *vm) 1323 1296 { 1324 - char val = 'N'; 1325 - size_t count; 1326 - FILE *f; 1327 - 1328 1297 /* Ensure that a KVM vendor-specific module is loaded. */ 1329 1298 if (vm == NULL) 1330 1299 close(open_kvm_dev_path_or_exit()); 1331 1300 1332 - f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r"); 1333 - if (f) { 1334 - count = fread(&val, sizeof(char), 1, f); 1335 - TEST_ASSERT(count == 1, "Unable to read from param file."); 1336 - fclose(f); 1337 - } 1338 - 1339 - return val == 'Y'; 1301 + return get_kvm_intel_param_bool("unrestricted_guest"); 1340 1302 }

+1 -13

tools/testing/selftests/kvm/lib/x86_64/svm.c

··· 60 60 seg->base = base; 61 61 } 62 62 63 - /* 64 - * Avoid using memset to clear the vmcb, since libc may not be 65 - * available in L1 (and, even if it is, features that libc memset may 66 - * want to use, like AVX, may not be enabled). 67 - */ 68 - static void clear_vmcb(struct vmcb *vmcb) 69 - { 70 - int n = sizeof(*vmcb) / sizeof(u32); 71 - 72 - asm volatile ("rep stosl" : "+c"(n), "+D"(vmcb) : "a"(0) : "memory"); 73 - } 74 - 75 63 void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) 76 64 { 77 65 struct vmcb *vmcb = svm->vmcb; ··· 76 88 wrmsr(MSR_EFER, efer | EFER_SVME); 77 89 wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa); 78 90 79 - clear_vmcb(vmcb); 91 + memset(vmcb, 0, sizeof(*vmcb)); 80 92 asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory"); 81 93 vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr); 82 94 vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);

+51 -77

tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c

··· 17 17 /* VMCALL and VMMCALL are both 3-byte opcodes. */ 18 18 #define HYPERCALL_INSN_SIZE 3 19 19 20 - static bool ud_expected; 20 + static bool quirk_disabled; 21 21 22 22 static void guest_ud_handler(struct ex_regs *regs) 23 23 { 24 - GUEST_ASSERT(ud_expected); 25 - GUEST_DONE(); 24 + regs->rax = -EFAULT; 25 + regs->rip += HYPERCALL_INSN_SIZE; 26 26 } 27 27 28 - extern uint8_t svm_hypercall_insn[HYPERCALL_INSN_SIZE]; 29 - static uint64_t svm_do_sched_yield(uint8_t apic_id) 28 + static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 }; 29 + static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 }; 30 + 31 + extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE]; 32 + static uint64_t do_sched_yield(uint8_t apic_id) 30 33 { 31 34 uint64_t ret; 32 35 33 - asm volatile("mov %1, %%rax\n\t" 34 - "mov %2, %%rbx\n\t" 35 - "svm_hypercall_insn:\n\t" 36 - "vmmcall\n\t" 37 - "mov %%rax, %0\n\t" 38 - : "=r"(ret) 39 - : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id) 40 - : "rax", "rbx", "memory"); 41 - 42 - return ret; 43 - } 44 - 45 - extern uint8_t vmx_hypercall_insn[HYPERCALL_INSN_SIZE]; 46 - static uint64_t vmx_do_sched_yield(uint8_t apic_id) 47 - { 48 - uint64_t ret; 49 - 50 - asm volatile("mov %1, %%rax\n\t" 51 - "mov %2, %%rbx\n\t" 52 - "vmx_hypercall_insn:\n\t" 53 - "vmcall\n\t" 54 - "mov %%rax, %0\n\t" 55 - : "=r"(ret) 56 - : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id) 57 - : "rax", "rbx", "memory"); 36 + asm volatile("hypercall_insn:\n\t" 37 + ".byte 0xcc,0xcc,0xcc\n\t" 38 + : "=a"(ret) 39 + : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id) 40 + : "memory"); 58 41 59 42 return ret; 60 43 } 61 44 62 45 static void guest_main(void) 63 46 { 64 - uint8_t *native_hypercall_insn, *hypercall_insn; 65 - uint8_t apic_id; 66 - 67 - apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); 47 + const uint8_t *native_hypercall_insn; 48 + const uint8_t *other_hypercall_insn; 49 + uint64_t ret; 68 50 69 51 if 
(is_intel_cpu()) { 70 - native_hypercall_insn = vmx_hypercall_insn; 71 - hypercall_insn = svm_hypercall_insn; 72 - svm_do_sched_yield(apic_id); 52 + native_hypercall_insn = vmx_vmcall; 53 + other_hypercall_insn = svm_vmmcall; 73 54 } else if (is_amd_cpu()) { 74 - native_hypercall_insn = svm_hypercall_insn; 75 - hypercall_insn = vmx_hypercall_insn; 76 - vmx_do_sched_yield(apic_id); 55 + native_hypercall_insn = svm_vmmcall; 56 + other_hypercall_insn = vmx_vmcall; 77 57 } else { 78 58 GUEST_ASSERT(0); 79 59 /* unreachable */ 80 60 return; 81 61 } 82 62 83 - /* 84 - * The hypercall didn't #UD (guest_ud_handler() signals "done" if a #UD 85 - * occurs). Verify that a #UD is NOT expected and that KVM patched in 86 - * the native hypercall. 87 - */ 88 - GUEST_ASSERT(!ud_expected); 89 - GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn, HYPERCALL_INSN_SIZE)); 90 - GUEST_DONE(); 91 - } 63 + memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE); 92 64 93 - static void setup_ud_vector(struct kvm_vcpu *vcpu) 94 - { 95 - vm_init_descriptor_tables(vcpu->vm); 96 - vcpu_init_descriptor_tables(vcpu); 97 - vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); 65 + ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID))); 66 + 67 + /* 68 + * If the quirk is disabled, verify that guest_ud_handler() "returned" 69 + * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is 70 + * enabled, verify that the hypercall succeeded and that KVM patched in 71 + * the "right" hypercall. 
72 + */ 73 + if (quirk_disabled) { 74 + GUEST_ASSERT(ret == (uint64_t)-EFAULT); 75 + GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn, 76 + HYPERCALL_INSN_SIZE)); 77 + } else { 78 + GUEST_ASSERT(!ret); 79 + GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn, 80 + HYPERCALL_INSN_SIZE)); 81 + } 82 + 83 + GUEST_DONE(); 98 84 } 99 85 100 86 static void enter_guest(struct kvm_vcpu *vcpu) ··· 103 117 } 104 118 } 105 119 106 - static void test_fix_hypercall(void) 120 + static void test_fix_hypercall(bool disable_quirk) 107 121 { 108 122 struct kvm_vcpu *vcpu; 109 123 struct kvm_vm *vm; 110 124 111 125 vm = vm_create_with_one_vcpu(&vcpu, guest_main); 112 - setup_ud_vector(vcpu); 113 126 114 - ud_expected = false; 115 - sync_global_to_guest(vm, ud_expected); 127 + vm_init_descriptor_tables(vcpu->vm); 128 + vcpu_init_descriptor_tables(vcpu); 129 + vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); 116 130 117 - virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); 131 + if (disable_quirk) 132 + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, 133 + KVM_X86_QUIRK_FIX_HYPERCALL_INSN); 118 134 119 - enter_guest(vcpu); 120 - } 121 - 122 - static void test_fix_hypercall_disabled(void) 123 - { 124 - struct kvm_vcpu *vcpu; 125 - struct kvm_vm *vm; 126 - 127 - vm = vm_create_with_one_vcpu(&vcpu, guest_main); 128 - setup_ud_vector(vcpu); 129 - 130 - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, 131 - KVM_X86_QUIRK_FIX_HYPERCALL_INSN); 132 - 133 - ud_expected = true; 134 - sync_global_to_guest(vm, ud_expected); 135 + quirk_disabled = disable_quirk; 136 + sync_global_to_guest(vm, quirk_disabled); 135 137 136 138 virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); 137 139 ··· 130 156 { 131 157 TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN); 132 158 133 - test_fix_hypercall(); 134 - test_fix_hypercall_disabled(); 159 + test_fix_hypercall(false); 160 + test_fix_hypercall(true); 135 161 }

+7 -6

tools/testing/selftests/kvm/x86_64/hyperv_features.c

··· 26 26 : "=a" (*hv_status), 27 27 "+c" (control), "+d" (input_address), 28 28 KVM_ASM_SAFE_OUTPUTS(vector) 29 - : [output_address] "r"(output_address) 29 + : [output_address] "r"(output_address), 30 + "a" (-EFAULT) 30 31 : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); 31 32 return vector; 32 33 } ··· 82 81 } 83 82 84 83 vector = hypercall(hcall->control, input, output, &res); 85 - if (hcall->ud_expected) 84 + if (hcall->ud_expected) { 86 85 GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); 87 - else 86 + } else { 88 87 GUEST_ASSERT_2(!vector, hcall->control, vector); 88 + GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res); 89 + } 89 90 90 - GUEST_ASSERT_2(!hcall->ud_expected || res == hcall->expect, 91 - hcall->expect, res); 92 91 GUEST_DONE(); 93 92 } 94 93 ··· 508 507 switch (stage) { 509 508 case 0: 510 509 feat->eax |= HV_MSR_HYPERCALL_AVAILABLE; 511 - hcall->control = 0xdeadbeef; 510 + hcall->control = 0xbeef; 512 511 hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; 513 512 break; 514 513

+17 -2

tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c

··· 112 112 { 113 113 struct kvm_vcpu *vcpu; 114 114 struct kvm_vm *vm; 115 + uint64_t nr_bytes; 115 116 void *hva; 116 117 int r; 117 118 ··· 135 134 HPAGE_GPA, HPAGE_SLOT, 136 135 HPAGE_SLOT_NPAGES, 0); 137 136 138 - virt_map(vm, HPAGE_GVA, HPAGE_GPA, HPAGE_SLOT_NPAGES); 137 + nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size; 138 + 139 + /* 140 + * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the 141 + * region into the guest with 2MiB pages whenever TDP is disabled (i.e. 142 + * whenever KVM is shadowing the guest page tables). 143 + * 144 + * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge 145 + * pages irrespective of the guest page size, so map with 4KiB pages 146 + * to test that that is the case. 147 + */ 148 + if (kvm_is_tdp_enabled()) 149 + virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K); 150 + else 151 + virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M); 139 152 140 153 hva = addr_gpa2hva(vm, HPAGE_GPA); 141 - memset(hva, RETURN_OPCODE, HPAGE_SLOT_NPAGES * PAGE_SIZE); 154 + memset(hva, RETURN_OPCODE, nr_bytes); 142 155 143 156 check_2m_page_count(vm, 0); 144 157 check_split_count(vm, 0);

+14

virt/kvm/Kconfig

··· 19 19 config HAVE_KVM_DIRTY_RING 20 20 bool 21 21 22 + # Only strongly ordered architectures can select this, as it doesn't 23 + # put any explicit constraint on userspace ordering. They can also 24 + # select the _ACQ_REL version. 25 + config HAVE_KVM_DIRTY_RING_TSO 26 + bool 27 + select HAVE_KVM_DIRTY_RING 28 + depends on X86 29 + 30 + # Weakly ordered architectures can only select this, advertising 31 + # to userspace the additional ordering requirements. 32 + config HAVE_KVM_DIRTY_RING_ACQ_REL 33 + bool 34 + select HAVE_KVM_DIRTY_RING 35 + 22 36 config HAVE_KVM_EVENTFD 23 37 bool 24 38 select EVENTFD

+2 -2

virt/kvm/dirty_ring.c

··· 74 74 75 75 static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn) 76 76 { 77 - gfn->flags = 0; 77 + smp_store_release(&gfn->flags, 0); 78 78 } 79 79 80 80 static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn) ··· 84 84 85 85 static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn) 86 86 { 87 - return gfn->flags & KVM_DIRTY_GFN_F_RESET; 87 + return smp_load_acquire(&gfn->flags) & KVM_DIRTY_GFN_F_RESET; 88 88 } 89 89 90 90 int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)

+8 -1

virt/kvm/kvm_main.c

··· 4473 4473 case KVM_CAP_NR_MEMSLOTS: 4474 4474 return KVM_USER_MEM_SLOTS; 4475 4475 case KVM_CAP_DIRTY_LOG_RING: 4476 - #ifdef CONFIG_HAVE_KVM_DIRTY_RING 4476 + #ifdef CONFIG_HAVE_KVM_DIRTY_RING_TSO 4477 + return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); 4478 + #else 4479 + return 0; 4480 + #endif 4481 + case KVM_CAP_DIRTY_LOG_RING_ACQ_REL: 4482 + #ifdef CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL 4477 4483 return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); 4478 4484 #else 4479 4485 return 0; ··· 4584 4578 return 0; 4585 4579 } 4586 4580 case KVM_CAP_DIRTY_LOG_RING: 4581 + case KVM_CAP_DIRTY_LOG_RING_ACQ_REL: 4587 4582 return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); 4588 4583 default: 4589 4584 return kvm_vm_ioctl_enable_cap(kvm, cap);