Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: arm64: Don't write junk to sysregs on reset

At the moment, the way we reset system registers is mildly insane:
We write junk to them, call the reset functions, and then check that
we have something else in them.

The "fun" thing is that this can happen while the guest is running
(PSCI, for example). If anything in KVM has to evaluate the state
of a system register while junk is in there, bad things may happen.

Let's stop doing that. Instead, we track that we have called a
reset function for that register, and assume that the reset
function has done something. This requires fixing a couple of
sysreg definitions in the trap table.

In the end, the very need of this reset check is pretty dubious,
as it doesn't check everything (a lot of the sysregs live outside of
the sys_regs[] array). It may well be axed in the near future.

Tested-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>

+18 -14
+18 -14
arch/arm64/kvm/sys_regs.c
··· 632 632 */ 633 633 val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) 634 634 | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); 635 - __vcpu_sys_reg(vcpu, PMCR_EL0) = val; 635 + __vcpu_sys_reg(vcpu, r->reg) = val; 636 636 } 637 637 638 638 static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) ··· 981 981 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ 982 982 #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ 983 983 { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ 984 - trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ 984 + trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \ 985 985 { SYS_DESC(SYS_DBGBCRn_EL1(n)), \ 986 - trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ 986 + trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \ 987 987 { SYS_DESC(SYS_DBGWVRn_EL1(n)), \ 988 - trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ 988 + trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \ 989 989 { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ 990 - trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } 990 + trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } 991 991 992 992 /* Macro to expand the PMEVCNTRn_EL0 register */ 993 993 #define PMU_PMEVCNTR_EL0(n) \ ··· 1540 1540 { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, 1541 1541 { SYS_DESC(SYS_CTR_EL0), access_ctr }, 1542 1542 1543 - { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, 1543 + { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, 1544 1544 { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, 1545 1545 { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, 1546 1546 { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, ··· 2254 2254 } 2255 2255 2256 2256 static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, 2257 - const struct sys_reg_desc *table, size_t num) 2257 + const struct sys_reg_desc *table, size_t num, 2258 + unsigned long *bmap) 2258 2259 { 2259 2260 unsigned long i; 2260 2261 2261 2262 for (i = 0; i < num; i++) 2262 - if 
(table[i].reset) 2263 + if (table[i].reset) { 2264 + int reg = table[i].reg; 2265 + 2263 2266 table[i].reset(vcpu, &table[i]); 2267 + if (reg > 0 && reg < NR_SYS_REGS) 2268 + set_bit(reg, bmap); 2269 + } 2264 2270 } 2265 2271 2266 2272 /** ··· 2780 2774 { 2781 2775 size_t num; 2782 2776 const struct sys_reg_desc *table; 2783 - 2784 - /* Catch someone adding a register without putting in reset entry. */ 2785 - memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs)); 2777 + DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, }; 2786 2778 2787 2779 /* Generic chip reset first (so target could override). */ 2788 - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); 2780 + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap); 2789 2781 2790 2782 table = get_target_table(vcpu->arch.target, true, &num); 2791 - reset_sys_reg_descs(vcpu, table, num); 2783 + reset_sys_reg_descs(vcpu, table, num, bmap); 2792 2784 2793 2785 for (num = 1; num < NR_SYS_REGS; num++) { 2794 - if (WARN(__vcpu_sys_reg(vcpu, num) == 0x4242424242424242, 2786 + if (WARN(!test_bit(num, bmap), 2795 2787 "Didn't reset __vcpu_sys_reg(%zi)\n", num)) 2796 2788 break; 2797 2789 }