Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf/x86/intel: Protect LBR and extra_regs against KVM lying

With -cpu host, KVM reports LBR and extra_regs support, if the host has
support.

When the guest perf driver tries to access LBR or extra_regs MSR,
it #GPs all MSR accesses, since KVM doesn't handle LBR and extra_regs support.
So check access to the related MSRs once at initialization time to avoid
erroneous accesses at runtime.

For reproducing the issue, please build the kernel with CONFIG_KVM_INTEL = y
(for host kernel).
And CONFIG_PARAVIRT = n and CONFIG_KVM_GUEST = n (for guest kernel).
Start the guest with -cpu host.
Run perf record with --branch-any or --branch-filter in guest to trigger LBR #GP.
Run perf stat offcore events (E.g. LLC-loads/LLC-load-misses ...) in guest to
trigger offcore_rsp #GP

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Cc: Mark Davies <junk@eslaf.co.uk>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yan, Zheng <zheng.z.yan@intel.com>
Link: http://lkml.kernel.org/r/1405365957-20202-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Kan Liang and committed by
Ingo Molnar
338b522c 4a1c0f26

+75 -6
+3
arch/x86/kernel/cpu/perf_event.c
··· 118 118 continue; 119 119 if (event->attr.config1 & ~er->valid_mask) 120 120 return -EINVAL; 121 + /* Check if the extra msrs can be safely accessed*/ 122 + if (!er->extra_msr_access) 123 + return -ENXIO; 121 124 122 125 reg->idx = er->idx; 123 126 reg->config = event->attr.config1;
+7 -5
arch/x86/kernel/cpu/perf_event.h
··· 295 295 u64 config_mask; 296 296 u64 valid_mask; 297 297 int idx; /* per_xxx->regs[] reg index */ 298 + bool extra_msr_access; 298 299 }; 299 300 300 301 #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ 301 - .event = (e), \ 302 - .msr = (ms), \ 303 - .config_mask = (m), \ 304 - .valid_mask = (vm), \ 305 - .idx = EXTRA_REG_##i, \ 302 + .event = (e), \ 303 + .msr = (ms), \ 304 + .config_mask = (m), \ 305 + .valid_mask = (vm), \ 306 + .idx = EXTRA_REG_##i, \ 307 + .extra_msr_access = true, \ 306 308 } 307 309 308 310 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
+65 -1
arch/x86/kernel/cpu/perf_event_intel.c
··· 2182 2182 } 2183 2183 } 2184 2184 2185 + /* 2186 + * Under certain circumstances, access certain MSR may cause #GP. 2187 + * The function tests if the input MSR can be safely accessed. 2188 + */ 2189 + static bool check_msr(unsigned long msr, u64 mask) 2190 + { 2191 + u64 val_old, val_new, val_tmp; 2192 + 2193 + /* 2194 + * Read the current value, change it and read it back to see if it 2195 + * matches, this is needed to detect certain hardware emulators 2196 + * (qemu/kvm) that don't trap on the MSR access and always return 0s. 2197 + */ 2198 + if (rdmsrl_safe(msr, &val_old)) 2199 + return false; 2200 + 2201 + /* 2202 + * Only change the bits which can be updated by wrmsrl. 2203 + */ 2204 + val_tmp = val_old ^ mask; 2205 + if (wrmsrl_safe(msr, val_tmp) || 2206 + rdmsrl_safe(msr, &val_new)) 2207 + return false; 2208 + 2209 + if (val_new != val_tmp) 2210 + return false; 2211 + 2212 + /* Here it's sure that the MSR can be safely accessed. 2213 + * Restore the old value and return. 2214 + */ 2215 + wrmsrl(msr, val_old); 2216 + 2217 + return true; 2218 + } 2219 + 2185 2220 static __init void intel_sandybridge_quirk(void) 2186 2221 { 2187 2222 x86_pmu.check_microcode = intel_snb_check_microcode; ··· 2306 2271 union cpuid10_ebx ebx; 2307 2272 struct event_constraint *c; 2308 2273 unsigned int unused; 2309 - int version; 2274 + struct extra_reg *er; 2275 + int version, i; 2310 2276 2311 2277 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 2312 2278 switch (boot_cpu_data.x86) { ··· 2610 2574 2611 2575 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; 2612 2576 c->weight += x86_pmu.num_counters; 2577 + } 2578 + } 2579 + 2580 + /* 2581 + * Access LBR MSR may cause #GP under certain circumstances. 2582 + * E.g. KVM doesn't support LBR MSR 2583 + * Check all LBT MSR here. 2584 + * Disable LBR access if any LBR MSRs can not be accessed. 
2585 + */ 2586 + if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) 2587 + x86_pmu.lbr_nr = 0; 2588 + for (i = 0; i < x86_pmu.lbr_nr; i++) { 2589 + if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && 2590 + check_msr(x86_pmu.lbr_to + i, 0xffffUL))) 2591 + x86_pmu.lbr_nr = 0; 2592 + } 2593 + 2594 + /* 2595 + * Access extra MSR may cause #GP under certain circumstances. 2596 + * E.g. KVM doesn't support offcore event 2597 + * Check all extra_regs here. 2598 + */ 2599 + if (x86_pmu.extra_regs) { 2600 + for (er = x86_pmu.extra_regs; er->msr; er++) { 2601 + er->extra_msr_access = check_msr(er->msr, 0x1ffUL); 2602 + /* Disable LBR select mapping */ 2603 + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) 2604 + x86_pmu.lbr_sel_map = NULL; 2613 2605 } 2614 2606 } 2615 2607