Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: Use xen_vcpuop_clockevent, xen_clocksource and xen wallclock.

Use xen_vcpuop_clockevent instead of hpet and APIC timers as the main
clockevent device on all vcpus, use the xen wallclock time as wallclock
instead of rtc, and use xen_clocksource as clocksource.
The pv clock algorithm needs to work correctly for the xen_clocksource
and xen wallclock to be usable; only modern Xen versions offer a
reliable pv clock in HVM guests (XENFEAT_hvm_safe_pvclock).

Using the hpet as clocksource means a VMEXIT every time we read/write to
the hpet mmio addresses; pvclock gives us a better rating without
VMEXITs. The same goes for the xen wallclock and xen_vcpuop_clockevent.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Don Dutile <ddutile@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

authored by

Stefano Stabellini and committed by
Jeremy Fitzhardinge
409771d2 99ad198c

+66 -22
+2 -12
arch/x86/xen/enlighten.c
··· 935 935 .patch = xen_patch, 936 936 }; 937 937 938 - static const struct pv_time_ops xen_time_ops __initdata = { 939 - .sched_clock = xen_sched_clock, 940 - }; 941 - 942 938 static const struct pv_cpu_ops xen_cpu_ops __initdata = { 943 939 .cpuid = xen_cpuid, 944 940 ··· 1072 1076 /* Install Xen paravirt ops */ 1073 1077 pv_info = xen_info; 1074 1078 pv_init_ops = xen_init_ops; 1075 - pv_time_ops = xen_time_ops; 1076 1079 pv_cpu_ops = xen_cpu_ops; 1077 1080 pv_apic_ops = xen_apic_ops; 1078 1081 ··· 1079 1084 x86_init.oem.arch_setup = xen_arch_setup; 1080 1085 x86_init.oem.banner = xen_banner; 1081 1086 1082 - x86_init.timers.timer_init = xen_time_init; 1083 - x86_init.timers.setup_percpu_clockev = x86_init_noop; 1084 - x86_cpuinit.setup_percpu_clockev = x86_init_noop; 1085 - 1086 - x86_platform.calibrate_tsc = xen_tsc_khz; 1087 - x86_platform.get_wallclock = xen_get_wallclock; 1088 - x86_platform.set_wallclock = xen_set_wallclock; 1087 + xen_init_time_ops(); 1089 1088 1090 1089 /* 1091 1090 * Set up some pagetable state before starting to set any ptes. ··· 1316 1327 register_cpu_notifier(&xen_hvm_cpu_notifier); 1317 1328 have_vcpu_info_placement = 0; 1318 1329 x86_init.irqs.intr_init = xen_init_IRQ; 1330 + xen_hvm_init_time_ops(); 1319 1331 } 1320 1332 1321 1333 static bool __init xen_hvm_platform(void)
+6
arch/x86/xen/suspend.c
··· 28 28 29 29 void xen_hvm_post_suspend(int suspend_cancelled) 30 30 { 31 + int cpu; 31 32 xen_hvm_init_shared_info(); 32 33 xen_callback_vector(); 34 + if (xen_feature(XENFEAT_hvm_safe_pvclock)) { 35 + for_each_online_cpu(cpu) { 36 + xen_setup_runstate_info(cpu); 37 + } 38 + } 33 39 } 34 40 35 41 void xen_post_suspend(int suspend_cancelled)
+53 -5
arch/x86/xen/time.c
··· 20 20 #include <asm/xen/hypercall.h> 21 21 22 22 #include <xen/events.h> 23 + #include <xen/features.h> 23 24 #include <xen/interface/xen.h> 24 25 #include <xen/interface/vcpu.h> 25 26 ··· 161 160 * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED 162 161 * states. 163 162 */ 164 - unsigned long long xen_sched_clock(void) 163 + static unsigned long long xen_sched_clock(void) 165 164 { 166 165 struct vcpu_runstate_info state; 167 166 cycle_t now; ··· 196 195 197 196 198 197 /* Get the TSC speed from Xen */ 199 - unsigned long xen_tsc_khz(void) 198 + static unsigned long xen_tsc_khz(void) 200 199 { 201 200 struct pvclock_vcpu_time_info *info = 202 201 &HYPERVISOR_shared_info->vcpu_info[0].time; ··· 231 230 put_cpu_var(xen_vcpu); 232 231 } 233 232 234 - unsigned long xen_get_wallclock(void) 233 + static unsigned long xen_get_wallclock(void) 235 234 { 236 235 struct timespec ts; 237 236 ··· 239 238 return ts.tv_sec; 240 239 } 241 240 242 - int xen_set_wallclock(unsigned long now) 241 + static int xen_set_wallclock(unsigned long now) 243 242 { 244 243 /* do nothing for domU */ 245 244 return -1; ··· 474 473 } 475 474 } 476 475 477 - __init void xen_time_init(void) 476 + static const struct pv_time_ops xen_time_ops __initdata = { 477 + .sched_clock = xen_sched_clock, 478 + }; 479 + 480 + static __init void xen_time_init(void) 478 481 { 479 482 int cpu = smp_processor_id(); 480 483 struct timespec tp; ··· 502 497 xen_setup_timer(cpu); 503 498 xen_setup_cpu_clockevents(); 504 499 } 500 + 501 + __init void xen_init_time_ops(void) 502 + { 503 + pv_time_ops = xen_time_ops; 504 + 505 + x86_init.timers.timer_init = xen_time_init; 506 + x86_init.timers.setup_percpu_clockev = x86_init_noop; 507 + x86_cpuinit.setup_percpu_clockev = x86_init_noop; 508 + 509 + x86_platform.calibrate_tsc = xen_tsc_khz; 510 + x86_platform.get_wallclock = xen_get_wallclock; 511 + x86_platform.set_wallclock = xen_set_wallclock; 512 + } 513 + 514 + static void 
xen_hvm_setup_cpu_clockevents(void) 515 + { 516 + int cpu = smp_processor_id(); 517 + xen_setup_runstate_info(cpu); 518 + xen_setup_timer(cpu); 519 + xen_setup_cpu_clockevents(); 520 + } 521 + 522 + __init void xen_hvm_init_time_ops(void) 523 + { 524 + /* vector callback is needed otherwise we cannot receive interrupts 525 + * on cpu > 0 */ 526 + if (!xen_have_vector_callback && num_present_cpus() > 1) 527 + return; 528 + if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { 529 + printk(KERN_INFO "Xen doesn't support pvclock on HVM," 530 + "disable pv timer\n"); 531 + return; 532 + } 533 + 534 + pv_time_ops = xen_time_ops; 535 + x86_init.timers.setup_percpu_clockev = xen_time_init; 536 + x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; 537 + 538 + x86_platform.calibrate_tsc = xen_tsc_khz; 539 + x86_platform.get_wallclock = xen_get_wallclock; 540 + x86_platform.set_wallclock = xen_set_wallclock; 541 + } 542 +
+2 -5
arch/x86/xen/xen-ops.h
··· 49 49 void xen_teardown_timer(int cpu); 50 50 cycle_t xen_clocksource_read(void); 51 51 void xen_setup_cpu_clockevents(void); 52 - unsigned long xen_tsc_khz(void); 53 - void __init xen_time_init(void); 54 - unsigned long xen_get_wallclock(void); 55 - int xen_set_wallclock(unsigned long time); 56 - unsigned long long xen_sched_clock(void); 52 + void __init xen_init_time_ops(void); 53 + void __init xen_hvm_init_time_ops(void); 57 54 58 55 irqreturn_t xen_debug_interrupt(int irq, void *dev_id); 59 56
+3
include/xen/interface/features.h
··· 44 44 /* x86: Does this Xen host support the HVM callback vector type? */ 45 45 #define XENFEAT_hvm_callback_vector 8 46 46 47 + /* x86: pvclock algorithm is safe to use on HVM */ 48 + #define XENFEAT_hvm_safe_pvclock 9 49 + 47 50 #define XENFEAT_NR_SUBMAPS 1 48 51 49 52 #endif /* __XEN_PUBLIC_FEATURES_H__ */