x86: Add structs and functions for paravirt clocksource

This patch adds structs for the paravirt clocksource ABI
used by both xen and kvm (pvclock-abi.h).

It also adds some helper functions to read system time and
wall clock time from a paravirtual clocksource (pvclock.[ch]).
They are based on the xen code. They are enabled using
CONFIG_PARAVIRT_CLOCK.

Subsequent patches in this series will put the code to use.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>

authored by Gerd Hoffmann and committed by Avi Kivity 7af192c9 a9b21b62

+201
+4
arch/x86/Kconfig
··· 410 410 over full virtualization. However, when run without a hypervisor 411 411 the kernel is theoretically slower and slightly larger. 412 412 413 + config PARAVIRT_CLOCK 414 + bool 415 + default n 416 + 413 417 endif 414 418 415 419 config MEMTEST_BOOTPARAM
+1
arch/x86/kernel/Makefile
··· 82 82 obj-$(CONFIG_KVM_GUEST) += kvm.o 83 83 obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 84 84 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 85 + obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o 85 86 86 87 obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 87 88
+141
arch/x86/kernel/pvclock.c
··· 1 + /* paravirtual clock -- common code used by kvm/xen 2 + 3 + This program is free software; you can redistribute it and/or modify 4 + it under the terms of the GNU General Public License as published by 5 + the Free Software Foundation; either version 2 of the License, or 6 + (at your option) any later version. 7 + 8 + This program is distributed in the hope that it will be useful, 9 + but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + GNU General Public License for more details. 12 + 13 + You should have received a copy of the GNU General Public License 14 + along with this program; if not, write to the Free Software 15 + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 + */ 17 + 18 + #include <linux/kernel.h> 19 + #include <linux/percpu.h> 20 + #include <asm/pvclock.h> 21 + 22 + /* 23 + * These are perodically updated 24 + * xen: magic shared_info page 25 + * kvm: gpa registered via msr 26 + * and then copied here. 27 + */ 28 + struct pvclock_shadow_time { 29 + u64 tsc_timestamp; /* TSC at last update of time vals. */ 30 + u64 system_timestamp; /* Time, in nanosecs, since boot. */ 31 + u32 tsc_to_nsec_mul; 32 + int tsc_shift; 33 + u32 version; 34 + }; 35 + 36 + /* 37 + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 38 + * yielding a 64-bit result. 
39 + */ 40 + static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) 41 + { 42 + u64 product; 43 + #ifdef __i386__ 44 + u32 tmp1, tmp2; 45 + #endif 46 + 47 + if (shift < 0) 48 + delta >>= -shift; 49 + else 50 + delta <<= shift; 51 + 52 + #ifdef __i386__ 53 + __asm__ ( 54 + "mul %5 ; " 55 + "mov %4,%%eax ; " 56 + "mov %%edx,%4 ; " 57 + "mul %5 ; " 58 + "xor %5,%5 ; " 59 + "add %4,%%eax ; " 60 + "adc %5,%%edx ; " 61 + : "=A" (product), "=r" (tmp1), "=r" (tmp2) 62 + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); 63 + #elif __x86_64__ 64 + __asm__ ( 65 + "mul %%rdx ; shrd $32,%%rdx,%%rax" 66 + : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); 67 + #else 68 + #error implement me! 69 + #endif 70 + 71 + return product; 72 + } 73 + 74 + static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) 75 + { 76 + u64 delta = native_read_tsc() - shadow->tsc_timestamp; 77 + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); 78 + } 79 + 80 + /* 81 + * Reads a consistent set of time-base values from hypervisor, 82 + * into a shadow data area. 
83 + */ 84 + static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, 85 + struct pvclock_vcpu_time_info *src) 86 + { 87 + do { 88 + dst->version = src->version; 89 + rmb(); /* fetch version before data */ 90 + dst->tsc_timestamp = src->tsc_timestamp; 91 + dst->system_timestamp = src->system_time; 92 + dst->tsc_to_nsec_mul = src->tsc_to_system_mul; 93 + dst->tsc_shift = src->tsc_shift; 94 + rmb(); /* test version after fetching data */ 95 + } while ((src->version & 1) || (dst->version != src->version)); 96 + 97 + return dst->version; 98 + } 99 + 100 + cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) 101 + { 102 + struct pvclock_shadow_time shadow; 103 + unsigned version; 104 + cycle_t ret, offset; 105 + 106 + do { 107 + version = pvclock_get_time_values(&shadow, src); 108 + barrier(); 109 + offset = pvclock_get_nsec_offset(&shadow); 110 + ret = shadow.system_timestamp + offset; 111 + barrier(); 112 + } while (version != src->version); 113 + 114 + return ret; 115 + } 116 + 117 + void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, 118 + struct pvclock_vcpu_time_info *vcpu_time, 119 + struct timespec *ts) 120 + { 121 + u32 version; 122 + u64 delta; 123 + struct timespec now; 124 + 125 + /* get wallclock at system boot */ 126 + do { 127 + version = wall_clock->version; 128 + rmb(); /* fetch version before time */ 129 + now.tv_sec = wall_clock->sec; 130 + now.tv_nsec = wall_clock->nsec; 131 + rmb(); /* fetch time before checking version */ 132 + } while ((wall_clock->version & 1) || (version != wall_clock->version)); 133 + 134 + delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ 135 + delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; 136 + 137 + now.tv_nsec = do_div(delta, NSEC_PER_SEC); 138 + now.tv_sec = delta; 139 + 140 + set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); 141 + }
+42
include/asm-x86/pvclock-abi.h
··· 1 + #ifndef _ASM_X86_PVCLOCK_ABI_H_ 2 + #define _ASM_X86_PVCLOCK_ABI_H_ 3 + #ifndef __ASSEMBLY__ 4 + 5 + /* 6 + * These structs MUST NOT be changed. 7 + * They are the ABI between hypervisor and guest OS. 8 + * Both Xen and KVM are using this. 9 + * 10 + * pvclock_vcpu_time_info holds the system time and the tsc timestamp 11 + * of the last update. So the guest can use the tsc delta to get a 12 + * more precise system time. There is one per virtual cpu. 13 + * 14 + * pvclock_wall_clock references the point in time when the system 15 + * time was zero (usually boot time), thus the guest calculates the 16 + * current wall clock by adding the system time. 17 + * 18 + * Protocol for the "version" fields is: hypervisor raises it (making 19 + * it uneven) before it starts updating the fields and raises it again 20 + * (making it even) when it is done. Thus the guest can make sure the 21 + * time values it got are consistent by checking the version before 22 + * and after reading them. 23 + */ 24 + 25 + struct pvclock_vcpu_time_info { 26 + u32 version; 27 + u32 pad0; 28 + u64 tsc_timestamp; 29 + u64 system_time; 30 + u32 tsc_to_system_mul; 31 + s8 tsc_shift; 32 + u8 pad[3]; 33 + } __attribute__((__packed__)); /* 32 bytes */ 34 + 35 + struct pvclock_wall_clock { 36 + u32 version; 37 + u32 sec; 38 + u32 nsec; 39 + } __attribute__((__packed__)); 40 + 41 + #endif /* __ASSEMBLY__ */ 42 + #endif /* _ASM_X86_PVCLOCK_ABI_H_ */
+13
include/asm-x86/pvclock.h
··· 1 + #ifndef _ASM_X86_PVCLOCK_H_ 2 + #define _ASM_X86_PVCLOCK_H_ 3 + 4 + #include <linux/clocksource.h> 5 + #include <asm/pvclock-abi.h> 6 + 7 + /* some helper functions for xen and kvm pv clock sources */ 8 + cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); 9 + void pvclock_read_wallclock(struct pvclock_wall_clock *wall, 10 + struct pvclock_vcpu_time_info *vcpu, 11 + struct timespec *ts); 12 + 13 + #endif /* _ASM_X86_PVCLOCK_H_ */