Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/vdso: Switch to generic vDSO implementation

The x86 vDSO library requires some adaptations to take advantage of the
newly introduced generic vDSO library.

Introduce the following changes:
- Modification of vdso.c to be compliant with the common vdso datapage
- Use of lib/vdso for gettimeofday

[ tglx: Massaged changelog and cleaned up the function signature formatting ]

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-mips@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Mark Salyzyn <salyzyn@android.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Dmitry Safonov <0x7f454c46@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Huw Davies <huw@codeweavers.com>
Cc: Shijith Thotton <sthotton@marvell.com>
Cc: Andre Przywara <andre.przywara@arm.com>
Link: https://lkml.kernel.org/r/20190621095252.32307-23-vincenzo.frascino@arm.com

Authored by Vincenzo Frascino; committed by Thomas Gleixner.
7ac87074 bfe801eb

+297 -392
+3
arch/x86/Kconfig
··· 17 17 select HAVE_DEBUG_STACKOVERFLOW 18 18 select MODULES_USE_ELF_REL 19 19 select OLD_SIGACTION 20 + select GENERIC_VDSO_32 20 21 21 22 config X86_64 22 23 def_bool y ··· 122 121 select GENERIC_STRNCPY_FROM_USER 123 122 select GENERIC_STRNLEN_USER 124 123 select GENERIC_TIME_VSYSCALL 124 + select GENERIC_GETTIMEOFDAY 125 125 select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 126 126 select HAVE_ACPI_APEI if ACPI 127 127 select HAVE_ACPI_APEI_NMI if ACPI ··· 204 202 select HAVE_SYSCALL_TRACEPOINTS 205 203 select HAVE_UNSTABLE_SCHED_CLOCK 206 204 select HAVE_USER_RETURN_NOTIFIER 205 + select HAVE_GENERIC_VDSO 207 206 select HOTPLUG_SMT if SMP 208 207 select IRQ_FORCED_THREADING 209 208 select NEED_SG_DMA_LENGTH
+9
arch/x86/entry/vdso/Makefile
··· 3 3 # Building vDSO images for x86. 4 4 # 5 5 6 + # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before 7 + # the inclusion of generic Makefile. 8 + ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| 9 + ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE 10 + include $(srctree)/lib/vdso/Makefile 11 + 6 12 KBUILD_CFLAGS += $(DISABLE_LTO) 7 13 KASAN_SANITIZE := n 8 14 UBSAN_SANITIZE := n ··· 57 51 58 52 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE 59 53 $(call if_changed,vdso) 54 + $(call if_changed,vdso_check) 60 55 61 56 HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi 62 57 hostprogs-y += vdso2c ··· 128 121 129 122 $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE 130 123 $(call if_changed,vdso) 124 + $(call if_changed,vdso_check) 131 125 132 126 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) 133 127 VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 ··· 168 160 $(obj)/vdso32/system_call.o \ 169 161 $(obj)/vdso32/sigreturn.o 170 162 $(call if_changed,vdso) 163 + $(call if_changed,vdso_check) 171 164 172 165 # 173 166 # The DSO images are built using a special linker script.
+40 -231
arch/x86/entry/vdso/vclock_gettime.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * Copyright 2006 Andi Kleen, SUSE Labs. 4 - * 5 3 * Fast user context implementation of clock_gettime, gettimeofday, and time. 4 + * 5 + * Copyright 2006 Andi Kleen, SUSE Labs. 6 + * Copyright 2019 ARM Limited 6 7 * 7 8 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> 8 9 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany 9 - * 10 - * The code should have no internal unresolved relocations. 11 - * Check with readelf after changing. 12 10 */ 13 - 14 - #include <uapi/linux/time.h> 15 - #include <asm/vgtod.h> 16 - #include <asm/vvar.h> 17 - #include <asm/unistd.h> 18 - #include <asm/msr.h> 19 - #include <asm/pvclock.h> 20 - #include <asm/mshyperv.h> 21 - #include <linux/math64.h> 22 11 #include <linux/time.h> 23 12 #include <linux/kernel.h> 13 + #include <linux/types.h> 24 14 25 - #define gtod (&VVAR(vsyscall_gtod_data)) 15 + #include "../../../../lib/vdso/gettimeofday.c" 26 16 27 - extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); 28 - extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); 17 + extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); 29 18 extern time_t __vdso_time(time_t *t); 30 19 31 - #ifdef CONFIG_PARAVIRT_CLOCK 32 - extern u8 pvclock_page[PAGE_SIZE] 33 - __attribute__((visibility("hidden"))); 34 - #endif 35 - 36 - #ifdef CONFIG_HYPERV_TSCPAGE 37 - extern u8 hvclock_page[PAGE_SIZE] 38 - __attribute__((visibility("hidden"))); 39 - #endif 40 - 41 - #ifndef BUILD_VDSO32 42 - 43 - notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 20 + int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) 44 21 { 45 - long ret; 46 - asm ("syscall" : "=a" (ret), "=m" (*ts) : 47 - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : 48 - "rcx", "r11"); 49 - return ret; 22 + return __cvdso_gettimeofday(tv, tz); 50 23 } 51 24 52 - #else 53 - 54 - notrace static long 
vdso_fallback_gettime(long clock, struct timespec *ts) 55 - { 56 - long ret; 57 - 58 - asm ( 59 - "mov %%ebx, %%edx \n" 60 - "mov %[clock], %%ebx \n" 61 - "call __kernel_vsyscall \n" 62 - "mov %%edx, %%ebx \n" 63 - : "=a" (ret), "=m" (*ts) 64 - : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) 65 - : "edx"); 66 - return ret; 67 - } 68 - 69 - #endif 70 - 71 - #ifdef CONFIG_PARAVIRT_CLOCK 72 - static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) 73 - { 74 - return (const struct pvclock_vsyscall_time_info *)&pvclock_page; 75 - } 76 - 77 - static notrace u64 vread_pvclock(void) 78 - { 79 - const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; 80 - u32 version; 81 - u64 ret; 82 - 83 - /* 84 - * Note: The kernel and hypervisor must guarantee that cpu ID 85 - * number maps 1:1 to per-CPU pvclock time info. 86 - * 87 - * Because the hypervisor is entirely unaware of guest userspace 88 - * preemption, it cannot guarantee that per-CPU pvclock time 89 - * info is updated if the underlying CPU changes or that that 90 - * version is increased whenever underlying CPU changes. 91 - * 92 - * On KVM, we are guaranteed that pvti updates for any vCPU are 93 - * atomic as seen by *all* vCPUs. This is an even stronger 94 - * guarantee than we get with a normal seqlock. 95 - * 96 - * On Xen, we don't appear to have that guarantee, but Xen still 97 - * supplies a valid seqlock using the version field. 98 - * 99 - * We only do pvclock vdso timing at all if 100 - * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to 101 - * mean that all vCPUs have matching pvti and that the TSC is 102 - * synced, so we can just look at vCPU 0's pvti. 
103 - */ 104 - 105 - do { 106 - version = pvclock_read_begin(pvti); 107 - 108 - if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) 109 - return U64_MAX; 110 - 111 - ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); 112 - } while (pvclock_read_retry(pvti, version)); 113 - 114 - return ret; 115 - } 116 - #endif 117 - #ifdef CONFIG_HYPERV_TSCPAGE 118 - static notrace u64 vread_hvclock(void) 119 - { 120 - const struct ms_hyperv_tsc_page *tsc_pg = 121 - (const struct ms_hyperv_tsc_page *)&hvclock_page; 122 - 123 - return hv_read_tsc_page(tsc_pg); 124 - } 125 - #endif 126 - 127 - notrace static inline u64 vgetcyc(int mode) 128 - { 129 - if (mode == VCLOCK_TSC) 130 - return (u64)rdtsc_ordered(); 131 - 132 - /* 133 - * For any memory-mapped vclock type, we need to make sure that gcc 134 - * doesn't cleverly hoist a load before the mode check. Otherwise we 135 - * might end up touching the memory-mapped page even if the vclock in 136 - * question isn't enabled, which will segfault. Hence the barriers. 
137 - */ 138 - #ifdef CONFIG_PARAVIRT_CLOCK 139 - if (mode == VCLOCK_PVCLOCK) { 140 - barrier(); 141 - return vread_pvclock(); 142 - } 143 - #endif 144 - #ifdef CONFIG_HYPERV_TSCPAGE 145 - if (mode == VCLOCK_HVCLOCK) { 146 - barrier(); 147 - return vread_hvclock(); 148 - } 149 - #endif 150 - return U64_MAX; 151 - } 152 - 153 - notrace static int do_hres(clockid_t clk, struct timespec *ts) 154 - { 155 - struct vgtod_ts *base = &gtod->basetime[clk]; 156 - u64 cycles, last, sec, ns; 157 - unsigned int seq; 158 - 159 - do { 160 - seq = gtod_read_begin(gtod); 161 - cycles = vgetcyc(gtod->vclock_mode); 162 - ns = base->nsec; 163 - last = gtod->cycle_last; 164 - if (unlikely((s64)cycles < 0)) 165 - return vdso_fallback_gettime(clk, ts); 166 - if (cycles > last) 167 - ns += (cycles - last) * gtod->mult; 168 - ns >>= gtod->shift; 169 - sec = base->sec; 170 - } while (unlikely(gtod_read_retry(gtod, seq))); 171 - 172 - /* 173 - * Do this outside the loop: a race inside the loop could result 174 - * in __iter_div_u64_rem() being extremely slow. 175 - */ 176 - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); 177 - ts->tv_nsec = ns; 178 - 179 - return 0; 180 - } 181 - 182 - notrace static void do_coarse(clockid_t clk, struct timespec *ts) 183 - { 184 - struct vgtod_ts *base = &gtod->basetime[clk]; 185 - unsigned int seq; 186 - 187 - do { 188 - seq = gtod_read_begin(gtod); 189 - ts->tv_sec = base->sec; 190 - ts->tv_nsec = base->nsec; 191 - } while (unlikely(gtod_read_retry(gtod, seq))); 192 - } 193 - 194 - notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 195 - { 196 - unsigned int msk; 197 - 198 - /* Sort out negative (CPU/FD) and invalid clocks */ 199 - if (unlikely((unsigned int) clock >= MAX_CLOCKS)) 200 - return vdso_fallback_gettime(clock, ts); 201 - 202 - /* 203 - * Convert the clockid to a bitmask and use it to check which 204 - * clocks are handled in the VDSO directly. 
205 - */ 206 - msk = 1U << clock; 207 - if (likely(msk & VGTOD_HRES)) { 208 - return do_hres(clock, ts); 209 - } else if (msk & VGTOD_COARSE) { 210 - do_coarse(clock, ts); 211 - return 0; 212 - } 213 - return vdso_fallback_gettime(clock, ts); 214 - } 215 - 216 - int clock_gettime(clockid_t, struct timespec *) 217 - __attribute__((weak, alias("__vdso_clock_gettime"))); 218 - 219 - notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 220 - { 221 - if (likely(tv != NULL)) { 222 - struct timespec *ts = (struct timespec *) tv; 223 - 224 - do_hres(CLOCK_REALTIME, ts); 225 - tv->tv_usec /= 1000; 226 - } 227 - if (unlikely(tz != NULL)) { 228 - tz->tz_minuteswest = gtod->tz_minuteswest; 229 - tz->tz_dsttime = gtod->tz_dsttime; 230 - } 231 - 232 - return 0; 233 - } 234 - int gettimeofday(struct timeval *, struct timezone *) 25 + int gettimeofday(struct __kernel_old_timeval *, struct timezone *) 235 26 __attribute__((weak, alias("__vdso_gettimeofday"))); 236 27 237 - /* 238 - * This will break when the xtime seconds get inaccurate, but that is 239 - * unlikely 240 - */ 241 - notrace time_t __vdso_time(time_t *t) 28 + time_t __vdso_time(time_t *t) 242 29 { 243 - /* This is atomic on x86 so we don't need any locks. 
*/ 244 - time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec); 245 - 246 - if (t) 247 - *t = result; 248 - return result; 30 + return __cvdso_time(t); 249 31 } 250 - time_t time(time_t *t) 251 - __attribute__((weak, alias("__vdso_time"))); 32 + 33 + time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); 34 + 35 + 36 + #if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64) 37 + /* both 64-bit and x32 use these */ 38 + extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); 39 + 40 + int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) 41 + { 42 + return __cvdso_clock_gettime(clock, ts); 43 + } 44 + 45 + int clock_gettime(clockid_t, struct __kernel_timespec *) 46 + __attribute__((weak, alias("__vdso_clock_gettime"))); 47 + 48 + #else 49 + /* i386 only */ 50 + extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); 51 + 52 + int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) 53 + { 54 + return __cvdso_clock_gettime32(clock, ts); 55 + } 56 + 57 + int clock_gettime(clockid_t, struct old_timespec32 *) 58 + __attribute__((weak, alias("__vdso_clock_gettime"))); 59 + 60 + #endif
+1
arch/x86/entry/vdso/vdsox32.lds.S
··· 21 21 __vdso_gettimeofday; 22 22 __vdso_getcpu; 23 23 __vdso_time; 24 + __vdso_clock_getres; 24 25 local: *; 25 26 }; 26 27 }
-2
arch/x86/entry/vsyscall/Makefile
··· 2 2 # 3 3 # Makefile for the x86 low level vsyscall code 4 4 # 5 - obj-y := vsyscall_gtod.o 6 - 7 5 obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o 8 6
-83
arch/x86/entry/vsyscall/vsyscall_gtod.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE 4 - * Copyright 2003 Andi Kleen, SuSE Labs. 5 - * 6 - * Modified for x86 32 bit architecture by 7 - * Stefani Seibold <stefani@seibold.net> 8 - * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany 9 - * 10 - * Thanks to hpa@transmeta.com for some useful hint. 11 - * Special thanks to Ingo Molnar for his early experience with 12 - * a different vsyscall implementation for Linux/IA32 and for the name. 13 - * 14 - */ 15 - 16 - #include <linux/timekeeper_internal.h> 17 - #include <asm/vgtod.h> 18 - #include <asm/vvar.h> 19 - 20 - int vclocks_used __read_mostly; 21 - 22 - DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); 23 - 24 - void update_vsyscall_tz(void) 25 - { 26 - vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest; 27 - vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime; 28 - } 29 - 30 - void update_vsyscall(struct timekeeper *tk) 31 - { 32 - int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; 33 - struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; 34 - struct vgtod_ts *base; 35 - u64 nsec; 36 - 37 - /* Mark the new vclock used. 
*/ 38 - BUILD_BUG_ON(VCLOCK_MAX >= 32); 39 - WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); 40 - 41 - gtod_write_begin(vdata); 42 - 43 - /* copy vsyscall data */ 44 - vdata->vclock_mode = vclock_mode; 45 - vdata->cycle_last = tk->tkr_mono.cycle_last; 46 - vdata->mask = tk->tkr_mono.mask; 47 - vdata->mult = tk->tkr_mono.mult; 48 - vdata->shift = tk->tkr_mono.shift; 49 - 50 - base = &vdata->basetime[CLOCK_REALTIME]; 51 - base->sec = tk->xtime_sec; 52 - base->nsec = tk->tkr_mono.xtime_nsec; 53 - 54 - base = &vdata->basetime[CLOCK_TAI]; 55 - base->sec = tk->xtime_sec + (s64)tk->tai_offset; 56 - base->nsec = tk->tkr_mono.xtime_nsec; 57 - 58 - base = &vdata->basetime[CLOCK_MONOTONIC]; 59 - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; 60 - nsec = tk->tkr_mono.xtime_nsec; 61 - nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); 62 - while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { 63 - nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; 64 - base->sec++; 65 - } 66 - base->nsec = nsec; 67 - 68 - base = &vdata->basetime[CLOCK_REALTIME_COARSE]; 69 - base->sec = tk->xtime_sec; 70 - base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; 71 - 72 - base = &vdata->basetime[CLOCK_MONOTONIC_COARSE]; 73 - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; 74 - nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; 75 - nsec += tk->wall_to_monotonic.tv_nsec; 76 - while (nsec >= NSEC_PER_SEC) { 77 - nsec -= NSEC_PER_SEC; 78 - base->sec++; 79 - } 80 - base->nsec = nsec; 81 - 82 - gtod_write_end(vdata); 83 - }
+1 -1
arch/x86/include/asm/pvclock.h
··· 2 2 #ifndef _ASM_X86_PVCLOCK_H 3 3 #define _ASM_X86_PVCLOCK_H 4 4 5 - #include <linux/clocksource.h> 5 + #include <asm/clocksource.h> 6 6 #include <asm/pvclock-abi.h> 7 7 8 8 /* some helper functions for xen and kvm pv clock sources */
+191
arch/x86/include/asm/vdso/gettimeofday.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Fast user context implementation of clock_gettime, gettimeofday, and time. 4 + * 5 + * Copyright (C) 2019 ARM Limited. 6 + * Copyright 2006 Andi Kleen, SUSE Labs. 7 + * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> 8 + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany 9 + */ 10 + #ifndef __ASM_VDSO_GETTIMEOFDAY_H 11 + #define __ASM_VDSO_GETTIMEOFDAY_H 12 + 13 + #ifndef __ASSEMBLY__ 14 + 15 + #include <uapi/linux/time.h> 16 + #include <asm/vgtod.h> 17 + #include <asm/vvar.h> 18 + #include <asm/unistd.h> 19 + #include <asm/msr.h> 20 + #include <asm/pvclock.h> 21 + #include <asm/mshyperv.h> 22 + 23 + #define __vdso_data (VVAR(_vdso_data)) 24 + 25 + #define VDSO_HAS_TIME 1 26 + 27 + #ifdef CONFIG_PARAVIRT_CLOCK 28 + extern u8 pvclock_page[PAGE_SIZE] 29 + __attribute__((visibility("hidden"))); 30 + #endif 31 + 32 + #ifdef CONFIG_HYPERV_TSCPAGE 33 + extern u8 hvclock_page[PAGE_SIZE] 34 + __attribute__((visibility("hidden"))); 35 + #endif 36 + 37 + #ifndef BUILD_VDSO32 38 + 39 + static __always_inline 40 + long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) 41 + { 42 + long ret; 43 + 44 + asm ("syscall" : "=a" (ret), "=m" (*_ts) : 45 + "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : 46 + "rcx", "r11"); 47 + 48 + return ret; 49 + } 50 + 51 + static __always_inline 52 + long gettimeofday_fallback(struct __kernel_old_timeval *_tv, 53 + struct timezone *_tz) 54 + { 55 + long ret; 56 + 57 + asm("syscall" : "=a" (ret) : 58 + "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); 59 + 60 + return ret; 61 + } 62 + 63 + #else 64 + 65 + static __always_inline 66 + long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) 67 + { 68 + long ret; 69 + 70 + asm ( 71 + "mov %%ebx, %%edx \n" 72 + "mov %[clock], %%ebx \n" 73 + "call __kernel_vsyscall \n" 74 + "mov %%edx, %%ebx \n" 75 + : "=a" (ret), "=m" (*_ts) 76 + : "0" (__NR_clock_gettime64), [clock] 
"g" (_clkid), "c" (_ts) 77 + : "edx"); 78 + 79 + return ret; 80 + } 81 + 82 + static __always_inline 83 + long gettimeofday_fallback(struct __kernel_old_timeval *_tv, 84 + struct timezone *_tz) 85 + { 86 + long ret; 87 + 88 + asm( 89 + "mov %%ebx, %%edx \n" 90 + "mov %2, %%ebx \n" 91 + "call __kernel_vsyscall \n" 92 + "mov %%edx, %%ebx \n" 93 + : "=a" (ret) 94 + : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) 95 + : "memory", "edx"); 96 + 97 + return ret; 98 + } 99 + 100 + #endif 101 + 102 + #ifdef CONFIG_PARAVIRT_CLOCK 103 + static const struct pvclock_vsyscall_time_info *get_pvti0(void) 104 + { 105 + return (const struct pvclock_vsyscall_time_info *)&pvclock_page; 106 + } 107 + 108 + static u64 vread_pvclock(void) 109 + { 110 + const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; 111 + u32 version; 112 + u64 ret; 113 + 114 + /* 115 + * Note: The kernel and hypervisor must guarantee that cpu ID 116 + * number maps 1:1 to per-CPU pvclock time info. 117 + * 118 + * Because the hypervisor is entirely unaware of guest userspace 119 + * preemption, it cannot guarantee that per-CPU pvclock time 120 + * info is updated if the underlying CPU changes or that that 121 + * version is increased whenever underlying CPU changes. 122 + * 123 + * On KVM, we are guaranteed that pvti updates for any vCPU are 124 + * atomic as seen by *all* vCPUs. This is an even stronger 125 + * guarantee than we get with a normal seqlock. 126 + * 127 + * On Xen, we don't appear to have that guarantee, but Xen still 128 + * supplies a valid seqlock using the version field. 129 + * 130 + * We only do pvclock vdso timing at all if 131 + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to 132 + * mean that all vCPUs have matching pvti and that the TSC is 133 + * synced, so we can just look at vCPU 0's pvti. 
134 + */ 135 + 136 + do { 137 + version = pvclock_read_begin(pvti); 138 + 139 + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) 140 + return U64_MAX; 141 + 142 + ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); 143 + } while (pvclock_read_retry(pvti, version)); 144 + 145 + return ret; 146 + } 147 + #endif 148 + 149 + #ifdef CONFIG_HYPERV_TSCPAGE 150 + static u64 vread_hvclock(void) 151 + { 152 + const struct ms_hyperv_tsc_page *tsc_pg = 153 + (const struct ms_hyperv_tsc_page *)&hvclock_page; 154 + 155 + return hv_read_tsc_page(tsc_pg); 156 + } 157 + #endif 158 + 159 + static inline u64 __arch_get_hw_counter(s32 clock_mode) 160 + { 161 + if (clock_mode == VCLOCK_TSC) 162 + return (u64)rdtsc_ordered(); 163 + /* 164 + * For any memory-mapped vclock type, we need to make sure that gcc 165 + * doesn't cleverly hoist a load before the mode check. Otherwise we 166 + * might end up touching the memory-mapped page even if the vclock in 167 + * question isn't enabled, which will segfault. Hence the barriers. 168 + */ 169 + #ifdef CONFIG_PARAVIRT_CLOCK 170 + if (clock_mode == VCLOCK_PVCLOCK) { 171 + barrier(); 172 + return vread_pvclock(); 173 + } 174 + #endif 175 + #ifdef CONFIG_HYPERV_TSCPAGE 176 + if (clock_mode == VCLOCK_HVCLOCK) { 177 + barrier(); 178 + return vread_hvclock(); 179 + } 180 + #endif 181 + return U64_MAX; 182 + } 183 + 184 + static __always_inline const struct vdso_data *__arch_get_vdso_data(void) 185 + { 186 + return __vdso_data; 187 + } 188 + 189 + #endif /* !__ASSEMBLY__ */ 190 + 191 + #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
+44
arch/x86/include/asm/vdso/vsyscall.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __ASM_VDSO_VSYSCALL_H 3 + #define __ASM_VDSO_VSYSCALL_H 4 + 5 + #ifndef __ASSEMBLY__ 6 + 7 + #include <linux/hrtimer.h> 8 + #include <linux/timekeeper_internal.h> 9 + #include <vdso/datapage.h> 10 + #include <asm/vgtod.h> 11 + #include <asm/vvar.h> 12 + 13 + int vclocks_used __read_mostly; 14 + 15 + DEFINE_VVAR(struct vdso_data, _vdso_data); 16 + /* 17 + * Update the vDSO data page to keep in sync with kernel timekeeping. 18 + */ 19 + static __always_inline 20 + struct vdso_data *__x86_get_k_vdso_data(void) 21 + { 22 + return _vdso_data; 23 + } 24 + #define __arch_get_k_vdso_data __x86_get_k_vdso_data 25 + 26 + static __always_inline 27 + int __x86_get_clock_mode(struct timekeeper *tk) 28 + { 29 + int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; 30 + 31 + /* Mark the new vclock used. */ 32 + BUILD_BUG_ON(VCLOCK_MAX >= 32); 33 + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); 34 + 35 + return vclock_mode; 36 + } 37 + #define __arch_get_clock_mode __x86_get_clock_mode 38 + 39 + /* The asm-generic header needs to be included after the definitions above */ 40 + #include <asm-generic/vdso/vsyscall.h> 41 + 42 + #endif /* !__ASSEMBLY__ */ 43 + 44 + #endif /* __ASM_VDSO_VSYSCALL_H */
+3 -72
arch/x86/include/asm/vgtod.h
··· 3 3 #define _ASM_X86_VGTOD_H 4 4 5 5 #include <linux/compiler.h> 6 - #include <linux/clocksource.h> 6 + #include <asm/clocksource.h> 7 + #include <vdso/datapage.h> 8 + #include <vdso/helpers.h> 7 9 8 10 #include <uapi/linux/time.h> 9 11 ··· 15 13 typedef unsigned long gtod_long_t; 16 14 #endif 17 15 18 - /* 19 - * There is one of these objects in the vvar page for each 20 - * vDSO-accelerated clockid. For high-resolution clocks, this encodes 21 - * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse 22 - * clocks, this encodes the actual time. 23 - * 24 - * To confuse the reader, for high-resolution clocks, nsec is left-shifted 25 - * by vsyscall_gtod_data.shift. 26 - */ 27 - struct vgtod_ts { 28 - u64 sec; 29 - u64 nsec; 30 - }; 31 - 32 - #define VGTOD_BASES (CLOCK_TAI + 1) 33 - #define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) 34 - #define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) 35 - 36 - /* 37 - * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time 38 - * so be carefull by modifying this structure. 
39 - */ 40 - struct vsyscall_gtod_data { 41 - unsigned int seq; 42 - 43 - int vclock_mode; 44 - u64 cycle_last; 45 - u64 mask; 46 - u32 mult; 47 - u32 shift; 48 - 49 - struct vgtod_ts basetime[VGTOD_BASES]; 50 - 51 - int tz_minuteswest; 52 - int tz_dsttime; 53 - }; 54 - extern struct vsyscall_gtod_data vsyscall_gtod_data; 55 - 56 16 extern int vclocks_used; 57 17 static inline bool vclock_was_used(int vclock) 58 18 { 59 19 return READ_ONCE(vclocks_used) & (1 << vclock); 60 - } 61 - 62 - static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) 63 - { 64 - unsigned int ret; 65 - 66 - repeat: 67 - ret = READ_ONCE(s->seq); 68 - if (unlikely(ret & 1)) { 69 - cpu_relax(); 70 - goto repeat; 71 - } 72 - smp_rmb(); 73 - return ret; 74 - } 75 - 76 - static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, 77 - unsigned int start) 78 - { 79 - smp_rmb(); 80 - return unlikely(s->seq != start); 81 - } 82 - 83 - static inline void gtod_write_begin(struct vsyscall_gtod_data *s) 84 - { 85 - ++s->seq; 86 - smp_wmb(); 87 - } 88 - 89 - static inline void gtod_write_end(struct vsyscall_gtod_data *s) 90 - { 91 - smp_wmb(); 92 - ++s->seq; 93 20 } 94 21 95 22 #endif /* _ASM_X86_VGTOD_H */
+4 -3
arch/x86/include/asm/vvar.h
··· 32 32 extern char __vvar_page; 33 33 34 34 #define DECLARE_VVAR(offset, type, name) \ 35 - extern type vvar_ ## name __attribute__((visibility("hidden"))); 35 + extern type vvar_ ## name[CS_BASES] \ 36 + __attribute__((visibility("hidden"))); 36 37 37 38 #define VVAR(name) (vvar_ ## name) 38 39 39 40 #define DEFINE_VVAR(type, name) \ 40 - type name \ 41 + type name[CS_BASES] \ 41 42 __attribute__((section(".vvar_" #name), aligned(16))) __visible 42 43 43 44 #endif 44 45 45 46 /* DECLARE_VVAR(offset, type, name) */ 46 47 47 - DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) 48 + DECLARE_VVAR(128, struct vdso_data, _vdso_data) 48 49 49 50 #undef DECLARE_VVAR 50 51
+1
arch/x86/kernel/pvclock.c
··· 3 3 4 4 */ 5 5 6 + #include <linux/clocksource.h> 6 7 #include <linux/kernel.h> 7 8 #include <linux/percpu.h> 8 9 #include <linux/notifier.h>