Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86-64: Move vread_tsc and vread_hpet into the vDSO

The vsyscall page now consists entirely of trap instructions.

Cc: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Link: http://lkml.kernel.org/r/637648f303f2ef93af93bae25186e9a1bea093f5.1310639973.git.luto@mit.edu
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

Authored by Andy Lutomirski and committed by H. Peter Anvin
98d0ac38 433bd805

+57 -79
+5 -1
arch/x86/include/asm/clocksource.h
··· 7 7 8 8 #define __ARCH_HAS_CLOCKSOURCE_DATA 9 9 10 + #define VCLOCK_NONE 0 /* No vDSO clock available. */ 11 + #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ 12 + #define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ 13 + 10 14 struct arch_clocksource_data { 11 - cycle_t (*vread)(void); 15 + int vclock_mode; 12 16 }; 13 17 14 18 #endif /* CONFIG_X86_64 */
-4
arch/x86/include/asm/tsc.h
··· 51 51 extern int check_tsc_unstable(void); 52 52 extern unsigned long native_calibrate_tsc(void); 53 53 54 - #ifdef CONFIG_X86_64 55 - extern cycles_t vread_tsc(void); 56 - #endif 57 - 58 54 /* 59 55 * Boot-time check whether the TSCs are synchronized across 60 56 * all CPUs/cores:
+1 -1
arch/x86/include/asm/vgtod.h
··· 13 13 14 14 struct timezone sys_tz; 15 15 struct { /* extract of a clocksource struct */ 16 - cycle_t (*vread)(void); 16 + int vclock_mode; 17 17 cycle_t cycle_last; 18 18 cycle_t mask; 19 19 u32 mult;
-4
arch/x86/include/asm/vsyscall.h
··· 16 16 #ifdef __KERNEL__ 17 17 #include <linux/seqlock.h> 18 18 19 - /* Definitions for CONFIG_GENERIC_TIME definitions */ 20 - #define __vsyscall_fn \ 21 - __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace 22 - 23 19 #define VGETCPU_RDTSCP 1 24 20 #define VGETCPU_LSL 2 25 21
+1 -6
arch/x86/kernel/Makefile
··· 24 24 nostackp := $(call cc-option, -fno-stack-protector) 25 25 CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 26 26 CFLAGS_hpet.o := $(nostackp) 27 - CFLAGS_vread_tsc_64.o := $(nostackp) 28 27 CFLAGS_paravirt.o := $(nostackp) 29 28 GCOV_PROFILE_vsyscall_64.o := n 30 29 GCOV_PROFILE_hpet.o := n 31 30 GCOV_PROFILE_tsc.o := n 32 - GCOV_PROFILE_vread_tsc_64.o := n 33 31 GCOV_PROFILE_paravirt.o := n 34 - 35 - # vread_tsc_64 is hot and should be fully optimized: 36 - CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls 37 32 38 33 obj-y := process_$(BITS).o signal.o entry_$(BITS).o 39 34 obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o ··· 38 43 obj-y += probe_roms.o 39 44 obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 40 45 obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 41 - obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o 46 + obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o 42 47 obj-$(CONFIG_X86_64) += vsyscall_emu_64.o 43 48 obj-y += bootflag.o e820.o 44 49 obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
-8
arch/x86/kernel/alternative.c
··· 14 14 #include <asm/pgtable.h> 15 15 #include <asm/mce.h> 16 16 #include <asm/nmi.h> 17 - #include <asm/vsyscall.h> 18 17 #include <asm/cacheflush.h> 19 18 #include <asm/tlbflush.h> 20 19 #include <asm/io.h> ··· 249 250 250 251 extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 251 252 extern s32 __smp_locks[], __smp_locks_end[]; 252 - extern char __vsyscall_0; 253 253 void *text_poke_early(void *addr, const void *opcode, size_t len); 254 254 255 255 /* Replace instructions with better alternatives for this CPU type. ··· 292 294 add_nops(insnbuf + a->replacementlen, 293 295 a->instrlen - a->replacementlen); 294 296 295 - #ifdef CONFIG_X86_64 296 - /* vsyscall code is not mapped yet. resolve it manually. */ 297 - if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { 298 - instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); 299 - } 300 - #endif 301 297 text_poke_early(instr, insnbuf, a->instrlen); 302 298 } 303 299 }
+1 -8
arch/x86/kernel/hpet.c
··· 738 738 return (cycle_t)hpet_readl(HPET_COUNTER); 739 739 } 740 740 741 - #ifdef CONFIG_X86_64 742 - static cycle_t __vsyscall_fn vread_hpet(void) 743 - { 744 - return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 745 - } 746 - #endif 747 - 748 741 static struct clocksource clocksource_hpet = { 749 742 .name = "hpet", 750 743 .rating = 250, ··· 746 753 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 747 754 .resume = hpet_resume_counter, 748 755 #ifdef CONFIG_X86_64 749 - .archdata = { .vread = vread_hpet }, 756 + .archdata = { .vclock_mode = VCLOCK_HPET }, 750 757 #endif 751 758 }; 752 759
+1 -1
arch/x86/kernel/tsc.c
··· 777 777 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 778 778 CLOCK_SOURCE_MUST_VERIFY, 779 779 #ifdef CONFIG_X86_64 780 - .archdata = { .vread = vread_tsc }, 780 + .archdata = { .vclock_mode = VCLOCK_TSC }, 781 781 #endif 782 782 }; 783 783
-3
arch/x86/kernel/vmlinux.lds.S
··· 169 169 .vsyscall : AT(VLOAD(.vsyscall)) { 170 170 *(.vsyscall_0) 171 171 172 - . = ALIGN(L1_CACHE_BYTES); 173 - *(.vsyscall_fn) 174 - 175 172 . = 1024; 176 173 *(.vsyscall_1) 177 174
-36
arch/x86/kernel/vread_tsc_64.c
··· 1 - /* This code runs in userspace. */ 2 - 3 - #define DISABLE_BRANCH_PROFILING 4 - #include <asm/vgtod.h> 5 - 6 - notrace cycle_t __vsyscall_fn vread_tsc(void) 7 - { 8 - cycle_t ret; 9 - u64 last; 10 - 11 - /* 12 - * Empirically, a fence (of type that depends on the CPU) 13 - * before rdtsc is enough to ensure that rdtsc is ordered 14 - * with respect to loads. The various CPU manuals are unclear 15 - * as to whether rdtsc can be reordered with later loads, 16 - * but no one has ever seen it happen. 17 - */ 18 - rdtsc_barrier(); 19 - ret = (cycle_t)vget_cycles(); 20 - 21 - last = VVAR(vsyscall_gtod_data).clock.cycle_last; 22 - 23 - if (likely(ret >= last)) 24 - return ret; 25 - 26 - /* 27 - * GCC likes to generate cmov here, but this branch is extremely 28 - * predictable (it's just a funciton of time and the likely is 29 - * very likely) and there's a data dependence, so force GCC 30 - * to generate a branch instead. I don't barrier() because 31 - * we don't actually need a barrier, and if this function 32 - * ever gets inlined it will generate worse code. 33 - */ 34 - asm volatile (""); 35 - return last; 36 - }
+1 -1
arch/x86/kernel/vsyscall_64.c
··· 74 74 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); 75 75 76 76 /* copy vsyscall data */ 77 - vsyscall_gtod_data.clock.vread = clock->archdata.vread; 77 + vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; 78 78 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; 79 79 vsyscall_gtod_data.clock.mask = clock->mask; 80 80 vsyscall_gtod_data.clock.mult = mult;
+47 -6
arch/x86/vdso/vclock_gettime.c
··· 17 17 #include <linux/time.h> 18 18 #include <linux/string.h> 19 19 #include <asm/vsyscall.h> 20 + #include <asm/fixmap.h> 20 21 #include <asm/vgtod.h> 21 22 #include <asm/timex.h> 22 23 #include <asm/hpet.h> ··· 25 24 #include <asm/io.h> 26 25 27 26 #define gtod (&VVAR(vsyscall_gtod_data)) 27 + 28 + notrace static cycle_t vread_tsc(void) 29 + { 30 + cycle_t ret; 31 + u64 last; 32 + 33 + /* 34 + * Empirically, a fence (of type that depends on the CPU) 35 + * before rdtsc is enough to ensure that rdtsc is ordered 36 + * with respect to loads. The various CPU manuals are unclear 37 + * as to whether rdtsc can be reordered with later loads, 38 + * but no one has ever seen it happen. 39 + */ 40 + rdtsc_barrier(); 41 + ret = (cycle_t)vget_cycles(); 42 + 43 + last = VVAR(vsyscall_gtod_data).clock.cycle_last; 44 + 45 + if (likely(ret >= last)) 46 + return ret; 47 + 48 + /* 49 + * GCC likes to generate cmov here, but this branch is extremely 50 + * predictable (it's just a funciton of time and the likely is 51 + * very likely) and there's a data dependence, so force GCC 52 + * to generate a branch instead. I don't barrier() because 53 + * we don't actually need a barrier, and if this function 54 + * ever gets inlined it will generate worse code. 
55 + */ 56 + asm volatile (""); 57 + return last; 58 + } 59 + 60 + static notrace cycle_t vread_hpet(void) 61 + { 62 + return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 63 + } 28 64 29 65 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 30 66 { ··· 74 36 notrace static inline long vgetns(void) 75 37 { 76 38 long v; 77 - cycles_t (*vread)(void); 78 - vread = gtod->clock.vread; 79 - v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; 39 + cycles_t cycles; 40 + if (gtod->clock.vclock_mode == VCLOCK_TSC) 41 + cycles = vread_tsc(); 42 + else 43 + cycles = vread_hpet(); 44 + v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; 80 45 return (v * gtod->clock.mult) >> gtod->clock.shift; 81 46 } 82 47 ··· 159 118 { 160 119 switch (clock) { 161 120 case CLOCK_REALTIME: 162 - if (likely(gtod->clock.vread)) 121 + if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) 163 122 return do_realtime(ts); 164 123 break; 165 124 case CLOCK_MONOTONIC: 166 - if (likely(gtod->clock.vread)) 125 + if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) 167 126 return do_monotonic(ts); 168 127 break; 169 128 case CLOCK_REALTIME_COARSE: ··· 180 139 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 181 140 { 182 141 long ret; 183 - if (likely(gtod->clock.vread)) { 142 + if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) { 184 143 if (likely(tv != NULL)) { 185 144 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != 186 145 offsetof(struct timespec, tv_nsec) ||