Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] time: x86_64: re-enable vsyscall support for x86_64

Cleanup and re-enable vsyscall gettimeofday using the generic clocksource
infrastructure.

[akpm@osdl.org: cleanup]
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <ak@muc.de>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by John Stultz and committed by Linus Torvalds
7460ed28 1489939f

+106 -100
+4
arch/x86_64/Kconfig
··· 28 28 bool 29 29 default y 30 30 31 + config GENERIC_TIME_VSYSCALL 32 + bool 33 + default y 34 + 31 35 config ZONE_DMA32 32 36 bool 33 37 default y
+6
arch/x86_64/kernel/hpet.c
··· 458 458 return (cycle_t)readl(hpet_ptr); 459 459 } 460 460 461 + static cycle_t __vsyscall_fn vread_hpet(void) 462 + { 463 + return readl((void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 464 + } 465 + 461 466 struct clocksource clocksource_hpet = { 462 467 .name = "hpet", 463 468 .rating = 250, ··· 471 466 .mult = 0, /* set below */ 472 467 .shift = HPET_SHIFT, 473 468 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 469 + .vread = vread_hpet, 474 470 }; 475 471 476 472 static int __init init_hpet_clocksource(void)
-6
arch/x86_64/kernel/time.c
··· 53 53 EXPORT_SYMBOL(rtc_lock); 54 54 DEFINE_SPINLOCK(i8253_lock); 55 55 56 - unsigned long vxtime_hz = PIT_TICK_RATE; 57 - 58 - struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ 59 - 60 56 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; 61 - struct timespec __xtime __section_xtime; 62 - struct timezone __sys_tz __section_sys_tz; 63 57 64 58 unsigned long profile_pc(struct pt_regs *regs) 65 59 {
+7
arch/x86_64/kernel/tsc.c
··· 180 180 return ret; 181 181 } 182 182 183 + static cycle_t __vsyscall_fn vread_tsc(void) 184 + { 185 + cycle_t ret = (cycle_t)get_cycles_sync(); 186 + return ret; 187 + } 188 + 183 189 static struct clocksource clocksource_tsc = { 184 190 .name = "tsc", 185 191 .rating = 300, ··· 194 188 .shift = 22, 195 189 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 196 190 CLOCK_SOURCE_MUST_VERIFY, 191 + .vread = vread_tsc, 197 192 }; 198 193 199 194 void mark_tsc_unstable(void)
+11 -17
arch/x86_64/kernel/vmlinux.lds.S
··· 88 88 __vsyscall_0 = VSYSCALL_VIRT_ADDR; 89 89 90 90 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 91 - .xtime_lock : AT(VLOAD(.xtime_lock)) { *(.xtime_lock) } 92 - xtime_lock = VVIRT(.xtime_lock); 93 - 94 - .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } 95 - vxtime = VVIRT(.vxtime); 91 + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } 92 + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 93 + .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) 94 + { *(.vsyscall_gtod_data) } 95 + vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); 96 96 97 97 .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } 98 98 vgetcpu_mode = VVIRT(.vgetcpu_mode); 99 - 100 - .sys_tz : AT(VLOAD(.sys_tz)) { *(.sys_tz) } 101 - sys_tz = VVIRT(.sys_tz); 102 - 103 - .sysctl_vsyscall : AT(VLOAD(.sysctl_vsyscall)) { *(.sysctl_vsyscall) } 104 - sysctl_vsyscall = VVIRT(.sysctl_vsyscall); 105 - 106 - .xtime : AT(VLOAD(.xtime)) { *(.xtime) } 107 - xtime = VVIRT(.xtime); 108 99 109 100 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); 110 101 .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } 111 102 jiffies = VVIRT(.jiffies); 112 103 113 - .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) } 114 - .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) } 115 - .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) } 104 + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) 105 + { *(.vsyscall_1) } 106 + .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) 107 + { *(.vsyscall_2) } 108 + .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) 109 + { *(.vsyscall_3) } 116 110 117 111 . = VSYSCALL_VIRT_ADDR + 4096; 118 112
+73 -50
arch/x86_64/kernel/vsyscall.c
··· 26 26 #include <linux/seqlock.h> 27 27 #include <linux/jiffies.h> 28 28 #include <linux/sysctl.h> 29 + #include <linux/clocksource.h> 29 30 #include <linux/getcpu.h> 30 31 #include <linux/cpu.h> 31 32 #include <linux/smp.h> ··· 35 34 #include <asm/vsyscall.h> 36 35 #include <asm/pgtable.h> 37 36 #include <asm/page.h> 37 + #include <asm/unistd.h> 38 38 #include <asm/fixmap.h> 39 39 #include <asm/errno.h> 40 40 #include <asm/io.h> ··· 46 44 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 47 45 #define __syscall_clobber "r11","rcx","memory" 48 46 49 - int __sysctl_vsyscall __section_sysctl_vsyscall = 1; 50 - seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; 47 + struct vsyscall_gtod_data_t { 48 + seqlock_t lock; 49 + int sysctl_enabled; 50 + struct timeval wall_time_tv; 51 + struct timezone sys_tz; 52 + cycle_t offset_base; 53 + struct clocksource clock; 54 + }; 51 55 int __vgetcpu_mode __section_vgetcpu_mode; 52 56 53 - #include <asm/unistd.h> 54 - 55 - static __always_inline void timeval_normalize(struct timeval * tv) 57 + struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = 56 58 { 57 - time_t __sec; 59 + .lock = SEQLOCK_UNLOCKED, 60 + .sysctl_enabled = 1, 61 + }; 58 62 59 - __sec = tv->tv_usec / 1000000; 60 - if (__sec) { 61 - tv->tv_usec %= 1000000; 62 - tv->tv_sec += __sec; 63 - } 63 + void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) 64 + { 65 + unsigned long flags; 66 + 67 + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); 68 + /* copy vsyscall data */ 69 + vsyscall_gtod_data.clock = *clock; 70 + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; 71 + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; 72 + vsyscall_gtod_data.sys_tz = sys_tz; 73 + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 64 74 } 65 75 66 - static __always_inline void do_vgettimeofday(struct timeval * tv) 67 - { 68 - long sequence, t; 69 - unsigned long sec, usec;
70 - 71 - do { 72 - sequence = read_seqbegin(&__xtime_lock); 73 - 74 - sec = __xtime.tv_sec; 75 - usec = __xtime.tv_nsec / 1000; 76 - 77 - if (__vxtime.mode != VXTIME_HPET) { 78 - t = get_cycles_sync(); 79 - if (t < __vxtime.last_tsc) 80 - t = __vxtime.last_tsc; 81 - usec += ((t - __vxtime.last_tsc) * 82 - __vxtime.tsc_quot) >> 32; 83 - /* See comment in x86_64 do_gettimeofday. */ 84 - } else { 85 - usec += ((readl((void __iomem *) 86 - fix_to_virt(VSYSCALL_HPET) + 0xf0) - 87 - __vxtime.last) * __vxtime.quot) >> 32; 88 - } 89 - } while (read_seqretry(&__xtime_lock, sequence)); 90 - 91 - tv->tv_sec = sec + usec / 1000000; 92 - tv->tv_usec = usec % 1000000; 93 - } 94 - 95 - /* RED-PEN may want to readd seq locking, but then the variable should be write-once. */ 76 + /* RED-PEN may want to readd seq locking, but then the variable should be 77 + * write-once. 78 + */ 96 79 static __always_inline void do_get_tz(struct timezone * tz) 97 80 { 98 - *tz = __sys_tz; 81 + *tz = __vsyscall_gtod_data.sys_tz; 99 82 } 100 83 101 84 static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) ··· 88 101 int ret; 89 102 asm volatile("vsysc2: syscall" 90 103 : "=a" (ret) 91 - : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber ); 104 + : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) 105 + : __syscall_clobber ); 92 106 return ret; 93 107 } 94 108 ··· 102 114 return secs; 103 115 } 104 116 117 + static __always_inline void do_vgettimeofday(struct timeval * tv) 118 + { 119 + cycle_t now, base, mask, cycle_delta; 120 + unsigned long seq, mult, shift, nsec_delta; 121 + cycle_t (*vread)(void); 122 + do { 123 + seq = read_seqbegin(&__vsyscall_gtod_data.lock); 124 + 125 + vread = __vsyscall_gtod_data.clock.vread; 126 + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) { 127 + gettimeofday(tv,0); 128 + return; 129 + } 130 + now = vread(); 131 + base = __vsyscall_gtod_data.clock.cycle_last; 132 + mask = __vsyscall_gtod_data.clock.mask;
133 + mult = __vsyscall_gtod_data.clock.mult; 134 + shift = __vsyscall_gtod_data.clock.shift; 135 + 136 + *tv = __vsyscall_gtod_data.wall_time_tv; 137 + 138 + } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); 139 + 140 + /* calculate interval: */ 141 + cycle_delta = (now - base) & mask; 142 + /* convert to nsecs: */ 143 + nsec_delta = (cycle_delta * mult) >> shift; 144 + 145 + /* convert to usecs and add to timespec: */ 146 + tv->tv_usec += nsec_delta / NSEC_PER_USEC; 147 + while (tv->tv_usec > USEC_PER_SEC) { 148 + tv->tv_sec += 1; 149 + tv->tv_usec -= USEC_PER_SEC; 150 + } 151 + } 152 + 105 153 int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) 106 154 { 107 - if (!__sysctl_vsyscall) 108 - return gettimeofday(tv,tz); 109 155 if (tv) 110 156 do_vgettimeofday(tv); 111 157 if (tz) ··· 151 129 * unlikely */ 152 130 time_t __vsyscall(1) vtime(time_t *t) 153 131 { 154 - if (!__sysctl_vsyscall) 132 + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) 155 133 return time_syscall(t); 156 134 else if (t) 157 - *t = __xtime.tv_sec; 158 - return __xtime.tv_sec; 135 + *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; 136 + return __vsyscall_gtod_data.wall_time_tv.tv_sec; 159 137 } 160 138 161 139 /* Fast way to get current CPU and node. ··· 232 210 ret = -ENOMEM; 233 211 goto out; 234 212 } 235 - if (!sysctl_vsyscall) { 213 + if (!vsyscall_gtod_data.sysctl_enabled) { 236 214 writew(SYSCALL, map1); 237 215 writew(SYSCALL, map2); 238 216 } else { ··· 254 232 255 233 static ctl_table kernel_table2[] = { 256 234 { .ctl_name = 99, .procname = "vsyscall64", 257 - .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644, 235 + .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), 236 + .mode = 0644, 258 237 .strategy = vsyscall_sysctl_nostrat, 259 238 .proc_handler = vsyscall_sysctl_change }, 260 239 {}
-2
include/asm-x86_64/proto.h
··· 45 45 #else 46 46 #define pmtmr_ioport 0 47 47 #endif 48 - extern int sysctl_vsyscall; 49 48 extern int nohpet; 50 - extern unsigned long vxtime_hz; 51 49 52 50 extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2))); 53 51
-1
include/asm-x86_64/timex.h
··· 27 27 #define NS_SCALE 10 /* 2^10, carefully chosen */ 28 28 #define US_SCALE 32 /* 2^32, arbitralrily chosen */ 29 29 30 - extern struct vxtime_data vxtime; 31 30 extern void mark_tsc_unstable(void); 32 31 extern void set_cyc2ns_scale(unsigned long khz); 33 32 #endif
+5 -24
include/asm-x86_64/vsyscall.h
··· 16 16 #ifdef __KERNEL__ 17 17 #include <linux/seqlock.h> 18 18 19 - #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16))) 20 19 #define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) 21 20 #define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) 22 - #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16))) 23 - #define __section_sysctl_vsyscall __attribute__ ((unused, __section__ (".sysctl_vsyscall"), aligned(16))) 24 - #define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16))) 25 - #define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(16))) 26 21 27 - #define VXTIME_TSC 1 28 - #define VXTIME_HPET 2 29 - #define VXTIME_PMTMR 3 22 + /* Definitions for CONFIG_GENERIC_TIME definitions */ 23 + #define __section_vsyscall_gtod_data __attribute__ \ 24 + ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) 25 + #define __vsyscall_fn __attribute__ ((unused,__section__(".vsyscall_fn"))) 30 26 31 27 #define VGETCPU_RDTSCP 1 32 28 #define VGETCPU_LSL 2 33 29 34 - struct vxtime_data { 35 - long hpet_address; /* HPET base address */ 36 - int last; 37 - unsigned long last_tsc; 38 - long quot; 39 - long tsc_quot; 40 - int mode; 41 - }; 42 - 43 30 #define hpet_readl(a) readl((const void __iomem *)fix_to_virt(FIX_HPET_BASE) + a) 44 31 #define hpet_writel(d,a) writel(d, (void __iomem *)fix_to_virt(FIX_HPET_BASE) + a) 45 32 46 - /* vsyscall space (readonly) */ 47 - extern struct vxtime_data __vxtime; 48 33 extern int __vgetcpu_mode; 49 - extern struct timespec __xtime; 50 34 extern volatile unsigned long __jiffies; 51 - extern struct timezone __sys_tz; 52 - extern seqlock_t __xtime_lock; 53 35 54 36 /* kernel space (writeable) */ 55 - extern struct vxtime_data vxtime; 56 37 extern int vgetcpu_mode; 57 38 extern struct timezone sys_tz; 58 - extern int sysctl_vsyscall; 39 + extern struct vsyscall_gtod_data_t vsyscall_gtod_data; 59 40 60 41 #endif /* __KERNEL__ */ 61 42