Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86, vdso: Add 32 bit VDSO time support for 64 bit kernel

This patch adds VDSO time support for the IA32 emulation layer.

Due to the nature of the kernel headers and the LP64 compiler, where the
sizes of a long and a pointer differ from those under a 32 bit compiler,
some type hacking is necessary for optimal performance.

The vsyscall_gtod_data structure must be rearranged to serve 32- and
64-bit code access at the same time:

- The seqcount_t was replaced by an unsigned; this makes
vsyscall_gtod_data independent of the kernel configuration and internal functions.
- All kernel internal structures are replaced by fixed size elements
which work for both 32- and 64-bit access
- The inner struct clock was removed to pack the whole struct.

The "unsigned seq" is handled by functions derived from seqcount_t.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Link: http://lkml.kernel.org/r/1395094933-14252-11-git-send-email-stefani@seibold.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

authored by

Stefani Seibold and committed by
H. Peter Anvin
7c03156f 7a59ed41

+155 -67
+57 -14
arch/x86/include/asm/vgtod.h
··· 1 1 #ifndef _ASM_X86_VGTOD_H 2 2 #define _ASM_X86_VGTOD_H 3 3 4 - #include <asm/vsyscall.h> 4 + #include <linux/compiler.h> 5 5 #include <linux/clocksource.h> 6 6 7 + #ifdef BUILD_VDSO32_64 8 + typedef u64 gtod_long_t; 9 + #else 10 + typedef unsigned long gtod_long_t; 11 + #endif 12 + /* 13 + * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time 14 + * so be carefull by modifying this structure. 15 + */ 7 16 struct vsyscall_gtod_data { 8 - seqcount_t seq; 17 + unsigned seq; 9 18 10 - struct { /* extract of a clocksource struct */ 11 - int vclock_mode; 12 - cycle_t cycle_last; 13 - cycle_t mask; 14 - u32 mult; 15 - u32 shift; 16 - } clock; 19 + int vclock_mode; 20 + cycle_t cycle_last; 21 + cycle_t mask; 22 + u32 mult; 23 + u32 shift; 17 24 18 25 /* open coded 'struct timespec' */ 19 - time_t wall_time_sec; 20 26 u64 wall_time_snsec; 27 + gtod_long_t wall_time_sec; 28 + gtod_long_t monotonic_time_sec; 21 29 u64 monotonic_time_snsec; 22 - time_t monotonic_time_sec; 30 + gtod_long_t wall_time_coarse_sec; 31 + gtod_long_t wall_time_coarse_nsec; 32 + gtod_long_t monotonic_time_coarse_sec; 33 + gtod_long_t monotonic_time_coarse_nsec; 23 34 24 - struct timezone sys_tz; 25 - struct timespec wall_time_coarse; 26 - struct timespec monotonic_time_coarse; 35 + int tz_minuteswest; 36 + int tz_dsttime; 27 37 }; 28 38 extern struct vsyscall_gtod_data vsyscall_gtod_data; 39 + 40 + static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) 41 + { 42 + unsigned ret; 43 + 44 + repeat: 45 + ret = ACCESS_ONCE(s->seq); 46 + if (unlikely(ret & 1)) { 47 + cpu_relax(); 48 + goto repeat; 49 + } 50 + smp_rmb(); 51 + return ret; 52 + } 53 + 54 + static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, 55 + unsigned start) 56 + { 57 + smp_rmb(); 58 + return unlikely(s->seq != start); 59 + } 60 + 61 + static inline void gtod_write_begin(struct vsyscall_gtod_data *s) 62 + { 63 + ++s->seq; 64 + smp_wmb(); 65 + } 66 + 67 + static inline void 
gtod_write_end(struct vsyscall_gtod_data *s) 68 + { 69 + smp_wmb(); 70 + ++s->seq; 71 + } 29 72 30 73 #endif /* _ASM_X86_VGTOD_H */
+5
arch/x86/include/asm/vvar.h
··· 16 16 * you mess up, the linker will catch it.) 17 17 */ 18 18 19 + #ifndef _ASM_X86_VVAR_H 20 + #define _ASM_X86_VVAR_H 21 + 19 22 #if defined(__VVAR_KERNEL_LDS) 20 23 21 24 /* The kernel linker script defines its own magic to put vvars in the ··· 67 64 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) 68 65 69 66 #undef DECLARE_VVAR 67 + 68 + #endif
+22 -12
arch/x86/kernel/vsyscall_gtod.c
··· 4 4 * 5 5 * Modified for x86 32 bit architecture by 6 6 * Stefani Seibold <stefani@seibold.net> 7 + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany 7 8 * 8 9 * Thanks to hpa@transmeta.com for some useful hint. 9 10 * Special thanks to Ingo Molnar for his early experience with ··· 14 13 15 14 #include <linux/timekeeper_internal.h> 16 15 #include <asm/vgtod.h> 16 + #include <asm/vvar.h> 17 17 18 18 DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); 19 19 20 20 void update_vsyscall_tz(void) 21 21 { 22 - vsyscall_gtod_data.sys_tz = sys_tz; 22 + vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest; 23 + vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime; 23 24 } 24 25 25 26 void update_vsyscall(struct timekeeper *tk) 26 27 { 27 28 struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; 28 29 29 - write_seqcount_begin(&vdata->seq); 30 + gtod_write_begin(vdata); 30 31 31 32 /* copy vsyscall data */ 32 - vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; 33 - vdata->clock.cycle_last = tk->clock->cycle_last; 34 - vdata->clock.mask = tk->clock->mask; 35 - vdata->clock.mult = tk->mult; 36 - vdata->clock.shift = tk->shift; 33 + vdata->vclock_mode = tk->clock->archdata.vclock_mode; 34 + vdata->cycle_last = tk->clock->cycle_last; 35 + vdata->mask = tk->clock->mask; 36 + vdata->mult = tk->mult; 37 + vdata->shift = tk->shift; 37 38 38 39 vdata->wall_time_sec = tk->xtime_sec; 39 40 vdata->wall_time_snsec = tk->xtime_nsec; ··· 52 49 vdata->monotonic_time_sec++; 53 50 } 54 51 55 - vdata->wall_time_coarse.tv_sec = tk->xtime_sec; 56 - vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); 52 + vdata->wall_time_coarse_sec = tk->xtime_sec; 53 + vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); 57 54 58 - vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, 59 - tk->wall_to_monotonic); 55 + vdata->monotonic_time_coarse_sec = 56 + vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; 57 + 
vdata->monotonic_time_coarse_nsec = 58 + vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec; 60 59 61 - write_seqcount_end(&vdata->seq); 60 + while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) { 61 + vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC; 62 + vdata->monotonic_time_coarse_sec++; 63 + } 64 + 65 + gtod_write_end(vdata); 62 66 }
+50 -41
arch/x86/vdso/vclock_gettime.c
··· 14 14 /* Disable profiling for userspace code: */ 15 15 #define DISABLE_BRANCH_PROFILING 16 16 17 - #include <linux/kernel.h> 18 17 #include <uapi/linux/time.h> 19 - #include <linux/string.h> 20 - #include <asm/vsyscall.h> 21 - #include <asm/fixmap.h> 22 18 #include <asm/vgtod.h> 23 19 #include <asm/hpet.h> 20 + #include <asm/vvar.h> 24 21 #include <asm/unistd.h> 25 - #include <asm/io.h> 26 - #include <asm/pvclock.h> 22 + #include <asm/msr.h> 23 + #include <linux/math64.h> 24 + #include <linux/time.h> 27 25 28 26 #define gtod (&VVAR(vsyscall_gtod_data)) 29 27 ··· 29 31 extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); 30 32 extern time_t __vdso_time(time_t *t); 31 33 34 + #ifdef CONFIG_HPET_TIMER 35 + static inline u32 read_hpet_counter(const volatile void *addr) 36 + { 37 + return *(const volatile u32 *) (addr + HPET_COUNTER); 38 + } 39 + #endif 40 + 32 41 #ifndef BUILD_VDSO32 42 + 43 + #include <linux/kernel.h> 44 + #include <asm/vsyscall.h> 45 + #include <asm/fixmap.h> 46 + #include <asm/pvclock.h> 33 47 34 48 static notrace cycle_t vread_hpet(void) 35 49 { 36 - return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); 50 + return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET)); 37 51 } 38 52 39 53 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) ··· 126 116 *mode = VCLOCK_NONE; 127 117 128 118 /* refer to tsc.c read_tsc() comment for rationale */ 129 - last = gtod->clock.cycle_last; 119 + last = gtod->cycle_last; 130 120 131 121 if (likely(ret >= last)) 132 122 return ret; ··· 143 133 #ifdef CONFIG_HPET_TIMER 144 134 static notrace cycle_t vread_hpet(void) 145 135 { 146 - return readl((const void __iomem *)(&hpet_page + HPET_COUNTER)); 136 + return read_hpet_counter((const void *)(&hpet_page)); 147 137 } 148 138 #endif 149 139 ··· 203 193 rdtsc_barrier(); 204 194 ret = (cycle_t)__native_read_tsc(); 205 195 206 - last = gtod->clock.cycle_last; 196 + last = gtod->cycle_last; 207 
197 208 198 if (likely(ret >= last)) 209 199 return ret; ··· 224 214 { 225 215 u64 v; 226 216 cycles_t cycles; 227 - if (gtod->clock.vclock_mode == VCLOCK_TSC) 217 + 218 + if (gtod->vclock_mode == VCLOCK_TSC) 228 219 cycles = vread_tsc(); 229 220 #ifdef CONFIG_HPET_TIMER 230 - else if (gtod->clock.vclock_mode == VCLOCK_HPET) 221 + else if (gtod->vclock_mode == VCLOCK_HPET) 231 222 cycles = vread_hpet(); 232 223 #endif 233 224 #ifdef CONFIG_PARAVIRT_CLOCK 234 - else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK) 225 + else if (gtod->vclock_mode == VCLOCK_PVCLOCK) 235 226 cycles = vread_pvclock(mode); 236 227 #endif 237 228 else 238 229 return 0; 239 - v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; 240 - return v * gtod->clock.mult; 230 + v = (cycles - gtod->cycle_last) & gtod->mask; 231 + return v * gtod->mult; 241 232 } 242 233 243 234 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ ··· 248 237 u64 ns; 249 238 int mode; 250 239 251 - ts->tv_nsec = 0; 252 240 do { 253 - seq = raw_read_seqcount_begin(&gtod->seq); 254 - mode = gtod->clock.vclock_mode; 241 + seq = gtod_read_begin(gtod); 242 + mode = gtod->vclock_mode; 255 243 ts->tv_sec = gtod->wall_time_sec; 256 244 ns = gtod->wall_time_snsec; 257 245 ns += vgetsns(&mode); 258 - ns >>= gtod->clock.shift; 259 - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 246 + ns >>= gtod->shift; 247 + } while (unlikely(gtod_read_retry(gtod, seq))); 260 248 261 - timespec_add_ns(ts, ns); 249 + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); 250 + ts->tv_nsec = ns; 251 + 262 252 return mode; 263 253 } 264 254 ··· 269 257 u64 ns; 270 258 int mode; 271 259 272 - ts->tv_nsec = 0; 273 260 do { 274 - seq = raw_read_seqcount_begin(&gtod->seq); 275 - mode = gtod->clock.vclock_mode; 261 + seq = gtod_read_begin(gtod); 262 + mode = gtod->vclock_mode; 276 263 ts->tv_sec = gtod->monotonic_time_sec; 277 264 ns = gtod->monotonic_time_snsec; 278 265 ns += vgetsns(&mode); 279 - ns >>= 
gtod->clock.shift; 280 - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 281 - timespec_add_ns(ts, ns); 266 + ns >>= gtod->shift; 267 + } while (unlikely(gtod_read_retry(gtod, seq))); 268 + 269 + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); 270 + ts->tv_nsec = ns; 282 271 283 272 return mode; 284 273 } ··· 288 275 { 289 276 unsigned long seq; 290 277 do { 291 - seq = raw_read_seqcount_begin(&gtod->seq); 292 - ts->tv_sec = gtod->wall_time_coarse.tv_sec; 293 - ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; 294 - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 278 + seq = gtod_read_begin(gtod); 279 + ts->tv_sec = gtod->wall_time_coarse_sec; 280 + ts->tv_nsec = gtod->wall_time_coarse_nsec; 281 + } while (unlikely(gtod_read_retry(gtod, seq))); 295 282 } 296 283 297 284 notrace static void do_monotonic_coarse(struct timespec *ts) 298 285 { 299 286 unsigned long seq; 300 287 do { 301 - seq = raw_read_seqcount_begin(&gtod->seq); 302 - ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; 303 - ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; 304 - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 288 + seq = gtod_read_begin(gtod); 289 + ts->tv_sec = gtod->monotonic_time_coarse_sec; 290 + ts->tv_nsec = gtod->monotonic_time_coarse_nsec; 291 + } while (unlikely(gtod_read_retry(gtod, seq))); 305 292 } 306 293 307 294 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) ··· 335 322 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 336 323 { 337 324 if (likely(tv != NULL)) { 338 - BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != 339 - offsetof(struct timespec, tv_nsec) || 340 - sizeof(*tv) != sizeof(struct timespec)); 341 325 if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) 342 326 return vdso_fallback_gtod(tv, tz); 343 327 tv->tv_usec /= 1000; 344 328 } 345 329 if (unlikely(tz != NULL)) { 346 - /* Avoid memcpy. 
Some old compilers fail to inline it */ 347 - tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; 348 - tz->tz_dsttime = gtod->sys_tz.tz_dsttime; 330 + tz->tz_minuteswest = gtod->tz_minuteswest; 331 + tz->tz_dsttime = gtod->tz_dsttime; 349 332 } 350 333 351 334 return 0;
+21
arch/x86/vdso/vdso32/vclock_gettime.c
··· 6 6 7 7 #undef CONFIG_X86_PPRO_FENCE 8 8 9 + #ifdef CONFIG_X86_64 10 + 11 + /* 12 + * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel 13 + * configuration 14 + */ 15 + #undef CONFIG_64BIT 16 + #undef CONFIG_X86_64 17 + #undef CONFIG_ILLEGAL_POINTER_VALUE 18 + #undef CONFIG_SPARSEMEM_VMEMMAP 19 + #undef CONFIG_NR_CPUS 20 + 21 + #define CONFIG_X86_32 1 22 + #define CONFIG_PAGE_OFFSET 0 23 + #define CONFIG_ILLEGAL_POINTER_VALUE 0 24 + #define CONFIG_NR_CPUS 1 25 + 26 + #define BUILD_VDSO32_64 27 + 28 + #endif 29 + 9 30 #include "../vclock_gettime.c"