Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

MIPS: VDSO: Add implementations of gettimeofday() and clock_gettime()

Add user-mode implementations of gettimeofday() and clock_gettime() to
the VDSO. This is currently usable with 2 clocksources: the CP0 count
register, which is accessible to user-mode via RDHWR on R2 and later
cores, or the MIPS Global Interrupt Controller (GIC) timer, which
provides a "user-mode visible" section containing a mirror of its
counter registers. This section must be mapped into user memory, which
is done below the VDSO data page.

When a supported clocksource is not in use, the VDSO functions will
return -ENOSYS, which causes libc to fall back on the standard syscall
path.

When support for neither of these clocksources is compiled into the
kernel at all, the VDSO still provides clock_gettime(), as the coarse
realtime/monotonic clocks can still be implemented. However,
gettimeofday() is not provided in this case as nothing can be done
without a suitable clocksource. This causes the symbol lookup to fail
in libc and it will then always use the standard syscall path.

This patch includes a workaround for a bug in QEMU which results in
RDHWR on the CP0 count register always returning a constant (incorrect)
value. A fix for this has been submitted, and the workaround can be
removed after the fix has been in stable releases for a reasonable
amount of time.

A simple performance test which calls gettimeofday() 1000 times in a
loop and calculates the average execution time gives the following
results on a Malta + I6400 (running at 20MHz):

- Syscall: ~31000 ns
- VDSO (GIC): ~15000 ns
- VDSO (CP0): ~9500 ns

[markos.chandras@imgtec.com:
- Minor code re-arrangements so that the mappings are made
in the order they appear in the process' address space.
- Move do_{monotonic, realtime} outside of the MIPS_CLOCK_VSYSCALL ifdef
- Use gic_get_usm_range so we can do the GIC mapping in the
arch/mips/kernel/vdso instead of the GIC irqchip driver]

Signed-off-by: Alex Smith <alex.smith@imgtec.com>
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/11338/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

authored by

Alex Smith and committed by
Ralf Baechle
a7f4df4e c0a9f72c

+460 -10
+5
arch/mips/Kconfig
··· 61 61 select SYSCTL_EXCEPTION_TRACE 62 62 select HAVE_VIRT_CPU_ACCOUNTING_GEN 63 63 select HAVE_IRQ_TIME_ACCOUNTING 64 + select GENERIC_TIME_VSYSCALL 65 + select ARCH_CLOCKSOURCE_DATA 64 66 65 67 menu "Machine selection" 66 68 ··· 1041 1039 1042 1040 config CSRC_SB1250 1043 1041 bool 1042 + 1043 + config MIPS_CLOCK_VSYSCALL 1044 + def_bool CSRC_R4K || CLKSRC_MIPS_GIC 1044 1045 1045 1046 config GPIO_TXX9 1046 1047 select ARCH_REQUIRE_GPIOLIB
+29
arch/mips/include/asm/clocksource.h
··· 1 + /* 2 + * Copyright (C) 2015 Imagination Technologies 3 + * Author: Alex Smith <alex.smith@imgtec.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify it 6 + * under the terms of the GNU General Public License as published by the 7 + * Free Software Foundation; either version 2 of the License, or (at your 8 + * option) any later version. 9 + */ 10 + 11 + #ifndef __ASM_CLOCKSOURCE_H 12 + #define __ASM_CLOCKSOURCE_H 13 + 14 + #include <linux/types.h> 15 + 16 + /* VDSO clocksources. */ 17 + #define VDSO_CLOCK_NONE 0 /* No suitable clocksource. */ 18 + #define VDSO_CLOCK_R4K 1 /* Use the coprocessor 0 count. */ 19 + #define VDSO_CLOCK_GIC 2 /* Use the GIC. */ 20 + 21 + /** 22 + * struct arch_clocksource_data - Architecture-specific clocksource information. 23 + * @vdso_clock_mode: Method the VDSO should use to access the clocksource. 24 + */ 25 + struct arch_clocksource_data { 26 + u8 vdso_clock_mode; 27 + }; 28 + 29 + #endif /* __ASM_CLOCKSOURCE_H */
+67 -1
arch/mips/include/asm/vdso.h
··· 13 13 14 14 #include <linux/mm_types.h> 15 15 16 + #include <asm/barrier.h> 17 + 16 18 /** 17 19 * struct mips_vdso_image - Details of a VDSO image. 18 20 * @data: Pointer to VDSO image data (page-aligned). ··· 55 53 56 54 /** 57 55 * union mips_vdso_data - Data provided by the kernel for the VDSO. 56 + * @xtime_sec: Current real time (seconds part). 57 + * @xtime_nsec: Current real time (nanoseconds part, shifted). 58 + * @wall_to_mono_sec: Wall-to-monotonic offset (seconds part). 59 + * @wall_to_mono_nsec: Wall-to-monotonic offset (nanoseconds part). 60 + * @seq_count: Counter to synchronise updates (odd = updating). 61 + * @cs_shift: Clocksource shift value. 62 + * @clock_mode: Clocksource to use for time functions. 63 + * @cs_mult: Clocksource multiplier value. 64 + * @cs_cycle_last: Clock cycle value at last update. 65 + * @cs_mask: Clocksource mask value. 66 + * @tz_minuteswest: Minutes west of Greenwich (from timezone). 67 + * @tz_dsttime: Type of DST correction (from timezone). 58 68 * 59 69 * This structure contains data needed by functions within the VDSO. It is 60 - * populated by the kernel and mapped read-only into user memory. 70 + * populated by the kernel and mapped read-only into user memory. The time 71 + * fields are mirrors of internal data from the timekeeping infrastructure. 61 72 * 62 73 * Note: Care should be taken when modifying as the layout must remain the same 63 74 * for both 64- and 32-bit (for 32-bit userland on 64-bit kernel). 
64 75 */ 65 76 union mips_vdso_data { 66 77 struct { 78 + u64 xtime_sec; 79 + u64 xtime_nsec; 80 + u32 wall_to_mono_sec; 81 + u32 wall_to_mono_nsec; 82 + u32 seq_count; 83 + u32 cs_shift; 84 + u8 clock_mode; 85 + u32 cs_mult; 86 + u64 cs_cycle_last; 87 + u64 cs_mask; 88 + s32 tz_minuteswest; 89 + s32 tz_dsttime; 67 90 }; 68 91 69 92 u8 page[PAGE_SIZE]; 70 93 }; 94 + 95 + static inline u32 vdso_data_read_begin(const union mips_vdso_data *data) 96 + { 97 + u32 seq; 98 + 99 + while (true) { 100 + seq = ACCESS_ONCE(data->seq_count); 101 + if (likely(!(seq & 1))) { 102 + /* Paired with smp_wmb() in vdso_data_write_*(). */ 103 + smp_rmb(); 104 + return seq; 105 + } 106 + 107 + cpu_relax(); 108 + } 109 + } 110 + 111 + static inline bool vdso_data_read_retry(const union mips_vdso_data *data, 112 + u32 start_seq) 113 + { 114 + /* Paired with smp_wmb() in vdso_data_write_*(). */ 115 + smp_rmb(); 116 + return unlikely(data->seq_count != start_seq); 117 + } 118 + 119 + static inline void vdso_data_write_begin(union mips_vdso_data *data) 120 + { 121 + ++data->seq_count; 122 + 123 + /* Ensure sequence update is written before other data page values. */ 124 + smp_wmb(); 125 + } 126 + 127 + static inline void vdso_data_write_end(union mips_vdso_data *data) 128 + { 129 + /* Ensure data values are written before updating sequence again. */ 130 + smp_wmb(); 131 + ++data->seq_count; 132 + } 71 133 72 134 #endif /* __ASM_VDSO_H */
+44
arch/mips/kernel/csrc-r4k.c
··· 28 28 return read_c0_count(); 29 29 } 30 30 31 + static inline unsigned int rdhwr_count(void) 32 + { 33 + unsigned int count; 34 + 35 + __asm__ __volatile__( 36 + " .set push\n" 37 + " .set mips32r2\n" 38 + " rdhwr %0, $2\n" 39 + " .set pop\n" 40 + : "=r" (count)); 41 + 42 + return count; 43 + } 44 + 45 + static bool rdhwr_count_usable(void) 46 + { 47 + unsigned int prev, curr, i; 48 + 49 + /* 50 + * Older QEMUs have a broken implementation of RDHWR for the CP0 count 51 + * which always returns a constant value. Try to identify this and don't 52 + * use it in the VDSO if it is broken. This workaround can be removed 53 + * once the fix has been in QEMU stable for a reasonable amount of time. 54 + */ 55 + for (i = 0, prev = rdhwr_count(); i < 100; i++) { 56 + curr = rdhwr_count(); 57 + 58 + if (curr != prev) 59 + return true; 60 + 61 + prev = curr; 62 + } 63 + 64 + pr_warn("Not using R4K clocksource in VDSO due to broken RDHWR\n"); 65 + return false; 66 + } 67 + 31 68 int __init init_r4k_clocksource(void) 32 69 { 33 70 if (!cpu_has_counter || !mips_hpt_frequency) ··· 72 35 73 36 /* Calculate a somewhat reasonable rating value */ 74 37 clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000; 38 + 39 + /* 40 + * R2 onwards makes the count accessible to user mode so it can be used 41 + * by the VDSO (HWREna is configured by configure_hwrena()). 42 + */ 43 + if (cpu_has_mips_r2_r6 && rdhwr_count_usable()) 44 + clocksource_mips.archdata.vdso_clock_mode = VDSO_CLOCK_R4K; 75 45 76 46 clocksource_register_hz(&clocksource_mips, mips_hpt_frequency); 77 47
+65 -6
arch/mips/kernel/vdso.c
··· 12 12 #include <linux/elf.h> 13 13 #include <linux/err.h> 14 14 #include <linux/init.h> 15 + #include <linux/ioport.h> 16 + #include <linux/irqchip/mips-gic.h> 15 17 #include <linux/mm.h> 16 18 #include <linux/sched.h> 17 19 #include <linux/slab.h> 20 + #include <linux/timekeeper_internal.h> 18 21 19 22 #include <asm/abi.h> 20 23 #include <asm/vdso.h> ··· 26 23 static union mips_vdso_data vdso_data __page_aligned_data; 27 24 28 25 /* 29 - * Mapping for the VDSO data pages. The real pages are mapped manually, as 26 + * Mapping for the VDSO data/GIC pages. The real pages are mapped manually, as 30 27 * what we map and where within the area they are mapped is determined at 31 28 * runtime. 32 29 */ ··· 67 64 } 68 65 subsys_initcall(init_vdso); 69 66 67 + void update_vsyscall(struct timekeeper *tk) 68 + { 69 + vdso_data_write_begin(&vdso_data); 70 + 71 + vdso_data.xtime_sec = tk->xtime_sec; 72 + vdso_data.xtime_nsec = tk->tkr_mono.xtime_nsec; 73 + vdso_data.wall_to_mono_sec = tk->wall_to_monotonic.tv_sec; 74 + vdso_data.wall_to_mono_nsec = tk->wall_to_monotonic.tv_nsec; 75 + vdso_data.cs_shift = tk->tkr_mono.shift; 76 + 77 + vdso_data.clock_mode = tk->tkr_mono.clock->archdata.vdso_clock_mode; 78 + if (vdso_data.clock_mode != VDSO_CLOCK_NONE) { 79 + vdso_data.cs_mult = tk->tkr_mono.mult; 80 + vdso_data.cs_cycle_last = tk->tkr_mono.cycle_last; 81 + vdso_data.cs_mask = tk->tkr_mono.mask; 82 + } 83 + 84 + vdso_data_write_end(&vdso_data); 85 + } 86 + 87 + void update_vsyscall_tz(void) 88 + { 89 + if (vdso_data.clock_mode != VDSO_CLOCK_NONE) { 90 + vdso_data.tz_minuteswest = sys_tz.tz_minuteswest; 91 + vdso_data.tz_dsttime = sys_tz.tz_dsttime; 92 + } 93 + } 94 + 70 95 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) 71 96 { 72 97 struct mips_vdso_image *image = current->thread.abi->vdso; 73 98 struct mm_struct *mm = current->mm; 74 - unsigned long base, vdso_addr; 99 + unsigned long gic_size, vvar_size, size, base, data_addr, vdso_addr; 75 100 
struct vm_area_struct *vma; 101 + struct resource gic_res; 76 102 int ret; 77 103 78 104 down_write(&mm->mmap_sem); 79 105 80 - base = get_unmapped_area(NULL, 0, PAGE_SIZE + image->size, 0, 0); 106 + /* 107 + * Determine total area size. This includes the VDSO data itself, the 108 + * data page, and the GIC user page if present. Always create a mapping 109 + * for the GIC user area if the GIC is present regardless of whether it 110 + * is the current clocksource, in case it comes into use later on. We 111 + * only map a page even though the total area is 64K, as we only need 112 + * the counter registers at the start. 113 + */ 114 + gic_size = gic_present ? PAGE_SIZE : 0; 115 + vvar_size = gic_size + PAGE_SIZE; 116 + size = vvar_size + image->size; 117 + 118 + base = get_unmapped_area(NULL, 0, size, 0, 0); 81 119 if (IS_ERR_VALUE(base)) { 82 120 ret = base; 83 121 goto out; 84 122 } 85 123 86 - vdso_addr = base + PAGE_SIZE; 124 + data_addr = base + gic_size; 125 + vdso_addr = data_addr + PAGE_SIZE; 87 126 88 - vma = _install_special_mapping(mm, base, PAGE_SIZE, 127 + vma = _install_special_mapping(mm, base, vvar_size, 89 128 VM_READ | VM_MAYREAD, 90 129 &vdso_vvar_mapping); 91 130 if (IS_ERR(vma)) { ··· 135 90 goto out; 136 91 } 137 92 93 + /* Map GIC user page. */ 94 + if (gic_size) { 95 + ret = gic_get_usm_range(&gic_res); 96 + if (ret) 97 + goto out; 98 + 99 + ret = io_remap_pfn_range(vma, base, 100 + gic_res.start >> PAGE_SHIFT, 101 + gic_size, 102 + pgprot_noncached(PAGE_READONLY)); 103 + if (ret) 104 + goto out; 105 + } 106 + 138 107 /* Map data page. */ 139 - ret = remap_pfn_range(vma, base, 108 + ret = remap_pfn_range(vma, data_addr, 140 109 virt_to_phys(&vdso_data) >> PAGE_SHIFT, 141 110 PAGE_SIZE, PAGE_READONLY); 142 111 if (ret)
+232
arch/mips/vdso/gettimeofday.c
··· 1 + /* 2 + * Copyright (C) 2015 Imagination Technologies 3 + * Author: Alex Smith <alex.smith@imgtec.com> 4 + * 5 + * This program is free software; you can redistribute it and/or modify it 6 + * under the terms of the GNU General Public License as published by the 7 + * Free Software Foundation; either version 2 of the License, or (at your 8 + * option) any later version. 9 + */ 10 + 11 + #include "vdso.h" 12 + 13 + #include <linux/compiler.h> 14 + #include <linux/irqchip/mips-gic.h> 15 + #include <linux/time.h> 16 + 17 + #include <asm/clocksource.h> 18 + #include <asm/io.h> 19 + #include <asm/mips-cm.h> 20 + #include <asm/unistd.h> 21 + #include <asm/vdso.h> 22 + 23 + static __always_inline int do_realtime_coarse(struct timespec *ts, 24 + const union mips_vdso_data *data) 25 + { 26 + u32 start_seq; 27 + 28 + do { 29 + start_seq = vdso_data_read_begin(data); 30 + 31 + ts->tv_sec = data->xtime_sec; 32 + ts->tv_nsec = data->xtime_nsec >> data->cs_shift; 33 + } while (vdso_data_read_retry(data, start_seq)); 34 + 35 + return 0; 36 + } 37 + 38 + static __always_inline int do_monotonic_coarse(struct timespec *ts, 39 + const union mips_vdso_data *data) 40 + { 41 + u32 start_seq; 42 + u32 to_mono_sec; 43 + u32 to_mono_nsec; 44 + 45 + do { 46 + start_seq = vdso_data_read_begin(data); 47 + 48 + ts->tv_sec = data->xtime_sec; 49 + ts->tv_nsec = data->xtime_nsec >> data->cs_shift; 50 + 51 + to_mono_sec = data->wall_to_mono_sec; 52 + to_mono_nsec = data->wall_to_mono_nsec; 53 + } while (vdso_data_read_retry(data, start_seq)); 54 + 55 + ts->tv_sec += to_mono_sec; 56 + timespec_add_ns(ts, to_mono_nsec); 57 + 58 + return 0; 59 + } 60 + 61 + #ifdef CONFIG_CSRC_R4K 62 + 63 + static __always_inline u64 read_r4k_count(void) 64 + { 65 + unsigned int count; 66 + 67 + __asm__ __volatile__( 68 + " .set push\n" 69 + " .set mips32r2\n" 70 + " rdhwr %0, $2\n" 71 + " .set pop\n" 72 + : "=r" (count)); 73 + 74 + return count; 75 + } 76 + 77 + #endif 78 + 79 + #ifdef CONFIG_CLKSRC_MIPS_GIC 
80 + 81 + static __always_inline u64 read_gic_count(const union mips_vdso_data *data) 82 + { 83 + void __iomem *gic = get_gic(data); 84 + u32 hi, hi2, lo; 85 + 86 + do { 87 + hi = __raw_readl(gic + GIC_UMV_SH_COUNTER_63_32_OFS); 88 + lo = __raw_readl(gic + GIC_UMV_SH_COUNTER_31_00_OFS); 89 + hi2 = __raw_readl(gic + GIC_UMV_SH_COUNTER_63_32_OFS); 90 + } while (hi2 != hi); 91 + 92 + return (((u64)hi) << 32) + lo; 93 + } 94 + 95 + #endif 96 + 97 + static __always_inline u64 get_ns(const union mips_vdso_data *data) 98 + { 99 + u64 cycle_now, delta, nsec; 100 + 101 + switch (data->clock_mode) { 102 + #ifdef CONFIG_CSRC_R4K 103 + case VDSO_CLOCK_R4K: 104 + cycle_now = read_r4k_count(); 105 + break; 106 + #endif 107 + #ifdef CONFIG_CLKSRC_MIPS_GIC 108 + case VDSO_CLOCK_GIC: 109 + cycle_now = read_gic_count(data); 110 + break; 111 + #endif 112 + default: 113 + return 0; 114 + } 115 + 116 + delta = (cycle_now - data->cs_cycle_last) & data->cs_mask; 117 + 118 + nsec = (delta * data->cs_mult) + data->xtime_nsec; 119 + nsec >>= data->cs_shift; 120 + 121 + return nsec; 122 + } 123 + 124 + static __always_inline int do_realtime(struct timespec *ts, 125 + const union mips_vdso_data *data) 126 + { 127 + u32 start_seq; 128 + u64 ns; 129 + 130 + do { 131 + start_seq = vdso_data_read_begin(data); 132 + 133 + if (data->clock_mode == VDSO_CLOCK_NONE) 134 + return -ENOSYS; 135 + 136 + ts->tv_sec = data->xtime_sec; 137 + ns = get_ns(data); 138 + } while (vdso_data_read_retry(data, start_seq)); 139 + 140 + ts->tv_nsec = 0; 141 + timespec_add_ns(ts, ns); 142 + 143 + return 0; 144 + } 145 + 146 + static __always_inline int do_monotonic(struct timespec *ts, 147 + const union mips_vdso_data *data) 148 + { 149 + u32 start_seq; 150 + u64 ns; 151 + u32 to_mono_sec; 152 + u32 to_mono_nsec; 153 + 154 + do { 155 + start_seq = vdso_data_read_begin(data); 156 + 157 + if (data->clock_mode == VDSO_CLOCK_NONE) 158 + return -ENOSYS; 159 + 160 + ts->tv_sec = data->xtime_sec; 161 + ns = get_ns(data); 162 + 
163 + to_mono_sec = data->wall_to_mono_sec; 164 + to_mono_nsec = data->wall_to_mono_nsec; 165 + } while (vdso_data_read_retry(data, start_seq)); 166 + 167 + ts->tv_sec += to_mono_sec; 168 + ts->tv_nsec = 0; 169 + timespec_add_ns(ts, ns + to_mono_nsec); 170 + 171 + return 0; 172 + } 173 + 174 + #ifdef CONFIG_MIPS_CLOCK_VSYSCALL 175 + 176 + /* 177 + * This is behind the ifdef so that we don't provide the symbol when there's no 178 + * possibility of there being a usable clocksource, because there's nothing we 179 + * can do without it. When libc fails the symbol lookup it should fall back on 180 + * the standard syscall path. 181 + */ 182 + int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 183 + { 184 + const union mips_vdso_data *data = get_vdso_data(); 185 + struct timespec ts; 186 + int ret; 187 + 188 + ret = do_realtime(&ts, data); 189 + if (ret) 190 + return ret; 191 + 192 + if (tv) { 193 + tv->tv_sec = ts.tv_sec; 194 + tv->tv_usec = ts.tv_nsec / 1000; 195 + } 196 + 197 + if (tz) { 198 + tz->tz_minuteswest = data->tz_minuteswest; 199 + tz->tz_dsttime = data->tz_dsttime; 200 + } 201 + 202 + return 0; 203 + } 204 + 205 + #endif /* CONFIG_CLKSRC_MIPS_GIC */ 206 + 207 + int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts) 208 + { 209 + const union mips_vdso_data *data = get_vdso_data(); 210 + int ret; 211 + 212 + switch (clkid) { 213 + case CLOCK_REALTIME_COARSE: 214 + ret = do_realtime_coarse(ts, data); 215 + break; 216 + case CLOCK_MONOTONIC_COARSE: 217 + ret = do_monotonic_coarse(ts, data); 218 + break; 219 + case CLOCK_REALTIME: 220 + ret = do_realtime(ts, data); 221 + break; 222 + case CLOCK_MONOTONIC: 223 + ret = do_monotonic(ts, data); 224 + break; 225 + default: 226 + ret = -ENOSYS; 227 + break; 228 + } 229 + 230 + /* If we return -ENOSYS libc should fall back to a syscall. */ 231 + return ret; 232 + }
+9
arch/mips/vdso/vdso.h
··· 77 77 return (const union mips_vdso_data *)(get_vdso_base() - PAGE_SIZE); 78 78 } 79 79 80 + #ifdef CONFIG_CLKSRC_MIPS_GIC 81 + 82 + static inline void __iomem *get_gic(const union mips_vdso_data *data) 83 + { 84 + return (void __iomem *)data - PAGE_SIZE; 85 + } 86 + 87 + #endif /* CONFIG_CLKSRC_MIPS_GIC */ 88 + 80 89 #endif /* __ASSEMBLY__ */
+5
arch/mips/vdso/vdso.lds.S
··· 95 95 VERSION 96 96 { 97 97 LINUX_2.6 { 98 + #ifndef DISABLE_MIPS_VDSO 99 + global: 100 + __vdso_clock_gettime; 101 + __vdso_gettimeofday; 102 + #endif 98 103 local: *; 99 104 }; 100 105 }
+4 -3
drivers/clocksource/mips-gic-timer.c
··· 140 140 } 141 141 142 142 static struct clocksource gic_clocksource = { 143 - .name = "GIC", 144 - .read = gic_hpt_read, 145 - .flags = CLOCK_SOURCE_IS_CONTINUOUS, 143 + .name = "GIC", 144 + .read = gic_hpt_read, 145 + .flags = CLOCK_SOURCE_IS_CONTINUOUS, 146 + .archdata = { .vdso_clock_mode = VDSO_CLOCK_GIC }, 146 147 }; 147 148 148 149 static void __init __gic_clocksource_init(void)