Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/asm/tsc: Add rdtsc_ordered() and use it in trivial call sites

The sequence "rdtsc_barrier(); rdtsc()" is an unnecessary mouthful and
requires more thought than should be necessary. Add an rdtsc_ordered()
helper and replace the trivial call sites with it.

This should not change generated code. The duplication of the
fence asm is temporary.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kvm ML <kvm@vger.kernel.org>
Link: http://lkml.kernel.org/r/dddbf98a2af53312e9aa73a5a2b1622fe5d6f52b.1434501121.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Andy Lutomirski and committed by
Ingo Molnar
03b9730b 4ea1636b

+34 -40
+2 -14
arch/x86/entry/vdso/vclock_gettime.c
··· 175 175 176 176 notrace static cycle_t vread_tsc(void) 177 177 { 178 - cycle_t ret; 179 - u64 last; 180 - 181 - /* 182 - * Empirically, a fence (of type that depends on the CPU) 183 - * before rdtsc is enough to ensure that rdtsc is ordered 184 - * with respect to loads. The various CPU manuals are unclear 185 - * as to whether rdtsc can be reordered with later loads, 186 - * but no one has ever seen it happen. 187 - */ 188 - rdtsc_barrier(); 189 - ret = (cycle_t)rdtsc(); 190 - 191 - last = gtod->cycle_last; 178 + cycle_t ret = (cycle_t)rdtsc_ordered(); 179 + u64 last = gtod->cycle_last; 192 180 193 181 if (likely(ret >= last)) 194 182 return ret;
+26
arch/x86/include/asm/msr.h
··· 127 127 return EAX_EDX_VAL(val, low, high); 128 128 } 129 129 130 + /** 131 + * rdtsc_ordered() - read the current TSC in program order 132 + * 133 + * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer. 134 + * It is ordered like a load to a global in-memory counter. It should 135 + * be impossible to observe non-monotonic rdtsc_ordered() behavior 136 + * across multiple CPUs as long as the TSC is synced. 137 + */ 138 + static __always_inline unsigned long long rdtsc_ordered(void) 139 + { 140 + /* 141 + * The RDTSC instruction is not ordered relative to memory 142 + * access. The Intel SDM and the AMD APM are both vague on this 143 + * point, but empirically an RDTSC instruction can be 144 + * speculatively executed before prior loads. An RDTSC 145 + * immediately after an appropriate barrier appears to be 146 + * ordered as a normal load, that is, it provides the same 147 + * ordering guarantees as reading from a global memory location 148 + * that some other imaginary CPU is updating continuously with a 149 + * time stamp. 150 + */ 151 + alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, 152 + "lfence", X86_FEATURE_LFENCE_RDTSC); 153 + return rdtsc(); 154 + } 155 + 130 156 static inline unsigned long long native_read_pmc(int counter) 131 157 { 132 158 DECLARE_ARGS(val, low, high);
+1 -6
arch/x86/kernel/trace_clock.c
··· 12 12 */ 13 13 u64 notrace trace_clock_x86_tsc(void) 14 14 { 15 - u64 ret; 16 - 17 - rdtsc_barrier(); 18 - ret = rdtsc(); 19 - 20 - return ret; 15 + return rdtsc_ordered(); 21 16 }
+2 -14
arch/x86/kvm/x86.c
··· 1444 1444 1445 1445 static cycle_t read_tsc(void) 1446 1446 { 1447 - cycle_t ret; 1448 - u64 last; 1449 - 1450 - /* 1451 - * Empirically, a fence (of type that depends on the CPU) 1452 - * before rdtsc is enough to ensure that rdtsc is ordered 1453 - * with respect to loads. The various CPU manuals are unclear 1454 - * as to whether rdtsc can be reordered with later loads, 1455 - * but no one has ever seen it happen. 1456 - */ 1457 - rdtsc_barrier(); 1458 - ret = (cycle_t)rdtsc(); 1459 - 1460 - last = pvclock_gtod_data.clock.cycle_last; 1447 + cycle_t ret = (cycle_t)rdtsc_ordered(); 1448 + u64 last = pvclock_gtod_data.clock.cycle_last; 1461 1449 1462 1450 if (likely(ret >= last)) 1463 1451 return ret;
+3 -6
arch/x86/lib/delay.c
··· 54 54 55 55 preempt_disable(); 56 56 cpu = smp_processor_id(); 57 - rdtsc_barrier(); 58 - bclock = rdtsc(); 57 + bclock = rdtsc_ordered(); 59 58 for (;;) { 60 - rdtsc_barrier(); 61 - now = rdtsc(); 59 + now = rdtsc_ordered(); 62 60 if ((now - bclock) >= loops) 63 61 break; 64 62 ··· 77 79 if (unlikely(cpu != smp_processor_id())) { 78 80 loops -= (now - bclock); 79 81 cpu = smp_processor_id(); 80 - rdtsc_barrier(); 81 - bclock = rdtsc(); 82 + bclock = rdtsc_ordered(); 82 83 } 83 84 } 84 85 preempt_enable();