Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/tsc: Always Running Timer (ART) correlated clocksource

On modern Intel systems TSC is derived from the new Always Running Timer
(ART). ART can be captured simultaneously with the capture of
audio and network device clocks, allowing a correlation between timebases
to be constructed. Upon capture, the driver converts the captured ART
value to the appropriate system clock using the correlated clocksource
mechanism.

On systems that support ART a new CPUID leaf (0x15) returns parameters
"m" and "n" such that:

TSC_value = (ART_value * m) / n + k [n >= 1]

[k is an offset that can be adjusted by a privileged agent. The
IA32_TSC_ADJUST MSR is an example of an interface to adjust k.
See 17.14.4 of the Intel SDM for more details]

Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: kevin.b.stanton@intel.com
Cc: kevin.j.clarke@intel.com
Cc: hpa@zytor.com
Cc: jeffrey.t.kirsher@intel.com
Cc: netdev@vger.kernel.org
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Christopher S. Hall <christopher.s.hall@intel.com>
[jstultz: Tweaked to fix build issue, also reworked math for
64bit division on 32bit systems, as well as !CONFIG_CPU_FREQ build
fixes]
Signed-off-by: John Stultz <john.stultz@linaro.org>

authored by

Christopher S. Hall and committed by
John Stultz
f9677e0f 2c756feb

+62 -1
+1 -1
arch/x86/include/asm/cpufeature.h
··· 85 85 #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ 86 86 #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ 87 87 #define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ 88 - /* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */ 88 + #define X86_FEATURE_ART (3*32+10) /* Platform has always running timer (ART) */ 89 89 #define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ 90 90 #define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ 91 91 #define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
+2
arch/x86/include/asm/tsc.h
··· 29 29 return rdtsc(); 30 30 } 31 31 32 + extern struct system_counterval_t convert_art_to_tsc(cycle_t art); 33 + 32 34 extern void tsc_init(void); 33 35 extern void mark_tsc_unstable(char *reason); 34 36 extern int unsynchronized_tsc(void);
+59
arch/x86/kernel/tsc.c
··· 43 43 44 44 int tsc_clocksource_reliable; 45 45 46 + static u32 art_to_tsc_numerator; 47 + static u32 art_to_tsc_denominator; 48 + static u64 art_to_tsc_offset; 49 + struct clocksource *art_related_clocksource; 50 + 46 51 /* 47 52 * Use a ring-buffer like data structure, where a writer advances the head by 48 53 * writing a new data entry and a reader advances the tail when it observes a ··· 969 964 970 965 #endif /* CONFIG_CPU_FREQ */ 971 966 967 + #define ART_CPUID_LEAF (0x15) 968 + #define ART_MIN_DENOMINATOR (1) 969 + 970 + 971 + /* 972 + * If ART is present detect the numerator:denominator to convert to TSC 973 + */ 974 + static void detect_art(void) 975 + { 976 + unsigned int unused[2]; 977 + 978 + if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) 979 + return; 980 + 981 + cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, 982 + &art_to_tsc_numerator, unused, unused+1); 983 + 984 + /* Don't enable ART in a VM, non-stop TSC required */ 985 + if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || 986 + !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || 987 + art_to_tsc_denominator < ART_MIN_DENOMINATOR) 988 + return; 989 + 990 + if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset)) 991 + return; 992 + 993 + /* Make this sticky over multiple CPU init calls */ 994 + setup_force_cpu_cap(X86_FEATURE_ART); 995 + } 996 + 997 + 972 998 /* clocksource code */ 973 999 974 1000 static struct clocksource clocksource_tsc; ··· 1107 1071 return 0; 1108 1072 } 1109 1073 1074 + /* 1075 + * Convert ART to TSC given numerator/denominator found in detect_art() 1076 + */ 1077 + struct system_counterval_t convert_art_to_tsc(cycle_t art) 1078 + { 1079 + u64 tmp, res, rem; 1080 + 1081 + rem = do_div(art, art_to_tsc_denominator); 1082 + 1083 + res = art * art_to_tsc_numerator; 1084 + tmp = rem * art_to_tsc_numerator; 1085 + 1086 + do_div(tmp, art_to_tsc_denominator); 1087 + res += tmp + art_to_tsc_offset; 1088 + 1089 + return (struct system_counterval_t) {.cs = art_related_clocksource, 1090 + .cycles = 
res}; 1091 + } 1092 + EXPORT_SYMBOL(convert_art_to_tsc); 1110 1093 1111 1094 static void tsc_refine_calibration_work(struct work_struct *work); 1112 1095 static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); ··· 1197 1142 (unsigned long)tsc_khz % 1000); 1198 1143 1199 1144 out: 1145 + if (boot_cpu_has(X86_FEATURE_ART)) 1146 + art_related_clocksource = &clocksource_tsc; 1200 1147 clocksource_register_khz(&clocksource_tsc, tsc_khz); 1201 1148 } 1202 1149 ··· 1292 1235 mark_tsc_unstable("TSCs unsynchronized"); 1293 1236 1294 1237 check_system_tsc_reliable(); 1238 + 1239 + detect_art(); 1295 1240 } 1296 1241 1297 1242 #ifdef CONFIG_SMP