Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf tsc: Add arch TSC frequency information

The TSC frequency information is required for event metrics that use the
literal system_tsc_freq. On newer Intel platforms, the TSC frequency
information can be retrieved from CPUID leaf 0x15. If the TSC frequency
information isn't present, the /proc/cpuinfo approach is used.

Refactor cpuid() for this use. Note that the previous stack pushing/popping
approach was broken on x86-64, which has a stack red zone that would be
clobbered.

Committer testing:

Before:

$ perf record sleep 0.0001
[ perf record: Woken up 1 times to write data ]
$ perf report --header-only |& grep cpuid
# cpuid : AuthenticAMD,25,33,0
$

After the patch:

$ perf record sleep 0.0001
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (8 samples) ]
$ perf report --header-only |& grep cpuid
# cpuid : AuthenticAMD,25,33,0
$

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kshipra Bopardikar <kshipra.bopardikar@intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20220718164312.3994191-2-irogers@google.com
Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Kan Liang and committed by
Arnaldo Carvalho de Melo
bc2373a5 9fe9b252

+92 -16
+34
tools/perf/arch/x86/util/cpuid.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef PERF_CPUID_H 3 + #define PERF_CPUID_H 1 4 + 5 + 6 + static inline void 7 + cpuid(unsigned int op, unsigned int op2, unsigned int *a, unsigned int *b, 8 + unsigned int *c, unsigned int *d) 9 + { 10 + /* 11 + * Preserve %ebx/%rbx register by either placing it in %rdi or saving it 12 + * on the stack - x86-64 needs to avoid the stack red zone. In PIC 13 + * compilations %ebx contains the address of the global offset 14 + * table. %rbx is occasionally used to address stack variables in 15 + * presence of dynamic allocas. 16 + */ 17 + asm( 18 + #if defined(__x86_64__) 19 + "mov %%rbx, %%rdi\n" 20 + "cpuid\n" 21 + "xchg %%rdi, %%rbx\n" 22 + #else 23 + "pushl %%ebx\n" 24 + "cpuid\n" 25 + "movl %%ebx, %%edi\n" 26 + "popl %%ebx\n" 27 + #endif 28 + : "=a"(*a), "=D"(*b), "=c"(*c), "=d"(*d) 29 + : "a"(op), "2"(op2)); 30 + } 31 + 32 + void get_cpuid_0(char *vendor, unsigned int *lvl); 33 + 34 + #endif
+11 -16
tools/perf/arch/x86/util/header.c
··· 9 9 10 10 #include "../../../util/debug.h" 11 11 #include "../../../util/header.h" 12 + #include "cpuid.h" 12 13 13 - static inline void 14 - cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, 15 - unsigned int *d) 14 + void get_cpuid_0(char *vendor, unsigned int *lvl) 16 15 { 17 - __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t" 18 - "movl %%ebx, %%esi\n\t.byte 0x5b" 19 - : "=a" (*a), 20 - "=S" (*b), 21 - "=c" (*c), 22 - "=d" (*d) 23 - : "a" (op)); 16 + unsigned int b, c, d; 17 + 18 + cpuid(0, 0, lvl, &b, &c, &d); 19 + strncpy(&vendor[0], (char *)(&b), 4); 20 + strncpy(&vendor[4], (char *)(&d), 4); 21 + strncpy(&vendor[8], (char *)(&c), 4); 22 + vendor[12] = '\0'; 24 23 } 25 24 26 25 static int ··· 30 31 int nb; 31 32 char vendor[16]; 32 33 33 - cpuid(0, &lvl, &b, &c, &d); 34 - strncpy(&vendor[0], (char *)(&b), 4); 35 - strncpy(&vendor[4], (char *)(&d), 4); 36 - strncpy(&vendor[8], (char *)(&c), 4); 37 - vendor[12] = '\0'; 34 + get_cpuid_0(vendor, &lvl); 38 35 39 36 if (lvl >= 1) { 40 - cpuid(1, &a, &b, &c, &d); 37 + cpuid(1, 0, &a, &b, &c, &d); 41 38 42 39 family = (a >> 8) & 0xf; /* bits 11 - 8 */ 43 40 model = (a >> 4) & 0xf; /* Bits 7 - 4 */
+33
tools/perf/arch/x86/util/tsc.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/types.h> 3 + #include <string.h> 3 4 4 5 #include "../../../util/tsc.h" 6 + #include "cpuid.h" 5 7 6 8 u64 rdtsc(void) 7 9 { ··· 12 10 asm volatile("rdtsc" : "=a" (low), "=d" (high)); 13 11 14 12 return low | ((u64)high) << 32; 13 + } 14 + 15 + double arch_get_tsc_freq(void) 16 + { 17 + unsigned int a, b, c, d, lvl; 18 + static bool cached; 19 + static double tsc; 20 + char vendor[16]; 21 + 22 + if (cached) 23 + return tsc; 24 + 25 + cached = true; 26 + get_cpuid_0(vendor, &lvl); 27 + if (!strstr(vendor, "Intel")) 28 + return 0; 29 + 30 + /* 31 + * Don't support Time Stamp Counter and 32 + * Nominal Core Crystal Clock Information Leaf. 33 + */ 34 + if (lvl < 0x15) 35 + return 0; 36 + 37 + cpuid(0x15, 0, &a, &b, &c, &d); 38 + /* TSC frequency is not enumerated */ 39 + if (!a || !b || !c) 40 + return 0; 41 + 42 + tsc = (double)c * (double)b / (double)a; 43 + return tsc; 15 44 }
+13
tools/perf/util/expr.c
··· 12 12 #include "expr-bison.h" 13 13 #include "expr-flex.h" 14 14 #include "smt.h" 15 + #include "tsc.h" 15 16 #include <linux/err.h> 16 17 #include <linux/kernel.h> 17 18 #include <linux/zalloc.h> ··· 403 402 return data->val.source_count; 404 403 } 405 404 405 + #if !defined(__i386__) && !defined(__x86_64__) 406 + double arch_get_tsc_freq(void) 407 + { 408 + return 0.0; 409 + } 410 + #endif 411 + 406 412 double expr__get_literal(const char *literal) 407 413 { 408 414 static struct cpu_topology *topology; ··· 422 414 423 415 if (!strcmp("#num_cpus", literal)) { 424 416 result = cpu__max_present_cpu().cpu; 417 + goto out; 418 + } 419 + 420 + if (!strcasecmp("#system_tsc_freq", literal)) { 421 + result = arch_get_tsc_freq(); 425 422 goto out; 426 423 } 427 424
+1
tools/perf/util/tsc.h
··· 25 25 u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); 26 26 u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); 27 27 u64 rdtsc(void); 28 + double arch_get_tsc_freq(void); 28 29 29 30 size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); 30 31