Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf header: Store clock references for -k/--clockid option

Add a new CLOCK_DATA feature that stores reference times when
-k/--clockid option is specified.

It contains the clock id and its reference time together with wall clock
time taken at the 'same time', both values are in nanoseconds.

The format of data is as below:

struct {
u32 version; /* version = 1 */
u32 clockid;
u64 wall_clock_ns;
u64 clockid_time_ns;
};

This clock reference times will be used in following changes to display
wall clock for perf events.

It's available only for recording with clockid specified, because it's
the only case where we can get reference time to wallclock time. It's
can't do that with perf clock yet.

Committer testing:

$ perf record -h -k

Usage: perf record [<options>] [<command>]
or: perf record [<options>] -- <command> [<options>]

-k, --clockid <clockid>
clockid to use for events, see clock_gettime()

$ perf record -k monotonic sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.017 MB perf.data (8 samples) ]
$ perf report --header-only | grep clockid -A1
# event : name = cycles:u, , id = { 88815, 88816, 88817, 88818, 88819, 88820, 88821, 88822 }, size = 120, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|PERIOD, read_format = ID, disabled = 1, inherit = 1, exclude_kernel = 1, mmap = 1, comm = 1, freq = 1, enable_on_exec = 1, task = 1, precise_ip = 3, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1, use_clockid = 1, ksymbol = 1, bpf_event = 1, clockid = 1
# CPU_TOPOLOGY info available, use -I to display
--
# clockid frequency: 1000 MHz
# cpu pmu capabilities: branches=32, max_precise=3, pmu_name=skylake
# clockid: monotonic (1)
# reference time: 2020-08-06 09:40:21.619290 = 1596717621.619290 (TOD) = 21931.077673635 (monotonic)
$

Original-patch-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Geneviève Bastien <gbastien@versatic.net>
Cc: Ian Rogers <irogers@google.com>
Cc: Jeremie Galarneau <jgalar@efficios.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lore.kernel.org/lkml/20200805093444.314999-4-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Jiri Olsa and committed by
Arnaldo Carvalho de Melo
d1e325cf cc3365bb

+180
+13
tools/perf/Documentation/perf.data-file-format.txt
··· 389 389 Example: 390 390 cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake 391 391 392 + HEADER_CLOCK_DATA = 29, 393 + 394 + Contains clock id and its reference time together with wall clock 395 + time taken at the 'same time', both values are in nanoseconds. 396 + The format of data is as below. 397 + 398 + struct { 399 + u32 version; /* version = 1 */ 400 + u32 clockid; 401 + u64 wall_clock_ns; 402 + u64 clockid_time_ns; 403 + }; 404 + 392 405 other bits are reserved and should ignored for now 393 406 HEADER_FEAT_BITS = 256, 394 407
+41
tools/perf/builtin-record.c
··· 71 71 #include <linux/time64.h> 72 72 #include <linux/zalloc.h> 73 73 #include <linux/bitmap.h> 74 + #include <sys/time.h> 74 75 75 76 struct switch_output { 76 77 bool enabled; ··· 1205 1204 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1206 1205 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1207 1206 1207 + if (!rec->opts.use_clockid) 1208 + perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1209 + 1208 1210 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1209 1211 if (!record__comp_enabled(rec)) 1210 1212 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); ··· 1556 1552 return 0; 1557 1553 } 1558 1554 1555 + static int record__init_clock(struct record *rec) 1556 + { 1557 + struct perf_session *session = rec->session; 1558 + struct timespec ref_clockid; 1559 + struct timeval ref_tod; 1560 + u64 ref; 1561 + 1562 + if (!rec->opts.use_clockid) 1563 + return 0; 1564 + 1565 + session->header.env.clock.clockid = rec->opts.clockid; 1566 + 1567 + if (gettimeofday(&ref_tod, NULL) != 0) { 1568 + pr_err("gettimeofday failed, cannot set reference time.\n"); 1569 + return -1; 1570 + } 1571 + 1572 + if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 1573 + pr_err("clock_gettime failed, cannot set reference time.\n"); 1574 + return -1; 1575 + } 1576 + 1577 + ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 1578 + (u64) ref_tod.tv_usec * NSEC_PER_USEC; 1579 + 1580 + session->header.env.clock.tod_ns = ref; 1581 + 1582 + ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 1583 + (u64) ref_clockid.tv_nsec; 1584 + 1585 + session->header.env.clock.clockid_ns = ref; 1586 + return 0; 1587 + } 1588 + 1559 1589 static int __cmd_record(struct record *rec, int argc, const char **argv) 1560 1590 { 1561 1591 int err; ··· 1669 1631 pr_err("ERROR: kcore is not readable.\n"); 1670 1632 return -1; 1671 1633 } 1634 + 1635 + if (record__init_clock(rec)) 1636 + return -1; 1672 1637 1673 1638 record__init_features(rec); 1674 1639
+12
tools/perf/util/env.h
··· 100 100 /* For fast cpu to numa node lookup via perf_env__numa_node */ 101 101 int *numa_map; 102 102 int nr_numa_map; 103 + 104 + /* For real clock time reference. */ 105 + struct { 106 + u64 tod_ns; 107 + u64 clockid_ns; 108 + int clockid; 109 + /* 110 + * enabled is valid for report mode, and is true if above 111 + * values are set, it's set in process_clock_data 112 + */ 113 + bool enabled; 114 + } clock; 103 115 }; 104 116 105 117 enum perf_compress_type {
+113
tools/perf/util/header.c
··· 46 46 #include "util/util.h" // perf_exe() 47 47 #include "cputopo.h" 48 48 #include "bpf-event.h" 49 + #include "clockid.h" 49 50 50 51 #include <linux/ctype.h> 51 52 #include <internal/lib.h> ··· 896 895 sizeof(ff->ph->env.clockid_res_ns)); 897 896 } 898 897 898 + static int write_clock_data(struct feat_fd *ff, 899 + struct evlist *evlist __maybe_unused) 900 + { 901 + u64 *data64; 902 + u32 data32; 903 + int ret; 904 + 905 + /* version */ 906 + data32 = 1; 907 + 908 + ret = do_write(ff, &data32, sizeof(data32)); 909 + if (ret < 0) 910 + return ret; 911 + 912 + /* clockid */ 913 + data32 = ff->ph->env.clock.clockid; 914 + 915 + ret = do_write(ff, &data32, sizeof(data32)); 916 + if (ret < 0) 917 + return ret; 918 + 919 + /* TOD ref time */ 920 + data64 = &ff->ph->env.clock.tod_ns; 921 + 922 + ret = do_write(ff, data64, sizeof(*data64)); 923 + if (ret < 0) 924 + return ret; 925 + 926 + /* clockid ref time */ 927 + data64 = &ff->ph->env.clock.clockid_ns; 928 + 929 + return do_write(ff, data64, sizeof(*data64)); 930 + } 931 + 899 932 static int write_dir_format(struct feat_fd *ff, 900 933 struct evlist *evlist __maybe_unused) 901 934 { ··· 1582 1547 { 1583 1548 fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n", 1584 1549 ff->ph->env.clockid_res_ns * 1000); 1550 + } 1551 + 1552 + static void print_clock_data(struct feat_fd *ff, FILE *fp) 1553 + { 1554 + struct timespec clockid_ns; 1555 + char tstr[64], date[64]; 1556 + struct timeval tod_ns; 1557 + clockid_t clockid; 1558 + struct tm ltime; 1559 + u64 ref; 1560 + 1561 + if (!ff->ph->env.clock.enabled) { 1562 + fprintf(fp, "# reference time disabled\n"); 1563 + return; 1564 + } 1565 + 1566 + /* Compute TOD time. */ 1567 + ref = ff->ph->env.clock.tod_ns; 1568 + tod_ns.tv_sec = ref / NSEC_PER_SEC; 1569 + ref -= tod_ns.tv_sec * NSEC_PER_SEC; 1570 + tod_ns.tv_usec = ref / NSEC_PER_USEC; 1571 + 1572 + /* Compute clockid time. */ 1573 + ref = ff->ph->env.clock.clockid_ns; 1574 + clockid_ns.tv_sec = ref / NSEC_PER_SEC; 1575 + ref -= clockid_ns.tv_sec * NSEC_PER_SEC; 1576 + clockid_ns.tv_nsec = ref; 1577 + 1578 + clockid = ff->ph->env.clock.clockid; 1579 + 1580 + if (localtime_r(&tod_ns.tv_sec, &ltime) == NULL) 1581 + snprintf(tstr, sizeof(tstr), "<error>"); 1582 + else { 1583 + strftime(date, sizeof(date), "%F %T", &ltime); 1584 + scnprintf(tstr, sizeof(tstr), "%s.%06d", 1585 + date, (int) tod_ns.tv_usec); 1586 + } 1587 + 1588 + fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid); 1589 + fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n", 1590 + tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec, 1591 + clockid_ns.tv_sec, clockid_ns.tv_nsec, 1592 + clockid_name(clockid)); 1585 1593 } 1586 1594 1587 1595 static void print_dir_format(struct feat_fd *ff, FILE *fp) ··· 2816 2738 return 0; 2817 2739 } 2818 2740 2741 + static int process_clock_data(struct feat_fd *ff, 2742 + void *_data __maybe_unused) 2743 + { 2744 + u32 data32; 2745 + u64 data64; 2746 + 2747 + /* version */ 2748 + if (do_read_u32(ff, &data32)) 2749 + return -1; 2750 + 2751 + if (data32 != 1) 2752 + return -1; 2753 + 2754 + /* clockid */ 2755 + if (do_read_u32(ff, &data32)) 2756 + return -1; 2757 + 2758 + ff->ph->env.clock.clockid = data32; 2759 + 2760 + /* TOD ref time */ 2761 + if (do_read_u64(ff, &data64)) 2762 + return -1; 2763 + 2764 + ff->ph->env.clock.tod_ns = data64; 2765 + 2766 + /* clockid ref time */ 2767 + if (do_read_u64(ff, &data64)) 2768 + return -1; 2769 + 2770 + ff->ph->env.clock.clockid_ns = data64; 2771 + ff->ph->env.clock.enabled = true; 2772 + return 0; 2773 + } 2774 + 2819 2775 static int process_dir_format(struct feat_fd *ff, 2820 2776 void *_data __maybe_unused) 2821 2777 { ··· 3120 3008 FEAT_OPR(BPF_BTF, bpf_btf, false), 3121 3009 FEAT_OPR(COMPRESSED, compressed, false), 3122 3010 FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), 3011 + FEAT_OPR(CLOCK_DATA, clock_data, false), 3123 3012 }; 3124 3013 3125 3014 struct header_print_data {
+1
tools/perf/util/header.h
··· 44 44 HEADER_BPF_BTF, 45 45 HEADER_COMPRESSED, 46 46 HEADER_CPU_PMU_CAPS, 47 + HEADER_CLOCK_DATA, 47 48 HEADER_LAST_FEATURE, 48 49 HEADER_FEAT_BITS = 256, 49 50 };