Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf tools: Add support for skipping itrace instructions

When using 'perf script' to look at PT traces it is often useful to
ignore the initialization code at the beginning.

On larger traces, which may have many millions of instructions in
initialization code, doing that in a pipeline can be very slow, with perf
script spending a lot of CPU time calling printf and writing data.

This patch adds an extension to the --itrace argument that skips 'n'
events (instructions, branches or transactions) at the beginning. This
is much more efficient.

v2:
Add support for BTS (Adrian Hunter)
Document in itrace.txt
Fix branch check
Check transactions and instructions too

Committer note:

To test intel_pt, one needs to make sure VT-x isn't active, i.e. by
stopping KVM guests on the test machine, as described by Andi Kleen
at http://lkml.kernel.org/r/20160301234953.GD23621@tassilo.jf.intel.com

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1459187142-20035-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Andi Kleen and committed by
Arnaldo Carvalho de Melo
d1706b39 f7380c12

+49 -2
+7
tools/perf/Documentation/intel-pt.txt
··· 672 672 d create a debug log 673 673 g synthesize a call chain (use with i or x) 674 674 l synthesize last branch entries (use with i or x) 675 + s skip initial number of events 675 676 676 677 "Instructions" events look like they were recorded by "perf record -e 677 678 instructions". ··· 731 730 732 731 To disable trace decoding entirely, use the option --no-itrace. 733 732 733 + It is also possible to skip events generated (instructions, branches, transactions) 734 + at the beginning. This is useful to ignore initialization code. 735 + 736 + --itrace=i0nss1000000 737 + 738 + skips the first million instructions. 734 739 735 740 dump option 736 741 -----------
+8
tools/perf/Documentation/itrace.txt
··· 7 7 d create a debug log 8 8 g synthesize a call chain (use with i or x) 9 9 l synthesize last branch entries (use with i or x) 10 + s skip initial number of events 10 11 11 12 The default is all events i.e. the same as --itrace=ibxe 12 13 ··· 25 24 26 25 Also the number of last branch entries (default 64, max. 1024) for 27 26 instructions or transactions events can be specified. 27 + 28 + It is also possible to skip events generated (instructions, branches, transactions) 29 + at the beginning. This is useful to ignore initialization code. 30 + 31 + --itrace=i0nss1000000 32 + 33 + skips the first million instructions.
+7
tools/perf/util/auxtrace.c
··· 940 940 synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; 941 941 synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; 942 942 synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; 943 + synth_opts->initial_skip = 0; 943 944 } 944 945 945 946 /* ··· 1064 1063 goto out_err; 1065 1064 synth_opts->last_branch_sz = val; 1066 1065 } 1066 + break; 1067 + case 's': 1068 + synth_opts->initial_skip = strtoul(p, &endptr, 10); 1069 + if (p == endptr) 1070 + goto out_err; 1071 + p = endptr; 1067 1072 break; 1068 1073 case ' ': 1069 1074 case ',':
+2
tools/perf/util/auxtrace.h
··· 68 68 * @last_branch_sz: branch context size 69 69 * @period: 'instructions' events period 70 70 * @period_type: 'instructions' events period type 71 + * @initial_skip: skip N events at the beginning. 71 72 */ 72 73 struct itrace_synth_opts { 73 74 bool set; ··· 87 86 unsigned int last_branch_sz; 88 87 unsigned long long period; 89 88 enum itrace_period_type period_type; 89 + unsigned long initial_skip; 90 90 }; 91 91 92 92 /**
+5
tools/perf/util/intel-bts.c
··· 66 66 u64 branches_id; 67 67 size_t branches_event_size; 68 68 bool synth_needs_swap; 69 + unsigned long num_events; 69 70 }; 70 71 71 72 struct intel_bts_queue { ··· 275 274 struct intel_bts *bts = btsq->bts; 276 275 union perf_event event; 277 276 struct perf_sample sample = { .ip = 0, }; 277 + 278 + if (bts->synth_opts.initial_skip && 279 + bts->num_events++ <= bts->synth_opts.initial_skip) 280 + return 0; 278 281 279 282 event.sample.header.type = PERF_RECORD_SAMPLE; 280 283 event.sample.header.misc = PERF_RECORD_MISC_USER;
+20 -2
tools/perf/util/intel-pt.c
··· 100 100 u64 cyc_bit; 101 101 u64 noretcomp_bit; 102 102 unsigned max_non_turbo_ratio; 103 + 104 + unsigned long num_events; 103 105 }; 104 106 105 107 enum switch_state { ··· 974 972 if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) 975 973 return 0; 976 974 975 + if (pt->synth_opts.initial_skip && 976 + pt->num_events++ < pt->synth_opts.initial_skip) 977 + return 0; 978 + 977 979 event->sample.header.type = PERF_RECORD_SAMPLE; 978 980 event->sample.header.misc = PERF_RECORD_MISC_USER; 979 981 event->sample.header.size = sizeof(struct perf_event_header); ··· 1034 1028 struct intel_pt *pt = ptq->pt; 1035 1029 union perf_event *event = ptq->event_buf; 1036 1030 struct perf_sample sample = { .ip = 0, }; 1031 + 1032 + if (pt->synth_opts.initial_skip && 1033 + pt->num_events++ < pt->synth_opts.initial_skip) 1034 + return 0; 1037 1035 1038 1036 event->sample.header.type = PERF_RECORD_SAMPLE; 1039 1037 event->sample.header.misc = PERF_RECORD_MISC_USER; ··· 1096 1086 struct intel_pt *pt = ptq->pt; 1097 1087 union perf_event *event = ptq->event_buf; 1098 1088 struct perf_sample sample = { .ip = 0, }; 1089 + 1090 + if (pt->synth_opts.initial_skip && 1091 + pt->num_events++ < pt->synth_opts.initial_skip) 1092 + return 0; 1099 1093 1100 1094 event->sample.header.type = PERF_RECORD_SAMPLE; 1101 1095 event->sample.header.misc = PERF_RECORD_MISC_USER; ··· 1213 1199 ptq->have_sample = false; 1214 1200 1215 1201 if (pt->sample_instructions && 1216 - (state->type & INTEL_PT_INSTRUCTION)) { 1202 + (state->type & INTEL_PT_INSTRUCTION) && 1203 + (!pt->synth_opts.initial_skip || 1204 + pt->num_events++ >= pt->synth_opts.initial_skip)) { 1217 1205 err = intel_pt_synth_instruction_sample(ptq); 1218 1206 if (err) 1219 1207 return err; 1220 1208 } 1221 1209 1222 1210 if (pt->sample_transactions && 1223 - (state->type & INTEL_PT_TRANSACTION)) { 1211 + (state->type & INTEL_PT_TRANSACTION) && 1212 + (!pt->synth_opts.initial_skip || 1213 + pt->num_events++ >= 
pt->synth_opts.initial_skip)) { 1224 1214 err = intel_pt_synth_transaction_sample(ptq); 1225 1215 if (err) 1226 1216 return err;