Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf intel-pt: Add a config for max loops without consuming a packet

The Intel PT decoder limits the number of unconditional branches (e.g.
jmps) decoded without consuming any trace packets. Generally, a loop
needs a conditional branch which generates a TNT packet, whereas a "ret"
instruction will generate a TIP or TNT packet. So exceeding the limit is
assumed to be a never-ending loop, which can happen if there has been a
decoding error putting the decoder at the wrong place in the code.

Up until now, the limit of 10000 has been enough, but some analysis
use cases have been reported to exceed it.

Increase the limit to 100000, and make it configurable via perf config
intel-pt.max-loops. Also amend the "Never-ending loop" message to
mention the configuration entry.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lore.kernel.org/lkml/20210701175132.3977-1-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Adrian Hunter and committed by Arnaldo Carvalho de Melo
b4b046ff 493be70a

+21 -4
+6
tools/perf/Documentation/perf-config.txt
···
706 706   If set, Intel PT decoder will set the mispred flag on all
707 707   branches.
708 708
    709 + intel-pt.max-loops::
    710 + If set and non-zero, the maximum number of unconditional
    711 + branches decoded without consuming any trace packets. If
    712 + the maximum is exceeded there will be a "Never-ending loop"
    713 + error. The default is 100000.
    714 +
709 715   auxtrace.*::
710 716
711 717   auxtrace.dumpdir::
+9 -4
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
··· 41 41 42 42 #define INTEL_PT_RETURN 1 43 43 44 - /* Maximum number of loops with no packets consumed i.e. stuck in a loop */ 45 - #define INTEL_PT_MAX_LOOPS 10000 44 + /* 45 + * Default maximum number of loops with no packets consumed i.e. stuck in a 46 + * loop. 47 + */ 48 + #define INTEL_PT_MAX_LOOPS 100000 46 49 47 50 struct intel_pt_blk { 48 51 struct intel_pt_blk *prev; ··· 223 220 uint64_t timestamp_insn_cnt; 224 221 uint64_t sample_insn_cnt; 225 222 uint64_t stuck_ip; 223 + int max_loops; 226 224 int no_progress; 227 225 int stuck_ip_prd; 228 226 int stuck_ip_cnt; ··· 319 315 decoder->vm_tm_corr_dry_run = params->vm_tm_corr_dry_run; 320 316 decoder->first_timestamp = params->first_timestamp; 321 317 decoder->last_reliable_timestamp = params->first_timestamp; 318 + decoder->max_loops = params->max_loops ? params->max_loops : INTEL_PT_MAX_LOOPS; 322 319 323 320 decoder->flags = params->flags; 324 321 ··· 488 483 [INTEL_PT_ERR_OVR] = "Overflow packet", 489 484 [INTEL_PT_ERR_LOST] = "Lost trace data", 490 485 [INTEL_PT_ERR_UNK] = "Unknown error!", 491 - [INTEL_PT_ERR_NELOOP] = "Never-ending loop", 486 + [INTEL_PT_ERR_NELOOP] = "Never-ending loop (refer perf config intel-pt.max-loops)", 492 487 }; 493 488 494 489 int intel_pt__strerror(int code, char *buf, size_t buflen) ··· 1173 1168 decoder->stuck_ip = decoder->state.to_ip; 1174 1169 decoder->stuck_ip_prd = 1; 1175 1170 decoder->stuck_ip_cnt = 1; 1176 - } else if (cnt > INTEL_PT_MAX_LOOPS || 1171 + } else if (cnt > decoder->max_loops || 1177 1172 decoder->state.to_ip == decoder->stuck_ip) { 1178 1173 intel_pt_log_at("ERROR: Never-ending loop", 1179 1174 decoder->state.to_ip);
+1
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
··· 270 270 uint32_t tsc_ctc_ratio_d; 271 271 enum intel_pt_param_flags flags; 272 272 unsigned int quick; 273 + int max_loops; 273 274 }; 274 275 275 276 struct intel_pt_decoder;
+5
tools/perf/util/intel-pt.c
··· 123 123 u64 noretcomp_bit; 124 124 unsigned max_non_turbo_ratio; 125 125 unsigned cbr2khz; 126 + int max_loops; 126 127 127 128 unsigned long num_events; 128 129 ··· 1201 1200 params.vm_time_correlation = pt->synth_opts.vm_time_correlation; 1202 1201 params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run; 1203 1202 params.first_timestamp = pt->first_timestamp; 1203 + params.max_loops = pt->max_loops; 1204 1204 1205 1205 if (pt->filts.cnt > 0) 1206 1206 params.pgd_ip = intel_pt_pgd_ip; ··· 3432 3430 3433 3431 if (!strcmp(var, "intel-pt.mispred-all")) 3434 3432 pt->mispred_all = perf_config_bool(var, value); 3433 + 3434 + if (!strcmp(var, "intel-pt.max-loops")) 3435 + perf_config_int(&pt->max_loops, var, value); 3435 3436 3436 3437 return 0; 3437 3438 }