Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf intel-pt: Accumulate cycle count from TSC/TMA/MTC packets

When CYC packets are not available, it is still possible to count cycles
using TSC/TMA/MTC timestamps.

As the timestamp increments in TSC ticks, convert to CPU cycles using
the current core-to-bus ratio.

Do not accumulate cycles when control flow packet generation is not
enabled, nor when time has been "lost", typically due to mwait, which is
indicated by a TSC/TMA packet that is not part of PSB+.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20190520113728.14389-12-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Adrian Hunter and committed by Arnaldo Carvalho de Melo
3f055167 f3c98c4b

+51
+51
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
···
163 163 	uint64_t last_masked_timestamp;
164 164 	uint64_t tot_cyc_cnt;
165 165 	uint64_t sample_tot_cyc_cnt;
166 + 	uint64_t base_cyc_cnt;
167 + 	uint64_t cyc_cnt_timestamp;
168 + 	double tsc_to_cyc;
166 169 	bool continuous_period;
167 170 	bool overflow;
168 171 	bool set_fup_tx_flags;
···
1426 1423 	return -EOVERFLOW;
1427 1424 }
1428 1425
1426 + static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder)
1427 + {
1428 + 	if (decoder->have_cyc)
1429 + 		return;
1430 +
1431 + 	decoder->cyc_cnt_timestamp = decoder->timestamp;
1432 + 	decoder->base_cyc_cnt = decoder->tot_cyc_cnt;
1433 + }
1434 +
1435 + static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder)
1436 + {
1437 + 	decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp;
1438 +
1439 + 	if (decoder->pge)
1440 + 		intel_pt_mtc_cyc_cnt_pge(decoder);
1441 + }
1442 +
1443 + static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder)
1444 + {
1445 + 	uint64_t tot_cyc_cnt, tsc_delta;
1446 +
1447 + 	if (decoder->have_cyc)
1448 + 		return;
1449 +
1450 + 	decoder->sample_cyc = true;
1451 +
1452 + 	if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp)
1453 + 		return;
1454 +
1455 + 	tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp;
1456 + 	tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt;
1457 +
1458 + 	if (tot_cyc_cnt > decoder->tot_cyc_cnt)
1459 + 		decoder->tot_cyc_cnt = tot_cyc_cnt;
1460 + }
1461 +
1429 1462 static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
1430 1463 {
1431 1464 	uint32_t ctc = decoder->packet.payload;
···
1470 1431
1471 1432 	if (!decoder->tsc_ctc_ratio_d)
1472 1433 		return;
1434 +
1435 + 	if (decoder->pge && !decoder->in_psb)
1436 + 		intel_pt_mtc_cyc_cnt_pge(decoder);
1437 + 	else
1438 + 		intel_pt_mtc_cyc_cnt_upd(decoder);
1473 1439
1474 1440 	decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
1475 1441 	decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
···
1531 1487 	else
1532 1488 		decoder->timestamp = timestamp;
1533 1489
1490 + 	intel_pt_mtc_cyc_cnt_upd(decoder);
1491 +
1534 1492 	decoder->timestamp_insn_cnt = 0;
1535 1493 	decoder->last_mtc = mtc;
1536 1494
···
1557 1511
1558 1512 	decoder->cbr = cbr;
1559 1513 	decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
1514 +
1515 + 	intel_pt_mtc_cyc_cnt_cbr(decoder);
1560 1516 }
1561 1517
1562 1518 static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
···
1754 1706 				decoder->state.to_ip = decoder->ip;
1755 1707 			}
1756 1708 			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
1709 + 			intel_pt_mtc_cyc_cnt_pge(decoder);
1757 1710 			return 0;
1758 1711
1759 1712 		case INTEL_PT_TIP:
···
1825 1776
1826 1777 		case INTEL_PT_TIP_PGE: {
1827 1778 			decoder->pge = true;
1779 + 			intel_pt_mtc_cyc_cnt_pge(decoder);
1828 1780 			if (decoder->packet.count == 0) {
1829 1781 				intel_pt_log_at("Skipping zero TIP.PGE",
1830 1782 						decoder->pos);
···
2188 2138
2189 2139 		case INTEL_PT_TIP_PGE:
2190 2140 			decoder->pge = true;
2141 + 			intel_pt_mtc_cyc_cnt_pge(decoder);
2191 2142 			if (intel_pt_have_ip(decoder))
2192 2143 				intel_pt_set_ip(decoder);
2193 2144 			if (!decoder->ip)