Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf intel-pt: Add support for emulated ptwrite

ptwrite is an Intel x86 instruction that writes arbitrary values into an
Intel PT trace. It is not supported on all hardware, so provide an
alternative that makes use of TNT packets to convey the payload data.
TNT packets encode Taken/Not-taken conditional branch information, so
taking branches based on the payload value will encode the value into
the TNT packet. Refer to the changes to the documentation file
perf-intel-pt.txt in this patch for an example.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20220509152400.376613-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Adrian Hunter and committed by Arnaldo Carvalho de Melo
d7015e50 df36d257

+224 -3
+88
tools/perf/Documentation/perf-intel-pt.txt
··· 468 468 which contains "1" if the feature is supported and 469 469 "0" otherwise. 470 470 471 + As an alternative, refer to "Emulated PTWRITE" further below. 472 + 471 473 fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet 472 474 provides the address of the ptwrite instruction. In the absence of 473 475 fup_on_ptw, the decoder will use the address of the previous branch ··· 1471 1469 1472 1470 In that case the --itrace q option is forced because walking executable code 1473 1471 to reconstruct the control flow is not possible. 1472 + 1473 + 1474 + Emulated PTWRITE 1475 + ---------------- 1476 + 1477 + Later perf tools support a method to emulate the ptwrite instruction, which 1478 + can be useful if hardware does not support the ptwrite instruction. 1479 + 1480 + Instead of using the ptwrite instruction, a function is used which produces 1481 + a trace that encodes the payload data into TNT packets. Here is an example 1482 + of the function: 1483 + 1484 + #include <stdint.h> 1485 + 1486 + void perf_emulate_ptwrite(uint64_t x) 1487 + __attribute__((externally_visible, noipa, no_instrument_function, naked)); 1488 + 1489 + #define PERF_EMULATE_PTWRITE_8_BITS \ 1490 + "1: shl %rax\n" \ 1491 + " jc 1f\n" \ 1492 + "1: shl %rax\n" \ 1493 + " jc 1f\n" \ 1494 + "1: shl %rax\n" \ 1495 + " jc 1f\n" \ 1496 + "1: shl %rax\n" \ 1497 + " jc 1f\n" \ 1498 + "1: shl %rax\n" \ 1499 + " jc 1f\n" \ 1500 + "1: shl %rax\n" \ 1501 + " jc 1f\n" \ 1502 + "1: shl %rax\n" \ 1503 + " jc 1f\n" \ 1504 + "1: shl %rax\n" \ 1505 + " jc 1f\n" 1506 + 1507 + /* Undefined instruction */ 1508 + #define PERF_EMULATE_PTWRITE_UD2 ".byte 0x0f, 0x0b\n" 1509 + 1510 + #define PERF_EMULATE_PTWRITE_MAGIC PERF_EMULATE_PTWRITE_UD2 ".ascii \"perf,ptwrite \"\n" 1511 + 1512 + void perf_emulate_ptwrite(uint64_t x __attribute__ ((__unused__))) 1513 + { 1514 + /* Assumes SysV ABI : x passed in rdi */ 1515 + __asm__ volatile ( 1516 + "jmp 1f\n" 1517 + PERF_EMULATE_PTWRITE_MAGIC 1518 + "1: 
mov %rdi, %rax\n" 1519 + PERF_EMULATE_PTWRITE_8_BITS 1520 + PERF_EMULATE_PTWRITE_8_BITS 1521 + PERF_EMULATE_PTWRITE_8_BITS 1522 + PERF_EMULATE_PTWRITE_8_BITS 1523 + PERF_EMULATE_PTWRITE_8_BITS 1524 + PERF_EMULATE_PTWRITE_8_BITS 1525 + PERF_EMULATE_PTWRITE_8_BITS 1526 + PERF_EMULATE_PTWRITE_8_BITS 1527 + "1: ret\n" 1528 + ); 1529 + } 1530 + 1531 + For example, a test program with the function above: 1532 + 1533 + #include <stdio.h> 1534 + #include <stdint.h> 1535 + #include <stdlib.h> 1536 + 1537 + #include "perf_emulate_ptwrite.h" 1538 + 1539 + int main(int argc, char *argv[]) 1540 + { 1541 + uint64_t x = 0; 1542 + 1543 + if (argc > 1) 1544 + x = strtoull(argv[1], NULL, 0); 1545 + perf_emulate_ptwrite(x); 1546 + return 0; 1547 + } 1548 + 1549 + Can be compiled and traced: 1550 + 1551 + $ gcc -Wall -Wextra -O3 -g -o eg_ptw eg_ptw.c 1552 + $ perf record -e intel_pt//u ./eg_ptw 0x1234567890abcdef 1553 + [ perf record: Woken up 1 times to write data ] 1554 + [ perf record: Captured and wrote 0.017 MB perf.data ] 1555 + $ perf script --itrace=ew 1556 + eg_ptw 19875 [007] 8061.235912: ptwrite: IP: 0 payload: 0x1234567890abcdef 55701249a196 perf_emulate_ptwrite+0x16 (/home/user/eg_ptw) 1557 + $ 1474 1558 1475 1559 1476 1560 EXAMPLE
+97 -2
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
··· 137 137 bool in_psb; 138 138 bool hop; 139 139 bool leap; 140 + bool emulated_ptwrite; 140 141 bool vm_time_correlation; 141 142 bool vm_tm_corr_dry_run; 142 143 bool vm_tm_corr_reliable; ··· 482 481 return INTEL_PT_ERR_LOST; 483 482 case -ELOOP: 484 483 return INTEL_PT_ERR_NELOOP; 484 + case -ECONNRESET: 485 + return INTEL_PT_ERR_EPTW; 485 486 default: 486 487 return INTEL_PT_ERR_UNK; 487 488 } ··· 500 497 [INTEL_PT_ERR_LOST] = "Lost trace data", 501 498 [INTEL_PT_ERR_UNK] = "Unknown error!", 502 499 [INTEL_PT_ERR_NELOOP] = "Never-ending loop (refer perf config intel-pt.max-loops)", 500 + [INTEL_PT_ERR_EPTW] = "Broken emulated ptwrite", 503 501 }; 504 502 505 503 int intel_pt__strerror(int code, char *buf, size_t buflen) ··· 1539 1535 return intel_pt_bug(decoder); 1540 1536 } 1541 1537 1538 + struct eptw_data { 1539 + int bit_countdown; 1540 + uint64_t payload; 1541 + }; 1542 + 1543 + static int intel_pt_eptw_lookahead_cb(struct intel_pt_pkt_info *pkt_info) 1544 + { 1545 + struct eptw_data *data = pkt_info->data; 1546 + int nr_bits; 1547 + 1548 + switch (pkt_info->packet.type) { 1549 + case INTEL_PT_PAD: 1550 + case INTEL_PT_MNT: 1551 + case INTEL_PT_MODE_EXEC: 1552 + case INTEL_PT_MODE_TSX: 1553 + case INTEL_PT_MTC: 1554 + case INTEL_PT_FUP: 1555 + case INTEL_PT_CYC: 1556 + case INTEL_PT_CBR: 1557 + case INTEL_PT_TSC: 1558 + case INTEL_PT_TMA: 1559 + case INTEL_PT_PIP: 1560 + case INTEL_PT_VMCS: 1561 + case INTEL_PT_PSB: 1562 + case INTEL_PT_PSBEND: 1563 + case INTEL_PT_PTWRITE: 1564 + case INTEL_PT_PTWRITE_IP: 1565 + case INTEL_PT_EXSTOP: 1566 + case INTEL_PT_EXSTOP_IP: 1567 + case INTEL_PT_MWAIT: 1568 + case INTEL_PT_PWRE: 1569 + case INTEL_PT_PWRX: 1570 + case INTEL_PT_BBP: 1571 + case INTEL_PT_BIP: 1572 + case INTEL_PT_BEP: 1573 + case INTEL_PT_BEP_IP: 1574 + case INTEL_PT_CFE: 1575 + case INTEL_PT_CFE_IP: 1576 + case INTEL_PT_EVD: 1577 + break; 1578 + 1579 + case INTEL_PT_TNT: 1580 + nr_bits = data->bit_countdown; 1581 + if (nr_bits > 
pkt_info->packet.count) 1582 + nr_bits = pkt_info->packet.count; 1583 + data->payload <<= nr_bits; 1584 + data->payload |= pkt_info->packet.payload >> (64 - nr_bits); 1585 + data->bit_countdown -= nr_bits; 1586 + return !data->bit_countdown; 1587 + 1588 + case INTEL_PT_TIP_PGE: 1589 + case INTEL_PT_TIP_PGD: 1590 + case INTEL_PT_TIP: 1591 + case INTEL_PT_BAD: 1592 + case INTEL_PT_OVF: 1593 + case INTEL_PT_TRACESTOP: 1594 + default: 1595 + return 1; 1596 + } 1597 + 1598 + return 0; 1599 + } 1600 + 1601 + static int intel_pt_emulated_ptwrite(struct intel_pt_decoder *decoder) 1602 + { 1603 + int n = 64 - decoder->tnt.count; 1604 + struct eptw_data data = { 1605 + .bit_countdown = n, 1606 + .payload = decoder->tnt.payload >> n, 1607 + }; 1608 + 1609 + decoder->emulated_ptwrite = false; 1610 + intel_pt_log("Emulated ptwrite detected\n"); 1611 + 1612 + intel_pt_pkt_lookahead(decoder, intel_pt_eptw_lookahead_cb, &data); 1613 + if (data.bit_countdown) 1614 + return -ECONNRESET; 1615 + 1616 + decoder->state.type = INTEL_PT_PTW; 1617 + decoder->state.from_ip = decoder->ip; 1618 + decoder->state.to_ip = 0; 1619 + decoder->state.ptw_payload = data.payload; 1620 + return 0; 1621 + } 1622 + 1542 1623 static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) 1543 1624 { 1544 1625 struct intel_pt_insn intel_pt_insn; 1545 1626 int err; 1546 1627 1547 1628 while (1) { 1629 + if (decoder->emulated_ptwrite) 1630 + return intel_pt_emulated_ptwrite(decoder); 1548 1631 err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); 1549 - if (err == INTEL_PT_RETURN) 1632 + if (err == INTEL_PT_RETURN) { 1633 + decoder->emulated_ptwrite = intel_pt_insn.emulated_ptwrite; 1550 1634 return 0; 1551 - if (err) 1635 + } 1636 + if (err) { 1637 + decoder->emulated_ptwrite = false; 1552 1638 return err; 1639 + } 1553 1640 1554 1641 if (intel_pt_insn.op == INTEL_PT_OP_RET) { 1555 1642 if (!decoder->return_compression) {
+1
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
··· 58 58 INTEL_PT_ERR_LOST, 59 59 INTEL_PT_ERR_UNK, 60 60 INTEL_PT_ERR_NELOOP, 61 + INTEL_PT_ERR_EPTW, 61 62 INTEL_PT_ERR_MAX, 62 63 }; 63 64
+1
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
··· 32 32 int ext; 33 33 34 34 intel_pt_insn->rel = 0; 35 + intel_pt_insn->emulated_ptwrite = false; 35 36 36 37 if (insn_is_avx(insn)) { 37 38 intel_pt_insn->op = INTEL_PT_OP_OTHER;
+1
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
··· 37 37 struct intel_pt_insn { 38 38 enum intel_pt_insn_op op; 39 39 enum intel_pt_insn_branch branch; 40 + bool emulated_ptwrite; 40 41 int length; 41 42 int32_t rel; 42 43 unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+36 -1
tools/perf/util/intel-pt.c
··· 530 530 u64 byte_cnt; 531 531 enum intel_pt_insn_op op; 532 532 enum intel_pt_insn_branch branch; 533 + bool emulated_ptwrite; 533 534 int length; 534 535 int32_t rel; 535 536 char insn[INTEL_PT_INSN_BUF_SZ]; ··· 617 616 e->byte_cnt = byte_cnt; 618 617 e->op = intel_pt_insn->op; 619 618 e->branch = intel_pt_insn->branch; 619 + e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite; 620 620 e->length = intel_pt_insn->length; 621 621 e->rel = intel_pt_insn->rel; 622 622 memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ); ··· 704 702 return 0; 705 703 } 706 704 705 + static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn) 706 + { 707 + return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL; 708 + } 709 + 710 + #define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite " 711 + #define PTWRITE_MAGIC_LEN 16 712 + 713 + static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset) 714 + { 715 + unsigned char buf[PTWRITE_MAGIC_LEN]; 716 + ssize_t len; 717 + 718 + len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN); 719 + if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) { 720 + intel_pt_log("Emulated ptwrite signature found\n"); 721 + return true; 722 + } 723 + intel_pt_log("Emulated ptwrite signature not found\n"); 724 + return false; 725 + } 726 + 707 727 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, 708 728 uint64_t *insn_cnt_ptr, uint64_t *ip, 709 729 uint64_t to_ip, uint64_t max_insn_cnt, ··· 788 764 *ip += e->byte_cnt; 789 765 intel_pt_insn->op = e->op; 790 766 intel_pt_insn->branch = e->branch; 767 + intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite; 791 768 intel_pt_insn->length = e->length; 792 769 intel_pt_insn->rel = e->rel; 793 770 memcpy(intel_pt_insn->buf, e->insn, ··· 820 795 821 796 insn_cnt += 1; 822 797 823 - if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) 798 + if (intel_pt_insn->branch != 
INTEL_PT_BR_NO_BRANCH) { 799 + bool eptw; 800 + u64 offs; 801 + 802 + if (!intel_pt_jmp_16(intel_pt_insn)) 803 + goto out; 804 + /* Check for emulated ptwrite */ 805 + offs = offset + intel_pt_insn->length; 806 + eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs); 807 + intel_pt_insn->emulated_ptwrite = eptw; 824 808 goto out; 809 + } 825 810 826 811 if (max_insn_cnt && insn_cnt >= max_insn_cnt) 827 812 goto out_no_cache;