Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf arm-spe: Support synthetic events

After the commit ffd3d18c20b8 ("perf tools: Add ARM Statistical
Profiling Extensions (SPE) support") was merged, perf can output raw
data with the option "--dump-raw-trace". However, it lacks support for
synthetic events, so it cannot output any statistical information.

This patch improves the "perf report" support for ARM SPE with four
types of synthetic events:

First level cache synthetic events, including L1 data cache access
and miss events;
Last level cache synthetic events, including last level cache
access and miss events;
TLB synthetic events, including TLB access and miss events;
Remote access events, which are used to account for load/store
operations targeting another socket.

Example usage:

$ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000
$ perf report --stdio

# Samples: 59 of event 'l1d-miss'
# Event count (approx.): 59
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ..................................
#
23.73% 23.73% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
20.34% 20.34% dd [kernel.kallsyms] [k] filemap_map_pages
5.08% 5.08% dd [kernel.kallsyms] [k] perf_event_mmap
5.08% 5.08% dd [kernel.kallsyms] [k] unlock_page_memcg
5.08% 5.08% dd [kernel.kallsyms] [k] unmap_page_range
3.39% 3.39% dd [kernel.kallsyms] [k] PageHuge
3.39% 3.39% dd [kernel.kallsyms] [k] release_pages
3.39% 3.39% dd ld-2.28.so [.] 0x0000000000008b5c
1.69% 1.69% dd [kernel.kallsyms] [k] __alloc_fd
[...]

# Samples: 3K of event 'l1d-access'
# Event count (approx.): 3980
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ......................................
#
26.98% 26.98% dd [kernel.kallsyms] [k] ret_to_user
10.53% 10.53% dd [kernel.kallsyms] [k] fsnotify
7.51% 7.51% dd [kernel.kallsyms] [k] new_sync_read
4.57% 4.57% dd [kernel.kallsyms] [k] vfs_read
4.35% 4.35% dd [kernel.kallsyms] [k] vfs_write
3.69% 3.69% dd [kernel.kallsyms] [k] __fget_light
3.69% 3.69% dd [kernel.kallsyms] [k] rw_verify_area
3.44% 3.44% dd [kernel.kallsyms] [k] security_file_permission
2.76% 2.76% dd [kernel.kallsyms] [k] __fsnotify_parent
2.44% 2.44% dd [kernel.kallsyms] [k] ksys_write
2.24% 2.24% dd [kernel.kallsyms] [k] iov_iter_zero
2.19% 2.19% dd [kernel.kallsyms] [k] read_iter_zero
1.81% 1.81% dd dd [.] 0x0000000000002960
1.78% 1.78% dd dd [.] 0x0000000000002980
[...]

# Samples: 35 of event 'llc-miss'
# Event count (approx.): 35
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ...........................
#
34.29% 34.29% dd [kernel.kallsyms] [k] filemap_map_pages
8.57% 8.57% dd [kernel.kallsyms] [k] unlock_page_memcg
8.57% 8.57% dd [kernel.kallsyms] [k] unmap_page_range
5.71% 5.71% dd [kernel.kallsyms] [k] PageHuge
5.71% 5.71% dd [kernel.kallsyms] [k] release_pages
5.71% 5.71% dd ld-2.28.so [.] 0x0000000000008b5c
2.86% 2.86% dd [kernel.kallsyms] [k] __queue_work
2.86% 2.86% dd [kernel.kallsyms] [k] __radix_tree_lookup
2.86% 2.86% dd [kernel.kallsyms] [k] copy_page
[...]

# Samples: 2 of event 'llc-access'
# Event count (approx.): 2
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. .............
#
50.00% 50.00% dd [kernel.kallsyms] [k] copy_page
50.00% 50.00% dd libc-2.28.so [.] _dl_addr

# Samples: 48 of event 'tlb-miss'
# Event count (approx.): 48
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ..................................
#
20.83% 20.83% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
12.50% 12.50% dd [kernel.kallsyms] [k] __arch_clear_user
10.42% 10.42% dd [kernel.kallsyms] [k] clear_page
4.17% 4.17% dd [kernel.kallsyms] [k] copy_page
4.17% 4.17% dd [kernel.kallsyms] [k] filemap_map_pages
2.08% 2.08% dd [kernel.kallsyms] [k] __alloc_fd
2.08% 2.08% dd [kernel.kallsyms] [k] __mod_memcg_state.part.70
2.08% 2.08% dd [kernel.kallsyms] [k] __queue_work
2.08% 2.08% dd [kernel.kallsyms] [k] __rcu_read_unlock
2.08% 2.08% dd [kernel.kallsyms] [k] d_path
2.08% 2.08% dd [kernel.kallsyms] [k] destroy_inode
2.08% 2.08% dd [kernel.kallsyms] [k] do_dentry_open
[...]

# Samples: 9K of event 'tlb-access'
# Event count (approx.): 9573
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ......................................
#
25.79% 25.79% dd [kernel.kallsyms] [k] __arch_clear_user
11.22% 11.22% dd [kernel.kallsyms] [k] ret_to_user
8.56% 8.56% dd [kernel.kallsyms] [k] fsnotify
4.06% 4.06% dd [kernel.kallsyms] [k] new_sync_read
3.67% 3.67% dd [kernel.kallsyms] [k] el0_svc_common.constprop.2
3.04% 3.04% dd [kernel.kallsyms] [k] __fsnotify_parent
2.90% 2.90% dd [kernel.kallsyms] [k] vfs_write
2.82% 2.82% dd [kernel.kallsyms] [k] vfs_read
2.52% 2.52% dd libc-2.28.so [.] write
2.26% 2.26% dd [kernel.kallsyms] [k] security_file_permission
2.08% 2.08% dd [kernel.kallsyms] [k] ksys_write
1.96% 1.96% dd [kernel.kallsyms] [k] rw_verify_area
1.95% 1.95% dd [kernel.kallsyms] [k] read_iter_zero
[...]

# Samples: 9 of event 'branch-miss'
# Event count (approx.): 9
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. .........................
#
22.22% 22.22% dd libc-2.28.so [.] _dl_addr
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_clear_user
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_copy_from_user
11.11% 11.11% dd [kernel.kallsyms] [k] __dentry_kill
11.11% 11.11% dd [kernel.kallsyms] [k] __efistub_memcpy
11.11% 11.11% dd ld-2.28.so [.] 0x0000000000012b7c
11.11% 11.11% dd libc-2.28.so [.] 0x000000000002a980
11.11% 11.11% dd libc-2.28.so [.] 0x0000000000083340

# Samples: 29 of event 'remote-access'
# Event count (approx.): 29
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ...........................
#
41.38% 41.38% dd [kernel.kallsyms] [k] filemap_map_pages
10.34% 10.34% dd [kernel.kallsyms] [k] unlock_page_memcg
10.34% 10.34% dd [kernel.kallsyms] [k] unmap_page_range
6.90% 6.90% dd [kernel.kallsyms] [k] release_pages
3.45% 3.45% dd [kernel.kallsyms] [k] PageHuge
3.45% 3.45% dd [kernel.kallsyms] [k] __queue_work
3.45% 3.45% dd [kernel.kallsyms] [k] page_add_file_rmap
3.45% 3.45% dd [kernel.kallsyms] [k] page_counter_try_charge
3.45% 3.45% dd [kernel.kallsyms] [k] page_remove_rmap
3.45% 3.45% dd [kernel.kallsyms] [k] xas_start
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000002a1c
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000008b5c
3.45% 3.45% dd ld-2.28.so [.] 0x00000000000093cc

Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
Tested-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Grant <al.grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200530122442.490-4-leo.yan@linaro.org
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Tan Xiaojun and committed by Arnaldo Carvalho de Melo.
a54ca194 9f74d770

+1097 -43
+1 -1
tools/perf/util/arm-spe-decoder/Build
··· 1 - perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o 1 + perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
+219
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * arm_spe_decoder.c: ARM SPE support 4 + */ 5 + 6 + #ifndef _GNU_SOURCE 7 + #define _GNU_SOURCE 8 + #endif 9 + #include <errno.h> 10 + #include <inttypes.h> 11 + #include <stdbool.h> 12 + #include <string.h> 13 + #include <stdint.h> 14 + #include <stdlib.h> 15 + #include <linux/compiler.h> 16 + #include <linux/zalloc.h> 17 + 18 + #include "../auxtrace.h" 19 + #include "../debug.h" 20 + #include "../util.h" 21 + 22 + #include "arm-spe-decoder.h" 23 + 24 + #ifndef BIT 25 + #define BIT(n) (1UL << (n)) 26 + #endif 27 + 28 + static u64 arm_spe_calc_ip(int index, u64 payload) 29 + { 30 + u8 *addr = (u8 *)&payload; 31 + int ns, el; 32 + 33 + /* Instruction virtual address or Branch target address */ 34 + if (index == SPE_ADDR_PKT_HDR_INDEX_INS || 35 + index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) { 36 + ns = addr[7] & SPE_ADDR_PKT_NS; 37 + el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET; 38 + 39 + /* Fill highest byte for EL1 or EL2 (VHE) mode */ 40 + if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2)) 41 + addr[7] = 0xff; 42 + /* Clean highest byte for other cases */ 43 + else 44 + addr[7] = 0x0; 45 + 46 + /* Data access virtual address */ 47 + } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) { 48 + 49 + /* Fill highest byte if bits [48..55] is 0xff */ 50 + if (addr[6] == 0xff) 51 + addr[7] = 0xff; 52 + /* Otherwise, cleanup tags */ 53 + else 54 + addr[7] = 0x0; 55 + 56 + /* Data access physical address */ 57 + } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) { 58 + /* Cleanup byte 7 */ 59 + addr[7] = 0x0; 60 + } else { 61 + pr_err("unsupported address packet index: 0x%x\n", index); 62 + } 63 + 64 + return payload; 65 + } 66 + 67 + struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params) 68 + { 69 + struct arm_spe_decoder *decoder; 70 + 71 + if (!params->get_trace) 72 + return NULL; 73 + 74 + decoder = zalloc(sizeof(struct arm_spe_decoder)); 75 + if (!decoder) 76 + 
return NULL; 77 + 78 + decoder->get_trace = params->get_trace; 79 + decoder->data = params->data; 80 + 81 + return decoder; 82 + } 83 + 84 + void arm_spe_decoder_free(struct arm_spe_decoder *decoder) 85 + { 86 + free(decoder); 87 + } 88 + 89 + static int arm_spe_get_data(struct arm_spe_decoder *decoder) 90 + { 91 + struct arm_spe_buffer buffer = { .buf = 0, }; 92 + int ret; 93 + 94 + pr_debug("Getting more data\n"); 95 + ret = decoder->get_trace(&buffer, decoder->data); 96 + if (ret < 0) 97 + return ret; 98 + 99 + decoder->buf = buffer.buf; 100 + decoder->len = buffer.len; 101 + 102 + if (!decoder->len) 103 + pr_debug("No more data\n"); 104 + 105 + return decoder->len; 106 + } 107 + 108 + static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder) 109 + { 110 + int ret; 111 + 112 + do { 113 + if (!decoder->len) { 114 + ret = arm_spe_get_data(decoder); 115 + 116 + /* Failed to read out trace data */ 117 + if (ret <= 0) 118 + return ret; 119 + } 120 + 121 + ret = arm_spe_get_packet(decoder->buf, decoder->len, 122 + &decoder->packet); 123 + if (ret <= 0) { 124 + /* Move forward for 1 byte */ 125 + decoder->buf += 1; 126 + decoder->len -= 1; 127 + return -EBADMSG; 128 + } 129 + 130 + decoder->buf += ret; 131 + decoder->len -= ret; 132 + } while (decoder->packet.type == ARM_SPE_PAD); 133 + 134 + return 1; 135 + } 136 + 137 + static int arm_spe_read_record(struct arm_spe_decoder *decoder) 138 + { 139 + int err; 140 + int idx; 141 + u64 payload, ip; 142 + 143 + memset(&decoder->record, 0x0, sizeof(decoder->record)); 144 + 145 + while (1) { 146 + err = arm_spe_get_next_packet(decoder); 147 + if (err <= 0) 148 + return err; 149 + 150 + idx = decoder->packet.index; 151 + payload = decoder->packet.payload; 152 + 153 + switch (decoder->packet.type) { 154 + case ARM_SPE_TIMESTAMP: 155 + decoder->record.timestamp = payload; 156 + return 1; 157 + case ARM_SPE_END: 158 + return 1; 159 + case ARM_SPE_ADDRESS: 160 + ip = arm_spe_calc_ip(idx, payload); 161 + if (idx == 
SPE_ADDR_PKT_HDR_INDEX_INS) 162 + decoder->record.from_ip = ip; 163 + else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH) 164 + decoder->record.to_ip = ip; 165 + break; 166 + case ARM_SPE_COUNTER: 167 + break; 168 + case ARM_SPE_CONTEXT: 169 + break; 170 + case ARM_SPE_OP_TYPE: 171 + break; 172 + case ARM_SPE_EVENTS: 173 + if (payload & BIT(EV_L1D_REFILL)) 174 + decoder->record.type |= ARM_SPE_L1D_MISS; 175 + 176 + if (payload & BIT(EV_L1D_ACCESS)) 177 + decoder->record.type |= ARM_SPE_L1D_ACCESS; 178 + 179 + if (payload & BIT(EV_TLB_WALK)) 180 + decoder->record.type |= ARM_SPE_TLB_MISS; 181 + 182 + if (payload & BIT(EV_TLB_ACCESS)) 183 + decoder->record.type |= ARM_SPE_TLB_ACCESS; 184 + 185 + if ((idx == 1 || idx == 2 || idx == 3) && 186 + (payload & BIT(EV_LLC_MISS))) 187 + decoder->record.type |= ARM_SPE_LLC_MISS; 188 + 189 + if ((idx == 1 || idx == 2 || idx == 3) && 190 + (payload & BIT(EV_LLC_ACCESS))) 191 + decoder->record.type |= ARM_SPE_LLC_ACCESS; 192 + 193 + if ((idx == 1 || idx == 2 || idx == 3) && 194 + (payload & BIT(EV_REMOTE_ACCESS))) 195 + decoder->record.type |= ARM_SPE_REMOTE_ACCESS; 196 + 197 + if (payload & BIT(EV_MISPRED)) 198 + decoder->record.type |= ARM_SPE_BRANCH_MISS; 199 + 200 + break; 201 + case ARM_SPE_DATA_SOURCE: 202 + break; 203 + case ARM_SPE_BAD: 204 + break; 205 + case ARM_SPE_PAD: 206 + break; 207 + default: 208 + pr_err("Get packet error!\n"); 209 + return -1; 210 + } 211 + } 212 + 213 + return 0; 214 + } 215 + 216 + int arm_spe_decode(struct arm_spe_decoder *decoder) 217 + { 218 + return arm_spe_read_record(decoder); 219 + }
+82
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * arm_spe_decoder.h: Arm Statistical Profiling Extensions support 4 + * Copyright (c) 2019-2020, Arm Ltd. 5 + */ 6 + 7 + #ifndef INCLUDE__ARM_SPE_DECODER_H__ 8 + #define INCLUDE__ARM_SPE_DECODER_H__ 9 + 10 + #include <stdbool.h> 11 + #include <stddef.h> 12 + #include <stdint.h> 13 + 14 + #include "arm-spe-pkt-decoder.h" 15 + 16 + enum arm_spe_events { 17 + EV_EXCEPTION_GEN = 0, 18 + EV_RETIRED = 1, 19 + EV_L1D_ACCESS = 2, 20 + EV_L1D_REFILL = 3, 21 + EV_TLB_ACCESS = 4, 22 + EV_TLB_WALK = 5, 23 + EV_NOT_TAKEN = 6, 24 + EV_MISPRED = 7, 25 + EV_LLC_ACCESS = 8, 26 + EV_LLC_MISS = 9, 27 + EV_REMOTE_ACCESS = 10, 28 + EV_ALIGNMENT = 11, 29 + EV_PARTIAL_PREDICATE = 17, 30 + EV_EMPTY_PREDICATE = 18, 31 + }; 32 + 33 + enum arm_spe_sample_type { 34 + ARM_SPE_L1D_ACCESS = 1 << 0, 35 + ARM_SPE_L1D_MISS = 1 << 1, 36 + ARM_SPE_LLC_ACCESS = 1 << 2, 37 + ARM_SPE_LLC_MISS = 1 << 3, 38 + ARM_SPE_TLB_ACCESS = 1 << 4, 39 + ARM_SPE_TLB_MISS = 1 << 5, 40 + ARM_SPE_BRANCH_MISS = 1 << 6, 41 + ARM_SPE_REMOTE_ACCESS = 1 << 7, 42 + }; 43 + 44 + struct arm_spe_record { 45 + enum arm_spe_sample_type type; 46 + int err; 47 + u64 from_ip; 48 + u64 to_ip; 49 + u64 timestamp; 50 + }; 51 + 52 + struct arm_spe_insn; 53 + 54 + struct arm_spe_buffer { 55 + const unsigned char *buf; 56 + size_t len; 57 + u64 offset; 58 + u64 trace_nr; 59 + }; 60 + 61 + struct arm_spe_params { 62 + int (*get_trace)(struct arm_spe_buffer *buffer, void *data); 63 + void *data; 64 + }; 65 + 66 + struct arm_spe_decoder { 67 + int (*get_trace)(struct arm_spe_buffer *buffer, void *data); 68 + void *data; 69 + struct arm_spe_record record; 70 + 71 + const unsigned char *buf; 72 + size_t len; 73 + 74 + struct arm_spe_pkt packet; 75 + }; 76 + 77 + struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params); 78 + void arm_spe_decoder_free(struct arm_spe_decoder *decoder); 79 + 80 + int arm_spe_decode(struct arm_spe_decoder *decoder); 81 + 82 + #endif
+16
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
··· 15 15 #define ARM_SPE_NEED_MORE_BYTES -1 16 16 #define ARM_SPE_BAD_PACKET -2 17 17 18 + #define ARM_SPE_PKT_MAX_SZ 16 19 + 18 20 enum arm_spe_pkt_type { 19 21 ARM_SPE_BAD, 20 22 ARM_SPE_PAD, ··· 35 33 unsigned char index; 36 34 uint64_t payload; 37 35 }; 36 + 37 + #define SPE_ADDR_PKT_HDR_INDEX_INS (0x0) 38 + #define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1) 39 + #define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2) 40 + #define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3) 41 + 42 + #define SPE_ADDR_PKT_NS BIT(7) 43 + #define SPE_ADDR_PKT_CH BIT(6) 44 + #define SPE_ADDR_PKT_EL_OFFSET (5) 45 + #define SPE_ADDR_PKT_EL_MASK (0x3 << SPE_ADDR_PKT_EL_OFFSET) 46 + #define SPE_ADDR_PKT_EL0 (0) 47 + #define SPE_ADDR_PKT_EL1 (1) 48 + #define SPE_ADDR_PKT_EL2 (2) 49 + #define SPE_ADDR_PKT_EL3 (3) 38 50 39 51 const char *arm_spe_pkt_name(enum arm_spe_pkt_type); 40 52
+779 -42
tools/perf/util/arm-spe.c
··· 4 4 * Copyright (c) 2017-2018, Arm Ltd. 5 5 */ 6 6 7 + #include <byteswap.h> 7 8 #include <endian.h> 8 9 #include <errno.h> 9 - #include <byteswap.h> 10 10 #include <inttypes.h> 11 - #include <unistd.h> 12 - #include <stdlib.h> 13 - #include <linux/kernel.h> 14 - #include <linux/types.h> 15 11 #include <linux/bitops.h> 12 + #include <linux/kernel.h> 16 13 #include <linux/log2.h> 14 + #include <linux/types.h> 17 15 #include <linux/zalloc.h> 16 + #include <stdlib.h> 17 + #include <unistd.h> 18 18 19 + #include "auxtrace.h" 19 20 #include "color.h" 21 + #include "debug.h" 22 + #include "evlist.h" 20 23 #include "evsel.h" 21 24 #include "machine.h" 22 25 #include "session.h" 23 - #include "debug.h" 24 - #include "auxtrace.h" 26 + #include "symbol.h" 27 + #include "thread.h" 28 + #include "thread-stack.h" 29 + #include "tool.h" 30 + #include "util/synthetic-events.h" 31 + 25 32 #include "arm-spe.h" 33 + #include "arm-spe-decoder/arm-spe-decoder.h" 26 34 #include "arm-spe-decoder/arm-spe-pkt-decoder.h" 35 + 36 + #define MAX_TIMESTAMP (~0ULL) 27 37 28 38 struct arm_spe { 29 39 struct auxtrace auxtrace; 30 40 struct auxtrace_queues queues; 31 41 struct auxtrace_heap heap; 42 + struct itrace_synth_opts synth_opts; 32 43 u32 auxtrace_type; 33 44 struct perf_session *session; 34 45 struct machine *machine; 35 46 u32 pmu_type; 47 + 48 + u8 timeless_decoding; 49 + u8 data_queued; 50 + 51 + u8 sample_flc; 52 + u8 sample_llc; 53 + u8 sample_tlb; 54 + u8 sample_branch; 55 + u8 sample_remote_access; 56 + 57 + u64 l1d_miss_id; 58 + u64 l1d_access_id; 59 + u64 llc_miss_id; 60 + u64 llc_access_id; 61 + u64 tlb_miss_id; 62 + u64 tlb_access_id; 63 + u64 branch_miss_id; 64 + u64 remote_access_id; 65 + 66 + u64 kernel_start; 67 + 68 + unsigned long num_events; 36 69 }; 37 70 38 71 struct arm_spe_queue { 39 - struct arm_spe *spe; 40 - unsigned int queue_nr; 41 - struct auxtrace_buffer *buffer; 42 - bool on_heap; 43 - bool done; 44 - pid_t pid; 45 - pid_t tid; 46 - int cpu; 72 + struct 
arm_spe *spe; 73 + unsigned int queue_nr; 74 + struct auxtrace_buffer *buffer; 75 + struct auxtrace_buffer *old_buffer; 76 + union perf_event *event_buf; 77 + bool on_heap; 78 + bool done; 79 + pid_t pid; 80 + pid_t tid; 81 + int cpu; 82 + struct arm_spe_decoder *decoder; 83 + u64 time; 84 + u64 timestamp; 85 + struct thread *thread; 47 86 }; 48 87 49 88 static void arm_spe_dump(struct arm_spe *spe __maybe_unused, ··· 131 92 arm_spe_dump(spe, buf, len); 132 93 } 133 94 134 - static int arm_spe_process_event(struct perf_session *session __maybe_unused, 135 - union perf_event *event __maybe_unused, 136 - struct perf_sample *sample __maybe_unused, 137 - struct perf_tool *tool __maybe_unused) 95 + static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data) 138 96 { 97 + struct arm_spe_queue *speq = data; 98 + struct auxtrace_buffer *buffer = speq->buffer; 99 + struct auxtrace_buffer *old_buffer = speq->old_buffer; 100 + struct auxtrace_queue *queue; 101 + 102 + queue = &speq->spe->queues.queue_array[speq->queue_nr]; 103 + 104 + buffer = auxtrace_buffer__next(queue, buffer); 105 + /* If no more data, drop the previous auxtrace_buffer and return */ 106 + if (!buffer) { 107 + if (old_buffer) 108 + auxtrace_buffer__drop_data(old_buffer); 109 + b->len = 0; 110 + return 0; 111 + } 112 + 113 + speq->buffer = buffer; 114 + 115 + /* If the aux_buffer doesn't have data associated, try to load it */ 116 + if (!buffer->data) { 117 + /* get the file desc associated with the perf data file */ 118 + int fd = perf_data__fd(speq->spe->session->data); 119 + 120 + buffer->data = auxtrace_buffer__get_data(buffer, fd); 121 + if (!buffer->data) 122 + return -ENOMEM; 123 + } 124 + 125 + b->len = buffer->size; 126 + b->buf = buffer->data; 127 + 128 + if (b->len) { 129 + if (old_buffer) 130 + auxtrace_buffer__drop_data(old_buffer); 131 + speq->old_buffer = buffer; 132 + } else { 133 + auxtrace_buffer__drop_data(buffer); 134 + return arm_spe_get_trace(b, data); 135 + } 136 + 139 137 
return 0; 138 + } 139 + 140 + static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, 141 + unsigned int queue_nr) 142 + { 143 + struct arm_spe_params params = { .get_trace = 0, }; 144 + struct arm_spe_queue *speq; 145 + 146 + speq = zalloc(sizeof(*speq)); 147 + if (!speq) 148 + return NULL; 149 + 150 + speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 151 + if (!speq->event_buf) 152 + goto out_free; 153 + 154 + speq->spe = spe; 155 + speq->queue_nr = queue_nr; 156 + speq->pid = -1; 157 + speq->tid = -1; 158 + speq->cpu = -1; 159 + 160 + /* params set */ 161 + params.get_trace = arm_spe_get_trace; 162 + params.data = speq; 163 + 164 + /* create new decoder */ 165 + speq->decoder = arm_spe_decoder_new(&params); 166 + if (!speq->decoder) 167 + goto out_free; 168 + 169 + return speq; 170 + 171 + out_free: 172 + zfree(&speq->event_buf); 173 + free(speq); 174 + 175 + return NULL; 176 + } 177 + 178 + static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) 179 + { 180 + return ip >= spe->kernel_start ? 
181 + PERF_RECORD_MISC_KERNEL : 182 + PERF_RECORD_MISC_USER; 183 + } 184 + 185 + static void arm_spe_prep_sample(struct arm_spe *spe, 186 + struct arm_spe_queue *speq, 187 + union perf_event *event, 188 + struct perf_sample *sample) 189 + { 190 + struct arm_spe_record *record = &speq->decoder->record; 191 + 192 + if (!spe->timeless_decoding) 193 + sample->time = speq->timestamp; 194 + 195 + sample->ip = record->from_ip; 196 + sample->cpumode = arm_spe_cpumode(spe, sample->ip); 197 + sample->pid = speq->pid; 198 + sample->tid = speq->tid; 199 + sample->addr = record->to_ip; 200 + sample->period = 1; 201 + sample->cpu = speq->cpu; 202 + 203 + event->sample.header.type = PERF_RECORD_SAMPLE; 204 + event->sample.header.misc = sample->cpumode; 205 + event->sample.header.size = sizeof(struct perf_event_header); 206 + } 207 + 208 + static inline int 209 + arm_spe_deliver_synth_event(struct arm_spe *spe, 210 + struct arm_spe_queue *speq __maybe_unused, 211 + union perf_event *event, 212 + struct perf_sample *sample) 213 + { 214 + int ret; 215 + 216 + ret = perf_session__deliver_synth_event(spe->session, event, sample); 217 + if (ret) 218 + pr_err("ARM SPE: failed to deliver event, error %d\n", ret); 219 + 220 + return ret; 221 + } 222 + 223 + static int 224 + arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq, 225 + u64 spe_events_id) 226 + { 227 + struct arm_spe *spe = speq->spe; 228 + union perf_event *event = speq->event_buf; 229 + struct perf_sample sample = { .ip = 0, }; 230 + 231 + arm_spe_prep_sample(spe, speq, event, &sample); 232 + 233 + sample.id = spe_events_id; 234 + sample.stream_id = spe_events_id; 235 + 236 + return arm_spe_deliver_synth_event(spe, speq, event, &sample); 237 + } 238 + 239 + static int arm_spe_sample(struct arm_spe_queue *speq) 240 + { 241 + const struct arm_spe_record *record = &speq->decoder->record; 242 + struct arm_spe *spe = speq->spe; 243 + int err; 244 + 245 + if (spe->sample_flc) { 246 + if (record->type & ARM_SPE_L1D_MISS) { 
247 + err = arm_spe_synth_spe_events_sample( 248 + speq, spe->l1d_miss_id); 249 + if (err) 250 + return err; 251 + } 252 + 253 + if (record->type & ARM_SPE_L1D_ACCESS) { 254 + err = arm_spe_synth_spe_events_sample( 255 + speq, spe->l1d_access_id); 256 + if (err) 257 + return err; 258 + } 259 + } 260 + 261 + if (spe->sample_llc) { 262 + if (record->type & ARM_SPE_LLC_MISS) { 263 + err = arm_spe_synth_spe_events_sample( 264 + speq, spe->llc_miss_id); 265 + if (err) 266 + return err; 267 + } 268 + 269 + if (record->type & ARM_SPE_LLC_ACCESS) { 270 + err = arm_spe_synth_spe_events_sample( 271 + speq, spe->llc_access_id); 272 + if (err) 273 + return err; 274 + } 275 + } 276 + 277 + if (spe->sample_tlb) { 278 + if (record->type & ARM_SPE_TLB_MISS) { 279 + err = arm_spe_synth_spe_events_sample( 280 + speq, spe->tlb_miss_id); 281 + if (err) 282 + return err; 283 + } 284 + 285 + if (record->type & ARM_SPE_TLB_ACCESS) { 286 + err = arm_spe_synth_spe_events_sample( 287 + speq, spe->tlb_access_id); 288 + if (err) 289 + return err; 290 + } 291 + } 292 + 293 + if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { 294 + err = arm_spe_synth_spe_events_sample(speq, 295 + spe->branch_miss_id); 296 + if (err) 297 + return err; 298 + } 299 + 300 + if (spe->sample_remote_access && 301 + (record->type & ARM_SPE_REMOTE_ACCESS)) { 302 + err = arm_spe_synth_spe_events_sample(speq, 303 + spe->remote_access_id); 304 + if (err) 305 + return err; 306 + } 307 + 308 + return 0; 309 + } 310 + 311 + static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) 312 + { 313 + struct arm_spe *spe = speq->spe; 314 + int ret; 315 + 316 + if (!spe->kernel_start) 317 + spe->kernel_start = machine__kernel_start(spe->machine); 318 + 319 + while (1) { 320 + ret = arm_spe_decode(speq->decoder); 321 + if (!ret) { 322 + pr_debug("No data or all data has been processed.\n"); 323 + return 1; 324 + } 325 + 326 + /* 327 + * Error is detected when decode SPE trace data, continue to 328 + * 
the next trace data and find out more records. 329 + */ 330 + if (ret < 0) 331 + continue; 332 + 333 + ret = arm_spe_sample(speq); 334 + if (ret) 335 + return ret; 336 + 337 + if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { 338 + *timestamp = speq->timestamp; 339 + return 0; 340 + } 341 + } 342 + 343 + return 0; 344 + } 345 + 346 + static int arm_spe__setup_queue(struct arm_spe *spe, 347 + struct auxtrace_queue *queue, 348 + unsigned int queue_nr) 349 + { 350 + struct arm_spe_queue *speq = queue->priv; 351 + struct arm_spe_record *record; 352 + 353 + if (list_empty(&queue->head) || speq) 354 + return 0; 355 + 356 + speq = arm_spe__alloc_queue(spe, queue_nr); 357 + 358 + if (!speq) 359 + return -ENOMEM; 360 + 361 + queue->priv = speq; 362 + 363 + if (queue->cpu != -1) 364 + speq->cpu = queue->cpu; 365 + 366 + if (!speq->on_heap) { 367 + int ret; 368 + 369 + if (spe->timeless_decoding) 370 + return 0; 371 + 372 + retry: 373 + ret = arm_spe_decode(speq->decoder); 374 + 375 + if (!ret) 376 + return 0; 377 + 378 + if (ret < 0) 379 + goto retry; 380 + 381 + record = &speq->decoder->record; 382 + 383 + speq->timestamp = record->timestamp; 384 + ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp); 385 + if (ret) 386 + return ret; 387 + speq->on_heap = true; 388 + } 389 + 390 + return 0; 391 + } 392 + 393 + static int arm_spe__setup_queues(struct arm_spe *spe) 394 + { 395 + unsigned int i; 396 + int ret; 397 + 398 + for (i = 0; i < spe->queues.nr_queues; i++) { 399 + ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i); 400 + if (ret) 401 + return ret; 402 + } 403 + 404 + return 0; 405 + } 406 + 407 + static int arm_spe__update_queues(struct arm_spe *spe) 408 + { 409 + if (spe->queues.new_data) { 410 + spe->queues.new_data = false; 411 + return arm_spe__setup_queues(spe); 412 + } 413 + 414 + return 0; 415 + } 416 + 417 + static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) 418 + { 419 + struct evsel *evsel; 420 + struct 
evlist *evlist = spe->session->evlist; 421 + bool timeless_decoding = true; 422 + 423 + /* 424 + * Circle through the list of event and complain if we find one 425 + * with the time bit set. 426 + */ 427 + evlist__for_each_entry(evlist, evsel) { 428 + if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) 429 + timeless_decoding = false; 430 + } 431 + 432 + return timeless_decoding; 433 + } 434 + 435 + static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, 436 + struct auxtrace_queue *queue) 437 + { 438 + struct arm_spe_queue *speq = queue->priv; 439 + pid_t tid; 440 + 441 + tid = machine__get_current_tid(spe->machine, speq->cpu); 442 + if (tid != -1) { 443 + speq->tid = tid; 444 + thread__zput(speq->thread); 445 + } else 446 + speq->tid = queue->tid; 447 + 448 + if ((!speq->thread) && (speq->tid != -1)) { 449 + speq->thread = machine__find_thread(spe->machine, -1, 450 + speq->tid); 451 + } 452 + 453 + if (speq->thread) { 454 + speq->pid = speq->thread->pid_; 455 + if (queue->cpu == -1) 456 + speq->cpu = speq->thread->cpu; 457 + } 458 + } 459 + 460 + static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) 461 + { 462 + unsigned int queue_nr; 463 + u64 ts; 464 + int ret; 465 + 466 + while (1) { 467 + struct auxtrace_queue *queue; 468 + struct arm_spe_queue *speq; 469 + 470 + if (!spe->heap.heap_cnt) 471 + return 0; 472 + 473 + if (spe->heap.heap_array[0].ordinal >= timestamp) 474 + return 0; 475 + 476 + queue_nr = spe->heap.heap_array[0].queue_nr; 477 + queue = &spe->queues.queue_array[queue_nr]; 478 + speq = queue->priv; 479 + 480 + auxtrace_heap__pop(&spe->heap); 481 + 482 + if (spe->heap.heap_cnt) { 483 + ts = spe->heap.heap_array[0].ordinal + 1; 484 + if (ts > timestamp) 485 + ts = timestamp; 486 + } else { 487 + ts = timestamp; 488 + } 489 + 490 + arm_spe_set_pid_tid_cpu(spe, queue); 491 + 492 + ret = arm_spe_run_decoder(speq, &ts); 493 + if (ret < 0) { 494 + auxtrace_heap__add(&spe->heap, queue_nr, ts); 495 + return ret; 496 + } 497 + 498 + if 
(!ret) { 499 + ret = auxtrace_heap__add(&spe->heap, queue_nr, ts); 500 + if (ret < 0) 501 + return ret; 502 + } else { 503 + speq->on_heap = false; 504 + } 505 + } 506 + 507 + return 0; 508 + } 509 + 510 + static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, 511 + u64 time_) 512 + { 513 + struct auxtrace_queues *queues = &spe->queues; 514 + unsigned int i; 515 + u64 ts = 0; 516 + 517 + for (i = 0; i < queues->nr_queues; i++) { 518 + struct auxtrace_queue *queue = &spe->queues.queue_array[i]; 519 + struct arm_spe_queue *speq = queue->priv; 520 + 521 + if (speq && (tid == -1 || speq->tid == tid)) { 522 + speq->time = time_; 523 + arm_spe_set_pid_tid_cpu(spe, queue); 524 + arm_spe_run_decoder(speq, &ts); 525 + } 526 + } 527 + return 0; 528 + } 529 + 530 + static int arm_spe_process_event(struct perf_session *session, 531 + union perf_event *event, 532 + struct perf_sample *sample, 533 + struct perf_tool *tool) 534 + { 535 + int err = 0; 536 + u64 timestamp; 537 + struct arm_spe *spe = container_of(session->auxtrace, 538 + struct arm_spe, auxtrace); 539 + 540 + if (dump_trace) 541 + return 0; 542 + 543 + if (!tool->ordered_events) { 544 + pr_err("SPE trace requires ordered events\n"); 545 + return -EINVAL; 546 + } 547 + 548 + if (sample->time && (sample->time != (u64) -1)) 549 + timestamp = sample->time; 550 + else 551 + timestamp = 0; 552 + 553 + if (timestamp || spe->timeless_decoding) { 554 + err = arm_spe__update_queues(spe); 555 + if (err) 556 + return err; 557 + } 558 + 559 + if (spe->timeless_decoding) { 560 + if (event->header.type == PERF_RECORD_EXIT) { 561 + err = arm_spe_process_timeless_queues(spe, 562 + event->fork.tid, 563 + sample->time); 564 + } 565 + } else if (timestamp) { 566 + if (event->header.type == PERF_RECORD_EXIT) { 567 + err = arm_spe_process_queues(spe, timestamp); 568 + if (err) 569 + return err; 570 + } 571 + } 572 + 573 + return err; 140 574 } 141 575 142 576 static int arm_spe_process_auxtrace_event(struct 
perf_session *session, ··· 618 106 { 619 107 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 620 108 auxtrace); 621 - struct auxtrace_buffer *buffer; 622 - off_t data_offset; 623 - int fd = perf_data__fd(session->data); 624 - int err; 625 109 626 - if (perf_data__is_pipe(session->data)) { 627 - data_offset = 0; 628 - } else { 629 - data_offset = lseek(fd, 0, SEEK_CUR); 630 - if (data_offset == -1) 631 - return -errno; 632 - } 110 + if (!spe->data_queued) { 111 + struct auxtrace_buffer *buffer; 112 + off_t data_offset; 113 + int fd = perf_data__fd(session->data); 114 + int err; 633 115 634 - err = auxtrace_queues__add_event(&spe->queues, session, event, 635 - data_offset, &buffer); 636 - if (err) 637 - return err; 116 + if (perf_data__is_pipe(session->data)) { 117 + data_offset = 0; 118 + } else { 119 + data_offset = lseek(fd, 0, SEEK_CUR); 120 + if (data_offset == -1) 121 + return -errno; 122 + } 638 123 639 - /* Dump here now we have copied a piped trace out of the pipe */ 640 - if (dump_trace) { 641 - if (auxtrace_buffer__get_data(buffer, fd)) { 642 - arm_spe_dump_event(spe, buffer->data, 643 - buffer->size); 644 - auxtrace_buffer__put_data(buffer); 124 + err = auxtrace_queues__add_event(&spe->queues, session, event, 125 + data_offset, &buffer); 126 + if (err) 127 + return err; 128 + 129 + /* Dump here now we have copied a piped trace out of the pipe */ 130 + if (dump_trace) { 131 + if (auxtrace_buffer__get_data(buffer, fd)) { 132 + arm_spe_dump_event(spe, buffer->data, 133 + buffer->size); 134 + auxtrace_buffer__put_data(buffer); 135 + } 645 136 } 646 137 } 647 138 ··· 654 139 static int arm_spe_flush(struct perf_session *session __maybe_unused, 655 140 struct perf_tool *tool __maybe_unused) 656 141 { 657 - return 0; 142 + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 143 + auxtrace); 144 + int ret; 145 + 146 + if (dump_trace) 147 + return 0; 148 + 149 + if (!tool->ordered_events) 150 + return -EINVAL; 151 + 152 + 
ret = arm_spe__update_queues(spe); 153 + if (ret < 0) 154 + return ret; 155 + 156 + if (spe->timeless_decoding) 157 + return arm_spe_process_timeless_queues(spe, -1, 158 + MAX_TIMESTAMP - 1); 159 + 160 + return arm_spe_process_queues(spe, MAX_TIMESTAMP); 658 161 } 659 162 660 163 static void arm_spe_free_queue(void *priv) ··· 681 148 682 149 if (!speq) 683 150 return; 151 + thread__zput(speq->thread); 152 + arm_spe_decoder_free(speq->decoder); 153 + zfree(&speq->event_buf); 684 154 free(speq); 685 155 } 686 156 ··· 732 196 fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); 733 197 } 734 198 199 + struct arm_spe_synth { 200 + struct perf_tool dummy_tool; 201 + struct perf_session *session; 202 + }; 203 + 204 + static int arm_spe_event_synth(struct perf_tool *tool, 205 + union perf_event *event, 206 + struct perf_sample *sample __maybe_unused, 207 + struct machine *machine __maybe_unused) 208 + { 209 + struct arm_spe_synth *arm_spe_synth = 210 + container_of(tool, struct arm_spe_synth, dummy_tool); 211 + 212 + return perf_session__deliver_synth_event(arm_spe_synth->session, 213 + event, NULL); 214 + } 215 + 216 + static int arm_spe_synth_event(struct perf_session *session, 217 + struct perf_event_attr *attr, u64 id) 218 + { 219 + struct arm_spe_synth arm_spe_synth; 220 + 221 + memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth)); 222 + arm_spe_synth.session = session; 223 + 224 + return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1, 225 + &id, arm_spe_event_synth); 226 + } 227 + 228 + static void arm_spe_set_event_name(struct evlist *evlist, u64 id, 229 + const char *name) 230 + { 231 + struct evsel *evsel; 232 + 233 + evlist__for_each_entry(evlist, evsel) { 234 + if (evsel->core.id && evsel->core.id[0] == id) { 235 + if (evsel->name) 236 + zfree(&evsel->name); 237 + evsel->name = strdup(name); 238 + break; 239 + } 240 + } 241 + } 242 + 243 + static int 244 + arm_spe_synth_events(struct arm_spe *spe, struct perf_session 
*session) 245 + { 246 + struct evlist *evlist = session->evlist; 247 + struct evsel *evsel; 248 + struct perf_event_attr attr; 249 + bool found = false; 250 + u64 id; 251 + int err; 252 + 253 + evlist__for_each_entry(evlist, evsel) { 254 + if (evsel->core.attr.type == spe->pmu_type) { 255 + found = true; 256 + break; 257 + } 258 + } 259 + 260 + if (!found) { 261 + pr_debug("No selected events with SPE trace data\n"); 262 + return 0; 263 + } 264 + 265 + memset(&attr, 0, sizeof(struct perf_event_attr)); 266 + attr.size = sizeof(struct perf_event_attr); 267 + attr.type = PERF_TYPE_HARDWARE; 268 + attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 269 + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 270 + PERF_SAMPLE_PERIOD; 271 + if (spe->timeless_decoding) 272 + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 273 + else 274 + attr.sample_type |= PERF_SAMPLE_TIME; 275 + 276 + attr.exclude_user = evsel->core.attr.exclude_user; 277 + attr.exclude_kernel = evsel->core.attr.exclude_kernel; 278 + attr.exclude_hv = evsel->core.attr.exclude_hv; 279 + attr.exclude_host = evsel->core.attr.exclude_host; 280 + attr.exclude_guest = evsel->core.attr.exclude_guest; 281 + attr.sample_id_all = evsel->core.attr.sample_id_all; 282 + attr.read_format = evsel->core.attr.read_format; 283 + 284 + /* create new id val to be a fixed offset from evsel id */ 285 + id = evsel->core.id[0] + 1000000000; 286 + 287 + if (!id) 288 + id = 1; 289 + 290 + if (spe->synth_opts.flc) { 291 + spe->sample_flc = true; 292 + 293 + /* Level 1 data cache miss */ 294 + err = arm_spe_synth_event(session, &attr, id); 295 + if (err) 296 + return err; 297 + spe->l1d_miss_id = id; 298 + arm_spe_set_event_name(evlist, id, "l1d-miss"); 299 + id += 1; 300 + 301 + /* Level 1 data cache access */ 302 + err = arm_spe_synth_event(session, &attr, id); 303 + if (err) 304 + return err; 305 + spe->l1d_access_id = id; 306 + arm_spe_set_event_name(evlist, id, "l1d-access"); 307 + id += 1; 308 + } 309 + 310 + 
if (spe->synth_opts.llc) { 311 + spe->sample_llc = true; 312 + 313 + /* Last level cache miss */ 314 + err = arm_spe_synth_event(session, &attr, id); 315 + if (err) 316 + return err; 317 + spe->llc_miss_id = id; 318 + arm_spe_set_event_name(evlist, id, "llc-miss"); 319 + id += 1; 320 + 321 + /* Last level cache access */ 322 + err = arm_spe_synth_event(session, &attr, id); 323 + if (err) 324 + return err; 325 + spe->llc_access_id = id; 326 + arm_spe_set_event_name(evlist, id, "llc-access"); 327 + id += 1; 328 + } 329 + 330 + if (spe->synth_opts.tlb) { 331 + spe->sample_tlb = true; 332 + 333 + /* TLB miss */ 334 + err = arm_spe_synth_event(session, &attr, id); 335 + if (err) 336 + return err; 337 + spe->tlb_miss_id = id; 338 + arm_spe_set_event_name(evlist, id, "tlb-miss"); 339 + id += 1; 340 + 341 + /* TLB access */ 342 + err = arm_spe_synth_event(session, &attr, id); 343 + if (err) 344 + return err; 345 + spe->tlb_access_id = id; 346 + arm_spe_set_event_name(evlist, id, "tlb-access"); 347 + id += 1; 348 + } 349 + 350 + if (spe->synth_opts.branches) { 351 + spe->sample_branch = true; 352 + 353 + /* Branch miss */ 354 + err = arm_spe_synth_event(session, &attr, id); 355 + if (err) 356 + return err; 357 + spe->branch_miss_id = id; 358 + arm_spe_set_event_name(evlist, id, "branch-miss"); 359 + id += 1; 360 + } 361 + 362 + if (spe->synth_opts.remote_access) { 363 + spe->sample_remote_access = true; 364 + 365 + /* Remote access */ 366 + err = arm_spe_synth_event(session, &attr, id); 367 + if (err) 368 + return err; 369 + spe->remote_access_id = id; 370 + arm_spe_set_event_name(evlist, id, "remote-access"); 371 + id += 1; 372 + } 373 + 374 + return 0; 375 + } 376 + 735 377 int arm_spe_process_auxtrace_info(union perf_event *event, 736 378 struct perf_session *session) 737 379 { 738 380 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 739 - size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE; 381 + size_t min_sz = sizeof(u64) * 
ARM_SPE_AUXTRACE_PRIV_MAX; 740 382 struct arm_spe *spe; 741 383 int err; 742 384 ··· 935 221 spe->auxtrace_type = auxtrace_info->type; 936 222 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; 937 223 224 + spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); 938 225 spe->auxtrace.process_event = arm_spe_process_event; 939 226 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; 940 227 spe->auxtrace.flush_events = arm_spe_flush; ··· 946 231 947 232 arm_spe_print_info(&auxtrace_info->priv[0]); 948 233 234 + if (dump_trace) 235 + return 0; 236 + 237 + if (session->itrace_synth_opts && session->itrace_synth_opts->set) 238 + spe->synth_opts = *session->itrace_synth_opts; 239 + else 240 + itrace_synth_opts__set_default(&spe->synth_opts, false); 241 + 242 + err = arm_spe_synth_events(spe, session); 243 + if (err) 244 + goto err_free_queues; 245 + 246 + err = auxtrace_queues__process_index(&spe->queues, session); 247 + if (err) 248 + goto err_free_queues; 249 + 250 + if (spe->queues.populated) 251 + spe->data_queued = true; 252 + 949 253 return 0; 950 254 255 + err_free_queues: 256 + auxtrace_queues__free(&spe->queues); 257 + session->auxtrace = NULL; 951 258 err_free: 952 259 free(spe); 953 260 return err;