Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-for-mingo-5.1-20190206' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

Hardware tracing:

Adrian Hunter:

- Handle calls optimized into jumps to a different symbol
in the thread stack routines used to process hardware traces (Adrian Hunter)

Intel PT:

Adrian Hunter:

- Fix overlap calculation for padding.

- Fix CYC timestamp calculation after OVF.

- Packet splitting can only happen in 32-bit.

- Add timestamp to auxtrace errors.

ARM CoreSight:

Leo Yan:

- Add last instruction information in packet

- Set sample flags for instruction range, exception and
return packets and for a trace discontinuity.

- Add exception number in exception packet

- Change tuple from traceID-CPU# to traceID-metadata

- Add traceID in packet

Mathieu Poirier:

- Add "sinks" group to PMU directory

- Use event attributes to send sink information to kernel

- Remove set_drv_config() API, no longer used.

perf annotate:

Jiri Olsa:

- Delay symbol annotation to the resort phase, speeding up 'perf report'
startup.

perf record:

Alexey Budankov:

- Allow binding userspace buffers to NUMA nodes.

Symbols:

Adrian Hunter:

- Fix calculation of symbol sizes when splitting kallsyms into
maps for kcore processing.

Vendor events:

William Cohen:

- Intel: Fix Load_Miss_Real_Latency on CLX

Misc:

Arnaldo Carvalho de Melo:

- Streamline headers, removing includes when all that is needed is
just forward declarations, fixing up the fallout for cases where headers
should have been explicitly included but were instead obtained
indirectly, by sheer luck.

- Add fallback versions for CPU_{OR,EQUAL}(), so that code using them
continues to build on older systems where those were not yet introduced
or on systems using some other libc than the GNU one where those
helpers aren't present.

Documentation:

Changbin Du:

- Add documentation for BPF event selection.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+1398 -472
+3 -3
arch/s390/kernel/perf_cpum_sf.c
··· 1600 1600 1601 1601 /* 1602 1602 * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling 1603 - * @cpu: On which to allocate, -1 means current 1603 + * @event: Event the buffer is setup for, event->cpu == -1 means current 1604 1604 * @pages: Array of pointers to buffer pages passed from perf core 1605 1605 * @nr_pages: Total pages 1606 1606 * @snapshot: Flag for snapshot mode ··· 1612 1612 * 1613 1613 * Return the private AUX buffer structure if success or NULL if fails. 1614 1614 */ 1615 - static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, 1616 - bool snapshot) 1615 + static void *aux_buffer_setup(struct perf_event *event, void **pages, 1616 + int nr_pages, bool snapshot) 1617 1617 { 1618 1618 struct sf_buffer *sfb; 1619 1619 struct aux_buffer *aux;
+3 -1
arch/x86/events/intel/bts.c
··· 77 77 } 78 78 79 79 static void * 80 - bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) 80 + bts_buffer_setup_aux(struct perf_event *event, void **pages, 81 + int nr_pages, bool overwrite) 81 82 { 82 83 struct bts_buffer *buf; 83 84 struct page *page; 85 + int cpu = event->cpu; 84 86 int node = (cpu == -1) ? cpu : cpu_to_node(cpu); 85 87 unsigned long offset; 86 88 size_t size = nr_pages << PAGE_SHIFT;
+3 -2
arch/x86/events/intel/pt.c
··· 1114 1114 * Return: Our private PT buffer structure. 1115 1115 */ 1116 1116 static void * 1117 - pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot) 1117 + pt_buffer_setup_aux(struct perf_event *event, void **pages, 1118 + int nr_pages, bool snapshot) 1118 1119 { 1119 1120 struct pt_buffer *buf; 1120 - int node, ret; 1121 + int node, ret, cpu = event->cpu; 1121 1122 1122 1123 if (!nr_pages) 1123 1124 return NULL;
+97 -15
drivers/hwtracing/coresight/coresight-etm-perf.c
··· 14 14 #include <linux/perf_event.h> 15 15 #include <linux/percpu-defs.h> 16 16 #include <linux/slab.h> 17 + #include <linux/stringhash.h> 17 18 #include <linux/types.h> 18 19 #include <linux/workqueue.h> 19 20 ··· 31 30 PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC)); 32 31 PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS)); 33 32 PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK)); 33 + /* Sink ID - same for all ETMs */ 34 + PMU_FORMAT_ATTR(sinkid, "config2:0-31"); 34 35 35 36 static struct attribute *etm_config_formats_attr[] = { 36 37 &format_attr_cycacc.attr, 37 38 &format_attr_timestamp.attr, 38 39 &format_attr_retstack.attr, 40 + &format_attr_sinkid.attr, 39 41 NULL, 40 42 }; 41 43 ··· 47 43 .attrs = etm_config_formats_attr, 48 44 }; 49 45 46 + static struct attribute *etm_config_sinks_attr[] = { 47 + NULL, 48 + }; 49 + 50 + static const struct attribute_group etm_pmu_sinks_group = { 51 + .name = "sinks", 52 + .attrs = etm_config_sinks_attr, 53 + }; 54 + 50 55 static const struct attribute_group *etm_pmu_attr_groups[] = { 51 56 &etm_pmu_format_group, 57 + &etm_pmu_sinks_group, 52 58 NULL, 53 59 }; 54 60 ··· 191 177 schedule_work(&event_data->work); 192 178 } 193 179 194 - static void *etm_setup_aux(int event_cpu, void **pages, 180 + static void *etm_setup_aux(struct perf_event *event, void **pages, 195 181 int nr_pages, bool overwrite) 196 182 { 197 - int cpu; 183 + u32 id; 184 + int cpu = event->cpu; 198 185 cpumask_t *mask; 199 186 struct coresight_device *sink; 200 187 struct etm_event_data *event_data = NULL; 201 188 202 - event_data = alloc_event_data(event_cpu); 189 + event_data = alloc_event_data(cpu); 203 190 if (!event_data) 204 191 return NULL; 205 192 INIT_WORK(&event_data->work, free_event_data); 206 193 207 - /* 208 - * In theory nothing prevent tracers in a trace session from being 209 - * associated with different sinks, nor having a sink per tracer. 
But 210 - * until we have HW with this kind of topology we need to assume tracers 211 - * in a trace session are using the same sink. Therefore go through 212 - * the coresight bus and pick the first enabled sink. 213 - * 214 - * When operated from sysFS users are responsible to enable the sink 215 - * while from perf, the perf tools will do it based on the choice made 216 - * on the cmd line. As such the "enable_sink" flag in sysFS is reset. 217 - */ 218 - sink = coresight_get_enabled_sink(true); 194 + /* First get the selected sink from user space. */ 195 + if (event->attr.config2) { 196 + id = (u32)event->attr.config2; 197 + sink = coresight_get_sink_by_id(id); 198 + } else { 199 + sink = coresight_get_enabled_sink(true); 200 + } 201 + 219 202 if (!sink || !sink_ops(sink)->alloc_buffer) 220 203 goto err; 221 204 ··· 488 477 } 489 478 490 479 return 0; 480 + } 481 + 482 + static ssize_t etm_perf_sink_name_show(struct device *dev, 483 + struct device_attribute *dattr, 484 + char *buf) 485 + { 486 + struct dev_ext_attribute *ea; 487 + 488 + ea = container_of(dattr, struct dev_ext_attribute, attr); 489 + return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var)); 490 + } 491 + 492 + int etm_perf_add_symlink_sink(struct coresight_device *csdev) 493 + { 494 + int ret; 495 + unsigned long hash; 496 + const char *name; 497 + struct device *pmu_dev = etm_pmu.dev; 498 + struct device *pdev = csdev->dev.parent; 499 + struct dev_ext_attribute *ea; 500 + 501 + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && 502 + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) 503 + return -EINVAL; 504 + 505 + if (csdev->ea != NULL) 506 + return -EINVAL; 507 + 508 + if (!etm_perf_up) 509 + return -EPROBE_DEFER; 510 + 511 + ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL); 512 + if (!ea) 513 + return -ENOMEM; 514 + 515 + name = dev_name(pdev); 516 + /* See function coresight_get_sink_by_id() to know where this is used */ 517 + hash = hashlen_hash(hashlen_string(NULL, name)); 518 + 519 + 
ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL); 520 + if (!ea->attr.attr.name) 521 + return -ENOMEM; 522 + 523 + ea->attr.attr.mode = 0444; 524 + ea->attr.show = etm_perf_sink_name_show; 525 + ea->var = (unsigned long *)hash; 526 + 527 + ret = sysfs_add_file_to_group(&pmu_dev->kobj, 528 + &ea->attr.attr, "sinks"); 529 + 530 + if (!ret) 531 + csdev->ea = ea; 532 + 533 + return ret; 534 + } 535 + 536 + void etm_perf_del_symlink_sink(struct coresight_device *csdev) 537 + { 538 + struct device *pmu_dev = etm_pmu.dev; 539 + struct dev_ext_attribute *ea = csdev->ea; 540 + 541 + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && 542 + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) 543 + return; 544 + 545 + if (!ea) 546 + return; 547 + 548 + sysfs_remove_file_from_group(&pmu_dev->kobj, 549 + &ea->attr.attr, "sinks"); 550 + csdev->ea = NULL; 491 551 } 492 552 493 553 static int __init etm_perf_init(void)
+5 -1
drivers/hwtracing/coresight/coresight-etm-perf.h
··· 59 59 60 60 #ifdef CONFIG_CORESIGHT 61 61 int etm_perf_symlink(struct coresight_device *csdev, bool link); 62 + int etm_perf_add_symlink_sink(struct coresight_device *csdev); 63 + void etm_perf_del_symlink_sink(struct coresight_device *csdev); 62 64 static inline void *etm_perf_sink_config(struct perf_output_handle *handle) 63 65 { 64 66 struct etm_event_data *data = perf_get_aux(handle); ··· 72 70 #else 73 71 static inline int etm_perf_symlink(struct coresight_device *csdev, bool link) 74 72 { return -EINVAL; } 75 - 73 + int etm_perf_add_symlink_sink(struct coresight_device *csdev) 74 + { return -EINVAL; } 75 + void etm_perf_del_symlink_sink(struct coresight_device *csdev) {} 76 76 static inline void *etm_perf_sink_config(struct perf_output_handle *handle) 77 77 { 78 78 return NULL;
+1
drivers/hwtracing/coresight/coresight-priv.h
··· 147 147 int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data); 148 148 struct coresight_device *coresight_get_sink(struct list_head *path); 149 149 struct coresight_device *coresight_get_enabled_sink(bool reset); 150 + struct coresight_device *coresight_get_sink_by_id(u32 id); 150 151 struct list_head *coresight_build_path(struct coresight_device *csdev, 151 152 struct coresight_device *sink); 152 153 void coresight_release_path(struct list_head *path);
+60
drivers/hwtracing/coresight/coresight.c
··· 11 11 #include <linux/err.h> 12 12 #include <linux/export.h> 13 13 #include <linux/slab.h> 14 + #include <linux/stringhash.h> 14 15 #include <linux/mutex.h> 15 16 #include <linux/clk.h> 16 17 #include <linux/coresight.h> ··· 19 18 #include <linux/delay.h> 20 19 #include <linux/pm_runtime.h> 21 20 21 + #include "coresight-etm-perf.h" 22 22 #include "coresight-priv.h" 23 23 24 24 static DEFINE_MUTEX(coresight_mutex); ··· 538 536 539 537 dev = bus_find_device(&coresight_bustype, NULL, &deactivate, 540 538 coresight_enabled_sink); 539 + 540 + return dev ? to_coresight_device(dev) : NULL; 541 + } 542 + 543 + static int coresight_sink_by_id(struct device *dev, void *data) 544 + { 545 + struct coresight_device *csdev = to_coresight_device(dev); 546 + unsigned long hash; 547 + 548 + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || 549 + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { 550 + 551 + if (!csdev->ea) 552 + return 0; 553 + /* 554 + * See function etm_perf_add_symlink_sink() to know where 555 + * this comes from. 556 + */ 557 + hash = (unsigned long)csdev->ea->var; 558 + 559 + if ((u32)hash == *(u32 *)data) 560 + return 1; 561 + } 562 + 563 + return 0; 564 + } 565 + 566 + /** 567 + * coresight_get_sink_by_id - returns the sink that matches the id 568 + * @id: Id of the sink to match 569 + * 570 + * The name of a sink is unique, whether it is found on the AMBA bus or 571 + * otherwise. As such the hash of that name can easily be used to identify 572 + * a sink. 573 + */ 574 + struct coresight_device *coresight_get_sink_by_id(u32 id) 575 + { 576 + struct device *dev = NULL; 577 + 578 + dev = bus_find_device(&coresight_bustype, NULL, &id, 579 + coresight_sink_by_id); 541 580 542 581 return dev ? 
to_coresight_device(dev) : NULL; 543 582 } ··· 1210 1167 goto err_out; 1211 1168 } 1212 1169 1170 + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || 1171 + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { 1172 + ret = etm_perf_add_symlink_sink(csdev); 1173 + 1174 + if (ret) { 1175 + device_unregister(&csdev->dev); 1176 + /* 1177 + * As with the above, all resources are free'd 1178 + * explicitly via coresight_device_release() triggered 1179 + * from put_device(), which is in turn called from 1180 + * function device_unregister(). 1181 + */ 1182 + goto err_out; 1183 + } 1184 + } 1185 + 1213 1186 mutex_lock(&coresight_mutex); 1214 1187 1215 1188 coresight_fixup_device_conns(csdev); ··· 1244 1185 1245 1186 void coresight_unregister(struct coresight_device *csdev) 1246 1187 { 1188 + etm_perf_del_symlink_sink(csdev); 1247 1189 /* Remove references of that device in the topology */ 1248 1190 coresight_remove_conns(csdev); 1249 1191 device_unregister(&csdev->dev);
+3 -3
drivers/perf/arm_spe_pmu.c
··· 824 824 { 825 825 } 826 826 827 - static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages, 828 - bool snapshot) 827 + static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages, 828 + int nr_pages, bool snapshot) 829 829 { 830 - int i; 830 + int i, cpu = event->cpu; 831 831 struct page **pglist; 832 832 struct arm_spe_pmu_buf *buf; 833 833
+5 -2
include/linux/coresight.h
··· 154 154 * @orphan: true if the component has connections that haven't been linked. 155 155 * @enable: 'true' if component is currently part of an active path. 156 156 * @activated: 'true' only if a _sink_ has been activated. A sink can be 157 - activated but not yet enabled. Enabling for a _sink_ 158 - happens when a source has been selected for that it. 157 + * activated but not yet enabled. Enabling for a _sink_ 158 + * appens when a source has been selected for that it. 159 + * @ea: Device attribute for sink representation under PMU directory. 159 160 */ 160 161 struct coresight_device { 161 162 struct coresight_connection *conns; ··· 169 168 atomic_t *refcnt; 170 169 bool orphan; 171 170 bool enable; /* true only if configured as part of a path */ 171 + /* sink specific fields */ 172 172 bool activated; /* true only if a sink is part of a path */ 173 + struct dev_ext_attribute *ea; 173 174 }; 174 175 175 176 #define to_coresight_device(d) container_of(d, struct coresight_device, dev)
+1 -1
include/linux/perf_event.h
··· 410 410 /* 411 411 * Set up pmu-private data structures for an AUX area 412 412 */ 413 - void *(*setup_aux) (int cpu, void **pages, 413 + void *(*setup_aux) (struct perf_event *event, void **pages, 414 414 int nr_pages, bool overwrite); 415 415 /* optional */ 416 416
+1 -1
kernel/events/ring_buffer.c
··· 657 657 goto out; 658 658 } 659 659 660 - rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, 660 + rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages, 661 661 overwrite); 662 662 if (!rb->aux_priv) 663 663 goto out;
+31
tools/perf/Documentation/perf-config.txt
··· 120 120 children = true 121 121 group = true 122 122 123 + [llvm] 124 + dump-obj = true 125 + clang-opt = -g 126 + 123 127 You can hide source code of annotate feature setting the config to false with 124 128 125 129 % perf config annotate.hide_src_code=true ··· 556 552 557 553 trace.show_zeros:: 558 554 Do not suppress syscall arguments that are equal to zero. 555 + 556 + llvm.*:: 557 + llvm.clang-path:: 558 + Path to clang. If omit, search it from $PATH. 559 + 560 + llvm.clang-bpf-cmd-template:: 561 + Cmdline template. Below lines show its default value. Environment 562 + variable is used to pass options. 563 + "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \ 564 + -Wno-unused-value -Wno-pointer-sign -working-directory \ 565 + $WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -" 566 + 567 + llvm.clang-opt:: 568 + Options passed to clang. 569 + 570 + llvm.kbuild-dir:: 571 + kbuild directory. If not set, use /lib/modules/`uname -r`/build. 572 + If set to "" deliberately, skip kernel header auto-detector. 573 + 574 + llvm.kbuild-opts:: 575 + Options passed to 'make' when detecting kernel header options. 576 + 577 + llvm.dump-obj:: 578 + Enable perf dump BPF object files compiled by LLVM. 579 + 580 + llvm.opts:: 581 + Options passed to llc. 559 582 560 583 SEE ALSO 561 584 --------
+14
tools/perf/Documentation/perf-record.txt
··· 88 88 If you want to profile write accesses in [0x1000~1008), just set 89 89 'mem:0x1000/8:w'. 90 90 91 + - a BPF source file (ending in .c) or a precompiled object file (ending 92 + in .o) selects one or more BPF events. 93 + The BPF program can attach to various perf events based on the ELF section 94 + names. 95 + 96 + When processing a '.c' file, perf searches an installed LLVM to compile it 97 + into an object file first. Optional clang options can be passed via the 98 + '--clang-opt' command line option, e.g.: 99 + 100 + perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \ 101 + -e tests/bpf-script-example.c 102 + 103 + Note: '--clang-opt' must be placed before '--event/-e'. 104 + 91 105 - a group of events surrounded by a pair of brace ("{event1,event2,...}"). 92 106 Each event is separated by commas and the group should be quoted to 93 107 prevent the shell interpretation. You also need to use --group on
+1
tools/perf/arch/arm/tests/dwarf-unwind.c
··· 3 3 #include "perf_regs.h" 4 4 #include "thread.h" 5 5 #include "map.h" 6 + #include "map_groups.h" 6 7 #include "event.h" 7 8 #include "debug.h" 8 9 #include "tests/tests.h"
+44 -54
tools/perf/arch/arm/util/cs-etm.c
··· 5 5 */ 6 6 7 7 #include <api/fs/fs.h> 8 + #include <linux/bits.h> 8 9 #include <linux/bitops.h> 9 10 #include <linux/compiler.h> 10 11 #include <linux/coresight-pmu.h> ··· 23 22 #include "../../util/thread_map.h" 24 23 #include "../../util/cs-etm.h" 25 24 25 + #include <errno.h> 26 26 #include <stdlib.h> 27 27 #include <sys/stat.h> 28 - 29 - #define ENABLE_SINK_MAX 128 30 - #define CS_BUS_DEVICE_PATH "/bus/coresight/devices/" 31 28 32 29 struct cs_etm_recording { 33 30 struct auxtrace_record itr; ··· 59 60 return 0; 60 61 } 61 62 63 + static int cs_etm_set_sink_attr(struct perf_pmu *pmu, 64 + struct perf_evsel *evsel) 65 + { 66 + char msg[BUFSIZ], path[PATH_MAX], *sink; 67 + struct perf_evsel_config_term *term; 68 + int ret = -EINVAL; 69 + u32 hash; 70 + 71 + if (evsel->attr.config2 & GENMASK(31, 0)) 72 + return 0; 73 + 74 + list_for_each_entry(term, &evsel->config_terms, list) { 75 + if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) 76 + continue; 77 + 78 + sink = term->val.drv_cfg; 79 + snprintf(path, PATH_MAX, "sinks/%s", sink); 80 + 81 + ret = perf_pmu__scan_file(pmu, path, "%x", &hash); 82 + if (ret != 1) { 83 + pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n", 84 + sink, perf_evsel__name(evsel), errno, 85 + str_error_r(errno, msg, sizeof(msg))); 86 + return ret; 87 + } 88 + 89 + evsel->attr.config2 |= hash; 90 + return 0; 91 + } 92 + 93 + /* 94 + * No sink was provided on the command line - for _now_ treat 95 + * this as an error. 
96 + */ 97 + return ret; 98 + } 99 + 62 100 static int cs_etm_recording_options(struct auxtrace_record *itr, 63 101 struct perf_evlist *evlist, 64 102 struct record_opts *opts) 65 103 { 104 + int ret; 66 105 struct cs_etm_recording *ptr = 67 106 container_of(itr, struct cs_etm_recording, itr); 68 107 struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; ··· 128 91 /* no need to continue if at least one event of interest was found */ 129 92 if (!cs_etm_evsel) 130 93 return 0; 94 + 95 + ret = cs_etm_set_sink_attr(cs_etm_pmu, cs_etm_evsel); 96 + if (ret) 97 + return ret; 131 98 132 99 if (opts->use_clockid) { 133 100 pr_err("Cannot use clockid (-k option) with %s\n", ··· 638 597 return &ptr->itr; 639 598 out: 640 599 return NULL; 641 - } 642 - 643 - static FILE *cs_device__open_file(const char *name) 644 - { 645 - struct stat st; 646 - char path[PATH_MAX]; 647 - const char *sysfs; 648 - 649 - sysfs = sysfs__mountpoint(); 650 - if (!sysfs) 651 - return NULL; 652 - 653 - snprintf(path, PATH_MAX, 654 - "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name); 655 - 656 - if (stat(path, &st) < 0) 657 - return NULL; 658 - 659 - return fopen(path, "w"); 660 - 661 - } 662 - 663 - static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...) 664 - { 665 - va_list args; 666 - FILE *file; 667 - int ret = -EINVAL; 668 - 669 - va_start(args, fmt); 670 - file = cs_device__open_file(name); 671 - if (file) { 672 - ret = vfprintf(file, fmt, args); 673 - fclose(file); 674 - } 675 - va_end(args); 676 - return ret; 677 - } 678 - 679 - int cs_etm_set_drv_config(struct perf_evsel_config_term *term) 680 - { 681 - int ret; 682 - char enable_sink[ENABLE_SINK_MAX]; 683 - 684 - snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s", 685 - term->val.drv_cfg, "enable_sink"); 686 - 687 - ret = cs_device__print_file(enable_sink, "%d", 1); 688 - if (ret < 0) 689 - return ret; 690 - 691 - return 0; 692 600 }
-3
tools/perf/arch/arm/util/cs-etm.h
··· 7 7 #ifndef INCLUDE__PERF_CS_ETM_H__ 8 8 #define INCLUDE__PERF_CS_ETM_H__ 9 9 10 - #include "../../util/evsel.h" 11 - 12 10 struct auxtrace_record *cs_etm_record_init(int *err); 13 - int cs_etm_set_drv_config(struct perf_evsel_config_term *term); 14 11 15 12 #endif
+1 -2
tools/perf/arch/arm/util/pmu.c
··· 7 7 #include <string.h> 8 8 #include <linux/coresight-pmu.h> 9 9 #include <linux/perf_event.h> 10 + #include <linux/string.h> 10 11 11 - #include "cs-etm.h" 12 12 #include "arm-spe.h" 13 13 #include "../../util/pmu.h" 14 14 ··· 19 19 if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { 20 20 /* add ETM default config here */ 21 21 pmu->selectable = true; 22 - pmu->set_drv_config = cs_etm_set_drv_config; 23 22 #if defined(__aarch64__) 24 23 } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) { 25 24 return arm_spe_pmu_default_config(pmu);
+1
tools/perf/arch/arm64/tests/dwarf-unwind.c
··· 3 3 #include "perf_regs.h" 4 4 #include "thread.h" 5 5 #include "map.h" 6 + #include "map_groups.h" 6 7 #include "event.h" 7 8 #include "debug.h" 8 9 #include "tests/tests.h"
+1
tools/perf/arch/powerpc/tests/dwarf-unwind.c
··· 3 3 #include "perf_regs.h" 4 4 #include "thread.h" 5 5 #include "map.h" 6 + #include "map_groups.h" 6 7 #include "event.h" 7 8 #include "debug.h" 8 9 #include "tests/tests.h"
+2
tools/perf/arch/powerpc/util/kvm-stat.c
··· 3 3 #include "util/kvm-stat.h" 4 4 #include "util/parse-events.h" 5 5 #include "util/debug.h" 6 + #include "util/evsel.h" 7 + #include "util/evlist.h" 6 8 7 9 #include "book3s_hv_exits.h" 8 10 #include "book3s_hcalls.h"
+3
tools/perf/arch/powerpc/util/skip-callchain-idx.c
··· 16 16 #include "util/thread.h" 17 17 #include "util/callchain.h" 18 18 #include "util/debug.h" 19 + #include "util/dso.h" 20 + #include "util/map.h" 21 + #include "util/symbol.h" 19 22 20 23 /* 21 24 * When saving the callchain on Power, the kernel conservatively saves
+1
tools/perf/arch/s390/util/kvm-stat.c
··· 11 11 12 12 #include <errno.h> 13 13 #include "../../util/kvm-stat.h" 14 + #include "../../util/evsel.h" 14 15 #include <asm/sie.h> 15 16 16 17 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
+1
tools/perf/arch/x86/tests/dwarf-unwind.c
··· 3 3 #include "perf_regs.h" 4 4 #include "thread.h" 5 5 #include "map.h" 6 + #include "map_groups.h" 6 7 #include "event.h" 7 8 #include "debug.h" 8 9 #include "tests/tests.h"
+1
tools/perf/arch/x86/util/kvm-stat.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <errno.h> 3 3 #include "../../util/kvm-stat.h" 4 + #include "../../util/evsel.h" 4 5 #include <asm/svm.h> 5 6 #include <asm/vmx.h> 6 7 #include <asm/kvm.h>
+1
tools/perf/builtin-annotate.c
··· 27 27 #include "util/thread.h" 28 28 #include "util/sort.h" 29 29 #include "util/hist.h" 30 + #include "util/map.h" 30 31 #include "util/session.h" 31 32 #include "util/tool.h" 32 33 #include "util/data.h"
+5 -4
tools/perf/builtin-c2c.c
··· 33 33 #include "ui/browsers/hists.h" 34 34 #include "thread.h" 35 35 #include "mem2node.h" 36 + #include "symbol.h" 36 37 37 38 struct c2c_hists { 38 39 struct hists hists; ··· 1970 1969 set_nodestr(c2c_he); 1971 1970 } 1972 1971 1973 - static int filter_cb(struct hist_entry *he) 1972 + static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) 1974 1973 { 1975 1974 struct c2c_hist_entry *c2c_he; 1976 1975 ··· 1987 1986 return 0; 1988 1987 } 1989 1988 1990 - static int resort_cl_cb(struct hist_entry *he) 1989 + static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused) 1991 1990 { 1992 1991 struct c2c_hist_entry *c2c_he; 1993 1992 struct c2c_hists *c2c_hists; ··· 2074 2073 2075 2074 #define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) 2076 2075 2077 - static int resort_hitm_cb(struct hist_entry *he) 2076 + static int resort_hitm_cb(struct hist_entry *he, void *arg __maybe_unused) 2078 2077 { 2079 2078 struct c2c_hist_entry *c2c_he; 2080 2079 c2c_he = container_of(he, struct c2c_hist_entry, he); ··· 2096 2095 struct hist_entry *he; 2097 2096 2098 2097 he = rb_entry(next, struct hist_entry, rb_node); 2099 - ret = cb(he); 2098 + ret = cb(he, NULL); 2100 2099 if (ret) 2101 2100 break; 2102 2101 next = rb_next(&he->rb_node);
+2
tools/perf/builtin-inject.c
··· 12 12 #include "util/color.h" 13 13 #include "util/evlist.h" 14 14 #include "util/evsel.h" 15 + #include "util/map.h" 15 16 #include "util/session.h" 16 17 #include "util/tool.h" 17 18 #include "util/debug.h" ··· 20 19 #include "util/data.h" 21 20 #include "util/auxtrace.h" 22 21 #include "util/jit.h" 22 + #include "util/symbol.h" 23 23 #include "util/thread.h" 24 24 25 25 #include <subcmd/parse-options.h>
+1
tools/perf/builtin-kallsyms.c
··· 13 13 #include <subcmd/parse-options.h> 14 14 #include "debug.h" 15 15 #include "machine.h" 16 + #include "map.h" 16 17 #include "symbol.h" 17 18 18 19 static int __cmd_kallsyms(int argc, const char **argv)
+1
tools/perf/builtin-kmem.c
··· 6 6 #include "util/evsel.h" 7 7 #include "util/util.h" 8 8 #include "util/config.h" 9 + #include "util/map.h" 9 10 #include "util/symbol.h" 10 11 #include "util/thread.h" 11 12 #include "util/header.h"
+1
tools/perf/builtin-mem.c
··· 13 13 #include "util/data.h" 14 14 #include "util/mem-events.h" 15 15 #include "util/debug.h" 16 + #include "util/map.h" 16 17 #include "util/symbol.h" 17 18 18 19 #define MEM_OPERATION_LOAD 0x1
+27 -11
tools/perf/builtin-record.c
··· 23 23 #include "util/evlist.h" 24 24 #include "util/evsel.h" 25 25 #include "util/debug.h" 26 - #include "util/drv_configs.h" 27 26 #include "util/session.h" 28 27 #include "util/tool.h" 29 28 #include "util/symbol.h" ··· 38 39 #include "util/bpf-loader.h" 39 40 #include "util/trigger.h" 40 41 #include "util/perf-hooks.h" 42 + #include "util/cpu-set-sched.h" 41 43 #include "util/time-utils.h" 42 44 #include "util/units.h" 43 45 #include "util/bpf-event.h" ··· 82 82 bool timestamp_boundary; 83 83 struct switch_output switch_output; 84 84 unsigned long long samples; 85 + cpu_set_t affinity_mask; 85 86 }; 86 87 87 88 static volatile int auxtrace_record__snapshot_started; 88 89 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 89 90 static DEFINE_TRIGGER(switch_output_trigger); 91 + 92 + static const char *affinity_tags[PERF_AFFINITY_MAX] = { 93 + "SYS", "NODE", "CPU" 94 + }; 90 95 91 96 static bool switch_output_signal(struct record *rec) 92 97 { ··· 537 532 struct record_opts *opts = &rec->opts; 538 533 char msg[512]; 539 534 535 + if (opts->affinity != PERF_AFFINITY_SYS) 536 + cpu__setup_cpunode_map(); 537 + 540 538 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, 541 539 opts->auxtrace_mmap_pages, 542 - opts->auxtrace_snapshot_mode, opts->nr_cblocks) < 0) { 540 + opts->auxtrace_snapshot_mode, 541 + opts->nr_cblocks, opts->affinity) < 0) { 543 542 if (errno == EPERM) { 544 543 pr_err("Permission error mapping pages.\n" 545 544 "Consider increasing " ··· 576 567 struct perf_evlist *evlist = rec->evlist; 577 568 struct perf_session *session = rec->session; 578 569 struct record_opts *opts = &rec->opts; 579 - struct perf_evsel_config_term *err_term; 580 570 int rc = 0; 581 571 582 572 /* ··· 624 616 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 625 617 pos->filter, perf_evsel__name(pos), errno, 626 618 str_error_r(errno, msg, sizeof(msg))); 627 - rc = -1; 628 - goto out; 629 - } 630 - 631 - if (perf_evlist__apply_drv_configs(evlist, &pos, 
&err_term)) { 632 - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 633 - err_term->val.drv_cfg, perf_evsel__name(pos), errno, 634 - str_error_r(errno, msg, sizeof(msg))); 635 619 rc = -1; 636 620 goto out; 637 621 } ··· 723 723 .type = PERF_RECORD_FINISHED_ROUND, 724 724 }; 725 725 726 + static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) 727 + { 728 + if (rec->opts.affinity != PERF_AFFINITY_SYS && 729 + !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { 730 + CPU_ZERO(&rec->affinity_mask); 731 + CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); 732 + sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); 733 + } 734 + } 735 + 726 736 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 727 737 bool overwrite) 728 738 { ··· 760 750 struct perf_mmap *map = &maps[i]; 761 751 762 752 if (map->base) { 753 + record__adjust_affinity(rec, map); 763 754 if (!record__aio_enabled(rec)) { 764 755 if (perf_mmap__push(map, rec, record__pushfn) != 0) { 765 756 rc = -1; ··· 1998 1987 # undef REASON 1999 1988 #endif 2000 1989 1990 + CPU_ZERO(&rec->affinity_mask); 1991 + rec->opts.affinity = PERF_AFFINITY_SYS; 1992 + 2001 1993 rec->evlist = perf_evlist__new(); 2002 1994 if (rec->evlist == NULL) 2003 1995 return -ENOMEM; ··· 2163 2149 rec->opts.nr_cblocks = nr_cblocks_max; 2164 2150 if (verbose > 0) 2165 2151 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); 2152 + 2153 + pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 2166 2154 2167 2155 err = __cmd_record(&record, argc, argv); 2168 2156 out:
+20 -2
tools/perf/builtin-report.c
··· 16 16 #include <linux/list.h> 17 17 #include <linux/rbtree.h> 18 18 #include <linux/err.h> 19 + #include "util/map.h" 19 20 #include "util/symbol.h" 20 21 #include "util/callchain.h" 21 22 #include "util/values.h" ··· 616 615 return ret; 617 616 } 618 617 618 + static int hists__resort_cb(struct hist_entry *he, void *arg) 619 + { 620 + struct report *rep = arg; 621 + struct symbol *sym = he->ms.sym; 622 + 623 + if (rep->symbol_ipc && sym && !sym->annotate2) { 624 + struct perf_evsel *evsel = hists_to_evsel(he->hists); 625 + 626 + symbol__annotate2(sym, he->ms.map, evsel, 627 + &annotation__default_options, NULL); 628 + } 629 + 630 + return 0; 631 + } 632 + 619 633 static void report__output_resort(struct report *rep) 620 634 { 621 635 struct ui_progress prog; ··· 638 622 639 623 ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); 640 624 641 - evlist__for_each_entry(rep->session->evlist, pos) 642 - perf_evsel__output_resort(pos, &prog); 625 + evlist__for_each_entry(rep->session->evlist, pos) { 626 + perf_evsel__output_resort_cb(pos, &prog, 627 + hists__resort_cb, rep); 628 + } 643 629 644 630 ui_progress__finish(); 645 631 }
+1
tools/perf/builtin-script.c
··· 10 10 #include "util/perf_regs.h" 11 11 #include "util/session.h" 12 12 #include "util/tool.h" 13 + #include "util/map.h" 13 14 #include "util/symbol.h" 14 15 #include "util/thread.h" 15 16 #include "util/trace-event.h"
-9
tools/perf/builtin-stat.c
··· 52 52 #include "util/evlist.h" 53 53 #include "util/evsel.h" 54 54 #include "util/debug.h" 55 - #include "util/drv_configs.h" 56 55 #include "util/color.h" 57 56 #include "util/stat.h" 58 57 #include "util/header.h" ··· 416 417 int status = 0; 417 418 const bool forks = (argc > 0); 418 419 bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; 419 - struct perf_evsel_config_term *err_term; 420 420 421 421 if (interval) { 422 422 ts.tv_sec = interval / USEC_PER_MSEC; ··· 509 511 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 510 512 counter->filter, perf_evsel__name(counter), errno, 511 513 str_error_r(errno, msg, sizeof(msg))); 512 - return -1; 513 - } 514 - 515 - if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { 516 - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 517 - err_term->val.drv_cfg, perf_evsel__name(counter), errno, 518 - str_error_r(errno, msg, sizeof(msg))); 519 514 return -1; 520 515 } 521 516
+1 -13
tools/perf/builtin-top.c
··· 25 25 #include "util/bpf-event.h" 26 26 #include "util/config.h" 27 27 #include "util/color.h" 28 - #include "util/drv_configs.h" 29 28 #include "util/evlist.h" 30 29 #include "util/evsel.h" 31 30 #include "util/event.h" 32 31 #include "util/machine.h" 32 + #include "util/map.h" 33 33 #include "util/session.h" 34 34 #include "util/symbol.h" 35 35 #include "util/thread.h" ··· 1185 1185 1186 1186 static int __cmd_top(struct perf_top *top) 1187 1187 { 1188 - char msg[512]; 1189 - struct perf_evsel *pos; 1190 - struct perf_evsel_config_term *err_term; 1191 - struct perf_evlist *evlist = top->evlist; 1192 1188 struct record_opts *opts = &top->record_opts; 1193 1189 pthread_t thread, thread_process; 1194 1190 int ret; ··· 1234 1238 ret = perf_top__start_counters(top); 1235 1239 if (ret) 1236 1240 goto out_delete; 1237 - 1238 - ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term); 1239 - if (ret) { 1240 - pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 1241 - err_term->val.drv_cfg, perf_evsel__name(pos), errno, 1242 - str_error_r(errno, msg, sizeof(msg))); 1243 - goto out_delete; 1244 - } 1245 1241 1246 1242 top->session->evlist = top->evlist; 1247 1243 perf_session__set_id_hdr_size(top->session);
+2
tools/perf/builtin-trace.c
··· 29 29 #include "util/evlist.h" 30 30 #include <subcmd/exec-cmd.h> 31 31 #include "util/machine.h" 32 + #include "util/map.h" 33 + #include "util/symbol.h" 32 34 #include "util/path.h" 33 35 #include "util/session.h" 34 36 #include "util/thread.h"
+8
tools/perf/perf.h
··· 84 84 clockid_t clockid; 85 85 u64 clockid_res_ns; 86 86 int nr_cblocks; 87 + int affinity; 88 + }; 89 + 90 + enum perf_affinity { 91 + PERF_AFFINITY_SYS = 0, 92 + PERF_AFFINITY_NODE, 93 + PERF_AFFINITY_CPU, 94 + PERF_AFFINITY_MAX 87 95 }; 88 96 89 97 struct option;
+1 -1
tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
··· 73 73 }, 74 74 { 75 75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", 76 - "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )", 76 + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", 77 77 "MetricGroup": "Memory_Bound;Memory_Lat", 78 78 "MetricName": "Load_Miss_Real_Latency" 79 79 },
+1 -1
tools/perf/scripts/python/export-to-postgresql.py
··· 478 478 'branch_count,' 479 479 'call_id,' 480 480 'return_id,' 481 - 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' 481 + 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 482 482 'parent_call_path_id' 483 483 ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') 484 484
+1 -1
tools/perf/scripts/python/export-to-sqlite.py
··· 320 320 'branch_count,' 321 321 'call_id,' 322 322 'return_id,' 323 - 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' 323 + 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 324 324 'parent_call_path_id' 325 325 ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') 326 326
+2
tools/perf/tests/code-reading.c
··· 15 15 #include "thread_map.h" 16 16 #include "cpumap.h" 17 17 #include "machine.h" 18 + #include "map.h" 19 + #include "symbol.h" 18 20 #include "event.h" 19 21 #include "thread.h" 20 22
+1
tools/perf/tests/dwarf-unwind.c
··· 10 10 #include "../util/unwind.h" 11 11 #include "perf_regs.h" 12 12 #include "map.h" 13 + #include "symbol.h" 13 14 #include "thread.h" 14 15 #include "callchain.h" 15 16
+1
tools/perf/tests/hists_common.c
··· 2 2 #include <inttypes.h> 3 3 #include "perf.h" 4 4 #include "util/debug.h" 5 + #include "util/map.h" 5 6 #include "util/symbol.h" 6 7 #include "util/sort.h" 7 8 #include "util/evsel.h"
+1
tools/perf/tests/hists_cumulate.c
··· 2 2 #include "perf.h" 3 3 #include "util/debug.h" 4 4 #include "util/event.h" 5 + #include "util/map.h" 5 6 #include "util/symbol.h" 6 7 #include "util/sort.h" 7 8 #include "util/evsel.h"
+1
tools/perf/tests/hists_filter.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include "perf.h" 3 3 #include "util/debug.h" 4 + #include "util/map.h" 4 5 #include "util/symbol.h" 5 6 #include "util/sort.h" 6 7 #include "util/evsel.h"
+1
tools/perf/tests/hists_output.c
··· 2 2 #include "perf.h" 3 3 #include "util/debug.h" 4 4 #include "util/event.h" 5 + #include "util/map.h" 5 6 #include "util/symbol.h" 6 7 #include "util/sort.h" 7 8 #include "util/evsel.h"
+1
tools/perf/tests/mmap-thread-lookup.c
··· 11 11 #include "tests.h" 12 12 #include "machine.h" 13 13 #include "thread_map.h" 14 + #include "map.h" 14 15 #include "symbol.h" 15 16 #include "thread.h" 16 17 #include "util.h"
+2
tools/perf/tests/pmu.c
··· 4 4 #include "util.h" 5 5 #include "tests.h" 6 6 #include <errno.h> 7 + #include <stdio.h> 7 8 #include <linux/kernel.h> 9 + #include <linux/limits.h> 8 10 9 11 /* Simulated format definitions. */ 10 12 static struct test_format {
+2
tools/perf/tests/sample-parsing.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <stdbool.h> 3 3 #include <inttypes.h> 4 + #include <linux/bitops.h> 4 5 #include <linux/kernel.h> 5 6 #include <linux/types.h> 6 7 8 + #include "branch.h" 7 9 #include "util.h" 8 10 #include "event.h" 9 11 #include "evsel.h"
+1
tools/perf/tests/sdt.c
··· 3 3 #include <stdio.h> 4 4 #include <sys/epoll.h> 5 5 #include <util/evlist.h> 6 + #include <util/symbol.h> 6 7 #include <linux/filter.h> 7 8 #include "tests.h" 8 9 #include "debug.h"
+1
tools/perf/ui/browsers/annotate.c
··· 7 7 #include "../../util/annotate.h" 8 8 #include "../../util/hist.h" 9 9 #include "../../util/sort.h" 10 + #include "../../util/map.h" 10 11 #include "../../util/symbol.h" 11 12 #include "../../util/evsel.h" 12 13 #include "../../util/evlist.h"
+3
tools/perf/ui/browsers/hists.c
··· 8 8 #include <linux/rbtree.h> 9 9 #include <sys/ttydefaults.h> 10 10 11 + #include "../../util/callchain.h" 11 12 #include "../../util/evsel.h" 12 13 #include "../../util/evlist.h" 13 14 #include "../../util/hist.h" 15 + #include "../../util/map.h" 16 + #include "../../util/symbol.h" 14 17 #include "../../util/pstack.h" 15 18 #include "../../util/sort.h" 16 19 #include "../../util/util.h"
+2
tools/perf/ui/gtk/annotate.c
··· 4 4 #include "util/debug.h" 5 5 #include "util/annotate.h" 6 6 #include "util/evsel.h" 7 + #include "util/map.h" 8 + #include "util/symbol.h" 7 9 #include "ui/helpline.h" 8 10 #include <inttypes.h> 9 11 #include <signal.h>
+1
tools/perf/ui/gtk/hists.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include "../evlist.h" 3 3 #include "../cache.h" 4 + #include "../callchain.h" 4 5 #include "../evsel.h" 5 6 #include "../sort.h" 6 7 #include "../hist.h"
+1
tools/perf/ui/hist.c
··· 3 3 #include <math.h> 4 4 #include <linux/compiler.h> 5 5 6 + #include "../util/callchain.h" 6 7 #include "../util/hist.h" 7 8 #include "../util/util.h" 8 9 #include "../util/sort.h"
+4
tools/perf/ui/stdio/hist.c
··· 2 2 #include <stdio.h> 3 3 #include <linux/string.h> 4 4 5 + #include "../../util/callchain.h" 5 6 #include "../../util/util.h" 6 7 #include "../../util/hist.h" 8 + #include "../../util/map.h" 9 + #include "../../util/map_groups.h" 10 + #include "../../util/symbol.h" 7 11 #include "../../util/sort.h" 8 12 #include "../../util/evsel.h" 9 13 #include "../../util/srcline.h"
-1
tools/perf/util/Build
··· 107 107 libperf-y += help-unknown-cmd.o 108 108 libperf-y += mem-events.o 109 109 libperf-y += vsprintf.o 110 - libperf-y += drv_configs.o 111 110 libperf-y += units.o 112 111 libperf-y += time-utils.o 113 112 libperf-y += expr-bison.o
+1
tools/perf/util/annotate.c
··· 17 17 #include "color.h" 18 18 #include "config.h" 19 19 #include "cache.h" 20 + #include "map.h" 20 21 #include "symbol.h" 21 22 #include "units.h" 22 23 #include "debug.h"
+23 -4
tools/perf/util/auxtrace.c
··· 27 27 #include <linux/bitops.h> 28 28 #include <linux/log2.h> 29 29 #include <linux/string.h> 30 + #include <linux/time64.h> 30 31 31 32 #include <sys/param.h> 32 33 #include <stdlib.h> ··· 42 41 #include "pmu.h" 43 42 #include "evsel.h" 44 43 #include "cpumap.h" 44 + #include "symbol.h" 45 45 #include "thread_map.h" 46 46 #include "asm/bug.h" 47 47 #include "auxtrace.h" ··· 859 857 860 858 void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, 861 859 int code, int cpu, pid_t pid, pid_t tid, u64 ip, 862 - const char *msg) 860 + const char *msg, u64 timestamp) 863 861 { 864 862 size_t size; 865 863 ··· 871 869 auxtrace_error->cpu = cpu; 872 870 auxtrace_error->pid = pid; 873 871 auxtrace_error->tid = tid; 872 + auxtrace_error->fmt = 1; 874 873 auxtrace_error->ip = ip; 874 + auxtrace_error->time = timestamp; 875 875 strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG); 876 876 877 877 size = (void *)auxtrace_error->msg - (void *)auxtrace_error + ··· 1163 1159 size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp) 1164 1160 { 1165 1161 struct auxtrace_error_event *e = &event->auxtrace_error; 1162 + unsigned long long nsecs = e->time; 1163 + const char *msg = e->msg; 1166 1164 int ret; 1167 1165 1168 1166 ret = fprintf(fp, " %s error type %u", 1169 1167 auxtrace_error_name(e->type), e->type); 1168 + 1169 + if (e->fmt && nsecs) { 1170 + unsigned long secs = nsecs / NSEC_PER_SEC; 1171 + 1172 + nsecs -= secs * NSEC_PER_SEC; 1173 + ret += fprintf(fp, " time %lu.%09llu", secs, nsecs); 1174 + } else { 1175 + ret += fprintf(fp, " time 0"); 1176 + } 1177 + 1178 + if (!e->fmt) 1179 + msg = (const char *)&e->time; 1180 + 1170 1181 ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n", 1171 - e->cpu, e->pid, e->tid, e->ip, e->code, e->msg); 1182 + e->cpu, e->pid, e->tid, e->ip, e->code, msg); 1172 1183 return ret; 1173 1184 } 1174 1185 ··· 1297 1278 } 1298 1279 1299 1280 /* padding must be written by fn() 
e.g. record__process_auxtrace() */ 1300 - padding = size & 7; 1281 + padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1); 1301 1282 if (padding) 1302 - padding = 8 - padding; 1283 + padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding; 1303 1284 1304 1285 memset(&ev, 0, sizeof(ev)); 1305 1286 ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
+4 -1
tools/perf/util/auxtrace.h
··· 40 40 struct auxtrace_info_event; 41 41 struct events_stats; 42 42 43 + /* Auxtrace records must have the same alignment as perf event records */ 44 + #define PERF_AUXTRACE_RECORD_ALIGNMENT 8 45 + 43 46 enum auxtrace_type { 44 47 PERF_AUXTRACE_UNKNOWN, 45 48 PERF_AUXTRACE_INTEL_PT, ··· 519 516 520 517 void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, 521 518 int code, int cpu, pid_t pid, pid_t tid, u64 ip, 522 - const char *msg); 519 + const char *msg, u64 timestamp); 523 520 524 521 int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, 525 522 struct perf_tool *tool,
+1
tools/perf/util/bpf-loader.c
··· 15 15 #include <errno.h> 16 16 #include "perf.h" 17 17 #include "debug.h" 18 + #include "evlist.h" 18 19 #include "bpf-loader.h" 19 20 #include "bpf-prologue.h" 20 21 #include "probe-event.h"
+3 -4
tools/perf/util/bpf-loader.h
··· 8 8 9 9 #include <linux/compiler.h> 10 10 #include <linux/err.h> 11 - #include <string.h> 12 11 #include <bpf/libbpf.h> 13 - #include "probe-event.h" 14 - #include "evlist.h" 15 - #include "debug.h" 16 12 17 13 enum bpf_loader_errno { 18 14 __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100, ··· 40 44 }; 41 45 42 46 struct perf_evsel; 47 + struct perf_evlist; 43 48 struct bpf_object; 44 49 struct parse_events_term; 45 50 #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" ··· 84 87 int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size); 85 88 #else 86 89 #include <errno.h> 90 + #include <string.h> 91 + #include "debug.h" 87 92 88 93 static inline struct bpf_object * 89 94 bpf__prepare_load(const char *filename __maybe_unused,
+1
tools/perf/util/build-id.c
··· 16 16 #include "build-id.h" 17 17 #include "event.h" 18 18 #include "namespaces.h" 19 + #include "map.h" 19 20 #include "symbol.h" 20 21 #include "thread.h" 21 22 #include <linux/kernel.h>
+17
tools/perf/util/callchain.c
··· 23 23 #include "util.h" 24 24 #include "sort.h" 25 25 #include "machine.h" 26 + #include "map.h" 26 27 #include "callchain.h" 27 28 #include "branch.h" 29 + #include "symbol.h" 28 30 29 31 #define CALLCHAIN_PARAM_DEFAULT \ 30 32 .mode = CHAIN_GRAPH_ABS, \ ··· 1578 1576 } 1579 1577 1580 1578 return rc; 1579 + } 1580 + 1581 + /* 1582 + * Initialize a cursor before adding entries inside, but keep 1583 + * the previously allocated entries as a cache. 1584 + */ 1585 + void callchain_cursor_reset(struct callchain_cursor *cursor) 1586 + { 1587 + struct callchain_cursor_node *node; 1588 + 1589 + cursor->nr = 0; 1590 + cursor->last = &cursor->first; 1591 + 1592 + for (node = cursor->first; node != NULL; node = node->next) 1593 + map__zput(node->map); 1581 1594 }
+4 -16
tools/perf/util/callchain.h
··· 5 5 #include <linux/list.h> 6 6 #include <linux/rbtree.h> 7 7 #include "event.h" 8 - #include "map.h" 9 - #include "symbol.h" 8 + #include "map_symbol.h" 10 9 #include "branch.h" 10 + 11 + struct map; 11 12 12 13 #define HELP_PAD "\t\t\t\t" 13 14 ··· 188 187 int callchain_merge(struct callchain_cursor *cursor, 189 188 struct callchain_root *dst, struct callchain_root *src); 190 189 191 - /* 192 - * Initialize a cursor before adding entries inside, but keep 193 - * the previously allocated entries as a cache. 194 - */ 195 - static inline void callchain_cursor_reset(struct callchain_cursor *cursor) 196 - { 197 - struct callchain_cursor_node *node; 198 - 199 - cursor->nr = 0; 200 - cursor->last = &cursor->first; 201 - 202 - for (node = cursor->first; node != NULL; node = node->next) 203 - map__zput(node->map); 204 - } 190 + void callchain_cursor_reset(struct callchain_cursor *cursor); 205 191 206 192 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, 207 193 struct map *map, struct symbol *sym,
+1
tools/perf/util/config.c
··· 13 13 #include <sys/param.h> 14 14 #include "util.h" 15 15 #include "cache.h" 16 + #include "callchain.h" 16 17 #include <subcmd/exec-cmd.h> 17 18 #include "util/event.h" /* proc_map_timeout */ 18 19 #include "util/hist.h" /* perf_hist_config */
+50
tools/perf/util/cpu-set-sched.h
··· 1 + // SPDX-License-Identifier: LGPL-2.1 2 + // Definitions taken from glibc for use with older systems, same licensing. 3 + #ifndef _CPU_SET_SCHED_PERF_H 4 + #define _CPU_SET_SCHED_PERF_H 5 + 6 + #include <features.h> 7 + #include <sched.h> 8 + 9 + #ifndef CPU_EQUAL 10 + #ifndef __CPU_EQUAL_S 11 + #if __GNUC_PREREQ (2, 91) 12 + # define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \ 13 + (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0) 14 + #else 15 + # define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \ 16 + (__extension__ \ 17 + ({ const __cpu_mask *__arr1 = (cpusetp1)->__bits; \ 18 + const __cpu_mask *__arr2 = (cpusetp2)->__bits; \ 19 + size_t __imax = (setsize) / sizeof (__cpu_mask); \ 20 + size_t __i; \ 21 + for (__i = 0; __i < __imax; ++__i) \ 22 + if (__arr1[__i] != __arr2[__i]) \ 23 + break; \ 24 + __i == __imax; })) 25 + #endif 26 + #endif // __CPU_EQUAL_S 27 + 28 + #define CPU_EQUAL(cpusetp1, cpusetp2) \ 29 + __CPU_EQUAL_S (sizeof (cpu_set_t), cpusetp1, cpusetp2) 30 + #endif // CPU_EQUAL 31 + 32 + #ifndef CPU_OR 33 + #ifndef __CPU_OP_S 34 + #define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \ 35 + (__extension__ \ 36 + ({ cpu_set_t *__dest = (destset); \ 37 + const __cpu_mask *__arr1 = (srcset1)->__bits; \ 38 + const __cpu_mask *__arr2 = (srcset2)->__bits; \ 39 + size_t __imax = (setsize) / sizeof (__cpu_mask); \ 40 + size_t __i; \ 41 + for (__i = 0; __i < __imax; ++__i) \ 42 + ((__cpu_mask *) __dest->__bits)[__i] = __arr1[__i] op __arr2[__i]; \ 43 + __dest; })) 44 + #endif // __CPU_OP_S 45 + 46 + #define CPU_OR(destset, srcset1, srcset2) \ 47 + __CPU_OP_S (sizeof (cpu_set_t), destset, srcset1, srcset2, |) 48 + #endif // CPU_OR 49 + 50 + #endif // _CPU_SET_SCHED_PERF_H
+10
tools/perf/util/cpumap.c
··· 730 730 buf[size - 1] = '\0'; 731 731 return ptr - buf; 732 732 } 733 + 734 + const struct cpu_map *cpu_map__online(void) /* thread unsafe */ 735 + { 736 + static const struct cpu_map *online = NULL; 737 + 738 + if (!online) 739 + online = cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ 740 + 741 + return online; 742 + }
+1
tools/perf/util/cpumap.h
··· 29 29 int cpu_map__get_core(struct cpu_map *map, int idx, void *data); 30 30 int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); 31 31 int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); 32 + const struct cpu_map *cpu_map__online(void); /* thread unsafe */ 32 33 33 34 struct cpu_map *cpu_map__get(struct cpu_map *map); 34 35 void cpu_map__put(struct cpu_map *map);
+32 -9
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
··· 290 290 decoder->packet_buffer[i].instr_count = 0; 291 291 decoder->packet_buffer[i].last_instr_taken_branch = false; 292 292 decoder->packet_buffer[i].last_instr_size = 0; 293 + decoder->packet_buffer[i].last_instr_type = 0; 294 + decoder->packet_buffer[i].last_instr_subtype = 0; 295 + decoder->packet_buffer[i].last_instr_cond = 0; 296 + decoder->packet_buffer[i].flags = 0; 297 + decoder->packet_buffer[i].exception_number = UINT32_MAX; 298 + decoder->packet_buffer[i].trace_chan_id = UINT8_MAX; 293 299 decoder->packet_buffer[i].cpu = INT_MIN; 294 300 } 295 301 } ··· 306 300 enum cs_etm_sample_type sample_type) 307 301 { 308 302 u32 et = 0; 309 - struct int_node *inode = NULL; 303 + int cpu; 310 304 311 305 if (decoder->packet_count >= MAX_BUFFER - 1) 312 306 return OCSD_RESP_FATAL_SYS_ERR; 313 307 314 - /* Search the RB tree for the cpu associated with this traceID */ 315 - inode = intlist__find(traceid_list, trace_chan_id); 316 - if (!inode) 308 + if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) 317 309 return OCSD_RESP_FATAL_SYS_ERR; 318 310 319 311 et = decoder->tail; ··· 321 317 322 318 decoder->packet_buffer[et].sample_type = sample_type; 323 319 decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; 324 - decoder->packet_buffer[et].cpu = *((int *)inode->priv); 320 + decoder->packet_buffer[et].cpu = cpu; 325 321 decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; 326 322 decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; 327 323 decoder->packet_buffer[et].instr_count = 0; 328 324 decoder->packet_buffer[et].last_instr_taken_branch = false; 329 325 decoder->packet_buffer[et].last_instr_size = 0; 326 + decoder->packet_buffer[et].last_instr_type = 0; 327 + decoder->packet_buffer[et].last_instr_subtype = 0; 328 + decoder->packet_buffer[et].last_instr_cond = 0; 329 + decoder->packet_buffer[et].flags = 0; 330 + decoder->packet_buffer[et].exception_number = UINT32_MAX; 331 + decoder->packet_buffer[et].trace_chan_id = trace_chan_id; 330 332 331 333 if 
(decoder->packet_count == MAX_BUFFER - 1) 332 334 return OCSD_RESP_WAIT; ··· 376 366 packet->start_addr = elem->st_addr; 377 367 packet->end_addr = elem->en_addr; 378 368 packet->instr_count = elem->num_instr_range; 369 + packet->last_instr_type = elem->last_i_type; 370 + packet->last_instr_subtype = elem->last_i_subtype; 371 + packet->last_instr_cond = elem->last_instr_cond; 379 372 380 373 switch (elem->last_i_type) { 381 374 case OCSD_INSTR_BR: ··· 408 395 409 396 static ocsd_datapath_resp_t 410 397 cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, 398 + const ocsd_generic_trace_elem *elem, 411 399 const uint8_t trace_chan_id) 412 - { 413 - return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 414 - CS_ETM_EXCEPTION); 400 + { int ret = 0; 401 + struct cs_etm_packet *packet; 402 + 403 + ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 404 + CS_ETM_EXCEPTION); 405 + if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) 406 + return ret; 407 + 408 + packet = &decoder->packet_buffer[decoder->tail]; 409 + packet->exception_number = elem->exception_number; 410 + 411 + return ret; 415 412 } 416 413 417 414 static ocsd_datapath_resp_t ··· 455 432 trace_chan_id); 456 433 break; 457 434 case OCSD_GEN_TRC_ELEM_EXCEPTION: 458 - resp = cs_etm_decoder__buffer_exception(decoder, 435 + resp = cs_etm_decoder__buffer_exception(decoder, elem, 459 436 trace_chan_id); 460 437 break; 461 438 case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
+6
tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
··· 43 43 u64 start_addr; 44 44 u64 end_addr; 45 45 u32 instr_count; 46 + u32 last_instr_type; 47 + u32 last_instr_subtype; 48 + u32 flags; 49 + u32 exception_number; 50 + u8 last_instr_cond; 46 51 u8 last_instr_taken_branch; 47 52 u8 last_instr_size; 53 + u8 trace_chan_id; 48 54 int cpu; 49 55 }; 50 56
+386 -8
tools/perf/util/cs-etm.c
··· 12 12 #include <linux/log2.h> 13 13 #include <linux/types.h> 14 14 15 + #include <opencsd/ocsd_if_types.h> 15 16 #include <stdlib.h> 16 17 17 18 #include "auxtrace.h" ··· 95 94 return CS_ETM_PROTO_PTM; 96 95 97 96 return CS_ETM_PROTO_ETMV3; 97 + } 98 + 99 + static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) 100 + { 101 + struct int_node *inode; 102 + u64 *metadata; 103 + 104 + inode = intlist__find(traceid_list, trace_chan_id); 105 + if (!inode) 106 + return -EINVAL; 107 + 108 + metadata = inode->priv; 109 + *magic = metadata[CS_ETM_MAGIC]; 110 + return 0; 111 + } 112 + 113 + int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) 114 + { 115 + struct int_node *inode; 116 + u64 *metadata; 117 + 118 + inode = intlist__find(traceid_list, trace_chan_id); 119 + if (!inode) 120 + return -EINVAL; 121 + 122 + metadata = inode->priv; 123 + *cpu = (int)metadata[CS_ETM_CPU]; 124 + return 0; 98 125 } 99 126 100 127 static void cs_etm__packet_dump(const char *pkt_string) ··· 280 251 cs_etm__free_events(session); 281 252 session->auxtrace = NULL; 282 253 283 - /* First remove all traceID/CPU# nodes for the RB tree */ 254 + /* First remove all traceID/metadata nodes for the RB tree */ 284 255 intlist__for_each_entry_safe(inode, tmp, traceid_list) 285 256 intlist__remove(traceid_list, inode); 286 257 /* Then the RB tree itself */ ··· 748 719 sample.stream_id = etmq->etm->instructions_id; 749 720 sample.period = period; 750 721 sample.cpu = etmq->packet->cpu; 751 - sample.flags = 0; 722 + sample.flags = etmq->prev_packet->flags; 752 723 sample.insn_len = 1; 753 724 sample.cpumode = event->sample.header.misc; 754 725 ··· 807 778 sample.stream_id = etmq->etm->branches_id; 808 779 sample.period = 1; 809 780 sample.cpu = etmq->packet->cpu; 810 - sample.flags = 0; 781 + sample.flags = etmq->prev_packet->flags; 811 782 sample.cpumode = event->sample.header.misc; 812 783 813 784 /* ··· 1136 1107 return 0; 1137 1108 } 1138 1109 1110 + static bool cs_etm__is_svc_instr(struct 
cs_etm_queue *etmq, 1111 + struct cs_etm_packet *packet, 1112 + u64 end_addr) 1113 + { 1114 + u16 instr16; 1115 + u32 instr32; 1116 + u64 addr; 1117 + 1118 + switch (packet->isa) { 1119 + case CS_ETM_ISA_T32: 1120 + /* 1121 + * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: 1122 + * 1123 + * b'15 b'8 1124 + * +-----------------+--------+ 1125 + * | 1 1 0 1 1 1 1 1 | imm8 | 1126 + * +-----------------+--------+ 1127 + * 1128 + * According to the specifiction, it only defines SVC for T32 1129 + * with 16 bits instruction and has no definition for 32bits; 1130 + * so below only read 2 bytes as instruction size for T32. 1131 + */ 1132 + addr = end_addr - 2; 1133 + cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16); 1134 + if ((instr16 & 0xFF00) == 0xDF00) 1135 + return true; 1136 + 1137 + break; 1138 + case CS_ETM_ISA_A32: 1139 + /* 1140 + * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: 1141 + * 1142 + * b'31 b'28 b'27 b'24 1143 + * +---------+---------+-------------------------+ 1144 + * | !1111 | 1 1 1 1 | imm24 | 1145 + * +---------+---------+-------------------------+ 1146 + */ 1147 + addr = end_addr - 4; 1148 + cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); 1149 + if ((instr32 & 0x0F000000) == 0x0F000000 && 1150 + (instr32 & 0xF0000000) != 0xF0000000) 1151 + return true; 1152 + 1153 + break; 1154 + case CS_ETM_ISA_A64: 1155 + /* 1156 + * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: 1157 + * 1158 + * b'31 b'21 b'4 b'0 1159 + * +-----------------------+---------+-----------+ 1160 + * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | 1161 + * +-----------------------+---------+-----------+ 1162 + */ 1163 + addr = end_addr - 4; 1164 + cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); 1165 + if ((instr32 & 0xFFE0001F) == 0xd4000001) 1166 + return true; 1167 + 1168 + break; 1169 + case CS_ETM_ISA_UNKNOWN: 1170 + default: 1171 + break; 1172 + } 1173 + 1174 + return false; 1175 + } 1176 + 1177 + 
static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) 1178 + { 1179 + struct cs_etm_packet *packet = etmq->packet; 1180 + struct cs_etm_packet *prev_packet = etmq->prev_packet; 1181 + 1182 + if (magic == __perf_cs_etmv3_magic) 1183 + if (packet->exception_number == CS_ETMV3_EXC_SVC) 1184 + return true; 1185 + 1186 + /* 1187 + * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and 1188 + * HVC cases; need to check if it's SVC instruction based on 1189 + * packet address. 1190 + */ 1191 + if (magic == __perf_cs_etmv4_magic) { 1192 + if (packet->exception_number == CS_ETMV4_EXC_CALL && 1193 + cs_etm__is_svc_instr(etmq, prev_packet, 1194 + prev_packet->end_addr)) 1195 + return true; 1196 + } 1197 + 1198 + return false; 1199 + } 1200 + 1201 + static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) 1202 + { 1203 + struct cs_etm_packet *packet = etmq->packet; 1204 + 1205 + if (magic == __perf_cs_etmv3_magic) 1206 + if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 1207 + packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || 1208 + packet->exception_number == CS_ETMV3_EXC_PE_RESET || 1209 + packet->exception_number == CS_ETMV3_EXC_IRQ || 1210 + packet->exception_number == CS_ETMV3_EXC_FIQ) 1211 + return true; 1212 + 1213 + if (magic == __perf_cs_etmv4_magic) 1214 + if (packet->exception_number == CS_ETMV4_EXC_RESET || 1215 + packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || 1216 + packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || 1217 + packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || 1218 + packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || 1219 + packet->exception_number == CS_ETMV4_EXC_IRQ || 1220 + packet->exception_number == CS_ETMV4_EXC_FIQ) 1221 + return true; 1222 + 1223 + return false; 1224 + } 1225 + 1226 + static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) 1227 + { 1228 + struct cs_etm_packet *packet = etmq->packet; 1229 + struct cs_etm_packet *prev_packet = 
etmq->prev_packet; 1230 + 1231 + if (magic == __perf_cs_etmv3_magic) 1232 + if (packet->exception_number == CS_ETMV3_EXC_SMC || 1233 + packet->exception_number == CS_ETMV3_EXC_HYP || 1234 + packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || 1235 + packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || 1236 + packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || 1237 + packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || 1238 + packet->exception_number == CS_ETMV3_EXC_GENERIC) 1239 + return true; 1240 + 1241 + if (magic == __perf_cs_etmv4_magic) { 1242 + if (packet->exception_number == CS_ETMV4_EXC_TRAP || 1243 + packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 1244 + packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 1245 + packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 1246 + return true; 1247 + 1248 + /* 1249 + * For CS_ETMV4_EXC_CALL, except SVC other instructions 1250 + * (SMC, HVC) are taken as sync exceptions. 1251 + */ 1252 + if (packet->exception_number == CS_ETMV4_EXC_CALL && 1253 + !cs_etm__is_svc_instr(etmq, prev_packet, 1254 + prev_packet->end_addr)) 1255 + return true; 1256 + 1257 + /* 1258 + * ETMv4 has 5 bits for exception number; if the numbers 1259 + * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 1260 + * they are implementation defined exceptions. 1261 + * 1262 + * For this case, simply take it as sync exception. 
1263 + */ 1264 + if (packet->exception_number > CS_ETMV4_EXC_FIQ && 1265 + packet->exception_number <= CS_ETMV4_EXC_END) 1266 + return true; 1267 + } 1268 + 1269 + return false; 1270 + } 1271 + 1272 + static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) 1273 + { 1274 + struct cs_etm_packet *packet = etmq->packet; 1275 + struct cs_etm_packet *prev_packet = etmq->prev_packet; 1276 + u64 magic; 1277 + int ret; 1278 + 1279 + switch (packet->sample_type) { 1280 + case CS_ETM_RANGE: 1281 + /* 1282 + * Immediate branch instruction without neither link nor 1283 + * return flag, it's normal branch instruction within 1284 + * the function. 1285 + */ 1286 + if (packet->last_instr_type == OCSD_INSTR_BR && 1287 + packet->last_instr_subtype == OCSD_S_INSTR_NONE) { 1288 + packet->flags = PERF_IP_FLAG_BRANCH; 1289 + 1290 + if (packet->last_instr_cond) 1291 + packet->flags |= PERF_IP_FLAG_CONDITIONAL; 1292 + } 1293 + 1294 + /* 1295 + * Immediate branch instruction with link (e.g. BL), this is 1296 + * branch instruction for function call. 1297 + */ 1298 + if (packet->last_instr_type == OCSD_INSTR_BR && 1299 + packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 1300 + packet->flags = PERF_IP_FLAG_BRANCH | 1301 + PERF_IP_FLAG_CALL; 1302 + 1303 + /* 1304 + * Indirect branch instruction with link (e.g. BLR), this is 1305 + * branch instruction for function call. 1306 + */ 1307 + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1308 + packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 1309 + packet->flags = PERF_IP_FLAG_BRANCH | 1310 + PERF_IP_FLAG_CALL; 1311 + 1312 + /* 1313 + * Indirect branch instruction with subtype of 1314 + * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for 1315 + * function return for A32/T32. 
1316 + */ 1317 + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1318 + packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) 1319 + packet->flags = PERF_IP_FLAG_BRANCH | 1320 + PERF_IP_FLAG_RETURN; 1321 + 1322 + /* 1323 + * Indirect branch instruction without link (e.g. BR), usually 1324 + * this is used for function return, especially for functions 1325 + * within dynamic link lib. 1326 + */ 1327 + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1328 + packet->last_instr_subtype == OCSD_S_INSTR_NONE) 1329 + packet->flags = PERF_IP_FLAG_BRANCH | 1330 + PERF_IP_FLAG_RETURN; 1331 + 1332 + /* Return instruction for function return. */ 1333 + if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1334 + packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) 1335 + packet->flags = PERF_IP_FLAG_BRANCH | 1336 + PERF_IP_FLAG_RETURN; 1337 + 1338 + /* 1339 + * Decoder might insert a discontinuity in the middle of 1340 + * instruction packets, fixup prev_packet with flag 1341 + * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. 1342 + */ 1343 + if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1344 + prev_packet->flags |= PERF_IP_FLAG_BRANCH | 1345 + PERF_IP_FLAG_TRACE_BEGIN; 1346 + 1347 + /* 1348 + * If the previous packet is an exception return packet 1349 + * and the return address just follows SVC instuction, 1350 + * it needs to calibrate the previous packet sample flags 1351 + * as PERF_IP_FLAG_SYSCALLRET. 1352 + */ 1353 + if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 1354 + PERF_IP_FLAG_RETURN | 1355 + PERF_IP_FLAG_INTERRUPT) && 1356 + cs_etm__is_svc_instr(etmq, packet, packet->start_addr)) 1357 + prev_packet->flags = PERF_IP_FLAG_BRANCH | 1358 + PERF_IP_FLAG_RETURN | 1359 + PERF_IP_FLAG_SYSCALLRET; 1360 + break; 1361 + case CS_ETM_DISCONTINUITY: 1362 + /* 1363 + * The trace is discontinuous, if the previous packet is 1364 + * instruction packet, set flag PERF_IP_FLAG_TRACE_END 1365 + * for previous packet. 
1366 + */ 1367 + if (prev_packet->sample_type == CS_ETM_RANGE) 1368 + prev_packet->flags |= PERF_IP_FLAG_BRANCH | 1369 + PERF_IP_FLAG_TRACE_END; 1370 + break; 1371 + case CS_ETM_EXCEPTION: 1372 + ret = cs_etm__get_magic(packet->trace_chan_id, &magic); 1373 + if (ret) 1374 + return ret; 1375 + 1376 + /* The exception is for system call. */ 1377 + if (cs_etm__is_syscall(etmq, magic)) 1378 + packet->flags = PERF_IP_FLAG_BRANCH | 1379 + PERF_IP_FLAG_CALL | 1380 + PERF_IP_FLAG_SYSCALLRET; 1381 + /* 1382 + * The exceptions are triggered by external signals from bus, 1383 + * interrupt controller, debug module, PE reset or halt. 1384 + */ 1385 + else if (cs_etm__is_async_exception(etmq, magic)) 1386 + packet->flags = PERF_IP_FLAG_BRANCH | 1387 + PERF_IP_FLAG_CALL | 1388 + PERF_IP_FLAG_ASYNC | 1389 + PERF_IP_FLAG_INTERRUPT; 1390 + /* 1391 + * Otherwise, exception is caused by trap, instruction & 1392 + * data fault, or alignment errors. 1393 + */ 1394 + else if (cs_etm__is_sync_exception(etmq, magic)) 1395 + packet->flags = PERF_IP_FLAG_BRANCH | 1396 + PERF_IP_FLAG_CALL | 1397 + PERF_IP_FLAG_INTERRUPT; 1398 + 1399 + /* 1400 + * When the exception packet is inserted, since exception 1401 + * packet is not used standalone for generating samples 1402 + * and it's affiliation to the previous instruction range 1403 + * packet; so set previous range packet flags to tell perf 1404 + * it is an exception taken branch. 1405 + */ 1406 + if (prev_packet->sample_type == CS_ETM_RANGE) 1407 + prev_packet->flags = packet->flags; 1408 + break; 1409 + case CS_ETM_EXCEPTION_RET: 1410 + /* 1411 + * When the exception return packet is inserted, since 1412 + * exception return packet is not used standalone for 1413 + * generating samples and it's affiliation to the previous 1414 + * instruction range packet; so set previous range packet 1415 + * flags to tell perf it is an exception return branch. 
1416 + * 1417 + * The exception return can be for either system call or 1418 + * other exception types; unfortunately the packet doesn't 1419 + * contain exception type related info so we cannot decide 1420 + * the exception type purely based on exception return packet. 1421 + * If we record the exception number from exception packet and 1422 + * reuse it for excpetion return packet, this is not reliable 1423 + * due the trace can be discontinuity or the interrupt can 1424 + * be nested, thus the recorded exception number cannot be 1425 + * used for exception return packet for these two cases. 1426 + * 1427 + * For exception return packet, we only need to distinguish the 1428 + * packet is for system call or for other types. Thus the 1429 + * decision can be deferred when receive the next packet which 1430 + * contains the return address, based on the return address we 1431 + * can read out the previous instruction and check if it's a 1432 + * system call instruction and then calibrate the sample flag 1433 + * as needed. 1434 + */ 1435 + if (prev_packet->sample_type == CS_ETM_RANGE) 1436 + prev_packet->flags = PERF_IP_FLAG_BRANCH | 1437 + PERF_IP_FLAG_RETURN | 1438 + PERF_IP_FLAG_INTERRUPT; 1439 + break; 1440 + case CS_ETM_EMPTY: 1441 + default: 1442 + break; 1443 + } 1444 + 1445 + return 0; 1446 + } 1447 + 1139 1448 static int cs_etm__run_decoder(struct cs_etm_queue *etmq) 1140 1449 { 1141 1450 struct cs_etm_auxtrace *etm = etmq->etm; ··· 1523 1156 * Stop processing this chunk on 1524 1157 * end of data or error 1525 1158 */ 1159 + break; 1160 + 1161 + /* 1162 + * Since packet addresses are swapped in packet 1163 + * handling within below switch() statements, 1164 + * thus setting sample flags must be called 1165 + * prior to switch() statement to use address 1166 + * information before packets swapping. 
1167 + */ 1168 + err = cs_etm__set_sample_flags(etmq); 1169 + if (err < 0) 1526 1170 break; 1527 1171 1528 1172 switch (etmq->packet->sample_type) { ··· 1792 1414 0xffffffff); 1793 1415 1794 1416 /* 1795 - * Create an RB tree for traceID-CPU# tuple. Since the conversion has 1796 - * to be made for each packet that gets decoded, optimizing access in 1797 - * anything other than a sequential array is worth doing. 1417 + * Create an RB tree for traceID-metadata tuple. Since the conversion 1418 + * has to be made for each packet that gets decoded, optimizing access 1419 + * in anything other than a sequential array is worth doing. 1798 1420 */ 1799 1421 traceid_list = intlist__new(NULL); 1800 1422 if (!traceid_list) { ··· 1860 1482 err = -EINVAL; 1861 1483 goto err_free_metadata; 1862 1484 } 1863 - /* All good, associate the traceID with the CPU# */ 1864 - inode->priv = &metadata[j][CS_ETM_CPU]; 1485 + /* All good, associate the traceID with the metadata pointer */ 1486 + inode->priv = metadata[j]; 1865 1487 } 1866 1488 1867 1489 /*
+52 -1
tools/perf/util/cs-etm.h
··· 53 53 CS_ETMV4_PRIV_MAX, 54 54 }; 55 55 56 - /* RB tree for quick conversion between traceID and CPUs */ 56 + /* 57 + * ETMv3 exception encoding number: 58 + * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) 59 + * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors. 60 + */ 61 + enum { 62 + CS_ETMV3_EXC_NONE = 0, 63 + CS_ETMV3_EXC_DEBUG_HALT = 1, 64 + CS_ETMV3_EXC_SMC = 2, 65 + CS_ETMV3_EXC_HYP = 3, 66 + CS_ETMV3_EXC_ASYNC_DATA_ABORT = 4, 67 + CS_ETMV3_EXC_JAZELLE_THUMBEE = 5, 68 + CS_ETMV3_EXC_PE_RESET = 8, 69 + CS_ETMV3_EXC_UNDEFINED_INSTR = 9, 70 + CS_ETMV3_EXC_SVC = 10, 71 + CS_ETMV3_EXC_PREFETCH_ABORT = 11, 72 + CS_ETMV3_EXC_DATA_FAULT = 12, 73 + CS_ETMV3_EXC_GENERIC = 13, 74 + CS_ETMV3_EXC_IRQ = 14, 75 + CS_ETMV3_EXC_FIQ = 15, 76 + }; 77 + 78 + /* 79 + * ETMv4 exception encoding number: 80 + * See ARM Embedded Trace Macrocell Architecture Specification (ARM IHI 0064D) 81 + * table 6-12 Possible values for the TYPE field in an Exception instruction 82 + * trace packet, for ARMv7-A/R and ARMv8-A/R PEs. 
83 + */ 84 + enum { 85 + CS_ETMV4_EXC_RESET = 0, 86 + CS_ETMV4_EXC_DEBUG_HALT = 1, 87 + CS_ETMV4_EXC_CALL = 2, 88 + CS_ETMV4_EXC_TRAP = 3, 89 + CS_ETMV4_EXC_SYSTEM_ERROR = 4, 90 + CS_ETMV4_EXC_INST_DEBUG = 6, 91 + CS_ETMV4_EXC_DATA_DEBUG = 7, 92 + CS_ETMV4_EXC_ALIGNMENT = 10, 93 + CS_ETMV4_EXC_INST_FAULT = 11, 94 + CS_ETMV4_EXC_DATA_FAULT = 12, 95 + CS_ETMV4_EXC_IRQ = 14, 96 + CS_ETMV4_EXC_FIQ = 15, 97 + CS_ETMV4_EXC_END = 31, 98 + }; 99 + 100 + /* RB tree for quick conversion between traceID and metadata pointers */ 57 101 struct intlist *traceid_list; 58 102 59 103 #define KiB(x) ((x) * 1024) ··· 113 69 #ifdef HAVE_CSTRACE_SUPPORT 114 70 int cs_etm__process_auxtrace_info(union perf_event *event, 115 71 struct perf_session *session); 72 + int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); 116 73 #else 117 74 static inline int 118 75 cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, 119 76 struct perf_session *session __maybe_unused) 77 + { 78 + return -1; 79 + } 80 + 81 + static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused, 82 + int *cpu __maybe_unused) 120 83 { 121 84 return -1; 122 85 }
+1
tools/perf/util/db-export.c
··· 20 20 #include "thread.h" 21 21 #include "comm.h" 22 22 #include "symbol.h" 23 + #include "map.h" 23 24 #include "event.h" 24 25 #include "util.h" 25 26 #include "thread-stack.h"
-78
tools/perf/util/drv_configs.c
··· 1 - /* 2 - * drv_configs.h: Interface to apply PMU specific configuration 3 - * Copyright (c) 2016-2018, Linaro Ltd. 4 - * 5 - * This program is free software; you can redistribute it and/or modify it 6 - * under the terms and conditions of the GNU General Public License, 7 - * version 2, as published by the Free Software Foundation. 8 - * 9 - * This program is distributed in the hope it will be useful, but WITHOUT 10 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 - * more details. 13 - * 14 - */ 15 - 16 - #include "drv_configs.h" 17 - #include "evlist.h" 18 - #include "evsel.h" 19 - #include "pmu.h" 20 - #include <errno.h> 21 - 22 - static int 23 - perf_evsel__apply_drv_configs(struct perf_evsel *evsel, 24 - struct perf_evsel_config_term **err_term) 25 - { 26 - bool found = false; 27 - int err = 0; 28 - struct perf_evsel_config_term *term; 29 - struct perf_pmu *pmu = NULL; 30 - 31 - while ((pmu = perf_pmu__scan(pmu)) != NULL) 32 - if (pmu->type == evsel->attr.type) { 33 - found = true; 34 - break; 35 - } 36 - 37 - list_for_each_entry(term, &evsel->config_terms, list) { 38 - if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) 39 - continue; 40 - 41 - /* 42 - * We have a configuration term, report an error if we 43 - * can't find the PMU or if the PMU driver doesn't support 44 - * cmd line driver configuration. 
45 - */ 46 - if (!found || !pmu->set_drv_config) { 47 - err = -EINVAL; 48 - *err_term = term; 49 - break; 50 - } 51 - 52 - err = pmu->set_drv_config(term); 53 - if (err) { 54 - *err_term = term; 55 - break; 56 - } 57 - } 58 - 59 - return err; 60 - } 61 - 62 - int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, 63 - struct perf_evsel **err_evsel, 64 - struct perf_evsel_config_term **err_term) 65 - { 66 - struct perf_evsel *evsel; 67 - int err = 0; 68 - 69 - evlist__for_each_entry(evlist, evsel) { 70 - err = perf_evsel__apply_drv_configs(evsel, err_term); 71 - if (err) { 72 - *err_evsel = evsel; 73 - break; 74 - } 75 - } 76 - 77 - return err; 78 - }
-26
tools/perf/util/drv_configs.h
··· 1 - /* 2 - * drv_configs.h: Interface to apply PMU specific configuration 3 - * Copyright (c) 2016-2018, Linaro Ltd. 4 - * 5 - * This program is free software; you can redistribute it and/or modify it 6 - * under the terms and conditions of the GNU General Public License, 7 - * version 2, as published by the Free Software Foundation. 8 - * 9 - * This program is distributed in the hope it will be useful, but WITHOUT 10 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 - * more details. 13 - * 14 - */ 15 - 16 - #ifndef __PERF_DRV_CONFIGS_H 17 - #define __PERF_DRV_CONFIGS_H 18 - 19 - #include "drv_configs.h" 20 - #include "evlist.h" 21 - #include "evsel.h" 22 - 23 - int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, 24 - struct perf_evsel **err_evsel, 25 - struct perf_evsel_config_term **term); 26 - #endif
+1
tools/perf/util/dso.c
··· 12 12 #include "compress.h" 13 13 #include "namespaces.h" 14 14 #include "path.h" 15 + #include "map.h" 15 16 #include "symbol.h" 16 17 #include "srcline.h" 17 18 #include "dso.h"
+2
tools/perf/util/event.c
··· 21 21 #include "thread.h" 22 22 #include "thread_map.h" 23 23 #include "sane_ctype.h" 24 + #include "map.h" 25 + #include "symbol.h" 24 26 #include "symbol/kallsyms.h" 25 27 #include "asm/bug.h" 26 28 #include "stat.h"
+2 -1
tools/perf/util/event.h
··· 532 532 u32 cpu; 533 533 u32 pid; 534 534 u32 tid; 535 - u32 reserved__; /* For alignment */ 535 + u32 fmt; 536 536 u64 ip; 537 + u64 time; 537 538 char msg[MAX_AUXTRACE_ERROR_MSG]; 538 539 }; 539 540
+3 -3
tools/perf/util/evlist.c
··· 1022 1022 */ 1023 1023 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1024 1024 unsigned int auxtrace_pages, 1025 - bool auxtrace_overwrite, int nr_cblocks) 1025 + bool auxtrace_overwrite, int nr_cblocks, int affinity) 1026 1026 { 1027 1027 struct perf_evsel *evsel; 1028 1028 const struct cpu_map *cpus = evlist->cpus; ··· 1032 1032 * Its value is decided by evsel's write_backward. 1033 1033 * So &mp should not be passed through const pointer. 1034 1034 */ 1035 - struct mmap_params mp = { .nr_cblocks = nr_cblocks }; 1035 + struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity }; 1036 1036 1037 1037 if (!evlist->mmap) 1038 1038 evlist->mmap = perf_evlist__alloc_mmap(evlist, false); ··· 1064 1064 1065 1065 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) 1066 1066 { 1067 - return perf_evlist__mmap_ex(evlist, pages, 0, false, 0); 1067 + return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS); 1068 1068 } 1069 1069 1070 1070 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
+1 -1
tools/perf/util/evlist.h
··· 165 165 166 166 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 167 167 unsigned int auxtrace_pages, 168 - bool auxtrace_overwrite, int nr_cblocks); 168 + bool auxtrace_overwrite, int nr_cblocks, int affinity); 169 169 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); 170 170 void perf_evlist__munmap(struct perf_evlist *evlist); 171 171
+1 -1
tools/perf/util/evsel.h
··· 8 8 #include <linux/perf_event.h> 9 9 #include <linux/types.h> 10 10 #include "xyarray.h" 11 - #include "symbol.h" 11 + #include "symbol_conf.h" 12 12 #include "cpumap.h" 13 13 #include "counts.h" 14 14
+15 -6
tools/perf/util/hist.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 + #include "callchain.h" 2 3 #include "util.h" 3 4 #include "build-id.h" 4 5 #include "hist.h" ··· 12 11 #include "evsel.h" 13 12 #include "annotate.h" 14 13 #include "srcline.h" 14 + #include "symbol.h" 15 15 #include "thread.h" 16 16 #include "ui/progress.h" 17 17 #include <errno.h> ··· 1721 1719 } 1722 1720 1723 1721 static void output_resort(struct hists *hists, struct ui_progress *prog, 1724 - bool use_callchain, hists__resort_cb_t cb) 1722 + bool use_callchain, hists__resort_cb_t cb, 1723 + void *cb_arg) 1725 1724 { 1726 1725 struct rb_root_cached *root; 1727 1726 struct rb_node *next; ··· 1761 1758 n = rb_entry(next, struct hist_entry, rb_node_in); 1762 1759 next = rb_next(&n->rb_node_in); 1763 1760 1764 - if (cb && cb(n)) 1761 + if (cb && cb(n, cb_arg)) 1765 1762 continue; 1766 1763 1767 1764 __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); ··· 1775 1772 } 1776 1773 } 1777 1774 1778 - void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) 1775 + void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog, 1776 + hists__resort_cb_t cb, void *cb_arg) 1779 1777 { 1780 1778 bool use_callchain; 1781 1779 ··· 1787 1783 1788 1784 use_callchain |= symbol_conf.show_branchflag_count; 1789 1785 1790 - output_resort(evsel__hists(evsel), prog, use_callchain, NULL); 1786 + output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg); 1787 + } 1788 + 1789 + void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) 1790 + { 1791 + return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL); 1791 1792 } 1792 1793 1793 1794 void hists__output_resort(struct hists *hists, struct ui_progress *prog) 1794 1795 { 1795 - output_resort(hists, prog, symbol_conf.use_callchain, NULL); 1796 + output_resort(hists, prog, symbol_conf.use_callchain, NULL, NULL); 1796 1797 } 1797 1798 1798 1799 void 
hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, 1799 1800 hists__resort_cb_t cb) 1800 1801 { 1801 - output_resort(hists, prog, symbol_conf.use_callchain, cb); 1802 + output_resort(hists, prog, symbol_conf.use_callchain, cb, NULL); 1802 1803 } 1803 1804 1804 1805 static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+7 -2
tools/perf/util/hist.h
··· 2 2 #ifndef __PERF_HIST_H 3 3 #define __PERF_HIST_H 4 4 5 + #include <linux/rbtree.h> 5 6 #include <linux/types.h> 6 7 #include <pthread.h> 7 - #include "callchain.h" 8 8 #include "evsel.h" 9 9 #include "header.h" 10 10 #include "color.h" ··· 13 13 struct hist_entry; 14 14 struct hist_entry_ops; 15 15 struct addr_location; 16 + struct map_symbol; 17 + struct mem_info; 18 + struct branch_info; 16 19 struct symbol; 17 20 18 21 enum hist_filter { ··· 163 160 struct perf_hpp_fmt *fmt, int printed); 164 161 void hist_entry__delete(struct hist_entry *he); 165 162 166 - typedef int (*hists__resort_cb_t)(struct hist_entry *he); 163 + typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg); 167 164 165 + void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog, 166 + hists__resort_cb_t cb, void *cb_arg); 168 167 void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); 169 168 void hists__output_resort(struct hists *hists, struct ui_progress *prog); 170 169 void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+4 -2
tools/perf/util/intel-bts.c
··· 27 27 #include "evsel.h" 28 28 #include "evlist.h" 29 29 #include "machine.h" 30 + #include "map.h" 31 + #include "symbol.h" 30 32 #include "session.h" 31 33 #include "util.h" 32 34 #include "thread.h" ··· 144 142 145 143 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 146 144 INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, 147 - sample->tid, 0, "Lost trace data"); 145 + sample->tid, 0, "Lost trace data", sample->time); 148 146 149 147 err = perf_session__deliver_synth_event(bts->session, &event, NULL); 150 148 if (err) ··· 374 372 375 373 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 376 374 INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, 377 - "Failed to get instruction"); 375 + "Failed to get instruction", 0); 378 376 379 377 err = perf_session__deliver_synth_event(bts->session, &event, NULL); 380 378 if (err)
+35 -4
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
··· 26 26 27 27 #include "../cache.h" 28 28 #include "../util.h" 29 + #include "../auxtrace.h" 29 30 30 31 #include "intel-pt-insn-decoder.h" 31 32 #include "intel-pt-pkt-decoder.h" ··· 868 867 869 868 ret = intel_pt_get_packet(decoder->buf, decoder->len, 870 869 &decoder->packet); 871 - if (ret == INTEL_PT_NEED_MORE_BYTES && 870 + if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && 872 871 decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { 873 872 ret = intel_pt_get_split_packet(decoder); 874 873 if (ret < 0) ··· 1395 1394 { 1396 1395 intel_pt_log("ERROR: Buffer overflow\n"); 1397 1396 intel_pt_clear_tx_flags(decoder); 1398 - decoder->cbr = 0; 1399 1397 decoder->timestamp_insn_cnt = 0; 1400 1398 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; 1401 1399 decoder->overflow = true; ··· 2575 2575 } 2576 2576 } 2577 2577 2578 + #define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1) 2579 + 2580 + /** 2581 + * adj_for_padding - adjust overlap to account for padding. 2582 + * @buf_b: second buffer 2583 + * @buf_a: first buffer 2584 + * @len_a: size of first buffer 2585 + * 2586 + * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap 2587 + * accordingly. 2588 + * 2589 + * Return: A pointer into @buf_b from where non-overlapped data starts 2590 + */ 2591 + static unsigned char *adj_for_padding(unsigned char *buf_b, 2592 + unsigned char *buf_a, size_t len_a) 2593 + { 2594 + unsigned char *p = buf_b - MAX_PADDING; 2595 + unsigned char *q = buf_a + len_a - MAX_PADDING; 2596 + int i; 2597 + 2598 + for (i = MAX_PADDING; i; i--, p++, q++) { 2599 + if (*p != *q) 2600 + break; 2601 + } 2602 + 2603 + return p; 2604 + } 2605 + 2578 2606 /** 2579 2607 * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data 2580 2608 * using TSC. 
··· 2653 2625 2654 2626 /* Same TSC, so buffers are consecutive */ 2655 2627 if (!cmp && rem_b >= rem_a) { 2628 + unsigned char *start; 2629 + 2656 2630 *consecutive = true; 2657 - return buf_b + len_b - (rem_b - rem_a); 2631 + start = buf_b + len_b - (rem_b - rem_a); 2632 + return adj_for_padding(start, buf_a, len_a); 2658 2633 } 2659 2634 if (cmp < 0) 2660 2635 return buf_b; /* tsc_a < tsc_b => no overlap */ ··· 2720 2689 found = memmem(buf_a, len_a, buf_b, len_a); 2721 2690 if (found) { 2722 2691 *consecutive = true; 2723 - return buf_b + len_a; 2692 + return adj_for_padding(buf_b + len_a, buf_a, len_a); 2724 2693 } 2725 2694 2726 2695 /* Try again at next PSB in buffer 'a' */
+16 -7
tools/perf/util/intel-pt.c
··· 1411 1411 } 1412 1412 1413 1413 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, 1414 - pid_t pid, pid_t tid, u64 ip) 1414 + pid_t pid, pid_t tid, u64 ip, u64 timestamp) 1415 1415 { 1416 1416 union perf_event event; 1417 1417 char msg[MAX_AUXTRACE_ERROR_MSG]; ··· 1420 1420 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); 1421 1421 1422 1422 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 1423 - code, cpu, pid, tid, ip, msg); 1423 + code, cpu, pid, tid, ip, msg, timestamp); 1424 1424 1425 1425 err = perf_session__deliver_synth_event(pt->session, &event, NULL); 1426 1426 if (err) ··· 1428 1428 err); 1429 1429 1430 1430 return err; 1431 + } 1432 + 1433 + static int intel_ptq_synth_error(struct intel_pt_queue *ptq, 1434 + const struct intel_pt_state *state) 1435 + { 1436 + struct intel_pt *pt = ptq->pt; 1437 + u64 tm = ptq->timestamp; 1438 + 1439 + tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc); 1440 + 1441 + return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid, 1442 + ptq->tid, state->from_ip, tm); 1431 1443 } 1432 1444 1433 1445 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) ··· 1688 1676 intel_pt_next_tid(pt, ptq); 1689 1677 } 1690 1678 if (pt->synth_opts.errors) { 1691 - err = intel_pt_synth_error(pt, state->err, 1692 - ptq->cpu, ptq->pid, 1693 - ptq->tid, 1694 - state->from_ip); 1679 + err = intel_ptq_synth_error(ptq, state); 1695 1680 if (err) 1696 1681 return err; 1697 1682 } ··· 1813 1804 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) 1814 1805 { 1815 1806 return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, 1816 - sample->pid, sample->tid, 0); 1807 + sample->pid, sample->tid, 0, sample->time); 1817 1808 } 1818 1809 1819 1810 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
+4 -3
tools/perf/util/kvm-stat.h
··· 3 3 #define __PERF_KVM_STAT_H 4 4 5 5 #include "../perf.h" 6 - #include "evsel.h" 7 - #include "evlist.h" 8 - #include "session.h" 9 6 #include "tool.h" 10 7 #include "stat.h" 8 + 9 + struct perf_evsel; 10 + struct perf_evlist; 11 + struct perf_session; 11 12 12 13 struct event_key { 13 14 #define INVALID_KEY (~0ULL)
+1
tools/perf/util/machine.c
··· 10 10 #include "hist.h" 11 11 #include "machine.h" 12 12 #include "map.h" 13 + #include "symbol.h" 13 14 #include "sort.h" 14 15 #include "strlist.h" 15 16 #include "thread.h"
+1 -1
tools/perf/util/machine.h
··· 4 4 5 5 #include <sys/types.h> 6 6 #include <linux/rbtree.h> 7 - #include "map.h" 7 + #include "map_groups.h" 8 8 #include "dso.h" 9 9 #include "event.h" 10 10 #include "rwsem.h"
+6
tools/perf/util/map.c
··· 557 557 refcount_set(&mg->refcnt, 1); 558 558 } 559 559 560 + void map_groups__insert(struct map_groups *mg, struct map *map) 561 + { 562 + maps__insert(&mg->maps, map); 563 + map->groups = mg; 564 + } 565 + 560 566 static void __maps__purge(struct maps *maps) 561 567 { 562 568 struct rb_root *root = &maps->entries;
+2 -98
tools/perf/util/map.h
··· 6 6 #include <linux/compiler.h> 7 7 #include <linux/list.h> 8 8 #include <linux/rbtree.h> 9 - #include <pthread.h> 10 9 #include <stdio.h> 11 10 #include <string.h> 12 11 #include <stdbool.h> 13 12 #include <linux/types.h> 14 - #include "rwsem.h" 15 13 16 14 struct dso; 17 15 struct ip_callchain; ··· 46 48 refcount_t refcnt; 47 49 }; 48 50 49 - #define KMAP_NAME_LEN 256 50 - 51 - struct kmap { 52 - struct ref_reloc_sym *ref_reloc_sym; 53 - struct map_groups *kmaps; 54 - char name[KMAP_NAME_LEN]; 55 - }; 56 - 57 - struct maps { 58 - struct rb_root entries; 59 - struct rb_root names; 60 - struct rw_semaphore lock; 61 - }; 62 - 63 - struct map_groups { 64 - struct maps maps; 65 - struct machine *machine; 66 - refcount_t refcnt; 67 - }; 68 - 69 - struct map_groups *map_groups__new(struct machine *machine); 70 - void map_groups__delete(struct map_groups *mg); 71 - bool map_groups__empty(struct map_groups *mg); 72 - 73 - static inline struct map_groups *map_groups__get(struct map_groups *mg) 74 - { 75 - if (mg) 76 - refcount_inc(&mg->refcnt); 77 - return mg; 78 - } 79 - 80 - void map_groups__put(struct map_groups *mg); 51 + struct kmap; 81 52 82 53 struct kmap *__map__kmap(struct map *map); 83 54 struct kmap *map__kmap(struct map *map); ··· 141 174 int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, 142 175 FILE *fp); 143 176 144 - struct srccode_state { 145 - char *srcfile; 146 - unsigned line; 147 - }; 148 - 149 - static inline void srccode_state_init(struct srccode_state *state) 150 - { 151 - state->srcfile = NULL; 152 - state->line = 0; 153 - } 154 - 155 - void srccode_state_free(struct srccode_state *state); 177 + struct srccode_state; 156 178 157 179 int map__fprintf_srccode(struct map *map, u64 addr, 158 180 FILE *fp, struct srccode_state *state); ··· 154 198 155 199 void map__reloc_vmlinux(struct map *map); 156 200 157 - void maps__insert(struct maps *maps, struct map *map); 158 - void maps__remove(struct maps *maps, struct map *map); 159 
- struct map *maps__find(struct maps *maps, u64 addr); 160 - struct map *maps__first(struct maps *maps); 161 - struct map *map__next(struct map *map); 162 - struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, 163 - struct map **mapp); 164 - void map_groups__init(struct map_groups *mg, struct machine *machine); 165 - void map_groups__exit(struct map_groups *mg); 166 - int map_groups__clone(struct thread *thread, 167 - struct map_groups *parent); 168 - size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); 169 - 170 201 int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, 171 202 u64 addr); 172 - 173 - static inline void map_groups__insert(struct map_groups *mg, struct map *map) 174 - { 175 - maps__insert(&mg->maps, map); 176 - map->groups = mg; 177 - } 178 - 179 - static inline void map_groups__remove(struct map_groups *mg, struct map *map) 180 - { 181 - maps__remove(&mg->maps, map); 182 - } 183 - 184 - static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) 185 - { 186 - return maps__find(&mg->maps, addr); 187 - } 188 - 189 - struct map *map_groups__first(struct map_groups *mg); 190 - 191 - static inline struct map *map_groups__next(struct map *map) 192 - { 193 - return map__next(map); 194 - } 195 - 196 - struct symbol *map_groups__find_symbol(struct map_groups *mg, 197 - u64 addr, struct map **mapp); 198 - 199 - struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, 200 - const char *name, 201 - struct map **mapp); 202 - 203 - struct addr_map_symbol; 204 - 205 - int map_groups__find_ams(struct addr_map_symbol *ams); 206 - 207 - int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, 208 - FILE *fp); 209 - 210 - struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); 211 203 212 204 bool __map__is_kernel(const struct map *map); 213 205 bool __map__is_extra_kernel_map(const struct map *map);
+91
tools/perf/util/map_groups.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __PERF_MAP_GROUPS_H 3 + #define __PERF_MAP_GROUPS_H 4 + 5 + #include <linux/refcount.h> 6 + #include <linux/rbtree.h> 7 + #include <stdio.h> 8 + #include <stdbool.h> 9 + #include <linux/types.h> 10 + #include "rwsem.h" 11 + 12 + struct ref_reloc_sym; 13 + struct machine; 14 + struct map; 15 + struct thread; 16 + 17 + struct maps { 18 + struct rb_root entries; 19 + struct rb_root names; 20 + struct rw_semaphore lock; 21 + }; 22 + 23 + void maps__insert(struct maps *maps, struct map *map); 24 + void maps__remove(struct maps *maps, struct map *map); 25 + struct map *maps__find(struct maps *maps, u64 addr); 26 + struct map *maps__first(struct maps *maps); 27 + struct map *map__next(struct map *map); 28 + struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp); 29 + 30 + struct map_groups { 31 + struct maps maps; 32 + struct machine *machine; 33 + refcount_t refcnt; 34 + }; 35 + 36 + #define KMAP_NAME_LEN 256 37 + 38 + struct kmap { 39 + struct ref_reloc_sym *ref_reloc_sym; 40 + struct map_groups *kmaps; 41 + char name[KMAP_NAME_LEN]; 42 + }; 43 + 44 + struct map_groups *map_groups__new(struct machine *machine); 45 + void map_groups__delete(struct map_groups *mg); 46 + bool map_groups__empty(struct map_groups *mg); 47 + 48 + static inline struct map_groups *map_groups__get(struct map_groups *mg) 49 + { 50 + if (mg) 51 + refcount_inc(&mg->refcnt); 52 + return mg; 53 + } 54 + 55 + void map_groups__put(struct map_groups *mg); 56 + void map_groups__init(struct map_groups *mg, struct machine *machine); 57 + void map_groups__exit(struct map_groups *mg); 58 + int map_groups__clone(struct thread *thread, struct map_groups *parent); 59 + size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); 60 + 61 + void map_groups__insert(struct map_groups *mg, struct map *map); 62 + 63 + static inline void map_groups__remove(struct map_groups *mg, struct map *map) 64 + { 65 + 
maps__remove(&mg->maps, map); 66 + } 67 + 68 + static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) 69 + { 70 + return maps__find(&mg->maps, addr); 71 + } 72 + 73 + struct map *map_groups__first(struct map_groups *mg); 74 + 75 + static inline struct map *map_groups__next(struct map *map) 76 + { 77 + return map__next(map); 78 + } 79 + 80 + struct symbol *map_groups__find_symbol(struct map_groups *mg, u64 addr, struct map **mapp); 81 + struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, const char *name, struct map **mapp); 82 + 83 + struct addr_map_symbol; 84 + 85 + int map_groups__find_ams(struct addr_map_symbol *ams); 86 + 87 + int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp); 88 + 89 + struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); 90 + 91 + #endif // __PERF_MAP_GROUPS_H
+22
tools/perf/util/map_symbol.h
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #ifndef __PERF_MAP_SYMBOL 3 + #define __PERF_MAP_SYMBOL 1 4 + 5 + #include <linux/types.h> 6 + 7 + struct map; 8 + struct symbol; 9 + 10 + struct map_symbol { 11 + struct map *map; 12 + struct symbol *sym; 13 + }; 14 + 15 + struct addr_map_symbol { 16 + struct map *map; 17 + struct symbol *sym; 18 + u64 addr; 19 + u64 al_addr; 20 + u64 phys_addr; 21 + }; 22 + #endif // __PERF_MAP_SYMBOL
+101 -4
tools/perf/util/mmap.c
··· 10 10 #include <sys/mman.h> 11 11 #include <inttypes.h> 12 12 #include <asm/bug.h> 13 + #ifdef HAVE_LIBNUMA_SUPPORT 14 + #include <numaif.h> 15 + #endif 13 16 #include "debug.h" 14 17 #include "event.h" 15 18 #include "mmap.h" ··· 157 154 } 158 155 159 156 #ifdef HAVE_AIO_SUPPORT 157 + 158 + #ifdef HAVE_LIBNUMA_SUPPORT 159 + static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) 160 + { 161 + map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, 162 + MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); 163 + if (map->aio.data[idx] == MAP_FAILED) { 164 + map->aio.data[idx] = NULL; 165 + return -1; 166 + } 167 + 168 + return 0; 169 + } 170 + 171 + static void perf_mmap__aio_free(struct perf_mmap *map, int idx) 172 + { 173 + if (map->aio.data[idx]) { 174 + munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); 175 + map->aio.data[idx] = NULL; 176 + } 177 + } 178 + 179 + static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity) 180 + { 181 + void *data; 182 + size_t mmap_len; 183 + unsigned long node_mask; 184 + 185 + if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { 186 + data = map->aio.data[idx]; 187 + mmap_len = perf_mmap__mmap_len(map); 188 + node_mask = 1UL << cpu__get_node(cpu); 189 + if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { 190 + pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", 191 + data, data + mmap_len, cpu__get_node(cpu)); 192 + return -1; 193 + } 194 + } 195 + 196 + return 0; 197 + } 198 + #else 199 + static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) 200 + { 201 + map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); 202 + if (map->aio.data[idx] == NULL) 203 + return -1; 204 + 205 + return 0; 206 + } 207 + 208 + static void perf_mmap__aio_free(struct perf_mmap *map, int idx) 209 + { 210 + zfree(&(map->aio.data[idx])); 211 + } 212 + 213 + static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused, 214 + int cpu 
__maybe_unused, int affinity __maybe_unused) 215 + { 216 + return 0; 217 + } 218 + #endif 219 + 160 220 static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) 161 221 { 162 - int delta_max, i, prio; 222 + int delta_max, i, prio, ret; 163 223 164 224 map->aio.nr_cblocks = mp->nr_cblocks; 165 225 if (map->aio.nr_cblocks) { ··· 243 177 } 244 178 delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); 245 179 for (i = 0; i < map->aio.nr_cblocks; ++i) { 246 - map->aio.data[i] = malloc(perf_mmap__mmap_len(map)); 247 - if (!map->aio.data[i]) { 180 + ret = perf_mmap__aio_alloc(map, i); 181 + if (ret == -1) { 248 182 pr_debug2("failed to allocate data buffer area, error %m"); 249 183 return -1; 250 184 } 185 + ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity); 186 + if (ret == -1) 187 + return -1; 251 188 /* 252 189 * Use cblock.aio_fildes value different from -1 253 190 * to denote started aio write operation on the ··· 279 210 int i; 280 211 281 212 for (i = 0; i < map->aio.nr_cblocks; ++i) 282 - zfree(&map->aio.data[i]); 213 + perf_mmap__aio_free(map, i); 283 214 if (map->aio.data) 284 215 zfree(&map->aio.data); 285 216 zfree(&map->aio.cblocks); ··· 383 314 auxtrace_mmap__munmap(&map->auxtrace_mmap); 384 315 } 385 316 317 + static void build_node_mask(int node, cpu_set_t *mask) 318 + { 319 + int c, cpu, nr_cpus; 320 + const struct cpu_map *cpu_map = NULL; 321 + 322 + cpu_map = cpu_map__online(); 323 + if (!cpu_map) 324 + return; 325 + 326 + nr_cpus = cpu_map__nr(cpu_map); 327 + for (c = 0; c < nr_cpus; c++) { 328 + cpu = cpu_map->map[c]; /* map c index to online cpu index */ 329 + if (cpu__get_node(cpu) == node) 330 + CPU_SET(cpu, mask); 331 + } 332 + } 333 + 334 + static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp) 335 + { 336 + CPU_ZERO(&map->affinity_mask); 337 + if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) 338 + build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask); 339 + else if 
(mp->affinity == PERF_AFFINITY_CPU) 340 + CPU_SET(map->cpu, &map->affinity_mask); 341 + } 342 + 386 343 int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) 387 344 { 388 345 /* ··· 437 342 } 438 343 map->fd = fd; 439 344 map->cpu = cpu; 345 + 346 + perf_mmap__setup_affinity_mask(map, mp); 440 347 441 348 if (auxtrace_mmap__mmap(&map->auxtrace_mmap, 442 349 &mp->auxtrace_mp, map->base, fd))
+2 -1
tools/perf/util/mmap.h
··· 38 38 int nr_cblocks; 39 39 } aio; 40 40 #endif 41 + cpu_set_t affinity_mask; 41 42 }; 42 43 43 44 /* ··· 70 69 }; 71 70 72 71 struct mmap_params { 73 - int prot, mask, nr_cblocks; 72 + int prot, mask, nr_cblocks, affinity; 74 73 struct auxtrace_mmap_params auxtrace_mp; 75 74 }; 76 75
-2
tools/perf/util/pmu.c
··· 29 29 struct list_head list; 30 30 }; 31 31 32 - #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" 33 - 34 32 int perf_pmu_parse(struct list_head *list, char *name); 35 33 extern FILE *perf_pmu_in; 36 34
+3 -2
tools/perf/util/pmu.h
··· 6 6 #include <linux/compiler.h> 7 7 #include <linux/perf_event.h> 8 8 #include <stdbool.h> 9 - #include "evsel.h" 10 9 #include "parse-events.h" 10 + 11 + struct perf_evsel_config_term; 11 12 12 13 enum { 13 14 PERF_PMU_FORMAT_VALUE_CONFIG, ··· 17 16 }; 18 17 19 18 #define PERF_PMU_FORMAT_BITS 64 19 + #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" 20 20 21 21 struct perf_event_attr; 22 22 ··· 31 29 struct list_head format; /* HEAD struct perf_pmu_format -> list */ 32 30 struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ 33 31 struct list_head list; /* ELEM */ 34 - int (*set_drv_config) (struct perf_evsel_config_term *term); 35 32 }; 36 33 37 34 struct perf_pmu_info {
+2
tools/perf/util/probe-event.c
··· 41 41 #include "debug.h" 42 42 #include "cache.h" 43 43 #include "color.h" 44 + #include "map.h" 45 + #include "map_groups.h" 44 46 #include "symbol.h" 45 47 #include "thread.h" 46 48 #include <api/fs/fs.h>
+4 -3
tools/perf/util/s390-cpumsf.c
··· 819 819 } 820 820 821 821 static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, 822 - pid_t pid, pid_t tid, u64 ip) 822 + pid_t pid, pid_t tid, u64 ip, u64 timestamp) 823 823 { 824 824 char msg[MAX_AUXTRACE_ERROR_MSG]; 825 825 union perf_event event; ··· 827 827 828 828 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); 829 829 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 830 - code, cpu, pid, tid, ip, msg); 830 + code, cpu, pid, tid, ip, msg, timestamp); 831 831 832 832 err = perf_session__deliver_synth_event(sf->session, &event, NULL); 833 833 if (err) ··· 839 839 static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) 840 840 { 841 841 return s390_cpumsf_synth_error(sf, 1, sample->cpu, 842 - sample->pid, sample->tid, 0); 842 + sample->pid, sample->tid, 0, 843 + sample->time); 843 844 } 844 845 845 846 static int
+2
tools/perf/util/scripting-engines/trace-event-perl.c
··· 37 37 #include "../../perf.h" 38 38 #include "../callchain.h" 39 39 #include "../machine.h" 40 + #include "../map.h" 41 + #include "../symbol.h" 40 42 #include "../thread.h" 41 43 #include "../event.h" 42 44 #include "../trace-event.h"
+2
tools/perf/util/scripting-engines/trace-event-python.c
··· 44 44 #include "../thread-stack.h" 45 45 #include "../trace-event.h" 46 46 #include "../call-path.h" 47 + #include "map.h" 48 + #include "symbol.h" 47 49 #include "thread_map.h" 48 50 #include "cpumap.h" 49 51 #include "print_binary.h"
+5
tools/perf/util/session.c
··· 13 13 #include "evlist.h" 14 14 #include "evsel.h" 15 15 #include "memswap.h" 16 + #include "map.h" 17 + #include "symbol.h" 16 18 #include "session.h" 17 19 #include "tool.h" 18 20 #include "sort.h" ··· 703 701 event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu); 704 702 event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid); 705 703 event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid); 704 + event->auxtrace_error.fmt = bswap_32(event->auxtrace_error.fmt); 706 705 event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip); 706 + if (event->auxtrace_error.fmt) 707 + event->auxtrace_error.time = bswap_64(event->auxtrace_error.time); 707 708 } 708 709 709 710 static void perf_event__thread_map_swap(union perf_event *event,
+5
tools/perf/util/setup.py
··· 53 53 # use full paths with source files 54 54 ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) 55 55 56 + extra_libraries = [] 57 + if '-DHAVE_LIBNUMA_SUPPORT' in cflags: 58 + extra_libraries = [ 'numa' ] 59 + 56 60 perf = Extension('perf', 57 61 sources = ext_sources, 58 62 include_dirs = ['util/include'], 63 + libraries = extra_libraries, 59 64 extra_compile_args = cflags, 60 65 extra_objects = [libtraceevent, libapikfs], 61 66 )
+1 -7
tools/perf/util/sort.c
··· 6 6 #include "sort.h" 7 7 #include "hist.h" 8 8 #include "comm.h" 9 + #include "map.h" 9 10 #include "symbol.h" 10 11 #include "thread.h" 11 12 #include "evsel.h" ··· 429 428 { 430 429 431 430 struct symbol *sym = he->ms.sym; 432 - struct map *map = he->ms.map; 433 - struct perf_evsel *evsel = hists_to_evsel(he->hists); 434 431 struct annotation *notes; 435 432 double ipc = 0.0, coverage = 0.0; 436 433 char tmp[64]; 437 434 438 435 if (!sym) 439 436 return repsep_snprintf(bf, size, "%-*s", width, "-"); 440 - 441 - if (!sym->annotate2 && symbol__annotate2(sym, map, evsel, 442 - &annotation__default_options, NULL) < 0) { 443 - return 0; 444 - } 445 437 446 438 notes = symbol__annotation(sym); 447 439
+2 -1
tools/perf/util/sort.h
··· 9 9 #include <linux/list.h> 10 10 #include "cache.h" 11 11 #include <linux/rbtree.h> 12 - #include "symbol.h" 12 + #include "map_symbol.h" 13 + #include "symbol_conf.h" 13 14 #include "string.h" 14 15 #include "callchain.h" 15 16 #include "values.h"
+13
tools/perf/util/srccode.h
··· 1 1 #ifndef SRCCODE_H 2 2 #define SRCCODE_H 1 3 3 4 + struct srccode_state { 5 + char *srcfile; 6 + unsigned line; 7 + }; 8 + 9 + static inline void srccode_state_init(struct srccode_state *state) 10 + { 11 + state->srcfile = NULL; 12 + state->line = 0; 13 + } 14 + 15 + void srccode_state_free(struct srccode_state *state); 16 + 4 17 /* Result is not 0 terminated */ 5 18 char *find_sourceline(char *fn, unsigned line, int *lenp); 6 19
+2
tools/perf/util/symbol-elf.c
··· 6 6 #include <unistd.h> 7 7 #include <inttypes.h> 8 8 9 + #include "map.h" 10 + #include "map_groups.h" 9 11 #include "symbol.h" 10 12 #include "demangle-java.h" 11 13 #include "demangle-rust.h"
+3
tools/perf/util/symbol.c
··· 17 17 #include "util.h" 18 18 #include "debug.h" 19 19 #include "machine.h" 20 + #include "map.h" 20 21 #include "symbol.h" 21 22 #include "strlist.h" 22 23 #include "intlist.h" ··· 718 717 } 719 718 720 719 pos->start -= curr_map->start - curr_map->pgoff; 720 + if (pos->end > curr_map->end) 721 + pos->end = curr_map->end; 721 722 if (pos->end) 722 723 pos->end -= curr_map->start - curr_map->pgoff; 723 724 symbols__insert(&curr_map->dso->symbols, pos);
+1 -13
tools/perf/util/symbol.h
··· 8 8 #include <linux/list.h> 9 9 #include <linux/rbtree.h> 10 10 #include <stdio.h> 11 + #include "map_symbol.h" 11 12 #include "branch.h" 12 13 #include "path.h" 13 14 #include "symbol_conf.h" ··· 114 113 const char *name; 115 114 u64 addr; 116 115 u64 unrelocated_addr; 117 - }; 118 - 119 - struct map_symbol { 120 - struct map *map; 121 - struct symbol *sym; 122 - }; 123 - 124 - struct addr_map_symbol { 125 - struct map *map; 126 - struct symbol *sym; 127 - u64 addr; 128 - u64 al_addr; 129 - u64 phys_addr; 130 116 }; 131 117 132 118 struct branch_info {
+48 -30
tools/perf/util/thread-stack.c
··· 38 38 * @cp: call path 39 39 * @no_call: a 'call' was not seen 40 40 * @trace_end: a 'call' but trace ended 41 + * @non_call: a branch but not a 'call' to the start of a different symbol 41 42 */ 42 43 struct thread_stack_entry { 43 44 u64 ret_addr; ··· 48 47 struct call_path *cp; 49 48 bool no_call; 50 49 bool trace_end; 50 + bool non_call; 51 51 }; 52 52 53 53 /** ··· 270 268 cr.flags |= CALL_RETURN_NO_CALL; 271 269 if (no_return) 272 270 cr.flags |= CALL_RETURN_NO_RETURN; 271 + if (tse->non_call) 272 + cr.flags |= CALL_RETURN_NON_CALL; 273 273 274 274 return crp->process(&cr, crp->data); 275 275 } ··· 497 493 struct thread_stack_entry *tse; 498 494 int err; 499 495 496 + if (!cp) 497 + return -ENOMEM; 498 + 500 499 if (ts->cnt == ts->sz) { 501 500 err = thread_stack__grow(ts); 502 501 if (err) ··· 514 507 tse->cp = cp; 515 508 tse->no_call = no_call; 516 509 tse->trace_end = trace_end; 510 + tse->non_call = false; 517 511 518 512 return 0; 519 513 } ··· 536 528 timestamp, ref, false); 537 529 } 538 530 539 - if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) { 531 + if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && 532 + !ts->stack[ts->cnt - 1].non_call) { 540 533 return thread_stack__call_return(thread, ts, --ts->cnt, 541 534 timestamp, ref, false); 542 535 } else { 543 536 size_t i = ts->cnt - 1; 544 537 545 538 while (i--) { 546 - if (ts->stack[i].ret_addr != ret_addr) 539 + if (ts->stack[i].ret_addr != ret_addr || 540 + ts->stack[i].non_call) 547 541 continue; 548 542 i += 1; 549 543 while (ts->cnt > i) { ··· 586 576 587 577 cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, 588 578 ts->kernel_start); 589 - if (!cp) 590 - return -ENOMEM; 591 579 592 580 return thread_stack__push_cp(ts, ip, sample->time, ref, cp, 593 581 true, false); ··· 598 590 struct addr_location *to_al, u64 ref) 599 591 { 600 592 struct call_path_root *cpr = ts->crp->cpr; 593 + struct call_path *root = &cpr->call_path; 594 + struct symbol *fsym = from_al->sym; 595 + struct symbol 
*tsym = to_al->sym; 601 596 struct call_path *cp, *parent; 602 597 u64 ks = ts->kernel_start; 598 + u64 addr = sample->addr; 599 + u64 tm = sample->time; 600 + u64 ip = sample->ip; 603 601 int err; 604 602 605 - if (sample->ip >= ks && sample->addr < ks) { 603 + if (ip >= ks && addr < ks) { 606 604 /* Return to userspace, so pop all kernel addresses */ 607 605 while (thread_stack__in_kernel(ts)) { 608 606 err = thread_stack__call_return(thread, ts, --ts->cnt, 609 - sample->time, ref, 610 - true); 607 + tm, ref, true); 611 608 if (err) 612 609 return err; 613 610 } 614 611 615 612 /* If the stack is empty, push the userspace address */ 616 613 if (!ts->cnt) { 617 - cp = call_path__findnew(cpr, &cpr->call_path, 618 - to_al->sym, sample->addr, 619 - ts->kernel_start); 620 - if (!cp) 621 - return -ENOMEM; 622 - return thread_stack__push_cp(ts, 0, sample->time, ref, 623 - cp, true, false); 614 + cp = call_path__findnew(cpr, root, tsym, addr, ks); 615 + return thread_stack__push_cp(ts, 0, tm, ref, cp, true, 616 + false); 624 617 } 625 - } else if (thread_stack__in_kernel(ts) && sample->ip < ks) { 618 + } else if (thread_stack__in_kernel(ts) && ip < ks) { 626 619 /* Return to userspace, so pop all kernel addresses */ 627 620 while (thread_stack__in_kernel(ts)) { 628 621 err = thread_stack__call_return(thread, ts, --ts->cnt, 629 - sample->time, ref, 630 - true); 622 + tm, ref, true); 631 623 if (err) 632 624 return err; 633 625 } ··· 636 628 if (ts->cnt) 637 629 parent = ts->stack[ts->cnt - 1].cp; 638 630 else 639 - parent = &cpr->call_path; 631 + parent = root; 640 632 641 633 /* This 'return' had no 'call', so push and pop top of stack */ 642 - cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip, 643 - ts->kernel_start); 644 - if (!cp) 645 - return -ENOMEM; 634 + cp = call_path__findnew(cpr, parent, fsym, ip, ks); 646 635 647 - err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, 648 - true, false); 636 + err = thread_stack__push_cp(ts, addr, 
tm, ref, cp, true, false); 649 637 if (err) 650 638 return err; 651 639 652 - return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, 653 - to_al->sym); 640 + return thread_stack__pop_cp(thread, ts, addr, tm, ref, tsym); 654 641 } 655 642 656 643 static int thread_stack__trace_begin(struct thread *thread, ··· 683 680 684 681 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, 685 682 ts->kernel_start); 686 - if (!cp) 687 - return -ENOMEM; 688 683 689 684 ret_addr = sample->ip + sample->insn_len; 690 685 ··· 746 745 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, 747 746 to_al->sym, sample->addr, 748 747 ts->kernel_start); 749 - if (!cp) 750 - return -ENOMEM; 751 748 err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, 752 749 cp, false, trace_end); 753 750 } else if (sample->flags & PERF_IP_FLAG_RETURN) { ··· 764 765 err = thread_stack__trace_begin(thread, ts, sample->time, ref); 765 766 } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { 766 767 err = thread_stack__trace_end(ts, sample, ref); 768 + } else if (sample->flags & PERF_IP_FLAG_BRANCH && 769 + from_al->sym != to_al->sym && to_al->sym && 770 + to_al->addr == to_al->sym->start) { 771 + struct call_path_root *cpr = ts->crp->cpr; 772 + struct call_path *cp; 773 + 774 + /* 775 + * The compiler might optimize a call/ret combination by making 776 + * it a jmp. Make that visible by recording on the stack a 777 + * branch to the start of a different symbol. Note, that means 778 + * when a ret pops the stack, all jmps must be popped off first. 779 + */ 780 + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, 781 + to_al->sym, sample->addr, 782 + ts->kernel_start); 783 + err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, 784 + false); 785 + if (!err) 786 + ts->stack[ts->cnt - 1].non_call = true; 767 787 } 768 788 769 789 return err;
+3
tools/perf/util/thread-stack.h
··· 35 35 * 36 36 * CALL_RETURN_NO_CALL: 'return' but no matching 'call' 37 37 * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' 38 + * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different 39 + * symbol 38 40 */ 39 41 enum { 40 42 CALL_RETURN_NO_CALL = 1 << 0, 41 43 CALL_RETURN_NO_RETURN = 1 << 1, 44 + CALL_RETURN_NON_CALL = 1 << 2, 42 45 }; 43 46 44 47 /**
+1
tools/perf/util/thread.c
··· 12 12 #include "debug.h" 13 13 #include "namespaces.h" 14 14 #include "comm.h" 15 + #include "symbol.h" 15 16 #include "unwind.h" 16 17 17 18 #include <api/fs/fs.h>
+5 -2
tools/perf/util/thread.h
··· 5 5 #include <linux/refcount.h> 6 6 #include <linux/rbtree.h> 7 7 #include <linux/list.h> 8 + #include <stdio.h> 8 9 #include <unistd.h> 9 10 #include <sys/types.h> 10 - #include "symbol.h" 11 - #include "map.h" 11 + #include "srccode.h" 12 + #include "symbol_conf.h" 12 13 #include <strlist.h> 13 14 #include <intlist.h> 14 15 #include "rwsem.h" 15 16 17 + struct addr_location; 18 + struct map; 16 19 struct namespaces_event; 17 20 struct thread_stack; 18 21 struct unwind_libunwind_ops;
+2
tools/perf/util/unwind-libdw.c
··· 8 8 #include "unwind.h" 9 9 #include "unwind-libdw.h" 10 10 #include "machine.h" 11 + #include "map.h" 12 + #include "symbol.h" 11 13 #include "thread.h" 12 14 #include <linux/types.h> 13 15 #include "event.h"
+1
tools/perf/util/unwind-libunwind-local.c
··· 34 34 #include "session.h" 35 35 #include "perf_regs.h" 36 36 #include "unwind.h" 37 + #include "map.h" 37 38 #include "symbol.h" 38 39 #include "util.h" 39 40 #include "debug.h"
+1
tools/perf/util/unwind-libunwind.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include "unwind.h" 3 + #include "map.h" 3 4 #include "thread.h" 4 5 #include "session.h" 5 6 #include "debug.h"
+1
tools/perf/util/vdso.c
··· 11 11 12 12 #include "vdso.h" 13 13 #include "util.h" 14 + #include "map.h" 14 15 #include "symbol.h" 15 16 #include "machine.h" 16 17 #include "thread.h"