Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-for-mingo-5.7-20200310' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf stat:

Jin Yao:

- Show percore counts in per CPU output.

perf report:

Jin Yao:

- Allow selecting which block info columns to report and their order.

- Support color ops to print block percents in color.

- Fix wrong block address comparison in block_info__cmp().

perf annotate:

Ravi Bangoria:

- Get rid of annotation->nr_jumps, unused.

expr:

Jiri Olsa:

- Move expr lexer to flex.

llvm:

Arnaldo Carvalho de Melo:

- Add debug hint message about missing kernel-devel package.

core:

Kan Liang:

- Initial patches to support the recently added PERF_SAMPLE_BRANCH_HW_INDEX
kernel feature.

- Add check for unexpected use of reserved members in event attr, so that in
the future older perf tools will complain instead of silently trying to process
unknown features.

libapi:

Namhyung Kim:

- Adopt cgroupfs_find_mountpoint() from tools/perf/util/.

libperf:

Michael Petlan:

- Add counting example.

libtraceevent:

Steven Rostedt (VMware):

- Remove extra '\n' in print_event_time().

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+755 -382
+7 -1
tools/include/uapi/linux/perf_event.h
··· 181 181 182 182 PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ 183 183 184 + PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ 185 + 184 186 PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ 185 187 }; 186 188 ··· 209 207 210 208 PERF_SAMPLE_BRANCH_TYPE_SAVE = 211 209 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, 210 + 211 + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, 212 212 213 213 PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, 214 214 }; ··· 857 853 * char data[size];}&& PERF_SAMPLE_RAW 858 854 * 859 855 * { u64 nr; 860 - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK 856 + * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX 857 + * { u64 from, to, flags } lbr[nr]; 858 + * } && PERF_SAMPLE_BRANCH_STACK 861 859 * 862 860 * { u64 abi; # enum perf_sample_regs_abi 863 861 * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+1
tools/lib/api/fs/Build
··· 1 1 libapi-y += fs.o 2 2 libapi-y += tracing_path.o 3 + libapi-y += cgroup.o
+67
tools/lib/api/fs/cgroup.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/stringify.h> 3 + #include <sys/types.h> 4 + #include <sys/stat.h> 5 + #include <fcntl.h> 6 + #include <stdio.h> 7 + #include <stdlib.h> 8 + #include <string.h> 9 + #include "fs.h" 10 + 11 + int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys) 12 + { 13 + FILE *fp; 14 + char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; 15 + char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; 16 + char *token, *saved_ptr = NULL; 17 + 18 + fp = fopen("/proc/mounts", "r"); 19 + if (!fp) 20 + return -1; 21 + 22 + /* 23 + * in order to handle split hierarchy, we need to scan /proc/mounts 24 + * and inspect every cgroupfs mount point to find one that has 25 + * perf_event subsystem 26 + */ 27 + path_v1[0] = '\0'; 28 + path_v2[0] = '\0'; 29 + 30 + while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" 31 + __stringify(PATH_MAX)"s %*d %*d\n", 32 + mountpoint, type, tokens) == 3) { 33 + 34 + if (!path_v1[0] && !strcmp(type, "cgroup")) { 35 + 36 + token = strtok_r(tokens, ",", &saved_ptr); 37 + 38 + while (token != NULL) { 39 + if (subsys && !strcmp(token, subsys)) { 40 + strcpy(path_v1, mountpoint); 41 + break; 42 + } 43 + token = strtok_r(NULL, ",", &saved_ptr); 44 + } 45 + } 46 + 47 + if (!path_v2[0] && !strcmp(type, "cgroup2")) 48 + strcpy(path_v2, mountpoint); 49 + 50 + if (path_v1[0] && path_v2[0]) 51 + break; 52 + } 53 + fclose(fp); 54 + 55 + if (path_v1[0]) 56 + path = path_v1; 57 + else if (path_v2[0]) 58 + path = path_v2; 59 + else 60 + return -1; 61 + 62 + if (strlen(path) < maxlen) { 63 + strcpy(buf, path); 64 + return 0; 65 + } 66 + return -1; 67 + }
+2
tools/lib/api/fs/fs.h
··· 28 28 #undef FS 29 29 30 30 31 + int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys); 32 + 31 33 int filename__read_int(const char *filename, int *value); 32 34 int filename__read_ull(const char *filename, unsigned long long *value); 33 35 int filename__read_xll(const char *filename, unsigned long long *value);
+83
tools/lib/perf/Documentation/examples/counting.c
··· 1 + #include <linux/perf_event.h> 2 + #include <perf/evlist.h> 3 + #include <perf/evsel.h> 4 + #include <perf/cpumap.h> 5 + #include <perf/threadmap.h> 6 + #include <perf/mmap.h> 7 + #include <perf/core.h> 8 + #include <perf/event.h> 9 + #include <stdio.h> 10 + #include <unistd.h> 11 + 12 + static int libperf_print(enum libperf_print_level level, 13 + const char *fmt, va_list ap) 14 + { 15 + return vfprintf(stderr, fmt, ap); 16 + } 17 + 18 + int main(int argc, char **argv) 19 + { 20 + int count = 100000, err = 0; 21 + struct perf_evlist *evlist; 22 + struct perf_evsel *evsel; 23 + struct perf_thread_map *threads; 24 + struct perf_counts_values counts; 25 + 26 + struct perf_event_attr attr1 = { 27 + .type = PERF_TYPE_SOFTWARE, 28 + .config = PERF_COUNT_SW_CPU_CLOCK, 29 + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, 30 + .disabled = 1, 31 + }; 32 + struct perf_event_attr attr2 = { 33 + .type = PERF_TYPE_SOFTWARE, 34 + .config = PERF_COUNT_SW_TASK_CLOCK, 35 + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, 36 + .disabled = 1, 37 + }; 38 + 39 + libperf_init(libperf_print); 40 + threads = perf_thread_map__new_dummy(); 41 + if (!threads) { 42 + fprintf(stderr, "failed to create threads\n"); 43 + return -1; 44 + } 45 + perf_thread_map__set_pid(threads, 0, 0); 46 + evlist = perf_evlist__new(); 47 + if (!evlist) { 48 + fprintf(stderr, "failed to create evlist\n"); 49 + goto out_threads; 50 + } 51 + evsel = perf_evsel__new(&attr1); 52 + if (!evsel) { 53 + fprintf(stderr, "failed to create evsel1\n"); 54 + goto out_evlist; 55 + } 56 + perf_evlist__add(evlist, evsel); 57 + evsel = perf_evsel__new(&attr2); 58 + if (!evsel) { 59 + fprintf(stderr, "failed to create evsel2\n"); 60 + goto out_evlist; 61 + } 62 + perf_evlist__add(evlist, evsel); 63 + perf_evlist__set_maps(evlist, NULL, threads); 64 + err = perf_evlist__open(evlist); 65 + if (err) { 66 + fprintf(stderr, "failed to open evsel\n"); 67 + goto out_evlist; 
68 + } 69 + perf_evlist__enable(evlist); 70 + while (count--); 71 + perf_evlist__disable(evlist); 72 + perf_evlist__for_each_evsel(evlist, evsel) { 73 + perf_evsel__read(evsel, 0, 0, &counts); 74 + fprintf(stdout, "count %llu, enabled %llu, run %llu\n", 75 + counts.val, counts.ena, counts.run); 76 + } 77 + perf_evlist__close(evlist); 78 + out_evlist: 79 + perf_evlist__delete(evlist); 80 + out_threads: 81 + perf_thread_map__put(threads); 82 + return err; 83 + }
+1 -1
tools/lib/traceevent/event-parse.c
··· 5541 5541 if (p10 > 1 && p10 < time) 5542 5542 trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10); 5543 5543 else 5544 - trace_seq_printf(s, "%12llu\n", time); 5544 + trace_seq_printf(s, "%12llu", time); 5545 5545 } 5546 5546 5547 5547 struct print_event_type {
+9
tools/perf/Documentation/perf-stat.txt
··· 334 334 --all-user:: 335 335 Configure all used events to run in user space. 336 336 337 + --percore-show-thread:: 338 + The event modifier "percore" has supported to sum up the event counts 339 + for all hardware threads in a core and show the counts per core. 340 + 341 + This option with event modifier "percore" enabled also sums up the event 342 + counts for all hardware threads in a core but show the sum counts per 343 + hardware thread. This is essentially a replacement for the any bit and 344 + convenient for post processing. 345 + 337 346 EXAMPLES 338 347 -------- 339 348
+4 -20
tools/perf/builtin-diff.c
··· 572 572 bh->valid = true; 573 573 } 574 574 575 - static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b) 576 - { 577 - struct block_info *bi_a = a->block_info; 578 - struct block_info *bi_b = b->block_info; 579 - int cmp; 580 - 581 - if (!bi_a->sym || !bi_b->sym) 582 - return -1; 583 - 584 - cmp = strcmp(bi_a->sym->name, bi_b->sym->name); 585 - 586 - if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end)) 587 - return 0; 588 - 589 - return -1; 590 - } 591 - 592 575 static struct hist_entry *get_block_pair(struct hist_entry *he, 593 576 struct hists *hists_pair) 594 577 { 595 578 struct rb_root_cached *root = hists_pair->entries_in; 596 579 struct rb_node *next = rb_first_cached(root); 597 - int cmp; 580 + int64_t cmp; 598 581 599 582 while (next != NULL) { 600 583 struct hist_entry *he_pair = rb_entry(next, struct hist_entry, ··· 585 602 586 603 next = rb_next(&he_pair->rb_node_in); 587 604 588 - cmp = block_pair_cmp(he_pair, he); 605 + cmp = __block_info__cmp(he_pair, he); 589 606 if (!cmp) 590 607 return he_pair; 591 608 } ··· 1295 1312 end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, 1296 1313 he->ms.sym); 1297 1314 1298 - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { 1315 + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && 1316 + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { 1299 1317 scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld", 1300 1318 start_line, end_line, block_he->diff.cycles); 1301 1319 } else {
+18 -3
tools/perf/builtin-report.c
··· 104 104 bool symbol_ipc; 105 105 bool total_cycles_mode; 106 106 struct block_report *block_reports; 107 + int nr_block_reports; 107 108 }; 108 109 109 110 static int report__config(const char *var, const char *value, void *cb) ··· 967 966 report__output_resort(rep); 968 967 969 968 if (rep->total_cycles_mode) { 969 + int block_hpps[6] = { 970 + PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT, 971 + PERF_HPP_REPORT__BLOCK_LBR_CYCLES, 972 + PERF_HPP_REPORT__BLOCK_CYCLES_PCT, 973 + PERF_HPP_REPORT__BLOCK_AVG_CYCLES, 974 + PERF_HPP_REPORT__BLOCK_RANGE, 975 + PERF_HPP_REPORT__BLOCK_DSO, 976 + }; 977 + 970 978 rep->block_reports = block_info__create_report(session->evlist, 971 - rep->total_cycles); 979 + rep->total_cycles, 980 + block_hpps, 6, 981 + &rep->nr_block_reports); 972 982 if (!rep->block_reports) 973 983 return -1; 974 984 } ··· 1563 1551 zfree(&report.ptime_range); 1564 1552 } 1565 1553 1566 - if (report.block_reports) 1567 - zfree(&report.block_reports); 1554 + if (report.block_reports) { 1555 + block_info__free_report(report.block_reports, 1556 + report.nr_block_reports); 1557 + report.block_reports = NULL; 1558 + } 1568 1559 1569 1560 zstd_fini(&(session->zstd_data)); 1570 1561 perf_session__delete(session);
+37 -33
tools/perf/builtin-script.c
··· 735 735 struct perf_event_attr *attr, FILE *fp) 736 736 { 737 737 struct branch_stack *br = sample->branch_stack; 738 + struct branch_entry *entries = perf_sample__branch_entries(sample); 738 739 struct addr_location alf, alt; 739 740 u64 i, from, to; 740 741 int printed = 0; ··· 744 743 return 0; 745 744 746 745 for (i = 0; i < br->nr; i++) { 747 - from = br->entries[i].from; 748 - to = br->entries[i].to; 746 + from = entries[i].from; 747 + to = entries[i].to; 749 748 750 749 if (PRINT_FIELD(DSO)) { 751 750 memset(&alf, 0, sizeof(alf)); ··· 769 768 } 770 769 771 770 printed += fprintf(fp, "/%c/%c/%c/%d ", 772 - mispred_str( br->entries + i), 773 - br->entries[i].flags.in_tx? 'X' : '-', 774 - br->entries[i].flags.abort? 'A' : '-', 775 - br->entries[i].flags.cycles); 771 + mispred_str(entries + i), 772 + entries[i].flags.in_tx ? 'X' : '-', 773 + entries[i].flags.abort ? 'A' : '-', 774 + entries[i].flags.cycles); 776 775 } 777 776 778 777 return printed; ··· 783 782 struct perf_event_attr *attr, FILE *fp) 784 783 { 785 784 struct branch_stack *br = sample->branch_stack; 785 + struct branch_entry *entries = perf_sample__branch_entries(sample); 786 786 struct addr_location alf, alt; 787 787 u64 i, from, to; 788 788 int printed = 0; ··· 795 793 796 794 memset(&alf, 0, sizeof(alf)); 797 795 memset(&alt, 0, sizeof(alt)); 798 - from = br->entries[i].from; 799 - to = br->entries[i].to; 796 + from = entries[i].from; 797 + to = entries[i].to; 800 798 801 799 thread__find_symbol_fb(thread, sample->cpumode, from, &alf); 802 800 thread__find_symbol_fb(thread, sample->cpumode, to, &alt); ··· 815 813 printed += fprintf(fp, ")"); 816 814 } 817 815 printed += fprintf(fp, "/%c/%c/%c/%d ", 818 - mispred_str( br->entries + i), 819 - br->entries[i].flags.in_tx? 'X' : '-', 820 - br->entries[i].flags.abort? 'A' : '-', 821 - br->entries[i].flags.cycles); 816 + mispred_str(entries + i), 817 + entries[i].flags.in_tx ? 'X' : '-', 818 + entries[i].flags.abort ? 
'A' : '-', 819 + entries[i].flags.cycles); 822 820 } 823 821 824 822 return printed; ··· 829 827 struct perf_event_attr *attr, FILE *fp) 830 828 { 831 829 struct branch_stack *br = sample->branch_stack; 830 + struct branch_entry *entries = perf_sample__branch_entries(sample); 832 831 struct addr_location alf, alt; 833 832 u64 i, from, to; 834 833 int printed = 0; ··· 841 838 842 839 memset(&alf, 0, sizeof(alf)); 843 840 memset(&alt, 0, sizeof(alt)); 844 - from = br->entries[i].from; 845 - to = br->entries[i].to; 841 + from = entries[i].from; 842 + to = entries[i].to; 846 843 847 844 if (thread__find_map_fb(thread, sample->cpumode, from, &alf) && 848 845 !alf.map->dso->adjust_symbols) ··· 865 862 printed += fprintf(fp, ")"); 866 863 } 867 864 printed += fprintf(fp, "/%c/%c/%c/%d ", 868 - mispred_str(br->entries + i), 869 - br->entries[i].flags.in_tx ? 'X' : '-', 870 - br->entries[i].flags.abort ? 'A' : '-', 871 - br->entries[i].flags.cycles); 865 + mispred_str(entries + i), 866 + entries[i].flags.in_tx ? 'X' : '-', 867 + entries[i].flags.abort ? 'A' : '-', 868 + entries[i].flags.cycles); 872 869 } 873 870 874 871 return printed; ··· 1056 1053 struct machine *machine, FILE *fp) 1057 1054 { 1058 1055 struct branch_stack *br = sample->branch_stack; 1056 + struct branch_entry *entries = perf_sample__branch_entries(sample); 1059 1057 u64 start, end; 1060 1058 int i, insn, len, nr, ilen, printed = 0; 1061 1059 struct perf_insn x; ··· 1077 1073 printed += fprintf(fp, "%c", '\n'); 1078 1074 1079 1075 /* Handle first from jump, of which we don't know the entry. 
*/ 1080 - len = grab_bb(buffer, br->entries[nr-1].from, 1081 - br->entries[nr-1].from, 1076 + len = grab_bb(buffer, entries[nr-1].from, 1077 + entries[nr-1].from, 1082 1078 machine, thread, &x.is64bit, &x.cpumode, false); 1083 1079 if (len > 0) { 1084 - printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, 1080 + printed += ip__fprintf_sym(entries[nr - 1].from, thread, 1085 1081 x.cpumode, x.cpu, &lastsym, attr, fp); 1086 - printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], 1082 + printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], 1087 1083 &x, buffer, len, 0, fp, &total_cycles); 1088 1084 if (PRINT_FIELD(SRCCODE)) 1089 - printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from); 1085 + printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); 1090 1086 } 1091 1087 1092 1088 /* Print all blocks */ 1093 1089 for (i = nr - 2; i >= 0; i--) { 1094 - if (br->entries[i].from || br->entries[i].to) 1090 + if (entries[i].from || entries[i].to) 1095 1091 pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, 1096 - br->entries[i].from, 1097 - br->entries[i].to); 1098 - start = br->entries[i + 1].to; 1099 - end = br->entries[i].from; 1092 + entries[i].from, 1093 + entries[i].to); 1094 + start = entries[i + 1].to; 1095 + end = entries[i].from; 1100 1096 1101 1097 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); 1102 1098 /* Patch up missing kernel transfers due to ring filters */ 1103 1099 if (len == -ENXIO && i > 0) { 1104 - end = br->entries[--i].from; 1100 + end = entries[--i].from; 1105 1101 pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); 1106 1102 len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); 1107 1103 } ··· 1114 1110 1115 1111 printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); 1116 1112 if (ip == end) { 1117 - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, 
fp, 1113 + printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp, 1118 1114 &total_cycles); 1119 1115 if (PRINT_FIELD(SRCCODE)) 1120 1116 printed += print_srccode(thread, x.cpumode, ip); ··· 1138 1134 * Hit the branch? In this case we are already done, and the target 1139 1135 * has not been executed yet. 1140 1136 */ 1141 - if (br->entries[0].from == sample->ip) 1137 + if (entries[0].from == sample->ip) 1142 1138 goto out; 1143 - if (br->entries[0].flags.abort) 1139 + if (entries[0].flags.abort) 1144 1140 goto out; 1145 1141 1146 1142 /* ··· 1151 1147 * between final branch and sample. When this happens just 1152 1148 * continue walking after the last TO until we hit a branch. 1153 1149 */ 1154 - start = br->entries[0].to; 1150 + start = entries[0].to; 1155 1151 end = sample->ip; 1156 1152 if (end < start) { 1157 1153 /* Missing jump. Scan 128 bytes for the next branch */
+4
tools/perf/builtin-stat.c
··· 929 929 OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user, 930 930 "Configure all used events to run in user space.", 931 931 PARSE_OPT_EXCLUSIVE), 932 + OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread, 933 + "Use with 'percore' event qualifier to show the event " 934 + "counts of one hardware thread by sum up total hardware " 935 + "threads of same physical core"), 932 936 OPT_END() 933 937 }; 934 938
+5 -5
tools/perf/tests/expr.c
··· 10 10 { 11 11 double val; 12 12 13 - if (expr__parse(&val, ctx, &e)) 13 + if (expr__parse(&val, ctx, e)) 14 14 TEST_ASSERT_VAL("parse test failed", 0); 15 15 TEST_ASSERT_VAL("unexpected value", val == val2); 16 16 return 0; ··· 44 44 return ret; 45 45 46 46 p = "FOO/0"; 47 - ret = expr__parse(&val, &ctx, &p); 48 - TEST_ASSERT_VAL("division by zero", ret == 1); 47 + ret = expr__parse(&val, &ctx, p); 48 + TEST_ASSERT_VAL("division by zero", ret == -1); 49 49 50 50 p = "BAR/"; 51 - ret = expr__parse(&val, &ctx, &p); 52 - TEST_ASSERT_VAL("missing operand", ret == 1); 51 + ret = expr__parse(&val, &ctx, p); 52 + TEST_ASSERT_VAL("missing operand", ret == -1); 53 53 54 54 TEST_ASSERT_VAL("find other", 55 55 expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
+6 -1
tools/perf/tests/sample-parsing.c
··· 99 99 100 100 if (type & PERF_SAMPLE_BRANCH_STACK) { 101 101 COMP(branch_stack->nr); 102 + COMP(branch_stack->hw_idx); 102 103 for (i = 0; i < s1->branch_stack->nr; i++) 103 104 MCOMP(branch_stack->entries[i]); 104 105 } ··· 187 186 u64 data[64]; 188 187 } branch_stack = { 189 188 /* 1 branch_entry */ 190 - .data = {1, 211, 212, 213}, 189 + .data = {1, -1ULL, 211, 212, 213}, 191 190 }; 192 191 u64 regs[64]; 193 192 const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL}; ··· 209 208 .transaction = 112, 210 209 .raw_data = (void *)raw_data, 211 210 .callchain = &callchain.callchain, 211 + .no_hw_idx = false, 212 212 .branch_stack = &branch_stack.branch_stack, 213 213 .user_regs = { 214 214 .abi = PERF_SAMPLE_REGS_ABI_64, ··· 245 243 246 244 if (sample_type & PERF_SAMPLE_REGS_INTR) 247 245 evsel.core.attr.sample_regs_intr = sample_regs; 246 + 247 + if (sample_type & PERF_SAMPLE_BRANCH_STACK) 248 + evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 248 249 249 250 for (i = 0; i < sizeof(regs); i++) 250 251 *(i + (u8 *)regs) = i & 0xfe;
+10 -1
tools/perf/util/Build
··· 121 121 perf-y += vsprintf.o 122 122 perf-y += units.o 123 123 perf-y += time-utils.o 124 + perf-y += expr-flex.o 124 125 perf-y += expr-bison.o 126 + perf-y += expr.o 125 127 perf-y += branch.o 126 128 perf-y += mem2node.o 127 129 ··· 191 189 $(call rule_mkdir) 192 190 $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ 193 191 192 + $(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c 193 + $(call rule_mkdir) 194 + $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l 195 + 194 196 $(OUTPUT)util/expr-bison.c: util/expr.y 195 197 $(call rule_mkdir) 196 - $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__ 198 + $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_ 197 199 198 200 $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c 199 201 $(call rule_mkdir) ··· 209 203 210 204 CFLAGS_parse-events-flex.o += -w 211 205 CFLAGS_pmu-flex.o += -w 206 + CFLAGS_expr-flex.o += -w 212 207 CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w 213 208 CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w 214 209 CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w 215 210 216 211 $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c 217 212 $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c 213 + $(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c 218 214 219 215 CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 220 216 CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" ··· 224 216 CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 225 217 CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 
226 218 CFLAGS_parse-events.o += -Wno-redundant-decls 219 + CFLAGS_expr.o += -Wno-redundant-decls 227 220 CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE 228 221 229 222 $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
-2
tools/perf/util/annotate.c
··· 2611 2611 2612 2612 if (++al->jump_sources > notes->max_jump_sources) 2613 2613 notes->max_jump_sources = al->jump_sources; 2614 - 2615 - ++notes->nr_jumps; 2616 2614 } 2617 2615 } 2618 2616
-1
tools/perf/util/annotate.h
··· 279 279 struct annotation_options *options; 280 280 struct annotation_line **offsets; 281 281 int nr_events; 282 - int nr_jumps; 283 282 int max_jump_sources; 284 283 int nr_entries; 285 284 int nr_asm_entries;
+65 -44
tools/perf/util/block-info.c
··· 65 65 return bi; 66 66 } 67 67 68 - int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, 69 - struct hist_entry *left, struct hist_entry *right) 68 + int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right) 70 69 { 71 70 struct block_info *bi_l = left->block_info; 72 71 struct block_info *bi_r = right->block_info; ··· 73 74 74 75 if (!bi_l->sym || !bi_r->sym) { 75 76 if (!bi_l->sym && !bi_r->sym) 76 - return 0; 77 + return -1; 77 78 else if (!bi_l->sym) 78 79 return -1; 79 80 else 80 81 return 1; 81 82 } 82 83 83 - if (bi_l->sym == bi_r->sym) { 84 - if (bi_l->start == bi_r->start) { 85 - if (bi_l->end == bi_r->end) 86 - return 0; 87 - else 88 - return (int64_t)(bi_r->end - bi_l->end); 89 - } else 90 - return (int64_t)(bi_r->start - bi_l->start); 91 - } else { 92 - cmp = strcmp(bi_l->sym->name, bi_r->sym->name); 84 + cmp = strcmp(bi_l->sym->name, bi_r->sym->name); 85 + if (cmp) 93 86 return cmp; 94 - } 95 87 96 - if (bi_l->sym->start != bi_r->sym->start) 97 - return (int64_t)(bi_r->sym->start - bi_l->sym->start); 88 + if (bi_l->start != bi_r->start) 89 + return (int64_t)(bi_r->start - bi_l->start); 98 90 99 - return (int64_t)(bi_r->sym->end - bi_l->sym->end); 91 + return (int64_t)(bi_r->end - bi_l->end); 92 + } 93 + 94 + int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, 95 + struct hist_entry *left, struct hist_entry *right) 96 + { 97 + return __block_info__cmp(left, right); 100 98 } 101 99 102 100 static void init_block_info(struct block_info *bi, struct symbol *sym, ··· 181 185 return block_fmt->width; 182 186 } 183 187 188 + static int color_pct(struct perf_hpp *hpp, int width, double pct) 189 + { 190 + #ifdef HAVE_SLANG_SUPPORT 191 + if (use_browser) { 192 + return __hpp__slsmg_color_printf(hpp, "%*.2f%%", 193 + width - 1, pct); 194 + } 195 + #endif 196 + return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct); 197 + } 198 + 184 199 static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt, 185 200 
struct perf_hpp *hpp, 186 201 struct hist_entry *he) ··· 199 192 struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt); 200 193 struct block_info *bi = he->block_info; 201 194 double ratio = 0.0; 202 - char buf[16]; 203 195 204 196 if (block_fmt->total_cycles) 205 197 ratio = (double)bi->cycles / (double)block_fmt->total_cycles; 206 198 207 - sprintf(buf, "%.2f%%", 100.0 * ratio); 208 - 209 - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); 199 + return color_pct(hpp, block_fmt->width, 100.0 * ratio); 210 200 } 211 201 212 202 static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt, ··· 256 252 struct block_info *bi = he->block_info; 257 253 double ratio = 0.0; 258 254 u64 avg; 259 - char buf[16]; 260 255 261 256 if (block_fmt->block_cycles && bi->num_aggr) { 262 257 avg = bi->cycles_aggr / bi->num_aggr; 263 258 ratio = (double)avg / (double)block_fmt->block_cycles; 264 259 } 265 260 266 - sprintf(buf, "%.2f%%", 100.0 * ratio); 267 - 268 - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); 261 + return color_pct(hpp, block_fmt->width, 100.0 * ratio); 269 262 } 270 263 271 264 static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt, ··· 296 295 end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, 297 296 he->ms.sym); 298 297 299 - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { 298 + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && 299 + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { 300 300 scnprintf(buf, sizeof(buf), "[%s -> %s]", 301 301 start_line, end_line); 302 302 } else { ··· 350 348 351 349 switch (idx) { 352 350 case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT: 353 - fmt->entry = block_total_cycles_pct_entry; 351 + fmt->color = block_total_cycles_pct_entry; 354 352 fmt->cmp = block_info__cmp; 355 353 fmt->sort = block_total_cycles_pct_sort; 356 354 break; ··· 358 356 fmt->entry = block_cycles_lbr_entry; 359 
357 break; 360 358 case PERF_HPP_REPORT__BLOCK_CYCLES_PCT: 361 - fmt->entry = block_cycles_pct_entry; 359 + fmt->color = block_cycles_pct_entry; 362 360 break; 363 361 case PERF_HPP_REPORT__BLOCK_AVG_CYCLES: 364 362 fmt->entry = block_avg_cycles_entry; ··· 378 376 } 379 377 380 378 static void register_block_columns(struct perf_hpp_list *hpp_list, 381 - struct block_fmt *block_fmts) 379 + struct block_fmt *block_fmts, 380 + int *block_hpps, int nr_hpps) 382 381 { 383 - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) 384 - hpp_register(&block_fmts[i], i, hpp_list); 382 + for (int i = 0; i < nr_hpps; i++) 383 + hpp_register(&block_fmts[i], block_hpps[i], hpp_list); 385 384 } 386 385 387 - static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts) 386 + static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts, 387 + int *block_hpps, int nr_hpps) 388 388 { 389 389 __hists__init(&bh->block_hists, &bh->block_list); 390 390 perf_hpp_list__init(&bh->block_list); 391 391 bh->block_list.nr_header_lines = 1; 392 392 393 - register_block_columns(&bh->block_list, block_fmts); 393 + register_block_columns(&bh->block_list, block_fmts, 394 + block_hpps, nr_hpps); 394 395 395 - perf_hpp_list__register_sort_field(&bh->block_list, 396 - &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt); 396 + /* Sort by the first fmt */ 397 + perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt); 397 398 } 398 399 399 - static void process_block_report(struct hists *hists, 400 - struct block_report *block_report, 401 - u64 total_cycles) 400 + static int process_block_report(struct hists *hists, 401 + struct block_report *block_report, 402 + u64 total_cycles, int *block_hpps, 403 + int nr_hpps) 402 404 { 403 405 struct rb_node *next = rb_first_cached(&hists->entries); 404 406 struct block_hist *bh = &block_report->hist; 405 407 struct hist_entry *he; 406 408 407 - init_block_hist(bh, block_report->fmts); 409 + if 
(nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX) 410 + return -1; 411 + 412 + block_report->nr_fmts = nr_hpps; 413 + init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps); 408 414 409 415 while (next) { 410 416 he = rb_entry(next, struct hist_entry, rb_node); ··· 421 411 next = rb_next(&he->rb_node); 422 412 } 423 413 424 - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) { 414 + for (int i = 0; i < nr_hpps; i++) { 425 415 block_report->fmts[i].total_cycles = total_cycles; 426 416 block_report->fmts[i].block_cycles = block_report->cycles; 427 417 } 428 418 429 419 hists__output_resort(&bh->block_hists, NULL); 420 + return 0; 430 421 } 431 422 432 423 struct block_report *block_info__create_report(struct evlist *evlist, 433 - u64 total_cycles) 424 + u64 total_cycles, 425 + int *block_hpps, int nr_hpps, 426 + int *nr_reps) 434 427 { 435 428 struct block_report *block_reports; 436 429 int nr_hists = evlist->core.nr_entries, i = 0; ··· 446 433 evlist__for_each_entry(evlist, pos) { 447 434 struct hists *hists = evsel__hists(pos); 448 435 449 - process_block_report(hists, &block_reports[i], total_cycles); 436 + process_block_report(hists, &block_reports[i], total_cycles, 437 + block_hpps, nr_hpps); 450 438 i++; 451 439 } 452 440 441 + *nr_reps = nr_hists; 453 442 return block_reports; 443 + } 444 + 445 + void block_info__free_report(struct block_report *reps, int nr_reps) 446 + { 447 + for (int i = 0; i < nr_reps; i++) 448 + hists__delete_entries(&reps[i].hist.block_hists); 449 + 450 + free(reps); 454 451 } 455 452 456 453 int report__browse_block_hists(struct block_hist *bh, float min_percent, ··· 474 451 symbol_conf.report_individual_block = true; 475 452 hists__fprintf(&bh->block_hists, true, 0, 0, min_percent, 476 453 stdout, true); 477 - hists__delete_entries(&bh->block_hists); 478 454 return 0; 479 455 case 1: 480 456 symbol_conf.report_individual_block = true; 481 457 ret = block_hists_tui_browse(bh, evsel, min_percent, 482 458 env, annotation_opts); 
483 - hists__delete_entries(&bh->block_hists); 484 459 return ret; 485 460 default: 486 461 return -1;
+8 -1
tools/perf/util/block-info.h
··· 45 45 struct block_hist hist; 46 46 u64 cycles; 47 47 struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX]; 48 + int nr_fmts; 48 49 }; 49 50 50 51 struct block_hist; ··· 62 61 63 62 #define block_info__zput(bi) __block_info__zput(&bi) 64 63 64 + int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right); 65 + 65 66 int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, 66 67 struct hist_entry *left, struct hist_entry *right); 67 68 ··· 71 68 u64 *block_cycles_aggr, u64 total_cycles); 72 69 73 70 struct block_report *block_info__create_report(struct evlist *evlist, 74 - u64 total_cycles); 71 + u64 total_cycles, 72 + int *block_hpps, int nr_hpps, 73 + int *nr_reps); 74 + 75 + void block_info__free_report(struct block_report *reps, int nr_reps); 75 76 76 77 int report__browse_block_hists(struct block_hist *bh, float min_percent, 77 78 struct evsel *evsel, struct perf_env *env,
+22
tools/perf/util/branch.h
··· 12 12 #include <linux/stddef.h> 13 13 #include <linux/perf_event.h> 14 14 #include <linux/types.h> 15 + #include "event.h" 15 16 16 17 struct branch_flags { 17 18 u64 mispred:1; ··· 40 39 41 40 struct branch_stack { 42 41 u64 nr; 42 + u64 hw_idx; 43 43 struct branch_entry entries[0]; 44 44 }; 45 + 46 + /* 47 + * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied. 48 + * Otherwise, the output format of a sample with branch stack is 49 + * struct branch_stack { 50 + * u64 nr; 51 + * struct branch_entry entries[0]; 52 + * } 53 + * Check whether the hw_idx is available, 54 + * and return the corresponding pointer of entries[0]. 55 + */ 56 + static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample) 57 + { 58 + u64 *entry = (u64 *)sample->branch_stack; 59 + 60 + entry++; 61 + if (sample->no_hw_idx) 62 + return (struct branch_entry *)entry; 63 + return (struct branch_entry *)(++entry); 64 + } 45 65 46 66 struct branch_type_stat { 47 67 bool branch_to;
+2 -61
tools/perf/util/cgroup.c
··· 3 3 #include "evsel.h" 4 4 #include "cgroup.h" 5 5 #include "evlist.h" 6 - #include <linux/stringify.h> 7 6 #include <linux/zalloc.h> 8 7 #include <sys/types.h> 9 8 #include <sys/stat.h> 10 9 #include <fcntl.h> 11 10 #include <stdlib.h> 12 11 #include <string.h> 12 + #include <api/fs/fs.h> 13 13 14 14 int nr_cgroups; 15 - 16 - static int 17 - cgroupfs_find_mountpoint(char *buf, size_t maxlen) 18 - { 19 - FILE *fp; 20 - char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; 21 - char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; 22 - char *token, *saved_ptr = NULL; 23 - 24 - fp = fopen("/proc/mounts", "r"); 25 - if (!fp) 26 - return -1; 27 - 28 - /* 29 - * in order to handle split hierarchy, we need to scan /proc/mounts 30 - * and inspect every cgroupfs mount point to find one that has 31 - * perf_event subsystem 32 - */ 33 - path_v1[0] = '\0'; 34 - path_v2[0] = '\0'; 35 - 36 - while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" 37 - __stringify(PATH_MAX)"s %*d %*d\n", 38 - mountpoint, type, tokens) == 3) { 39 - 40 - if (!path_v1[0] && !strcmp(type, "cgroup")) { 41 - 42 - token = strtok_r(tokens, ",", &saved_ptr); 43 - 44 - while (token != NULL) { 45 - if (!strcmp(token, "perf_event")) { 46 - strcpy(path_v1, mountpoint); 47 - break; 48 - } 49 - token = strtok_r(NULL, ",", &saved_ptr); 50 - } 51 - } 52 - 53 - if (!path_v2[0] && !strcmp(type, "cgroup2")) 54 - strcpy(path_v2, mountpoint); 55 - 56 - if (path_v1[0] && path_v2[0]) 57 - break; 58 - } 59 - fclose(fp); 60 - 61 - if (path_v1[0]) 62 - path = path_v1; 63 - else if (path_v2[0]) 64 - path = path_v2; 65 - else 66 - return -1; 67 - 68 - if (strlen(path) < maxlen) { 69 - strcpy(buf, path); 70 - return 0; 71 - } 72 - return -1; 73 - } 74 15 75 16 static int open_cgroup(const char *name) 76 17 { ··· 20 79 int fd; 21 80 22 81 23 - if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) 82 + if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event")) 24 83 return -1; 25 
84 26 85 scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
+2
tools/perf/util/cs-etm.c
··· 1172 1172 union perf_event *event = tidq->event_buf; 1173 1173 struct dummy_branch_stack { 1174 1174 u64 nr; 1175 + u64 hw_idx; 1175 1176 struct branch_entry entries; 1176 1177 } dummy_bs; 1177 1178 u64 ip; ··· 1203 1202 if (etm->synth_opts.last_branch) { 1204 1203 dummy_bs = (struct dummy_branch_stack){ 1205 1204 .nr = 1, 1205 + .hw_idx = -1ULL, 1206 1206 .entries = { 1207 1207 .from = sample.ip, 1208 1208 .to = sample.addr,
+1
tools/perf/util/event.h
··· 139 139 u16 insn_len; 140 140 u8 cpumode; 141 141 u16 misc; 142 + bool no_hw_idx; /* No hw_idx collected in branch_stack */ 142 143 char insn[MAX_INSN]; 143 144 void *raw_data; 144 145 struct ip_callchain *callchain;
+17 -3
tools/perf/util/evsel.c
··· 712 712 attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | 713 713 PERF_SAMPLE_BRANCH_CALL_STACK | 714 714 PERF_SAMPLE_BRANCH_NO_CYCLES | 715 - PERF_SAMPLE_BRANCH_NO_FLAGS; 715 + PERF_SAMPLE_BRANCH_NO_FLAGS | 716 + PERF_SAMPLE_BRANCH_HW_INDEX; 716 717 } 717 718 } else 718 719 pr_warning("Cannot use LBR callstack with branch stack. " ··· 764 763 if (param->record_mode == CALLCHAIN_LBR) { 765 764 perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); 766 765 attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | 767 - PERF_SAMPLE_BRANCH_CALL_STACK); 766 + PERF_SAMPLE_BRANCH_CALL_STACK | 767 + PERF_SAMPLE_BRANCH_HW_INDEX); 768 768 } 769 769 if (param->record_mode == CALLCHAIN_DWARF) { 770 770 perf_evsel__reset_sample_bit(evsel, REGS_USER); ··· 1675 1673 evsel->core.attr.ksymbol = 0; 1676 1674 if (perf_missing_features.bpf) 1677 1675 evsel->core.attr.bpf_event = 0; 1676 + if (perf_missing_features.branch_hw_idx) 1677 + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX; 1678 1678 retry_sample_id: 1679 1679 if (perf_missing_features.sample_id_all) 1680 1680 evsel->core.attr.sample_id_all = 0; ··· 1788 1784 * Must probe features in the order they were added to the 1789 1785 * perf_event_attr interface. 
1790 1786 */ 1791 - if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { 1787 + if (!perf_missing_features.branch_hw_idx && 1788 + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { 1789 + perf_missing_features.branch_hw_idx = true; 1790 + pr_debug2("switching off branch HW index support\n"); 1791 + goto fallback_missing_features; 1792 + } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { 1792 1793 perf_missing_features.aux_output = true; 1793 1794 pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); 1794 1795 goto out_close; ··· 2178 2169 2179 2170 if (data->branch_stack->nr > max_branch_nr) 2180 2171 return -EFAULT; 2172 + 2181 2173 sz = data->branch_stack->nr * sizeof(struct branch_entry); 2174 + if (perf_evsel__has_branch_hw_idx(evsel)) 2175 + sz += sizeof(u64); 2176 + else 2177 + data->no_hw_idx = true; 2182 2178 OVERFLOW_CHECK(array, sz, max_size); 2183 2179 array = (void *)array + sz; 2184 2180 }
+6
tools/perf/util/evsel.h
··· 119 119 bool ksymbol; 120 120 bool bpf; 121 121 bool aux_output; 122 + bool branch_hw_idx; 122 123 }; 123 124 124 125 extern struct perf_missing_features perf_missing_features; ··· 388 387 static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel) 389 388 { 390 389 return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; 390 + } 391 + 392 + static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel) 393 + { 394 + return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; 391 395 } 392 396 393 397 static inline bool evsel__has_callchain(const struct evsel *evsel)
+112
tools/perf/util/expr.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <stdbool.h> 3 + #include <assert.h> 4 + #include "expr.h" 5 + #include "expr-bison.h" 6 + #define YY_EXTRA_TYPE int 7 + #include "expr-flex.h" 8 + 9 + #ifdef PARSER_DEBUG 10 + extern int expr_debug; 11 + #endif 12 + 13 + /* Caller must make sure id is allocated */ 14 + void expr__add_id(struct parse_ctx *ctx, const char *name, double val) 15 + { 16 + int idx; 17 + 18 + assert(ctx->num_ids < MAX_PARSE_ID); 19 + idx = ctx->num_ids++; 20 + ctx->ids[idx].name = name; 21 + ctx->ids[idx].val = val; 22 + } 23 + 24 + void expr__ctx_init(struct parse_ctx *ctx) 25 + { 26 + ctx->num_ids = 0; 27 + } 28 + 29 + static int 30 + __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, 31 + int start) 32 + { 33 + YY_BUFFER_STATE buffer; 34 + void *scanner; 35 + int ret; 36 + 37 + ret = expr_lex_init_extra(start, &scanner); 38 + if (ret) 39 + return ret; 40 + 41 + buffer = expr__scan_string(expr, scanner); 42 + 43 + #ifdef PARSER_DEBUG 44 + expr_debug = 1; 45 + #endif 46 + 47 + ret = expr_parse(val, ctx, scanner); 48 + 49 + expr__flush_buffer(buffer, scanner); 50 + expr__delete_buffer(buffer, scanner); 51 + expr_lex_destroy(scanner); 52 + return ret; 53 + } 54 + 55 + int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) 56 + { 57 + return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? 
-1 : 0; 58 + } 59 + 60 + static bool 61 + already_seen(const char *val, const char *one, const char **other, 62 + int num_other) 63 + { 64 + int i; 65 + 66 + if (one && !strcasecmp(one, val)) 67 + return true; 68 + for (i = 0; i < num_other; i++) 69 + if (!strcasecmp(other[i], val)) 70 + return true; 71 + return false; 72 + } 73 + 74 + int expr__find_other(const char *expr, const char *one, const char ***other, 75 + int *num_other) 76 + { 77 + int err, i = 0, j = 0; 78 + struct parse_ctx ctx; 79 + 80 + expr__ctx_init(&ctx); 81 + err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER); 82 + if (err) 83 + return -1; 84 + 85 + *other = malloc((ctx.num_ids + 1) * sizeof(char *)); 86 + if (!*other) 87 + return -ENOMEM; 88 + 89 + for (i = 0, j = 0; i < ctx.num_ids; i++) { 90 + const char *str = ctx.ids[i].name; 91 + 92 + if (already_seen(str, one, *other, j)) 93 + continue; 94 + 95 + str = strdup(str); 96 + if (!str) 97 + goto out; 98 + (*other)[j++] = str; 99 + } 100 + (*other)[j] = NULL; 101 + 102 + out: 103 + if (i != ctx.num_ids) { 104 + while (--j) 105 + free((char *) (*other)[i]); 106 + free(*other); 107 + err = -1; 108 + } 109 + 110 + *num_other = j; 111 + return err; 112 + }
+3 -5
tools/perf/util/expr.h
··· 2 2 #ifndef PARSE_CTX_H 3 3 #define PARSE_CTX_H 1 4 4 5 - #define EXPR_MAX_OTHER 15 5 + #define EXPR_MAX_OTHER 20 6 6 #define MAX_PARSE_ID EXPR_MAX_OTHER 7 7 8 8 struct parse_id { ··· 17 17 18 18 void expr__ctx_init(struct parse_ctx *ctx); 19 19 void expr__add_id(struct parse_ctx *ctx, const char *id, double val); 20 - #ifndef IN_EXPR_Y 21 - int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); 22 - #endif 23 - int expr__find_other(const char *p, const char *one, const char ***other, 20 + int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr); 21 + int expr__find_other(const char *expr, const char *one, const char ***other, 24 22 int *num_other); 25 23 26 24 #endif
+114
tools/perf/util/expr.l
··· 1 + %option prefix="expr_" 2 + %option reentrant 3 + %option bison-bridge 4 + 5 + %{ 6 + #include <linux/compiler.h> 7 + #include "expr.h" 8 + #include "expr-bison.h" 9 + 10 + char *expr_get_text(yyscan_t yyscanner); 11 + YYSTYPE *expr_get_lval(yyscan_t yyscanner); 12 + 13 + static int __value(YYSTYPE *yylval, char *str, int base, int token) 14 + { 15 + u64 num; 16 + 17 + errno = 0; 18 + num = strtoull(str, NULL, base); 19 + if (errno) 20 + return EXPR_ERROR; 21 + 22 + yylval->num = num; 23 + return token; 24 + } 25 + 26 + static int value(yyscan_t scanner, int base) 27 + { 28 + YYSTYPE *yylval = expr_get_lval(scanner); 29 + char *text = expr_get_text(scanner); 30 + 31 + return __value(yylval, text, base, NUMBER); 32 + } 33 + 34 + /* 35 + * Allow @ instead of / to be able to specify pmu/event/ without 36 + * conflicts with normal division. 37 + */ 38 + static char *normalize(char *str) 39 + { 40 + char *ret = str; 41 + char *dst = str; 42 + 43 + while (*str) { 44 + if (*str == '@') 45 + *dst++ = '/'; 46 + else if (*str == '\\') 47 + *dst++ = *++str; 48 + else 49 + *dst++ = *str; 50 + str++; 51 + } 52 + 53 + *dst = 0x0; 54 + return ret; 55 + } 56 + 57 + static int str(yyscan_t scanner, int token) 58 + { 59 + YYSTYPE *yylval = expr_get_lval(scanner); 60 + char *text = expr_get_text(scanner); 61 + 62 + yylval->str = normalize(strdup(text)); 63 + if (!yylval->str) 64 + return EXPR_ERROR; 65 + 66 + yylval->str = normalize(yylval->str); 67 + return token; 68 + } 69 + %} 70 + 71 + number [0-9]+ 72 + 73 + sch [-,=] 74 + spec \\{sch} 75 + sym [0-9a-zA-Z_\.:@]+ 76 + symbol {spec}*{sym}*{spec}*{sym}* 77 + 78 + %% 79 + { 80 + int start_token; 81 + 82 + start_token = parse_events_get_extra(yyscanner); 83 + 84 + if (start_token) { 85 + parse_events_set_extra(NULL, yyscanner); 86 + return start_token; 87 + } 88 + } 89 + 90 + max { return MAX; } 91 + min { return MIN; } 92 + if { return IF; } 93 + else { return ELSE; } 94 + #smt_on { return SMT_ON; } 95 + {number} { return 
value(yyscanner, 10); } 96 + {symbol} { return str(yyscanner, ID); } 97 + "|" { return '|'; } 98 + "^" { return '^'; } 99 + "&" { return '&'; } 100 + "-" { return '-'; } 101 + "+" { return '+'; } 102 + "*" { return '*'; } 103 + "/" { return '/'; } 104 + "%" { return '%'; } 105 + "(" { return '('; } 106 + ")" { return ')'; } 107 + "," { return ','; } 108 + . { } 109 + %% 110 + 111 + int expr_wrap(void *scanner __maybe_unused) 112 + { 113 + return 1; 114 + }
+31 -154
tools/perf/util/expr.y
··· 1 1 /* Simple expression parser */ 2 2 %{ 3 + #define YYDEBUG 1 4 + #include <stdio.h> 3 5 #include "util.h" 4 6 #include "util/debug.h" 5 7 #include <stdlib.h> // strtod() 6 8 #define IN_EXPR_Y 1 7 9 #include "expr.h" 8 10 #include "smt.h" 9 - #include <assert.h> 10 11 #include <string.h> 11 12 12 - #define MAXIDLEN 256 13 13 %} 14 14 15 15 %define api.pure full 16 16 17 17 %parse-param { double *final_val } 18 18 %parse-param { struct parse_ctx *ctx } 19 - %parse-param { const char **pp } 20 - %lex-param { const char **pp } 19 + %parse-param {void *scanner} 20 + %lex-param {void* scanner} 21 21 22 22 %union { 23 - double num; 24 - char id[MAXIDLEN+1]; 23 + double num; 24 + char *str; 25 25 } 26 26 27 + %token EXPR_PARSE EXPR_OTHER EXPR_ERROR 27 28 %token <num> NUMBER 28 - %token <id> ID 29 + %token <str> ID 29 30 %token MIN MAX IF ELSE SMT_ON 30 31 %left MIN MAX IF 31 32 %left '|' ··· 38 37 %type <num> expr if_expr 39 38 40 39 %{ 41 - static int expr__lex(YYSTYPE *res, const char **pp); 42 - 43 - static void expr__error(double *final_val __maybe_unused, 40 + static void expr_error(double *final_val __maybe_unused, 44 41 struct parse_ctx *ctx __maybe_unused, 45 - const char **pp __maybe_unused, 42 + void *scanner, 46 43 const char *s) 47 44 { 48 45 pr_debug("%s\n", s); ··· 61 62 62 63 %} 63 64 %% 65 + 66 + start: 67 + EXPR_PARSE all_expr 68 + | 69 + EXPR_OTHER all_other 70 + 71 + all_other: all_other other 72 + | 73 + 74 + other: ID 75 + { 76 + if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) { 77 + pr_err("failed: way too many variables"); 78 + YYABORT; 79 + } 80 + 81 + ctx->ids[ctx->num_ids++].name = $1; 82 + } 83 + | 84 + MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' 85 + 64 86 65 87 all_expr: if_expr { *final_val = $1; } 66 88 ; ··· 113 93 ; 114 94 115 95 %% 116 - 117 - static int expr__symbol(YYSTYPE *res, const char *p, const char **pp) 118 - { 119 - char *dst = res->id; 120 - const char *s = p; 121 - 122 - if 
(*p == '#') 123 - *dst++ = *p++; 124 - 125 - while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') { 126 - if (p - s >= MAXIDLEN) 127 - return -1; 128 - /* 129 - * Allow @ instead of / to be able to specify pmu/event/ without 130 - * conflicts with normal division. 131 - */ 132 - if (*p == '@') 133 - *dst++ = '/'; 134 - else if (*p == '\\') 135 - *dst++ = *++p; 136 - else 137 - *dst++ = *p; 138 - p++; 139 - } 140 - *dst = 0; 141 - *pp = p; 142 - dst = res->id; 143 - switch (dst[0]) { 144 - case 'm': 145 - if (!strcmp(dst, "min")) 146 - return MIN; 147 - if (!strcmp(dst, "max")) 148 - return MAX; 149 - break; 150 - case 'i': 151 - if (!strcmp(dst, "if")) 152 - return IF; 153 - break; 154 - case 'e': 155 - if (!strcmp(dst, "else")) 156 - return ELSE; 157 - break; 158 - case '#': 159 - if (!strcasecmp(dst, "#smt_on")) 160 - return SMT_ON; 161 - break; 162 - } 163 - return ID; 164 - } 165 - 166 - static int expr__lex(YYSTYPE *res, const char **pp) 167 - { 168 - int tok; 169 - const char *s; 170 - const char *p = *pp; 171 - 172 - while (isspace(*p)) 173 - p++; 174 - s = p; 175 - switch (*p++) { 176 - case '#': 177 - case 'a' ... 'z': 178 - case 'A' ... 'Z': 179 - return expr__symbol(res, p - 1, pp); 180 - case '0' ... 
'9': case '.': 181 - res->num = strtod(s, (char **)&p); 182 - tok = NUMBER; 183 - break; 184 - default: 185 - tok = *s; 186 - break; 187 - } 188 - *pp = p; 189 - return tok; 190 - } 191 - 192 - /* Caller must make sure id is allocated */ 193 - void expr__add_id(struct parse_ctx *ctx, const char *name, double val) 194 - { 195 - int idx; 196 - assert(ctx->num_ids < MAX_PARSE_ID); 197 - idx = ctx->num_ids++; 198 - ctx->ids[idx].name = name; 199 - ctx->ids[idx].val = val; 200 - } 201 - 202 - void expr__ctx_init(struct parse_ctx *ctx) 203 - { 204 - ctx->num_ids = 0; 205 - } 206 - 207 - static bool already_seen(const char *val, const char *one, const char **other, 208 - int num_other) 209 - { 210 - int i; 211 - 212 - if (one && !strcasecmp(one, val)) 213 - return true; 214 - for (i = 0; i < num_other; i++) 215 - if (!strcasecmp(other[i], val)) 216 - return true; 217 - return false; 218 - } 219 - 220 - int expr__find_other(const char *p, const char *one, const char ***other, 221 - int *num_otherp) 222 - { 223 - const char *orig = p; 224 - int err = -1; 225 - int num_other; 226 - 227 - *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *)); 228 - if (!*other) 229 - return -1; 230 - 231 - num_other = 0; 232 - for (;;) { 233 - YYSTYPE val; 234 - int tok = expr__lex(&val, &p); 235 - if (tok == 0) { 236 - err = 0; 237 - break; 238 - } 239 - if (tok == ID && !already_seen(val.id, one, *other, num_other)) { 240 - if (num_other >= EXPR_MAX_OTHER - 1) { 241 - pr_debug("Too many extra events in %s\n", orig); 242 - break; 243 - } 244 - (*other)[num_other] = strdup(val.id); 245 - if (!(*other)[num_other]) 246 - return -1; 247 - num_other++; 248 - } 249 - } 250 - (*other)[num_other] = NULL; 251 - *num_otherp = num_other; 252 - if (err) { 253 - *num_otherp = 0; 254 - free(*other); 255 - *other = NULL; 256 - } 257 - return err; 258 - }
+37
tools/perf/util/header.c
··· 1590 1590 free(events); 1591 1591 } 1592 1592 1593 + static bool perf_attr_check(struct perf_event_attr *attr) 1594 + { 1595 + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) { 1596 + pr_warning("Reserved bits are set unexpectedly. " 1597 + "Please update perf tool.\n"); 1598 + return false; 1599 + } 1600 + 1601 + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) { 1602 + pr_warning("Unknown sample type (0x%llx) is detected. " 1603 + "Please update perf tool.\n", 1604 + attr->sample_type); 1605 + return false; 1606 + } 1607 + 1608 + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) { 1609 + pr_warning("Unknown read format (0x%llx) is detected. " 1610 + "Please update perf tool.\n", 1611 + attr->read_format); 1612 + return false; 1613 + } 1614 + 1615 + if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) && 1616 + (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) { 1617 + pr_warning("Unknown branch sample type (0x%llx) is detected. " 1618 + "Please update perf tool.\n", 1619 + attr->branch_sample_type); 1620 + 1621 + return false; 1622 + } 1623 + 1624 + return true; 1625 + } 1626 + 1593 1627 static struct evsel *read_event_desc(struct feat_fd *ff) 1594 1628 { 1595 1629 struct evsel *evsel, *events = NULL; ··· 1667 1633 perf_event__attr_swap(buf); 1668 1634 1669 1635 memcpy(&evsel->core.attr, buf, msz); 1636 + 1637 + if (!perf_attr_check(&evsel->core.attr)) 1638 + goto error; 1670 1639 1671 1640 if (do_read_u32(ff, &nr)) 1672 1641 goto error;
+2 -1
tools/perf/util/hist.c
··· 2584 2584 u64 *total_cycles) 2585 2585 { 2586 2586 struct branch_info *bi; 2587 + struct branch_entry *entries = perf_sample__branch_entries(sample); 2587 2588 2588 2589 /* If we have branch cycles always annotate them. */ 2589 - if (bs && bs->nr && bs->entries[0].flags.cycles) { 2590 + if (bs && bs->nr && entries[0].flags.cycles) { 2590 2591 int i; 2591 2592 2592 2593 bi = sample__resolve_bstack(sample, al);
+2
tools/perf/util/intel-pt.c
··· 1295 1295 struct perf_sample sample = { .ip = 0, }; 1296 1296 struct dummy_branch_stack { 1297 1297 u64 nr; 1298 + u64 hw_idx; 1298 1299 struct branch_entry entries; 1299 1300 } dummy_bs; 1300 1301 ··· 1317 1316 if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { 1318 1317 dummy_bs = (struct dummy_branch_stack){ 1319 1318 .nr = 1, 1319 + .hw_idx = -1ULL, 1320 1320 .entries = { 1321 1321 .from = sample.ip, 1322 1322 .to = sample.addr,
+2
tools/perf/util/llvm-utils.c
··· 265 265 return -ENOMEM; 266 266 return 0; 267 267 } 268 + pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n", 269 + __func__, autoconf_path); 268 270 free(autoconf_path); 269 271 return -ENOENT; 270 272 }
+18 -17
tools/perf/util/machine.c
··· 2081 2081 { 2082 2082 unsigned int i; 2083 2083 const struct branch_stack *bs = sample->branch_stack; 2084 + struct branch_entry *entries = perf_sample__branch_entries(sample); 2084 2085 struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info)); 2085 2086 2086 2087 if (!bi) 2087 2088 return NULL; 2088 2089 2089 2090 for (i = 0; i < bs->nr; i++) { 2090 - ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to); 2091 - ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from); 2092 - bi[i].flags = bs->entries[i].flags; 2091 + ip__resolve_ams(al->thread, &bi[i].to, entries[i].to); 2092 + ip__resolve_ams(al->thread, &bi[i].from, entries[i].from); 2093 + bi[i].flags = entries[i].flags; 2093 2094 } 2094 2095 return bi; 2095 2096 } ··· 2186 2185 /* LBR only affects the user callchain */ 2187 2186 if (i != chain_nr) { 2188 2187 struct branch_stack *lbr_stack = sample->branch_stack; 2188 + struct branch_entry *entries = perf_sample__branch_entries(sample); 2189 2189 int lbr_nr = lbr_stack->nr, j, k; 2190 2190 bool branch; 2191 2191 struct branch_flags *flags; ··· 2212 2210 ip = chain->ips[j]; 2213 2211 else if (j > i + 1) { 2214 2212 k = j - i - 2; 2215 - ip = lbr_stack->entries[k].from; 2213 + ip = entries[k].from; 2216 2214 branch = true; 2217 - flags = &lbr_stack->entries[k].flags; 2215 + flags = &entries[k].flags; 2218 2216 } else { 2219 - ip = lbr_stack->entries[0].to; 2217 + ip = entries[0].to; 2220 2218 branch = true; 2221 - flags = &lbr_stack->entries[0].flags; 2222 - branch_from = 2223 - lbr_stack->entries[0].from; 2219 + flags = &entries[0].flags; 2220 + branch_from = entries[0].from; 2224 2221 } 2225 2222 } else { 2226 2223 if (j < lbr_nr) { 2227 2224 k = lbr_nr - j - 1; 2228 - ip = lbr_stack->entries[k].from; 2225 + ip = entries[k].from; 2229 2226 branch = true; 2230 - flags = &lbr_stack->entries[k].flags; 2227 + flags = &entries[k].flags; 2231 2228 } 2232 2229 else if (j > lbr_nr) 2233 2230 ip = chain->ips[i + 1 - (j - lbr_nr)]; 2234 2231 
else { 2235 - ip = lbr_stack->entries[0].to; 2232 + ip = entries[0].to; 2236 2233 branch = true; 2237 - flags = &lbr_stack->entries[0].flags; 2238 - branch_from = 2239 - lbr_stack->entries[0].from; 2234 + flags = &entries[0].flags; 2235 + branch_from = entries[0].from; 2240 2236 } 2241 2237 } 2242 2238 ··· 2281 2281 int max_stack) 2282 2282 { 2283 2283 struct branch_stack *branch = sample->branch_stack; 2284 + struct branch_entry *entries = perf_sample__branch_entries(sample); 2284 2285 struct ip_callchain *chain = sample->callchain; 2285 2286 int chain_nr = 0; 2286 2287 u8 cpumode = PERF_RECORD_MISC_USER; ··· 2329 2328 2330 2329 for (i = 0; i < nr; i++) { 2331 2330 if (callchain_param.order == ORDER_CALLEE) { 2332 - be[i] = branch->entries[i]; 2331 + be[i] = entries[i]; 2333 2332 2334 2333 if (chain == NULL) 2335 2334 continue; ··· 2348 2347 be[i].from >= chain->ips[first_call] - 8) 2349 2348 first_call++; 2350 2349 } else 2351 - be[i] = branch->entries[branch->nr - i - 1]; 2350 + be[i] = entries[branch->nr - i - 1]; 2352 2351 } 2353 2352 2354 2353 memset(iter, 0, sizeof(struct iterations) * nr);
+1 -1
tools/perf/util/map.c
··· 431 431 432 432 if (map && map->dso) { 433 433 char *srcline = map__srcline(map, addr, NULL); 434 - if (srcline != SRCLINE_UNKNOWN) 434 + if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) 435 435 ret = fprintf(fp, "%s%s", prefix, srcline); 436 436 free_srcline(srcline); 437 437 }
+1
tools/perf/util/perf_event_attr_fprintf.c
··· 50 50 bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), 51 51 bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), 52 52 bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), 53 + bit_name(HW_INDEX), 53 54 { .name = NULL, } 54 55 }; 55 56 #undef bit_name
+16 -14
tools/perf/util/scripting-engines/trace-event-python.c
··· 464 464 struct thread *thread) 465 465 { 466 466 struct branch_stack *br = sample->branch_stack; 467 + struct branch_entry *entries = perf_sample__branch_entries(sample); 467 468 PyObject *pylist; 468 469 u64 i; 469 470 ··· 485 484 Py_FatalError("couldn't create Python dictionary"); 486 485 487 486 pydict_set_item_string_decref(pyelem, "from", 488 - PyLong_FromUnsignedLongLong(br->entries[i].from)); 487 + PyLong_FromUnsignedLongLong(entries[i].from)); 489 488 pydict_set_item_string_decref(pyelem, "to", 490 - PyLong_FromUnsignedLongLong(br->entries[i].to)); 489 + PyLong_FromUnsignedLongLong(entries[i].to)); 491 490 pydict_set_item_string_decref(pyelem, "mispred", 492 - PyBool_FromLong(br->entries[i].flags.mispred)); 491 + PyBool_FromLong(entries[i].flags.mispred)); 493 492 pydict_set_item_string_decref(pyelem, "predicted", 494 - PyBool_FromLong(br->entries[i].flags.predicted)); 493 + PyBool_FromLong(entries[i].flags.predicted)); 495 494 pydict_set_item_string_decref(pyelem, "in_tx", 496 - PyBool_FromLong(br->entries[i].flags.in_tx)); 495 + PyBool_FromLong(entries[i].flags.in_tx)); 497 496 pydict_set_item_string_decref(pyelem, "abort", 498 - PyBool_FromLong(br->entries[i].flags.abort)); 497 + PyBool_FromLong(entries[i].flags.abort)); 499 498 pydict_set_item_string_decref(pyelem, "cycles", 500 - PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); 499 + PyLong_FromUnsignedLongLong(entries[i].flags.cycles)); 501 500 502 501 thread__find_map_fb(thread, sample->cpumode, 503 - br->entries[i].from, &al); 502 + entries[i].from, &al); 504 503 dsoname = get_dsoname(al.map); 505 504 pydict_set_item_string_decref(pyelem, "from_dsoname", 506 505 _PyUnicode_FromString(dsoname)); 507 506 508 507 thread__find_map_fb(thread, sample->cpumode, 509 - br->entries[i].to, &al); 508 + entries[i].to, &al); 510 509 dsoname = get_dsoname(al.map); 511 510 pydict_set_item_string_decref(pyelem, "to_dsoname", 512 511 _PyUnicode_FromString(dsoname)); ··· 562 561 struct thread *thread) 
563 562 { 564 563 struct branch_stack *br = sample->branch_stack; 564 + struct branch_entry *entries = perf_sample__branch_entries(sample); 565 565 PyObject *pylist; 566 566 u64 i; 567 567 char bf[512]; ··· 583 581 Py_FatalError("couldn't create Python dictionary"); 584 582 585 583 thread__find_symbol_fb(thread, sample->cpumode, 586 - br->entries[i].from, &al); 584 + entries[i].from, &al); 587 585 get_symoff(al.sym, &al, true, bf, sizeof(bf)); 588 586 pydict_set_item_string_decref(pyelem, "from", 589 587 _PyUnicode_FromString(bf)); 590 588 591 589 thread__find_symbol_fb(thread, sample->cpumode, 592 - br->entries[i].to, &al); 590 + entries[i].to, &al); 593 591 get_symoff(al.sym, &al, true, bf, sizeof(bf)); 594 592 pydict_set_item_string_decref(pyelem, "to", 595 593 _PyUnicode_FromString(bf)); 596 594 597 - get_br_mspred(&br->entries[i].flags, bf, sizeof(bf)); 595 + get_br_mspred(&entries[i].flags, bf, sizeof(bf)); 598 596 pydict_set_item_string_decref(pyelem, "pred", 599 597 _PyUnicode_FromString(bf)); 600 598 601 - if (br->entries[i].flags.in_tx) { 599 + if (entries[i].flags.in_tx) { 602 600 pydict_set_item_string_decref(pyelem, "in_tx", 603 601 _PyUnicode_FromString("X")); 604 602 } else { ··· 606 604 _PyUnicode_FromString("-")); 607 605 } 608 606 609 - if (br->entries[i].flags.abort) { 607 + if (entries[i].flags.abort) { 610 608 pydict_set_item_string_decref(pyelem, "abort", 611 609 _PyUnicode_FromString("A")); 612 610 } else {
+5 -3
tools/perf/util/session.c
··· 1007 1007 { 1008 1008 struct ip_callchain *callchain = sample->callchain; 1009 1009 struct branch_stack *lbr_stack = sample->branch_stack; 1010 + struct branch_entry *entries = perf_sample__branch_entries(sample); 1010 1011 u64 kernel_callchain_nr = callchain->nr; 1011 1012 unsigned int i; 1012 1013 ··· 1044 1043 i, callchain->ips[i]); 1045 1044 1046 1045 printf("..... %2d: %016" PRIx64 "\n", 1047 - (int)(kernel_callchain_nr), lbr_stack->entries[0].to); 1046 + (int)(kernel_callchain_nr), entries[0].to); 1048 1047 for (i = 0; i < lbr_stack->nr; i++) 1049 1048 printf("..... %2d: %016" PRIx64 "\n", 1050 - (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); 1049 + (int)(i + kernel_callchain_nr + 1), entries[i].from); 1051 1050 } 1052 1051 } 1053 1052 ··· 1069 1068 1070 1069 static void branch_stack__printf(struct perf_sample *sample, bool callstack) 1071 1070 { 1071 + struct branch_entry *entries = perf_sample__branch_entries(sample); 1072 1072 uint64_t i; 1073 1073 1074 1074 printf("%s: nr:%" PRIu64 "\n", ··· 1077 1075 sample->branch_stack->nr); 1078 1076 1079 1077 for (i = 0; i < sample->branch_stack->nr; i++) { 1080 - struct branch_entry *e = &sample->branch_stack->entries[i]; 1078 + struct branch_entry *e = &entries[i]; 1081 1079 1082 1080 if (!callstack) { 1083 1081 printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
+28 -5
tools/perf/util/stat-display.c
··· 110 110 config->csv_sep); 111 111 break; 112 112 case AGGR_NONE: 113 - if (evsel->percore) { 113 + if (evsel->percore && !config->percore_show_thread) { 114 114 fprintf(config->output, "S%d-D%d-C%*d%s", 115 115 cpu_map__id_to_socket(id), 116 116 cpu_map__id_to_die(id), ··· 628 628 static void print_counter_aggrdata(struct perf_stat_config *config, 629 629 struct evsel *counter, int s, 630 630 char *prefix, bool metric_only, 631 - bool *first) 631 + bool *first, int cpu) 632 632 { 633 633 struct aggr_data ad; 634 634 FILE *output = config->output; ··· 654 654 fprintf(output, "%s", prefix); 655 655 656 656 uval = val * counter->scale; 657 - printout(config, id, nr, counter, uval, prefix, 657 + printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix, 658 658 run, ena, 1.0, &rt_stat); 659 659 if (!metric_only) 660 660 fputc('\n', output); ··· 687 687 evlist__for_each_entry(evlist, counter) { 688 688 print_counter_aggrdata(config, counter, s, 689 689 prefix, metric_only, 690 - &first); 690 + &first, -1); 691 691 } 692 692 if (metric_only) 693 693 fputc('\n', output); ··· 1146 1146 "the same PMU. 
Try reorganizing the group.\n"); 1147 1147 } 1148 1148 1149 + static void print_percore_thread(struct perf_stat_config *config, 1150 + struct evsel *counter, char *prefix) 1151 + { 1152 + int s, s2, id; 1153 + bool first = true; 1154 + 1155 + for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) { 1156 + s2 = config->aggr_get_id(config, evsel__cpus(counter), i); 1157 + for (s = 0; s < config->aggr_map->nr; s++) { 1158 + id = config->aggr_map->map[s]; 1159 + if (s2 == id) 1160 + break; 1161 + } 1162 + 1163 + print_counter_aggrdata(config, counter, s, 1164 + prefix, false, 1165 + &first, i); 1166 + } 1167 + } 1168 + 1149 1169 static void print_percore(struct perf_stat_config *config, 1150 1170 struct evsel *counter, char *prefix) 1151 1171 { ··· 1177 1157 if (!(config->aggr_map || config->aggr_get_id)) 1178 1158 return; 1179 1159 1160 + if (config->percore_show_thread) 1161 + return print_percore_thread(config, counter, prefix); 1162 + 1180 1163 for (s = 0; s < config->aggr_map->nr; s++) { 1181 1164 if (prefix && metric_only) 1182 1165 fprintf(output, "%s", prefix); 1183 1166 1184 1167 print_counter_aggrdata(config, counter, s, 1185 1168 prefix, metric_only, 1186 - &first); 1169 + &first, -1); 1187 1170 } 1188 1171 1189 1172 if (metric_only)
+1 -3
tools/perf/util/stat-shadow.c
··· 777 777 } 778 778 779 779 if (!metric_events[i]) { 780 - const char *p = metric_expr; 781 - 782 - if (expr__parse(&ratio, &pctx, &p) == 0) { 780 + if (expr__parse(&ratio, &pctx, metric_expr) == 0) { 783 781 char *unit; 784 782 char metric_bf[64]; 785 783
+1
tools/perf/util/stat.h
··· 109 109 bool walltime_run_table; 110 110 bool all_kernel; 111 111 bool all_user; 112 + bool percore_show_thread; 112 113 FILE *output; 113 114 unsigned int interval; 114 115 unsigned int timeout;
+4 -2
tools/perf/util/synthetic-events.c
··· 1183 1183 1184 1184 if (type & PERF_SAMPLE_BRANCH_STACK) { 1185 1185 sz = sample->branch_stack->nr * sizeof(struct branch_entry); 1186 - sz += sizeof(u64); 1186 + /* nr, hw_idx */ 1187 + sz += 2 * sizeof(u64); 1187 1188 result += sz; 1188 1189 } 1189 1190 ··· 1345 1344 1346 1345 if (type & PERF_SAMPLE_BRANCH_STACK) { 1347 1346 sz = sample->branch_stack->nr * sizeof(struct branch_entry); 1348 - sz += sizeof(u64); 1347 + /* nr, hw_idx */ 1348 + sz += 2 * sizeof(u64); 1349 1349 memcpy(array, sample->branch_stack, sz); 1350 1350 array = (void *)array + sz; 1351 1351 }