Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf stat: Display event stats using aggr counts

Now aggr counts are ready for use. Convert the display routines to use
the aggr counts and update the shadow stat with them. It doesn't need
to aggregate counts or collect aliases anymore during the display. Get
rid of now unused struct perf_aggr_thread_value.

Note that there's a difference in the display order among the aggr mode.
For per-core/die/socket/node aggregation, it shows relevant events in
the same unit together, whereas for global/thread/no aggregation it shows
the same events for different units together. So it still uses separate
codes to display them due to the ordering.

One more thing to note is that it breaks per-core event display for now.
The next patch will fix it to have identical output as of now.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-19-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Namhyung Kim and committed by
Arnaldo Carvalho de Melo
91f85f98 88f1d351

+49 -386
+49 -372
tools/perf/util/stat-display.c
··· 442 442 fprintf(os->fh, "%*s ", config->metric_only_len, unit); 443 443 } 444 444 445 - static int first_shadow_map_idx(struct perf_stat_config *config, 446 - struct evsel *evsel, const struct aggr_cpu_id *id) 447 - { 448 - struct perf_cpu_map *cpus = evsel__cpus(evsel); 449 - struct perf_cpu cpu; 450 - int idx; 451 - 452 - if (config->aggr_mode == AGGR_NONE) 453 - return perf_cpu_map__idx(cpus, id->cpu); 454 - 455 - if (config->aggr_mode == AGGR_THREAD) 456 - return id->thread_idx; 457 - 458 - if (!config->aggr_get_id) 459 - return 0; 460 - 461 - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 462 - struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu); 463 - 464 - if (aggr_cpu_id__equal(&cpu_id, id)) 465 - return idx; 466 - } 467 - return 0; 468 - } 469 - 470 445 static void abs_printout(struct perf_stat_config *config, 471 446 struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) 472 447 { ··· 512 537 static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, 513 538 struct evsel *counter, double uval, 514 539 char *prefix, u64 run, u64 ena, double noise, 515 - struct runtime_stat *st) 540 + struct runtime_stat *st, int map_idx) 516 541 { 517 542 struct perf_stat_output_ctx out; 518 543 struct outstate os = { ··· 623 648 print_running(config, run, ena); 624 649 } 625 650 626 - perf_stat__print_shadow_stats(config, counter, uval, 627 - first_shadow_map_idx(config, counter, &id), 651 + perf_stat__print_shadow_stats(config, counter, uval, map_idx, 628 652 &out, &config->metric_events, st); 629 653 if (!config->csv_output && !config->metric_only && !config->json_output) { 630 654 print_noise(config, counter, noise); 631 655 print_running(config, run, ena); 632 - } 633 - } 634 - 635 - static void aggr_update_shadow(struct perf_stat_config *config, 636 - struct evlist *evlist) 637 - { 638 - int idx, s; 639 - struct perf_cpu cpu; 640 - struct aggr_cpu_id s2, id; 641 - u64 val; 642 - struct evsel *counter; 643 - struct 
perf_cpu_map *cpus; 644 - 645 - for (s = 0; s < config->aggr_map->nr; s++) { 646 - id = config->aggr_map->map[s]; 647 - evlist__for_each_entry(evlist, counter) { 648 - cpus = evsel__cpus(counter); 649 - val = 0; 650 - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 651 - s2 = config->aggr_get_id(config, cpu); 652 - if (!aggr_cpu_id__equal(&s2, &id)) 653 - continue; 654 - val += perf_counts(counter->counts, idx, 0)->val; 655 - } 656 - perf_stat__update_shadow_stats(counter, val, 657 - first_shadow_map_idx(config, counter, &id), 658 - &rt_stat); 659 - } 660 656 } 661 657 } 662 658 ··· 667 721 counter->uniquified_name = true; 668 722 } 669 723 670 - static void collect_all_aliases(struct perf_stat_config *config, struct evsel *counter, 671 - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, 672 - bool first), 673 - void *data) 724 + static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) 674 725 { 675 - struct evlist *evlist = counter->evlist; 676 - struct evsel *alias; 677 - 678 - alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); 679 - list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { 680 - /* Merge events with the same name, etc. but on different PMUs. 
*/ 681 - if (!strcmp(evsel__name(alias), evsel__name(counter)) && 682 - alias->scale == counter->scale && 683 - alias->cgrp == counter->cgrp && 684 - !strcmp(alias->unit, counter->unit) && 685 - evsel__is_clock(alias) == evsel__is_clock(counter) && 686 - strcmp(alias->pmu_name, counter->pmu_name)) { 687 - alias->merged_stat = true; 688 - cb(config, alias, data, false); 689 - } 690 - } 726 + return evsel__is_hybrid(evsel) && !config->hybrid_merge; 691 727 } 692 728 693 - static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config, 694 - bool check) 729 + static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) 695 730 { 696 - if (evsel__is_hybrid(counter)) { 697 - if (check) 698 - return config->hybrid_merge; 699 - else 700 - return !config->hybrid_merge; 701 - } 702 - 703 - return false; 704 - } 705 - 706 - static bool collect_data(struct perf_stat_config *config, struct evsel *counter, 707 - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, 708 - bool first), 709 - void *data) 710 - { 711 - if (counter->merged_stat) 712 - return false; 713 - cb(config, counter, data, true); 714 - if (config->no_merge || hybrid_merge(counter, config, false)) 731 + if (config->no_merge || hybrid_uniquify(counter, config)) 715 732 uniquify_event_name(counter); 716 - else if (counter->auto_merge_stats || hybrid_merge(counter, config, true)) 717 - collect_all_aliases(config, counter, cb, data); 718 - return true; 719 - } 720 - 721 - struct aggr_data { 722 - u64 ena, run, val; 723 - struct aggr_cpu_id id; 724 - int nr; 725 - int cpu_map_idx; 726 - }; 727 - 728 - static void aggr_cb(struct perf_stat_config *config, 729 - struct evsel *counter, void *data, bool first) 730 - { 731 - struct aggr_data *ad = data; 732 - int idx; 733 - struct perf_cpu cpu; 734 - struct perf_cpu_map *cpus; 735 - struct aggr_cpu_id s2; 736 - 737 - cpus = evsel__cpus(counter); 738 - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 739 - 
struct perf_counts_values *counts; 740 - 741 - s2 = config->aggr_get_id(config, cpu); 742 - if (!aggr_cpu_id__equal(&s2, &ad->id)) 743 - continue; 744 - if (first) 745 - ad->nr++; 746 - counts = perf_counts(counter->counts, idx, 0); 747 - /* 748 - * When any result is bad, make them all to give 749 - * consistent output in interval mode. 750 - */ 751 - if (counts->ena == 0 || counts->run == 0 || 752 - counter->counts->scaled == -1) { 753 - ad->ena = 0; 754 - ad->run = 0; 755 - break; 756 - } 757 - ad->val += counts->val; 758 - ad->ena += counts->ena; 759 - ad->run += counts->run; 760 - } 761 733 } 762 734 763 735 static void print_counter_aggrdata(struct perf_stat_config *config, 764 736 struct evsel *counter, int s, 765 737 char *prefix, bool metric_only, 766 - bool *first, struct perf_cpu cpu) 738 + bool *first) 767 739 { 768 - struct aggr_data ad; 769 740 FILE *output = config->output; 770 741 u64 ena, run, val; 771 - int nr; 772 - struct aggr_cpu_id id; 773 742 double uval; 743 + struct perf_stat_evsel *ps = counter->stats; 744 + struct perf_stat_aggr *aggr = &ps->aggr[s]; 745 + struct aggr_cpu_id id = config->aggr_map->map[s]; 746 + double avg = aggr->counts.val; 774 747 775 - ad.id = id = config->aggr_map->map[s]; 776 - ad.val = ad.ena = ad.run = 0; 777 - ad.nr = 0; 778 - if (!collect_data(config, counter, aggr_cb, &ad)) 748 + if (counter->supported && aggr->nr == 0) 779 749 return; 780 750 781 - if (perf_pmu__has_hybrid() && ad.ena == 0) 782 - return; 751 + uniquify_counter(config, counter); 783 752 784 - nr = ad.nr; 785 - ena = ad.ena; 786 - run = ad.run; 787 - val = ad.val; 753 + val = aggr->counts.val; 754 + ena = aggr->counts.ena; 755 + run = aggr->counts.run; 756 + 788 757 if (*first && metric_only) { 789 758 *first = false; 790 - aggr_printout(config, counter, id, nr); 759 + aggr_printout(config, counter, id, aggr->nr); 791 760 } 792 761 if (prefix && !metric_only) 793 762 fprintf(output, "%s", prefix); 794 763 795 764 uval = val * counter->scale; 796 
- if (cpu.cpu != -1) 797 - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); 798 765 799 - printout(config, id, nr, counter, uval, 800 - prefix, run, ena, 1.0, &rt_stat); 766 + printout(config, id, aggr->nr, counter, uval, 767 + prefix, run, ena, avg, &rt_stat, s); 768 + 801 769 if (!metric_only) 802 770 fputc('\n', output); 803 771 } ··· 729 869 if (!config->aggr_map || !config->aggr_get_id) 730 870 return; 731 871 732 - aggr_update_shadow(config, evlist); 733 - 734 872 /* 735 873 * With metric_only everything is on a single line. 736 874 * Without each counter has its own line. ··· 739 881 740 882 first = true; 741 883 evlist__for_each_entry(evlist, counter) { 884 + if (counter->merged_stat) 885 + continue; 886 + 742 887 print_counter_aggrdata(config, counter, s, 743 - prefix, metric_only, 744 - &first, (struct perf_cpu){ .cpu = -1 }); 888 + prefix, metric_only, 889 + &first); 745 890 } 746 891 if (metric_only) 747 892 fputc('\n', output); 748 893 } 749 894 } 750 895 751 - static int cmp_val(const void *a, const void *b) 752 - { 753 - return ((struct perf_aggr_thread_value *)b)->val - 754 - ((struct perf_aggr_thread_value *)a)->val; 755 - } 756 - 757 - static struct perf_aggr_thread_value *sort_aggr_thread( 758 - struct evsel *counter, 759 - int *ret, 760 - struct target *_target) 761 - { 762 - int nthreads = perf_thread_map__nr(counter->core.threads); 763 - int i = 0; 764 - double uval; 765 - struct perf_aggr_thread_value *buf; 766 - 767 - buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); 768 - if (!buf) 769 - return NULL; 770 - 771 - for (int thread = 0; thread < nthreads; thread++) { 772 - int idx; 773 - u64 ena = 0, run = 0, val = 0; 774 - 775 - perf_cpu_map__for_each_idx(idx, evsel__cpus(counter)) { 776 - struct perf_counts_values *counts = 777 - perf_counts(counter->counts, idx, thread); 778 - 779 - val += counts->val; 780 - ena += counts->ena; 781 - run += counts->run; 782 - } 783 - 784 - uval = val * counter->scale; 785 - 786 - /* 787 - * Skip value 
0 when enabling --per-thread globally, 788 - * otherwise too many 0 output. 789 - */ 790 - if (uval == 0.0 && target__has_per_thread(_target)) 791 - continue; 792 - 793 - buf[i].counter = counter; 794 - buf[i].id = aggr_cpu_id__empty(); 795 - buf[i].id.thread_idx = thread; 796 - buf[i].uval = uval; 797 - buf[i].val = val; 798 - buf[i].run = run; 799 - buf[i].ena = ena; 800 - i++; 801 - } 802 - 803 - qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); 804 - 805 - if (ret) 806 - *ret = i; 807 - 808 - return buf; 809 - } 810 - 811 - static void print_aggr_thread(struct perf_stat_config *config, 812 - struct target *_target, 813 - struct evsel *counter, char *prefix) 814 - { 815 - FILE *output = config->output; 816 - int thread, sorted_threads; 817 - struct aggr_cpu_id id; 818 - struct perf_aggr_thread_value *buf; 819 - 820 - buf = sort_aggr_thread(counter, &sorted_threads, _target); 821 - if (!buf) { 822 - perror("cannot sort aggr thread"); 823 - return; 824 - } 825 - 826 - for (thread = 0; thread < sorted_threads; thread++) { 827 - if (prefix) 828 - fprintf(output, "%s", prefix); 829 - 830 - id = buf[thread].id; 831 - printout(config, id, 0, buf[thread].counter, buf[thread].uval, 832 - prefix, buf[thread].run, buf[thread].ena, 1.0, 833 - &rt_stat); 834 - fputc('\n', output); 835 - } 836 - 837 - free(buf); 838 - } 839 - 840 - struct caggr_data { 841 - double avg, avg_enabled, avg_running; 842 - }; 843 - 844 - static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, 845 - struct evsel *counter, void *data, 846 - bool first __maybe_unused) 847 - { 848 - struct caggr_data *cd = data; 849 - struct perf_counts_values *aggr = &counter->counts->aggr; 850 - 851 - cd->avg += aggr->val; 852 - cd->avg_enabled += aggr->ena; 853 - cd->avg_running += aggr->run; 854 - } 855 - 856 - /* 857 - * Print out the results of a single counter: 858 - * aggregated counts in system-wide mode 859 - */ 860 - static void print_counter_aggr(struct perf_stat_config 
*config, 861 - struct evsel *counter, char *prefix) 862 - { 863 - bool metric_only = config->metric_only; 864 - FILE *output = config->output; 865 - double uval; 866 - struct caggr_data cd = { .avg = 0.0 }; 867 - 868 - if (!collect_data(config, counter, counter_aggr_cb, &cd)) 869 - return; 870 - 871 - if (prefix && !metric_only) 872 - fprintf(output, "%s", prefix); 873 - 874 - uval = cd.avg * counter->scale; 875 - printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running, 876 - cd.avg_enabled, cd.avg, &rt_stat); 877 - if (!metric_only) 878 - fprintf(output, "\n"); 879 - } 880 - 881 - static void counter_cb(struct perf_stat_config *config __maybe_unused, 882 - struct evsel *counter, void *data, 883 - bool first __maybe_unused) 884 - { 885 - struct aggr_data *ad = data; 886 - 887 - ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val; 888 - ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena; 889 - ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run; 890 - } 891 - 892 - /* 893 - * Print out the results of a single counter: 894 - * does not use aggregated count in system-wide 895 - */ 896 896 static void print_counter(struct perf_stat_config *config, 897 897 struct evsel *counter, char *prefix) 898 898 { 899 - FILE *output = config->output; 900 - u64 ena, run, val; 901 - double uval; 902 - int idx; 903 - struct perf_cpu cpu; 904 - struct aggr_cpu_id id; 899 + bool metric_only = config->metric_only; 900 + bool first = false; 901 + int s; 905 902 906 - perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { 907 - struct aggr_data ad = { .cpu_map_idx = idx }; 903 + /* AGGR_THREAD doesn't have config->aggr_get_id */ 904 + if (!config->aggr_map) 905 + return; 908 906 909 - if (!collect_data(config, counter, counter_cb, &ad)) 910 - return; 911 - val = ad.val; 912 - ena = ad.ena; 913 - run = ad.run; 907 + if (counter->merged_stat) 908 + return; 914 909 915 - if (prefix) 916 - fprintf(output, "%s", prefix); 917 
- 918 - uval = val * counter->scale; 919 - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); 920 - printout(config, id, 0, counter, uval, prefix, 921 - run, ena, 1.0, &rt_stat); 922 - 923 - fputc('\n', output); 910 + for (s = 0; s < config->aggr_map->nr; s++) { 911 + print_counter_aggrdata(config, counter, s, 912 + prefix, metric_only, 913 + &first); 924 914 } 925 915 } 926 916 ··· 787 1081 u64 ena, run, val; 788 1082 double uval; 789 1083 struct aggr_cpu_id id; 1084 + struct perf_stat_evsel *ps = counter->stats; 790 1085 int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); 791 1086 792 1087 if (counter_idx < 0) ··· 800 1093 aggr_printout(config, counter, id, 0); 801 1094 first = false; 802 1095 } 803 - val = perf_counts(counter->counts, counter_idx, 0)->val; 804 - ena = perf_counts(counter->counts, counter_idx, 0)->ena; 805 - run = perf_counts(counter->counts, counter_idx, 0)->run; 1096 + val = ps->aggr[counter_idx].counts.val; 1097 + ena = ps->aggr[counter_idx].counts.ena; 1098 + run = ps->aggr[counter_idx].counts.run; 806 1099 807 1100 uval = val * counter->scale; 808 1101 printout(config, id, 0, counter, uval, prefix, 809 - run, ena, 1.0, &rt_stat); 1102 + run, ena, 1.0, &rt_stat, counter_idx); 810 1103 } 811 1104 if (!first) 812 1105 fputc('\n', config->output); ··· 842 1135 }; 843 1136 bool first = true; 844 1137 845 - if (config->json_output && !config->interval) 846 - fprintf(config->output, "{"); 1138 + if (config->json_output && !config->interval) 1139 + fprintf(config->output, "{"); 847 1140 848 1141 if (prefix && !config->json_output) 849 1142 fprintf(config->output, "%s", prefix); ··· 1086 1379 "the same PMU. 
Try reorganizing the group.\n"); 1087 1380 } 1088 1381 1089 - static void print_percore_thread(struct perf_stat_config *config, 1090 - struct evsel *counter, char *prefix) 1091 - { 1092 - int s; 1093 - struct aggr_cpu_id s2, id; 1094 - struct perf_cpu_map *cpus; 1095 - bool first = true; 1096 - int idx; 1097 - struct perf_cpu cpu; 1098 - 1099 - cpus = evsel__cpus(counter); 1100 - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 1101 - s2 = config->aggr_get_id(config, cpu); 1102 - for (s = 0; s < config->aggr_map->nr; s++) { 1103 - id = config->aggr_map->map[s]; 1104 - if (aggr_cpu_id__equal(&s2, &id)) 1105 - break; 1106 - } 1107 - 1108 - print_counter_aggrdata(config, counter, s, 1109 - prefix, false, 1110 - &first, cpu); 1111 - } 1112 - } 1113 - 1114 1382 static void print_percore(struct perf_stat_config *config, 1115 1383 struct evsel *counter, char *prefix) 1116 1384 { ··· 1098 1416 return; 1099 1417 1100 1418 if (config->percore_show_thread) 1101 - return print_percore_thread(config, counter, prefix); 1419 + return print_counter(config, counter, prefix); 1102 1420 1103 1421 for (s = 0; s < config->aggr_map->nr; s++) { 1104 1422 if (prefix && metric_only) 1105 1423 fprintf(output, "%s", prefix); 1106 1424 1107 1425 print_counter_aggrdata(config, counter, s, 1108 - prefix, metric_only, 1109 - &first, (struct perf_cpu){ .cpu = -1 }); 1426 + prefix, metric_only, &first); 1110 1427 } 1111 1428 1112 1429 if (metric_only) ··· 1150 1469 print_aggr(config, evlist, prefix); 1151 1470 break; 1152 1471 case AGGR_THREAD: 1153 - evlist__for_each_entry(evlist, counter) { 1154 - print_aggr_thread(config, _target, counter, prefix); 1155 - } 1156 - break; 1157 1472 case AGGR_GLOBAL: 1158 1473 if (config->iostat_run) 1159 1474 iostat_print_counters(evlist, config, ts, prefix = buf, 1160 - print_counter_aggr); 1475 + print_counter); 1161 1476 else { 1162 1477 evlist__for_each_entry(evlist, counter) { 1163 - print_counter_aggr(config, counter, prefix); 1478 + print_counter(config, 
counter, prefix); 1164 1479 } 1165 1480 if (metric_only) 1166 1481 fputc('\n', config->output);
-5
tools/perf/util/stat.c
··· 565 565 evsel__name(counter), count[0], count[1], count[2]); 566 566 } 567 567 568 - /* 569 - * Save the full runtime - to allow normalization during printout: 570 - */ 571 - perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); 572 - 573 568 return 0; 574 569 } 575 570
-9
tools/perf/util/stat.h
··· 224 224 struct evsel; 225 225 struct evlist; 226 226 227 - struct perf_aggr_thread_value { 228 - struct evsel *counter; 229 - struct aggr_cpu_id id; 230 - double uval; 231 - u64 val; 232 - u64 run; 233 - u64 ena; 234 - }; 235 - 236 227 bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); 237 228 238 229 #define perf_stat_evsel__is(evsel, id) \