Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Replace CTRL+z with 'f' as hotkey for enable/disable events (Arnaldo Carvalho de Melo)

- Do not exit when 'f' is pressed in 'report' mode (Arnaldo Carvalho de Melo)

- Tell the user how to unfreeze events after pressing 'f' in 'perf top' (Arnaldo Carvalho de Melo)

- React to unassigned hotkey pressing in 'top/report' (Arnaldo Carvalho de Melo)

- Display total number of samples with --show-total-period in 'annotate' (Martin Liška)

- Add timeout to make procfs mmap processing more robust (Kan Liang)

- Fix sort__sym_cmp to also compare end of symbol (Yannick Brosseau)

Infrastructure changes:

- Ensure thread-stack is flushed (Adrian Hunter)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+278 -81
+4
include/uapi/linux/perf_event.h
··· 566 566 #define PERF_RECORD_MISC_GUEST_USER (5 << 0) 567 567 568 568 /* 569 + * Indicates that /proc/PID/maps parsing are truncated by time out. 570 + */ 571 + #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) 572 + /* 569 573 * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on 570 574 * different events so can reuse the same bit position. 571 575 */
+6
tools/perf/Documentation/perf-kvm.txt
··· 151 151 Show events other than HLT (x86 only) or Wait state (s390 only) 152 152 that take longer than duration usecs. 153 153 154 + --proc-map-timeout:: 155 + When processing pre-existing threads /proc/XXX/mmap, it may take 156 + a long time, because the file may be huge. A time out is needed 157 + in such cases. 158 + This option sets the time out limit. The default value is 500 ms. 159 + 154 160 SEE ALSO 155 161 -------- 156 162 linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
+5
tools/perf/Documentation/perf-record.txt
··· 271 271 snapshot can be specified. In Snapshot Mode, trace data is captured only when 272 272 signal SIGUSR2 is received. 273 273 274 + --proc-map-timeout:: 275 + When processing pre-existing threads /proc/XXX/mmap, it may take a long time, 276 + because the file may be huge. A time out is needed in such cases. 277 + This option sets the time out limit. The default value is 500 ms. 278 + 274 279 SEE ALSO 275 280 -------- 276 281 linkperf:perf-stat[1], linkperf:perf-list[1]
+6
tools/perf/Documentation/perf-top.txt
··· 201 201 Force each column width to the provided list, for large terminal 202 202 readability. 0 means no limit (default behavior). 203 203 204 + --proc-map-timeout:: 205 + When processing pre-existing threads /proc/XXX/mmap, it may take 206 + a long time, because the file may be huge. A time out is needed 207 + in such cases. 208 + This option sets the time out limit. The default value is 500 ms. 209 + 204 210 205 211 INTERACTIVE PROMPTING KEYS 206 212 --------------------------
+5
tools/perf/Documentation/perf-trace.txt
··· 121 121 --event:: 122 122 Trace other events, see 'perf list' for a complete list. 123 123 124 + --proc-map-timeout:: 125 + When processing pre-existing threads /proc/XXX/mmap, it may take a long time, 126 + because the file may be huge. A time out is needed in such cases. 127 + This option sets the time out limit. The default value is 500 ms. 128 + 124 129 PAGEFAULTS 125 130 ---------- 126 131
+2
tools/perf/builtin-annotate.c
··· 329 329 "objdump binary to use for disassembly and annotations"), 330 330 OPT_BOOLEAN(0, "group", &symbol_conf.event_group, 331 331 "Show event group information together"), 332 + OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 333 + "Show a column with the sum of periods"), 332 334 OPT_END() 333 335 }; 334 336 int ret = hists__init();
+4 -1
tools/perf/builtin-kvm.c
··· 1311 1311 "show events other than" 1312 1312 " HLT (x86 only) or Wait state (s390 only)" 1313 1313 " that take longer than duration usecs"), 1314 + OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout, 1315 + "per thread proc mmap processing timeout in ms"), 1314 1316 OPT_END() 1315 1317 }; 1316 1318 const char * const live_usage[] = { ··· 1340 1338 kvm->opts.target.uses_mmap = false; 1341 1339 kvm->opts.target.uid_str = NULL; 1342 1340 kvm->opts.target.uid = UINT_MAX; 1341 + kvm->opts.proc_map_timeout = 500; 1343 1342 1344 1343 symbol__init(NULL); 1345 1344 disable_buildid_cache(); ··· 1396 1393 perf_session__set_id_hdr_size(kvm->session); 1397 1394 ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true); 1398 1395 machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target, 1399 - kvm->evlist->threads, false); 1396 + kvm->evlist->threads, false, kvm->opts.proc_map_timeout); 1400 1397 err = kvm_live_open_events(kvm); 1401 1398 if (err) 1402 1399 goto out;
+5 -1
tools/perf/builtin-record.c
··· 598 598 } 599 599 600 600 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 601 - process_synthesized_event, opts->sample_address); 601 + process_synthesized_event, opts->sample_address, 602 + opts->proc_map_timeout); 602 603 if (err != 0) 603 604 goto out_child; 604 605 ··· 960 959 .uses_mmap = true, 961 960 .default_per_cpu = true, 962 961 }, 962 + .proc_map_timeout = 500, 963 963 }, 964 964 .tool = { 965 965 .sample = process_sample_event, ··· 1068 1066 parse_clockid), 1069 1067 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 1070 1068 "opts", "AUX area tracing Snapshot Mode", ""), 1069 + OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, 1070 + "per thread proc mmap processing timeout in ms"), 1071 1071 OPT_END() 1072 1072 }; 1073 1073
+12 -3
tools/perf/builtin-top.c
··· 591 591 top->min_percent, 592 592 &top->session->header.env); 593 593 594 - if (key != CTRL('z')) 594 + if (key != 'f') 595 595 break; 596 596 597 597 perf_evlist__toggle_enable(top->evlist); ··· 599 599 * No need to refresh, resort/decay histogram entries 600 600 * if we are not collecting samples: 601 601 */ 602 - hbt.refresh = top->evlist->enabled ? top->delay_secs : 0; 602 + if (top->evlist->enabled) { 603 + hbt.refresh = top->delay_secs; 604 + help = "Press 'f' to disable the events or 'h' to see other hotkeys"; 605 + } else { 606 + help = "Press 'f' again to re-enable the events"; 607 + hbt.refresh = 0; 608 + } 603 609 } 604 610 605 611 done = 1; ··· 977 971 goto out_delete; 978 972 979 973 machine__synthesize_threads(&top->session->machines.host, &opts->target, 980 - top->evlist->threads, false); 974 + top->evlist->threads, false, opts->proc_map_timeout); 981 975 ret = perf_top__start_counters(top); 982 976 if (ret) 983 977 goto out_delete; ··· 1087 1081 .target = { 1088 1082 .uses_mmap = true, 1089 1083 }, 1084 + .proc_map_timeout = 500, 1090 1085 }, 1091 1086 .max_stack = PERF_MAX_STACK_DEPTH, 1092 1087 .sym_pcnt_filter = 5, ··· 1187 1180 OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str, 1188 1181 "width[,width...]", 1189 1182 "don't try to adjust column width, use these fixed values"), 1183 + OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, 1184 + "per thread proc mmap processing timeout in ms"), 1190 1185 OPT_END() 1191 1186 }; 1192 1187 const char * const top_usage[] = {
+5 -1
tools/perf/builtin-trace.c
··· 1518 1518 return -ENOMEM; 1519 1519 1520 1520 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1521 - evlist->threads, trace__tool_process, false); 1521 + evlist->threads, trace__tool_process, false, 1522 + trace->opts.proc_map_timeout); 1522 1523 if (err) 1523 1524 symbol__exit(); 1524 1525 ··· 2748 2747 .user_interval = ULLONG_MAX, 2749 2748 .no_buffering = true, 2750 2749 .mmap_pages = UINT_MAX, 2750 + .proc_map_timeout = 500, 2751 2751 }, 2752 2752 .output = stdout, 2753 2753 .show_comm = true, ··· 2798 2796 "Trace pagefaults", parse_pagefaults, "maj"), 2799 2797 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 2800 2798 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 2799 + OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 2800 + "per thread proc mmap processing timeout in ms"), 2801 2801 OPT_END() 2802 2802 }; 2803 2803 const char * const trace_subcommands[] = { "record", NULL };
+1
tools/perf/perf.h
··· 69 69 unsigned initial_delay; 70 70 bool use_clockid; 71 71 clockid_t clockid; 72 + unsigned int proc_map_timeout; 72 73 }; 73 74 74 75 struct option;
+1 -1
tools/perf/tests/code-reading.c
··· 451 451 } 452 452 453 453 ret = perf_event__synthesize_thread_map(NULL, threads, 454 - perf_event__process, machine, false); 454 + perf_event__process, machine, false, 500); 455 455 if (ret < 0) { 456 456 pr_debug("perf_event__synthesize_thread_map failed\n"); 457 457 goto out_err;
+1 -1
tools/perf/tests/dwarf-unwind.c
··· 28 28 pid_t pid = getpid(); 29 29 30 30 return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, 31 - mmap_handler, machine, true); 31 + mmap_handler, machine, true, 500); 32 32 } 33 33 34 34 #define MAX_STACK 8
+2 -2
tools/perf/tests/mmap-thread-lookup.c
··· 129 129 { 130 130 return perf_event__synthesize_threads(NULL, 131 131 perf_event__process, 132 - machine, 0); 132 + machine, 0, 500); 133 133 } 134 134 135 135 static int synth_process(struct machine *machine) ··· 141 141 142 142 err = perf_event__synthesize_thread_map(NULL, map, 143 143 perf_event__process, 144 - machine, 0); 144 + machine, 0, 500); 145 145 146 146 thread_map__delete(map); 147 147 return err;
+43 -17
tools/perf/ui/browsers/annotate.c
··· 11 11 #include "../../util/evsel.h" 12 12 #include <pthread.h> 13 13 14 + struct disasm_line_samples { 15 + double percent; 16 + u64 nr; 17 + }; 18 + 14 19 struct browser_disasm_line { 15 - struct rb_node rb_node; 16 - u32 idx; 17 - int idx_asm; 18 - int jump_sources; 20 + struct rb_node rb_node; 21 + u32 idx; 22 + int idx_asm; 23 + int jump_sources; 19 24 /* 20 25 * actual length of this array is saved on the nr_events field 21 26 * of the struct annotate_browser 22 27 */ 23 - double percent[1]; 28 + struct disasm_line_samples samples[1]; 24 29 }; 25 30 26 31 static struct annotate_browser_opt { ··· 33 28 use_offset, 34 29 jump_arrows, 35 30 show_linenr, 36 - show_nr_jumps; 31 + show_nr_jumps, 32 + show_total_period; 37 33 } annotate_browser__opts = { 38 34 .use_offset = true, 39 35 .jump_arrows = true, ··· 111 105 char bf[256]; 112 106 113 107 for (i = 0; i < ab->nr_events; i++) { 114 - if (bdl->percent[i] > percent_max) 115 - percent_max = bdl->percent[i]; 108 + if (bdl->samples[i].percent > percent_max) 109 + percent_max = bdl->samples[i].percent; 116 110 } 117 111 118 112 if (dl->offset != -1 && percent_max != 0.0) { 119 113 for (i = 0; i < ab->nr_events; i++) { 120 - ui_browser__set_percent_color(browser, bdl->percent[i], 114 + ui_browser__set_percent_color(browser, 115 + bdl->samples[i].percent, 121 116 current_entry); 122 - slsmg_printf("%6.2f ", bdl->percent[i]); 117 + if (annotate_browser__opts.show_total_period) 118 + slsmg_printf("%6" PRIu64 " ", 119 + bdl->samples[i].nr); 120 + else 121 + slsmg_printf("%6.2f ", bdl->samples[i].percent); 123 122 } 124 123 } else { 125 124 ui_browser__set_percent_color(browser, 0, current_entry); ··· 284 273 int i; 285 274 286 275 for (i = 0; i < nr_pcnt; i++) { 287 - if (a->percent[i] == b->percent[i]) 276 + if (a->samples[i].percent == b->samples[i].percent) 288 277 continue; 289 - return a->percent[i] < b->percent[i]; 278 + return a->samples[i].percent < b->samples[i].percent; 290 279 } 291 280 return 0; 292 281 } 
··· 377 366 next = disasm__get_next_ip_line(&notes->src->source, pos); 378 367 379 368 for (i = 0; i < browser->nr_events; i++) { 380 - bpos->percent[i] = disasm__calc_percent(notes, 369 + u64 nr_samples; 370 + 371 + bpos->samples[i].percent = disasm__calc_percent(notes, 381 372 evsel->idx + i, 382 373 pos->offset, 383 374 next ? next->offset : len, 384 - &path); 375 + &path, &nr_samples); 376 + bpos->samples[i].nr = nr_samples; 385 377 386 - if (max_percent < bpos->percent[i]) 387 - max_percent = bpos->percent[i]; 378 + if (max_percent < bpos->samples[i].percent) 379 + max_percent = bpos->samples[i].percent; 388 380 } 389 381 390 382 if (max_percent < 0.01) { ··· 751 737 "n Search next string\n" 752 738 "o Toggle disassembler output/simplified view\n" 753 739 "s Toggle source code view\n" 740 + "t Toggle total period view\n" 754 741 "/ Search string\n" 755 742 "k Toggle line numbers\n" 756 743 "r Run available scripts\n" ··· 827 812 ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions."); 828 813 } 829 814 continue; 815 + case 't': 816 + annotate_browser__opts.show_total_period = 817 + !annotate_browser__opts.show_total_period; 818 + annotate_browser__update_addr_width(browser); 819 + continue; 830 820 case K_LEFT: 831 821 case K_ESC: 832 822 case 'q': ··· 852 832 int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, 853 833 struct hist_browser_timer *hbt) 854 834 { 835 + /* Set default value for show_total_period. */ 836 + annotate_browser__opts.show_total_period = 837 + symbol_conf.show_total_period; 838 + 855 839 return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); 856 840 } 857 841 ··· 953 929 954 930 if (perf_evsel__is_group_event(evsel)) { 955 931 nr_pcnt = evsel->nr_members; 956 - sizeof_bdl += sizeof(double) * (nr_pcnt - 1); 932 + sizeof_bdl += sizeof(struct disasm_line_samples) * 933 + (nr_pcnt - 1); 957 934 958 935 if (symbol__annotate(sym, map, sizeof_bdl) < 0) { ··· 1031 1006 ANNOTATE_CFG(show_linenr), 1032 1007 ANNOTATE_CFG(show_nr_jumps), 1033 1008 ANNOTATE_CFG(use_offset), 1009 + ANNOTATE_CFG(show_total_period), 1034 1010 }; 1035 1011 1036 1012 #undef ANNOTATE_CFG
+9 -6
tools/perf/ui/browsers/hists.c
··· 424 424 "Or reduce the sampling frequency."); 425 425 } 426 426 427 - static int hist_browser__run(struct hist_browser *browser) 427 + static int hist_browser__run(struct hist_browser *browser, const char *help) 428 428 { 429 429 int key; 430 430 char title[160]; ··· 436 436 437 437 hists__browser_title(browser->hists, hbt, title, sizeof(title)); 438 438 439 - if (ui_browser__show(&browser->b, title, 440 - "Press '?' for help on key bindings") < 0) 439 + if (ui_browser__show(&browser->b, title, help) < 0) 441 440 return -1; 442 441 443 442 while (1) { ··· 1735 1736 "t Zoom into current Thread\n" 1736 1737 "V Verbose (DSO names in callchains, etc)\n" 1737 1738 "z Toggle zeroing of samples\n" 1738 - "CTRL+z Enable/Disable events\n" 1739 + "f Enable/Disable events\n" 1739 1740 "/ Filter symbol by name"; 1740 1741 1741 1742 if (browser == NULL) ··· 1772 1773 1773 1774 nr_options = 0; 1774 1775 1775 - key = hist_browser__run(browser); 1776 + key = hist_browser__run(browser, helpline); 1776 1777 1777 1778 if (browser->he_selection != NULL) { 1778 1779 thread = hist_browser__selected_thread(browser); ··· 1900 1901 /* Fall thru */ 1901 1902 case 'q': 1902 1903 case CTRL('c'): 1903 - case CTRL('z'): 1904 1904 goto out_free_stack; 1905 + case 'f': 1906 + if (!is_report_browser(hbt)) 1907 + goto out_free_stack; 1908 + /* Fall thru */ 1905 1909 default: 1910 + helpline = "Press '?' for help on key bindings"; 1906 1911 continue; 1907 1912 } 1908 1913
+35 -17
tools/perf/util/annotate.c
··· 654 654 } 655 655 656 656 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, 657 - s64 end, const char **path) 657 + s64 end, const char **path, u64 *nr_samples) 658 658 { 659 659 struct source_line *src_line = notes->src->lines; 660 660 double percent = 0.0; 661 + *nr_samples = 0; 661 662 662 663 if (src_line) { 663 664 size_t sizeof_src_line = sizeof(*src_line) + 664 - sizeof(src_line->p) * (src_line->nr_pcnt - 1); 665 + sizeof(src_line->samples) * (src_line->nr_pcnt - 1); 665 666 666 667 while (offset < end) { 667 668 src_line = (void *)notes->src->lines + ··· 671 670 if (*path == NULL) 672 671 *path = src_line->path; 673 672 674 - percent += src_line->p[evidx].percent; 673 + percent += src_line->samples[evidx].percent; 674 + *nr_samples += src_line->samples[evidx].nr; 675 675 offset++; 676 676 } 677 677 } else { ··· 682 680 while (offset < end) 683 681 hits += h->addr[offset++]; 684 682 685 - if (h->sum) 683 + if (h->sum) { 684 + *nr_samples = hits; 686 685 percent = 100.0 * hits / h->sum; 686 + } 687 687 } 688 688 689 689 return percent; ··· 700 696 701 697 if (dl->offset != -1) { 702 698 const char *path = NULL; 699 + u64 nr_samples; 703 700 double percent, max_percent = 0.0; 704 701 double *ppercents = &percent; 702 + u64 *psamples = &nr_samples; 705 703 int i, nr_percent = 1; 706 704 const char *color; 707 705 struct annotation *notes = symbol__annotation(sym); ··· 716 710 if (perf_evsel__is_group_event(evsel)) { 717 711 nr_percent = evsel->nr_members; 718 712 ppercents = calloc(nr_percent, sizeof(double)); 719 - if (ppercents == NULL) 713 + psamples = calloc(nr_percent, sizeof(u64)); 714 + if (ppercents == NULL || psamples == NULL) { 720 715 return -1; 716 + } 721 717 } 722 718 723 719 for (i = 0; i < nr_percent; i++) { 727 719 notes->src->lines ? i : evsel->idx + i, 728 720 offset, 729 721 next ? next->offset : (s64) len, 730 - &path); 722 + &path, &nr_samples); 731 723 732 724 ppercents[i] = percent; 725 + psamples[i] = nr_samples; 733 726 if (percent > max_percent) 734 727 max_percent = percent; 735 728 } ··· 768 759 769 760 for (i = 0; i < nr_percent; i++) { 770 761 percent = ppercents[i]; 762 + nr_samples = psamples[i]; 771 763 color = get_percent_color(percent); 772 - color_fprintf(stdout, color, " %7.2f", percent); 764 + 765 + if (symbol_conf.show_total_period) 766 + color_fprintf(stdout, color, " %7" PRIu64, 767 + nr_samples); 768 + else 769 + color_fprintf(stdout, color, " %7.2f", percent); 773 770 } 774 771 775 772 printf(" : "); ··· 784 769 785 770 if (ppercents != &percent) 786 771 free(ppercents); 772 + 773 + if (psamples != &nr_samples) 774 + free(psamples); 787 775 788 776 } else if (max_lines && printed >= max_lines) 789 777 return 1; ··· 1121 1103 ret = strcmp(iter->path, src_line->path); 1122 1104 if (ret == 0) { 1123 1105 for (i = 0; i < src_line->nr_pcnt; i++) 1124 - iter->p[i].percent_sum += src_line->p[i].percent; 1106 + iter->samples[i].percent_sum += src_line->samples[i].percent; 1125 1107 return; 1126 1108 } 1127 1109 ··· 1132 1114 } 1133 1115 1134 1116 for (i = 0; i < src_line->nr_pcnt; i++) 1135 - src_line->p[i].percent_sum = src_line->p[i].percent; 1117 + src_line->samples[i].percent_sum = src_line->samples[i].percent; 1136 1118 1137 1119 rb_link_node(&src_line->node, parent, p); 1138 1120 rb_insert_color(&src_line->node, root); ··· 1143 1125 int i; 1144 1126 1145 1127 for (i = 0; i < a->nr_pcnt; i++) { 1146 - if (a->p[i].percent_sum == b->p[i].percent_sum) 1128 + if (a->samples[i].percent_sum == b->samples[i].percent_sum) 1147 1129 continue; 1148 - return a->p[i].percent_sum > b->p[i].percent_sum; 1130 + return a->samples[i].percent_sum > b->samples[i].percent_sum; 1149 1131 } 1150 1132 1151 1133 return 0; ··· 1197 1179 int i; 1198 1180 1199 1181 sizeof_src_line = sizeof(*src_line) + 1200 - (sizeof(src_line->p) * (src_line->nr_pcnt - 1)); 1182 + (sizeof(src_line->samples) * (src_line->nr_pcnt - 1)); 1201 1183 1202 1184 for (i = 0; i < len; i++) { 1203 1185 free_srcline(src_line->path); ··· 1229 1211 h_sum += h->sum; 1230 1212 } 1231 1213 nr_pcnt = evsel->nr_members; 1232 - sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->p); 1214 + sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples); 1233 1215 } 1234 1216 1235 1217 if (!h_sum) ··· 1249 1231 1250 1232 for (k = 0; k < nr_pcnt; k++) { 1251 1233 h = annotation__histogram(notes, evidx + k); 1252 - src_line->p[k].percent = 100.0 * h->addr[i] / h->sum; 1234 + src_line->samples[k].percent = 100.0 * h->addr[i] / h->sum; 1253 1235 1254 - if (src_line->p[k].percent > percent_max) 1255 - percent_max = src_line->p[k].percent; 1236 + if (src_line->samples[k].percent > percent_max) 1237 + percent_max = src_line->samples[k].percent; 1256 1238 1258 1240 if (percent_max <= 0.5) ··· 1292 1274 1293 1275 src_line = rb_entry(node, struct source_line, node); 1294 1276 for (i = 0; i < src_line->nr_pcnt; i++) { 1295 - percent = src_line->samples[i].percent_sum; 1277 + percent = src_line->samples[i].percent_sum; 1296 1278 color = get_percent_color(percent); 1297 1279 color_fprintf(stdout, color, " %7.2f", percent); 1298 1280
+4 -3
tools/perf/util/annotate.h
··· 72 72 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); 73 73 size_t disasm__fprintf(struct list_head *head, FILE *fp); 74 74 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, 75 - s64 end, const char **path); 75 + s64 end, const char **path, u64 *nr_samples); 76 76 77 77 struct sym_hist { 78 78 u64 sum; 79 79 u64 addr[0]; 80 80 }; 81 81 82 - struct source_line_percent { 82 + struct source_line_samples { 83 83 double percent; 84 84 double percent_sum; 85 + double nr; 85 86 }; 86 87 87 88 struct source_line { 88 89 struct rb_node node; 89 90 char *path; 90 91 int nr_pcnt; 91 - struct source_line_percent p[1]; 92 + struct source_line_samples samples[1]; 92 93 }; 93 94 94 95 /** struct annotated_source - symbols with hits have this attached as in sannotation
+37 -9
tools/perf/util/event.c
··· 218 218 pid_t pid, pid_t tgid, 219 219 perf_event__handler_t process, 220 220 struct machine *machine, 221 - bool mmap_data) 221 + bool mmap_data, 222 + unsigned int proc_map_timeout) 222 223 { 223 224 char filename[PATH_MAX]; 224 225 FILE *fp; 226 + unsigned long long t; 227 + bool truncation = false; 228 + unsigned long long timeout = proc_map_timeout * 1000000ULL; 225 229 int rc = 0; 226 230 227 231 if (machine__is_default_guest(machine)) ··· 244 240 } 245 241 246 242 event->header.type = PERF_RECORD_MMAP2; 243 + t = rdclock(); 247 244 248 245 while (1) { 249 246 char bf[BUFSIZ]; ··· 257 252 258 253 if (fgets(bf, sizeof(bf), fp) == NULL) 259 254 break; 255 + 256 + if ((rdclock() - t) > timeout) { 257 + pr_warning("Reading %s time out. " 258 + "You may want to increase " 259 + "the time limit by --proc-map-timeout\n", 260 + filename); 261 + truncation = true; 262 + goto out; 263 + } 260 264 261 265 /* ensure null termination since stack will be reused. */ 262 266 strcpy(execname, ""); ··· 315 301 event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; 316 302 } 317 303 304 + out: 305 + if (truncation) 306 + event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; 307 + 318 308 if (!strcmp(execname, "")) 319 309 strcpy(execname, anonstr); 320 310 ··· 337 319 rc = -1; 338 320 break; 339 321 } 322 + 323 + if (truncation) 324 + break; 340 325 } 341 326 342 327 fclose(fp); ··· 407 386 pid_t pid, int full, 408 387 perf_event__handler_t process, 409 388 struct perf_tool *tool, 410 - struct machine *machine, bool mmap_data) 389 + struct machine *machine, 390 + bool mmap_data, 391 + unsigned int proc_map_timeout) 411 392 { 412 393 char filename[PATH_MAX]; 413 394 DIR *tasks; ··· 426 403 return -1; 427 404 428 405 return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, 429 406 process, machine, mmap_data, 407 + proc_map_timeout); 430 408 } 431 409 432 410 if (machine__is_default_guest(machine)) ··· 468 444 if (_pid == pid) { 469 445 /* process the parent's maps too */ 470 446 rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, 471 - process, machine, mmap_data); 447 + process, machine, mmap_data, proc_map_timeout); 472 448 if (rc) 473 449 break; 474 450 } ··· 482 458 struct thread_map *threads, 483 459 perf_event__handler_t process, 484 460 struct machine *machine, 485 - bool mmap_data) 461 + bool mmap_data, 462 + unsigned int proc_map_timeout) 486 463 { 487 464 union perf_event *comm_event, *mmap_event, *fork_event; 488 465 int err = -1, thread, j; ··· 506 481 fork_event, 507 482 threads->map[thread], 0, 508 483 process, tool, machine, 509 - mmap_data)) { 484 + mmap_data, proc_map_timeout)) { 510 485 err = -1; 511 486 break; 512 487 } ··· 532 507 fork_event, 533 508 comm_event->comm.pid, 0, 534 509 process, tool, machine, 535 - mmap_data)) { 510 + mmap_data, proc_map_timeout)) { 536 511 err = -1; 537 512 break; 538 513 } ··· 549 524 550 525 int perf_event__synthesize_threads(struct perf_tool *tool, 551 526 perf_event__handler_t process, 552 - struct machine *machine, bool mmap_data) 527 + struct machine *machine, 528 + bool mmap_data, 529 + unsigned int proc_map_timeout) 553 530 { 554 531 DIR *proc; 555 532 char proc_path[PATH_MAX]; ··· 591 564 * one thread couldn't be synthesized. 592 565 */ 593 566 __event__synthesize_thread(comm_event, mmap_event, fork_event, pid, 594 - 1, process, tool, machine, mmap_data); 567 + 1, process, tool, machine, mmap_data, 568 + proc_map_timeout); 595 569 } 596 570 597 571 err = 0;
+7 -3
tools/perf/util/event.h
··· 265 265 u32 nr_unknown_id; 266 266 u32 nr_unprocessable_samples; 267 267 u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX]; 268 + u32 nr_proc_map_timeout; 268 269 }; 269 270 270 271 struct attr_event { ··· 384 383 int perf_event__synthesize_thread_map(struct perf_tool *tool, 385 384 struct thread_map *threads, 386 385 perf_event__handler_t process, 387 - struct machine *machine, bool mmap_data); 386 + struct machine *machine, bool mmap_data, 387 + unsigned int proc_map_timeout); 388 388 int perf_event__synthesize_threads(struct perf_tool *tool, 389 389 perf_event__handler_t process, 390 - struct machine *machine, bool mmap_data); 390 + struct machine *machine, bool mmap_data, 391 + unsigned int proc_map_timeout); 391 392 int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, 392 393 perf_event__handler_t process, 393 394 struct machine *machine); ··· 471 468 pid_t pid, pid_t tgid, 472 469 perf_event__handler_t process, 473 470 struct machine *machine, 474 - bool mmap_data); 471 + bool mmap_data, 472 + unsigned int proc_map_timeout); 475 473 476 474 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); 477 475 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
+25 -3
tools/perf/util/machine.c
··· 1890 1890 return rc; 1891 1891 } 1892 1892 1893 + int machines__for_each_thread(struct machines *machines, 1894 + int (*fn)(struct thread *thread, void *p), 1895 + void *priv) 1896 + { 1897 + struct rb_node *nd; 1898 + int rc = 0; 1899 + 1900 + rc = machine__for_each_thread(&machines->host, fn, priv); 1901 + if (rc != 0) 1902 + return rc; 1903 + 1904 + for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { 1905 + struct machine *machine = rb_entry(nd, struct machine, rb_node); 1906 + 1907 + rc = machine__for_each_thread(machine, fn, priv); 1908 + if (rc != 0) 1909 + return rc; 1910 + } 1911 + return rc; 1912 + } 1913 + 1893 1914 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, 1894 1915 struct target *target, struct thread_map *threads, 1895 - perf_event__handler_t process, bool data_mmap) 1916 + perf_event__handler_t process, bool data_mmap, 1917 + unsigned int proc_map_timeout) 1896 1918 { 1897 1919 if (target__has_task(target)) 1898 - return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); 1920 + return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout); 1899 1921 else if (target__has_cpu(target)) 1900 - return perf_event__synthesize_threads(tool, process, machine, data_mmap); 1922 + return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout); 1901 1923 /* command specified */ 1902 1924 return 0; 1903 1925 }
+9 -3
tools/perf/util/machine.h
··· 216 216 int machine__for_each_thread(struct machine *machine, 217 217 int (*fn)(struct thread *thread, void *p), 218 218 void *priv); 219 + int machines__for_each_thread(struct machines *machines, 220 + int (*fn)(struct thread *thread, void *p), 221 + void *priv); 219 222 220 223 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, 221 224 struct target *target, struct thread_map *threads, 222 - perf_event__handler_t process, bool data_mmap); 225 + perf_event__handler_t process, bool data_mmap, 226 + unsigned int proc_map_timeout); 223 227 static inline 224 228 int machine__synthesize_threads(struct machine *machine, struct target *target, 225 - struct thread_map *threads, bool data_mmap) 229 + struct thread_map *threads, bool data_mmap, 230 + unsigned int proc_map_timeout) 226 231 { 227 232 return __machine__synthesize_threads(machine, NULL, target, threads, 228 - perf_event__process, data_mmap); 233 + perf_event__process, data_mmap, 234 + proc_map_timeout); 229 235 } 230 236 231 237 pid_t machine__get_current_tid(struct machine *machine, int cpu);
+33
tools/perf/util/session.c
··· 16 16 #include "perf_regs.h" 17 17 #include "asm/bug.h" 18 18 #include "auxtrace.h" 19 + #include "thread-stack.h" 19 20 20 21 static int perf_session__deliver_event(struct perf_session *session, 21 22 union perf_event *event, ··· 1064 1063 case PERF_RECORD_MMAP: 1065 1064 return tool->mmap(tool, event, sample, machine); 1066 1065 case PERF_RECORD_MMAP2: 1066 + if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT) 1067 + ++evlist->stats.nr_proc_map_timeout; 1067 1068 return tool->mmap2(tool, event, sample, machine); 1068 1069 case PERF_RECORD_COMM: 1069 1070 return tool->comm(tool, event, sample, machine); ··· 1362 1359 ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); 1363 1360 1364 1361 events_stats__auxtrace_error_warn(stats); 1362 + 1363 + if (stats->nr_proc_map_timeout != 0) { 1364 + ui__warning("%d map information files for pre-existing threads were\n" 1365 + "not processed, if there are samples for addresses they\n" 1366 + "will not be resolved, you may find out which are these\n" 1367 + "threads by running with -v and redirecting the output\n" 1368 + "to a file.\n" 1369 + "The time limit to process proc map is too short?\n" 1370 + "Increase it by --proc-map-timeout\n", 1371 + stats->nr_proc_map_timeout); 1372 + } 1373 + } 1374 + 1375 + static int perf_session__flush_thread_stack(struct thread *thread, 1376 + void *p __maybe_unused) 1377 + { 1378 + return thread_stack__flush(thread); 1379 + } 1380 + 1381 + static int perf_session__flush_thread_stacks(struct perf_session *session) 1382 + { 1383 + return machines__for_each_thread(&session->machines, 1384 + perf_session__flush_thread_stack, 1385 + NULL); 1365 1386 } 1366 1387 1367 1388 volatile int session_done; ··· 1477 1450 if (err) 1478 1451 goto out_err; 1479 1452 err = auxtrace__flush_events(session, tool); 1453 + if (err) 1454 + goto out_err; 1455 + err = perf_session__flush_thread_stacks(session); 1480 1456 out_err: 1481 1457 free(buf); 1482 1458 perf_session__warn_about_errors(session); ··· 1630 1600 if (err) 1631 1601 goto out_err; 1632 1602 err = auxtrace__flush_events(session, tool); 1603 + if (err) 1604 + goto out_err; 1605 + err = perf_session__flush_thread_stacks(session); 1633 1606 out_err: 1634 1607 ui_progress__finish(); 1635 1608 perf_session__warn_about_errors(session);
+3 -5
tools/perf/util/sort.c
··· 182 182 183 183 static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) 184 184 { 185 - u64 ip_l, ip_r; 186 - 187 185 if (!sym_l || !sym_r) 188 186 return cmp_null(sym_l, sym_r); 189 187 190 188 if (sym_l == sym_r) 191 189 return 0; 192 190 193 - ip_l = sym_l->start; 194 - ip_r = sym_r->start; 191 + if (sym_l->start != sym_r->start) 192 + return (int64_t)(sym_r->start - sym_l->start); 195 193 196 - return (int64_t)(ip_r - ip_l); 194 + return (int64_t)(sym_r->end - sym_l->end); 197 195 } 198 196 199 197 static int64_t
+13 -5
tools/perf/util/thread-stack.c
··· 219 219 return crp->process(&cr, crp->data); 220 220 } 221 221 222 - static int thread_stack__flush(struct thread *thread, struct thread_stack *ts) 222 + static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) 223 223 { 224 224 struct call_return_processor *crp = ts->crp; 225 225 int err; ··· 238 238 return err; 239 239 } 240 240 } 241 + 242 + return 0; 243 + } 244 + 245 + int thread_stack__flush(struct thread *thread) 246 + { 247 + if (thread->ts) 248 + return __thread_stack__flush(thread, thread->ts); 241 249 242 250 return 0; 243 251 } ··· 272 264 */ 273 265 if (trace_nr != thread->ts->trace_nr) { 274 266 if (thread->ts->trace_nr) 275 - thread_stack__flush(thread, thread->ts); 267 + __thread_stack__flush(thread, thread->ts); 276 268 thread->ts->trace_nr = trace_nr; 277 269 } 278 270 ··· 305 297 306 298 if (trace_nr != thread->ts->trace_nr) { 307 299 if (thread->ts->trace_nr) 308 - thread_stack__flush(thread, thread->ts); 300 + __thread_stack__flush(thread, thread->ts); 309 301 thread->ts->trace_nr = trace_nr; 310 302 } 311 303 } ··· 313 305 void thread_stack__free(struct thread *thread) 314 306 { 315 307 if (thread->ts) { 316 - thread_stack__flush(thread, thread->ts); 308 + __thread_stack__flush(thread, thread->ts); 317 309 zfree(&thread->ts->stack); 318 310 zfree(&thread->ts); 319 311 } ··· 697 689 698 690 /* Flush stack on exec */ 699 691 if (ts->comm != comm && thread->pid_ == thread->tid) { 700 - err = thread_stack__flush(thread, ts); 692 + err = __thread_stack__flush(thread, ts); 701 693 if (err) 702 694 return err; 703 695 ts->comm = comm;
+1
tools/perf/util/thread-stack.h
··· 96 96 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); 97 97 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, 98 98 size_t sz, u64 ip); 99 + int thread_stack__flush(struct thread *thread); 99 100 void thread_stack__free(struct thread *thread); 100 101 101 102 struct call_return_processor *