Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
"Mostly tooling fixes, but also some kernel side fixes: uncore PMU
driver fix, user regs sampling fix and an instruction decoder fix that
unbreaks PEBS precise sampling"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes
perf/x86_64: Improve user regs sampling
perf: Move task_pt_regs sampling into arch code
x86: Fix off-by-one in instruction decoder
perf hists browser: Fix segfault when showing callchain
perf callchain: Free callchains when hist entries are deleted
perf hists: Fix children sort key behavior
perf diff: Fix to sort by baseline field by default
perf list: Fix --raw-dump option
perf probe: Fix crash in dwarf_getcfi_elf
perf probe: Fix to fall back to find probe point in symbols
perf callchain: Append callchains only when requested
perf ui/tui: Print backtrace symbols when segfault occurs
perf report: Show progress bar for output resorting

+8
arch/arm/kernel/perf_regs.c
···
 {
         return PERF_SAMPLE_REGS_ABI_32;
 }
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+                        struct pt_regs *regs,
+                        struct pt_regs *regs_user_copy)
+{
+        regs_user->regs = task_pt_regs(current);
+        regs_user->abi = perf_reg_abi(current);
+}
+8
arch/arm64/kernel/perf_regs.c
···
         else
                 return PERF_SAMPLE_REGS_ABI_64;
 }
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+                        struct pt_regs *regs,
+                        struct pt_regs *regs_user_copy)
+{
+        regs_user->regs = task_pt_regs(current);
+        regs_user->abi = perf_reg_abi(current);
+}
+1 -1
arch/x86/kernel/cpu/perf_event_intel_uncore.h
···
 #define UNCORE_PCI_DEV_TYPE(data)       ((data >> 8) & 0xff)
 #define UNCORE_PCI_DEV_IDX(data)        (data & 0xff)
 #define UNCORE_EXTRA_PCI_DEV            0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX        2
+#define UNCORE_EXTRA_PCI_DEV_MAX        3

 /* support up to 8 sockets */
 #define UNCORE_SOCKET_MAX               8
+17
arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
···
 enum {
         SNBEP_PCI_QPI_PORT0_FILTER,
         SNBEP_PCI_QPI_PORT1_FILTER,
+        HSWEP_PCI_PCU_3,
 };

 static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
···
 {
         if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
                 hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+
+        /* Detect 6-8 core systems with only two SBOXes */
+        if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
+                u32 capid4;
+
+                pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
+                                      0x94, &capid4);
+                if (((capid4 >> 6) & 0x3) == 0)
+                        hswep_uncore_sbox.num_boxes = 2;
+        }
+
         uncore_msr_uncores = hswep_msr_uncores;
 }
···
                 PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96),
                 .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
                                                    SNBEP_PCI_QPI_PORT1_FILTER),
+        },
+        { /* PCU.3 (for Capability registers) */
+                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
+                .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                   HSWEP_PCI_PCU_3),
         },
         { /* end: all zeroes */ }
 };
+90
arch/x86/kernel/perf_regs.c
···
 {
         return PERF_SAMPLE_REGS_ABI_32;
 }
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+                        struct pt_regs *regs,
+                        struct pt_regs *regs_user_copy)
+{
+        regs_user->regs = task_pt_regs(current);
+        regs_user->abi = perf_reg_abi(current);
+}
 #else /* CONFIG_X86_64 */
 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
                        (1ULL << PERF_REG_X86_ES) | \
···
                 return PERF_SAMPLE_REGS_ABI_32;
         else
                 return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+                        struct pt_regs *regs,
+                        struct pt_regs *regs_user_copy)
+{
+        struct pt_regs *user_regs = task_pt_regs(current);
+
+        /*
+         * If we're in an NMI that interrupted task_pt_regs setup, then
+         * we can't sample user regs at all.  This check isn't really
+         * sufficient, though, as we could be in an NMI inside an interrupt
+         * that happened during task_pt_regs setup.
+         */
+        if (regs->sp > (unsigned long)&user_regs->r11 &&
+            regs->sp <= (unsigned long)(user_regs + 1)) {
+                regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+                regs_user->regs = NULL;
+                return;
+        }
+
+        /*
+         * RIP, flags, and the argument registers are usually saved.
+         * orig_ax is probably okay, too.
+         */
+        regs_user_copy->ip = user_regs->ip;
+        regs_user_copy->cx = user_regs->cx;
+        regs_user_copy->dx = user_regs->dx;
+        regs_user_copy->si = user_regs->si;
+        regs_user_copy->di = user_regs->di;
+        regs_user_copy->r8 = user_regs->r8;
+        regs_user_copy->r9 = user_regs->r9;
+        regs_user_copy->r10 = user_regs->r10;
+        regs_user_copy->r11 = user_regs->r11;
+        regs_user_copy->orig_ax = user_regs->orig_ax;
+        regs_user_copy->flags = user_regs->flags;
+
+        /*
+         * Don't even try to report the "rest" regs.
+         */
+        regs_user_copy->bx = -1;
+        regs_user_copy->bp = -1;
+        regs_user_copy->r12 = -1;
+        regs_user_copy->r13 = -1;
+        regs_user_copy->r14 = -1;
+        regs_user_copy->r15 = -1;
+
+        /*
+         * For this to be at all useful, we need a reasonable guess for
+         * sp and the ABI.  Be careful: we're in NMI context, and we're
+         * considering current to be the current task, so we should
+         * be careful not to look at any other percpu variables that might
+         * change during context switches.
+         */
+        if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
+            task_thread_info(current)->status & TS_COMPAT) {
+                /* Easy case: we're in a compat syscall. */
+                regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
+                regs_user_copy->sp = user_regs->sp;
+                regs_user_copy->cs = user_regs->cs;
+                regs_user_copy->ss = user_regs->ss;
+        } else if (user_regs->orig_ax != -1) {
+                /*
+                 * We're probably in a 64-bit syscall.
+                 * Warning: this code is severely racy.  At least it's better
+                 * than just blindly copying user_regs.
+                 */
+                regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
+                regs_user_copy->sp = this_cpu_read(old_rsp);
+                regs_user_copy->cs = __USER_CS;
+                regs_user_copy->ss = __USER_DS;
+                regs_user_copy->cx = -1;  /* usually contains garbage */
+        } else {
+                /* We're probably in an interrupt or exception. */
+                regs_user->abi = user_64bit_mode(user_regs) ?
+                        PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
+                regs_user_copy->sp = user_regs->sp;
+                regs_user_copy->cs = user_regs->cs;
+                regs_user_copy->ss = user_regs->ss;
+        }
+
+        regs_user->regs = regs_user_copy;
 }
 #endif /* CONFIG_X86_32 */
+1 -1
arch/x86/lib/insn.c
···

 /* Verify next sizeof(t) bytes can be on the same instruction */
 #define validate_next(t, insn, n)       \
-        ((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr)
+        ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)

 #define __get_next(t, insn)     \
         ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+7 -5
include/linux/perf_event.h
···
         struct perf_branch_entry        entries[0];
 };

-struct perf_regs {
-        __u64           abi;
-        struct pt_regs  *regs;
-};
-
 struct task_struct;

 /*
···
                 u32     reserved;
         }                               cpu_entry;
         struct perf_callchain_entry     *callchain;
+
+        /*
+         * regs_user may point to task_pt_regs or to regs_user_copy, depending
+         * on arch details.
+         */
         struct perf_regs                regs_user;
+        struct pt_regs                  regs_user_copy;
+
         struct perf_regs                regs_intr;
         u64                             stack_user_size;
 } ____cacheline_aligned;
+16
include/linux/perf_regs.h
···
 #ifndef _LINUX_PERF_REGS_H
 #define _LINUX_PERF_REGS_H

+struct perf_regs {
+        __u64           abi;
+        struct pt_regs  *regs;
+};
+
 #ifdef CONFIG_HAVE_PERF_REGS
 #include <asm/perf_regs.h>
 u64 perf_reg_value(struct pt_regs *regs, int idx);
 int perf_reg_validate(u64 mask);
 u64 perf_reg_abi(struct task_struct *task);
+void perf_get_regs_user(struct perf_regs *regs_user,
+                        struct pt_regs *regs,
+                        struct pt_regs *regs_user_copy);
 #else
 static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
···
 static inline u64 perf_reg_abi(struct task_struct *task)
 {
         return PERF_SAMPLE_REGS_ABI_NONE;
+}
+
+static inline void perf_get_regs_user(struct perf_regs *regs_user,
+                                      struct pt_regs *regs,
+                                      struct pt_regs *regs_user_copy)
+{
+        regs_user->regs = task_pt_regs(current);
+        regs_user->abi = perf_reg_abi(current);
 }
 #endif /* CONFIG_HAVE_PERF_REGS */
 #endif /* _LINUX_PERF_REGS_H */
+8 -11
kernel/events/core.c
···
 }

 static void perf_sample_regs_user(struct perf_regs *regs_user,
-                                  struct pt_regs *regs)
+                                  struct pt_regs *regs,
+                                  struct pt_regs *regs_user_copy)
 {
-        if (!user_mode(regs)) {
-                if (current->mm)
-                        regs = task_pt_regs(current);
-                else
-                        regs = NULL;
-        }
-
-        if (regs) {
-                regs_user->abi = perf_reg_abi(current);
+        if (user_mode(regs)) {
+                regs_user->abi = perf_reg_abi(current);
                 regs_user->regs = regs;
+        } else if (current->mm) {
+                perf_get_regs_user(regs_user, regs, regs_user_copy);
         } else {
                 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
                 regs_user->regs = NULL;
···
         }

         if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
-                perf_sample_regs_user(&data->regs_user, regs);
+                perf_sample_regs_user(&data->regs_user, regs,
+                                      &data->regs_user_copy);

         if (sample_type & PERF_SAMPLE_REGS_USER) {
                 /* regs dump ABI info */
+1 -1
tools/perf/builtin-annotate.c
···
         if (nr_samples > 0) {
                 total_nr_samples += nr_samples;
                 hists__collapse_resort(hists, NULL);
-                hists__output_resort(hists);
+                hists__output_resort(hists, NULL);

                 if (symbol_conf.event_group &&
                     !perf_evsel__is_group_leader(pos))
+45 -1
tools/perf/builtin-diff.c
···
         return __hist_entry__cmp_compute(p_left, p_right, c);
 }

+static int64_t
+hist_entry__cmp_nop(struct hist_entry *left __maybe_unused,
+                    struct hist_entry *right __maybe_unused)
+{
+        return 0;
+}
+
+static int64_t
+hist_entry__cmp_baseline(struct hist_entry *left, struct hist_entry *right)
+{
+        if (sort_compute)
+                return 0;
+
+        if (left->stat.period == right->stat.period)
+                return 0;
+        return left->stat.period > right->stat.period ? 1 : -1;
+}
+
+static int64_t
+hist_entry__cmp_delta(struct hist_entry *left, struct hist_entry *right)
+{
+        return hist_entry__cmp_compute(right, left, COMPUTE_DELTA);
+}
+
+static int64_t
+hist_entry__cmp_ratio(struct hist_entry *left, struct hist_entry *right)
+{
+        return hist_entry__cmp_compute(right, left, COMPUTE_RATIO);
+}
+
+static int64_t
+hist_entry__cmp_wdiff(struct hist_entry *left, struct hist_entry *right)
+{
+        return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF);
+}
+
 static void insert_hist_entry_by_compute(struct rb_root *root,
                                          struct hist_entry *he,
                                          int c)
···
                 hists__precompute(hists);
                 hists__compute_resort(hists);
         } else {
-                hists__output_resort(hists);
+                hists__output_resort(hists, NULL);
         }

         hists__fprintf(hists, true, 0, 0, 0, stdout);
···
         fmt->header = hpp__header;
         fmt->width = hpp__width;
         fmt->entry = hpp__entry_global;
+        fmt->cmp = hist_entry__cmp_nop;
+        fmt->collapse = hist_entry__cmp_nop;

         /* TODO more colors */
         switch (idx) {
         case PERF_HPP_DIFF__BASELINE:
                 fmt->color = hpp__color_baseline;
+                fmt->sort = hist_entry__cmp_baseline;
                 break;
         case PERF_HPP_DIFF__DELTA:
                 fmt->color = hpp__color_delta;
+                fmt->sort = hist_entry__cmp_delta;
                 break;
         case PERF_HPP_DIFF__RATIO:
                 fmt->color = hpp__color_ratio;
+                fmt->sort = hist_entry__cmp_ratio;
                 break;
         case PERF_HPP_DIFF__WEIGHTED_DIFF:
                 fmt->color = hpp__color_wdiff;
+                fmt->sort = hist_entry__cmp_wdiff;
                 break;
         default:
+                fmt->sort = hist_entry__cmp_nop;
                 break;
         }

         init_header(d, dfmt);
         perf_hpp__column_register(fmt);
+        perf_hpp__register_sort_field(fmt);
 }

 static void ui_init(void)
+10 -3
tools/perf/builtin-list.c
···
 int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
 {
         int i;
-        const struct option list_options[] = {
+        bool raw_dump = false;
+        struct option list_options[] = {
+                OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
                 OPT_END()
         };
         const char * const list_usage[] = {
···
                 NULL
         };

+        set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN);
+
         argc = parse_options(argc, argv, list_options, list_usage,
                              PARSE_OPT_STOP_AT_NON_OPTION);

         setup_pager();
+
+        if (raw_dump) {
+                print_events(NULL, true);
+                return 0;
+        }

         if (argc == 0) {
                 print_events(NULL, false);
···
                         print_hwcache_events(NULL, false);
                 else if (strcmp(argv[i], "pmu") == 0)
                         print_pmu_events(NULL, false);
-                else if (strcmp(argv[i], "--raw-dump") == 0)
-                        print_events(NULL, true);
                 else {
                         char *sep = strchr(argv[i], ':'), *s;
                         int sep_idx;
+22 -2
tools/perf/builtin-report.c
···
         ui_progress__finish();
 }

+static void report__output_resort(struct report *rep)
+{
+        struct ui_progress prog;
+        struct perf_evsel *pos;
+
+        ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
+
+        evlist__for_each(rep->session->evlist, pos)
+                hists__output_resort(evsel__hists(pos), &prog);
+
+        ui_progress__finish();
+}
+
 static int __cmd_report(struct report *rep)
 {
         int ret;
···
         if (session_done())
                 return 0;

+        /*
+         * recalculate number of entries after collapsing since it
+         * might be changed during the collapse phase.
+         */
+        rep->nr_entries = 0;
+        evlist__for_each(session->evlist, pos)
+                rep->nr_entries += evsel__hists(pos)->nr_entries;
+
         if (rep->nr_entries == 0) {
                 ui__error("The %s file has no samples!\n", file->path);
                 return 0;
         }

-        evlist__for_each(session->evlist, pos)
-                hists__output_resort(evsel__hists(pos));
+        report__output_resort(rep);

         return report__browse_hists(rep);
 }
+2 -2
tools/perf/builtin-top.c
···
         }

         hists__collapse_resort(hists, NULL);
-        hists__output_resort(hists);
+        hists__output_resort(hists, NULL);

         hists__output_recalc_col_len(hists, top->print_entries - printed);
         putchar('\n');
···
         }

         hists__collapse_resort(hists, NULL);
-        hists__output_resort(hists);
+        hists__output_resort(hists, NULL);
 }

 static void *display_thread_tui(void *arg)
+34 -34
tools/perf/tests/hists_cumulate.c
···
          * function since TEST_ASSERT_VAL() returns in case of failure.
          */
         hists__collapse_resort(hists, NULL);
-        hists__output_resort(hists);
+        hists__output_resort(hists, NULL);

         if (verbose > 2) {
                 pr_info("use callchain: %d, cumulate callchain: %d\n",
···
  * 30.00% 10.00%     perf  perf           [.] cmd_record
  * 20.00%  0.00%     bash  libc           [.] malloc
  * 10.00% 10.00%     bash  [kernel]       [k] page_fault
- * 10.00% 10.00%     perf  [kernel]       [k] schedule
- * 10.00%  0.00%     perf  [kernel]       [k] sys_perf_event_open
- * 10.00% 10.00%     perf  [kernel]       [k] page_fault
- * 10.00% 10.00%     perf  libc           [.] free
- * 10.00% 10.00%     perf  libc           [.] malloc
  * 10.00% 10.00%     bash  bash           [.] xmalloc
+ * 10.00% 10.00%     perf  [kernel]       [k] page_fault
+ * 10.00% 10.00%     perf  libc           [.] malloc
+ * 10.00% 10.00%     perf  [kernel]       [k] schedule
+ * 10.00% 10.00%     perf  libc           [.] free
+ * 10.00%  0.00%     perf  [kernel]       [k] sys_perf_event_open
  */
 struct result expected[] = {
         { 7000, 2000, "perf", "perf",     "main" },
···
         { 3000, 1000, "perf", "perf",     "cmd_record" },
         { 2000,    0, "bash", "libc",     "malloc" },
         { 1000, 1000, "bash", "[kernel]", "page_fault" },
-        { 1000, 1000, "perf", "[kernel]", "schedule" },
-        { 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
+        { 1000, 1000, "bash", "bash",     "xmalloc" },
         { 1000, 1000, "perf", "[kernel]", "page_fault" },
+        { 1000, 1000, "perf", "[kernel]", "schedule" },
         { 1000, 1000, "perf", "libc",     "free" },
         { 1000, 1000, "perf", "libc",     "malloc" },
-        { 1000, 1000, "bash", "bash",     "xmalloc" },
+        { 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
 };

 symbol_conf.use_callchain = false;
···
  *                malloc
  *                main
  *
- * 10.00% 10.00%     perf  [kernel]       [k] schedule
+ * 10.00% 10.00%     bash  bash           [.] xmalloc
  *            |
- *            --- schedule
- *                run_command
+ *            --- xmalloc
+ *                malloc
+ *                xmalloc <--- NOTE: there's a cycle
+ *                malloc
+ *                xmalloc
  *                main
  *
  * 10.00%  0.00%     perf  [kernel]       [k] sys_perf_event_open
···
  *            |
  *            --- page_fault
  *                sys_perf_event_open
+ *                run_command
+ *                main
+ *
+ * 10.00% 10.00%     perf  [kernel]       [k] schedule
+ *            |
+ *            --- schedule
  *                run_command
  *                main
  *
···
  *                run_command
  *                main
  *
- * 10.00% 10.00%     bash  bash           [.] xmalloc
- *            |
- *            --- xmalloc
- *                malloc
- *                xmalloc <--- NOTE: there's a cycle
- *                malloc
- *                xmalloc
- *                main
- *
  */
 struct result expected[] = {
         { 7000, 2000, "perf", "perf",     "main" },
···
         { 3000, 1000, "perf", "perf",     "cmd_record" },
         { 2000,    0, "bash", "libc",     "malloc" },
         { 1000, 1000, "bash", "[kernel]", "page_fault" },
-        { 1000, 1000, "perf", "[kernel]", "schedule" },
+        { 1000, 1000, "bash", "bash",     "xmalloc" },
         { 1000,    0, "perf", "[kernel]", "sys_perf_event_open" },
         { 1000, 1000, "perf", "[kernel]", "page_fault" },
+        { 1000, 1000, "perf", "[kernel]", "schedule" },
         { 1000, 1000, "perf", "libc",     "free" },
         { 1000, 1000, "perf", "libc",     "malloc" },
-        { 1000, 1000, "bash", "bash",     "xmalloc" },
 };
 struct callchain_result expected_callchain[] = {
         {
···
                      { "bash", "main" }, },
         },
         {
-                3, { { "[kernel]", "schedule" },
-                     { "perf", "run_command" },
-                     { "perf", "main" }, },
+                6, { { "bash", "xmalloc" },
+                     { "libc", "malloc" },
+                     { "bash", "xmalloc" },
+                     { "libc", "malloc" },
+                     { "bash", "xmalloc" },
+                     { "bash", "main" }, },
         },
         {
                 3, { { "[kernel]", "sys_perf_event_open" },
···
         {
                 4, { { "[kernel]", "page_fault" },
                      { "[kernel]", "sys_perf_event_open" },
+                     { "perf", "run_command" },
+                     { "perf", "main" }, },
+        },
+        {
+                3, { { "[kernel]", "schedule" },
                      { "perf", "run_command" },
                      { "perf", "main" }, },
         },
···
                      { "perf", "cmd_record" },
                      { "perf", "run_command" },
                      { "perf", "main" }, },
-        },
-        {
-                6, { { "bash", "xmalloc" },
-                     { "libc", "malloc" },
-                     { "bash", "xmalloc" },
-                     { "libc", "malloc" },
-                     { "bash", "xmalloc" },
-                     { "bash", "main" }, },
         },
 };
+1 -1
tools/perf/tests/hists_filter.c
···
                 struct hists *hists = evsel__hists(evsel);

                 hists__collapse_resort(hists, NULL);
-                hists__output_resort(hists);
+                hists__output_resort(hists, NULL);

                 if (verbose > 2) {
                         pr_info("Normal histogram\n");
+5 -5
tools/perf/tests/hists_output.c
···
                 goto out;

         hists__collapse_resort(hists, NULL);
-        hists__output_resort(hists);
+        hists__output_resort(hists, NULL);

         if (verbose > 2) {
                 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
···
                 goto out;

         hists__collapse_resort(hists, NULL);
-        hists__output_resort(hists);
+        hists__output_resort(hists, NULL);

         if (verbose > 2) {
                 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
···
                 goto out;

         hists__collapse_resort(hists, NULL);
-        hists__output_resort(hists);
+        hists__output_resort(hists, NULL);

         if (verbose > 2) {
                 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
···
                 goto out;

         hists__collapse_resort(hists, NULL);
-        hists__output_resort(hists);
+        hists__output_resort(hists, NULL);

         if (verbose > 2) {
                 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
···
                 goto out;

         hists__collapse_resort(hists, NULL);
-        hists__output_resort(hists);
+        hists__output_resort(hists, NULL);

         if (verbose > 2) {
                 pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+1 -1
tools/perf/ui/browsers/hists.c
···
         bool need_percent;

         node = rb_first(root);
-        need_percent = !!rb_next(node);
+        need_percent = node && rb_next(node);

         while (node) {
                 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+3
tools/perf/ui/hist.c
···
                 if (ret)
                         return ret;

+                if (a->thread != b->thread || !symbol_conf.use_callchain)
+                        return 0;
+
                 ret = b->callchain->max_depth - a->callchain->max_depth;
         }
         return ret;
+24 -2
tools/perf/ui/tui/setup.c
···
 #include <signal.h>
 #include <stdbool.h>
+#ifdef HAVE_BACKTRACE_SUPPORT
+#include <execinfo.h>
+#endif

 #include "../../util/cache.h"
 #include "../../util/debug.h"
···
         return SLkp_getkey();
 }

+#ifdef HAVE_BACKTRACE_SUPPORT
+static void ui__signal_backtrace(int sig)
+{
+        void *stackdump[32];
+        size_t size;
+
+        ui__exit(false);
+        psignal(sig, "perf");
+
+        printf("-------- backtrace --------\n");
+        size = backtrace(stackdump, ARRAY_SIZE(stackdump));
+        backtrace_symbols_fd(stackdump, size, STDOUT_FILENO);
+
+        exit(0);
+}
+#else
+# define ui__signal_backtrace  ui__signal
+#endif
+
 static void ui__signal(int sig)
 {
         ui__exit(false);
···
         ui_browser__init();
         tui_progress__init();

-        signal(SIGSEGV, ui__signal);
-        signal(SIGFPE, ui__signal);
+        signal(SIGSEGV, ui__signal_backtrace);
+        signal(SIGFPE, ui__signal_backtrace);
         signal(SIGINT, ui__signal);
         signal(SIGQUIT, ui__signal);
         signal(SIGTERM, ui__signal);
+30
tools/perf/util/callchain.c
···

         return bf;
 }
+
+static void free_callchain_node(struct callchain_node *node)
+{
+        struct callchain_list *list, *tmp;
+        struct callchain_node *child;
+        struct rb_node *n;
+
+        list_for_each_entry_safe(list, tmp, &node->val, list) {
+                list_del(&list->list);
+                free(list);
+        }
+
+        n = rb_first(&node->rb_root_in);
+        while (n) {
+                child = container_of(n, struct callchain_node, rb_node_in);
+                n = rb_next(n);
+                rb_erase(&child->rb_node_in, &node->rb_root_in);
+
+                free_callchain_node(child);
+                free(child);
+        }
+}
+
+void free_callchain(struct callchain_root *root)
+{
+        if (!symbol_conf.use_callchain)
+                return;
+
+        free_callchain_node(&root->node);
+}
+2
tools/perf/util/callchain.h
···
 char *callchain_list__sym_name(struct callchain_list *cl,
                                char *bf, size_t bfsize, bool show_dso);

+void free_callchain(struct callchain_root *root);
+
 #endif /* __PERF_CALLCHAIN_H */
+14 -4
tools/perf/util/hist.c
···
 #include "evlist.h"
 #include "evsel.h"
 #include "annotate.h"
+#include "ui/progress.h"
 #include <math.h>

 static bool hists__filter_entry_by_dso(struct hists *hists,
···
         size_t callchain_size = 0;
         struct hist_entry *he;

-        if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain)
+        if (symbol_conf.use_callchain)
                 callchain_size = sizeof(struct callchain_root);

         he = zalloc(sizeof(*he) + callchain_size);
···
         iter->he = he;
         he_cache[iter->curr++] = he;

-        callchain_append(he->callchain, &callchain_cursor, sample->period);
+        hist_entry__append_callchain(he, sample);

         /*
          * We need to re-initialize the cursor since callchain_append()
···
         iter->he = he;
         he_cache[iter->curr++] = he;

-        callchain_append(he->callchain, &cursor, sample->period);
+        if (symbol_conf.use_callchain)
+                callchain_append(he->callchain, &cursor, sample->period);
         return 0;
 }
···
         zfree(&he->mem_info);
         zfree(&he->stat_acc);
         free_srcline(he->srcline);
+        free_callchain(he->callchain);
         free(he);
 }
···
                 else
                         p = &(*p)->rb_right;
         }
+        hists->nr_entries++;

         rb_link_node(&he->rb_node_in, parent, p);
         rb_insert_color(&he->rb_node_in, root);
···
         if (!sort__need_collapse)
                 return;

+        hists->nr_entries = 0;
+
         root = hists__get_rotate_entries_in(hists);
+
         next = rb_first(root);

         while (next) {
···
         rb_insert_color(&he->rb_node, entries);
 }

-void hists__output_resort(struct hists *hists)
+void hists__output_resort(struct hists *hists, struct ui_progress *prog)
 {
         struct rb_root *root;
         struct rb_node *next;
···

                 if (!n->filtered)
                         hists__calc_col_len(hists, n);
+
+                if (prog)
+                        ui_progress__update(prog, 1);
         }
 }
+1 -1
tools/perf/util/hist.h
···
                          struct hists *hists);
 void hist_entry__free(struct hist_entry *);

-void hists__output_resort(struct hists *hists);
+void hists__output_resort(struct hists *hists, struct ui_progress *prog);
 void hists__collapse_resort(struct hists *hists, struct ui_progress *prog);

 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
+4 -2
tools/perf/util/probe-event.c
···
         }

         if (ntevs == 0) { /* No error but failed to find probe point. */
-                pr_warning("Probe point '%s' not found.\n",
+                pr_warning("Probe point '%s' not found in debuginfo.\n",
                            synthesize_perf_probe_point(&pev->point));
-                return -ENOENT;
+                if (need_dwarf)
+                        return -ENOENT;
+                return 0;
         }
         /* Error path : ntevs < 0 */
         pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
+17 -1
tools/perf/util/probe-finder.c
···
         int ret = 0;

 #if _ELFUTILS_PREREQ(0, 142)
+        Elf *elf;
+        GElf_Ehdr ehdr;
+        GElf_Shdr shdr;
+
         /* Get the call frame information from this dwarf */
-        pf->cfi = dwarf_getcfi_elf(dwarf_getelf(dbg->dbg));
+        elf = dwarf_getelf(dbg->dbg);
+        if (elf == NULL)
+                return -EINVAL;
+
+        if (gelf_getehdr(elf, &ehdr) == NULL)
+                return -EINVAL;
+
+        if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+            shdr.sh_type == SHT_PROGBITS) {
+                pf->cfi = dwarf_getcfi_elf(elf);
+        } else {
+                pf->cfi = dwarf_getcfi(dbg->dbg);
+        }
 #endif

         off = 0;