Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf_counter tools: Add 'perf annotate' feature

Add a new perf sub-command that displays annotated source code:

$ perf annotate decode_tree_entry

------------------------------------------------
Percent | Source code & Disassembly of /home/mingo/git/git
------------------------------------------------
:
: /home/mingo/git/git: file format elf64-x86-64
:
:
: Disassembly of section .text:
:
: 00000000004a0da0 <decode_tree_entry>:
: *modep = mode;
: return str;
: }
:
: static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
: {
3.82 : 4a0da0: 41 54 push %r12
: const char *path;
: unsigned int mode, len;
:
: if (size < 24 || buf[size - 21])
0.17 : 4a0da2: 48 83 fa 17 cmp $0x17,%rdx
: *modep = mode;
: return str;
: }
:
: static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
: {
0.00 : 4a0da6: 49 89 fc mov %rdi,%r12
0.00 : 4a0da9: 55 push %rbp
3.37 : 4a0daa: 53 push %rbx
: const char *path;
: unsigned int mode, len;
:
: if (size < 24 || buf[size - 21])
0.08 : 4a0dab: 76 73 jbe 4a0e20 <decode_tree_entry+0x80>
0.00 : 4a0dad: 80 7c 16 eb 00 cmpb $0x0,-0x15(%rsi,%rdx,1)
3.48 : 4a0db2: 75 6c jne 4a0e20 <decode_tree_entry+0x80>
: static const char *get_mode(const char *str, unsigned int *modep)
: {
: unsigned char c;
: unsigned int mode = 0;
:
: if (*str == ' ')
1.94 : 4a0db4: 0f b6 06 movzbl (%rsi),%eax
0.39 : 4a0db7: 3c 20 cmp $0x20,%al
0.00 : 4a0db9: 74 65 je 4a0e20 <decode_tree_entry+0x80>
: return NULL;
:
: while ((c = *str++) != ' ') {
0.06 : 4a0dbb: 89 c2 mov %eax,%edx
: if (c < '0' || c > '7')
1.99 : 4a0dbd: 31 ed xor %ebp,%ebp
: unsigned int mode = 0;
:
: if (*str == ' ')
: return NULL;
:
: while ((c = *str++) != ' ') {
1.74 : 4a0dbf: 48 8d 5e 01 lea 0x1(%rsi),%rbx
: if (c < '0' || c > '7')
0.00 : 4a0dc3: 8d 42 d0 lea -0x30(%rdx),%eax
0.17 : 4a0dc6: 3c 07 cmp $0x7,%al
0.00 : 4a0dc8: 76 0d jbe 4a0dd7 <decode_tree_entry+0x37>
0.00 : 4a0dca: eb 54 jmp 4a0e20 <decode_tree_entry+0x80>
0.00 : 4a0dcc: 0f 1f 40 00 nopl 0x0(%rax)
16.57 : 4a0dd0: 8d 42 d0 lea -0x30(%rdx),%eax
0.14 : 4a0dd3: 3c 07 cmp $0x7,%al
0.00 : 4a0dd5: 77 49 ja 4a0e20 <decode_tree_entry+0x80>
: return NULL;
: mode = (mode << 3) + (c - '0');
3.12 : 4a0dd7: 0f b6 c2 movzbl %dl,%eax
: unsigned int mode = 0;
:
: if (*str == ' ')
: return NULL;
:
: while ((c = *str++) != ' ') {
0.00 : 4a0dda: 0f b6 13 movzbl (%rbx),%edx
16.74 : 4a0ddd: 48 83 c3 01 add $0x1,%rbx
: if (c < '0' || c > '7')
: return NULL;
: mode = (mode << 3) + (c - '0');

The first column is the percentage of samples that hit that particular
instruction, relative to the total number of samples in the function.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

+234 -144
+6 -3
Documentation/perf_counter/Documentation/perf-annotate.txt
··· 3 3 4 4 NAME 5 5 ---- 6 - perf-annotate - Read perf.data (created by perf record) and annotate functions 6 + perf-annotate - Read perf.data (created by perf record) and display annotated code 7 7 8 8 SYNOPSIS 9 9 -------- ··· 12 12 13 13 DESCRIPTION 14 14 ----------- 15 - This command displays the performance counter profile information recorded 16 - via perf record. 15 + This command reads the input file and displays an annotated version of the 16 + code. If the object file has debug symbols then the source code will be 17 + displayed alongside assembly code. 18 + 19 + If there is no debug info in the object, then annotated assembly is displayed. 17 20 18 21 OPTIONS 19 22 -------
+189 -125
Documentation/perf_counter/builtin-annotate.c
··· 28 28 static char const *input_name = "perf.data"; 29 29 static char *vmlinux = NULL; 30 30 31 - static char default_sort_order[] = "comm,dso"; 31 + static char default_sort_order[] = "comm,symbol"; 32 32 static char *sort_order = default_sort_order; 33 33 34 34 static int input; ··· 38 38 #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) 39 39 40 40 static int verbose; 41 - static int full_paths; 42 41 43 42 static unsigned long page_size; 44 43 static unsigned long mmap_window = 32; ··· 87 88 static LIST_HEAD(dsos); 88 89 static struct dso *kernel_dso; 89 90 static struct dso *vdso; 91 + 90 92 91 93 static void dsos__add(struct dso *dso) 92 94 { ··· 176 176 return err; 177 177 } 178 178 179 - static char __cwd[PATH_MAX]; 180 - static char *cwd = __cwd; 181 - static int cwdlen; 182 - 183 - static int strcommon(const char *pathname) 184 - { 185 - int n = 0; 186 - 187 - while (pathname[n] == cwd[n] && n < cwdlen) 188 - ++n; 189 - 190 - return n; 191 - } 192 - 193 179 struct map { 194 180 struct list_head node; 195 181 uint64_t start; ··· 201 215 202 216 if (self != NULL) { 203 217 const char *filename = event->filename; 204 - char newfilename[PATH_MAX]; 205 - 206 - if (cwd) { 207 - int n = strcommon(filename); 208 - 209 - if (n == cwdlen) { 210 - snprintf(newfilename, sizeof(newfilename), 211 - ".%s", filename + n); 212 - filename = newfilename; 213 - } 214 - } 215 218 216 219 self->start = event->start; 217 220 self->end = event->start + event->len; ··· 644 669 return cmp; 645 670 } 646 671 647 - static size_t 648 - hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples) 649 - { 650 - struct sort_entry *se; 651 - size_t ret; 652 - 653 - if (total_samples) { 654 - double percent = self->count * 100.0 / total_samples; 655 - char *color = PERF_COLOR_NORMAL; 656 - 657 - /* 658 - * We color high-overhead entries in red, low-overhead 659 - * entries in green - and keep the middle ground normal: 660 - */ 661 - if (percent >= 5.0) 
662 - color = PERF_COLOR_RED; 663 - if (percent < 0.5) 664 - color = PERF_COLOR_GREEN; 665 - 666 - ret = color_fprintf(fp, color, " %6.2f%%", 667 - (self->count * 100.0) / total_samples); 668 - } else 669 - ret = fprintf(fp, "%12d ", self->count); 670 - 671 - list_for_each_entry(se, &hist_entry__sort_list, list) { 672 - fprintf(fp, " "); 673 - ret += se->print(fp, self); 674 - } 675 - 676 - ret += fprintf(fp, "\n"); 677 - 678 - return ret; 679 - } 680 - 681 672 /* 682 673 * collect histogram counts 683 674 */ 675 + static void hist_hit(struct hist_entry *he, uint64_t ip) 676 + { 677 + unsigned int sym_size, offset; 678 + struct symbol *sym = he->sym; 679 + 680 + he->count++; 681 + 682 + if (!sym || !sym->hist) 683 + return; 684 + 685 + sym_size = sym->end - sym->start; 686 + offset = ip - sym->start; 687 + 688 + if (offset >= sym_size) 689 + return; 690 + 691 + sym->hist_sum++; 692 + sym->hist[offset]++; 693 + 694 + if (verbose >= 3) 695 + printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n", 696 + (void *)he->sym->start, 697 + he->sym->name, 698 + (void *)ip, ip - he->sym->start, 699 + sym->hist[offset]); 700 + } 684 701 685 702 static int 686 703 hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, ··· 699 732 cmp = hist_entry__cmp(&entry, he); 700 733 701 734 if (!cmp) { 702 - he->count++; 735 + hist_hit(he, ip); 736 + 703 737 return 0; 704 738 } 705 739 ··· 822 854 rb_erase(&n->rb_node, tree); 823 855 output__insert_entry(n); 824 856 } 825 - } 826 - 827 - static size_t output__fprintf(FILE *fp, uint64_t total_samples) 828 - { 829 - struct hist_entry *pos; 830 - struct sort_entry *se; 831 - struct rb_node *nd; 832 - size_t ret = 0; 833 - 834 - fprintf(fp, "\n"); 835 - fprintf(fp, "#\n"); 836 - fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples); 837 - fprintf(fp, "#\n"); 838 - 839 - fprintf(fp, "# Overhead"); 840 - list_for_each_entry(se, &hist_entry__sort_list, list) 841 - fprintf(fp, " %s", se->header); 842 - fprintf(fp, "\n"); 843 - 844 - 
fprintf(fp, "# ........"); 845 - list_for_each_entry(se, &hist_entry__sort_list, list) { 846 - int i; 847 - 848 - fprintf(fp, " "); 849 - for (i = 0; i < strlen(se->header); i++) 850 - fprintf(fp, "."); 851 - } 852 - fprintf(fp, "\n"); 853 - 854 - fprintf(fp, "#\n"); 855 - 856 - for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { 857 - pos = rb_entry(nd, struct hist_entry, rb_node); 858 - ret += hist_entry__fprintf(fp, pos, total_samples); 859 - } 860 - 861 - if (!strcmp(sort_order, default_sort_order)) { 862 - fprintf(fp, "#\n"); 863 - fprintf(fp, "# (For more details, try: perf annotate --sort comm,dso,symbol)\n"); 864 - fprintf(fp, "#\n"); 865 - } 866 - fprintf(fp, "\n"); 867 - 868 - return ret; 869 857 } 870 858 871 859 static void register_idle_thread(void) ··· 1030 1106 return 0; 1031 1107 } 1032 1108 1109 + static int 1110 + parse_line(FILE *file, struct symbol *sym, uint64_t start, uint64_t len) 1111 + { 1112 + char *line = NULL, *tmp, *tmp2; 1113 + unsigned int offset; 1114 + size_t line_len; 1115 + __u64 line_ip; 1116 + int ret; 1117 + char *c; 1118 + 1119 + if (getline(&line, &line_len, file) < 0) 1120 + return -1; 1121 + if (!line) 1122 + return -1; 1123 + 1124 + c = strchr(line, '\n'); 1125 + if (c) 1126 + *c = 0; 1127 + 1128 + line_ip = -1; 1129 + offset = 0; 1130 + ret = -2; 1131 + 1132 + /* 1133 + * Strip leading spaces: 1134 + */ 1135 + tmp = line; 1136 + while (*tmp) { 1137 + if (*tmp != ' ') 1138 + break; 1139 + tmp++; 1140 + } 1141 + 1142 + if (*tmp) { 1143 + /* 1144 + * Parse hexa addresses followed by ':' 1145 + */ 1146 + line_ip = strtoull(tmp, &tmp2, 16); 1147 + if (*tmp2 != ':') 1148 + line_ip = -1; 1149 + } 1150 + 1151 + if (line_ip != -1) { 1152 + unsigned int hits = 0; 1153 + double percent = 0.0; 1154 + char *color = PERF_COLOR_NORMAL; 1155 + 1156 + offset = line_ip - start; 1157 + if (offset < len) 1158 + hits = sym->hist[offset]; 1159 + 1160 + if (sym->hist_sum) 1161 + percent = 100.0 * hits / sym->hist_sum; 1162 + 1163 + /* 
1164 + * We color high-overhead entries in red, low-overhead 1165 + * entries in green - and keep the middle ground normal: 1166 + */ 1167 + if (percent >= 5.0) 1168 + color = PERF_COLOR_RED; 1169 + else { 1170 + if (percent > 0.5) 1171 + color = PERF_COLOR_GREEN; 1172 + } 1173 + 1174 + color_fprintf(stdout, color, " %7.2f", percent); 1175 + printf(" : "); 1176 + color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line); 1177 + } else { 1178 + if (!*line) 1179 + printf(" :\n"); 1180 + else 1181 + printf(" : %s\n", line); 1182 + } 1183 + 1184 + return 0; 1185 + } 1186 + 1187 + static void annotate_sym(struct dso *dso, struct symbol *sym) 1188 + { 1189 + char *filename = dso->name; 1190 + uint64_t start, end, len; 1191 + char command[PATH_MAX*2]; 1192 + FILE *file; 1193 + 1194 + if (!filename) 1195 + return; 1196 + if (dso == kernel_dso) 1197 + filename = vmlinux; 1198 + 1199 + printf("\n------------------------------------------------\n"); 1200 + printf(" Percent | Source code & Disassembly of %s\n", filename); 1201 + printf("------------------------------------------------\n"); 1202 + 1203 + if (verbose >= 2) 1204 + printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); 1205 + 1206 + start = sym->obj_start; 1207 + if (!start) 1208 + start = sym->start; 1209 + 1210 + end = start + sym->end - sym->start + 1; 1211 + len = sym->end - sym->start; 1212 + 1213 + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename); 1214 + 1215 + if (verbose >= 3) 1216 + printf("doing: %s\n", command); 1217 + 1218 + file = popen(command, "r"); 1219 + if (!file) 1220 + return; 1221 + 1222 + while (!feof(file)) { 1223 + if (parse_line(file, sym, start, len) < 0) 1224 + break; 1225 + } 1226 + 1227 + pclose(file); 1228 + } 1229 + 1230 + static void find_annotations(void) 1231 + { 1232 + struct rb_node *nd; 1233 + struct dso *dso; 1234 + int count = 0; 1235 + 1236 + list_for_each_entry(dso, &dsos, node) { 1237 
+ 1238 + for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) { 1239 + struct symbol *sym = rb_entry(nd, struct symbol, rb_node); 1240 + 1241 + if (sym->hist) { 1242 + annotate_sym(dso, sym); 1243 + count++; 1244 + } 1245 + } 1246 + } 1247 + 1248 + if (!count) 1249 + printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter); 1250 + } 1251 + 1033 1252 static int __cmd_annotate(void) 1034 1253 { 1035 1254 int ret, rc = EXIT_FAILURE; ··· 1207 1140 return EXIT_FAILURE; 1208 1141 } 1209 1142 1210 - if (!full_paths) { 1211 - if (getcwd(__cwd, sizeof(__cwd)) == NULL) { 1212 - perror("failed to get the current directory"); 1213 - return EXIT_FAILURE; 1214 - } 1215 - cwdlen = strlen(cwd); 1216 - } else { 1217 - cwd = NULL; 1218 - cwdlen = 0; 1219 - } 1220 1143 remap: 1221 1144 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, 1222 1145 MAP_SHARED, input, offset); ··· 1286 1229 1287 1230 collapse__resort(); 1288 1231 output__resort(); 1289 - output__fprintf(stdout, total); 1232 + 1233 + find_annotations(); 1290 1234 1291 1235 return rc; 1292 1236 } ··· 1300 1242 static const struct option options[] = { 1301 1243 OPT_STRING('i', "input", &input_name, "file", 1302 1244 "input file name"), 1245 + OPT_STRING('s', "symbol", &sym_hist_filter, "file", 1246 + "symbol to annotate"), 1303 1247 OPT_BOOLEAN('v', "verbose", &verbose, 1304 1248 "be more verbose (show symbol address, etc)"), 1305 1249 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1306 1250 "dump raw trace in ASCII"), 1307 1251 OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), 1308 - OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 1309 - "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), 1310 - OPT_BOOLEAN('P', "full-paths", &full_paths, 1311 - "Don't shorten the pathnames taking into account the cwd"), 1312 1252 OPT_END() 1313 1253 }; 1314 1254 ··· 1335 1279 1336 1280 setup_sorting(); 1337 1281 1338 - /* 1339 - * Any (unrecognized) arguments left? 
1340 - */ 1341 - if (argc) 1282 + if (argc) { 1283 + /* 1284 + * Special case: if there's an argument left then assume tha 1285 + * it's a symbol filter: 1286 + */ 1287 + if (argc > 1) 1288 + usage_with_options(annotate_usage, options); 1289 + 1290 + sym_hist_filter = argv[0]; 1291 + } 1292 + 1293 + if (!sym_hist_filter) 1342 1294 usage_with_options(annotate_usage, options); 1343 1295 1344 1296 setup_pager();
+34 -16
Documentation/perf_counter/util/symbol.c
··· 7 7 #include <gelf.h> 8 8 #include <elf.h> 9 9 10 + const char *sym_hist_filter; 11 + 10 12 static struct symbol *symbol__new(uint64_t start, uint64_t len, 11 - const char *name, unsigned int priv_size) 13 + const char *name, unsigned int priv_size, 14 + uint64_t obj_start, int verbose) 12 15 { 13 16 size_t namelen = strlen(name) + 1; 14 - struct symbol *self = malloc(priv_size + sizeof(*self) + namelen); 17 + struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); 15 18 16 - if (self != NULL) { 17 - if (priv_size) { 18 - memset(self, 0, priv_size); 19 - self = ((void *)self) + priv_size; 20 - } 21 - self->start = start; 22 - self->end = start + len - 1; 23 - memcpy(self->name, name, namelen); 19 + if (!self) 20 + return NULL; 21 + 22 + if (verbose >= 2) 23 + printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", 24 + (__u64)start, len, name, self->hist, (void *)obj_start); 25 + 26 + self->obj_start= obj_start; 27 + self->hist = NULL; 28 + self->hist_sum = 0; 29 + 30 + if (sym_hist_filter && !strcmp(name, sym_hist_filter)) 31 + self->hist = calloc(sizeof(__u64), len); 32 + 33 + if (priv_size) { 34 + memset(self, 0, priv_size); 35 + self = ((void *)self) + priv_size; 24 36 } 37 + self->start = start; 38 + self->end = start + len - 1; 39 + memcpy(self->name, name, namelen); 25 40 26 41 return self; 27 42 } ··· 181 166 * Well fix up the end later, when we have all sorted. 
182 167 */ 183 168 sym = symbol__new(start, 0xdead, line + len + 2, 184 - self->sym_priv_size); 169 + self->sym_priv_size, 0, verbose); 185 170 186 171 if (sym == NULL) 187 172 goto out_delete_line; ··· 287 272 static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, 288 273 GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym, 289 274 GElf_Shdr *shdr_dynsym, 290 - size_t dynsym_idx) 275 + size_t dynsym_idx, int verbose) 291 276 { 292 277 uint32_t nr_rel_entries, idx; 293 278 GElf_Sym sym; ··· 350 335 "%s@plt", elf_sym__name(&sym, symstrs)); 351 336 352 337 f = symbol__new(plt_offset, shdr_plt.sh_entsize, 353 - sympltname, self->sym_priv_size); 338 + sympltname, self->sym_priv_size, 0, verbose); 354 339 if (!f) 355 340 return -1; 356 341 ··· 368 353 "%s@plt", elf_sym__name(&sym, symstrs)); 369 354 370 355 f = symbol__new(plt_offset, shdr_plt.sh_entsize, 371 - sympltname, self->sym_priv_size); 356 + sympltname, self->sym_priv_size, 0, verbose); 372 357 if (!f) 373 358 return -1; 374 359 ··· 425 410 if (sec_dynsym != NULL) { 426 411 nr = dso__synthesize_plt_symbols(self, elf, &ehdr, 427 412 sec_dynsym, &shdr, 428 - dynsym_idx); 413 + dynsym_idx, verbose); 429 414 if (nr < 0) 430 415 goto out_elf_end; 431 416 } ··· 459 444 460 445 elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { 461 446 struct symbol *f; 447 + uint64_t obj_start; 462 448 463 449 if (!elf_sym__is_function(&sym)) 464 450 continue; ··· 469 453 goto out_elf_end; 470 454 471 455 gelf_getshdr(sec, &shdr); 456 + obj_start = sym.st_value; 457 + 472 458 sym.st_value -= shdr.sh_addr - shdr.sh_offset; 473 459 474 460 f = symbol__new(sym.st_value, sym.st_size, 475 461 elf_sym__name(&sym, symstrs), 476 - self->sym_priv_size); 462 + self->sym_priv_size, obj_start, verbose); 477 463 if (!f) 478 464 goto out_elf_end; 479 465
+5
Documentation/perf_counter/util/symbol.h
··· 9 9 struct rb_node rb_node; 10 10 __u64 start; 11 11 __u64 end; 12 + __u64 obj_start; 13 + __u64 hist_sum; 14 + __u64 *hist; 12 15 char name[0]; 13 16 }; 14 17 ··· 22 19 struct symbol *(*find_symbol)(struct dso *, uint64_t ip); 23 20 char name[0]; 24 21 }; 22 + 23 + const char *sym_hist_filter; 25 24 26 25 typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym); 27 26