Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-for-mingo-4.11-20170213' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Introduce the 'delta-abs' 'perf diff' compute method, that orders the
histogram entries by the absolute value of the percentage delta for a
function in two perf.data files, i.e. the functions that changed the
most (increase or decrease in samples) come first (Namhyung Kim)

User visible changes:

- Improve message about tweaking the kernel.perf_event_paranoid setting,
telling how to make the change permanent by editing /etc/sysctl.conf
(Arnaldo Carvalho de Melo)

Infrastructure changes:

- Introduce linux/compiler-gcc.h as a counterpart to the kernel's,
initially containing the definition of __fallthrough, more to
come (__maybe_unused, etc) (Arnaldo Carvalho de Melo)

- Fixes for problems uncovered by building tools/perf with clang, such
as always true tests of arrays against NULL and variables that sometimes
were used without being initialized (Arnaldo Carvalho de Melo, Steven Rostedt)

- Before loading a new ELF, clear global variables set by the
samples/bpf loader (Mickaël Salaün)

- Ignore already processed ELF sections in the samples/bpf
loader (Mickaël Salaün)

- Fix compile error in the scripting code with some perl5
versions (Wang YanQing)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+145 -29
+7
samples/bpf/bpf_load.c
··· 277 277 Elf_Data *data, *data_prog, *symbols = NULL; 278 278 char *shname, *shname_prog; 279 279 280 + /* reset global variables */ 281 + kern_version = 0; 282 + memset(license, 0, sizeof(license)); 283 + memset(processed_sec, 0, sizeof(processed_sec)); 284 + 280 285 if (elf_version(EV_CURRENT) == EV_NONE) 281 286 return 1; 282 287 ··· 333 328 334 329 /* load programs that need map fixup (relocations) */ 335 330 for (i = 1; i < ehdr.e_shnum; i++) { 331 + if (processed_sec[i]) 332 + continue; 336 333 337 334 if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) 338 335 continue;
+1
samples/bpf/tracex5_kern.c
··· 8 8 #include <linux/version.h> 9 9 #include <uapi/linux/bpf.h> 10 10 #include <uapi/linux/seccomp.h> 11 + #include <uapi/linux/unistd.h> 11 12 #include "bpf_helpers.h" 12 13 13 14 #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
+14
tools/include/linux/compiler-gcc.h
··· 1 + #ifndef _TOOLS_LINUX_COMPILER_H_ 2 + #error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." 3 + #endif 4 + 5 + /* 6 + * Common definitions for all gcc versions go here. 7 + */ 8 + #define GCC_VERSION (__GNUC__ * 10000 \ 9 + + __GNUC_MINOR__ * 100 \ 10 + + __GNUC_PATCHLEVEL__) 11 + 12 + #if GCC_VERSION >= 70000 && !defined(__CHECKER__) 13 + # define __fallthrough __attribute__ ((fallthrough)) 14 + #endif
+5 -5
tools/include/linux/compiler.h
··· 1 1 #ifndef _TOOLS_LINUX_COMPILER_H_ 2 2 #define _TOOLS_LINUX_COMPILER_H_ 3 3 4 + #ifdef __GNUC__ 5 + #include <linux/compiler-gcc.h> 6 + #endif 7 + 4 8 /* Optimization barrier */ 5 9 /* The "volatile" is due to gcc bugs */ 6 10 #define barrier() __asm__ __volatile__("": : :"memory") ··· 132 128 133 129 134 130 #ifndef __fallthrough 135 - # if defined(__GNUC__) && __GNUC__ >= 7 136 - # define __fallthrough __attribute__ ((fallthrough)) 137 - # else 138 - # define __fallthrough 139 - # endif 131 + # define __fallthrough 140 132 #endif 141 133 142 134 #endif /* _TOOLS_LINUX_COMPILER_H */
+1
tools/lib/traceevent/kbuffer-parse.c
··· 315 315 extend += delta; 316 316 delta = extend; 317 317 ptr += 4; 318 + length = 0; 318 319 break; 319 320 320 321 case OLD_RINGBUF_TYPE_TIME_STAMP:
+1 -1
tools/lib/traceevent/plugin_function.c
··· 130 130 unsigned long long pfunction; 131 131 const char *func; 132 132 const char *parent; 133 - int index; 133 + int index = 0; 134 134 135 135 if (pevent_get_field_val(s, event, "ip", record, &function, 1)) 136 136 return trace_seq_putc(s, '!');
+12
tools/perf/Documentation/perf-config.txt
··· 498 498 But if this option is 'no-cache', it will not update the build-id cache. 499 499 'skip' skips post-processing and does not update the cache. 500 500 501 + diff.*:: 502 + diff.order:: 503 + This option sets the number of columns to sort the result. 504 + The default is 0, which means sorting by baseline. 505 + Setting it to 1 will sort the result by delta (or other 506 + compute method selected). 507 + 508 + diff.compute:: 509 + This option sets the method for computing the diff result. 510 + Possible values are 'delta', 'delta-abs', 'ratio' and 511 + 'wdiff'. Default is 'delta'. 512 + 501 513 SEE ALSO 502 514 -------- 503 515 linkperf:perf[1]
+12 -3
tools/perf/Documentation/perf-diff.txt
··· 86 86 87 87 -c:: 88 88 --compute:: 89 - Differential computation selection - delta,ratio,wdiff (default is delta). 90 - See COMPARISON METHODS section for more info. 89 + Differential computation selection - delta, ratio, wdiff, delta-abs 90 + (default is delta-abs). Default can be changed using diff.compute 91 + config option. See COMPARISON METHODS section for more info. 91 92 92 93 -p:: 93 94 --period:: ··· 100 99 101 100 -o:: 102 101 --order:: 103 - Specify compute sorting column number. 102 + Specify compute sorting column number. 0 means sorting by baseline 103 + overhead and 1 (default) means sorting by computed value of column 1 104 + (data from the first file other than the baseline). Values more than 1 105 + can be used only if enough data files are provided. 106 + The default value can be set using the diff.order config option. 107 105 108 --percentage:: 106 109 Determine how to display the overhead percentage of filtered entries. ··· 185 180 - with filtering by -C, -d and/or -S, period_percent might be changed 186 181 relative to how entries are filtered. Use --percentage=absolute to 187 182 prevent such fluctuation. 183 + 184 + delta-abs 185 + ~~~~~~~~~ 186 + Same as 'delta' method, but sort the result with the absolute values. 188 187 189 188 ratio 190 189 ~~~~~
+1
tools/perf/MANIFEST
··· 61 61 tools/include/linux/atomic.h 62 62 tools/include/linux/bitops.h 63 63 tools/include/linux/compiler.h 64 + tools/include/linux/compiler-gcc.h 64 65 tools/include/linux/coresight-pmu.h 65 66 tools/include/linux/filter.h 66 67 tools/include/linux/hash.h
+74 -4
tools/perf/builtin-diff.c
··· 17 17 #include "util/symbol.h" 18 18 #include "util/util.h" 19 19 #include "util/data.h" 20 + #include "util/config.h" 20 21 21 22 #include <stdlib.h> 22 23 #include <math.h> ··· 31 30 PERF_HPP_DIFF__RATIO, 32 31 PERF_HPP_DIFF__WEIGHTED_DIFF, 33 32 PERF_HPP_DIFF__FORMULA, 33 + PERF_HPP_DIFF__DELTA_ABS, 34 34 35 35 PERF_HPP_DIFF__MAX_INDEX 36 36 }; ··· 66 64 static bool show_period; 67 65 static bool show_formula; 68 66 static bool show_baseline_only; 69 - static unsigned int sort_compute; 67 + static unsigned int sort_compute = 1; 70 68 71 69 static s64 compute_wdiff_w1; 72 70 static s64 compute_wdiff_w2; ··· 75 73 COMPUTE_DELTA, 76 74 COMPUTE_RATIO, 77 75 COMPUTE_WEIGHTED_DIFF, 76 + COMPUTE_DELTA_ABS, 78 77 COMPUTE_MAX, 79 78 }; 80 79 81 80 const char *compute_names[COMPUTE_MAX] = { 82 81 [COMPUTE_DELTA] = "delta", 82 + [COMPUTE_DELTA_ABS] = "delta-abs", 83 83 [COMPUTE_RATIO] = "ratio", 84 84 [COMPUTE_WEIGHTED_DIFF] = "wdiff", 85 85 }; 86 86 87 - static int compute; 87 + static int compute = COMPUTE_DELTA_ABS; 88 88 89 89 static int compute_2_hpp[COMPUTE_MAX] = { 90 90 [COMPUTE_DELTA] = PERF_HPP_DIFF__DELTA, 91 + [COMPUTE_DELTA_ABS] = PERF_HPP_DIFF__DELTA_ABS, 91 92 [COMPUTE_RATIO] = PERF_HPP_DIFF__RATIO, 92 93 [COMPUTE_WEIGHTED_DIFF] = PERF_HPP_DIFF__WEIGHTED_DIFF, 93 94 }; ··· 114 109 }, 115 110 [PERF_HPP_DIFF__DELTA] = { 116 111 .name = "Delta", 112 + .width = 7, 113 + }, 114 + [PERF_HPP_DIFF__DELTA_ABS] = { 115 + .name = "Delta Abs", 117 116 .width = 7, 118 117 }, 119 118 [PERF_HPP_DIFF__RATIO] = { ··· 307 298 { 308 299 switch (compute) { 309 300 case COMPUTE_DELTA: 301 + case COMPUTE_DELTA_ABS: 310 302 return formula_delta(he, pair, buf, size); 311 303 case COMPUTE_RATIO: 312 304 return formula_ratio(he, pair, buf, size); ··· 471 461 472 462 switch (compute) { 473 463 case COMPUTE_DELTA: 464 + case COMPUTE_DELTA_ABS: 474 465 compute_delta(he, pair); 475 466 break; 476 467 case COMPUTE_RATIO: ··· 506 495 { 507 496 double l = left->diff.period_ratio_delta; 
508 497 double r = right->diff.period_ratio_delta; 498 + 499 + return cmp_doubles(l, r); 500 + } 501 + case COMPUTE_DELTA_ABS: 502 + { 503 + double l = fabs(left->diff.period_ratio_delta); 504 + double r = fabs(right->diff.period_ratio_delta); 509 505 510 506 return cmp_doubles(l, r); 511 507 } ··· 582 564 if (!p_left || !p_right) 583 565 return p_left ? -1 : 1; 584 566 585 - if (c != COMPUTE_DELTA) { 567 + if (c != COMPUTE_DELTA && c != COMPUTE_DELTA_ABS) { 586 568 /* 587 569 * The delta can be computed without the baseline, but 588 570 * others are not. Put those entries which have no ··· 625 607 } 626 608 627 609 static int64_t 610 + hist_entry__cmp_delta_abs(struct perf_hpp_fmt *fmt, 611 + struct hist_entry *left, struct hist_entry *right) 612 + { 613 + struct data__file *d = fmt_to_data_file(fmt); 614 + 615 + return hist_entry__cmp_compute(right, left, COMPUTE_DELTA_ABS, d->idx); 616 + } 617 + 618 + static int64_t 628 619 hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt, 629 620 struct hist_entry *left, struct hist_entry *right) 630 621 { ··· 656 629 struct hist_entry *left, struct hist_entry *right) 657 630 { 658 631 return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA, 632 + sort_compute); 633 + } 634 + 635 + static int64_t 636 + hist_entry__cmp_delta_abs_idx(struct perf_hpp_fmt *fmt __maybe_unused, 637 + struct hist_entry *left, struct hist_entry *right) 638 + { 639 + return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA_ABS, 659 640 sort_compute); 660 641 } 661 642 ··· 810 775 OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, 811 776 "Show only items with match in baseline"), 812 777 OPT_CALLBACK('c', "compute", &compute, 813 - "delta,ratio,wdiff:w1,w2 (default delta)", 778 + "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)", 814 779 "Entries differential computation selection", 815 780 setup_compute), 816 781 OPT_BOOLEAN('p', "period", &show_period, ··· 980 945 981 946 switch (idx) { 982 947 case PERF_HPP_DIFF__DELTA: 948 + case 
PERF_HPP_DIFF__DELTA_ABS: 983 949 if (pair->diff.computed) 984 950 diff = pair->diff.period_ratio_delta; 985 951 else ··· 1154 1118 fmt->color = hpp__color_wdiff; 1155 1119 fmt->sort = hist_entry__cmp_wdiff; 1156 1120 break; 1121 + case PERF_HPP_DIFF__DELTA_ABS: 1122 + fmt->color = hpp__color_delta; 1123 + fmt->sort = hist_entry__cmp_delta_abs; 1124 + break; 1157 1125 default: 1158 1126 fmt->sort = hist_entry__cmp_nop; 1159 1127 break; ··· 1235 1195 case COMPUTE_WEIGHTED_DIFF: 1236 1196 fmt->sort = hist_entry__cmp_wdiff_idx; 1237 1197 break; 1198 + case COMPUTE_DELTA_ABS: 1199 + fmt->sort = hist_entry__cmp_delta_abs_idx; 1200 + break; 1238 1201 default: 1239 1202 BUG_ON(1); 1240 1203 } ··· 1292 1249 return 0; 1293 1250 } 1294 1251 1252 + static int diff__config(const char *var, const char *value, 1253 + void *cb __maybe_unused) 1254 + { 1255 + if (!strcmp(var, "diff.order")) { 1256 + sort_compute = perf_config_int(var, value); 1257 + return 0; 1258 + } 1259 + if (!strcmp(var, "diff.compute")) { 1260 + if (!strcmp(value, "delta")) { 1261 + compute = COMPUTE_DELTA; 1262 + } else if (!strcmp(value, "delta-abs")) { 1263 + compute = COMPUTE_DELTA_ABS; 1264 + } else if (!strcmp(value, "ratio")) { 1265 + compute = COMPUTE_RATIO; 1266 + } else if (!strcmp(value, "wdiff")) { 1267 + compute = COMPUTE_WEIGHTED_DIFF; 1268 + } else { 1269 + pr_err("Invalid compute method: %s\n", value); 1270 + return -1; 1271 + } 1272 + } 1273 + 1274 + return 0; 1275 + } 1276 + 1295 1277 int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) 1296 1278 { 1297 1279 int ret = hists__init(); 1298 1280 1299 1281 if (ret < 0) 1300 1282 return ret; 1283 + 1284 + perf_config(diff__config, NULL); 1301 1285 1302 1286 argc = parse_options(argc, argv, options, diff_usage, 0); 1303 1287
+2 -2
tools/perf/builtin-kmem.c
··· 1065 1065 1066 1066 data = rb_entry(next, struct page_stat, node); 1067 1067 sym = machine__find_kernel_function(machine, data->callsite, &map); 1068 - if (sym && sym->name) 1068 + if (sym) 1069 1069 caller = sym->name; 1070 1070 else 1071 1071 scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite); ··· 1107 1107 1108 1108 data = rb_entry(next, struct page_stat, node); 1109 1109 sym = machine__find_kernel_function(machine, data->callsite, &map); 1110 - if (sym && sym->name) 1110 + if (sym) 1111 1111 caller = sym->name; 1112 1112 else 1113 1113 scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
+1 -1
tools/perf/builtin-record.c
··· 418 418 419 419 static int record__open(struct record *rec) 420 420 { 421 - char msg[512]; 421 + char msg[BUFSIZ]; 422 422 struct perf_evsel *pos; 423 423 struct perf_evlist *evlist = rec->evlist; 424 424 struct perf_session *session = rec->session;
+1 -1
tools/perf/builtin-sched.c
··· 2067 2067 break; 2068 2068 2069 2069 sym = node->sym; 2070 - if (sym && sym->name) { 2070 + if (sym) { 2071 2071 if (!strcmp(sym->name, "schedule") || 2072 2072 !strcmp(sym->name, "__schedule") || 2073 2073 !strcmp(sym->name, "preempt_schedule"))
+1 -1
tools/perf/builtin-stat.c
··· 533 533 static int __run_perf_stat(int argc, const char **argv) 534 534 { 535 535 int interval = stat_config.interval; 536 - char msg[512]; 536 + char msg[BUFSIZ]; 537 537 unsigned long long t0, t1; 538 538 struct perf_evsel *counter; 539 539 struct timespec ts;
+1 -1
tools/perf/builtin-top.c
··· 859 859 860 860 static int perf_top__start_counters(struct perf_top *top) 861 861 { 862 - char msg[512]; 862 + char msg[BUFSIZ]; 863 863 struct perf_evsel *counter; 864 864 struct perf_evlist *evlist = top->evlist; 865 865 struct record_opts *opts = &top->record_opts;
+1 -1
tools/perf/tests/perf-record.c
··· 66 66 if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */ 67 67 evlist = perf_evlist__new_default(); 68 68 69 - if (evlist == NULL || argv == NULL) { 69 + if (evlist == NULL) { 70 70 pr_debug("Not enough memory to create evlist\n"); 71 71 goto out; 72 72 }
+3 -1
tools/perf/util/evsel.c
··· 2469 2469 " -1: Allow use of (almost) all events by all users\n" 2470 2470 ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n" 2471 2471 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" 2472 - ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN", 2472 + ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n" 2473 + "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n" 2474 + " kernel.perf_event_paranoid = -1\n" , 2473 2475 target->system_wide ? "system-wide " : "", 2474 2476 perf_event_paranoid()); 2475 2477 case ENOENT:
-1
tools/perf/util/evsel_fprintf.c
··· 168 168 169 169 if (symbol_conf.bt_stop_list && 170 170 node->sym && 171 - node->sym->name && 172 171 strlist__has_entry(symbol_conf.bt_stop_list, 173 172 node->sym->name)) { 174 173 break;
+1 -1
tools/perf/util/machine.c
··· 1565 1565 1566 1566 static bool symbol__match_regex(struct symbol *sym, regex_t *regex) 1567 1567 { 1568 - if (sym->name && !regexec(regex, sym->name, 0, NULL, 0)) 1568 + if (!regexec(regex, sym->name, 0, NULL, 0)) 1569 1569 return 1; 1570 1570 return 0; 1571 1571 }
+2 -2
tools/perf/util/map.c
··· 387 387 { 388 388 const char *dsoname = "[unknown]"; 389 389 390 - if (map && map->dso && (map->dso->name || map->dso->long_name)) { 390 + if (map && map->dso) { 391 391 if (symbol_conf.show_kernel_path && map->dso->long_name) 392 392 dsoname = map->dso->long_name; 393 - else if (map->dso->name) 393 + else 394 394 dsoname = map->dso->name; 395 395 } 396 396
+1 -1
tools/perf/util/scripting-engines/Build
··· 1 1 libperf-$(CONFIG_LIBPERL) += trace-event-perl.o 2 2 libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o 3 3 4 - CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default 4 + CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default 5 5 6 6 CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow
+2 -2
tools/perf/util/scripting-engines/trace-event-perl.c
··· 309 309 if (node->map) { 310 310 struct map *map = node->map; 311 311 const char *dsoname = "[unknown]"; 312 - if (map && map->dso && (map->dso->name || map->dso->long_name)) { 312 + if (map && map->dso) { 313 313 if (symbol_conf.show_kernel_path && map->dso->long_name) 314 314 dsoname = map->dso->long_name; 315 - else if (map->dso->name) 315 + else 316 316 dsoname = map->dso->name; 317 317 } 318 318 if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {
+1 -1
tools/perf/util/symbol_fprintf.c
··· 21 21 unsigned long offset; 22 22 size_t length; 23 23 24 - if (sym && sym->name) { 24 + if (sym) { 25 25 length = fprintf(fp, "%s", sym->name); 26 26 if (al && print_offsets) { 27 27 if (al->addr < sym->end)