Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-for-mingo-20161028' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Support matching by topic in 'perf list' (Andi Kleen)

User visible:

- Apply cpu color only when there was activity in 'perf sched map' (Namhyung Kim)

- Always show the task's COMM in 'perf sched map -v' (Namhyung Kim)

- Fix hierarchy column counts in the perf hist browser (top, report), so that
pressing the RIGHT key a number of times no longer ends up showing nothing (Namhyung Kim)

Infrastructure:

- Support cascading options in libsubcmd and use it to share common options in
'perf sched' subcommands (Namhyung Kim)

- Avoid worker cacheline bouncing in 'perf bench futex' (Davidlohr Bueso)

- Sanitize numeric parameters in 'perf bench futex' (Davidlohr Bueso)

- Update copies of kernel files (Arnaldo Carvalho de Melo)

- Fix scripting (perl, python) setup to avoid leaks (Arnaldo Carvalho de Melo)

- Add missing object file to the python binding linkage list (Arnaldo Carvalho de Melo)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+128 -51
+5
tools/include/uapi/asm-generic/mman-common.h
··· 72 72 #define MAP_HUGE_SHIFT 26 73 73 #define MAP_HUGE_MASK 0x3f 74 74 75 + #define PKEY_DISABLE_ACCESS 0x1 76 + #define PKEY_DISABLE_WRITE 0x2 77 + #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ 78 + PKEY_DISABLE_WRITE) 79 + 75 80 #endif /* __ASM_GENERIC_MMAN_COMMON_H */
+14
tools/lib/subcmd/parse-options.c
··· 314 314 315 315 static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) 316 316 { 317 + retry: 317 318 for (; options->type != OPTION_END; options++) { 318 319 if (options->short_name == *p->opt) { 319 320 p->opt = p->opt[1] ? p->opt + 1 : NULL; 320 321 return get_value(p, options, OPT_SHORT); 321 322 } 322 323 } 324 + 325 + if (options->parent) { 326 + options = options->parent; 327 + goto retry; 328 + } 329 + 323 330 return -2; 324 331 } 325 332 ··· 340 333 if (!arg_end) 341 334 arg_end = arg + strlen(arg); 342 335 336 + retry: 343 337 for (; options->type != OPTION_END; options++) { 344 338 const char *rest; 345 339 int flags = 0; ··· 434 426 } 435 427 if (abbrev_option) 436 428 return get_value(p, abbrev_option, abbrev_flags); 429 + 430 + if (options->parent) { 431 + options = options->parent; 432 + goto retry; 433 + } 434 + 437 435 return -2; 438 436 } 439 437
+2
tools/lib/subcmd/parse-options.h
··· 109 109 intptr_t defval; 110 110 bool *set; 111 111 void *data; 112 + const struct option *parent; 112 113 }; 113 114 114 115 #define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v ) 115 116 116 117 #define OPT_END() { .type = OPTION_END } 118 + #define OPT_PARENT(p) { .type = OPTION_END, .parent = (p) } 117 119 #define OPT_ARGUMENT(l, h) { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) } 118 120 #define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } 119 121 #define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) }
+2 -2
tools/perf/Makefile.perf
··· 381 381 (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \ 382 382 || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true 383 383 @(test -f ../../arch/x86/lib/memcpy_64.S && ( \ 384 - (diff -B ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ 384 + (diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ 385 385 || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true 386 386 @(test -f ../../arch/x86/lib/memset_64.S && ( \ 387 - (diff -B ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ 387 + (diff -B -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ 388 388 || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true 389 389 @(test -f ../../arch/arm/include/uapi/asm/perf_regs.h && ( \ 390 390 (diff -B ../arch/arm/include/uapi/asm/perf_regs.h ../../arch/arm/include/uapi/asm/perf_regs.h >/dev/null) \
+3
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
··· 335 335 326 common copy_file_range sys_copy_file_range 336 336 327 64 preadv2 sys_preadv2 337 337 328 64 pwritev2 sys_pwritev2 338 + 329 common pkey_mprotect sys_pkey_mprotect 339 + 330 common pkey_alloc sys_pkey_alloc 340 + 331 common pkey_free sys_pkey_free 338 341 339 342 # 340 343 # x32-specific system call numbers start at 512 to avoid cache impact
+9 -6
tools/perf/bench/futex-hash.c
··· 39 39 static struct stats throughput_stats; 40 40 static pthread_cond_t thread_parent, thread_worker; 41 41 42 - #define SMP_CACHE_BYTES 256 43 - #define __cacheline_aligned __attribute__ ((aligned (SMP_CACHE_BYTES))) 44 - 45 42 struct worker { 46 43 int tid; 47 44 u_int32_t *futex; 48 45 pthread_t thread; 49 46 unsigned long ops; 50 - } __cacheline_aligned; 47 + }; 51 48 52 49 static const struct option options[] = { 53 50 OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), ··· 63 66 static void *workerfn(void *arg) 64 67 { 65 68 int ret; 66 - unsigned int i; 67 69 struct worker *w = (struct worker *) arg; 70 + unsigned int i; 71 + unsigned long ops = w->ops; /* avoid cacheline bouncing */ 68 72 69 73 pthread_mutex_lock(&thread_lock); 70 74 threads_starting--; ··· 75 77 pthread_mutex_unlock(&thread_lock); 76 78 77 79 do { 78 - for (i = 0; i < nfutexes; i++, w->ops++) { 80 + for (i = 0; i < nfutexes; i++, ops++) { 79 81 /* 80 82 * We want the futex calls to fail in order to stress 81 83 * the hashing of uaddr and not measure other steps, ··· 89 91 } 90 92 } while (!done); 91 93 94 + w->ops = ops; 92 95 return NULL; 93 96 } 94 97 ··· 130 131 } 131 132 132 133 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 134 + nsecs = futexbench_sanitize_numeric(nsecs); 135 + nfutexes = futexbench_sanitize_numeric(nfutexes); 133 136 134 137 sigfillset(&act.sa_mask); 135 138 act.sa_sigaction = toggle_done; ··· 139 138 140 139 if (!nthreads) /* default to the number of CPUs */ 141 140 nthreads = ncpus; 141 + else 142 + nthreads = futexbench_sanitize_numeric(nthreads); 142 143 143 144 worker = calloc(nthreads, sizeof(*worker)); 144 145 if (!worker)
+6 -1
tools/perf/bench/futex-lock-pi.c
··· 75 75 static void *workerfn(void *arg) 76 76 { 77 77 struct worker *w = (struct worker *) arg; 78 + unsigned long ops = w->ops; 78 79 79 80 pthread_mutex_lock(&thread_lock); 80 81 threads_starting--; ··· 104 103 if (ret && !silent) 105 104 warn("thread %d: Could not unlock pi-lock for %p (%d)", 106 105 w->tid, w->futex, ret); 107 - w->ops++; /* account for thread's share of work */ 106 + ops++; /* account for thread's share of work */ 108 107 } while (!done); 109 108 109 + w->ops = ops; 110 110 return NULL; 111 111 } 112 112 ··· 152 150 goto err; 153 151 154 152 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 153 + nsecs = futexbench_sanitize_numeric(nsecs); 155 154 156 155 sigfillset(&act.sa_mask); 157 156 act.sa_sigaction = toggle_done; ··· 160 157 161 158 if (!nthreads) 162 159 nthreads = ncpus; 160 + else 161 + nthreads = futexbench_sanitize_numeric(nthreads); 163 162 164 163 worker = calloc(nthreads, sizeof(*worker)); 165 164 if (!worker)
+2
tools/perf/bench/futex-requeue.c
··· 128 128 129 129 if (!nthreads) 130 130 nthreads = ncpus; 131 + else 132 + nthreads = futexbench_sanitize_numeric(nthreads); 131 133 132 134 worker = calloc(nthreads, sizeof(*worker)); 133 135 if (!worker)
+4
tools/perf/bench/futex-wake-parallel.c
··· 217 217 sigaction(SIGINT, &act, NULL); 218 218 219 219 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 220 + nwaking_threads = futexbench_sanitize_numeric(nwaking_threads); 221 + 220 222 if (!nblocked_threads) 221 223 nblocked_threads = ncpus; 224 + else 225 + nblocked_threads = futexbench_sanitize_numeric(nblocked_threads); 222 226 223 227 /* some sanity checks */ 224 228 if (nwaking_threads > nblocked_threads || !nwaking_threads)
+3
tools/perf/bench/futex-wake.c
··· 129 129 } 130 130 131 131 ncpus = sysconf(_SC_NPROCESSORS_ONLN); 132 + nwakes = futexbench_sanitize_numeric(nwakes); 132 133 133 134 sigfillset(&act.sa_mask); 134 135 act.sa_sigaction = toggle_done; ··· 137 136 138 137 if (!nthreads) 139 138 nthreads = ncpus; 139 + else 140 + nthreads = futexbench_sanitize_numeric(nthreads); 140 141 141 142 worker = calloc(nthreads, sizeof(*worker)); 142 143 if (!worker)
+4
tools/perf/bench/futex.h
··· 7 7 #ifndef _FUTEX_H 8 8 #define _FUTEX_H 9 9 10 + #include <stdlib.h> 10 11 #include <unistd.h> 11 12 #include <sys/syscall.h> 12 13 #include <sys/types.h> ··· 99 98 return 0; 100 99 } 101 100 #endif 101 + 102 + /* User input sanitation */ 103 + #define futexbench_sanitize_numeric(__n) abs((__n)) 102 104 103 105 #endif /* _FUTEX_H */
+21 -17
tools/perf/builtin-sched.c
··· 1191 1191 int i; 1192 1192 int ret; 1193 1193 u64 avg; 1194 + char max_lat_at[32]; 1194 1195 1195 1196 if (!work_list->nb_atoms) 1196 1197 return; ··· 1213 1212 printf(" "); 1214 1213 1215 1214 avg = work_list->total_lat / work_list->nb_atoms; 1215 + timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at)); 1216 1216 1217 - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n", 1217 + printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n", 1218 1218 (double)work_list->total_runtime / NSEC_PER_MSEC, 1219 1219 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, 1220 1220 (double)work_list->max_lat / NSEC_PER_MSEC, 1221 - (double)work_list->max_lat_at / NSEC_PER_SEC); 1221 + max_lat_at); 1222 1222 } 1223 1223 1224 1224 static int pid_cmp(struct work_atoms *l, struct work_atoms *r) ··· 1404 1402 int cpus_nr; 1405 1403 bool new_cpu = false; 1406 1404 const char *color = PERF_COLOR_NORMAL; 1405 + char stimestamp[32]; 1407 1406 1408 1407 BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); 1409 1408 ··· 1482 1479 cpu_color = COLOR_CPUS; 1483 1480 1484 1481 if (cpu != this_cpu) 1485 - color_fprintf(stdout, cpu_color, " "); 1482 + color_fprintf(stdout, color, " "); 1486 1483 else 1487 1484 color_fprintf(stdout, cpu_color, "*"); 1488 1485 ··· 1495 1492 if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) 1496 1493 goto out; 1497 1494 1498 - color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp / NSEC_PER_SEC); 1499 - if (new_shortname) { 1495 + timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); 1496 + color_fprintf(stdout, color, " %12s secs ", stimestamp); 1497 + if (new_shortname || (verbose && sched_in->tid)) { 1500 1498 const char *pid_color = color; 1501 1499 1502 1500 if (thread__has_color(sched_in)) ··· 1958 1954 .next_shortname2 = '0', 1959 1955 .skip_merge = 0, 1960 1956 }; 1957 + const struct option sched_options[] = { 1958 + OPT_STRING('i', 
"input", &input_name, "file", 1959 + "input file name"), 1960 + OPT_INCR('v', "verbose", &verbose, 1961 + "be more verbose (show symbol address, etc)"), 1962 + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1963 + "dump raw trace in ASCII"), 1964 + OPT_END() 1965 + }; 1961 1966 const struct option latency_options[] = { 1962 1967 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 1963 1968 "sort by key(s): runtime, switch, avg, max"), ··· 1978 1965 "dump raw trace in ASCII"), 1979 1966 OPT_BOOLEAN('p', "pids", &sched.skip_merge, 1980 1967 "latency stats per pid instead of per comm"), 1981 - OPT_END() 1968 + OPT_PARENT(sched_options) 1982 1969 }; 1983 1970 const struct option replay_options[] = { 1984 1971 OPT_UINTEGER('r', "repeat", &sched.replay_repeat, ··· 1988 1975 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1989 1976 "dump raw trace in ASCII"), 1990 1977 OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), 1991 - OPT_END() 1992 - }; 1993 - const struct option sched_options[] = { 1994 - OPT_STRING('i', "input", &input_name, "file", 1995 - "input file name"), 1996 - OPT_INCR('v', "verbose", &verbose, 1997 - "be more verbose (show symbol address, etc)"), 1998 - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1999 - "dump raw trace in ASCII"), 2000 - OPT_END() 1978 + OPT_PARENT(sched_options) 2001 1979 }; 2002 1980 const struct option map_options[] = { 2003 1981 OPT_BOOLEAN(0, "compact", &sched.map.comp, ··· 1999 1995 "highlight given CPUs in map"), 2000 1996 OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus", 2001 1997 "display given CPUs in map"), 2002 - OPT_END() 1998 + OPT_PARENT(sched_options) 2003 1999 }; 2004 2000 const char * const latency_usage[] = { 2005 2001 "perf sched latency [<options>]",
+6 -4
tools/perf/builtin-script.c
··· 441 441 { 442 442 struct perf_event_attr *attr = &evsel->attr; 443 443 unsigned long secs; 444 - unsigned long usecs; 445 444 unsigned long long nsecs; 446 445 447 446 if (PRINT_FIELD(COMM)) { ··· 470 471 nsecs = sample->time; 471 472 secs = nsecs / NSEC_PER_SEC; 472 473 nsecs -= secs * NSEC_PER_SEC; 473 - usecs = nsecs / NSEC_PER_USEC; 474 + 474 475 if (nanosecs) 475 476 printf("%5lu.%09llu: ", secs, nsecs); 476 - else 477 - printf("%5lu.%06lu: ", secs, usecs); 477 + else { 478 + char sample_time[32]; 479 + timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time)); 480 + printf("%12s: ", sample_time); 481 + } 478 482 } 479 483 } 480 484
+14 -1
tools/perf/ui/browsers/hists.c
··· 2076 2076 browser->b.use_navkeypressed = true; 2077 2077 browser->show_headers = symbol_conf.show_hist_headers; 2078 2078 2079 - hists__for_each_format(hists, fmt) 2079 + if (symbol_conf.report_hierarchy) { 2080 + struct perf_hpp_list_node *fmt_node; 2081 + 2082 + /* count overhead columns (in the first node) */ 2083 + fmt_node = list_first_entry(&hists->hpp_formats, 2084 + struct perf_hpp_list_node, list); 2085 + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) 2086 + ++browser->b.columns; 2087 + 2088 + /* add a single column for whole hierarchy sort keys*/ 2080 2089 ++browser->b.columns; 2090 + } else { 2091 + hists__for_each_format(hists, fmt) 2092 + ++browser->b.columns; 2093 + } 2081 2094 2082 2095 hists__reset_column_width(hists); 2083 2096 }
+1 -1
tools/perf/util/parse-branch-options.c
··· 64 64 } 65 65 if (!br->name) { 66 66 ret = -1; 67 - ui__warning("unknown branch filter %s," 67 + pr_warning("unknown branch filter %s," 68 68 " check man page\n", s); 69 69 goto error; 70 70 }
+3 -1
tools/perf/util/pmu.c
··· 1141 1141 if (event_glob != NULL && 1142 1142 !(strglobmatch_nocase(name, event_glob) || 1143 1143 (!is_cpu && strglobmatch_nocase(alias->name, 1144 - event_glob)))) 1144 + event_glob)) || 1145 + (alias->topic && 1146 + strglobmatch_nocase(alias->topic, event_glob)))) 1145 1147 continue; 1146 1148 1147 1149 if (is_cpu && !name_only && !alias->desc)
+1
tools/perf/util/python-ext-sources
··· 18 18 util/util.c 19 19 util/xyarray.c 20 20 util/cgroup.c 21 + util/parse-branch-options.c 21 22 util/rblist.c 22 23 util/counts.c 23 24 util/strlist.c
+17 -18
tools/perf/util/trace-event-scripting.c
··· 25 25 #include <errno.h> 26 26 27 27 #include "../perf.h" 28 + #include "debug.h" 28 29 #include "util.h" 29 30 #include "trace-event.h" 30 31 ··· 87 86 88 87 static void register_python_scripting(struct scripting_ops *scripting_ops) 89 88 { 90 - int err; 91 - err = script_spec_register("Python", scripting_ops); 92 - if (err) 93 - die("error registering Python script extension"); 89 + if (scripting_context == NULL) 90 + scripting_context = malloc(sizeof(*scripting_context)); 94 91 95 - err = script_spec_register("py", scripting_ops); 96 - if (err) 97 - die("error registering py script extension"); 98 - 99 - scripting_context = malloc(sizeof(struct scripting_context)); 92 + if (scripting_context == NULL || 93 + script_spec_register("Python", scripting_ops) || 94 + script_spec_register("py", scripting_ops)) { 95 + pr_err("Error registering Python script extension: disabling it\n"); 96 + zfree(&scripting_context); 97 + } 100 98 } 101 99 102 100 #ifdef NO_LIBPYTHON ··· 150 150 151 151 static void register_perl_scripting(struct scripting_ops *scripting_ops) 152 152 { 153 - int err; 154 - err = script_spec_register("Perl", scripting_ops); 155 - if (err) 156 - die("error registering Perl script extension"); 153 + if (scripting_context == NULL) 154 + scripting_context = malloc(sizeof(*scripting_context)); 157 155 158 - err = script_spec_register("pl", scripting_ops); 159 - if (err) 160 - die("error registering pl script extension"); 161 - 162 - scripting_context = malloc(sizeof(struct scripting_context)); 156 + if (scripting_context == NULL || 157 + script_spec_register("Perl", scripting_ops) || 158 + script_spec_register("pl", scripting_ops)) { 159 + pr_err("Error registering Perl script extension: disabling it\n"); 160 + zfree(&scripting_context); 161 + } 163 162 } 164 163 165 164 #ifdef NO_LIBPERL
+8
tools/perf/util/util.c
··· 433 433 return 0; 434 434 } 435 435 436 + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) 437 + { 438 + u64 sec = timestamp / NSEC_PER_SEC; 439 + u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC; 440 + 441 + return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec); 442 + } 443 + 436 444 unsigned long parse_tag_value(const char *str, struct parse_tag *tags) 437 445 { 438 446 struct parse_tag *i = tags;
+3
tools/perf/util/util.h
··· 362 362 #endif 363 363 364 364 int is_printable_array(char *p, unsigned int len); 365 + 366 + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); 367 + 365 368 #endif /* GIT_COMPAT_UTIL_H */