Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-for-mingo-20160303' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes:

User visible changes:

- Check existence of frontend/backend stalled cycles in 'perf stat' (Andi Kleen)

- Implement CSV metrics output in 'perf stat' (Andi Kleen)

- Support metrics in 'perf stat' --per-core/socket mode (Andi Kleen)

- Avoid installing .o files from tools/lib/ into the python extension (Jiri Olsa)

- Rename the tracepoint '/format' field that carries the syscall ID from 'nr',
that is also the name of some syscalls arguments, to "__syscall_nr", to
avoid having multiple fields with the same name, that was breaking the
python script skeleton generator from perf.data files (Taeung Song)

- Support converting data from bpf events in 'perf data' (Wang Nan)

- Fix segfault in 'perf test' hists related entries (Arnaldo Carvalho de Melo)

- Fix output of %llu for 64 bit values read on 32 bit machines in libtraceevent (Steven Rostedt)

- Fix time stamp rounding issue in libtraceevent (Chaos.Chen)

Infrastructure changes:

- Fix setlocale() breakage in the pmu parsing code (Jiri Olsa)

- Split libtraceevent's pevent_print_event() (Steven Rostedt)

- Librarize some 'perf record' bits to allow handling multiple perf.data
files per session (Wang Nan)

- Ensure return non-zero rc when mmap fails in 'perf record' (Wang Nan)

- Fix double free on 'command_line' in an error path in 'perf script' (Colin Ian King)

- Initialize struct sigaction 'sa_flags' field in a 'perf test' entry (Colin Ian King)

- Fix various build warnings in turbostat, detected with gcc6 (Colin Ian King)

- Use .s extension for preprocessed assembler code (Masahiro Yamada)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+567 -150
+9 -7
kernel/trace/trace_syscalls.c
··· 186 186 187 187 extern char *__bad_type_size(void); 188 188 189 - #define SYSCALL_FIELD(type, name) \ 190 - sizeof(type) != sizeof(trace.name) ? \ 189 + #define SYSCALL_FIELD(type, field, name) \ 190 + sizeof(type) != sizeof(trace.field) ? \ 191 191 __bad_type_size() : \ 192 - #type, #name, offsetof(typeof(trace), name), \ 193 - sizeof(trace.name), is_signed_type(type) 192 + #type, #name, offsetof(typeof(trace), field), \ 193 + sizeof(trace.field), is_signed_type(type) 194 194 195 195 static int __init 196 196 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) ··· 261 261 int i; 262 262 int offset = offsetof(typeof(trace), args); 263 263 264 - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 264 + ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), 265 + FILTER_OTHER); 265 266 if (ret) 266 267 return ret; 267 268 ··· 282 281 struct syscall_trace_exit trace; 283 282 int ret; 284 283 285 - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); 284 + ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), 285 + FILTER_OTHER); 286 286 if (ret) 287 287 return ret; 288 288 289 - ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 289 + ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret), 290 290 FILTER_OTHER); 291 291 292 292 return ret;
+1 -1
tools/build/Makefile.build
··· 85 85 $(call rule_mkdir) 86 86 $(call if_changed_dep,cc_i_c) 87 87 88 - $(OUTPUT)%.i: %.S FORCE 88 + $(OUTPUT)%.s: %.S FORCE 89 89 $(call rule_mkdir) 90 90 $(call if_changed_dep,cc_i_c) 91 91
+113 -33
tools/lib/traceevent/event-parse.c
··· 2635 2635 2636 2636 free_field: 2637 2637 free_arg(arg->hex.field); 2638 + arg->hex.field = NULL; 2638 2639 out: 2639 2640 *tok = NULL; 2640 2641 return EVENT_ERROR; ··· 2660 2659 2661 2660 free_size: 2662 2661 free_arg(arg->int_array.count); 2662 + arg->int_array.count = NULL; 2663 2663 free_field: 2664 2664 free_arg(arg->int_array.field); 2665 + arg->int_array.field = NULL; 2665 2666 out: 2666 2667 *tok = NULL; 2667 2668 return EVENT_ERROR; ··· 4978 4975 break; 4979 4976 } 4980 4977 } 4981 - if (pevent->long_size == 8 && ls && 4978 + if (pevent->long_size == 8 && ls == 1 && 4982 4979 sizeof(long) != 8) { 4983 4980 char *p; 4984 4981 ··· 5342 5339 return false; 5343 5340 } 5344 5341 5345 - void pevent_print_event(struct pevent *pevent, struct trace_seq *s, 5346 - struct pevent_record *record, bool use_trace_clock) 5342 + /** 5343 + * pevent_find_event_by_record - return the event from a given record 5344 + * @pevent: a handle to the pevent 5345 + * @record: The record to get the event from 5346 + * 5347 + * Returns the associated event for a given record, or NULL if non is 5348 + * is found. 5349 + */ 5350 + struct event_format * 5351 + pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record) 5347 5352 { 5348 - static const char *spaces = " "; /* 20 spaces */ 5349 - struct event_format *event; 5353 + int type; 5354 + 5355 + if (record->size < 0) { 5356 + do_warning("ug! negative record size %d", record->size); 5357 + return NULL; 5358 + } 5359 + 5360 + type = trace_parse_common_type(pevent, record->data); 5361 + 5362 + return pevent_find_event(pevent, type); 5363 + } 5364 + 5365 + /** 5366 + * pevent_print_event_task - Write the event task comm, pid and CPU 5367 + * @pevent: a handle to the pevent 5368 + * @s: the trace_seq to write to 5369 + * @event: the handle to the record's event 5370 + * @record: The record to get the event from 5371 + * 5372 + * Writes the tasks comm, pid and CPU to @s. 
5373 + */ 5374 + void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, 5375 + struct event_format *event, 5376 + struct pevent_record *record) 5377 + { 5378 + void *data = record->data; 5379 + const char *comm; 5380 + int pid; 5381 + 5382 + pid = parse_common_pid(pevent, data); 5383 + comm = find_cmdline(pevent, pid); 5384 + 5385 + if (pevent->latency_format) { 5386 + trace_seq_printf(s, "%8.8s-%-5d %3d", 5387 + comm, pid, record->cpu); 5388 + } else 5389 + trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); 5390 + } 5391 + 5392 + /** 5393 + * pevent_print_event_time - Write the event timestamp 5394 + * @pevent: a handle to the pevent 5395 + * @s: the trace_seq to write to 5396 + * @event: the handle to the record's event 5397 + * @record: The record to get the event from 5398 + * @use_trace_clock: Set to parse according to the @pevent->trace_clock 5399 + * 5400 + * Writes the timestamp of the record into @s. 5401 + */ 5402 + void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, 5403 + struct event_format *event, 5404 + struct pevent_record *record, 5405 + bool use_trace_clock) 5406 + { 5350 5407 unsigned long secs; 5351 5408 unsigned long usecs; 5352 5409 unsigned long nsecs; 5353 - const char *comm; 5354 - void *data = record->data; 5355 - int type; 5356 - int pid; 5357 - int len; 5358 5410 int p; 5359 5411 bool use_usec_format; 5360 5412 ··· 5420 5362 nsecs = record->ts - secs * NSECS_PER_SEC; 5421 5363 } 5422 5364 5423 - if (record->size < 0) { 5424 - do_warning("ug! negative record size %d", record->size); 5425 - return; 5426 - } 5427 - 5428 - type = trace_parse_common_type(pevent, data); 5429 - 5430 - event = pevent_find_event(pevent, type); 5431 - if (!event) { 5432 - do_warning("ug! 
no event found for type %d", type); 5433 - return; 5434 - } 5435 - 5436 - pid = parse_common_pid(pevent, data); 5437 - comm = find_cmdline(pevent, pid); 5438 - 5439 5365 if (pevent->latency_format) { 5440 - trace_seq_printf(s, "%8.8s-%-5d %3d", 5441 - comm, pid, record->cpu); 5366 + trace_seq_printf(s, " %3d", record->cpu); 5442 5367 pevent_data_lat_fmt(pevent, s, record); 5443 5368 } else 5444 - trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); 5369 + trace_seq_printf(s, " [%03d]", record->cpu); 5445 5370 5446 5371 if (use_usec_format) { 5447 5372 if (pevent->flags & PEVENT_NSEC_OUTPUT) { ··· 5432 5391 p = 9; 5433 5392 } else { 5434 5393 usecs = (nsecs + 500) / NSECS_PER_USEC; 5394 + /* To avoid usecs larger than 1 sec */ 5395 + if (usecs >= 1000000) { 5396 + usecs -= 1000000; 5397 + secs++; 5398 + } 5435 5399 p = 6; 5436 5400 } 5437 5401 5438 - trace_seq_printf(s, " %5lu.%0*lu: %s: ", 5439 - secs, p, usecs, event->name); 5402 + trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs); 5440 5403 } else 5441 - trace_seq_printf(s, " %12llu: %s: ", 5442 - record->ts, event->name); 5404 + trace_seq_printf(s, " %12llu:", record->ts); 5405 + } 5406 + 5407 + /** 5408 + * pevent_print_event_data - Write the event data section 5409 + * @pevent: a handle to the pevent 5410 + * @s: the trace_seq to write to 5411 + * @event: the handle to the record's event 5412 + * @record: The record to get the event from 5413 + * 5414 + * Writes the parsing of the record's data to @s. 5415 + */ 5416 + void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, 5417 + struct event_format *event, 5418 + struct pevent_record *record) 5419 + { 5420 + static const char *spaces = " "; /* 20 spaces */ 5421 + int len; 5422 + 5423 + trace_seq_printf(s, " %s: ", event->name); 5443 5424 5444 5425 /* Space out the event names evenly. 
*/ 5445 5426 len = strlen(event->name); ··· 5469 5406 trace_seq_printf(s, "%.*s", 20 - len, spaces); 5470 5407 5471 5408 pevent_event_info(s, event, record); 5409 + } 5410 + 5411 + void pevent_print_event(struct pevent *pevent, struct trace_seq *s, 5412 + struct pevent_record *record, bool use_trace_clock) 5413 + { 5414 + struct event_format *event; 5415 + 5416 + event = pevent_find_event_by_record(pevent, record); 5417 + if (!event) { 5418 + do_warning("ug! no event found for type %d", 5419 + trace_parse_common_type(pevent, record->data)); 5420 + return; 5421 + } 5422 + 5423 + pevent_print_event_task(pevent, s, event, record); 5424 + pevent_print_event_time(pevent, s, event, record, use_trace_clock); 5425 + pevent_print_event_data(pevent, s, event, record); 5472 5426 } 5473 5427 5474 5428 static int events_id_cmp(const void *a, const void *b)
+13
tools/lib/traceevent/event-parse.h
··· 628 628 unsigned long long addr); 629 629 int pevent_pid_is_registered(struct pevent *pevent, int pid); 630 630 631 + void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, 632 + struct event_format *event, 633 + struct pevent_record *record); 634 + void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, 635 + struct event_format *event, 636 + struct pevent_record *record, 637 + bool use_trace_clock); 638 + void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, 639 + struct event_format *event, 640 + struct pevent_record *record); 631 641 void pevent_print_event(struct pevent *pevent, struct trace_seq *s, 632 642 struct pevent_record *record, bool use_trace_clock); 633 643 ··· 703 693 704 694 struct event_format * 705 695 pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name); 696 + 697 + struct event_format * 698 + pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record); 706 699 707 700 void pevent_data_lat_fmt(struct pevent *pevent, 708 701 struct trace_seq *s, struct pevent_record *record);
+1
tools/perf/arch/x86/tests/rdpmc.c
··· 103 103 104 104 sigfillset(&sa.sa_mask); 105 105 sa.sa_sigaction = segfault_handler; 106 + sa.sa_flags = 0; 106 107 sigaction(SIGSEGV, &sa, NULL); 107 108 108 109 fd = sys_perf_event_open(&attr, 0, -1, -1,
+99 -69
tools/perf/builtin-record.c
··· 33 33 #include "util/parse-regs-options.h" 34 34 #include "util/llvm-utils.h" 35 35 #include "util/bpf-loader.h" 36 + #include "asm/bug.h" 36 37 37 38 #include <unistd.h> 38 39 #include <sched.h> ··· 324 323 } else { 325 324 pr_err("failed to mmap with %d (%s)\n", errno, 326 325 strerror_r(errno, msg, sizeof(msg))); 327 - rc = -errno; 326 + if (errno) 327 + rc = -errno; 328 + else 329 + rc = -EINVAL; 328 330 } 329 331 goto out; 330 332 } ··· 471 467 perf_header__clear_feat(&session->header, HEADER_STAT); 472 468 } 473 469 470 + static void 471 + record__finish_output(struct record *rec) 472 + { 473 + struct perf_data_file *file = &rec->file; 474 + int fd = perf_data_file__fd(file); 475 + 476 + if (file->is_pipe) 477 + return; 478 + 479 + rec->session->header.data_size += rec->bytes_written; 480 + file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); 481 + 482 + if (!rec->no_buildid) { 483 + process_buildids(rec); 484 + 485 + if (rec->buildid_all) 486 + dsos__hit_all(rec->session); 487 + } 488 + perf_session__write_header(rec->session, rec->evlist, fd, true); 489 + 490 + return; 491 + } 492 + 474 493 static volatile int workload_exec_errno; 475 494 476 495 /* ··· 511 484 } 512 485 513 486 static void snapshot_sig_handler(int sig); 487 + 488 + static int record__synthesize(struct record *rec) 489 + { 490 + struct perf_session *session = rec->session; 491 + struct machine *machine = &session->machines.host; 492 + struct perf_data_file *file = &rec->file; 493 + struct record_opts *opts = &rec->opts; 494 + struct perf_tool *tool = &rec->tool; 495 + int fd = perf_data_file__fd(file); 496 + int err = 0; 497 + 498 + if (file->is_pipe) { 499 + err = perf_event__synthesize_attrs(tool, session, 500 + process_synthesized_event); 501 + if (err < 0) { 502 + pr_err("Couldn't synthesize attrs.\n"); 503 + goto out; 504 + } 505 + 506 + if (have_tracepoints(&rec->evlist->entries)) { 507 + /* 508 + * FIXME err <= 0 here actually means that 509 + * there were no tracepoints so 
its not really 510 + * an error, just that we don't need to 511 + * synthesize anything. We really have to 512 + * return this more properly and also 513 + * propagate errors that now are calling die() 514 + */ 515 + err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, 516 + process_synthesized_event); 517 + if (err <= 0) { 518 + pr_err("Couldn't record tracing data.\n"); 519 + goto out; 520 + } 521 + rec->bytes_written += err; 522 + } 523 + } 524 + 525 + if (rec->opts.full_auxtrace) { 526 + err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 527 + session, process_synthesized_event); 528 + if (err) 529 + goto out; 530 + } 531 + 532 + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 533 + machine); 534 + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 535 + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 536 + "Check /proc/kallsyms permission or run as root.\n"); 537 + 538 + err = perf_event__synthesize_modules(tool, process_synthesized_event, 539 + machine); 540 + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 541 + "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 542 + "Check /proc/modules permission or run as root.\n"); 543 + 544 + if (perf_guest) { 545 + machines__process_guests(&session->machines, 546 + perf_event__synthesize_guest_os, tool); 547 + } 548 + 549 + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 550 + process_synthesized_event, opts->sample_address, 551 + opts->proc_map_timeout); 552 + out: 553 + return err; 554 + } 514 555 515 556 static int __cmd_record(struct record *rec, int argc, const char **argv) 516 557 { ··· 674 579 675 580 machine = &session->machines.host; 676 581 677 - if (file->is_pipe) { 678 - err = perf_event__synthesize_attrs(tool, session, 679 - process_synthesized_event); 680 - if (err < 0) { 681 - pr_err("Couldn't synthesize attrs.\n"); 682 - goto out_child; 683 - } 684 - 685 - if (have_tracepoints(&rec->evlist->entries)) { 686 - /* 687 - * FIXME err <= 0 here actually means that 688 - * there were no tracepoints so its not really 689 - * an error, just that we don't need to 690 - * synthesize anything. We really have to 691 - * return this more properly and also 692 - * propagate errors that now are calling die() 693 - */ 694 - err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, 695 - process_synthesized_event); 696 - if (err <= 0) { 697 - pr_err("Couldn't record tracing data.\n"); 698 - goto out_child; 699 - } 700 - rec->bytes_written += err; 701 - } 702 - } 703 - 704 - if (rec->opts.full_auxtrace) { 705 - err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 706 - session, process_synthesized_event); 707 - if (err) 708 - goto out_delete_session; 709 - } 710 - 711 - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 712 - machine); 582 + err = record__synthesize(rec); 713 583 if (err < 0) 714 - pr_err("Couldn't record kernel reference relocation symbol\n" 715 - "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 716 - "Check /proc/kallsyms permission or run as root.\n"); 717 - 718 - err = perf_event__synthesize_modules(tool, process_synthesized_event, 719 - machine); 720 - if (err < 0) 721 - pr_err("Couldn't record kernel module information.\n" 722 - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 723 - "Check /proc/modules permission or run as root.\n"); 724 - 725 - if (perf_guest) { 726 - machines__process_guests(&session->machines, 727 - perf_event__synthesize_guest_os, tool); 728 - } 729 - 730 - err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 731 - process_synthesized_event, opts->sample_address, 732 - opts->proc_map_timeout); 733 - if (err != 0) 734 584 goto out_child; 735 585 736 586 if (rec->realtime_prio) { ··· 811 771 /* this will be recalculated during process_buildids() */ 812 772 rec->samples = 0; 813 773 814 - if (!err && !file->is_pipe) { 815 - rec->session->header.data_size += rec->bytes_written; 816 - file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); 817 - 818 - if (!rec->no_buildid) { 819 - process_buildids(rec); 820 - 821 - if (rec->buildid_all) 822 - dsos__hit_all(rec->session); 823 - } 824 - perf_session__write_header(rec->session, rec->evlist, fd, true); 825 - } 774 + if (!err) 775 + record__finish_output(rec); 826 776 827 777 if (!err && !quiet) { 828 778 char samples[128];
+146 -14
tools/perf/builtin-stat.c
··· 739 739 FILE *fh; 740 740 bool newline; 741 741 const char *prefix; 742 + int nfields; 743 + int id, nr; 744 + struct perf_evsel *evsel; 742 745 }; 743 746 744 747 #define METRIC_LEN 35 ··· 757 754 { 758 755 fputc('\n', os->fh); 759 756 fputs(os->prefix, os->fh); 757 + aggr_printout(os->evsel, os->id, os->nr); 760 758 if (stat_config.aggr_mode == AGGR_NONE) 761 759 fprintf(os->fh, " "); 762 - if (stat_config.aggr_mode == AGGR_CORE) 763 - fprintf(os->fh, " "); 764 - if (stat_config.aggr_mode == AGGR_SOCKET) 765 - fprintf(os->fh, " "); 766 760 fprintf(os->fh, " "); 767 761 } 768 762 ··· 789 789 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 790 790 } 791 791 792 + static void new_line_csv(void *ctx) 793 + { 794 + struct outstate *os = ctx; 795 + int i; 796 + 797 + fputc('\n', os->fh); 798 + if (os->prefix) 799 + fprintf(os->fh, "%s%s", os->prefix, csv_sep); 800 + aggr_printout(os->evsel, os->id, os->nr); 801 + for (i = 0; i < os->nfields; i++) 802 + fputs(csv_sep, os->fh); 803 + } 804 + 805 + static void print_metric_csv(void *ctx, 806 + const char *color __maybe_unused, 807 + const char *fmt, const char *unit, double val) 808 + { 809 + struct outstate *os = ctx; 810 + FILE *out = os->fh; 811 + char buf[64], *vals, *ends; 812 + 813 + if (unit == NULL || fmt == NULL) { 814 + fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); 815 + return; 816 + } 817 + snprintf(buf, sizeof(buf), fmt, val); 818 + vals = buf; 819 + while (isspace(*vals)) 820 + vals++; 821 + ends = vals; 822 + while (isdigit(*ends) || *ends == '.') 823 + ends++; 824 + *ends = 0; 825 + while (isspace(*unit)) 826 + unit++; 827 + fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 828 + } 829 + 792 830 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 793 831 { 794 832 FILE *output = stat_config.output; ··· 853 815 854 816 if (evsel->cgrp) 855 817 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 818 + } 819 + 820 + static int first_shadow_cpu(struct 
perf_evsel *evsel, int id) 821 + { 822 + int i; 823 + 824 + if (!aggr_get_id) 825 + return 0; 826 + 827 + if (stat_config.aggr_mode == AGGR_NONE) 828 + return id; 829 + 830 + if (stat_config.aggr_mode == AGGR_GLOBAL) 831 + return 0; 832 + 833 + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 834 + int cpu2 = perf_evsel__cpus(evsel)->map[i]; 835 + 836 + if (aggr_get_id(evsel_list->cpus, cpu2) == id) 837 + return cpu2; 838 + } 839 + return 0; 856 840 } 857 841 858 842 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) ··· 913 853 struct perf_stat_output_ctx out; 914 854 struct outstate os = { 915 855 .fh = stat_config.output, 916 - .prefix = prefix ? prefix : "" 856 + .prefix = prefix ? prefix : "", 857 + .id = id, 858 + .nr = nr, 859 + .evsel = counter, 917 860 }; 918 861 print_metric_t pm = print_metric_std; 919 862 void (*nl)(void *); 920 863 921 864 nl = new_line_std; 922 865 866 + if (csv_output) { 867 + static int aggr_fields[] = { 868 + [AGGR_GLOBAL] = 0, 869 + [AGGR_THREAD] = 1, 870 + [AGGR_NONE] = 1, 871 + [AGGR_SOCKET] = 2, 872 + [AGGR_CORE] = 2, 873 + }; 874 + 875 + pm = print_metric_csv; 876 + nl = new_line_csv; 877 + os.nfields = 3; 878 + os.nfields += aggr_fields[stat_config.aggr_mode]; 879 + if (counter->cgrp) 880 + os.nfields++; 881 + } 923 882 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 924 883 aggr_printout(counter, id, nr); 925 884 ··· 959 880 fprintf(stat_config.output, "%s%s", 960 881 csv_sep, counter->cgrp->name); 961 882 883 + if (!csv_output) 884 + pm(&os, NULL, NULL, "", 0); 885 + print_noise(counter, noise); 962 886 print_running(run, ena); 887 + if (csv_output) 888 + pm(&os, NULL, NULL, "", 0); 963 889 return; 964 890 } 965 891 ··· 977 893 out.new_line = nl; 978 894 out.ctx = &os; 979 895 980 - if (!csv_output) 981 - perf_stat__print_shadow_stats(counter, uval, 982 - stat_config.aggr_mode == AGGR_GLOBAL ? 
0 : 983 - cpu_map__id_to_cpu(id), 984 - &out); 896 + if (csv_output) { 897 + print_noise(counter, noise); 898 + print_running(run, ena); 899 + } 985 900 986 - print_noise(counter, noise); 987 - print_running(run, ena); 901 + perf_stat__print_shadow_stats(counter, uval, 902 + first_shadow_cpu(counter, id), 903 + &out); 904 + if (!csv_output) { 905 + print_noise(counter, noise); 906 + print_running(run, ena); 907 + } 908 + } 909 + 910 + static void aggr_update_shadow(void) 911 + { 912 + int cpu, s2, id, s; 913 + u64 val; 914 + struct perf_evsel *counter; 915 + 916 + for (s = 0; s < aggr_map->nr; s++) { 917 + id = aggr_map->map[s]; 918 + evlist__for_each(evsel_list, counter) { 919 + val = 0; 920 + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 921 + s2 = aggr_get_id(evsel_list->cpus, cpu); 922 + if (s2 != id) 923 + continue; 924 + val += perf_counts(counter->counts, cpu, 0)->val; 925 + } 926 + val = val * counter->scale; 927 + perf_stat__update_shadow_stats(counter, &val, 928 + first_shadow_cpu(counter, id)); 929 + } 930 + } 988 931 } 989 932 990 933 static void print_aggr(char *prefix) ··· 1024 913 1025 914 if (!(aggr_map || aggr_get_id)) 1026 915 return; 916 + 917 + aggr_update_shadow(); 1027 918 1028 919 for (s = 0; s < aggr_map->nr; s++) { 1029 920 id = aggr_map->map[s]; ··· 1554 1441 */ 1555 1442 static int add_default_attributes(void) 1556 1443 { 1557 - struct perf_event_attr default_attrs[] = { 1444 + struct perf_event_attr default_attrs0[] = { 1558 1445 1559 1446 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1560 1447 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, ··· 1562 1449 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1563 1450 1564 1451 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1452 + }; 1453 + struct perf_event_attr frontend_attrs[] = { 1565 1454 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1455 + }; 1456 + 
struct perf_event_attr backend_attrs[] = { 1566 1457 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1458 + }; 1459 + struct perf_event_attr default_attrs1[] = { 1567 1460 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1568 1461 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1569 1462 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, ··· 1686 1567 } 1687 1568 1688 1569 if (!evsel_list->nr_entries) { 1689 - if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) 1570 + if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 1571 + return -1; 1572 + if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 1573 + if (perf_evlist__add_default_attrs(evsel_list, 1574 + frontend_attrs) < 0) 1575 + return -1; 1576 + } 1577 + if (pmu_have_event("cpu", "stalled-cycles-backend")) { 1578 + if (perf_evlist__add_default_attrs(evsel_list, 1579 + backend_attrs) < 0) 1580 + return -1; 1581 + } 1582 + if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 1690 1583 return -1; 1691 1584 } 1692 1585 ··· 1966 1835 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 1967 1836 (const char **) stat_usage, 1968 1837 PARSE_OPT_STOP_AT_NON_OPTION); 1838 + perf_stat__init_shadow_stats(); 1969 1839 1970 1840 if (csv_sep) { 1971 1841 csv_output = true;
+6 -2
tools/perf/builtin-trace.c
··· 1725 1725 1726 1726 sc->args = sc->tp_format->format.fields; 1727 1727 sc->nr_args = sc->tp_format->format.nr_fields; 1728 - /* drop nr field - not relevant here; does not exist on older kernels */ 1729 - if (sc->args && strcmp(sc->args->name, "nr") == 0) { 1728 + /* 1729 + * We need to check and discard the first variable '__syscall_nr' 1730 + * or 'nr' that mean the syscall number. It is needless here. 1731 + * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. 1732 + */ 1733 + if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { 1730 1734 sc->args = sc->args->next; 1731 1735 --sc->nr_args; 1732 1736 }
+117 -1
tools/perf/util/data-convert-bt.c
··· 352 352 return ret; 353 353 } 354 354 355 + static int 356 + add_bpf_output_values(struct bt_ctf_event_class *event_class, 357 + struct bt_ctf_event *event, 358 + struct perf_sample *sample) 359 + { 360 + struct bt_ctf_field_type *len_type, *seq_type; 361 + struct bt_ctf_field *len_field, *seq_field; 362 + unsigned int raw_size = sample->raw_size; 363 + unsigned int nr_elements = raw_size / sizeof(u32); 364 + unsigned int i; 365 + int ret; 366 + 367 + if (nr_elements * sizeof(u32) != raw_size) 368 + pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n", 369 + raw_size, nr_elements * sizeof(u32) - raw_size); 370 + 371 + len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len"); 372 + len_field = bt_ctf_field_create(len_type); 373 + if (!len_field) { 374 + pr_err("failed to create 'raw_len' for bpf output event\n"); 375 + ret = -1; 376 + goto put_len_type; 377 + } 378 + 379 + ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements); 380 + if (ret) { 381 + pr_err("failed to set field value for raw_len\n"); 382 + goto put_len_field; 383 + } 384 + ret = bt_ctf_event_set_payload(event, "raw_len", len_field); 385 + if (ret) { 386 + pr_err("failed to set payload to raw_len\n"); 387 + goto put_len_field; 388 + } 389 + 390 + seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data"); 391 + seq_field = bt_ctf_field_create(seq_type); 392 + if (!seq_field) { 393 + pr_err("failed to create 'raw_data' for bpf output event\n"); 394 + ret = -1; 395 + goto put_seq_type; 396 + } 397 + 398 + ret = bt_ctf_field_sequence_set_length(seq_field, len_field); 399 + if (ret) { 400 + pr_err("failed to set length of 'raw_data'\n"); 401 + goto put_seq_field; 402 + } 403 + 404 + for (i = 0; i < nr_elements; i++) { 405 + struct bt_ctf_field *elem_field = 406 + bt_ctf_field_sequence_get_field(seq_field, i); 407 + 408 + ret = bt_ctf_field_unsigned_integer_set_value(elem_field, 409 + ((u32 *)(sample->raw_data))[i]); 410 + 411 + 
bt_ctf_field_put(elem_field); 412 + if (ret) { 413 + pr_err("failed to set raw_data[%d]\n", i); 414 + goto put_seq_field; 415 + } 416 + } 417 + 418 + ret = bt_ctf_event_set_payload(event, "raw_data", seq_field); 419 + if (ret) 420 + pr_err("failed to set payload for raw_data\n"); 421 + 422 + put_seq_field: 423 + bt_ctf_field_put(seq_field); 424 + put_seq_type: 425 + bt_ctf_field_type_put(seq_type); 426 + put_len_field: 427 + bt_ctf_field_put(len_field); 428 + put_len_type: 429 + bt_ctf_field_type_put(len_type); 430 + return ret; 431 + } 432 + 355 433 static int add_generic_values(struct ctf_writer *cw, 356 434 struct bt_ctf_event *event, 357 435 struct perf_evsel *evsel, ··· 675 597 return -1; 676 598 } 677 599 600 + if (perf_evsel__is_bpf_output(evsel)) { 601 + ret = add_bpf_output_values(event_class, event, sample); 602 + if (ret) 603 + return -1; 604 + } 605 + 678 606 cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel)); 679 607 if (cs) { 680 608 if (is_flush_needed(cs)) ··· 828 744 return ret; 829 745 } 830 746 747 + static int add_bpf_output_types(struct ctf_writer *cw, 748 + struct bt_ctf_event_class *class) 749 + { 750 + struct bt_ctf_field_type *len_type = cw->data.u32; 751 + struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex; 752 + struct bt_ctf_field_type *seq_type; 753 + int ret; 754 + 755 + ret = bt_ctf_event_class_add_field(class, len_type, "raw_len"); 756 + if (ret) 757 + return ret; 758 + 759 + seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len"); 760 + if (!seq_type) 761 + return -1; 762 + 763 + return bt_ctf_event_class_add_field(class, seq_type, "raw_data"); 764 + } 765 + 831 766 static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, 832 767 struct bt_ctf_event_class *event_class) 833 768 { ··· 858 755 * ctf event header 859 756 * PERF_SAMPLE_READ - TODO 860 757 * PERF_SAMPLE_CALLCHAIN - TODO 861 - * PERF_SAMPLE_RAW - tracepoint fields are handled separately 758 + * PERF_SAMPLE_RAW - tracepoint 
fields and BPF output 759 + * are handled separately 862 760 * PERF_SAMPLE_BRANCH_STACK - TODO 863 761 * PERF_SAMPLE_REGS_USER - TODO 864 762 * PERF_SAMPLE_STACK_USER - TODO ··· 924 820 925 821 if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { 926 822 ret = add_tracepoint_types(cw, evsel, event_class); 823 + if (ret) 824 + goto err; 825 + } 826 + 827 + if (perf_evsel__is_bpf_output(evsel)) { 828 + ret = add_bpf_output_types(cw, event_class); 927 829 if (ret) 928 830 goto err; 929 831 } ··· 1079 969 if (hex && 1080 970 bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) 1081 971 goto err; 972 + 973 + #if __BYTE_ORDER == __BIG_ENDIAN 974 + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); 975 + #else 976 + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); 977 + #endif 1082 978 1083 979 pr2("Created type: INTEGER %d-bit %ssigned %s\n", 1084 980 size, sign ? "un" : "", hex ? "hex" : "");
+13
tools/perf/util/pmu.c
··· 124 124 lc = setlocale(LC_NUMERIC, NULL); 125 125 126 126 /* 127 + * The lc string may be allocated in static storage, 128 + * so get a dynamic copy to make it survive setlocale 129 + * call below. 130 + */ 131 + lc = strdup(lc); 132 + if (!lc) { 133 + ret = -ENOMEM; 134 + goto error; 135 + } 136 + 137 + /* 127 138 * force to C locale to ensure kernel 128 139 * scale string is converted correctly. 129 140 * kernel uses default C locale. ··· 145 134 146 135 /* restore locale */ 147 136 setlocale(LC_NUMERIC, lc); 137 + 138 + free((char *) lc); 148 139 149 140 ret = 0; 150 141 error:
+2 -2
tools/perf/util/scripting-engines/trace-event-python.c
··· 1094 1094 goto error; 1095 1095 } 1096 1096 1097 - free(command_line); 1098 - 1099 1097 set_table_handlers(tables); 1100 1098 1101 1099 if (tables->db_export_mode) { ··· 1101 1103 if (err) 1102 1104 goto error; 1103 1105 } 1106 + 1107 + free(command_line); 1104 1108 1105 1109 return err; 1106 1110 error:
+4
tools/perf/util/setup.py
··· 22 22 # switch off several checks (need to be at the end of cflags list) 23 23 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] 24 24 25 + src_perf = getenv('srctree') + '/tools/perf' 25 26 build_lib = getenv('PYTHON_EXTBUILD_LIB') 26 27 build_tmp = getenv('PYTHON_EXTBUILD_TMP') 27 28 libtraceevent = getenv('LIBTRACEEVENT') ··· 30 29 31 30 ext_sources = [f.strip() for f in file('util/python-ext-sources') 32 31 if len(f.strip()) > 0 and f[0] != '#'] 32 + 33 + # use full paths with source files 34 + ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources) 33 35 34 36 perf = Extension('perf', 35 37 sources = ext_sources,
+22 -15
tools/perf/util/sort.c
··· 2635 2635 return ret; 2636 2636 } 2637 2637 2638 - int setup_sorting(struct perf_evlist *evlist) 2638 + static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist) 2639 2639 { 2640 - int err; 2641 - struct hists *hists; 2642 2640 struct perf_evsel *evsel; 2643 - struct perf_hpp_fmt *fmt; 2644 - 2645 - err = __setup_sorting(evlist); 2646 - if (err < 0) 2647 - return err; 2648 - 2649 - if (parent_pattern != default_parent_pattern) { 2650 - err = sort_dimension__add("parent", evlist); 2651 - if (err < 0) 2652 - return err; 2653 - } 2654 2641 2655 2642 evlist__for_each(evlist, evsel) { 2656 - hists = evsel__hists(evsel); 2643 + struct perf_hpp_fmt *fmt; 2644 + struct hists *hists = evsel__hists(evsel); 2645 + 2657 2646 hists->nr_sort_keys = perf_hpp_list.nr_sort_keys; 2658 2647 2659 2648 /* ··· 2656 2667 hists->nr_sort_keys--; 2657 2668 } 2658 2669 } 2670 + } 2671 + 2672 + int setup_sorting(struct perf_evlist *evlist) 2673 + { 2674 + int err; 2675 + 2676 + err = __setup_sorting(evlist); 2677 + if (err < 0) 2678 + return err; 2679 + 2680 + if (parent_pattern != default_parent_pattern) { 2681 + err = sort_dimension__add("parent", evlist); 2682 + if (err < 0) 2683 + return err; 2684 + } 2685 + 2686 + if (evlist != NULL) 2687 + evlist__set_hists_nr_sort_keys(evlist); 2659 2688 2660 2689 reset_dimensions(); 2661 2690
+16 -2
tools/perf/util/stat-shadow.c
··· 2 2 #include "evsel.h" 3 3 #include "stat.h" 4 4 #include "color.h" 5 + #include "pmu.h" 5 6 6 7 enum { 7 8 CTX_BIT_USER = 1 << 0, ··· 15 14 16 15 #define NUM_CTX CTX_BIT_MAX 17 16 17 + /* 18 + * AGGR_GLOBAL: Use CPU 0 19 + * AGGR_SOCKET: Use first CPU of socket 20 + * AGGR_CORE: Use first CPU of core 21 + * AGGR_NONE: Use matching CPU 22 + * AGGR_THREAD: Not supported? 23 + */ 18 24 static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 19 25 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; 20 26 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; ··· 36 28 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; 37 29 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; 38 30 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; 31 + static bool have_frontend_stalled; 39 32 40 33 struct stats walltime_nsecs_stats; 34 + 35 + void perf_stat__init_shadow_stats(void) 36 + { 37 + have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); 38 + } 41 39 42 40 static int evsel_context(struct perf_evsel *evsel) 43 41 { ··· 324 310 total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); 325 311 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); 326 312 327 - out->new_line(ctxp); 328 313 if (total && avg) { 314 + out->new_line(ctxp); 329 315 ratio = total / avg; 330 316 print_metric(ctxp, NULL, "%7.2f ", 331 317 "stalled cycles per insn", 332 318 ratio); 333 - } else { 319 + } else if (have_frontend_stalled) { 334 320 print_metric(ctxp, NULL, NULL, 335 321 "stalled cycles per insn", 0); 336 322 }
+1
tools/perf/util/stat.h
··· 72 72 const char *fmt, double val); 73 73 typedef void (*new_line_t )(void *ctx); 74 74 75 + void perf_stat__init_shadow_stats(void); 75 76 void perf_stat__reset_shadow_stats(void); 76 77 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, 77 78 int cpu);
+4 -4
tools/power/x86/turbostat/turbostat.c
··· 1970 1970 } 1971 1971 1972 1972 static void 1973 - dump_cstate_pstate_config_info(family, model) 1973 + dump_cstate_pstate_config_info(unsigned int family, unsigned int model) 1974 1974 { 1975 1975 if (!do_nhm_platform_info) 1976 1976 return; ··· 2142 2142 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 2143 2143 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 2144 2144 2145 - double get_tdp(model) 2145 + double get_tdp(unsigned int model) 2146 2146 { 2147 2147 unsigned long long msr; 2148 2148 ··· 2256 2256 return; 2257 2257 } 2258 2258 2259 - void perf_limit_reasons_probe(family, model) 2259 + void perf_limit_reasons_probe(unsigned int family, unsigned int model) 2260 2260 { 2261 2261 if (!genuine_intel) 2262 2262 return; ··· 2792 2792 perf_limit_reasons_probe(family, model); 2793 2793 2794 2794 if (debug) 2795 - dump_cstate_pstate_config_info(); 2795 + dump_cstate_pstate_config_info(family, model); 2796 2796 2797 2797 if (has_skl_msrs(family, model)) 2798 2798 calculate_tsc_tweak();