Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf callchain: Per-event type selection support

This patchkit adds the ability to set the callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data file.

Here is an example.

perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1

perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Kan Liang and committed by
Arnaldo Carvalho de Melo
d457c963 75186a9b

+86 -3
+3
tools/perf/Documentation/perf-record.txt
··· 53 53 - 'time': Disable/enable time stamping. Acceptable values are 1 for 54 54 enabling time stamping. 0 for disabling time stamping. 55 55 The default is 1. 56 + - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for 57 + FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode. 58 + - 'stack-size': user stack size for dwarf mode 56 59 Note: If user explicitly sets options which conflict with the params, 57 60 the value set by the params will be overridden. 58 61
+60 -2
tools/perf/util/evsel.c
··· 588 588 } 589 589 } 590 590 591 - static void apply_config_terms(struct perf_evsel *evsel) 591 + static void 592 + perf_evsel__reset_callgraph(struct perf_evsel *evsel, 593 + struct callchain_param *param) 594 + { 595 + struct perf_event_attr *attr = &evsel->attr; 596 + 597 + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); 598 + if (param->record_mode == CALLCHAIN_LBR) { 599 + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); 600 + attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | 601 + PERF_SAMPLE_BRANCH_CALL_STACK); 602 + } 603 + if (param->record_mode == CALLCHAIN_DWARF) { 604 + perf_evsel__reset_sample_bit(evsel, REGS_USER); 605 + perf_evsel__reset_sample_bit(evsel, STACK_USER); 606 + } 607 + } 608 + 609 + static void apply_config_terms(struct perf_evsel *evsel, 610 + struct record_opts *opts) 592 611 { 593 612 struct perf_evsel_config_term *term; 594 613 struct list_head *config_terms = &evsel->config_terms; 595 614 struct perf_event_attr *attr = &evsel->attr; 615 + struct callchain_param param; 616 + u32 dump_size = 0; 617 + char *callgraph_buf = NULL; 618 + 619 + /* callgraph default */ 620 + param.record_mode = callchain_param.record_mode; 596 621 597 622 list_for_each_entry(term, config_terms, list) { 598 623 switch (term->type) { ··· 635 610 else 636 611 perf_evsel__reset_sample_bit(evsel, TIME); 637 612 break; 613 + case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: 614 + callgraph_buf = term->val.callgraph; 615 + break; 616 + case PERF_EVSEL__CONFIG_TERM_STACK_USER: 617 + dump_size = term->val.stack_user; 618 + break; 638 619 default: 639 620 break; 640 621 } 622 + } 623 + 624 + /* User explicitly set per-event callgraph, clear the old setting and reset. */ 625 + if ((callgraph_buf != NULL) || (dump_size > 0)) { 626 + 627 + /* parse callgraph parameters */ 628 + if (callgraph_buf != NULL) { 629 + param.enabled = true; 630 + if (parse_callchain_record(callgraph_buf, &param)) { 631 + pr_err("per-event callgraph setting for %s failed. 
" 632 + "Apply callgraph global setting for it\n", 633 + evsel->name); 634 + return; 635 + } 636 + } 637 + if (dump_size > 0) { 638 + dump_size = round_up(dump_size, sizeof(u64)); 639 + param.dump_size = dump_size; 640 + } 641 + 642 + /* If global callgraph set, clear it */ 643 + if (callchain_param.enabled) 644 + perf_evsel__reset_callgraph(evsel, &callchain_param); 645 + 646 + /* set perf-event callgraph */ 647 + if (param.enabled) 648 + perf_evsel__config_callgraph(evsel, opts, &param); 641 649 } 642 650 } 643 651 ··· 870 812 * Apply event specific term settings, 871 813 * it overloads any global configuration. 872 814 */ 873 - apply_config_terms(evsel); 815 + apply_config_terms(evsel, opts); 874 816 } 875 817 876 818 static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+4
tools/perf/util/evsel.h
··· 41 41 PERF_EVSEL__CONFIG_TERM_PERIOD, 42 42 PERF_EVSEL__CONFIG_TERM_FREQ, 43 43 PERF_EVSEL__CONFIG_TERM_TIME, 44 + PERF_EVSEL__CONFIG_TERM_CALLGRAPH, 45 + PERF_EVSEL__CONFIG_TERM_STACK_USER, 44 46 PERF_EVSEL__CONFIG_TERM_MAX, 45 47 }; 46 48 ··· 53 51 u64 period; 54 52 u64 freq; 55 53 bool time; 54 + char *callgraph; 55 + u64 stack_user; 56 56 } val; 57 57 }; 58 58
+12
tools/perf/util/parse-events.c
··· 614 614 return -EINVAL; 615 615 } 616 616 break; 617 + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: 618 + CHECK_TYPE_VAL(STR); 619 + break; 620 + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: 621 + CHECK_TYPE_VAL(NUM); 622 + break; 617 623 case PARSE_EVENTS__TERM_TYPE_NAME: 618 624 CHECK_TYPE_VAL(STR); 619 625 break; ··· 673 667 break; 674 668 case PARSE_EVENTS__TERM_TYPE_TIME: 675 669 ADD_CONFIG_TERM(TIME, time, term->val.num); 670 + break; 671 + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: 672 + ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); 673 + break; 674 + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: 675 + ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); 676 676 break; 677 677 default: 678 678 break;
+2
tools/perf/util/parse-events.h
··· 65 65 PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ, 66 66 PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, 67 67 PARSE_EVENTS__TERM_TYPE_TIME, 68 + PARSE_EVENTS__TERM_TYPE_CALLGRAPH, 69 + PARSE_EVENTS__TERM_TYPE_STACKSIZE, 68 70 }; 69 71 70 72 struct parse_events_term {
+2
tools/perf/util/parse-events.l
··· 185 185 freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); } 186 186 branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } 187 187 time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } 188 + call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } 189 + stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } 188 190 , { return ','; } 189 191 "/" { BEGIN(INITIAL); return '/'; } 190 192 {name_minus} { return str(yyscanner, PE_NAME); }
+3 -1
tools/perf/util/pmu.c
··· 634 634 { 635 635 struct perf_pmu_format *format; 636 636 char *err, *str; 637 - static const char *static_terms = "config,config1,config2,name,period,freq,branch_type,time\n"; 637 + static const char *static_terms = "config,config1,config2,name," 638 + "period,freq,branch_type,time," 639 + "call-graph,stack-size\n"; 638 640 unsigned i = 0; 639 641 640 642 if (!asprintf(&str, "valid terms:"))