Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf tools: Minimal DEFERRED_CALLCHAIN support

Add a new event type for deferred callchains and a new callback for the
struct perf_tool. For now it doesn't actually handle the deferred
callchains but it just marks the sample if it has the
PERF_CONTEXT_USER_DEFERRED marker in the callchain array.

With this change, perf report can at least dump the raw data. Recording
such events requires the next commit, which enables attr.defer_callchain,
but if you already have a data file, it'll show the following result.

$ perf report -D
...
0x2158@perf.data [0x40]: event: 22
.
. ... raw event: size 64 bytes
. 0000: 16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00 ......@.........
. 0010: 03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00 ..........3.....
. 0020: 0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00 ..3.....H.3.....
. 0030: 08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00 .........z.5....

121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312/2312: 0xb00000006
... FP chain: nr:3
..... 0: 00007f18fe337fa7
..... 1: 00007f18fe330e0f
..... 2: 00007f18fe331448
: unhandled!

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

+73 -3
+13
tools/lib/perf/include/perf/event.h
··· 151 151 __u32 next_prev_tid; 152 152 }; 153 153 154 + struct perf_record_callchain_deferred { 155 + struct perf_event_header header; 156 + /* 157 + * This is to match kernel and (deferred) user stacks together. 158 + * The kernel part will be in the sample callchain array after 159 + * the PERF_CONTEXT_USER_DEFERRED entry. 160 + */ 161 + __u64 cookie; 162 + __u64 nr; 163 + __u64 ips[]; 164 + }; 165 + 154 166 struct perf_record_header_attr { 155 167 struct perf_event_header header; 156 168 struct perf_event_attr attr; ··· 535 523 struct perf_record_read read; 536 524 struct perf_record_throttle throttle; 537 525 struct perf_record_sample sample; 526 + struct perf_record_callchain_deferred callchain_deferred; 538 527 struct perf_record_bpf_event bpf; 539 528 struct perf_record_ksymbol ksymbol; 540 529 struct perf_record_text_poke_event text_poke;
+1
tools/perf/util/event.c
··· 61 61 [PERF_RECORD_CGROUP] = "CGROUP", 62 62 [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", 63 63 [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID", 64 + [PERF_RECORD_CALLCHAIN_DEFERRED] = "CALLCHAIN_DEFERRED", 64 65 [PERF_RECORD_HEADER_ATTR] = "ATTR", 65 66 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", 66 67 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
+29 -2
tools/perf/util/evsel.c
··· 3090 3090 data->data_src = PERF_MEM_DATA_SRC_NONE; 3091 3091 data->vcpu = -1; 3092 3092 3093 + if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) { 3094 + const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); 3095 + 3096 + data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr; 3097 + if (data->callchain->nr > max_callchain_nr) 3098 + return -EFAULT; 3099 + 3100 + data->deferred_cookie = event->callchain_deferred.cookie; 3101 + 3102 + if (evsel->core.attr.sample_id_all) 3103 + perf_evsel__parse_id_sample(evsel, event, data); 3104 + return 0; 3105 + } 3106 + 3093 3107 if (event->header.type != PERF_RECORD_SAMPLE) { 3094 3108 if (!evsel->core.attr.sample_id_all) 3095 3109 return 0; ··· 3228 3214 3229 3215 if (type & PERF_SAMPLE_CALLCHAIN) { 3230 3216 const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); 3217 + u64 callchain_nr; 3231 3218 3232 3219 OVERFLOW_CHECK_u64(array); 3233 3220 data->callchain = (struct ip_callchain *)array++; 3234 - if (data->callchain->nr > max_callchain_nr) 3221 + callchain_nr = data->callchain->nr; 3222 + if (callchain_nr > max_callchain_nr) 3235 3223 return -EFAULT; 3236 - sz = data->callchain->nr * sizeof(u64); 3224 + sz = callchain_nr * sizeof(u64); 3225 + /* 3226 + * Save the cookie for the deferred user callchain. The last 2 3227 + * entries in the callchain should be the context marker and the 3228 + * cookie. The cookie will be used to match PERF_RECORD_ 3229 + * CALLCHAIN_DEFERRED later. 3230 + */ 3231 + if (evsel->core.attr.defer_callchain && callchain_nr >= 2 && 3232 + data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) { 3233 + data->deferred_cookie = data->callchain->ips[callchain_nr - 1]; 3234 + data->deferred_callchain = true; 3235 + } 3237 3236 OVERFLOW_CHECK(array, sz, max_size); 3238 3237 array = (void *)array + sz; 3239 3238 }
+1
tools/perf/util/machine.c
··· 2124 2124 *cpumode = PERF_RECORD_MISC_KERNEL; 2125 2125 break; 2126 2126 case PERF_CONTEXT_USER: 2127 + case PERF_CONTEXT_USER_DEFERRED: 2127 2128 *cpumode = PERF_RECORD_MISC_USER; 2128 2129 break; 2129 2130 default:
+2
tools/perf/util/perf_event_attr_fprintf.c
··· 343 343 PRINT_ATTRf(inherit_thread, p_unsigned); 344 344 PRINT_ATTRf(remove_on_exec, p_unsigned); 345 345 PRINT_ATTRf(sigtrap, p_unsigned); 346 + PRINT_ATTRf(defer_callchain, p_unsigned); 347 + PRINT_ATTRf(defer_output, p_unsigned); 346 348 347 349 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false); 348 350 PRINT_ATTRf(bp_type, p_unsigned);
+2
tools/perf/util/sample.h
··· 107 107 /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */ 108 108 u16 weight3; 109 109 bool no_hw_idx; /* No hw_idx collected in branch_stack */ 110 + bool deferred_callchain; /* Has deferred user callchains */ 111 + u64 deferred_cookie; 110 112 char insn[MAX_INSN]; 111 113 void *raw_data; 112 114 struct ip_callchain *callchain;
+20
tools/perf/util/session.c
··· 720 720 [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, 721 721 [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, 722 722 [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap, 723 + [PERF_RECORD_CALLCHAIN_DEFERRED] = perf_event__all64_swap, 723 724 [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, 724 725 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, 725 726 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, ··· 855 854 for (i = 0; i < callchain->nr; i++) 856 855 printf("..... %2d: %016" PRIx64 "\n", 857 856 i, callchain->ips[i]); 857 + 858 + if (sample->deferred_callchain) 859 + printf("...... (deferred)\n"); 858 860 } 859 861 860 862 static void branch_stack__printf(struct perf_sample *sample, ··· 1127 1123 sample_read__printf(sample, evsel->core.attr.read_format); 1128 1124 } 1129 1125 1126 + static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event, 1127 + struct perf_sample *sample) 1128 + { 1129 + if (!dump_trace) 1130 + return; 1131 + 1132 + printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n", 1133 + event->header.misc, sample->pid, sample->tid, sample->deferred_cookie); 1134 + 1135 + if (evsel__has_callchain(evsel)) 1136 + callchain__printf(evsel, sample); 1137 + } 1138 + 1130 1139 static void dump_read(struct evsel *evsel, union perf_event *event) 1131 1140 { 1132 1141 struct perf_record_read *read_event = &event->read; ··· 1370 1353 return tool->text_poke(tool, event, sample, machine); 1371 1354 case PERF_RECORD_AUX_OUTPUT_HW_ID: 1372 1355 return tool->aux_output_hw_id(tool, event, sample, machine); 1356 + case PERF_RECORD_CALLCHAIN_DEFERRED: 1357 + dump_deferred_callchain(evsel, event, sample); 1358 + return tool->callchain_deferred(tool, event, sample, evsel, machine); 1373 1359 default: 1374 1360 ++evlist->stats.nr_unknown_events; 1375 1361 return -1;
+3
tools/perf/util/tool.c
··· 287 287 tool->read = process_event_sample_stub; 288 288 tool->throttle = process_event_stub; 289 289 tool->unthrottle = process_event_stub; 290 + tool->callchain_deferred = process_event_sample_stub; 290 291 tool->attr = process_event_synth_attr_stub; 291 292 tool->event_update = process_event_synth_event_update_stub; 292 293 tool->tracing_data = process_event_synth_tracing_data_stub; ··· 336 335 } 337 336 CREATE_DELEGATE_SAMPLE(read); 338 337 CREATE_DELEGATE_SAMPLE(sample); 338 + CREATE_DELEGATE_SAMPLE(callchain_deferred); 339 339 340 340 #define CREATE_DELEGATE_ATTR(name) \ 341 341 static int delegate_ ## name(const struct perf_tool *tool, \ ··· 470 468 tool->tool.ksymbol = delegate_ksymbol; 471 469 tool->tool.bpf = delegate_bpf; 472 470 tool->tool.text_poke = delegate_text_poke; 471 + tool->tool.callchain_deferred = delegate_callchain_deferred; 473 472 474 473 tool->tool.attr = delegate_attr; 475 474 tool->tool.event_update = delegate_event_update;
+2 -1
tools/perf/util/tool.h
··· 44 44 45 45 struct perf_tool { 46 46 event_sample sample, 47 - read; 47 + read, 48 + callchain_deferred; 48 49 event_op mmap, 49 50 mmap2, 50 51 comm,