Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf tools: handle PERF_RECORD_LOST_SAMPLES

This patch modifies the perf tool to handle the new RECORD type,
PERF_RECORD_LOST_SAMPLES.

The number of lost-sample events is stored in
.nr_events[PERF_RECORD_LOST_SAMPLES]. The exact number of samples
which the kernel dropped is stored in total_lost_samples.

When the percentage of dropped samples is greater than 5%, a warning
is printed.

Here are some examples:

Eg 1: Recording different frequently-occurring events is safe with the
patch; only a very low drop rate is observed in this case.

$ perf record -e '{cycles:p,instructions:p}' -c 20003 --no-time ~/tchain ~/tchain

$ perf report -D | tail
SAMPLE events: 120243
MMAP2 events: 5
LOST_SAMPLES events: 24
FINISHED_ROUND events: 15
cycles:p stats:
TOTAL events: 59348
SAMPLE events: 59348
instructions:p stats:
TOTAL events: 60895
SAMPLE events: 60895

$ perf report --stdio --group
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 24
#
# Samples: 120K of event 'anon group { cycles:p, instructions:p }'
# Event count (approx.): 24048600000
#
# Overhead Command Shared Object Symbol
# ................ ........... ................ ..................................
#
99.74% 99.86% tchain_edit tchain_edit [.] f3
0.09% 0.02% tchain_edit tchain_edit [.] f2
0.04% 0.00% tchain_edit [kernel.vmlinux] [k] ixgbe_read_reg

Eg 2: Recording the same event multiple times can lead to a high drop
rate, but that is not a useful configuration.

$ perf record -e '{cycles:p,cycles:p}' -c 20003 --no-time ~/tchain
Warning: Processed 600592 samples and lost 99.73% samples!
[perf record: Woken up 148 times to write data]
[perf record: Captured and wrote 36.922 MB perf.data (1206322 samples)]
[perf record: Woken up 1 times to write data]
[perf record: Captured and wrote 0.121 MB perf.data (1629 samples)]

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1431285195-14269-9-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Kan Liang and committed by
Ingo Molnar
c4937a91 f38b0dbb

+59
+1
tools/perf/builtin-report.c
··· 320 320 { 321 321 struct perf_evsel *pos; 322 322 323 + fprintf(stdout, "#\n# Total Lost Samples: %lu\n#\n", evlist->stats.total_lost_samples); 323 324 evlist__for_each(evlist, pos) { 324 325 struct hists *hists = evsel__hists(pos); 325 326 const char *evname = perf_evsel__name(pos);
+9
tools/perf/util/event.c
··· 25 25 [PERF_RECORD_SAMPLE] = "SAMPLE", 26 26 [PERF_RECORD_AUX] = "AUX", 27 27 [PERF_RECORD_ITRACE_START] = "ITRACE_START", 28 + [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", 28 29 [PERF_RECORD_HEADER_ATTR] = "ATTR", 29 30 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", 30 31 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", ··· 711 710 struct machine *machine) 712 711 { 713 712 return machine__process_itrace_start_event(machine, event); 713 + } 714 + 715 + int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, 716 + union perf_event *event, 717 + struct perf_sample *sample, 718 + struct machine *machine) 719 + { 720 + return machine__process_lost_samples_event(machine, event, sample); 714 721 } 715 722 716 723 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
+17
tools/perf/util/event.h
··· 52 52 u64 lost; 53 53 }; 54 54 55 + struct lost_samples_event { 56 + struct perf_event_header header; 57 + u64 lost; 58 + }; 59 + 55 60 /* 56 61 * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID 57 62 */ ··· 240 235 * total_lost tells exactly how many events the kernel in fact lost, i.e. it is 241 236 * the sum of all struct lost_event.lost fields reported. 242 237 * 238 + * The kernel discards mixed up samples and sends the number in a 239 + * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored 240 + * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells 241 + * exactly how many samples the kernel in fact dropped, i.e. it is the sum of 242 + * all struct lost_samples_event.lost fields reported. 243 + * 243 244 * The total_period is needed because by default auto-freq is used, so 244 245 * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get 245 246 * the total number of low level events, it is necessary to to sum all struct ··· 255 244 u64 total_period; 256 245 u64 total_non_filtered_period; 257 246 u64 total_lost; 247 + u64 total_lost_samples; 258 248 u64 total_invalid_chains; 259 249 u32 nr_events[PERF_RECORD_HEADER_MAX]; 260 250 u32 nr_non_filtered_samples; ··· 354 342 struct comm_event comm; 355 343 struct fork_event fork; 356 344 struct lost_event lost; 345 + struct lost_samples_event lost_samples; 357 346 struct read_event read; 358 347 struct throttle_event throttle; 359 348 struct sample_event sample; ··· 403 390 union perf_event *event, 404 391 struct perf_sample *sample, 405 392 struct machine *machine); 393 + int perf_event__process_lost_samples(struct perf_tool *tool, 394 + union perf_event *event, 395 + struct perf_sample *sample, 396 + struct machine *machine); 406 397 int perf_event__process_aux(struct perf_tool *tool, 407 398 union perf_event *event, 408 399 struct perf_sample *sample,
+10
tools/perf/util/machine.c
··· 482 482 return 0; 483 483 } 484 484 485 + int machine__process_lost_samples_event(struct machine *machine __maybe_unused, 486 + union perf_event *event, struct perf_sample *sample) 487 + { 488 + dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n", 489 + sample->id, event->lost_samples.lost); 490 + return 0; 491 + } 492 + 485 493 static struct dso* 486 494 machine__module_dso(struct machine *machine, struct kmod_path *m, 487 495 const char *filename) ··· 1427 1419 ret = machine__process_aux_event(machine, event); break; 1428 1420 case PERF_RECORD_ITRACE_START: 1429 1421 ret = machine__process_itrace_start_event(machine, event); 1422 + case PERF_RECORD_LOST_SAMPLES: 1423 + ret = machine__process_lost_samples_event(machine, event, sample); break; 1430 1424 break; 1431 1425 default: 1432 1426 ret = -1;
+2
tools/perf/util/machine.h
··· 81 81 struct perf_sample *sample); 82 82 int machine__process_lost_event(struct machine *machine, union perf_event *event, 83 83 struct perf_sample *sample); 84 + int machine__process_lost_samples_event(struct machine *machine, union perf_event *event, 85 + struct perf_sample *sample); 84 86 int machine__process_aux_event(struct machine *machine, 85 87 union perf_event *event); 86 88 int machine__process_itrace_start_event(struct machine *machine,
+19
tools/perf/util/session.c
··· 325 325 tool->exit = process_event_stub; 326 326 if (tool->lost == NULL) 327 327 tool->lost = perf_event__process_lost; 328 + if (tool->lost_samples == NULL) 329 + tool->lost_samples = perf_event__process_lost_samples; 328 330 if (tool->aux == NULL) 329 331 tool->aux = perf_event__process_aux; 330 332 if (tool->itrace_start == NULL) ··· 608 606 [PERF_RECORD_SAMPLE] = perf_event__all64_swap, 609 607 [PERF_RECORD_AUX] = perf_event__aux_swap, 610 608 [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap, 609 + [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, 611 610 [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, 612 611 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, 613 612 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, ··· 1052 1049 if (tool->lost == perf_event__process_lost) 1053 1050 evlist->stats.total_lost += event->lost.lost; 1054 1051 return tool->lost(tool, event, sample, machine); 1052 + case PERF_RECORD_LOST_SAMPLES: 1053 + if (tool->lost_samples == perf_event__process_lost_samples) 1054 + evlist->stats.total_lost_samples += event->lost_samples.lost; 1055 + return tool->lost_samples(tool, event, sample, machine); 1055 1056 case PERF_RECORD_READ: 1056 1057 return tool->read(tool, event, sample, evsel, machine); 1057 1058 case PERF_RECORD_THROTTLE: ··· 1291 1284 "Check IO/CPU overload!\n\n", 1292 1285 stats->nr_events[0], 1293 1286 stats->nr_events[PERF_RECORD_LOST]); 1287 + } 1288 + 1289 + if (session->tool->lost_samples == perf_event__process_lost_samples) { 1290 + double drop_rate; 1291 + 1292 + drop_rate = (double)stats->total_lost_samples / 1293 + (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples); 1294 + if (drop_rate > 0.05) { 1295 + ui__warning("Processed %lu samples and lost %3.2f%% samples!\n\n", 1296 + stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples, 1297 + drop_rate * 100.0); 1298 + } 1294 1299 } 1295 1300 1296 1301 if (stats->nr_unknown_events != 0) {
+1
tools/perf/util/tool.h
··· 43 43 fork, 44 44 exit, 45 45 lost, 46 + lost_samples, 46 47 aux, 47 48 itrace_start, 48 49 throttle,