Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-for-mingo-4.16-20180110' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- The 'perf test bpf' entry hooked an eBPF program to the
SyS_epoll_wait() kernel function and expected it to be hit when calling
the epoll_wait() libc wrapper, which changed recently, in systems such
as Fedora 27, with the glibc wrapper calling instead the epoll_pwait()
syscall, so switch to epoll_pwait() for both the kernel and libc
function, getting it to work both in old and new systems (Arnaldo Carvalho de Melo)

- Beautify 'gettid' syscall result in 'perf trace', and in doing so
noticed that we need to handle namespaces in 'perf trace', will be
dealt with in follow up patches where we'll try to figure out if
the recent support for namespaces in tools/perf/ can be used for this
purpose as well. (Arnaldo Carvalho de Melo)

- Introduce 'perf report --mmaps' and 'perf report --tasks' to show
info present in 'perf.data' (Jiri Olsa, Arnaldo Carvalho de Melo)

- Synchronize kernel <-> tooling headers wrt meltdown/spectre changes
(Arnaldo Carvalho de Melo)

- Fix a wrong offset issue when using /proc/kcore (Jin Yao)

- Fix bug that prevented annotating symbols in perf.data files
generated with 'perf record --branch-any' (Jin Yao)

- Add infrastructure to record first and last sample time to the
perf.data file header, so that when processing all samples in
a 'perf record' session, such as when doing build-id processing,
or when specifically requesting that that info be recorded, use
that in 'perf report --time', that also got support for percent
slices in addition to absolute ones.

I.e. now it is possible to ask for the samples in the 10%-20%
time slice of a perf.data file (Jin Yao)

- Enable building with libbabeltrace by default (Jiri Olsa)

- Display perf_event_attr::namespaces when dumping the attributes
in verbose mode (Jiri Olsa)

- Allocate context task_ctx_data for child event (Jiri Olsa)

- Update comments for PERF_RECORD_ITRACE_START and PERF_RECORD_MISC_* (Jiri Olsa)

- Add support for showing PERF_RECORD_LOST events in 'perf script' (Jiri Olsa)

- Add 'perf report --stats' option to display quick statistics about
metadata events (PERF_RECORD_*) i.e. what we get at the end of 'perf
report -D' (Jiri Olsa)

- Fix compile error with libunwind x86 (Wang Nan)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+884 -140
+7 -3
include/uapi/linux/perf_event.h
··· 612 612 */ 613 613 #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) 614 614 /* 615 - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on 616 - * different events so can reuse the same bit position. 617 - * Ditto PERF_RECORD_MISC_SWITCH_OUT. 615 + * Following PERF_RECORD_MISC_* are used on different 616 + * events, so can reuse the same bit position: 617 + * 618 + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events 619 + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event 620 + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events 618 621 */ 619 622 #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) 620 623 #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) ··· 867 864 * struct perf_event_header header; 868 865 * u32 pid; 869 866 * u32 tid; 867 + * struct sample_id sample_id; 870 868 * }; 871 869 */ 872 870 PERF_RECORD_ITRACE_START = 12,
-15
kernel/events/callchain.c
··· 179 179 } 180 180 181 181 struct perf_callchain_entry * 182 - perf_callchain(struct perf_event *event, struct pt_regs *regs) 183 - { 184 - bool kernel = !event->attr.exclude_callchain_kernel; 185 - bool user = !event->attr.exclude_callchain_user; 186 - /* Disallow cross-task user callchains. */ 187 - bool crosstask = event->ctx->task && event->ctx->task != current; 188 - const u32 max_stack = event->attr.sample_max_stack; 189 - 190 - if (!kernel && !user) 191 - return NULL; 192 - 193 - return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true); 194 - } 195 - 196 - struct perf_callchain_entry * 197 182 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, 198 183 u32 max_stack, bool crosstask, bool add_mark) 199 184 {
+39 -15
kernel/events/core.c
··· 5815 5815 perf_output_read(handle, event); 5816 5816 5817 5817 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 5818 - if (data->callchain) { 5819 - int size = 1; 5818 + int size = 1; 5820 5819 5821 - if (data->callchain) 5822 - size += data->callchain->nr; 5823 - 5824 - size *= sizeof(u64); 5825 - 5826 - __output_copy(handle, data->callchain, size); 5827 - } else { 5828 - u64 nr = 0; 5829 - perf_output_put(handle, nr); 5830 - } 5820 + size += data->callchain->nr; 5821 + size *= sizeof(u64); 5822 + __output_copy(handle, data->callchain, size); 5831 5823 } 5832 5824 5833 5825 if (sample_type & PERF_SAMPLE_RAW) { ··· 5972 5980 return phys_addr; 5973 5981 } 5974 5982 5983 + static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; 5984 + 5985 + static struct perf_callchain_entry * 5986 + perf_callchain(struct perf_event *event, struct pt_regs *regs) 5987 + { 5988 + bool kernel = !event->attr.exclude_callchain_kernel; 5989 + bool user = !event->attr.exclude_callchain_user; 5990 + /* Disallow cross-task user callchains. 
*/ 5991 + bool crosstask = event->ctx->task && event->ctx->task != current; 5992 + const u32 max_stack = event->attr.sample_max_stack; 5993 + struct perf_callchain_entry *callchain; 5994 + 5995 + if (!kernel && !user) 5996 + return &__empty_callchain; 5997 + 5998 + callchain = get_perf_callchain(regs, 0, kernel, user, 5999 + max_stack, crosstask, true); 6000 + return callchain ?: &__empty_callchain; 6001 + } 6002 + 5975 6003 void perf_prepare_sample(struct perf_event_header *header, 5976 6004 struct perf_sample_data *data, 5977 6005 struct perf_event *event, ··· 6014 6002 int size = 1; 6015 6003 6016 6004 data->callchain = perf_callchain(event, regs); 6017 - 6018 - if (data->callchain) 6019 - size += data->callchain->nr; 6005 + size += data->callchain->nr; 6020 6006 6021 6007 header->size += size * sizeof(u64); 6022 6008 } ··· 10713 10703 if (IS_ERR(child_event)) 10714 10704 return child_event; 10715 10705 10706 + 10707 + if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) && 10708 + !child_ctx->task_ctx_data) { 10709 + struct pmu *pmu = child_event->pmu; 10710 + 10711 + child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size, 10712 + GFP_KERNEL); 10713 + if (!child_ctx->task_ctx_data) { 10714 + free_event(child_event); 10715 + return NULL; 10716 + } 10717 + } 10718 + 10716 10719 /* 10717 10720 * is_orphaned_event() and list_add_tail(&parent_event->child_list) 10718 10721 * must be under the same lock in order to serialize against ··· 10736 10713 if (is_orphaned_event(parent_event) || 10737 10714 !atomic_long_inc_not_zero(&parent_event->refcount)) { 10738 10715 mutex_unlock(&parent_event->child_mutex); 10716 + /* task_ctx_data is freed with child_ctx */ 10739 10717 free_event(child_event); 10740 10718 return NULL; 10741 10719 }
-4
kernel/events/internal.h
··· 201 201 202 202 DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) 203 203 204 - /* Callchain handling */ 205 - extern struct perf_callchain_entry * 206 - perf_callchain(struct perf_event *event, struct pt_regs *regs); 207 - 208 204 static inline int get_recursion_context(int *recursion) 209 205 { 210 206 int rctx;
+3 -1
tools/arch/x86/include/asm/cpufeatures.h
··· 197 197 #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ 198 198 #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ 199 199 #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ 200 + #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ 200 201 201 202 #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 202 203 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 203 204 #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ 204 - 205 + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ 205 206 #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ 206 207 #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ 207 208 #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ ··· 341 340 #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ 342 341 #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ 343 342 #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ 343 + #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ 344 344 345 345 #endif /* _ASM_X86_CPUFEATURES_H */
+7 -1
tools/arch/x86/include/asm/disabled-features.h
··· 50 50 # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) 51 51 #endif 52 52 53 + #ifdef CONFIG_PAGE_TABLE_ISOLATION 54 + # define DISABLE_PTI 0 55 + #else 56 + # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) 57 + #endif 58 + 53 59 /* 54 60 * Make sure to add features to the correct mask 55 61 */ ··· 66 60 #define DISABLED_MASK4 (DISABLE_PCID) 67 61 #define DISABLED_MASK5 0 68 62 #define DISABLED_MASK6 0 69 - #define DISABLED_MASK7 0 63 + #define DISABLED_MASK7 (DISABLE_PTI) 70 64 #define DISABLED_MASK8 0 71 65 #define DISABLED_MASK9 (DISABLE_MPX) 72 66 #define DISABLED_MASK10 0
+7 -3
tools/include/uapi/linux/perf_event.h
··· 612 612 */ 613 613 #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) 614 614 /* 615 - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on 616 - * different events so can reuse the same bit position. 617 - * Ditto PERF_RECORD_MISC_SWITCH_OUT. 615 + * Following PERF_RECORD_MISC_* are used on different 616 + * events, so can reuse the same bit position: 617 + * 618 + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events 619 + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event 620 + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events 618 621 */ 619 622 #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) 620 623 #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) ··· 867 864 * struct perf_event_header header; 868 865 * u32 pid; 869 866 * u32 tid; 867 + * struct sample_id sample_id; 870 868 * }; 871 869 */ 872 870 PERF_RECORD_ITRACE_START = 12,
+3
tools/perf/Documentation/perf-record.txt
··· 430 430 --timestamp-filename 431 431 Append timestamp to output file name. 432 432 433 + --timestamp-boundary:: 434 + Record timestamp boundary (time of first/last samples). 435 + 433 436 --switch-output[=mode]:: 434 437 Generate multiple perf.data files, timestamp prefixed, switching to a new one 435 438 based on 'mode' value:
+36 -1
tools/perf/Documentation/perf-report.txt
··· 402 402 stop time is not given (i.e, time string is 'x.y,') then analysis goes 403 403 to end of file. 404 404 405 + Also support time percent with multiple time range. Time string is 406 + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. 407 + 408 + For example: 409 + Select the second 10% time slice: 410 + 411 + perf report --time 10%/2 412 + 413 + Select from 0% to 10% time slice: 414 + 415 + perf report --time 0%-10% 416 + 417 + Select the first and second 10% time slices: 418 + 419 + perf report --time 10%/1,10%/2 420 + 421 + Select from 0% to 10% and 30% to 40% slices: 422 + 423 + perf report --time 0%-10%,30%-40% 424 + 405 425 --itrace:: 406 426 Options for decoding instruction tracing data. The options are: 407 427 ··· 457 437 will be printed. Each entry is function name or file/line. Enabled by 458 438 default, disable with --no-inline. 459 439 440 + --mmaps:: 441 + Show --tasks output plus mmap information in a format similar to 442 + /proc/<PID>/maps. 443 + 444 + Please note that not all mmaps are stored, options affecting which ones 445 + are include 'perf record --data', for instance. 446 + 447 + --stats:: 448 + Display overall events statistics without any further processing. 449 + (like the one at the end of the perf report -D command) 450 + 451 + --tasks:: 452 + Display monitored tasks stored in perf data. Displaying pid/tid/ppid 453 + plus the command string aligned to distinguish parent and child tasks. 454 + 460 455 include::callchain-overhead-calculation.txt[] 461 456 462 457 SEE ALSO 463 458 -------- 464 - linkperf:perf-stat[1], linkperf:perf-annotate[1] 459 + linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1]
+38 -1
tools/perf/Documentation/perf-script.txt
··· 117 117 Comma separated list of fields to print. Options are: 118 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, 119 119 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, 120 - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric. 120 + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc. 121 121 Field list can be prepended with the type, trace, sw or hw, 122 122 to indicate to which event type the field list applies. 123 123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace ··· 225 225 that the metric computed is averaged over the whole sampling 226 226 period, not just for the sample point. 227 227 228 + For sample events it's possible to display misc field with -F +misc option, 229 + following letters are displayed for each bit: 230 + 231 + PERF_RECORD_MISC_KERNEL K 232 + PERF_RECORD_MISC_USER U 233 + PERF_RECORD_MISC_HYPERVISOR H 234 + PERF_RECORD_MISC_GUEST_KERNEL G 235 + PERF_RECORD_MISC_GUEST_USER g 236 + PERF_RECORD_MISC_MMAP_DATA* M 237 + PERF_RECORD_MISC_COMM_EXEC E 238 + PERF_RECORD_MISC_SWITCH_OUT S 239 + 240 + $ perf script -F +misc ... 241 + sched-messaging 1414 K 28690.636582: 4590 cycles ... 242 + sched-messaging 1407 U 28690.636600: 325620 cycles ... 243 + sched-messaging 1414 K 28690.636608: 19473 cycles ... 244 + misc field ___________/ 245 + 228 246 -k:: 229 247 --vmlinux=<file>:: 230 248 vmlinux pathname ··· 300 282 Display context switch events i.e. events of type PERF_RECORD_SWITCH or 301 283 PERF_RECORD_SWITCH_CPU_WIDE. 302 284 285 + --show-lost-events 286 + Display lost events i.e. events of type PERF_RECORD_LOST. 287 + 303 288 --demangle:: 304 289 Demangle symbol names to human readable form. It's enabled by default, 305 290 disable with --no-demangle. ··· 349 328 string is ',x.y') then analysis starts at the beginning of the file. If 350 329 stop time is not given (i.e, time string is 'x.y,') then analysis goes 351 330 to end of file. 
331 + 332 + Also support time percent with multipe time range. Time string is 333 + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. 334 + 335 + For example: 336 + Select the second 10% time slice 337 + perf script --time 10%/2 338 + 339 + Select from 0% to 10% time slice 340 + perf script --time 0%-10% 341 + 342 + Select the first and second 10% time slices 343 + perf script --time 10%/1,10%/2 344 + 345 + Select from 0% to 10% and 30% to 40% slices 346 + perf script --time 0%-10%,30%-40% 352 347 353 348 --max-blocks:: 354 349 Set the maximum number of program blocks to print with brstackasm for
+4
tools/perf/Documentation/perf.data-file-format.txt
··· 261 261 struct perf_header_string map; 262 262 }[number_of_cache_levels]; 263 263 264 + HEADER_SAMPLE_TIME = 21, 265 + 266 + Two uint64_t for the time of first sample and the time of last sample. 267 + 264 268 other bits are reserved and should ignored for now 265 269 HEADER_FEAT_BITS = 256, 266 270
+1 -1
tools/perf/Makefile.config
··· 780 780 NO_PERF_READ_VDSOX32 := 1 781 781 endif 782 782 783 - ifdef LIBBABELTRACE 783 + ifndef NO_LIBBABELTRACE 784 784 $(call feature_check,libbabeltrace) 785 785 ifeq ($(feature-libbabeltrace), 1) 786 786 CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS)
+1 -1
tools/perf/Makefile.perf
··· 77 77 # 78 78 # Define NO_ZLIB if you do not want to support compressed kernel modules 79 79 # 80 - # Define LIBBABELTRACE if you DO want libbabeltrace support 80 + # Define NO_LIBBABELTRACE if you do not want libbabeltrace support 81 81 # for CTF data format. 82 82 # 83 83 # Define NO_LZMA if you do not want to support compressed (xz) kernel modules
+1 -1
tools/perf/arch/x86/util/unwind-libunwind.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - #ifndef REMOTE_UNWIND_LIBUNWIND 4 3 #include <errno.h> 4 + #ifndef REMOTE_UNWIND_LIBUNWIND 5 5 #include <libunwind.h> 6 6 #include "perf_regs.h" 7 7 #include "../../util/unwind.h"
+15 -3
tools/perf/builtin-record.c
··· 78 78 bool no_buildid_cache_set; 79 79 bool buildid_all; 80 80 bool timestamp_filename; 81 + bool timestamp_boundary; 81 82 struct switch_output switch_output; 82 83 unsigned long long samples; 83 84 }; ··· 410 409 { 411 410 struct record *rec = container_of(tool, struct record, tool); 412 411 413 - rec->samples++; 412 + if (rec->evlist->first_sample_time == 0) 413 + rec->evlist->first_sample_time = sample->time; 414 414 415 + rec->evlist->last_sample_time = sample->time; 416 + 417 + if (rec->buildid_all) 418 + return 0; 419 + 420 + rec->samples++; 415 421 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 416 422 } 417 423 ··· 443 435 444 436 /* 445 437 * If --buildid-all is given, it marks all DSO regardless of hits, 446 - * so no need to process samples. 438 + * so no need to process samples. But if timestamp_boundary is enabled, 439 + * it still needs to walk on all samples to get the timestamps of 440 + * first/last samples. 447 441 */ 448 - if (rec->buildid_all) 442 + if (rec->buildid_all && !rec->timestamp_boundary) 449 443 rec->tool.sample = NULL; 450 444 451 445 return perf_session__process_events(session); ··· 1631 1621 "Record build-id of all DSOs regardless of hits"), 1632 1622 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 1633 1623 "append timestamp to output filename"), 1624 + OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 1625 + "Record timestamp boundary (time of first/last samples)"), 1634 1626 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 1635 1627 &record.switch_output.set, "signal,size,time", 1636 1628 "Switch output when receive SIGUSR2 or cross size,time threshold",
+242 -7
tools/perf/builtin-report.c
··· 15 15 #include "util/color.h" 16 16 #include <linux/list.h> 17 17 #include <linux/rbtree.h> 18 + #include <linux/err.h> 18 19 #include "util/symbol.h" 19 20 #include "util/callchain.h" 20 21 #include "util/values.h" ··· 52 51 #include <sys/types.h> 53 52 #include <sys/stat.h> 54 53 #include <unistd.h> 54 + #include <linux/mman.h> 55 + 56 + #define PTIME_RANGE_MAX 10 55 57 56 58 struct report { 57 59 struct perf_tool tool; ··· 64 60 bool show_threads; 65 61 bool inverted_callchain; 66 62 bool mem_mode; 63 + bool stats_mode; 64 + bool tasks_mode; 65 + bool mmaps_mode; 67 66 bool header; 68 67 bool header_only; 69 68 bool nonany_branch_mode; ··· 76 69 const char *cpu_list; 77 70 const char *symbol_filter_str; 78 71 const char *time_str; 79 - struct perf_time_interval ptime; 72 + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; 73 + int range_num; 80 74 float min_percent; 81 75 u64 nr_entries; 82 76 u64 queue_size; ··· 170 162 struct hist_entry *he = iter->he; 171 163 struct report *rep = arg; 172 164 struct branch_info *bi; 165 + struct perf_sample *sample = iter->sample; 166 + struct perf_evsel *evsel = iter->evsel; 167 + int err; 168 + 169 + if (!ui__has_annotation()) 170 + return 0; 171 + 172 + hist__account_cycles(sample->branch_stack, al, sample, 173 + rep->nonany_branch_mode); 173 174 174 175 bi = he->branch_info; 176 + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); 177 + if (err) 178 + goto out; 179 + 180 + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); 181 + 175 182 branch_type_count(&rep->brtype_stat, &bi->flags, 176 183 bi->from.addr, bi->to.addr); 177 184 178 - return 0; 185 + out: 186 + return err; 179 187 } 180 188 181 189 static int process_sample_event(struct perf_tool *tool, ··· 210 186 }; 211 187 int ret = 0; 212 188 213 - if (perf_time__skip_sample(&rep->ptime, sample->time)) 189 + if (perf_time__ranges_skip_sample(rep->ptime_range, rep->range_num, 190 + sample->time)) { 214 191 return 0; 192 + } 215 
193 216 194 if (machine__resolve(machine, &al, sample) < 0) { 217 195 pr_debug("problem processing %d event, skipping it.\n", ··· 593 567 ui_progress__finish(); 594 568 } 595 569 570 + static void stats_setup(struct report *rep) 571 + { 572 + memset(&rep->tool, 0, sizeof(rep->tool)); 573 + rep->tool.no_warn = true; 574 + } 575 + 576 + static int stats_print(struct report *rep) 577 + { 578 + struct perf_session *session = rep->session; 579 + 580 + perf_session__fprintf_nr_events(session, stdout); 581 + return 0; 582 + } 583 + 584 + static void tasks_setup(struct report *rep) 585 + { 586 + memset(&rep->tool, 0, sizeof(rep->tool)); 587 + if (rep->mmaps_mode) { 588 + rep->tool.mmap = perf_event__process_mmap; 589 + rep->tool.mmap2 = perf_event__process_mmap2; 590 + } 591 + rep->tool.comm = perf_event__process_comm; 592 + rep->tool.exit = perf_event__process_exit; 593 + rep->tool.fork = perf_event__process_fork; 594 + rep->tool.no_warn = true; 595 + } 596 + 597 + struct task { 598 + struct thread *thread; 599 + struct list_head list; 600 + struct list_head children; 601 + }; 602 + 603 + static struct task *tasks_list(struct task *task, struct machine *machine) 604 + { 605 + struct thread *parent_thread, *thread = task->thread; 606 + struct task *parent_task; 607 + 608 + /* Already listed. */ 609 + if (!list_empty(&task->list)) 610 + return NULL; 611 + 612 + /* Last one in the chain. 
*/ 613 + if (thread->ppid == -1) 614 + return task; 615 + 616 + parent_thread = machine__find_thread(machine, -1, thread->ppid); 617 + if (!parent_thread) 618 + return ERR_PTR(-ENOENT); 619 + 620 + parent_task = thread__priv(parent_thread); 621 + list_add_tail(&task->list, &parent_task->children); 622 + return tasks_list(parent_task, machine); 623 + } 624 + 625 + static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) 626 + { 627 + size_t printed = 0; 628 + struct rb_node *nd; 629 + 630 + for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { 631 + struct map *map = rb_entry(nd, struct map, rb_node); 632 + 633 + printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", 634 + indent, "", map->start, map->end, 635 + map->prot & PROT_READ ? 'r' : '-', 636 + map->prot & PROT_WRITE ? 'w' : '-', 637 + map->prot & PROT_EXEC ? 'x' : '-', 638 + map->flags & MAP_SHARED ? 's' : 'p', 639 + map->pgoff, 640 + map->ino, map->dso->name); 641 + } 642 + 643 + return printed; 644 + } 645 + 646 + static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp) 647 + { 648 + int printed = 0, i; 649 + for (i = 0; i < MAP__NR_TYPES; ++i) 650 + printed += maps__fprintf_task(&mg->maps[i], indent, fp); 651 + return printed; 652 + } 653 + 654 + static void task__print_level(struct task *task, FILE *fp, int level) 655 + { 656 + struct thread *thread = task->thread; 657 + struct task *child; 658 + int comm_indent = fprintf(fp, " %8d %8d %8d |%*s", 659 + thread->pid_, thread->tid, thread->ppid, 660 + level, ""); 661 + 662 + fprintf(fp, "%s\n", thread__comm_str(thread)); 663 + 664 + map_groups__fprintf_task(thread->mg, comm_indent, fp); 665 + 666 + if (!list_empty(&task->children)) { 667 + list_for_each_entry(child, &task->children, list) 668 + task__print_level(child, fp, level + 1); 669 + } 670 + } 671 + 672 + static int tasks_print(struct report *rep, FILE *fp) 673 + { 674 + struct perf_session *session = rep->session; 
675 + struct machine *machine = &session->machines.host; 676 + struct task *tasks, *task; 677 + unsigned int nr = 0, itask = 0, i; 678 + struct rb_node *nd; 679 + LIST_HEAD(list); 680 + 681 + /* 682 + * No locking needed while accessing machine->threads, 683 + * because --tasks is single threaded command. 684 + */ 685 + 686 + /* Count all the threads. */ 687 + for (i = 0; i < THREADS__TABLE_SIZE; i++) 688 + nr += machine->threads[i].nr; 689 + 690 + tasks = malloc(sizeof(*tasks) * nr); 691 + if (!tasks) 692 + return -ENOMEM; 693 + 694 + for (i = 0; i < THREADS__TABLE_SIZE; i++) { 695 + struct threads *threads = &machine->threads[i]; 696 + 697 + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { 698 + task = tasks + itask++; 699 + 700 + task->thread = rb_entry(nd, struct thread, rb_node); 701 + INIT_LIST_HEAD(&task->children); 702 + INIT_LIST_HEAD(&task->list); 703 + thread__set_priv(task->thread, task); 704 + } 705 + } 706 + 707 + /* 708 + * Iterate every task down to the unprocessed parent 709 + * and link all in task children list. Task with no 710 + * parent is added into 'list'. 
711 + */ 712 + for (itask = 0; itask < nr; itask++) { 713 + task = tasks + itask; 714 + 715 + if (!list_empty(&task->list)) 716 + continue; 717 + 718 + task = tasks_list(task, machine); 719 + if (IS_ERR(task)) { 720 + pr_err("Error: failed to process tasks\n"); 721 + free(tasks); 722 + return PTR_ERR(task); 723 + } 724 + 725 + if (task) 726 + list_add_tail(&task->list, &list); 727 + } 728 + 729 + fprintf(fp, "# %8s %8s %8s %s\n", "pid", "tid", "ppid", "comm"); 730 + 731 + list_for_each_entry(task, &list, list) 732 + task__print_level(task, fp, 0); 733 + 734 + free(tasks); 735 + return 0; 736 + } 737 + 596 738 static int __cmd_report(struct report *rep) 597 739 { 598 740 int ret; ··· 792 598 return ret; 793 599 } 794 600 601 + if (rep->stats_mode) 602 + stats_setup(rep); 603 + 604 + if (rep->tasks_mode) 605 + tasks_setup(rep); 606 + 795 607 ret = perf_session__process_events(session); 796 608 if (ret) { 797 609 ui__error("failed to process sample\n"); 798 610 return ret; 799 611 } 612 + 613 + if (rep->stats_mode) 614 + return stats_print(rep); 615 + 616 + if (rep->tasks_mode) 617 + return tasks_print(rep, stdout); 800 618 801 619 report__warn_kptr_restrict(rep); 802 620 ··· 966 760 OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), 967 761 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 968 762 "dump raw trace in ASCII"), 763 + OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), 764 + OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"), 765 + OPT_BOOLEAN(0, "mmaps", &report.mmaps_mode, "Display recorded tasks memory maps"), 969 766 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 970 767 "file", "vmlinux pathname"), 971 768 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, ··· 1116 907 report.symbol_filter_str = argv[0]; 1117 908 } 1118 909 910 + if (report.mmaps_mode) 911 + report.tasks_mode = true; 912 + 1119 913 if (quiet) 1120 914 perf_quiet_option(); 1121 915 ··· 1233 1021 report.tool.show_feat_hdr = 
SHOW_FEAT_HEADER; 1234 1022 if (report.show_full_info) 1235 1023 report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; 1024 + if (report.stats_mode || report.tasks_mode) 1025 + use_browser = 0; 1026 + if (report.stats_mode && report.tasks_mode) { 1027 + pr_err("Error: --tasks and --mmaps can't be used together with --stats\n"); 1028 + goto error; 1029 + } 1236 1030 1237 1031 if (strcmp(input_name, "-") != 0) 1238 1032 setup_browser(true); ··· 1261 1043 ret = 0; 1262 1044 goto error; 1263 1045 } 1264 - } else if (use_browser == 0 && !quiet) { 1046 + } else if (use_browser == 0 && !quiet && 1047 + !report.stats_mode && !report.tasks_mode) { 1265 1048 fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", 1266 1049 stdout); 1267 1050 } ··· 1296 1077 if (symbol__init(&session->header.env) < 0) 1297 1078 goto error; 1298 1079 1299 - if (perf_time__parse_str(&report.ptime, report.time_str) != 0) { 1300 - pr_err("Invalid time string\n"); 1301 - return -EINVAL; 1080 + if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) { 1081 + if (session->evlist->first_sample_time == 0 && 1082 + session->evlist->last_sample_time == 0) { 1083 + pr_err("No first/last sample time in perf data\n"); 1084 + return -EINVAL; 1085 + } 1086 + 1087 + report.range_num = perf_time__percent_parse_str( 1088 + report.ptime_range, PTIME_RANGE_MAX, 1089 + report.time_str, 1090 + session->evlist->first_sample_time, 1091 + session->evlist->last_sample_time); 1092 + 1093 + if (report.range_num < 0) { 1094 + pr_err("Invalid time string\n"); 1095 + return -EINVAL; 1096 + } 1097 + } else { 1098 + report.range_num = 1; 1302 1099 } 1303 1100 1304 1101 sort__setup_elide(stdout);
+119 -17
tools/perf/builtin-script.c
··· 93 93 PERF_OUTPUT_PHYS_ADDR = 1U << 26, 94 94 PERF_OUTPUT_UREGS = 1U << 27, 95 95 PERF_OUTPUT_METRIC = 1U << 28, 96 + PERF_OUTPUT_MISC = 1U << 29, 96 97 }; 97 98 98 99 struct output_option { ··· 129 128 {.str = "synth", .field = PERF_OUTPUT_SYNTH}, 130 129 {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, 131 130 {.str = "metric", .field = PERF_OUTPUT_METRIC}, 131 + {.str = "misc", .field = PERF_OUTPUT_MISC}, 132 132 }; 133 133 134 134 enum { ··· 596 594 597 595 static int perf_sample__fprintf_start(struct perf_sample *sample, 598 596 struct thread *thread, 599 - struct perf_evsel *evsel, FILE *fp) 597 + struct perf_evsel *evsel, 598 + u32 type, FILE *fp) 600 599 { 601 600 struct perf_event_attr *attr = &evsel->attr; 602 601 unsigned long secs; ··· 625 622 printed += fprintf(fp, "%3d ", sample->cpu); 626 623 else 627 624 printed += fprintf(fp, "[%03d] ", sample->cpu); 625 + } 626 + 627 + if (PRINT_FIELD(MISC)) { 628 + int ret = 0; 629 + 630 + #define has(m) \ 631 + (sample->misc & PERF_RECORD_MISC_##m) == PERF_RECORD_MISC_##m 632 + 633 + if (has(KERNEL)) 634 + ret += fprintf(fp, "K"); 635 + if (has(USER)) 636 + ret += fprintf(fp, "U"); 637 + if (has(HYPERVISOR)) 638 + ret += fprintf(fp, "H"); 639 + if (has(GUEST_KERNEL)) 640 + ret += fprintf(fp, "G"); 641 + if (has(GUEST_USER)) 642 + ret += fprintf(fp, "g"); 643 + 644 + switch (type) { 645 + case PERF_RECORD_MMAP: 646 + case PERF_RECORD_MMAP2: 647 + if (has(MMAP_DATA)) 648 + ret += fprintf(fp, "M"); 649 + break; 650 + case PERF_RECORD_COMM: 651 + if (has(COMM_EXEC)) 652 + ret += fprintf(fp, "E"); 653 + break; 654 + case PERF_RECORD_SWITCH: 655 + case PERF_RECORD_SWITCH_CPU_WIDE: 656 + if (has(SWITCH_OUT)) 657 + ret += fprintf(fp, "S"); 658 + default: 659 + break; 660 + } 661 + 662 + #undef has 663 + 664 + ret += fprintf(fp, "%*s", 6 - ret, " "); 665 + printed += ret; 628 666 } 629 667 630 668 if (PRINT_FIELD(TIME)) { ··· 1480 1436 return 0; 1481 1437 } 1482 1438 1439 + #define PTIME_RANGE_MAX 10 1440 + 1483 
1441 struct perf_script { 1484 1442 struct perf_tool tool; 1485 1443 struct perf_session *session; ··· 1489 1443 bool show_mmap_events; 1490 1444 bool show_switch_events; 1491 1445 bool show_namespace_events; 1446 + bool show_lost_events; 1492 1447 bool allocated; 1493 1448 bool per_event_dump; 1494 1449 struct cpu_map *cpus; 1495 1450 struct thread_map *threads; 1496 1451 int name_width; 1497 1452 const char *time_str; 1498 - struct perf_time_interval ptime; 1453 + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; 1454 + int range_num; 1499 1455 }; 1500 1456 1501 1457 static int perf_evlist__max_name_len(struct perf_evlist *evlist) ··· 1547 1499 if (!fmt) 1548 1500 return; 1549 1501 perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, 1550 - mctx->fp); 1502 + PERF_RECORD_SAMPLE, mctx->fp); 1551 1503 fputs("\tmetric: ", mctx->fp); 1552 1504 if (color) 1553 1505 color_fprintf(mctx->fp, color, fmt, val); ··· 1561 1513 struct metric_ctx *mctx = ctx; 1562 1514 1563 1515 perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, 1564 - mctx->fp); 1516 + PERF_RECORD_SAMPLE, mctx->fp); 1565 1517 fputs("\tmetric: ", mctx->fp); 1566 1518 } 1567 1519 ··· 1629 1581 1630 1582 ++es->samples; 1631 1583 1632 - perf_sample__fprintf_start(sample, thread, evsel, fp); 1584 + perf_sample__fprintf_start(sample, thread, evsel, 1585 + PERF_RECORD_SAMPLE, fp); 1633 1586 1634 1587 if (PRINT_FIELD(PERIOD)) 1635 1588 fprintf(fp, "%10" PRIu64 " ", sample->period); ··· 1783 1734 struct perf_script *scr = container_of(tool, struct perf_script, tool); 1784 1735 struct addr_location al; 1785 1736 1786 - if (perf_time__skip_sample(&scr->ptime, sample->time)) 1737 + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, 1738 + sample->time)) { 1787 1739 return 0; 1740 + } 1788 1741 1789 1742 if (debug_mode) { 1790 1743 if (sample->time < last_timestamp) { ··· 1879 1828 sample->tid = event->comm.tid; 1880 1829 sample->pid = event->comm.pid; 1881 1830 } 1882 
- perf_sample__fprintf_start(sample, thread, evsel, stdout); 1831 + perf_sample__fprintf_start(sample, thread, evsel, 1832 + PERF_RECORD_COMM, stdout); 1883 1833 perf_event__fprintf(event, stdout); 1884 1834 ret = 0; 1885 1835 out: ··· 1915 1863 sample->tid = event->namespaces.tid; 1916 1864 sample->pid = event->namespaces.pid; 1917 1865 } 1918 - perf_sample__fprintf_start(sample, thread, evsel, stdout); 1866 + perf_sample__fprintf_start(sample, thread, evsel, 1867 + PERF_RECORD_NAMESPACES, stdout); 1919 1868 perf_event__fprintf(event, stdout); 1920 1869 ret = 0; 1921 1870 out: ··· 1949 1896 sample->tid = event->fork.tid; 1950 1897 sample->pid = event->fork.pid; 1951 1898 } 1952 - perf_sample__fprintf_start(sample, thread, evsel, stdout); 1899 + perf_sample__fprintf_start(sample, thread, evsel, 1900 + PERF_RECORD_FORK, stdout); 1953 1901 perf_event__fprintf(event, stdout); 1954 1902 thread__put(thread); 1955 1903 ··· 1979 1925 sample->tid = event->fork.tid; 1980 1926 sample->pid = event->fork.pid; 1981 1927 } 1982 - perf_sample__fprintf_start(sample, thread, evsel, stdout); 1928 + perf_sample__fprintf_start(sample, thread, evsel, 1929 + PERF_RECORD_EXIT, stdout); 1983 1930 perf_event__fprintf(event, stdout); 1984 1931 1985 1932 if (perf_event__process_exit(tool, event, sample, machine) < 0) ··· 2015 1960 sample->tid = event->mmap.tid; 2016 1961 sample->pid = event->mmap.pid; 2017 1962 } 2018 - perf_sample__fprintf_start(sample, thread, evsel, stdout); 1963 + perf_sample__fprintf_start(sample, thread, evsel, 1964 + PERF_RECORD_MMAP, stdout); 2019 1965 perf_event__fprintf(event, stdout); 2020 1966 thread__put(thread); 2021 1967 return 0; ··· 2047 1991 sample->tid = event->mmap2.tid; 2048 1992 sample->pid = event->mmap2.pid; 2049 1993 } 2050 - perf_sample__fprintf_start(sample, thread, evsel, stdout); 1994 + perf_sample__fprintf_start(sample, thread, evsel, 1995 + PERF_RECORD_MMAP2, stdout); 2051 1996 perf_event__fprintf(event, stdout); 2052 1997 thread__put(thread); 
2053 1998 return 0; ··· 2074 2017 return -1; 2075 2018 } 2076 2019 2077 - perf_sample__fprintf_start(sample, thread, evsel, stdout); 2020 + perf_sample__fprintf_start(sample, thread, evsel, 2021 + PERF_RECORD_SWITCH, stdout); 2022 + perf_event__fprintf(event, stdout); 2023 + thread__put(thread); 2024 + return 0; 2025 + } 2026 + 2027 + static int 2028 + process_lost_event(struct perf_tool *tool, 2029 + union perf_event *event, 2030 + struct perf_sample *sample, 2031 + struct machine *machine) 2032 + { 2033 + struct perf_script *script = container_of(tool, struct perf_script, tool); 2034 + struct perf_session *session = script->session; 2035 + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); 2036 + struct thread *thread; 2037 + 2038 + thread = machine__findnew_thread(machine, sample->pid, 2039 + sample->tid); 2040 + if (thread == NULL) 2041 + return -1; 2042 + 2043 + perf_sample__fprintf_start(sample, thread, evsel, 2044 + PERF_RECORD_LOST, stdout); 2078 2045 perf_event__fprintf(event, stdout); 2079 2046 thread__put(thread); 2080 2047 return 0; ··· 2198 2117 script->tool.context_switch = process_switch_event; 2199 2118 if (script->show_namespace_events) 2200 2119 script->tool.namespaces = process_namespaces_event; 2120 + if (script->show_lost_events) 2121 + script->tool.lost = process_lost_event; 2201 2122 2202 2123 if (perf_script__setup_per_event_dump(script)) { 2203 2124 pr_err("Couldn't create the per event dump files\n"); ··· 3136 3053 "Show context switch events (if recorded)"), 3137 3054 OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, 3138 3055 "Show namespace events (if recorded)"), 3056 + OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, 3057 + "Show lost events (if recorded)"), 3139 3058 OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, 3140 3059 "Dump trace output to files named by the monitored events"), 3141 3060 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, 
do it"), ··· 3445 3360 goto out_delete; 3446 3361 3447 3362 /* needs to be parsed after looking up reference time */ 3448 - if (perf_time__parse_str(&script.ptime, script.time_str) != 0) { 3449 - pr_err("Invalid time string\n"); 3450 - err = -EINVAL; 3451 - goto out_delete; 3363 + if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { 3364 + if (session->evlist->first_sample_time == 0 && 3365 + session->evlist->last_sample_time == 0) { 3366 + pr_err("No first/last sample time in perf data\n"); 3367 + err = -EINVAL; 3368 + goto out_delete; 3369 + } 3370 + 3371 + script.range_num = perf_time__percent_parse_str( 3372 + script.ptime_range, PTIME_RANGE_MAX, 3373 + script.time_str, 3374 + session->evlist->first_sample_time, 3375 + session->evlist->last_sample_time); 3376 + 3377 + if (script.range_num < 0) { 3378 + pr_err("Invalid time string\n"); 3379 + err = -EINVAL; 3380 + goto out_delete; 3381 + } 3382 + } else { 3383 + script.range_num = 1; 3452 3384 } 3453 3385 3454 3386 err = __cmd_script(&script);
+1
tools/perf/builtin-trace.c
··· 622 622 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, 623 623 { .name = "getrlimit", 624 624 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, 625 + { .name = "gettid", .errpid = true, }, 625 626 { .name = "ioctl", 626 627 .arg = { 627 628 #if defined(__i386__) || defined(__x86_64__)
+2 -2
tools/perf/tests/bpf-script-example.c
··· 31 31 .max_entries = 1, 32 32 }; 33 33 34 - SEC("func=SyS_epoll_wait") 35 - int bpf_func__SyS_epoll_wait(void *ctx) 34 + SEC("func=SyS_epoll_pwait") 35 + int bpf_func__SyS_epoll_pwait(void *ctx) 36 36 { 37 37 int ind =0; 38 38 int *flag = bpf_map_lookup_elem(&flip_table, &ind);
+30 -35
tools/perf/tests/bpf.c
··· 19 19 20 20 #ifdef HAVE_LIBBPF_SUPPORT 21 21 22 - static int epoll_wait_loop(void) 22 + static int epoll_pwait_loop(void) 23 23 { 24 24 int i; 25 25 26 26 /* Should fail NR_ITERS times */ 27 27 for (i = 0; i < NR_ITERS; i++) 28 - epoll_wait(-(i + 1), NULL, 0, 0); 28 + epoll_pwait(-(i + 1), NULL, 0, 0, NULL); 29 29 return 0; 30 30 } 31 31 ··· 63 63 bool pin; 64 64 } bpf_testcase_table[] = { 65 65 { 66 - LLVM_TESTCASE_BASE, 67 - "Basic BPF filtering", 68 - "[basic_bpf_test]", 69 - "fix 'perf test LLVM' first", 70 - "load bpf object failed", 71 - &epoll_wait_loop, 72 - (NR_ITERS + 1) / 2, 73 - false, 66 + .prog_id = LLVM_TESTCASE_BASE, 67 + .desc = "Basic BPF filtering", 68 + .name = "[basic_bpf_test]", 69 + .msg_compile_fail = "fix 'perf test LLVM' first", 70 + .msg_load_fail = "load bpf object failed", 71 + .target_func = &epoll_pwait_loop, 72 + .expect_result = (NR_ITERS + 1) / 2, 74 73 }, 75 74 { 76 - LLVM_TESTCASE_BASE, 77 - "BPF pinning", 78 - "[bpf_pinning]", 79 - "fix kbuild first", 80 - "check your vmlinux setting?", 81 - &epoll_wait_loop, 82 - (NR_ITERS + 1) / 2, 83 - true, 75 + .prog_id = LLVM_TESTCASE_BASE, 76 + .desc = "BPF pinning", 77 + .name = "[bpf_pinning]", 78 + .msg_compile_fail = "fix kbuild first", 79 + .msg_load_fail = "check your vmlinux setting?", 80 + .target_func = &epoll_pwait_loop, 81 + .expect_result = (NR_ITERS + 1) / 2, 82 + .pin = true, 84 83 }, 85 84 #ifdef HAVE_BPF_PROLOGUE 86 85 { 87 - LLVM_TESTCASE_BPF_PROLOGUE, 88 - "BPF prologue generation", 89 - "[bpf_prologue_test]", 90 - "fix kbuild first", 91 - "check your vmlinux setting?", 92 - &llseek_loop, 93 - (NR_ITERS + 1) / 4, 94 - false, 86 + .prog_id = LLVM_TESTCASE_BPF_PROLOGUE, 87 + .desc = "BPF prologue generation", 88 + .name = "[bpf_prologue_test]", 89 + .msg_compile_fail = "fix kbuild first", 90 + .msg_load_fail = "check your vmlinux setting?", 91 + .target_func = &llseek_loop, 92 + .expect_result = (NR_ITERS + 1) / 4, 95 93 }, 96 94 #endif 97 95 { 98 - 
LLVM_TESTCASE_BPF_RELOCATION, 99 - "BPF relocation checker", 100 - "[bpf_relocation_test]", 101 - "fix 'perf test LLVM' first", 102 - "libbpf error when dealing with relocation", 103 - NULL, 104 - 0, 105 - false, 96 + .prog_id = LLVM_TESTCASE_BPF_RELOCATION, 97 + .desc = "BPF relocation checker", 98 + .name = "[bpf_relocation_test]", 99 + .msg_compile_fail = "fix 'perf test LLVM' first", 100 + .msg_load_fail = "libbpf error when dealing with relocation", 106 101 }, 107 102 }; 108 103 ··· 185 190 } 186 191 187 192 if (count != expect) { 188 - pr_debug("BPF filter result incorrect\n"); 193 + pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count); 189 194 goto out_delete_evlist; 190 195 } 191 196
+2 -1
tools/perf/util/annotate.c
··· 1960 1960 if (percent_max <= 0.5) 1961 1961 continue; 1962 1962 1963 - al->path = get_srcline(map->dso, start + al->offset, NULL, false, true); 1963 + al->path = get_srcline(map->dso, start + al->offset, NULL, 1964 + false, true, start + al->offset); 1964 1965 insert_source_line(&tmp_root, al); 1965 1966 } 1966 1967
+8
tools/perf/util/event.c
··· 1435 1435 event->context_switch.next_prev_tid); 1436 1436 } 1437 1437 1438 + static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) 1439 + { 1440 + return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); 1441 + } 1442 + 1438 1443 size_t perf_event__fprintf(union perf_event *event, FILE *fp) 1439 1444 { 1440 1445 size_t ret = fprintf(fp, "PERF_RECORD_%s", ··· 1471 1466 case PERF_RECORD_SWITCH: 1472 1467 case PERF_RECORD_SWITCH_CPU_WIDE: 1473 1468 ret += perf_event__fprintf_switch(event, fp); 1469 + break; 1470 + case PERF_RECORD_LOST: 1471 + ret += perf_event__fprintf_lost(event, fp); 1474 1472 break; 1475 1473 default: 1476 1474 ret += fprintf(fp, "\n");
+1
tools/perf/util/event.h
··· 205 205 u32 flags; 206 206 u16 insn_len; 207 207 u8 cpumode; 208 + u16 misc; 208 209 char insn[MAX_INSN]; 209 210 void *raw_data; 210 211 struct ip_callchain *callchain;
+2
tools/perf/util/evlist.h
··· 50 50 struct perf_evsel *selected; 51 51 struct events_stats stats; 52 52 struct perf_env *env; 53 + u64 first_sample_time; 54 + u64 last_sample_time; 53 55 }; 54 56 55 57 struct perf_evsel_str_handler {
+2
tools/perf/util/evsel.c
··· 1577 1577 PRINT_ATTRf(use_clockid, p_unsigned); 1578 1578 PRINT_ATTRf(context_switch, p_unsigned); 1579 1579 PRINT_ATTRf(write_backward, p_unsigned); 1580 + PRINT_ATTRf(namespaces, p_unsigned); 1580 1581 1581 1582 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); 1582 1583 PRINT_ATTRf(bp_type, p_unsigned); ··· 2042 2041 data->stream_id = data->id = data->time = -1ULL; 2043 2042 data->period = evsel->attr.sample_period; 2044 2043 data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 2044 + data->misc = event->header.misc; 2045 2045 data->id = -1ULL; 2046 2046 data->data_src = PERF_MEM_DATA_SRC_NONE; 2047 2047
+60
tools/perf/util/header.c
··· 16 16 #include <linux/stringify.h> 17 17 #include <sys/stat.h> 18 18 #include <sys/utsname.h> 19 + #include <linux/time64.h> 19 20 20 21 #include "evlist.h" 21 22 #include "evsel.h" ··· 36 35 #include <api/fs/fs.h> 37 36 #include "asm/bug.h" 38 37 #include "tool.h" 38 + #include "time-utils.h" 39 39 40 40 #include "sane_ctype.h" 41 41 ··· 1182 1180 return 0; 1183 1181 } 1184 1182 1183 + static int write_sample_time(struct feat_fd *ff, 1184 + struct perf_evlist *evlist) 1185 + { 1186 + int ret; 1187 + 1188 + ret = do_write(ff, &evlist->first_sample_time, 1189 + sizeof(evlist->first_sample_time)); 1190 + if (ret < 0) 1191 + return ret; 1192 + 1193 + return do_write(ff, &evlist->last_sample_time, 1194 + sizeof(evlist->last_sample_time)); 1195 + } 1196 + 1185 1197 static void print_hostname(struct feat_fd *ff, FILE *fp) 1186 1198 { 1187 1199 fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); ··· 1519 1503 fprintf(fp, "}\n"); 1520 1504 } 1521 1505 } 1506 + } 1507 + 1508 + static void print_sample_time(struct feat_fd *ff, FILE *fp) 1509 + { 1510 + struct perf_session *session; 1511 + char time_buf[32]; 1512 + double d; 1513 + 1514 + session = container_of(ff->ph, struct perf_session, header); 1515 + 1516 + timestamp__scnprintf_usec(session->evlist->first_sample_time, 1517 + time_buf, sizeof(time_buf)); 1518 + fprintf(fp, "# time of first sample : %s\n", time_buf); 1519 + 1520 + timestamp__scnprintf_usec(session->evlist->last_sample_time, 1521 + time_buf, sizeof(time_buf)); 1522 + fprintf(fp, "# time of last sample : %s\n", time_buf); 1523 + 1524 + d = (double)(session->evlist->last_sample_time - 1525 + session->evlist->first_sample_time) / NSEC_PER_MSEC; 1526 + 1527 + fprintf(fp, "# sample duration : %10.3f ms\n", d); 1522 1528 } 1523 1529 1524 1530 static int __event_process_build_id(struct build_id_event *bev, ··· 2184 2146 return -1; 2185 2147 } 2186 2148 2149 + static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) 2150 + { 2151 + 
struct perf_session *session; 2152 + u64 first_sample_time, last_sample_time; 2153 + int ret; 2154 + 2155 + session = container_of(ff->ph, struct perf_session, header); 2156 + 2157 + ret = do_read_u64(ff, &first_sample_time); 2158 + if (ret) 2159 + return -1; 2160 + 2161 + ret = do_read_u64(ff, &last_sample_time); 2162 + if (ret) 2163 + return -1; 2164 + 2165 + session->evlist->first_sample_time = first_sample_time; 2166 + session->evlist->last_sample_time = last_sample_time; 2167 + return 0; 2168 + } 2169 + 2187 2170 struct feature_ops { 2188 2171 int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); 2189 2172 void (*print)(struct feat_fd *ff, FILE *fp); ··· 2262 2203 FEAT_OPN(AUXTRACE, auxtrace, false), 2263 2204 FEAT_OPN(STAT, stat, false), 2264 2205 FEAT_OPN(CACHE, cache, true), 2206 + FEAT_OPR(SAMPLE_TIME, sample_time, false), 2265 2207 }; 2266 2208 2267 2209 struct header_print_data {
+1
tools/perf/util/header.h
··· 35 35 HEADER_AUXTRACE, 36 36 HEADER_STAT, 37 37 HEADER_CACHE, 38 + HEADER_SAMPLE_TIME, 38 39 HEADER_LAST_FEATURE, 39 40 HEADER_FEAT_BITS = 256, 40 41 };
+1 -1
tools/perf/util/machine.c
··· 1726 1726 bool show_addr = callchain_param.key == CCKEY_ADDRESS; 1727 1727 1728 1728 srcline = get_srcline(map->dso, map__rip_2objdump(map, ip), 1729 - sym, show_sym, show_addr); 1729 + sym, show_sym, show_addr, ip); 1730 1730 srcline__tree_insert(&map->dso->srclines, ip, srcline); 1731 1731 } 1732 1732
+1 -1
tools/perf/util/map.c
··· 419 419 if (map && map->dso) { 420 420 srcline = get_srcline(map->dso, 421 421 map__rip_2objdump(map, addr), NULL, 422 - true, true); 422 + true, true, addr); 423 423 if (srcline != SRCLINE_UNKNOWN) 424 424 ret = fprintf(fp, "%s%s", prefix, srcline); 425 425 free_srcline(srcline);
+4 -2
tools/perf/util/session.c
··· 1773 1773 err = perf_session__flush_thread_stacks(session); 1774 1774 out_err: 1775 1775 free(buf); 1776 - perf_session__warn_about_errors(session); 1776 + if (!tool->no_warn) 1777 + perf_session__warn_about_errors(session); 1777 1778 ordered_events__free(&session->ordered_events); 1778 1779 auxtrace__free_events(session); 1779 1780 return err; ··· 1930 1929 err = perf_session__flush_thread_stacks(session); 1931 1930 out_err: 1932 1931 ui_progress__finish(); 1933 - perf_session__warn_about_errors(session); 1932 + if (!tool->no_warn) 1933 + perf_session__warn_about_errors(session); 1934 1934 /* 1935 1935 * We may switching perf.data output, make ordered_events 1936 1936 * reusable.
+10 -6
tools/perf/util/sort.c
··· 336 336 return SRCLINE_UNKNOWN; 337 337 338 338 return get_srcline(map->dso, map__rip_2objdump(map, he->ip), 339 - he->ms.sym, true, true); 339 + he->ms.sym, true, true, he->ip); 340 340 } 341 341 342 342 static int64_t ··· 380 380 map__rip_2objdump(map, 381 381 left->branch_info->from.al_addr), 382 382 left->branch_info->from.sym, 383 - true, true); 383 + true, true, 384 + left->branch_info->from.al_addr); 384 385 } 385 386 if (!right->branch_info->srcline_from) { 386 387 struct map *map = right->branch_info->from.map; ··· 392 391 map__rip_2objdump(map, 393 392 right->branch_info->from.al_addr), 394 393 right->branch_info->from.sym, 395 - true, true); 394 + true, true, 395 + right->branch_info->from.al_addr); 396 396 } 397 397 return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); 398 398 } ··· 425 423 map__rip_2objdump(map, 426 424 left->branch_info->to.al_addr), 427 425 left->branch_info->from.sym, 428 - true, true); 426 + true, true, 427 + left->branch_info->to.al_addr); 429 428 } 430 429 if (!right->branch_info->srcline_to) { 431 430 struct map *map = right->branch_info->to.map; ··· 437 434 map__rip_2objdump(map, 438 435 right->branch_info->to.al_addr), 439 436 right->branch_info->to.sym, 440 - true, true); 437 + true, true, 438 + right->branch_info->to.al_addr); 441 439 } 442 440 return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); 443 441 } ··· 469 465 return no_srcfile; 470 466 471 467 sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), 472 - e->ms.sym, false, true, true); 468 + e->ms.sym, false, true, true, e->ip); 473 469 if (!strcmp(sf, SRCLINE_UNKNOWN)) 474 470 return no_srcfile; 475 471 p = strchr(sf, ':');
+5 -4
tools/perf/util/srcline.c
··· 496 496 #define A2L_FAIL_LIMIT 123 497 497 498 498 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, 499 - bool show_sym, bool show_addr, bool unwind_inlines) 499 + bool show_sym, bool show_addr, bool unwind_inlines, 500 + u64 ip) 500 501 { 501 502 char *file = NULL; 502 503 unsigned line = 0; ··· 537 536 538 537 if (sym) { 539 538 if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "", 540 - addr - sym->start) < 0) 539 + ip - sym->start) < 0) 541 540 return SRCLINE_UNKNOWN; 542 541 } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0) 543 542 return SRCLINE_UNKNOWN; ··· 551 550 } 552 551 553 552 char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, 554 - bool show_sym, bool show_addr) 553 + bool show_sym, bool show_addr, u64 ip) 555 554 { 556 - return __get_srcline(dso, addr, sym, show_sym, show_addr, false); 555 + return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip); 557 556 } 558 557 559 558 struct srcline_node {
+3 -2
tools/perf/util/srcline.h
··· 11 11 12 12 extern bool srcline_full_filename; 13 13 char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, 14 - bool show_sym, bool show_addr); 14 + bool show_sym, bool show_addr, u64 ip); 15 15 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, 16 - bool show_sym, bool show_addr, bool unwind_inlines); 16 + bool show_sym, bool show_addr, bool unwind_inlines, 17 + u64 ip); 17 18 void free_srcline(char *srcline); 18 19 19 20 /* insert the srcline into the DSO, which will take ownership */
+221 -12
tools/perf/util/time-utils.c
··· 6 6 #include <time.h> 7 7 #include <errno.h> 8 8 #include <inttypes.h> 9 + #include <math.h> 9 10 10 11 #include "perf.h" 11 12 #include "debug.h" ··· 61 60 return 0; 62 61 } 63 62 64 - int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) 63 + static int split_start_end(char **start, char **end, const char *ostr, char ch) 65 64 { 66 65 char *start_str, *end_str; 67 66 char *d, *str; 68 - int rc = 0; 69 67 70 68 if (ostr == NULL || *ostr == '\0') 71 69 return 0; ··· 74 74 if (str == NULL) 75 75 return -ENOMEM; 76 76 77 - ptime->start = 0; 78 - ptime->end = 0; 79 - 80 - /* str has the format: <start>,<stop> 81 - * variations: <start>, 82 - * ,<stop> 83 - * , 84 - */ 85 77 start_str = str; 86 - d = strchr(start_str, ','); 78 + d = strchr(start_str, ch); 87 79 if (d) { 88 80 *d = '\0'; 89 81 ++d; 90 82 } 91 83 end_str = d; 92 84 85 + *start = start_str; 86 + *end = end_str; 87 + 88 + return 0; 89 + } 90 + 91 + int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) 92 + { 93 + char *start_str = NULL, *end_str; 94 + int rc; 95 + 96 + rc = split_start_end(&start_str, &end_str, ostr, ','); 97 + if (rc || !start_str) 98 + return rc; 99 + 100 + ptime->start = 0; 101 + ptime->end = 0; 102 + 93 103 rc = parse_timestr_sec_nsec(ptime, start_str, end_str); 94 104 95 - free(str); 105 + free(start_str); 96 106 97 107 /* make sure end time is after start time if it was given */ 98 108 if (rc == 0 && ptime->end && ptime->end < ptime->start) ··· 112 102 pr_debug("end time %" PRIu64 "\n", ptime->end); 113 103 114 104 return rc; 105 + } 106 + 107 + static int parse_percent(double *pcnt, char *str) 108 + { 109 + char *c; 110 + 111 + c = strchr(str, '%'); 112 + if (c) 113 + *c = '\0'; 114 + else 115 + return -1; 116 + 117 + *pcnt = atof(str) / 100.0; 118 + 119 + return 0; 120 + } 121 + 122 + static int percent_slash_split(char *str, struct perf_time_interval *ptime, 123 + u64 start, u64 end) 124 + { 125 + char *p, *end_str; 126 + double 
pcnt, start_pcnt, end_pcnt; 127 + u64 total = end - start; 128 + int i; 129 + 130 + /* 131 + * Example: 132 + * 10%/2: select the second 10% slice and the third 10% slice 133 + */ 134 + 135 + /* We can modify this string since the original one is copied */ 136 + p = strchr(str, '/'); 137 + if (!p) 138 + return -1; 139 + 140 + *p = '\0'; 141 + if (parse_percent(&pcnt, str) < 0) 142 + return -1; 143 + 144 + p++; 145 + i = (int)strtol(p, &end_str, 10); 146 + if (*end_str) 147 + return -1; 148 + 149 + if (pcnt <= 0.0) 150 + return -1; 151 + 152 + start_pcnt = pcnt * (i - 1); 153 + end_pcnt = pcnt * i; 154 + 155 + if (start_pcnt < 0.0 || start_pcnt > 1.0 || 156 + end_pcnt < 0.0 || end_pcnt > 1.0) { 157 + return -1; 158 + } 159 + 160 + ptime->start = start + round(start_pcnt * total); 161 + ptime->end = start + round(end_pcnt * total); 162 + 163 + return 0; 164 + } 165 + 166 + static int percent_dash_split(char *str, struct perf_time_interval *ptime, 167 + u64 start, u64 end) 168 + { 169 + char *start_str = NULL, *end_str; 170 + double start_pcnt, end_pcnt; 171 + u64 total = end - start; 172 + int ret; 173 + 174 + /* 175 + * Example: 0%-10% 176 + */ 177 + 178 + ret = split_start_end(&start_str, &end_str, str, '-'); 179 + if (ret || !start_str) 180 + return ret; 181 + 182 + if ((parse_percent(&start_pcnt, start_str) != 0) || 183 + (parse_percent(&end_pcnt, end_str) != 0)) { 184 + free(start_str); 185 + return -1; 186 + } 187 + 188 + free(start_str); 189 + 190 + if (start_pcnt < 0.0 || start_pcnt > 1.0 || 191 + end_pcnt < 0.0 || end_pcnt > 1.0 || 192 + start_pcnt > end_pcnt) { 193 + return -1; 194 + } 195 + 196 + ptime->start = start + round(start_pcnt * total); 197 + ptime->end = start + round(end_pcnt * total); 198 + 199 + return 0; 200 + } 201 + 202 + typedef int (*time_pecent_split)(char *, struct perf_time_interval *, 203 + u64 start, u64 end); 204 + 205 + static int percent_comma_split(struct perf_time_interval *ptime_buf, int num, 206 + const char *ostr, u64 start, 
u64 end, 207 + time_pecent_split func) 208 + { 209 + char *str, *p1, *p2; 210 + int len, ret, i = 0; 211 + 212 + str = strdup(ostr); 213 + if (str == NULL) 214 + return -ENOMEM; 215 + 216 + len = strlen(str); 217 + p1 = str; 218 + 219 + while (p1 < str + len) { 220 + if (i >= num) { 221 + free(str); 222 + return -1; 223 + } 224 + 225 + p2 = strchr(p1, ','); 226 + if (p2) 227 + *p2 = '\0'; 228 + 229 + ret = (func)(p1, &ptime_buf[i], start, end); 230 + if (ret < 0) { 231 + free(str); 232 + return -1; 233 + } 234 + 235 + pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start); 236 + pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end); 237 + 238 + i++; 239 + 240 + if (p2) 241 + p1 = p2 + 1; 242 + else 243 + break; 244 + } 245 + 246 + free(str); 247 + return i; 248 + } 249 + 250 + int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, 251 + const char *ostr, u64 start, u64 end) 252 + { 253 + char *c; 254 + 255 + /* 256 + * ostr example: 257 + * 10%/2,10%/3: select the second 10% slice and the third 10% slice 258 + * 0%-10%,30%-40%: multiple time range 259 + */ 260 + 261 + memset(ptime_buf, 0, sizeof(*ptime_buf) * num); 262 + 263 + c = strchr(ostr, '/'); 264 + if (c) { 265 + return percent_comma_split(ptime_buf, num, ostr, start, 266 + end, percent_slash_split); 267 + } 268 + 269 + c = strchr(ostr, '-'); 270 + if (c) { 271 + return percent_comma_split(ptime_buf, num, ostr, start, 272 + end, percent_dash_split); 273 + } 274 + 275 + return -1; 115 276 } 116 277 117 278 bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) ··· 298 117 } 299 118 300 119 return false; 120 + } 121 + 122 + bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, 123 + int num, u64 timestamp) 124 + { 125 + struct perf_time_interval *ptime; 126 + int i; 127 + 128 + if ((timestamp == 0) || (num == 0)) 129 + return false; 130 + 131 + if (num == 1) 132 + return perf_time__skip_sample(&ptime_buf[0], timestamp); 133 + 134 
+ /* 135 + * start/end of multiple time ranges must be valid. 136 + */ 137 + for (i = 0; i < num; i++) { 138 + ptime = &ptime_buf[i]; 139 + 140 + if (timestamp >= ptime->start && 141 + ((timestamp < ptime->end && i < num - 1) || 142 + (timestamp <= ptime->end && i == num - 1))) { 143 + break; 144 + } 145 + } 146 + 147 + return (i == num) ? true : false; 301 148 } 302 149 303 150 int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
+6
tools/perf/util/time-utils.h
··· 13 13 14 14 int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); 15 15 16 + int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, 17 + const char *ostr, u64 start, u64 end); 18 + 16 19 bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); 20 + 21 + bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, 22 + int num, u64 timestamp); 17 23 18 24 int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); 19 25
+1
tools/perf/util/tool.h
··· 76 76 bool ordered_events; 77 77 bool ordering_requires_timestamps; 78 78 bool namespace_events; 79 + bool no_warn; 79 80 enum show_feature_header show_feat_hdr; 80 81 }; 81 82