Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-for-mingo-20161024' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Dynamically change verbosity level by pressing 'V' in the 'perf top/report'
hists TUI browser (Alexis Berlemont)

- Implement 'perf trace --delay' in the same fashion as in 'perf record --delay',
to skip sampling workload initialization events (Alexis Berlemont)

- Make vendor named events case insensitive in 'perf list', i.e.
'perf list LONGEST_LAT' works just the same as 'perf list longest_lat' (Andi Kleen)

- Show instruction bytes and length in 'perf script' for Intel PT and BTS (Andi Kleen, Adrian Hunter)

E.g:

% perf record -e intel_pt// foo
% perf script --itrace=i0ns -F ip,insn,insnlen
ffffffff8101232f ilen: 5 insn: 0f 1f 44 00 00
ffffffff81012334 ilen: 1 insn: 5b
ffffffff81012335 ilen: 1 insn: 5d
ffffffff81012336 ilen: 1 insn: c3
ffffffff810123e3 ilen: 1 insn: 5b
ffffffff810123e4 ilen: 2 insn: 41 5c
ffffffff810123e6 ilen: 1 insn: 5d
ffffffff810123e7 ilen: 1 insn: c3
ffffffff810124a6 ilen: 2 insn: 31 c0
ffffffff810124a8 ilen: 9 insn: 41 83 bc 24 a8 01 00 00 01
ffffffff810124b1 ilen: 2 insn: 75 87

- Allow enabling the perf_event_attr.branch_type attribute member: (Andi Kleen)

perf record -e sched:sched_switch,cpu/cpu-cycles,branch_type=any/ ...

- Add unwinding support for jitdump (Stefano Sanfilippo)

Fixes:

- Use raw_syscall:sys_enter timestamp in 'perf trace' (Arnaldo Carvalho de Melo)

Infrastructure:

- Allow jitdump to be built without libdwarf (Maciej Debski)

- Sync x86's syscall table tools/ copy (Arnaldo Carvalho de Melo)

- Fixes to avoid calling die() in library functions already propagating other
errors (Arnaldo Carvalho de Melo)

- Improvements to allow libtraceevent to be properly installed in distro
packages (Jiri Olsa)

- Removing coresight miscellaneous debug output (Mathieu Poirier)

- Cache align the 'perf bench futex' worker struct (Sebastian Andrzej Siewior)

Documentation:

- Minor improvements on the documentation of event parameters (Andi Kleen)

- Add jitdump format specification document (Stephane Eranian)

Spelling fixes:

- Fix typo "No enough" to "Not enough" (Alexander Alemayhu)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+836 -279
+1
tools/include/asm-generic/bitops.h
··· 13 13 */ 14 14 15 15 #include <asm-generic/bitops/__ffs.h> 16 + #include <asm-generic/bitops/__ffz.h> 16 17 #include <asm-generic/bitops/fls.h> 17 18 #include <asm-generic/bitops/__fls.h> 18 19 #include <asm-generic/bitops/fls64.h>
+12
tools/include/asm-generic/bitops/__ffz.h
··· 1 + #ifndef _ASM_GENERIC_BITOPS_FFZ_H_ 2 + #define _ASM_GENERIC_BITOPS_FFZ_H_ 3 + 4 + /* 5 + * ffz - find first zero in word. 6 + * @word: The word to search 7 + * 8 + * Undefined if no zero exists, so code should check against ~0UL first. 9 + */ 10 + #define ffz(x) __ffs(~(x)) 11 + 12 + #endif /* _ASM_GENERIC_BITOPS_FFZ_H_ */
+28
tools/include/asm-generic/bitops/find.h
··· 15 15 size, unsigned long offset); 16 16 #endif 17 17 18 + #ifndef find_next_zero_bit 19 + 20 + /** 21 + * find_next_zero_bit - find the next cleared bit in a memory region 22 + * @addr: The address to base the search on 23 + * @offset: The bitnumber to start searching at 24 + * @size: The bitmap size in bits 25 + * 26 + * Returns the bit number of the next zero bit 27 + * If no bits are zero, returns @size. 28 + */ 29 + unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, 30 + unsigned long offset); 31 + #endif 32 + 18 33 #ifndef find_first_bit 19 34 20 35 /** ··· 44 29 unsigned long size); 45 30 46 31 #endif /* find_first_bit */ 32 + 33 + #ifndef find_first_zero_bit 34 + 35 + /** 36 + * find_first_zero_bit - find the first cleared bit in a memory region 37 + * @addr: The address to start the search at 38 + * @size: The maximum number of bits to search 39 + * 40 + * Returns the bit number of the first cleared bit. 41 + * If no bits are zero, returns @size. 42 + */ 43 + unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); 44 + #endif 47 45 48 46 #endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
+5
tools/include/linux/bitops.h
··· 39 39 (bit) < (size); \ 40 40 (bit) = find_next_bit((addr), (size), (bit) + 1)) 41 41 42 + #define for_each_clear_bit(bit, addr, size) \ 43 + for ((bit) = find_first_zero_bit((addr), (size)); \ 44 + (bit) < (size); \ 45 + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) 46 + 42 47 /* same as for_each_set_bit() but use bit as value to start with */ 43 48 #define for_each_set_bit_from(bit, addr, size) \ 44 49 for ((bit) = find_next_bit((addr), (size), (bit)); \
+25
tools/lib/find_bit.c
··· 82 82 return size; 83 83 } 84 84 #endif 85 + 86 + #ifndef find_first_zero_bit 87 + /* 88 + * Find the first cleared bit in a memory region. 89 + */ 90 + unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) 91 + { 92 + unsigned long idx; 93 + 94 + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { 95 + if (addr[idx] != ~0UL) 96 + return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); 97 + } 98 + 99 + return size; 100 + } 101 + #endif 102 + 103 + #ifndef find_next_zero_bit 104 + unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, 105 + unsigned long offset) 106 + { 107 + return _find_next_bit(addr, size, offset, ~0UL); 108 + } 109 + #endif
+27 -13
tools/lib/traceevent/Makefile
··· 99 99 libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) 100 100 plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) 101 101 102 - LIB_FILE = libtraceevent.a libtraceevent.so 103 - 104 102 CONFIG_INCLUDES = 105 103 CONFIG_LIBS = 106 104 CONFIG_FLAGS = ··· 111 113 N = 112 114 113 115 EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) 116 + 117 + LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION) 118 + LIB_INSTALL = libtraceevent.a libtraceevent.so* 114 119 115 120 INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) 116 121 ··· 157 156 PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) 158 157 PLUGINS_IN := $(PLUGINS:.so=-in.o) 159 158 160 - TE_IN := $(OUTPUT)libtraceevent-in.o 161 - LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) 159 + TE_IN := $(OUTPUT)libtraceevent-in.o 160 + LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) 162 161 DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list 163 162 164 - CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE) 163 + CMD_TARGETS = $(LIB_TARGET) $(PLUGINS) $(DYNAMIC_LIST_FILE) 165 164 166 165 TARGETS = $(CMD_TARGETS) 167 166 ··· 172 171 $(TE_IN): force 173 172 $(Q)$(MAKE) $(build)=libtraceevent 174 173 175 - $(OUTPUT)libtraceevent.so: $(TE_IN) 176 - $(QUIET_LINK)$(CC) --shared $^ -o $@ 174 + $(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN) 175 + $(QUIET_LINK)$(CC) --shared $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@ 176 + @ln -sf $(@F) $(OUTPUT)libtraceevent.so 177 + @ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION) 177 178 178 179 $(OUTPUT)libtraceevent.a: $(TE_IN) 179 180 $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ ··· 239 236 find . -name '*.[ch]' | xargs etags \ 240 237 --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' 241 238 239 + define do_install_mkdir 240 + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ 241 + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ 242 + fi 243 + endef 244 + 242 245 define do_install 243 - if [ ! 
-d '$(DESTDIR_SQ)$2' ]; then \ 244 - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ 245 - fi; \ 246 - $(INSTALL) $1 '$(DESTDIR_SQ)$2' 246 + $(call do_install_mkdir,$2); \ 247 + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' 247 248 endef 248 249 249 250 define do_install_plugins ··· 264 257 endef 265 258 266 259 install_lib: all_cmd install_plugins 267 - $(call QUIET_INSTALL, $(LIB_FILE)) \ 268 - $(call do_install,$(LIB_FILE),$(libdir_SQ)) 260 + $(call QUIET_INSTALL, $(LIB_TARGET)) \ 261 + $(call do_install_mkdir,$(libdir_SQ)); \ 262 + cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ) 269 263 270 264 install_plugins: $(PLUGINS) 271 265 $(call QUIET_INSTALL, trace_plugins) \ 272 266 $(call do_install_plugins, $(PLUGINS)) 267 + 268 + install_headers: 269 + $(call QUIET_INSTALL, headers) \ 270 + $(call do_install,event-parse.h,$(prefix)/include/traceevent,644); \ 271 + $(call do_install,event-utils.h,$(prefix)/include/traceevent,644); \ 272 + $(call do_install,kbuffer.h,$(prefix)/include/traceevent,644) 273 273 274 274 install: install_lib 275 275
+170
tools/perf/Documentation/jitdump-specification.txt
··· 1 + JITDUMP specification version 2 2 + Last Revised: 09/15/2016 3 + Author: Stephane Eranian <eranian@gmail.com> 4 + 5 + -------------------------------------------------------- 6 + | Revision | Date | Description | 7 + -------------------------------------------------------- 8 + | 1 | 09/07/2016 | Initial revision | 9 + -------------------------------------------------------- 10 + | 2 | 09/15/2016 | Add JIT_CODE_UNWINDING_INFO | 11 + -------------------------------------------------------- 12 + 13 + 14 + I/ Introduction 15 + 16 + 17 + This document describes the jitdump file format. The file is generated by Just-In-time compiler runtimes to save meta-data information about the generated code, such as address, size, and name of generated functions, the native code generated, the source line information. The data may then be used by performance tools, such as Linux perf to generate function and assembly level profiles. 18 + 19 + The format is not specific to any particular programming language. It can be extended as need be. 20 + 21 + The format of the file is binary. It is self-describing in terms of endianness and is portable across multiple processor architectures. 22 + 23 + 24 + II/ Overview of the format 25 + 26 + 27 + The format requires only sequential accesses, i.e., append only mode. The file starts with a fixed size file header describing the version of the specification, the endianness. 28 + 29 + The header is followed by a series of records, each starting with a fixed size header describing the type of record and its size. It is, itself, followed by the payload for the record. Records can have a variable size even for a given type. 30 + 31 + Each entry in the file is timestamped. All timestamps must use the same clock source. The CLOCK_MONOTONIC clock source is recommended. 
32 + 33 + 34 + III/ Jitdump file header format 35 + 36 + Each jitdump file starts with a fixed size header containing the following fields in order: 37 + 38 + 39 + * uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can be used to detect the endianness of the file 40 + * uint32_t version : a 4-byte value representing the format version. It is currently set to 2 41 + * uint32_t total_size: size in bytes of file header 42 + * uint32_t elf_mach : ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h) 43 + * uint32_t pad1 : padding. Reserved for future use 44 + * uint32_t pid : JIT runtime process identification (OS specific) 45 + * uint64_t timestamp : timestamp of when the file was created 46 + * uint64_t flags : a bitmask of flags 47 + 48 + The flags currently defined are as follows: 49 + * bit 0: JITDUMP_FLAGS_ARCH_TIMESTAMP : set if the jitdump file is using an architecture-specific timestamp clock source. For instance, on x86, one could use TSC directly 50 + 51 + IV/ Record header 52 + 53 + The file header is immediately followed by records. Each record starts with a fixed size header describing the record that follows. 54 + 55 + The record header is specified in order as follows: 56 + * uint32_t id : a value identifying the record type (see below) 57 + * uint32_t total_size: the size in bytes of the record including the header. 58 + * uint64_t timestamp : a timestamp of when the record was created. 
59 + 60 + The following record types are defined: 61 + * Value 0 : JIT_CODE_LOAD : record describing a jitted function 62 + * Value 1 : JIT_CODE_MOVE : record describing an already jitted function which is moved 63 + * Value 2 : JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function 64 + * Value 3 : JIT_CODE_CLOSE : record marking the end of the jit runtime (optional) 65 + * Value 4 : JIT_CODE_UNWINDING_INFO: record describing a function unwinding information 66 + 67 + The payload of the record must immediately follow the record header without padding. 68 + 69 + V/ JIT_CODE_LOAD record 70 + 71 + 72 + The record has the following fields following the fixed-size record header in order: 73 + * uint32_t pid: OS process id of the runtime generating the jitted code 74 + * uint32_t tid: OS thread identification of the runtime thread generating the jitted code 75 + * uint64_t vma: virtual address of jitted code start 76 + * uint64_t code_addr: code start address for the jitted code. By default vma = code_addr 77 + * uint64_t code_size: size in bytes of the generated jitted code 78 + * uint64_t code_index: unique identifier for the jitted code (see below) 79 + * char[n]: function name in ASCII including the null termination 80 + * native code: raw byte encoding of the jitted code 81 + 82 + The record header total_size field is inclusive of all components: 83 + * record header 84 + * fixed-sized fields 85 + * function name string, including termination 86 + * native code length 87 + * record specific variable data (e.g., array of data entries) 88 + 89 + The code_index is used to uniquely identify each jitted function. The index can be a monotonically increasing 64-bit value. Each time a function is jitted it gets a new number. This value is used in case the code for a function is moved and avoids having to issue another JIT_CODE_LOAD record. 90 + 91 + The format supports empty functions with no native code. 
92 + 93 + 94 + VI/ JIT_CODE_MOVE record 95 + 96 + The record type is optional. 97 + 98 + The record has the following fields following the fixed-size record header in order: 99 + * uint32_t pid : OS process id of the runtime generating the jitted code 100 + * uint32_t tid : OS thread identification of the runtime thread generating the jitted code 101 + * uint64_t vma : new virtual address of jitted code start 102 + * uint64_t old_code_addr: previous code address for the same function 103 + * uint64_t new_code_addr: alternate new code started address for the jitted code. By default it should be equal to the vma address. 104 + * uint64_t code_size : size in bytes of the jitted code 105 + * uint64_t code_index : index referring to the JIT_CODE_LOAD code_index record of when the function was initially jitted 106 + 107 + 108 + The MOVE record can be used in case an already jitted function is simply moved by the runtime inside the code cache. 109 + 110 + The JIT_CODE_MOVE record cannot come before the JIT_CODE_LOAD record for the same function name. The function cannot have changed name, otherwise a new JIT_CODE_LOAD record must be emitted. 111 + 112 + The code size of the function cannot change. 113 + 114 + 115 + VII/ JIT_DEBUG_INFO record 116 + 117 + The record type is optional. 118 + 119 + The record contains source lines debug information, i.e., a way to map a code address back to a source line. This information may be used by the performance tool. 120 + 121 + The record has the following fields following the fixed-size record header in order: 122 + * uint64_t code_addr: address of function for which the debug information is generated 123 + * uint64_t nr_entry : number of debug entries for the function 124 + * debug_entry[n]: array of nr_entry debug entries for the function 125 + 126 + The debug_entry describes the source line information. 
It is defined as follows in order: 127 + * uint64_t code_addr: address of function for which the debug information is generated 128 + * uint32_t line : source file line number (starting at 1) 129 + * uint32_t discrim : column discriminator, 0 is default 130 + * char name[n] : source file name in ASCII, including null termination 131 + 132 + The debug_entry entries are saved in sequence but given that they have variable sizes due to the file name string, they cannot be indexed directly. 133 + They need to be walked sequentially. The next debug_entry is found at sizeof(debug_entry) + strlen(name) + 1. 134 + 135 + IMPORTANT: 136 + The JIT_CODE_DEBUG for a given function must always be generated BEFORE the JIT_CODE_LOAD for the function. This facilitates greatly the parser for the jitdump file. 137 + 138 + 139 + VIII/ JIT_CODE_CLOSE record 140 + 141 + 142 + The record type is optional. 143 + 144 + The record is used as a marker for the end of the jitted runtime. It can be replaced by the end of the file. 145 + 146 + The JIT_CODE_CLOSE record does not have any specific fields, the record header contains all the information needed. 147 + 148 + 149 + IX/ JIT_CODE_UNWINDING_INFO 150 + 151 + 152 + The record type is optional. 153 + 154 + The record is used to describe the unwinding information for a jitted function. 
155 + 156 + The record has the following fields following the fixed-size record header in order: 157 + 158 + uint64_t unwind_data_size : the size in bytes of the unwinding data table at the end of the record 159 + uint64_t eh_frame_hdr_size : the size in bytes of the DWARF EH Frame Header at the start of the unwinding data table at the end of the record 160 + uint64_t mapped_size : the size of the unwinding data mapped in memory 161 + const char unwinding_data[n]: an array of unwinding data, consisting of the EH Frame Header, followed by the actual EH Frame 162 + 163 + 164 + The EH Frame header follows the Linux Standard Base (LSB) specification as described in the document at https://refspecs.linuxfoundation.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html 165 + 166 + 167 + The EH Frame follows the LSB specicfication as described in the document at https://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html 168 + 169 + 170 + NOTE: The mapped_size is generally either the same as unwind_data_size (if the unwinding data was mapped in memory by the running process) or zero (if the unwinding data is not mapped by the process). If the unwinding data was not mapped, then only the EH Frame Header will be read, which can be used to specify FP based unwinding for a function which does not have unwinding information.
+6 -3
tools/perf/Documentation/perf-record.txt
··· 45 45 param1 and param2 are defined as formats for the PMU in: 46 46 /sys/bus/event_source/devices/<pmu>/format/* 47 47 48 - There are also some params which are not defined in .../<pmu>/format/*. 48 + There are also some parameters which are not defined in .../<pmu>/format/*. 49 49 These params can be used to overload default config values per event. 50 - Here is a list of the params. 50 + Here are some common parameters: 51 51 - 'period': Set event sampling period 52 52 - 'freq': Set event sampling frequency 53 53 - 'time': Disable/enable time stamping. Acceptable values are 1 for ··· 57 57 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and 58 58 "no" for disable callgraph. 59 59 - 'stack-size': user stack size for dwarf mode 60 + 61 + See the linkperf:perf-list[1] man page for more parameters. 62 + 60 63 Note: If user explicitly sets options which conflict with the params, 61 - the value set by the params will be overridden. 64 + the value set by the parameters will be overridden. 62 65 63 66 Also not defined in .../<pmu>/format/* are PMU driver specific 64 67 configuration parameters. Any configuration parameter preceded by
+5 -1
tools/perf/Documentation/perf-script.txt
··· 117 117 Comma separated list of fields to print. Options are: 118 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, 119 119 srcline, period, iregs, brstack, brstacksym, flags, bpf-output, 120 - callindent. Field list can be prepended with the type, trace, sw or hw, 120 + callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw, 121 121 to indicate to which event type the field list applies. 122 122 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace 123 123 ··· 180 180 The callindent field is synthesized and may have a value when 181 181 Instruction Trace decoding. For calls and returns, it will display the 182 182 name of the symbol indented with spaces to reflect the stack depth. 183 + 184 + When doing instruction trace decoding insn and insnlen give the 185 + instruction bytes and the instruction length of the current 186 + instruction. 183 187 184 188 Finally, a user may not set fields to none for all event types. 185 189 i.e., -F "" is not allowed.
+5
tools/perf/Documentation/perf-trace.txt
··· 39 39 Prefixing with ! shows all syscalls but the ones specified. You may 40 40 need to escape it. 41 41 42 + -D msecs:: 43 + --delay msecs:: 44 + After starting the program, wait msecs before measuring. This is useful to 45 + filter out the startup phase of the program, which is often very different. 46 + 42 47 -o:: 43 48 --output=:: 44 49 Output file name.
+1
tools/perf/MANIFEST
··· 51 51 tools/include/asm-generic/bitops/atomic.h 52 52 tools/include/asm-generic/bitops/const_hweight.h 53 53 tools/include/asm-generic/bitops/__ffs.h 54 + tools/include/asm-generic/bitops/__ffz.h 54 55 tools/include/asm-generic/bitops/__fls.h 55 56 tools/include/asm-generic/bitops/find.h 56 57 tools/include/asm-generic/bitops/fls64.h
+1 -1
tools/perf/Makefile.config
··· 366 366 endif 367 367 368 368 ifdef PERF_HAVE_JITDUMP 369 - ifndef NO_DWARF 369 + ifndef NO_LIBELF 370 370 $(call detected,CONFIG_JITDUMP) 371 371 CFLAGS += -DHAVE_JITDUMP 372 372 endif
-2
tools/perf/arch/arm/util/cs-etm.c
··· 575 575 snprintf(path, PATH_MAX, 576 576 "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name); 577 577 578 - printf("path: %s\n", path); 579 - 580 578 if (stat(path, &st) < 0) 581 579 return NULL; 582 580
+2 -2
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
··· 374 374 543 x32 io_setup compat_sys_io_setup 375 375 544 x32 io_submit compat_sys_io_submit 376 376 545 x32 execveat compat_sys_execveat/ptregs 377 - 534 x32 preadv2 compat_sys_preadv2 378 - 535 x32 pwritev2 compat_sys_pwritev2 377 + 546 x32 preadv2 compat_sys_preadv64v2 378 + 547 x32 pwritev2 compat_sys_pwritev64v2
+4 -1
tools/perf/bench/futex-hash.c
··· 39 39 static struct stats throughput_stats; 40 40 static pthread_cond_t thread_parent, thread_worker; 41 41 42 + #define SMP_CACHE_BYTES 256 43 + #define __cacheline_aligned __attribute__ ((aligned (SMP_CACHE_BYTES))) 44 + 42 45 struct worker { 43 46 int tid; 44 47 u_int32_t *futex; 45 48 pthread_t thread; 46 49 unsigned long ops; 47 - }; 50 + } __cacheline_aligned; 48 51 49 52 static const struct option options[] = { 50 53 OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+30 -47
tools/perf/bench/mem-functions.c
··· 106 106 107 107 struct bench_mem_info { 108 108 const struct function *functions; 109 - u64 (*do_cycles)(const struct function *r, size_t size); 110 - double (*do_gettimeofday)(const struct function *r, size_t size); 109 + u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst); 110 + double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst); 111 111 const char *const *usage; 112 + bool alloc_src; 112 113 }; 113 114 114 115 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) ··· 117 116 const struct function *r = &info->functions[r_idx]; 118 117 double result_bps = 0.0; 119 118 u64 result_cycles = 0; 119 + void *src = NULL, *dst = zalloc(size); 120 120 121 121 printf("# function '%s' (%s)\n", r->name, r->desc); 122 + 123 + if (dst == NULL) 124 + goto out_alloc_failed; 125 + 126 + if (info->alloc_src) { 127 + src = zalloc(size); 128 + if (src == NULL) 129 + goto out_alloc_failed; 130 + } 122 131 123 132 if (bench_format == BENCH_FORMAT_DEFAULT) 124 133 printf("# Copying %s bytes ...\n\n", size_str); 125 134 126 135 if (use_cycles) { 127 - result_cycles = info->do_cycles(r, size); 136 + result_cycles = info->do_cycles(r, size, src, dst); 128 137 } else { 129 - result_bps = info->do_gettimeofday(r, size); 138 + result_bps = info->do_gettimeofday(r, size, src, dst); 130 139 } 131 140 132 141 switch (bench_format) { ··· 160 149 BUG_ON(1); 161 150 break; 162 151 } 152 + 153 + out_free: 154 + free(src); 155 + free(dst); 156 + return; 157 + out_alloc_failed: 158 + printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str); 159 + goto out_free; 163 160 } 164 161 165 162 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) ··· 220 201 return 0; 221 202 } 222 203 223 - static void memcpy_alloc_mem(void **dst, void **src, size_t size) 224 - { 225 - *dst = zalloc(size); 226 - if (!*dst) 227 - die("memory allocation 
failed - maybe size is too large?\n"); 228 - 229 - *src = zalloc(size); 230 - if (!*src) 231 - die("memory allocation failed - maybe size is too large?\n"); 232 - 233 - /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 234 - memset(*src, 0, size); 235 - } 236 - 237 - static u64 do_memcpy_cycles(const struct function *r, size_t size) 204 + static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) 238 205 { 239 206 u64 cycle_start = 0ULL, cycle_end = 0ULL; 240 - void *src = NULL, *dst = NULL; 241 207 memcpy_t fn = r->fn.memcpy; 242 208 int i; 243 209 244 - memcpy_alloc_mem(&dst, &src, size); 210 + /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */ 211 + memset(src, 0, size); 245 212 246 213 /* 247 214 * We prefault the freshly allocated memory range here, ··· 240 235 fn(dst, src, size); 241 236 cycle_end = get_cycles(); 242 237 243 - free(src); 244 - free(dst); 245 238 return cycle_end - cycle_start; 246 239 } 247 240 248 - static double do_memcpy_gettimeofday(const struct function *r, size_t size) 241 + static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst) 249 242 { 250 243 struct timeval tv_start, tv_end, tv_diff; 251 244 memcpy_t fn = r->fn.memcpy; 252 - void *src = NULL, *dst = NULL; 253 245 int i; 254 - 255 - memcpy_alloc_mem(&dst, &src, size); 256 246 257 247 /* 258 248 * We prefault the freshly allocated memory range here, ··· 261 261 BUG_ON(gettimeofday(&tv_end, NULL)); 262 262 263 263 timersub(&tv_end, &tv_start, &tv_diff); 264 - 265 - free(src); 266 - free(dst); 267 264 268 265 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 269 266 } ··· 291 294 .do_cycles = do_memcpy_cycles, 292 295 .do_gettimeofday = do_memcpy_gettimeofday, 293 296 .usage = bench_mem_memcpy_usage, 297 + .alloc_src = true, 294 298 }; 295 299 296 300 return bench_mem_common(argc, argv, &info); 297 301 } 298 302 299 - static void 
memset_alloc_mem(void **dst, size_t size) 300 - { 301 - *dst = zalloc(size); 302 - if (!*dst) 303 - die("memory allocation failed - maybe size is too large?\n"); 304 - } 305 - 306 - static u64 do_memset_cycles(const struct function *r, size_t size) 303 + static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 307 304 { 308 305 u64 cycle_start = 0ULL, cycle_end = 0ULL; 309 306 memset_t fn = r->fn.memset; 310 - void *dst = NULL; 311 307 int i; 312 - 313 - memset_alloc_mem(&dst, size); 314 308 315 309 /* 316 310 * We prefault the freshly allocated memory range here, ··· 314 326 fn(dst, i, size); 315 327 cycle_end = get_cycles(); 316 328 317 - free(dst); 318 329 return cycle_end - cycle_start; 319 330 } 320 331 321 - static double do_memset_gettimeofday(const struct function *r, size_t size) 332 + static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst) 322 333 { 323 334 struct timeval tv_start, tv_end, tv_diff; 324 335 memset_t fn = r->fn.memset; 325 - void *dst = NULL; 326 336 int i; 327 - 328 - memset_alloc_mem(&dst, size); 329 337 330 338 /* 331 339 * We prefault the freshly allocated memory range here, ··· 336 352 337 353 timersub(&tv_end, &tv_start, &tv_diff); 338 354 339 - free(dst); 340 355 return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); 341 356 } 342 357
+9 -3
tools/perf/builtin-report.c
··· 207 207 208 208 if (rep->show_threads) { 209 209 const char *name = evsel ? perf_evsel__name(evsel) : "unknown"; 210 - perf_read_values_add_value(&rep->show_threads_values, 210 + int err = perf_read_values_add_value(&rep->show_threads_values, 211 211 event->read.pid, event->read.tid, 212 212 event->read.id, 213 213 name, 214 214 event->read.value); 215 + 216 + if (err) 217 + return err; 215 218 } 216 219 217 220 dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, ··· 542 539 } 543 540 } 544 541 545 - if (rep->show_threads) 546 - perf_read_values_init(&rep->show_threads_values); 542 + if (rep->show_threads) { 543 + ret = perf_read_values_init(&rep->show_threads_values); 544 + if (ret) 545 + return ret; 546 + } 547 547 548 548 ret = report__setup_sample_type(rep); 549 549 if (ret) {
+22 -2
tools/perf/builtin-script.c
··· 66 66 PERF_OUTPUT_WEIGHT = 1U << 18, 67 67 PERF_OUTPUT_BPF_OUTPUT = 1U << 19, 68 68 PERF_OUTPUT_CALLINDENT = 1U << 20, 69 + PERF_OUTPUT_INSN = 1U << 21, 70 + PERF_OUTPUT_INSNLEN = 1U << 22, 69 71 }; 70 72 71 73 struct output_option { ··· 95 93 {.str = "weight", .field = PERF_OUTPUT_WEIGHT}, 96 94 {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, 97 95 {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, 96 + {.str = "insn", .field = PERF_OUTPUT_INSN}, 97 + {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, 98 98 }; 99 99 100 100 /* default set to maintain compatibility with current format */ ··· 628 624 printf("%*s", spacing - len, ""); 629 625 } 630 626 627 + static void print_insn(struct perf_sample *sample, 628 + struct perf_event_attr *attr) 629 + { 630 + if (PRINT_FIELD(INSNLEN)) 631 + printf(" ilen: %d", sample->insn_len); 632 + if (PRINT_FIELD(INSN)) { 633 + int i; 634 + 635 + printf(" insn:"); 636 + for (i = 0; i < sample->insn_len; i++) 637 + printf(" %02x", (unsigned char)sample->insn[i]); 638 + } 639 + } 640 + 631 641 static void print_sample_bts(struct perf_sample *sample, 632 642 struct perf_evsel *evsel, 633 643 struct thread *thread, ··· 685 667 686 668 if (print_srcline_last) 687 669 map__fprintf_srcline(al->map, al->addr, "\n ", stdout); 670 + 671 + print_insn(sample, attr); 688 672 689 673 printf("\n"); 690 674 } ··· 931 911 932 912 if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) 933 913 print_sample_bpf_output(sample); 934 - 914 + print_insn(sample, attr); 935 915 printf("\n"); 936 916 } 937 917 ··· 2144 2124 "Valid types: hw,sw,trace,raw. 
" 2145 2125 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 2146 2126 "addr,symoff,period,iregs,brstack,brstacksym,flags," 2147 - "bpf-output,callindent", parse_output_fields), 2127 + "bpf-output,callindent,insn,insnlen", parse_output_fields), 2148 2128 OPT_BOOLEAN('a', "all-cpus", &system_wide, 2149 2129 "system-wide collection from all CPUs"), 2150 2130 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+12 -7
tools/perf/builtin-trace.c
··· 843 843 */ 844 844 struct thread_trace { 845 845 u64 entry_time; 846 - u64 exit_time; 847 846 bool entry_pending; 848 847 unsigned long nr_events; 849 848 unsigned long pfmaj, pfmin; ··· 1451 1452 1452 1453 duration = sample->time - ttrace->entry_time; 1453 1454 1454 - printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); 1455 + printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output); 1455 1456 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); 1456 1457 ttrace->entry_pending = false; 1457 1458 ··· 1498 1499 1499 1500 if (sc->is_exit) { 1500 1501 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { 1501 - trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); 1502 + trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output); 1502 1503 fprintf(trace->output, "%-70s)\n", ttrace->entry_str); 1503 1504 } 1504 1505 } else { ··· 1570 1571 ++trace->stats.vfs_getname; 1571 1572 } 1572 1573 1573 - ttrace->exit_time = sample->time; 1574 - 1575 1574 if (ttrace->entry_time) { 1576 1575 duration = sample->time - ttrace->entry_time; 1577 1576 if (trace__filter_duration(trace, duration)) ··· 1589 1592 if (trace->summary_only) 1590 1593 goto out; 1591 1594 1592 - trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); 1595 + trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output); 1593 1596 1594 1597 if (ttrace->entry_pending) { 1595 1598 fprintf(trace->output, "%-70s", ttrace->entry_str); ··· 2307 2310 if (err < 0) 2308 2311 goto out_error_mmap; 2309 2312 2310 - if (!target__none(&trace->opts.target)) 2313 + if (!target__none(&trace->opts.target) && !trace->opts.initial_delay) 2311 2314 perf_evlist__enable(evlist); 2312 2315 2313 2316 if (forks) 2314 2317 perf_evlist__start_workload(evlist); 2318 + 2319 + if (trace->opts.initial_delay) { 2320 + 
usleep(trace->opts.initial_delay * 1000); 2321 + perf_evlist__enable(evlist); 2322 + } 2315 2323 2316 2324 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || 2317 2325 evlist->threads->nr > 1 || ··· 2818 2816 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), 2819 2817 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, 2820 2818 "per thread proc mmap processing timeout in ms"), 2819 + OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, 2820 + "ms to wait before starting measurement after program " 2821 + "start"), 2821 2822 OPT_END() 2822 2823 }; 2823 2824 bool __maybe_unused max_stack_user_set = true;
+1 -37
tools/perf/jvmti/jvmti_agent.c
··· 44 44 static char jit_path[PATH_MAX]; 45 45 static void *marker_addr; 46 46 47 - /* 48 - * padding buffer 49 - */ 50 - static const char pad_bytes[7]; 51 - 52 47 static inline pid_t gettid(void) 53 48 { 54 49 return (pid_t)syscall(__NR_gettid); ··· 225 230 226 231 void *jvmti_open(void) 227 232 { 228 - int pad_cnt; 229 233 char dump_path[PATH_MAX]; 230 234 struct jitheader header; 231 235 int fd; ··· 282 288 header.total_size = sizeof(header); 283 289 header.pid = getpid(); 284 290 285 - /* calculate amount of padding '\0' */ 286 - pad_cnt = PADDING_8ALIGNED(header.total_size); 287 - header.total_size += pad_cnt; 288 - 289 291 header.timestamp = perf_get_timestamp(); 290 292 291 293 if (use_arch_timestamp) ··· 291 301 warn("jvmti: cannot write dumpfile header"); 292 302 goto error; 293 303 } 294 - 295 - /* write padding '\0' if necessary */ 296 - if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) { 297 - warn("jvmti: cannot write dumpfile header padding"); 298 - goto error; 299 - } 300 - 301 304 return fp; 302 305 error: 303 306 fclose(fp); ··· 332 349 static int code_generation = 1; 333 350 struct jr_code_load rec; 334 351 size_t sym_len; 335 - size_t padding_count; 336 352 FILE *fp = agent; 337 353 int ret = -1; 338 354 ··· 348 366 349 367 rec.p.id = JIT_CODE_LOAD; 350 368 rec.p.total_size = sizeof(rec) + sym_len; 351 - padding_count = PADDING_8ALIGNED(rec.p.total_size); 352 - rec.p. 
total_size += padding_count; 353 369 rec.p.timestamp = perf_get_timestamp(); 354 370 355 371 rec.code_size = size; ··· 373 393 ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp); 374 394 fwrite_unlocked(sym, sym_len, 1, fp); 375 395 376 - if (padding_count) 377 - fwrite_unlocked(pad_bytes, padding_count, 1, fp); 378 - 379 396 if (code) 380 397 fwrite_unlocked(code, size, 1, fp); 381 398 ··· 389 412 { 390 413 struct jr_code_debug_info rec; 391 414 size_t sret, len, size, flen; 392 - size_t padding_count; 393 415 uint64_t addr; 394 416 const char *fn = file; 395 417 FILE *fp = agent; ··· 419 443 * int : line number 420 444 * int : column discriminator 421 445 * file[] : source file name 422 - * padding : pad to multiple of 8 bytes 423 446 */ 424 447 size += nr_lines * sizeof(struct debug_entry); 425 448 size += flen * nr_lines; 426 - /* 427 - * pad to 8 bytes 428 - */ 429 - padding_count = PADDING_8ALIGNED(size); 430 - 431 - rec.p.total_size = size + padding_count; 449 + rec.p.total_size = size; 432 450 433 451 /* 434 452 * If JVM is multi-threaded, nultiple concurrent calls to agent ··· 456 486 if (sret != 1) 457 487 goto error; 458 488 } 459 - if (padding_count) { 460 - sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp); 461 - if (sret != 1) 462 - goto error; 463 - } 464 - 465 489 funlockfile(fp); 466 490 return 0; 467 491 error:
+29 -10
tools/perf/jvmti/libjvmti.c
··· 12 12 static int has_line_numbers; 13 13 void *jvmti_agent; 14 14 15 + static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret) 16 + { 17 + char *err_msg = NULL; 18 + jvmtiError err; 19 + err = (*jvmti)->GetErrorName(jvmti, ret, &err_msg); 20 + if (err == JVMTI_ERROR_NONE) { 21 + warnx("%s failed with %s", msg, err_msg); 22 + (*jvmti)->Deallocate(jvmti, (unsigned char *)err_msg); 23 + } else { 24 + warnx("%s failed with an unknown error %d", msg, ret); 25 + } 26 + } 27 + 15 28 static jvmtiError 16 29 do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, 17 30 jvmti_line_info_t *tab, jint *nr) ··· 35 22 jvmtiError ret; 36 23 37 24 ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab); 38 - if (ret != JVMTI_ERROR_NONE) 25 + if (ret != JVMTI_ERROR_NONE) { 26 + print_error(jvmti, "GetLineNumberTable", ret); 39 27 return ret; 28 + } 40 29 41 30 for (i = 0; i < nr_lines; i++) { 42 31 if (loc_tab[i].start_location < bci) { ··· 86 71 /* free what was allocated for nothing */ 87 72 (*jvmti)->Deallocate(jvmti, (unsigned char *)lne); 88 73 nr_total += (int)nr; 74 + } else { 75 + print_error(jvmti, "GetLineNumberTable", ret); 89 76 } 90 77 } 91 78 } ··· 147 130 ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, 148 131 &decl_class); 149 132 if (ret != JVMTI_ERROR_NONE) { 150 - warnx("jvmti: cannot get declaring class"); 133 + print_error(jvmti, "GetMethodDeclaringClass", ret); 151 134 return; 152 135 } 153 136 ··· 161 144 162 145 ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); 163 146 if (ret != JVMTI_ERROR_NONE) { 164 - warnx("jvmti: cannot get source filename ret=%d", ret); 147 + print_error(jvmti, "GetSourceFileName", ret); 165 148 goto error; 166 149 } 167 150 168 151 ret = (*jvmti)->GetClassSignature(jvmti, decl_class, 169 152 &class_sign, NULL); 170 153 if (ret != JVMTI_ERROR_NONE) { 171 - warnx("jvmti: getclassignature failed"); 154 + print_error(jvmti, "GetClassSignature", ret); 172 155 goto error; 
173 156 } 174 157 175 158 ret = (*jvmti)->GetMethodName(jvmti, method, &func_name, 176 159 &func_sign, NULL); 177 160 if (ret != JVMTI_ERROR_NONE) { 178 - warnx("jvmti: failed getmethodname"); 161 + print_error(jvmti, "GetMethodName", ret); 179 162 goto error; 180 163 } 181 164 ··· 270 253 271 254 ret = (*jvmti)->AddCapabilities(jvmti, &caps1); 272 255 if (ret != JVMTI_ERROR_NONE) { 273 - warnx("jvmti: acquire compiled_method capability failed"); 256 + print_error(jvmti, "AddCapabilities", ret); 274 257 return -1; 275 258 } 276 259 ret = (*jvmti)->GetJLocationFormat(jvmti, &format); ··· 281 264 ret = (*jvmti)->AddCapabilities(jvmti, &caps1); 282 265 if (ret == JVMTI_ERROR_NONE) 283 266 has_line_numbers = 1; 284 - } 267 + } else if (ret != JVMTI_ERROR_NONE) 268 + print_error(jvmti, "GetJLocationFormat", ret); 269 + 285 270 286 271 memset(&cb, 0, sizeof(cb)); 287 272 ··· 292 273 293 274 ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb)); 294 275 if (ret != JVMTI_ERROR_NONE) { 295 - warnx("jvmti: cannot set event callbacks"); 276 + print_error(jvmti, "SetEventCallbacks", ret); 296 277 return -1; 297 278 } 298 279 299 280 ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, 300 281 JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL); 301 282 if (ret != JVMTI_ERROR_NONE) { 302 - warnx("jvmti: setnotification failed for method_load"); 283 + print_error(jvmti, "SetEventNotificationMode(METHOD_LOAD)", ret); 303 284 return -1; 304 285 } 305 286 306 287 ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, 307 288 JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL); 308 289 if (ret != JVMTI_ERROR_NONE) { 309 - warnx("jvmti: setnotification failed on code_generated"); 290 + print_error(jvmti, "SetEventNotificationMode(CODE_GENERATED)", ret); 310 291 return -1; 311 292 } 312 293 return 0;
+1 -1
tools/perf/tests/backward-ring-buffer.c
··· 97 97 98 98 evlist = perf_evlist__new(); 99 99 if (!evlist) { 100 - pr_debug("No enough memory to create evlist\n"); 100 + pr_debug("Not enough memory to create evlist\n"); 101 101 return TEST_FAIL; 102 102 } 103 103
+1 -1
tools/perf/tests/bpf.c
··· 125 125 /* Instead of perf_evlist__new_default, don't add default events */ 126 126 evlist = perf_evlist__new(); 127 127 if (!evlist) { 128 - pr_debug("No enough memory to create evlist\n"); 128 + pr_debug("Not enough memory to create evlist\n"); 129 129 return TEST_FAIL; 130 130 } 131 131
+4 -1
tools/perf/ui/browsers/hists.c
··· 2807 2807 do_zoom_dso(browser, actions); 2808 2808 continue; 2809 2809 case 'V': 2810 - browser->show_dso = !browser->show_dso; 2810 + verbose = (verbose + 1) % 4; 2811 + browser->show_dso = verbose > 0; 2812 + ui_helpline__fpush("Verbosity level set to %d\n", 2813 + verbose); 2811 2814 continue; 2812 2815 case 't': 2813 2816 actions->thread = thread;
+1 -1
tools/perf/util/Build
··· 120 120 ifdef CONFIG_JITDUMP 121 121 libperf-$(CONFIG_LIBELF) += jitdump.o 122 122 libperf-$(CONFIG_LIBELF) += genelf.o 123 - libperf-$(CONFIG_LIBELF) += genelf_debug.o 123 + libperf-$(CONFIG_DWARF) += genelf_debug.o 124 124 endif 125 125 126 126 CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+7 -7
tools/perf/util/bpf-loader.c
··· 241 241 int err = 0; 242 242 243 243 if (!text) { 244 - pr_debug("No enough memory: dup config_str failed\n"); 244 + pr_debug("Not enough memory: dup config_str failed\n"); 245 245 return ERR_PTR(-ENOMEM); 246 246 } 247 247 ··· 531 531 532 532 ptevs = malloc(array_sz); 533 533 if (!ptevs) { 534 - pr_debug("No enough memory: alloc ptevs failed\n"); 534 + pr_debug("Not enough memory: alloc ptevs failed\n"); 535 535 return -ENOMEM; 536 536 } 537 537 ··· 604 604 priv->need_prologue = true; 605 605 priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS); 606 606 if (!priv->insns_buf) { 607 - pr_debug("No enough memory: alloc insns_buf failed\n"); 607 + pr_debug("Not enough memory: alloc insns_buf failed\n"); 608 608 return -ENOMEM; 609 609 } 610 610 611 611 priv->type_mapping = malloc(sizeof(int) * pev->ntevs); 612 612 if (!priv->type_mapping) { 613 - pr_debug("No enough memory: alloc type_mapping failed\n"); 613 + pr_debug("Not enough memory: alloc type_mapping failed\n"); 614 614 return -ENOMEM; 615 615 } 616 616 memset(priv->type_mapping, -1, ··· 864 864 865 865 op->k.array.ranges = memdup(term->array.ranges, memsz); 866 866 if (!op->k.array.ranges) { 867 - pr_debug("No enough memory to alloc indices for map\n"); 867 + pr_debug("Not enough memory to alloc indices for map\n"); 868 868 return -ENOMEM; 869 869 } 870 870 op->key_type = BPF_MAP_KEY_RANGES; ··· 929 929 930 930 newpriv = zalloc(sizeof(*newpriv)); 931 931 if (!newpriv) { 932 - pr_debug("No enough memory to alloc map private\n"); 932 + pr_debug("Not enough memory to alloc map private\n"); 933 933 return NULL; 934 934 } 935 935 INIT_LIST_HEAD(&newpriv->ops_list); ··· 960 960 if (!priv) { 961 961 priv = zalloc(sizeof(*priv)); 962 962 if (!priv) { 963 - pr_debug("No enough memory to alloc map private\n"); 963 + pr_debug("Not enough memory to alloc map private\n"); 964 964 return -ENOMEM; 965 965 } 966 966 INIT_LIST_HEAD(&priv->ops_list);
+3
tools/perf/util/event.h
··· 177 177 PERF_IP_FLAG_TRACE_BEGIN |\ 178 178 PERF_IP_FLAG_TRACE_END) 179 179 180 + #define MAX_INSN 16 181 + 180 182 struct perf_sample { 181 183 u64 ip; 182 184 u32 pid, tid; ··· 195 193 u32 flags; 196 194 u16 insn_len; 197 195 u8 cpumode; 196 + char insn[MAX_INSN]; 198 197 void *raw_data; 199 198 struct ip_callchain *callchain; 200 199 struct branch_stack *branch_stack;
+9
tools/perf/util/evsel.c
··· 28 28 #include "debug.h" 29 29 #include "trace-event.h" 30 30 #include "stat.h" 31 + #include "util/parse-branch-options.h" 31 32 32 33 static struct { 33 34 bool sample_id_all; ··· 708 707 break; 709 708 case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: 710 709 callgraph_buf = term->val.callgraph; 710 + break; 711 + case PERF_EVSEL__CONFIG_TERM_BRANCH: 712 + if (term->val.branch && strcmp(term->val.branch, "no")) { 713 + perf_evsel__set_sample_bit(evsel, BRANCH_STACK); 714 + parse_branch_str(term->val.branch, 715 + &attr->branch_sample_type); 716 + } else 717 + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); 711 718 break; 712 719 case PERF_EVSEL__CONFIG_TERM_STACK_USER: 713 720 dump_size = term->val.stack_user;
+2
tools/perf/util/evsel.h
··· 47 47 PERF_EVSEL__CONFIG_TERM_MAX_STACK, 48 48 PERF_EVSEL__CONFIG_TERM_OVERWRITE, 49 49 PERF_EVSEL__CONFIG_TERM_DRV_CFG, 50 + PERF_EVSEL__CONFIG_TERM_BRANCH, 50 51 PERF_EVSEL__CONFIG_TERM_MAX, 51 52 }; 52 53 ··· 64 63 int max_stack; 65 64 bool inherit; 66 65 bool overwrite; 66 + char *branch; 67 67 } val; 68 68 }; 69 69
+109 -4
tools/perf/util/genelf.c
··· 19 19 #include <limits.h> 20 20 #include <fcntl.h> 21 21 #include <err.h> 22 + #ifdef HAVE_DWARF_SUPPORT 22 23 #include <dwarf.h> 24 + #endif 23 25 24 26 #include "perf.h" 25 27 #include "genelf.h" 26 28 #include "../util/jitdump.h" 29 + 30 + #ifndef NT_GNU_BUILD_ID 31 + #define NT_GNU_BUILD_ID 3 32 + #endif 27 33 28 34 #define JVMTI 29 35 ··· 73 67 '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */ 74 68 '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */ 75 69 '.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */ 70 + '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', '_', 'h', 'd', 'r', 0, /* 90 */ 71 + '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', 0, /* 104 */ 76 72 }; 77 73 78 74 static struct buildid_note { ··· 155 147 } 156 148 #endif 157 149 150 + static int 151 + jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size, 152 + uint64_t unwinding_size, uint64_t base_offset) 153 + { 154 + Elf_Data *d; 155 + Elf_Scn *scn; 156 + Elf_Shdr *shdr; 157 + uint64_t unwinding_table_size = unwinding_size - unwinding_header_size; 158 + 159 + /* 160 + * setup eh_frame section 161 + */ 162 + scn = elf_newscn(e); 163 + if (!scn) { 164 + warnx("cannot create section"); 165 + return -1; 166 + } 167 + 168 + d = elf_newdata(scn); 169 + if (!d) { 170 + warnx("cannot get new data"); 171 + return -1; 172 + } 173 + 174 + d->d_align = 8; 175 + d->d_off = 0LL; 176 + d->d_buf = unwinding; 177 + d->d_type = ELF_T_BYTE; 178 + d->d_size = unwinding_table_size; 179 + d->d_version = EV_CURRENT; 180 + 181 + shdr = elf_getshdr(scn); 182 + if (!shdr) { 183 + warnx("cannot get section header"); 184 + return -1; 185 + } 186 + 187 + shdr->sh_name = 104; 188 + shdr->sh_type = SHT_PROGBITS; 189 + shdr->sh_addr = base_offset; 190 + shdr->sh_flags = SHF_ALLOC; 191 + shdr->sh_entsize = 0; 192 + 193 + /* 194 + * setup eh_frame_hdr section 195 + */ 196 + scn = elf_newscn(e); 197 + if (!scn) { 198 + warnx("cannot create section"); 
199 + return -1; 200 + } 201 + 202 + d = elf_newdata(scn); 203 + if (!d) { 204 + warnx("cannot get new data"); 205 + return -1; 206 + } 207 + 208 + d->d_align = 4; 209 + d->d_off = 0LL; 210 + d->d_buf = unwinding + unwinding_table_size; 211 + d->d_type = ELF_T_BYTE; 212 + d->d_size = unwinding_header_size; 213 + d->d_version = EV_CURRENT; 214 + 215 + shdr = elf_getshdr(scn); 216 + if (!shdr) { 217 + warnx("cannot get section header"); 218 + return -1; 219 + } 220 + 221 + shdr->sh_name = 90; 222 + shdr->sh_type = SHT_PROGBITS; 223 + shdr->sh_addr = base_offset + unwinding_table_size; 224 + shdr->sh_flags = SHF_ALLOC; 225 + shdr->sh_entsize = 0; 226 + 227 + return 0; 228 + } 229 + 158 230 /* 159 231 * fd: file descriptor open for writing for the output file 160 232 * load_addr: code load address (could be zero, just used for buildid) ··· 245 157 int 246 158 jit_write_elf(int fd, uint64_t load_addr, const char *sym, 247 159 const void *code, int csize, 248 - void *debug, int nr_debug_entries) 160 + void *debug __maybe_unused, int nr_debug_entries __maybe_unused, 161 + void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size) 249 162 { 250 163 Elf *e; 251 164 Elf_Data *d; 252 165 Elf_Scn *scn; 253 166 Elf_Ehdr *ehdr; 254 167 Elf_Shdr *shdr; 168 + uint64_t eh_frame_base_offset; 255 169 char *strsym = NULL; 256 170 int symlen; 257 171 int retval = -1; ··· 284 194 ehdr->e_type = ET_DYN; 285 195 ehdr->e_entry = GEN_ELF_TEXT_OFFSET; 286 196 ehdr->e_version = EV_CURRENT; 287 - ehdr->e_shstrndx= 2; /* shdr index for section name */ 197 + ehdr->e_shstrndx= unwinding ? 
4 : 2; /* shdr index for section name */ 288 198 289 199 /* 290 200 * setup text section ··· 319 229 shdr->sh_addr = GEN_ELF_TEXT_OFFSET; 320 230 shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; 321 231 shdr->sh_entsize = 0; 232 + 233 + /* 234 + * Setup .eh_frame_hdr and .eh_frame 235 + */ 236 + if (unwinding) { 237 + eh_frame_base_offset = ALIGN_8(GEN_ELF_TEXT_OFFSET + csize); 238 + retval = jit_add_eh_frame_info(e, unwinding, 239 + unwinding_header_size, unwinding_size, 240 + eh_frame_base_offset); 241 + if (retval) 242 + goto error; 243 + } 322 244 323 245 /* 324 246 * setup section headers string table ··· 400 298 shdr->sh_type = SHT_SYMTAB; 401 299 shdr->sh_flags = 0; 402 300 shdr->sh_entsize = sizeof(Elf_Sym); 403 - shdr->sh_link = 4; /* index of .strtab section */ 301 + shdr->sh_link = unwinding ? 6 : 4; /* index of .strtab section */ 404 302 405 303 /* 406 304 * setup symbols string table ··· 488 386 shdr->sh_size = sizeof(bnote); 489 387 shdr->sh_entsize = 0; 490 388 389 + #ifdef HAVE_DWARF_SUPPORT 491 390 if (debug && nr_debug_entries) { 492 391 retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); 493 392 if (retval) 494 393 goto error; 495 - } else { 394 + } else 395 + #endif 396 + { 496 397 if (elf_update(e, ELF_C_WRITE) < 0) { 497 398 warnx("elf_update 4 failed"); 498 399 goto error;
+4 -1
tools/perf/util/genelf.h
··· 3 3 4 4 /* genelf.c */ 5 5 int jit_write_elf(int fd, uint64_t code_addr, const char *sym, 6 - const void *code, int csize, void *debug, int nr_debug_entries); 6 + const void *code, int csize, void *debug, int nr_debug_entries, 7 + void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size); 8 + #ifdef HAVE_DWARF_SUPPORT 7 9 /* genelf_debug.c */ 8 10 int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); 11 + #endif 9 12 10 13 #if defined(__arm__) 11 14 #define GEN_ELF_ARCH EM_ARM
+18 -1
tools/perf/util/header.c
··· 2250 2250 struct header_print_data hd; 2251 2251 struct perf_header *header = &session->header; 2252 2252 int fd = perf_data_file__fd(session->file); 2253 + struct stat st; 2254 + int ret, bit; 2255 + 2253 2256 hd.fp = fp; 2254 2257 hd.full = full; 2255 2258 2259 + ret = fstat(fd, &st); 2260 + if (ret == -1) 2261 + return -1; 2262 + 2263 + fprintf(fp, "# captured on: %s", ctime(&st.st_ctime)); 2264 + 2256 2265 perf_header__process_sections(header, fd, &hd, 2257 2266 perf_file_section__fprintf_info); 2267 + 2268 + fprintf(fp, "# missing features: "); 2269 + for_each_clear_bit(bit, header->adds_features, HEADER_LAST_FEATURE) { 2270 + if (bit) 2271 + fprintf(fp, "%s ", feat_ops[bit].name); 2272 + } 2273 + 2274 + fprintf(fp, "\n"); 2258 2275 return 0; 2259 2276 } 2260 2277 ··· 2290 2273 2291 2274 err = feat_ops[type].write(fd, h, evlist); 2292 2275 if (err < 0) { 2293 - pr_debug("failed to write feature %d\n", type); 2276 + pr_debug("failed to write feature %s\n", feat_ops[type].name); 2294 2277 2295 2278 /* undo anything written */ 2296 2279 lseek(fd, (*p)->offset, SEEK_SET);
+4 -5
tools/perf/util/intel-bts.c
··· 295 295 sample.cpu = btsq->cpu; 296 296 sample.flags = btsq->sample_flags; 297 297 sample.insn_len = btsq->intel_pt_insn.length; 298 + memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ); 298 299 299 300 if (bts->synth_opts.inject) { 300 301 event.sample.header.size = bts->branches_event_size; ··· 320 319 struct machine *machine = btsq->bts->machine; 321 320 struct thread *thread; 322 321 struct addr_location al; 323 - unsigned char buf[1024]; 324 - size_t bufsz; 322 + unsigned char buf[INTEL_PT_INSN_BUF_SZ]; 325 323 ssize_t len; 326 324 int x86_64; 327 325 uint8_t cpumode; 328 326 int err = -1; 329 - 330 - bufsz = intel_pt_insn_max_size(); 331 327 332 328 if (machine__kernel_ip(machine, ip)) 333 329 cpumode = PERF_RECORD_MISC_KERNEL; ··· 339 341 if (!al.map || !al.map->dso) 340 342 goto out_put; 341 343 342 - len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz); 344 + len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, 345 + INTEL_PT_INSN_BUF_SZ); 343 346 if (len <= 0) 344 347 goto out_put; 345 348
+2
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
··· 980 980 out_no_progress: 981 981 decoder->state.insn_op = intel_pt_insn->op; 982 982 decoder->state.insn_len = intel_pt_insn->length; 983 + memcpy(decoder->state.insn, intel_pt_insn->buf, 984 + INTEL_PT_INSN_BUF_SZ); 983 985 984 986 if (decoder->tx_flags & INTEL_PT_IN_TX) 985 987 decoder->state.flags |= INTEL_PT_IN_TX;
+1
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
··· 66 66 uint32_t flags; 67 67 enum intel_pt_insn_op insn_op; 68 68 int insn_len; 69 + char insn[INTEL_PT_INSN_BUF_SZ]; 69 70 }; 70 71 71 72 struct intel_pt_insn;
+6 -7
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
··· 27 27 28 28 #include "intel-pt-insn-decoder.h" 29 29 30 + #if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN 31 + #error Instruction buffer size too small 32 + #endif 33 + 30 34 /* Based on branch_type() from perf_event_intel_lbr.c */ 31 35 static void intel_pt_insn_decoder(struct insn *insn, 32 36 struct intel_pt_insn *intel_pt_insn) ··· 170 166 if (!insn_complete(&insn) || insn.length > len) 171 167 return -1; 172 168 intel_pt_insn_decoder(&insn, intel_pt_insn); 173 - if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ) 169 + if (insn.length < INTEL_PT_INSN_BUF_SZ) 174 170 memcpy(intel_pt_insn->buf, buf, insn.length); 175 171 else 176 - memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ); 172 + memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_BUF_SZ); 177 173 return 0; 178 174 } 179 175 ··· 213 209 break; 214 210 } 215 211 return 0; 216 - } 217 - 218 - size_t intel_pt_insn_max_size(void) 219 - { 220 - return MAX_INSN_SIZE; 221 212 } 222 213 223 214 int intel_pt_insn_type(enum intel_pt_insn_op op)
+2 -4
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
··· 20 20 #include <stdint.h> 21 21 22 22 #define INTEL_PT_INSN_DESC_MAX 32 23 - #define INTEL_PT_INSN_DBG_BUF_SZ 16 23 + #define INTEL_PT_INSN_BUF_SZ 16 24 24 25 25 enum intel_pt_insn_op { 26 26 INTEL_PT_OP_OTHER, ··· 47 47 enum intel_pt_insn_branch branch; 48 48 int length; 49 49 int32_t rel; 50 - unsigned char buf[INTEL_PT_INSN_DBG_BUF_SZ]; 50 + unsigned char buf[INTEL_PT_INSN_BUF_SZ]; 51 51 }; 52 52 53 53 int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, ··· 57 57 58 58 int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, 59 59 size_t buf_len); 60 - 61 - size_t intel_pt_insn_max_size(void); 62 60 63 61 int intel_pt_insn_type(enum intel_pt_insn_op op); 64 62
+2 -2
tools/perf/util/intel-pt-decoder/intel-pt-log.c
··· 119 119 if (intel_pt_log_open()) 120 120 return; 121 121 122 - if (len > INTEL_PT_INSN_DBG_BUF_SZ) 123 - len = INTEL_PT_INSN_DBG_BUF_SZ; 122 + if (len > INTEL_PT_INSN_BUF_SZ) 123 + len = INTEL_PT_INSN_BUF_SZ; 124 124 intel_pt_print_data(intel_pt_insn->buf, len, ip, 8); 125 125 if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) 126 126 fprintf(f, "%s\n", desc);
+14 -5
tools/perf/util/intel-pt.c
··· 143 143 u32 flags; 144 144 u16 insn_len; 145 145 u64 last_insn_cnt; 146 + char insn[INTEL_PT_INSN_BUF_SZ]; 146 147 }; 147 148 148 149 static void intel_pt_dump(struct intel_pt *pt __maybe_unused, ··· 316 315 enum intel_pt_insn_branch branch; 317 316 int length; 318 317 int32_t rel; 318 + char insn[INTEL_PT_INSN_BUF_SZ]; 319 319 }; 320 320 321 321 static int intel_pt_config_div(const char *var, const char *value, void *data) ··· 402 400 e->branch = intel_pt_insn->branch; 403 401 e->length = intel_pt_insn->length; 404 402 e->rel = intel_pt_insn->rel; 403 + memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ); 405 404 406 405 err = auxtrace_cache__add(c, offset, &e->entry); 407 406 if (err) ··· 431 428 struct machine *machine = ptq->pt->machine; 432 429 struct thread *thread; 433 430 struct addr_location al; 434 - unsigned char buf[1024]; 435 - size_t bufsz; 431 + unsigned char buf[INTEL_PT_INSN_BUF_SZ]; 436 432 ssize_t len; 437 433 int x86_64; 438 434 u8 cpumode; ··· 439 437 u64 insn_cnt = 0; 440 438 bool one_map = true; 441 439 440 + intel_pt_insn->length = 0; 441 + 442 442 if (to_ip && *ip == to_ip) 443 443 goto out_no_cache; 444 - 445 - bufsz = intel_pt_insn_max_size(); 446 444 447 445 if (*ip >= ptq->pt->kernel_start) 448 446 cpumode = PERF_RECORD_MISC_KERNEL; ··· 480 478 intel_pt_insn->branch = e->branch; 481 479 intel_pt_insn->length = e->length; 482 480 intel_pt_insn->rel = e->rel; 481 + memcpy(intel_pt_insn->buf, e->insn, 482 + INTEL_PT_INSN_BUF_SZ); 483 483 intel_pt_log_insn_no_data(intel_pt_insn, *ip); 484 484 return 0; 485 485 } ··· 497 493 498 494 while (1) { 499 495 len = dso__data_read_offset(al.map->dso, machine, 500 - offset, buf, bufsz); 496 + offset, buf, 497 + INTEL_PT_INSN_BUF_SZ); 501 498 if (len <= 0) 502 499 return -EINVAL; 503 500 ··· 905 900 if (ptq->state->flags & INTEL_PT_IN_TX) 906 901 ptq->flags |= PERF_IP_FLAG_IN_TX; 907 902 ptq->insn_len = ptq->state->insn_len; 903 + memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ); 
908 904 } 909 905 } 910 906 ··· 1086 1080 sample.cpu = ptq->cpu; 1087 1081 sample.flags = ptq->flags; 1088 1082 sample.insn_len = ptq->insn_len; 1083 + memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); 1089 1084 1090 1085 /* 1091 1086 * perf report cannot handle events without a branch stack when using ··· 1148 1141 sample.cpu = ptq->cpu; 1149 1142 sample.flags = ptq->flags; 1150 1143 sample.insn_len = ptq->insn_len; 1144 + memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); 1151 1145 1152 1146 ptq->last_insn_cnt = ptq->state->tot_insn_cnt; 1153 1147 ··· 1211 1203 sample.cpu = ptq->cpu; 1212 1204 sample.flags = ptq->flags; 1213 1205 sample.insn_len = ptq->insn_len; 1206 + memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); 1214 1207 1215 1208 if (pt->synth_opts.callchain) { 1216 1209 thread_stack__sample(ptq->thread, ptq->chain,
+72 -10
tools/perf/util/jitdump.c
··· 37 37 bool needs_bswap; /* handles cross-endianess */ 38 38 bool use_arch_timestamp; 39 39 void *debug_data; 40 + void *unwinding_data; 41 + uint64_t unwinding_size; 42 + uint64_t unwinding_mapped_size; 43 + uint64_t eh_frame_hdr_size; 40 44 size_t nr_debug_entries; 41 45 uint32_t code_load_count; 42 46 u64 bytes_written; ··· 72 68 const void *code, 73 69 int csize, 74 70 void *debug, 75 - int nr_debug_entries) 71 + int nr_debug_entries, 72 + void *unwinding, 73 + uint32_t unwinding_header_size, 74 + uint32_t unwinding_size) 76 75 { 77 76 int ret, fd; 78 77 ··· 88 81 return -1; 89 82 } 90 83 91 - ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries); 84 + ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries, 85 + unwinding, unwinding_header_size, unwinding_size); 92 86 93 87 close(fd); 94 88 ··· 179 171 header.pid, 180 172 header.elf_mach, 181 173 jd->use_arch_timestamp); 174 + 175 + if (header.version > JITHEADER_VERSION) { 176 + pr_err("wrong jitdump version %u, expected " STR(JITHEADER_VERSION), 177 + header.version); 178 + goto error; 179 + } 182 180 183 181 if (header.flags & JITDUMP_FLAGS_RESERVED) { 184 182 pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", ··· 277 263 return NULL; 278 264 279 265 if (id >= JIT_CODE_MAX) { 280 - pr_warning("next_entry: unknown prefix %d, skipping\n", id); 281 - return NULL; 266 + pr_warning("next_entry: unknown record type %d, skipping\n", id); 282 267 } 283 268 if (bs > jd->bufsize) { 284 269 void *n; ··· 309 296 } 310 297 } 311 298 break; 299 + case JIT_CODE_UNWINDING_INFO: 300 + if (jd->needs_bswap) { 301 + jr->unwinding.unwinding_size = bswap_64(jr->unwinding.unwinding_size); 302 + jr->unwinding.eh_frame_hdr_size = bswap_64(jr->unwinding.eh_frame_hdr_size); 303 + jr->unwinding.mapped_size = bswap_64(jr->unwinding.mapped_size); 304 + } 305 + break; 312 306 case JIT_CODE_CLOSE: 313 307 break; 314 308 case JIT_CODE_LOAD: ··· 
342 322 break; 343 323 case JIT_CODE_MAX: 344 324 default: 345 - return NULL; 325 + /* skip unknown record (we have read them) */ 326 + break; 346 327 } 347 328 return jr; 348 329 } ··· 391 370 u16 idr_size; 392 371 const char *sym; 393 372 uint32_t count; 394 - int ret, csize; 373 + int ret, csize, usize; 395 374 pid_t pid, tid; 396 375 struct { 397 376 u32 pid, tid; ··· 401 380 pid = jr->load.pid; 402 381 tid = jr->load.tid; 403 382 csize = jr->load.code_size; 383 + usize = jd->unwinding_mapped_size; 404 384 addr = jr->load.code_addr; 405 385 sym = (void *)((unsigned long)jr + sizeof(jr->load)); 406 386 code = (unsigned long)jr + jr->load.p.total_size - csize; ··· 422 400 423 401 size = PERF_ALIGN(size, sizeof(u64)); 424 402 uaddr = (uintptr_t)code; 425 - ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries); 403 + ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries, 404 + jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size); 426 405 427 406 if (jd->debug_data && jd->nr_debug_entries) { 428 407 free(jd->debug_data); 429 408 jd->debug_data = NULL; 430 409 jd->nr_debug_entries = 0; 410 + } 411 + 412 + if (jd->unwinding_data && jd->eh_frame_hdr_size) { 413 + free(jd->unwinding_data); 414 + jd->unwinding_data = NULL; 415 + jd->eh_frame_hdr_size = 0; 416 + jd->unwinding_mapped_size = 0; 417 + jd->unwinding_size = 0; 431 418 } 432 419 433 420 if (ret) { ··· 453 422 454 423 event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; 455 424 event->mmap2.start = addr; 456 - event->mmap2.len = csize; 425 + event->mmap2.len = usize ? 
ALIGN_8(csize) + usize : csize; 457 426 event->mmap2.pid = pid; 458 427 event->mmap2.tid = tid; 459 428 event->mmap2.ino = st.st_ino; ··· 504 473 char *filename; 505 474 size_t size; 506 475 struct stat st; 476 + int usize; 507 477 u16 idr_size; 508 478 int ret; 509 479 pid_t pid, tid; ··· 515 483 516 484 pid = jr->move.pid; 517 485 tid = jr->move.tid; 486 + usize = jd->unwinding_mapped_size; 518 487 idr_size = jd->machine->id_hdr_size; 519 488 520 489 /* ··· 544 511 (sizeof(event->mmap2.filename) - size) + idr_size); 545 512 event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; 546 513 event->mmap2.start = jr->move.new_code_addr; 547 - event->mmap2.len = jr->move.code_size; 514 + event->mmap2.len = usize ? ALIGN_8(jr->move.code_size) + usize 515 + : jr->move.code_size; 548 516 event->mmap2.pid = pid; 549 517 event->mmap2.tid = tid; 550 518 event->mmap2.ino = st.st_ino; ··· 612 578 } 613 579 614 580 static int 581 + jit_repipe_unwinding_info(struct jit_buf_desc *jd, union jr_entry *jr) 582 + { 583 + void *unwinding_data; 584 + uint32_t unwinding_data_size; 585 + 586 + if (!(jd && jr)) 587 + return -1; 588 + 589 + unwinding_data_size = jr->prefix.total_size - sizeof(jr->unwinding); 590 + unwinding_data = malloc(unwinding_data_size); 591 + if (!unwinding_data) 592 + return -1; 593 + 594 + memcpy(unwinding_data, &jr->unwinding.unwinding_data, 595 + unwinding_data_size); 596 + 597 + jd->eh_frame_hdr_size = jr->unwinding.eh_frame_hdr_size; 598 + jd->unwinding_size = jr->unwinding.unwinding_size; 599 + jd->unwinding_mapped_size = jr->unwinding.mapped_size; 600 + jd->unwinding_data = unwinding_data; 601 + 602 + return 0; 603 + } 604 + 605 + static int 615 606 jit_process_dump(struct jit_buf_desc *jd) 616 607 { 617 608 union jr_entry *jr; 618 - int ret; 609 + int ret = 0; 619 610 620 611 while ((jr = jit_get_next_entry(jd))) { 621 612 switch(jr->prefix.id) { ··· 652 593 break; 653 594 case JIT_CODE_DEBUG_INFO: 654 595 ret = jit_repipe_debug_info(jd, jr); 596 + break; 597 + case 
JIT_CODE_UNWINDING_INFO: 598 + ret = jit_repipe_unwinding_info(jd, jr); 655 599 break; 656 600 default: 657 601 ret = 0;
+12
tools/perf/util/jitdump.h
··· 19 19 #define JITHEADER_MAGIC_SW 0x4454694A 20 20 21 21 #define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7) 22 + #define ALIGN_8(x) (((x) + 7) & (~7)) 22 23 23 24 #define JITHEADER_VERSION 1 24 25 ··· 49 48 JIT_CODE_MOVE = 1, 50 49 JIT_CODE_DEBUG_INFO = 2, 51 50 JIT_CODE_CLOSE = 3, 51 + JIT_CODE_UNWINDING_INFO = 4, 52 52 53 53 JIT_CODE_MAX, 54 54 }; ··· 103 101 struct debug_entry entries[0]; 104 102 }; 105 103 104 + struct jr_code_unwinding_info { 105 + struct jr_prefix p; 106 + 107 + uint64_t unwinding_size; 108 + uint64_t eh_frame_hdr_size; 109 + uint64_t mapped_size; 110 + const char unwinding_data[0]; 111 + }; 112 + 106 113 union jr_entry { 107 114 struct jr_code_debug_info info; 108 115 struct jr_code_close close; 109 116 struct jr_code_load load; 110 117 struct jr_code_move move; 111 118 struct jr_prefix prefix; 119 + struct jr_code_unwinding_info unwinding; 112 120 }; 113 121 114 122 static inline struct debug_entry *
+1 -1
tools/perf/util/llvm-utils.c
··· 339 339 char *p; 340 340 341 341 if (!obj_path) { 342 - pr_warning("WARNING: No enough memory, skip object dumping\n"); 342 + pr_warning("WARNING: Not enough memory, skip object dumping\n"); 343 343 return; 344 344 } 345 345
+12 -5
tools/perf/util/map.c
··· 682 682 continue; 683 683 684 684 if (verbose >= 2) { 685 - fputs("overlapping maps:\n", fp); 686 - map__fprintf(map, fp); 687 - map__fprintf(pos, fp); 685 + 686 + if (use_browser) { 687 + pr_warning("overlapping maps in %s " 688 + "(disable tui for more info)\n", 689 + map->dso->name); 690 + } else { 691 + fputs("overlapping maps:\n", fp); 692 + map__fprintf(map, fp); 693 + map__fprintf(pos, fp); 694 + } 688 695 } 689 696 690 697 rb_erase_init(&pos->rb_node, root); ··· 709 702 710 703 before->end = map->start; 711 704 __map_groups__insert(pos->groups, before); 712 - if (verbose >= 2) 705 + if (verbose >= 2 && !use_browser) 713 706 map__fprintf(before, fp); 714 707 map__put(before); 715 708 } ··· 724 717 725 718 after->start = map->end; 726 719 __map_groups__insert(pos->groups, after); 727 - if (verbose >= 2) 720 + if (verbose >= 2 && !use_browser) 728 721 map__fprintf(after, fp); 729 722 map__put(after); 730 723 }
+53 -44
tools/perf/util/parse-branch-options.c
··· 31 31 BRANCH_END 32 32 }; 33 33 34 - int 35 - parse_branch_stack(const struct option *opt, const char *str, int unset) 34 + int parse_branch_str(const char *str, __u64 *mode) 36 35 { 37 36 #define ONLY_PLM \ 38 37 (PERF_SAMPLE_BRANCH_USER |\ 39 38 PERF_SAMPLE_BRANCH_KERNEL |\ 40 39 PERF_SAMPLE_BRANCH_HV) 41 40 42 - uint64_t *mode = (uint64_t *)opt->value; 41 + int ret = 0; 42 + char *p, *s; 43 + char *os = NULL; 43 44 const struct branch_mode *br; 44 - char *s, *os = NULL, *p; 45 - int ret = -1; 45 + 46 + if (str == NULL) { 47 + *mode = PERF_SAMPLE_BRANCH_ANY; 48 + return 0; 49 + } 50 + 51 + /* because str is read-only */ 52 + s = os = strdup(str); 53 + if (!s) 54 + return -1; 55 + 56 + for (;;) { 57 + p = strchr(s, ','); 58 + if (p) 59 + *p = '\0'; 60 + 61 + for (br = branch_modes; br->name; br++) { 62 + if (!strcasecmp(s, br->name)) 63 + break; 64 + } 65 + if (!br->name) { 66 + ret = -1; 67 + ui__warning("unknown branch filter %s," 68 + " check man page\n", s); 69 + goto error; 70 + } 71 + 72 + *mode |= br->mode; 73 + 74 + if (!p) 75 + break; 76 + 77 + s = p + 1; 78 + } 79 + 80 + /* default to any branch */ 81 + if ((*mode & ~ONLY_PLM) == 0) { 82 + *mode = PERF_SAMPLE_BRANCH_ANY; 83 + } 84 + error: 85 + free(os); 86 + return ret; 87 + } 88 + 89 + int 90 + parse_branch_stack(const struct option *opt, const char *str, int unset) 91 + { 92 + __u64 *mode = (__u64 *)opt->value; 46 93 47 94 if (unset) 48 95 return 0; ··· 100 53 if (*mode) 101 54 return -1; 102 55 103 - /* str may be NULL in case no arg is passed to -b */ 104 - if (str) { 105 - /* because str is read-only */ 106 - s = os = strdup(str); 107 - if (!s) 108 - return -1; 109 - 110 - for (;;) { 111 - p = strchr(s, ','); 112 - if (p) 113 - *p = '\0'; 114 - 115 - for (br = branch_modes; br->name; br++) { 116 - if (!strcasecmp(s, br->name)) 117 - break; 118 - } 119 - if (!br->name) { 120 - ui__warning("unknown branch filter %s," 121 - " check man page\n", s); 122 - goto error; 123 - } 124 - 125 - *mode |= 
br->mode; 126 - 127 - if (!p) 128 - break; 129 - 130 - s = p + 1; 131 - } 132 - } 133 - ret = 0; 134 - 135 - /* default to any branch */ 136 - if ((*mode & ~ONLY_PLM) == 0) { 137 - *mode = PERF_SAMPLE_BRANCH_ANY; 138 - } 139 - error: 140 - free(os); 141 - return ret; 56 + return parse_branch_str(str, mode); 142 57 }
+2 -1
tools/perf/util/parse-branch-options.h
··· 1 1 #ifndef _PERF_PARSE_BRANCH_OPTIONS_H 2 2 #define _PERF_PARSE_BRANCH_OPTIONS_H 1 3 - struct option; 3 + #include <stdint.h> 4 4 int parse_branch_stack(const struct option *opt, const char *str, int unset); 5 + int parse_branch_str(const char *str, __u64 *mode); 5 6 #endif /* _PERF_PARSE_BRANCH_OPTIONS_H */
+11 -4
tools/perf/util/parse-events.c
··· 22 22 #include "cpumap.h" 23 23 #include "probe-file.h" 24 24 #include "asm/bug.h" 25 + #include "util/parse-branch-options.h" 25 26 26 27 #define MAX_NAME_LEN 100 27 28 ··· 974 973 CHECK_TYPE_VAL(NUM); 975 974 break; 976 975 case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: 977 - /* 978 - * TODO uncomment when the field is available 979 - * attr->branch_sample_type = term->val.num; 980 - */ 976 + CHECK_TYPE_VAL(STR); 977 + if (strcmp(term->val.str, "no") && 978 + parse_branch_str(term->val.str, &attr->branch_sample_type)) { 979 + err->str = strdup("invalid branch sample type"); 980 + err->idx = term->err_val; 981 + return -EINVAL; 982 + } 981 983 break; 982 984 case PARSE_EVENTS__TERM_TYPE_TIME: 983 985 CHECK_TYPE_VAL(NUM); ··· 1122 1118 break; 1123 1119 case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: 1124 1120 ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); 1121 + break; 1122 + case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: 1123 + ADD_CONFIG_TERM(BRANCH, branch, term->val.str); 1125 1124 break; 1126 1125 case PARSE_EVENTS__TERM_TYPE_STACKSIZE: 1127 1126 ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+7 -3
tools/perf/util/pmu.c
··· 504 504 struct pmu_events_map *map; 505 505 struct pmu_event *pe; 506 506 char *cpuid; 507 + static bool printed; 507 508 508 509 cpuid = getenv("PERF_CPUID"); 509 510 if (cpuid) ··· 514 513 if (!cpuid) 515 514 return; 516 515 517 - pr_debug("Using CPUID %s\n", cpuid); 516 + if (!printed) { 517 + pr_debug("Using CPUID %s\n", cpuid); 518 + printed = true; 519 + } 518 520 519 521 i = 0; 520 522 while (1) { ··· 1139 1135 bool is_cpu = !strcmp(pmu->name, "cpu"); 1140 1136 1141 1137 if (event_glob != NULL && 1142 - !(strglobmatch(name, event_glob) || 1143 - (!is_cpu && strglobmatch(alias->name, 1138 + !(strglobmatch_nocase(name, event_glob) || 1139 + (!is_cpu && strglobmatch_nocase(alias->name, 1144 1140 event_glob)))) 1145 1141 continue; 1146 1142
+1 -1
tools/perf/util/quote.c
··· 54 54 break; 55 55 ret = sq_quote_buf(dst, argv[i]); 56 56 if (maxlen && dst->len > maxlen) 57 - die("Too many or long arguments"); 57 + return -ENOSPC; 58 58 } 59 59 return ret; 60 60 }
-10
tools/perf/util/session.c
··· 2025 2025 void perf_session__fprintf_info(struct perf_session *session, FILE *fp, 2026 2026 bool full) 2027 2027 { 2028 - struct stat st; 2029 - int fd, ret; 2030 - 2031 2028 if (session == NULL || fp == NULL) 2032 2029 return; 2033 2030 2034 - fd = perf_data_file__fd(session->file); 2035 - 2036 - ret = fstat(fd, &st); 2037 - if (ret == -1) 2038 - return; 2039 - 2040 2031 fprintf(fp, "# ========\n"); 2041 - fprintf(fp, "# captured on: %s", ctime(&st.st_ctime)); 2042 2032 perf_header__fprintf_info(session, fp, full); 2043 2033 fprintf(fp, "# ========\n#\n"); 2044 2034 }
+16 -5
tools/perf/util/string.c
··· 193 193 } 194 194 195 195 /* Glob/lazy pattern matching */ 196 - static bool __match_glob(const char *str, const char *pat, bool ignore_space) 196 + static bool __match_glob(const char *str, const char *pat, bool ignore_space, 197 + bool case_ins) 197 198 { 198 199 while (*str && *pat && *pat != '*') { 199 200 if (ignore_space) { ··· 220 219 return false; 221 220 else if (*pat == '\\') /* Escaped char match as normal char */ 222 221 pat++; 223 - if (*str++ != *pat++) 222 + if (case_ins) { 223 + if (tolower(*str) != tolower(*pat)) 224 + return false; 225 + } else if (*str != *pat) 224 226 return false; 227 + str++; 228 + pat++; 225 229 } 226 230 /* Check wild card */ 227 231 if (*pat == '*') { ··· 235 229 if (!*pat) /* Tail wild card matches all */ 236 230 return true; 237 231 while (*str) 238 - if (__match_glob(str++, pat, ignore_space)) 232 + if (__match_glob(str++, pat, ignore_space, case_ins)) 239 233 return true; 240 234 } 241 235 return !*str && !*pat; ··· 255 249 */ 256 250 bool strglobmatch(const char *str, const char *pat) 257 251 { 258 - return __match_glob(str, pat, false); 252 + return __match_glob(str, pat, false, false); 253 + } 254 + 255 + bool strglobmatch_nocase(const char *str, const char *pat) 256 + { 257 + return __match_glob(str, pat, false, true); 259 258 } 260 259 261 260 /** ··· 273 262 */ 274 263 bool strlazymatch(const char *str, const char *pat) 275 264 { 276 - return __match_glob(str, pat, true); 265 + return __match_glob(str, pat, true, false); 277 266 } 278 267 279 268 /**
+2 -2
tools/perf/util/unwind-libunwind-local.c
··· 357 357 di.format = UNW_INFO_FORMAT_REMOTE_TABLE; 358 358 di.start_ip = map->start; 359 359 di.end_ip = map->end; 360 - di.u.rti.segbase = map->start + segbase; 361 - di.u.rti.table_data = map->start + table_data; 360 + di.u.rti.segbase = map->start + segbase - map->pgoff; 361 + di.u.rti.table_data = map->start + table_data - map->pgoff; 362 362 di.u.rti.table_len = fde_count * sizeof(struct table_entry) 363 363 / sizeof(unw_word_t); 364 364 ret = dwarf_search_unwind_table(as, ip, &di, pi,
+1
tools/perf/util/util.h
··· 222 222 char **argv_split(const char *str, int *argcp); 223 223 void argv_free(char **argv); 224 224 bool strglobmatch(const char *str, const char *pat); 225 + bool strglobmatch_nocase(const char *str, const char *pat); 225 226 bool strlazymatch(const char *str, const char *pat); 226 227 static inline bool strisglob(const char *str) 227 228 {
+59 -22
tools/perf/util/values.c
··· 2 2 3 3 #include "util.h" 4 4 #include "values.h" 5 + #include "debug.h" 5 6 6 - void perf_read_values_init(struct perf_read_values *values) 7 + int perf_read_values_init(struct perf_read_values *values) 7 8 { 8 9 values->threads_max = 16; 9 10 values->pid = malloc(values->threads_max * sizeof(*values->pid)); 10 11 values->tid = malloc(values->threads_max * sizeof(*values->tid)); 11 12 values->value = malloc(values->threads_max * sizeof(*values->value)); 12 - if (!values->pid || !values->tid || !values->value) 13 - die("failed to allocate read_values threads arrays"); 13 + if (!values->pid || !values->tid || !values->value) { 14 + pr_debug("failed to allocate read_values threads arrays"); 15 + goto out_free_pid; 16 + } 14 17 values->threads = 0; 15 18 16 19 values->counters_max = 16; ··· 21 18 * sizeof(*values->counterrawid)); 22 19 values->countername = malloc(values->counters_max 23 20 * sizeof(*values->countername)); 24 - if (!values->counterrawid || !values->countername) 25 - die("failed to allocate read_values counters arrays"); 21 + if (!values->counterrawid || !values->countername) { 22 + pr_debug("failed to allocate read_values counters arrays"); 23 + goto out_free_counter; 24 + } 26 25 values->counters = 0; 26 + 27 + return 0; 28 + 29 + out_free_counter: 30 + zfree(&values->counterrawid); 31 + zfree(&values->countername); 32 + out_free_pid: 33 + zfree(&values->pid); 34 + zfree(&values->tid); 35 + zfree(&values->value); 36 + return -ENOMEM; 27 37 } 28 38 29 39 void perf_read_values_destroy(struct perf_read_values *values) ··· 57 41 zfree(&values->countername); 58 42 } 59 43 60 - static void perf_read_values__enlarge_threads(struct perf_read_values *values) 44 + static int perf_read_values__enlarge_threads(struct perf_read_values *values) 61 45 { 62 - values->threads_max *= 2; 63 - values->pid = realloc(values->pid, 64 - values->threads_max * sizeof(*values->pid)); 65 - values->tid = realloc(values->tid, 66 - values->threads_max * sizeof(*values->tid)); 
67 - values->value = realloc(values->value, 68 - values->threads_max * sizeof(*values->value)); 69 - if (!values->pid || !values->tid || !values->value) 70 - die("failed to enlarge read_values threads arrays"); 46 + int nthreads_max = values->threads_max * 2; 47 + void *npid = realloc(values->pid, nthreads_max * sizeof(*values->pid)), 48 + *ntid = realloc(values->tid, nthreads_max * sizeof(*values->tid)), 49 + *nvalue = realloc(values->value, nthreads_max * sizeof(*values->value)); 50 + 51 + if (!npid || !ntid || !nvalue) 52 + goto out_err; 53 + 54 + values->threads_max = nthreads_max; 55 + values->pid = npid; 56 + values->tid = ntid; 57 + values->value = nvalue; 58 + return 0; 59 + out_err: 60 + free(npid); 61 + free(ntid); 62 + free(nvalue); 63 + pr_debug("failed to enlarge read_values threads arrays"); 64 + return -ENOMEM; 71 65 } 72 66 73 67 static int perf_read_values__findnew_thread(struct perf_read_values *values, ··· 89 63 if (values->pid[i] == pid && values->tid[i] == tid) 90 64 return i; 91 65 92 - if (values->threads == values->threads_max) 93 - perf_read_values__enlarge_threads(values); 66 + if (values->threads == values->threads_max) { 67 + i = perf_read_values__enlarge_threads(values); 68 + if (i < 0) 69 + return i; 70 + } 94 71 95 - i = values->threads++; 72 + i = values->threads + 1; 73 + values->value[i] = malloc(values->counters_max * sizeof(**values->value)); 74 + if (!values->value[i]) { 75 + pr_debug("failed to allocate read_values counters array"); 76 + return -ENOMEM; 77 + } 96 78 values->pid[i] = pid; 97 79 values->tid[i] = tid; 98 - values->value[i] = malloc(values->counters_max * sizeof(**values->value)); 99 - if (!values->value[i]) 100 - die("failed to allocate read_values counters array"); 80 + values->threads = i; 101 81 102 82 return i; 103 83 } ··· 147 115 return i; 148 116 } 149 117 150 - void perf_read_values_add_value(struct perf_read_values *values, 118 + int perf_read_values_add_value(struct perf_read_values *values, 151 119 u32 
pid, u32 tid, 152 120 u64 rawid, const char *name, u64 value) 153 121 { 154 122 int tindex, cindex; 155 123 156 124 tindex = perf_read_values__findnew_thread(values, pid, tid); 125 + if (tindex < 0) 126 + return tindex; 157 127 cindex = perf_read_values__findnew_counter(values, rawid, name); 128 + if (cindex < 0) 129 + return cindex; 158 130 159 131 values->value[tindex][cindex] = value; 132 + return 0; 160 133 } 161 134 162 135 static void perf_read_values__display_pretty(FILE *fp,
+2 -2
tools/perf/util/values.h
··· 14 14 u64 **value; 15 15 }; 16 16 17 - void perf_read_values_init(struct perf_read_values *values); 17 + int perf_read_values_init(struct perf_read_values *values); 18 18 void perf_read_values_destroy(struct perf_read_values *values); 19 19 20 - void perf_read_values_add_value(struct perf_read_values *values, 20 + int perf_read_values_add_value(struct perf_read_values *values, 21 21 u32 pid, u32 tid, 22 22 u64 rawid, const char *name, u64 value); 23 23