
Merge tag 'perf-core-for-mingo-5.5-20191011' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf trace:

Arnaldo Carvalho de Melo:

- Reuse the strace-like syscall_arg_fmt->scnprintf() beautification routines
(which convert integer arguments into strings: open flags, etc.) in
tracepoint arguments.

For now, the type-based scnprintf routines (pid_t, umode_t, etc.) and the
ones keyed on well-known arg names ("fd", etc.) get associated with
tracepoint args of that type.

A tracepoint-only arg, "msr", for the msr:{write,read}_msr tracepoints,
gets added as an initial step.

- Introduce syscall_arg_fmt->strtoul() methods to be the reverse operation
of ->scnprintf(), i.e. to go from a string to an integer.
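
As a rough sketch (abbreviated; the real definitions live in
tools/perf/builtin-trace.c), the two methods sit side by side in the
arg format struct:

    struct syscall_arg_fmt {
        size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        bool   (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
        ...
    };

->scnprintf() renders the integer payload as a string for display, while
->strtoul() parses a string such as "IA32_TSC_DEADLINE" back into the
integer a filter needs.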

- Implement --filter, just like in 'perf record', affecting the tracepoint
events specified thus far in the command line, and use the ->strtoul()
methods to map the strings in the tables associated with the beautifiers
to the integers the in-kernel tracepoint (eBPF later) filters expect, e.g.:

# perf trace --max-events 1 -e sched:*ipi --filter="cpu==1 || cpu==2"
0.000 as/24630 sched:sched_wake_idle_without_ipi(cpu: 1)
#

# perf trace --max-events 1 --max-stack=32 -e msr:* --filter="msr==IA32_TSC_DEADLINE"
207.000 cc1/19963 msr:write_msr(msr: IA32_TSC_DEADLINE, val: 5442316760822)
do_trace_write_msr ([kernel.kallsyms])
do_trace_write_msr ([kernel.kallsyms])
lapic_next_deadline ([kernel.kallsyms])
clockevents_program_event ([kernel.kallsyms])
hrtimer_interrupt ([kernel.kallsyms])
smp_apic_timer_interrupt ([kernel.kallsyms])
apic_timer_interrupt ([kernel.kallsyms])
[0x6ff66c] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
[0x7047c3] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
[0x707708] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
execute_one_pass (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
[0x4f3d37] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
[0x4f3d49] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
execute_pass_list (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
cgraph_node::expand (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
[0x2625b4] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
symbol_table::finalize_compilation_unit (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
[0x5ae8b9] (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
toplev::main (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
main (/usr/lib/gcc-cross/alpha-linux-gnu/8/cc1)
[0x26b6a] (/usr/lib/x86_64-linux-gnu/libc-2.29.so)
#
# perf trace --max-events 8 -e msr:* --filter="msr==IA32_SPEC_CTRL"
0.000 :13281/13281 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6)
0.063 migration/3/25 msr:write_msr(msr: IA32_SPEC_CTRL)
0.217 kworker/u16:1-/4826 msr:write_msr(msr: IA32_SPEC_CTRL)
0.687 rcu_sched/11 msr:write_msr(msr: IA32_SPEC_CTRL)
0.696 :13280/13280 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6)
0.305 :13281/13281 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6)
0.355 :13274/13274 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6)
2.743 kworker/u16:0-/6711 msr:write_msr(msr: IA32_SPEC_CTRL)
#
# perf trace --max-events 8 --cpu 1 -e msr:* --filter="msr!=IA32_SPEC_CTRL && msr!=IA32_TSC_DEADLINE && msr != FS_BASE"
0.000 mtr-packet/30819 msr:write_msr(msr: 0x830, val: 68719479037)
0.096 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST)
238.925 mtr-packet/30819 msr:write_msr(msr: 0x830, val: 8589936893)
511.010 :0/0 msr:write_msr(msr: 0x830, val: 68719479037)
1005.052 :0/0 msr:read_msr(msr: IA32_TSC_ADJUST)
1235.131 CPU 0/KVM/3750 msr:write_msr(msr: 0x830, val: 4294969595)
1235.195 CPU 0/KVM/3750 msr:read_msr(msr: IA32_SYSENTER_ESP, val: -2199023037952)
1235.201 CPU 0/KVM/3750 msr:read_msr(msr: IA32_APICBASE, val: 4276096000)
#

- Default to not using libtraceevent and its plugins for beautifying
tracepoint arguments, since now we're reusing the strace-like beautifiers.
Use --libtraceevent_print (just --libtrace is unambiguous and can be used
as a shorthand) to go back to those beautifiers.

This will help in the transition, as can be seen in some of the sched
tracepoints that still need some work in the libbeauty-based mode (a
.perfconfig knob for picking the default is sketched after the examples):

# trace --no-inherit -e msr:*,*sleep,sched:* sleep 1
0.000 ( ): sched:sched_waking(comm: "trace", pid: 3319 (trace), prio: 120, success: 1)
0.006 ( ): sched:sched_wakeup(comm: "trace", pid: 3319 (trace), prio: 120, success: 1)
0.348 ( ): sched:sched_process_exec(filename: 140212596720100, pid: 3319 (sleep), old_pid: 3319 (sleep))
0.490 ( ): msr:write_msr(msr: FS_BASE, val: 139631189321088)
0.670 ( ): nanosleep(rqtp: 0x7ffc52c23bc0) ...
0.674 ( ): sched:sched_stat_runtime(comm: "sleep", pid: 3319 (sleep), runtime: 659259, vruntime: 78942418342)
0.675 ( ): sched:sched_switch(prev_comm: "sleep", prev_pid: 3319 (sleep), prev_prio: 120, prev_state: 1, next_comm: "swapper/0", next_prio: 120)
1001.059 ( ): sched:sched_waking(comm: "sleep", pid: 3319 (sleep), prio: 120, success: 1)
1001.098 ( ): sched:sched_wakeup(comm: "sleep", pid: 3319 (sleep), prio: 120, success: 1)
0.670 (1000.504 ms): ... [continued]: nanosleep()) = 0
1001.456 ( ): sched:sched_process_exit(comm: "sleep", pid: 3319 (sleep), prio: 120)
# trace --libtrace --no-inherit -e msr:*,*sleep,sched:* sleep 1
0.000 ( ): sched:sched_waking(comm=trace pid=3323 prio=120 target_cpu=000)
0.007 ( ): sched:sched_wakeup(comm=trace pid=3323 prio=120 target_cpu=000)
0.382 ( ): sched:sched_process_exec(filename=/usr/bin/sleep pid=3323 old_pid=3323)
0.525 ( ): msr:write_msr(c0000100, value 7f5d508a0580)
0.713 ( ): nanosleep(rqtp: 0x7fff487fb4a0) ...
0.717 ( ): sched:sched_stat_runtime(comm=sleep pid=3323 runtime=617722 [ns] vruntime=78957731636 [ns])
0.719 ( ): sched:sched_switch(prev_comm=sleep prev_pid=3323 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120)
1001.117 ( ): sched:sched_waking(comm=sleep pid=3323 prio=120 target_cpu=000)
1001.157 ( ): sched:sched_wakeup(comm=sleep pid=3323 prio=120 target_cpu=000)
0.713 (1000.522 ms): ... [continued]: nanosleep()) = 0
1001.538 ( ): sched:sched_process_exit(comm=sleep pid=3323 prio=120)
#
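
The choice can also be made persistent via the trace.tracepoint_beautifiers
knob added to the perf-config documentation in this series; a minimal
~/.perfconfig sketch:

    [trace]
        tracepoint_beautifiers = libtraceevent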

- Make -v (verbose) mode be honoured for .perfconfig-based trace.add_events,
to help in diagnosing problems with building eBPF events (-e source.c).
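
For instance, with an eBPF source set up in ~/.perfconfig (hypothetical
path):

    [trace]
        add_events = ~/git/linux/tools/perf/examples/bpf/augmented_raw_syscalls.c

running 'perf trace -v' should then also emit the clang/LLVM diagnostics
from building that event.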

- When using eBPF syscall payload augmentation, do not show strace-like
syscalls when all the user specified were tracepoint events, bringing the
behaviour in line with that of when not using eBPF augmentation.

Intel PT:

exported-sql-viewer GUI:

Adrian Hunter:

- Add LookupModel, HBoxLayout, VBoxLayout, global time range calculations
so as to add a time chart by CPU.

perf script:

Andi Kleen:

- Allow --time (to specify a time span of interest) with --reltime
(timestamps relative to start).
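
E.g. (hypothetical session), to look only at the first 10% of the trace
while keeping timestamps relative to its start:

    # perf script --reltime --time 0%-10%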

perf diff:

Jin Yao:

- Report noise for the cycles diff, i.e. a histogram + stddev.
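
E.g. (hypothetical invocation), comparing two data files:

    # perf diff -c cycles --cycles-hist perf.data.old perf.data

The extra "stddev/Hist" column shows the relative standard deviation plus
a spark histogram of the per-block cycles differences; per the option's
help text, use it only with -c cycles.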

perf annotate:

Arnaldo Carvalho de Melo:

- Initialize env->cpuid when running in live mode (perf top), as it
is used in some of the per-arch annotation init routines.

samples bpf:

Björn Töpel:

- Fix up fallout of using tools/perf/perf-sys.h from outside tools/perf.

Core:

Ian Rogers:

- Avoid 'sample_reg_masks' being const + weak, as this breaks with some
compilers that constant-propagate from the weak symbol.
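
A minimal illustration of the problem (a sketch, not the exact perf code):
with a weak default definition like

    const struct sample_reg __weak sample_reg_masks[] = {
        SMPL_REG_END
    };

some compilers may constant-propagate from this (empty) weak array into
its users, so a strong per-arch override never takes effect; hence the
non-weak per-arch definitions added below.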

libperf:

- First part of moving the perf_mmap class from tools/perf to libperf.

- Propagate CFLAGS to libperf from the tools/perf Makefile.

Vendor events:

John Garry:

- Add an entry to MAINTAINERS with reviewers for the perf tool arm64
pmu-events files.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+4272 -711
+7
MAINTAINERS
···
 F:	arch/*/events/*/*
 F:	tools/perf/
 
+PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS
+R:	John Garry <john.garry@huawei.com>
+R:	Will Deacon <will@kernel.org>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Supported
+F:	tools/perf/pmu-events/arch/arm64/
+
 PERSONALITY HANDLING
 M:	Christoph Hellwig <hch@infradead.org>
 L:	linux-abi-devel@lists.sourceforge.net
+1
samples/bpf/Makefile
···
 KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
 KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
 KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf
+KBUILD_HOSTCFLAGS += -DHAVE_ATTR_TEST=0
 
 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+857
tools/arch/x86/include/asm/msr-index.h
···
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_MSR_INDEX_H
+#define _ASM_X86_MSR_INDEX_H
+
+#include <linux/bits.h>
+
+/*
+ * CPU model specific register (MSR) numbers.
+ *
+ * Do not add new entries to this file unless the definitions are shared
+ * between multiple compilation units.
+ */
+
+/* x86-64 specific MSRs */
+#define MSR_EFER		0xc0000080 /* extended feature register */
+#define MSR_STAR		0xc0000081 /* legacy mode SYSCALL target */
+#define MSR_LSTAR		0xc0000082 /* long mode SYSCALL target */
+#define MSR_CSTAR		0xc0000083 /* compat mode SYSCALL target */
+#define MSR_SYSCALL_MASK	0xc0000084 /* EFLAGS mask for syscall */
+#define MSR_FS_BASE		0xc0000100 /* 64bit FS base */
+#define MSR_GS_BASE		0xc0000101 /* 64bit GS base */
+#define MSR_KERNEL_GS_BASE	0xc0000102 /* SwapGS GS shadow */
+#define MSR_TSC_AUX		0xc0000103 /* Auxiliary TSC */
+
+/* EFER bits: */
+#define _EFER_SCE		0  /* SYSCALL/SYSRET */
+#define _EFER_LME		8  /* Long mode enable */
+#define _EFER_LMA		10 /* Long mode active (read-only) */
+#define _EFER_NX		11 /* No execute enable */
+#define _EFER_SVME		12 /* Enable virtualization */
+#define _EFER_LMSLE		13 /* Long Mode Segment Limit Enable */
+#define _EFER_FFXSR		14 /* Enable Fast FXSAVE/FXRSTOR */
+
+#define EFER_SCE		(1<<_EFER_SCE)
+#define EFER_LME		(1<<_EFER_LME)
+#define EFER_LMA		(1<<_EFER_LMA)
+#define EFER_NX			(1<<_EFER_NX)
+#define EFER_SVME		(1<<_EFER_SVME)
+#define EFER_LMSLE		(1<<_EFER_LMSLE)
+#define EFER_FFXSR		(1<<_EFER_FFXSR)
+
+/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_IA32_SPEC_CTRL	0x00000048 /* Speculation Control */
+#define SPEC_CTRL_IBRS		BIT(0)	   /* Indirect Branch Restricted Speculation */
+#define SPEC_CTRL_STIBP_SHIFT	1	   /* Single Thread Indirect Branch Predictor (STIBP) bit */
+#define SPEC_CTRL_STIBP		BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
+#define SPEC_CTRL_SSBD_SHIFT	2	   /* Speculative Store Bypass Disable bit */
+#define SPEC_CTRL_SSBD		BIT(SPEC_CTRL_SSBD_SHIFT)  /* Speculative Store Bypass Disable */
+
+#define MSR_IA32_PRED_CMD	0x00000049 /* Prediction Command */
+#define PRED_CMD_IBPB		BIT(0)	   /* Indirect Branch Prediction Barrier */
+
 [~800 further added lines elided: the remainder of this verbatim copy of
 the kernel's arch/x86/include/asm/msr-index.h (Intel, AMD, K8/K7/K6, VIA,
 Transmeta, Geode, P4/Core perf counter, RAPL, HWP and VMX MSR definitions)]
+
+#endif /* _ASM_X86_MSR_INDEX_H */
+5
tools/perf/Documentation/perf-config.txt
···
 trace.show_zeros::
 	Do not suppress syscall arguments that are equal to zero.
 
+trace.tracepoint_beautifiers::
+	Use "libtraceevent" to use that library to augment the tracepoint arguments,
+	"libbeauty", the default, to use the same argument beautifiers used in the
+	strace-like sys_enter+sys_exit lines.
+
 llvm.*::
 llvm.clang-path::
 	Path to clang. If omit, search it from $PATH.
+5
tools/perf/Documentation/perf-diff.txt
···
 	diff.compute config option. See COMPARISON METHODS section for
 	more info.
 
+--cycles-hist::
+	Report a histogram and the standard deviation for cycles data.
+	It can help us to judge if the reported cycles data is noisy or
+	not. This option should be used with '-c cycles'.
+
 -p::
 --period::
 	Show period values for both compared hist entries.
+10
tools/perf/Documentation/perf-trace.txt
···
 	Prefixing with ! shows all syscalls but the ones specified. You may
 	need to escape it.
 
+--filter=<filter>::
+	Event filter. This option should follow an event selector (-e) which
+	selects tracepoint event(s).
+
+
 -D msecs::
 --delay msecs::
 	After starting the program, wait msecs before measuring. This is useful to
···
 	Do sorting on batches of events, use when noticing out of order events that
 	may happen, for instance, when a thread gets migrated to a different CPU
 	while processing a syscall.
+
+--libtraceevent_print::
+	Use libtraceevent to print tracepoint arguments. By default 'perf trace' uses
+	the same beautifiers used in the strace-like enter+exit lines to augment the
+	tracepoint arguments.
 
 --map-dump::
 	Dump BPF maps setup by events passed via -e, for instance the augmented_raw_syscalls
+15 -13
tools/perf/Makefile.config
···
 
 # Treat warnings as errors unless directed not to
 ifneq ($(WERROR),0)
-  CFLAGS += -Werror
+  CORE_CFLAGS += -Werror
   CXXFLAGS += -Werror
 endif
···
 ifeq ($(DEBUG),0)
   ifeq ($(CC_NO_CLANG), 0)
-    CFLAGS += -O3
+    CORE_CFLAGS += -O3
   else
-    CFLAGS += -O6
+    CORE_CFLAGS += -O6
   endif
 endif
···
 
 FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
 
-CFLAGS += -fno-omit-frame-pointer
-CFLAGS += -ggdb3
-CFLAGS += -funwind-tables
-CFLAGS += -Wall
-CFLAGS += -Wextra
-CFLAGS += -std=gnu99
+CORE_CFLAGS += -fno-omit-frame-pointer
+CORE_CFLAGS += -ggdb3
+CORE_CFLAGS += -funwind-tables
+CORE_CFLAGS += -Wall
+CORE_CFLAGS += -Wextra
+CORE_CFLAGS += -std=gnu99
 
 CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti
 CXXFLAGS += -Wall
···
 endif
 
 ifeq ($(feature-stackprotector-all), 1)
-  CFLAGS += -fstack-protector-all
+  CORE_CFLAGS += -fstack-protector-all
 endif
 
 ifeq ($(DEBUG),0)
   ifeq ($(feature-fortify-source), 1)
-    CFLAGS += -D_FORTIFY_SOURCE=2
+    CORE_CFLAGS += -D_FORTIFY_SOURCE=2
   endif
 endif
···
 INC_FLAGS += -I$(src-perf)
 INC_FLAGS += -I$(srctree)/tools/lib/
 
-CFLAGS += $(INC_FLAGS)
+CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+
+CFLAGS += $(CORE_CFLAGS) $(INC_FLAGS)
 CXXFLAGS += $(INC_FLAGS)
 
-CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+LIBPERF_CFLAGS := $(CORE_CFLAGS) $(EXTRA_CFLAGS)
 
 ifeq ($(feature-sync-compare-and-swap), 1)
   CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT
+10 -1
tools/perf/Makefile.perf
···
 asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
 arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
 x86_arch_asm_uapi_dir := $(srctree)/tools/arch/x86/include/uapi/asm/
+x86_arch_asm_dir := $(srctree)/tools/arch/x86/include/asm/
 
 beauty_outdir := $(OUTPUT)trace/beauty/generated
 beauty_ioctl_outdir := $(beauty_outdir)/ioctl
···
 $(x86_arch_prctl_code_array): $(x86_arch_asm_uapi_dir)/prctl.h $(x86_arch_prctl_code_tbl)
 	$(Q)$(SHELL) '$(x86_arch_prctl_code_tbl)' $(x86_arch_asm_uapi_dir) > $@
 
+x86_arch_MSRs_array := $(beauty_outdir)/x86_arch_MSRs_array.c
+x86_arch_MSRs_tbl := $(srctree)/tools/perf/trace/beauty/tracepoints/x86_msr.sh
+
+$(x86_arch_MSRs_array): $(x86_arch_asm_dir)/msr-index.h $(x86_arch_MSRs_tbl)
+	$(Q)$(SHELL) '$(x86_arch_MSRs_tbl)' $(x86_arch_asm_dir) > $@
+
 rename_flags_array := $(beauty_outdir)/rename_flags_array.c
 rename_flags_tbl := $(srctree)/tools/perf/trace/beauty/rename_flags.sh
···
 	$(perf_ioctl_array) \
 	$(prctl_option_array) \
 	$(usbdevfs_ioctl_array) \
+	$(x86_arch_MSRs_array) \
 	$(x86_arch_prctl_code_array) \
 	$(rename_flags_array) \
 	$(arch_errno_name_array) \
···
 	$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
 
 $(LIBPERF): FORCE
-	$(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) $(OUTPUT)libperf.a
+	$(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a
 
 $(LIBPERF)-clean:
 	$(call QUIET_CLEAN, libperf)
···
 	$(OUTPUT)$(perf_ioctl_array) \
 	$(OUTPUT)$(prctl_option_array) \
 	$(OUTPUT)$(usbdevfs_ioctl_array) \
+	$(OUTPUT)$(x86_arch_MSRs_array) \
 	$(OUTPUT)$(x86_arch_prctl_code_array) \
 	$(OUTPUT)$(rename_flags_array) \
 	$(OUTPUT)$(arch_errno_name_array) \
+2
tools/perf/arch/arm/util/Build
···
+perf-y += perf_regs.o
+
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 
 perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+6
tools/perf/arch/arm/util/perf_regs.c
···
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+	SMPL_REG_END
+};
+1
tools/perf/arch/arm64/util/Build
···
 perf-y += header.o
+perf-y += perf_regs.o
 perf-y += sym-handling.o
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+6
tools/perf/arch/arm64/util/perf_regs.c
···
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+	SMPL_REG_END
+};
+2
tools/perf/arch/csky/util/Build
···
+perf-y += perf_regs.o
+
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+6
tools/perf/arch/csky/util/perf_regs.c
···
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+	SMPL_REG_END
+};
+2
tools/perf/arch/riscv/util/Build
···
+perf-y += perf_regs.o
+
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+6
tools/perf/arch/riscv/util/perf_regs.c
···
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+	SMPL_REG_END
+};
+1
tools/perf/arch/s390/util/Build
···
 perf-y += header.o
 perf-y += kvm-stat.o
+perf-y += perf_regs.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+6
tools/perf/arch/s390/util/perf_regs.c
···
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+	SMPL_REG_END
+};
+5 -4
tools/perf/arch/x86/tests/perf-time-to-tsc.c
···
 #include <sys/prctl.h>
 #include <perf/cpumap.h>
 #include <perf/evlist.h>
+#include <perf/mmap.h>
 
 #include "debug.h"
 #include "parse-events.h"
···
 
 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
 		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(md) < 0)
+		if (perf_mmap__read_init(&md->core) < 0)
 			continue;
 
-		while ((event = perf_mmap__read_event(md)) != NULL) {
+		while ((event = perf_mmap__read_event(&md->core)) != NULL) {
 			struct perf_sample sample;
 
 			if (event->header.type != PERF_RECORD_COMM ||
···
 				comm2_time = sample.time;
 			}
 next_event:
-			perf_mmap__consume(md);
+			perf_mmap__consume(&md->core);
 		}
-		perf_mmap__read_done(md);
+		perf_mmap__read_done(&md->core);
 	}
 
 	if (!comm1_time || !comm2_time)
+143
tools/perf/builtin-diff.c
··· 23 23 #include "util/time-utils.h" 24 24 #include "util/annotate.h" 25 25 #include "util/map.h" 26 + #include "util/spark.h" 26 27 #include <linux/err.h> 27 28 #include <linux/zalloc.h> 28 29 #include <subcmd/pager.h> ··· 54 53 PERF_HPP_DIFF__FORMULA, 55 54 PERF_HPP_DIFF__DELTA_ABS, 56 55 PERF_HPP_DIFF__CYCLES, 56 + PERF_HPP_DIFF__CYCLES_HIST, 57 57 58 58 PERF_HPP_DIFF__MAX_INDEX 59 59 }; ··· 89 87 static bool show_period; 90 88 static bool show_formula; 91 89 static bool show_baseline_only; 90 + static bool cycles_hist; 92 91 static unsigned int sort_compute = 1; 93 92 94 93 static s64 compute_wdiff_w1; ··· 167 164 [PERF_HPP_DIFF__CYCLES] = { 168 165 .name = "[Program Block Range] Cycles Diff", 169 166 .width = 70, 167 + }, 168 + [PERF_HPP_DIFF__CYCLES_HIST] = { 169 + .name = "stddev/Hist", 170 + .width = NUM_SPARKS + 9, 170 171 } 171 172 }; 172 173 ··· 617 610 bi->cycles_aggr = ch->cycles_aggr; 618 611 bi->num = ch->num; 619 612 bi->num_aggr = ch->num_aggr; 613 + 614 + memcpy(bi->cycles_spark, ch->cycles_spark, 615 + NUM_SPARKS * sizeof(u64)); 620 616 } 621 617 622 618 static int process_block_per_sym(struct hist_entry *he) ··· 699 689 return NULL; 700 690 } 701 691 692 + static void init_spark_values(unsigned long *svals, int num) 693 + { 694 + for (int i = 0; i < num; i++) 695 + svals[i] = 0; 696 + } 697 + 698 + static void update_spark_value(unsigned long *svals, int num, 699 + struct stats *stats, u64 val) 700 + { 701 + int n = stats->n; 702 + 703 + if (n < num) 704 + svals[n] = val; 705 + } 706 + 702 707 static void compute_cycles_diff(struct hist_entry *he, 703 708 struct hist_entry *pair) 704 709 { ··· 722 697 pair->diff.cycles = 723 698 pair->block_info->cycles_aggr / pair->block_info->num_aggr - 724 699 he->block_info->cycles_aggr / he->block_info->num_aggr; 700 + 701 + if (!cycles_hist) 702 + return; 703 + 704 + init_stats(&pair->diff.stats); 705 + init_spark_values(pair->diff.svals, NUM_SPARKS); 706 + 707 + for (int i = 0; i < pair->block_info->num; i++) { 708 + u64 val; 709 + 710 + if (i >= he->block_info->num || i >= NUM_SPARKS) 711 + break; 712 + 713 + val = labs(pair->block_info->cycles_spark[i] - 714 + he->block_info->cycles_spark[i]); 715 + 716 + update_spark_value(pair->diff.svals, NUM_SPARKS, 717 + &pair->diff.stats, val); 718 + update_stats(&pair->diff.stats, val); 719 + } 725 720 } 726 721 } 727 722 ··· 1300 1255 "Show period values."), 1301 1256 OPT_BOOLEAN('F', "formula", &show_formula, 1302 1257 "Show formula."), 1258 + OPT_BOOLEAN(0, "cycles-hist", &cycles_hist, 1259 + "Show cycles histogram and standard deviation " 1260 + "- WARNING: use only with -c cycles."), 1303 1261 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1304 1262 "dump raw trace in ASCII"), 1305 1263 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), ··· 1510 1462 return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES); 1511 1463 } 1512 1464 1465 + static int all_zero(unsigned long *vals, int len) 1466 + { 1467 + int i; 1468 + 1469 + for (i = 0; i < len; i++) 1470 + if (vals[i] != 0) 1471 + return 0; 1472 + return 1; 1473 + } 1474 + 1475 + static int print_cycles_spark(char *bf, int size, unsigned long *svals, u64 n) 1476 + { 1477 + int printed; 1478 + 1479 + if (n <= 1) 1480 + return 0; 1481 + 1482 + if (n > NUM_SPARKS) 1483 + n = NUM_SPARKS; 1484 + if (all_zero(svals, n)) 1485 + return 0; 1486 + 1487 + printed = print_spark(bf, size, svals, n); 1488 + printed += scnprintf(bf + printed, size - printed, " "); 1489 + return printed; 1490 + } 1491 + 1492 + static int 
hpp__color_cycles_hist(struct perf_hpp_fmt *fmt, 1493 + struct perf_hpp *hpp, struct hist_entry *he) 1494 + { 1495 + struct diff_hpp_fmt *dfmt = 1496 + container_of(fmt, struct diff_hpp_fmt, fmt); 1497 + struct hist_entry *pair = get_pair_fmt(he, dfmt); 1498 + struct block_hist *bh = container_of(he, struct block_hist, he); 1499 + struct block_hist *bh_pair; 1500 + struct hist_entry *block_he; 1501 + char spark[32], buf[128]; 1502 + double r; 1503 + int ret, pad; 1504 + 1505 + if (!pair) { 1506 + if (bh->block_idx) 1507 + hpp->skip = true; 1508 + 1509 + goto no_print; 1510 + } 1511 + 1512 + bh_pair = container_of(pair, struct block_hist, he); 1513 + 1514 + block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx); 1515 + if (!block_he) { 1516 + hpp->skip = true; 1517 + goto no_print; 1518 + } 1519 + 1520 + ret = print_cycles_spark(spark, sizeof(spark), block_he->diff.svals, 1521 + block_he->diff.stats.n); 1522 + 1523 + r = rel_stddev_stats(stddev_stats(&block_he->diff.stats), 1524 + avg_stats(&block_he->diff.stats)); 1525 + 1526 + if (ret) { 1527 + /* 1528 + * Padding spaces if number of sparks less than NUM_SPARKS 1529 + * otherwise the output is not aligned. 1530 + */ 1531 + pad = NUM_SPARKS - ((ret - 1) / 3); 1532 + scnprintf(buf, sizeof(buf), "%s%5.1f%% %s", "\u00B1", r, spark); 1533 + ret = scnprintf(hpp->buf, hpp->size, "%*s", 1534 + dfmt->header_width, buf); 1535 + 1536 + if (pad) { 1537 + ret += scnprintf(hpp->buf + ret, hpp->size - ret, 1538 + "%-*s", pad, " "); 1539 + } 1540 + 1541 + return ret; 1542 + } 1543 + 1544 + no_print: 1545 + return scnprintf(hpp->buf, hpp->size, "%*s", 1546 + dfmt->header_width, " "); 1547 + } 1548 + 1513 1549 static void 1514 1550 hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size) 1515 1551 { ··· 1799 1667 fmt->color = hpp__color_cycles; 1800 1668 fmt->sort = hist_entry__cmp_nop; 1801 1669 break; 1670 + case PERF_HPP_DIFF__CYCLES_HIST: 1671 + fmt->color = hpp__color_cycles_hist; 1672 + fmt->sort = hist_entry__cmp_nop; 1673 + break; 1802 1674 default: 1803 1675 fmt->sort = hist_entry__cmp_nop; 1804 1676 break; ··· 1828 1692 * PERF_HPP_DIFF__DELTA 1829 1693 * PERF_HPP_DIFF__RATIO 1830 1694 * PERF_HPP_DIFF__WEIGHTED_DIFF 1695 + * PERF_HPP_DIFF__CYCLES 1831 1696 */ 1832 1697 data__hpp_register(d, i ? compute_2_hpp[compute] : 1833 1698 PERF_HPP_DIFF__BASELINE); 1699 + 1700 + if (cycles_hist && i) 1701 + data__hpp_register(d, PERF_HPP_DIFF__CYCLES_HIST); 1834 1702 1835 1703 /* 1836 1704 * And the rest: ··· 1989 1849 1990 1850 if (quiet) 1991 1851 perf_quiet_option(); 1852 + 1853 + if (cycles_hist && (compute != COMPUTE_CYCLES)) 1854 + usage_with_options(diff_usage, options); 1992 1855 1993 1856 symbol__annotation_init(); 1994 1857
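The new --cycles-hist column prints, for each program block, the relative standard deviation of the per-instance cycles diffs, followed by a sparkline of the first NUM_SPARKS values (rendered via the util/spark.h include added above). To make the statistic concrete, here is a minimal, self-contained sketch, not perf's actual code: it mirrors the Welford-style accumulation used by tools/perf/util/stats.c, with invented sample values standing in for the |cycles_new - cycles_old| diffs:

#include <math.h>
#include <stdio.h>

struct stats { double n, mean, M2; };

static void update_stats(struct stats *s, double val)
{
	double delta = val - s->mean;

	s->n += 1.0;
	s->mean += delta / s->n;
	s->M2 += delta * (val - s->mean);
}

static double rel_stddev(struct stats *s)
{
	double stddev = s->n > 1.0 ? sqrt(s->M2 / (s->n - 1.0)) : 0.0;

	return s->mean ? 100.0 * stddev / s->mean : 0.0;
}

int main(void)
{
	/* invented per-instance |cycles_new - cycles_old| samples */
	double diffs[] = { 12, 15, 11, 40, 13 };
	struct stats s = { 0 };

	for (unsigned int i = 0; i < sizeof(diffs) / sizeof(diffs[0]); i++)
		update_stats(&s, diffs[i]);

	printf("\u00B1%5.1f%%\n", rel_stddev(&s));	/* the "stddev/Hist" number */
	return 0;
}

Per the option parsing above, --cycles-hist bails out unless the '-c cycles' computation is selected, so an invocation would be along the lines of 'perf diff -c cycles --cycles-hist'.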
tools/perf/builtin-kvm.c (+6 -5)
···
 #include <semaphore.h>
 #include <signal.h>
 #include <math.h>
+#include <perf/mmap.h>

 static const char *get_filename_for_perf_kvm(void)
 {
···

 	*mmap_time = ULLONG_MAX;
 	md = &evlist->mmap[idx];
-	err = perf_mmap__read_init(md);
+	err = perf_mmap__read_init(&md->core);
 	if (err < 0)
 		return (err == -EAGAIN) ? 0 : -1;

-	while ((event = perf_mmap__read_event(md)) != NULL) {
+	while ((event = perf_mmap__read_event(&md->core)) != NULL) {
 		err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
 		if (err) {
-			perf_mmap__consume(md);
+			perf_mmap__consume(&md->core);
 			pr_err("Failed to parse sample\n");
 			return -1;
 		}
···
 		 * FIXME: Here we can't consume the event, as perf_session__queue_event will
 		 * point to it, and it'll get possibly overwritten by the kernel.
 		 */
-		perf_mmap__consume(md);
+		perf_mmap__consume(&md->core);

 		if (err) {
 			pr_err("Failed to enqueue sample: %d\n", err);
···
 			break;
 	}

-	perf_mmap__read_done(md);
+	perf_mmap__read_done(&md->core);
 	return n;
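The '&md->core' idiom in this hunk (and in builtin-record.c, builtin-top.c and builtin-trace.c below) comes from the tool-side 'struct mmap' now embedding the generic libperf ring-buffer state as its first member. A simplified sketch of the layout this assumes, with hypothetical fields; the real definitions live in tools/perf/util/mmap.h and tools/perf/lib/include/internal/mmap.h:

#include <stdio.h>

struct perf_mmap {		/* libperf: the generic ring state */
	int fd;
	void *base;
};

struct mmap {			/* perf tool: extends the libperf state */
	struct perf_mmap core;	/* must stay the first member */
	void *aio_data;		/* tool-only extras (aio, compression, ...) */
};

static void perf_mmap__consume(struct perf_mmap *map)
{
	printf("consuming ring buffer on fd %d\n", map->fd);
}

int main(void)
{
	struct mmap md = { .core = { .fd = 42 } };

	perf_mmap__consume(&md.core);	/* the same shape as the calls above */
	return 0;
}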
tools/perf/builtin-record.c (+5 -5)
···
 		 * every aio write request started in record__aio_push() so
 		 * decrement it because the request is now complete.
 		 */
-		perf_mmap__put(md);
+		perf_mmap__put(&md->core);
 		rc = 1;
 	} else {
 		/*
···

 	if (record__comp_enabled(aio->rec)) {
 		size = zstd_compress(aio->rec->session, aio->data + aio->size,
-				     perf_mmap__mmap_len(map) - aio->size,
+				     mmap__mmap_len(map) - aio->size,
 				     buf, size);
 	} else {
 		memcpy(aio->data + aio->size, buf, size);
···
 		 * after started aio request completion or at record__aio_push()
 		 * if the request failed to start.
 		 */
-		perf_mmap__get(map);
+		perf_mmap__get(&map->core);
 	}

 	aio->size += size;
···
 		 * map->refcount is decremented in record__aio_complete() after
 		 * aio write operation finishes successfully.
 		 */
-		perf_mmap__put(map);
+		perf_mmap__put(&map->core);
 	}

 	return ret;
···
 	struct record *rec = to;

 	if (record__comp_enabled(rec)) {
-		size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size);
+		size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
 		bf = map->data;
 	}
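The get/put pairs above implement a simple ownership rule: every in-flight aio write holds its own reference on the ring, so the mmap cannot be unmapped while the kernel is still reading from it. The pattern in isolation, as a sketch with illustrative names (not perf's refcount_t machinery):

#include <stdatomic.h>
#include <stdio.h>

struct ring { atomic_int refcnt; };

static void ring_get(struct ring *r)
{
	atomic_fetch_add(&r->refcnt, 1);
}

static int ring_put(struct ring *r)
{
	/* returns non-zero when the last reference was dropped */
	return atomic_fetch_sub(&r->refcnt, 1) == 1;
}

static void aio_push(struct ring *r)
{
	ring_get(r);	/* reference owned by the in-flight write */
	/* ... start the asynchronous write of the ring's data ... */
}

static void aio_complete(struct ring *r)
{
	if (ring_put(r))	/* write finished, drop its reference */
		printf("last reference gone, safe to unmap\n");
}

int main(void)
{
	struct ring r = { 1 };	/* the mmap itself holds one reference */

	aio_push(&r);
	aio_complete(&r);
	printf("refcnt back to %d\n", atomic_load(&r.refcnt));
	return 0;
}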
tools/perf/builtin-script.c (-5)
···
 		}
 	}

-	if (script.time_str && reltime) {
-		fprintf(stderr, "Don't combine --reltime with --time\n");
-		return -1;
-	}
-
 	if (itrace_synth_opts.callchain &&
 	    itrace_synth_opts.callchain_sz > scripting_max_stack)
 		scripting_max_stack = itrace_synth_opts.callchain_sz;
tools/perf/builtin-top.c (+16 -4)
···
 #include <linux/err.h>

 #include <linux/ctype.h>
+#include <perf/mmap.h>

 static volatile int done;
 static volatile int resize;
···
 	union perf_event *event;

 	md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
-	if (perf_mmap__read_init(md) < 0)
+	if (perf_mmap__read_init(&md->core) < 0)
 		return;

-	while ((event = perf_mmap__read_event(md)) != NULL) {
+	while ((event = perf_mmap__read_event(&md->core)) != NULL) {
 		int ret;

 		ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
···
 		if (ret)
 			break;

-		perf_mmap__consume(md);
+		perf_mmap__consume(&md->core);

 		if (top->qe.rotate) {
 			pthread_mutex_lock(&top->qe.mutex);
···
 		}
 	}

-	perf_mmap__read_done(md);
+	perf_mmap__read_done(&md->core);
 }

 static void perf_top__mmap_read(struct perf_top *top)
···
 	status = perf_config(perf_top_config, &top);
 	if (status)
 		return status;
+	/*
+	 * Since the per arch annotation init routine may need the cpuid, read
+	 * it here, since we are not getting this from the perf.data header.
+	 */
+	status = perf_env__read_cpuid(&perf_env);
+	if (status) {
+		pr_err("Couldn't read the cpuid for this machine: %s\n",
+		       str_error_r(errno, errbuf, sizeof(errbuf)));
+		goto out_delete_evlist;
+	}
+	top.evlist->env = &perf_env;

 	argc = parse_options(argc, argv, options, top_usage, 0);
 	if (argc)
tools/perf/builtin-trace.c (+513 -90)
··· 77 77 #include <sys/sysmacros.h> 78 78 79 79 #include <linux/ctype.h> 80 + #include <perf/mmap.h> 80 81 81 82 #ifndef O_CLOEXEC 82 83 # define O_CLOEXEC 02000000 ··· 86 85 #ifndef F_LINUX_SPECIFIC_BASE 87 86 # define F_LINUX_SPECIFIC_BASE 1024 88 87 #endif 88 + 89 + /* 90 + * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100 91 + */ 92 + struct syscall_arg_fmt { 93 + size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 94 + bool (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val); 95 + unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); 96 + void *parm; 97 + const char *name; 98 + u16 nr_entries; // for arrays 99 + bool show_zero; 100 + }; 101 + 102 + struct syscall_fmt { 103 + const char *name; 104 + const char *alias; 105 + struct { 106 + const char *sys_enter, 107 + *sys_exit; 108 + } bpf_prog_name; 109 + struct syscall_arg_fmt arg[6]; 110 + u8 nr_args; 111 + bool errpid; 112 + bool timeout; 113 + bool hexret; 114 + }; 89 115 90 116 struct trace { 91 117 struct perf_tool tool; ··· 180 152 bool print_sample; 181 153 bool show_tool_stats; 182 154 bool trace_syscalls; 155 + bool libtraceevent_print; 183 156 bool kernel_syscallchains; 184 157 s16 args_alignment; 185 158 bool show_tstamp; ··· 191 162 bool force; 192 163 bool vfs_getname; 193 164 int trace_pgfaults; 165 + char *perfconfig_events; 194 166 struct { 195 167 struct ordered_events data; 196 168 u64 last; ··· 478 448 return printed; 479 449 } 480 450 451 + bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret) 452 + { 453 + int i; 454 + 455 + for (i = 0; i < sa->nr_entries; ++i) { 456 + if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') { 457 + *ret = sa->offset + i; 458 + return true; 459 + } 460 + } 461 + 462 + return false; 463 + } 464 + 465 + bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret) 466 + { 467 + int i; 468 + 469 + for (i = 0; i < sas->nr_entries; ++i) { 470 + struct strarray *sa = sas->entries[i]; 471 + 472 + if (strarray__strtoul(sa, bf, size, ret)) 473 + return true; 474 + } 475 + 476 + return false; 477 + } 478 + 481 479 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, 482 480 struct syscall_arg *arg) 483 481 { ··· 556 498 { 557 499 return scnprintf(bf, size, "%ld", arg->val); 558 500 } 501 + 502 + static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg) 503 + { 504 + // XXX Hey, maybe for sched:sched_switch prev/next comm fields we can 505 + // fill missing comms using thread__set_comm()... 506 + // here or in a special syscall_arg__scnprintf_pid_sched_tp... 
507 + return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries, arg->val); 508 + } 509 + 510 + #define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array 559 511 560 512 static const char *bpf_cmd[] = { 561 513 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", ··· 762 694 #include "trace/beauty/socket_type.c" 763 695 #include "trace/beauty/waitid_options.c" 764 696 765 - struct syscall_arg_fmt { 766 - size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); 767 - unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); 768 - void *parm; 769 - const char *name; 770 - bool show_zero; 771 - }; 772 - 773 - static struct syscall_fmt { 774 - const char *name; 775 - const char *alias; 776 - struct { 777 - const char *sys_enter, 778 - *sys_exit; 779 - } bpf_prog_name; 780 - struct syscall_arg_fmt arg[6]; 781 - u8 nr_args; 782 - bool errpid; 783 - bool timeout; 784 - bool hexret; 785 - } syscall_fmts[] = { 697 + static struct syscall_fmt syscall_fmts[] = { 786 698 { .name = "access", 787 699 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, 788 700 { .name = "arch_prctl", ··· 1012 964 return strcmp(name, fmt->name); 1013 965 } 1014 966 967 + static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name) 968 + { 969 + return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 970 + } 971 + 1015 972 static struct syscall_fmt *syscall_fmt__find(const char *name) 1016 973 { 1017 974 const int nmemb = ARRAY_SIZE(syscall_fmts); 1018 - return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); 975 + return __syscall_fmt__find(syscall_fmts, nmemb, name); 976 + } 977 + 978 + static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias) 979 + { 980 + int i; 981 + 982 + for (i = 0; i < nmemb; ++i) { 983 + if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0) 984 + return &fmts[i]; 985 + } 986 + 987 + return NULL; 1019 988 } 1020 989 1021 990 static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) 1022 991 { 1023 - int i, nmemb = ARRAY_SIZE(syscall_fmts); 1024 - 1025 - for (i = 0; i < nmemb; ++i) { 1026 - if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0) 1027 - return &syscall_fmts[i]; 1028 - } 1029 - 1030 - return NULL; 992 + const int nmemb = ARRAY_SIZE(syscall_fmts); 993 + return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias); 1031 994 } 1032 995 1033 996 /* ··· 1512 1453 return 0; 1513 1454 } 1514 1455 1515 - static int syscall__set_arg_fmts(struct syscall *sc) 1516 - { 1517 - struct tep_format_field *field, *last_field = NULL; 1518 - int idx = 0, len; 1456 + static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = { 1457 + { .name = "msr", .scnprintf = SCA_X86_MSR, .strtoul = STUL_X86_MSR, } 1458 + }; 1519 1459 1520 - for (field = sc->args; field; field = field->next, ++idx) { 1460 + static int syscall_arg_fmt__cmp(const void *name, const void *fmtp) 1461 + { 1462 + const struct syscall_arg_fmt *fmt = fmtp; 1463 + return strcmp(name, fmt->name); 1464 + } 1465 + 1466 + static struct syscall_arg_fmt * 1467 + __syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name) 1468 + { 1469 + return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp); 1470 + } 1471 + 1472 + static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name) 1473 + { 1474 + const int nmemb = 
ARRAY_SIZE(syscall_arg_fmts__by_name); 1475 + return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name); 1476 + } 1477 + 1478 + static struct tep_format_field * 1479 + syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field) 1480 + { 1481 + struct tep_format_field *last_field = NULL; 1482 + int len; 1483 + 1484 + for (; field; field = field->next, ++arg) { 1521 1485 last_field = field; 1522 1486 1523 - if (sc->fmt && sc->fmt->arg[idx].scnprintf) 1487 + if (arg->scnprintf) 1524 1488 continue; 1525 1489 1526 1490 len = strlen(field->name); ··· 1551 1469 if (strcmp(field->type, "const char *") == 0 && 1552 1470 ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) || 1553 1471 strstr(field->name, "path") != NULL)) 1554 - sc->arg_fmt[idx].scnprintf = SCA_FILENAME; 1472 + arg->scnprintf = SCA_FILENAME; 1555 1473 else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr")) 1556 - sc->arg_fmt[idx].scnprintf = SCA_PTR; 1474 + arg->scnprintf = SCA_PTR; 1557 1475 else if (strcmp(field->type, "pid_t") == 0) 1558 - sc->arg_fmt[idx].scnprintf = SCA_PID; 1476 + arg->scnprintf = SCA_PID; 1559 1477 else if (strcmp(field->type, "umode_t") == 0) 1560 - sc->arg_fmt[idx].scnprintf = SCA_MODE_T; 1561 - else if ((strcmp(field->type, "int") == 0 || 1478 + arg->scnprintf = SCA_MODE_T; 1479 + else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstarts(field->type, "char")) { 1480 + arg->scnprintf = SCA_CHAR_ARRAY; 1481 + arg->nr_entries = field->arraylen; 1482 + } else if ((strcmp(field->type, "int") == 0 || 1562 1483 strcmp(field->type, "unsigned int") == 0 || 1563 1484 strcmp(field->type, "long") == 0) && 1564 1485 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) { ··· 1572 1487 * 23 unsigned int 1573 1488 * 7 unsigned long 1574 1489 */ 1575 - sc->arg_fmt[idx].scnprintf = SCA_FD; 1490 + arg->scnprintf = SCA_FD; 1491 + } else { 1492 + struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name); 1493 + 1494 + if (fmt) { 1495 + arg->scnprintf = fmt->scnprintf; 1496 + arg->strtoul = fmt->strtoul; 1497 + } 1576 1498 } 1577 1499 } 1500 + 1501 + return last_field; 1502 + } 1503 + 1504 + static int syscall__set_arg_fmts(struct syscall *sc) 1505 + { 1506 + struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args); 1578 1507 1579 1508 if (last_field) 1580 1509 sc->args_size = last_field->offset + last_field->size; ··· 1649 1550 sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat"); 1650 1551 1651 1552 return syscall__set_arg_fmts(sc); 1553 + } 1554 + 1555 + static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel) 1556 + { 1557 + int nr_args = evsel->tp_format->format.nr_fields; 1558 + 1559 + evsel->priv = calloc(nr_args, sizeof(struct syscall_arg_fmt)); 1560 + if (evsel->priv != NULL) { 1561 + syscall_arg_fmt__init_array(evsel->priv, evsel->tp_format->format.fields); 1562 + return 0; 1563 + } 1564 + 1565 + return -ENOMEM; 1652 1566 } 1653 1567 1654 1568 static int intcmp(const void *a, const void *b) ··· 1792 1680 * as mount 'flags' argument that needs ignoring some magic flag, see comment 1793 1681 * in tools/perf/trace/beauty/mount_flags.c 1794 1682 */ 1795 - static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val) 1683 + static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val) 1796 1684 { 1797 - if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val) 1798 - return 
sc->arg_fmt[arg->idx].mask_val(arg, val); 1685 + if (fmt && fmt->mask_val) 1686 + return fmt->mask_val(arg, val); 1799 1687 1800 1688 return val; 1801 1689 } 1802 1690 1803 - static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, 1804 - struct syscall_arg *arg, unsigned long val) 1691 + static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size, 1692 + struct syscall_arg *arg, unsigned long val) 1805 1693 { 1806 - if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) { 1694 + if (fmt && fmt->scnprintf) { 1807 1695 arg->val = val; 1808 - if (sc->arg_fmt[arg->idx].parm) 1809 - arg->parm = sc->arg_fmt[arg->idx].parm; 1810 - return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg); 1696 + if (fmt->parm) 1697 + arg->parm = fmt->parm; 1698 + return fmt->scnprintf(bf, size, arg); 1811 1699 } 1812 1700 return scnprintf(bf, size, "%ld", val); 1813 1701 } ··· 1848 1736 if (arg.mask & bit) 1849 1737 continue; 1850 1738 1739 + arg.fmt = &sc->arg_fmt[arg.idx]; 1851 1740 val = syscall_arg__val(&arg, arg.idx); 1852 1741 /* 1853 1742 * Some syscall args need some mask, most don't and 1854 1743 * return val untouched. 1855 1744 */ 1856 - val = syscall__mask_val(sc, &arg, val); 1745 + val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val); 1857 1746 1858 1747 /* 1859 1748 * Suppress this argument if its value is zero and ··· 1875 1762 if (trace->show_arg_names) 1876 1763 printed += scnprintf(bf + printed, size - printed, "%s: ", field->name); 1877 1764 1878 - printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); 1765 + printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], 1766 + bf + printed, size - printed, &arg, val); 1879 1767 } 1880 1768 } else if (IS_ERR(sc->tp_format)) { 1881 1769 /* ··· 1891 1777 if (printed) 1892 1778 printed += scnprintf(bf + printed, size - printed, ", "); 1893 1779 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg); 1894 - printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); 1780 + printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val); 1895 1781 next_arg: 1896 1782 ++arg.idx; 1897 1783 bit <<= 1; ··· 2460 2346 ++trace->nr_events_printed; 2461 2347 } 2462 2348 2349 + static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample, 2350 + struct thread *thread, void *augmented_args, int augmented_args_size) 2351 + { 2352 + char bf[2048]; 2353 + size_t size = sizeof(bf); 2354 + struct tep_format_field *field = evsel->tp_format->format.fields; 2355 + struct syscall_arg_fmt *arg = evsel->priv; 2356 + size_t printed = 0; 2357 + unsigned long val; 2358 + u8 bit = 1; 2359 + struct syscall_arg syscall_arg = { 2360 + .augmented = { 2361 + .size = augmented_args_size, 2362 + .args = augmented_args, 2363 + }, 2364 + .idx = 0, 2365 + .mask = 0, 2366 + .trace = trace, 2367 + .thread = thread, 2368 + .show_string_prefix = trace->show_string_prefix, 2369 + }; 2370 + 2371 + for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) { 2372 + if (syscall_arg.mask & bit) 2373 + continue; 2374 + 2375 + syscall_arg.fmt = arg; 2376 + if (field->flags & TEP_FIELD_IS_ARRAY) 2377 + val = (uintptr_t)(sample->raw_data + field->offset); 2378 + else 2379 + val = format_field__intval(field, sample, evsel->needs_swap); 2380 + /* 2381 + * Some syscall args need some mask, most don't and 2382 + * return val untouched. 
2383 + */ 2384 + val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val); 2385 + 2386 + /* 2387 + * Suppress this argument if its value is zero and 2388 + * and we don't have a string associated in an 2389 + * strarray for it. 2390 + */ 2391 + if (val == 0 && 2392 + !trace->show_zeros && 2393 + !((arg->show_zero || 2394 + arg->scnprintf == SCA_STRARRAY || 2395 + arg->scnprintf == SCA_STRARRAYS) && 2396 + arg->parm)) 2397 + continue; 2398 + 2399 + printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : ""); 2400 + 2401 + /* 2402 + * XXX Perhaps we should have a show_tp_arg_names, 2403 + * leaving show_arg_names just for syscalls? 2404 + */ 2405 + if (1 || trace->show_arg_names) 2406 + printed += scnprintf(bf + printed, size - printed, "%s: ", field->name); 2407 + 2408 + printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val); 2409 + } 2410 + 2411 + return printed + fprintf(trace->output, "%s", bf); 2412 + } 2413 + 2463 2414 static int trace__event_handler(struct trace *trace, struct evsel *evsel, 2464 2415 union perf_event *event __maybe_unused, 2465 2416 struct perf_sample *sample) ··· 2578 2399 */ 2579 2400 } 2580 2401 2581 - fprintf(trace->output, "%s:", evsel->name); 2402 + fprintf(trace->output, "%s(", evsel->name); 2582 2403 2583 2404 if (perf_evsel__is_bpf_output(evsel)) { 2584 2405 bpf_output__fprintf(trace, sample); 2585 2406 } else if (evsel->tp_format) { 2586 2407 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) || 2587 2408 trace__fprintf_sys_enter(trace, evsel, sample)) { 2588 - event_format__fprintf(evsel->tp_format, sample->cpu, 2589 - sample->raw_data, sample->raw_size, 2590 - trace->output); 2409 + if (trace->libtraceevent_print) { 2410 + event_format__fprintf(evsel->tp_format, sample->cpu, 2411 + sample->raw_data, sample->raw_size, 2412 + trace->output); 2413 + } else { 2414 + trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0); 2415 + } 2591 2416 ++trace->nr_events_printed; 2592 2417 2593 2418 if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) { ··· 2602 2419 } 2603 2420 2604 2421 newline: 2605 - fprintf(trace->output, "\n"); 2422 + fprintf(trace->output, ")\n"); 2606 2423 2607 2424 if (callchain_ret > 0) 2608 2425 trace__fprintf_callchain(trace, sample); ··· 3286 3103 3287 3104 return err; 3288 3105 } 3289 - #else 3106 + 3107 + static void trace__delete_augmented_syscalls(struct trace *trace) 3108 + { 3109 + struct evsel *evsel, *tmp; 3110 + 3111 + evlist__remove(trace->evlist, trace->syscalls.events.augmented); 3112 + evsel__delete(trace->syscalls.events.augmented); 3113 + trace->syscalls.events.augmented = NULL; 3114 + 3115 + evlist__for_each_entry_safe(trace->evlist, tmp, evsel) { 3116 + if (evsel->bpf_obj == trace->bpf_obj) { 3117 + evlist__remove(trace->evlist, evsel); 3118 + evsel__delete(evsel); 3119 + } 3120 + 3121 + } 3122 + 3123 + bpf_object__close(trace->bpf_obj); 3124 + trace->bpf_obj = NULL; 3125 + } 3126 + #else // HAVE_LIBBPF_SUPPORT 3290 3127 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused) 3291 3128 { 3292 3129 return 0; ··· 3327 3124 { 3328 3125 return 0; 3329 3126 } 3127 + 3128 + static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused) 3129 + { 3130 + } 3330 3131 #endif // HAVE_LIBBPF_SUPPORT 3132 + 3133 + static bool trace__only_augmented_syscalls_evsels(struct trace *trace) 3134 + { 3135 + struct evsel *evsel; 3136 + 3137 + evlist__for_each_entry(trace->evlist, evsel) { 3138 + if 
(evsel == trace->syscalls.events.augmented || 3139 + evsel->bpf_obj == trace->bpf_obj) 3140 + continue; 3141 + 3142 + return false; 3143 + } 3144 + 3145 + return true; 3146 + } 3331 3147 3332 3148 static int trace__set_ev_qualifier_filter(struct trace *trace) 3333 3149 { ··· 3397 3175 thread = parent; 3398 3176 } 3399 3177 3400 - err = perf_evlist__set_tp_filter_pids(trace->evlist, nr, pids); 3178 + err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids); 3401 3179 if (!err && trace->filter_pids.map) 3402 3180 err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids); 3403 3181 ··· 3414 3192 * we fork the workload in perf_evlist__prepare_workload. 3415 3193 */ 3416 3194 if (trace->filter_pids.nr > 0) { 3417 - err = perf_evlist__set_tp_filter_pids(trace->evlist, trace->filter_pids.nr, 3418 - trace->filter_pids.entries); 3195 + err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, 3196 + trace->filter_pids.entries); 3419 3197 if (!err && trace->filter_pids.map) { 3420 3198 err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr, 3421 3199 trace->filter_pids.entries); ··· 3485 3263 return __trace__deliver_event(trace, event->event); 3486 3264 } 3487 3265 3266 + static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg) 3267 + { 3268 + struct tep_format_field *field; 3269 + struct syscall_arg_fmt *fmt = evsel->priv; 3270 + 3271 + if (evsel->tp_format == NULL || fmt == NULL) 3272 + return NULL; 3273 + 3274 + for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt) 3275 + if (strcmp(field->name, arg) == 0) 3276 + return fmt; 3277 + 3278 + return NULL; 3279 + } 3280 + 3281 + static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel) 3282 + { 3283 + char *tok, *left = evsel->filter, *new_filter = evsel->filter; 3284 + 3285 + while ((tok = strpbrk(left, "=<>!")) != NULL) { 3286 + char *right = tok + 1, *right_end; 3287 + 3288 + if (*right == '=') 3289 + ++right; 3290 + 3291 + while (isspace(*right)) 3292 + ++right; 3293 + 3294 + if (*right == '\0') 3295 + break; 3296 + 3297 + while (!isalpha(*left)) 3298 + if (++left == tok) { 3299 + /* 3300 + * Bail out, can't find the name of the argument that is being 3301 + * used in the filter, let it try to set this filter, will fail later. 
3302 + */ 3303 + return 0; 3304 + } 3305 + 3306 + right_end = right + 1; 3307 + while (isalnum(*right_end) || *right_end == '_') 3308 + ++right_end; 3309 + 3310 + if (isalpha(*right)) { 3311 + struct syscall_arg_fmt *fmt; 3312 + int left_size = tok - left, 3313 + right_size = right_end - right; 3314 + char arg[128]; 3315 + 3316 + while (isspace(left[left_size - 1])) 3317 + --left_size; 3318 + 3319 + scnprintf(arg, sizeof(arg), "%.*s", left_size, left); 3320 + 3321 + fmt = perf_evsel__syscall_arg_fmt(evsel, arg); 3322 + if (fmt == NULL) { 3323 + pr_debug("\"%s\" not found in \"%s\", can't set filter \"%s\"\n", 3324 + arg, evsel->name, evsel->filter); 3325 + return -1; 3326 + } 3327 + 3328 + pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ", 3329 + arg, (int)(right - tok), tok, right_size, right); 3330 + 3331 + if (fmt->strtoul) { 3332 + u64 val; 3333 + if (fmt->strtoul(right, right_size, NULL, &val)) { 3334 + char *n, expansion[19]; 3335 + int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val); 3336 + int expansion_offset = right - new_filter; 3337 + 3338 + pr_debug("%s", expansion); 3339 + 3340 + if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) { 3341 + pr_debug(" out of memory!\n"); 3342 + free(new_filter); 3343 + return -1; 3344 + } 3345 + if (new_filter != evsel->filter) 3346 + free(new_filter); 3347 + left = n + expansion_offset + expansion_lenght; 3348 + new_filter = n; 3349 + } else { 3350 + pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n", 3351 + right_size, right, arg, evsel->name, evsel->filter); 3352 + return -1; 3353 + } 3354 + } else { 3355 + pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n", 3356 + arg, evsel->name, evsel->filter); 3357 + return -1; 3358 + } 3359 + 3360 + pr_debug("\n"); 3361 + } else { 3362 + left = right_end; 3363 + } 3364 + } 3365 + 3366 + if (new_filter != evsel->filter) { 3367 + pr_debug("New filter for %s: %s\n", evsel->name, new_filter); 3368 + perf_evsel__set_filter(evsel, new_filter); 3369 + free(new_filter); 3370 + } 3371 + 3372 + return 0; 3373 + } 3374 + 3375 + static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel) 3376 + { 3377 + struct evlist *evlist = trace->evlist; 3378 + struct evsel *evsel; 3379 + 3380 + evlist__for_each_entry(evlist, evsel) { 3381 + if (evsel->filter == NULL) 3382 + continue; 3383 + 3384 + if (trace__expand_filter(trace, evsel)) { 3385 + *err_evsel = evsel; 3386 + return -1; 3387 + } 3388 + } 3389 + 3390 + return 0; 3391 + } 3392 + 3488 3393 static int trace__run(struct trace *trace, int argc, const char **argv) 3489 3394 { 3490 3395 struct evlist *evlist = trace->evlist; ··· 3651 3302 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", 3652 3303 trace__sched_stat_runtime)) 3653 3304 goto out_error_sched_stat_runtime; 3654 - 3655 3305 /* 3656 3306 * If a global cgroup was set, apply it to all the events without an 3657 3307 * explicit cgroup. 
I.e.: ··· 3753 3405 */ 3754 3406 trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close")); 3755 3407 3408 + err = trace__expand_filters(trace, &evsel); 3409 + if (err) 3410 + goto out_delete_evlist; 3756 3411 err = perf_evlist__apply_filters(evlist, &evsel); 3757 3412 if (err < 0) 3758 3413 goto out_error_apply_filters; ··· 3801 3450 struct mmap *md; 3802 3451 3803 3452 md = &evlist->mmap[i]; 3804 - if (perf_mmap__read_init(md) < 0) 3453 + if (perf_mmap__read_init(&md->core) < 0) 3805 3454 continue; 3806 3455 3807 - while ((event = perf_mmap__read_event(md)) != NULL) { 3456 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 3808 3457 ++trace->nr_events; 3809 3458 3810 3459 err = trace__deliver_event(trace, event); 3811 3460 if (err) 3812 3461 goto out_disable; 3813 3462 3814 - perf_mmap__consume(md); 3463 + perf_mmap__consume(&md->core); 3815 3464 3816 3465 if (interrupted) 3817 3466 goto out_disable; ··· 3821 3470 draining = true; 3822 3471 } 3823 3472 } 3824 - perf_mmap__read_done(md); 3473 + perf_mmap__read_done(&md->core); 3825 3474 } 3826 3475 3827 3476 if (trace->nr_events == before) { ··· 4209 3858 return 0; 4210 3859 } 4211 3860 4212 - static void evlist__set_evsel_handler(struct evlist *evlist, void *handler) 3861 + static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler) 4213 3862 { 4214 3863 struct evsel *evsel; 4215 3864 4216 - evlist__for_each_entry(evlist, evsel) 4217 - evsel->handler = handler; 3865 + evlist__for_each_entry(evlist, evsel) { 3866 + if (evsel->handler == NULL) 3867 + evsel->handler = handler; 3868 + } 4218 3869 } 4219 3870 4220 3871 static int evlist__set_syscall_tp_fields(struct evlist *evlist) ··· 4227 3874 if (evsel->priv || !evsel->tp_format) 4228 3875 continue; 4229 3876 4230 - if (strcmp(evsel->tp_format->system, "syscalls")) 3877 + if (strcmp(evsel->tp_format->system, "syscalls")) { 3878 + perf_evsel__init_tp_arg_scnprintf(evsel); 4231 3879 continue; 3880 + } 4232 3881 4233 3882 if (perf_evsel__init_syscall_tp(evsel)) 4234 3883 return -1; ··· 4384 4029 int err = 0; 4385 4030 4386 4031 if (!strcmp(var, "trace.add_events")) { 4387 - struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", 4388 - "event selector. use 'perf list' to list available events", 4389 - parse_events_option); 4390 - /* 4391 - * We can't propagate parse_event_option() return, as it is 1 4392 - * for failure while perf_config() expects -1. 4393 - */ 4394 - if (parse_events_option(&o, value, 0)) 4395 - err = -1; 4032 + trace->perfconfig_events = strdup(value); 4033 + if (trace->perfconfig_events == NULL) { 4034 + pr_err("Not enough memory for %s\n", "trace.add_events"); 4035 + return -1; 4036 + } 4396 4037 } else if (!strcmp(var, "trace.show_timestamp")) { 4397 4038 trace->show_tstamp = perf_config_bool(var, value); 4398 4039 } else if (!strcmp(var, "trace.show_duration")) { ··· 4412 4061 int args_alignment = 0; 4413 4062 if (perf_config_int(&args_alignment, var, value) == 0) 4414 4063 trace->args_alignment = args_alignment; 4064 + } else if (!strcmp(var, "trace.tracepoint_beautifiers")) { 4065 + if (strcasecmp(value, "libtraceevent") == 0) 4066 + trace->libtraceevent_print = true; 4067 + else if (strcasecmp(value, "libbeauty") == 0) 4068 + trace->libtraceevent_print = false; 4415 4069 } 4416 4070 out: 4417 4071 return err; ··· 4459 4103 OPT_CALLBACK('e', "event", &trace, "event", 4460 4104 "event/syscall selector. 
use 'perf list' to list available events", 4461 4105 trace__parse_events_option), 4106 + OPT_CALLBACK(0, "filter", &trace.evlist, "filter", 4107 + "event filter", parse_filter), 4462 4108 OPT_BOOLEAN(0, "comm", &trace.show_comm, 4463 4109 "show the thread COMM next to its id"), 4464 4110 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), ··· 4508 4150 OPT_CALLBACK(0, "call-graph", &trace.opts, 4509 4151 "record_mode[,record_size]", record_callchain_help, 4510 4152 &record_parse_callchain_opt), 4153 + OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print, 4154 + "Use libtraceevent to print the tracepoint arguments."), 4511 4155 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, 4512 4156 "Show the kernel callchains on the syscall exit path"), 4513 4157 OPT_ULONG(0, "max-events", &trace.max_events, ··· 4570 4210 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, 4571 4211 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); 4572 4212 4213 + /* 4214 + * Here we already passed thru trace__parse_events_option() and it has 4215 + * already figured out if -e syscall_name, if not but if --event 4216 + * foo:bar was used, the user is interested _just_ in those, say, 4217 + * tracepoint events, not in the strace-like syscall-name-based mode. 4218 + * 4219 + * This is important because we need to check if strace-like mode is 4220 + * needed to decided if we should filter out the eBPF 4221 + * __augmented_syscalls__ code, if it is in the mix, say, via 4222 + * .perfconfig trace.add_events, and filter those out. 4223 + */ 4224 + if (!trace.trace_syscalls && !trace.trace_pgfaults && 4225 + trace.evlist->core.nr_entries == 0 /* Was --events used? */) { 4226 + trace.trace_syscalls = true; 4227 + } 4228 + /* 4229 + * Now that we have --verbose figured out, lets see if we need to parse 4230 + * events from .perfconfig, so that if those events fail parsing, say some 4231 + * BPF program fails, then we'll be able to use --verbose to see what went 4232 + * wrong in more detail. 4233 + */ 4234 + if (trace.perfconfig_events != NULL) { 4235 + struct parse_events_error parse_err = { .idx = 0, }; 4236 + 4237 + err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err); 4238 + if (err) { 4239 + parse_events_print_error(&parse_err, trace.perfconfig_events); 4240 + goto out; 4241 + } 4242 + } 4243 + 4573 4244 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) { 4574 4245 usage_with_options_msg(trace_usage, trace_options, 4575 4246 "cgroup monitoring only available in system-wide mode"); ··· 4629 4238 4630 4239 trace.bpf_obj = evsel->bpf_obj; 4631 4240 4632 - trace__set_bpf_map_filtered_pids(&trace); 4633 - trace__set_bpf_map_syscalls(&trace); 4634 - trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented"); 4241 + /* 4242 + * If we have _just_ the augmenter event but don't have a 4243 + * explicit --syscalls, then assume we want all strace-like 4244 + * syscalls: 4245 + */ 4246 + if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace)) 4247 + trace.trace_syscalls = true; 4248 + /* 4249 + * So, if we have a syscall augmenter, but trace_syscalls, aka 4250 + * strace-like syscall tracing is not set, then we need to trow 4251 + * away the augmenter, i.e. all the events that were created 4252 + * from that BPF object file. 
4253 + * 4254 + * This is more to fix the current .perfconfig trace.add_events 4255 + * style of setting up the strace-like eBPF based syscall point 4256 + * payload augmenter. 4257 + * 4258 + * All this complexity will be avoided by adding an alternative 4259 + * to trace.add_events in the form of 4260 + * trace.bpf_augmented_syscalls, that will be only parsed if we 4261 + * need it. 4262 + * 4263 + * .perfconfig trace.add_events is still useful if we want, for 4264 + * instance, have msr_write.msr in some .perfconfig profile based 4265 + * 'perf trace --config determinism.profile' mode, where for some 4266 + * particular goal/workload type we want a set of events and 4267 + * output mode (with timings, etc) instead of having to add 4268 + * all via the command line. 4269 + * 4270 + * Also --config to specify an alternate .perfconfig file needs 4271 + * to be implemented. 4272 + */ 4273 + if (!trace.trace_syscalls) { 4274 + trace__delete_augmented_syscalls(&trace); 4275 + } else { 4276 + trace__set_bpf_map_filtered_pids(&trace); 4277 + trace__set_bpf_map_syscalls(&trace); 4278 + trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented"); 4279 + } 4635 4280 } 4636 4281 4637 4282 err = bpf__setup_stdout(trace.evlist); ··· 4714 4287 } 4715 4288 4716 4289 if (trace.evlist->core.nr_entries > 0) { 4717 - evlist__set_evsel_handler(trace.evlist, trace__event_handler); 4290 + evlist__set_default_evsel_handler(trace.evlist, trace__event_handler); 4718 4291 if (evlist__set_syscall_tp_fields(trace.evlist)) { 4719 4292 perror("failed to set syscalls:* tracepoint fields"); 4720 4293 goto out; ··· 4810 4383 if (trace.summary_only) 4811 4384 trace.summary = trace.summary_only; 4812 4385 4813 - if (!trace.trace_syscalls && !trace.trace_pgfaults && 4814 - trace.evlist->core.nr_entries == 0 /* Was --events used? */) { 4815 - trace.trace_syscalls = true; 4816 - } 4817 - 4818 4386 if (output_name != NULL) { 4819 4387 err = trace__open_output(&trace, output_name); 4820 4388 if (err < 0) { ··· 4848 4426 if (output_name != NULL) 4849 4427 fclose(trace.output); 4850 4428 out: 4429 + zfree(&trace.perfconfig_events); 4851 4430 return err; 4852 4431 }
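trace__expand_filter() above is what lets the tracepoint filters in the --filter examples shown earlier accept strings: it scans the filter expression for a comparison whose right-hand side is alphabetic, resolves that name through the argument's ->strtoul() method, and splices the numeric value back into the filter. A self-contained sketch of that round trip, with a hypothetical two-entry MSR table standing in for the generated x86 one (the real resolver is STUL_X86_MSR):

#define _GNU_SOURCE		/* asprintf() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* hypothetical stand-in for the generated x86 MSR string table */
static const char *msr_names[] = { "IA32_TSC_ADJUST", "IA32_SPEC_CTRL" };
static const unsigned long long msr_vals[] = { 0x3b, 0x48 };

static int msr__strtoul(const char *name, size_t len, unsigned long long *val)
{
	for (size_t i = 0; i < sizeof(msr_names) / sizeof(msr_names[0]); i++) {
		if (strlen(msr_names[i]) == len && !strncmp(msr_names[i], name, len)) {
			*val = msr_vals[i];
			return 1;
		}
	}
	return 0;
}

int main(void)
{
	const char *filter = "msr==IA32_SPEC_CTRL";
	const char *rhs = strchr(filter, '=') + 2;	/* skip "==" */
	unsigned long long val;
	char *expanded;

	if (msr__strtoul(rhs, strlen(rhs), &val) &&
	    asprintf(&expanded, "%.*s%#llx", (int)(rhs - filter), filter, val) >= 0) {
		printf("%s -> %s\n", filter, expanded);	/* msr==0x48 */
		free(expanded);
	}
	return 0;
}

The expanded, purely numeric expression is what actually gets handed to the in-kernel tracepoint filter (and, later, to eBPF).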
tools/perf/check-headers.sh (+1)
···
 arch/x86/include/asm/required-features.h
 arch/x86/include/asm/cpufeatures.h
 arch/x86/include/asm/inat_types.h
+arch/x86/include/asm/msr-index.h
 arch/x86/include/uapi/asm/prctl.h
 arch/x86/lib/x86-opcode-map.txt
 arch/x86/tools/gen-insn-attr-x86.awk
tools/perf/lib/Build (+1)
···
 libperf-y += threadmap.o
 libperf-y += evsel.o
 libperf-y += evlist.o
+libperf-y += mmap.o
 libperf-y += zalloc.o
 libperf-y += xyarray.o
 libperf-y += lib.o
tools/perf/lib/Makefile (+3 -2)
···
 	$(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
 	$(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
 	$(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
-	$(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644);
-	$(call do_install,include/perf/event.h,$(prefix)/include/perf,644);
+	$(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
+	$(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \
+	$(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644);

 install_pkgconfig: $(LIBPERF_PC)
 	$(call QUIET_INSTALL, $(LIBPERF_PC)) \
tools/perf/lib/core.c (+2 -1)
···
 #include <stdio.h>
 #include <stdarg.h>
 #include <unistd.h>
+#include <linux/compiler.h>
 #include <perf/core.h>
 #include <internal/lib.h>
 #include "internal.h"

-static int __base_pr(enum libperf_print_level level, const char *format,
+static int __base_pr(enum libperf_print_level level __maybe_unused, const char *format,
 		     va_list args)
 {
 	return vfprintf(stderr, format, args);
tools/perf/lib/evlist.c (+324)
··· 8 8 #include <internal/evlist.h> 9 9 #include <internal/evsel.h> 10 10 #include <internal/xyarray.h> 11 + #include <internal/mmap.h> 12 + #include <internal/cpumap.h> 13 + #include <internal/threadmap.h> 14 + #include <internal/xyarray.h> 15 + #include <internal/lib.h> 11 16 #include <linux/zalloc.h> 17 + #include <sys/ioctl.h> 12 18 #include <stdlib.h> 13 19 #include <errno.h> 14 20 #include <unistd.h> 15 21 #include <fcntl.h> 16 22 #include <signal.h> 17 23 #include <poll.h> 24 + #include <sys/mman.h> 18 25 #include <perf/cpumap.h> 19 26 #include <perf/threadmap.h> 20 27 #include <api/fd/array.h> ··· 34 27 INIT_HLIST_HEAD(&evlist->heads[i]); 35 28 INIT_LIST_HEAD(&evlist->entries); 36 29 evlist->nr_entries = 0; 30 + fdarray__init(&evlist->pollfd, 64); 37 31 } 38 32 39 33 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, ··· 109 101 return next; 110 102 } 111 103 104 + static void perf_evlist__purge(struct perf_evlist *evlist) 105 + { 106 + struct perf_evsel *pos, *n; 107 + 108 + perf_evlist__for_each_entry_safe(evlist, n, pos) { 109 + list_del_init(&pos->node); 110 + perf_evsel__delete(pos); 111 + } 112 + 113 + evlist->nr_entries = 0; 114 + } 115 + 116 + void perf_evlist__exit(struct perf_evlist *evlist) 117 + { 118 + perf_cpu_map__put(evlist->cpus); 119 + perf_thread_map__put(evlist->threads); 120 + evlist->cpus = NULL; 121 + evlist->threads = NULL; 122 + fdarray__exit(&evlist->pollfd); 123 + } 124 + 112 125 void perf_evlist__delete(struct perf_evlist *evlist) 113 126 { 127 + if (evlist == NULL) 128 + return; 129 + 130 + perf_evlist__munmap(evlist); 131 + perf_evlist__close(evlist); 132 + perf_evlist__purge(evlist); 133 + perf_evlist__exit(evlist); 114 134 free(evlist); 115 135 } 116 136 ··· 313 277 return pos; 314 278 } 315 279 280 + static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, 281 + void *arg __maybe_unused) 282 + { 283 + struct perf_mmap *map = fda->priv[fd].ptr; 284 + 285 + if (map) 286 + perf_mmap__put(map); 287 + } 288 + 289 + int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) 290 + { 291 + return fdarray__filter(&evlist->pollfd, revents_and_mask, 292 + perf_evlist__munmap_filtered, NULL); 293 + } 294 + 316 295 int perf_evlist__poll(struct perf_evlist *evlist, int timeout) 317 296 { 318 297 return fdarray__poll(&evlist->pollfd, timeout); 298 + } 299 + 300 + static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite) 301 + { 302 + int i; 303 + struct perf_mmap *map; 304 + 305 + evlist->nr_mmaps = perf_cpu_map__nr(evlist->cpus); 306 + if (perf_cpu_map__empty(evlist->cpus)) 307 + evlist->nr_mmaps = perf_thread_map__nr(evlist->threads); 308 + 309 + map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); 310 + if (!map) 311 + return NULL; 312 + 313 + for (i = 0; i < evlist->nr_mmaps; i++) { 314 + /* 315 + * When the perf_mmap() call is made we grab one refcount, plus 316 + * one extra to let perf_mmap__consume() get the last 317 + * events after all real references (perf_mmap__get()) are 318 + * dropped. 319 + * 320 + * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and 321 + * thus does perf_mmap__get() on it. 
322 + */ 323 + perf_mmap__init(&map[i], overwrite, NULL); 324 + } 325 + 326 + return map; 327 + } 328 + 329 + static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, 330 + struct perf_evsel *evsel, int idx, int cpu, 331 + int thread) 332 + { 333 + struct perf_sample_id *sid = SID(evsel, cpu, thread); 334 + 335 + sid->idx = idx; 336 + if (evlist->cpus && cpu >= 0) 337 + sid->cpu = evlist->cpus->map[cpu]; 338 + else 339 + sid->cpu = -1; 340 + if (!evsel->system_wide && evlist->threads && thread >= 0) 341 + sid->tid = perf_thread_map__pid(evlist->threads, thread); 342 + else 343 + sid->tid = -1; 344 + } 345 + 346 + static struct perf_mmap* 347 + perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) 348 + { 349 + struct perf_mmap *map = &evlist->mmap[idx]; 350 + 351 + if (overwrite) { 352 + if (!evlist->mmap_ovw) { 353 + evlist->mmap_ovw = perf_evlist__alloc_mmap(evlist, true); 354 + if (!evlist->mmap_ovw) 355 + return NULL; 356 + } 357 + map = &evlist->mmap_ovw[idx]; 358 + } 359 + 360 + return map; 361 + } 362 + 363 + #define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) 364 + 365 + static int 366 + perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, 367 + int output, int cpu) 368 + { 369 + return perf_mmap__mmap(map, mp, output, cpu); 370 + } 371 + 372 + static int 373 + mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, 374 + int idx, struct perf_mmap_param *mp, int cpu_idx, 375 + int thread, int *_output, int *_output_overwrite) 376 + { 377 + int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); 378 + struct perf_evsel *evsel; 379 + int revent; 380 + 381 + perf_evlist__for_each_entry(evlist, evsel) { 382 + bool overwrite = evsel->attr.write_backward; 383 + struct perf_mmap *map; 384 + int *output, fd, cpu; 385 + 386 + if (evsel->system_wide && thread) 387 + continue; 388 + 389 + cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu); 390 + if (cpu == -1) 391 + continue; 392 + 393 + map = ops->get(evlist, overwrite, idx); 394 + if (map == NULL) 395 + return -ENOMEM; 396 + 397 + if (overwrite) { 398 + mp->prot = PROT_READ; 399 + output = _output_overwrite; 400 + } else { 401 + mp->prot = PROT_READ | PROT_WRITE; 402 + output = _output; 403 + } 404 + 405 + fd = FD(evsel, cpu, thread); 406 + 407 + if (*output == -1) { 408 + *output = fd; 409 + 410 + /* 411 + * The last one will be done at perf_mmap__consume(), so that we 412 + * make sure we don't prevent tools from consuming every last event in 413 + * the ring buffer. 414 + * 415 + * I.e. we can get the POLLHUP meaning that the fd doesn't exist 416 + * anymore, but the last events for it are still in the ring buffer, 417 + * waiting to be consumed. 418 + * 419 + * Tools can chose to ignore this at their own discretion, but the 420 + * evlist layer can't just drop it when filtering events in 421 + * perf_evlist__filter_pollfd(). 422 + */ 423 + refcount_set(&map->refcnt, 2); 424 + 425 + if (ops->mmap(map, mp, *output, evlist_cpu) < 0) 426 + return -1; 427 + } else { 428 + if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 429 + return -1; 430 + 431 + perf_mmap__get(map); 432 + } 433 + 434 + revent = !overwrite ? 
POLLIN : 0; 435 + 436 + if (!evsel->system_wide && 437 + perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) { 438 + perf_mmap__put(map); 439 + return -1; 440 + } 441 + 442 + if (evsel->attr.read_format & PERF_FORMAT_ID) { 443 + if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 444 + fd) < 0) 445 + return -1; 446 + perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 447 + thread); 448 + } 449 + } 450 + 451 + return 0; 452 + } 453 + 454 + static int 455 + mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, 456 + struct perf_mmap_param *mp) 457 + { 458 + int thread; 459 + int nr_threads = perf_thread_map__nr(evlist->threads); 460 + 461 + for (thread = 0; thread < nr_threads; thread++) { 462 + int output = -1; 463 + int output_overwrite = -1; 464 + 465 + if (ops->idx) 466 + ops->idx(evlist, mp, thread, false); 467 + 468 + if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, 469 + &output, &output_overwrite)) 470 + goto out_unmap; 471 + } 472 + 473 + return 0; 474 + 475 + out_unmap: 476 + perf_evlist__munmap(evlist); 477 + return -1; 478 + } 479 + 480 + static int 481 + mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, 482 + struct perf_mmap_param *mp) 483 + { 484 + int nr_threads = perf_thread_map__nr(evlist->threads); 485 + int nr_cpus = perf_cpu_map__nr(evlist->cpus); 486 + int cpu, thread; 487 + 488 + for (cpu = 0; cpu < nr_cpus; cpu++) { 489 + int output = -1; 490 + int output_overwrite = -1; 491 + 492 + if (ops->idx) 493 + ops->idx(evlist, mp, cpu, true); 494 + 495 + for (thread = 0; thread < nr_threads; thread++) { 496 + if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, 497 + thread, &output, &output_overwrite)) 498 + goto out_unmap; 499 + } 500 + } 501 + 502 + return 0; 503 + 504 + out_unmap: 505 + perf_evlist__munmap(evlist); 506 + return -1; 507 + } 508 + 509 + int perf_evlist__mmap_ops(struct perf_evlist *evlist, 510 + struct perf_evlist_mmap_ops *ops, 511 + struct perf_mmap_param *mp) 512 + { 513 + struct perf_evsel *evsel; 514 + const struct perf_cpu_map *cpus = evlist->cpus; 515 + const struct perf_thread_map *threads = evlist->threads; 516 + 517 + if (!ops || !ops->get || !ops->mmap) 518 + return -EINVAL; 519 + 520 + if (!evlist->mmap) 521 + evlist->mmap = perf_evlist__alloc_mmap(evlist, false); 522 + if (!evlist->mmap) 523 + return -ENOMEM; 524 + 525 + perf_evlist__for_each_entry(evlist, evsel) { 526 + if ((evsel->attr.read_format & PERF_FORMAT_ID) && 527 + evsel->sample_id == NULL && 528 + perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) 529 + return -ENOMEM; 530 + } 531 + 532 + if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 533 + return -ENOMEM; 534 + 535 + if (perf_cpu_map__empty(cpus)) 536 + return mmap_per_thread(evlist, ops, mp); 537 + 538 + return mmap_per_cpu(evlist, ops, mp); 539 + } 540 + 541 + int perf_evlist__mmap(struct perf_evlist *evlist, int pages) 542 + { 543 + struct perf_mmap_param mp; 544 + struct perf_evlist_mmap_ops ops = { 545 + .get = perf_evlist__mmap_cb_get, 546 + .mmap = perf_evlist__mmap_cb_mmap, 547 + }; 548 + 549 + evlist->mmap_len = (pages + 1) * page_size; 550 + mp.mask = evlist->mmap_len - page_size - 1; 551 + 552 + return perf_evlist__mmap_ops(evlist, &ops, &mp); 553 + } 554 + 555 + void perf_evlist__munmap(struct perf_evlist *evlist) 556 + { 557 + int i; 558 + 559 + if (evlist->mmap) { 560 + for (i = 0; i < evlist->nr_mmaps; i++) 561 + perf_mmap__munmap(&evlist->mmap[i]); 562 + } 563 + 564 + if (evlist->mmap_ovw) { 565 + for (i = 0; i < 
evlist->nr_mmaps; i++) 566 + perf_mmap__munmap(&evlist->mmap_ovw[i]); 567 + } 568 + 569 + zfree(&evlist->mmap); 570 + zfree(&evlist->mmap_ovw); 319 571 }
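perf_evlist__mmap(evlist, pages) sizes each ring as (pages + 1) * page_size, the extra page being the perf_event_mmap_page control page, and drives the generic mmap_per_cpu()/mmap_per_thread() walk through the ops vector (idx/get/mmap callbacks), which is what lets the perf tool layer aio and compression on top while libperf owns the common path. The consumption side this pairs with, as a free-standing helper rather than a complete program, since this snapshot of the API does not yet export a public per-ring accessor (see the libperf.map hunk below):

#include <stdio.h>
#include <perf/event.h>
#include <perf/mmap.h>

static void drain_ring(struct perf_mmap *map)
{
	union perf_event *event;

	if (perf_mmap__read_init(map) < 0)
		return;				/* nothing new to read */

	while ((event = perf_mmap__read_event(map)) != NULL) {
		printf("event: type %u, size %u\n",
		       event->header.type, (unsigned int)event->header.size);
		perf_mmap__consume(map);	/* hand the slot back to the kernel */
	}

	perf_mmap__read_done(map);
}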
tools/perf/lib/include/internal/evlist.h (+40)
···

 struct perf_cpu_map;
 struct perf_thread_map;
+struct perf_mmap_param;

 struct perf_evlist {
 	struct list_head	 entries;
···
 	size_t			 mmap_len;
 	struct fdarray		 pollfd;
 	struct hlist_head	 heads[PERF_EVLIST__HLIST_SIZE];
+	struct perf_mmap	*mmap;
+	struct perf_mmap	*mmap_ovw;
+};
+
+typedef void
+(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool);
+typedef struct perf_mmap*
+(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
+typedef int
+(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
+
+struct perf_evlist_mmap_ops {
+	perf_evlist_mmap__cb_idx_t	idx;
+	perf_evlist_mmap__cb_get_t	get;
+	perf_evlist_mmap__cb_mmap_t	mmap;
 };

 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
 			    void *ptr, short revent);
+
+int perf_evlist__mmap_ops(struct perf_evlist *evlist,
+			  struct perf_evlist_mmap_ops *ops,
+			  struct perf_mmap_param *mp);
+
+void perf_evlist__exit(struct perf_evlist *evlist);

 /**
  * __perf_evlist__for_each_entry - iterate thru all the evsels
···
  */
 #define perf_evlist__for_each_entry_reverse(evlist, evsel) \
 	__perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
+
+/**
+ * __perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @tmp: struct evsel temp iterator
+ * @evsel: struct evsel iterator
+ */
+#define __perf_evlist__for_each_entry_safe(list, tmp, evsel) \
+	list_for_each_entry_safe(evsel, tmp, list, node)
+
+/**
+ * perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ * @tmp: struct evsel temp iterator
+ */
+#define perf_evlist__for_each_entry_safe(evlist, tmp, evsel) \
+	__perf_evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel)

 static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
 {
tools/perf/lib/include/internal/mmap.h (+33 -11)
···
 /* perf sample has 16 bits size limit */
 #define PERF_SAMPLE_MAX_SIZE (1 << 16)

+struct perf_mmap;
+
+typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
+
 /**
  * struct perf_mmap - perf's ring buffer mmap details
  *
  * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
  */
 struct perf_mmap {
-	void *base;
-	int mask;
-	int fd;
-	int cpu;
-	refcount_t refcnt;
-	u64 prev;
-	u64 start;
-	u64 end;
-	bool overwrite;
-	u64 flush;
-	char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+	void			*base;
+	int			 mask;
+	int			 fd;
+	int			 cpu;
+	refcount_t		 refcnt;
+	u64			 prev;
+	u64			 start;
+	u64			 end;
+	bool			 overwrite;
+	u64			 flush;
+	libperf_unmap_cb_t	 unmap_cb;
+	char			 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
 };
+
+struct perf_mmap_param {
+	int	prot;
+	int	mask;
+};
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map);
+
+void perf_mmap__init(struct perf_mmap *map, bool overwrite,
+		     libperf_unmap_cb_t unmap_cb);
+int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
+		    int fd, int cpu);
+void perf_mmap__munmap(struct perf_mmap *map);
+void perf_mmap__get(struct perf_mmap *map);
+void perf_mmap__put(struct perf_mmap *map);
+
+u64 perf_mmap__read_head(struct perf_mmap *map);

 #endif /* __LIBPERF_INTERNAL_MMAP_H */
tools/perf/lib/include/perf/core.h (+2)
···
 	LIBPERF_WARN,
 	LIBPERF_INFO,
 	LIBPERF_DEBUG,
+	LIBPERF_DEBUG2,
+	LIBPERF_DEBUG3,
 };

 typedef int (*libperf_print_fn_t)(enum libperf_print_level level,
tools/perf/lib/include/perf/evlist.h (+5)
···
 					 struct perf_cpu_map *cpus,
 					 struct perf_thread_map *threads);
 LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
+LIBPERF_API int perf_evlist__filter_pollfd(struct perf_evlist *evlist,
+					   short revents_and_mask);
+
+LIBPERF_API int perf_evlist__mmap(struct perf_evlist *evlist, int pages);
+LIBPERF_API void perf_evlist__munmap(struct perf_evlist *evlist);

 #endif /* __LIBPERF_EVLIST_H */
tools/perf/lib/include/perf/mmap.h (new file, +15)
···
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LIBPERF_MMAP_H
+#define __LIBPERF_MMAP_H
+
+#include <perf/core.h>
+
+struct perf_mmap;
+union perf_event;
+
+LIBPERF_API void perf_mmap__consume(struct perf_mmap *map);
+LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map);
+LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map);
+LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map);
+
+#endif /* __LIBPERF_MMAP_H */
tools/perf/lib/internal.h (+2)
···
 #define pr_warning(fmt, ...)	__pr(LIBPERF_WARN, fmt, ##__VA_ARGS__)
 #define pr_info(fmt, ...)	__pr(LIBPERF_INFO, fmt, ##__VA_ARGS__)
 #define pr_debug(fmt, ...)	__pr(LIBPERF_DEBUG, fmt, ##__VA_ARGS__)
+#define pr_debug2(fmt, ...)	__pr(LIBPERF_DEBUG2, fmt, ##__VA_ARGS__)
+#define pr_debug3(fmt, ...)	__pr(LIBPERF_DEBUG3, fmt, ##__VA_ARGS__)

 #endif /* __LIBPERF_INTERNAL_H */
tools/perf/lib/libperf.map (+7)
···
 		perf_evlist__next;
 		perf_evlist__set_maps;
 		perf_evlist__poll;
+		perf_evlist__mmap;
+		perf_evlist__munmap;
+		perf_evlist__filter_pollfd;
+		perf_mmap__consume;
+		perf_mmap__read_init;
+		perf_mmap__read_done;
+		perf_mmap__read_event;
 	local:
 		*;
 };
tools/perf/lib/mmap.c (new file, +273)
···
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/mman.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/ring_buffer.h>
+#include <linux/perf_event.h>
+#include <perf/mmap.h>
+#include <perf/event.h>
+#include <internal/mmap.h>
+#include <internal/lib.h>
+#include <linux/kernel.h>
+#include "internal.h"
+
+void perf_mmap__init(struct perf_mmap *map, bool overwrite,
+		     libperf_unmap_cb_t unmap_cb)
+{
+	map->fd = -1;
+	map->overwrite = overwrite;
+	map->unmap_cb = unmap_cb;
+	refcount_set(&map->refcnt, 0);
+}
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map)
+{
+	return map->mask + 1 + page_size;
+}
+
+int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
+		    int fd, int cpu)
+{
+	map->prev = 0;
+	map->mask = mp->mask;
+	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
+			 MAP_SHARED, fd, 0);
+	if (map->base == MAP_FAILED) {
+		map->base = NULL;
+		return -1;
+	}
+
+	map->fd = fd;
+	map->cpu = cpu;
+	return 0;
+}
+
+void perf_mmap__munmap(struct perf_mmap *map)
+{
+	if (map && map->base != NULL) {
+		munmap(map->base, perf_mmap__mmap_len(map));
+		map->base = NULL;
+		map->fd = -1;
+		refcount_set(&map->refcnt, 0);
+	}
+	if (map && map->unmap_cb)
+		map->unmap_cb(map);
+}
+
+void perf_mmap__get(struct perf_mmap *map)
+{
+	refcount_inc(&map->refcnt);
+}
+
+void perf_mmap__put(struct perf_mmap *map)
+{
+	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
+
+	if (refcount_dec_and_test(&map->refcnt))
+		perf_mmap__munmap(map);
+}
+
+static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
+{
+	ring_buffer_write_tail(md->base, tail);
+}
+
+u64 perf_mmap__read_head(struct perf_mmap *map)
+{
+	return ring_buffer_read_head(map->base);
+}
+
+static bool perf_mmap__empty(struct perf_mmap *map)
+{
+	struct perf_event_mmap_page *pc = map->base;
+
+	return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
+}
+
+void perf_mmap__consume(struct perf_mmap *map)
+{
+	if (!map->overwrite) {
+		u64 old = map->prev;
+
+		perf_mmap__write_tail(map, old);
+	}
+
+	if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
+		perf_mmap__put(map);
+}
+
+static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
+{
+	struct perf_event_header *pheader;
+	u64 evt_head = *start;
+	int size = mask + 1;
+
+	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
+	pheader = (struct perf_event_header *)(buf + (*start & mask));
+	while (true) {
+		if (evt_head - *start >= (unsigned int)size) {
+			pr_debug("Finished reading overwrite ring buffer: rewind\n");
+			if (evt_head - *start > (unsigned int)size)
+				evt_head -= pheader->size;
+			*end = evt_head;
+			return 0;
+		}
+
+		pheader = (struct perf_event_header *)(buf + (evt_head & mask));
+
+		if (pheader->size == 0) {
+			pr_debug("Finished reading overwrite ring buffer: get start\n");
+			*end = evt_head;
+			return 0;
+		}
+
+		evt_head += pheader->size;
+		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
+	}
+	WARN_ONCE(1, "Shouldn't get here\n");
+	return -1;
+}
+
+/*
+ * Report the start and end of the available data in ringbuffer
+ */
+static int __perf_mmap__read_init(struct perf_mmap *md)
+{
+	u64 head = perf_mmap__read_head(md);
+	u64 old = md->prev;
+	unsigned char *data = md->base + page_size;
+	unsigned long size;
+
+	md->start = md->overwrite ? head : old;
+	md->end = md->overwrite ? old : head;
+
+	if ((md->end - md->start) < md->flush)
+		return -EAGAIN;
+
+	size = md->end - md->start;
+	if (size > (unsigned long)(md->mask) + 1) {
+		if (!md->overwrite) {
+			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+
+			md->prev = head;
+			perf_mmap__consume(md);
+			return -EAGAIN;
+		}
+
+		/*
+		 * Backward ring buffer is full. We still have a chance to read
+		 * most of data from it.
+		 */
+		if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+int perf_mmap__read_init(struct perf_mmap *map)
+{
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return -ENOENT;
+
+	return __perf_mmap__read_init(map);
+}
+
+/*
+ * Mandatory for overwrite mode
+ * The direction of overwrite mode is backward.
+ * The last perf_mmap__read() will set tail to map->core.prev.
+ * Need to correct the map->core.prev to head which is the end of next read.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return;
+
+	map->prev = perf_mmap__read_head(map);
+}
+
+/* When check_messup is true, 'end' must point to a good entry */
+static union perf_event *perf_mmap__read(struct perf_mmap *map,
+					 u64 *startp, u64 end)
+{
+	unsigned char *data = map->base + page_size;
+	union perf_event *event = NULL;
+	int diff = end - *startp;
+
+	if (diff >= (int)sizeof(event->header)) {
+		size_t size;
+
+		event = (union perf_event *)&data[*startp & map->mask];
+		size = event->header.size;
+
+		if (size < sizeof(event->header) || diff < (int)size)
+			return NULL;
+
+		/*
+		 * Event straddles the mmap boundary -- header should always
+		 * be inside due to u64 alignment of output.
+		 */
+		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+			unsigned int offset = *startp;
+			unsigned int len = min(sizeof(*event), size), cpy;
+			void *dst = map->event_copy;
+
+			do {
+				cpy = min(map->mask + 1 - (offset & map->mask), len);
+				memcpy(dst, &data[offset & map->mask], cpy);
+				offset += cpy;
+				dst += cpy;
+				len -= cpy;
+			} while (len);
+
+			event = (union perf_event *)map->event_copy;
+		}
+
+		*startp += size;
+	}
+
+	return event;
+}
+
+/*
+ * Read event from ring buffer one by one.
+ * Return one event for each call.
+ *
+ * Usage:
+ * perf_mmap__read_init()
+ * while(event = perf_mmap__read_event()) {
+ *	//process the event
+ *	perf_mmap__consume()
+ * }
+ * perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map)
+{
+	union perf_event *event;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!refcount_read(&map->refcnt))
+		return NULL;
+
+	/* non-overwrite doesn't pause the ringbuffer */
+	if (!map->overwrite)
+		map->end = perf_mmap__read_head(map);
+
+	event = perf_mmap__read(map, &map->start, map->end);
+
+	if (!map->overwrite)
+		map->prev = map->start;
+
+	return event;
+}
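The usage comment in perf_mmap__read_event() documents the intended consumer loop. A minimal sketch of that loop, built only on the functions in this file, and assuming 'map' was already mmapped and its refcount raised elsewhere (that setup is not part of this file):

/* Drain one ring buffer following the documented usage. */
#include <perf/event.h>		/* union perf_event */
#include <internal/mmap.h>	/* struct perf_mmap */

static void drain_one_mmap(struct perf_mmap *map)
{
	union perf_event *event;

	/* -EAGAIN: fewer than 'flush' bytes available;
	 * -ENOENT: map was unmapped on POLLHUP/POLLERR. */
	if (perf_mmap__read_init(map) < 0)
		return;

	while ((event = perf_mmap__read_event(map)) != NULL) {
		/* Process 'event' before consuming it: once the tail
		 * advances, the kernel may overwrite the data. */
		perf_mmap__consume(map);
	}

	/* Mandatory for overwrite mode: rewinds 'prev' to the head. */
	perf_mmap__read_done(map);
}

The same loop serves an overwrite (backward) buffer, where perf_mmap__read_init() additionally locates the valid data range via overwrite_rb_find_range().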
tools/perf/perf-sys.h (+4 -2)
···
 void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
		      int fd, int group_fd, unsigned long flags);
 
-#define HAVE_ATTR_TEST
+#ifndef HAVE_ATTR_TEST
+#define HAVE_ATTR_TEST 1
+#endif
 
 static inline int
 sys_perf_event_open(struct perf_event_attr *attr,
···
	fd = syscall(__NR_perf_event_open, attr, pid, cpu,
		     group_fd, flags);
 
-#ifdef HAVE_ATTR_TEST
+#if HAVE_ATTR_TEST
	if (unlikely(test_attr__enabled))
		test_attr__open(attr, pid, cpu, fd, group_fd, flags);
 #endif
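Previously HAVE_ATTR_TEST was defined unconditionally, so the #ifdef block could never be compiled out. Making the define an overridable default and testing it with #if lets a build pass -DHAVE_ATTR_TEST=0 to drop the test hook. The same pattern in isolation, as a stand-alone illustration rather than perf code:

#include <stdio.h>

/* Overridable default: the builder may pass -DHAVE_ATTR_TEST=0. */
#ifndef HAVE_ATTR_TEST
#define HAVE_ATTR_TEST 1
#endif

int main(void)
{
#if HAVE_ATTR_TEST
	puts("attr test hook compiled in");	/* default build */
#else
	puts("attr test hook compiled out");	/* -DHAVE_ATTR_TEST=0 */
#endif
	return 0;
}

e.g. "cc -DHAVE_ATTR_TEST=0 demo.c" selects the second branch.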
tools/perf/scripts/python/exported-sql-viewer.py (+1531 -24)
···
 glb_nsz = 16
 import re
 import os
+import random
+import copy
+import math
 
 pyside_version_1 = True
 if not "--pyside-version-1" in sys.argv:
···
	if model is None:
		model = create_fn()
		model_cache[model_name] = model
+	model_cache_lock.release()
+	return model
+
+def LookupModel(model_name):
+	model_cache_lock.acquire()
+	try:
+		model = model_cache[model_name]
+	except:
+		model = None
	model_cache_lock.release()
	return model
···
 
 class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
 
-	def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item):
+	def __init__(self, glb, params, row, comm_id, thread_id, calls_id, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item):
		super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item)
		self.comm_id = comm_id
		self.thread_id = thread_id
		self.calls_id = calls_id
+		self.call_time = call_time
+		self.time = time
		self.insn_cnt = insn_cnt
		self.cyc_cnt = cyc_cnt
		self.branch_count = branch_count
-		self.time = time
 
	def Select(self):
		self.query_done = True
···
 
 class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase):
 
-	def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item):
-		super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item)
+	def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item):
+		super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item)
		dso = dsoname(dso)
		if self.params.have_ipc:
			insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt)
			cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt)
			br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count)
			ipc = CalcIPC(cyc_cnt, insn_cnt)
-			self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ]
+			self.data = [ name, dso, str(call_time), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ]
		else:
-			self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
+			self.data = [ name, dso, str(call_time), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
		self.dbid = calls_id
 
 # Call tree data model level two item
···
 class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase):
 
	def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item):
-		super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, parent_item)
+		super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, 0, parent_item)
		if self.params.have_ipc:
			self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""]
		else:
···
		ids.insert(0, query.value(1))
	return ids
 
-# Vertical widget layout
+# Horizontal layout
+
+class HBoxLayout(QHBoxLayout):
+
+	def __init__(self, *children):
+		super(HBoxLayout, self).__init__()
+
+		self.layout().setContentsMargins(0, 0, 0, 0)
+		for child in children:
+			if child.isWidgetType():
+				self.layout().addWidget(child)
+			else:
+				self.layout().addLayout(child)
+
+# Vertical layout
+
+class VBoxLayout(QVBoxLayout):
+
+	def __init__(self, *children):
+		super(VBoxLayout, self).__init__()
+
+		self.layout().setContentsMargins(0, 0, 0, 0)
+		for child in children:
+			if child.isWidgetType():
+				self.layout().addWidget(child)
+			else:
+				self.layout().addLayout(child)
+
+# Vertical layout widget
 
 class VBox():
 
-	def __init__(self, w1, w2, w3=None):
+	def __init__(self, *children):
		self.vbox = QWidget()
-		self.vbox.setLayout(QVBoxLayout())
-
-		self.vbox.layout().setContentsMargins(0, 0, 0, 0)
-
-		self.vbox.layout().addWidget(w1)
-		self.vbox.layout().addWidget(w2)
-		if w3:
-			self.vbox.layout().addWidget(w3)
+		self.vbox.setLayout(VBoxLayout(*children))
 
	def Widget(self):
		return self.vbox
···
 
 class CallTreeWindow(TreeWindowBase):
 
-	def __init__(self, glb, parent=None):
+	def __init__(self, glb, parent=None, thread_at_time=None):
		super(CallTreeWindow, self).__init__(parent)
 
		self.model = LookupCreateModel("Call Tree", lambda x=glb: CallTreeModel(x))
···
		self.setWidget(self.vbox.Widget())
 
		AddSubWindow(glb.mainwindow.mdi_area, self, "Call Tree")
+
+		if thread_at_time:
+			self.DisplayThreadAtTime(*thread_at_time)
+
+	def DisplayThreadAtTime(self, comm_id, thread_id, time):
+		parent = QModelIndex()
+		for dbid in (comm_id, thread_id):
+			found = False
+			n = self.model.rowCount(parent)
+			for row in xrange(n):
+				child = self.model.index(row, 0, parent)
+				if child.internalPointer().dbid == dbid:
+					found = True
+					self.view.setCurrentIndex(child)
+					parent = child
+					break
+			if not found:
+				return
+		found = False
+		while True:
+			n = self.model.rowCount(parent)
+			if not n:
+				return
+			last_child = None
+			for row in xrange(n):
+				child = self.model.index(row, 0, parent)
+				child_call_time = child.internalPointer().call_time
+				if child_call_time < time:
+					last_child = child
+				elif child_call_time == time:
+					self.view.setCurrentIndex(child)
+					return
+				elif child_call_time > time:
+					break
+			if not last_child:
+				if not found:
+					child = self.model.index(0, 0, parent)
+					self.view.setCurrentIndex(child)
+				return
+			found = True
+			self.view.setCurrentIndex(last_child)
+			parent = last_child
+
+# ExecComm() gets the comm_id of the command string that was set when the process exec'd i.e.
the program name 1127 + 1128 + def ExecComm(db, thread_id, time): 1129 + query = QSqlQuery(db) 1130 + QueryExec(query, "SELECT comm_threads.comm_id, comms.c_time, comms.exec_flag" 1131 + " FROM comm_threads" 1132 + " INNER JOIN comms ON comms.id = comm_threads.comm_id" 1133 + " WHERE comm_threads.thread_id = " + str(thread_id) + 1134 + " ORDER BY comms.c_time, comms.id") 1135 + first = None 1136 + last = None 1137 + while query.next(): 1138 + if first is None: 1139 + first = query.value(0) 1140 + if query.value(2) and Decimal(query.value(1)) <= Decimal(time): 1141 + last = query.value(0) 1142 + if not(last is None): 1143 + return last 1144 + return first 1145 + 1146 + # Container for (x, y) data 1147 + 1148 + class XY(): 1149 + def __init__(self, x=0, y=0): 1150 + self.x = x 1151 + self.y = y 1152 + 1153 + def __str__(self): 1154 + return "XY({}, {})".format(str(self.x), str(self.y)) 1155 + 1156 + # Container for sub-range data 1157 + 1158 + class Subrange(): 1159 + def __init__(self, lo=0, hi=0): 1160 + self.lo = lo 1161 + self.hi = hi 1162 + 1163 + def __str__(self): 1164 + return "Subrange({}, {})".format(str(self.lo), str(self.hi)) 1165 + 1166 + # Graph data region base class 1167 + 1168 + class GraphDataRegion(object): 1169 + 1170 + def __init__(self, key, title = "", ordinal = ""): 1171 + self.key = key 1172 + self.title = title 1173 + self.ordinal = ordinal 1174 + 1175 + # Function to sort GraphDataRegion 1176 + 1177 + def GraphDataRegionOrdinal(data_region): 1178 + return data_region.ordinal 1179 + 1180 + # Attributes for a graph region 1181 + 1182 + class GraphRegionAttribute(): 1183 + 1184 + def __init__(self, colour): 1185 + self.colour = colour 1186 + 1187 + # Switch graph data region represents a task 1188 + 1189 + class SwitchGraphDataRegion(GraphDataRegion): 1190 + 1191 + def __init__(self, key, exec_comm_id, pid, tid, comm, thread_id, comm_id): 1192 + super(SwitchGraphDataRegion, self).__init__(key) 1193 + 1194 + self.title = str(pid) + " / " + str(tid) + " " + comm 1195 + # Order graph legend within exec comm by pid / tid / time 1196 + self.ordinal = str(pid).rjust(16) + str(exec_comm_id).rjust(8) + str(tid).rjust(16) 1197 + self.exec_comm_id = exec_comm_id 1198 + self.pid = pid 1199 + self.tid = tid 1200 + self.comm = comm 1201 + self.thread_id = thread_id 1202 + self.comm_id = comm_id 1203 + 1204 + # Graph data point 1205 + 1206 + class GraphDataPoint(): 1207 + 1208 + def __init__(self, data, index, x, y, altx=None, alty=None, hregion=None, vregion=None): 1209 + self.data = data 1210 + self.index = index 1211 + self.x = x 1212 + self.y = y 1213 + self.altx = altx 1214 + self.alty = alty 1215 + self.hregion = hregion 1216 + self.vregion = vregion 1217 + 1218 + # Graph data (single graph) base class 1219 + 1220 + class GraphData(object): 1221 + 1222 + def __init__(self, collection, xbase=Decimal(0), ybase=Decimal(0)): 1223 + self.collection = collection 1224 + self.points = [] 1225 + self.xbase = xbase 1226 + self.ybase = ybase 1227 + self.title = "" 1228 + 1229 + def AddPoint(self, x, y, altx=None, alty=None, hregion=None, vregion=None): 1230 + index = len(self.points) 1231 + 1232 + x = float(Decimal(x) - self.xbase) 1233 + y = float(Decimal(y) - self.ybase) 1234 + 1235 + self.points.append(GraphDataPoint(self, index, x, y, altx, alty, hregion, vregion)) 1236 + 1237 + def XToData(self, x): 1238 + return Decimal(x) + self.xbase 1239 + 1240 + def YToData(self, y): 1241 + return Decimal(y) + self.ybase 1242 + 1243 + # Switch graph data (for one CPU) 1244 + 1245 + class 
SwitchGraphData(GraphData): 1246 + 1247 + def __init__(self, db, collection, cpu, xbase): 1248 + super(SwitchGraphData, self).__init__(collection, xbase) 1249 + 1250 + self.cpu = cpu 1251 + self.title = "CPU " + str(cpu) 1252 + self.SelectSwitches(db) 1253 + 1254 + def SelectComms(self, db, thread_id, last_comm_id, start_time, end_time): 1255 + query = QSqlQuery(db) 1256 + QueryExec(query, "SELECT id, c_time" 1257 + " FROM comms" 1258 + " WHERE c_thread_id = " + str(thread_id) + 1259 + " AND exec_flag = TRUE" 1260 + " AND c_time >= " + str(start_time) + 1261 + " AND c_time <= " + str(end_time) + 1262 + " ORDER BY c_time, id") 1263 + while query.next(): 1264 + comm_id = query.value(0) 1265 + if comm_id == last_comm_id: 1266 + continue 1267 + time = query.value(1) 1268 + hregion = self.HRegion(db, thread_id, comm_id, time) 1269 + self.AddPoint(time, 1000, None, None, hregion) 1270 + 1271 + def SelectSwitches(self, db): 1272 + last_time = None 1273 + last_comm_id = None 1274 + last_thread_id = None 1275 + query = QSqlQuery(db) 1276 + QueryExec(query, "SELECT time, thread_out_id, thread_in_id, comm_out_id, comm_in_id, flags" 1277 + " FROM context_switches" 1278 + " WHERE machine_id = " + str(self.collection.machine_id) + 1279 + " AND cpu = " + str(self.cpu) + 1280 + " ORDER BY time, id") 1281 + while query.next(): 1282 + flags = int(query.value(5)) 1283 + if flags & 1: 1284 + # Schedule-out: detect and add exec's 1285 + if last_thread_id == query.value(1) and last_comm_id is not None and last_comm_id != query.value(3): 1286 + self.SelectComms(db, last_thread_id, last_comm_id, last_time, query.value(0)) 1287 + continue 1288 + # Schedule-in: add data point 1289 + if len(self.points) == 0: 1290 + start_time = self.collection.glb.StartTime(self.collection.machine_id) 1291 + hregion = self.HRegion(db, query.value(1), query.value(3), start_time) 1292 + self.AddPoint(start_time, 1000, None, None, hregion) 1293 + time = query.value(0) 1294 + comm_id = query.value(4) 1295 + thread_id = query.value(2) 1296 + hregion = self.HRegion(db, thread_id, comm_id, time) 1297 + self.AddPoint(time, 1000, None, None, hregion) 1298 + last_time = time 1299 + last_comm_id = comm_id 1300 + last_thread_id = thread_id 1301 + 1302 + def NewHRegion(self, db, key, thread_id, comm_id, time): 1303 + exec_comm_id = ExecComm(db, thread_id, time) 1304 + query = QSqlQuery(db) 1305 + QueryExec(query, "SELECT pid, tid FROM threads WHERE id = " + str(thread_id)) 1306 + if query.next(): 1307 + pid = query.value(0) 1308 + tid = query.value(1) 1309 + else: 1310 + pid = -1 1311 + tid = -1 1312 + query = QSqlQuery(db) 1313 + QueryExec(query, "SELECT comm FROM comms WHERE id = " + str(comm_id)) 1314 + if query.next(): 1315 + comm = query.value(0) 1316 + else: 1317 + comm = "" 1318 + return SwitchGraphDataRegion(key, exec_comm_id, pid, tid, comm, thread_id, comm_id) 1319 + 1320 + def HRegion(self, db, thread_id, comm_id, time): 1321 + key = str(thread_id) + ":" + str(comm_id) 1322 + hregion = self.collection.LookupHRegion(key) 1323 + if hregion is None: 1324 + hregion = self.NewHRegion(db, key, thread_id, comm_id, time) 1325 + self.collection.AddHRegion(key, hregion) 1326 + return hregion 1327 + 1328 + # Graph data collection (multiple related graphs) base class 1329 + 1330 + class GraphDataCollection(object): 1331 + 1332 + def __init__(self, glb): 1333 + self.glb = glb 1334 + self.data = [] 1335 + self.hregions = {} 1336 + self.xrangelo = None 1337 + self.xrangehi = None 1338 + self.yrangelo = None 1339 + self.yrangehi = None 1340 + 
self.dp = XY(0, 0) 1341 + 1342 + def AddGraphData(self, data): 1343 + self.data.append(data) 1344 + 1345 + def LookupHRegion(self, key): 1346 + if key in self.hregions: 1347 + return self.hregions[key] 1348 + return None 1349 + 1350 + def AddHRegion(self, key, hregion): 1351 + self.hregions[key] = hregion 1352 + 1353 + # Switch graph data collection (SwitchGraphData for each CPU) 1354 + 1355 + class SwitchGraphDataCollection(GraphDataCollection): 1356 + 1357 + def __init__(self, glb, db, machine_id): 1358 + super(SwitchGraphDataCollection, self).__init__(glb) 1359 + 1360 + self.machine_id = machine_id 1361 + self.cpus = self.SelectCPUs(db) 1362 + 1363 + self.xrangelo = glb.StartTime(machine_id) 1364 + self.xrangehi = glb.FinishTime(machine_id) 1365 + 1366 + self.yrangelo = Decimal(0) 1367 + self.yrangehi = Decimal(1000) 1368 + 1369 + for cpu in self.cpus: 1370 + self.AddGraphData(SwitchGraphData(db, self, cpu, self.xrangelo)) 1371 + 1372 + def SelectCPUs(self, db): 1373 + cpus = [] 1374 + query = QSqlQuery(db) 1375 + QueryExec(query, "SELECT DISTINCT cpu" 1376 + " FROM context_switches" 1377 + " WHERE machine_id = " + str(self.machine_id)) 1378 + while query.next(): 1379 + cpus.append(int(query.value(0))) 1380 + return sorted(cpus) 1381 + 1382 + # Switch graph data graphics item displays the graphed data 1383 + 1384 + class SwitchGraphDataGraphicsItem(QGraphicsItem): 1385 + 1386 + def __init__(self, data, graph_width, graph_height, attrs, event_handler, parent=None): 1387 + super(SwitchGraphDataGraphicsItem, self).__init__(parent) 1388 + 1389 + self.data = data 1390 + self.graph_width = graph_width 1391 + self.graph_height = graph_height 1392 + self.attrs = attrs 1393 + self.event_handler = event_handler 1394 + self.setAcceptHoverEvents(True) 1395 + 1396 + def boundingRect(self): 1397 + return QRectF(0, 0, self.graph_width, self.graph_height) 1398 + 1399 + def PaintPoint(self, painter, last, x): 1400 + if not(last is None or last.hregion.pid == 0 or x < self.attrs.subrange.x.lo): 1401 + if last.x < self.attrs.subrange.x.lo: 1402 + x0 = self.attrs.subrange.x.lo 1403 + else: 1404 + x0 = last.x 1405 + if x > self.attrs.subrange.x.hi: 1406 + x1 = self.attrs.subrange.x.hi 1407 + else: 1408 + x1 = x - 1 1409 + x0 = self.attrs.XToPixel(x0) 1410 + x1 = self.attrs.XToPixel(x1) 1411 + 1412 + y0 = self.attrs.YToPixel(last.y) 1413 + 1414 + colour = self.attrs.region_attributes[last.hregion.key].colour 1415 + 1416 + width = x1 - x0 + 1 1417 + if width < 2: 1418 + painter.setPen(colour) 1419 + painter.drawLine(x0, self.graph_height - y0, x0, self.graph_height) 1420 + else: 1421 + painter.fillRect(x0, self.graph_height - y0, width, self.graph_height - 1, colour) 1422 + 1423 + def paint(self, painter, option, widget): 1424 + last = None 1425 + for point in self.data.points: 1426 + self.PaintPoint(painter, last, point.x) 1427 + if point.x > self.attrs.subrange.x.hi: 1428 + break; 1429 + last = point 1430 + self.PaintPoint(painter, last, self.attrs.subrange.x.hi + 1) 1431 + 1432 + def BinarySearchPoint(self, target): 1433 + lower_pos = 0 1434 + higher_pos = len(self.data.points) 1435 + while True: 1436 + pos = int((lower_pos + higher_pos) / 2) 1437 + val = self.data.points[pos].x 1438 + if target >= val: 1439 + lower_pos = pos 1440 + else: 1441 + higher_pos = pos 1442 + if higher_pos <= lower_pos + 1: 1443 + return lower_pos 1444 + 1445 + def XPixelToData(self, x): 1446 + x = self.attrs.PixelToX(x) 1447 + if x < self.data.points[0].x: 1448 + x = 0 1449 + pos = 0 1450 + low = True 1451 + else: 1452 + pos = 
self.BinarySearchPoint(x) 1453 + low = False 1454 + return (low, pos, self.data.XToData(x)) 1455 + 1456 + def EventToData(self, event): 1457 + no_data = (None,) * 4 1458 + if len(self.data.points) < 1: 1459 + return no_data 1460 + x = event.pos().x() 1461 + if x < 0: 1462 + return no_data 1463 + low0, pos0, time_from = self.XPixelToData(x) 1464 + low1, pos1, time_to = self.XPixelToData(x + 1) 1465 + hregions = set() 1466 + hregion_times = [] 1467 + if not low1: 1468 + for i in xrange(pos0, pos1 + 1): 1469 + hregion = self.data.points[i].hregion 1470 + hregions.add(hregion) 1471 + if i == pos0: 1472 + time = time_from 1473 + else: 1474 + time = self.data.XToData(self.data.points[i].x) 1475 + hregion_times.append((hregion, time)) 1476 + return (time_from, time_to, hregions, hregion_times) 1477 + 1478 + def hoverMoveEvent(self, event): 1479 + time_from, time_to, hregions, hregion_times = self.EventToData(event) 1480 + if time_from is not None: 1481 + self.event_handler.PointEvent(self.data.cpu, time_from, time_to, hregions) 1482 + 1483 + def hoverLeaveEvent(self, event): 1484 + self.event_handler.NoPointEvent() 1485 + 1486 + def mousePressEvent(self, event): 1487 + if event.button() != Qt.RightButton: 1488 + super(SwitchGraphDataGraphicsItem, self).mousePressEvent(event) 1489 + return 1490 + time_from, time_to, hregions, hregion_times = self.EventToData(event) 1491 + if hregion_times: 1492 + self.event_handler.RightClickEvent(self.data.cpu, hregion_times, event.screenPos()) 1493 + 1494 + # X-axis graphics item 1495 + 1496 + class XAxisGraphicsItem(QGraphicsItem): 1497 + 1498 + def __init__(self, width, parent=None): 1499 + super(XAxisGraphicsItem, self).__init__(parent) 1500 + 1501 + self.width = width 1502 + self.max_mark_sz = 4 1503 + self.height = self.max_mark_sz + 1 1504 + 1505 + def boundingRect(self): 1506 + return QRectF(0, 0, self.width, self.height) 1507 + 1508 + def Step(self): 1509 + attrs = self.parentItem().attrs 1510 + subrange = attrs.subrange.x 1511 + t = subrange.hi - subrange.lo 1512 + s = (3.0 * t) / self.width 1513 + n = 1.0 1514 + while s > n: 1515 + n = n * 10.0 1516 + return n 1517 + 1518 + def PaintMarks(self, painter, at_y, lo, hi, step, i): 1519 + attrs = self.parentItem().attrs 1520 + x = lo 1521 + while x <= hi: 1522 + xp = attrs.XToPixel(x) 1523 + if i % 10: 1524 + if i % 5: 1525 + sz = 1 1526 + else: 1527 + sz = 2 1528 + else: 1529 + sz = self.max_mark_sz 1530 + i = 0 1531 + painter.drawLine(xp, at_y, xp, at_y + sz) 1532 + x += step 1533 + i += 1 1534 + 1535 + def paint(self, painter, option, widget): 1536 + # Using QPainter::drawLine(int x1, int y1, int x2, int y2) so x2 = width -1 1537 + painter.drawLine(0, 0, self.width - 1, 0) 1538 + n = self.Step() 1539 + attrs = self.parentItem().attrs 1540 + subrange = attrs.subrange.x 1541 + if subrange.lo: 1542 + x_offset = n - (subrange.lo % n) 1543 + else: 1544 + x_offset = 0.0 1545 + x = subrange.lo + x_offset 1546 + i = (x / n) % 10 1547 + self.PaintMarks(painter, 0, x, subrange.hi, n, i) 1548 + 1549 + def ScaleDimensions(self): 1550 + n = self.Step() 1551 + attrs = self.parentItem().attrs 1552 + lo = attrs.subrange.x.lo 1553 + hi = (n * 10.0) + lo 1554 + width = attrs.XToPixel(hi) 1555 + if width > 500: 1556 + width = 0 1557 + return (n, lo, hi, width) 1558 + 1559 + def PaintScale(self, painter, at_x, at_y): 1560 + n, lo, hi, width = self.ScaleDimensions() 1561 + if not width: 1562 + return 1563 + painter.drawLine(at_x, at_y, at_x + width, at_y) 1564 + self.PaintMarks(painter, at_y, lo, hi, n, 0) 1565 + 1566 + def 
ScaleWidth(self): 1567 + n, lo, hi, width = self.ScaleDimensions() 1568 + return width 1569 + 1570 + def ScaleHeight(self): 1571 + return self.height 1572 + 1573 + def ScaleUnit(self): 1574 + return self.Step() * 10 1575 + 1576 + # Scale graphics item base class 1577 + 1578 + class ScaleGraphicsItem(QGraphicsItem): 1579 + 1580 + def __init__(self, axis, parent=None): 1581 + super(ScaleGraphicsItem, self).__init__(parent) 1582 + self.axis = axis 1583 + 1584 + def boundingRect(self): 1585 + scale_width = self.axis.ScaleWidth() 1586 + if not scale_width: 1587 + return QRectF() 1588 + return QRectF(0, 0, self.axis.ScaleWidth() + 100, self.axis.ScaleHeight()) 1589 + 1590 + def paint(self, painter, option, widget): 1591 + scale_width = self.axis.ScaleWidth() 1592 + if not scale_width: 1593 + return 1594 + self.axis.PaintScale(painter, 0, 5) 1595 + x = scale_width + 4 1596 + painter.drawText(QPointF(x, 10), self.Text()) 1597 + 1598 + def Unit(self): 1599 + return self.axis.ScaleUnit() 1600 + 1601 + def Text(self): 1602 + return "" 1603 + 1604 + # Switch graph scale graphics item 1605 + 1606 + class SwitchScaleGraphicsItem(ScaleGraphicsItem): 1607 + 1608 + def __init__(self, axis, parent=None): 1609 + super(SwitchScaleGraphicsItem, self).__init__(axis, parent) 1610 + 1611 + def Text(self): 1612 + unit = self.Unit() 1613 + if unit >= 1000000000: 1614 + unit = int(unit / 1000000000) 1615 + us = "s" 1616 + elif unit >= 1000000: 1617 + unit = int(unit / 1000000) 1618 + us = "ms" 1619 + elif unit >= 1000: 1620 + unit = int(unit / 1000) 1621 + us = "us" 1622 + else: 1623 + unit = int(unit) 1624 + us = "ns" 1625 + return " = " + str(unit) + " " + us 1626 + 1627 + # Switch graph graphics item contains graph title, scale, x/y-axis, and the graphed data 1628 + 1629 + class SwitchGraphGraphicsItem(QGraphicsItem): 1630 + 1631 + def __init__(self, collection, data, attrs, event_handler, first, parent=None): 1632 + super(SwitchGraphGraphicsItem, self).__init__(parent) 1633 + self.collection = collection 1634 + self.data = data 1635 + self.attrs = attrs 1636 + self.event_handler = event_handler 1637 + 1638 + margin = 20 1639 + title_width = 50 1640 + 1641 + self.title_graphics = QGraphicsSimpleTextItem(data.title, self) 1642 + 1643 + self.title_graphics.setPos(margin, margin) 1644 + graph_width = attrs.XToPixel(attrs.subrange.x.hi) + 1 1645 + graph_height = attrs.YToPixel(attrs.subrange.y.hi) + 1 1646 + 1647 + self.graph_origin_x = margin + title_width + margin 1648 + self.graph_origin_y = graph_height + margin 1649 + 1650 + x_axis_size = 1 1651 + y_axis_size = 1 1652 + self.yline = QGraphicsLineItem(0, 0, 0, graph_height, self) 1653 + 1654 + self.x_axis = XAxisGraphicsItem(graph_width, self) 1655 + self.x_axis.setPos(self.graph_origin_x, self.graph_origin_y + 1) 1656 + 1657 + if first: 1658 + self.scale_item = SwitchScaleGraphicsItem(self.x_axis, self) 1659 + self.scale_item.setPos(self.graph_origin_x, self.graph_origin_y + 10) 1660 + 1661 + self.yline.setPos(self.graph_origin_x - y_axis_size, self.graph_origin_y - graph_height) 1662 + 1663 + self.axis_point = QGraphicsLineItem(0, 0, 0, 0, self) 1664 + self.axis_point.setPos(self.graph_origin_x - 1, self.graph_origin_y +1) 1665 + 1666 + self.width = self.graph_origin_x + graph_width + margin 1667 + self.height = self.graph_origin_y + margin 1668 + 1669 + self.graph = SwitchGraphDataGraphicsItem(data, graph_width, graph_height, attrs, event_handler, self) 1670 + self.graph.setPos(self.graph_origin_x, self.graph_origin_y - graph_height) 1671 + 1672 + if parent 
and 'EnableRubberBand' in dir(parent): 1673 + parent.EnableRubberBand(self.graph_origin_x, self.graph_origin_x + graph_width - 1, self) 1674 + 1675 + def boundingRect(self): 1676 + return QRectF(0, 0, self.width, self.height) 1677 + 1678 + def paint(self, painter, option, widget): 1679 + pass 1680 + 1681 + def RBXToPixel(self, x): 1682 + return self.attrs.PixelToX(x - self.graph_origin_x) 1683 + 1684 + def RBXRangeToPixel(self, x0, x1): 1685 + return (self.RBXToPixel(x0), self.RBXToPixel(x1 + 1)) 1686 + 1687 + def RBPixelToTime(self, x): 1688 + if x < self.data.points[0].x: 1689 + return self.data.XToData(0) 1690 + return self.data.XToData(x) 1691 + 1692 + def RBEventTimes(self, x0, x1): 1693 + x0, x1 = self.RBXRangeToPixel(x0, x1) 1694 + time_from = self.RBPixelToTime(x0) 1695 + time_to = self.RBPixelToTime(x1) 1696 + return (time_from, time_to) 1697 + 1698 + def RBEvent(self, x0, x1): 1699 + time_from, time_to = self.RBEventTimes(x0, x1) 1700 + self.event_handler.RangeEvent(time_from, time_to) 1701 + 1702 + def RBMoveEvent(self, x0, x1): 1703 + if x1 < x0: 1704 + x0, x1 = x1, x0 1705 + self.RBEvent(x0, x1) 1706 + 1707 + def RBReleaseEvent(self, x0, x1, selection_state): 1708 + if x1 < x0: 1709 + x0, x1 = x1, x0 1710 + x0, x1 = self.RBXRangeToPixel(x0, x1) 1711 + self.event_handler.SelectEvent(x0, x1, selection_state) 1712 + 1713 + # Graphics item to draw a vertical bracket (used to highlight "forward" sub-range) 1714 + 1715 + class VerticalBracketGraphicsItem(QGraphicsItem): 1716 + 1717 + def __init__(self, parent=None): 1718 + super(VerticalBracketGraphicsItem, self).__init__(parent) 1719 + 1720 + self.width = 0 1721 + self.height = 0 1722 + self.hide() 1723 + 1724 + def SetSize(self, width, height): 1725 + self.width = width + 1 1726 + self.height = height + 1 1727 + 1728 + def boundingRect(self): 1729 + return QRectF(0, 0, self.width, self.height) 1730 + 1731 + def paint(self, painter, option, widget): 1732 + colour = QColor(255, 255, 0, 32) 1733 + painter.fillRect(0, 0, self.width, self.height, colour) 1734 + x1 = self.width - 1 1735 + y1 = self.height - 1 1736 + painter.drawLine(0, 0, x1, 0) 1737 + painter.drawLine(0, 0, 0, 3) 1738 + painter.drawLine(x1, 0, x1, 3) 1739 + painter.drawLine(0, y1, x1, y1) 1740 + painter.drawLine(0, y1, 0, y1 - 3) 1741 + painter.drawLine(x1, y1, x1, y1 - 3) 1742 + 1743 + # Graphics item to contain graphs arranged vertically 1744 + 1745 + class VertcalGraphSetGraphicsItem(QGraphicsItem): 1746 + 1747 + def __init__(self, collection, attrs, event_handler, child_class, parent=None): 1748 + super(VertcalGraphSetGraphicsItem, self).__init__(parent) 1749 + 1750 + self.collection = collection 1751 + 1752 + self.top = 10 1753 + 1754 + self.width = 0 1755 + self.height = self.top 1756 + 1757 + self.rubber_band = None 1758 + self.rb_enabled = False 1759 + 1760 + first = True 1761 + for data in collection.data: 1762 + child = child_class(collection, data, attrs, event_handler, first, self) 1763 + child.setPos(0, self.height + 1) 1764 + rect = child.boundingRect() 1765 + if rect.right() > self.width: 1766 + self.width = rect.right() 1767 + self.height = self.height + rect.bottom() + 1 1768 + first = False 1769 + 1770 + self.bracket = VerticalBracketGraphicsItem(self) 1771 + 1772 + def EnableRubberBand(self, xlo, xhi, rb_event_handler): 1773 + if self.rb_enabled: 1774 + return 1775 + self.rb_enabled = True 1776 + self.rb_in_view = False 1777 + self.setAcceptedMouseButtons(Qt.LeftButton) 1778 + self.rb_xlo = xlo 1779 + self.rb_xhi = xhi 1780 + self.rb_event_handler = 
rb_event_handler 1781 + self.mousePressEvent = self.MousePressEvent 1782 + self.mouseMoveEvent = self.MouseMoveEvent 1783 + self.mouseReleaseEvent = self.MouseReleaseEvent 1784 + 1785 + def boundingRect(self): 1786 + return QRectF(0, 0, self.width, self.height) 1787 + 1788 + def paint(self, painter, option, widget): 1789 + pass 1790 + 1791 + def RubberBandParent(self): 1792 + scene = self.scene() 1793 + view = scene.views()[0] 1794 + viewport = view.viewport() 1795 + return viewport 1796 + 1797 + def RubberBandSetGeometry(self, rect): 1798 + scene_rectf = self.mapRectToScene(QRectF(rect)) 1799 + scene = self.scene() 1800 + view = scene.views()[0] 1801 + poly = view.mapFromScene(scene_rectf) 1802 + self.rubber_band.setGeometry(poly.boundingRect()) 1803 + 1804 + def SetSelection(self, selection_state): 1805 + if self.rubber_band: 1806 + if selection_state: 1807 + self.RubberBandSetGeometry(selection_state) 1808 + self.rubber_band.show() 1809 + else: 1810 + self.rubber_band.hide() 1811 + 1812 + def SetBracket(self, rect): 1813 + if rect: 1814 + x, y, width, height = rect.x(), rect.y(), rect.width(), rect.height() 1815 + self.bracket.setPos(x, y) 1816 + self.bracket.SetSize(width, height) 1817 + self.bracket.show() 1818 + else: 1819 + self.bracket.hide() 1820 + 1821 + def RubberBandX(self, event): 1822 + x = event.pos().toPoint().x() 1823 + if x < self.rb_xlo: 1824 + x = self.rb_xlo 1825 + elif x > self.rb_xhi: 1826 + x = self.rb_xhi 1827 + else: 1828 + self.rb_in_view = True 1829 + return x 1830 + 1831 + def RubberBandRect(self, x): 1832 + if self.rb_origin.x() <= x: 1833 + width = x - self.rb_origin.x() 1834 + rect = QRect(self.rb_origin, QSize(width, self.height)) 1835 + else: 1836 + width = self.rb_origin.x() - x 1837 + top_left = QPoint(self.rb_origin.x() - width, self.rb_origin.y()) 1838 + rect = QRect(top_left, QSize(width, self.height)) 1839 + return rect 1840 + 1841 + def MousePressEvent(self, event): 1842 + self.rb_in_view = False 1843 + x = self.RubberBandX(event) 1844 + self.rb_origin = QPoint(x, self.top) 1845 + if self.rubber_band is None: 1846 + self.rubber_band = QRubberBand(QRubberBand.Rectangle, self.RubberBandParent()) 1847 + self.RubberBandSetGeometry(QRect(self.rb_origin, QSize(0, self.height))) 1848 + if self.rb_in_view: 1849 + self.rubber_band.show() 1850 + self.rb_event_handler.RBMoveEvent(x, x) 1851 + else: 1852 + self.rubber_band.hide() 1853 + 1854 + def MouseMoveEvent(self, event): 1855 + x = self.RubberBandX(event) 1856 + rect = self.RubberBandRect(x) 1857 + self.RubberBandSetGeometry(rect) 1858 + if self.rb_in_view: 1859 + self.rubber_band.show() 1860 + self.rb_event_handler.RBMoveEvent(self.rb_origin.x(), x) 1861 + 1862 + def MouseReleaseEvent(self, event): 1863 + x = self.RubberBandX(event) 1864 + if self.rb_in_view: 1865 + selection_state = self.RubberBandRect(x) 1866 + else: 1867 + selection_state = None 1868 + self.rb_event_handler.RBReleaseEvent(self.rb_origin.x(), x, selection_state) 1869 + 1870 + # Switch graph legend data model 1871 + 1872 + class SwitchGraphLegendModel(QAbstractTableModel): 1873 + 1874 + def __init__(self, collection, region_attributes, parent=None): 1875 + super(SwitchGraphLegendModel, self).__init__(parent) 1876 + 1877 + self.region_attributes = region_attributes 1878 + 1879 + self.child_items = sorted(collection.hregions.values(), key=GraphDataRegionOrdinal) 1880 + self.child_count = len(self.child_items) 1881 + 1882 + self.highlight_set = set() 1883 + 1884 + self.column_headers = ("pid", "tid", "comm") 1885 + 1886 + def rowCount(self, 
parent): 1887 + return self.child_count 1888 + 1889 + def headerData(self, section, orientation, role): 1890 + if role != Qt.DisplayRole: 1891 + return None 1892 + if orientation != Qt.Horizontal: 1893 + return None 1894 + return self.columnHeader(section) 1895 + 1896 + def index(self, row, column, parent): 1897 + return self.createIndex(row, column, self.child_items[row]) 1898 + 1899 + def columnCount(self, parent=None): 1900 + return len(self.column_headers) 1901 + 1902 + def columnHeader(self, column): 1903 + return self.column_headers[column] 1904 + 1905 + def data(self, index, role): 1906 + if role == Qt.BackgroundRole: 1907 + child = self.child_items[index.row()] 1908 + if child in self.highlight_set: 1909 + return self.region_attributes[child.key].colour 1910 + return None 1911 + if role == Qt.ForegroundRole: 1912 + child = self.child_items[index.row()] 1913 + if child in self.highlight_set: 1914 + return QColor(255, 255, 255) 1915 + return self.region_attributes[child.key].colour 1916 + if role != Qt.DisplayRole: 1917 + return None 1918 + hregion = self.child_items[index.row()] 1919 + col = index.column() 1920 + if col == 0: 1921 + return hregion.pid 1922 + if col == 1: 1923 + return hregion.tid 1924 + if col == 2: 1925 + return hregion.comm 1926 + return None 1927 + 1928 + def SetHighlight(self, row, set_highlight): 1929 + child = self.child_items[row] 1930 + top_left = self.createIndex(row, 0, child) 1931 + bottom_right = self.createIndex(row, len(self.column_headers) - 1, child) 1932 + self.dataChanged.emit(top_left, bottom_right) 1933 + 1934 + def Highlight(self, highlight_set): 1935 + for row in xrange(self.child_count): 1936 + child = self.child_items[row] 1937 + if child in self.highlight_set: 1938 + if child not in highlight_set: 1939 + self.SetHighlight(row, False) 1940 + elif child in highlight_set: 1941 + self.SetHighlight(row, True) 1942 + self.highlight_set = highlight_set 1943 + 1944 + # Switch graph legend is a table 1945 + 1946 + class SwitchGraphLegend(QWidget): 1947 + 1948 + def __init__(self, collection, region_attributes, parent=None): 1949 + super(SwitchGraphLegend, self).__init__(parent) 1950 + 1951 + self.data_model = SwitchGraphLegendModel(collection, region_attributes) 1952 + 1953 + self.model = QSortFilterProxyModel() 1954 + self.model.setSourceModel(self.data_model) 1955 + 1956 + self.view = QTableView() 1957 + self.view.setModel(self.model) 1958 + self.view.setEditTriggers(QAbstractItemView.NoEditTriggers) 1959 + self.view.verticalHeader().setVisible(False) 1960 + self.view.sortByColumn(-1, Qt.AscendingOrder) 1961 + self.view.setSortingEnabled(True) 1962 + self.view.resizeColumnsToContents() 1963 + self.view.resizeRowsToContents() 1964 + 1965 + self.vbox = VBoxLayout(self.view) 1966 + self.setLayout(self.vbox) 1967 + 1968 + sz1 = self.view.columnWidth(0) + self.view.columnWidth(1) + self.view.columnWidth(2) + 2 1969 + sz1 = sz1 + self.view.verticalScrollBar().sizeHint().width() 1970 + self.saved_size = sz1 1971 + 1972 + def resizeEvent(self, event): 1973 + self.saved_size = self.size().width() 1974 + super(SwitchGraphLegend, self).resizeEvent(event) 1975 + 1976 + def Highlight(self, highlight_set): 1977 + self.data_model.Highlight(highlight_set) 1978 + self.update() 1979 + 1980 + def changeEvent(self, event): 1981 + if event.type() == QEvent.FontChange: 1982 + self.view.resizeRowsToContents() 1983 + self.view.resizeColumnsToContents() 1984 + # Need to resize rows again after column resize 1985 + self.view.resizeRowsToContents() 1986 + 
super(SwitchGraphLegend, self).changeEvent(event)
+
+# Random colour generation
+
+def RGBColourTooLight(r, g, b):
+	if g > 230:
+		return True
+	if g <= 160:
+		return False
+	if r <= 180 and g <= 180:
+		return False
+	if r < 60:
+		return False
+	return True
+
+def GenerateColours(x):
+	cs = [0]
+	for i in xrange(1, x):
+		cs.append(int((255.0 / i) + 0.5))
+	colours = []
+	for r in cs:
+		for g in cs:
+			for b in cs:
+				# Exclude black and colours that look too light against a white background
+				if (r, g, b) == (0, 0, 0) or RGBColourTooLight(r, g, b):
+					continue
+				colours.append(QColor(r, g, b))
+	return colours
+
+def GenerateNColours(n):
+	for x in xrange(2, n + 2):
+		colours = GenerateColours(x)
+		if len(colours) >= n:
+			return colours
+	return []
+
+def GenerateNRandomColours(n, seed):
+	colours = GenerateNColours(n)
+	random.seed(seed)
+	random.shuffle(colours)
+	return colours
+
+# Graph attributes, in particular the scale and subrange that change when zooming
+
+class GraphAttributes():
+
+	def __init__(self, scale, subrange, region_attributes, dp):
+		self.scale = scale
+		self.subrange = subrange
+		self.region_attributes = region_attributes
+		# Rounding avoids errors due to finite floating point precision
+		self.dp = dp	# data decimal places
+		self.Update()
+
+	def XToPixel(self, x):
+		return int(round((x - self.subrange.x.lo) * self.scale.x, self.pdp.x))
+
+	def YToPixel(self, y):
+		return int(round((y - self.subrange.y.lo) * self.scale.y, self.pdp.y))
+
+	def PixelToXRounded(self, px):
+		return round((round(px, 0) / self.scale.x), self.dp.x) + self.subrange.x.lo
+
+	def PixelToYRounded(self, py):
+		return round((round(py, 0) / self.scale.y), self.dp.y) + self.subrange.y.lo
+
+	def PixelToX(self, px):
+		x = self.PixelToXRounded(px)
+		if self.pdp.x == 0:
+			rt = self.XToPixel(x)
+			if rt > px:
+				return x - 1
+		return x
+
+	def PixelToY(self, py):
+		y = self.PixelToYRounded(py)
+		if self.pdp.y == 0:
+			rt = self.YToPixel(y)
+			if rt > py:
+				return y - 1
+		return y
+
+	def ToPDP(self, dp, scale):
+		# Calculate pixel decimal places:
+		#  (10 ** dp) is the minimum delta in the data
+		#  scale it to get the minimum delta in pixels
+		#  log10 gives the number of decimal places negatively
+		#  subtract 1 to divide by 10
+		#  round to the lower negative number
+		#  change the sign to get the number of decimal places positively
+		x = math.log10((10 ** dp) * scale)
+		if x < 0:
+			x -= 1
+			x = -int(math.floor(x) - 0.1)
+		else:
+			x = 0
+		return x
+
+	def Update(self):
+		x = self.ToPDP(self.dp.x, self.scale.x)
+		y = self.ToPDP(self.dp.y, self.scale.y)
+		self.pdp = XY(x, y)	# pixel decimal places
+
+# Switch graph splitter which divides the CPU graphs from the legend
+
+class SwitchGraphSplitter(QSplitter):
+
+	def __init__(self, parent=None):
+		super(SwitchGraphSplitter, self).__init__(parent)
+
+		self.first_time = False
+
+	def resizeEvent(self, ev):
+		if self.first_time:
+			self.first_time = False
+			sz1 = self.widget(1).view.columnWidth(0) +
self.widget(1).view.columnWidth(1) + self.widget(1).view.columnWidth(2) + 2 2102 + sz1 = sz1 + self.widget(1).view.verticalScrollBar().sizeHint().width() 2103 + sz0 = self.size().width() - self.handleWidth() - sz1 2104 + self.setSizes([sz0, sz1]) 2105 + elif not(self.widget(1).saved_size is None): 2106 + sz1 = self.widget(1).saved_size 2107 + sz0 = self.size().width() - self.handleWidth() - sz1 2108 + self.setSizes([sz0, sz1]) 2109 + super(SwitchGraphSplitter, self).resizeEvent(ev) 2110 + 2111 + # Graph widget base class 2112 + 2113 + class GraphWidget(QWidget): 2114 + 2115 + graph_title_changed = Signal(object) 2116 + 2117 + def __init__(self, parent=None): 2118 + super(GraphWidget, self).__init__(parent) 2119 + 2120 + def GraphTitleChanged(self, title): 2121 + self.graph_title_changed.emit(title) 2122 + 2123 + def Title(self): 2124 + return "" 2125 + 2126 + # Display time in s, ms, us or ns 2127 + 2128 + def ToTimeStr(val): 2129 + val = Decimal(val) 2130 + if val >= 1000000000: 2131 + return "{} s".format((val / 1000000000).quantize(Decimal("0.000000001"))) 2132 + if val >= 1000000: 2133 + return "{} ms".format((val / 1000000).quantize(Decimal("0.000001"))) 2134 + if val >= 1000: 2135 + return "{} us".format((val / 1000).quantize(Decimal("0.001"))) 2136 + return "{} ns".format(val.quantize(Decimal("1"))) 2137 + 2138 + # Switch (i.e. context switch i.e. Time Chart by CPU) graph widget which contains the CPU graphs and the legend and control buttons 2139 + 2140 + class SwitchGraphWidget(GraphWidget): 2141 + 2142 + def __init__(self, glb, collection, parent=None): 2143 + super(SwitchGraphWidget, self).__init__(parent) 2144 + 2145 + self.glb = glb 2146 + self.collection = collection 2147 + 2148 + self.back_state = [] 2149 + self.forward_state = [] 2150 + self.selection_state = (None, None) 2151 + self.fwd_rect = None 2152 + self.start_time = self.glb.StartTime(collection.machine_id) 2153 + 2154 + i = 0 2155 + hregions = collection.hregions.values() 2156 + colours = GenerateNRandomColours(len(hregions), 1013) 2157 + region_attributes = {} 2158 + for hregion in hregions: 2159 + if hregion.pid == 0 and hregion.tid == 0: 2160 + region_attributes[hregion.key] = GraphRegionAttribute(QColor(0, 0, 0)) 2161 + else: 2162 + region_attributes[hregion.key] = GraphRegionAttribute(colours[i]) 2163 + i = i + 1 2164 + 2165 + # Default to entire range 2166 + xsubrange = Subrange(0.0, float(collection.xrangehi - collection.xrangelo) + 1.0) 2167 + ysubrange = Subrange(0.0, float(collection.yrangehi - collection.yrangelo) + 1.0) 2168 + subrange = XY(xsubrange, ysubrange) 2169 + 2170 + scale = self.GetScaleForRange(subrange) 2171 + 2172 + self.attrs = GraphAttributes(scale, subrange, region_attributes, collection.dp) 2173 + 2174 + self.item = VertcalGraphSetGraphicsItem(collection, self.attrs, self, SwitchGraphGraphicsItem) 2175 + 2176 + self.scene = QGraphicsScene() 2177 + self.scene.addItem(self.item) 2178 + 2179 + self.view = QGraphicsView(self.scene) 2180 + self.view.centerOn(0, 0) 2181 + self.view.setAlignment(Qt.AlignLeft | Qt.AlignTop) 2182 + 2183 + self.legend = SwitchGraphLegend(collection, region_attributes) 2184 + 2185 + self.splitter = SwitchGraphSplitter() 2186 + self.splitter.addWidget(self.view) 2187 + self.splitter.addWidget(self.legend) 2188 + 2189 + self.point_label = QLabel("") 2190 + self.point_label.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed) 2191 + 2192 + self.back_button = QToolButton() 2193 + self.back_button.setIcon(self.style().standardIcon(QStyle.SP_ArrowLeft)) 2194 + 
self.back_button.setDisabled(True) 2195 + self.back_button.released.connect(lambda: self.Back()) 2196 + 2197 + self.forward_button = QToolButton() 2198 + self.forward_button.setIcon(self.style().standardIcon(QStyle.SP_ArrowRight)) 2199 + self.forward_button.setDisabled(True) 2200 + self.forward_button.released.connect(lambda: self.Forward()) 2201 + 2202 + self.zoom_button = QToolButton() 2203 + self.zoom_button.setText("Zoom") 2204 + self.zoom_button.setDisabled(True) 2205 + self.zoom_button.released.connect(lambda: self.Zoom()) 2206 + 2207 + self.hbox = HBoxLayout(self.back_button, self.forward_button, self.zoom_button, self.point_label) 2208 + 2209 + self.vbox = VBoxLayout(self.splitter, self.hbox) 2210 + 2211 + self.setLayout(self.vbox) 2212 + 2213 + def GetScaleForRangeX(self, xsubrange): 2214 + # Default graph 1000 pixels wide 2215 + dflt = 1000.0 2216 + r = xsubrange.hi - xsubrange.lo 2217 + return dflt / r 2218 + 2219 + def GetScaleForRangeY(self, ysubrange): 2220 + # Default graph 50 pixels high 2221 + dflt = 50.0 2222 + r = ysubrange.hi - ysubrange.lo 2223 + return dflt / r 2224 + 2225 + def GetScaleForRange(self, subrange): 2226 + # Default graph 1000 pixels wide, 50 pixels high 2227 + xscale = self.GetScaleForRangeX(subrange.x) 2228 + yscale = self.GetScaleForRangeY(subrange.y) 2229 + return XY(xscale, yscale) 2230 + 2231 + def PointEvent(self, cpu, time_from, time_to, hregions): 2232 + text = "CPU: " + str(cpu) 2233 + time_from = time_from.quantize(Decimal(1)) 2234 + rel_time_from = time_from - self.glb.StartTime(self.collection.machine_id) 2235 + text = text + " Time: " + str(time_from) + " (+" + ToTimeStr(rel_time_from) + ")" 2236 + self.point_label.setText(text) 2237 + self.legend.Highlight(hregions) 2238 + 2239 + def RightClickEvent(self, cpu, hregion_times, pos): 2240 + if not IsSelectable(self.glb.db, "calls", "WHERE parent_id >= 0"): 2241 + return 2242 + menu = QMenu(self.view) 2243 + for hregion, time in hregion_times: 2244 + thread_at_time = (hregion.exec_comm_id, hregion.thread_id, time) 2245 + menu_text = "Show Call Tree for {} {}:{} at {}".format(hregion.comm, hregion.pid, hregion.tid, time) 2246 + menu.addAction(CreateAction(menu_text, "Show Call Tree", lambda a=None, args=thread_at_time: self.RightClickSelect(args), self.view)) 2247 + menu.exec_(pos) 2248 + 2249 + def RightClickSelect(self, args): 2250 + CallTreeWindow(self.glb, self.glb.mainwindow, thread_at_time=args) 2251 + 2252 + def NoPointEvent(self): 2253 + self.point_label.setText("") 2254 + self.legend.Highlight({}) 2255 + 2256 + def RangeEvent(self, time_from, time_to): 2257 + time_from = time_from.quantize(Decimal(1)) 2258 + time_to = time_to.quantize(Decimal(1)) 2259 + if time_to <= time_from: 2260 + self.point_label.setText("") 2261 + return 2262 + rel_time_from = time_from - self.start_time 2263 + rel_time_to = time_to - self.start_time 2264 + text = " Time: " + str(time_from) + " (+" + ToTimeStr(rel_time_from) + ") to: " + str(time_to) + " (+" + ToTimeStr(rel_time_to) + ")" 2265 + text = text + " duration: " + ToTimeStr(time_to - time_from) 2266 + self.point_label.setText(text) 2267 + 2268 + def BackState(self): 2269 + return (self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect) 2270 + 2271 + def PushBackState(self): 2272 + state = copy.deepcopy(self.BackState()) 2273 + self.back_state.append(state) 2274 + self.back_button.setEnabled(True) 2275 + 2276 + def PopBackState(self): 2277 + self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect = 
self.back_state.pop() 2278 + self.attrs.Update() 2279 + if not self.back_state: 2280 + self.back_button.setDisabled(True) 2281 + 2282 + def PushForwardState(self): 2283 + state = copy.deepcopy(self.BackState()) 2284 + self.forward_state.append(state) 2285 + self.forward_button.setEnabled(True) 2286 + 2287 + def PopForwardState(self): 2288 + self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect = self.forward_state.pop() 2289 + self.attrs.Update() 2290 + if not self.forward_state: 2291 + self.forward_button.setDisabled(True) 2292 + 2293 + def Title(self): 2294 + time_from = self.collection.xrangelo + Decimal(self.attrs.subrange.x.lo) 2295 + time_to = self.collection.xrangelo + Decimal(self.attrs.subrange.x.hi) 2296 + rel_time_from = time_from - self.start_time 2297 + rel_time_to = time_to - self.start_time 2298 + title = "+" + ToTimeStr(rel_time_from) + " to +" + ToTimeStr(rel_time_to) 2299 + title = title + " (" + ToTimeStr(time_to - time_from) + ")" 2300 + return title 2301 + 2302 + def Update(self): 2303 + selected_subrange, selection_state = self.selection_state 2304 + self.item.SetSelection(selection_state) 2305 + self.item.SetBracket(self.fwd_rect) 2306 + self.zoom_button.setDisabled(selected_subrange is None) 2307 + self.GraphTitleChanged(self.Title()) 2308 + self.item.update(self.item.boundingRect()) 2309 + 2310 + def Back(self): 2311 + if not self.back_state: 2312 + return 2313 + self.PushForwardState() 2314 + self.PopBackState() 2315 + self.Update() 2316 + 2317 + def Forward(self): 2318 + if not self.forward_state: 2319 + return 2320 + self.PushBackState() 2321 + self.PopForwardState() 2322 + self.Update() 2323 + 2324 + def SelectEvent(self, x0, x1, selection_state): 2325 + if selection_state is None: 2326 + selected_subrange = None 2327 + else: 2328 + if x1 - x0 < 1.0: 2329 + x1 += 1.0 2330 + selected_subrange = Subrange(x0, x1) 2331 + self.selection_state = (selected_subrange, selection_state) 2332 + self.zoom_button.setDisabled(selected_subrange is None) 2333 + 2334 + def Zoom(self): 2335 + selected_subrange, selection_state = self.selection_state 2336 + if selected_subrange is None: 2337 + return 2338 + self.fwd_rect = selection_state 2339 + self.item.SetSelection(None) 2340 + self.PushBackState() 2341 + self.attrs.subrange.x = selected_subrange 2342 + self.forward_state = [] 2343 + self.forward_button.setDisabled(True) 2344 + self.selection_state = (None, None) 2345 + self.fwd_rect = None 2346 + self.attrs.scale.x = self.GetScaleForRangeX(self.attrs.subrange.x) 2347 + self.attrs.Update() 2348 + self.Update() 2349 + 2350 + # Slow initialization - perform non-GUI initialization in a separate thread and put up a modal message box while waiting 2351 + 2352 + class SlowInitClass(): 2353 + 2354 + def __init__(self, glb, title, init_fn): 2355 + self.init_fn = init_fn 2356 + self.done = False 2357 + self.result = None 2358 + 2359 + self.msg_box = QMessageBox(glb.mainwindow) 2360 + self.msg_box.setText("Initializing " + title + ". 
Please wait.") 2361 + self.msg_box.setWindowTitle("Initializing " + title) 2362 + self.msg_box.setWindowIcon(glb.mainwindow.style().standardIcon(QStyle.SP_MessageBoxInformation)) 2363 + 2364 + self.init_thread = Thread(self.ThreadFn, glb) 2365 + self.init_thread.done.connect(lambda: self.Done(), Qt.QueuedConnection) 2366 + 2367 + self.init_thread.start() 2368 + 2369 + def Done(self): 2370 + self.msg_box.done(0) 2371 + 2372 + def ThreadFn(self, glb): 2373 + conn_name = "SlowInitClass" + str(os.getpid()) 2374 + db, dbname = glb.dbref.Open(conn_name) 2375 + self.result = self.init_fn(db) 2376 + self.done = True 2377 + return (True, 0) 2378 + 2379 + def Result(self): 2380 + while not self.done: 2381 + self.msg_box.exec_() 2382 + self.init_thread.wait() 2383 + return self.result 2384 + 2385 + def SlowInit(glb, title, init_fn): 2386 + init = SlowInitClass(glb, title, init_fn) 2387 + return init.Result() 2388 + 2389 + # Time chart by CPU window 2390 + 2391 + class TimeChartByCPUWindow(QMdiSubWindow): 2392 + 2393 + def __init__(self, glb, parent=None): 2394 + super(TimeChartByCPUWindow, self).__init__(parent) 2395 + 2396 + self.glb = glb 2397 + self.machine_id = glb.HostMachineId() 2398 + self.collection_name = "SwitchGraphDataCollection " + str(self.machine_id) 2399 + 2400 + collection = LookupModel(self.collection_name) 2401 + if collection is None: 2402 + collection = SlowInit(glb, "Time Chart", self.Init) 2403 + 2404 + self.widget = SwitchGraphWidget(glb, collection, self) 2405 + self.view = self.widget 2406 + 2407 + self.base_title = "Time Chart by CPU" 2408 + self.setWindowTitle(self.base_title + self.widget.Title()) 2409 + self.widget.graph_title_changed.connect(self.GraphTitleChanged) 2410 + 2411 + self.setWidget(self.widget) 2412 + 2413 + AddSubWindow(glb.mainwindow.mdi_area, self, self.windowTitle()) 2414 + 2415 + def Init(self, db): 2416 + return LookupCreateModel(self.collection_name, lambda : SwitchGraphDataCollection(self.glb, db, self.machine_id)) 2417 + 2418 + def GraphTitleChanged(self, title): 2419 + self.setWindowTitle(self.base_title + " : " + title) 1117 2420 1118 2421 # Child data item finder 1119 2422 ··· 3429 2058 QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1") 3430 2059 if query.next(): 3431 2060 self.last_id = int(query.value(0)) 3432 - self.last_time = int(query.value(1)) 3433 - QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1") 3434 - if query.next(): 3435 - self.first_time = int(query.value(0)) 2061 + self.first_time = int(glb.HostStartTime()) 2062 + self.last_time = int(glb.HostFinishTime()) 3436 2063 if placeholder_text: 3437 2064 placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time) 3438 2065 ··· 4323 2954 <p class=c2><a href=#allbranches>1.3 All branches</a></p> 4324 2955 <p class=c2><a href=#selectedbranches>1.4 Selected branches</a></p> 4325 2956 <p class=c2><a href=#topcallsbyelapsedtime>1.5 Top calls by elapsed time</a></p> 4326 - <p class=c1><a href=#tables>2. Tables</a></p> 2957 + <p class=c1><a href=#charts>2. Charts</a></p> 2958 + <p class=c2><a href=#timechartbycpu>2.1 Time chart by CPU</a></p> 2959 + <p class=c1><a href=#tables>3. Tables</a></p> 4327 2960 <h1 id=reports>1. 
Reports</h1> 4328 2961 <h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2> 4329 2962 The result is a GUI window with a tree representing a context-sensitive ··· 4413 3042 The Top calls by elapsed time report displays calls in descending order of time elapsed between when the function was called and when it returned. 4414 3043 The data is reduced by various selection criteria. A dialog box displays available criteria which are AND'ed together. 4415 3044 If not all data is fetched, a Fetch bar is provided. Ctrl-F displays a Find bar. 4416 - <h1 id=tables>2. Tables</h1> 3045 + <h1 id=charts>2. Charts</h1> 3046 + <h2 id=timechartbycpu>2.1 Time chart by CPU</h2> 3047 + This chart displays context switch information when that data is available. Refer to context_switches_view on the Tables menu. 3048 + <h3>Features</h3> 3049 + <ol> 3050 + <li>Mouse over to highight the task and show the time</li> 3051 + <li>Drag the mouse to select a region and zoom by pushing the Zoom button</li> 3052 + <li>Go back and forward by pressing the arrow buttons</li> 3053 + <li>If call information is available, right-click to show a call tree opened to that task and time. 3054 + Note, the call tree may take some time to appear, and there may not be call information for the task or time selected. 3055 + </li> 3056 + </ol> 3057 + <h3>Important</h3> 3058 + The graph can be misleading in the following respects: 3059 + <ol> 3060 + <li>The graph shows the first task on each CPU as running from the beginning of the time range. 3061 + Because tracing might start on different CPUs at different times, that is not necessarily the case. 3062 + Refer to context_switches_view on the Tables menu to understand what data the graph is based upon.</li> 3063 + <li>Similarly, the last task on each CPU can be showing running longer than it really was. 3064 + Again, refer to context_switches_view on the Tables menu to understand what data the graph is based upon.</li> 3065 + <li>When the mouse is over a task, the highlighted task might not be visible on the legend without scrolling if the legend does not fit fully in the window</li> 3066 + </ol> 3067 + <h1 id=tables>3. Tables</h1> 4417 3068 The Tables menu shows all tables and views in the database. Most tables have an associated view 4418 3069 which displays the information in a more friendly way. Not all data for large tables is fetched 4419 3070 immediately. More records can be fetched using the Fetch bar provided. 
Columns can be sorted, ··· 4631 3238 if IsSelectable(glb.db, "calls"): 4632 3239 reports_menu.addAction(CreateAction("&Top calls by elapsed time", "Create a new window displaying top calls by elapsed time", self.NewTopCalls, self)) 4633 3240 3241 + if IsSelectable(glb.db, "context_switches"): 3242 + charts_menu = menu.addMenu("&Charts") 3243 + charts_menu.addAction(CreateAction("&Time chart by CPU", "Create a new window displaying time charts by CPU", self.TimeChartByCPU, self)) 3244 + 4634 3245 self.TableMenu(GetTableList(glb), menu) 4635 3246 4636 3247 self.window_menu = WindowMenu(self.mdi_area, menu) ··· 4694 3297 reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewBranchView(x), self)) 4695 3298 label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" 4696 3299 reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewSelectedBranchView(x), self)) 3300 + 3301 + def TimeChartByCPU(self): 3302 + TimeChartByCPUWindow(self.glb, self) 4697 3303 4698 3304 def TableMenu(self, tables, menu): 4699 3305 table_menu = menu.addMenu("&Tables") ··· 4870 3470 self.have_disassembler = True 4871 3471 except: 4872 3472 self.have_disassembler = False 3473 + self.host_machine_id = 0 3474 + self.host_start_time = 0 3475 + self.host_finish_time = 0 4873 3476 4874 3477 def FileFromBuildId(self, build_id): 4875 3478 file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf" ··· 4904 3501 x.Shutdown() 4905 3502 except: 4906 3503 pass 3504 + 3505 + def GetHostMachineId(self): 3506 + query = QSqlQuery(self.db) 3507 + QueryExec(query, "SELECT id FROM machines WHERE pid = -1") 3508 + if query.next(): 3509 + self.host_machine_id = query.value(0) 3510 + else: 3511 + self.host_machine_id = 0 3512 + return self.host_machine_id 3513 + 3514 + def HostMachineId(self): 3515 + if self.host_machine_id: 3516 + return self.host_machine_id 3517 + return self.GetHostMachineId() 3518 + 3519 + def SelectValue(self, sql): 3520 + query = QSqlQuery(self.db) 3521 + try: 3522 + QueryExec(query, sql) 3523 + except: 3524 + return None 3525 + if query.next(): 3526 + return Decimal(query.value(0)) 3527 + return None 3528 + 3529 + def SwitchesMinTime(self, machine_id): 3530 + return self.SelectValue("SELECT time" 3531 + " FROM context_switches" 3532 + " WHERE time != 0 AND machine_id = " + str(machine_id) + 3533 + " ORDER BY id LIMIT 1") 3534 + 3535 + def SwitchesMaxTime(self, machine_id): 3536 + return self.SelectValue("SELECT time" 3537 + " FROM context_switches" 3538 + " WHERE time != 0 AND machine_id = " + str(machine_id) + 3539 + " ORDER BY id DESC LIMIT 1") 3540 + 3541 + def SamplesMinTime(self, machine_id): 3542 + return self.SelectValue("SELECT time" 3543 + " FROM samples" 3544 + " WHERE time != 0 AND machine_id = " + str(machine_id) + 3545 + " ORDER BY id LIMIT 1") 3546 + 3547 + def SamplesMaxTime(self, machine_id): 3548 + return self.SelectValue("SELECT time" 3549 + " FROM samples" 3550 + " WHERE time != 0 AND machine_id = " + str(machine_id) + 3551 + " ORDER BY id DESC LIMIT 1") 3552 + 3553 + def CallsMinTime(self, machine_id): 3554 + return self.SelectValue("SELECT calls.call_time" 3555 + " FROM calls" 3556 + " INNER JOIN threads ON threads.thread_id = calls.thread_id" 3557 + " WHERE calls.call_time != 0 AND threads.machine_id = " + str(machine_id) + 3558 + " ORDER BY calls.id LIMIT 1") 3559 + 3560 + def CallsMaxTime(self, machine_id): 3561 
+ return self.SelectValue("SELECT calls.return_time" 3562 + " FROM calls" 3563 + " INNER JOIN threads ON threads.thread_id = calls.thread_id" 3564 + " WHERE calls.return_time != 0 AND threads.machine_id = " + str(machine_id) + 3565 + " ORDER BY calls.return_time DESC LIMIT 1") 3566 + 3567 + def GetStartTime(self, machine_id): 3568 + t0 = self.SwitchesMinTime(machine_id) 3569 + t1 = self.SamplesMinTime(machine_id) 3570 + t2 = self.CallsMinTime(machine_id) 3571 + if t0 is None or (not(t1 is None) and t1 < t0): 3572 + t0 = t1 3573 + if t0 is None or (not(t2 is None) and t2 < t0): 3574 + t0 = t2 3575 + return t0 3576 + 3577 + def GetFinishTime(self, machine_id): 3578 + t0 = self.SwitchesMaxTime(machine_id) 3579 + t1 = self.SamplesMaxTime(machine_id) 3580 + t2 = self.CallsMaxTime(machine_id) 3581 + if t0 is None or (not(t1 is None) and t1 > t0): 3582 + t0 = t1 3583 + if t0 is None or (not(t2 is None) and t2 > t0): 3584 + t0 = t2 3585 + return t0 3586 + 3587 + def HostStartTime(self): 3588 + if self.host_start_time: 3589 + return self.host_start_time 3590 + self.host_start_time = self.GetStartTime(self.HostMachineId()) 3591 + return self.host_start_time 3592 + 3593 + def HostFinishTime(self): 3594 + if self.host_finish_time: 3595 + return self.host_finish_time 3596 + self.host_finish_time = self.GetFinishTime(self.HostMachineId()) 3597 + return self.host_finish_time 3598 + 3599 + def StartTime(self, machine_id): 3600 + if machine_id == self.HostMachineId(): 3601 + return self.HostStartTime() 3602 + return self.GetStartTime(machine_id) 3603 + 3604 + def FinishTime(self, machine_id): 3605 + if machine_id == self.HostMachineId(): 3606 + return self.HostFinishTime() 3607 + return self.GetFinishTime(machine_id) 4907 3608 4908 3609 # Database reference 4909 3610
+4 -3
tools/perf/tests/backward-ring-buffer.c
··· 13 13 #include "util/mmap.h" 14 14 #include <errno.h> 15 15 #include <linux/string.h> 16 + #include <perf/mmap.h> 16 17 17 18 #define NR_ITERS 111 18 19 ··· 38 37 struct mmap *map = &evlist->overwrite_mmap[i]; 39 38 union perf_event *event; 40 39 41 - perf_mmap__read_init(map); 42 - while ((event = perf_mmap__read_event(map)) != NULL) { 40 + perf_mmap__read_init(&map->core); 41 + while ((event = perf_mmap__read_event(&map->core)) != NULL) { 43 42 const u32 type = event->header.type; 44 43 45 44 switch (type) { ··· 54 53 return TEST_FAIL; 55 54 } 56 55 } 57 - perf_mmap__read_done(map); 56 + perf_mmap__read_done(&map->core); 58 57 } 59 58 return TEST_OK; 60 59 }
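All of these test conversions follow the same mechanical pattern: the reader-side helpers now live in libperf's <perf/mmap.h> and operate on the embedded perf_mmap, so call sites pass &map->core instead of the tool-side struct mmap. A minimal sketch of the resulting read loop, with a hypothetical process() callback standing in for each test's event handling and error handling elided:

#include <perf/mmap.h>

/* Sketch only: drain one ring buffer with the libperf API. */
static void drain(struct mmap *map)
{
	union perf_event *event;

	if (perf_mmap__read_init(&map->core) < 0)
		return;				/* nothing to read */

	while ((event = perf_mmap__read_event(&map->core)) != NULL) {
		process(event);			/* hypothetical consumer */
		perf_mmap__consume(&map->core);	/* release the record */
	}
	perf_mmap__read_done(&map->core);
}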
+4 -3
tools/perf/tests/bpf.c
··· 15 15 #include <linux/string.h> 16 16 #include <api/fs/fs.h> 17 17 #include <bpf/bpf.h> 18 + #include <perf/mmap.h> 18 19 #include "tests.h" 19 20 #include "llvm.h" 20 21 #include "debug.h" ··· 185 184 struct mmap *md; 186 185 187 186 md = &evlist->mmap[i]; 188 - if (perf_mmap__read_init(md) < 0) 187 + if (perf_mmap__read_init(&md->core) < 0) 189 188 continue; 190 189 191 - while ((event = perf_mmap__read_event(md)) != NULL) { 190 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 192 191 const u32 type = event->header.type; 193 192 194 193 if (type == PERF_RECORD_SAMPLE) 195 194 count ++; 196 195 } 197 - perf_mmap__read_done(md); 196 + perf_mmap__read_done(&md->core); 198 197 } 199 198 200 199 if (count != expect) {
+5 -4
tools/perf/tests/code-reading.c
··· 10 10 #include <sys/param.h> 11 11 #include <perf/cpumap.h> 12 12 #include <perf/evlist.h> 13 + #include <perf/mmap.h> 13 14 14 15 #include "debug.h" 15 16 #include "dso.h" ··· 426 425 427 426 for (i = 0; i < evlist->core.nr_mmaps; i++) { 428 427 md = &evlist->mmap[i]; 429 - if (perf_mmap__read_init(md) < 0) 428 + if (perf_mmap__read_init(&md->core) < 0) 430 429 continue; 431 430 432 - while ((event = perf_mmap__read_event(md)) != NULL) { 431 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 433 432 ret = process_event(machine, evlist, event, state); 434 - perf_mmap__consume(md); 433 + perf_mmap__consume(&md->core); 435 434 if (ret < 0) 436 435 return ret; 437 436 } 438 - perf_mmap__read_done(md); 437 + perf_mmap__read_done(&md->core); 439 438 } 440 439 return 0; 441 440 }
+5 -4
tools/perf/tests/keep-tracking.c
··· 5 5 #include <sys/prctl.h> 6 6 #include <perf/cpumap.h> 7 7 #include <perf/evlist.h> 8 + #include <perf/mmap.h> 8 9 9 10 #include "debug.h" 10 11 #include "parse-events.h" ··· 39 38 found = 0; 40 39 for (i = 0; i < evlist->core.nr_mmaps; i++) { 41 40 md = &evlist->mmap[i]; 42 - if (perf_mmap__read_init(md) < 0) 41 + if (perf_mmap__read_init(&md->core) < 0) 43 42 continue; 44 - while ((event = perf_mmap__read_event(md)) != NULL) { 43 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 45 44 if (event->header.type == PERF_RECORD_COMM && 46 45 (pid_t)event->comm.pid == getpid() && 47 46 (pid_t)event->comm.tid == getpid() && 48 47 strcmp(event->comm.comm, comm) == 0) 49 48 found += 1; 50 - perf_mmap__consume(md); 49 + perf_mmap__consume(&md->core); 51 50 } 52 - perf_mmap__read_done(md); 51 + perf_mmap__read_done(&md->core); 53 52 } 54 53 return found; 55 54 }
+5 -4
tools/perf/tests/mmap-basic.c
··· 16 16 #include <linux/kernel.h> 17 17 #include <linux/string.h> 18 18 #include <perf/evlist.h> 19 + #include <perf/mmap.h> 19 20 20 21 /* 21 22 * This test will generate random numbers of calls to some getpid syscalls, ··· 114 113 } 115 114 116 115 md = &evlist->mmap[0]; 117 - if (perf_mmap__read_init(md) < 0) 116 + if (perf_mmap__read_init(&md->core) < 0) 118 117 goto out_init; 119 118 120 - while ((event = perf_mmap__read_event(md)) != NULL) { 119 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 121 120 struct perf_sample sample; 122 121 123 122 if (event->header.type != PERF_RECORD_SAMPLE) { ··· 140 139 goto out_delete_evlist; 141 140 } 142 141 nr_events[evsel->idx]++; 143 - perf_mmap__consume(md); 142 + perf_mmap__consume(&md->core); 144 143 } 145 - perf_mmap__read_done(md); 144 + perf_mmap__read_done(&md->core); 146 145 147 146 out_init: 148 147 err = 0;
+5 -4
tools/perf/tests/openat-syscall-tp-fields.c
··· 13 13 #include "debug.h" 14 14 #include "util/mmap.h" 15 15 #include <errno.h> 16 + #include <perf/mmap.h> 16 17 17 18 #ifndef O_DIRECTORY 18 19 #define O_DIRECTORY 00200000 ··· 93 92 struct mmap *md; 94 93 95 94 md = &evlist->mmap[i]; 96 - if (perf_mmap__read_init(md) < 0) 95 + if (perf_mmap__read_init(&md->core) < 0) 97 96 continue; 98 97 99 - while ((event = perf_mmap__read_event(md)) != NULL) { 98 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 100 99 const u32 type = event->header.type; 101 100 int tp_flags; 102 101 struct perf_sample sample; ··· 104 103 ++nr_events; 105 104 106 105 if (type != PERF_RECORD_SAMPLE) { 107 - perf_mmap__consume(md); 106 + perf_mmap__consume(&md->core); 108 107 continue; 109 108 } 110 109 ··· 124 123 125 124 goto out_ok; 126 125 } 127 - perf_mmap__read_done(md); 126 + perf_mmap__read_done(&md->core); 128 127 } 129 128 130 129 if (nr_events == before)
+5 -4
tools/perf/tests/perf-record.c
··· 6 6 #include <pthread.h> 7 7 8 8 #include <sched.h> 9 + #include <perf/mmap.h> 9 10 #include "evlist.h" 10 11 #include "evsel.h" 11 12 #include "debug.h" ··· 171 170 struct mmap *md; 172 171 173 172 md = &evlist->mmap[i]; 174 - if (perf_mmap__read_init(md) < 0) 173 + if (perf_mmap__read_init(&md->core) < 0) 175 174 continue; 176 175 177 - while ((event = perf_mmap__read_event(md)) != NULL) { 176 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 178 177 const u32 type = event->header.type; 179 178 const char *name = perf_event__name(type); 180 179 ··· 277 276 ++errs; 278 277 } 279 278 280 - perf_mmap__consume(md); 279 + perf_mmap__consume(&md->core); 281 280 } 282 - perf_mmap__read_done(md); 281 + perf_mmap__read_done(&md->core); 283 282 } 284 283 285 284 /*
+5 -4
tools/perf/tests/sw-clock.c
··· 15 15 #include "util/mmap.h" 16 16 #include "util/thread_map.h" 17 17 #include <perf/evlist.h> 18 + #include <perf/mmap.h> 18 19 19 20 #define NR_LOOPS 10000000 20 21 ··· 100 99 evlist__disable(evlist); 101 100 102 101 md = &evlist->mmap[0]; 103 - if (perf_mmap__read_init(md) < 0) 102 + if (perf_mmap__read_init(&md->core) < 0) 104 103 goto out_init; 105 104 106 - while ((event = perf_mmap__read_event(md)) != NULL) { 105 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 107 106 struct perf_sample sample; 108 107 109 108 if (event->header.type != PERF_RECORD_SAMPLE) ··· 118 117 total_periods += sample.period; 119 118 nr_samples++; 120 119 next_event: 121 - perf_mmap__consume(md); 120 + perf_mmap__consume(&md->core); 122 121 } 123 - perf_mmap__read_done(md); 122 + perf_mmap__read_done(&md->core); 124 123 125 124 out_init: 126 125 if ((u64) nr_samples == total_periods) {
+5 -4
tools/perf/tests/switch-tracking.c
··· 8 8 #include <linux/zalloc.h> 9 9 #include <perf/cpumap.h> 10 10 #include <perf/evlist.h> 11 + #include <perf/mmap.h> 11 12 12 13 #include "debug.h" 13 14 #include "parse-events.h" ··· 270 269 271 270 for (i = 0; i < evlist->core.nr_mmaps; i++) { 272 271 md = &evlist->mmap[i]; 273 - if (perf_mmap__read_init(md) < 0) 272 + if (perf_mmap__read_init(&md->core) < 0) 274 273 continue; 275 274 276 - while ((event = perf_mmap__read_event(md)) != NULL) { 275 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 277 276 cnt += 1; 278 277 ret = add_event(evlist, &events, event); 279 - perf_mmap__consume(md); 278 + perf_mmap__consume(&md->core); 280 279 if (ret < 0) 281 280 goto out_free_nodes; 282 281 } 283 - perf_mmap__read_done(md); 282 + perf_mmap__read_done(&md->core); 284 283 } 285 284 286 285 events_array = calloc(cnt, sizeof(struct event_node));
+5 -4
tools/perf/tests/task-exit.c
··· 12 12 #include <linux/string.h> 13 13 #include <perf/cpumap.h> 14 14 #include <perf/evlist.h> 15 + #include <perf/mmap.h> 15 16 16 17 static int exited; 17 18 static int nr_exit; ··· 118 117 119 118 retry: 120 119 md = &evlist->mmap[0]; 121 - if (perf_mmap__read_init(md) < 0) 120 + if (perf_mmap__read_init(&md->core) < 0) 122 121 goto out_init; 123 122 124 - while ((event = perf_mmap__read_event(md)) != NULL) { 123 + while ((event = perf_mmap__read_event(&md->core)) != NULL) { 125 124 if (event->header.type == PERF_RECORD_EXIT) 126 125 nr_exit++; 127 126 128 - perf_mmap__consume(md); 127 + perf_mmap__consume(&md->core); 129 128 } 130 - perf_mmap__read_done(md); 129 + perf_mmap__read_done(&md->core); 131 130 132 131 out_init: 133 132 if (!exited || !nr_exit) {
+1
tools/perf/trace/beauty/Build
··· 17 17 perf-y += socket.o 18 18 perf-y += statx.o 19 19 perf-y += sync_file_range.o 20 + perf-y += tracepoints/
+15 -1
tools/perf/trace/beauty/beauty.h
··· 5 5 #include <linux/kernel.h> 6 6 #include <linux/types.h> 7 7 #include <sys/types.h> 8 + #include <stdbool.h> 8 9 9 10 struct strarray { 10 - int offset; 11 + u64 offset; 11 12 int nr_entries; 12 13 const char *prefix; 13 14 const char **entries; ··· 29 28 30 29 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val); 31 30 size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, bool show_prefix, unsigned long flags); 31 + 32 + bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret); 32 33 33 34 struct trace; 34 35 struct thread; ··· 53 50 } 54 51 55 52 size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val); 53 + 54 + bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret); 56 55 57 56 size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size); 58 57 ··· 83 78 u64 value[]; 84 79 }; 85 80 81 + struct syscall_arg_fmt; 82 + 86 83 /** 87 84 * @val: value of syscall argument being formatted 88 85 * @args: All the args, use syscall_args__val(arg, nth) to access one ··· 101 94 struct syscall_arg { 102 95 unsigned long val; 103 96 unsigned char *args; 97 + struct syscall_arg_fmt *fmt; 104 98 struct { 105 99 struct augmented_arg *args; 106 100 int size; ··· 118 110 119 111 size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg); 120 112 #define SCA_STRARRAY_FLAGS syscall_arg__scnprintf_strarray_flags 113 + 114 + size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg *arg); 115 + #define SCA_X86_MSR syscall_arg__scnprintf_x86_MSR 116 + 117 + bool syscall_arg__strtoul_x86_MSR(char *bf, size_t size, struct syscall_arg *arg, u64 *ret); 118 + #define STUL_X86_MSR syscall_arg__strtoul_x86_MSR 121 119 122 120 size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); 123 121 #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
+1
tools/perf/trace/beauty/tracepoints/Build
··· 1 + perf-y += x86_msr.o
+39
tools/perf/trace/beauty/tracepoints/x86_msr.c
··· 1 + // SPDX-License-Identifier: LGPL-2.1 2 + /* 3 + * trace/beauty/x86_msr.c 4 + * 5 + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 6 + */ 7 + 8 + #include "trace/beauty/beauty.h" 9 + 10 + #include "trace/beauty/generated/x86_arch_MSRs_array.c" 11 + 12 + static DEFINE_STRARRAY(x86_MSRs, "MSR_"); 13 + static DEFINE_STRARRAY_OFFSET(x86_64_specific_MSRs, "MSR_", x86_64_specific_MSRs_offset); 14 + static DEFINE_STRARRAY_OFFSET(x86_AMD_V_KVM_MSRs, "MSR_", x86_AMD_V_KVM_MSRs_offset); 15 + 16 + static struct strarray *x86_MSRs_tables[] = { 17 + &strarray__x86_MSRs, 18 + &strarray__x86_64_specific_MSRs, 19 + &strarray__x86_AMD_V_KVM_MSRs, 20 + }; 21 + 22 + static DEFINE_STRARRAYS(x86_MSRs_tables); 23 + 24 + static size_t x86_MSR__scnprintf(unsigned long msr, char *bf, size_t size, bool show_prefix) 25 + { 26 + return strarrays__scnprintf(&strarrays__x86_MSRs_tables, bf, size, "%#x", show_prefix, msr); 27 + } 28 + 29 + size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg *arg) 30 + { 31 + unsigned long flags = arg->val; 32 + 33 + return x86_MSR__scnprintf(flags, bf, size, arg->show_string_prefix); 34 + } 35 + 36 + bool syscall_arg__strtoul_x86_MSR(char *bf, size_t size, struct syscall_arg *arg __maybe_unused, u64 *ret) 37 + { 38 + return strarrays__strtoul(&strarrays__x86_MSRs_tables, bf, size, ret); 39 + }
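Taken together, the two directions round-trip between MSR numbers and names: ->scnprintf() renders the msr:{read,write}_msr argument for the trace output, while ->strtoul() resolves a name in a --filter expression back to the number the in-kernel filter needs. A sketch of that round trip through the strarray tables; the 0x10 (IA32_TSC) value is an assumed example and buffer handling is simplified:

#include <assert.h>
#include <string.h>

static void msr_round_trip_example(void)
{
	char bf[64];
	u64 val, msr = 0x10;	/* IA32_TSC: assumed example value */

	/* integer -> string, as done when formatting the tracepoint arg */
	strarrays__scnprintf(&strarrays__x86_MSRs_tables, bf, sizeof(bf),
			     "%#x", false, msr);	/* -> "IA32_TSC" */

	/* string -> integer, as done when parsing a --filter expression */
	if (strarrays__strtoul(&strarrays__x86_MSRs_tables, bf, strlen(bf), &val))
		assert(val == msr);
}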
+40
tools/perf/trace/beauty/tracepoints/x86_msr.sh
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: LGPL-2.1 3 + 4 + if [ $# -ne 1 ] ; then 5 + arch_x86_header_dir=tools/arch/x86/include/asm/ 6 + else 7 + arch_x86_header_dir=$1 8 + fi 9 + 10 + x86_msr_index=${arch_x86_header_dir}/msr-index.h 11 + 12 + # Support all of them later, with some hash table; for now keep 13 + # just the ones starting with 0x00000 so as to have a simple 14 + # array. 15 + 16 + printf "static const char *x86_MSRs[] = {\n" 17 + regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*' 18 + egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|AMD64|IA32_TSCDEADLINE|IDT_FCR4)' | \ 19 + sed -r "s/$regex/\2 \1/g" | sort -n | \ 20 + xargs printf "\t[%s] = \"%s\",\n" 21 + printf "};\n\n" 22 + 23 + # Remove MSR_K6_WHCR, which clashes with MSR_LSTAR 24 + regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0000[[:xdigit:]]+)[[:space:]]*.*' 25 + printf "#define x86_64_specific_MSRs_offset " 26 + egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1 27 + printf "static const char *x86_64_specific_MSRs[] = {\n" 28 + egrep $regex ${x86_msr_index} | \ 29 + sed -r "s/$regex/\2 \1/g" | egrep -vw 'K6_WHCR' | sort -n | \ 30 + xargs printf "\t[%s - x86_64_specific_MSRs_offset] = \"%s\",\n" 31 + printf "};\n\n" 32 + 33 + regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0010[[:xdigit:]]+)[[:space:]]*.*' 34 + printf "#define x86_AMD_V_KVM_MSRs_offset " 35 + egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1 36 + printf "static const char *x86_AMD_V_KVM_MSRs[] = {\n" 37 + egrep $regex ${x86_msr_index} | \ 38 + sed -r "s/$regex/\2 \1/g" | sort -n | \ 39 + xargs printf "\t[%s - x86_AMD_V_KVM_MSRs_offset] = \"%s\",\n" 40 + printf "};\n"
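For reference, the generated x86_arch_MSRs_array.c that x86_msr.c includes then has roughly this shape; the entries shown here are illustrative examples, not the exact generated contents:

static const char *x86_MSRs[] = {
	[0x00000010] = "IA32_TSC",	/* example entry */
	[0x0000001b] = "IA32_APICBASE",	/* example entry */
};

#define x86_64_specific_MSRs_offset 0xc0000080
static const char *x86_64_specific_MSRs[] = {
	[0xc0000080 - x86_64_specific_MSRs_offset] = "EFER",
	[0xc0000082 - x86_64_specific_MSRs_offset] = "LSTAR",
};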
+1
tools/perf/util/Build
··· 95 95 perf-y += call-path.o 96 96 perf-y += rwsem.o 97 97 perf-y += thread-stack.o 98 + perf-y += spark.o 98 99 perf-$(CONFIG_AUXTRACE) += auxtrace.o 99 100 perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ 100 101 perf-$(CONFIG_AUXTRACE) += intel-pt.o
+4
tools/perf/util/annotate.c
··· 853 853 ch[offset].start < start) 854 854 return 0; 855 855 } 856 + 857 + if (ch[offset].num < NUM_SPARKS) 858 + ch[offset].cycles_spark[ch[offset].num] = cycles; 859 + 856 860 ch[offset].have_start = have_start; 857 861 ch[offset].start = start; 858 862 ch[offset].cycles += cycles;
+2
tools/perf/util/annotate.h
··· 11 11 #include <pthread.h> 12 12 #include <asm/bug.h> 13 13 #include "symbol_conf.h" 14 + #include "spark.h" 14 15 15 16 struct hist_browser_timer; 16 17 struct hist_entry; ··· 236 235 u64 cycles_aggr; 237 236 u64 cycles_max; 238 237 u64 cycles_min; 238 + s64 cycles_spark[NUM_SPARKS]; 239 239 u32 num; 240 240 u32 num_aggr; 241 241 u8 have_start;
+16
tools/perf/util/env.c
··· 2 2 #include "cpumap.h" 3 3 #include "debug.h" 4 4 #include "env.h" 5 + #include "util/header.h" 5 6 #include <linux/ctype.h> 6 7 #include <linux/zalloc.h> 7 8 #include "bpf-event.h" ··· 254 253 } 255 254 256 255 env->nr_cpus_avail = nr_cpus; 256 + return 0; 257 + } 258 + 259 + int perf_env__read_cpuid(struct perf_env *env) 260 + { 261 + char cpuid[128]; 262 + int err = get_cpuid(cpuid, sizeof(cpuid)); 263 + 264 + if (err) 265 + return err; 266 + 267 + free(env->cpuid); 268 + env->cpuid = strdup(cpuid); 269 + if (env->cpuid == NULL) 270 + return ENOMEM; 257 271 return 0; 258 272 } 259 273
+1
tools/perf/util/env.h
··· 104 104 105 105 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); 106 106 107 + int perf_env__read_cpuid(struct perf_env *env); 107 108 int perf_env__read_cpu_topology_map(struct perf_env *env); 108 109 109 110 void cpu_cache_level__free(struct cpu_cache_level *cache);
+132 -192
tools/perf/util/evlist.c
··· 42 42 #include <perf/evlist.h> 43 43 #include <perf/evsel.h> 44 44 #include <perf/cpumap.h> 45 + #include <perf/mmap.h> 45 46 46 47 #include <internal/xyarray.h> 47 48 ··· 58 57 { 59 58 perf_evlist__init(&evlist->core); 60 59 perf_evlist__set_maps(&evlist->core, cpus, threads); 61 - fdarray__init(&evlist->core.pollfd, 64); 62 60 evlist->workload.pid = -1; 63 61 evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; 64 62 } ··· 138 138 { 139 139 zfree(&evlist->mmap); 140 140 zfree(&evlist->overwrite_mmap); 141 - fdarray__exit(&evlist->core.pollfd); 141 + perf_evlist__exit(&evlist->core); 142 142 } 143 143 144 144 void evlist__delete(struct evlist *evlist) ··· 148 148 149 149 evlist__munmap(evlist); 150 150 evlist__close(evlist); 151 - perf_cpu_map__put(evlist->core.cpus); 152 - perf_thread_map__put(evlist->core.threads); 153 - evlist->core.cpus = NULL; 154 - evlist->core.threads = NULL; 155 151 evlist__purge(evlist); 156 152 evlist__exit(evlist); 157 153 free(evlist); ··· 180 184 list_del_init(&evsel->core.node); 181 185 evlist__add(evlist, evsel); 182 186 } 187 + } 188 + 189 + int __evlist__set_tracepoints_handlers(struct evlist *evlist, 190 + const struct evsel_str_handler *assocs, size_t nr_assocs) 191 + { 192 + struct evsel *evsel; 193 + size_t i; 194 + int err; 195 + 196 + for (i = 0; i < nr_assocs; i++) { 197 + // Adding a handler for an event not in this evlist, just ignore it. 198 + evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name); 199 + if (evsel == NULL) 200 + continue; 201 + 202 + err = -EEXIST; 203 + if (evsel->handler != NULL) 204 + goto out; 205 + evsel->handler = assocs[i].handler; 206 + } 207 + 208 + err = 0; 209 + out: 210 + return err; 183 211 } 184 212 185 213 void __perf_evlist__set_leader(struct list_head *list) ··· 423 403 return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN); 424 404 } 425 405 426 - static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, 427 - void *arg __maybe_unused) 428 - { 429 - struct mmap *map = fda->priv[fd].ptr; 430 - 431 - if (map) 432 - perf_mmap__put(map); 433 - } 434 - 435 406 int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) 436 407 { 437 - return fdarray__filter(&evlist->core.pollfd, revents_and_mask, 438 - perf_evlist__munmap_filtered, NULL); 408 + return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask); 439 409 } 440 410 441 411 int evlist__poll(struct evlist *evlist, int timeout) 442 412 { 443 413 return perf_evlist__poll(&evlist->core, timeout); 444 - } 445 - 446 - static void perf_evlist__set_sid_idx(struct evlist *evlist, 447 - struct evsel *evsel, int idx, int cpu, 448 - int thread) 449 - { 450 - struct perf_sample_id *sid = SID(evsel, cpu, thread); 451 - sid->idx = idx; 452 - if (evlist->core.cpus && cpu >= 0) 453 - sid->cpu = evlist->core.cpus->map[cpu]; 454 - else 455 - sid->cpu = -1; 456 - if (!evsel->core.system_wide && evlist->core.threads && thread >= 0) 457 - sid->tid = perf_thread_map__pid(evlist->core.threads, thread); 458 - else 459 - sid->tid = -1; 460 414 } 461 415 462 416 struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id) ··· 571 577 572 578 if (evlist->mmap) 573 579 for (i = 0; i < evlist->core.nr_mmaps; i++) 574 - perf_mmap__munmap(&evlist->mmap[i]); 580 + perf_mmap__munmap(&evlist->mmap[i].core); 575 581 576 582 if (evlist->overwrite_mmap) 577 583 for (i = 0; i < evlist->core.nr_mmaps; i++) 578 - perf_mmap__munmap(&evlist->overwrite_mmap[i]); 584 + perf_mmap__munmap(&evlist->overwrite_mmap[i].core); 579 585 } 580 586 581 
587 void evlist__munmap(struct evlist *evlist) ··· 583 589 evlist__munmap_nofree(evlist); 584 590 zfree(&evlist->mmap); 585 591 zfree(&evlist->overwrite_mmap); 592 + } 593 + 594 + static void perf_mmap__unmap_cb(struct perf_mmap *map) 595 + { 596 + struct mmap *m = container_of(map, struct mmap, core); 597 + 598 + mmap__munmap(m); 586 599 } 587 600 588 601 static struct mmap *evlist__alloc_mmap(struct evlist *evlist, ··· 606 605 return NULL; 607 606 608 607 for (i = 0; i < evlist->core.nr_mmaps; i++) { 609 - map[i].core.fd = -1; 610 - map[i].core.overwrite = overwrite; 611 608 /* 612 609 * When the perf_mmap() call is made we grab one refcount, plus 613 610 * one extra to let perf_mmap__consume() get the last ··· 615 616 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and 616 617 * thus does perf_mmap__get() on it. 617 618 */ 618 - refcount_set(&map[i].core.refcnt, 0); 619 + perf_mmap__init(&map[i].core, overwrite, perf_mmap__unmap_cb); 619 620 } 621 + 620 622 return map; 621 623 } 622 624 623 - static bool 624 - perf_evlist__should_poll(struct evlist *evlist __maybe_unused, 625 - struct evsel *evsel) 625 + static void 626 + perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, 627 + struct perf_mmap_param *_mp, 628 + int idx, bool per_cpu) 626 629 { 627 - if (evsel->core.attr.write_backward) 628 - return false; 629 - return true; 630 + struct evlist *evlist = container_of(_evlist, struct evlist, core); 631 + struct mmap_params *mp = container_of(_mp, struct mmap_params, core); 632 + 633 + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu); 630 634 } 631 635 632 - static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, 633 - struct mmap_params *mp, int cpu_idx, 634 - int thread, int *_output, int *_output_overwrite) 636 + static struct perf_mmap* 637 + perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) 635 638 { 636 - struct evsel *evsel; 637 - int revent; 638 - int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx); 639 + struct evlist *evlist = container_of(_evlist, struct evlist, core); 640 + struct mmap *maps = evlist->mmap; 639 641 640 - evlist__for_each_entry(evlist, evsel) { 641 - struct mmap *maps = evlist->mmap; 642 - int *output = _output; 643 - int fd; 644 - int cpu; 642 + if (overwrite) { 643 + maps = evlist->overwrite_mmap; 645 644 646 - mp->prot = PROT_READ | PROT_WRITE; 647 - if (evsel->core.attr.write_backward) { 648 - output = _output_overwrite; 649 - maps = evlist->overwrite_mmap; 645 + if (!maps) { 646 + maps = evlist__alloc_mmap(evlist, true); 647 + if (!maps) 648 + return NULL; 650 649 651 - if (!maps) { 652 - maps = evlist__alloc_mmap(evlist, true); 653 - if (!maps) 654 - return -1; 655 - evlist->overwrite_mmap = maps; 656 - if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) 657 - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); 658 - } 659 - mp->prot &= ~PROT_WRITE; 660 - } 661 - 662 - if (evsel->core.system_wide && thread) 663 - continue; 664 - 665 - cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu); 666 - if (cpu == -1) 667 - continue; 668 - 669 - fd = FD(evsel, cpu, thread); 670 - 671 - if (*output == -1) { 672 - *output = fd; 673 - 674 - if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0) 675 - return -1; 676 - } else { 677 - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 678 - return -1; 679 - 680 - perf_mmap__get(&maps[idx]); 681 - } 682 - 683 - revent = perf_evlist__should_poll(evlist, evsel) ? 
POLLIN : 0; 684 - 685 - /* 686 - * The system_wide flag causes a selected event to be opened 687 - * always without a pid. Consequently it will never get a 688 - * POLLHUP, but it is used for tracking in combination with 689 - * other events, so it should not need to be polled anyway. 690 - * Therefore don't add it for polling. 691 - */ 692 - if (!evsel->core.system_wide && 693 - perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) { 694 - perf_mmap__put(&maps[idx]); 695 - return -1; 696 - } 697 - 698 - if (evsel->core.attr.read_format & PERF_FORMAT_ID) { 699 - if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread, 700 - fd) < 0) 701 - return -1; 702 - perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 703 - thread); 650 + evlist->overwrite_mmap = maps; 651 + if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) 652 + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); 704 653 } 705 654 } 706 655 707 - return 0; 656 + return &maps[idx].core; 708 657 } 709 658 710 - static int evlist__mmap_per_cpu(struct evlist *evlist, 711 - struct mmap_params *mp) 659 + static int 660 + perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp, 661 + int output, int cpu) 712 662 { 713 - int cpu, thread; 714 - int nr_cpus = perf_cpu_map__nr(evlist->core.cpus); 715 - int nr_threads = perf_thread_map__nr(evlist->core.threads); 663 + struct mmap *map = container_of(_map, struct mmap, core); 664 + struct mmap_params *mp = container_of(_mp, struct mmap_params, core); 716 665 717 - pr_debug2("perf event ring buffer mmapped per cpu\n"); 718 - for (cpu = 0; cpu < nr_cpus; cpu++) { 719 - int output = -1; 720 - int output_overwrite = -1; 721 - 722 - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, 723 - true); 724 - 725 - for (thread = 0; thread < nr_threads; thread++) { 726 - if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu, 727 - thread, &output, &output_overwrite)) 728 - goto out_unmap; 729 - } 730 - } 731 - 732 - return 0; 733 - 734 - out_unmap: 735 - evlist__munmap_nofree(evlist); 736 - return -1; 737 - } 738 - 739 - static int evlist__mmap_per_thread(struct evlist *evlist, 740 - struct mmap_params *mp) 741 - { 742 - int thread; 743 - int nr_threads = perf_thread_map__nr(evlist->core.threads); 744 - 745 - pr_debug2("perf event ring buffer mmapped per thread\n"); 746 - for (thread = 0; thread < nr_threads; thread++) { 747 - int output = -1; 748 - int output_overwrite = -1; 749 - 750 - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, 751 - false); 752 - 753 - if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, 754 - &output, &output_overwrite)) 755 - goto out_unmap; 756 - } 757 - 758 - return 0; 759 - 760 - out_unmap: 761 - evlist__munmap_nofree(evlist); 762 - return -1; 666 + return mmap__mmap(map, mp, output, cpu); 763 667 } 764 668 765 669 unsigned long perf_event_mlock_kb_in_pages(void) ··· 792 890 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, 793 891 int comp_level) 794 892 { 795 - struct evsel *evsel; 796 - const struct perf_cpu_map *cpus = evlist->core.cpus; 797 - const struct perf_thread_map *threads = evlist->core.threads; 798 893 /* 799 894 * Delay setting mp.prot: set it before calling perf_mmap__mmap. 800 895 * Its value is decided by evsel's write_backward. 801 896 * So &mp should not be passed through const pointer. 
802 897 */ 803 - struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush, 804 - .comp_level = comp_level }; 898 + struct mmap_params mp = { 899 + .nr_cblocks = nr_cblocks, 900 + .affinity = affinity, 901 + .flush = flush, 902 + .comp_level = comp_level 903 + }; 904 + struct perf_evlist_mmap_ops ops = { 905 + .idx = perf_evlist__mmap_cb_idx, 906 + .get = perf_evlist__mmap_cb_get, 907 + .mmap = perf_evlist__mmap_cb_mmap, 908 + }; 805 909 806 910 if (!evlist->mmap) 807 911 evlist->mmap = evlist__alloc_mmap(evlist, false); 808 912 if (!evlist->mmap) 809 913 return -ENOMEM; 810 914 811 - if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0) 812 - return -ENOMEM; 813 - 814 915 evlist->core.mmap_len = evlist__mmap_size(pages); 815 916 pr_debug("mmap size %zuB\n", evlist->core.mmap_len); 816 - mp.mask = evlist->core.mmap_len - page_size - 1; 917 + mp.core.mask = evlist->core.mmap_len - page_size - 1; 817 918 818 919 auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len, 819 920 auxtrace_pages, auxtrace_overwrite); 820 921 821 - evlist__for_each_entry(evlist, evsel) { 822 - if ((evsel->core.attr.read_format & PERF_FORMAT_ID) && 823 - evsel->core.sample_id == NULL && 824 - perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0) 825 - return -ENOMEM; 826 - } 827 - 828 - if (perf_cpu_map__empty(cpus)) 829 - return evlist__mmap_per_thread(evlist, &mp); 830 - 831 - return evlist__mmap_per_cpu(evlist, &mp); 922 + return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core); 832 923 } 833 924 834 925 int evlist__mmap(struct evlist *evlist, unsigned int pages) ··· 924 1029 struct evsel *evsel; 925 1030 int err = 0; 926 1031 1032 + if (filter == NULL) 1033 + return -1; 1034 + 927 1035 evlist__for_each_entry(evlist, evsel) { 928 1036 if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) 929 1037 continue; ··· 939 1041 return err; 940 1042 } 941 1043 942 - int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) 1044 + int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter) 1045 + { 1046 + struct evsel *evsel; 1047 + int err = 0; 1048 + 1049 + if (filter == NULL) 1050 + return -1; 1051 + 1052 + evlist__for_each_entry(evlist, evsel) { 1053 + if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) 1054 + continue; 1055 + 1056 + err = perf_evsel__append_tp_filter(evsel, filter); 1057 + if (err) 1058 + break; 1059 + } 1060 + 1061 + return err; 1062 + } 1063 + 1064 + static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids) 943 1065 { 944 1066 char *filter; 945 - int ret = -1; 946 1067 size_t i; 947 1068 948 1069 for (i = 0; i < npids; ++i) { 949 1070 if (i == 0) { 950 1071 if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) 951 - return -1; 1072 + return NULL; 952 1073 } else { 953 1074 char *tmp; 954 1075 ··· 979 1062 } 980 1063 } 981 1064 982 - ret = perf_evlist__set_tp_filter(evlist, filter); 1065 + return filter; 983 1066 out_free: 1067 + free(filter); 1068 + return NULL; 1069 + } 1070 + 1071 + int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) 1072 + { 1073 + char *filter = asprintf__tp_filter_pids(npids, pids); 1074 + int ret = perf_evlist__set_tp_filter(evlist, filter); 1075 + 984 1076 free(filter); 985 1077 return ret; 986 1078 } ··· 997 1071 int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) 998 1072 { 999 1073 return perf_evlist__set_tp_filter_pids(evlist, 1, &pid); 1074 + } 1075 + 1076 + int 
perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) 1077 + { 1078 + char *filter = asprintf__tp_filter_pids(npids, pids); 1079 + int ret = perf_evlist__append_tp_filter(evlist, filter); 1080 + 1081 + free(filter); 1082 + return ret; 1083 + } 1084 + 1085 + int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid) 1086 + { 1087 + return perf_evlist__append_tp_filter_pids(evlist, 1, &pid); 1000 1088 } 1001 1089 1002 1090 bool perf_evlist__valid_sample_type(struct evlist *evlist) ··· 1669 1729 struct mmap *map = &evlist->mmap[i]; 1670 1730 union perf_event *event; 1671 1731 1672 - if (perf_mmap__read_init(map)) 1732 + if (perf_mmap__read_init(&map->core)) 1673 1733 continue; 1674 - while ((event = perf_mmap__read_event(map)) != NULL) { 1734 + while ((event = perf_mmap__read_event(&map->core)) != NULL) { 1675 1735 struct evsel *evsel = perf_evlist__event2evsel(evlist, event); 1676 1736 1677 1737 if (evsel && evsel->side_band.cb) ··· 1679 1739 else 1680 1740 pr_warning("cannot locate proper evsel for the side band event\n"); 1681 1741 1682 - perf_mmap__consume(map); 1742 + perf_mmap__consume(&map->core); 1683 1743 got_data = true; 1684 1744 } 1685 - perf_mmap__read_done(map); 1745 + perf_mmap__read_done(&map->core); 1686 1746 } 1687 1747 1688 1748 if (draining && !got_data)
+12
tools/perf/util/evlist.h
··· 118 118 int perf_evlist__add_newtp(struct evlist *evlist, 119 119 const char *sys, const char *name, void *handler); 120 120 121 + int __evlist__set_tracepoints_handlers(struct evlist *evlist, 122 + const struct evsel_str_handler *assocs, 123 + size_t nr_assocs); 124 + 125 + #define evlist__set_tracepoints_handlers(evlist, array) \ 126 + __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) 127 + 121 128 void __perf_evlist__set_sample_bit(struct evlist *evlist, 122 129 enum perf_event_sample_format bit); 123 130 void __perf_evlist__reset_sample_bit(struct evlist *evlist, ··· 139 132 int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter); 140 133 int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); 141 134 int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); 135 + 136 + int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter); 137 + 138 + int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); 139 + int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); 142 140 143 141 struct evsel * 144 142 perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
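The append variants combine with whatever filter an evsel already carries instead of replacing it, which is what lets 'perf trace' stack a user-supplied --filter expression on top of its own pid exclusions. A hedged sketch of the intended call pattern, assuming 'evlist' already holds tracepoint events and both expressions are valid for them:

/* Sketch only: set a base filter, then append a pid exclusion. */
if (perf_evlist__set_tp_filter(evlist, "cpu==1 || cpu==2") < 0 ||
    perf_evlist__append_tp_filter_pid(evlist, getpid()) < 0)
	pr_err("failed to set up tracepoint filters\n");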
+15 -245
tools/perf/util/mmap.c
··· 13 13 #include <stdlib.h> 14 14 #include <string.h> 15 15 #include <unistd.h> // sysconf() 16 + #include <perf/mmap.h> 16 17 #ifdef HAVE_LIBNUMA_SUPPORT 17 18 #include <numaif.h> 18 19 #endif ··· 24 23 #include "../perf.h" 25 24 #include <internal/lib.h> /* page_size */ 26 25 27 - size_t perf_mmap__mmap_len(struct mmap *map) 26 + size_t mmap__mmap_len(struct mmap *map) 28 27 { 29 - return map->core.mask + 1 + page_size; 30 - } 31 - 32 - /* When check_messup is true, 'end' must points to a good entry */ 33 - static union perf_event *perf_mmap__read(struct mmap *map, 34 - u64 *startp, u64 end) 35 - { 36 - unsigned char *data = map->core.base + page_size; 37 - union perf_event *event = NULL; 38 - int diff = end - *startp; 39 - 40 - if (diff >= (int)sizeof(event->header)) { 41 - size_t size; 42 - 43 - event = (union perf_event *)&data[*startp & map->core.mask]; 44 - size = event->header.size; 45 - 46 - if (size < sizeof(event->header) || diff < (int)size) 47 - return NULL; 48 - 49 - /* 50 - * Event straddles the mmap boundary -- header should always 51 - * be inside due to u64 alignment of output. 52 - */ 53 - if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) { 54 - unsigned int offset = *startp; 55 - unsigned int len = min(sizeof(*event), size), cpy; 56 - void *dst = map->core.event_copy; 57 - 58 - do { 59 - cpy = min(map->core.mask + 1 - (offset & map->core.mask), len); 60 - memcpy(dst, &data[offset & map->core.mask], cpy); 61 - offset += cpy; 62 - dst += cpy; 63 - len -= cpy; 64 - } while (len); 65 - 66 - event = (union perf_event *)map->core.event_copy; 67 - } 68 - 69 - *startp += size; 70 - } 71 - 72 - return event; 73 - } 74 - 75 - /* 76 - * Read event from ring buffer one by one. 77 - * Return one event for each call. 78 - * 79 - * Usage: 80 - * perf_mmap__read_init() 81 - * while(event = perf_mmap__read_event()) { 82 - * //process the event 83 - * perf_mmap__consume() 84 - * } 85 - * perf_mmap__read_done() 86 - */ 87 - union perf_event *perf_mmap__read_event(struct mmap *map) 88 - { 89 - union perf_event *event; 90 - 91 - /* 92 - * Check if event was unmapped due to a POLLHUP/POLLERR. 
93 - */ 94 - if (!refcount_read(&map->core.refcnt)) 95 - return NULL; 96 - 97 - /* non-overwirte doesn't pause the ringbuffer */ 98 - if (!map->core.overwrite) 99 - map->core.end = perf_mmap__read_head(map); 100 - 101 - event = perf_mmap__read(map, &map->core.start, map->core.end); 102 - 103 - if (!map->core.overwrite) 104 - map->core.prev = map->core.start; 105 - 106 - return event; 107 - } 108 - 109 - static bool perf_mmap__empty(struct mmap *map) 110 - { 111 - return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; 112 - } 113 - 114 - void perf_mmap__get(struct mmap *map) 115 - { 116 - refcount_inc(&map->core.refcnt); 117 - } 118 - 119 - void perf_mmap__put(struct mmap *map) 120 - { 121 - BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); 122 - 123 - if (refcount_dec_and_test(&map->core.refcnt)) 124 - perf_mmap__munmap(map); 125 - } 126 - 127 - void perf_mmap__consume(struct mmap *map) 128 - { 129 - if (!map->core.overwrite) { 130 - u64 old = map->core.prev; 131 - 132 - perf_mmap__write_tail(map, old); 133 - } 134 - 135 - if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map)) 136 - perf_mmap__put(map); 28 + return perf_mmap__mmap_len(&map->core); 137 29 } 138 30 139 31 int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, ··· 64 170 #ifdef HAVE_LIBNUMA_SUPPORT 65 171 static int perf_mmap__aio_alloc(struct mmap *map, int idx) 66 172 { 67 - map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, 173 + map->aio.data[idx] = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, 68 174 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); 69 175 if (map->aio.data[idx] == MAP_FAILED) { 70 176 map->aio.data[idx] = NULL; ··· 77 183 static void perf_mmap__aio_free(struct mmap *map, int idx) 78 184 { 79 185 if (map->aio.data[idx]) { 80 - munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); 186 + munmap(map->aio.data[idx], mmap__mmap_len(map)); 81 187 map->aio.data[idx] = NULL; 82 188 } 83 189 } ··· 90 196 91 197 if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { 92 198 data = map->aio.data[idx]; 93 - mmap_len = perf_mmap__mmap_len(map); 199 + mmap_len = mmap__mmap_len(map); 94 200 node_mask = 1UL << cpu__get_node(cpu); 95 201 if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { 96 202 pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", ··· 104 210 #else /* !HAVE_LIBNUMA_SUPPORT */ 105 211 static int perf_mmap__aio_alloc(struct mmap *map, int idx) 106 212 { 107 - map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); 213 + map->aio.data[idx] = malloc(mmap__mmap_len(map)); 108 214 if (map->aio.data[idx] == NULL) 109 215 return -1; 110 216 ··· 205 311 } 206 312 #endif 207 313 208 - void perf_mmap__munmap(struct mmap *map) 314 + void mmap__munmap(struct mmap *map) 209 315 { 210 316 perf_mmap__aio_munmap(map); 211 317 if (map->data != NULL) { 212 - munmap(map->data, perf_mmap__mmap_len(map)); 318 + munmap(map->data, mmap__mmap_len(map)); 213 319 map->data = NULL; 214 - } 215 - if (map->core.base != NULL) { 216 - munmap(map->core.base, perf_mmap__mmap_len(map)); 217 - map->core.base = NULL; 218 - map->core.fd = -1; 219 - refcount_set(&map->core.refcnt, 0); 220 320 } 221 321 auxtrace_mmap__munmap(&map->auxtrace_mmap); 222 322 } ··· 241 353 CPU_SET(map->core.cpu, &map->affinity_mask); 242 354 } 243 355 244 - int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) 356 + int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) 245 357 { 246 - /* 247 - * The last one 
will be done at perf_mmap__consume(), so that we 248 - * make sure we don't prevent tools from consuming every last event in 249 - * the ring buffer. 250 - * 251 - * I.e. we can get the POLLHUP meaning that the fd doesn't exist 252 - * anymore, but the last events for it are still in the ring buffer, 253 - * waiting to be consumed. 254 - * 255 - * Tools can chose to ignore this at their own discretion, but the 256 - * evlist layer can't just drop it when filtering events in 257 - * perf_evlist__filter_pollfd(). 258 - */ 259 - refcount_set(&map->core.refcnt, 2); 260 - map->core.prev = 0; 261 - map->core.mask = mp->mask; 262 - map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, 263 - MAP_SHARED, fd, 0); 264 - if (map->core.base == MAP_FAILED) { 358 + if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) { 265 359 pr_debug2("failed to mmap perf event ring buffer, error %d\n", 266 360 errno); 267 - map->core.base = NULL; 268 361 return -1; 269 362 } 270 - map->core.fd = fd; 271 - map->core.cpu = cpu; 272 363 273 364 perf_mmap__setup_affinity_mask(map, mp); 274 365 ··· 256 389 map->comp_level = mp->comp_level; 257 390 258 391 if (map->comp_level && !perf_mmap__aio_enabled(map)) { 259 - map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, 392 + map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, 260 393 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); 261 394 if (map->data == MAP_FAILED) { 262 395 pr_debug2("failed to mmap data buffer, error %d\n", ··· 273 406 return perf_mmap__aio_mmap(map, mp); 274 407 } 275 408 276 - static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) 277 - { 278 - struct perf_event_header *pheader; 279 - u64 evt_head = *start; 280 - int size = mask + 1; 281 - 282 - pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); 283 - pheader = (struct perf_event_header *)(buf + (*start & mask)); 284 - while (true) { 285 - if (evt_head - *start >= (unsigned int)size) { 286 - pr_debug("Finished reading overwrite ring buffer: rewind\n"); 287 - if (evt_head - *start > (unsigned int)size) 288 - evt_head -= pheader->size; 289 - *end = evt_head; 290 - return 0; 291 - } 292 - 293 - pheader = (struct perf_event_header *)(buf + (evt_head & mask)); 294 - 295 - if (pheader->size == 0) { 296 - pr_debug("Finished reading overwrite ring buffer: get start\n"); 297 - *end = evt_head; 298 - return 0; 299 - } 300 - 301 - evt_head += pheader->size; 302 - pr_debug3("move evt_head: %"PRIx64"\n", evt_head); 303 - } 304 - WARN_ONCE(1, "Shouldn't get here\n"); 305 - return -1; 306 - } 307 - 308 - /* 309 - * Report the start and end of the available data in ringbuffer 310 - */ 311 - static int __perf_mmap__read_init(struct mmap *md) 312 - { 313 - u64 head = perf_mmap__read_head(md); 314 - u64 old = md->core.prev; 315 - unsigned char *data = md->core.base + page_size; 316 - unsigned long size; 317 - 318 - md->core.start = md->core.overwrite ? head : old; 319 - md->core.end = md->core.overwrite ? old : head; 320 - 321 - if ((md->core.end - md->core.start) < md->core.flush) 322 - return -EAGAIN; 323 - 324 - size = md->core.end - md->core.start; 325 - if (size > (unsigned long)(md->core.mask) + 1) { 326 - if (!md->core.overwrite) { 327 - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); 328 - 329 - md->core.prev = head; 330 - perf_mmap__consume(md); 331 - return -EAGAIN; 332 - } 333 - 334 - /* 335 - * Backward ring buffer is full. We still have a chance to read 336 - * most of data from it. 
337 - */ 338 - if (overwrite_rb_find_range(data, md->core.mask, &md->core.start, &md->core.end)) 339 - return -EINVAL; 340 - } 341 - 342 - return 0; 343 - } 344 - 345 - int perf_mmap__read_init(struct mmap *map) 346 - { 347 - /* 348 - * Check if event was unmapped due to a POLLHUP/POLLERR. 349 - */ 350 - if (!refcount_read(&map->core.refcnt)) 351 - return -ENOENT; 352 - 353 - return __perf_mmap__read_init(map); 354 - } 355 - 356 409 int perf_mmap__push(struct mmap *md, void *to, 357 410 int push(struct mmap *map, void *to, void *buf, size_t size)) 358 411 { 359 - u64 head = perf_mmap__read_head(md); 412 + u64 head = perf_mmap__read_head(&md->core); 360 413 unsigned char *data = md->core.base + page_size; 361 414 unsigned long size; 362 415 void *buf; 363 416 int rc = 0; 364 417 365 - rc = perf_mmap__read_init(md); 418 + rc = perf_mmap__read_init(&md->core); 366 419 if (rc < 0) 367 420 return (rc == -EAGAIN) ? 1 : -1; 368 421 ··· 309 522 } 310 523 311 524 md->core.prev = head; 312 - perf_mmap__consume(md); 525 + perf_mmap__consume(&md->core); 313 526 out: 314 527 return rc; 315 - } 316 - 317 - /* 318 - * Mandatory for overwrite mode 319 - * The direction of overwrite mode is backward. 320 - * The last perf_mmap__read() will set tail to map->core.prev. 321 - * Need to correct the map->core.prev to head which is the end of next read. 322 - */ 323 - void perf_mmap__read_done(struct mmap *map) 324 - { 325 - /* 326 - * Check if event was unmapped due to a POLLHUP/POLLERR. 327 - */ 328 - if (!refcount_read(&map->core.refcnt)) 329 - return; 330 - 331 - map->core.prev = perf_mmap__read_head(map); 332 528 }
+5 -23
tools/perf/util/mmap.h
··· 37 37 }; 38 38 39 39 struct mmap_params { 40 - int prot, mask, nr_cblocks, affinity, flush, comp_level; 40 + struct perf_mmap_param core; 41 + int nr_cblocks, affinity, flush, comp_level; 41 42 struct auxtrace_mmap_params auxtrace_mp; 42 43 }; 43 44 44 - int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); 45 - void perf_mmap__munmap(struct mmap *map); 46 - 47 - void perf_mmap__get(struct mmap *map); 48 - void perf_mmap__put(struct mmap *map); 49 - 50 - void perf_mmap__consume(struct mmap *map); 51 - 52 - static inline u64 perf_mmap__read_head(struct mmap *mm) 53 - { 54 - return ring_buffer_read_head(mm->core.base); 55 - } 56 - 57 - static inline void perf_mmap__write_tail(struct mmap *md, u64 tail) 58 - { 59 - ring_buffer_write_tail(md->core.base, tail); 60 - } 45 + int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); 46 + void mmap__munmap(struct mmap *map); 61 47 62 48 union perf_event *perf_mmap__read_forward(struct mmap *map); 63 - 64 - union perf_event *perf_mmap__read_event(struct mmap *map); 65 49 66 50 int perf_mmap__push(struct mmap *md, void *to, 67 51 int push(struct mmap *map, void *to, void *buf, size_t size)); 68 52 69 - size_t perf_mmap__mmap_len(struct mmap *map); 53 + size_t mmap__mmap_len(struct mmap *map); 70 54 71 - int perf_mmap__read_init(struct mmap *md); 72 - void perf_mmap__read_done(struct mmap *map); 73 55 #endif /*__PERF_MMAP_H */
+6 -2
tools/perf/util/parse-regs-options.c
··· 13 13 __parse_regs(const struct option *opt, const char *str, int unset, bool intr) 14 14 { 15 15 uint64_t *mode = (uint64_t *)opt->value; 16 - const struct sample_reg *r; 16 + const struct sample_reg *r = NULL; 17 17 char *s, *os = NULL, *p; 18 18 int ret = -1; 19 19 uint64_t mask; ··· 46 46 47 47 if (!strcmp(s, "?")) { 48 48 fprintf(stderr, "available registers: "); 49 + #ifdef HAVE_PERF_REGS_SUPPORT 49 50 for (r = sample_reg_masks; r->name; r++) { 50 51 if (r->mask & mask) 51 52 fprintf(stderr, "%s ", r->name); 52 53 } 54 + #endif 53 55 fputc('\n', stderr); 54 56 /* just printing available regs */ 55 57 return -1; 56 58 } 59 + #ifdef HAVE_PERF_REGS_SUPPORT 57 60 for (r = sample_reg_masks; r->name; r++) { 58 61 if ((r->mask & mask) && !strcasecmp(s, r->name)) 59 62 break; 60 63 } 61 - if (!r->name) { 64 + #endif 65 + if (!r || !r->name) { 62 66 ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n", 63 67 s, intr ? "-I" : "--user-regs="); 64 68 goto error;
-4
tools/perf/util/perf_regs.c
··· 3 3 #include "perf_regs.h" 4 4 #include "event.h" 5 5 6 - const struct sample_reg __weak sample_reg_masks[] = { 7 - SMPL_REG_END 8 - }; 9 - 10 6 int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, 11 7 char **new_op __maybe_unused) 12 8 {
+2 -2
tools/perf/util/perf_regs.h
··· 15 15 #define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } 16 16 #define SMPL_REG_END { .name = NULL } 17 17 18 - extern const struct sample_reg sample_reg_masks[]; 19 - 20 18 enum { 21 19 SDT_ARG_VALID = 0, 22 20 SDT_ARG_SKIP, ··· 25 27 uint64_t arch__user_reg_mask(void); 26 28 27 29 #ifdef HAVE_PERF_REGS_SUPPORT 30 + extern const struct sample_reg sample_reg_masks[]; 31 + 28 32 #include <perf_regs.h> 29 33 30 34 #define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
+4 -3
tools/perf/util/python.c
··· 6 6 #include <linux/err.h> 7 7 #include <perf/cpumap.h> 8 8 #include <traceevent/event-parse.h> 9 + #include <perf/mmap.h> 9 10 #include "evlist.h" 10 11 #include "callchain.h" 11 12 #include "evsel.h" ··· 1023 1022 if (!md) 1024 1023 return NULL; 1025 1024 1026 - if (perf_mmap__read_init(md) < 0) 1025 + if (perf_mmap__read_init(&md->core) < 0) 1027 1026 goto end; 1028 1027 1029 - event = perf_mmap__read_event(md); 1028 + event = perf_mmap__read_event(&md->core); 1030 1029 if (event != NULL) { 1031 1030 PyObject *pyevent = pyrf_event__new(event); 1032 1031 struct pyrf_event *pevent = (struct pyrf_event *)pyevent; ··· 1046 1045 err = perf_evsel__parse_sample(evsel, event, &pevent->sample); 1047 1046 1048 1047 /* Consume the event only after we parsed it out. */ 1049 - perf_mmap__consume(md); 1048 + perf_mmap__consume(&md->core); 1050 1049 1051 1050 if (err) 1052 1051 return PyErr_Format(PyExc_OSError,
-29
tools/perf/util/session.c
··· 2355 2355 fprintf(fp, "# ========\n#\n"); 2356 2356 } 2357 2357 2358 - 2359 - int __perf_session__set_tracepoints_handlers(struct perf_session *session, 2360 - const struct evsel_str_handler *assocs, 2361 - size_t nr_assocs) 2362 - { 2363 - struct evsel *evsel; 2364 - size_t i; 2365 - int err; 2366 - 2367 - for (i = 0; i < nr_assocs; i++) { 2368 - /* 2369 - * Adding a handler for an event not in the session, 2370 - * just ignore it. 2371 - */ 2372 - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name); 2373 - if (evsel == NULL) 2374 - continue; 2375 - 2376 - err = -EEXIST; 2377 - if (evsel->handler != NULL) 2378 - goto out; 2379 - evsel->handler = assocs[i].handler; 2380 - } 2381 - 2382 - err = 0; 2383 - out: 2384 - return err; 2385 - } 2386 - 2387 2358 int perf_event__process_id_index(struct perf_session *session, 2388 2359 union perf_event *event) 2389 2360 {
+1 -5
tools/perf/util/session.h
··· 120 120 121 121 struct evsel_str_handler; 122 122 123 - int __perf_session__set_tracepoints_handlers(struct perf_session *session, 124 - const struct evsel_str_handler *assocs, 125 - size_t nr_assocs); 126 - 127 123 #define perf_session__set_tracepoints_handlers(session, array) \ 128 - __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array)) 124 + __evlist__set_tracepoints_handlers(session->evlist, array, ARRAY_SIZE(array)) 129 125 130 126 extern volatile int session_done; 131 127
+4
tools/perf/util/sort.h
··· 10 10 #include "callchain.h" 11 11 #include "values.h" 12 12 #include "hist.h" 13 + #include "stat.h" 14 + #include "spark.h" 13 15 14 16 struct option; 15 17 struct thread; ··· 73 71 /* PERF_HPP_DIFF__CYCLES */ 74 72 s64 cycles; 75 73 }; 74 + struct stats stats; 75 + unsigned long svals[NUM_SPARKS]; 76 76 }; 77 77 78 78 struct hist_entry_ops {
+34
tools/perf/util/spark.c
··· 1 + #include <stdio.h> 2 + #include <limits.h> 3 + #include <string.h> 4 + #include <stdlib.h> 5 + #include "spark.h" 6 + #include "stat.h" 7 + 8 + #define SPARK_SHIFT 8 9 + 10 + /* Print spark lines into bf for numval values in val. */ 11 + int print_spark(char *bf, int size, unsigned long *val, int numval) 12 + { 13 + static const char *ticks[NUM_SPARKS] = { 14 + "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█" 15 + }; 16 + int i, printed = 0; 17 + unsigned long min = ULONG_MAX, max = 0, f; 18 + 19 + for (i = 0; i < numval; i++) { 20 + if (val[i] < min) 21 + min = val[i]; 22 + if (val[i] > max) 23 + max = val[i]; 24 + } 25 + f = ((max - min) << SPARK_SHIFT) / (NUM_SPARKS - 1); 26 + if (f < 1) 27 + f = 1; 28 + for (i = 0; i < numval; i++) { 29 + printed += scnprintf(bf + printed, size - printed, "%s", 30 + ticks[((val[i] - min) << SPARK_SHIFT) / f]); 31 + } 32 + 33 + return printed; 34 + }
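A small usage sketch for print_spark(), with made-up cycle counts; the buffer only needs room for NUM_SPARKS multi-byte ticks plus a NUL:

char bf[64];
unsigned long vals[NUM_SPARKS] = { 1, 2, 4, 8, 16, 32, 64, 128 };

print_spark(bf, sizeof(bf), vals, NUM_SPARKS);
printf("%s\n", bf);	/* renders as: ▁▁▁▁▁▂▄█ */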
+8
tools/perf/util/spark.h
··· 1 + #ifndef SPARK_H 2 + #define SPARK_H 1 3 + 4 + #define NUM_SPARKS 8 5 + 6 + int print_spark(char *bf, int size, unsigned long *val, int numval); 7 + 8 + #endif
+2
tools/perf/util/symbol.h
··· 11 11 #include <stdio.h> 12 12 #include "path.h" 13 13 #include "symbol_conf.h" 14 + #include "spark.h" 14 15 15 16 #ifdef HAVE_LIBELF_SUPPORT 16 17 #include <libelf.h> ··· 112 111 u64 end; 113 112 u64 cycles; 114 113 u64 cycles_aggr; 114 + s64 cycles_spark[NUM_SPARKS]; 115 115 int num; 116 116 int num_aggr; 117 117 refcount_t refcnt;