···685685{686686 int err;687687688688+ /* does not support taken branch sampling */689689+ if (has_branch_stack(event))690690+ return -EOPNOTSUPP;691691+688692 switch (event->attr.type) {689693 case PERF_TYPE_RAW:690694 case PERF_TYPE_HARDWARE:
+4
arch/arm/kernel/perf_event.c
···539539 int err = 0;540540 atomic_t *active_events = &armpmu->active_events;541541542542+ /* does not support taken branch sampling */543543+ if (has_branch_stack(event))544544+ return -EOPNOTSUPP;545545+542546 if (armpmu->map_event(event) == -ENOENT)543547 return -ENOENT;544548
+4
arch/mips/kernel/perf_event_mipsxx.c
···606606{607607 int err = 0;608608609609+ /* does not support taken branch sampling */610610+ if (has_branch_stack(event))611611+ return -EOPNOTSUPP;612612+609613 switch (event->attr.type) {610614 case PERF_TYPE_RAW:611615 case PERF_TYPE_HARDWARE:
+4
arch/powerpc/kernel/perf_event.c
···10841084 if (!ppmu)10851085 return -ENOENT;1086108610871087+ /* does not support taken branch sampling */10881088+ if (has_branch_stack(event))10891089+ return -EOPNOTSUPP;10901090+10871091 switch (event->attr.type) {10881092 case PERF_TYPE_HARDWARE:10891093 ev = event->attr.config;
+4
arch/sh/kernel/perf_event.c
···310310{311311 int err;312312313313+ /* does not support taken branch sampling */314314+ if (has_branch_stack(event))315315+ return -EOPNOTSUPP;316316+313317 switch (event->attr.type) {314318 case PERF_TYPE_RAW:315319 case PERF_TYPE_HW_CACHE:
+4
arch/sparc/kernel/perf_event.c
···11051105 if (atomic_read(&nmi_active) < 0)11061106 return -ENODEV;1107110711081108+ /* does not support taken branch sampling */11091109+ if (has_branch_stack(event))11101110+ return -EOPNOTSUPP;11111111+11081112 switch (attr->type) {11091113 case PERF_TYPE_HARDWARE:11101114 if (attr->config >= sparc_pmu->max_events)
···139139 if (ret)140140 return ret;141141142142+ if (has_branch_stack(event))143143+ return -EOPNOTSUPP;144144+142145 if (event->attr.exclude_host && event->attr.exclude_guest)143146 /*144147 * When HO == GO == 1 the hardware treats that as GO == HO == 0
+92-26
arch/x86/kernel/cpu/perf_event_intel.c
···728728 },729729};730730731731+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)732732+{733733+ /* user explicitly requested branch sampling */734734+ if (has_branch_stack(event))735735+ return true;736736+737737+ /* implicit branch sampling to correct PEBS skid */738738+ if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)739739+ return true;740740+741741+ return false;742742+}743743+731744static void intel_pmu_disable_all(void)732745{733746 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);···895882 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);896883 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);897884885885+ /*886886+ * must disable before any actual event887887+ * because any event may be combined with LBR888888+ */889889+ if (intel_pmu_needs_lbr_smpl(event))890890+ intel_pmu_lbr_disable(event);891891+898892 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {899893 intel_pmu_disable_fixed(hwc);900894 return;···956936 intel_pmu_enable_bts(hwc->config);957937 return;958938 }939939+ /*940940+ * must enabled before any actual event941941+ * because any event may be combined with LBR942942+ */943943+ if (intel_pmu_needs_lbr_smpl(event))944944+ intel_pmu_lbr_enable(event);959945960946 if (event->attr.exclude_host)961947 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);···1084105810851059 data.period = event->hw.last_period;1086106010611061+ if (has_branch_stack(event))10621062+ data.br_stack = &cpuc->lbr_stack;10631063+10871064 if (perf_event_overflow(event, &data, regs))10881065 x86_pmu_stop(event, 0);10891066 }···11531124 */11541125static struct event_constraint *11551126__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,11561156- struct perf_event *event)11271127+ struct perf_event *event,11281128+ struct hw_perf_event_extra *reg)11571129{11581130 struct event_constraint *c = &emptyconstraint;11591159- struct hw_perf_event_extra *reg = &event->hw.extra_reg;11601131 struct er_account 
*era;11611132 unsigned long flags;11621133 int orig_idx = reg->idx;1163113411641135 /* already allocated shared msr */11651136 if (reg->alloc)11661166- return &unconstrained;11371137+ return NULL; /* call x86_get_event_constraint() */1167113811681139again:11691140 era = &cpuc->shared_regs->regs[reg->idx];···11861157 reg->alloc = 1;1187115811881159 /*11891189- * All events using extra_reg are unconstrained.11901190- * Avoids calling x86_get_event_constraints()11911191- *11921192- * Must revisit if extra_reg controlling events11931193- * ever have constraints. Worst case we go through11941194- * the regular event constraint table.11601160+ * need to call x86_get_event_constraint()11611161+ * to check if associated event has constraints11951162 */11961196- c = &unconstrained;11631163+ c = NULL;11971164 } else if (intel_try_alt_er(event, orig_idx)) {11981165 raw_spin_unlock_irqrestore(&era->lock, flags);11991166 goto again;···12261201intel_shared_regs_constraints(struct cpu_hw_events *cpuc,12271202 struct perf_event *event)12281203{12291229- struct event_constraint *c = NULL;12041204+ struct event_constraint *c = NULL, *d;12051205+ struct hw_perf_event_extra *xreg, *breg;1230120612311231- if (event->hw.extra_reg.idx != EXTRA_REG_NONE)12321232- c = __intel_shared_reg_get_constraints(cpuc, event);12331233-12071207+ xreg = &event->hw.extra_reg;12081208+ if (xreg->idx != EXTRA_REG_NONE) {12091209+ c = __intel_shared_reg_get_constraints(cpuc, event, xreg);12101210+ if (c == &emptyconstraint)12111211+ return c;12121212+ }12131213+ breg = &event->hw.branch_reg;12141214+ if (breg->idx != EXTRA_REG_NONE) {12151215+ d = __intel_shared_reg_get_constraints(cpuc, event, breg);12161216+ if (d == &emptyconstraint) {12171217+ __intel_shared_reg_put_constraints(cpuc, xreg);12181218+ c = d;12191219+ }12201220+ }12341221 return c;12351222}12361223···12901253 reg = &event->hw.extra_reg;12911254 if (reg->idx != EXTRA_REG_NONE)12921255 __intel_shared_reg_put_constraints(cpuc, 
reg);12561256+12571257+ reg = &event->hw.branch_reg;12581258+ if (reg->idx != EXTRA_REG_NONE)12591259+ __intel_shared_reg_put_constraints(cpuc, reg);12931260}1294126112951262static void intel_put_event_constraints(struct cpu_hw_events *cpuc,···1334129313351294 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);13361295 event->hw.config = alt_config;12961296+ }12971297+12981298+ if (intel_pmu_needs_lbr_smpl(event)) {12991299+ ret = intel_pmu_setup_lbr_filter(event);13001300+ if (ret)13011301+ return ret;13371302 }1338130313391304 if (event->attr.type != PERF_TYPE_RAW)···14801433{14811434 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);1482143514831483- if (!x86_pmu.extra_regs)14361436+ if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))14841437 return NOTIFY_OK;1485143814861439 cpuc->shared_regs = allocate_shared_regs(cpu);···15021455 */15031456 intel_pmu_lbr_reset();1504145715051505- if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))14581458+ cpuc->lbr_sel = NULL;14591459+14601460+ if (!cpuc->shared_regs)15061461 return;1507146215081508- for_each_cpu(i, topology_thread_cpumask(cpu)) {15091509- struct intel_shared_regs *pc;14631463+ if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {14641464+ for_each_cpu(i, topology_thread_cpumask(cpu)) {14651465+ struct intel_shared_regs *pc;1510146615111511- pc = per_cpu(cpu_hw_events, i).shared_regs;15121512- if (pc && pc->core_id == core_id) {15131513- cpuc->kfree_on_online = cpuc->shared_regs;15141514- cpuc->shared_regs = pc;15151515- break;14671467+ pc = per_cpu(cpu_hw_events, i).shared_regs;14681468+ if (pc && pc->core_id == core_id) {14691469+ cpuc->kfree_on_online = cpuc->shared_regs;14701470+ cpuc->shared_regs = pc;14711471+ break;14721472+ }15161473 }14741474+ cpuc->shared_regs->core_id = core_id;14751475+ cpuc->shared_regs->refcnt++;15171476 }1518147715191519- cpuc->shared_regs->core_id = core_id;15201520- cpuc->shared_regs->refcnt++;14781478+ if (x86_pmu.lbr_sel_map)14791479+ cpuc->lbr_sel = 
&cpuc->shared_regs->regs[EXTRA_REG_LBR];15211480}1522148115231482static void intel_pmu_cpu_dying(int cpu)···15391486 }1540148715411488 fini_debug_store_on_cpu(cpu);14891489+}14901490+14911491+static void intel_pmu_flush_branch_stack(void)14921492+{14931493+ /*14941494+ * Intel LBR does not tag entries with the14951495+ * PID of the current task, then we need to14961496+ * flush it on ctxsw14971497+ * For now, we simply reset it14981498+ */14991499+ if (x86_pmu.lbr_nr)15001500+ intel_pmu_lbr_reset();15421501}1543150215441503static __initconst const struct x86_pmu intel_pmu = {···15801515 .cpu_starting = intel_pmu_cpu_starting,15811516 .cpu_dying = intel_pmu_cpu_dying,15821517 .guest_get_msrs = intel_guest_get_msrs,15181518+ .flush_branch_stack = intel_pmu_flush_branch_stack,15831519};1584152015851521static __init void intel_clovertown_quirk(void)···18111745 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,18121746 sizeof(hw_cache_event_ids));1813174718141814- intel_pmu_lbr_init_nhm();17481748+ intel_pmu_lbr_init_snb();1815174918161750 x86_pmu.event_constraints = intel_snb_event_constraints;18171751 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+5-17
arch/x86/kernel/cpu/perf_event_intel_ds.c
···33#include <linux/slab.h>4455#include <asm/perf_event.h>66+#include <asm/insn.h>6778#include "perf_event.h"89···440439 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;441440442441 cpuc->pebs_enabled |= 1ULL << hwc->idx;443443-444444- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)445445- intel_pmu_lbr_enable(event);446442}447443448444void intel_pmu_pebs_disable(struct perf_event *event)···452454 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);453455454456 hwc->config |= ARCH_PERFMON_EVENTSEL_INT;455455-456456- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)457457- intel_pmu_lbr_disable(event);458457}459458460459void intel_pmu_pebs_enable_all(void)···468473469474 if (cpuc->pebs_enabled)470475 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);471471-}472472-473473-#include <asm/insn.h>474474-475475-static inline bool kernel_ip(unsigned long ip)476476-{477477-#ifdef CONFIG_X86_32478478- return ip > PAGE_OFFSET;479479-#else480480- return (long)ip < 0;481481-#endif482476}483477484478static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)···556572 * both formats and we don't use the other fields in this557573 * routine.558574 */575575+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);559576 struct pebs_record_core *pebs = __pebs;560577 struct perf_sample_data data;561578 struct pt_regs regs;···586601 regs.flags |= PERF_EFLAGS_EXACT;587602 else588603 regs.flags &= ~PERF_EFLAGS_EXACT;604604+605605+ if (has_branch_stack(event))606606+ data.br_stack = &cpuc->lbr_stack;589607590608 if (perf_event_overflow(event, &data, ®s))591609 x86_pmu_stop(event, 0);
+505-21
arch/x86/kernel/cpu/perf_event_intel_lbr.c
···3344#include <asm/perf_event.h>55#include <asm/msr.h>66+#include <asm/insn.h>6778#include "perf_event.h"89···1514};16151716/*1717+ * Intel LBR_SELECT bits1818+ * Intel Vol3a, April 2011, Section 16.7 Table 16-101919+ *2020+ * Hardware branch filter (not available on all CPUs)2121+ */2222+#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */2323+#define LBR_USER_BIT 1 /* do not capture at ring > 0 */2424+#define LBR_JCC_BIT 2 /* do not capture conditional branches */2525+#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */2626+#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */2727+#define LBR_RETURN_BIT 5 /* do not capture near returns */2828+#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */2929+#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */3030+#define LBR_FAR_BIT 8 /* do not capture far branches */3131+3232+#define LBR_KERNEL (1 << LBR_KERNEL_BIT)3333+#define LBR_USER (1 << LBR_USER_BIT)3434+#define LBR_JCC (1 << LBR_JCC_BIT)3535+#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)3636+#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)3737+#define LBR_RETURN (1 << LBR_RETURN_BIT)3838+#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)3939+#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)4040+#define LBR_FAR (1 << LBR_FAR_BIT)4141+4242+#define LBR_PLM (LBR_KERNEL | LBR_USER)4343+4444+#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */4545+#define LBR_NOT_SUPP -1 /* LBR filter not supported */4646+#define LBR_IGN 0 /* ignored */4747+4848+#define LBR_ANY \4949+ (LBR_JCC |\5050+ LBR_REL_CALL |\5151+ LBR_IND_CALL |\5252+ LBR_RETURN |\5353+ LBR_REL_JMP |\5454+ LBR_IND_JMP |\5555+ LBR_FAR)5656+5757+#define LBR_FROM_FLAG_MISPRED (1ULL << 63)5858+5959+#define for_each_branch_sample_type(x) \6060+ for ((x) = PERF_SAMPLE_BRANCH_USER; \6161+ (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)6262+6363+/*6464+ * x86control flow change classification6565+ * x86control flow changes include branches, interrupts, traps, faults6666+ */6767+enum {6868+ X86_BR_NONE 
= 0, /* unknown */6969+7070+ X86_BR_USER = 1 << 0, /* branch target is user */7171+ X86_BR_KERNEL = 1 << 1, /* branch target is kernel */7272+7373+ X86_BR_CALL = 1 << 2, /* call */7474+ X86_BR_RET = 1 << 3, /* return */7575+ X86_BR_SYSCALL = 1 << 4, /* syscall */7676+ X86_BR_SYSRET = 1 << 5, /* syscall return */7777+ X86_BR_INT = 1 << 6, /* sw interrupt */7878+ X86_BR_IRET = 1 << 7, /* return from interrupt */7979+ X86_BR_JCC = 1 << 8, /* conditional */8080+ X86_BR_JMP = 1 << 9, /* jump */8181+ X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */8282+ X86_BR_IND_CALL = 1 << 11,/* indirect calls */8383+};8484+8585+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)8686+8787+#define X86_BR_ANY \8888+ (X86_BR_CALL |\8989+ X86_BR_RET |\9090+ X86_BR_SYSCALL |\9191+ X86_BR_SYSRET |\9292+ X86_BR_INT |\9393+ X86_BR_IRET |\9494+ X86_BR_JCC |\9595+ X86_BR_JMP |\9696+ X86_BR_IRQ |\9797+ X86_BR_IND_CALL)9898+9999+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)100100+101101+#define X86_BR_ANY_CALL \102102+ (X86_BR_CALL |\103103+ X86_BR_IND_CALL |\104104+ X86_BR_SYSCALL |\105105+ X86_BR_IRQ |\106106+ X86_BR_INT)107107+108108+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);109109+110110+/*18111 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI19112 * otherwise it becomes near impossible to get a reliable stack.20113 */···11621static void __intel_pmu_lbr_enable(void)11722{11823 u64 debugctl;2424+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);2525+2626+ if (cpuc->lbr_sel)2727+ wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);1192812029 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);12130 debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);···17576 * Reset the LBR stack if we changed task context to17677 * avoid data leaks.17778 */178178-17979 if (event->ctx->task && cpuc->lbr_context != event->ctx) {18080 intel_pmu_lbr_reset();18181 cpuc->lbr_context = event->ctx;18282 }8383+ cpuc->br_sel = event->hw.branch_reg.reg;1838418485 
cpuc->lbr_users++;18586}···19495 cpuc->lbr_users--;19596 WARN_ON_ONCE(cpuc->lbr_users < 0);19697197197- if (cpuc->enabled && !cpuc->lbr_users)9898+ if (cpuc->enabled && !cpuc->lbr_users) {19899 __intel_pmu_lbr_disable();100100+ /* avoid stale pointer */101101+ cpuc->lbr_context = NULL;102102+ }199103}200104201105void intel_pmu_lbr_enable_all(void)···217115 __intel_pmu_lbr_disable();218116}219117118118+/*119119+ * TOS = most recently recorded branch120120+ */220121static inline u64 intel_pmu_lbr_tos(void)221122{222123 u64 tos;···247142248143 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);249144250250- cpuc->lbr_entries[i].from = msr_lastbranch.from;251251- cpuc->lbr_entries[i].to = msr_lastbranch.to;252252- cpuc->lbr_entries[i].flags = 0;145145+ cpuc->lbr_entries[i].from = msr_lastbranch.from;146146+ cpuc->lbr_entries[i].to = msr_lastbranch.to;147147+ cpuc->lbr_entries[i].mispred = 0;148148+ cpuc->lbr_entries[i].predicted = 0;149149+ cpuc->lbr_entries[i].reserved = 0;253150 }254151 cpuc->lbr_stack.nr = i;255152}256256-257257-#define LBR_FROM_FLAG_MISPRED (1ULL << 63)258153259154/*260155 * Due to lack of segmentation in Linux the effective address (offset)···270165271166 for (i = 0; i < x86_pmu.lbr_nr; i++) {272167 unsigned long lbr_idx = (tos - i) & mask;273273- u64 from, to, flags = 0;168168+ u64 from, to, mis = 0, pred = 0;274169275170 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);276171 rdmsrl(x86_pmu.lbr_to + lbr_idx, to);277172278173 if (lbr_format == LBR_FORMAT_EIP_FLAGS) {279279- flags = !!(from & LBR_FROM_FLAG_MISPRED);174174+ mis = !!(from & LBR_FROM_FLAG_MISPRED);175175+ pred = !mis;280176 from = (u64)((((s64)from) << 1) >> 1);281177 }282178283283- cpuc->lbr_entries[i].from = from;284284- cpuc->lbr_entries[i].to = to;285285- cpuc->lbr_entries[i].flags = flags;179179+ cpuc->lbr_entries[i].from = from;180180+ cpuc->lbr_entries[i].to = to;181181+ cpuc->lbr_entries[i].mispred = mis;182182+ cpuc->lbr_entries[i].predicted = pred;183183+ 
cpuc->lbr_entries[i].reserved = 0;286184 }287185 cpuc->lbr_stack.nr = i;288186}···301193 intel_pmu_lbr_read_32(cpuc);302194 else303195 intel_pmu_lbr_read_64(cpuc);196196+197197+ intel_pmu_lbr_filter(cpuc);304198}305199200200+/*201201+ * SW filter is used:202202+ * - in case there is no HW filter203203+ * - in case the HW filter has errata or limitations204204+ */205205+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)206206+{207207+ u64 br_type = event->attr.branch_sample_type;208208+ int mask = 0;209209+210210+ if (br_type & PERF_SAMPLE_BRANCH_USER)211211+ mask |= X86_BR_USER;212212+213213+ if (br_type & PERF_SAMPLE_BRANCH_KERNEL)214214+ mask |= X86_BR_KERNEL;215215+216216+ /* we ignore BRANCH_HV here */217217+218218+ if (br_type & PERF_SAMPLE_BRANCH_ANY)219219+ mask |= X86_BR_ANY;220220+221221+ if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)222222+ mask |= X86_BR_ANY_CALL;223223+224224+ if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)225225+ mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;226226+227227+ if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)228228+ mask |= X86_BR_IND_CALL;229229+ /*230230+ * stash actual user request into reg, it may231231+ * be used by fixup code for some CPU232232+ */233233+ event->hw.branch_reg.reg = mask;234234+}235235+236236+/*237237+ * setup the HW LBR filter238238+ * Used only when available, may not be enough to disambiguate239239+ * all branches, may need the help of the SW filter240240+ */241241+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)242242+{243243+ struct hw_perf_event_extra *reg;244244+ u64 br_type = event->attr.branch_sample_type;245245+ u64 mask = 0, m;246246+ u64 v;247247+248248+ for_each_branch_sample_type(m) {249249+ if (!(br_type & m))250250+ continue;251251+252252+ v = x86_pmu.lbr_sel_map[m];253253+ if (v == LBR_NOT_SUPP)254254+ return -EOPNOTSUPP;255255+256256+ if (v != LBR_IGN)257257+ mask |= v;258258+ }259259+ reg = &event->hw.branch_reg;260260+ reg->idx = EXTRA_REG_LBR;261261+262262+ 
/* LBR_SELECT operates in suppress mode so invert mask */263263+ reg->config = ~mask & x86_pmu.lbr_sel_mask;264264+265265+ return 0;266266+}267267+268268+int intel_pmu_setup_lbr_filter(struct perf_event *event)269269+{270270+ int ret = 0;271271+272272+ /*273273+ * no LBR on this PMU274274+ */275275+ if (!x86_pmu.lbr_nr)276276+ return -EOPNOTSUPP;277277+278278+ /*279279+ * setup SW LBR filter280280+ */281281+ intel_pmu_setup_sw_lbr_filter(event);282282+283283+ /*284284+ * setup HW LBR filter, if any285285+ */286286+ if (x86_pmu.lbr_sel_map)287287+ ret = intel_pmu_setup_hw_lbr_filter(event);288288+289289+ return ret;290290+}291291+292292+/*293293+ * return the type of control flow change at address "from"294294+ * intruction is not necessarily a branch (in case of interrupt).295295+ *296296+ * The branch type returned also includes the priv level of the297297+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).298298+ *299299+ * If a branch type is unknown OR the instruction cannot be300300+ * decoded (e.g., text page not present), then X86_BR_NONE is301301+ * returned.302302+ */303303+static int branch_type(unsigned long from, unsigned long to)304304+{305305+ struct insn insn;306306+ void *addr;307307+ int bytes, size = MAX_INSN_SIZE;308308+ int ret = X86_BR_NONE;309309+ int ext, to_plm, from_plm;310310+ u8 buf[MAX_INSN_SIZE];311311+ int is64 = 0;312312+313313+ to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;314314+ from_plm = kernel_ip(from) ? 
X86_BR_KERNEL : X86_BR_USER;315315+316316+ /*317317+ * maybe zero if lbr did not fill up after a reset by the time318318+ * we get a PMU interrupt319319+ */320320+ if (from == 0 || to == 0)321321+ return X86_BR_NONE;322322+323323+ if (from_plm == X86_BR_USER) {324324+ /*325325+ * can happen if measuring at the user level only326326+ * and we interrupt in a kernel thread, e.g., idle.327327+ */328328+ if (!current->mm)329329+ return X86_BR_NONE;330330+331331+ /* may fail if text not present */332332+ bytes = copy_from_user_nmi(buf, (void __user *)from, size);333333+ if (bytes != size)334334+ return X86_BR_NONE;335335+336336+ addr = buf;337337+ } else338338+ addr = (void *)from;339339+340340+ /*341341+ * decoder needs to know the ABI especially342342+ * on 64-bit systems running 32-bit apps343343+ */344344+#ifdef CONFIG_X86_64345345+ is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);346346+#endif347347+ insn_init(&insn, addr, is64);348348+ insn_get_opcode(&insn);349349+350350+ switch (insn.opcode.bytes[0]) {351351+ case 0xf:352352+ switch (insn.opcode.bytes[1]) {353353+ case 0x05: /* syscall */354354+ case 0x34: /* sysenter */355355+ ret = X86_BR_SYSCALL;356356+ break;357357+ case 0x07: /* sysret */358358+ case 0x35: /* sysexit */359359+ ret = X86_BR_SYSRET;360360+ break;361361+ case 0x80 ... 0x8f: /* conditional */362362+ ret = X86_BR_JCC;363363+ break;364364+ default:365365+ ret = X86_BR_NONE;366366+ }367367+ break;368368+ case 0x70 ... 0x7f: /* conditional */369369+ ret = X86_BR_JCC;370370+ break;371371+ case 0xc2: /* near ret */372372+ case 0xc3: /* near ret */373373+ case 0xca: /* far ret */374374+ case 0xcb: /* far ret */375375+ ret = X86_BR_RET;376376+ break;377377+ case 0xcf: /* iret */378378+ ret = X86_BR_IRET;379379+ break;380380+ case 0xcc ... 0xce: /* int */381381+ ret = X86_BR_INT;382382+ break;383383+ case 0xe8: /* call near rel */384384+ case 0x9a: /* call far absolute */385385+ ret = X86_BR_CALL;386386+ break;387387+ case 0xe0 ... 
0xe3: /* loop jmp */388388+ ret = X86_BR_JCC;389389+ break;390390+ case 0xe9 ... 0xeb: /* jmp */391391+ ret = X86_BR_JMP;392392+ break;393393+ case 0xff: /* call near absolute, call far absolute ind */394394+ insn_get_modrm(&insn);395395+ ext = (insn.modrm.bytes[0] >> 3) & 0x7;396396+ switch (ext) {397397+ case 2: /* near ind call */398398+ case 3: /* far ind call */399399+ ret = X86_BR_IND_CALL;400400+ break;401401+ case 4:402402+ case 5:403403+ ret = X86_BR_JMP;404404+ break;405405+ }406406+ break;407407+ default:408408+ ret = X86_BR_NONE;409409+ }410410+ /*411411+ * interrupts, traps, faults (and thus ring transition) may412412+ * occur on any instructions. Thus, to classify them correctly,413413+ * we need to first look at the from and to priv levels. If they414414+ * are different and to is in the kernel, then it indicates415415+ * a ring transition. If the from instruction is not a ring416416+ * transition instr (syscall, systenter, int), then it means417417+ * it was a irq, trap or fault.418418+ *419419+ * we have no way of detecting kernel to kernel faults.420420+ */421421+ if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL422422+ && ret != X86_BR_SYSCALL && ret != X86_BR_INT)423423+ ret = X86_BR_IRQ;424424+425425+ /*426426+ * branch priv level determined by target as427427+ * is done by HW when LBR_SELECT is implemented428428+ */429429+ if (ret != X86_BR_NONE)430430+ ret |= to_plm;431431+432432+ return ret;433433+}434434+435435+/*436436+ * implement actual branch filter based on user demand.437437+ * Hardware may not exactly satisfy that request, thus438438+ * we need to inspect opcodes. Mismatched branches are439439+ * discarded. 
Therefore, the number of branches returned440440+ * in PERF_SAMPLE_BRANCH_STACK sample may vary.441441+ */442442+static void443443+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)444444+{445445+ u64 from, to;446446+ int br_sel = cpuc->br_sel;447447+ int i, j, type;448448+ bool compress = false;449449+450450+ /* if sampling all branches, then nothing to filter */451451+ if ((br_sel & X86_BR_ALL) == X86_BR_ALL)452452+ return;453453+454454+ for (i = 0; i < cpuc->lbr_stack.nr; i++) {455455+456456+ from = cpuc->lbr_entries[i].from;457457+ to = cpuc->lbr_entries[i].to;458458+459459+ type = branch_type(from, to);460460+461461+ /* if type does not correspond, then discard */462462+ if (type == X86_BR_NONE || (br_sel & type) != type) {463463+ cpuc->lbr_entries[i].from = 0;464464+ compress = true;465465+ }466466+ }467467+468468+ if (!compress)469469+ return;470470+471471+ /* remove all entries with from=0 */472472+ for (i = 0; i < cpuc->lbr_stack.nr; ) {473473+ if (!cpuc->lbr_entries[i].from) {474474+ j = i;475475+ while (++j < cpuc->lbr_stack.nr)476476+ cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];477477+ cpuc->lbr_stack.nr--;478478+ if (!cpuc->lbr_entries[i].from)479479+ continue;480480+ }481481+ i++;482482+ }483483+}484484+485485+/*486486+ * Map interface branch filters onto LBR filters487487+ */488488+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {489489+ [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,490490+ [PERF_SAMPLE_BRANCH_USER] = LBR_USER,491491+ [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,492492+ [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,493493+ [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP494494+ | LBR_IND_JMP | LBR_FAR,495495+ /*496496+ * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches497497+ */498498+ [PERF_SAMPLE_BRANCH_ANY_CALL] =499499+ LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,500500+ /*501501+ * NHM/WSM erratum: must include IND_JMP to capture IND_CALL502502+ */503503+ [PERF_SAMPLE_BRANCH_IND_CALL] = 
LBR_IND_CALL | LBR_IND_JMP,504504+};505505+506506+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {507507+ [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,508508+ [PERF_SAMPLE_BRANCH_USER] = LBR_USER,509509+ [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,510510+ [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,511511+ [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,512512+ [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL513513+ | LBR_FAR,514514+ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,515515+};516516+517517+/* core */306518void intel_pmu_lbr_init_core(void)307519{308520 x86_pmu.lbr_nr = 4;309309- x86_pmu.lbr_tos = 0x01c9;310310- x86_pmu.lbr_from = 0x40;311311- x86_pmu.lbr_to = 0x60;521521+ x86_pmu.lbr_tos = MSR_LBR_TOS;522522+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;523523+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;524524+525525+ /*526526+ * SW branch filter usage:527527+ * - compensate for lack of HW filter528528+ */529529+ pr_cont("4-deep LBR, ");312530}313531532532+/* nehalem/westmere */314533void intel_pmu_lbr_init_nhm(void)315534{316535 x86_pmu.lbr_nr = 16;317317- x86_pmu.lbr_tos = 0x01c9;318318- x86_pmu.lbr_from = 0x680;319319- x86_pmu.lbr_to = 0x6c0;536536+ x86_pmu.lbr_tos = MSR_LBR_TOS;537537+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;538538+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;539539+540540+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;541541+ x86_pmu.lbr_sel_map = nhm_lbr_sel_map;542542+543543+ /*544544+ * SW branch filter usage:545545+ * - workaround LBR_SEL errata (see above)546546+ * - support syscall, sysret capture.547547+ * That requires LBR_FAR but that means far548548+ * jmp need to be filtered out549549+ */550550+ pr_cont("16-deep LBR, ");320551}321552553553+/* sandy bridge */554554+void intel_pmu_lbr_init_snb(void)555555+{556556+ x86_pmu.lbr_nr = 16;557557+ x86_pmu.lbr_tos = MSR_LBR_TOS;558558+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;559559+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;560560+561561+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;562562+ x86_pmu.lbr_sel_map = 
snb_lbr_sel_map;563563+564564+ /*565565+ * SW branch filter usage:566566+ * - support syscall, sysret capture.567567+ * That requires LBR_FAR but that means far568568+ * jmp need to be filtered out569569+ */570570+ pr_cont("16-deep LBR, ");571571+}572572+573573+/* atom */322574void intel_pmu_lbr_init_atom(void)323575{576576+ /*577577+ * only models starting at stepping 10 seems578578+ * to have an operational LBR which can freeze579579+ * on PMU interrupt580580+ */581581+ if (boot_cpu_data.x86_mask < 10) {582582+ pr_cont("LBR disabled due to erratum");583583+ return;584584+ }585585+324586 x86_pmu.lbr_nr = 8;325325- x86_pmu.lbr_tos = 0x01c9;326326- x86_pmu.lbr_from = 0x40;327327- x86_pmu.lbr_to = 0x60;587587+ x86_pmu.lbr_tos = MSR_LBR_TOS;588588+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;589589+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;590590+591591+ /*592592+ * SW branch filter usage:593593+ * - compensate for lack of HW filter594594+ */595595+ pr_cont("8-deep LBR, ");328596}
+77-5
include/linux/perf_event.h
···129129 PERF_SAMPLE_PERIOD = 1U << 8,130130 PERF_SAMPLE_STREAM_ID = 1U << 9,131131 PERF_SAMPLE_RAW = 1U << 10,132132+ PERF_SAMPLE_BRANCH_STACK = 1U << 11,132133133133- PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */134134+ PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */134135};136136+137137+/*138138+ * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set139139+ *140140+ * If the user does not pass priv level information via branch_sample_type,141141+ * the kernel uses the event's priv level. Branch and event priv levels do142142+ * not have to match. Branch priv level is checked for permissions.143143+ *144144+ * The branch types can be combined, however BRANCH_ANY covers all types145145+ * of branches and therefore it supersedes all the other types.146146+ */147147+enum perf_branch_sample_type {148148+ PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */149149+ PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */150150+ PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */151151+152152+ PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */153153+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */154154+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */155155+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */156156+157157+ PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */158158+};159159+160160+#define PERF_SAMPLE_BRANCH_PLM_ALL \161161+ (PERF_SAMPLE_BRANCH_USER|\162162+ PERF_SAMPLE_BRANCH_KERNEL|\163163+ PERF_SAMPLE_BRANCH_HV)135164136165/*137166 * The format of the data returned by read() on a perf event fd,···192163};193164194165#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */166166+#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */167167+#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */195168196169/*197170 * Hardware event_id to monitor via a performance monitoring event:···271240 __u64 bp_len;272241 __u64 config2; /* extension of config1 */273242 };243243+ __u64 
branch_sample_type; /* enum branch_sample_type */274244};275245276246/*···490458 *491459 * { u32 size;492460 * char data[size];}&& PERF_SAMPLE_RAW461461+ *462462+ * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK493463 * };494464 */495465 PERF_RECORD_SAMPLE = 9,···564530 void *data;565531};566532533533+/*534534+ * single taken branch record layout:535535+ *536536+ * from: source instruction (may not always be a branch insn)537537+ * to: branch target538538+ * mispred: branch target was mispredicted539539+ * predicted: branch target was predicted540540+ *541541+ * support for mispred, predicted is optional. In case it542542+ * is not supported mispred = predicted = 0.543543+ */567544struct perf_branch_entry {568568- __u64 from;569569- __u64 to;570570- __u64 flags;545545+ __u64 from;546546+ __u64 to;547547+ __u64 mispred:1, /* target mispredicted */548548+ predicted:1,/* target predicted */549549+ reserved:62;571550};572551552552+/*553553+ * branch stack layout:554554+ * nr: number of taken branches stored in entries[]555555+ *556556+ * Note that nr can vary from sample to sample557557+ * branches (to, from) are stored from most recent558558+ * to least recent, i.e., entries[0] contains the most559559+ * recent branch.560560+ */573561struct perf_branch_stack {574562 __u64 nr;575563 struct perf_branch_entry entries[0];···622566 unsigned long event_base;623567 int idx;624568 int last_cpu;569569+625570 struct hw_perf_event_extra extra_reg;571571+ struct hw_perf_event_extra branch_reg;626572 };627573 struct { /* software */628574 struct hrtimer hrtimer;···748690 * if no implementation is provided it will default to: event->hw.idx + 1.749691 */750692 int (*event_idx) (struct perf_event *event); /*optional */693693+694694+ /*695695+ * flush branch stack on context-switches (needed in cpu-wide mode)696696+ */697697+ void (*flush_branch_stack) (void);751698};752699753700/**···986923 u64 parent_gen;987924 u64 generation;988925 int pin_count;989989- int 
nr_cgroups; /* cgroup events present */926926+ int nr_cgroups; /* cgroup evts */927927+ int nr_branch_stack; /* branch_stack evt */990928 struct rcu_head rcu_head;991929};992930···1052988extern u64 perf_event_read_value(struct perf_event *event,1053989 u64 *enabled, u64 *running);1054990991991+1055992struct perf_sample_data {1056993 u64 type;1057994···10721007 u64 period;10731008 struct perf_callchain_entry *callchain;10741009 struct perf_raw_record *raw;10101010+ struct perf_branch_stack *br_stack;10751011};1076101210771013static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)10781014{10791015 data->addr = addr;10801016 data->raw = NULL;10171017+ data->br_stack = NULL;10811018}1082101910831020extern void perf_output_sample(struct perf_output_handle *handle,···12171150 (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)12181151# define perf_instruction_pointer(regs) instruction_pointer(regs)12191152#endif11531153+11541154+static inline bool has_branch_stack(struct perf_event *event)11551155+{11561156+ return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;11571157+}1220115812211159extern int perf_output_begin(struct perf_output_handle *handle,12221160 struct perf_event *event, unsigned int size);
+177
kernel/events/core.c
···118118 PERF_FLAG_FD_OUTPUT |\119119 PERF_FLAG_PID_CGROUP)120120121121+/*122122+ * branch priv levels that need permission checks123123+ */124124+#define PERF_SAMPLE_BRANCH_PERM_PLM \125125+ (PERF_SAMPLE_BRANCH_KERNEL |\126126+ PERF_SAMPLE_BRANCH_HV)127127+121128enum event_type_t {122129 EVENT_FLEXIBLE = 0x1,123130 EVENT_PINNED = 0x2,···137130 */138131struct static_key_deferred perf_sched_events __read_mostly;139132static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);133133+static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);140134141135static atomic_t nr_mmap_events __read_mostly;142136static atomic_t nr_comm_events __read_mostly;···889881 if (is_cgroup_event(event))890882 ctx->nr_cgroups++;891883884884+ if (has_branch_stack(event))885885+ ctx->nr_branch_stack++;886886+892887 list_add_rcu(&event->event_entry, &ctx->event_list);893888 if (!ctx->nr_events)894889 perf_pmu_rotate_start(ctx->pmu);···10301019 if (!ctx->nr_cgroups)10311020 cpuctx->cgrp = NULL;10321021 }10221022+10231023+ if (has_branch_stack(event))10241024+ ctx->nr_branch_stack--;1033102510341026 ctx->nr_events--;10351027 if (event->attr.inherit_stat)···22092195}2210219622112197/*21982198+ * When sampling the branck stack in system-wide, it may be necessary21992199+ * to flush the stack on context switch. This happens when the branch22002200+ * stack does not tag its entries with the pid of the current task.22012201+ * Otherwise it becomes impossible to associate a branch entry with a22022202+ * task. This ambiguity is more likely to appear when the branch stack22032203+ * supports priv level filtering and the user sets it to monitor only22042204+ * at the user level (which could be a useful measurement in system-wide22052205+ * mode). In that case, the risk is high of having a branch stack with22062206+ * branch from multiple tasks. 
Flushing may mean dropping the existing22072207+ * entries or stashing them somewhere in the PMU specific code layer.22082208+ *22092209+ * This function provides the context switch callback to the lower code22102210+ * layer. It is invoked ONLY when there is at least one system-wide context22112211+ * with at least one active event using taken branch sampling.22122212+ */22132213+static void perf_branch_stack_sched_in(struct task_struct *prev,22142214+ struct task_struct *task)22152215+{22162216+ struct perf_cpu_context *cpuctx;22172217+ struct pmu *pmu;22182218+ unsigned long flags;22192219+22202220+ /* no need to flush branch stack if not changing task */22212221+ if (prev == task)22222222+ return;22232223+22242224+ local_irq_save(flags);22252225+22262226+ rcu_read_lock();22272227+22282228+ list_for_each_entry_rcu(pmu, &pmus, entry) {22292229+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);22302230+22312231+ /*22322232+ * check if the context has at least one22332233+ * event using PERF_SAMPLE_BRANCH_STACK22342234+ */22352235+ if (cpuctx->ctx.nr_branch_stack > 022362236+ && pmu->flush_branch_stack) {22372237+22382238+ pmu = cpuctx->ctx.pmu;22392239+22402240+ perf_ctx_lock(cpuctx, cpuctx->task_ctx);22412241+22422242+ perf_pmu_disable(pmu);22432243+22442244+ pmu->flush_branch_stack();22452245+22462246+ perf_pmu_enable(pmu);22472247+22482248+ perf_ctx_unlock(cpuctx, cpuctx->task_ctx);22492249+ }22502250+ }22512251+22522252+ rcu_read_unlock();22532253+22542254+ local_irq_restore(flags);22552255+}22562256+22572257+/*22122258 * Called from scheduler to add the events of the current task22132259 * with interrupts disabled.22142260 *···22992225 */23002226 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))23012227 perf_cgroup_sched_in(prev, task);22282228+22292229+ /* check for system-wide branch_stack events */22302230+ if (atomic_read(&__get_cpu_var(perf_branch_stack_events)))22312231+ perf_branch_stack_sched_in(prev, task);23022232}2303223323042234static u64 
perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)···28682790 if (is_cgroup_event(event)) {28692791 atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));28702792 static_key_slow_dec_deferred(&perf_sched_events);27932793+ }27942794+27952795+ if (has_branch_stack(event)) {27962796+ static_key_slow_dec_deferred(&perf_sched_events);27972797+ /* is system-wide event */27982798+ if (!(event->attach_state & PERF_ATTACH_TASK))27992799+ atomic_dec(&per_cpu(perf_branch_stack_events,28002800+ event->cpu));28712801 }28722802 }28732803···39933907 }39943908 }39953909 }39103910+39113911+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {39123912+ if (data->br_stack) {39133913+ size_t size;39143914+39153915+ size = data->br_stack->nr39163916+ * sizeof(struct perf_branch_entry);39173917+39183918+ perf_output_put(handle, data->br_stack->nr);39193919+ perf_output_copy(handle, data->br_stack->entries, size);39203920+ } else {39213921+ /*39223922+ * we always store at least the value of nr39233923+ */39243924+ u64 nr = 0;39253925+ perf_output_put(handle, nr);39263926+ }39273927+ }39963928}3997392939983930void perf_prepare_sample(struct perf_event_header *header,···40513947 size += sizeof(u32);4052394840533949 WARN_ON_ONCE(size & (sizeof(u64)-1));39503950+ header->size += size;39513951+ }39523952+39533953+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {39543954+ int size = sizeof(u64); /* nr */39553955+ if (data->br_stack) {39563956+ size += data->br_stack->nr39573957+ * sizeof(struct perf_branch_entry);39583958+ }40543959 header->size += size;40553960 }40563961}···51235010 if (event->attr.type != PERF_TYPE_SOFTWARE)51245011 return -ENOENT;5125501250135013+ /*50145014+ * no branch sampling for software events50155015+ */50165016+ if (has_branch_stack(event))50175017+ return -EOPNOTSUPP;50185018+51265019 switch (event_id) {51275020 case PERF_COUNT_SW_CPU_CLOCK:51285021 case PERF_COUNT_SW_TASK_CLOCK:···5238511952395120 if (event->attr.type != PERF_TYPE_TRACEPOINT)52405121 
return -ENOENT;51225122+51235123+ /*51245124+ * no branch sampling for tracepoint events51255125+ */51265126+ if (has_branch_stack(event))51275127+ return -EOPNOTSUPP;5241512852425129 err = perf_trace_init(event);52435130 if (err)···54705345 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)54715346 return -ENOENT;5472534753485348+ /*53495349+ * no branch sampling for software events53505350+ */53515351+ if (has_branch_stack(event))53525352+ return -EOPNOTSUPP;53535353+54735354 perf_swevent_init_hrtimer(event);5474535554755356 return 0;···5549541855505419 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)55515420 return -ENOENT;54215421+54225422+ /*54235423+ * no branch sampling for software events54245424+ */54255425+ if (has_branch_stack(event))54265426+ return -EOPNOTSUPP;5552542755535428 perf_swevent_init_hrtimer(event);55545429···60035866 return ERR_PTR(err);60045867 }60055868 }58695869+ if (has_branch_stack(event)) {58705870+ static_key_slow_inc(&perf_sched_events.key);58715871+ if (!(event->attach_state & PERF_ATTACH_TASK))58725872+ atomic_inc(&per_cpu(perf_branch_stack_events,58735873+ event->cpu));58745874+ }60065875 }6007587660085877 return event;···60785935 if (attr->read_format & ~(PERF_FORMAT_MAX-1))60795936 return -EINVAL;6080593759385938+ if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {59395939+ u64 mask = attr->branch_sample_type;59405940+59415941+ /* only using defined bits */59425942+ if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))59435943+ return -EINVAL;59445944+59455945+ /* at least one branch bit must be set */59465946+ if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))59475947+ return -EINVAL;59485948+59495949+ /* kernel level capture: check permissions */59505950+ if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)59515951+ && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))59525952+ return -EACCES;59535953+59545954+ /* propagate priv level, when not set for branch */59555955+ if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {59565956+59575957+ /* exclude_kernel 
checked on syscall entry */59585958+ if (!attr->exclude_kernel)59595959+ mask |= PERF_SAMPLE_BRANCH_KERNEL;59605960+59615961+ if (!attr->exclude_user)59625962+ mask |= PERF_SAMPLE_BRANCH_USER;59635963+59645964+ if (!attr->exclude_hv)59655965+ mask |= PERF_SAMPLE_BRANCH_HV;59665966+ /*59675967+ * adjust user setting (for HW filter setup)59685968+ */59695969+ attr->branch_sample_type = mask;59705970+ }59715971+ }60815972out:60825973 return ret;60835974
+6
kernel/events/hw_breakpoint.c
···581581 if (bp->attr.type != PERF_TYPE_BREAKPOINT)582582 return -ENOENT;583583584584+ /*585585+ * no branch sampling for breakpoint events586586+ */587587+ if (has_branch_stack(bp))588588+ return -EOPNOTSUPP;589589+584590 err = register_perf_hw_breakpoint(bp);585591 if (err)586592 return err;
+30
tools/perf/Documentation/perf-record.txt
···152152corresponding events, i.e., they always refer to events defined earlier on the command153153line.154154155155+-b::156156+--branch-any::157157+Enable taken branch stack sampling. Any type of taken branch may be sampled.158158+This is a shortcut for --branch-filter any. See --branch-filter for more information.159159+160160+-j::161161+--branch-filter::162162+Enable taken branch stack sampling. Each sample captures a series of consecutive163163+taken branches. The number of branches captured with each sample depends on the164164+underlying hardware, the type of branches of interest, and the executed code.165165+It is possible to select the types of branches captured by enabling filters. The166166+following filters are defined:167167+168168+ - any: any type of branch169169+ - any_call: any function call or system call170170+ - any_ret: any function return or system call return171171+ - ind_call: any indirect call172172+ - u: only when the branch target is at the user level173173+ - k: only when the branch target is in the kernel174174+ - hv: only when the target is at the hypervisor level175175+176176++177177+The option requires at least one branch type among any, any_call, any_ret, ind_call.178178+The privilege levels may be omitted, in which case, the privilege levels of the associated179179+event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege180180+levels are subject to permissions. When sampling on multiple events, branch stack sampling181181+is enabled for all the sampling events. The sampled branch type is the same for all events.182182+The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k183183+Note that this feature may not be available on all processors.184184+155185SEE ALSO156186--------157187linkperf:perf-stat[1], linkperf:perf-list[1]
+10
tools/perf/Documentation/perf-report.txt
···153153 information which may be very large and thus may clutter the display.154154 It currently includes: cpu and numa topology of the host system.155155156156+-b::157157+--branch-stack::158158+ Use the addresses of sampled taken branches instead of the instruction159159+ address to build the histograms. To generate meaningful output, the160160+ perf.data file must have been obtained using perf record -b or161161+ perf record --branch-filter xxx where xxx is a branch filter option.162162+ perf report is able to auto-detect whether a perf.data file contains163163+ branch stacks and it will automatically switch to the branch view mode,164164+ unless --no-branch-stack is used.165165+156166SEE ALSO157167--------158168linkperf:perf-stat[1], linkperf:perf-annotate[1]
+95
tools/perf/builtin-record.c
···473473 if (!have_tracepoints(&evsel_list->entries))474474 perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);475475476476+ if (!rec->opts.branch_stack)477477+ perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);478478+476479 if (!rec->file_new) {477480 err = perf_session__read_header(session, output);478481 if (err < 0)···641638 return err;642639}643640641641+#define BRANCH_OPT(n, m) \642642+ { .name = n, .mode = (m) }643643+644644+#define BRANCH_END { .name = NULL }645645+646646+struct branch_mode {647647+ const char *name;648648+ int mode;649649+};650650+651651+static const struct branch_mode branch_modes[] = {652652+ BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),653653+ BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),654654+ BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),655655+ BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),656656+ BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),657657+ BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),658658+ BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),659659+ BRANCH_END660660+};661661+662662+static int663663+parse_branch_stack(const struct option *opt, const char *str, int unset)664664+{665665+#define ONLY_PLM \666666+ (PERF_SAMPLE_BRANCH_USER |\667667+ PERF_SAMPLE_BRANCH_KERNEL |\668668+ PERF_SAMPLE_BRANCH_HV)669669+670670+ uint64_t *mode = (uint64_t *)opt->value;671671+ const struct branch_mode *br;672672+ char *s, *os = NULL, *p;673673+ int ret = -1;674674+675675+ if (unset)676676+ return 0;677677+678678+ /*679679+ * cannot set it twice, -b + --branch-filter for instance680680+ */681681+ if (*mode)682682+ return -1;683683+684684+ /* str may be NULL in case no arg is passed to -b */685685+ if (str) {686686+ /* because str is read-only */687687+ s = os = strdup(str);688688+ if (!s)689689+ return -1;690690+691691+ for (;;) {692692+ p = strchr(s, ',');693693+ if (p)694694+ *p = '\0';695695+696696+ for (br = branch_modes; br->name; br++) {697697+ if (!strcasecmp(s, br->name))698698+ break;699699+ }700700+ if 
(!br->name) {701701+ ui__warning("unknown branch filter %s,"702702+ " check man page\n", s);703703+ goto error;704704+ }705705+706706+ *mode |= br->mode;707707+708708+ if (!p)709709+ break;710710+711711+ s = p + 1;712712+ }713713+ }714714+ ret = 0;715715+716716+ /* default to any branch */717717+ if ((*mode & ~ONLY_PLM) == 0) {718718+ *mode = PERF_SAMPLE_BRANCH_ANY;719719+ }720720+error:721721+ free(os);722722+ return ret;723723+}724724+644725static const char * const record_usage[] = {645726 "perf record [<options>] [<command>]",646727 "perf record [<options>] -- <command> [<options>]",···814727 "monitor event in cgroup name only",815728 parse_cgroups),816729 OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),730730+731731+ OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,732732+ "branch any", "sample any taken branches",733733+ parse_branch_stack),734734+735735+ OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,736736+ "branch filter mask", "branch stack filter modes",737737+ parse_branch_stack),817738 OPT_END()818739};819740
+156-22
tools/perf/builtin-report.c
···5353 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);5454};55555656+static int perf_report__add_branch_hist_entry(struct perf_tool *tool,5757+ struct addr_location *al,5858+ struct perf_sample *sample,5959+ struct perf_evsel *evsel,6060+ struct machine *machine)6161+{6262+ struct perf_report *rep = container_of(tool, struct perf_report, tool);6363+ struct symbol *parent = NULL;6464+ int err = 0;6565+ unsigned i;6666+ struct hist_entry *he;6767+ struct branch_info *bi, *bx;6868+6969+ if ((sort__has_parent || symbol_conf.use_callchain)7070+ && sample->callchain) {7171+ err = machine__resolve_callchain(machine, evsel, al->thread,7272+ sample->callchain, &parent);7373+ if (err)7474+ return err;7575+ }7676+7777+ bi = machine__resolve_bstack(machine, al->thread,7878+ sample->branch_stack);7979+ if (!bi)8080+ return -ENOMEM;8181+8282+ for (i = 0; i < sample->branch_stack->nr; i++) {8383+ if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))8484+ continue;8585+ /*8686+ * The report shows the percentage of total branches captured8787+ * and not events sampled. 
Thus we use a pseudo period of 1.8888+ */8989+ he = __hists__add_branch_entry(&evsel->hists, al, parent,9090+ &bi[i], 1);9191+ if (he) {9292+ struct annotation *notes;9393+ err = -ENOMEM;9494+ bx = he->branch_info;9595+ if (bx->from.sym && use_browser > 0) {9696+ notes = symbol__annotation(bx->from.sym);9797+ if (!notes->src9898+ && symbol__alloc_hist(bx->from.sym) < 0)9999+ goto out;100100+101101+ err = symbol__inc_addr_samples(bx->from.sym,102102+ bx->from.map,103103+ evsel->idx,104104+ bx->from.al_addr);105105+ if (err)106106+ goto out;107107+ }108108+109109+ if (bx->to.sym && use_browser > 0) {110110+ notes = symbol__annotation(bx->to.sym);111111+ if (!notes->src112112+ && symbol__alloc_hist(bx->to.sym) < 0)113113+ goto out;114114+115115+ err = symbol__inc_addr_samples(bx->to.sym,116116+ bx->to.map,117117+ evsel->idx,118118+ bx->to.al_addr);119119+ if (err)120120+ goto out;121121+ }122122+ evsel->hists.stats.total_period += 1;123123+ hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);124124+ err = 0;125125+ } else126126+ return -ENOMEM;127127+ }128128+out:129129+ return err;130130+}131131+56132static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,57133 struct addr_location *al,58134 struct perf_sample *sample,···202126 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))203127 return 0;204128205205- if (al.map != NULL)206206- al.map->dso->hit = 1;129129+ if (sort__branch_mode == 1) {130130+ if (perf_report__add_branch_hist_entry(tool, &al, sample,131131+ evsel, machine)) {132132+ pr_debug("problem adding lbr entry, skipping event\n");133133+ return -1;134134+ }135135+ } else {136136+ if (al.map != NULL)137137+ al.map->dso->hit = 1;207138208208- if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {209209- pr_debug("problem incrementing symbol period, skipping event\n");210210- return -1;139139+ if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {140140+ pr_debug("problem incrementing symbol period, skipping 
event\n");141141+ return -1;142142+ }211143 }212212-213144 return 0;214145}215146···269186 "params.\n");270187 return -EINVAL;271188 }189189+ }190190+191191+ if (sort__branch_mode == 1) {192192+ if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {193193+ fprintf(stderr, "selected -b but no branch data."194194+ " Did you call perf record without"195195+ " -b?\n");196196+ return -1;197197+ }272198 }273199274200 return 0;···338246{339247 int ret = -EINVAL;340248 u64 nr_samples;341341- struct perf_session *session;249249+ struct perf_session *session = rep->session;342250 struct perf_evsel *pos;343251 struct map *kernel_map;344252 struct kmap *kernel_kmap;345253 const char *help = "For a higher level overview, try: perf report --sort comm,dso";346254347255 signal(SIGINT, sig_handler);348348-349349- session = perf_session__new(rep->input_name, O_RDONLY,350350- rep->force, false, &rep->tool);351351- if (session == NULL)352352- return -ENOMEM;353353-354354- rep->session = session;355256356257 if (rep->cpu_list) {357258 ret = perf_session__cpu_bitmap(session, rep->cpu_list,···512427 return 0;513428}514429430430+static int431431+parse_branch_mode(const struct option *opt __used, const char *str __used, int unset)432432+{433433+ sort__branch_mode = !unset;434434+ return 0;435435+}436436+515437int cmd_report(int argc, const char **argv, const char *prefix __used)516438{439439+ struct perf_session *session;517440 struct stat st;441441+ bool has_br_stack = false;442442+ int ret = -1;518443 char callchain_default_opt[] = "fractal,0.5,callee";519444 const char * const report_usage[] = {520445 "perf report [<options>]",···572477 OPT_BOOLEAN(0, "stdio", &report.use_stdio,573478 "Use the stdio interface"),574479 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",575575- "sort by key(s): pid, comm, dso, symbol, parent"),480480+ "sort by key(s): pid, comm, dso, symbol, parent, dso_to,"481481+ " dso_from, symbol_to, symbol_from, mispredict"),576482 OPT_BOOLEAN(0, 
"showcpuutilization", &symbol_conf.show_cpu_utilization,577483 "Show sample percentage for different cpu modes"),578484 OPT_STRING('p', "parent", &parent_pattern, "regex",···613517 "Specify disassembler style (e.g. -M intel for intel syntax)"),614518 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,615519 "Show a column with the sum of periods"),520520+ OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",521521+ "use branch records for histogram filling", parse_branch_mode),616522 OPT_END()617523 };618524···634536 else635537 report.input_name = "perf.data";636538 }539539+ session = perf_session__new(report.input_name, O_RDONLY,540540+ report.force, false, &report.tool);541541+ if (session == NULL)542542+ return -ENOMEM;637543638638- if (strcmp(report.input_name, "-") != 0)544544+ report.session = session;545545+546546+ has_br_stack = perf_header__has_feat(&session->header,547547+ HEADER_BRANCH_STACK);548548+549549+ if (sort__branch_mode == -1 && has_br_stack)550550+ sort__branch_mode = 1;551551+552552+ /* sort__branch_mode could be 0 if --no-branch-stack */553553+ if (sort__branch_mode == 1) {554554+ /*555555+ * if no sort_order is provided, then specify556556+ * branch-mode specific order557557+ */558558+ if (sort_order == default_sort_order)559559+ sort_order = "comm,dso_from,symbol_from,"560560+ "dso_to,symbol_to";561561+562562+ }563563+564564+ if (strcmp(report.input_name, "-") != 0) {639565 setup_browser(true);640640- else566566+ } else {641567 use_browser = 0;568568+ }642569643570 /*644571 * Only in the newt browser we are doing integrated annotation,···691568 }692569693570 if (symbol__init() < 0)694694- return -1;571571+ goto error;695572696573 setup_sorting(report_usage, options);697574698575 if (parent_pattern != default_parent_pattern) {699576 if (sort_dimension__add("parent") < 0)700700- return -1;577577+ goto error;701578702579 /*703580 * Only show the parent fields if we explicitly···715592 if (argc)716593 
usage_with_options(report_usage, options);717594718718- sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);719595 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);720720- sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);721596722722- return __cmd_report(&report);597597+ if (sort__branch_mode == 1) {598598+ sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout);599599+ sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout);600600+ sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);601601+ sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);602602+ } else {603603+ sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);604604+ sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);605605+ }606606+607607+ ret = __cmd_report(&report);608608+error:609609+ perf_session__delete(session);610610+ return ret;723611}
···10231023 return do_write_string(fd, buffer);10241024}1025102510261026+static int write_branch_stack(int fd __used, struct perf_header *h __used,10271027+ struct perf_evlist *evlist __used)10281028+{10291029+ return 0;10301030+}10311031+10261032static void print_hostname(struct perf_header *ph, int fd, FILE *fp)10271033{10281034 char *str = do_read_string(fd, ph);···11501144 uint64_t id;11511145 void *buf = NULL;11521146 char *str;11531153- u32 nre, sz, nr, i, j, msz;11541154- int ret;11471147+ u32 nre, sz, nr, i, j;11481148+ ssize_t ret;11491149+ size_t msz;1155115011561151 /* number of events */11571152 ret = read(fd, &nre, sizeof(nre));···11691162 if (ph->needs_swap)11701163 sz = bswap_32(sz);1171116411721172- /*11731173- * ensure it is at least to our ABI rev11741174- */11751175- if (sz < (u32)sizeof(attr))11761176- goto error;11771177-11781165 memset(&attr, 0, sizeof(attr));1179116611801180- /* read entire region to sync up to next field */11671167+ /* buffer to hold on file attr struct */11811168 buf = malloc(sz);11821169 if (!buf)11831170 goto error;1184117111851172 msz = sizeof(attr);11861186- if (sz < msz)11731173+ if (sz < (ssize_t)msz)11871174 msz = sz;1188117511891176 for (i = 0 ; i < nre; i++) {1190117711781178+ /*11791179+ * must read entire on-file attr struct to11801180+ * sync up with layout.11811181+ */11911182 ret = read(fd, buf, sz);11921183 if (ret != (ssize_t)sz)11931184 goto error;···13191314 char *str = do_read_string(fd, ph);13201315 fprintf(fp, "# cpuid : %s\n", str);13211316 free(str);13171317+}13181318+13191319+static void print_branch_stack(struct perf_header *ph __used, int fd __used,13201320+ FILE *fp)13211321+{13221322+ fprintf(fp, "# contains samples with branch stack\n");13221323}1323132413241325static int __event_process_build_id(struct build_id_event *bev,···15311520 FEAT_OPA(HEADER_CMDLINE, cmdline),15321521 FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology),15331522 FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),15231523+ 
FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),15341524};1535152515361526struct header_print_data {···18161804 return err;18171805}1818180618191819-static int check_magic_endian(u64 *magic, struct perf_file_header *header,18201820- struct perf_header *ph)18071807+static const int attr_file_abi_sizes[] = {18081808+ [0] = PERF_ATTR_SIZE_VER0,18091809+ [1] = PERF_ATTR_SIZE_VER1,18101810+ 0,18111811+};18121812+18131813+/*18141814+ * In the legacy file format, the magic number is not used to encode endianness.18151815+ * hdr_sz was used to encode endianness. But given that hdr_sz can vary based18161816+ * on ABI revisions, we need to try all combinations for all endianness to18171817+ * detect the endianness.18181818+ */18191819+static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)18201820+{18211821+ uint64_t ref_size, attr_size;18221822+ int i;18231823+18241824+ for (i = 0 ; attr_file_abi_sizes[i]; i++) {18251825+ ref_size = attr_file_abi_sizes[i]18261826+ + sizeof(struct perf_file_section);18271827+ if (hdr_sz != ref_size) {18281828+ attr_size = bswap_64(hdr_sz);18291829+ if (attr_size != ref_size)18301830+ continue;18311831+18321832+ ph->needs_swap = true;18331833+ }18341834+ pr_debug("ABI%d perf.data file detected, need_swap=%d\n",18351835+ i,18361836+ ph->needs_swap);18371837+ return 0;18381838+ }18391839+ /* could not determine endianness */18401840+ return -1;18411841+}18421842+18431843+#define PERF_PIPE_HDR_VER0 1618441844+18451845+static const size_t attr_pipe_abi_sizes[] = {18461846+ [0] = PERF_PIPE_HDR_VER0,18471847+ 0,18481848+};18491849+18501850+/*18511851+ * In the legacy pipe format, there is an implicit assumption that endiannesss18521852+ * between host recording the samples, and host parsing the samples is the18531853+ * same. 
This is not always the case given that the pipe output may always be18541854+ * redirected into a file and analyzed on a different machine with possibly a18551855+ * different endianness and perf_event ABI revsions in the perf tool itself.18561856+ */18571857+static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)18581858+{18591859+ u64 attr_size;18601860+ int i;18611861+18621862+ for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {18631863+ if (hdr_sz != attr_pipe_abi_sizes[i]) {18641864+ attr_size = bswap_64(hdr_sz);18651865+ if (attr_size != hdr_sz)18661866+ continue;18671867+18681868+ ph->needs_swap = true;18691869+ }18701870+ pr_debug("Pipe ABI%d perf.data file detected\n", i);18711871+ return 0;18721872+ }18731873+ return -1;18741874+}18751875+18761876+static int check_magic_endian(u64 magic, uint64_t hdr_sz,18771877+ bool is_pipe, struct perf_header *ph)18211878{18221879 int ret;1823188018241881 /* check for legacy format */18251825- ret = memcmp(magic, __perf_magic1, sizeof(*magic));18821882+ ret = memcmp(&magic, __perf_magic1, sizeof(magic));18261883 if (ret == 0) {18271884 pr_debug("legacy perf.data format\n");18281828- if (!header)18291829- return -1;18851885+ if (is_pipe)18861886+ return try_all_pipe_abis(hdr_sz, ph);1830188718311831- if (header->attr_size != sizeof(struct perf_file_attr)) {18321832- u64 attr_size = bswap_64(header->attr_size);18331833-18341834- if (attr_size != sizeof(struct perf_file_attr))18351835- return -1;18361836-18371837- ph->needs_swap = true;18381838- }18391839- return 0;18881888+ return try_all_file_abis(hdr_sz, ph);18401889 }18901890+ /*18911891+ * the new magic number serves two purposes:18921892+ * - unique number to identify actual perf.data files18931893+ * - encode endianness of file18941894+ */1841189518421842- /* check magic number with same endianness */18431843- if (*magic == __perf_magic2)18961896+ /* check magic number with one endianness */18971897+ if (magic == __perf_magic2)18441898 return 
0;1845189918461846- /* check magic number but opposite endianness */18471847- if (*magic != __perf_magic2_sw)19001900+ /* check magic number with opposite endianness */19011901+ if (magic != __perf_magic2_sw)18481902 return -1;1849190318501904 ph->needs_swap = true;···19291851 if (ret <= 0)19301852 return -1;1931185319321932- if (check_magic_endian(&header->magic, header, ph) < 0)18541854+ if (check_magic_endian(header->magic,18551855+ header->attr_size, false, ph) < 0) {18561856+ pr_debug("magic/endian check failed\n");19331857 return -1;18581858+ }1934185919351860 if (ph->needs_swap) {19361861 mem_bswap_64(header, offsetof(struct perf_file_header,···20201939 if (ret <= 0)20211940 return -1;2022194120232023- if (check_magic_endian(&header->magic, NULL, ph) < 0)19421942+ if (check_magic_endian(header->magic, header->size, true, ph) < 0) {19431943+ pr_debug("endian/magic failed\n");20241944 return -1;19451945+ }19461946+19471947+ if (ph->needs_swap)19481948+ header->size = bswap_64(header->size);2025194920261950 if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)20271951 return -1;20282028-20292029- if (header->size != sizeof(*header)) {20302030- u64 size = bswap_64(header->size);20312031-20322032- if (size != sizeof(*header))20332033- return -1;20342034-20352035- ph->needs_swap = true;20362036- }2037195220381953 return 0;20391954}···20501973 return 0;20511974}2052197519761976+static int read_attr(int fd, struct perf_header *ph,19771977+ struct perf_file_attr *f_attr)19781978+{19791979+ struct perf_event_attr *attr = &f_attr->attr;19801980+ size_t sz, left;19811981+ size_t our_sz = sizeof(f_attr->attr);19821982+ int ret;19831983+19841984+ memset(f_attr, 0, sizeof(*f_attr));19851985+19861986+ /* read minimal guaranteed structure */19871987+ ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);19881988+ if (ret <= 0) {19891989+ pr_debug("cannot read %d bytes of header attr\n",19901990+ PERF_ATTR_SIZE_VER0);19911991+ return -1;19921992+ }19931993+19941994+ /* on 
file perf_event_attr size */19951995+ sz = attr->size;19961996+19971997+ if (ph->needs_swap)19981998+ sz = bswap_32(sz);19991999+20002000+ if (sz == 0) {20012001+ /* assume ABI0 */20022002+ sz = PERF_ATTR_SIZE_VER0;20032003+ } else if (sz > our_sz) {20042004+ pr_debug("file uses a more recent and unsupported ABI"20052005+ " (%zu bytes extra)\n", sz - our_sz);20062006+ return -1;20072007+ }20082008+ /* what we have not yet read and that we know about */20092009+ left = sz - PERF_ATTR_SIZE_VER0;20102010+ if (left) {20112011+ void *ptr = attr;20122012+ ptr += PERF_ATTR_SIZE_VER0;20132013+20142014+ ret = readn(fd, ptr, left);20152015+ }20162016+ /* read perf_file_section, ids are read in caller */20172017+ ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));20182018+20192019+ return ret <= 0 ? -1 : 0;20202020+}20212021+20532022int perf_session__read_header(struct perf_session *session, int fd)20542023{20552024 struct perf_header *header = &session->header;···21111988 if (session->fd_pipe)21121989 return perf_header__read_pipe(session, fd);2113199021142114- if (perf_file_header__read(&f_header, header, fd) < 0) {21152115- pr_debug("incompatible file format\n");19911991+ if (perf_file_header__read(&f_header, header, fd) < 0)21161992 return -EINVAL;21172117- }2118199321192119- nr_attrs = f_header.attrs.size / sizeof(f_attr);19941994+ nr_attrs = f_header.attrs.size / f_header.attr_size;21201995 lseek(fd, f_header.attrs.offset, SEEK_SET);2121199621221997 for (i = 0; i < nr_attrs; i++) {21231998 struct perf_evsel *evsel;21241999 off_t tmp;2125200021262126- if (readn(fd, &f_attr, sizeof(f_attr)) <= 0)20012001+ if (read_attr(fd, header, &f_attr) < 0)21272002 goto out_errno;2128200321292004 if (header->needs_swap)