include/linux/perf_event.h at v6.15 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / linux / perf_event.h
at v6.15 2042 lines 60 kB view raw
   1/*
   2 * Performance events:
   3 *
   4 *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
   5 *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
   6 *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
   7 *
   8 * Data type definitions, declarations, prototypes.
   9 *
  10 *    Started by: Thomas Gleixner and Ingo Molnar
  11 *
  12 * For licencing details see kernel-base/COPYING
  13 */
  14#ifndef _LINUX_PERF_EVENT_H
  15#define _LINUX_PERF_EVENT_H
  16
  17#include <uapi/linux/perf_event.h>
  18#include <uapi/linux/bpf_perf_event.h>
  19
  20/*
  21 * Kernel-internal data types and definitions:
  22 */
  23
  24#ifdef CONFIG_PERF_EVENTS
  25# include <asm/perf_event.h>
  26# include <asm/local64.h>
  27#endif
  28
  29#define PERF_GUEST_ACTIVE	0x01
  30#define PERF_GUEST_USER	0x02
  31
  32struct perf_guest_info_callbacks {
  33	unsigned int			(*state)(void);
  34	unsigned long			(*get_ip)(void);
  35	unsigned int			(*handle_intel_pt_intr)(void);
  36};
  37
  38#ifdef CONFIG_HAVE_HW_BREAKPOINT
  39#include <linux/rhashtable-types.h>
  40#include <asm/hw_breakpoint.h>
  41#endif
  42
  43#include <linux/list.h>
  44#include <linux/mutex.h>
  45#include <linux/rculist.h>
  46#include <linux/rcupdate.h>
  47#include <linux/spinlock.h>
  48#include <linux/hrtimer.h>
  49#include <linux/fs.h>
  50#include <linux/pid_namespace.h>
  51#include <linux/workqueue.h>
  52#include <linux/ftrace.h>
  53#include <linux/cpu.h>
  54#include <linux/irq_work.h>
  55#include <linux/static_key.h>
  56#include <linux/jump_label_ratelimit.h>
  57#include <linux/atomic.h>
  58#include <linux/sysfs.h>
  59#include <linux/perf_regs.h>
  60#include <linux/cgroup.h>
  61#include <linux/refcount.h>
  62#include <linux/security.h>
  63#include <linux/static_call.h>
  64#include <linux/lockdep.h>
  65#include <asm/local.h>
  66
  67struct perf_callchain_entry {
  68	__u64				nr;
  69	__u64				ip[]; /* /proc/sys/kernel/perf_event_max_stack */
  70};
  71
  72struct perf_callchain_entry_ctx {
  73	struct perf_callchain_entry *entry;
  74	u32			    max_stack;
  75	u32			    nr;
  76	short			    contexts;
  77	bool			    contexts_maxed;
  78};
  79
  80typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
  81				     unsigned long off, unsigned long len);
  82
  83struct perf_raw_frag {
  84	union {
  85		struct perf_raw_frag	*next;
  86		unsigned long		pad;
  87	};
  88	perf_copy_f			copy;
  89	void				*data;
  90	u32				size;
  91} __packed;
  92
  93struct perf_raw_record {
  94	struct perf_raw_frag		frag;
  95	u32				size;
  96};
  97
  98static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
  99{
 100	return frag->pad < sizeof(u64);
 101}
 102
 103/*
 104 * branch stack layout:
 105 *  nr: number of taken branches stored in entries[]
 106 *  hw_idx: The low level index of raw branch records
 107 *          for the most recent branch.
 108 *          -1ULL means invalid/unknown.
 109 *
 110 * Note that nr can vary from sample to sample
 111 * branches (to, from) are stored from most recent
 112 * to least recent, i.e., entries[0] contains the most
 113 * recent branch.
 114 * The entries[] is an abstraction of raw branch records,
 115 * which may not be stored in age order in HW, e.g. Intel LBR.
 116 * The hw_idx is to expose the low level index of raw
 117 * branch record for the most recent branch aka entries[0].
 118 * The hw_idx index is between -1 (unknown) and max depth,
 119 * which can be retrieved in /sys/devices/cpu/caps/branches.
 120 * For the architectures whose raw branch records are
 121 * already stored in age order, the hw_idx should be 0.
 122 */
 123struct perf_branch_stack {
 124	__u64				nr;
 125	__u64				hw_idx;
 126	struct perf_branch_entry	entries[];
 127};
 128
 129struct task_struct;
 130
 131/*
 132 * extra PMU register associated with an event
 133 */
 134struct hw_perf_event_extra {
 135	u64		config;	/* register value */
 136	unsigned int	reg;	/* register address or index */
 137	int		alloc;	/* extra register already allocated */
 138	int		idx;	/* index in shared_regs->regs[] */
 139};
 140
 141/**
 142 * hw_perf_event::flag values
 143 *
 144 * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
 145 * usage.
 146 */
 147#define PERF_EVENT_FLAG_ARCH			0x000fffff
 148#define PERF_EVENT_FLAG_USER_READ_CNT		0x80000000
 149
 150static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
 151
 152/**
 153 * struct hw_perf_event - performance event hardware details:
 154 */
 155struct hw_perf_event {
 156#ifdef CONFIG_PERF_EVENTS
 157	union {
 158		struct { /* hardware */
 159			u64		config;
 160			u64		last_tag;
 161			unsigned long	config_base;
 162			unsigned long	event_base;
 163			int		event_base_rdpmc;
 164			int		idx;
 165			int		last_cpu;
 166			int		flags;
 167
 168			struct hw_perf_event_extra extra_reg;
 169			struct hw_perf_event_extra branch_reg;
 170		};
 171		struct { /* aux / Intel-PT */
 172			u64		aux_config;
 173			/*
 174			 * For AUX area events, aux_paused cannot be a state
 175			 * flag because it can be updated asynchronously to
 176			 * state.
 177			 */
 178			unsigned int	aux_paused;
 179		};
 180		struct { /* software */
 181			struct hrtimer	hrtimer;
 182		};
 183		struct { /* tracepoint */
 184			/* for tp_event->class */
 185			struct list_head	tp_list;
 186		};
 187		struct { /* amd_power */
 188			u64	pwr_acc;
 189			u64	ptsc;
 190		};
 191#ifdef CONFIG_HAVE_HW_BREAKPOINT
 192		struct { /* breakpoint */
 193			/*
 194			 * Crufty hack to avoid the chicken and egg
 195			 * problem hw_breakpoint has with context
 196			 * creation and event initalization.
 197			 */
 198			struct arch_hw_breakpoint	info;
 199			struct rhlist_head		bp_list;
 200		};
 201#endif
 202		struct { /* amd_iommu */
 203			u8	iommu_bank;
 204			u8	iommu_cntr;
 205			u16	padding;
 206			u64	conf;
 207			u64	conf1;
 208		};
 209	};
 210	/*
 211	 * If the event is a per task event, this will point to the task in
 212	 * question. See the comment in perf_event_alloc().
 213	 */
 214	struct task_struct		*target;
 215
 216	/*
 217	 * PMU would store hardware filter configuration
 218	 * here.
 219	 */
 220	void				*addr_filters;
 221
 222	/* Last sync'ed generation of filters */
 223	unsigned long			addr_filters_gen;
 224
 225/*
 226 * hw_perf_event::state flags; used to track the PERF_EF_* state.
 227 */
 228#define PERF_HES_STOPPED	0x01 /* the counter is stopped */
 229#define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
 230#define PERF_HES_ARCH		0x04
 231
 232	int				state;
 233
 234	/*
 235	 * The last observed hardware counter value, updated with a
 236	 * local64_cmpxchg() such that pmu::read() can be called nested.
 237	 */
 238	local64_t			prev_count;
 239
 240	/*
 241	 * The period to start the next sample with.
 242	 */
 243	u64				sample_period;
 244
 245	union {
 246		struct { /* Sampling */
 247			/*
 248			 * The period we started this sample with.
 249			 */
 250			u64				last_period;
 251
 252			/*
 253			 * However much is left of the current period;
 254			 * note that this is a full 64bit value and
 255			 * allows for generation of periods longer
 256			 * than hardware might allow.
 257			 */
 258			local64_t			period_left;
 259		};
 260		struct { /* Topdown events counting for context switch */
 261			u64				saved_metric;
 262			u64				saved_slots;
 263		};
 264	};
 265
 266	/*
 267	 * State for throttling the event, see __perf_event_overflow() and
 268	 * perf_adjust_freq_unthr_context().
 269	 */
 270	u64                             interrupts_seq;
 271	u64				interrupts;
 272
 273	/*
 274	 * State for freq target events, see __perf_event_overflow() and
 275	 * perf_adjust_freq_unthr_context().
 276	 */
 277	u64				freq_time_stamp;
 278	u64				freq_count_stamp;
 279#endif
 280};
 281
 282struct perf_event;
 283struct perf_event_pmu_context;
 284
 285/*
 286 * Common implementation detail of pmu::{start,commit,cancel}_txn
 287 */
 288#define PERF_PMU_TXN_ADD  0x1		/* txn to add/schedule event on PMU */
 289#define PERF_PMU_TXN_READ 0x2		/* txn to read event group from PMU */
 290
 291/**
 292 * pmu::capabilities flags
 293 */
 294#define PERF_PMU_CAP_NO_INTERRUPT		0x0001
 295#define PERF_PMU_CAP_NO_NMI			0x0002
 296#define PERF_PMU_CAP_AUX_NO_SG			0x0004
 297#define PERF_PMU_CAP_EXTENDED_REGS		0x0008
 298#define PERF_PMU_CAP_EXCLUSIVE			0x0010
 299#define PERF_PMU_CAP_ITRACE			0x0020
 300#define PERF_PMU_CAP_NO_EXCLUDE			0x0040
 301#define PERF_PMU_CAP_AUX_OUTPUT			0x0080
 302#define PERF_PMU_CAP_EXTENDED_HW_TYPE		0x0100
 303#define PERF_PMU_CAP_AUX_PAUSE			0x0200
 304
 305/**
 306 * pmu::scope
 307 */
 308enum perf_pmu_scope {
 309	PERF_PMU_SCOPE_NONE	= 0,
 310	PERF_PMU_SCOPE_CORE,
 311	PERF_PMU_SCOPE_DIE,
 312	PERF_PMU_SCOPE_CLUSTER,
 313	PERF_PMU_SCOPE_PKG,
 314	PERF_PMU_SCOPE_SYS_WIDE,
 315	PERF_PMU_MAX_SCOPE,
 316};
 317
 318struct perf_output_handle;
 319
 320#define PMU_NULL_DEV	((void *)(~0UL))
 321
 322/**
 323 * struct pmu - generic performance monitoring unit
 324 */
 325struct pmu {
 326	struct list_head		entry;
 327
 328	struct module			*module;
 329	struct device			*dev;
 330	struct device			*parent;
 331	const struct attribute_group	**attr_groups;
 332	const struct attribute_group	**attr_update;
 333	const char			*name;
 334	int				type;
 335
 336	/*
 337	 * various common per-pmu feature flags
 338	 */
 339	int				capabilities;
 340
 341	/*
 342	 * PMU scope
 343	 */
 344	unsigned int			scope;
 345
 346	struct perf_cpu_pmu_context * __percpu *cpu_pmu_context;
 347	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
 348	int				task_ctx_nr;
 349	int				hrtimer_interval_ms;
 350
 351	/* number of address filters this PMU can do */
 352	unsigned int			nr_addr_filters;
 353
 354	/*
 355	 * Fully disable/enable this PMU, can be used to protect from the PMI
 356	 * as well as for lazy/batch writing of the MSRs.
 357	 */
 358	void (*pmu_enable)		(struct pmu *pmu); /* optional */
 359	void (*pmu_disable)		(struct pmu *pmu); /* optional */
 360
 361	/*
 362	 * Try and initialize the event for this PMU.
 363	 *
 364	 * Returns:
 365	 *  -ENOENT	-- @event is not for this PMU
 366	 *
 367	 *  -ENODEV	-- @event is for this PMU but PMU not present
 368	 *  -EBUSY	-- @event is for this PMU but PMU temporarily unavailable
 369	 *  -EINVAL	-- @event is for this PMU but @event is not valid
 370	 *  -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
 371	 *  -EACCES	-- @event is for this PMU, @event is valid, but no privileges
 372	 *
 373	 *  0		-- @event is for this PMU and valid
 374	 *
 375	 * Other error return values are allowed.
 376	 */
 377	int (*event_init)		(struct perf_event *event);
 378
 379	/*
 380	 * Notification that the event was mapped or unmapped.  Called
 381	 * in the context of the mapping task.
 382	 */
 383	void (*event_mapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
 384	void (*event_unmapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
 385
 386	/*
 387	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
 388	 * matching hw_perf_event::state flags.
 389	 */
 390#define PERF_EF_START	0x01		/* start the counter when adding    */
 391#define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
 392#define PERF_EF_UPDATE	0x04		/* update the counter when stopping */
 393#define PERF_EF_PAUSE	0x08		/* AUX area event, pause tracing */
 394#define PERF_EF_RESUME	0x10		/* AUX area event, resume tracing */
 395
 396	/*
 397	 * Adds/Removes a counter to/from the PMU, can be done inside a
 398	 * transaction, see the ->*_txn() methods.
 399	 *
 400	 * The add/del callbacks will reserve all hardware resources required
 401	 * to service the event, this includes any counter constraint
 402	 * scheduling etc.
 403	 *
 404	 * Called with IRQs disabled and the PMU disabled on the CPU the event
 405	 * is on.
 406	 *
 407	 * ->add() called without PERF_EF_START should result in the same state
 408	 *  as ->add() followed by ->stop().
 409	 *
 410	 * ->del() must always PERF_EF_UPDATE stop an event. If it calls
 411	 *  ->stop() that must deal with already being stopped without
 412	 *  PERF_EF_UPDATE.
 413	 */
 414	int  (*add)			(struct perf_event *event, int flags);
 415	void (*del)			(struct perf_event *event, int flags);
 416
 417	/*
 418	 * Starts/Stops a counter present on the PMU.
 419	 *
 420	 * The PMI handler should stop the counter when perf_event_overflow()
 421	 * returns !0. ->start() will be used to continue.
 422	 *
 423	 * Also used to change the sample period.
 424	 *
 425	 * Called with IRQs disabled and the PMU disabled on the CPU the event
 426	 * is on -- will be called from NMI context with the PMU generates
 427	 * NMIs.
 428	 *
 429	 * ->stop() with PERF_EF_UPDATE will read the counter and update
 430	 *  period/count values like ->read() would.
 431	 *
 432	 * ->start() with PERF_EF_RELOAD will reprogram the counter
 433	 *  value, must be preceded by a ->stop() with PERF_EF_UPDATE.
 434	 *
 435	 * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
 436	 * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
 437	 * PERF_EF_RESUME.
 438	 *
 439	 * ->start() with PERF_EF_RESUME will start as simply as possible but
 440	 * only if the counter is not otherwise stopped. Will not overlap
 441	 * another ->start() with PERF_EF_RESUME nor ->stop() with
 442	 * PERF_EF_PAUSE.
 443	 *
 444	 * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
 445	 * ->stop()/->start() invocations, just not itself.
 446	 */
 447	void (*start)			(struct perf_event *event, int flags);
 448	void (*stop)			(struct perf_event *event, int flags);
 449
 450	/*
 451	 * Updates the counter value of the event.
 452	 *
 453	 * For sampling capable PMUs this will also update the software period
 454	 * hw_perf_event::period_left field.
 455	 */
 456	void (*read)			(struct perf_event *event);
 457
 458	/*
 459	 * Group events scheduling is treated as a transaction, add
 460	 * group events as a whole and perform one schedulability test.
 461	 * If the test fails, roll back the whole group
 462	 *
 463	 * Start the transaction, after this ->add() doesn't need to
 464	 * do schedulability tests.
 465	 *
 466	 * Optional.
 467	 */
 468	void (*start_txn)		(struct pmu *pmu, unsigned int txn_flags);
 469	/*
 470	 * If ->start_txn() disabled the ->add() schedulability test
 471	 * then ->commit_txn() is required to perform one. On success
 472	 * the transaction is closed. On error the transaction is kept
 473	 * open until ->cancel_txn() is called.
 474	 *
 475	 * Optional.
 476	 */
 477	int  (*commit_txn)		(struct pmu *pmu);
 478	/*
 479	 * Will cancel the transaction, assumes ->del() is called
 480	 * for each successful ->add() during the transaction.
 481	 *
 482	 * Optional.
 483	 */
 484	void (*cancel_txn)		(struct pmu *pmu);
 485
 486	/*
 487	 * Will return the value for perf_event_mmap_page::index for this event,
 488	 * if no implementation is provided it will default to 0 (see
 489	 * perf_event_idx_default).
 490	 */
 491	int (*event_idx)		(struct perf_event *event); /*optional */
 492
 493	/*
 494	 * context-switches callback
 495	 */
 496	void (*sched_task)		(struct perf_event_pmu_context *pmu_ctx,
 497					 struct task_struct *task, bool sched_in);
 498
 499	/*
 500	 * Kmem cache of PMU specific data
 501	 */
 502	struct kmem_cache		*task_ctx_cache;
 503
 504	/*
 505	 * Set up pmu-private data structures for an AUX area
 506	 */
 507	void *(*setup_aux)		(struct perf_event *event, void **pages,
 508					 int nr_pages, bool overwrite);
 509					/* optional */
 510
 511	/*
 512	 * Free pmu-private AUX data structures
 513	 */
 514	void (*free_aux)		(void *aux); /* optional */
 515
 516	/*
 517	 * Take a snapshot of the AUX buffer without touching the event
 518	 * state, so that preempting ->start()/->stop() callbacks does
 519	 * not interfere with their logic. Called in PMI context.
 520	 *
 521	 * Returns the size of AUX data copied to the output handle.
 522	 *
 523	 * Optional.
 524	 */
 525	long (*snapshot_aux)		(struct perf_event *event,
 526					 struct perf_output_handle *handle,
 527					 unsigned long size);
 528
 529	/*
 530	 * Validate address range filters: make sure the HW supports the
 531	 * requested configuration and number of filters; return 0 if the
 532	 * supplied filters are valid, -errno otherwise.
 533	 *
 534	 * Runs in the context of the ioctl()ing process and is not serialized
 535	 * with the rest of the PMU callbacks.
 536	 */
 537	int (*addr_filters_validate)	(struct list_head *filters);
 538					/* optional */
 539
 540	/*
 541	 * Synchronize address range filter configuration:
 542	 * translate hw-agnostic filters into hardware configuration in
 543	 * event::hw::addr_filters.
 544	 *
 545	 * Runs as a part of filter sync sequence that is done in ->start()
 546	 * callback by calling perf_event_addr_filters_sync().
 547	 *
 548	 * May (and should) traverse event::addr_filters::list, for which its
 549	 * caller provides necessary serialization.
 550	 */
 551	void (*addr_filters_sync)	(struct perf_event *event);
 552					/* optional */
 553
 554	/*
 555	 * Check if event can be used for aux_output purposes for
 556	 * events of this PMU.
 557	 *
 558	 * Runs from perf_event_open(). Should return 0 for "no match"
 559	 * or non-zero for "match".
 560	 */
 561	int (*aux_output_match)		(struct perf_event *event);
 562					/* optional */
 563
 564	/*
 565	 * Skip programming this PMU on the given CPU. Typically needed for
 566	 * big.LITTLE things.
 567	 */
 568	bool (*filter)			(struct pmu *pmu, int cpu); /* optional */
 569
 570	/*
 571	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
 572	 */
 573	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
 574};
 575
 576enum perf_addr_filter_action_t {
 577	PERF_ADDR_FILTER_ACTION_STOP = 0,
 578	PERF_ADDR_FILTER_ACTION_START,
 579	PERF_ADDR_FILTER_ACTION_FILTER,
 580};
 581
 582/**
 583 * struct perf_addr_filter - address range filter definition
 584 * @entry:	event's filter list linkage
 585 * @path:	object file's path for file-based filters
 586 * @offset:	filter range offset
 587 * @size:	filter range size (size==0 means single address trigger)
 588 * @action:	filter/start/stop
 589 *
 590 * This is a hardware-agnostic filter configuration as specified by the user.
 591 */
 592struct perf_addr_filter {
 593	struct list_head	entry;
 594	struct path		path;
 595	unsigned long		offset;
 596	unsigned long		size;
 597	enum perf_addr_filter_action_t	action;
 598};
 599
 600/**
 601 * struct perf_addr_filters_head - container for address range filters
 602 * @list:	list of filters for this event
 603 * @lock:	spinlock that serializes accesses to the @list and event's
 604 *		(and its children's) filter generations.
 605 * @nr_file_filters:	number of file-based filters
 606 *
 607 * A child event will use parent's @list (and therefore @lock), so they are
 608 * bundled together; see perf_event_addr_filters().
 609 */
 610struct perf_addr_filters_head {
 611	struct list_head	list;
 612	raw_spinlock_t		lock;
 613	unsigned int		nr_file_filters;
 614};
 615
 616struct perf_addr_filter_range {
 617	unsigned long		start;
 618	unsigned long		size;
 619};
 620
 621/**
 622 * enum perf_event_state - the states of an event:
 623 */
 624enum perf_event_state {
 625	PERF_EVENT_STATE_DEAD		= -4,
 626	PERF_EVENT_STATE_EXIT		= -3,
 627	PERF_EVENT_STATE_ERROR		= -2,
 628	PERF_EVENT_STATE_OFF		= -1,
 629	PERF_EVENT_STATE_INACTIVE	=  0,
 630	PERF_EVENT_STATE_ACTIVE		=  1,
 631};
 632
 633struct file;
 634struct perf_sample_data;
 635
 636typedef void (*perf_overflow_handler_t)(struct perf_event *,
 637					struct perf_sample_data *,
 638					struct pt_regs *regs);
 639
 640/*
 641 * Event capabilities. For event_caps and groups caps.
 642 *
 643 * PERF_EV_CAP_SOFTWARE: Is a software event.
 644 * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
 645 * from any CPU in the package where it is active.
 646 * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
 647 * cannot be a group leader. If an event with this flag is detached from the
 648 * group it is scheduled out and moved into an unrecoverable ERROR state.
 649 * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
 650 * PMU scope where it is active.
 651 */
 652#define PERF_EV_CAP_SOFTWARE		BIT(0)
 653#define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
 654#define PERF_EV_CAP_SIBLING		BIT(2)
 655#define PERF_EV_CAP_READ_SCOPE		BIT(3)
 656
 657#define SWEVENT_HLIST_BITS		8
 658#define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
 659
 660struct swevent_hlist {
 661	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
 662	struct rcu_head			rcu_head;
 663};
 664
 665#define PERF_ATTACH_CONTEXT	0x0001
 666#define PERF_ATTACH_GROUP	0x0002
 667#define PERF_ATTACH_TASK	0x0004
 668#define PERF_ATTACH_TASK_DATA	0x0008
 669#define PERF_ATTACH_GLOBAL_DATA	0x0010
 670#define PERF_ATTACH_SCHED_CB	0x0020
 671#define PERF_ATTACH_CHILD	0x0040
 672#define PERF_ATTACH_EXCLUSIVE	0x0080
 673#define PERF_ATTACH_CALLCHAIN	0x0100
 674#define PERF_ATTACH_ITRACE	0x0200
 675
 676struct bpf_prog;
 677struct perf_cgroup;
 678struct perf_buffer;
 679
 680struct pmu_event_list {
 681	raw_spinlock_t		lock;
 682	struct list_head	list;
 683};
 684
 685/*
 686 * event->sibling_list is modified whole holding both ctx->lock and ctx->mutex
 687 * as such iteration must hold either lock. However, since ctx->lock is an IRQ
 688 * safe lock, and is only held by the CPU doing the modification, having IRQs
 689 * disabled is sufficient since it will hold-off the IPIs.
 690 */
 691#ifdef CONFIG_PROVE_LOCKING
 692#define lockdep_assert_event_ctx(event)				\
 693	WARN_ON_ONCE(__lockdep_enabled &&			\
 694		     (this_cpu_read(hardirqs_enabled) &&	\
 695		      lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
 696#else
 697#define lockdep_assert_event_ctx(event)
 698#endif
 699
 700#define for_each_sibling_event(sibling, event)			\
 701	lockdep_assert_event_ctx(event);			\
 702	if ((event)->group_leader == (event))			\
 703		list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
 704
 705/**
 706 * struct perf_event - performance event kernel representation:
 707 */
 708struct perf_event {
 709#ifdef CONFIG_PERF_EVENTS
 710	/*
 711	 * entry onto perf_event_context::event_list;
 712	 *   modifications require ctx->lock
 713	 *   RCU safe iterations.
 714	 */
 715	struct list_head		event_entry;
 716
 717	/*
 718	 * Locked for modification by both ctx->mutex and ctx->lock; holding
 719	 * either sufficies for read.
 720	 */
 721	struct list_head		sibling_list;
 722	struct list_head		active_list;
 723	/*
 724	 * Node on the pinned or flexible tree located at the event context;
 725	 */
 726	struct rb_node			group_node;
 727	u64				group_index;
 728	/*
 729	 * We need storage to track the entries in perf_pmu_migrate_context; we
 730	 * cannot use the event_entry because of RCU and we want to keep the
 731	 * group in tact which avoids us using the other two entries.
 732	 */
 733	struct list_head		migrate_entry;
 734
 735	struct hlist_node		hlist_entry;
 736	struct list_head		active_entry;
 737	int				nr_siblings;
 738
 739	/* Not serialized. Only written during event initialization. */
 740	int				event_caps;
 741	/* The cumulative AND of all event_caps for events in this group. */
 742	int				group_caps;
 743
 744	unsigned int			group_generation;
 745	struct perf_event		*group_leader;
 746	/*
 747	 * event->pmu will always point to pmu in which this event belongs.
 748	 * Whereas event->pmu_ctx->pmu may point to other pmu when group of
 749	 * different pmu events is created.
 750	 */
 751	struct pmu			*pmu;
 752	void				*pmu_private;
 753
 754	enum perf_event_state		state;
 755	unsigned int			attach_state;
 756	local64_t			count;
 757	atomic64_t			child_count;
 758
 759	/*
 760	 * These are the total time in nanoseconds that the event
 761	 * has been enabled (i.e. eligible to run, and the task has
 762	 * been scheduled in, if this is a per-task event)
 763	 * and running (scheduled onto the CPU), respectively.
 764	 */
 765	u64				total_time_enabled;
 766	u64				total_time_running;
 767	u64				tstamp;
 768
 769	struct perf_event_attr		attr;
 770	u16				header_size;
 771	u16				id_header_size;
 772	u16				read_size;
 773	struct hw_perf_event		hw;
 774
 775	struct perf_event_context	*ctx;
 776	/*
 777	 * event->pmu_ctx points to perf_event_pmu_context in which the event
 778	 * is added. This pmu_ctx can be of other pmu for sw event when that
 779	 * sw event is part of a group which also contains non-sw events.
 780	 */
 781	struct perf_event_pmu_context	*pmu_ctx;
 782	atomic_long_t			refcount;
 783
 784	/*
 785	 * These accumulate total time (in nanoseconds) that children
 786	 * events have been enabled and running, respectively.
 787	 */
 788	atomic64_t			child_total_time_enabled;
 789	atomic64_t			child_total_time_running;
 790
 791	/*
 792	 * Protect attach/detach and child_list:
 793	 */
 794	struct mutex			child_mutex;
 795	struct list_head		child_list;
 796	struct perf_event		*parent;
 797
 798	int				oncpu;
 799	int				cpu;
 800
 801	struct list_head		owner_entry;
 802	struct task_struct		*owner;
 803
 804	/* mmap bits */
 805	struct mutex			mmap_mutex;
 806	atomic_t			mmap_count;
 807
 808	struct perf_buffer		*rb;
 809	struct list_head		rb_entry;
 810	unsigned long			rcu_batches;
 811	int				rcu_pending;
 812
 813	/* poll related */
 814	wait_queue_head_t		waitq;
 815	struct fasync_struct		*fasync;
 816
 817	/* delayed work for NMIs and such */
 818	unsigned int			pending_wakeup;
 819	unsigned int			pending_kill;
 820	unsigned int			pending_disable;
 821	unsigned long			pending_addr;	/* SIGTRAP */
 822	struct irq_work			pending_irq;
 823	struct irq_work			pending_disable_irq;
 824	struct callback_head		pending_task;
 825	unsigned int			pending_work;
 826
 827	atomic_t			event_limit;
 828
 829	/* address range filters */
 830	struct perf_addr_filters_head	addr_filters;
 831	/* vma address array for file-based filders */
 832	struct perf_addr_filter_range	*addr_filter_ranges;
 833	unsigned long			addr_filters_gen;
 834
 835	/* for aux_output events */
 836	struct perf_event		*aux_event;
 837
 838	void (*destroy)(struct perf_event *);
 839	struct rcu_head			rcu_head;
 840
 841	struct pid_namespace		*ns;
 842	u64				id;
 843
 844	atomic64_t			lost_samples;
 845
 846	u64				(*clock)(void);
 847	perf_overflow_handler_t		overflow_handler;
 848	void				*overflow_handler_context;
 849	struct bpf_prog			*prog;
 850	u64				bpf_cookie;
 851
 852#ifdef CONFIG_EVENT_TRACING
 853	struct trace_event_call		*tp_event;
 854	struct event_filter		*filter;
 855#ifdef CONFIG_FUNCTION_TRACER
 856	struct ftrace_ops               ftrace_ops;
 857#endif
 858#endif
 859
 860#ifdef CONFIG_CGROUP_PERF
 861	struct perf_cgroup		*cgrp; /* cgroup event is attach to */
 862#endif
 863
 864#ifdef CONFIG_SECURITY
 865	void *security;
 866#endif
 867	struct list_head		sb_list;
 868
 869	/*
 870	 * Certain events gets forwarded to another pmu internally by over-
 871	 * writing kernel copy of event->attr.type without user being aware
 872	 * of it. event->orig_type contains original 'type' requested by
 873	 * user.
 874	 */
 875	__u32				orig_type;
 876#endif /* CONFIG_PERF_EVENTS */
 877};
 878
 879/*
 880 *           ,-----------------------[1:n]------------------------.
 881 *           V                                                    V
 882 * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
 883 *                                        |                       |
 884 *                                        `--[n:1]-> pmu <-[1:n]--'
 885 *
 886 *
 887 * struct perf_event_pmu_context  lifetime is refcount based and RCU freed
 888 * (similar to perf_event_context). Locking is as if it were a member of
 889 * perf_event_context; specifically:
 890 *
 891 *   modification, both: ctx->mutex && ctx->lock
 892 *   reading, either:    ctx->mutex || ctx->lock
 893 *
 894 * There is one exception to this; namely put_pmu_ctx() isn't always called
 895 * with ctx->mutex held; this means that as long as we can guarantee the epc
 896 * has events the above rules hold.
 897 *
 898 * Specificially, sys_perf_event_open()'s group_leader case depends on
 899 * ctx->mutex pinning the configuration. Since we hold a reference on
 900 * group_leader (through the filedesc) it can't go away, therefore it's
 901 * associated pmu_ctx must exist and cannot change due to ctx->mutex.
 902 *
 903 * perf_event holds a refcount on perf_event_context
 904 * perf_event holds a refcount on perf_event_pmu_context
 905 */
 906struct perf_event_pmu_context {
 907	struct pmu			*pmu;
 908	struct perf_event_context       *ctx;
 909
 910	struct list_head		pmu_ctx_entry;
 911
 912	struct list_head		pinned_active;
 913	struct list_head		flexible_active;
 914
 915	/* Used to identify the per-cpu perf_event_pmu_context */
 916	unsigned int			embedded : 1;
 917
 918	unsigned int			nr_events;
 919	unsigned int			nr_cgroups;
 920	unsigned int			nr_freq;
 921
 922	atomic_t			refcount; /* event <-> epc */
 923	struct rcu_head			rcu_head;
 924
 925	/*
 926	 * Set when one or more (plausibly active) event can't be scheduled
 927	 * due to pmu overcommit or pmu constraints, except tolerant to
 928	 * events not necessary to be active due to scheduling constraints,
 929	 * such as cgroups.
 930	 */
 931	int				rotate_necessary;
 932};
 933
 934static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
 935{
 936	return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active);
 937}
 938
 939struct perf_event_groups {
 940	struct rb_root	tree;
 941	u64		index;
 942};
 943
 944
 945/**
 946 * struct perf_event_context - event context structure
 947 *
 948 * Used as a container for task events and CPU events as well:
 949 */
 950struct perf_event_context {
 951	/*
 952	 * Protect the states of the events in the list,
 953	 * nr_active, and the list:
 954	 */
 955	raw_spinlock_t			lock;
 956	/*
 957	 * Protect the list of events.  Locking either mutex or lock
 958	 * is sufficient to ensure the list doesn't change; to change
 959	 * the list you need to lock both the mutex and the spinlock.
 960	 */
 961	struct mutex			mutex;
 962
 963	struct list_head		pmu_ctx_list;
 964	struct perf_event_groups	pinned_groups;
 965	struct perf_event_groups	flexible_groups;
 966	struct list_head		event_list;
 967
 968	int				nr_events;
 969	int				nr_user;
 970	int				is_active;
 971
 972	int				nr_stat;
 973	int				nr_freq;
 974	int				rotate_disable;
 975
 976	refcount_t			refcount; /* event <-> ctx */
 977	struct task_struct		*task;
 978
 979	/*
 980	 * Context clock, runs when context enabled.
 981	 */
 982	u64				time;
 983	u64				timestamp;
 984	u64				timeoffset;
 985
 986	/*
 987	 * These fields let us detect when two contexts have both
 988	 * been cloned (inherited) from a common ancestor.
 989	 */
 990	struct perf_event_context	*parent_ctx;
 991	u64				parent_gen;
 992	u64				generation;
 993	int				pin_count;
 994#ifdef CONFIG_CGROUP_PERF
 995	int				nr_cgroups;	 /* cgroup evts */
 996#endif
 997	struct rcu_head			rcu_head;
 998
 999	/*
1000	 * The count of events for which using the switch-out fast path
1001	 * should be avoided.
1002	 *
1003	 * Sum (event->pending_work + events with
1004	 *    (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)))
1005	 *
1006	 * The SIGTRAP is targeted at ctx->task, as such it won't do changing
1007	 * that until the signal is delivered.
1008	 */
1009	local_t				nr_no_switch_fast;
1010};
1011
1012/**
1013 * struct perf_ctx_data - PMU specific data for a task
1014 * @rcu_head:  To avoid the race on free PMU specific data
1015 * @refcount:  To track users
1016 * @global:    To track system-wide users
1017 * @ctx_cache: Kmem cache of PMU specific data
1018 * @data:      PMU specific data
1019 *
1020 * Currently, the struct is only used in Intel LBR call stack mode to
1021 * save/restore the call stack of a task on context switches.
1022 *
1023 * The rcu_head is used to prevent the race on free the data.
1024 * The data only be allocated when Intel LBR call stack mode is enabled.
1025 * The data will be freed when the mode is disabled.
1026 * The content of the data will only be accessed in context switch, which
1027 * should be protected by rcu_read_lock().
1028 *
1029 * Because of the alignment requirement of Intel Arch LBR, the Kmem cache
1030 * is used to allocate the PMU specific data. The ctx_cache is to track
1031 * the Kmem cache.
1032 *
1033 * Careful: Struct perf_ctx_data is added as a pointer in struct task_struct.
1034 * When system-wide Intel LBR call stack mode is enabled, a buffer with
1035 * constant size will be allocated for each task.
1036 * Also, system memory consumption can further grow when the size of
1037 * struct perf_ctx_data enlarges.
1038 */
1039struct perf_ctx_data {
1040	struct rcu_head			rcu_head;
1041	refcount_t			refcount;
1042	int				global;
1043	struct kmem_cache		*ctx_cache;
1044	void				*data;
1045};
1046
1047struct perf_cpu_pmu_context {
1048	struct perf_event_pmu_context	epc;
1049	struct perf_event_pmu_context	*task_epc;
1050
1051	struct list_head		sched_cb_entry;
1052	int				sched_cb_usage;
1053
1054	int				active_oncpu;
1055	int				exclusive;
1056	int				pmu_disable_count;
1057
1058	raw_spinlock_t			hrtimer_lock;
1059	struct hrtimer			hrtimer;
1060	ktime_t				hrtimer_interval;
1061	unsigned int			hrtimer_active;
1062};
1063
1064/**
1065 * struct perf_event_cpu_context - per cpu event context structure
1066 */
1067struct perf_cpu_context {
1068	struct perf_event_context	ctx;
1069	struct perf_event_context	*task_ctx;
1070	int				online;
1071
1072#ifdef CONFIG_CGROUP_PERF
1073	struct perf_cgroup		*cgrp;
1074#endif
1075
1076	/*
1077	 * Per-CPU storage for iterators used in visit_groups_merge. The default
1078	 * storage is of size 2 to hold the CPU and any CPU event iterators.
1079	 */
1080	int				heap_size;
1081	struct perf_event		**heap;
1082	struct perf_event		*heap_default[2];
1083};
1084
1085struct perf_output_handle {
1086	struct perf_event		*event;
1087	struct perf_buffer		*rb;
1088	unsigned long			wakeup;
1089	unsigned long			size;
1090	union {
1091		u64			flags;		/* perf_output*() */
1092		u64			aux_flags;	/* perf_aux_output*() */
1093		struct {
1094			u64		skip_read : 1;
1095		};
1096	};
1097	union {
1098		void			*addr;
1099		unsigned long		head;
1100	};
1101	int				page;
1102};
1103
1104struct bpf_perf_event_data_kern {
1105	bpf_user_pt_regs_t *regs;
1106	struct perf_sample_data *data;
1107	struct perf_event *event;
1108};
1109
1110#ifdef CONFIG_CGROUP_PERF
1111
1112/*
1113 * perf_cgroup_info keeps track of time_enabled for a cgroup.
1114 * This is a per-cpu dynamically allocated data structure.
1115 */
1116struct perf_cgroup_info {
1117	u64				time;
1118	u64				timestamp;
1119	u64				timeoffset;
1120	int				active;
1121};
1122
1123struct perf_cgroup {
1124	struct cgroup_subsys_state	css;
1125	struct perf_cgroup_info	__percpu *info;
1126};
1127
1128/*
1129 * Must ensure cgroup is pinned (css_get) before calling
1130 * this function. In other words, we cannot call this function
1131 * if there is no cgroup event for the current CPU context.
1132 */
1133static inline struct perf_cgroup *
1134perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
1135{
1136	return container_of(task_css_check(task, perf_event_cgrp_id,
1137					   ctx ? lockdep_is_held(&ctx->lock)
1138					       : true),
1139			    struct perf_cgroup, css);
1140}
1141#endif /* CONFIG_CGROUP_PERF */
1142
1143#ifdef CONFIG_PERF_EVENTS
1144
1145extern struct perf_event_context *perf_cpu_task_ctx(void);
1146
1147extern void *perf_aux_output_begin(struct perf_output_handle *handle,
1148				   struct perf_event *event);
1149extern void perf_aux_output_end(struct perf_output_handle *handle,
1150				unsigned long size);
1151extern int perf_aux_output_skip(struct perf_output_handle *handle,
1152				unsigned long size);
1153extern void *perf_get_aux(struct perf_output_handle *handle);
1154extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
1155extern void perf_event_itrace_started(struct perf_event *event);
1156
1157extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
1158extern void perf_pmu_unregister(struct pmu *pmu);
1159
1160extern void __perf_event_task_sched_in(struct task_struct *prev,
1161				       struct task_struct *task);
1162extern void __perf_event_task_sched_out(struct task_struct *prev,
1163					struct task_struct *next);
1164extern int perf_event_init_task(struct task_struct *child, u64 clone_flags);
1165extern void perf_event_exit_task(struct task_struct *child);
1166extern void perf_event_free_task(struct task_struct *task);
1167extern void perf_event_delayed_put(struct task_struct *task);
1168extern struct file *perf_event_get(unsigned int fd);
1169extern const struct perf_event *perf_get_event(struct file *file);
1170extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
1171extern void perf_event_print_debug(void);
1172extern void perf_pmu_disable(struct pmu *pmu);
1173extern void perf_pmu_enable(struct pmu *pmu);
1174extern void perf_sched_cb_dec(struct pmu *pmu);
1175extern void perf_sched_cb_inc(struct pmu *pmu);
1176extern int perf_event_task_disable(void);
1177extern int perf_event_task_enable(void);
1178
1179extern void perf_pmu_resched(struct pmu *pmu);
1180
1181extern int perf_event_refresh(struct perf_event *event, int refresh);
1182extern void perf_event_update_userpage(struct perf_event *event);
1183extern int perf_event_release_kernel(struct perf_event *event);
1184extern struct perf_event *
1185perf_event_create_kernel_counter(struct perf_event_attr *attr,
1186				int cpu,
1187				struct task_struct *task,
1188				perf_overflow_handler_t callback,
1189				void *context);
1190extern void perf_pmu_migrate_context(struct pmu *pmu,
1191				int src_cpu, int dst_cpu);
1192int perf_event_read_local(struct perf_event *event, u64 *value,
1193			  u64 *enabled, u64 *running);
1194extern u64 perf_event_read_value(struct perf_event *event,
1195				 u64 *enabled, u64 *running);
1196
1197extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
1198
1199static inline bool branch_sample_no_flags(const struct perf_event *event)
1200{
1201	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
1202}
1203
1204static inline bool branch_sample_no_cycles(const struct perf_event *event)
1205{
1206	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
1207}
1208
1209static inline bool branch_sample_type(const struct perf_event *event)
1210{
1211	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
1212}
1213
1214static inline bool branch_sample_hw_index(const struct perf_event *event)
1215{
1216	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
1217}
1218
1219static inline bool branch_sample_priv(const struct perf_event *event)
1220{
1221	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
1222}
1223
1224static inline bool branch_sample_counters(const struct perf_event *event)
1225{
1226	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
1227}
1228
1229static inline bool branch_sample_call_stack(const struct perf_event *event)
1230{
1231	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
1232}
1233
1234struct perf_sample_data {
1235	/*
1236	 * Fields set by perf_sample_data_init() unconditionally,
1237	 * group so as to minimize the cachelines touched.
1238	 */
1239	u64				sample_flags;
1240	u64				period;
1241	u64				dyn_size;
1242
1243	/*
1244	 * Fields commonly set by __perf_event_header__init_id(),
1245	 * group so as to minimize the cachelines touched.
1246	 */
1247	u64				type;
1248	struct {
1249		u32	pid;
1250		u32	tid;
1251	}				tid_entry;
1252	u64				time;
1253	u64				id;
1254	struct {
1255		u32	cpu;
1256		u32	reserved;
1257	}				cpu_entry;
1258
1259	/*
1260	 * The other fields, optionally {set,used} by
1261	 * perf_{prepare,output}_sample().
1262	 */
1263	u64				ip;
1264	struct perf_callchain_entry	*callchain;
1265	struct perf_raw_record		*raw;
1266	struct perf_branch_stack	*br_stack;
1267	u64				*br_stack_cntr;
1268	union perf_sample_weight	weight;
1269	union  perf_mem_data_src	data_src;
1270	u64				txn;
1271
1272	struct perf_regs		regs_user;
1273	struct perf_regs		regs_intr;
1274	u64				stack_user_size;
1275
1276	u64				stream_id;
1277	u64				cgroup;
1278	u64				addr;
1279	u64				phys_addr;
1280	u64				data_page_size;
1281	u64				code_page_size;
1282	u64				aux_size;
1283} ____cacheline_aligned;
1284
1285/* default value for data source */
1286#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
1287		    PERF_MEM_S(LVL, NA)   |\
1288		    PERF_MEM_S(SNOOP, NA) |\
1289		    PERF_MEM_S(LOCK, NA)  |\
1290		    PERF_MEM_S(TLB, NA)   |\
1291		    PERF_MEM_S(LVLNUM, NA))
1292
1293static inline void perf_sample_data_init(struct perf_sample_data *data,
1294					 u64 addr, u64 period)
1295{
1296	/* remaining struct members initialized in perf_prepare_sample() */
1297	data->sample_flags = PERF_SAMPLE_PERIOD;
1298	data->period = period;
1299	data->dyn_size = 0;
1300
1301	if (addr) {
1302		data->addr = addr;
1303		data->sample_flags |= PERF_SAMPLE_ADDR;
1304	}
1305}
1306
1307static inline void perf_sample_save_callchain(struct perf_sample_data *data,
1308					      struct perf_event *event,
1309					      struct pt_regs *regs)
1310{
1311	int size = 1;
1312
1313	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
1314		return;
1315	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN))
1316		return;
1317
1318	data->callchain = perf_callchain(event, regs);
1319	size += data->callchain->nr;
1320
1321	data->dyn_size += size * sizeof(u64);
1322	data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
1323}
1324
1325static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
1326					     struct perf_event *event,
1327					     struct perf_raw_record *raw)
1328{
1329	struct perf_raw_frag *frag = &raw->frag;
1330	u32 sum = 0;
1331	int size;
1332
1333	if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
1334		return;
1335	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW))
1336		return;
1337
1338	do {
1339		sum += frag->size;
1340		if (perf_raw_frag_last(frag))
1341			break;
1342		frag = frag->next;
1343	} while (1);
1344
1345	size = round_up(sum + sizeof(u32), sizeof(u64));
1346	raw->size = size - sizeof(u32);
1347	frag->pad = raw->size - sum;
1348
1349	data->raw = raw;
1350	data->dyn_size += size;
1351	data->sample_flags |= PERF_SAMPLE_RAW;
1352}
1353
1354static inline bool has_branch_stack(struct perf_event *event)
1355{
1356	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
1357}
1358
1359static inline void perf_sample_save_brstack(struct perf_sample_data *data,
1360					    struct perf_event *event,
1361					    struct perf_branch_stack *brs,
1362					    u64 *brs_cntr)
1363{
1364	int size = sizeof(u64); /* nr */
1365
1366	if (!has_branch_stack(event))
1367		return;
1368	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK))
1369		return;
1370
1371	if (branch_sample_hw_index(event))
1372		size += sizeof(u64);
1373
1374	brs->nr = min_t(u16, event->attr.sample_max_stack, brs->nr);
1375
1376	size += brs->nr * sizeof(struct perf_branch_entry);
1377
1378	/*
1379	 * The extension space for counters is appended after the
1380	 * struct perf_branch_stack. It is used to store the occurrences
1381	 * of events of each branch.
1382	 */
1383	if (brs_cntr)
1384		size += brs->nr * sizeof(u64);
1385
1386	data->br_stack = brs;
1387	data->br_stack_cntr = brs_cntr;
1388	data->dyn_size += size;
1389	data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
1390}
1391
1392static inline u32 perf_sample_data_size(struct perf_sample_data *data,
1393					struct perf_event *event)
1394{
1395	u32 size = sizeof(struct perf_event_header);
1396
1397	size += event->header_size + event->id_header_size;
1398	size += data->dyn_size;
1399
1400	return size;
1401}
1402
1403/*
1404 * Clear all bitfields in the perf_branch_entry.
1405 * The to and from fields are not cleared because they are
1406 * systematically modified by caller.
1407 */
1408static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
1409{
1410	br->mispred = 0;
1411	br->predicted = 0;
1412	br->in_tx = 0;
1413	br->abort = 0;
1414	br->cycles = 0;
1415	br->type = 0;
1416	br->spec = PERF_BR_SPEC_NA;
1417	br->reserved = 0;
1418}
1419
1420extern void perf_output_sample(struct perf_output_handle *handle,
1421			       struct perf_event_header *header,
1422			       struct perf_sample_data *data,
1423			       struct perf_event *event);
1424extern void perf_prepare_sample(struct perf_sample_data *data,
1425				struct perf_event *event,
1426				struct pt_regs *regs);
1427extern void perf_prepare_header(struct perf_event_header *header,
1428				struct perf_sample_data *data,
1429				struct perf_event *event,
1430				struct pt_regs *regs);
1431
1432extern int perf_event_overflow(struct perf_event *event,
1433				 struct perf_sample_data *data,
1434				 struct pt_regs *regs);
1435
1436extern void perf_event_output_forward(struct perf_event *event,
1437				     struct perf_sample_data *data,
1438				     struct pt_regs *regs);
1439extern void perf_event_output_backward(struct perf_event *event,
1440				       struct perf_sample_data *data,
1441				       struct pt_regs *regs);
1442extern int perf_event_output(struct perf_event *event,
1443			     struct perf_sample_data *data,
1444			     struct pt_regs *regs);
1445
1446static inline bool
1447is_default_overflow_handler(struct perf_event *event)
1448{
1449	perf_overflow_handler_t overflow_handler = event->overflow_handler;
1450
1451	if (likely(overflow_handler == perf_event_output_forward))
1452		return true;
1453	if (unlikely(overflow_handler == perf_event_output_backward))
1454		return true;
1455	return false;
1456}
1457
1458extern void
1459perf_event_header__init_id(struct perf_event_header *header,
1460			   struct perf_sample_data *data,
1461			   struct perf_event *event);
1462extern void
1463perf_event__output_id_sample(struct perf_event *event,
1464			     struct perf_output_handle *handle,
1465			     struct perf_sample_data *sample);
1466
1467extern void
1468perf_log_lost_samples(struct perf_event *event, u64 lost);
1469
1470static inline bool event_has_any_exclude_flag(struct perf_event *event)
1471{
1472	struct perf_event_attr *attr = &event->attr;
1473
1474	return attr->exclude_idle || attr->exclude_user ||
1475	       attr->exclude_kernel || attr->exclude_hv ||
1476	       attr->exclude_guest || attr->exclude_host;
1477}
1478
1479static inline bool is_sampling_event(struct perf_event *event)
1480{
1481	return event->attr.sample_period != 0;
1482}
1483
1484/*
1485 * Return 1 for a software event, 0 for a hardware event
1486 */
1487static inline int is_software_event(struct perf_event *event)
1488{
1489	return event->event_caps & PERF_EV_CAP_SOFTWARE;
1490}
1491
1492/*
1493 * Return 1 for event in sw context, 0 for event in hw context
1494 */
1495static inline int in_software_context(struct perf_event *event)
1496{
1497	return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context;
1498}
1499
1500static inline int is_exclusive_pmu(struct pmu *pmu)
1501{
1502	return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE;
1503}
1504
1505extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
1506
1507extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
1508extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
1509
1510#ifndef perf_arch_fetch_caller_regs
1511static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
1512#endif
1513
1514/*
1515 * When generating a perf sample in-line, instead of from an interrupt /
1516 * exception, we lack a pt_regs. This is typically used from software events
1517 * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints.
1518 *
1519 * We typically don't need a full set, but (for x86) do require:
1520 * - ip for PERF_SAMPLE_IP
1521 * - cs for user_mode() tests
1522 * - sp for PERF_SAMPLE_CALLCHAIN
1523 * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs())
1524 *
1525 * NOTE: assumes @regs is otherwise already 0 filled; this is important for
1526 * things like PERF_SAMPLE_REGS_INTR.
1527 */
1528static inline void perf_fetch_caller_regs(struct pt_regs *regs)
1529{
1530	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
1531}
1532
1533static __always_inline void
1534perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
1535{
1536	if (static_key_false(&perf_swevent_enabled[event_id]))
1537		__perf_sw_event(event_id, nr, regs, addr);
1538}
1539
1540DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
1541
1542/*
1543 * 'Special' version for the scheduler, it hard assumes no recursion,
1544 * which is guaranteed by us not actually scheduling inside other swevents
1545 * because those disable preemption.
1546 */
1547static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
1548{
1549	struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
1550
1551	perf_fetch_caller_regs(regs);
1552	___perf_sw_event(event_id, nr, regs, addr);
1553}
1554
1555extern struct static_key_false perf_sched_events;
1556
1557static __always_inline bool __perf_sw_enabled(int swevt)
1558{
1559	return static_key_false(&perf_swevent_enabled[swevt]);
1560}
1561
1562static inline void perf_event_task_migrate(struct task_struct *task)
1563{
1564	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
1565		task->sched_migrated = 1;
1566}
1567
1568static inline void perf_event_task_sched_in(struct task_struct *prev,
1569					    struct task_struct *task)
1570{
1571	if (static_branch_unlikely(&perf_sched_events))
1572		__perf_event_task_sched_in(prev, task);
1573
1574	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
1575	    task->sched_migrated) {
1576		__perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
1577		task->sched_migrated = 0;
1578	}
1579}
1580
1581static inline void perf_event_task_sched_out(struct task_struct *prev,
1582					     struct task_struct *next)
1583{
1584	if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
1585		__perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
1586
1587#ifdef CONFIG_CGROUP_PERF
1588	if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
1589	    perf_cgroup_from_task(prev, NULL) !=
1590	    perf_cgroup_from_task(next, NULL))
1591		__perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
1592#endif
1593
1594	if (static_branch_unlikely(&perf_sched_events))
1595		__perf_event_task_sched_out(prev, next);
1596}
1597
1598extern void perf_event_mmap(struct vm_area_struct *vma);
1599
1600extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
1601			       bool unregister, const char *sym);
1602extern void perf_event_bpf_event(struct bpf_prog *prog,
1603				 enum perf_bpf_event_type type,
1604				 u16 flags);
1605
1606#ifdef CONFIG_GUEST_PERF_EVENTS
1607extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
1608
1609DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
1610DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
1611DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
1612
1613static inline unsigned int perf_guest_state(void)
1614{
1615	return static_call(__perf_guest_state)();
1616}
1617static inline unsigned long perf_guest_get_ip(void)
1618{
1619	return static_call(__perf_guest_get_ip)();
1620}
1621static inline unsigned int perf_guest_handle_intel_pt_intr(void)
1622{
1623	return static_call(__perf_guest_handle_intel_pt_intr)();
1624}
1625extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
1626extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
1627#else
1628static inline unsigned int perf_guest_state(void)		 { return 0; }
1629static inline unsigned long perf_guest_get_ip(void)		 { return 0; }
1630static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
1631#endif /* CONFIG_GUEST_PERF_EVENTS */
1632
1633extern void perf_event_exec(void);
1634extern void perf_event_comm(struct task_struct *tsk, bool exec);
1635extern void perf_event_namespaces(struct task_struct *tsk);
1636extern void perf_event_fork(struct task_struct *tsk);
1637extern void perf_event_text_poke(const void *addr,
1638				 const void *old_bytes, size_t old_len,
1639				 const void *new_bytes, size_t new_len);
1640
1641/* Callchains */
1642DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1643
1644extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
1645extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
1646extern struct perf_callchain_entry *
1647get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
1648		   u32 max_stack, bool crosstask, bool add_mark);
1649extern int get_callchain_buffers(int max_stack);
1650extern void put_callchain_buffers(void);
1651extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
1652extern void put_callchain_entry(int rctx);
1653
1654extern int sysctl_perf_event_max_stack;
1655extern int sysctl_perf_event_max_contexts_per_stack;
1656
1657static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip)
1658{
1659	if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
1660		struct perf_callchain_entry *entry = ctx->entry;
1661		entry->ip[entry->nr++] = ip;
1662		++ctx->contexts;
1663		return 0;
1664	} else {
1665		ctx->contexts_maxed = true;
1666		return -1; /* no more room, stop walking the stack */
1667	}
1668}
1669
1670static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip)
1671{
1672	if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
1673		struct perf_callchain_entry *entry = ctx->entry;
1674		entry->ip[entry->nr++] = ip;
1675		++ctx->nr;
1676		return 0;
1677	} else {
1678		return -1; /* no more room, stop walking the stack */
1679	}
1680}
1681
1682extern int sysctl_perf_event_paranoid;
1683extern int sysctl_perf_event_sample_rate;
1684
1685extern void perf_sample_event_took(u64 sample_len_ns);
1686
1687/* Access to perf_event_open(2) syscall. */
1688#define PERF_SECURITY_OPEN		0
1689
1690/* Finer grained perf_event_open(2) access control. */
1691#define PERF_SECURITY_CPU		1
1692#define PERF_SECURITY_KERNEL		2
1693#define PERF_SECURITY_TRACEPOINT	3
1694
1695static inline int perf_is_paranoid(void)
1696{
1697	return sysctl_perf_event_paranoid > -1;
1698}
1699
1700int perf_allow_kernel(void);
1701
1702static inline int perf_allow_cpu(void)
1703{
1704	if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
1705		return -EACCES;
1706
1707	return security_perf_event_open(PERF_SECURITY_CPU);
1708}
1709
1710static inline int perf_allow_tracepoint(void)
1711{
1712	if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
1713		return -EPERM;
1714
1715	return security_perf_event_open(PERF_SECURITY_TRACEPOINT);
1716}
1717
1718extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs);
1719
1720extern void perf_event_init(void);
1721extern void perf_tp_event(u16 event_type, u64 count, void *record,
1722			  int entry_size, struct pt_regs *regs,
1723			  struct hlist_head *head, int rctx,
1724			  struct task_struct *task);
1725extern void perf_bp_event(struct perf_event *event, void *data);
1726
1727extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs);
1728extern unsigned long perf_instruction_pointer(struct perf_event *event,
1729					      struct pt_regs *regs);
1730
1731#ifndef perf_arch_misc_flags
1732# define perf_arch_misc_flags(regs) \
1733		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
1734# define perf_arch_instruction_pointer(regs)	instruction_pointer(regs)
1735#endif
1736#ifndef perf_arch_bpf_user_pt_regs
1737# define perf_arch_bpf_user_pt_regs(regs) regs
1738#endif
1739
1740#ifndef perf_arch_guest_misc_flags
1741static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
1742{
1743	unsigned long guest_state = perf_guest_state();
1744
1745	if (!(guest_state & PERF_GUEST_ACTIVE))
1746		return 0;
1747
1748	if (guest_state & PERF_GUEST_USER)
1749		return PERF_RECORD_MISC_GUEST_USER;
1750	else
1751		return PERF_RECORD_MISC_GUEST_KERNEL;
1752}
1753# define perf_arch_guest_misc_flags(regs)	perf_arch_guest_misc_flags(regs)
1754#endif
1755
1756static inline bool needs_branch_stack(struct perf_event *event)
1757{
1758	return event->attr.branch_sample_type != 0;
1759}
1760
1761static inline bool has_aux(struct perf_event *event)
1762{
1763	return event->pmu->setup_aux;
1764}
1765
1766static inline bool has_aux_action(struct perf_event *event)
1767{
1768	return event->attr.aux_sample_size ||
1769	       event->attr.aux_pause ||
1770	       event->attr.aux_resume;
1771}
1772
1773static inline bool is_write_backward(struct perf_event *event)
1774{
1775	return !!event->attr.write_backward;
1776}
1777
1778static inline bool has_addr_filter(struct perf_event *event)
1779{
1780	return event->pmu->nr_addr_filters;
1781}
1782
1783/*
1784 * An inherited event uses parent's filters
1785 */
1786static inline struct perf_addr_filters_head *
1787perf_event_addr_filters(struct perf_event *event)
1788{
1789	struct perf_addr_filters_head *ifh = &event->addr_filters;
1790
1791	if (event->parent)
1792		ifh = &event->parent->addr_filters;
1793
1794	return ifh;
1795}
1796
1797static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
1798{
1799	/* Only the parent has fasync state */
1800	if (event->parent)
1801		event = event->parent;
1802	return &event->fasync;
1803}
1804
1805extern void perf_event_addr_filters_sync(struct perf_event *event);
1806extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
1807
1808extern int perf_output_begin(struct perf_output_handle *handle,
1809			     struct perf_sample_data *data,
1810			     struct perf_event *event, unsigned int size);
1811extern int perf_output_begin_forward(struct perf_output_handle *handle,
1812				     struct perf_sample_data *data,
1813				     struct perf_event *event,
1814				     unsigned int size);
1815extern int perf_output_begin_backward(struct perf_output_handle *handle,
1816				      struct perf_sample_data *data,
1817				      struct perf_event *event,
1818				      unsigned int size);
1819
1820extern void perf_output_end(struct perf_output_handle *handle);
1821extern unsigned int perf_output_copy(struct perf_output_handle *handle,
1822			     const void *buf, unsigned int len);
1823extern unsigned int perf_output_skip(struct perf_output_handle *handle,
1824				     unsigned int len);
1825extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
1826				 struct perf_output_handle *handle,
1827				 unsigned long from, unsigned long to);
1828extern int perf_swevent_get_recursion_context(void);
1829extern void perf_swevent_put_recursion_context(int rctx);
1830extern u64 perf_swevent_set_period(struct perf_event *event);
1831extern void perf_event_enable(struct perf_event *event);
1832extern void perf_event_disable(struct perf_event *event);
1833extern void perf_event_disable_local(struct perf_event *event);
1834extern void perf_event_disable_inatomic(struct perf_event *event);
1835extern void perf_event_task_tick(void);
1836extern int perf_event_account_interrupt(struct perf_event *event);
1837extern int perf_event_period(struct perf_event *event, u64 value);
1838extern u64 perf_event_pause(struct perf_event *event, bool reset);
1839#else /* !CONFIG_PERF_EVENTS: */
1840static inline void *
1841perf_aux_output_begin(struct perf_output_handle *handle,
1842		      struct perf_event *event)				{ return NULL; }
1843static inline void
1844perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
1845									{ }
1846static inline int
1847perf_aux_output_skip(struct perf_output_handle *handle,
1848		     unsigned long size)				{ return -EINVAL; }
1849static inline void *
1850perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
1851static inline void
1852perf_event_task_migrate(struct task_struct *task)			{ }
1853static inline void
1854perf_event_task_sched_in(struct task_struct *prev,
1855			 struct task_struct *task)			{ }
1856static inline void
1857perf_event_task_sched_out(struct task_struct *prev,
1858			  struct task_struct *next)			{ }
1859static inline int perf_event_init_task(struct task_struct *child,
1860				       u64 clone_flags)			{ return 0; }
1861static inline void perf_event_exit_task(struct task_struct *child)	{ }
1862static inline void perf_event_free_task(struct task_struct *task)	{ }
1863static inline void perf_event_delayed_put(struct task_struct *task)	{ }
1864static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
1865static inline const struct perf_event *perf_get_event(struct file *file)
1866{
1867	return ERR_PTR(-EINVAL);
1868}
1869static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
1870{
1871	return ERR_PTR(-EINVAL);
1872}
1873static inline int perf_event_read_local(struct perf_event *event, u64 *value,
1874					u64 *enabled, u64 *running)
1875{
1876	return -EINVAL;
1877}
1878static inline void perf_event_print_debug(void)				{ }
1879static inline int perf_event_task_disable(void)				{ return -EINVAL; }
1880static inline int perf_event_task_enable(void)				{ return -EINVAL; }
1881static inline int perf_event_refresh(struct perf_event *event, int refresh)
1882{
1883	return -EINVAL;
1884}
1885
1886static inline void
1887perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
1888static inline void
1889perf_bp_event(struct perf_event *event, void *data)			{ }
1890
1891static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
1892
1893typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
1894static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
1895				      bool unregister, const char *sym)	{ }
1896static inline void perf_event_bpf_event(struct bpf_prog *prog,
1897					enum perf_bpf_event_type type,
1898					u16 flags)			{ }
1899static inline void perf_event_exec(void)				{ }
1900static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
1901static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
1902static inline void perf_event_fork(struct task_struct *tsk)		{ }
1903static inline void perf_event_text_poke(const void *addr,
1904					const void *old_bytes,
1905					size_t old_len,
1906					const void *new_bytes,
1907					size_t new_len)			{ }
1908static inline void perf_event_init(void)				{ }
1909static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
1910static inline void perf_swevent_put_recursion_context(int rctx)		{ }
1911static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
1912static inline void perf_event_enable(struct perf_event *event)		{ }
1913static inline void perf_event_disable(struct perf_event *event)		{ }
1914static inline int __perf_event_disable(void *info)			{ return -1; }
1915static inline void perf_event_task_tick(void)				{ }
1916static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
1917static inline int perf_event_period(struct perf_event *event, u64 value)
1918{
1919	return -EINVAL;
1920}
1921static inline u64 perf_event_pause(struct perf_event *event, bool reset)
1922{
1923	return 0;
1924}
1925static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs)
1926{
1927	return 0;
1928}
1929#endif
1930
1931#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
1932extern void perf_restore_debug_store(void);
1933#else
1934static inline void perf_restore_debug_store(void)			{ }
1935#endif
1936
1937#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
1938
1939struct perf_pmu_events_attr {
1940	struct device_attribute attr;
1941	u64 id;
1942	const char *event_str;
1943};
1944
1945struct perf_pmu_events_ht_attr {
1946	struct device_attribute			attr;
1947	u64					id;
1948	const char				*event_str_ht;
1949	const char				*event_str_noht;
1950};
1951
1952struct perf_pmu_events_hybrid_attr {
1953	struct device_attribute			attr;
1954	u64					id;
1955	const char				*event_str;
1956	u64					pmu_type;
1957};
1958
1959struct perf_pmu_format_hybrid_attr {
1960	struct device_attribute			attr;
1961	u64					pmu_type;
1962};
1963
1964ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
1965			      char *page);
1966
1967#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
1968static struct perf_pmu_events_attr _var = {				\
1969	.attr = __ATTR(_name, 0444, _show, NULL),			\
1970	.id   =  _id,							\
1971};
1972
1973#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			    \
1974static struct perf_pmu_events_attr _var = {				    \
1975	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
1976	.id		= 0,						    \
1977	.event_str	= _str,						    \
1978};
1979
1980#define PMU_EVENT_ATTR_ID(_name, _show, _id)				\
1981	(&((struct perf_pmu_events_attr[]) {				\
1982		{ .attr = __ATTR(_name, 0444, _show, NULL),		\
1983		  .id = _id, }						\
1984	})[0].attr.attr)
1985
1986#define PMU_FORMAT_ATTR_SHOW(_name, _format)				\
1987static ssize_t								\
1988_name##_show(struct device *dev,					\
1989			       struct device_attribute *attr,		\
1990			       char *page)				\
1991{									\
1992	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
1993	return sprintf(page, _format "\n");				\
1994}									\
1995
1996#define PMU_FORMAT_ATTR(_name, _format)					\
1997	PMU_FORMAT_ATTR_SHOW(_name, _format)				\
1998									\
1999static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
2000
2001/* Performance counter hotplug functions */
2002#ifdef CONFIG_PERF_EVENTS
2003int perf_event_init_cpu(unsigned int cpu);
2004int perf_event_exit_cpu(unsigned int cpu);
2005#else
2006#define perf_event_init_cpu	NULL
2007#define perf_event_exit_cpu	NULL
2008#endif
2009
2010extern void arch_perf_update_userpage(struct perf_event *event,
2011				      struct perf_event_mmap_page *userpg,
2012				      u64 now);
2013
2014/*
2015 * Snapshot branch stack on software events.
2016 *
2017 * Branch stack can be very useful in understanding software events. For
2018 * example, when a long function, e.g. sys_perf_event_open, returns an
2019 * errno, it is not obvious why the function failed. Branch stack could
2020 * provide very helpful information in this type of scenarios.
2021 *
2022 * On software event, it is necessary to stop the hardware branch recorder
2023 * fast. Otherwise, the hardware register/buffer will be flushed with
2024 * entries of the triggering event. Therefore, static call is used to
2025 * stop the hardware recorder.
2026 */
2027
2028/*
2029 * cnt is the number of entries allocated for entries.
2030 * Return number of entries copied to .
2031 */
2032typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
2033					   unsigned int cnt);
2034DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
2035
2036#ifndef PERF_NEEDS_LOPWR_CB
2037static inline void perf_lopwr_cb(bool mode)
2038{
2039}
2040#endif
2041
2042#endif /* _LINUX_PERF_EVENT_H */