include/linux/perf_event.h at master · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / linux / perf_event.h
at master 62 kB view raw
   1/*
   2 * Performance events:
   3 *
   4 *    Copyright (C) 2008-2009, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
   5 *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
   6 *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
   7 *
   8 * Data type definitions, declarations, prototypes.
   9 *
  10 *    Started by: Thomas Gleixner and Ingo Molnar
  11 *
  12 * For licencing details see kernel-base/COPYING
  13 */
  14#ifndef _LINUX_PERF_EVENT_H
  15#define _LINUX_PERF_EVENT_H
  16
  17#include <uapi/linux/perf_event.h>
  18#include <uapi/linux/bpf_perf_event.h>
  19
  20/*
  21 * Kernel-internal data types and definitions:
  22 */
  23
  24#ifdef CONFIG_PERF_EVENTS
  25# include <asm/perf_event.h>
  26# include <asm/local64.h>
  27#endif
  28
  29#ifdef CONFIG_HAVE_HW_BREAKPOINT
  30# include <linux/rhashtable-types.h>
  31# include <asm/hw_breakpoint.h>
  32#endif
  33
  34#include <linux/list.h>
  35#include <linux/mutex.h>
  36#include <linux/rculist.h>
  37#include <linux/rcupdate.h>
  38#include <linux/spinlock.h>
  39#include <linux/hrtimer.h>
  40#include <linux/fs.h>
  41#include <linux/pid_namespace.h>
  42#include <linux/workqueue.h>
  43#include <linux/ftrace.h>
  44#include <linux/cpu.h>
  45#include <linux/irq_work.h>
  46#include <linux/static_key.h>
  47#include <linux/jump_label_ratelimit.h>
  48#include <linux/atomic.h>
  49#include <linux/sysfs.h>
  50#include <linux/perf_regs.h>
  51#include <linux/cgroup.h>
  52#include <linux/refcount.h>
  53#include <linux/security.h>
  54#include <linux/static_call.h>
  55#include <linux/lockdep.h>
  56
  57#include <asm/local.h>
  58
/*
 * Callchain sample: a count followed by a flexible array of u64 slots.
 *
 * NOTE(review): @nr is presumably the number of valid @ip[] slots and the
 * array capacity is presumably bounded by the sysctl named next to @ip --
 * confirm against the callchain allocation code.
 */
struct perf_callchain_entry {
	u64				nr;
	u64				ip[]; /* /proc/sys/kernel/perf_event_max_stack */
};
  63
/*
 * Bookkeeping wrapper around a struct perf_callchain_entry.
 *
 * NOTE(review): field roles are inferred from their names only -- @max_stack
 * and @nr look like a limit and a running count for @entry, @contexts and
 * @contexts_maxed like a budget for context markers; confirm against the
 * users of this struct.
 */
struct perf_callchain_entry_ctx {
	struct perf_callchain_entry	*entry;
	u32				max_stack;
	u32				nr;
	short				contexts;
	bool				contexts_maxed;
};
  71
/*
 * Copy callback type stored in perf_raw_frag::copy.
 *
 * NOTE(review): presumably copies @len bytes of @src, starting at offset
 * @off, into @dst; the meaning of the return value is not visible here --
 * confirm against the callers.
 */
typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
				     unsigned long off, unsigned long len);
  74
/*
 * One fragment of a raw sample record.
 *
 * The leading union overlays the @next pointer with the integer @pad;
 * perf_raw_frag_last() reads it through @pad and treats any value below
 * sizeof(u64) as "this is the last fragment".  The structure is __packed.
 */
struct perf_raw_frag {
	union {
		struct perf_raw_frag	*next;
		unsigned long		pad;
	};
	perf_copy_f			copy;
	void				*data;
	u32				size;
} __packed;
  84
/*
 * Raw sample record: an embedded first fragment plus a @size field.
 *
 * NOTE(review): @size is presumably the total size of the record across all
 * chained fragments, as opposed to perf_raw_frag::size -- confirm.
 */
struct perf_raw_record {
	struct perf_raw_frag		frag;
	u32				size;
};
  89
  90static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
  91{
  92	return frag->pad < sizeof(u64);
  93}
  94
/*
 * branch stack layout:
 *  nr: number of taken branches stored in entries[]
 *  hw_idx: The low level index of raw branch records
 *          for the most recent branch.
 *          -1ULL means invalid/unknown.
 *
 * Note that nr can vary from sample to sample.
 * Branches (to, from) are stored from most recent
 * to least recent, i.e., entries[0] contains the most
 * recent branch.
 * The entries[] is an abstraction of raw branch records,
 * which may not be stored in age order in HW, e.g. Intel LBR.
 * The hw_idx is to expose the low level index of raw
 * branch record for the most recent branch aka entries[0].
 * The hw_idx index is between -1 (unknown) and max depth,
 * which can be retrieved in /sys/devices/cpu/caps/branches.
 * For the architectures whose raw branch records are
 * already stored in age order, the hw_idx should be 0.
 */
struct perf_branch_stack {
	u64				nr;
	u64				hw_idx;
	struct perf_branch_entry	entries[];	/* flexible array, @nr entries */
};
 120
 121struct task_struct;
 122
/*
 * extra PMU register associated with an event
 *
 * struct hw_perf_event embeds two of these in its "hardware" view:
 * extra_reg and branch_reg.
 */
struct hw_perf_event_extra {
	u64				config;	/* register value */
	unsigned int			reg;	/* register address or index */
	int				alloc;	/* extra register already allocated */
	int				idx;	/* index in shared_regs->regs[] */
};
 132
/**
 * hw_perf_event::flags values
 *
 * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
 * usage.
 *
 * PERF_EVENT_FLAG_ARCH covers the low 28 bits, PERF_EVENT_FLAG_USER_READ_CNT
 * is bit 31; the static_assert() below checks at build time that the two
 * masks do not overlap.
 */
#define PERF_EVENT_FLAG_ARCH		0x0fffffff
#define PERF_EVENT_FLAG_USER_READ_CNT	0x80000000

static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
 143
/**
 * struct hw_perf_event - performance event hardware details:
 *
 * The whole body is only present with CONFIG_PERF_EVENTS; without it the
 * structure is empty.  The leading anonymous union provides one view per
 * kind of event (hardware, AUX, software, tracepoint, amd_power, breakpoint,
 * amd_iommu); the members after the union are common to all of them.
 */
struct hw_perf_event {
#ifdef CONFIG_PERF_EVENTS
	union {
		struct { /* hardware */
			u64		config;
			u64		config1;
			u64		last_tag;
			u64		dyn_constraint;
			unsigned long	config_base;
			unsigned long	event_base;
			int		event_base_rdpmc;
			int		idx;
			int		last_cpu;
			int		flags;	/* PERF_EVENT_FLAG_* */

			struct hw_perf_event_extra extra_reg;
			struct hw_perf_event_extra branch_reg;
		};
		struct { /* aux / Intel-PT */
			u64		aux_config;
			/*
			 * For AUX area events, aux_paused cannot be a state
			 * flag because it can be updated asynchronously to
			 * state.
			 */
			unsigned int	aux_paused;
		};
		struct { /* software */
			struct hrtimer	hrtimer;
		};
		struct { /* tracepoint */
			/* for tp_event->class */
			struct list_head	tp_list;
		};
		struct { /* amd_power */
			u64	pwr_acc;
			u64	ptsc;
		};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
		struct { /* breakpoint */
			/*
			 * Crufty hack to avoid the chicken and egg
			 * problem hw_breakpoint has with context
			 * creation and event initialization.
			 */
			struct arch_hw_breakpoint	info;
			struct rhlist_head		bp_list;
		};
#endif
		struct { /* amd_iommu */
			u8	iommu_bank;
			u8	iommu_cntr;
			u16	padding;
			u64	conf;
			u64	conf1;
		};
	};
	/*
	 * If the event is a per task event, this will point to the task in
	 * question. See the comment in perf_event_alloc().
	 */
	struct task_struct		*target;

	/*
	 * PMU would store hardware filter configuration
	 * here.
	 */
	void				*addr_filters;

	/* Last sync'ed generation of filters */
	unsigned long			addr_filters_gen;

/*
 * hw_perf_event::state flags; used to track the PERF_EF_* state.
 */

/* the counter is stopped */
#define PERF_HES_STOPPED		0x01

/* event->count up-to-date */
#define PERF_HES_UPTODATE		0x02

#define PERF_HES_ARCH			0x04

	int				state;	/* PERF_HES_* bits */

	/*
	 * The last observed hardware counter value, updated with a
	 * local64_cmpxchg() such that pmu::read() can be called nested.
	 */
	local64_t			prev_count;

	/*
	 * The period to start the next sample with.
	 */
	u64				sample_period;

	union {
		struct { /* Sampling */
			/*
			 * The period we started this sample with.
			 */
			u64				last_period;

			/*
			 * However much is left of the current period;
			 * note that this is a full 64bit value and
			 * allows for generation of periods longer
			 * than hardware might allow.
			 */
			local64_t			period_left;
		};
		struct { /* Topdown events counting for context switch */
			u64				saved_metric;
			u64				saved_slots;
		};
	};

	/*
	 * State for throttling the event, see __perf_event_overflow() and
	 * perf_adjust_freq_unthr_context().
	 */
	u64				interrupts_seq;
	u64				interrupts;

	/*
	 * State for freq target events, see __perf_event_overflow() and
	 * perf_adjust_freq_unthr_context().
	 */
	u64				freq_time_stamp;
	u64				freq_count_stamp;
#endif /* CONFIG_PERF_EVENTS */
};
 280
 281struct perf_event;
 282struct perf_event_pmu_context;
 283
/*
 * Common implementation detail of pmu::{start,commit,cancel}_txn
 *
 * These values are passed as the txn_flags argument of pmu::start_txn().
 */

/* txn to add/schedule event on PMU */
#define PERF_PMU_TXN_ADD		0x1

/* txn to read event group from PMU */
#define PERF_PMU_TXN_READ		0x2
 293
/**
 * pmu::capabilities flags
 *
 * Each flag is a distinct single-bit value.
 */
#define PERF_PMU_CAP_NO_INTERRUPT	0x0001
#define PERF_PMU_CAP_NO_NMI		0x0002
#define PERF_PMU_CAP_AUX_NO_SG		0x0004
#define PERF_PMU_CAP_EXTENDED_REGS	0x0008
#define PERF_PMU_CAP_EXCLUSIVE		0x0010
#define PERF_PMU_CAP_ITRACE		0x0020
#define PERF_PMU_CAP_NO_EXCLUDE		0x0040
#define PERF_PMU_CAP_AUX_OUTPUT		0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE	0x0100
#define PERF_PMU_CAP_AUX_PAUSE		0x0200
#define PERF_PMU_CAP_AUX_PREFER_LARGE	0x0400
 308
/**
 * pmu::scope
 *
 * PERF_PMU_SCOPE_NONE is 0; PERF_PMU_MAX_SCOPE is the last enumerator and
 * therefore one past the highest real scope value.
 */
enum perf_pmu_scope {
	PERF_PMU_SCOPE_NONE = 0,
	PERF_PMU_SCOPE_CORE,
	PERF_PMU_SCOPE_DIE,
	PERF_PMU_SCOPE_CLUSTER,
	PERF_PMU_SCOPE_PKG,
	PERF_PMU_SCOPE_SYS_WIDE,
	PERF_PMU_MAX_SCOPE,
};
 321
 322struct perf_output_handle;
 323
/*
 * All-ones pointer value.
 * NOTE(review): presumably a marker for pmu::dev that is distinct from NULL --
 * confirm against the PMU registration code.
 */
#define PMU_NULL_DEV	((void *)(~0UL))
 325
/**
 * struct pmu - generic performance monitoring unit
 *
 * Groups the identification and sysfs fields of a PMU, some per-PMU
 * bookkeeping, and the table of callbacks that operate on the PMU and its
 * events.  Callbacks that may be omitted are marked "optional" below.
 */
struct pmu {
	struct list_head		entry;

	spinlock_t			events_lock;
	struct list_head		events;

	struct module			*module;
	struct device			*dev;
	struct device			*parent;
	const struct attribute_group	**attr_groups;
	const struct attribute_group	**attr_update;
	const char			*name;
	int				type;

	/*
	 * various common per-pmu feature flags
	 * (PERF_PMU_CAP_*)
	 */
	int				capabilities;

	/*
	 * PMU scope
	 * (enum perf_pmu_scope)
	 */
	unsigned int			scope;

	struct perf_cpu_pmu_context * __percpu *cpu_pmu_context;
	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
	int				task_ctx_nr;
	int				hrtimer_interval_ms;

	/* number of address filters this PMU can do */
	unsigned int			nr_addr_filters;

	/*
	 * Fully disable/enable this PMU, can be used to protect from the PMI
	 * as well as for lazy/batch writing of the MSRs.
	 */
	void (*pmu_enable)		(struct pmu *pmu); /* optional */
	void (*pmu_disable)		(struct pmu *pmu); /* optional */

	/*
	 * Try and initialize the event for this PMU.
	 *
	 * Returns:
	 *  -ENOENT	-- @event is not for this PMU
	 *
	 *  -ENODEV	-- @event is for this PMU but PMU not present
	 *  -EBUSY	-- @event is for this PMU but PMU temporarily unavailable
	 *  -EINVAL	-- @event is for this PMU but @event is not valid
	 *  -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
	 *  -EACCES	-- @event is for this PMU, @event is valid, but no privileges
	 *
	 *  0		-- @event is for this PMU and valid
	 *
	 * Other error return values are allowed.
	 */
	int (*event_init)		(struct perf_event *event);

	/*
	 * Notification that the event was mapped or unmapped.  Called
	 * in the context of the mapping task.
	 */
	void (*event_mapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
	void (*event_unmapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */

	/*
	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
	 * matching hw_perf_event::state flags.
	 */

/* start the counter when adding    */
#define PERF_EF_START			0x01

/* reload the counter when starting */
#define PERF_EF_RELOAD			0x02

/* update the counter when stopping */
#define PERF_EF_UPDATE			0x04

/* AUX area event, pause tracing */
#define PERF_EF_PAUSE			0x08

/* AUX area event, resume tracing */
#define PERF_EF_RESUME			0x10

	/*
	 * Adds/Removes a counter to/from the PMU, can be done inside a
	 * transaction, see the ->*_txn() methods.
	 *
	 * The add/del callbacks will reserve all hardware resources required
	 * to service the event, this includes any counter constraint
	 * scheduling etc.
	 *
	 * Called with IRQs disabled and the PMU disabled on the CPU the event
	 * is on.
	 *
	 * ->add() called without PERF_EF_START should result in the same state
	 *  as ->add() followed by ->stop().
	 *
	 * ->del() must always PERF_EF_UPDATE stop an event. If it calls
	 *  ->stop() that must deal with already being stopped without
	 *  PERF_EF_UPDATE.
	 */
	int  (*add)			(struct perf_event *event, int flags);
	void (*del)			(struct perf_event *event, int flags);

	/*
	 * Starts/Stops a counter present on the PMU.
	 *
	 * The PMI handler should stop the counter when perf_event_overflow()
	 * returns !0. ->start() will be used to continue.
	 *
	 * Also used to change the sample period.
	 *
	 * Called with IRQs disabled and the PMU disabled on the CPU the event
	 * is on -- will be called from NMI context when the PMU generates
	 * NMIs.
	 *
	 * ->stop() with PERF_EF_UPDATE will read the counter and update
	 *  period/count values like ->read() would.
	 *
	 * ->start() with PERF_EF_RELOAD will reprogram the counter
	 *  value, must be preceded by a ->stop() with PERF_EF_UPDATE.
	 *
	 * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
	 * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
	 * PERF_EF_RESUME.
	 *
	 * ->start() with PERF_EF_RESUME will start as simply as possible but
	 * only if the counter is not otherwise stopped. Will not overlap
	 * another ->start() with PERF_EF_RESUME nor ->stop() with
	 * PERF_EF_PAUSE.
	 *
	 * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
	 * ->stop()/->start() invocations, just not itself.
	 */
	void (*start)			(struct perf_event *event, int flags);
	void (*stop)			(struct perf_event *event, int flags);

	/*
	 * Updates the counter value of the event.
	 *
	 * For sampling capable PMUs this will also update the software period
	 * hw_perf_event::period_left field.
	 */
	void (*read)			(struct perf_event *event);

	/*
	 * Group events scheduling is treated as a transaction, add
	 * group events as a whole and perform one schedulability test.
	 * If the test fails, roll back the whole group
	 *
	 * Start the transaction, after this ->add() doesn't need to
	 * do schedulability tests.
	 *
	 * Optional.
	 */
	void (*start_txn)		(struct pmu *pmu, unsigned int txn_flags);
	/*
	 * If ->start_txn() disabled the ->add() schedulability test
	 * then ->commit_txn() is required to perform one. On success
	 * the transaction is closed. On error the transaction is kept
	 * open until ->cancel_txn() is called.
	 *
	 * Optional.
	 */
	int  (*commit_txn)		(struct pmu *pmu);
	/*
	 * Will cancel the transaction, assumes ->del() is called
	 * for each successful ->add() during the transaction.
	 *
	 * Optional.
	 */
	void (*cancel_txn)		(struct pmu *pmu);

	/*
	 * Will return the value for perf_event_mmap_page::index for this event,
	 * if no implementation is provided it will default to 0 (see
	 * perf_event_idx_default).
	 */
	int (*event_idx)		(struct perf_event *event); /* optional */

	/*
	 * context-switches callback
	 */
	void (*sched_task)		(struct perf_event_pmu_context *pmu_ctx,
					 struct task_struct *task, bool sched_in);

	/*
	 * Kmem cache of PMU specific data
	 */
	struct kmem_cache		*task_ctx_cache;

	/*
	 * Set up pmu-private data structures for an AUX area
	 */
	void *(*setup_aux)		(struct perf_event *event, void **pages,
					 int nr_pages, bool overwrite);
					/* optional */

	/*
	 * Free pmu-private AUX data structures
	 */
	void (*free_aux)		(void *aux); /* optional */

	/*
	 * Take a snapshot of the AUX buffer without touching the event
	 * state, so that preempting ->start()/->stop() callbacks does
	 * not interfere with their logic. Called in PMI context.
	 *
	 * Returns the size of AUX data copied to the output handle.
	 *
	 * Optional.
	 */
	long (*snapshot_aux)		(struct perf_event *event,
					 struct perf_output_handle *handle,
					 unsigned long size);

	/*
	 * Validate address range filters: make sure the HW supports the
	 * requested configuration and number of filters; return 0 if the
	 * supplied filters are valid, -errno otherwise.
	 *
	 * Runs in the context of the ioctl()ing process and is not serialized
	 * with the rest of the PMU callbacks.
	 */
	int (*addr_filters_validate)	(struct list_head *filters);
					/* optional */

	/*
	 * Synchronize address range filter configuration:
	 * translate hw-agnostic filters into hardware configuration in
	 * event::hw::addr_filters.
	 *
	 * Runs as a part of filter sync sequence that is done in ->start()
	 * callback by calling perf_event_addr_filters_sync().
	 *
	 * May (and should) traverse event::addr_filters::list, for which its
	 * caller provides necessary serialization.
	 */
	void (*addr_filters_sync)	(struct perf_event *event);
					/* optional */

	/*
	 * Check if event can be used for aux_output purposes for
	 * events of this PMU.
	 *
	 * Runs from perf_event_open(). Should return 0 for "no match"
	 * or non-zero for "match".
	 */
	int (*aux_output_match)		(struct perf_event *event);
					/* optional */

	/*
	 * Skip programming this PMU on the given CPU. Typically needed for
	 * big.LITTLE things.
	 */
	bool (*filter)			(struct pmu *pmu, int cpu); /* optional */

	/*
	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
	 */
	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
};
 592
/* Action of an address range filter; stored in perf_addr_filter::action. */
enum perf_addr_filter_action_t {
	PERF_ADDR_FILTER_ACTION_STOP = 0,
	PERF_ADDR_FILTER_ACTION_START,
	PERF_ADDR_FILTER_ACTION_FILTER,
};
 598
/**
 * struct perf_addr_filter - address range filter definition
 * @entry:	event's filter list linkage
 * @path:	object file's path for file-based filters
 * @offset:	filter range offset
 * @size:	filter range size (size==0 means single address trigger)
 * @action:	filter/start/stop (enum perf_addr_filter_action_t)
 *
 * This is a hardware-agnostic filter configuration as specified by the user.
 */
struct perf_addr_filter {
	struct list_head		entry;
	struct path			path;
	unsigned long			offset;
	unsigned long			size;
	enum perf_addr_filter_action_t	action;
};
 616
/**
 * struct perf_addr_filters_head - container for address range filters
 * @list:	list of filters for this event
 * @lock:	raw spinlock that serializes accesses to the @list and event's
 *		(and its children's) filter generations.
 * @nr_file_filters:	number of file-based filters
 *
 * A child event will use parent's @list (and therefore @lock), so they are
 * bundled together; see perf_event_addr_filters().
 */
struct perf_addr_filters_head {
	struct list_head		list;
	raw_spinlock_t			lock;
	unsigned int			nr_file_filters;
};
 632
/*
 * A (start, size) pair; perf_event::addr_filter_ranges points at an array of
 * these, described there as the vma address array for file-based filters.
 */
struct perf_addr_filter_range {
	unsigned long			start;
	unsigned long			size;
};
 637
/*
 * The normal states are:
 *
 *            ACTIVE    --.
 *               ^        |
 *               |        |
 *       sched_{in,out}() |
 *               |        |
 *               v        |
 *      ,---> INACTIVE  --+ <-.
 *      |                 |   |
 *      |                {dis,en}able()
 *   sched_in()           |   |
 *      |       OFF    <--' --+
 *      |                     |
 *      `--->  ERROR    ------'
 *
 * That is:
 *
 * sched_in:       INACTIVE          -> {ACTIVE,ERROR}
 * sched_out:      ACTIVE            -> INACTIVE
 * disable:        {ACTIVE,INACTIVE} -> OFF
 * enable:         {OFF,ERROR}       -> INACTIVE
 *
 * Where {OFF,ERROR} are disabled states.
 *
 * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of
 * defunct events:
 *
 *  - EXIT means the task that the event was assigned to died, but child
 *    events still live, and further children can still be created. But the
 *    event itself will never be active again. It can only transition to
 *    {REVOKED,DEAD};
 *
 *  - REVOKED means the PMU the event was associated with is gone; all
 *    functionality is stopped but the event is still alive. Can only
 *    transition to DEAD;
 *
 *  - DEAD event really is DYING tearing down state and freeing bits.
 *
 * All disabled and defunct states have negative values; INACTIVE is 0 and
 * ACTIVE is the only positive value.
 */
enum perf_event_state {
	PERF_EVENT_STATE_DEAD		= -5,
	PERF_EVENT_STATE_REVOKED	= -4, /* pmu gone, must not touch */
	PERF_EVENT_STATE_EXIT		= -3, /* task died, still inherit */
	PERF_EVENT_STATE_ERROR		= -2, /* scheduling error, can enable */
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
	PERF_EVENT_STATE_ACTIVE		=  1,
};
 688
 689struct file;
 690struct perf_sample_data;
 691
/*
 * Overflow callback type; stored in perf_event::overflow_handler.  It
 * receives the event, a sample data pointer and a register set pointer.
 */
typedef void (*perf_overflow_handler_t)(struct perf_event *,
					struct perf_sample_data *,
					struct pt_regs *regs);
 695
/*
 * Event capabilities. For event_caps and group_caps.
 *
 * PERF_EV_CAP_SOFTWARE: Is a software event.
 * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
 * from any CPU in the package where it is active.
 * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
 * cannot be a group leader. If an event with this flag is detached from the
 * group it is scheduled out and moved into an unrecoverable ERROR state.
 * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
 * PMU scope where it is active.
 */
#define PERF_EV_CAP_SOFTWARE		BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
#define PERF_EV_CAP_SIBLING		BIT(2)
#define PERF_EV_CAP_READ_SCOPE		BIT(3)
 712
/* The swevent hash list has 2^SWEVENT_HLIST_BITS (256) heads. */
#define SWEVENT_HLIST_BITS		8
#define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)

/*
 * Array of SWEVENT_HLIST_SIZE hash list heads plus an rcu_head.
 * NOTE(review): the rcu_head suggests the table is freed via RCU -- confirm
 * against the code that releases it.
 */
struct swevent_hlist {
	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
	struct rcu_head			rcu_head;
};
 720
/*
 * Distinct single-bit flag values.
 * NOTE(review): presumably the bits kept in perf_event::attach_state --
 * confirm against the users.
 */
#define PERF_ATTACH_CONTEXT		0x0001
#define PERF_ATTACH_GROUP		0x0002
#define PERF_ATTACH_TASK		0x0004
#define PERF_ATTACH_TASK_DATA		0x0008
#define PERF_ATTACH_GLOBAL_DATA		0x0010
#define PERF_ATTACH_SCHED_CB		0x0020
#define PERF_ATTACH_CHILD		0x0040
#define PERF_ATTACH_EXCLUSIVE		0x0080
#define PERF_ATTACH_CALLCHAIN		0x0100
#define PERF_ATTACH_ITRACE		0x0200
 731
 732struct bpf_prog;
 733struct perf_cgroup;
 734struct perf_buffer;
 735
/*
 * A list head paired with a raw spinlock.
 * NOTE(review): @lock presumably protects @list -- confirm against the users.
 */
struct pmu_event_list {
	raw_spinlock_t			lock;
	struct list_head		list;
};
 740
 741/*
 742 * event->sibling_list is modified whole holding both ctx->lock and ctx->mutex
 743 * as such iteration must hold either lock. However, since ctx->lock is an IRQ
 744 * safe lock, and is only held by the CPU doing the modification, having IRQs
 745 * disabled is sufficient since it will hold-off the IPIs.
 746 */
 747#ifdef CONFIG_PROVE_LOCKING
 748# define lockdep_assert_event_ctx(event)			\
 749	WARN_ON_ONCE(__lockdep_enabled &&			\
 750		     (this_cpu_read(hardirqs_enabled) &&	\
 751		      lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
 752#else
 753# define lockdep_assert_event_ctx(event)
 754#endif
 755
 756#define for_each_sibling_event(sibling, event)			\
 757	lockdep_assert_event_ctx(event);			\
 758	if ((event)->group_leader == (event))			\
 759		list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
 760
/**
 * struct perf_event - performance event kernel representation:
 *
 * The whole body is only present with CONFIG_PERF_EVENTS; without it the
 * structure is empty.
 */
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
	/*
	 * entry onto perf_event_context::event_list;
	 *   modifications require ctx->lock
	 *   RCU safe iterations.
	 */
	struct list_head		event_entry;

	/*
	 * Locked for modification by both ctx->mutex and ctx->lock; holding
	 * either suffices for read.
	 */
	struct list_head		sibling_list;
	struct list_head		active_list;
	/*
	 * Node on the pinned or flexible tree located at the event context;
	 */
	struct rb_node			group_node;
	u64				group_index;
	/*
	 * We need storage to track the entries in perf_pmu_migrate_context; we
	 * cannot use the event_entry because of RCU and we want to keep the
	 * group intact which avoids us using the other two entries.
	 */
	struct list_head		migrate_entry;

	struct hlist_node		hlist_entry;
	struct list_head		active_entry;
	int				nr_siblings;

	/* Not serialized. Only written during event initialization. */
	int				event_caps;
	/* The cumulative AND of all event_caps for events in this group. */
	int				group_caps;

	unsigned int			group_generation;
	struct perf_event		*group_leader;
	/*
	 * event->pmu will always point to pmu in which this event belongs.
	 * Whereas event->pmu_ctx->pmu may point to other pmu when group of
	 * different pmu events is created.
	 */
	struct pmu			*pmu;
	void				*pmu_private;

	enum perf_event_state		state;
	unsigned int			attach_state;
	local64_t			count;
	atomic64_t			child_count;

	/*
	 * These are the total time in nanoseconds that the event
	 * has been enabled (i.e. eligible to run, and the task has
	 * been scheduled in, if this is a per-task event)
	 * and running (scheduled onto the CPU), respectively.
	 */
	u64				total_time_enabled;
	u64				total_time_running;
	u64				tstamp;

	struct perf_event_attr		attr;
	u16				header_size;
	u16				id_header_size;
	u16				read_size;
	struct hw_perf_event		hw;

	struct perf_event_context	*ctx;
	/*
	 * event->pmu_ctx points to perf_event_pmu_context in which the event
	 * is added. This pmu_ctx can be of other pmu for sw event when that
	 * sw event is part of a group which also contains non-sw events.
	 */
	struct perf_event_pmu_context	*pmu_ctx;
	atomic_long_t			refcount;

	/*
	 * These accumulate total time (in nanoseconds) that children
	 * events have been enabled and running, respectively.
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

	/*
	 * Protect attach/detach and child_list:
	 */
	struct mutex			child_mutex;
	struct list_head		child_list;
	struct perf_event		*parent;

	int				oncpu;
	int				cpu;

	struct list_head		owner_entry;
	struct task_struct		*owner;

	/* mmap bits */
	struct mutex			mmap_mutex;
	refcount_t			mmap_count;

	struct perf_buffer		*rb;
	struct list_head		rb_entry;
	unsigned long			rcu_batches;
	int				rcu_pending;

	/* poll related */
	wait_queue_head_t		waitq;
	struct fasync_struct		*fasync;

	/* delayed work for NMIs and such */
	unsigned int			pending_wakeup;
	unsigned int			pending_kill;
	unsigned int			pending_disable;
	unsigned long			pending_addr;	/* SIGTRAP */
	struct irq_work			pending_irq;
	struct irq_work			pending_disable_irq;
	struct callback_head		pending_task;
	unsigned int			pending_work;

	atomic_t			event_limit;

	/* address range filters */
	struct perf_addr_filters_head	addr_filters;
	/* vma address array for file-based filters */
	struct perf_addr_filter_range	*addr_filter_ranges;
	unsigned long			addr_filters_gen;

	/* for aux_output events */
	struct perf_event		*aux_event;

	void (*destroy)(struct perf_event *);
	struct rcu_head			rcu_head;

	struct pid_namespace		*ns;
	u64				id;

	atomic64_t			lost_samples;

	u64				(*clock)(void);
	perf_overflow_handler_t		overflow_handler;
	void				*overflow_handler_context;
	struct bpf_prog			*prog;
	u64				bpf_cookie;

#ifdef CONFIG_EVENT_TRACING
	struct trace_event_call		*tp_event;
	struct event_filter		*filter;
# ifdef CONFIG_FUNCTION_TRACER
	struct ftrace_ops		ftrace_ops;
# endif
#endif

#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp; /* cgroup event is attached to */
#endif

#ifdef CONFIG_SECURITY
	void *security;
#endif
	struct list_head		sb_list;
	struct list_head		pmu_list;

	/*
	 * Certain events get forwarded to another pmu internally by over-
	 * writing kernel copy of event->attr.type without user being aware
	 * of it. event->orig_type contains original 'type' requested by
	 * user.
	 */
	u32				orig_type;
#endif /* CONFIG_PERF_EVENTS */
};
 935
/*
 *           ,-----------------------[1:n]------------------------.
 *           V                                                    V
 * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
 *                                        |                       |
 *                                        `--[n:1]-> pmu <-[1:n]--'
 *
 *
 * struct perf_event_pmu_context  lifetime is refcount based and RCU freed
 * (similar to perf_event_context). Locking is as if it were a member of
 * perf_event_context; specifically:
 *
 *   modification, both: ctx->mutex && ctx->lock
 *   reading, either:    ctx->mutex || ctx->lock
 *
 * There is one exception to this; namely put_pmu_ctx() isn't always called
 * with ctx->mutex held; this means that as long as we can guarantee the epc
 * has events the above rules hold.
 *
 * Specifically, sys_perf_event_open()'s group_leader case depends on
 * ctx->mutex pinning the configuration. Since we hold a reference on
 * group_leader (through the filedesc) it can't go away, therefore its
 * associated pmu_ctx must exist and cannot change due to ctx->mutex.
 *
 * perf_event holds a refcount on perf_event_context
 * perf_event holds a refcount on perf_event_pmu_context
 */
struct perf_event_pmu_context {
	struct pmu			*pmu;
	struct perf_event_context	*ctx;

	struct list_head		pmu_ctx_entry;

	/* both lists are tested by perf_pmu_ctx_is_active() */
	struct list_head		pinned_active;
	struct list_head		flexible_active;

	/* Used to identify the per-cpu perf_event_pmu_context */
	unsigned int			embedded : 1;

	unsigned int			nr_events;
	unsigned int			nr_cgroups;
	unsigned int			nr_freq;

	atomic_t			refcount; /* event <-> epc */
	struct rcu_head			rcu_head;

	/*
	 * Set when one or more (plausibly active) event can't be scheduled
	 * due to pmu overcommit or pmu constraints, except tolerant to
	 * events not necessary to be active due to scheduling constraints,
	 * such as cgroups.
	 */
	int				rotate_necessary;
};
 990
 991static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
 992{
 993	return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active);
 994}
 995
/*
 * An rb-tree root plus a u64 index; perf_event_context keeps one instance
 * for pinned groups and one for flexible groups.
 * NOTE(review): @index is presumably the source of perf_event::group_index
 * values -- confirm against the group insertion code.
 */
struct perf_event_groups {
	struct rb_root			tree;
	u64				index;
};
1000
1001
/**
 * struct perf_event_context - event context structure
 *
 * Used as a container for task events and CPU events as well:
 */
struct perf_event_context {
	/*
	 * Protect the states of the events in the list,
	 * nr_active, and the list:
	 */
	raw_spinlock_t			lock;
	/*
	 * Protect the list of events.  Locking either mutex or lock
	 * is sufficient to ensure the list doesn't change; to change
	 * the list you need to lock both the mutex and the spinlock.
	 */
	struct mutex			mutex;

	struct list_head		pmu_ctx_list;
	struct perf_event_groups	pinned_groups;
	struct perf_event_groups	flexible_groups;
	struct list_head		event_list;

	int				nr_events;
	int				nr_user;
	int				is_active;

	int				nr_stat;
	int				nr_freq;
	int				rotate_disable;

	refcount_t			refcount; /* event <-> ctx */
	struct task_struct		*task;

	/*
	 * Context clock, runs when context enabled.
	 */
	u64				time;
	u64				timestamp;
	u64				timeoffset;

	/*
	 * These fields let us detect when two contexts have both
	 * been cloned (inherited) from a common ancestor.
	 */
	struct perf_event_context	*parent_ctx;
	u64				parent_gen;
	u64				generation;
	int				pin_count;
#ifdef CONFIG_CGROUP_PERF
	int				nr_cgroups;	 /* cgroup evts */
#endif
	struct rcu_head			rcu_head;

	/*
	 * The count of events for which using the switch-out fast path
	 * should be avoided.
	 *
	 * Sum (event->pending_work + events with
	 *    (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)))
	 *
	 * The SIGTRAP is targeted at ctx->task, as such it won't do changing
	 * that until the signal is delivered.
	 */
	local_t				nr_no_switch_fast;
};
1068
/**
 * struct perf_ctx_data - PMU specific data for a task
 * @rcu_head:  To avoid a race when freeing the PMU specific data
 * @refcount:  To track users
 * @global:    To track system-wide users
 * @ctx_cache: Kmem cache of PMU specific data
 * @data:      PMU specific data
 *
 * Currently, the struct is only used in Intel LBR call stack mode to
 * save/restore the call stack of a task on context switches.
 *
 * The rcu_head is used to prevent a race when freeing the data.
 * The data is only allocated when Intel LBR call stack mode is enabled.
 * The data will be freed when the mode is disabled.
 * The content of the data will only be accessed in context switch, which
 * should be protected by rcu_read_lock().
 *
 * Because of the alignment requirement of Intel Arch LBR, the Kmem cache
 * is used to allocate the PMU specific data. The ctx_cache is to track
 * the Kmem cache.
 *
 * Careful: Struct perf_ctx_data is added as a pointer in struct task_struct.
 * When system-wide Intel LBR call stack mode is enabled, a buffer with
 * constant size will be allocated for each task.
 * Also, system memory consumption can further grow when the size of
 * struct perf_ctx_data enlarges.
 */
struct perf_ctx_data {
	struct rcu_head			rcu_head;
	refcount_t			refcount;
	int				global;
	struct kmem_cache		*ctx_cache;
	void				*data;
};
1103
/*
 * Wraps an embedded perf_event_pmu_context (@epc) together with a pointer
 * to a second one (@task_epc), plus scheduling-callback bookkeeping and an
 * hrtimer with its own lock.
 *
 * NOTE(review): presumably one instance per (CPU, PMU) pair -- confirm
 * against the allocation site.
 */
struct perf_cpu_pmu_context {
	struct perf_event_pmu_context	epc;
	struct perf_event_pmu_context	*task_epc;

	struct list_head		sched_cb_entry;
	int				sched_cb_usage;

	int				active_oncpu;
	int				exclusive;
	int				pmu_disable_count;

	/* NOTE(review): hrtimer_lock presumably guards the hrtimer_* fields -- confirm */
	raw_spinlock_t			hrtimer_lock;
	struct hrtimer			hrtimer;
	ktime_t				hrtimer_interval;
	unsigned int			hrtimer_active;
};
1120
/**
 * struct perf_cpu_context - per cpu event context structure
 *
 * Embeds one perf_event_context (@ctx) and points at another through
 * @task_ctx.
 */
struct perf_cpu_context {
	struct perf_event_context	ctx;
	struct perf_event_context	*task_ctx;
	int				online;

#ifdef CONFIG_CGROUP_PERF
	struct perf_cgroup		*cgrp;
#endif

	/*
	 * Per-CPU storage for iterators used in visit_groups_merge. The default
	 * storage is of size 2 to hold the CPU and any CPU event iterators.
	 */
	int				heap_size;
	struct perf_event		**heap;
	struct perf_event		*heap_default[2];
};
1141
/*
 * Handle type taken by the perf_output_*() and perf_aux_output_*()
 * functions declared in this header.
 */
struct perf_output_handle {
	struct perf_event		*event;
	struct perf_buffer		*rb;
	unsigned long			wakeup;
	unsigned long			size;
	/* All three members overlay the same 64-bit word */
	union {
		u64			flags;		/* perf_output*() */
		u64			aux_flags;	/* perf_aux_output*() */
		struct {
			u64		skip_read : 1;
		};
	};
	/* @addr and @head share storage */
	union {
		void			*addr;
		unsigned long		head;
	};
	int				page;
};
1160
/*
 * Groups three pointers: user-visible register state, the sample data and
 * the perf event itself.
 *
 * NOTE(review): presumably the kernel-side context handed to BPF programs
 * attached to perf events -- confirm against the BPF call sites.
 */
struct bpf_perf_event_data_kern {
	bpf_user_pt_regs_t *regs;
	struct perf_sample_data *data;
	struct perf_event *event;
};
1166
1167#ifdef CONFIG_CGROUP_PERF
1168
/*
 * perf_cgroup_info keeps track of time_enabled for a cgroup.
 * This is a per-cpu dynamically allocated data structure.
 *
 * The time/timestamp/timeoffset trio carries the same names and types as
 * the context clock fields in struct perf_event_context.
 */
struct perf_cgroup_info {
	u64				time;
	u64				timestamp;
	u64				timeoffset;
	int				active;
};
1179
/*
 * perf's cgroup state. perf_cgroup_from_task() recovers a pointer to this
 * structure from the embedded @css via container_of().
 */
struct perf_cgroup {
	struct cgroup_subsys_state	css;
	/* Per-CPU perf_cgroup_info */
	struct perf_cgroup_info	__percpu *info;
};
1184
1185/*
1186 * Must ensure cgroup is pinned (css_get) before calling
1187 * this function. In other words, we cannot call this function
1188 * if there is no cgroup event for the current CPU context.
1189 */
1190static inline struct perf_cgroup *
1191perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
1192{
1193	return container_of(task_css_check(task, perf_event_cgrp_id,
1194					   ctx ? lockdep_is_held(&ctx->lock)
1195					       : true),
1196			    struct perf_cgroup, css);
1197}
1198#endif /* CONFIG_CGROUP_PERF */
1199
1200#ifdef CONFIG_PERF_EVENTS
1201
1202extern struct perf_event_context *perf_cpu_task_ctx(void);
1203
1204extern void *perf_aux_output_begin(struct perf_output_handle *handle,
1205				   struct perf_event *event);
1206extern void perf_aux_output_end(struct perf_output_handle *handle,
1207				unsigned long size);
1208extern int perf_aux_output_skip(struct perf_output_handle *handle,
1209				unsigned long size);
1210extern void *perf_get_aux(struct perf_output_handle *handle);
1211extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
1212extern void perf_event_itrace_started(struct perf_event *event);
1213
1214extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
1215extern int perf_pmu_unregister(struct pmu *pmu);
1216
1217extern void __perf_event_task_sched_in(struct task_struct *prev,
1218				       struct task_struct *task);
1219extern void __perf_event_task_sched_out(struct task_struct *prev,
1220					struct task_struct *next);
1221extern int perf_event_init_task(struct task_struct *child, u64 clone_flags);
1222extern void perf_event_exit_task(struct task_struct *child);
1223extern void perf_event_free_task(struct task_struct *task);
1224extern void perf_event_delayed_put(struct task_struct *task);
1225extern struct file *perf_event_get(unsigned int fd);
1226extern const struct perf_event *perf_get_event(struct file *file);
1227extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
1228extern void perf_event_print_debug(void);
1229extern void perf_pmu_disable(struct pmu *pmu);
1230extern void perf_pmu_enable(struct pmu *pmu);
1231extern void perf_sched_cb_dec(struct pmu *pmu);
1232extern void perf_sched_cb_inc(struct pmu *pmu);
1233extern int perf_event_task_disable(void);
1234extern int perf_event_task_enable(void);
1235
1236extern void perf_pmu_resched(struct pmu *pmu);
1237
1238extern int perf_event_refresh(struct perf_event *event, int refresh);
1239extern void perf_event_update_userpage(struct perf_event *event);
1240extern int perf_event_release_kernel(struct perf_event *event);
1241
1242extern struct perf_event *
1243perf_event_create_kernel_counter(struct perf_event_attr *attr,
1244				 int cpu,
1245				 struct task_struct *task,
1246				 perf_overflow_handler_t callback,
1247				 void *context);
1248
1249extern void perf_pmu_migrate_context(struct pmu *pmu,
1250				     int src_cpu, int dst_cpu);
1251extern int perf_event_read_local(struct perf_event *event, u64 *value,
1252				 u64 *enabled, u64 *running);
1253extern u64 perf_event_read_value(struct perf_event *event,
1254				 u64 *enabled, u64 *running);
1255
1256extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
1257
1258static inline bool branch_sample_no_flags(const struct perf_event *event)
1259{
1260	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
1261}
1262
1263static inline bool branch_sample_no_cycles(const struct perf_event *event)
1264{
1265	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
1266}
1267
1268static inline bool branch_sample_type(const struct perf_event *event)
1269{
1270	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
1271}
1272
1273static inline bool branch_sample_hw_index(const struct perf_event *event)
1274{
1275	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
1276}
1277
1278static inline bool branch_sample_priv(const struct perf_event *event)
1279{
1280	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
1281}
1282
1283static inline bool branch_sample_counters(const struct perf_event *event)
1284{
1285	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
1286}
1287
1288static inline bool branch_sample_call_stack(const struct perf_event *event)
1289{
1290	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
1291}
1292
struct perf_sample_data {
	/*
	 * Fields set by perf_sample_data_init() unconditionally,
	 * group so as to minimize the cachelines touched.
	 *
	 * @sample_flags accumulates PERF_SAMPLE_* bits for the fields that
	 * have been filled in; @dyn_size is grown by the
	 * perf_sample_save_*() helpers and read back by
	 * perf_sample_data_size().
	 */
	u64				sample_flags;
	u64				period;
	u64				dyn_size;

	/*
	 * Fields commonly set by __perf_event_header__init_id(),
	 * group so as to minimize the cachelines touched.
	 */
	u64				type;
	struct {
		u32	pid;
		u32	tid;
	}				tid_entry;
	u64				time;
	u64				id;
	struct {
		u32	cpu;
		u32	reserved;
	}				cpu_entry;

	/*
	 * The other fields, optionally {set,used} by
	 * perf_{prepare,output}_sample().
	 */
	u64				ip;
	/* Set by perf_sample_save_callchain() */
	struct perf_callchain_entry	*callchain;
	/* Set by perf_sample_save_raw_data() */
	struct perf_raw_record		*raw;
	/* Both set by perf_sample_save_brstack() */
	struct perf_branch_stack	*br_stack;
	u64				*br_stack_cntr;
	union perf_sample_weight	weight;
	union  perf_mem_data_src	data_src;
	u64				txn;

	struct perf_regs		regs_user;
	struct perf_regs		regs_intr;
	u64				stack_user_size;

	u64				stream_id;
	u64				cgroup;
	/* Written by perf_sample_data_init() only when its addr argument is non-zero */
	u64				addr;
	u64				phys_addr;
	u64				data_page_size;
	u64				code_page_size;
	u64				aux_size;
} ____cacheline_aligned;
1343
1344/* default value for data source */
1345#define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
1346		    PERF_MEM_S(LVL, NA)   |\
1347		    PERF_MEM_S(SNOOP, NA) |\
1348		    PERF_MEM_S(LOCK, NA)  |\
1349		    PERF_MEM_S(TLB, NA)   |\
1350		    PERF_MEM_S(LVLNUM, NA))
1351
1352static inline void perf_sample_data_init(struct perf_sample_data *data,
1353					 u64 addr, u64 period)
1354{
1355	/* remaining struct members initialized in perf_prepare_sample() */
1356	data->sample_flags = PERF_SAMPLE_PERIOD;
1357	data->period = period;
1358	data->dyn_size = 0;
1359
1360	if (addr) {
1361		data->addr = addr;
1362		data->sample_flags |= PERF_SAMPLE_ADDR;
1363	}
1364}
1365
1366static inline void perf_sample_save_callchain(struct perf_sample_data *data,
1367					      struct perf_event *event,
1368					      struct pt_regs *regs)
1369{
1370	int size = 1;
1371
1372	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
1373		return;
1374	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN))
1375		return;
1376
1377	data->callchain = perf_callchain(event, regs);
1378	size += data->callchain->nr;
1379
1380	data->dyn_size += size * sizeof(u64);
1381	data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
1382}
1383
1384static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
1385					     struct perf_event *event,
1386					     struct perf_raw_record *raw)
1387{
1388	struct perf_raw_frag *frag = &raw->frag;
1389	u32 sum = 0;
1390	int size;
1391
1392	if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
1393		return;
1394	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW))
1395		return;
1396
1397	do {
1398		sum += frag->size;
1399		if (perf_raw_frag_last(frag))
1400			break;
1401		frag = frag->next;
1402	} while (1);
1403
1404	size = round_up(sum + sizeof(u32), sizeof(u64));
1405	raw->size = size - sizeof(u32);
1406	frag->pad = raw->size - sum;
1407
1408	data->raw = raw;
1409	data->dyn_size += size;
1410	data->sample_flags |= PERF_SAMPLE_RAW;
1411}
1412
1413static inline bool has_branch_stack(struct perf_event *event)
1414{
1415	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
1416}
1417
1418static inline void perf_sample_save_brstack(struct perf_sample_data *data,
1419					    struct perf_event *event,
1420					    struct perf_branch_stack *brs,
1421					    u64 *brs_cntr)
1422{
1423	int size = sizeof(u64); /* nr */
1424
1425	if (!has_branch_stack(event))
1426		return;
1427	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK))
1428		return;
1429
1430	if (branch_sample_hw_index(event))
1431		size += sizeof(u64);
1432
1433	brs->nr = min_t(u16, event->attr.sample_max_stack, brs->nr);
1434
1435	size += brs->nr * sizeof(struct perf_branch_entry);
1436
1437	/*
1438	 * The extension space for counters is appended after the
1439	 * struct perf_branch_stack. It is used to store the occurrences
1440	 * of events of each branch.
1441	 */
1442	if (brs_cntr)
1443		size += brs->nr * sizeof(u64);
1444
1445	data->br_stack = brs;
1446	data->br_stack_cntr = brs_cntr;
1447	data->dyn_size += size;
1448	data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
1449}
1450
1451static inline u32 perf_sample_data_size(struct perf_sample_data *data,
1452					struct perf_event *event)
1453{
1454	u32 size = sizeof(struct perf_event_header);
1455
1456	size += event->header_size + event->id_header_size;
1457	size += data->dyn_size;
1458
1459	return size;
1460}
1461
1462/*
1463 * Clear all bitfields in the perf_branch_entry.
1464 * The to and from fields are not cleared because they are
1465 * systematically modified by caller.
1466 */
1467static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
1468{
1469	br->mispred	= 0;
1470	br->predicted	= 0;
1471	br->in_tx	= 0;
1472	br->abort	= 0;
1473	br->cycles	= 0;
1474	br->type	= 0;
1475	br->spec	= PERF_BR_SPEC_NA;
1476	br->reserved	= 0;
1477}
1478
1479extern void perf_output_sample(struct perf_output_handle *handle,
1480			       struct perf_event_header *header,
1481			       struct perf_sample_data *data,
1482			       struct perf_event *event);
1483extern void perf_prepare_sample(struct perf_sample_data *data,
1484				struct perf_event *event,
1485				struct pt_regs *regs);
1486extern void perf_prepare_header(struct perf_event_header *header,
1487				struct perf_sample_data *data,
1488				struct perf_event *event,
1489				struct pt_regs *regs);
1490
1491extern int perf_event_overflow(struct perf_event *event,
1492				 struct perf_sample_data *data,
1493				 struct pt_regs *regs);
1494
1495extern void perf_event_output_forward(struct perf_event *event,
1496				     struct perf_sample_data *data,
1497				     struct pt_regs *regs);
1498extern void perf_event_output_backward(struct perf_event *event,
1499				       struct perf_sample_data *data,
1500				       struct pt_regs *regs);
1501extern int perf_event_output(struct perf_event *event,
1502			     struct perf_sample_data *data,
1503			     struct pt_regs *regs);
1504
1505static inline bool
1506is_default_overflow_handler(struct perf_event *event)
1507{
1508	perf_overflow_handler_t overflow_handler = event->overflow_handler;
1509
1510	if (likely(overflow_handler == perf_event_output_forward))
1511		return true;
1512	if (unlikely(overflow_handler == perf_event_output_backward))
1513		return true;
1514	return false;
1515}
1516
1517extern void
1518perf_event_header__init_id(struct perf_event_header *header,
1519			   struct perf_sample_data *data,
1520			   struct perf_event *event);
1521extern void
1522perf_event__output_id_sample(struct perf_event *event,
1523			     struct perf_output_handle *handle,
1524			     struct perf_sample_data *sample);
1525
1526extern void
1527perf_log_lost_samples(struct perf_event *event, u64 lost);
1528
1529static inline bool event_has_any_exclude_flag(struct perf_event *event)
1530{
1531	struct perf_event_attr *attr = &event->attr;
1532
1533	return attr->exclude_idle || attr->exclude_user ||
1534	       attr->exclude_kernel || attr->exclude_hv ||
1535	       attr->exclude_guest || attr->exclude_host;
1536}
1537
1538static inline bool is_sampling_event(struct perf_event *event)
1539{
1540	return event->attr.sample_period != 0;
1541}
1542
1543/*
1544 * Return 1 for a software event, 0 for a hardware event
1545 */
1546static inline int is_software_event(struct perf_event *event)
1547{
1548	return event->event_caps & PERF_EV_CAP_SOFTWARE;
1549}
1550
1551/*
1552 * Return 1 for event in sw context, 0 for event in hw context
1553 */
1554static inline int in_software_context(struct perf_event *event)
1555{
1556	return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context;
1557}
1558
1559static inline int is_exclusive_pmu(struct pmu *pmu)
1560{
1561	return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE;
1562}
1563
1564extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
1565
1566extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
1567extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
1568
1569#ifndef perf_arch_fetch_caller_regs
/* Fallback used when the architecture supplies no implementation: no-op. */
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs,
					       unsigned long ip)
{
}
1571#endif
1572
1573/*
1574 * When generating a perf sample in-line, instead of from an interrupt /
1575 * exception, we lack a pt_regs. This is typically used from software events
1576 * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints.
1577 *
1578 * We typically don't need a full set, but (for x86) do require:
1579 * - ip for PERF_SAMPLE_IP
1580 * - cs for user_mode() tests
1581 * - sp for PERF_SAMPLE_CALLCHAIN
1582 * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs())
1583 *
1584 * NOTE: assumes @regs is otherwise already 0 filled; this is important for
1585 * things like PERF_SAMPLE_REGS_INTR.
1586 */
1587static inline void perf_fetch_caller_regs(struct pt_regs *regs)
1588{
1589	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
1590}
1591
1592static __always_inline void
1593perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
1594{
1595	if (static_key_false(&perf_swevent_enabled[event_id]))
1596		__perf_sw_event(event_id, nr, regs, addr);
1597}
1598
1599DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
1600
1601/*
1602 * 'Special' version for the scheduler, it hard assumes no recursion,
1603 * which is guaranteed by us not actually scheduling inside other swevents
1604 * because those disable preemption.
1605 */
1606static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
1607{
1608	struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
1609
1610	perf_fetch_caller_regs(regs);
1611	___perf_sw_event(event_id, nr, regs, addr);
1612}
1613
1614extern struct static_key_false perf_sched_events;
1615
1616static __always_inline bool __perf_sw_enabled(int swevt)
1617{
1618	return static_key_false(&perf_swevent_enabled[swevt]);
1619}
1620
1621static inline void perf_event_task_migrate(struct task_struct *task)
1622{
1623	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
1624		task->sched_migrated = 1;
1625}
1626
1627static inline void perf_event_task_sched_in(struct task_struct *prev,
1628					    struct task_struct *task)
1629{
1630	if (static_branch_unlikely(&perf_sched_events))
1631		__perf_event_task_sched_in(prev, task);
1632
1633	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
1634	    task->sched_migrated) {
1635		__perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
1636		task->sched_migrated = 0;
1637	}
1638}
1639
1640static inline void perf_event_task_sched_out(struct task_struct *prev,
1641					     struct task_struct *next)
1642{
1643	if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
1644		__perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
1645
1646#ifdef CONFIG_CGROUP_PERF
1647	if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
1648	    perf_cgroup_from_task(prev, NULL) !=
1649	    perf_cgroup_from_task(next, NULL))
1650		__perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
1651#endif
1652
1653	if (static_branch_unlikely(&perf_sched_events))
1654		__perf_event_task_sched_out(prev, next);
1655}
1656
1657extern void perf_event_mmap(struct vm_area_struct *vma);
1658
1659extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
1660			       bool unregister, const char *sym);
1661extern void perf_event_bpf_event(struct bpf_prog *prog,
1662				 enum perf_bpf_event_type type,
1663				 u16 flags);
1664
1665#define PERF_GUEST_ACTIVE		0x01
1666#define PERF_GUEST_USER			0x02
1667
/*
 * Callbacks for querying guest state. Under CONFIG_GUEST_PERF_EVENTS each
 * member has a matching static call (__perf_guest_state,
 * __perf_guest_get_ip, __perf_guest_handle_intel_pt_intr).
 */
struct perf_guest_info_callbacks {
	/* Result is tested against the PERF_GUEST_ACTIVE / PERF_GUEST_USER bits */
	unsigned int			(*state)(void);
	unsigned long			(*get_ip)(void);
	unsigned int			(*handle_intel_pt_intr)(void);
};
1673
1674#ifdef CONFIG_GUEST_PERF_EVENTS
1675
1676extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
1677
1678DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
1679DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
1680DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
1681
1682static inline unsigned int perf_guest_state(void)
1683{
1684	return static_call(__perf_guest_state)();
1685}
1686
1687static inline unsigned long perf_guest_get_ip(void)
1688{
1689	return static_call(__perf_guest_get_ip)();
1690}
1691
1692static inline unsigned int perf_guest_handle_intel_pt_intr(void)
1693{
1694	return static_call(__perf_guest_handle_intel_pt_intr)();
1695}
1696
1697extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
1698extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
1699
1700#else /* !CONFIG_GUEST_PERF_EVENTS: */
1701
/* !CONFIG_GUEST_PERF_EVENTS: always reports 0. */
static inline unsigned int perf_guest_state(void)
{
	return 0;
}
/* !CONFIG_GUEST_PERF_EVENTS: always reports 0. */
static inline unsigned long perf_guest_get_ip(void)
{
	return 0;
}
/* !CONFIG_GUEST_PERF_EVENTS: always reports 0. */
static inline unsigned int perf_guest_handle_intel_pt_intr(void)
{
	return 0;
}
1705
1706#endif /* !CONFIG_GUEST_PERF_EVENTS */
1707
1708extern void perf_event_exec(void);
1709extern void perf_event_comm(struct task_struct *tsk, bool exec);
1710extern void perf_event_namespaces(struct task_struct *tsk);
1711extern void perf_event_fork(struct task_struct *tsk);
1712extern void perf_event_text_poke(const void *addr,
1713				 const void *old_bytes, size_t old_len,
1714				 const void *new_bytes, size_t new_len);
1715
1716/* Callchains */
1717DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1718
1719extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
1720extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
1721extern struct perf_callchain_entry *
1722get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
1723		   u32 max_stack, bool crosstask, bool add_mark, u64 defer_cookie);
1724extern int get_callchain_buffers(int max_stack);
1725extern void put_callchain_buffers(void);
1726extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
1727extern void put_callchain_entry(int rctx);
1728
1729extern int sysctl_perf_event_max_stack;
1730extern int sysctl_perf_event_max_contexts_per_stack;
1731
1732static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip)
1733{
1734	if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
1735		struct perf_callchain_entry *entry = ctx->entry;
1736
1737		entry->ip[entry->nr++] = ip;
1738		++ctx->contexts;
1739		return 0;
1740	} else {
1741		ctx->contexts_maxed = true;
1742		return -1; /* no more room, stop walking the stack */
1743	}
1744}
1745
1746static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip)
1747{
1748	if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
1749		struct perf_callchain_entry *entry = ctx->entry;
1750
1751		entry->ip[entry->nr++] = ip;
1752		++ctx->nr;
1753		return 0;
1754	} else {
1755		return -1; /* no more room, stop walking the stack */
1756	}
1757}
1758
1759extern int sysctl_perf_event_paranoid;
1760extern int sysctl_perf_event_sample_rate;
1761
1762extern void perf_sample_event_took(u64 sample_len_ns);
1763
1764/* Access to perf_event_open(2) syscall. */
1765#define PERF_SECURITY_OPEN		0
1766
1767/* Finer grained perf_event_open(2) access control. */
1768#define PERF_SECURITY_CPU		1
1769#define PERF_SECURITY_KERNEL		2
1770#define PERF_SECURITY_TRACEPOINT	3
1771
1772static inline int perf_is_paranoid(void)
1773{
1774	return sysctl_perf_event_paranoid > -1;
1775}
1776
1777extern int perf_allow_kernel(void);
1778
1779static inline int perf_allow_cpu(void)
1780{
1781	if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
1782		return -EACCES;
1783
1784	return security_perf_event_open(PERF_SECURITY_CPU);
1785}
1786
1787static inline int perf_allow_tracepoint(void)
1788{
1789	if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
1790		return -EPERM;
1791
1792	return security_perf_event_open(PERF_SECURITY_TRACEPOINT);
1793}
1794
1795extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs);
1796
1797extern void perf_event_init(void);
1798extern void perf_tp_event(u16 event_type, u64 count, void *record,
1799			  int entry_size, struct pt_regs *regs,
1800			  struct hlist_head *head, int rctx,
1801			  struct task_struct *task);
1802extern void perf_bp_event(struct perf_event *event, void *data);
1803
1804extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs);
1805extern unsigned long perf_instruction_pointer(struct perf_event *event,
1806					      struct pt_regs *regs);
1807
1808#ifndef perf_arch_misc_flags
1809# define perf_arch_misc_flags(regs) \
1810		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
1811# define perf_arch_instruction_pointer(regs)	instruction_pointer(regs)
1812#endif
1813#ifndef perf_arch_bpf_user_pt_regs
1814# define perf_arch_bpf_user_pt_regs(regs) regs
1815#endif
1816
1817#ifndef perf_arch_guest_misc_flags
1818static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
1819{
1820	unsigned long guest_state = perf_guest_state();
1821
1822	if (!(guest_state & PERF_GUEST_ACTIVE))
1823		return 0;
1824
1825	if (guest_state & PERF_GUEST_USER)
1826		return PERF_RECORD_MISC_GUEST_USER;
1827	else
1828		return PERF_RECORD_MISC_GUEST_KERNEL;
1829}
1830# define perf_arch_guest_misc_flags(regs)	perf_arch_guest_misc_flags(regs)
1831#endif
1832
1833static inline bool needs_branch_stack(struct perf_event *event)
1834{
1835	return event->attr.branch_sample_type != 0;
1836}
1837
1838static inline bool has_aux(struct perf_event *event)
1839{
1840	return event->pmu && event->pmu->setup_aux;
1841}
1842
1843static inline bool has_aux_action(struct perf_event *event)
1844{
1845	return event->attr.aux_sample_size ||
1846	       event->attr.aux_pause ||
1847	       event->attr.aux_resume;
1848}
1849
1850static inline bool is_write_backward(struct perf_event *event)
1851{
1852	return !!event->attr.write_backward;
1853}
1854
1855static inline bool has_addr_filter(struct perf_event *event)
1856{
1857	return event->pmu->nr_addr_filters;
1858}
1859
1860/*
1861 * An inherited event uses parent's filters
1862 */
1863static inline struct perf_addr_filters_head *
1864perf_event_addr_filters(struct perf_event *event)
1865{
1866	struct perf_addr_filters_head *ifh = &event->addr_filters;
1867
1868	if (event->parent)
1869		ifh = &event->parent->addr_filters;
1870
1871	return ifh;
1872}
1873
1874static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
1875{
1876	/* Only the parent has fasync state */
1877	if (event->parent)
1878		event = event->parent;
1879	return &event->fasync;
1880}
1881
1882extern void perf_event_addr_filters_sync(struct perf_event *event);
1883extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
1884
1885extern int perf_output_begin(struct perf_output_handle *handle,
1886			     struct perf_sample_data *data,
1887			     struct perf_event *event, unsigned int size);
1888extern int perf_output_begin_forward(struct perf_output_handle *handle,
1889				     struct perf_sample_data *data,
1890				     struct perf_event *event,
1891				     unsigned int size);
1892extern int perf_output_begin_backward(struct perf_output_handle *handle,
1893				      struct perf_sample_data *data,
1894				      struct perf_event *event,
1895				      unsigned int size);
1896
1897extern void perf_output_end(struct perf_output_handle *handle);
1898extern unsigned int perf_output_copy(struct perf_output_handle *handle,
1899				     const void *buf, unsigned int len);
1900extern unsigned int perf_output_skip(struct perf_output_handle *handle,
1901				     unsigned int len);
1902extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
1903				 struct perf_output_handle *handle,
1904				 unsigned long from, unsigned long to);
1905extern int perf_swevent_get_recursion_context(void);
1906extern void perf_swevent_put_recursion_context(int rctx);
1907extern u64 perf_swevent_set_period(struct perf_event *event);
1908extern void perf_event_enable(struct perf_event *event);
1909extern void perf_event_disable(struct perf_event *event);
1910extern void perf_event_disable_local(struct perf_event *event);
1911extern void perf_event_disable_inatomic(struct perf_event *event);
1912extern void perf_event_task_tick(void);
1913extern int perf_event_account_interrupt(struct perf_event *event);
1914extern int perf_event_period(struct perf_event *event, u64 value);
1915extern u64 perf_event_pause(struct perf_event *event, bool reset);
1916
1917#else /* !CONFIG_PERF_EVENTS: */
1918
/* !CONFIG_PERF_EVENTS: AUX output stubs. */
static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
		      struct perf_event *event)
{
	return NULL;
}

static inline void
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
{
}

static inline int
perf_aux_output_skip(struct perf_output_handle *handle,
		     unsigned long size)
{
	return -EINVAL;
}

static inline void *
perf_get_aux(struct perf_output_handle *handle)
{
	return NULL;
}
1930static inline void
1931perf_event_task_migrate(struct task_struct *task)			{ }
1932static inline void
1933perf_event_task_sched_in(struct task_struct *prev,
1934			 struct task_struct *task)			{ }
1935static inline void
1936perf_event_task_sched_out(struct task_struct *prev,
1937			  struct task_struct *next)			{ }
1938static inline int perf_event_init_task(struct task_struct *child,
1939				       u64 clone_flags)			{ return 0; }
1940static inline void perf_event_exit_task(struct task_struct *child)	{ }
1941static inline void perf_event_free_task(struct task_struct *task)	{ }
1942static inline void perf_event_delayed_put(struct task_struct *task)	{ }
1943static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
1944static inline const struct perf_event *perf_get_event(struct file *file)
1945{
1946	return ERR_PTR(-EINVAL);
1947}
1948static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
1949{
1950	return ERR_PTR(-EINVAL);
1951}
1952static inline int perf_event_read_local(struct perf_event *event, u64 *value,
1953					u64 *enabled, u64 *running)
1954{
1955	return -EINVAL;
1956}
/* !CONFIG_PERF_EVENTS stub: does nothing. */
static inline void perf_event_print_debug(void)
{
}
/* !CONFIG_PERF_EVENTS stubs: both always return -EINVAL. */
static inline int perf_event_task_disable(void)
{
	return -EINVAL;
}

static inline int perf_event_task_enable(void)
{
	return -EINVAL;
}
/* !CONFIG_PERF_EVENTS stub: always returns -EINVAL, whatever @refresh is. */
static inline int
perf_event_refresh(struct perf_event *event, int refresh)	{ return -EINVAL; }
1964
1965static inline void
1966perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
/* !CONFIG_PERF_EVENTS stub: does nothing with @event or @data. */
static inline void perf_bp_event(struct perf_event *event, void *data)
{
}
1969
/* !CONFIG_PERF_EVENTS stub: does nothing with @vma. */
static inline void perf_event_mmap(struct vm_area_struct *vma)
{
}
1971
/*
 * Function type taking a name buffer, its length and an opaque @data
 * pointer, and returning int.
 */
typedef int perf_ksymbol_get_name_f(char *name, int name_len, void *data);
1973static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
1974				      bool unregister, const char *sym)	{ }
1975static inline void perf_event_bpf_event(struct bpf_prog *prog,
1976					enum perf_bpf_event_type type,
1977					u16 flags)			{ }
/* !CONFIG_PERF_EVENTS stub: does nothing. */
static inline void perf_event_exec(void)
{
}
/* Per-task notification hooks: empty when !CONFIG_PERF_EVENTS. */
static inline void perf_event_comm(struct task_struct *tsk, bool exec)
{
}

static inline void perf_event_namespaces(struct task_struct *tsk)
{
}

static inline void perf_event_fork(struct task_struct *tsk)
{
}
/* !CONFIG_PERF_EVENTS stub: does nothing, neither buffer is read. */
static inline void
perf_event_text_poke(const void *addr,
		     const void *old_bytes, size_t old_len,
		     const void *new_bytes, size_t new_len)
{
}
/* !CONFIG_PERF_EVENTS stub: does nothing. */
static inline void perf_event_init(void)
{
}
/*
 * !CONFIG_PERF_EVENTS stubs: get always returns -1, put ignores @rctx
 * and does nothing.
 */
static inline int perf_swevent_get_recursion_context(void)
{
	return -1;
}

static inline void perf_swevent_put_recursion_context(int rctx)
{
}
1990static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
/* !CONFIG_PERF_EVENTS stubs: enable and disable both do nothing. */
static inline void perf_event_enable(struct perf_event *event)
{
}

static inline void perf_event_disable(struct perf_event *event)
{
}
/* !CONFIG_PERF_EVENTS stub: ignores @info and always returns -1. */
static inline int __perf_event_disable(void *info)
{
	return -1;
}
/* !CONFIG_PERF_EVENTS stub: does nothing. */
static inline void perf_event_task_tick(void)
{
}
/* !CONFIG_PERF_EVENTS stub: nothing to release, always returns 0. */
static inline int perf_event_release_kernel(struct perf_event *event)
{
	return 0;
}
1996static inline int
1997perf_event_period(struct perf_event *event, u64 value)			{ return -EINVAL; }
1998static inline u64
1999perf_event_pause(struct perf_event *event, bool reset)			{ return 0; }
/* !CONFIG_PERF_EVENTS stub: always returns 0. */
static inline int perf_exclude_event(struct perf_event *event,
				     struct pt_regs *regs)
{
	return 0;
}
2002
2003#endif /* !CONFIG_PERF_EVENTS */
2004
/*
 * perf_restore_debug_store() is an out-of-line function only when both
 * CONFIG_PERF_EVENTS and CONFIG_CPU_SUP_INTEL are defined; every other
 * configuration gets an empty inline.
 */
#if !defined(CONFIG_PERF_EVENTS) || !defined(CONFIG_CPU_SUP_INTEL)
static inline void perf_restore_debug_store(void)
{
}
#else
extern void perf_restore_debug_store(void);
#endif
2010
/*
 * Pass the object @x to perf_output_copy() by address, with sizeof(x)
 * as the length. @x must be an lvalue because its address is taken.
 */
#define perf_output_put(handle, x)					\
	perf_output_copy((handle), &(x), sizeof(x))
2012
2013struct perf_pmu_events_attr {
2014	struct device_attribute		attr;
2015	u64				id;
2016	const char			*event_str;
2017};
2018
2019struct perf_pmu_events_ht_attr {
2020	struct device_attribute		attr;
2021	u64				id;
2022	const char			*event_str_ht;
2023	const char			*event_str_noht;
2024};
2025
2026struct perf_pmu_events_hybrid_attr {
2027	struct device_attribute		attr;
2028	u64				id;
2029	const char			*event_str;
2030	u64				pmu_type;
2031};
2032
2033struct perf_pmu_format_hybrid_attr {
2034	struct device_attribute		attr;
2035	u64				pmu_type;
2036};
2037
2038ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
2039			      char *page);
2040
/*
 * Define a static struct perf_pmu_events_attr called @_var whose
 * attribute is __ATTR(@_name, 0444, @_show, NULL) and whose id is @_id.
 * The expansion already ends with a semicolon.
 */
#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
static struct perf_pmu_events_attr _var = {				\
	.id	= _id,							\
	.attr	= __ATTR(_name, 0444, _show, NULL),			\
};
2046
/*
 * Define a static struct perf_pmu_events_attr called @_var that is
 * shown through perf_event_sysfs_show(), has id 0 and carries @_str as
 * its event string. The expansion already ends with a semicolon.
 */
#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			\
static struct perf_pmu_events_attr _var = {				\
	.event_str = _str,						\
	.id	   = 0,							\
	.attr	   = __ATTR(_name, 0444, perf_event_sysfs_show, NULL),	\
};
2053
/*
 * Expression form: builds an anonymous one-element array of
 * struct perf_pmu_events_attr (attribute @_name, mode 0444, callback
 * @_show, id @_id) and evaluates to the address of element 0's
 * .attr.attr member.
 */
#define PMU_EVENT_ATTR_ID(_name, _show, _id)				\
	(&((struct perf_pmu_events_attr[]) {				\
		{							\
			.attr = __ATTR(_name, 0444, _show, NULL),	\
			.id   = _id,					\
		}							\
	})[0].attr.attr)
2059
/*
 * Generate the static sysfs show routine <_name>_show(), which writes
 * @_format followed by a newline into @page and returns the formatting
 * call's result.
 *
 * @_format must be a string literal: it is concatenated with "\n" to
 * form the format string, and its size is checked against PAGE_SIZE at
 * build time.
 *
 * sysfs_emit() replaces the former unbounded sprintf(), as
 * Documentation/filesystems/sysfs.rst requires for show() callbacks.
 * The macro no longer ends in a dangling line continuation.
 */
#define PMU_FORMAT_ATTR_SHOW(_name, _format)				\
static ssize_t								\
_name##_show(struct device *dev,					\
			       struct device_attribute *attr,		\
			       char *page)				\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sysfs_emit(page, _format "\n");				\
}
2069
/*
 * Emit <_name>_show() through PMU_FORMAT_ATTR_SHOW() and then define
 * the static device attribute format_attr_<_name> with __ATTR_RO(_name).
 * The expansion has no trailing semicolon; the user supplies it.
 */
#define PMU_FORMAT_ATTR(_name, _format)					\
	PMU_FORMAT_ATTR_SHOW(_name, _format)				\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
2074
/*
 * Performance counter hotplug functions.
 *
 * Without CONFIG_PERF_EVENTS both names expand to NULL instead of
 * naming a function.
 */
#ifndef CONFIG_PERF_EVENTS
# define perf_event_init_cpu		NULL
# define perf_event_exit_cpu		NULL
#else
extern int perf_event_init_cpu(unsigned int cpu);
extern int perf_event_exit_cpu(unsigned int cpu);
#endif
2083
2084extern void arch_perf_update_userpage(struct perf_event *event,
2085				      struct perf_event_mmap_page *userpg,
2086				      u64 now);
2087
/*
 * Snapshot branch stack on software events.
 *
 * The branch stack can be very useful in understanding software events.
 * For example, when a long function, e.g. sys_perf_event_open, returns an
 * errno, it is not obvious why the function failed. The branch stack can
 * provide very helpful information in this type of scenario.
 *
 * On a software event, it is necessary to stop the hardware branch
 * recorder fast. Otherwise, the hardware register/buffer will be flushed
 * with entries of the triggering event. Therefore, a static call is used
 * to stop the hardware recorder.
 */
2101
/*
 * @cnt is the number of entries allocated for @entries.
 * Return the number of entries copied to @entries.
 */
2106typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
2107					   unsigned int cnt);
2108DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
2109
/*
 * Default perf_lopwr_cb(): ignores @mode and does nothing. It is left
 * out when PERF_NEEDS_LOPWR_CB is defined.
 */
#if !defined(PERF_NEEDS_LOPWR_CB)
static inline void perf_lopwr_cb(bool mode) { }
#endif
2115
2116#endif /* _LINUX_PERF_EVENT_H */