arch/x86/events/amd/core.c at v6.19

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / arch / x86 / events / amd / core.c
at v6.19 1595 lines 43 kB view raw
wrap content
   1// SPDX-License-Identifier: GPL-2.0-only
   2#include <linux/perf_event.h>
   3#include <linux/jump_label.h>
   4#include <linux/export.h>
   5#include <linux/kvm_types.h>
   6#include <linux/types.h>
   7#include <linux/init.h>
   8#include <linux/slab.h>
   9#include <linux/delay.h>
  10#include <linux/jiffies.h>
  11#include <asm/apicdef.h>
  12#include <asm/apic.h>
  13#include <asm/msr.h>
  14#include <asm/nmi.h>
  15
  16#include "../perf_event.h"
  17
/*
 * Per-CPU end of the window (in jiffies) during which an otherwise
 * un-handled NMI is still claimed by perf; written by
 * amd_pmu_adjust_nmi_window() whenever a counter was handled.
 */
static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
/*
 * Window length added to jiffies when perf_nmi_tstamp is updated.
 * NOTE(review): the value is assigned outside the code visible here.
 */
static unsigned long perf_nmi_window;

/* AMD Event 0xFFF: Merge.  Used with Large Increment per Cycle events */
#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
/* Merge event encoding with the event-select enable bit OR-ed in */
#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)

/* PMC Enable and Overflow bits for PerfCntrGlobal* registers */
static u64 amd_pmu_global_cntr_mask __read_mostly;
  27
/*
 * Generic cache event encodings, indexed as [cache][operation][result].
 *
 * NOTE(review): entries are either a raw event encoding, 0 or -1; 0 and -1
 * presumably distinguish "no event available" from "combination not
 * meaningful" - confirm against the consumer of this table.
 */
static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
		[ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
		[ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
 132
/*
 * Family 17h flavour of the generic cache event table, indexed as
 * [cache][operation][result].
 *
 * NOTE(review): as in the non-f17h table, entries are a raw encoding, 0 or
 * -1; the exact meaning of 0 versus -1 is defined by the consumer, which is
 * outside this view - confirm.
 */
static __initconst const u64 amd_hw_cache_event_ids_f17h
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
		[C(RESULT_MISS)]   = 0xc860, /* L2$ access from DC Miss */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
		[C(RESULT_MISS)]   = 0,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches  */
		[C(RESULT_MISS)]   = 0x0081, /* Instruction cache misses   */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
		[C(RESULT_MISS)]   = 0xf045, /* L2 DTLB misses (PT walks) */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
		[C(RESULT_MISS)]   = 0xff85, /* L1 ITLB misses, L2 misses */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr.      */
		[C(RESULT_MISS)]   = 0x00c3, /* Retired Mispredicted BI    */
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0,
		[C(RESULT_MISS)]   = 0,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)]   = -1,
	},
},
};
 236
/*
 * AMD Performance Monitor K7 and later, up to and including Family 16h:
 *
 * Maps generic PERF_COUNT_HW_* indices to raw event encodings.  This is the
 * table amd_pmu_event_map() falls back to when none of its Zen checks match.
 * Indices without an initializer are implicitly zero.
 */
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x077d,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x077e,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
};
 251
/*
 * AMD Performance Monitor Family 17h and later:
 *
 * Returned by amd_pmu_event_map() when X86_FEATURE_ZEN1 is enabled and
 * neither the Zen2 nor the Zen4 check matched first.
 */
static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0xff60,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0964,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x0287,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x0187,
};
 266
/*
 * Generic event map returned by amd_pmu_event_map() when X86_FEATURE_ZEN2 is
 * enabled or the CPU family is >= 0x19 (and the Zen4 check did not match).
 * There is no PERF_COUNT_HW_STALLED_CYCLES_BACKEND initializer, so that slot
 * is implicitly zero.
 */
static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0xff60,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0964,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00a9,
};
 277
/*
 * Generic event map returned by amd_pmu_event_map() when X86_FEATURE_ZEN4 is
 * enabled or the CPU family is >= 0x1a.  Same entries as the Zen2 table plus
 * PERF_COUNT_HW_REF_CPU_CYCLES, whose encoding has bit 32 set (i.e. it uses
 * the upper event-select bits, config:32-35).
 */
static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0xff60,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0964,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00a9,
	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x100000120,
};
 289
 290static u64 amd_pmu_event_map(int hw_event)
 291{
 292	if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a)
 293		return amd_zen4_perfmon_event_map[hw_event];
 294
 295	if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
 296		return amd_zen2_perfmon_event_map[hw_event];
 297
 298	if (cpu_feature_enabled(X86_FEATURE_ZEN1))
 299		return amd_zen1_perfmon_event_map[hw_event];
 300
 301	return amd_perfmon_event_map[hw_event];
 302}
 303
 304/*
 305 * Previously calculated offsets
 306 */
 307static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
 308static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
 309
 310/*
 311 * Legacy CPUs:
 312 *   4 counters starting at 0xc0010000 each offset by 1
 313 *
 314 * CPUs with core performance counter extensions:
 315 *   6 counters starting at 0xc0010200 each offset by 2
 316 */
 317static inline int amd_pmu_addr_offset(int index, bool eventsel)
 318{
 319	int offset;
 320
 321	if (!index)
 322		return index;
 323
 324	if (eventsel)
 325		offset = event_offsets[index];
 326	else
 327		offset = count_offsets[index];
 328
 329	if (offset)
 330		return offset;
 331
 332	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
 333		offset = index;
 334	else
 335		offset = index << 1;
 336
 337	if (eventsel)
 338		event_offsets[index] = offset;
 339	else
 340		count_offsets[index] = offset;
 341
 342	return offset;
 343}
 344
 345/*
 346 * AMD64 events are detected based on their event codes.
 347 */
 348static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
 349{
 350	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
 351}
 352
 353static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
 354{
 355	if (!(x86_pmu.flags & PMU_FL_PAIR))
 356		return false;
 357
 358	switch (amd_get_event_code(hwc)) {
 359	case 0x003:	return true;	/* Retired SSE/AVX FLOPs */
 360	default:	return false;
 361	}
 362}
 363
 364DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config);
 365
 366static int amd_core_hw_config(struct perf_event *event)
 367{
 368	if (event->attr.exclude_host && event->attr.exclude_guest)
 369		/*
 370		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
 371		 * and will count in both modes. We don't want to count in that
 372		 * case so we emulate no-counting by setting US = OS = 0.
 373		 */
 374		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
 375				      ARCH_PERFMON_EVENTSEL_OS);
 376	else if (event->attr.exclude_host)
 377		event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
 378	else if (event->attr.exclude_guest)
 379		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
 380
 381	if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
 382		event->hw.flags |= PERF_X86_EVENT_PAIR;
 383
 384	if (has_branch_stack(event))
 385		return static_call(amd_pmu_branch_hw_config)(event);
 386
 387	return 0;
 388}
 389
 390static inline int amd_is_nb_event(struct hw_perf_event *hwc)
 391{
 392	return (hwc->config & 0xe0) == 0xe0;
 393}
 394
 395static inline int amd_has_nb(struct cpu_hw_events *cpuc)
 396{
 397	struct amd_nb *nb = cpuc->amd_nb;
 398
 399	return nb && nb->nb_id != -1;
 400}
 401
 402static int amd_pmu_hw_config(struct perf_event *event)
 403{
 404	int ret;
 405
 406	/* pass precise event sampling to ibs: */
 407	if (event->attr.precise_ip && get_ibs_caps())
 408		return forward_event_to_ibs(event);
 409
 410	if (has_branch_stack(event) && !x86_pmu.lbr_nr)
 411		return -EOPNOTSUPP;
 412
 413	ret = x86_pmu_hw_config(event);
 414	if (ret)
 415		return ret;
 416
 417	if (event->attr.type == PERF_TYPE_RAW)
 418		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
 419
 420	return amd_core_hw_config(event);
 421}
 422
 423static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
 424					   struct perf_event *event)
 425{
 426	struct amd_nb *nb = cpuc->amd_nb;
 427	int i;
 428
 429	/*
 430	 * need to scan whole list because event may not have
 431	 * been assigned during scheduling
 432	 *
 433	 * no race condition possible because event can only
 434	 * be removed on one CPU at a time AND PMU is disabled
 435	 * when we come here
 436	 */
 437	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 438		struct perf_event *tmp = event;
 439
 440		if (try_cmpxchg(nb->owners + i, &tmp, NULL))
 441			break;
 442	}
 443}
 444
 445 /*
 446  * AMD64 NorthBridge events need special treatment because
 447  * counter access needs to be synchronized across all cores
 448  * of a package. Refer to BKDG section 3.12
 449  *
 450  * NB events are events measuring L3 cache, Hypertransport
 451  * traffic. They are identified by an event code >= 0xe00.
  * They measure events on the NorthBridge which is shared
 453  * by all cores on a package. NB events are counted on a
 454  * shared set of counters. When a NB event is programmed
 455  * in a counter, the data actually comes from a shared
 456  * counter. Thus, access to those counters needs to be
 457  * synchronized.
 458  *
 459  * We implement the synchronization such that no two cores
 460  * can be measuring NB events using the same counters. Thus,
 461  * we maintain a per-NB allocation table. The available slot
 462  * is propagated using the event_constraint structure.
 463  *
 464  * We provide only one choice for each NB event based on
 465  * the fact that only NB events have restrictions. Consequently,
 466  * if a counter is available, there is a guarantee the NB event
 467  * will be assigned to it. If no slot is available, an empty
 468  * constraint is returned and scheduling will eventually fail
 469  * for this event.
 470  *
  * Note that all cores attached to the same NB compete for the same
 472  * counters to host NB events, this is why we use atomic ops. Some
 473  * multi-chip CPUs may have more than one NB.
 474  *
 475  * Given that resources are allocated (cmpxchg), they must be
 476  * eventually freed for others to use. This is accomplished by
 477  * calling __amd_put_nb_event_constraints()
 478  *
 479  * Non NB events are not impacted by this restriction.
 480  */
/*
 * Claim an NB counter slot for @event and return the single-counter
 * constraint describing it.
 *
 * @c limits the candidate slots; NULL means "unconstrained".  For a fake
 * cpuc nothing is claimed and @c (or &unconstrained) is returned as is.
 * Returns &emptyconstraint when no slot could be claimed.
 */
static struct event_constraint *
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			       struct event_constraint *c)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old;
	int idx, new = -1;	/* new: slot claimed so far, -1 if none */

	if (!c)
		c = &unconstrained;

	if (cpuc->is_fake)
		return c;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
		if (new == -1 || hwc->idx == idx)
			/* assign free slot, prefer hwc->idx */
			old = cmpxchg(nb->owners + idx, NULL, event);
		else if (nb->owners[idx] == event)
			/* event already present */
			old = event;
		else
			continue;

		/* slot is owned by a different event: try the next one */
		if (old && old != event)
			continue;

		/* reassign to this slot */
		if (new != -1)
			cmpxchg(nb->owners + new, event, NULL);
		new = idx;

		/* already present, reuse */
		if (old == event)
			break;
	}

	if (new == -1)
		return &emptyconstraint;

	return &nb->event_constraints[new];
}
 534
 535static struct amd_nb *amd_alloc_nb(int cpu)
 536{
 537	struct amd_nb *nb;
 538	int i;
 539
 540	nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
 541	if (!nb)
 542		return NULL;
 543
 544	nb->nb_id = -1;
 545
 546	/*
 547	 * initialize all possible NB constraints
 548	 */
 549	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 550		__set_bit(i, nb->event_constraints[i].idxmsk);
 551		nb->event_constraints[i].weight = 1;
 552	}
 553	return nb;
 554}
 555
 556typedef void (amd_pmu_branch_reset_t)(void);
 557DEFINE_STATIC_CALL_NULL(amd_pmu_branch_reset, amd_pmu_branch_reset_t);
 558
 559static void amd_pmu_cpu_reset(int cpu)
 560{
 561	if (x86_pmu.lbr_nr)
 562		static_call(amd_pmu_branch_reset)();
 563
 564	if (x86_pmu.version < 2)
 565		return;
 566
 567	/* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
 568	wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
 569
 570	/*
 571	 * Clear freeze and overflow bits i.e. PerfCntrGLobalStatus.LbrFreeze
 572	 * and PerfCntrGLobalStatus.PerfCntrOvfl
 573	 */
 574	wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
 575	       GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask);
 576}
 577
 578static int amd_pmu_cpu_prepare(int cpu)
 579{
 580	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 581
 582	cpuc->lbr_sel = kzalloc_node(sizeof(struct er_account), GFP_KERNEL,
 583				     cpu_to_node(cpu));
 584	if (!cpuc->lbr_sel)
 585		return -ENOMEM;
 586
 587	WARN_ON_ONCE(cpuc->amd_nb);
 588
 589	if (!x86_pmu.amd_nb_constraints)
 590		return 0;
 591
 592	cpuc->amd_nb = amd_alloc_nb(cpu);
 593	if (cpuc->amd_nb)
 594		return 0;
 595
 596	kfree(cpuc->lbr_sel);
 597	cpuc->lbr_sel = NULL;
 598
 599	return -ENOMEM;
 600}
 601
 602static void amd_pmu_cpu_starting(int cpu)
 603{
 604	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 605	void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
 606	struct amd_nb *nb;
 607	int i, nb_id;
 608
 609	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 610	amd_pmu_cpu_reset(cpu);
 611
 612	if (!x86_pmu.amd_nb_constraints)
 613		return;
 614
 615	nb_id = topology_amd_node_id(cpu);
 616	WARN_ON_ONCE(nb_id == BAD_APICID);
 617
 618	for_each_online_cpu(i) {
 619		nb = per_cpu(cpu_hw_events, i).amd_nb;
 620		if (WARN_ON_ONCE(!nb))
 621			continue;
 622
 623		if (nb->nb_id == nb_id) {
 624			*onln = cpuc->amd_nb;
 625			cpuc->amd_nb = nb;
 626			break;
 627		}
 628	}
 629
 630	cpuc->amd_nb->nb_id = nb_id;
 631	cpuc->amd_nb->refcnt++;
 632}
 633
 634static void amd_pmu_cpu_dead(int cpu)
 635{
 636	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
 637
 638	kfree(cpuhw->lbr_sel);
 639	cpuhw->lbr_sel = NULL;
 640
 641	if (!x86_pmu.amd_nb_constraints)
 642		return;
 643
 644	if (cpuhw->amd_nb) {
 645		struct amd_nb *nb = cpuhw->amd_nb;
 646
 647		if (nb->nb_id == -1 || --nb->refcnt == 0)
 648			kfree(nb);
 649
 650		cpuhw->amd_nb = NULL;
 651	}
 652}
 653
/* Write @ctl to the PerfCntrGlobalCtl MSR */
static __always_inline void amd_pmu_set_global_ctl(u64 ctl)
{
	wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
}
 658
 659static inline u64 amd_pmu_get_global_status(void)
 660{
 661	u64 status;
 662
 663	/* PerfCntrGlobalStatus is read-only */
 664	rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
 665
 666	return status;
 667}
 668
/* Acknowledge (clear) the PerfCntrGlobalStatus bits that are set in @status */
static inline void amd_pmu_ack_global_status(u64 status)
{
	/*
	 * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
	 * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
	 * clears the same bit in PerfCntrGlobalStatus
	 */

	wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
}
 679
 680static bool amd_pmu_test_overflow_topbit(int idx)
 681{
 682	u64 counter;
 683
 684	rdmsrq(x86_pmu_event_addr(idx), counter);
 685
 686	return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
 687}
 688
 689static bool amd_pmu_test_overflow_status(int idx)
 690{
 691	return amd_pmu_get_global_status() & BIT_ULL(idx);
 692}
 693
 694DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit);
 695
 696/*
 697 * When a PMC counter overflows, an NMI is used to process the event and
 698 * reset the counter. NMI latency can result in the counter being updated
 699 * before the NMI can run, which can result in what appear to be spurious
 700 * NMIs. This function is intended to wait for the NMI to run and reset
 701 * the counter to avoid possible unhandled NMI messages.
 702 */
 703#define OVERFLOW_WAIT_COUNT	50
 704
 705static void amd_pmu_wait_on_overflow(int idx)
 706{
 707	unsigned int i;
 708
 709	/*
 710	 * Wait for the counter to be reset if it has overflowed. This loop
 711	 * should exit very, very quickly, but just in case, don't wait
 712	 * forever...
 713	 */
 714	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
 715		if (!static_call(amd_pmu_test_overflow)(idx))
 716			break;
 717
 718		/* Might be in IRQ context, so can't sleep */
 719		udelay(1);
 720	}
 721}
 722
 723static void amd_pmu_check_overflow(void)
 724{
 725	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 726	int idx;
 727
 728	/*
 729	 * This shouldn't be called from NMI context, but add a safeguard here
 730	 * to return, since if we're in NMI context we can't wait for an NMI
 731	 * to reset an overflowed counter value.
 732	 */
 733	if (in_nmi())
 734		return;
 735
 736	/*
 737	 * Check each counter for overflow and wait for it to be reset by the
 738	 * NMI if it has overflowed. This relies on the fact that all active
 739	 * counters are always enabled when this function is called and
 740	 * ARCH_PERFMON_EVENTSEL_INT is always set.
 741	 */
 742	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 743		if (!test_bit(idx, cpuc->active_mask))
 744			continue;
 745
 746		amd_pmu_wait_on_overflow(idx);
 747	}
 748}
 749
/* Enable @event by delegating to the generic x86_pmu_enable_event() */
static void amd_pmu_enable_event(struct perf_event *event)
{
	x86_pmu_enable_event(event);
}
 754
 755static void amd_pmu_enable_all(int added)
 756{
 757	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 758	int idx;
 759
 760	amd_brs_enable_all();
 761
 762	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 763		/* only activate events which are marked as active */
 764		if (!test_bit(idx, cpuc->active_mask))
 765			continue;
 766
 767		/*
 768		 * FIXME: cpuc->events[idx] can become NULL in a subtle race
 769		 * condition with NMI->throttle->x86_pmu_stop().
 770		 */
 771		if (cpuc->events[idx])
 772			amd_pmu_enable_event(cpuc->events[idx]);
 773	}
 774}
 775
 776static void amd_pmu_v2_enable_event(struct perf_event *event)
 777{
 778	struct hw_perf_event *hwc = &event->hw;
 779
 780	/*
 781	 * Testing cpu_hw_events.enabled should be skipped in this case unlike
 782	 * in x86_pmu_enable_event().
 783	 *
 784	 * Since cpu_hw_events.enabled is set only after returning from
 785	 * x86_pmu_start(), the PMCs must be programmed and kept ready.
 786	 * Counting starts only after x86_pmu_enable_all() is called.
 787	 */
 788	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 789}
 790
 791static __always_inline void amd_pmu_core_enable_all(void)
 792{
 793	amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
 794}
 795
 796static void amd_pmu_v2_enable_all(int added)
 797{
 798	amd_pmu_lbr_enable_all();
 799	amd_pmu_core_enable_all();
 800}
 801
 802static void amd_pmu_disable_event(struct perf_event *event)
 803{
 804	x86_pmu_disable_event(event);
 805
 806	/*
 807	 * This can be called from NMI context (via x86_pmu_stop). The counter
 808	 * may have overflowed, but either way, we'll never see it get reset
 809	 * by the NMI if we're already in the NMI. And the NMI latency support
 810	 * below will take care of any pending NMI that might have been
 811	 * generated by the overflow.
 812	 */
 813	if (in_nmi())
 814		return;
 815
 816	amd_pmu_wait_on_overflow(event->hw.idx);
 817}
 818
/*
 * Disable BRS and all counters, then wait for overflowed active counters to
 * be reset by their NMI (see amd_pmu_check_overflow()).
 */
static void amd_pmu_disable_all(void)
{
	amd_brs_disable_all();
	x86_pmu_disable_all();
	amd_pmu_check_overflow();
}
 825
 826static __always_inline void amd_pmu_core_disable_all(void)
 827{
 828	amd_pmu_set_global_ctl(0);
 829}
 830
 831static void amd_pmu_v2_disable_all(void)
 832{
 833	amd_pmu_core_disable_all();
 834	amd_pmu_lbr_disable_all();
 835	amd_pmu_check_overflow();
 836}
 837
 838DEFINE_STATIC_CALL_NULL(amd_pmu_branch_add, *x86_pmu.add);
 839
 840static void amd_pmu_add_event(struct perf_event *event)
 841{
 842	if (needs_branch_stack(event))
 843		static_call(amd_pmu_branch_add)(event);
 844}
 845
 846DEFINE_STATIC_CALL_NULL(amd_pmu_branch_del, *x86_pmu.del);
 847
 848static void amd_pmu_del_event(struct perf_event *event)
 849{
 850	if (needs_branch_stack(event))
 851		static_call(amd_pmu_branch_del)(event);
 852}
 853
 854/*
 855 * Because of NMI latency, if multiple PMC counters are active or other sources
 856 * of NMIs are received, the perf NMI handler can handle one or more overflowed
 857 * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
 858 * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
 859 * back-to-back NMI support won't be active. This PMC handler needs to take into
 860 * account that this can occur, otherwise this could result in unknown NMI
 861 * messages being issued. Examples of this is PMC overflow while in the NMI
 862 * handler when multiple PMCs are active or PMC overflow while handling some
 863 * other source of an NMI.
 864 *
 865 * Attempt to mitigate this by creating an NMI window in which un-handled NMIs
 866 * received during this window will be claimed. This prevents extending the
 867 * window past when it is possible that latent NMIs should be received. The
 868 * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has
 869 * handled a counter. When an un-handled NMI is received, it will be claimed
 870 * only if arriving within that window.
 871 */
 872static inline int amd_pmu_adjust_nmi_window(int handled)
 873{
 874	/*
 875	 * If a counter was handled, record a timestamp such that un-handled
 876	 * NMIs will be claimed if arriving within that window.
 877	 */
 878	if (handled) {
 879		this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
 880
 881		return handled;
 882	}
 883
 884	if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
 885		return NMI_DONE;
 886
 887	return NMI_HANDLED;
 888}
 889
 890static int amd_pmu_handle_irq(struct pt_regs *regs)
 891{
 892	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 893	int handled;
 894	int pmu_enabled;
 895
 896	/*
 897	 * Save the PMU state.
 898	 * It needs to be restored when leaving the handler.
 899	 */
 900	pmu_enabled = cpuc->enabled;
 901	cpuc->enabled = 0;
 902
 903	amd_brs_disable_all();
 904
 905	/* Drain BRS is in use (could be inactive) */
 906	if (cpuc->lbr_users)
 907		amd_brs_drain();
 908
 909	/* Process any counter overflows */
 910	handled = x86_pmu_handle_irq(regs);
 911
 912	cpuc->enabled = pmu_enabled;
 913	if (pmu_enabled)
 914		amd_brs_enable_all();
 915
 916	return amd_pmu_adjust_nmi_window(handled);
 917}
 918
/*
 * AMD-specific callback invoked through perf_snapshot_branch_stack static
 * call, defined in include/linux/perf_event.h. See its definition for API
 * details. It's up to caller to provide enough space in *entries* to fit all
 * LBR records, otherwise returned result will be truncated to *cnt* entries.
 *
 * Returns the number of entries copied, i.e. min(@cnt, x86_pmu.lbr_nr).
 */
static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
{
	struct cpu_hw_events *cpuc;
	unsigned long flags;

	/*
	 * The sequence of steps to freeze LBR should be completely inlined
	 * and contain no branches to minimize contamination of LBR snapshot
	 */
	local_irq_save(flags);
	amd_pmu_core_disable_all();
	__amd_pmu_lbr_disable();

	/* Looked up only after the freeze, keeping the sequence above minimal */
	cpuc = this_cpu_ptr(&cpu_hw_events);

	amd_pmu_lbr_read();
	cnt = min(cnt, x86_pmu.lbr_nr);
	memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);

	/* Resume LBR and the core counters before re-enabling interrupts */
	amd_pmu_v2_enable_all(0);
	local_irq_restore(flags);

	return cnt;
}
 949
/*
 * PMU interrupt handler for PMUs driven through the PerfCntrGlobal* MSRs.
 *
 * Stops the core counters, reads PerfCntrGlobalStatus, updates every active
 * event and delivers a sample for each one whose overflow bit is set, then
 * acknowledges the status bits and resumes counting if the PMU was enabled
 * on entry.
 *
 * Returns the result of amd_pmu_adjust_nmi_window() for the number of
 * overflows handled.
 */
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	/* Unhandled-overflow bits already warned about, across all calls */
	static atomic64_t status_warned = ATOMIC64_INIT(0);
	u64 reserved, status, mask, new_bits, prev_bits;
	struct perf_sample_data data;
	struct hw_perf_event *hwc;
	struct perf_event *event;
	int handled = 0, idx;
	bool pmu_enabled;

	/*
	 * Save the PMU state as it needs to be restored when leaving the
	 * handler
	 */
	pmu_enabled = cpuc->enabled;
	cpuc->enabled = 0;

	/* Stop counting but do not disable LBR */
	amd_pmu_core_disable_all();

	status = amd_pmu_get_global_status();

	/* Check if any overflows are pending */
	if (!status)
		goto done;

	/* Read branch records */
	if (x86_pmu.lbr_nr) {
		amd_pmu_lbr_read();
		status &= ~GLOBAL_STATUS_LBRS_FROZEN;
	}

	reserved = status & ~amd_pmu_global_cntr_mask;
	if (reserved)
		pr_warn_once("Reserved PerfCntrGlobalStatus bits are set (0x%llx), please consider updating microcode\n",
			     reserved);

	/* Clear any reserved bits set by buggy microcode */
	status &= amd_pmu_global_cntr_mask;

	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

		/* Every active event is updated, overflowed or not */
		event = cpuc->events[idx];
		hwc = &event->hw;
		x86_perf_event_update(event);
		mask = BIT_ULL(idx);

		if (!(status & mask))
			continue;

		/* Event overflow */
		handled++;
		status &= ~mask;
		perf_sample_data_init(&data, 0, hwc->last_period);

		/* No sample is delivered when set_period returns zero */
		if (!x86_perf_event_set_period(event))
			continue;

		perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);

		perf_event_overflow(event, &data, regs);
	}

	/*
	 * It should never be the case that some overflows are not handled as
	 * the corresponding PMCs are expected to be inactive according to the
	 * active_mask
	 */
	if (status > 0) {
		prev_bits = atomic64_fetch_or(status, &status_warned);
		/* A new bit was set for the very first time. */
		new_bits = status & ~prev_bits;
		WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
	}

	/*
	 * Clear overflow and freeze bits.  At this point status only holds
	 * overflow bits that were not handled above, so ~status acknowledges
	 * every bit except those.
	 */
	amd_pmu_ack_global_status(~status);

	/*
	 * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
	 * PMI entry is not set by the local APIC when a PMC overflow occurs
	 */
	inc_irq_stat(apic_perf_irqs);

done:
	cpuc->enabled = pmu_enabled;

	/* Resume counting only if PMU is active */
	if (pmu_enabled)
		amd_pmu_core_enable_all();

	return amd_pmu_adjust_nmi_window(handled);
}
1046
1047static struct event_constraint *
1048amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
1049			  struct perf_event *event)
1050{
1051	/*
1052	 * if not NB event or no NB, then no constraints
1053	 */
1054	if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
1055		return &unconstrained;
1056
1057	return __amd_get_nb_event_constraints(cpuc, event, NULL);
1058}
1059
1060static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
1061				      struct perf_event *event)
1062{
1063	if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
1064		__amd_put_nb_event_constraints(cpuc, event);
1065}
1066
/*
 * Format attributes: each entry names a field and the bits of the event
 * "config" value that the field occupies.
 */
PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
PMU_FORMAT_ATTR(umask,	"config:8-15"	);
PMU_FORMAT_ATTR(edge,	"config:18"	);
PMU_FORMAT_ATTR(inv,	"config:23"	);
PMU_FORMAT_ATTR(cmask,	"config:24-31"	);

/* NULL-terminated list of the format attributes; hooked up via .format_attrs */
static struct attribute *amd_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};
1081
/* AMD Family 15h */

/* Selects the event-type nibble (bits 7:4) of an event code */
#define AMD_EVENT_TYPE_MASK	0x000000F0ULL

/*
 * Event-type values, used as case labels on an event code masked with
 * AMD_EVENT_TYPE_MASK. Entries written as "a ... b" expand to case ranges.
 */
#define AMD_EVENT_FP		0x00000000ULL ... 0x00000010ULL
#define AMD_EVENT_LS		0x00000020ULL ... 0x00000030ULL
#define AMD_EVENT_DC		0x00000040ULL ... 0x00000050ULL
#define AMD_EVENT_CU		0x00000060ULL ... 0x00000070ULL
#define AMD_EVENT_IC_DE		0x00000080ULL ... 0x00000090ULL
#define AMD_EVENT_EX_LS		0x000000C0ULL
#define AMD_EVENT_DE		0x000000D0ULL
#define AMD_EVENT_NB		0x000000E0ULL ... 0x000000F0ULL
1094
1095/*
1096 * AMD family 15h event code/PMC mappings:
1097 *
1098 * type = event_code & 0x0F0:
1099 *
1100 * 0x000	FP	PERF_CTL[5:3]
1101 * 0x010	FP	PERF_CTL[5:3]
1102 * 0x020	LS	PERF_CTL[5:0]
1103 * 0x030	LS	PERF_CTL[5:0]
1104 * 0x040	DC	PERF_CTL[5:0]
1105 * 0x050	DC	PERF_CTL[5:0]
1106 * 0x060	CU	PERF_CTL[2:0]
1107 * 0x070	CU	PERF_CTL[2:0]
1108 * 0x080	IC/DE	PERF_CTL[2:0]
1109 * 0x090	IC/DE	PERF_CTL[2:0]
1110 * 0x0A0	---
1111 * 0x0B0	---
1112 * 0x0C0	EX/LS	PERF_CTL[5:0]
1113 * 0x0D0	DE	PERF_CTL[2:0]
1114 * 0x0E0	NB	NB_PERF_CTL[3:0]
1115 * 0x0F0	NB	NB_PERF_CTL[3:0]
1116 *
1117 * Exceptions:
1118 *
1119 * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
1120 * 0x003	FP	PERF_CTL[3]
1121 * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
1122 * 0x00B	FP	PERF_CTL[3]
1123 * 0x00D	FP	PERF_CTL[3]
1124 * 0x023	DE	PERF_CTL[2:0]
1125 * 0x02D	LS	PERF_CTL[3]
1126 * 0x02E	LS	PERF_CTL[3,0]
1127 * 0x031	LS	PERF_CTL[2:0] (**)
1128 * 0x043	CU	PERF_CTL[2:0]
1129 * 0x045	CU	PERF_CTL[2:0]
1130 * 0x046	CU	PERF_CTL[2:0]
1131 * 0x054	CU	PERF_CTL[2:0]
1132 * 0x055	CU	PERF_CTL[2:0]
1133 * 0x08F	IC	PERF_CTL[0]
1134 * 0x187	DE	PERF_CTL[0]
1135 * 0x188	DE	PERF_CTL[0]
1136 * 0x0DB	EX	PERF_CTL[5:0]
1137 * 0x0DC	LS	PERF_CTL[5:0]
1138 * 0x0DD	LS	PERF_CTL[5:0]
1139 * 0x0DE	LS	PERF_CTL[5:0]
1140 * 0x0DF	LS	PERF_CTL[5:0]
1141 * 0x1C0	EX	PERF_CTL[5:3]
1142 * 0x1D6	EX	PERF_CTL[5:0]
1143 * 0x1D8	EX	PERF_CTL[5:0]
1144 *
1145 * (*)  depending on the umask all FPU counters may be used
1146 * (**) only one unitmask enabled at a time
1147 */
1148
/*
 * Counter-set constraints returned by amd_get_event_constraints_f15h().
 * The second argument is the bitmask of allowed counters and the name suffix
 * spells the PERF_CTL range: PMC0 = PERF_CTL[0], PMC20 = PERF_CTL[2:0],
 * PMC3 = PERF_CTL[3], PMC30 = PERF_CTL[3,0], PMC50 = PERF_CTL[5:0],
 * PMC53 = PERF_CTL[5:3].
 */
static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
1155
1156static struct event_constraint *
1157amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
1158			       struct perf_event *event)
1159{
1160	struct hw_perf_event *hwc = &event->hw;
1161	unsigned int event_code = amd_get_event_code(hwc);
1162
1163	switch (event_code & AMD_EVENT_TYPE_MASK) {
1164	case AMD_EVENT_FP:
1165		switch (event_code) {
1166		case 0x000:
1167			if (!(hwc->config & 0x0000F000ULL))
1168				break;
1169			if (!(hwc->config & 0x00000F00ULL))
1170				break;
1171			return &amd_f15_PMC3;
1172		case 0x004:
1173			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
1174				break;
1175			return &amd_f15_PMC3;
1176		case 0x003:
1177		case 0x00B:
1178		case 0x00D:
1179			return &amd_f15_PMC3;
1180		}
1181		return &amd_f15_PMC53;
1182	case AMD_EVENT_LS:
1183	case AMD_EVENT_DC:
1184	case AMD_EVENT_EX_LS:
1185		switch (event_code) {
1186		case 0x023:
1187		case 0x043:
1188		case 0x045:
1189		case 0x046:
1190		case 0x054:
1191		case 0x055:
1192			return &amd_f15_PMC20;
1193		case 0x02D:
1194			return &amd_f15_PMC3;
1195		case 0x02E:
1196			return &amd_f15_PMC30;
1197		case 0x031:
1198			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
1199				return &amd_f15_PMC20;
1200			return &emptyconstraint;
1201		case 0x1C0:
1202			return &amd_f15_PMC53;
1203		default:
1204			return &amd_f15_PMC50;
1205		}
1206	case AMD_EVENT_CU:
1207	case AMD_EVENT_IC_DE:
1208	case AMD_EVENT_DE:
1209		switch (event_code) {
1210		case 0x08F:
1211		case 0x187:
1212		case 0x188:
1213			return &amd_f15_PMC0;
1214		case 0x0DB ... 0x0DF:
1215		case 0x1D6:
1216		case 0x1D8:
1217			return &amd_f15_PMC50;
1218		default:
1219			return &amd_f15_PMC20;
1220		}
1221	case AMD_EVENT_NB:
1222		/* moved to uncore.c */
1223		return &emptyconstraint;
1224	default:
1225		return &emptyconstraint;
1226	}
1227}
1228
/*
 * Constraint handed out for events that need a counter pair. Zero-initialized
 * here; amd_core_pmu_init() fills it in on Family 17h and later.
 */
static struct event_constraint pair_constraint;
1230
1231static struct event_constraint *
1232amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
1233			       struct perf_event *event)
1234{
1235	struct hw_perf_event *hwc = &event->hw;
1236
1237	if (amd_is_pair_event_code(hwc))
1238		return &pair_constraint;
1239
1240	return &unconstrained;
1241}
1242
1243static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
1244					   struct perf_event *event)
1245{
1246	struct hw_perf_event *hwc = &event->hw;
1247
1248	if (is_counter_pair(hwc))
1249		--cpuc->n_pair;
1250}
1251
1252/*
 * Because of the way BRS operates with inactive and active phases, and
1254 * the link to one counter, it is not possible to have two events using BRS
1255 * scheduled at the same time. There would be an issue with enforcing the
1256 * period of each one and given that the BRS saturates, it would not be possible
1257 * to guarantee correlated content for all events. Therefore, in situations
1258 * where multiple events want to use BRS, the kernel enforces mutual exclusion.
1259 * Exclusion is enforced by choosing only one counter for events using BRS.
1260 * The event scheduling logic will then automatically multiplex the
1261 * events and ensure that at most one event is actively using BRS.
1262 *
1263 * The BRS counter could be any counter, but there is no constraint on Fam19h,
1264 * therefore all counters are equal and thus we pick the first one: PMC0
1265 */
/* BRS user with a regular event: counter mask 0x1, i.e. PMC0 only */
static struct event_constraint amd_fam19h_brs_cntr0_constraint =
	EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK);

/*
 * BRS user with a counter-pair event: same counter mask (0x1), additionally
 * flagged PERF_X86_EVENT_PAIR.
 */
static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =
	__EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR);
1271
1272static struct event_constraint *
1273amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx,
1274			  struct perf_event *event)
1275{
1276	struct hw_perf_event *hwc = &event->hw;
1277	bool has_brs = has_amd_brs(hwc);
1278
1279	/*
1280	 * In case BRS is used with an event requiring a counter pair,
1281	 * the kernel allows it but only on counter 0 & 1 to enforce
1282	 * multiplexing requiring to protect BRS in case of multiple
1283	 * BRS users
1284	 */
1285	if (amd_is_pair_event_code(hwc)) {
1286		return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint
1287			       : &pair_constraint;
1288	}
1289
1290	if (has_brs)
1291		return &amd_fam19h_brs_cntr0_constraint;
1292
1293	return &unconstrained;
1294}
1295
1296
1297static ssize_t amd_event_sysfs_show(char *page, u64 config)
1298{
1299	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
1300		    (config & AMD64_EVENTSEL_EVENT) >> 24;
1301
1302	return x86_event_sysfs_show(page, config, event);
1303}
1304
1305static void amd_pmu_limit_period(struct perf_event *event, s64 *left)
1306{
1307	/*
1308	 * Decrease period by the depth of the BRS feature to get the last N
1309	 * taken branches and approximate the desired period
1310	 */
1311	if (has_branch_stack(event) && *left > x86_pmu.lbr_nr)
1312		*left -= x86_pmu.lbr_nr;
1313}
1314
/*
 * Baseline AMD PMU description. amd_pmu_init() copies it into x86_pmu, after
 * which amd_core_pmu_init() overrides individual fields depending on the
 * features of the boot CPU.
 */
static __initconst const struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= amd_pmu_handle_irq,
	.disable_all		= amd_pmu_disable_all,
	.enable_all		= amd_pmu_enable_all,
	.enable			= amd_pmu_enable_event,
	.disable		= amd_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.addr_offset            = amd_pmu_addr_offset,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.cntr_mask64		= GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
	.add			= amd_pmu_add_event,
	.del			= amd_pmu_del_event,
	.cntval_bits		= 48,
	.cntval_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.format_attrs		= amd_format_attr,
	.events_sysfs_show	= amd_event_sysfs_show,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,

	/*
	 * Cleared again by amd_core_pmu_init() when the core perfctr
	 * extensions are present and by amd_pmu_init() on single-CPU systems.
	 */
	.amd_nb_constraints	= 1,
};
1349
1350static ssize_t branches_show(struct device *cdev,
1351			      struct device_attribute *attr,
1352			      char *buf)
1353{
1354	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
1355}
1356
/* Read-only device attribute "branches", backed by branches_show() */
static DEVICE_ATTR_RO(branches);

/* NULL-terminated attribute list for the "caps" group */
static struct attribute *amd_pmu_branches_attrs[] = {
	&dev_attr_branches.attr,
	NULL,
};
1363
1364static umode_t
1365amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i)
1366{
1367	return x86_pmu.lbr_nr ? attr->mode : 0;
1368}
1369
/*
 * "caps" attribute group carrying the "branches" attribute; visibility is
 * decided per attribute by amd_branches_is_visible().
 */
static struct attribute_group group_caps_amd_branches = {
	.name  = "caps",
	.attrs = amd_pmu_branches_attrs,
	.is_visible = amd_branches_is_visible,
};
1375
1376#ifdef CONFIG_PERF_EVENTS_AMD_BRS
1377
/*
 * Named event "branch-brs" whose string is "event=" followed by the
 * stringified AMD_FAM19H_BRS_EVENT value.
 */
EVENT_ATTR_STR(branch-brs, amd_branch_brs,
	       "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");

/* NULL-terminated attribute list for the BRS "events" group */
static struct attribute *amd_brs_events_attrs[] = {
	EVENT_PTR(amd_branch_brs),
	NULL,
};
1385
1386static umode_t
1387amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
1388{
1389	return static_cpu_has(X86_FEATURE_BRS) && x86_pmu.lbr_nr ?
1390	       attr->mode : 0;
1391}
1392
/*
 * "events" attribute group carrying the BRS event; visibility is decided per
 * attribute by amd_brs_is_visible().
 */
static struct attribute_group group_events_amd_brs = {
	.name       = "events",
	.attrs      = amd_brs_events_attrs,
	.is_visible = amd_brs_is_visible,
};
1398
1399#endif	/* CONFIG_PERF_EVENTS_AMD_BRS */
1400
/*
 * NULL-terminated list of attribute groups assigned to x86_pmu.attr_update
 * by amd_core_pmu_init(). The BRS events group is only compiled in with
 * CONFIG_PERF_EVENTS_AMD_BRS.
 */
static const struct attribute_group *amd_attr_update[] = {
	&group_caps_amd_branches,
#ifdef CONFIG_PERF_EVENTS_AMD_BRS
	&group_events_amd_brs,
#endif
	NULL,
};
1408
/*
 * Adjust x86_pmu for CPUs that have the core performance counter extensions
 * (X86_FEATURE_PERFCTR_CORE); without that feature nothing is changed.
 *
 * In order: switch to the F15h control/counter MSRs, install the PerfMonV2
 * handlers when available, pick the family-specific constraint callbacks and
 * set up either LBR or BRS branch sampling.
 *
 * Returns 0 on every path.
 */
static int __init amd_core_pmu_init(void)
{
	union cpuid_0x80000022_ebx ebx;
	u64 even_ctr_mask = 0ULL;
	int i;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		return 0;

	/* Avoid calculating the value each time in the NMI handler */
	perf_nmi_window = msecs_to_jiffies(100);

	/*
	 * If core performance counter extensions exists, we must use
	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
	 * amd_pmu_addr_offset().
	 */
	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
	x86_pmu.cntr_mask64	= GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);

	/* Check for Performance Monitoring v2 support */
	if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);

		/* Update PMU version for later usage */
		x86_pmu.version = 2;

		/* Find the number of available Core PMCs */
		x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);

		/* The global status/control mask covers exactly the core PMCs */
		amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;

		/* Update PMC handling functions */
		x86_pmu.enable_all = amd_pmu_v2_enable_all;
		x86_pmu.disable_all = amd_pmu_v2_disable_all;
		x86_pmu.enable = amd_pmu_v2_enable_event;
		x86_pmu.handle_irq = amd_pmu_v2_handle_irq;
		static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status);
	}

	/*
	 * AMD Core perfctr has separate MSRs for the NB events, see
	 * the amd/uncore.c driver.
	 */
	x86_pmu.amd_nb_constraints = 0;

	if (boot_cpu_data.x86 == 0x15) {
		pr_cont("Fam15h ");
		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
	}
	if (boot_cpu_data.x86 >= 0x17) {
		pr_cont("Fam17h+ ");
		/*
		 * Family 17h and compatibles have constraints for Large
		 * Increment per Cycle events: they may only be assigned an
		 * even numbered counter that has a consecutive adjacent odd
		 * numbered counter following it.
		 */
		for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
			even_ctr_mask |= BIT_ULL(i);

		/* Pair events: even counters only, half the counter count */
		pair_constraint = (struct event_constraint)
				    __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
				    x86_pmu_max_num_counters(NULL) / 2, 0,
				    PERF_X86_EVENT_PAIR);

		x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
		x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
		x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
		x86_pmu.flags |= PMU_FL_PAIR;
	}

	/* LBR and BRS are mutually exclusive features */
	if (!amd_pmu_lbr_init()) {
		/* LBR requires flushing on context switch */
		x86_pmu.sched_task = amd_pmu_lbr_sched_task;
		static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config);
		static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
		static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
		static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);

		/* Only support branch_stack snapshot on perfmon v2 */
		if (x86_pmu.handle_irq == amd_pmu_v2_handle_irq)
			static_call_update(perf_snapshot_branch_stack, amd_pmu_v2_snapshot_branch_stack);
	} else if (!amd_brs_init()) {
		/*
		 * BRS requires special event constraints and flushing on ctxsw.
		 */
		x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
		x86_pmu.sched_task = amd_pmu_brs_sched_task;
		x86_pmu.limit_period = amd_pmu_limit_period;

		static_call_update(amd_pmu_branch_hw_config, amd_brs_hw_config);
		static_call_update(amd_pmu_branch_reset, amd_brs_reset);
		static_call_update(amd_pmu_branch_add, amd_pmu_brs_add);
		static_call_update(amd_pmu_branch_del, amd_pmu_brs_del);

		/*
		 * put_event_constraints callback same as Fam17h, set above
		 */

		/* branch sampling must be stopped when entering low power */
		amd_brs_lopwr_init();
	}

	/* Attribute groups decide their own visibility via is_visible */
	x86_pmu.attr_update = amd_attr_update;

	pr_cont("core perfctr, ");
	return 0;
}
1520
1521__init int amd_pmu_init(void)
1522{
1523	int ret;
1524
1525	/* Performance-monitoring supported from K7 and later: */
1526	if (boot_cpu_data.x86 < 6)
1527		return -ENODEV;
1528
1529	x86_pmu = amd_pmu;
1530
1531	ret = amd_core_pmu_init();
1532	if (ret)
1533		return ret;
1534
1535	if (num_possible_cpus() == 1) {
1536		/*
1537		 * No point in allocating data structures to serialize
1538		 * against other CPUs, when there is only the one CPU.
1539		 */
1540		x86_pmu.amd_nb_constraints = 0;
1541	}
1542
1543	if (boot_cpu_data.x86 >= 0x17)
1544		memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids));
1545	else
1546		memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids));
1547
1548	return 0;
1549}
1550
1551static inline void amd_pmu_reload_virt(void)
1552{
1553	if (x86_pmu.version >= 2) {
1554		/*
1555		 * Clear global enable bits, reprogram the PERF_CTL
1556		 * registers with updated perf_ctr_virt_mask and then
1557		 * set global enable bits once again
1558		 */
1559		amd_pmu_v2_disable_all();
1560		amd_pmu_enable_all(0);
1561		amd_pmu_v2_enable_all(0);
1562		return;
1563	}
1564
1565	amd_pmu_disable_all();
1566	amd_pmu_enable_all(0);
1567}
1568
1569void amd_pmu_enable_virt(void)
1570{
1571	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1572
1573	cpuc->perf_ctr_virt_mask = 0;
1574
1575	/* Reload all events */
1576	amd_pmu_reload_virt();
1577}
1578EXPORT_SYMBOL_FOR_KVM(amd_pmu_enable_virt);
1579
1580void amd_pmu_disable_virt(void)
1581{
1582	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1583
1584	/*
1585	 * We only mask out the Host-only bit so that host-only counting works
1586	 * when SVM is disabled. If someone sets up a guest-only counter when
1587	 * SVM is disabled the Guest-only bits still gets set and the counter
1588	 * will not count anything.
1589	 */
1590	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
1591
1592	/* Reload all events */
1593	amd_pmu_reload_virt();
1594}
1595EXPORT_SYMBOL_FOR_KVM(amd_pmu_disable_virt);
Configure Feed

Configure Feed