perf/x86/intel: Fix unchecked MSR access error for Alder Lake N

On some Alder Lake N machines, the unchecked MSR access error below may be
triggered.

[ 0.088017] rcu: Hierarchical SRCU implementation.
[ 0.088017] unchecked MSR access error: WRMSR to 0x38f (tried to write
0x0001000f0000003f) at rIP: 0xffffffffb5684de8 (native_write_msr+0x8/0x30)
[ 0.088017] Call Trace:
[ 0.088017] <TASK>
[ 0.088017] __intel_pmu_enable_all.constprop.46+0x4a/0xa0

Alder Lake N only has e-cores, so the X86_FEATURE_HYBRID_CPU flag is not
set and perf cannot retrieve the correct CPU type via
get_this_hybrid_cpu_type(). The model-specific get_hybrid_cpu_type()
fallback is hardcoded to return the p-core type, so the wrong CPU type is
handed to the Alder Lake N PMU.
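
For reference, the pre-fix detection flow is roughly the following
(a condensed sketch of arch/x86/kernel/cpu/intel.c and
arch/x86/events/intel/core.c around the time of this fix, not the
literal code):

  /* arch/x86/kernel/cpu/intel.c */
  u8 get_this_hybrid_cpu_type(void)
  {
  	/* ADL-N is e-core only, so X86_FEATURE_HYBRID_CPU is not set... */
  	if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
  		return 0;	/* ...and "unknown" (0) is returned here */

  	return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
  }

  /* arch/x86/events/intel/core.c */
  static u8 adl_get_hybrid_cpu_type(void)
  {
  	return hybrid_big;	/* hardcoded to p-core */
  }

  /* init_hybrid_pmu() then falls back to the hardcoded p-core type: */
  cpu_type = get_this_hybrid_cpu_type();
  if (!cpu_type && x86_pmu.get_hybrid_cpu_type)
  	cpu_type = x86_pmu.get_hybrid_cpu_type();

With the p-core PMU description in place, __intel_pmu_enable_all()
writes counter enable bits that do not exist on the e-core PMU to
MSR_CORE_PERF_GLOBAL_CTRL (0x38f), which faults.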

Since Alder Lake N isn't in fact a hybrid CPU, remove ALDERLAKE_N from
the rest of the {ALDER,RAPTOR}LAKE cases and create a dedicated
non-hybrid PMU setup for it.

The differences between Gracemont and the previous Tremont are:
- Number of GP counters
- Load and store latency events
- PEBS event_constraints
- Instruction latency support
- Data source encoding
- Memory access latency encoding
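
With the dedicated Gracemont setup these capabilities are exposed on
ADL-N as on any non-hybrid CPU. As an illustration only (not part of
this patch), a minimal userspace counter for the mem-stores encoding
added by grt_mem_attrs in the diff below (event=0xd0, umask=0x6) could
look as follows; mem-loads additionally takes ldlat=3 in config1 and is
typically used with PEBS sampling instead, e.g. via perf mem record:

  /* Illustrative sketch: count Gracemont mem-stores via a raw event.
   * Raw config = (umask << 8) | event = 0x6d0; error handling trimmed.
   */
  #include <linux/perf_event.h>
  #include <sys/syscall.h>
  #include <unistd.h>
  #include <string.h>
  #include <stdio.h>

  int main(void)
  {
  	struct perf_event_attr attr;
  	long long count = 0;
  	int fd;

  	memset(&attr, 0, sizeof(attr));
  	attr.size = sizeof(attr);
  	attr.type = PERF_TYPE_RAW;
  	attr.config = 0x6d0;		/* umask=0x6, event=0xd0 */
  	attr.exclude_kernel = 1;

  	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
  	if (fd < 0) {
  		perror("perf_event_open");
  		return 1;
  	}
  	/* ...workload under measurement runs here... */
  	if (read(fd, &count, sizeof(count)) == sizeof(count))
  		printf("mem-stores: %lld\n", count);
  	close(fd);
  	return 0;
  }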

Fixes: c2a960f7c574 ("perf/x86: Add new Alder Lake and Raptor Lake support")
Reported-by: Jianfeng Gao <jianfeng.gao@intel.com>
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220831142702.153110-1-kan.liang@linux.intel.com

Changed files: +48 -3

arch/x86/events/intel/core.c (+39 -1)
···
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(mem-loads,	mem_ld_grt,	"event=0xd0,umask=0x5,ldlat=3");
+EVENT_ATTR_STR(mem-stores,	mem_st_grt,	"event=0xd0,umask=0x6");
+
+static struct attribute *grt_mem_attrs[] = {
+	EVENT_PTR(mem_ld_grt),
+	EVENT_PTR(mem_st_grt),
+	NULL
+};
+
 static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
 	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
···
 		name = "Tremont";
 		break;
 
+	case INTEL_FAM6_ALDERLAKE_N:
+		x86_pmu.mid_ack = true;
+		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+		hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_grt_extra_regs;
+
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.pebs_block = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+
+		intel_pmu_pebs_data_source_grt();
+		x86_pmu.pebs_latency_data = adl_latency_data_small;
+		x86_pmu.get_event_constraints = tnt_get_event_constraints;
+		x86_pmu.limit_period = spr_limit_period;
+		td_attr = tnt_events_attrs;
+		mem_attr = grt_mem_attrs;
+		extra_attr = nhm_format_attr;
+		pr_cont("Gracemont events, ");
+		name = "gracemont";
+		break;
+
 	case INTEL_FAM6_WESTMERE:
 	case INTEL_FAM6_WESTMERE_EP:
 	case INTEL_FAM6_WESTMERE_EX:
···
 
 	case INTEL_FAM6_ALDERLAKE:
 	case INTEL_FAM6_ALDERLAKE_L:
-	case INTEL_FAM6_ALDERLAKE_N:
 	case INTEL_FAM6_RAPTORLAKE:
 	case INTEL_FAM6_RAPTORLAKE_P:
 		/*

arch/x86/events/intel/ds.c (+7 -2)
···
 	__intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
 }
 
-static void __init intel_pmu_pebs_data_source_grt(u64 *data_source)
+static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source)
 {
 	data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
 	data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
 	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
+}
+
+void __init intel_pmu_pebs_data_source_grt(void)
+{
+	__intel_pmu_pebs_data_source_grt(pebs_data_source);
 }
 
 void __init intel_pmu_pebs_data_source_adl(void)
···
 
 	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
 	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
-	intel_pmu_pebs_data_source_grt(data_source);
+	__intel_pmu_pebs_data_source_grt(data_source);
 }
 
 static u64 precise_store_data(u64 status)

arch/x86/events/perf_event.h (+2 -0)
···
 
 void intel_pmu_pebs_data_source_adl(void);
 
+void intel_pmu_pebs_data_source_grt(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);