Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'x86_cpu_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cpuid updates from Borislav Petkov:

- Add a feature flag which denotes AMD CPUs supporting workload
classification with the purpose of using such hints when making
scheduling decisions

- Determine the boost numerator for each AMD core based on its type:
efficiency or performance, in the cppc driver

- Add the type of a CPU to the topology CPU descriptor with the goal of
supporting and making decisions based on the type of the respective
core

- Add a feature flag to denote AMD cores which have heterogeneous
topology and enable SD_ASYM_PACKING for those

- Check microcode revisions before disabling PCID on Intel

- Cleanups and fixlets

* tag 'x86_cpu_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/cpu: Remove redundant CONFIG_NUMA guard around numa_add_cpu()
x86/cpu: Fix FAM5_QUARK_X1000 to use X86_MATCH_VFM()
x86/cpu: Fix formatting of cpuid_bits[] in scattered.c
x86/cpufeatures: Add X86_FEATURE_AMD_WORKLOAD_CLASS feature bit
x86/amd: Use heterogeneous core topology for identifying boost numerator
x86/cpu: Add CPU type to struct cpuinfo_topology
x86/cpu: Enable SD_ASYM_PACKING for PKG domain on AMD
x86/cpufeatures: Add X86_FEATURE_AMD_HETEROGENEOUS_CORES
x86/cpufeatures: Rename X86_FEATURE_FAST_CPPC to have AMD prefix
x86/mm: Don't disable PCID when INVLPG has been fixed by microcode

+149 -49
+3 -1
arch/x86/include/asm/cpufeatures.h
··· 473 473 #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ 474 474 #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ 475 475 #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ 476 - #define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ 476 + #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ 477 + #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ 478 + #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ 477 479 478 480 /* 479 481 * BUG word(s)
+6 -1
arch/x86/include/asm/intel-family.h
··· 177 177 #define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */ 178 178 179 179 /* Family 5 */ 180 - #define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ 181 180 #define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ 182 181 183 182 /* Family 19 */ 184 183 #define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */ 184 + 185 + /* CPU core types */ 186 + enum intel_cpu_type { 187 + INTEL_CPU_TYPE_ATOM = 0x20, 188 + INTEL_CPU_TYPE_CORE = 0x40, 189 + }; 185 190 186 191 #endif /* _ASM_X86_INTEL_FAMILY_H */
+18
arch/x86/include/asm/processor.h
··· 105 105 // Cache level topology IDs 106 106 u32 llc_id; 107 107 u32 l2c_id; 108 + 109 + // Hardware defined CPU-type 110 + union { 111 + u32 cpu_type; 112 + struct { 113 + // CPUID.1A.EAX[23-0] 114 + u32 intel_native_model_id :24; 115 + // CPUID.1A.EAX[31-24] 116 + u32 intel_type :8; 117 + }; 118 + struct { 119 + // CPUID 0x80000026.EBX 120 + u32 amd_num_processors :16, 121 + amd_power_eff_ranking :8, 122 + amd_native_model_id :4, 123 + amd_type :4; 124 + }; 125 + }; 108 126 }; 109 127 110 128 struct cpuinfo_x86 {
+9
arch/x86/include/asm/topology.h
··· 114 114 TOPO_MAX_DOMAIN, 115 115 }; 116 116 117 + enum x86_topology_cpu_type { 118 + TOPO_CPU_TYPE_PERFORMANCE, 119 + TOPO_CPU_TYPE_EFFICIENCY, 120 + TOPO_CPU_TYPE_UNKNOWN, 121 + }; 122 + 117 123 struct x86_topology_system { 118 124 unsigned int dom_shifts[TOPO_MAX_DOMAIN]; 119 125 unsigned int dom_size[TOPO_MAX_DOMAIN]; ··· 154 148 extern unsigned int __max_threads_per_core; 155 149 extern unsigned int __num_threads_per_package; 156 150 extern unsigned int __num_cores_per_package; 151 + 152 + const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c); 153 + enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c); 157 154 158 155 static inline unsigned int topology_max_packages(void) 159 156 {
+23
arch/x86/kernel/acpi/cppc.c
··· 239 239 */ 240 240 int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) 241 241 { 242 + enum x86_topology_cpu_type core_type = get_topology_cpu_type(&cpu_data(cpu)); 242 243 bool prefcore; 243 244 int ret; 245 + u32 tmp; 244 246 245 247 ret = amd_detect_prefcore(&prefcore); 246 248 if (ret) ··· 268 266 break; 269 267 } 270 268 } 269 + 270 + /* detect if running on heterogeneous design */ 271 + if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) { 272 + switch (core_type) { 273 + case TOPO_CPU_TYPE_UNKNOWN: 274 + pr_warn("Undefined core type found for cpu %d\n", cpu); 275 + break; 276 + case TOPO_CPU_TYPE_PERFORMANCE: 277 + /* use the max scale for performance cores */ 278 + *numerator = CPPC_HIGHEST_PERF_PERFORMANCE; 279 + return 0; 280 + case TOPO_CPU_TYPE_EFFICIENCY: 281 + /* use the highest perf value for efficiency cores */ 282 + ret = amd_get_highest_perf(cpu, &tmp); 283 + if (ret) 284 + return ret; 285 + *numerator = tmp; 286 + return 0; 287 + } 288 + } 289 + 271 290 *numerator = CPPC_HIGHEST_PERF_PREFCORE; 272 291 273 292 return 0;
-2
arch/x86/kernel/cpu/common.c
··· 1906 1906 /* Init Machine Check Exception if available. */ 1907 1907 mcheck_cpu_init(c); 1908 1908 1909 - #ifdef CONFIG_NUMA 1910 1909 numa_add_cpu(smp_processor_id()); 1911 - #endif 1912 1910 } 1913 1911 1914 1912 /*
+1
arch/x86/kernel/cpu/debugfs.c
··· 22 22 seq_printf(m, "die_id: %u\n", c->topo.die_id); 23 23 seq_printf(m, "cu_id: %u\n", c->topo.cu_id); 24 24 seq_printf(m, "core_id: %u\n", c->topo.core_id); 25 + seq_printf(m, "cpu_type: %s\n", get_topology_cpu_type_name(c)); 25 26 seq_printf(m, "logical_pkg_id: %u\n", c->topo.logical_pkg_id); 26 27 seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id); 27 28 seq_printf(m, "llc_id: %u\n", c->topo.llc_id);
+29 -27
arch/x86/kernel/cpu/scattered.c
··· 24 24 * levels are different and there is a separate entry for each. 25 25 */ 26 26 static const struct cpuid_bit cpuid_bits[] = { 27 - { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, 28 - { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, 29 - { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, 30 - { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, 31 - { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, 32 - { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, 33 - { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, 34 - { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, 35 - { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 }, 36 - { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, 37 - { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, 38 - { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, 39 - { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 }, 40 - { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 }, 41 - { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 }, 42 - { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 }, 43 - { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 }, 44 - { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 }, 45 - { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, 46 - { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, 47 - { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, 48 - { X86_FEATURE_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, 49 - { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, 50 - { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, 51 - { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, 52 - { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, 53 - { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, 27 + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, 28 + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, 29 + { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, 30 + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, 31 
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, 32 + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, 33 + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, 34 + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, 35 + { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 }, 36 + { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, 37 + { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, 38 + { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, 39 + { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 }, 40 + { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 }, 41 + { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 }, 42 + { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 }, 43 + { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 }, 44 + { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 }, 45 + { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, 46 + { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, 47 + { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, 48 + { X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 }, 49 + { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, 50 + { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, 51 + { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, 52 + { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, 53 + { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, 54 + { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, 54 55 { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, 56 + { X86_FEATURE_AMD_HETEROGENEOUS_CORES, CPUID_EAX, 30, 0x80000026, 0 }, 55 57 { 0, 0, 0, 0, 0 } 56 58 }; 57 59
+3
arch/x86/kernel/cpu/topology_amd.c
··· 182 182 if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) 183 183 has_topoext = cpu_parse_topology_ext(tscan); 184 184 185 + if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) 186 + tscan->c->topo.cpu_type = cpuid_ebx(0x80000026); 187 + 185 188 if (!has_topoext && !parse_8000_0008(tscan)) 186 189 return; 187 190
+34
arch/x86/kernel/cpu/topology_common.c
··· 3 3 4 4 #include <xen/xen.h> 5 5 6 + #include <asm/intel-family.h> 6 7 #include <asm/apic.h> 7 8 #include <asm/processor.h> 8 9 #include <asm/smp.h> ··· 25 24 for (dom++; dom < TOPO_MAX_DOMAIN; dom++) { 26 25 tscan->dom_shifts[dom] = tscan->dom_shifts[dom - 1]; 27 26 tscan->dom_ncpus[dom] = tscan->dom_ncpus[dom - 1]; 27 + } 28 + } 29 + 30 + enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c) 31 + { 32 + if (c->x86_vendor == X86_VENDOR_INTEL) { 33 + switch (c->topo.intel_type) { 34 + case INTEL_CPU_TYPE_ATOM: return TOPO_CPU_TYPE_EFFICIENCY; 35 + case INTEL_CPU_TYPE_CORE: return TOPO_CPU_TYPE_PERFORMANCE; 36 + } 37 + } 38 + if (c->x86_vendor == X86_VENDOR_AMD) { 39 + switch (c->topo.amd_type) { 40 + case 0: return TOPO_CPU_TYPE_PERFORMANCE; 41 + case 1: return TOPO_CPU_TYPE_EFFICIENCY; 42 + } 43 + } 44 + 45 + return TOPO_CPU_TYPE_UNKNOWN; 46 + } 47 + 48 + const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c) 49 + { 50 + switch (get_topology_cpu_type(c)) { 51 + case TOPO_CPU_TYPE_PERFORMANCE: 52 + return "performance"; 53 + case TOPO_CPU_TYPE_EFFICIENCY: 54 + return "efficiency"; 55 + default: 56 + return "unknown"; 28 57 } 29 58 } 30 59 ··· 118 87 .cu_id = 0xff, 119 88 .llc_id = BAD_APICID, 120 89 .l2c_id = BAD_APICID, 90 + .cpu_type = TOPO_CPU_TYPE_UNKNOWN, 121 91 }; 122 92 struct cpuinfo_x86 *c = tscan->c; 123 93 struct { ··· 164 132 case X86_VENDOR_INTEL: 165 133 if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) 166 134 parse_legacy(tscan); 135 + if (c->cpuid_level >= 0x1a) 136 + c->topo.cpu_type = cpuid_eax(0x1a); 167 137 break; 168 138 case X86_VENDOR_HYGON: 169 139 if (IS_ENABLED(CONFIG_CPU_SUP_HYGON))
+3 -2
arch/x86/kernel/smpboot.c
··· 497 497 498 498 static int x86_die_flags(void) 499 499 { 500 - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) 501 - return x86_sched_itmt_flags(); 500 + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU) || 501 + cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) 502 + return x86_sched_itmt_flags(); 502 503 503 504 return 0; 504 505 }
+14 -9
arch/x86/mm/init.c
··· 263 263 } 264 264 265 265 /* 266 - * INVLPG may not properly flush Global entries 267 - * on these CPUs when PCIDs are enabled. 266 + * INVLPG may not properly flush Global entries on 267 + * these CPUs. New microcode fixes the issue. 268 268 */ 269 269 static const struct x86_cpu_id invlpg_miss_ids[] = { 270 - X86_MATCH_VFM(INTEL_ALDERLAKE, 0), 271 - X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0), 272 - X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0), 273 - X86_MATCH_VFM(INTEL_RAPTORLAKE, 0), 274 - X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0), 275 - X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0), 270 + X86_MATCH_VFM(INTEL_ALDERLAKE, 0x2e), 271 + X86_MATCH_VFM(INTEL_ALDERLAKE_L, 0x42c), 272 + X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, 0x11), 273 + X86_MATCH_VFM(INTEL_RAPTORLAKE, 0x118), 274 + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, 0x4117), 275 + X86_MATCH_VFM(INTEL_RAPTORLAKE_S, 0x2e), 276 276 {} 277 277 }; 278 278 279 279 static void setup_pcid(void) 280 280 { 281 + const struct x86_cpu_id *invlpg_miss_match; 282 + 281 283 if (!IS_ENABLED(CONFIG_X86_64)) 282 284 return; 283 285 284 286 if (!boot_cpu_has(X86_FEATURE_PCID)) 285 287 return; 286 288 287 - if (x86_match_cpu(invlpg_miss_ids)) { 289 + invlpg_miss_match = x86_match_cpu(invlpg_miss_ids); 290 + 291 + if (invlpg_miss_match && 292 + boot_cpu_data.microcode < invlpg_miss_match->driver_data) { 288 293 pr_info("Incomplete global flushes, disabling PCID"); 289 294 setup_clear_cpu_cap(X86_FEATURE_PCID); 290 295 return;
+1 -2
arch/x86/platform/efi/quirks.c
··· 656 656 } 657 657 658 658 static const struct x86_cpu_id efi_capsule_quirk_ids[] = { 659 - X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, 660 - &qrk_capsule_setup_info), 659 + X86_MATCH_VFM(INTEL_QUARK_X1000, &qrk_capsule_setup_info), 661 660 { } 662 661 }; 663 662
+1 -1
arch/x86/platform/intel-quark/imr.c
··· 569 569 } 570 570 571 571 static const struct x86_cpu_id imr_ids[] __initconst = { 572 - X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL), 572 + X86_MATCH_VFM(INTEL_QUARK_X1000, NULL), 573 573 {} 574 574 }; 575 575
+1 -1
arch/x86/platform/intel-quark/imr_selftest.c
··· 105 105 } 106 106 107 107 static const struct x86_cpu_id imr_ids[] __initconst = { 108 - X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL), 108 + X86_MATCH_VFM(INTEL_QUARK_X1000, NULL), 109 109 {} 110 110 }; 111 111
+1 -1
drivers/cpufreq/amd-pstate.c
··· 850 850 851 851 transition_delay_ns = cppc_get_transition_latency(cpu); 852 852 if (transition_delay_ns == CPUFREQ_ETERNAL) { 853 - if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC)) 853 + if (cpu_feature_enabled(X86_FEATURE_AMD_FAST_CPPC)) 854 854 return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY; 855 855 else 856 856 return AMD_PSTATE_TRANSITION_DELAY;
+1 -1
drivers/thermal/intel/intel_quark_dts_thermal.c
··· 401 401 } 402 402 403 403 static const struct x86_cpu_id qrk_thermal_ids[] __initconst = { 404 - X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL), 404 + X86_MATCH_VFM(INTEL_QUARK_X1000, NULL), 405 405 {} 406 406 }; 407 407 MODULE_DEVICE_TABLE(x86cpu, qrk_thermal_ids);
+1 -1
tools/arch/x86/include/asm/cpufeatures.h
··· 472 472 #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ 473 473 #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ 474 474 #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ 475 - #define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ 475 + #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ 476 476 477 477 /* 478 478 * BUG word(s)