Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/resctrl: Fix SNC detection

Now that the x86 topology code has a sensible nodes-per-package
measure that does not depend on the online status of CPUs, use it
to determine the SNC mode.

Note that when Cluster on Die (CoD) is configured on older systems this
will also show multiple NUMA nodes per package. Intel Resource Director
Technology is incompatible with CoD. Print a warning and do not use the
fixup MSR_RMID_SNC_CONFIG.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Link: https://patch.msgid.link/aaCxbbgjL6OZ6VMd@agluck-desk3
Link: https://patch.msgid.link/20260303110100.367976706@infradead.org

authored by

Tony Luck and committed by
Peter Zijlstra
59674fc9 528d89a4

+5 -31
+5 -31
arch/x86/kernel/cpu/resctrl/monitor.c
··· 364 364 msr_clear_bit(MSR_RMID_SNC_CONFIG, 0); 365 365 } 366 366 367 - /* CPU models that support MSR_RMID_SNC_CONFIG */ 367 + /* CPU models that support SNC and MSR_RMID_SNC_CONFIG */ 368 368 static const struct x86_cpu_id snc_cpu_ids[] __initconst = { 369 369 X86_MATCH_VFM(INTEL_ICELAKE_X, 0), 370 370 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0), ··· 375 375 {} 376 376 }; 377 377 378 - /* 379 - * There isn't a simple hardware bit that indicates whether a CPU is running 380 - * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the 381 - * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in 382 - * the same NUMA node as CPU0. 383 - * It is not possible to accurately determine SNC state if the system is 384 - * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes 385 - * to L3 caches. It will be OK if system is booted with hyperthreading 386 - * disabled (since this doesn't affect the ratio). 387 - */ 388 378 static __init int snc_get_config(void) 389 379 { 390 - struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE); 391 - const cpumask_t *node0_cpumask; 392 - int cpus_per_node, cpus_per_l3; 393 - int ret; 380 + int ret = topology_num_nodes_per_package(); 394 381 395 - if (!x86_match_cpu(snc_cpu_ids) || !ci) 382 + if (ret > 1 && !x86_match_cpu(snc_cpu_ids)) { 383 + pr_warn("CoD enabled system? Resctrl not supported\n"); 396 384 return 1; 397 - 398 - cpus_read_lock(); 399 - if (num_online_cpus() != num_present_cpus()) 400 - pr_warn("Some CPUs offline, SNC detection may be incorrect\n"); 401 - cpus_read_unlock(); 402 - 403 - node0_cpumask = cpumask_of_node(cpu_to_node(0)); 404 - 405 - cpus_per_node = cpumask_weight(node0_cpumask); 406 - cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map); 407 - 408 - if (!cpus_per_node || !cpus_per_l3) 409 - return 1; 410 - 411 - ret = cpus_per_l3 / cpus_per_node; 385 + } 412 386 413 387 /* sanity check: Only valid results are 1, 2, 3, 4, 6 */ 414 388 switch (ret) {