Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cacheinfo: calculate size of per-CPU data cache slice

This can be used to estimate the size of the data cache slice that can be
used by one CPU under ideal circumstances. Both DATA caches and UNIFIED
caches are used in the calculation, so users need to consider the impact
of code cache usage.

Because the cache inclusive/non-inclusive information isn't available now,
we just use the size of the per-CPU slice of LLC to make the result more
predictable across architectures. This may be improved when more cache
information is available in the future.

A brute-force algorithm that iterates over all online CPUs is used to avoid
allocating an extra cpumask, especially in the offline callback.

Link: https://lkml.kernel.org/r/20231016053002.756205-3-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Huang Ying and committed by
Andrew Morton
94a3bfe4 ca71fe1a

+49 -1
+48 -1
drivers/base/cacheinfo.c
··· 898 898 return rc; 899 899 } 900 900 901 + /* 902 + * Calculate the size of the per-CPU data cache slice. This can be 903 + * used to estimate the size of the data cache slice that can be used 904 + * by one CPU under ideal circumstances. UNIFIED caches are counted 905 + * in addition to DATA caches. So, please consider code cache usage 906 + * when use the result. 907 + * 908 + * Because the cache inclusive/non-inclusive information isn't 909 + * available, we just use the size of the per-CPU slice of LLC to make 910 + * the result more predictable across architectures. 911 + */ 912 + static void update_per_cpu_data_slice_size_cpu(unsigned int cpu) 913 + { 914 + struct cpu_cacheinfo *ci; 915 + struct cacheinfo *llc; 916 + unsigned int nr_shared; 917 + 918 + if (!last_level_cache_is_valid(cpu)) 919 + return; 920 + 921 + ci = ci_cacheinfo(cpu); 922 + llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1); 923 + 924 + if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED) 925 + return; 926 + 927 + nr_shared = cpumask_weight(&llc->shared_cpu_map); 928 + if (nr_shared) 929 + ci->per_cpu_data_slice_size = llc->size / nr_shared; 930 + } 931 + 932 + static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu) 933 + { 934 + unsigned int icpu; 935 + 936 + for_each_online_cpu(icpu) { 937 + if (!cpu_online && icpu == cpu) 938 + continue; 939 + update_per_cpu_data_slice_size_cpu(icpu); 940 + } 941 + } 942 + 901 943 static int cacheinfo_cpu_online(unsigned int cpu) 902 944 { 903 945 int rc = detect_cache_attributes(cpu); ··· 948 906 return rc; 949 907 rc = cache_add_dev(cpu); 950 908 if (rc) 951 - free_cache_attributes(cpu); 909 + goto err; 910 + update_per_cpu_data_slice_size(true, cpu); 911 + return 0; 912 + err: 913 + free_cache_attributes(cpu); 952 914 return rc; 953 915 } 954 916 ··· 962 916 cpu_cache_sysfs_exit(cpu); 963 917 964 918 free_cache_attributes(cpu); 919 + update_per_cpu_data_slice_size(false, cpu); 965 920 return 0; 966 921 } 
967 922
+1
include/linux/cacheinfo.h
··· 73 73 74 74 struct cpu_cacheinfo { 75 75 struct cacheinfo *info_list; 76 + unsigned int per_cpu_data_slice_size; 76 77 unsigned int num_levels; 77 78 unsigned int num_leaves; 78 79 bool cpu_map_populated;