Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arch_topology: Allow multiple entities to provide scale_freq_tick() callback

This patch attempts to make the frequency scale tick handling generic
enough that other parts of the kernel can also provide their own
implementation of the scale_freq_tick() callback, which is called by the
scheduler periodically to update the per-cpu arch_freq_scale variable.

The implementations now need to provide 'struct scale_freq_data' for the
CPUs for which they have hardware counters available, and a callback
gets registered for each possible CPU in a per-cpu variable.

The arch specific (or ARM AMU) counters are updated to adapt to this and
they take the highest priority if they are available, i.e. they will be
used instead of CPPC based counters for example.

The special code to rebuild the sched domains, in case the invariance
status changes for the system, is moved out of arm64 specific code and is
added to arch_topology.c.

Note that this also defines SCALE_FREQ_SOURCE_CPUFREQ but doesn't use it;
it is added to show that cpufreq also acts as a source of information for
FIE and will be used by default if no other counters are supported for a
platform.

Reviewed-by: Ionela Voinescu <ionela.voinescu@arm.com>
Tested-by: Ionela Voinescu <ionela.voinescu@arm.com>
Acked-by: Will Deacon <will@kernel.org> # for arm64
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

+131 -80
+1 -9
arch/arm64/include/asm/topology.h
··· 17 17 #include <linux/arch_topology.h> 18 18 19 19 void update_freq_counters_refs(void); 20 - void topology_scale_freq_tick(void); 21 - 22 - #ifdef CONFIG_ARM64_AMU_EXTN 23 - /* 24 - * Replace task scheduler's default counter-based 25 - * frequency-invariance scale factor setting. 26 - */ 27 - #define arch_scale_freq_tick topology_scale_freq_tick 28 - #endif /* CONFIG_ARM64_AMU_EXTN */ 29 20 30 21 /* Replace task scheduler's default frequency-invariant accounting */ 22 + #define arch_scale_freq_tick topology_scale_freq_tick 31 23 #define arch_set_freq_scale topology_set_freq_scale 32 24 #define arch_scale_freq_capacity topology_get_freq_scale 33 25 #define arch_scale_freq_invariant topology_scale_freq_invariant
+39 -66
arch/arm64/kernel/topology.c
··· 199 199 return 0; 200 200 } 201 201 202 - static DEFINE_STATIC_KEY_FALSE(amu_fie_key); 203 - #define amu_freq_invariant() static_branch_unlikely(&amu_fie_key) 202 + static void amu_scale_freq_tick(void) 203 + { 204 + u64 prev_core_cnt, prev_const_cnt; 205 + u64 core_cnt, const_cnt, scale; 206 + 207 + prev_const_cnt = this_cpu_read(arch_const_cycles_prev); 208 + prev_core_cnt = this_cpu_read(arch_core_cycles_prev); 209 + 210 + update_freq_counters_refs(); 211 + 212 + const_cnt = this_cpu_read(arch_const_cycles_prev); 213 + core_cnt = this_cpu_read(arch_core_cycles_prev); 214 + 215 + if (unlikely(core_cnt <= prev_core_cnt || 216 + const_cnt <= prev_const_cnt)) 217 + return; 218 + 219 + /* 220 + * /\core arch_max_freq_scale 221 + * scale = ------- * -------------------- 222 + * /\const SCHED_CAPACITY_SCALE 223 + * 224 + * See validate_cpu_freq_invariance_counters() for details on 225 + * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT. 226 + */ 227 + scale = core_cnt - prev_core_cnt; 228 + scale *= this_cpu_read(arch_max_freq_scale); 229 + scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT, 230 + const_cnt - prev_const_cnt); 231 + 232 + scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); 233 + this_cpu_write(arch_freq_scale, (unsigned long)scale); 234 + } 235 + 236 + static struct scale_freq_data amu_sfd = { 237 + .source = SCALE_FREQ_SOURCE_ARCH, 238 + .set_freq_scale = amu_scale_freq_tick, 239 + }; 204 240 205 241 static void amu_fie_setup(const struct cpumask *cpus) 206 242 { 207 - bool invariant; 208 243 int cpu; 209 244 210 245 /* We are already set since the last insmod of cpufreq driver */ ··· 256 221 257 222 cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus); 258 223 259 - invariant = topology_scale_freq_invariant(); 260 - 261 - /* We aren't fully invariant yet */ 262 - if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask)) 263 - return; 264 - 265 - static_branch_enable(&amu_fie_key); 224 + topology_set_scale_freq_source(&amu_sfd, 
amu_fie_cpus); 266 225 267 226 pr_debug("CPUs[%*pbl]: counters will be used for FIE.", 268 227 cpumask_pr_args(cpus)); 269 - 270 - /* 271 - * Task scheduler behavior depends on frequency invariance support, 272 - * either cpufreq or counter driven. If the support status changes as 273 - * a result of counter initialisation and use, retrigger the build of 274 - * scheduling domains to ensure the information is propagated properly. 275 - */ 276 - if (!invariant) 277 - rebuild_sched_domains_energy(); 278 228 } 279 229 280 230 static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val, ··· 302 282 return ret; 303 283 } 304 284 core_initcall(init_amu_fie); 305 - 306 - bool arch_freq_counters_available(const struct cpumask *cpus) 307 - { 308 - return amu_freq_invariant() && 309 - cpumask_subset(cpus, amu_fie_cpus); 310 - } 311 - 312 - void topology_scale_freq_tick(void) 313 - { 314 - u64 prev_core_cnt, prev_const_cnt; 315 - u64 core_cnt, const_cnt, scale; 316 - int cpu = smp_processor_id(); 317 - 318 - if (!amu_freq_invariant()) 319 - return; 320 - 321 - if (!cpumask_test_cpu(cpu, amu_fie_cpus)) 322 - return; 323 - 324 - prev_const_cnt = this_cpu_read(arch_const_cycles_prev); 325 - prev_core_cnt = this_cpu_read(arch_core_cycles_prev); 326 - 327 - update_freq_counters_refs(); 328 - 329 - const_cnt = this_cpu_read(arch_const_cycles_prev); 330 - core_cnt = this_cpu_read(arch_core_cycles_prev); 331 - 332 - if (unlikely(core_cnt <= prev_core_cnt || 333 - const_cnt <= prev_const_cnt)) 334 - return; 335 - 336 - /* 337 - * /\core arch_max_freq_scale 338 - * scale = ------- * -------------------- 339 - * /\const SCHED_CAPACITY_SCALE 340 - * 341 - * See validate_cpu_freq_invariance_counters() for details on 342 - * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT. 
343 - */ 344 - scale = core_cnt - prev_core_cnt; 345 - scale *= this_cpu_read(arch_max_freq_scale); 346 - scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT, 347 - const_cnt - prev_const_cnt); 348 - 349 - scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE); 350 - this_cpu_write(arch_freq_scale, (unsigned long)scale); 351 - } 352 285 353 286 #ifdef CONFIG_ACPI_CPPC_LIB 354 287 #include <acpi/cppc_acpi.h>
+78 -4
drivers/base/arch_topology.c
··· 21 21 #include <linux/sched.h> 22 22 #include <linux/smp.h> 23 23 24 + static DEFINE_PER_CPU(struct scale_freq_data *, sft_data); 25 + static struct cpumask scale_freq_counters_mask; 26 + static bool scale_freq_invariant; 27 + 28 + static bool supports_scale_freq_counters(const struct cpumask *cpus) 29 + { 30 + return cpumask_subset(cpus, &scale_freq_counters_mask); 31 + } 32 + 24 33 bool topology_scale_freq_invariant(void) 25 34 { 26 35 return cpufreq_supports_freq_invariance() || 27 - arch_freq_counters_available(cpu_online_mask); 36 + supports_scale_freq_counters(cpu_online_mask); 28 37 } 29 38 30 - __weak bool arch_freq_counters_available(const struct cpumask *cpus) 39 + static void update_scale_freq_invariant(bool status) 31 40 { 32 - return false; 41 + if (scale_freq_invariant == status) 42 + return; 43 + 44 + /* 45 + * Task scheduler behavior depends on frequency invariance support, 46 + * either cpufreq or counter driven. If the support status changes as 47 + * a result of counter initialisation and use, retrigger the build of 48 + * scheduling domains to ensure the information is propagated properly. 49 + */ 50 + if (topology_scale_freq_invariant() == status) { 51 + scale_freq_invariant = status; 52 + rebuild_sched_domains_energy(); 53 + } 33 54 } 55 + 56 + void topology_set_scale_freq_source(struct scale_freq_data *data, 57 + const struct cpumask *cpus) 58 + { 59 + struct scale_freq_data *sfd; 60 + int cpu; 61 + 62 + /* 63 + * Avoid calling rebuild_sched_domains() unnecessarily if FIE is 64 + * supported by cpufreq. 
65 + */ 66 + if (cpumask_empty(&scale_freq_counters_mask)) 67 + scale_freq_invariant = topology_scale_freq_invariant(); 68 + 69 + for_each_cpu(cpu, cpus) { 70 + sfd = per_cpu(sft_data, cpu); 71 + 72 + /* Use ARCH provided counters whenever possible */ 73 + if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) { 74 + per_cpu(sft_data, cpu) = data; 75 + cpumask_set_cpu(cpu, &scale_freq_counters_mask); 76 + } 77 + } 78 + 79 + update_scale_freq_invariant(true); 80 + } 81 + 82 + void topology_clear_scale_freq_source(enum scale_freq_source source, 83 + const struct cpumask *cpus) 84 + { 85 + struct scale_freq_data *sfd; 86 + int cpu; 87 + 88 + for_each_cpu(cpu, cpus) { 89 + sfd = per_cpu(sft_data, cpu); 90 + 91 + if (sfd && sfd->source == source) { 92 + per_cpu(sft_data, cpu) = NULL; 93 + cpumask_clear_cpu(cpu, &scale_freq_counters_mask); 94 + } 95 + } 96 + 97 + update_scale_freq_invariant(false); 98 + } 99 + 100 + void topology_scale_freq_tick(void) 101 + { 102 + struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data); 103 + 104 + if (sfd) 105 + sfd->set_freq_scale(); 106 + } 107 + 34 108 DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; 35 109 36 110 void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, ··· 121 47 * want to update the scale factor with information from CPUFREQ. 122 48 * Instead the scale factor will be updated from arch_scale_freq_tick. 123 49 */ 124 - if (arch_freq_counters_available(cpus)) 50 + if (supports_scale_freq_counters(cpus)) 125 51 return; 126 52 127 53 scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
+13 -1
include/linux/arch_topology.h
··· 34 34 unsigned long max_freq); 35 35 bool topology_scale_freq_invariant(void); 36 36 37 - bool arch_freq_counters_available(const struct cpumask *cpus); 37 + enum scale_freq_source { 38 + SCALE_FREQ_SOURCE_CPUFREQ = 0, 39 + SCALE_FREQ_SOURCE_ARCH, 40 + }; 41 + 42 + struct scale_freq_data { 43 + enum scale_freq_source source; 44 + void (*set_freq_scale)(void); 45 + }; 46 + 47 + void topology_scale_freq_tick(void); 48 + void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus); 49 + void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus); 38 50 39 51 DECLARE_PER_CPU(unsigned long, thermal_pressure); 40 52