Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched: cpufreq: Allow remote cpufreq callbacks

With Android UI and benchmarks the latency of cpufreq response to
certain scheduling events can become very critical. Currently, callbacks
into cpufreq governors are only made from the scheduler if the target
CPU of the event is the same as the current CPU. This means there are
certain situations where a target CPU may not run the cpufreq governor
for some time.

One test case showing this behavior is where a task starts running on
CPU0, then a new task is also spawned on CPU0 by a task on CPU1. If the
system is configured such that the new tasks should receive maximum
demand initially, this should result in CPU0 increasing frequency
immediately. But because of the above-mentioned limitation, this does
not occur.

This patch updates the scheduler core to call the cpufreq callbacks for
remote CPUs as well.

The schedutil, ondemand and conservative governors are updated to
process cpufreq utilization update hooks called for remote CPUs where
the remote CPU is managed by the cpufreq policy of the local CPU.

The intel_pstate driver is updated to always reject remote callbacks.

This is tested with a couple of use cases (Android: hackbench,
recentfling, galleryfling, vellamo; Ubuntu: hackbench) on the ARM hikey
board (64-bit octa-core, single policy). Only galleryfling showed minor
improvements, while the others didn't show much deviation.

The reason is that this patch only targets a corner case, where the
following are all required to be true to improve performance, and that
doesn't happen too often with these tests:

- Task is migrated to another CPU.
- The task has high demand, and should take the target CPU to higher
OPPs.
- And the target CPU doesn't call into the cpufreq governor until the
next tick.

Based on initial work from Steve Muckle.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Saravana Kannan <skannan@codeaurora.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

authored by

Viresh Kumar and committed by
Rafael J. Wysocki
674e7541 251accf9

+55 -18
+3
drivers/cpufreq/cpufreq_governor.c
··· 275 275 struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; 276 276 u64 delta_ns, lst; 277 277 278 + if (!cpufreq_can_do_remote_dvfs(policy_dbs->policy)) 279 + return; 280 + 278 281 /* 279 282 * The work may not be allowed to be queued up right now. 280 283 * Possible reasons:
+8
drivers/cpufreq/intel_pstate.c
··· 1747 1747 struct cpudata *cpu = container_of(data, struct cpudata, update_util); 1748 1748 u64 delta_ns = time - cpu->sample.time; 1749 1749 1750 + /* Don't allow remote callbacks */ 1751 + if (smp_processor_id() != cpu->cpu) 1752 + return; 1753 + 1750 1754 if ((s64)delta_ns < pid_params.sample_rate_ns) 1751 1755 return; 1752 1756 ··· 1767 1763 { 1768 1764 struct cpudata *cpu = container_of(data, struct cpudata, update_util); 1769 1765 u64 delta_ns; 1766 + 1767 + /* Don't allow remote callbacks */ 1768 + if (smp_processor_id() != cpu->cpu) 1769 + return; 1770 1770 1771 1771 if (flags & SCHED_CPUFREQ_IOWAIT) { 1772 1772 cpu->iowait_boost = int_tofp(1);
+9
include/linux/cpufreq.h
··· 562 562 size_t count); 563 563 }; 564 564 565 + static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) 566 + { 567 + /* Allow remote callbacks only on the CPUs sharing cpufreq policy */ 568 + if (cpumask_test_cpu(smp_processor_id(), policy->cpus)) 569 + return true; 570 + 571 + return false; 572 + } 573 + 565 574 /********************************************************************* 566 575 * FREQUENCY TABLE HELPERS * 567 576 *********************************************************************/
+26 -5
kernel/sched/cpufreq_schedutil.c
··· 52 52 struct sugov_cpu { 53 53 struct update_util_data update_util; 54 54 struct sugov_policy *sg_policy; 55 + unsigned int cpu; 55 56 56 57 bool iowait_boost_pending; 57 58 unsigned int iowait_boost; ··· 77 76 static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) 78 77 { 79 78 s64 delta_ns; 79 + 80 + /* 81 + * Since cpufreq_update_util() is called with rq->lock held for 82 + * the @target_cpu, our per-cpu data is fully serialized. 83 + * 84 + * However, drivers cannot in general deal with cross-cpu 85 + * requests, so while get_next_freq() will work, our 86 + * sugov_update_commit() call may not. 87 + * 88 + * Hence stop here for remote requests if they aren't supported 89 + * by the hardware, as calculating the frequency is pointless if 90 + * we cannot in fact act on it. 91 + */ 92 + if (!cpufreq_can_do_remote_dvfs(sg_policy->policy)) 93 + return false; 80 94 81 95 if (sg_policy->work_in_progress) 82 96 return false; ··· 171 155 return cpufreq_driver_resolve_freq(policy, freq); 172 156 } 173 157 174 - static void sugov_get_util(unsigned long *util, unsigned long *max) 158 + static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu) 175 159 { 176 - struct rq *rq = this_rq(); 160 + struct rq *rq = cpu_rq(cpu); 177 161 unsigned long cfs_max; 178 162 179 - cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); 163 + cfs_max = arch_scale_cpu_capacity(NULL, cpu); 180 164 181 165 *util = min(rq->cfs.avg.util_avg, cfs_max); 182 166 *max = cfs_max; ··· 270 254 if (flags & SCHED_CPUFREQ_RT_DL) { 271 255 next_f = policy->cpuinfo.max_freq; 272 256 } else { 273 - sugov_get_util(&util, &max); 257 + sugov_get_util(&util, &max, sg_cpu->cpu); 274 258 sugov_iowait_boost(sg_cpu, &util, &max); 275 259 next_f = get_next_freq(sg_policy, util, max); 276 260 /* ··· 332 316 unsigned long util, max; 333 317 unsigned int next_f; 334 318 335 - sugov_get_util(&util, &max); 319 + sugov_get_util(&util, &max, sg_cpu->cpu); 336 320 337 321 
raw_spin_lock(&sg_policy->update_lock); 338 322 ··· 713 697 714 698 static int __init sugov_register(void) 715 699 { 700 + int cpu; 701 + 702 + for_each_possible_cpu(cpu) 703 + per_cpu(sugov_cpu, cpu).cpu = cpu; 704 + 716 705 return cpufreq_register_governor(&schedutil_gov); 717 706 } 718 707 fs_initcall(sugov_register);
+1 -1
kernel/sched/deadline.c
··· 1136 1136 } 1137 1137 1138 1138 /* kick cpufreq (see the comment in kernel/sched/sched.h). */ 1139 - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL); 1139 + cpufreq_update_util(rq, SCHED_CPUFREQ_DL); 1140 1140 1141 1141 schedstat_set(curr->se.statistics.exec_max, 1142 1142 max(curr->se.statistics.exec_max, delta_exec));
+5 -3
kernel/sched/fair.c
··· 3278 3278 3279 3279 static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) 3280 3280 { 3281 - if (&this_rq()->cfs == cfs_rq) { 3281 + struct rq *rq = rq_of(cfs_rq); 3282 + 3283 + if (&rq->cfs == cfs_rq) { 3282 3284 /* 3283 3285 * There are a few boundary cases this might miss but it should 3284 3286 * get called often enough that that should (hopefully) not be ··· 3297 3295 * 3298 3296 * See cpu_util(). 3299 3297 */ 3300 - cpufreq_update_util(rq_of(cfs_rq), 0); 3298 + cpufreq_update_util(rq, 0); 3301 3299 } 3302 3300 } 3303 3301 ··· 4877 4875 * passed. 4878 4876 */ 4879 4877 if (p->in_iowait) 4880 - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); 4878 + cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); 4881 4879 4882 4880 for_each_sched_entity(se) { 4883 4881 if (se->on_rq)
+1 -1
kernel/sched/rt.c
··· 970 970 return; 971 971 972 972 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ 973 - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); 973 + cpufreq_update_util(rq, SCHED_CPUFREQ_RT); 974 974 975 975 schedstat_set(curr->se.statistics.exec_max, 976 976 max(curr->se.statistics.exec_max, delta_exec));
+2 -8
kernel/sched/sched.h
··· 2070 2070 { 2071 2071 struct update_util_data *data; 2072 2072 2073 - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); 2073 + data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, 2074 + cpu_of(rq))); 2074 2075 if (data) 2075 2076 data->func(data, rq_clock(rq), flags); 2076 2077 } 2077 - 2078 - static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) 2079 - { 2080 - if (cpu_of(rq) == smp_processor_id()) 2081 - cpufreq_update_util(rq, flags); 2082 - } 2083 2078 #else 2084 2079 static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} 2085 - static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} 2086 2080 #endif /* CONFIG_CPU_FREQ */ 2087 2081 2088 2082 #ifdef arch_scale_freq_capacity