Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched/cpufreq: Take cpufreq feedback into account

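Aggregate the different pressures applied to the capacity of CPUs and
create a new function that returns the actual capacity of the CPU:
get_actual_cpu_capacity(). The actual capacity is the original CPU
capacity minus the larger of the thermal pressure and the cpufreq
pressure.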

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Reviewed-by: Qais Yousef <qyousef@layalina.io>
Link: https://lore.kernel.org/r/20240326091616.3696851-3-vincent.guittot@linaro.org

Authored by Vincent Guittot and committed by Ingo Molnar
f1f8d0a2 75d65931

+25 -20
+25 -20
kernel/sched/fair.c
··· 4965 4965 trace_sched_util_est_se_tp(&p->se); 4966 4966 } 4967 4967 4968 + static inline unsigned long get_actual_cpu_capacity(int cpu) 4969 + { 4970 + unsigned long capacity = arch_scale_cpu_capacity(cpu); 4971 + 4972 + capacity -= max(thermal_load_avg(cpu_rq(cpu)), cpufreq_get_pressure(cpu)); 4973 + 4974 + return capacity; 4975 + } 4976 + 4968 4977 static inline int util_fits_cpu(unsigned long util, 4969 4978 unsigned long uclamp_min, 4970 4979 unsigned long uclamp_max, 4971 4980 int cpu) 4972 4981 { 4973 - unsigned long capacity_orig, capacity_orig_thermal; 4974 4982 unsigned long capacity = capacity_of(cpu); 4983 + unsigned long capacity_orig; 4975 4984 bool fits, uclamp_max_fits; 4976 4985 4977 4986 /* ··· 5012 5003 * goal is to cap the task. So it's okay if it's getting less. 5013 5004 */ 5014 5005 capacity_orig = arch_scale_cpu_capacity(cpu); 5015 - capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu); 5016 5006 5017 5007 /* 5018 5008 * We want to force a task to fit a cpu as implied by uclamp_max. ··· 5086 5078 * handle the case uclamp_min > uclamp_max. 5087 5079 */ 5088 5080 uclamp_min = min(uclamp_min, uclamp_max); 5089 - if (fits && (util < uclamp_min) && (uclamp_min > capacity_orig_thermal)) 5081 + if (fits && (util < uclamp_min) && 5082 + (uclamp_min > get_actual_cpu_capacity(cpu))) 5090 5083 return -1; 5091 5084 5092 5085 return fits; ··· 7503 7494 * Look for the CPU with best capacity. 7504 7495 */ 7505 7496 else if (fits < 0) 7506 - cpu_cap = arch_scale_cpu_capacity(cpu) - thermal_load_avg(cpu_rq(cpu)); 7497 + cpu_cap = get_actual_cpu_capacity(cpu); 7507 7498 7508 7499 /* 7509 7500 * First, select CPU which fits better (-1 being better than 0). 
··· 7996 7987 struct root_domain *rd = this_rq()->rd; 7997 7988 int cpu, best_energy_cpu, target = -1; 7998 7989 int prev_fits = -1, best_fits = -1; 7999 - unsigned long best_thermal_cap = 0; 8000 - unsigned long prev_thermal_cap = 0; 7990 + unsigned long best_actual_cap = 0; 7991 + unsigned long prev_actual_cap = 0; 8001 7992 struct sched_domain *sd; 8002 7993 struct perf_domain *pd; 8003 7994 struct energy_env eenv; ··· 8027 8018 8028 8019 for (; pd; pd = pd->next) { 8029 8020 unsigned long util_min = p_util_min, util_max = p_util_max; 8030 - unsigned long cpu_cap, cpu_thermal_cap, util; 8021 + unsigned long cpu_cap, cpu_actual_cap, util; 8031 8022 long prev_spare_cap = -1, max_spare_cap = -1; 8032 8023 unsigned long rq_util_min, rq_util_max; 8033 8024 unsigned long cur_delta, base_energy; ··· 8039 8030 if (cpumask_empty(cpus)) 8040 8031 continue; 8041 8032 8042 - /* Account thermal pressure for the energy estimation */ 8033 + /* Account external pressure for the energy estimation */ 8043 8034 cpu = cpumask_first(cpus); 8044 - cpu_thermal_cap = arch_scale_cpu_capacity(cpu); 8045 - cpu_thermal_cap -= arch_scale_thermal_pressure(cpu); 8035 + cpu_actual_cap = get_actual_cpu_capacity(cpu); 8046 8036 8047 - eenv.cpu_cap = cpu_thermal_cap; 8037 + eenv.cpu_cap = cpu_actual_cap; 8048 8038 eenv.pd_cap = 0; 8049 8039 8050 8040 for_each_cpu(cpu, cpus) { 8051 8041 struct rq *rq = cpu_rq(cpu); 8052 8042 8053 - eenv.pd_cap += cpu_thermal_cap; 8043 + eenv.pd_cap += cpu_actual_cap; 8054 8044 8055 8045 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) 8056 8046 continue; ··· 8120 8112 if (prev_delta < base_energy) 8121 8113 goto unlock; 8122 8114 prev_delta -= base_energy; 8123 - prev_thermal_cap = cpu_thermal_cap; 8115 + prev_actual_cap = cpu_actual_cap; 8124 8116 best_delta = min(best_delta, prev_delta); 8125 8117 } 8126 8118 ··· 8135 8127 * but best energy cpu has better capacity. 
8136 8128 */ 8137 8129 if ((max_fits < 0) && 8138 - (cpu_thermal_cap <= best_thermal_cap)) 8130 + (cpu_actual_cap <= best_actual_cap)) 8139 8131 continue; 8140 8132 8141 8133 cur_delta = compute_energy(&eenv, pd, cpus, p, ··· 8156 8148 best_delta = cur_delta; 8157 8149 best_energy_cpu = max_spare_cap_cpu; 8158 8150 best_fits = max_fits; 8159 - best_thermal_cap = cpu_thermal_cap; 8151 + best_actual_cap = cpu_actual_cap; 8160 8152 } 8161 8153 } 8162 8154 rcu_read_unlock(); 8163 8155 8164 8156 if ((best_fits > prev_fits) || 8165 8157 ((best_fits > 0) && (best_delta < prev_delta)) || 8166 - ((best_fits < 0) && (best_thermal_cap > prev_thermal_cap))) 8158 + ((best_fits < 0) && (best_actual_cap > prev_actual_cap))) 8167 8159 target = best_energy_cpu; 8168 8160 8169 8161 return target; ··· 9568 9560 9569 9561 static unsigned long scale_rt_capacity(int cpu) 9570 9562 { 9563 + unsigned long max = get_actual_cpu_capacity(cpu); 9571 9564 struct rq *rq = cpu_rq(cpu); 9572 - unsigned long max = arch_scale_cpu_capacity(cpu); 9573 9565 unsigned long used, free; 9574 9566 unsigned long irq; 9575 9567 ··· 9581 9573 /* 9582 9574 * avg_rt.util_avg and avg_dl.util_avg track binary signals 9583 9575 * (running and not running) with weights 0 and 1024 respectively. 9584 - * avg_thermal.load_avg tracks thermal pressure and the weighted 9585 - * average uses the actual delta max capacity(load). 9586 9576 */ 9587 9577 used = cpu_util_rt(rq); 9588 9578 used += cpu_util_dl(rq); 9589 - used += thermal_load_avg(rq); 9590 9579 9591 9580 if (unlikely(used >= max)) 9592 9581 return 1;