Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "cpufreq: CPPC: Add support for frequency invariance"

This reverts commit 4c38f2df71c8e33c0b64865992d693f5022eeaad.

There are a few races in the frequency invariance support for the CPPC driver,
namely the driver doesn't stop the kthread_work and irq_work on policy
exit during suspend/resume or CPU hotplug.

A proper fix won't be possible for the 5.13-rc, as it requires a lot of
changes. Let's revert the patch instead for now.

Fixes: 4c38f2df71c8 ("cpufreq: CPPC: Add support for frequency invariance")
Reported-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

authored by

Viresh Kumar and committed by
Rafael J. Wysocki
771fac5e 009c9aa5

+12 -245
-10
drivers/cpufreq/Kconfig.arm
··· 19 19 20 20 If in doubt, say N. 21 21 22 - config ACPI_CPPC_CPUFREQ_FIE 23 - bool "Frequency Invariance support for CPPC cpufreq driver" 24 - depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY 25 - default y 26 - help 27 - This extends frequency invariance support in the CPPC cpufreq driver, 28 - by using CPPC delivered and reference performance counters. 29 - 30 - If in doubt, say N. 31 - 32 22 config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM 33 23 tristate "Allwinner nvmem based SUN50I CPUFreq driver" 34 24 depends on ARCH_SUNXI
+12 -233
drivers/cpufreq/cppc_cpufreq.c
··· 10 10 11 11 #define pr_fmt(fmt) "CPPC Cpufreq:" fmt 12 12 13 - #include <linux/arch_topology.h> 14 13 #include <linux/kernel.h> 15 14 #include <linux/module.h> 16 15 #include <linux/delay.h> 17 16 #include <linux/cpu.h> 18 17 #include <linux/cpufreq.h> 19 18 #include <linux/dmi.h> 20 - #include <linux/irq_work.h> 21 - #include <linux/kthread.h> 22 19 #include <linux/time.h> 23 20 #include <linux/vmalloc.h> 24 - #include <uapi/linux/sched/types.h> 25 21 26 22 #include <asm/unaligned.h> 27 23 ··· 56 60 .oem_revision = 0, 57 61 } 58 62 }; 59 - 60 - #ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE 61 - 62 - /* Frequency invariance support */ 63 - struct cppc_freq_invariance { 64 - int cpu; 65 - struct irq_work irq_work; 66 - struct kthread_work work; 67 - struct cppc_perf_fb_ctrs prev_perf_fb_ctrs; 68 - struct cppc_cpudata *cpu_data; 69 - }; 70 - 71 - static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); 72 - static struct kthread_worker *kworker_fie; 73 - static bool fie_disabled; 74 - 75 - static struct cpufreq_driver cppc_cpufreq_driver; 76 - static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu); 77 - static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, 78 - struct cppc_perf_fb_ctrs fb_ctrs_t0, 79 - struct cppc_perf_fb_ctrs fb_ctrs_t1); 80 - 81 - /** 82 - * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance 83 - * @work: The work item. 84 - * 85 - * The CPPC driver register itself with the topology core to provide its own 86 - * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which 87 - * gets called by the scheduler on every tick. 88 - * 89 - * Note that the arch specific counters have higher priority than CPPC counters, 90 - * if available, though the CPPC driver doesn't need to have any special 91 - * handling for that. 
92 - * 93 - * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we 94 - * reach here from hard-irq context), which then schedules a normal work item 95 - * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable 96 - * based on the counter updates since the last tick. 97 - */ 98 - static void cppc_scale_freq_workfn(struct kthread_work *work) 99 - { 100 - struct cppc_freq_invariance *cppc_fi; 101 - struct cppc_perf_fb_ctrs fb_ctrs = {0}; 102 - struct cppc_cpudata *cpu_data; 103 - unsigned long local_freq_scale; 104 - u64 perf; 105 - 106 - cppc_fi = container_of(work, struct cppc_freq_invariance, work); 107 - cpu_data = cppc_fi->cpu_data; 108 - 109 - if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) { 110 - pr_warn("%s: failed to read perf counters\n", __func__); 111 - return; 112 - } 113 - 114 - cppc_fi->prev_perf_fb_ctrs = fb_ctrs; 115 - perf = cppc_perf_from_fbctrs(cpu_data, cppc_fi->prev_perf_fb_ctrs, 116 - fb_ctrs); 117 - 118 - perf <<= SCHED_CAPACITY_SHIFT; 119 - local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf); 120 - if (WARN_ON(local_freq_scale > 1024)) 121 - local_freq_scale = 1024; 122 - 123 - per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale; 124 - } 125 - 126 - static void cppc_irq_work(struct irq_work *irq_work) 127 - { 128 - struct cppc_freq_invariance *cppc_fi; 129 - 130 - cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work); 131 - kthread_queue_work(kworker_fie, &cppc_fi->work); 132 - } 133 - 134 - static void cppc_scale_freq_tick(void) 135 - { 136 - struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id()); 137 - 138 - /* 139 - * cppc_get_perf_ctrs() can potentially sleep, call that from the right 140 - * context. 
141 - */ 142 - irq_work_queue(&cppc_fi->irq_work); 143 - } 144 - 145 - static struct scale_freq_data cppc_sftd = { 146 - .source = SCALE_FREQ_SOURCE_CPPC, 147 - .set_freq_scale = cppc_scale_freq_tick, 148 - }; 149 - 150 - static void cppc_freq_invariance_policy_init(struct cpufreq_policy *policy, 151 - struct cppc_cpudata *cpu_data) 152 - { 153 - struct cppc_perf_fb_ctrs fb_ctrs = {0}; 154 - struct cppc_freq_invariance *cppc_fi; 155 - int i, ret; 156 - 157 - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) 158 - return; 159 - 160 - if (fie_disabled) 161 - return; 162 - 163 - for_each_cpu(i, policy->cpus) { 164 - cppc_fi = &per_cpu(cppc_freq_inv, i); 165 - cppc_fi->cpu = i; 166 - cppc_fi->cpu_data = cpu_data; 167 - kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn); 168 - init_irq_work(&cppc_fi->irq_work, cppc_irq_work); 169 - 170 - ret = cppc_get_perf_ctrs(i, &fb_ctrs); 171 - if (ret) { 172 - pr_warn("%s: failed to read perf counters: %d\n", 173 - __func__, ret); 174 - fie_disabled = true; 175 - } else { 176 - cppc_fi->prev_perf_fb_ctrs = fb_ctrs; 177 - } 178 - } 179 - } 180 - 181 - static void __init cppc_freq_invariance_init(void) 182 - { 183 - struct sched_attr attr = { 184 - .size = sizeof(struct sched_attr), 185 - .sched_policy = SCHED_DEADLINE, 186 - .sched_nice = 0, 187 - .sched_priority = 0, 188 - /* 189 - * Fake (unused) bandwidth; workaround to "fix" 190 - * priority inheritance. 
191 - */ 192 - .sched_runtime = 1000000, 193 - .sched_deadline = 10000000, 194 - .sched_period = 10000000, 195 - }; 196 - int ret; 197 - 198 - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) 199 - return; 200 - 201 - if (fie_disabled) 202 - return; 203 - 204 - kworker_fie = kthread_create_worker(0, "cppc_fie"); 205 - if (IS_ERR(kworker_fie)) 206 - return; 207 - 208 - ret = sched_setattr_nocheck(kworker_fie->task, &attr); 209 - if (ret) { 210 - pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__, 211 - ret); 212 - kthread_destroy_worker(kworker_fie); 213 - return; 214 - } 215 - 216 - /* Register for freq-invariance */ 217 - topology_set_scale_freq_source(&cppc_sftd, cpu_present_mask); 218 - } 219 - 220 - static void cppc_freq_invariance_exit(void) 221 - { 222 - struct cppc_freq_invariance *cppc_fi; 223 - int i; 224 - 225 - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) 226 - return; 227 - 228 - if (fie_disabled) 229 - return; 230 - 231 - topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpu_present_mask); 232 - 233 - for_each_possible_cpu(i) { 234 - cppc_fi = &per_cpu(cppc_freq_inv, i); 235 - irq_work_sync(&cppc_fi->irq_work); 236 - } 237 - 238 - kthread_destroy_worker(kworker_fie); 239 - kworker_fie = NULL; 240 - } 241 - 242 - #else 243 - static inline void 244 - cppc_freq_invariance_policy_init(struct cpufreq_policy *policy, 245 - struct cppc_cpudata *cpu_data) 246 - { 247 - } 248 - 249 - static inline void cppc_freq_invariance_init(void) 250 - { 251 - } 252 - 253 - static inline void cppc_freq_invariance_exit(void) 254 - { 255 - } 256 - #endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */ 257 63 258 64 /* Callback function used to retrieve the max frequency from DMI */ 259 65 static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) ··· 345 547 cpu_data->perf_ctrls.desired_perf = caps->highest_perf; 346 548 347 549 ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); 348 - if (ret) { 550 + if (ret) 349 551 pr_debug("Err 
setting perf value:%d on CPU:%d. ret:%d\n", 350 552 caps->highest_perf, cpu, ret); 351 - } else { 352 - cppc_freq_invariance_policy_init(policy, cpu_data); 353 - } 354 553 355 554 return ret; 356 555 } ··· 360 565 return (u32)t1 - (u32)t0; 361 566 } 362 567 363 - static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, 364 - struct cppc_perf_fb_ctrs fb_ctrs_t0, 365 - struct cppc_perf_fb_ctrs fb_ctrs_t1) 568 + static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, 569 + struct cppc_perf_fb_ctrs fb_ctrs_t0, 570 + struct cppc_perf_fb_ctrs fb_ctrs_t1) 366 571 { 367 572 u64 delta_reference, delta_delivered; 368 - u64 reference_perf; 573 + u64 reference_perf, delivered_perf; 369 574 370 575 reference_perf = fb_ctrs_t0.reference_perf; 371 576 ··· 374 579 delta_delivered = get_delta(fb_ctrs_t1.delivered, 375 580 fb_ctrs_t0.delivered); 376 581 377 - /* Check to avoid divide-by zero and invalid delivered_perf */ 378 - if (!delta_reference || !delta_delivered) 379 - return cpu_data->perf_ctrls.desired_perf; 380 - 381 - return (reference_perf * delta_delivered) / delta_reference; 382 - } 383 - 384 - static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, 385 - struct cppc_perf_fb_ctrs fb_ctrs_t0, 386 - struct cppc_perf_fb_ctrs fb_ctrs_t1) 387 - { 388 - u64 delivered_perf; 389 - 390 - delivered_perf = cppc_perf_from_fbctrs(cpu_data, fb_ctrs_t0, 391 - fb_ctrs_t1); 582 + /* Check to avoid divide-by zero */ 583 + if (delta_reference || delta_delivered) 584 + delivered_perf = (reference_perf * delta_delivered) / 585 + delta_reference; 586 + else 587 + delivered_perf = cpu_data->perf_ctrls.desired_perf; 392 588 393 589 return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); 394 590 } ··· 504 718 505 719 static int __init cppc_cpufreq_init(void) 506 720 { 507 - int ret; 508 - 509 721 if ((acpi_disabled) || !acpi_cpc_valid()) 510 722 return -ENODEV; 511 723 ··· 511 727 512 728 cppc_check_hisi_workaround(); 513 729 514 - ret = 
cpufreq_register_driver(&cppc_cpufreq_driver); 515 - if (!ret) 516 - cppc_freq_invariance_init(); 517 - 518 - return ret; 730 + return cpufreq_register_driver(&cppc_cpufreq_driver); 519 731 } 520 732 521 733 static inline void free_cpu_data(void) ··· 528 748 529 749 static void __exit cppc_cpufreq_exit(void) 530 750 { 531 - cppc_freq_invariance_exit(); 532 751 cpufreq_unregister_driver(&cppc_cpufreq_driver); 533 752 534 753 free_cpu_data();
-1
include/linux/arch_topology.h
··· 37 37 enum scale_freq_source { 38 38 SCALE_FREQ_SOURCE_CPUFREQ = 0, 39 39 SCALE_FREQ_SOURCE_ARCH, 40 - SCALE_FREQ_SOURCE_CPPC, 41 40 }; 42 41 43 42 struct scale_freq_data {
-1
kernel/sched/core.c
··· 6389 6389 { 6390 6390 return __sched_setscheduler(p, attr, false, true); 6391 6391 } 6392 - EXPORT_SYMBOL_GPL(sched_setattr_nocheck); 6393 6392 6394 6393 /** 6395 6394 * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.