Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull more power management updates from Rafael Wysocki:
"These are fixes and cleanups on top of the power management material
for 5.12-rc1 merged previously.

Specifics:

- Address a cpufreq regression introduced in 5.11 that causes CPU
frequency reporting to be distorted on systems with CPPC that use
acpi-cpufreq as the scaling driver (Rafael Wysocki).

- Fix a regression introduced during the 5.10 development cycle related
to CPU hotplug and policy recreation in the qcom-cpufreq-hw driver
(Shawn Guo).

- Fix a recent regression in the operating performance points (OPP)
framework that may cause frequency updates to be skipped by mistake
in some cases (Jonathan Marek).

- Simplify schedutil governor code and remove a misleading comment
from it (Yue Hu).

- Fix kerneldoc comment typo in the cpufreq core (Yue Hu)"

* tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
cpufreq: Fix typo in kerneldoc comment
cpufreq: schedutil: Remove update_lock comment from struct sugov_policy definition
cpufreq: schedutil: Remove needless sg_policy parameter from ignore_dl_rate_limit()
cpufreq: ACPI: Set cpuinfo.max_freq directly if max boost is known
cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks
opp: Don't skip freq update for different frequency

+69 -67
+16 -46
drivers/cpufreq/acpi-cpufreq.c
··· 54 54 unsigned int resume; 55 55 unsigned int cpu_feature; 56 56 unsigned int acpi_perf_cpu; 57 - unsigned int first_perf_state; 58 57 cpumask_var_t freqdomain_cpus; 59 58 void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val); 60 59 u32 (*cpu_freq_read)(struct acpi_pct_register *reg); ··· 222 223 223 224 perf = to_perf_data(data); 224 225 225 - cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state) 226 + cpufreq_for_each_entry(pos, policy->freq_table) 226 227 if (msr == perf->states[pos->driver_data].status) 227 228 return pos->frequency; 228 - return policy->freq_table[data->first_perf_state].frequency; 229 + return policy->freq_table[0].frequency; 229 230 } 230 231 231 232 static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) ··· 364 365 struct cpufreq_policy *policy; 365 366 unsigned int freq; 366 367 unsigned int cached_freq; 367 - unsigned int state; 368 368 369 369 pr_debug("%s (%d)\n", __func__, cpu); 370 370 ··· 375 377 if (unlikely(!data || !policy->freq_table)) 376 378 return 0; 377 379 378 - state = to_perf_data(data)->state; 379 - if (state < data->first_perf_state) 380 - state = data->first_perf_state; 381 - 382 - cached_freq = policy->freq_table[state].frequency; 380 + cached_freq = policy->freq_table[to_perf_data(data)->state].frequency; 383 381 freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data)); 384 382 if (freq != cached_freq) { 385 383 /* ··· 674 680 struct cpuinfo_x86 *c = &cpu_data(cpu); 675 681 unsigned int valid_states = 0; 676 682 unsigned int result = 0; 677 - unsigned int state_count; 678 683 u64 max_boost_ratio; 679 684 unsigned int i; 680 685 #ifdef CONFIG_SMP ··· 788 795 goto err_unreg; 789 796 } 790 797 791 - state_count = perf->state_count + 1; 792 - 793 - max_boost_ratio = get_max_boost_ratio(cpu); 794 - if (max_boost_ratio) { 795 - /* 796 - * Make a room for one more entry to represent the highest 797 - * available "boost" frequency. 
798 - */ 799 - state_count++; 800 - valid_states++; 801 - data->first_perf_state = valid_states; 802 - } else { 803 - /* 804 - * If the maximum "boost" frequency is unknown, ask the arch 805 - * scale-invariance code to use the "nominal" performance for 806 - * CPU utilization scaling so as to prevent the schedutil 807 - * governor from selecting inadequate CPU frequencies. 808 - */ 809 - arch_set_max_freq_ratio(true); 810 - } 811 - 812 - freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL); 798 + freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table), 799 + GFP_KERNEL); 813 800 if (!freq_table) { 814 801 result = -ENOMEM; 815 802 goto err_unreg; ··· 824 851 } 825 852 freq_table[valid_states].frequency = CPUFREQ_TABLE_END; 826 853 854 + max_boost_ratio = get_max_boost_ratio(cpu); 827 855 if (max_boost_ratio) { 828 - unsigned int state = data->first_perf_state; 829 - unsigned int freq = freq_table[state].frequency; 856 + unsigned int freq = freq_table[0].frequency; 830 857 831 858 /* 832 859 * Because the loop above sorts the freq_table entries in the 833 860 * descending order, freq is the maximum frequency in the table. 834 861 * Assume that it corresponds to the CPPC nominal frequency and 835 - * use it to populate the frequency field of the extra "boost" 836 - * frequency entry. 862 + * use it to set cpuinfo.max_freq. 837 863 */ 838 - freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT; 864 + policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT; 865 + } else { 839 866 /* 840 - * The purpose of the extra "boost" frequency entry is to make 841 - * the rest of cpufreq aware of the real maximum frequency, but 842 - * the way to request it is the same as for the first_perf_state 843 - * entry that is expected to cover the entire range of "boost" 844 - * frequencies of the CPU, so copy the driver_data value from 845 - * that entry. 
867 + * If the maximum "boost" frequency is unknown, ask the arch 868 + * scale-invariance code to use the "nominal" performance for 869 + * CPU utilization scaling so as to prevent the schedutil 870 + * governor from selecting inadequate CPU frequencies. 846 871 */ 847 - freq_table[0].driver_data = freq_table[state].driver_data; 872 + arch_set_max_freq_ratio(true); 848 873 } 849 874 850 875 policy->freq_table = freq_table; ··· 918 947 { 919 948 struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data, 920 949 policy->cpu); 921 - struct acpi_cpufreq_data *data = policy->driver_data; 922 - unsigned int freq = policy->freq_table[data->first_perf_state].frequency; 950 + unsigned int freq = policy->freq_table[0].frequency; 923 951 924 952 if (perf->states[0].core_frequency * 1000 != freq) 925 953 pr_warn(FW_WARN "P-state 0 is not max freq\n");
+1 -1
drivers/cpufreq/cpufreq.c
··· 2101 2101 * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go. 2102 2102 * @cpu: Target CPU. 2103 2103 * @min_perf: Minimum (required) performance level (units of @capacity). 2104 - * @target_perf: Terget (desired) performance level (units of @capacity). 2104 + * @target_perf: Target (desired) performance level (units of @capacity). 2105 2105 * @capacity: Capacity of the target CPU. 2106 2106 * 2107 2107 * Carry out a fast performance level switch of @cpu without sleeping.
+7 -1
drivers/cpufreq/freq_table.c
··· 52 52 } 53 53 54 54 policy->min = policy->cpuinfo.min_freq = min_freq; 55 - policy->max = policy->cpuinfo.max_freq = max_freq; 55 + policy->max = max_freq; 56 + /* 57 + * If the driver has set its own cpuinfo.max_freq above max_freq, leave 58 + * it as is. 59 + */ 60 + if (policy->cpuinfo.max_freq < max_freq) 61 + policy->max = policy->cpuinfo.max_freq = max_freq; 56 62 57 63 if (policy->min == ~0) 58 64 return -EINVAL;
+32 -8
drivers/cpufreq/qcom-cpufreq-hw.c
··· 32 32 33 33 struct qcom_cpufreq_data { 34 34 void __iomem *base; 35 + struct resource *res; 35 36 const struct qcom_cpufreq_soc_data *soc_data; 36 37 }; 37 38 ··· 281 280 struct of_phandle_args args; 282 281 struct device_node *cpu_np; 283 282 struct device *cpu_dev; 283 + struct resource *res; 284 284 void __iomem *base; 285 285 struct qcom_cpufreq_data *data; 286 286 int ret, index; ··· 305 303 306 304 index = args.args[0]; 307 305 308 - base = devm_platform_ioremap_resource(pdev, index); 309 - if (IS_ERR(base)) 310 - return PTR_ERR(base); 306 + res = platform_get_resource(pdev, IORESOURCE_MEM, index); 307 + if (!res) { 308 + dev_err(dev, "failed to get mem resource %d\n", index); 309 + return -ENODEV; 310 + } 311 311 312 - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); 312 + if (!request_mem_region(res->start, resource_size(res), res->name)) { 313 + dev_err(dev, "failed to request resource %pR\n", res); 314 + return -EBUSY; 315 + } 316 + 317 + base = ioremap(res->start, resource_size(res)); 318 + if (IS_ERR(base)) { 319 + dev_err(dev, "failed to map resource %pR\n", res); 320 + ret = PTR_ERR(base); 321 + goto release_region; 322 + } 323 + 324 + data = kzalloc(sizeof(*data), GFP_KERNEL); 313 325 if (!data) { 314 326 ret = -ENOMEM; 315 - goto error; 327 + goto unmap_base; 316 328 } 317 329 318 330 data->soc_data = of_device_get_match_data(&pdev->dev); 319 331 data->base = base; 332 + data->res = res; 320 333 321 334 /* HW should be in enabled state to proceed */ 322 335 if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) { ··· 372 355 373 356 return 0; 374 357 error: 375 - devm_iounmap(dev, base); 358 + kfree(data); 359 + unmap_base: 360 + iounmap(data->base); 361 + release_region: 362 + release_mem_region(res->start, resource_size(res)); 376 363 return ret; 377 364 } 378 365 ··· 384 363 { 385 364 struct device *cpu_dev = get_cpu_device(policy->cpu); 386 365 struct qcom_cpufreq_data *data = policy->driver_data; 387 - struct platform_device 
*pdev = cpufreq_get_driver_data(); 366 + struct resource *res = data->res; 367 + void __iomem *base = data->base; 388 368 389 369 dev_pm_opp_remove_all_dynamic(cpu_dev); 390 370 dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); 391 371 kfree(policy->freq_table); 392 - devm_iounmap(&pdev->dev, data->base); 372 + kfree(data); 373 + iounmap(base); 374 + release_mem_region(res->start, resource_size(res)); 393 375 394 376 return 0; 395 377 }
+5 -3
drivers/opp/core.c
··· 998 998 old_opp = opp_table->current_opp; 999 999 1000 1000 /* Return early if nothing to do */ 1001 - if (opp_table->enabled && old_opp == opp) { 1001 + if (old_opp == opp && opp_table->current_rate == freq && 1002 + opp_table->enabled) { 1002 1003 dev_dbg(dev, "%s: OPPs are same, nothing to do\n", __func__); 1003 1004 return 0; 1004 1005 } 1005 1006 1006 1007 dev_dbg(dev, "%s: switching OPP: Freq %lu -> %lu Hz, Level %u -> %u, Bw %u -> %u\n", 1007 - __func__, old_opp->rate, freq, old_opp->level, opp->level, 1008 - old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0, 1008 + __func__, opp_table->current_rate, freq, old_opp->level, 1009 + opp->level, old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0, 1009 1010 opp->bandwidth ? opp->bandwidth[0].peak : 0); 1010 1011 1011 1012 scaling_down = _opp_compare_key(old_opp, opp); ··· 1062 1061 /* Make sure current_opp doesn't get freed */ 1063 1062 dev_pm_opp_get(opp); 1064 1063 opp_table->current_opp = opp; 1064 + opp_table->current_rate = freq; 1065 1065 1066 1066 return ret; 1067 1067 }
+2
drivers/opp/opp.h
··· 135 135 * @clock_latency_ns_max: Max clock latency in nanoseconds. 136 136 * @parsed_static_opps: Count of devices for which OPPs are initialized from DT. 137 137 * @shared_opp: OPP is shared between multiple devices. 138 + * @current_rate: Currently configured frequency. 138 139 * @current_opp: Currently configured OPP for the table. 139 140 * @suspend_opp: Pointer to OPP to be used during device suspend. 140 141 * @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers. ··· 185 184 186 185 unsigned int parsed_static_opps; 187 186 enum opp_table_access shared_opp; 187 + unsigned long current_rate; 188 188 struct dev_pm_opp *current_opp; 189 189 struct dev_pm_opp *suspend_opp; 190 190
+6 -8
kernel/sched/cpufreq_schedutil.c
··· 26 26 struct sugov_tunables *tunables; 27 27 struct list_head tunables_hook; 28 28 29 - raw_spinlock_t update_lock; /* For shared policies */ 29 + raw_spinlock_t update_lock; 30 30 u64 last_freq_update_time; 31 31 s64 freq_update_delay_ns; 32 32 unsigned int next_freq; ··· 320 320 * Make sugov_should_update_freq() ignore the rate limit when DL 321 321 * has increased the utilization. 322 322 */ 323 - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) 323 + static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) 324 324 { 325 325 if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) 326 - sg_policy->limits_changed = true; 326 + sg_cpu->sg_policy->limits_changed = true; 327 327 } 328 328 329 329 static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, 330 330 u64 time, unsigned int flags) 331 331 { 332 - struct sugov_policy *sg_policy = sg_cpu->sg_policy; 333 - 334 332 sugov_iowait_boost(sg_cpu, time, flags); 335 333 sg_cpu->last_update = time; 336 334 337 - ignore_dl_rate_limit(sg_cpu, sg_policy); 335 + ignore_dl_rate_limit(sg_cpu); 338 336 339 - if (!sugov_should_update_freq(sg_policy, time)) 337 + if (!sugov_should_update_freq(sg_cpu->sg_policy, time)) 340 338 return false; 341 339 342 340 sugov_get_util(sg_cpu); ··· 449 451 sugov_iowait_boost(sg_cpu, time, flags); 450 452 sg_cpu->last_update = time; 451 453 452 - ignore_dl_rate_limit(sg_cpu, sg_policy); 454 + ignore_dl_rate_limit(sg_cpu); 453 455 454 456 if (sugov_should_update_freq(sg_policy, time)) { 455 457 next_f = sugov_next_freq_shared(sg_cpu, time);