Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branches 'pm-cpufreq', 'pm-sleep' and 'pm-em'

* pm-cpufreq:
cpufreq: intel_pstate: hybrid: Rework HWP calibration
ACPI: CPPC: Introduce cppc_get_nominal_perf()

* pm-sleep:
PM: sleep: core: Avoid setting power.must_resume to false
PM: sleep: wakeirq: drop useless parameter from dev_pm_attach_wake_irq()

* pm-em:
Documentation: power: include kernel-doc in Energy Model doc
PM: EM: fix kernel-doc comments

+128 -145
+12 -3
Documentation/power/energy-model.rst
··· 101 101 the same scale. If there are different scales, these subsystems might decide 102 102 to: return warning/error, stop working or panic. 103 103 See Section 3. for an example of driver implementing this 104 - callback, and kernel/power/energy_model.c for further documentation on this 105 - API. 104 + callback, or Section 2.4 for further documentation on this API 106 105 107 106 108 107 2.3 Accessing performance domains ··· 122 123 CPUfreq governor is in use in case of CPU device. Currently this calculation is 123 124 not provided for other type of devices. 124 125 125 - More details about the above APIs can be found in include/linux/energy_model.h. 126 + More details about the above APIs can be found in ``<linux/energy_model.h>`` 127 + or in Section 2.4 128 + 129 + 130 + 2.4 Description details of this API 131 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 132 + .. kernel-doc:: include/linux/energy_model.h 133 + :internal: 134 + 135 + .. kernel-doc:: kernel/power/energy_model.c 136 + :export: 126 137 127 138 128 139 3. Example driver
+31 -16
drivers/acpi/cppc_acpi.c
··· 1008 1008 return ret_val; 1009 1009 } 1010 1010 1011 - /** 1012 - * cppc_get_desired_perf - Get the value of desired performance register. 1013 - * @cpunum: CPU from which to get desired performance. 1014 - * @desired_perf: address of a variable to store the returned desired performance 1015 - * 1016 - * Return: 0 for success, -EIO otherwise. 1017 - */ 1018 - int cppc_get_desired_perf(int cpunum, u64 *desired_perf) 1011 + static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) 1019 1012 { 1020 1013 struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); 1021 - int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); 1022 - struct cpc_register_resource *desired_reg; 1023 - struct cppc_pcc_data *pcc_ss_data = NULL; 1014 + struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx]; 1024 1015 1025 - desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; 1026 - 1027 - if (CPC_IN_PCC(desired_reg)) { 1016 + if (CPC_IN_PCC(reg)) { 1017 + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); 1018 + struct cppc_pcc_data *pcc_ss_data = NULL; 1028 1019 int ret = 0; 1029 1020 1030 1021 if (pcc_ss_id < 0) ··· 1026 1035 down_write(&pcc_ss_data->pcc_lock); 1027 1036 1028 1037 if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0) 1029 - cpc_read(cpunum, desired_reg, desired_perf); 1038 + cpc_read(cpunum, reg, perf); 1030 1039 else 1031 1040 ret = -EIO; 1032 1041 ··· 1035 1044 return ret; 1036 1045 } 1037 1046 1038 - cpc_read(cpunum, desired_reg, desired_perf); 1047 + cpc_read(cpunum, reg, perf); 1039 1048 1040 1049 return 0; 1041 1050 } 1051 + 1052 + /** 1053 + * cppc_get_desired_perf - Get the desired performance register value. 1054 + * @cpunum: CPU from which to get desired performance. 1055 + * @desired_perf: Return address. 1056 + * 1057 + * Return: 0 for success, -EIO otherwise. 
1058 + */ 1059 + int cppc_get_desired_perf(int cpunum, u64 *desired_perf) 1060 + { 1061 + return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf); 1062 + } 1042 1063 EXPORT_SYMBOL_GPL(cppc_get_desired_perf); 1064 + 1065 + /** 1066 + * cppc_get_nominal_perf - Get the nominal performance register value. 1067 + * @cpunum: CPU from which to get nominal performance. 1068 + * @nominal_perf: Return address. 1069 + * 1070 + * Return: 0 for success, -EIO otherwise. 1071 + */ 1072 + int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) 1073 + { 1074 + return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf); 1075 + } 1043 1076 1044 1077 /** 1045 1078 * cppc_get_perf_caps - Get a CPU's performance capabilities.
+1 -1
drivers/base/power/main.c
··· 1642 1642 } 1643 1643 1644 1644 dev->power.may_skip_resume = true; 1645 - dev->power.must_resume = false; 1645 + dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME); 1646 1646 1647 1647 dpm_watchdog_set(&wd, dev); 1648 1648 device_lock(dev);
+4 -7
drivers/base/power/wakeirq.c
··· 12 12 /** 13 13 * dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ 14 14 * @dev: Device entry 15 - * @irq: Device wake-up capable interrupt 16 15 * @wirq: Wake irq specific data 17 16 * 18 - * Internal function to attach either a device IO interrupt or a 19 - * dedicated wake-up interrupt as a wake IRQ. 17 + * Internal function to attach a dedicated wake-up interrupt as a wake IRQ. 20 18 */ 21 - static int dev_pm_attach_wake_irq(struct device *dev, int irq, 22 - struct wake_irq *wirq) 19 + static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq) 23 20 { 24 21 unsigned long flags; 25 22 ··· 62 65 wirq->dev = dev; 63 66 wirq->irq = irq; 64 67 65 - err = dev_pm_attach_wake_irq(dev, irq, wirq); 68 + err = dev_pm_attach_wake_irq(dev, wirq); 66 69 if (err) 67 70 kfree(wirq); 68 71 ··· 193 196 if (err) 194 197 goto err_free_name; 195 198 196 - err = dev_pm_attach_wake_irq(dev, irq, wirq); 199 + err = dev_pm_attach_wake_irq(dev, wirq); 197 200 if (err) 198 201 goto err_free_irq; 199 202
+71 -114
drivers/cpufreq/intel_pstate.c
··· 268 268 * @get_min: Callback to get minimum P state 269 269 * @get_turbo: Callback to get turbo P state 270 270 * @get_scaling: Callback to get frequency scaling factor 271 + * @get_cpu_scaling: Get frequency scaling factor for a given cpu 271 272 * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference 272 273 * @get_val: Callback to convert P state to actual MSR write value 273 274 * @get_vid: Callback to get VID data for Atom platforms ··· 282 281 int (*get_min)(void); 283 282 int (*get_turbo)(void); 284 283 int (*get_scaling)(void); 284 + int (*get_cpu_scaling)(int cpu); 285 285 int (*get_aperf_mperf_shift)(void); 286 286 u64 (*get_val)(struct cpudata*, int pstate); 287 287 void (*get_vid)(struct cpudata *); ··· 386 384 return cppc_perf.nominal_perf; 387 385 } 388 386 387 + static u32 intel_pstate_cppc_nominal(int cpu) 388 + { 389 + u64 nominal_perf; 390 + 391 + if (cppc_get_nominal_perf(cpu, &nominal_perf)) 392 + return 0; 393 + 394 + return nominal_perf; 395 + } 389 396 #else /* CONFIG_ACPI_CPPC_LIB */ 390 397 static inline void intel_pstate_set_itmt_prio(int cpu) 391 398 { ··· 481 470 482 471 acpi_processor_unregister_performance(policy->cpu); 483 472 } 484 - 485 - static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps) 486 - { 487 - return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf; 488 - } 489 - 490 - static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu, 491 - struct cppc_perf_caps *caps) 492 - { 493 - if (cppc_get_perf_caps(cpu->cpu, caps)) 494 - return false; 495 - 496 - return caps->highest_perf && caps->lowest_perf <= caps->highest_perf; 497 - } 498 473 #else /* CONFIG_ACPI */ 499 474 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) 500 475 { ··· 503 506 } 504 507 #endif /* CONFIG_ACPI_CPPC_LIB */ 505 508 506 - static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu) 507 - { 508 - pr_debug("CPU%d: Using PERF_CTL scaling 
for HWP\n", cpu->cpu); 509 - 510 - cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling; 511 - } 512 - 513 509 /** 514 - * intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels. 510 + * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels. 515 511 * @cpu: Target CPU. 516 512 * 517 513 * On hybrid processors, HWP may expose more performance levels than there are ··· 512 522 * scaling factor between HWP performance levels and CPU frequency will be less 513 523 * than the scaling factor between P-state values and CPU frequency. 514 524 * 515 - * In that case, the scaling factor between HWP performance levels and CPU 516 - * frequency needs to be determined which can be done with the help of the 517 - * observation that certain HWP performance levels should correspond to certain 518 - * P-states, like for example the HWP highest performance should correspond 519 - * to the maximum turbo P-state of the CPU. 525 + * In that case, adjust the CPU parameters used in computations accordingly. 
520 526 */ 521 - static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu) 527 + static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) 522 528 { 523 529 int perf_ctl_max_phys = cpu->pstate.max_pstate_physical; 524 530 int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; 525 531 int perf_ctl_turbo = pstate_funcs.get_turbo(); 526 532 int turbo_freq = perf_ctl_turbo * perf_ctl_scaling; 527 - int perf_ctl_max = pstate_funcs.get_max(); 528 - int max_freq = perf_ctl_max * perf_ctl_scaling; 529 - int scaling = INT_MAX; 530 - int freq; 533 + int scaling = cpu->pstate.scaling; 531 534 532 535 pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); 533 - pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max); 536 + pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max()); 534 537 pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); 535 538 pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling); 536 - 537 539 pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate); 538 540 pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate); 539 - 540 - #ifdef CONFIG_ACPI 541 - if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) { 542 - struct cppc_perf_caps caps; 543 - 544 - if (intel_pstate_cppc_perf_caps(cpu, &caps)) { 545 - if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) { 546 - pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu); 547 - 548 - /* 549 - * If the CPPC nominal performance is valid, it 550 - * can be assumed to correspond to cpu_khz. 
551 - */ 552 - if (caps.nominal_perf == perf_ctl_max_phys) { 553 - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); 554 - return; 555 - } 556 - scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf); 557 - } else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) { 558 - pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu); 559 - 560 - /* 561 - * If the CPPC guaranteed performance is valid, 562 - * it can be assumed to correspond to max_freq. 563 - */ 564 - if (caps.guaranteed_perf == perf_ctl_max) { 565 - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); 566 - return; 567 - } 568 - scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf); 569 - } 570 - } 571 - } 572 - #endif 573 - /* 574 - * If using the CPPC data to compute the HWP-to-frequency scaling factor 575 - * doesn't work, use the HWP_CAP guaranteed perf for this purpose with 576 - * the assumption that it corresponds to max_freq. 577 - */ 578 - if (scaling > perf_ctl_scaling) { 579 - pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu); 580 - 581 - if (cpu->pstate.max_pstate == perf_ctl_max) { 582 - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); 583 - return; 584 - } 585 - scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate); 586 - if (scaling > perf_ctl_scaling) { 587 - /* 588 - * This should not happen, because it would mean that 589 - * the number of HWP perf levels was less than the 590 - * number of P-states, so use the PERF_CTL scaling in 591 - * that case.
592 - */ 593 - pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu, 594 - scaling); 595 - 596 - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); 597 - return; 598 - } 599 - } 541 + pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling); 600 542 601 543 /* 602 - * If the product of the HWP performance scaling factor obtained above 603 - * and the HWP_CAP highest performance is greater than the maximum turbo 604 - * frequency corresponding to the pstate_funcs.get_turbo() return value, 605 - * the scaling factor is too high, so recompute it so that the HWP_CAP 606 - * highest performance corresponds to the maximum turbo frequency. 544 + * If the product of the HWP performance scaling factor and the HWP_CAP 545 + * highest performance is greater than the maximum turbo frequency 546 + * corresponding to the pstate_funcs.get_turbo() return value, the 547 + * scaling factor is too high, so recompute it to make the HWP_CAP 548 + * highest performance correspond to the maximum turbo frequency. 
607 549 */ 608 550 if (turbo_freq < cpu->pstate.turbo_pstate * scaling) { 609 - pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling); 610 - 611 551 cpu->pstate.turbo_freq = turbo_freq; 612 552 scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate); 553 + cpu->pstate.scaling = scaling; 554 + 555 + pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n", 556 + cpu->cpu, scaling); 613 557 } 614 - 615 - cpu->pstate.scaling = scaling; 616 - 617 - pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling); 618 558 619 559 cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, 620 560 perf_ctl_scaling); 621 561 622 - freq = perf_ctl_max_phys * perf_ctl_scaling; 623 - cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling); 562 + cpu->pstate.max_pstate_physical = 563 + DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling, 564 + scaling); 624 565 625 566 cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling; 626 567 /* ··· 1782 1861 return ret; 1783 1862 } 1784 1863 1864 + #ifdef CONFIG_ACPI_CPPC_LIB 1865 + static u32 hybrid_ref_perf; 1866 + 1867 + static int hybrid_get_cpu_scaling(int cpu) 1868 + { 1869 + return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf, 1870 + intel_pstate_cppc_nominal(cpu)); 1871 + } 1872 + 1873 + static void intel_pstate_cppc_set_cpu_scaling(void) 1874 + { 1875 + u32 min_nominal_perf = U32_MAX; 1876 + int cpu; 1877 + 1878 + for_each_present_cpu(cpu) { 1879 + u32 nominal_perf = intel_pstate_cppc_nominal(cpu); 1880 + 1881 + if (nominal_perf && nominal_perf < min_nominal_perf) 1882 + min_nominal_perf = nominal_perf; 1883 + } 1884 + 1885 + if (min_nominal_perf < U32_MAX) { 1886 + hybrid_ref_perf = min_nominal_perf; 1887 + pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling; 1888 + } 1889 + } 1890 + #else 1891 + static inline void intel_pstate_cppc_set_cpu_scaling(void) 1892 + { 1893 + } 1894 + #endif /* CONFIG_ACPI_CPPC_LIB */ 1895 + 1785 1896 static void 
intel_pstate_set_pstate(struct cpudata *cpu, int pstate) 1786 1897 { 1787 1898 trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); ··· 1842 1889 1843 1890 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) 1844 1891 { 1845 - bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU); 1846 1892 int perf_ctl_max_phys = pstate_funcs.get_max_physical(); 1847 - int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys : 1848 - pstate_funcs.get_scaling(); 1893 + int perf_ctl_scaling = pstate_funcs.get_scaling(); 1849 1894 1850 1895 cpu->pstate.min_pstate = pstate_funcs.get_min(); 1851 1896 cpu->pstate.max_pstate_physical = perf_ctl_max_phys; ··· 1852 1901 if (hwp_active && !hwp_mode_bdw) { 1853 1902 __intel_pstate_get_hwp_cap(cpu); 1854 1903 1855 - if (hybrid_cpu) 1856 - intel_pstate_hybrid_hwp_calibrate(cpu); 1857 - else 1904 + if (pstate_funcs.get_cpu_scaling) { 1905 + cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu); 1906 + if (cpu->pstate.scaling != perf_ctl_scaling) 1907 + intel_pstate_hybrid_hwp_adjust(cpu); 1908 + } else { 1858 1909 cpu->pstate.scaling = perf_ctl_scaling; 1910 + } 1859 1911 } else { 1860 1912 cpu->pstate.scaling = perf_ctl_scaling; 1861 1913 cpu->pstate.max_pstate = pstate_funcs.get_max(); ··· 3229 3275 intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf; 3230 3276 if (!default_driver) 3231 3277 default_driver = &intel_pstate; 3278 + 3279 + if (boot_cpu_has(X86_FEATURE_HYBRID_CPU)) 3280 + intel_pstate_cppc_set_cpu_scaling(); 3232 3281 3233 3282 goto hwp_cpu_matched; 3234 3283 }
+5
include/acpi/cppc_acpi.h
··· 135 135 136 136 #ifdef CONFIG_ACPI_CPPC_LIB 137 137 extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); 138 + extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); 138 139 extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); 139 140 extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); 140 141 extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); ··· 147 146 extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); 148 147 #else /* !CONFIG_ACPI_CPPC_LIB */ 149 148 static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) 149 + { 150 + return -ENOTSUPP; 151 + } 152 + static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) 150 153 { 151 154 return -ENOTSUPP; 152 155 }
+4 -4
include/linux/energy_model.h
··· 11 11 #include <linux/types.h> 12 12 13 13 /** 14 - * em_perf_state - Performance state of a performance domain 14 + * struct em_perf_state - Performance state of a performance domain 15 15 * @frequency: The frequency in KHz, for consistency with CPUFreq 16 16 * @power: The power consumed at this level (by 1 CPU or by a registered 17 17 * device). It can be a total power: static and dynamic. ··· 25 25 }; 26 26 27 27 /** 28 - * em_perf_domain - Performance domain 28 + * struct em_perf_domain - Performance domain 29 29 * @table: List of performance states, in ascending order 30 30 * @nr_perf_states: Number of performance states 31 31 * @milliwatts: Flag indicating the power values are in milli-Watts ··· 103 103 104 104 /** 105 105 * em_cpu_energy() - Estimates the energy consumed by the CPUs of a 106 - performance domain 106 + * performance domain 107 107 * @pd : performance domain for which energy has to be estimated 108 108 * @max_util : highest utilization among CPUs of the domain 109 109 * @sum_util : sum of the utilization of all CPUs in the domain 110 110 * @allowed_cpu_cap : maximum allowed CPU capacity for the @pd, which 111 - might reflect reduced frequency (due to thermal) 111 + * might reflect reduced frequency (due to thermal) 112 112 * 113 113 * This function must be used only for CPU devices. There is no validation, 114 114 * i.e. if the EM is a CPU type and has cpumask allocated. It is called from