Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'amd-pstate'

Merge AMD P-state driver changes from Perry Yuan for v6.10:

"- Enable CPPC v2 for certain processors in the family 17H, as requested
by TR40 processor users who expect improved performance and lower
system temperature.

- Change latency and delay values to be read from platform firmware
first for more accurate timing.

- A new quirk is introduced for supporting amd-pstate on legacy
processors which either lack CPPC capability, or only have CPPC
v2 capability."

* amd-pstate:
MAINTAINERS: cpufreq: amd-pstate: Add co-maintainers and reviewer
cpufreq: amd-pstate: remove unused variable lowest_nonlinear_freq
cpufreq: amd-pstate: fix code format problems
cpufreq: amd-pstate: Add quirk for the pstate CPPC capabilities missing
cppc_acpi: print error message if CPPC is unsupported
cpufreq: amd-pstate: get transition delay and latency value from ACPI tables
cpufreq: amd-pstate: Bail out if min/max/nominal_freq is 0
cpufreq: amd-pstate: Remove amd_get_{min,max,nominal,lowest_nonlinear}_freq()
cpufreq: amd-pstate: Unify computation of {max,min,nominal,lowest_nonlinear}_freq
cpufreq: amd-pstate: Document the units for freq variables in amd_cpudata
cpufreq: amd-pstate: Document *_limit_* fields in struct amd_cpudata

+176 -109
+3
MAINTAINERS
··· 1062 1062 1063 1063 AMD PSTATE DRIVER 1064 1064 M: Huang Rui <ray.huang@amd.com> 1065 + M: Gautham R. Shenoy <gautham.shenoy@amd.com> 1066 + M: Mario Limonciello <mario.limonciello@amd.com> 1067 + R: Perry Yuan <perry.yuan@amd.com> 1065 1068 L: linux-pm@vger.kernel.org 1066 1069 S: Supported 1067 1070 F: Documentation/admin-guide/pm/amd-pstate.rst
+3 -1
drivers/acpi/cppc_acpi.c
··· 686 686 687 687 if (!osc_sb_cppc2_support_acked) { 688 688 pr_debug("CPPC v2 _OSC not acked\n"); 689 - if (!cpc_supported_by_cpu()) 689 + if (!cpc_supported_by_cpu()) { 690 + pr_debug("CPPC is not supported by the CPU\n"); 690 691 return -ENODEV; 692 + } 691 693 } 692 694 693 695 /* Parse the ACPI _CPC table for this CPU. */
+155 -103
drivers/cpufreq/amd-pstate.c
··· 67 67 static int cppc_state = AMD_PSTATE_UNDEFINED; 68 68 static bool cppc_enabled; 69 69 static bool amd_pstate_prefcore = true; 70 + static struct quirk_entry *quirks; 70 71 71 72 /* 72 73 * AMD Energy Preference Performance (EPP) ··· 111 110 }; 112 111 113 112 typedef int (*cppc_mode_transition_fn)(int); 113 + 114 + static struct quirk_entry quirk_amd_7k62 = { 115 + .nominal_freq = 2600, 116 + .lowest_freq = 550, 117 + }; 118 + 119 + static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) 120 + { 121 + /** 122 + * match the broken bios for family 17h processor support CPPC V2 123 + * broken BIOS lack of nominal_freq and lowest_freq capabilities 124 + * definition in ACPI tables 125 + */ 126 + if (boot_cpu_has(X86_FEATURE_ZEN2)) { 127 + quirks = dmi->driver_data; 128 + pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); 129 + return 1; 130 + } 131 + 132 + return 0; 133 + } 134 + 135 + static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = { 136 + { 137 + .callback = dmi_matched_7k62_bios_bug, 138 + .ident = "AMD EPYC 7K62", 139 + .matches = { 140 + DMI_MATCH(DMI_BIOS_VERSION, "5.14"), 141 + DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"), 142 + }, 143 + .driver_data = &quirk_amd_7k62, 144 + }, 145 + {} 146 + }; 147 + MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table); 114 148 115 149 static inline int get_mode_idx_from_str(const char *str, size_t size) 116 150 { ··· 640 604 cpufreq_cpu_put(policy); 641 605 } 642 606 643 - static int amd_get_min_freq(struct amd_cpudata *cpudata) 644 - { 645 - struct cppc_perf_caps cppc_perf; 646 - 647 - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 648 - if (ret) 649 - return ret; 650 - 651 - /* Switch to khz */ 652 - return cppc_perf.lowest_freq * 1000; 653 - } 654 - 655 - static int amd_get_max_freq(struct amd_cpudata *cpudata) 656 - { 657 - struct cppc_perf_caps cppc_perf; 658 - u32 max_perf, max_freq, nominal_freq, nominal_perf; 659 - u64 boost_ratio; 660 - 
661 - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 662 - if (ret) 663 - return ret; 664 - 665 - nominal_freq = cppc_perf.nominal_freq; 666 - nominal_perf = READ_ONCE(cpudata->nominal_perf); 667 - max_perf = READ_ONCE(cpudata->highest_perf); 668 - 669 - boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, 670 - nominal_perf); 671 - 672 - max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; 673 - 674 - /* Switch to khz */ 675 - return max_freq * 1000; 676 - } 677 - 678 - static int amd_get_nominal_freq(struct amd_cpudata *cpudata) 679 - { 680 - struct cppc_perf_caps cppc_perf; 681 - 682 - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 683 - if (ret) 684 - return ret; 685 - 686 - /* Switch to khz */ 687 - return cppc_perf.nominal_freq * 1000; 688 - } 689 - 690 - static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) 691 - { 692 - struct cppc_perf_caps cppc_perf; 693 - u32 lowest_nonlinear_freq, lowest_nonlinear_perf, 694 - nominal_freq, nominal_perf; 695 - u64 lowest_nonlinear_ratio; 696 - 697 - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 698 - if (ret) 699 - return ret; 700 - 701 - nominal_freq = cppc_perf.nominal_freq; 702 - nominal_perf = READ_ONCE(cpudata->nominal_perf); 703 - 704 - lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; 705 - 706 - lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, 707 - nominal_perf); 708 - 709 - lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT; 710 - 711 - /* Switch to khz */ 712 - return lowest_nonlinear_freq * 1000; 713 - } 714 - 715 607 static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) 716 608 { 717 609 struct amd_cpudata *cpudata = policy->driver_data; ··· 792 828 mutex_unlock(&amd_pstate_driver_lock); 793 829 } 794 830 831 + /* 832 + * Get pstate transition delay time from ACPI tables that firmware set 833 + * instead of using hardcode value directly. 
834 + */ 835 + static u32 amd_pstate_get_transition_delay_us(unsigned int cpu) 836 + { 837 + u32 transition_delay_ns; 838 + 839 + transition_delay_ns = cppc_get_transition_latency(cpu); 840 + if (transition_delay_ns == CPUFREQ_ETERNAL) 841 + return AMD_PSTATE_TRANSITION_DELAY; 842 + 843 + return transition_delay_ns / NSEC_PER_USEC; 844 + } 845 + 846 + /* 847 + * Get pstate transition latency value from ACPI tables that firmware 848 + * set instead of using hardcode value directly. 849 + */ 850 + static u32 amd_pstate_get_transition_latency(unsigned int cpu) 851 + { 852 + u32 transition_latency; 853 + 854 + transition_latency = cppc_get_transition_latency(cpu); 855 + if (transition_latency == CPUFREQ_ETERNAL) 856 + return AMD_PSTATE_TRANSITION_LATENCY; 857 + 858 + return transition_latency; 859 + } 860 + 861 + /* 862 + * amd_pstate_init_freq: Initialize the max_freq, min_freq, 863 + * nominal_freq and lowest_nonlinear_freq for 864 + * the @cpudata object. 865 + * 866 + * Requires: highest_perf, lowest_perf, nominal_perf and 867 + * lowest_nonlinear_perf members of @cpudata to be 868 + * initialized. 869 + * 870 + * Returns 0 on success, non-zero value on failure. 
871 + */ 872 + static int amd_pstate_init_freq(struct amd_cpudata *cpudata) 873 + { 874 + int ret; 875 + u32 min_freq; 876 + u32 highest_perf, max_freq; 877 + u32 nominal_perf, nominal_freq; 878 + u32 lowest_nonlinear_perf, lowest_nonlinear_freq; 879 + u32 boost_ratio, lowest_nonlinear_ratio; 880 + struct cppc_perf_caps cppc_perf; 881 + 882 + ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); 883 + if (ret) 884 + return ret; 885 + 886 + if (quirks && quirks->lowest_freq) 887 + min_freq = quirks->lowest_freq * 1000; 888 + else 889 + min_freq = cppc_perf.lowest_freq * 1000; 890 + 891 + if (quirks && quirks->nominal_freq) 892 + nominal_freq = quirks->nominal_freq ; 893 + else 894 + nominal_freq = cppc_perf.nominal_freq; 895 + 896 + nominal_perf = READ_ONCE(cpudata->nominal_perf); 897 + 898 + highest_perf = READ_ONCE(cpudata->highest_perf); 899 + boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); 900 + max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; 901 + 902 + lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); 903 + lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, 904 + nominal_perf); 905 + lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000; 906 + 907 + WRITE_ONCE(cpudata->min_freq, min_freq); 908 + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); 909 + WRITE_ONCE(cpudata->nominal_freq, nominal_freq); 910 + WRITE_ONCE(cpudata->max_freq, max_freq); 911 + 912 + return 0; 913 + } 914 + 795 915 static int amd_pstate_cpu_init(struct cpufreq_policy *policy) 796 916 { 797 - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; 917 + int min_freq, max_freq, nominal_freq, ret; 798 918 struct device *dev; 799 919 struct amd_cpudata *cpudata; 800 920 ··· 903 855 if (ret) 904 856 goto free_cpudata1; 905 857 906 - min_freq = amd_get_min_freq(cpudata); 907 - max_freq = amd_get_max_freq(cpudata); 908 - nominal_freq 
= amd_get_nominal_freq(cpudata); 909 - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); 858 + ret = amd_pstate_init_freq(cpudata); 859 + if (ret) 860 + goto free_cpudata1; 910 861 911 - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { 912 - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", 913 - min_freq, max_freq); 862 + min_freq = READ_ONCE(cpudata->min_freq); 863 + max_freq = READ_ONCE(cpudata->max_freq); 864 + nominal_freq = READ_ONCE(cpudata->nominal_freq); 865 + 866 + if (min_freq <= 0 || max_freq <= 0 || 867 + nominal_freq <= 0 || min_freq > max_freq) { 868 + dev_err(dev, 869 + "min_freq(%d) or max_freq(%d) or nominal_freq (%d) value is incorrect, check _CPC in ACPI tables\n", 870 + min_freq, max_freq, nominal_freq); 914 871 ret = -EINVAL; 915 872 goto free_cpudata1; 916 873 } 917 874 918 - policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY; 919 - policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY; 875 + policy->cpuinfo.transition_latency = amd_pstate_get_transition_latency(policy->cpu); 876 + policy->transition_delay_us = amd_pstate_get_transition_delay_us(policy->cpu); 920 877 921 878 policy->min = min_freq; 922 879 policy->max = max_freq; ··· 949 896 goto free_cpudata2; 950 897 } 951 898 952 - /* Initial processor data capability frequencies */ 953 - cpudata->max_freq = max_freq; 954 - cpudata->min_freq = min_freq; 955 899 cpudata->max_limit_freq = max_freq; 956 900 cpudata->min_limit_freq = min_freq; 957 - cpudata->nominal_freq = nominal_freq; 958 - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; 959 901 960 902 policy->driver_data = cpudata; 961 903 ··· 1014 966 int max_freq; 1015 967 struct amd_cpudata *cpudata = policy->driver_data; 1016 968 1017 - max_freq = amd_get_max_freq(cpudata); 969 + max_freq = READ_ONCE(cpudata->max_freq); 1018 970 if (max_freq < 0) 1019 971 return max_freq; 1020 972 ··· 1027 979 int freq; 1028 980 struct amd_cpudata *cpudata = policy->driver_data; 1029 
981 1030 - freq = amd_get_lowest_nonlinear_freq(cpudata); 982 + freq = READ_ONCE(cpudata->lowest_nonlinear_freq); 1031 983 if (freq < 0) 1032 984 return freq; 1033 985 ··· 1338 1290 1339 1291 static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) 1340 1292 { 1341 - int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; 1293 + int min_freq, max_freq, nominal_freq, ret; 1342 1294 struct amd_cpudata *cpudata; 1343 1295 struct device *dev; 1344 1296 u64 value; ··· 1365 1317 if (ret) 1366 1318 goto free_cpudata1; 1367 1319 1368 - min_freq = amd_get_min_freq(cpudata); 1369 - max_freq = amd_get_max_freq(cpudata); 1370 - nominal_freq = amd_get_nominal_freq(cpudata); 1371 - lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); 1372 - if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { 1373 - dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", 1374 - min_freq, max_freq); 1320 + ret = amd_pstate_init_freq(cpudata); 1321 + if (ret) 1322 + goto free_cpudata1; 1323 + 1324 + min_freq = READ_ONCE(cpudata->min_freq); 1325 + max_freq = READ_ONCE(cpudata->max_freq); 1326 + nominal_freq = READ_ONCE(cpudata->nominal_freq); 1327 + if (min_freq <= 0 || max_freq <= 0 || 1328 + nominal_freq <= 0 || min_freq > max_freq) { 1329 + dev_err(dev, 1330 + "min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect, check _CPC in ACPI tables\n", 1331 + min_freq, max_freq, nominal_freq); 1375 1332 ret = -EINVAL; 1376 1333 goto free_cpudata1; 1377 1334 } ··· 1385 1332 policy->cpuinfo.max_freq = max_freq; 1386 1333 /* It will be updated by governor */ 1387 1334 policy->cur = policy->cpuinfo.min_freq; 1388 - 1389 - /* Initial processor data capability frequencies */ 1390 - cpudata->max_freq = max_freq; 1391 - cpudata->min_freq = min_freq; 1392 - cpudata->nominal_freq = nominal_freq; 1393 - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; 1394 1335 1395 1336 policy->driver_data = cpudata; 1396 1337 ··· 1702 1655 /* don't keep reloading 
if cpufreq_driver exists */ 1703 1656 if (cpufreq_get_current_driver()) 1704 1657 return -EEXIST; 1658 + 1659 + quirks = NULL; 1660 + 1661 + /* check if this machine need CPPC quirks */ 1662 + dmi_check_system(amd_pstate_quirks_table); 1705 1663 1706 1664 switch (cppc_state) { 1707 1665 case AMD_PSTATE_UNDEFINED:
+15 -5
include/linux/amd-pstate.h
··· 49 49 * @lowest_perf: the absolute lowest performance level of the processor 50 50 * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher 51 51 * priority. 52 - * @max_freq: the frequency that mapped to highest_perf 53 - * @min_freq: the frequency that mapped to lowest_perf 54 - * @nominal_freq: the frequency that mapped to nominal_perf 55 - * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf 52 + * @min_limit_perf: Cached value of the performance corresponding to policy->min 53 + * @max_limit_perf: Cached value of the performance corresponding to policy->max 54 + * @min_limit_freq: Cached value of policy->min (in khz) 55 + * @max_limit_freq: Cached value of policy->max (in khz) 56 + * @max_freq: the frequency (in khz) that mapped to highest_perf 57 + * @min_freq: the frequency (in khz) that mapped to lowest_perf 58 + * @nominal_freq: the frequency (in khz) that mapped to nominal_perf 59 + * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf 56 60 * @cur: Difference of Aperf/Mperf/tsc count between last and current sample 57 61 * @prev: Last Aperf/Mperf/tsc count value read from register 58 - * @freq: current cpu frequency value 62 + * @freq: current cpu frequency value (in khz) 59 63 * @boost_supported: check whether the Processor or SBIOS supports boost mode 60 64 * @hw_prefcore: check whether HW supports preferred core featue. 61 65 * Only when hw_prefcore and early prefcore param are true, ··· 128 124 [AMD_PSTATE_GUIDED] = "guided", 129 125 NULL, 130 126 }; 127 + 128 + struct quirk_entry { 129 + u32 nominal_freq; 130 + u32 lowest_freq; 131 + }; 132 + 131 133 #endif /* _LINUX_AMD_PSTATE_H */