Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'pm-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management updates from Rafael Wysocki:
"These add Intel TPMI (Topology Aware Register and PM Capsule
Interface) support to the power capping subsystem, extend the
intel_idle driver to work in VM guests where MWAIT is not available,
extend the system-wide power management diagnostics, fix bugs and
clean up code.

Specifics:

- Introduce power capping core support for Intel TPMI (Topology Aware
Register and PM Capsule Interface) and a TPMI interface driver for
Intel RAPL (Zhang Rui, Dan Carpenter)

- Fix CONFIG_IOSF_MBI dependency in the Intel RAPL power capping
driver (Zhang Rui)

- Fix invalid initialization for pl4_supported field in the Intel
RAPL power capping driver (Sumeet Pawnikar)

- Clean up the intel_idle driver, make it work with VM guests that
cannot use the MWAIT instruction and address the case in which the
host may enter a deep idle state when the guest is idle (Arjan van
de Ven)

- Prevent cpufreq drivers that provide the ->adjust_perf() callback
without a ->fast_switch() one (which is used as a fallback from the
former in some cases) from being registered (Wyes Karny)

- Fix some issues related to the AMD P-state cpufreq driver (Mario
Limonciello, Wyes Karny)

- Fix the energy_performance_preference attribute handling in the
intel_pstate driver in passive mode (Tero Kristo)

- Fix the handling of pm_suspend_target_state when CONFIG_PM is unset
(Kai-Heng Feng)

- Correct spelling mistake in a comment in the hibernation code (Wang
Honghui)

- Add arch_resume_nosmt() prototype to avoid a "missing prototypes"
build warning (Arnd Bergmann)

- Restrict pm_pr_dbg() to system-wide power transitions and use it in
a few additional places (Mario Limonciello)

- Drop verification of in-params from genpd_add_device() and ensure
that all of its callers will do it (Ulf Hansson)

- Prevent possible integer overflows from occurring in
genpd_parse_state() (Nikita Zhandarovich)

- Reorder fields in 'struct devfreq_dev_status' to reduce its size
somewhat (Christophe JAILLET)

- Ensure that the Exynos PPMU driver is already loaded before the
Exynos Bus driver starts probing so as to avoid a possible freeze
during the loading of the kernel modules (Marek Szyprowski)

- Fix variable dereferencing before NULL check in the mtk-cci devfreq
driver (Sukrut Bellary)"

* tag 'pm-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (42 commits)
intel_idle: Add a "Long HLT" C1 state for the VM guest mode
cpufreq: intel_pstate: Fix energy_performance_preference for passive
cpufreq: amd-pstate: Add a kernel config option to set default mode
cpufreq: amd-pstate: Set a fallback policy based on preferred_profile
ACPI: CPPC: Add definition for undefined FADT preferred PM profile value
cpufreq: amd-pstate: Set default governor to schedutil
PM: domains: Move the verification of in-params from genpd_add_device()
cpufreq: amd-pstate: Make amd-pstate EPP driver name hyphenated
cpufreq: amd-pstate: Write CPPC enable bit per-socket
intel_idle: Add support for using intel_idle in a VM guest using just hlt
cpufreq: Fail driver register if it has adjust_perf without fast_switch
intel_idle: clean up the (new) state_update_enter_method function
intel_idle: refactor state->enter manipulation into its own function
platform/x86/amd: pmc: Use pm_pr_dbg() for suspend related messages
pinctrl: amd: Use pm_pr_dbg to show debugging messages
ACPI: x86: Add pm_debug_messages for LPS0 _DSM state tracking
include/linux/suspend.h: Only show pm_pr_dbg messages at suspend/resume
powercap: RAPL: Fix a NULL vs IS_ERR() bug
powercap: RAPL: Fix CONFIG_IOSF_MBI dependency
powercap: RAPL: fix invalid initialization for pl4_supported field
...

+1326 -496
+46 -6
drivers/acpi/x86/s2idle.c
··· 59 59 60 60 static guid_t lps0_dsm_guid_microsoft; 61 61 static int lps0_dsm_func_mask_microsoft; 62 + static int lps0_dsm_state; 62 63 63 64 /* Device constraint entry structure */ 64 65 struct lpi_device_info { ··· 321 320 } 322 321 } 323 322 323 + static bool acpi_s2idle_vendor_amd(void) 324 + { 325 + return boot_cpu_data.x86_vendor == X86_VENDOR_AMD; 326 + } 327 + 328 + static const char *acpi_sleep_dsm_state_to_str(unsigned int state) 329 + { 330 + if (lps0_dsm_func_mask_microsoft || !acpi_s2idle_vendor_amd()) { 331 + switch (state) { 332 + case ACPI_LPS0_SCREEN_OFF: 333 + return "screen off"; 334 + case ACPI_LPS0_SCREEN_ON: 335 + return "screen on"; 336 + case ACPI_LPS0_ENTRY: 337 + return "lps0 entry"; 338 + case ACPI_LPS0_EXIT: 339 + return "lps0 exit"; 340 + case ACPI_LPS0_MS_ENTRY: 341 + return "lps0 ms entry"; 342 + case ACPI_LPS0_MS_EXIT: 343 + return "lps0 ms exit"; 344 + } 345 + } else { 346 + switch (state) { 347 + case ACPI_LPS0_SCREEN_ON_AMD: 348 + return "screen on"; 349 + case ACPI_LPS0_SCREEN_OFF_AMD: 350 + return "screen off"; 351 + case ACPI_LPS0_ENTRY_AMD: 352 + return "lps0 entry"; 353 + case ACPI_LPS0_EXIT_AMD: 354 + return "lps0 exit"; 355 + } 356 + } 357 + 358 + return "unknown"; 359 + } 360 + 324 361 static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, guid_t dsm_guid) 325 362 { 326 363 union acpi_object *out_obj; ··· 370 331 rev_id, func, NULL); 371 332 ACPI_FREE(out_obj); 372 333 373 - acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n", 374 - func, out_obj ? "successful" : "failed"); 334 + lps0_dsm_state = func; 335 + if (pm_debug_messages_on) { 336 + acpi_handle_info(lps0_device_handle, 337 + "%s transitioned to state %s\n", 338 + out_obj ? 
"Successfully" : "Failed to", 339 + acpi_sleep_dsm_state_to_str(lps0_dsm_state)); 340 + } 375 341 } 376 342 377 - static bool acpi_s2idle_vendor_amd(void) 378 - { 379 - return boot_cpu_data.x86_vendor == X86_VENDOR_AMD; 380 - } 381 343 382 344 static int validate_dsm(acpi_handle handle, const char *uuid, int rev, guid_t *dsm_guid) 383 345 {
+9 -6
drivers/base/power/domain.c
··· 1632 1632 1633 1633 dev_dbg(dev, "%s()\n", __func__); 1634 1634 1635 - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) 1636 - return -EINVAL; 1637 - 1638 1635 gpd_data = genpd_alloc_dev_data(dev, gd); 1639 1636 if (IS_ERR(gpd_data)) 1640 1637 return PTR_ERR(gpd_data); ··· 1672 1675 int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) 1673 1676 { 1674 1677 int ret; 1678 + 1679 + if (!genpd || !dev) 1680 + return -EINVAL; 1675 1681 1676 1682 mutex_lock(&gpd_list_lock); 1677 1683 ret = genpd_add_device(genpd, dev, dev); ··· 2523 2523 struct generic_pm_domain *genpd; 2524 2524 int ret; 2525 2525 2526 + if (!dev) 2527 + return -EINVAL; 2528 + 2526 2529 mutex_lock(&gpd_list_lock); 2527 2530 2528 2531 genpd = genpd_get_from_provider(genpdspec); ··· 2942 2939 2943 2940 err = of_property_read_u32(state_node, "min-residency-us", &residency); 2944 2941 if (!err) 2945 - genpd_state->residency_ns = 1000 * residency; 2942 + genpd_state->residency_ns = 1000LL * residency; 2946 2943 2947 - genpd_state->power_on_latency_ns = 1000 * exit_latency; 2948 - genpd_state->power_off_latency_ns = 1000 * entry_latency; 2944 + genpd_state->power_on_latency_ns = 1000LL * exit_latency; 2945 + genpd_state->power_off_latency_ns = 1000LL * entry_latency; 2949 2946 genpd_state->fwnode = &state_node->fwnode; 2950 2947 2951 2948 return 0;
-5
drivers/base/power/wakeup.c
··· 19 19 20 20 #include "power.h" 21 21 22 - #ifndef CONFIG_SUSPEND 23 - suspend_state_t pm_suspend_target_state; 24 - #define pm_suspend_target_state (PM_SUSPEND_ON) 25 - #endif 26 - 27 22 #define list_for_each_entry_rcu_locked(pos, head, member) \ 28 23 list_for_each_entry_rcu(pos, head, member, \ 29 24 srcu_read_lock_held(&wakeup_srcu))
+1 -1
drivers/cpufreq/Kconfig
··· 38 38 prompt "Default CPUFreq governor" 39 39 default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1110_CPUFREQ 40 40 default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if ARM64 || ARM 41 - default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if X86_INTEL_PSTATE && SMP 41 + default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if (X86_INTEL_PSTATE || X86_AMD_PSTATE) && SMP 42 42 default CPU_FREQ_DEFAULT_GOV_PERFORMANCE 43 43 help 44 44 This option sets which CPUFreq governor shall be loaded at
+17
drivers/cpufreq/Kconfig.x86
··· 51 51 52 52 If in doubt, say N. 53 53 54 + config X86_AMD_PSTATE_DEFAULT_MODE 55 + int "AMD Processor P-State default mode" 56 + depends on X86_AMD_PSTATE 57 + default 3 if X86_AMD_PSTATE 58 + range 1 4 59 + help 60 + Select the default mode the amd-pstate driver will use on 61 + supported hardware. 62 + The value set has the following meanings: 63 + 1 -> Disabled 64 + 2 -> Passive 65 + 3 -> Active (EPP) 66 + 4 -> Guided 67 + 68 + For details, take a look at: 69 + <file:Documentation/admin-guide/pm/amd-pstate.rst>. 70 + 54 71 config X86_AMD_PSTATE_UT 55 72 tristate "selftest for AMD Processor P-State driver" 56 73 depends on X86 && ACPI_PROCESSOR
+102 -29
drivers/cpufreq/amd-pstate.c
··· 62 62 static struct cpufreq_driver *current_pstate_driver; 63 63 static struct cpufreq_driver amd_pstate_driver; 64 64 static struct cpufreq_driver amd_pstate_epp_driver; 65 - static int cppc_state = AMD_PSTATE_DISABLE; 65 + static int cppc_state = AMD_PSTATE_UNDEFINED; 66 + static bool cppc_enabled; 66 67 67 68 /* 68 69 * AMD Energy Preference Performance (EPP) ··· 229 228 230 229 static inline int pstate_enable(bool enable) 231 230 { 232 - return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); 231 + int ret, cpu; 232 + unsigned long logical_proc_id_mask = 0; 233 + 234 + if (enable == cppc_enabled) 235 + return 0; 236 + 237 + for_each_present_cpu(cpu) { 238 + unsigned long logical_id = topology_logical_die_id(cpu); 239 + 240 + if (test_bit(logical_id, &logical_proc_id_mask)) 241 + continue; 242 + 243 + set_bit(logical_id, &logical_proc_id_mask); 244 + 245 + ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE, 246 + enable); 247 + if (ret) 248 + return ret; 249 + } 250 + 251 + cppc_enabled = enable; 252 + return 0; 233 253 } 234 254 235 255 static int cppc_enable(bool enable) 236 256 { 237 257 int cpu, ret = 0; 238 258 struct cppc_perf_ctrls perf_ctrls; 259 + 260 + if (enable == cppc_enabled) 261 + return 0; 239 262 240 263 for_each_present_cpu(cpu) { 241 264 ret = cppc_set_enable(cpu, enable); ··· 276 251 } 277 252 } 278 253 254 + cppc_enabled = enable; 279 255 return ret; 280 256 } 281 257 ··· 1071 1045 .attrs = pstate_global_attributes, 1072 1046 }; 1073 1047 1048 + static bool amd_pstate_acpi_pm_profile_server(void) 1049 + { 1050 + switch (acpi_gbl_FADT.preferred_profile) { 1051 + case PM_ENTERPRISE_SERVER: 1052 + case PM_SOHO_SERVER: 1053 + case PM_PERFORMANCE_SERVER: 1054 + return true; 1055 + } 1056 + return false; 1057 + } 1058 + 1059 + static bool amd_pstate_acpi_pm_profile_undefined(void) 1060 + { 1061 + if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED) 1062 + return true; 1063 + if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES) 1064 + return true; 
1065 + return false; 1066 + } 1067 + 1074 1068 static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) 1075 1069 { 1076 1070 int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; ··· 1148 1102 policy->max = policy->cpuinfo.max_freq; 1149 1103 1150 1104 /* 1151 - * Set the policy to powersave to provide a valid fallback value in case 1105 + * Set the policy to provide a valid fallback value in case 1152 1106 * the default cpufreq governor is neither powersave nor performance. 1153 1107 */ 1154 - policy->policy = CPUFREQ_POLICY_POWERSAVE; 1108 + if (amd_pstate_acpi_pm_profile_server() || 1109 + amd_pstate_acpi_pm_profile_undefined()) 1110 + policy->policy = CPUFREQ_POLICY_PERFORMANCE; 1111 + else 1112 + policy->policy = CPUFREQ_POLICY_POWERSAVE; 1155 1113 1156 1114 if (boot_cpu_has(X86_FEATURE_CPPC)) { 1157 1115 ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); ··· 1406 1356 .online = amd_pstate_epp_cpu_online, 1407 1357 .suspend = amd_pstate_epp_suspend, 1408 1358 .resume = amd_pstate_epp_resume, 1409 - .name = "amd_pstate_epp", 1359 + .name = "amd-pstate-epp", 1410 1360 .attr = amd_pstate_epp_attr, 1411 1361 }; 1362 + 1363 + static int __init amd_pstate_set_driver(int mode_idx) 1364 + { 1365 + if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { 1366 + cppc_state = mode_idx; 1367 + if (cppc_state == AMD_PSTATE_DISABLE) 1368 + pr_info("driver is explicitly disabled\n"); 1369 + 1370 + if (cppc_state == AMD_PSTATE_ACTIVE) 1371 + current_pstate_driver = &amd_pstate_epp_driver; 1372 + 1373 + if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) 1374 + current_pstate_driver = &amd_pstate_driver; 1375 + 1376 + return 0; 1377 + } 1378 + 1379 + return -EINVAL; 1380 + } 1412 1381 1413 1382 static int __init amd_pstate_init(void) 1414 1383 { ··· 1436 1367 1437 1368 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 1438 1369 return -ENODEV; 1439 - /* 1440 - * by default the pstate driver is disabled to load 1441 - 
* enable the amd_pstate passive mode driver explicitly 1442 - * with amd_pstate=passive or other modes in kernel command line 1443 - */ 1444 - if (cppc_state == AMD_PSTATE_DISABLE) { 1445 - pr_info("driver load is disabled, boot with specific mode to enable this\n"); 1446 - return -ENODEV; 1447 - } 1448 1370 1449 1371 if (!acpi_cpc_valid()) { 1450 1372 pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); ··· 1445 1385 /* don't keep reloading if cpufreq_driver exists */ 1446 1386 if (cpufreq_get_current_driver()) 1447 1387 return -EEXIST; 1388 + 1389 + switch (cppc_state) { 1390 + case AMD_PSTATE_UNDEFINED: 1391 + /* Disable on the following configs by default: 1392 + * 1. Undefined platforms 1393 + * 2. Server platforms 1394 + * 3. Shared memory designs 1395 + */ 1396 + if (amd_pstate_acpi_pm_profile_undefined() || 1397 + amd_pstate_acpi_pm_profile_server() || 1398 + !boot_cpu_has(X86_FEATURE_CPPC)) { 1399 + pr_info("driver load is disabled, boot with specific mode to enable this\n"); 1400 + return -ENODEV; 1401 + } 1402 + ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE); 1403 + if (ret) 1404 + return ret; 1405 + break; 1406 + case AMD_PSTATE_DISABLE: 1407 + return -ENODEV; 1408 + case AMD_PSTATE_PASSIVE: 1409 + case AMD_PSTATE_ACTIVE: 1410 + case AMD_PSTATE_GUIDED: 1411 + break; 1412 + default: 1413 + return -EINVAL; 1414 + } 1448 1415 1449 1416 /* capability check */ 1450 1417 if (boot_cpu_has(X86_FEATURE_CPPC)) { ··· 1525 1438 size = strlen(str); 1526 1439 mode_idx = get_mode_idx_from_str(str, size); 1527 1440 1528 - if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) { 1529 - cppc_state = mode_idx; 1530 - if (cppc_state == AMD_PSTATE_DISABLE) 1531 - pr_info("driver is explicitly disabled\n"); 1532 - 1533 - if (cppc_state == AMD_PSTATE_ACTIVE) 1534 - current_pstate_driver = &amd_pstate_epp_driver; 1535 - 1536 - if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED) 1537 - current_pstate_driver 
= &amd_pstate_driver; 1538 - 1539 - return 0; 1540 - } 1541 - 1542 - return -EINVAL; 1441 + return amd_pstate_set_driver(mode_idx); 1543 1442 } 1544 1443 early_param("amd_pstate", amd_pstate_param); 1545 1444
+2 -1
drivers/cpufreq/cpufreq.c
··· 2828 2828 (driver_data->setpolicy && (driver_data->target_index || 2829 2829 driver_data->target)) || 2830 2830 (!driver_data->get_intermediate != !driver_data->target_intermediate) || 2831 - (!driver_data->online != !driver_data->offline)) 2831 + (!driver_data->online != !driver_data->offline) || 2832 + (driver_data->adjust_perf && !driver_data->fast_switch)) 2832 2833 return -EINVAL; 2833 2834 2834 2835 pr_debug("trying to register driver %s\n", driver_data->name);
+2
drivers/cpufreq/intel_pstate.c
··· 824 824 err = cpufreq_start_governor(policy); 825 825 if (!ret) 826 826 ret = err; 827 + } else { 828 + ret = 0; 827 829 } 828 830 } 829 831
+1
drivers/devfreq/exynos-bus.c
··· 518 518 }; 519 519 module_platform_driver(exynos_bus_platdrv); 520 520 521 + MODULE_SOFTDEP("pre: exynos_ppmu"); 521 522 MODULE_DESCRIPTION("Generic Exynos Bus frequency driver"); 522 523 MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>"); 523 524 MODULE_LICENSE("GPL v2");
+2 -1
drivers/devfreq/mtk-cci-devfreq.c
··· 127 127 u32 flags) 128 128 { 129 129 struct mtk_ccifreq_drv *drv = dev_get_drvdata(dev); 130 - struct clk *cci_pll = clk_get_parent(drv->cci_clk); 130 + struct clk *cci_pll; 131 131 struct dev_pm_opp *opp; 132 132 unsigned long opp_rate; 133 133 int voltage, pre_voltage, inter_voltage, target_voltage, ret; ··· 139 139 return 0; 140 140 141 141 inter_voltage = drv->inter_voltage; 142 + cci_pll = clk_get_parent(drv->cci_clk); 142 143 143 144 opp_rate = *freq; 144 145 opp = devfreq_recommended_opp(dev, &opp_rate, 1);
+208 -23
drivers/idle/intel_idle.c
··· 199 199 return __intel_idle(dev, drv, index); 200 200 } 201 201 202 + static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev, 203 + struct cpuidle_driver *drv, int index) 204 + { 205 + raw_safe_halt(); 206 + raw_local_irq_disable(); 207 + return index; 208 + } 209 + 210 + /** 211 + * intel_idle_hlt - Ask the processor to enter the given idle state using hlt. 212 + * @dev: cpuidle device of the target CPU. 213 + * @drv: cpuidle driver (assumed to point to intel_idle_driver). 214 + * @index: Target idle state index. 215 + * 216 + * Use the HLT instruction to notify the processor that the CPU represented by 217 + * @dev is idle and it can try to enter the idle state corresponding to @index. 218 + * 219 + * Must be called under local_irq_disable(). 220 + */ 221 + static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev, 222 + struct cpuidle_driver *drv, int index) 223 + { 224 + return __intel_idle_hlt(dev, drv, index); 225 + } 226 + 227 + static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev, 228 + struct cpuidle_driver *drv, int index) 229 + { 230 + int ret; 231 + 232 + raw_local_irq_enable(); 233 + ret = __intel_idle_hlt(dev, drv, index); 234 + raw_local_irq_disable(); 235 + 236 + return ret; 237 + } 238 + 202 239 /** 203 240 * intel_idle_s2idle - Ask the processor to enter the given idle state. 204 241 * @dev: cpuidle device of the target CPU. 
··· 1279 1242 .enter = NULL } 1280 1243 }; 1281 1244 1245 + static struct cpuidle_state vmguest_cstates[] __initdata = { 1246 + { 1247 + .name = "C1", 1248 + .desc = "HLT", 1249 + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, 1250 + .exit_latency = 5, 1251 + .target_residency = 10, 1252 + .enter = &intel_idle_hlt, }, 1253 + { 1254 + .name = "C1L", 1255 + .desc = "Long HLT", 1256 + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED, 1257 + .exit_latency = 5, 1258 + .target_residency = 200, 1259 + .enter = &intel_idle_hlt, }, 1260 + { 1261 + .enter = NULL } 1262 + }; 1263 + 1282 1264 static const struct idle_cpu idle_cpu_nehalem __initconst = { 1283 1265 .state_table = nehalem_cstates, 1284 1266 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, ··· 1895 1839 return true; 1896 1840 } 1897 1841 1842 + static void state_update_enter_method(struct cpuidle_state *state, int cstate) 1843 + { 1844 + if (state->enter == intel_idle_hlt) { 1845 + if (force_irq_on) { 1846 + pr_info("forced intel_idle_irq for state %d\n", cstate); 1847 + state->enter = intel_idle_hlt_irq_on; 1848 + } 1849 + return; 1850 + } 1851 + if (state->enter == intel_idle_hlt_irq_on) 1852 + return; /* no update scenarios */ 1853 + 1854 + if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) { 1855 + /* 1856 + * Combining with XSTATE with IBRS or IRQ_ENABLE flags 1857 + * is not currently supported but this driver. 1858 + */ 1859 + WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS); 1860 + WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 1861 + state->enter = intel_idle_xstate; 1862 + return; 1863 + } 1864 + 1865 + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && 1866 + state->flags & CPUIDLE_FLAG_IBRS) { 1867 + /* 1868 + * IBRS mitigation requires that C-states are entered 1869 + * with interrupts disabled. 
1870 + */ 1871 + WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 1872 + state->enter = intel_idle_ibrs; 1873 + return; 1874 + } 1875 + 1876 + if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) { 1877 + state->enter = intel_idle_irq; 1878 + return; 1879 + } 1880 + 1881 + if (force_irq_on) { 1882 + pr_info("forced intel_idle_irq for state %d\n", cstate); 1883 + state->enter = intel_idle_irq; 1884 + } 1885 + } 1886 + 1887 + /* 1888 + * For mwait based states, we want to verify the cpuid data to see if the state 1889 + * is actually supported by this specific CPU. 1890 + * For non-mwait based states, this check should be skipped. 1891 + */ 1892 + static bool should_verify_mwait(struct cpuidle_state *state) 1893 + { 1894 + if (state->enter == intel_idle_hlt) 1895 + return false; 1896 + if (state->enter == intel_idle_hlt_irq_on) 1897 + return false; 1898 + 1899 + return true; 1900 + } 1901 + 1898 1902 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) 1899 1903 { 1900 1904 int cstate; ··· 2003 1887 } 2004 1888 2005 1889 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); 2006 - if (!intel_idle_verify_cstate(mwait_hint)) 1890 + if (should_verify_mwait(&cpuidle_state_table[cstate]) && !intel_idle_verify_cstate(mwait_hint)) 2007 1891 continue; 2008 1892 2009 1893 /* Structure copy. */ 2010 1894 drv->states[drv->state_count] = cpuidle_state_table[cstate]; 2011 1895 state = &drv->states[drv->state_count]; 2012 1896 2013 - if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) { 2014 - /* 2015 - * Combining with XSTATE with IBRS or IRQ_ENABLE flags 2016 - * is not currently supported but this driver. 
2017 - */ 2018 - WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS); 2019 - WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 2020 - state->enter = intel_idle_xstate; 2021 - } else if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && 2022 - state->flags & CPUIDLE_FLAG_IBRS) { 2023 - /* 2024 - * IBRS mitigation requires that C-states are entered 2025 - * with interrupts disabled. 2026 - */ 2027 - WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE); 2028 - state->enter = intel_idle_ibrs; 2029 - } else if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) { 2030 - state->enter = intel_idle_irq; 2031 - } else if (force_irq_on) { 2032 - pr_info("forced intel_idle_irq for state %d\n", cstate); 2033 - state->enter = intel_idle_irq; 2034 - } 1897 + state_update_enter_method(state, cstate); 1898 + 2035 1899 2036 1900 if ((disabled_states_mask & BIT(drv->state_count)) || 2037 1901 ((icpu->use_acpi || force_use_acpi) && ··· 2137 2041 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); 2138 2042 } 2139 2043 2044 + /* 2045 + * Match up the latency and break even point of the bare metal (cpu based) 2046 + * states with the deepest VM available state. 2047 + * 2048 + * We only want to do this for the deepest state, the ones that has 2049 + * the TLB_FLUSHED flag set on the . 2050 + * 2051 + * All our short idle states are dominated by vmexit/vmenter latencies, 2052 + * not the underlying hardware latencies so we keep our values for these. 
2053 + */ 2054 + static void matchup_vm_state_with_baremetal(void) 2055 + { 2056 + int cstate; 2057 + 2058 + for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { 2059 + int matching_cstate; 2060 + 2061 + if (intel_idle_max_cstate_reached(cstate)) 2062 + break; 2063 + 2064 + if (!cpuidle_state_table[cstate].enter) 2065 + break; 2066 + 2067 + if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED)) 2068 + continue; 2069 + 2070 + for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) { 2071 + if (!icpu->state_table[matching_cstate].enter) 2072 + break; 2073 + if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) { 2074 + cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency; 2075 + cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency; 2076 + } 2077 + } 2078 + 2079 + } 2080 + } 2081 + 2082 + 2083 + static int __init intel_idle_vminit(const struct x86_cpu_id *id) 2084 + { 2085 + int retval; 2086 + 2087 + cpuidle_state_table = vmguest_cstates; 2088 + 2089 + icpu = (const struct idle_cpu *)id->driver_data; 2090 + 2091 + pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", 2092 + boot_cpu_data.x86_model); 2093 + 2094 + intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); 2095 + if (!intel_idle_cpuidle_devices) 2096 + return -ENOMEM; 2097 + 2098 + /* 2099 + * We don't know exactly what the host will do when we go idle, but as a worst estimate 2100 + * we can assume that the exit latency of the deepest host state will be hit for our 2101 + * deep (long duration) guest idle state. 2102 + * The same logic applies to the break even point for the long duration guest idle state. 2103 + * So lets copy these two properties from the table we found for the host CPU type. 
2104 + */ 2105 + matchup_vm_state_with_baremetal(); 2106 + 2107 + intel_idle_cpuidle_driver_init(&intel_idle_driver); 2108 + 2109 + retval = cpuidle_register_driver(&intel_idle_driver); 2110 + if (retval) { 2111 + struct cpuidle_driver *drv = cpuidle_get_driver(); 2112 + printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), 2113 + drv ? drv->name : "none"); 2114 + goto init_driver_fail; 2115 + } 2116 + 2117 + retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", 2118 + intel_idle_cpu_online, NULL); 2119 + if (retval < 0) 2120 + goto hp_setup_fail; 2121 + 2122 + return 0; 2123 + hp_setup_fail: 2124 + intel_idle_cpuidle_devices_uninit(); 2125 + cpuidle_unregister_driver(&intel_idle_driver); 2126 + init_driver_fail: 2127 + free_percpu(intel_idle_cpuidle_devices); 2128 + return retval; 2129 + } 2130 + 2140 2131 static int __init intel_idle_init(void) 2141 2132 { 2142 2133 const struct x86_cpu_id *id; ··· 2242 2059 id = x86_match_cpu(intel_idle_ids); 2243 2060 if (id) { 2244 2061 if (!boot_cpu_has(X86_FEATURE_MWAIT)) { 2062 + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 2063 + return intel_idle_vminit(id); 2245 2064 pr_debug("Please enable MWAIT in BIOS SETUP\n"); 2246 2065 return -ENODEV; 2247 2066 }
+3 -3
drivers/pinctrl/pinctrl-amd.c
··· 30 30 #include <linux/pinctrl/pinconf.h> 31 31 #include <linux/pinctrl/pinconf-generic.h> 32 32 #include <linux/pinctrl/pinmux.h> 33 + #include <linux/suspend.h> 33 34 34 35 #include "core.h" 35 36 #include "pinctrl-utils.h" ··· 637 636 regval = readl(regs + i); 638 637 639 638 if (regval & PIN_IRQ_PENDING) 640 - dev_dbg(&gpio_dev->pdev->dev, 641 - "GPIO %d is active: 0x%x", 642 - irqnr + i, regval); 639 + pm_pr_dbg("GPIO %d is active: 0x%x", 640 + irqnr + i, regval); 643 641 644 642 /* caused wake on resume context for shared IRQ */ 645 643 if (irq < 0 && (regval & BIT(WAKE_STS_OFF)))
+2 -2
drivers/platform/x86/amd/pmc.c
··· 543 543 } 544 544 545 545 if (dev) 546 - dev_dbg(pdev->dev, "SMU idlemask s0i3: 0x%x\n", val); 546 + pm_pr_dbg("SMU idlemask s0i3: 0x%x\n", val); 547 547 548 548 if (s) 549 549 seq_printf(s, "SMU idlemask : 0x%x\n", val); ··· 769 769 770 770 *arg |= (duration << 16); 771 771 rc = rtc_alarm_irq_enable(rtc_device, 0); 772 - dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration); 772 + pm_pr_dbg("wakeup timer programmed for %lld seconds\n", duration); 773 773 774 774 return rc; 775 775 }
+17 -1
drivers/powercap/Kconfig
··· 18 18 # Client driver configurations go here. 19 19 config INTEL_RAPL_CORE 20 20 tristate 21 + depends on PCI 22 + select IOSF_MBI 21 23 22 24 config INTEL_RAPL 23 25 tristate "Intel RAPL Support via MSR Interface" 24 - depends on X86 && IOSF_MBI 26 + depends on X86 && PCI 25 27 select INTEL_RAPL_CORE 26 28 help 27 29 This enables support for the Intel Running Average Power Limit (RAPL) ··· 34 32 fine grained control. These domains include processor package, DRAM 35 33 controller, CPU core (Power Plane 0), graphics uncore (Power Plane 36 34 1), etc. 35 + 36 + config INTEL_RAPL_TPMI 37 + tristate "Intel RAPL Support via TPMI Interface" 38 + depends on X86 39 + depends on INTEL_TPMI 40 + select INTEL_RAPL_CORE 41 + help 42 + This enables support for the Intel Running Average Power Limit (RAPL) 43 + technology via TPMI interface, which allows power limits to be enforced 44 + and monitored. 45 + 46 + In RAPL, the platform level settings are divided into domains for 47 + fine grained control. These domains include processor package, DRAM 48 + controller, platform, etc. 37 49 38 50 config IDLE_INJECT 39 51 bool "Idle injection framework"
+1
drivers/powercap/Makefile
··· 5 5 obj-$(CONFIG_POWERCAP) += powercap_sys.o 6 6 obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o 7 7 obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o 8 + obj-$(CONFIG_INTEL_RAPL_TPMI) += intel_rapl_tpmi.o 8 9 obj-$(CONFIG_IDLE_INJECT) += idle_inject.o 9 10 obj-$(CONFIG_ARM_SCMI_POWERCAP) += arm_scmi_powercap.o
+510 -377
drivers/powercap/intel_rapl_common.c
··· 75 75 #define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19) 76 76 #define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51) 77 77 78 + /* bitmasks for RAPL TPMI, used by primitive access functions */ 79 + #define TPMI_POWER_LIMIT_MASK 0x3FFFF 80 + #define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62) 81 + #define TPMI_TIME_WINDOW_MASK (0x7FULL<<18) 82 + #define TPMI_INFO_SPEC_MASK 0x3FFFF 83 + #define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18) 84 + #define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36) 85 + #define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54) 86 + 78 87 /* Non HW constants */ 79 88 #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ 80 89 #define RAPL_PRIMITIVE_DUMMY BIT(2) ··· 103 94 104 95 #define DOMAIN_STATE_INACTIVE BIT(0) 105 96 #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1) 106 - #define DOMAIN_STATE_BIOS_LOCKED BIT(2) 107 97 108 - static const char pl1_name[] = "long_term"; 109 - static const char pl2_name[] = "short_term"; 110 - static const char pl4_name[] = "peak_power"; 98 + static const char *pl_names[NR_POWER_LIMITS] = { 99 + [POWER_LIMIT1] = "long_term", 100 + [POWER_LIMIT2] = "short_term", 101 + [POWER_LIMIT4] = "peak_power", 102 + }; 103 + 104 + enum pl_prims { 105 + PL_ENABLE, 106 + PL_CLAMP, 107 + PL_LIMIT, 108 + PL_TIME_WINDOW, 109 + PL_MAX_POWER, 110 + PL_LOCK, 111 + }; 112 + 113 + static bool is_pl_valid(struct rapl_domain *rd, int pl) 114 + { 115 + if (pl < POWER_LIMIT1 || pl > POWER_LIMIT4) 116 + return false; 117 + return rd->rpl[pl].name ? 
true : false; 118 + } 119 + 120 + static int get_pl_lock_prim(struct rapl_domain *rd, int pl) 121 + { 122 + if (rd->rp->priv->type == RAPL_IF_TPMI) { 123 + if (pl == POWER_LIMIT1) 124 + return PL1_LOCK; 125 + if (pl == POWER_LIMIT2) 126 + return PL2_LOCK; 127 + if (pl == POWER_LIMIT4) 128 + return PL4_LOCK; 129 + } 130 + 131 + /* MSR/MMIO Interface doesn't have Lock bit for PL4 */ 132 + if (pl == POWER_LIMIT4) 133 + return -EINVAL; 134 + 135 + /* 136 + * Power Limit register that supports two power limits has a different 137 + * bit position for the Lock bit. 138 + */ 139 + if (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2)) 140 + return FW_HIGH_LOCK; 141 + return FW_LOCK; 142 + } 143 + 144 + static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim) 145 + { 146 + switch (pl) { 147 + case POWER_LIMIT1: 148 + if (prim == PL_ENABLE) 149 + return PL1_ENABLE; 150 + if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI) 151 + return PL1_CLAMP; 152 + if (prim == PL_LIMIT) 153 + return POWER_LIMIT1; 154 + if (prim == PL_TIME_WINDOW) 155 + return TIME_WINDOW1; 156 + if (prim == PL_MAX_POWER) 157 + return THERMAL_SPEC_POWER; 158 + if (prim == PL_LOCK) 159 + return get_pl_lock_prim(rd, pl); 160 + return -EINVAL; 161 + case POWER_LIMIT2: 162 + if (prim == PL_ENABLE) 163 + return PL2_ENABLE; 164 + if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI) 165 + return PL2_CLAMP; 166 + if (prim == PL_LIMIT) 167 + return POWER_LIMIT2; 168 + if (prim == PL_TIME_WINDOW) 169 + return TIME_WINDOW2; 170 + if (prim == PL_MAX_POWER) 171 + return MAX_POWER; 172 + if (prim == PL_LOCK) 173 + return get_pl_lock_prim(rd, pl); 174 + return -EINVAL; 175 + case POWER_LIMIT4: 176 + if (prim == PL_LIMIT) 177 + return POWER_LIMIT4; 178 + if (prim == PL_ENABLE) 179 + return PL4_ENABLE; 180 + /* PL4 would be around two times PL2, use same prim as PL2. 
*/ 181 + if (prim == PL_MAX_POWER) 182 + return MAX_POWER; 183 + if (prim == PL_LOCK) 184 + return get_pl_lock_prim(rd, pl); 185 + return -EINVAL; 186 + default: 187 + return -EINVAL; 188 + } 189 + } 111 190 112 191 #define power_zone_to_rapl_domain(_zone) \ 113 192 container_of(_zone, struct rapl_domain, power_zone) 114 193 115 194 struct rapl_defaults { 116 195 u8 floor_freq_reg_addr; 117 - int (*check_unit)(struct rapl_package *rp, int cpu); 196 + int (*check_unit)(struct rapl_domain *rd); 118 197 void (*set_floor_freq)(struct rapl_domain *rd, bool mode); 119 - u64 (*compute_time_window)(struct rapl_package *rp, u64 val, 198 + u64 (*compute_time_window)(struct rapl_domain *rd, u64 val, 120 199 bool to_raw); 121 200 unsigned int dram_domain_energy_unit; 122 201 unsigned int psys_domain_energy_unit; 123 202 bool spr_psys_bits; 124 203 }; 125 - static struct rapl_defaults *rapl_defaults; 204 + static struct rapl_defaults *defaults_msr; 205 + static const struct rapl_defaults defaults_tpmi; 206 + 207 + static struct rapl_defaults *get_defaults(struct rapl_package *rp) 208 + { 209 + return rp->priv->defaults; 210 + } 126 211 127 212 /* Sideband MBI registers */ 128 213 #define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2) ··· 252 149 bool xlate, u64 *data); 253 150 static int rapl_write_data_raw(struct rapl_domain *rd, 254 151 enum rapl_primitives prim, 152 + unsigned long long value); 153 + static int rapl_read_pl_data(struct rapl_domain *rd, int pl, 154 + enum pl_prims pl_prim, 155 + bool xlate, u64 *data); 156 + static int rapl_write_pl_data(struct rapl_domain *rd, int pl, 157 + enum pl_prims pl_prim, 255 158 unsigned long long value); 256 159 static u64 rapl_unit_xlate(struct rapl_domain *rd, 257 160 enum unit_type type, u64 value, int to_raw); ··· 326 217 int i, nr_pl = 0; 327 218 328 219 for (i = 0; i < NR_POWER_LIMITS; i++) { 329 - if (rd->rpl[i].name) 220 + if (is_pl_valid(rd, i)) 330 221 nr_pl++; 331 222 } 332 223 ··· 336 227 static int set_domain_enable(struct 
powercap_zone *power_zone, bool mode) 337 228 { 338 229 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 339 - 340 - if (rd->state & DOMAIN_STATE_BIOS_LOCKED) 341 - return -EACCES; 230 + struct rapl_defaults *defaults = get_defaults(rd->rp); 231 + int ret; 342 232 343 233 cpus_read_lock(); 344 - rapl_write_data_raw(rd, PL1_ENABLE, mode); 345 - if (rapl_defaults->set_floor_freq) 346 - rapl_defaults->set_floor_freq(rd, mode); 234 + ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode); 235 + if (!ret && defaults->set_floor_freq) 236 + defaults->set_floor_freq(rd, mode); 347 237 cpus_read_unlock(); 348 238 349 - return 0; 239 + return ret; 350 240 } 351 241 352 242 static int get_domain_enable(struct powercap_zone *power_zone, bool *mode) 353 243 { 354 244 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 355 245 u64 val; 246 + int ret; 356 247 357 - if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { 248 + if (rd->rpl[POWER_LIMIT1].locked) { 358 249 *mode = false; 359 250 return 0; 360 251 } 361 252 cpus_read_lock(); 362 - if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) { 363 - cpus_read_unlock(); 364 - return -EIO; 365 - } 366 - *mode = val; 253 + ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val); 254 + if (!ret) 255 + *mode = val; 367 256 cpus_read_unlock(); 368 257 369 - return 0; 258 + return ret; 370 259 } 371 260 372 261 /* per RAPL domain ops, in the order of rapl_domain_type */ ··· 420 313 { 421 314 int i, j; 422 315 423 - for (i = 0, j = 0; i < NR_POWER_LIMITS; i++) { 424 - if ((rd->rpl[i].name) && j++ == cid) { 316 + for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) { 317 + if (is_pl_valid(rd, i) && j++ == cid) { 425 318 pr_debug("%s: index %d\n", __func__, i); 426 319 return i; 427 320 } ··· 442 335 cpus_read_lock(); 443 336 rd = power_zone_to_rapl_domain(power_zone); 444 337 id = contraint_to_pl(rd, cid); 445 - if (id < 0) { 446 - ret = id; 447 - goto set_exit; 448 - } 449 - 450 338 rp = rd->rp; 451 339 
452 - if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { 453 - dev_warn(&power_zone->dev, 454 - "%s locked by BIOS, monitoring only\n", rd->name); 455 - ret = -EACCES; 456 - goto set_exit; 457 - } 458 - 459 - switch (rd->rpl[id].prim_id) { 460 - case PL1_ENABLE: 461 - rapl_write_data_raw(rd, POWER_LIMIT1, power_limit); 462 - break; 463 - case PL2_ENABLE: 464 - rapl_write_data_raw(rd, POWER_LIMIT2, power_limit); 465 - break; 466 - case PL4_ENABLE: 467 - rapl_write_data_raw(rd, POWER_LIMIT4, power_limit); 468 - break; 469 - default: 470 - ret = -EINVAL; 471 - } 340 + ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit); 472 341 if (!ret) 473 342 package_power_limit_irq_save(rp); 474 - set_exit: 475 343 cpus_read_unlock(); 476 344 return ret; 477 345 } ··· 456 374 { 457 375 struct rapl_domain *rd; 458 376 u64 val; 459 - int prim; 460 377 int ret = 0; 461 378 int id; 462 379 463 380 cpus_read_lock(); 464 381 rd = power_zone_to_rapl_domain(power_zone); 465 382 id = contraint_to_pl(rd, cid); 466 - if (id < 0) { 467 - ret = id; 468 - goto get_exit; 469 - } 470 383 471 - switch (rd->rpl[id].prim_id) { 472 - case PL1_ENABLE: 473 - prim = POWER_LIMIT1; 474 - break; 475 - case PL2_ENABLE: 476 - prim = POWER_LIMIT2; 477 - break; 478 - case PL4_ENABLE: 479 - prim = POWER_LIMIT4; 480 - break; 481 - default: 482 - cpus_read_unlock(); 483 - return -EINVAL; 484 - } 485 - if (rapl_read_data_raw(rd, prim, true, &val)) 486 - ret = -EIO; 487 - else 384 + ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val); 385 + if (!ret) 488 386 *data = val; 489 387 490 - get_exit: 491 388 cpus_read_unlock(); 492 389 493 390 return ret; ··· 482 421 cpus_read_lock(); 483 422 rd = power_zone_to_rapl_domain(power_zone); 484 423 id = contraint_to_pl(rd, cid); 485 - if (id < 0) { 486 - ret = id; 487 - goto set_time_exit; 488 - } 489 424 490 - switch (rd->rpl[id].prim_id) { 491 - case PL1_ENABLE: 492 - rapl_write_data_raw(rd, TIME_WINDOW1, window); 493 - break; 494 - case PL2_ENABLE: 495 - 
rapl_write_data_raw(rd, TIME_WINDOW2, window); 496 - break; 497 - default: 498 - ret = -EINVAL; 499 - } 425 + ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window); 500 426 501 - set_time_exit: 502 427 cpus_read_unlock(); 503 428 return ret; 504 429 } ··· 500 453 cpus_read_lock(); 501 454 rd = power_zone_to_rapl_domain(power_zone); 502 455 id = contraint_to_pl(rd, cid); 503 - if (id < 0) { 504 - ret = id; 505 - goto get_time_exit; 506 - } 507 456 508 - switch (rd->rpl[id].prim_id) { 509 - case PL1_ENABLE: 510 - ret = rapl_read_data_raw(rd, TIME_WINDOW1, true, &val); 511 - break; 512 - case PL2_ENABLE: 513 - ret = rapl_read_data_raw(rd, TIME_WINDOW2, true, &val); 514 - break; 515 - case PL4_ENABLE: 516 - /* 517 - * Time window parameter is not applicable for PL4 entry 518 - * so assigining '0' as default value. 519 - */ 520 - val = 0; 521 - break; 522 - default: 523 - cpus_read_unlock(); 524 - return -EINVAL; 525 - } 457 + ret = rapl_read_pl_data(rd, id, PL_TIME_WINDOW, true, &val); 526 458 if (!ret) 527 459 *data = val; 528 460 529 - get_time_exit: 530 461 cpus_read_unlock(); 531 462 532 463 return ret; ··· 524 499 return NULL; 525 500 } 526 501 527 - static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data) 502 + static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data) 528 503 { 529 504 struct rapl_domain *rd; 530 505 u64 val; 531 - int prim; 532 506 int ret = 0; 507 + int id; 533 508 534 509 cpus_read_lock(); 535 510 rd = power_zone_to_rapl_domain(power_zone); 536 - switch (rd->rpl[id].prim_id) { 537 - case PL1_ENABLE: 538 - prim = THERMAL_SPEC_POWER; 539 - break; 540 - case PL2_ENABLE: 541 - prim = MAX_POWER; 542 - break; 543 - case PL4_ENABLE: 544 - prim = MAX_POWER; 545 - break; 546 - default: 547 - cpus_read_unlock(); 548 - return -EINVAL; 549 - } 550 - if (rapl_read_data_raw(rd, prim, true, &val)) 551 - ret = -EIO; 552 - else 511 + id = contraint_to_pl(rd, cid); 512 + 513 + ret = rapl_read_pl_data(rd, id, 
PL_MAX_POWER, true, &val); 514 + if (!ret) 553 515 *data = val; 554 516 555 517 /* As a generalization rule, PL4 would be around two times PL2. */ 556 - if (rd->rpl[id].prim_id == PL4_ENABLE) 518 + if (id == POWER_LIMIT4) 557 519 *data = *data * 2; 558 520 559 521 cpus_read_unlock(); ··· 557 545 .get_name = get_constraint_name, 558 546 }; 559 547 548 + /* Return the id used for read_raw/write_raw callback */ 549 + static int get_rid(struct rapl_package *rp) 550 + { 551 + return rp->lead_cpu >= 0 ? rp->lead_cpu : rp->id; 552 + } 553 + 560 554 /* called after domain detection and package level data are set */ 561 555 static void rapl_init_domains(struct rapl_package *rp) 562 556 { ··· 572 554 573 555 for (i = 0; i < RAPL_DOMAIN_MAX; i++) { 574 556 unsigned int mask = rp->domain_map & (1 << i); 557 + int t; 575 558 576 559 if (!mask) 577 560 continue; ··· 581 562 582 563 if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) { 583 564 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d", 584 - topology_physical_package_id(rp->lead_cpu)); 585 - } else 565 + rp->lead_cpu >= 0 ? 
topology_physical_package_id(rp->lead_cpu) : 566 + rp->id); 567 + } else { 586 568 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s", 587 569 rapl_domain_names[i]); 588 - 589 - rd->id = i; 590 - rd->rpl[0].prim_id = PL1_ENABLE; 591 - rd->rpl[0].name = pl1_name; 592 - 593 - /* 594 - * The PL2 power domain is applicable for limits two 595 - * and limits three 596 - */ 597 - if (rp->priv->limits[i] >= 2) { 598 - rd->rpl[1].prim_id = PL2_ENABLE; 599 - rd->rpl[1].name = pl2_name; 600 570 } 601 571 602 - /* Enable PL4 domain if the total power limits are three */ 603 - if (rp->priv->limits[i] == 3) { 604 - rd->rpl[2].prim_id = PL4_ENABLE; 605 - rd->rpl[2].name = pl4_name; 572 + rd->id = i; 573 + 574 + /* PL1 is supported by default */ 575 + rp->priv->limits[i] |= BIT(POWER_LIMIT1); 576 + 577 + for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) { 578 + if (rp->priv->limits[i] & BIT(t)) 579 + rd->rpl[t].name = pl_names[t]; 606 580 } 607 581 608 582 for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++) 609 583 rd->regs[j] = rp->priv->regs[i][j]; 610 584 611 - switch (i) { 612 - case RAPL_DOMAIN_DRAM: 613 - rd->domain_energy_unit = 614 - rapl_defaults->dram_domain_energy_unit; 615 - if (rd->domain_energy_unit) 616 - pr_info("DRAM domain energy unit %dpj\n", 617 - rd->domain_energy_unit); 618 - break; 619 - case RAPL_DOMAIN_PLATFORM: 620 - rd->domain_energy_unit = 621 - rapl_defaults->psys_domain_energy_unit; 622 - if (rd->domain_energy_unit) 623 - pr_info("Platform domain energy unit %dpj\n", 624 - rd->domain_energy_unit); 625 - break; 626 - default: 627 - break; 628 - } 629 585 rd++; 630 586 } 631 587 } ··· 609 615 u64 value, int to_raw) 610 616 { 611 617 u64 units = 1; 612 - struct rapl_package *rp = rd->rp; 618 + struct rapl_defaults *defaults = get_defaults(rd->rp); 613 619 u64 scale = 1; 614 620 615 621 switch (type) { 616 622 case POWER_UNIT: 617 - units = rp->power_unit; 623 + units = rd->power_unit; 618 624 break; 619 625 case ENERGY_UNIT: 620 626 scale = ENERGY_UNIT_SCALE; 621 - 
/* per domain unit takes precedence */ 622 - if (rd->domain_energy_unit) 623 - units = rd->domain_energy_unit; 624 - else 625 - units = rp->energy_unit; 627 + units = rd->energy_unit; 626 628 break; 627 629 case TIME_UNIT: 628 - return rapl_defaults->compute_time_window(rp, value, to_raw); 630 + return defaults->compute_time_window(rd, value, to_raw); 629 631 case ARBITRARY_UNIT: 630 632 default: 631 633 return value; ··· 635 645 return div64_u64(value, scale); 636 646 } 637 647 638 - /* in the order of enum rapl_primitives */ 639 - static struct rapl_primitive_info rpi[] = { 648 + /* RAPL primitives for MSR and MMIO I/F */ 649 + static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = { 640 650 /* name, mask, shift, msr index, unit divisor */ 641 - PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, 642 - RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), 643 - PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0, 651 + [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0, 644 652 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 645 - PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, 653 + [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, 646 654 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 647 - PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0, 655 + [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0, 648 656 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0), 649 - PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31, 657 + [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, 658 + RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), 659 + [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31, 650 660 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 651 - PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, 661 + [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63, 652 662 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 653 - PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16, 663 + 
[PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, 654 664 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 655 - PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47, 665 + [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16, 656 666 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 657 - PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, 667 + [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47, 658 668 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 659 - PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0, 669 + [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, 670 + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 671 + [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0, 660 672 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), 661 - PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, 673 + [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, 662 674 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 663 - PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, 675 + [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, 664 676 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 665 - PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK, 677 + [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK, 666 678 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 667 - PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32, 679 + [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32, 668 680 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 669 - PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16, 681 + [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16, 670 682 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 671 - PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48, 683 + [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48, 672 684 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), 
673 - PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, 685 + [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, 674 686 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), 675 - PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, 687 + [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, 676 688 RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), 677 - PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0, 689 + [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0, 678 690 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 679 - PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32, 691 + [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32, 680 692 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 681 - PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17, 693 + [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17, 682 694 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 683 - PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49, 695 + [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49, 684 696 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 685 - PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19, 697 + [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19, 686 698 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 687 - PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51, 699 + [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51, 688 700 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 689 701 /* non-hardware */ 690 - PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, 702 + [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, 691 703 RAPL_PRIMITIVE_DERIVED), 692 - {NULL, 0, 0, 0}, 693 704 }; 705 + 706 + /* RAPL primitives for TPMI I/F */ 707 + static 
struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = { 708 + /* name, mask, shift, msr index, unit divisor */ 709 + [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0, 710 + RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), 711 + [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0, 712 + RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0), 713 + [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0, 714 + RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0), 715 + [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, 716 + RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), 717 + [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63, 718 + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 719 + [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63, 720 + RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0), 721 + [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63, 722 + RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), 723 + [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, 724 + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), 725 + [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, 726 + RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0), 727 + [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, 728 + RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), 729 + [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18, 730 + RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), 731 + [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18, 732 + RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0), 733 + [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0, 734 + RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 735 + [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36, 736 + RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 737 + [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18, 738 + RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), 
739 + [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54, 740 + RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), 741 + [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, 742 + RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), 743 + /* non-hardware */ 744 + [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, 745 + POWER_UNIT, RAPL_PRIMITIVE_DERIVED), 746 + }; 747 + 748 + static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim) 749 + { 750 + struct rapl_primitive_info *rpi = rp->priv->rpi; 751 + 752 + if (prim < 0 || prim > NR_RAPL_PRIMITIVES || !rpi) 753 + return NULL; 754 + 755 + return &rpi[prim]; 756 + } 757 + 758 + static int rapl_config(struct rapl_package *rp) 759 + { 760 + switch (rp->priv->type) { 761 + /* MMIO I/F shares the same register layout as MSR registers */ 762 + case RAPL_IF_MMIO: 763 + case RAPL_IF_MSR: 764 + rp->priv->defaults = (void *)defaults_msr; 765 + rp->priv->rpi = (void *)rpi_msr; 766 + break; 767 + case RAPL_IF_TPMI: 768 + rp->priv->defaults = (void *)&defaults_tpmi; 769 + rp->priv->rpi = (void *)rpi_tpmi; 770 + break; 771 + default: 772 + return -EINVAL; 773 + } 774 + return 0; 775 + } 694 776 695 777 static enum rapl_primitives 696 778 prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim) 697 779 { 698 - if (!rapl_defaults->spr_psys_bits) 780 + struct rapl_defaults *defaults = get_defaults(rd->rp); 781 + 782 + if (!defaults->spr_psys_bits) 699 783 return prim; 700 784 701 785 if (rd->id != RAPL_DOMAIN_PLATFORM) ··· 811 747 { 812 748 u64 value; 813 749 enum rapl_primitives prim_fixed = prim_fixups(rd, prim); 814 - struct rapl_primitive_info *rp = &rpi[prim_fixed]; 750 + struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed); 815 751 struct reg_action ra; 816 - int cpu; 817 752 818 - if (!rp->name || rp->flag & RAPL_PRIMITIVE_DUMMY) 753 + if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY) 819 754 return -EINVAL; 820 755 821 - 
ra.reg = rd->regs[rp->id]; 756 + ra.reg = rd->regs[rpi->id]; 822 757 if (!ra.reg) 823 758 return -EINVAL; 824 759 825 - cpu = rd->rp->lead_cpu; 826 - 827 - /* domain with 2 limits has different bit */ 828 - if (prim == FW_LOCK && rd->rp->priv->limits[rd->id] == 2) { 829 - rp->mask = POWER_HIGH_LOCK; 830 - rp->shift = 63; 831 - } 832 760 /* non-hardware data are collected by the polling thread */ 833 - if (rp->flag & RAPL_PRIMITIVE_DERIVED) { 761 + if (rpi->flag & RAPL_PRIMITIVE_DERIVED) { 834 762 *data = rd->rdd.primitives[prim]; 835 763 return 0; 836 764 } 837 765 838 - ra.mask = rp->mask; 766 + ra.mask = rpi->mask; 839 767 840 - if (rd->rp->priv->read_raw(cpu, &ra)) { 841 - pr_debug("failed to read reg 0x%llx on cpu %d\n", ra.reg, cpu); 768 + if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 769 + pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg, rd->rp->name, rd->name); 842 770 return -EIO; 843 771 } 844 772 845 - value = ra.value >> rp->shift; 773 + value = ra.value >> rpi->shift; 846 774 847 775 if (xlate) 848 - *data = rapl_unit_xlate(rd, rp->unit, value, 0); 776 + *data = rapl_unit_xlate(rd, rpi->unit, value, 0); 849 777 else 850 778 *data = value; 851 779 ··· 850 794 unsigned long long value) 851 795 { 852 796 enum rapl_primitives prim_fixed = prim_fixups(rd, prim); 853 - struct rapl_primitive_info *rp = &rpi[prim_fixed]; 854 - int cpu; 797 + struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed); 855 798 u64 bits; 856 799 struct reg_action ra; 857 800 int ret; 858 801 859 - cpu = rd->rp->lead_cpu; 860 - bits = rapl_unit_xlate(rd, rp->unit, value, 1); 861 - bits <<= rp->shift; 862 - bits &= rp->mask; 802 + if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY) 803 + return -EINVAL; 804 + 805 + bits = rapl_unit_xlate(rd, rpi->unit, value, 1); 806 + bits <<= rpi->shift; 807 + bits &= rpi->mask; 863 808 864 809 memset(&ra, 0, sizeof(ra)); 865 810 866 - ra.reg = rd->regs[rp->id]; 867 - ra.mask = rp->mask; 811 + ra.reg = rd->regs[rpi->id]; 
812 + ra.mask = rpi->mask; 868 813 ra.value = bits; 869 814 870 - ret = rd->rp->priv->write_raw(cpu, &ra); 815 + ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra); 871 816 872 817 return ret; 873 818 } 874 819 820 + static int rapl_read_pl_data(struct rapl_domain *rd, int pl, 821 + enum pl_prims pl_prim, bool xlate, u64 *data) 822 + { 823 + enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim); 824 + 825 + if (!is_pl_valid(rd, pl)) 826 + return -EINVAL; 827 + 828 + return rapl_read_data_raw(rd, prim, xlate, data); 829 + } 830 + 831 + static int rapl_write_pl_data(struct rapl_domain *rd, int pl, 832 + enum pl_prims pl_prim, 833 + unsigned long long value) 834 + { 835 + enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim); 836 + 837 + if (!is_pl_valid(rd, pl)) 838 + return -EINVAL; 839 + 840 + if (rd->rpl[pl].locked) { 841 + pr_warn("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]); 842 + return -EACCES; 843 + } 844 + 845 + return rapl_write_data_raw(rd, prim, value); 846 + } 875 847 /* 876 848 * Raw RAPL data stored in MSRs are in certain scales. 
We need to 877 849 * convert them into standard units based on the units reported in ··· 911 827 * power unit : microWatts : Represented in milliWatts by default 912 828 * time unit : microseconds: Represented in seconds by default 913 829 */ 914 - static int rapl_check_unit_core(struct rapl_package *rp, int cpu) 830 + static int rapl_check_unit_core(struct rapl_domain *rd) 915 831 { 916 832 struct reg_action ra; 917 833 u32 value; 918 834 919 - ra.reg = rp->priv->reg_unit; 835 + ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; 920 836 ra.mask = ~0; 921 - if (rp->priv->read_raw(cpu, &ra)) { 922 - pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n", 923 - rp->priv->reg_unit, cpu); 837 + if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 838 + pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", 839 + ra.reg, rd->rp->name, rd->name); 924 840 return -ENODEV; 925 841 } 926 842 927 843 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; 928 - rp->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); 844 + rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); 929 845 930 846 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; 931 - rp->power_unit = 1000000 / (1 << value); 847 + rd->power_unit = 1000000 / (1 << value); 932 848 933 849 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 934 - rp->time_unit = 1000000 / (1 << value); 850 + rd->time_unit = 1000000 / (1 << value); 935 851 936 - pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n", 937 - rp->name, rp->energy_unit, rp->time_unit, rp->power_unit); 852 + pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n", 853 + rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); 938 854 939 855 return 0; 940 856 } 941 857 942 - static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) 858 + static int rapl_check_unit_atom(struct rapl_domain *rd) 943 859 { 944 860 struct reg_action ra; 945 861 u32 value; 946 862 947 - ra.reg = 
rp->priv->reg_unit; 863 + ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; 948 864 ra.mask = ~0; 949 - if (rp->priv->read_raw(cpu, &ra)) { 950 - pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n", 951 - rp->priv->reg_unit, cpu); 865 + if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 866 + pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", 867 + ra.reg, rd->rp->name, rd->name); 952 868 return -ENODEV; 953 869 } 954 870 955 871 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; 956 - rp->energy_unit = ENERGY_UNIT_SCALE * 1 << value; 872 + rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value; 957 873 958 874 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; 959 - rp->power_unit = (1 << value) * 1000; 875 + rd->power_unit = (1 << value) * 1000; 960 876 961 877 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 962 - rp->time_unit = 1000000 / (1 << value); 878 + rd->time_unit = 1000000 / (1 << value); 963 879 964 - pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n", 965 - rp->name, rp->energy_unit, rp->time_unit, rp->power_unit); 880 + pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n", 881 + rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); 966 882 967 883 return 0; 968 884 } ··· 994 910 995 911 static void package_power_limit_irq_save(struct rapl_package *rp) 996 912 { 913 + if (rp->lead_cpu < 0) 914 + return; 915 + 997 916 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 998 917 return; 999 918 ··· 1010 923 static void package_power_limit_irq_restore(struct rapl_package *rp) 1011 924 { 1012 925 u32 l, h; 926 + 927 + if (rp->lead_cpu < 0) 928 + return; 1013 929 1014 930 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) 1015 931 return; ··· 1033 943 1034 944 static void set_floor_freq_default(struct rapl_domain *rd, bool mode) 1035 945 { 1036 - int nr_powerlimit = find_nr_power_limit(rd); 946 + int i; 1037 947 1038 948 /* always enable clamp such 
that p-state can go below OS requested 1039 949 * range. power capping priority over guranteed frequency. 1040 950 */ 1041 - rapl_write_data_raw(rd, PL1_CLAMP, mode); 951 + rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode); 1042 952 1043 - /* some domains have pl2 */ 1044 - if (nr_powerlimit > 1) { 1045 - rapl_write_data_raw(rd, PL2_ENABLE, mode); 1046 - rapl_write_data_raw(rd, PL2_CLAMP, mode); 953 + for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) { 954 + rapl_write_pl_data(rd, i, PL_ENABLE, mode); 955 + rapl_write_pl_data(rd, i, PL_CLAMP, mode); 1047 956 } 1048 957 } 1049 958 1050 959 static void set_floor_freq_atom(struct rapl_domain *rd, bool enable) 1051 960 { 1052 961 static u32 power_ctrl_orig_val; 962 + struct rapl_defaults *defaults = get_defaults(rd->rp); 1053 963 u32 mdata; 1054 964 1055 - if (!rapl_defaults->floor_freq_reg_addr) { 965 + if (!defaults->floor_freq_reg_addr) { 1056 966 pr_err("Invalid floor frequency config register\n"); 1057 967 return; 1058 968 } 1059 969 1060 970 if (!power_ctrl_orig_val) 1061 971 iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ, 1062 - rapl_defaults->floor_freq_reg_addr, 972 + defaults->floor_freq_reg_addr, 1063 973 &power_ctrl_orig_val); 1064 974 mdata = power_ctrl_orig_val; 1065 975 if (enable) { ··· 1067 977 mdata |= 1 << 8; 1068 978 } 1069 979 iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE, 1070 - rapl_defaults->floor_freq_reg_addr, mdata); 980 + defaults->floor_freq_reg_addr, mdata); 1071 981 } 1072 982 1073 - static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, 983 + static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value, 1074 984 bool to_raw) 1075 985 { 1076 986 u64 f, y; /* fraction and exp. 
used for time unit */ ··· 1082 992 if (!to_raw) { 1083 993 f = (value & 0x60) >> 5; 1084 994 y = value & 0x1f; 1085 - value = (1 << y) * (4 + f) * rp->time_unit / 4; 995 + value = (1 << y) * (4 + f) * rd->time_unit / 4; 1086 996 } else { 1087 - if (value < rp->time_unit) 997 + if (value < rd->time_unit) 1088 998 return 0; 1089 999 1090 - do_div(value, rp->time_unit); 1000 + do_div(value, rd->time_unit); 1091 1001 y = ilog2(value); 1092 1002 1093 1003 /* ··· 1103 1013 return value; 1104 1014 } 1105 1015 1106 - static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value, 1016 + static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value, 1107 1017 bool to_raw) 1108 1018 { 1109 1019 /* ··· 1111 1021 * where time_unit is default to 1 sec. Never 0. 1112 1022 */ 1113 1023 if (!to_raw) 1114 - return (value) ? value * rp->time_unit : rp->time_unit; 1024 + return (value) ? value * rd->time_unit : rd->time_unit; 1115 1025 1116 - value = div64_u64(value, rp->time_unit); 1026 + value = div64_u64(value, rd->time_unit); 1117 1027 1118 1028 return value; 1119 1029 } 1030 + 1031 + /* TPMI Unit register has different layout */ 1032 + #define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET 1033 + #define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK 1034 + #define TPMI_ENERGY_UNIT_OFFSET 0x06 1035 + #define TPMI_ENERGY_UNIT_MASK 0x7C0 1036 + #define TPMI_TIME_UNIT_OFFSET 0x0C 1037 + #define TPMI_TIME_UNIT_MASK 0xF000 1038 + 1039 + static int rapl_check_unit_tpmi(struct rapl_domain *rd) 1040 + { 1041 + struct reg_action ra; 1042 + u32 value; 1043 + 1044 + ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; 1045 + ra.mask = ~0; 1046 + if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { 1047 + pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", 1048 + ra.reg, rd->rp->name, rd->name); 1049 + return -ENODEV; 1050 + } 1051 + 1052 + value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET; 1053 + rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); 
1054 + 1055 + value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET; 1056 + rd->power_unit = 1000000 / (1 << value); 1057 + 1058 + value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET; 1059 + rd->time_unit = 1000000 / (1 << value); 1060 + 1061 + pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n", 1062 + rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); 1063 + 1064 + return 0; 1065 + } 1066 + 1067 + static const struct rapl_defaults defaults_tpmi = { 1068 + .check_unit = rapl_check_unit_tpmi, 1069 + /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */ 1070 + .set_floor_freq = set_floor_freq_default, 1071 + .compute_time_window = rapl_compute_time_window_core, 1072 + }; 1120 1073 1121 1074 static const struct rapl_defaults rapl_defaults_core = { 1122 1075 .floor_freq_reg_addr = 0, ··· 1292 1159 rp->domains[dmn].name); 1293 1160 /* exclude non-raw primitives */ 1294 1161 for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) { 1162 + struct rapl_primitive_info *rpi = get_rpi(rp, prim); 1163 + 1295 1164 if (!rapl_read_data_raw(&rp->domains[dmn], prim, 1296 - rpi[prim].unit, &val)) 1165 + rpi->unit, &val)) 1297 1166 rp->domains[dmn].rdd.primitives[prim] = val; 1298 1167 } 1299 1168 } ··· 1374 1239 return ret; 1375 1240 } 1376 1241 1377 - static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp) 1242 + static int rapl_check_domain(int domain, struct rapl_package *rp) 1378 1243 { 1379 1244 struct reg_action ra; 1380 1245 ··· 1395 1260 */ 1396 1261 1397 1262 ra.mask = ENERGY_STATUS_MASK; 1398 - if (rp->priv->read_raw(cpu, &ra) || !ra.value) 1263 + if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value) 1399 1264 return -ENODEV; 1400 1265 1266 + return 0; 1267 + } 1268 + 1269 + /* 1270 + * Get per domain energy/power/time unit. 1271 + * RAPL Interfaces without per domain unit register will use the package 1272 + * scope unit register to set per domain units. 
1273 + */ 1274 + static int rapl_get_domain_unit(struct rapl_domain *rd) 1275 + { 1276 + struct rapl_defaults *defaults = get_defaults(rd->rp); 1277 + int ret; 1278 + 1279 + if (!rd->regs[RAPL_DOMAIN_REG_UNIT]) { 1280 + if (!rd->rp->priv->reg_unit) { 1281 + pr_err("No valid Unit register found\n"); 1282 + return -ENODEV; 1283 + } 1284 + rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit; 1285 + } 1286 + 1287 + if (!defaults->check_unit) { 1288 + pr_err("missing .check_unit() callback\n"); 1289 + return -ENODEV; 1290 + } 1291 + 1292 + ret = defaults->check_unit(rd); 1293 + if (ret) 1294 + return ret; 1295 + 1296 + if (rd->id == RAPL_DOMAIN_DRAM && defaults->dram_domain_energy_unit) 1297 + rd->energy_unit = defaults->dram_domain_energy_unit; 1298 + if (rd->id == RAPL_DOMAIN_PLATFORM && defaults->psys_domain_energy_unit) 1299 + rd->energy_unit = defaults->psys_domain_energy_unit; 1401 1300 return 0; 1402 1301 } 1403 1302 ··· 1449 1280 u64 val64; 1450 1281 int i; 1451 1282 1452 - /* check if the domain is locked by BIOS, ignore if MSR doesn't exist */ 1453 - if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) { 1454 - if (val64) { 1455 - pr_info("RAPL %s domain %s locked by BIOS\n", 1456 - rd->rp->name, rd->name); 1457 - rd->state |= DOMAIN_STATE_BIOS_LOCKED; 1283 + for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { 1284 + if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) { 1285 + if (val64) { 1286 + rd->rpl[i].locked = true; 1287 + pr_info("%s:%s:%s locked by BIOS\n", 1288 + rd->rp->name, rd->name, pl_names[i]); 1289 + } 1458 1290 } 1459 - } 1460 - /* check if power limit MSR exists, otherwise domain is monitoring only */ 1461 - for (i = 0; i < NR_POWER_LIMITS; i++) { 1462 - int prim = rd->rpl[i].prim_id; 1463 1291 1464 - if (rapl_read_data_raw(rd, prim, false, &val64)) 1292 + if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64)) 1465 1293 rd->rpl[i].name = NULL; 1466 1294 } 1467 1295 } ··· 1466 1300 /* Detect active and valid domains for the given CPU, 
caller must 1467 1301 * ensure the CPU belongs to the targeted package and CPU hotlug is disabled. 1468 1302 */ 1469 - static int rapl_detect_domains(struct rapl_package *rp, int cpu) 1303 + static int rapl_detect_domains(struct rapl_package *rp) 1470 1304 { 1471 1305 struct rapl_domain *rd; 1472 1306 int i; 1473 1307 1474 1308 for (i = 0; i < RAPL_DOMAIN_MAX; i++) { 1475 1309 /* use physical package id to read counters */ 1476 - if (!rapl_check_domain(cpu, i, rp)) { 1310 + if (!rapl_check_domain(i, rp)) { 1477 1311 rp->domain_map |= 1 << i; 1478 1312 pr_info("Found RAPL domain %s\n", rapl_domain_names[i]); 1479 1313 } ··· 1492 1326 1493 1327 rapl_init_domains(rp); 1494 1328 1495 - for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) 1329 + for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1330 + rapl_get_domain_unit(rd); 1496 1331 rapl_detect_powerlimit(rd); 1332 + } 1497 1333 1498 1334 return 0; 1499 1335 } ··· 1508 1340 package_power_limit_irq_restore(rp); 1509 1341 1510 1342 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1511 - rapl_write_data_raw(rd, PL1_ENABLE, 0); 1512 - rapl_write_data_raw(rd, PL1_CLAMP, 0); 1513 - if (find_nr_power_limit(rd) > 1) { 1514 - rapl_write_data_raw(rd, PL2_ENABLE, 0); 1515 - rapl_write_data_raw(rd, PL2_CLAMP, 0); 1516 - rapl_write_data_raw(rd, PL4_ENABLE, 0); 1343 + int i; 1344 + 1345 + for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { 1346 + rapl_write_pl_data(rd, i, PL_ENABLE, 0); 1347 + rapl_write_pl_data(rd, i, PL_CLAMP, 0); 1517 1348 } 1349 + 1518 1350 if (rd->id == RAPL_DOMAIN_PACKAGE) { 1519 1351 rd_package = rd; 1520 1352 continue; ··· 1533 1365 EXPORT_SYMBOL_GPL(rapl_remove_package); 1534 1366 1535 1367 /* caller to ensure CPU hotplug lock is held */ 1536 - struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv) 1368 + struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu) 1537 1369 { 1538 - int id = 
topology_logical_die_id(cpu); 1539 1370 struct rapl_package *rp; 1371 + int uid; 1372 + 1373 + if (id_is_cpu) 1374 + uid = topology_logical_die_id(id); 1375 + else 1376 + uid = id; 1540 1377 1541 1378 list_for_each_entry(rp, &rapl_packages, plist) { 1542 - if (rp->id == id 1379 + if (rp->id == uid 1543 1380 && rp->priv->control_type == priv->control_type) 1544 1381 return rp; 1545 1382 } ··· 1554 1381 EXPORT_SYMBOL_GPL(rapl_find_package_domain); 1555 1382 1556 1383 /* called from CPU hotplug notifier, hotplug lock held */ 1557 - struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv) 1384 + struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu) 1558 1385 { 1559 - int id = topology_logical_die_id(cpu); 1560 1386 struct rapl_package *rp; 1561 1387 int ret; 1562 - 1563 - if (!rapl_defaults) 1564 - return ERR_PTR(-ENODEV); 1565 1388 1566 1389 rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL); 1567 1390 if (!rp) 1568 1391 return ERR_PTR(-ENOMEM); 1569 1392 1570 - /* add the new package to the list */ 1571 - rp->id = id; 1572 - rp->lead_cpu = cpu; 1573 - rp->priv = priv; 1393 + if (id_is_cpu) { 1394 + rp->id = topology_logical_die_id(id); 1395 + rp->lead_cpu = id; 1396 + if (topology_max_die_per_package() > 1) 1397 + snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d", 1398 + topology_physical_package_id(id), topology_die_id(id)); 1399 + else 1400 + snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", 1401 + topology_physical_package_id(id)); 1402 + } else { 1403 + rp->id = id; 1404 + rp->lead_cpu = -1; 1405 + snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id); 1406 + } 1574 1407 1575 - if (topology_max_die_per_package() > 1) 1576 - snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, 1577 - "package-%d-die-%d", 1578 - topology_physical_package_id(cpu), topology_die_id(cpu)); 1579 - else 1580 - snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", 1581 - 
topology_physical_package_id(cpu)); 1408 + rp->priv = priv; 1409 + ret = rapl_config(rp); 1410 + if (ret) 1411 + goto err_free_package; 1582 1412 1583 1413 /* check if the package contains valid domains */ 1584 - if (rapl_detect_domains(rp, cpu) || rapl_defaults->check_unit(rp, cpu)) { 1414 + if (rapl_detect_domains(rp)) { 1585 1415 ret = -ENODEV; 1586 1416 goto err_free_package; 1587 1417 } ··· 1606 1430 { 1607 1431 struct rapl_package *rp; 1608 1432 struct rapl_domain *rd; 1609 - int nr_pl, ret, i; 1433 + int ret, i; 1610 1434 1611 1435 cpus_read_lock(); 1612 1436 list_for_each_entry(rp, &rapl_packages, plist) { 1613 1437 if (!rp->power_zone) 1614 1438 continue; 1615 1439 rd = power_zone_to_rapl_domain(rp->power_zone); 1616 - nr_pl = find_nr_power_limit(rd); 1617 - for (i = 0; i < nr_pl; i++) { 1618 - switch (rd->rpl[i].prim_id) { 1619 - case PL1_ENABLE: 1620 - ret = rapl_read_data_raw(rd, 1621 - POWER_LIMIT1, true, 1440 + for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { 1441 + ret = rapl_read_pl_data(rd, i, PL_LIMIT, true, 1622 1442 &rd->rpl[i].last_power_limit); 1623 - if (ret) 1624 - rd->rpl[i].last_power_limit = 0; 1625 - break; 1626 - case PL2_ENABLE: 1627 - ret = rapl_read_data_raw(rd, 1628 - POWER_LIMIT2, true, 1629 - &rd->rpl[i].last_power_limit); 1630 - if (ret) 1631 - rd->rpl[i].last_power_limit = 0; 1632 - break; 1633 - case PL4_ENABLE: 1634 - ret = rapl_read_data_raw(rd, 1635 - POWER_LIMIT4, true, 1636 - &rd->rpl[i].last_power_limit); 1637 - if (ret) 1638 - rd->rpl[i].last_power_limit = 0; 1639 - break; 1640 - } 1443 + if (ret) 1444 + rd->rpl[i].last_power_limit = 0; 1641 1445 } 1642 1446 } 1643 1447 cpus_read_unlock(); ··· 1627 1471 { 1628 1472 struct rapl_package *rp; 1629 1473 struct rapl_domain *rd; 1630 - int nr_pl, i; 1474 + int i; 1631 1475 1632 1476 cpus_read_lock(); 1633 1477 list_for_each_entry(rp, &rapl_packages, plist) { 1634 1478 if (!rp->power_zone) 1635 1479 continue; 1636 1480 rd = power_zone_to_rapl_domain(rp->power_zone); 1637 - 
nr_pl = find_nr_power_limit(rd); 1638 - for (i = 0; i < nr_pl; i++) { 1639 - switch (rd->rpl[i].prim_id) { 1640 - case PL1_ENABLE: 1641 - if (rd->rpl[i].last_power_limit) 1642 - rapl_write_data_raw(rd, POWER_LIMIT1, 1643 - rd->rpl[i].last_power_limit); 1644 - break; 1645 - case PL2_ENABLE: 1646 - if (rd->rpl[i].last_power_limit) 1647 - rapl_write_data_raw(rd, POWER_LIMIT2, 1648 - rd->rpl[i].last_power_limit); 1649 - break; 1650 - case PL4_ENABLE: 1651 - if (rd->rpl[i].last_power_limit) 1652 - rapl_write_data_raw(rd, POWER_LIMIT4, 1653 - rd->rpl[i].last_power_limit); 1654 - break; 1655 - } 1656 - } 1481 + for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) 1482 + if (rd->rpl[i].last_power_limit) 1483 + rapl_write_pl_data(rd, i, PL_LIMIT, 1484 + rd->rpl[i].last_power_limit); 1657 1485 } 1658 1486 cpus_read_unlock(); 1659 1487 } ··· 1668 1528 int ret; 1669 1529 1670 1530 id = x86_match_cpu(rapl_ids); 1671 - if (!id) { 1672 - pr_err("driver does not support CPU family %d model %d\n", 1673 - boot_cpu_data.x86, boot_cpu_data.x86_model); 1531 + if (id) { 1532 + defaults_msr = (struct rapl_defaults *)id->driver_data; 1674 1533 1675 - return -ENODEV; 1534 + rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0); 1535 + if (!rapl_msr_platdev) 1536 + return -ENOMEM; 1537 + 1538 + ret = platform_device_add(rapl_msr_platdev); 1539 + if (ret) { 1540 + platform_device_put(rapl_msr_platdev); 1541 + return ret; 1542 + } 1676 1543 } 1677 - 1678 - rapl_defaults = (struct rapl_defaults *)id->driver_data; 1679 1544 1680 1545 ret = register_pm_notifier(&rapl_pm_notifier); 1681 - if (ret) 1682 - return ret; 1683 - 1684 - rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0); 1685 - if (!rapl_msr_platdev) { 1686 - ret = -ENOMEM; 1687 - goto end; 1688 - } 1689 - 1690 - ret = platform_device_add(rapl_msr_platdev); 1691 - if (ret) 1546 + if (ret && rapl_msr_platdev) { 1547 + platform_device_del(rapl_msr_platdev); 1692 1548 platform_device_put(rapl_msr_platdev); 1693 - 1694 - end: 
1695 - if (ret) 1696 - unregister_pm_notifier(&rapl_pm_notifier); 1549 + } 1697 1550 1698 1551 return ret; 1699 1552 }
+16 -15
drivers/powercap/intel_rapl_msr.c
··· 22 22 #include <linux/processor.h> 23 23 #include <linux/platform_device.h> 24 24 25 - #include <asm/iosf_mbi.h> 26 25 #include <asm/cpu_device_id.h> 27 26 #include <asm/intel-family.h> 28 27 ··· 33 34 static struct rapl_if_priv *rapl_msr_priv; 34 35 35 36 static struct rapl_if_priv rapl_msr_priv_intel = { 37 + .type = RAPL_IF_MSR, 36 38 .reg_unit = MSR_RAPL_POWER_UNIT, 37 39 .regs[RAPL_DOMAIN_PACKAGE] = { 38 40 MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO }, ··· 45 45 MSR_DRAM_POWER_LIMIT, MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, 0, MSR_DRAM_POWER_INFO }, 46 46 .regs[RAPL_DOMAIN_PLATFORM] = { 47 47 MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0}, 48 - .limits[RAPL_DOMAIN_PACKAGE] = 2, 49 - .limits[RAPL_DOMAIN_PLATFORM] = 2, 48 + .limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2), 49 + .limits[RAPL_DOMAIN_PLATFORM] = BIT(POWER_LIMIT2), 50 50 }; 51 51 52 52 static struct rapl_if_priv rapl_msr_priv_amd = { 53 + .type = RAPL_IF_MSR, 53 54 .reg_unit = MSR_AMD_RAPL_POWER_UNIT, 54 55 .regs[RAPL_DOMAIN_PACKAGE] = { 55 56 0, MSR_AMD_PKG_ENERGY_STATUS, 0, 0, 0 }, ··· 69 68 { 70 69 struct rapl_package *rp; 71 70 72 - rp = rapl_find_package_domain(cpu, rapl_msr_priv); 71 + rp = rapl_find_package_domain(cpu, rapl_msr_priv, true); 73 72 if (!rp) { 74 - rp = rapl_add_package(cpu, rapl_msr_priv); 73 + rp = rapl_add_package(cpu, rapl_msr_priv, true); 75 74 if (IS_ERR(rp)) 76 75 return PTR_ERR(rp); 77 76 } ··· 84 83 struct rapl_package *rp; 85 84 int lead_cpu; 86 85 87 - rp = rapl_find_package_domain(cpu, rapl_msr_priv); 86 + rp = rapl_find_package_domain(cpu, rapl_msr_priv, true); 88 87 if (!rp) 89 88 return 0; 90 89 ··· 138 137 139 138 /* List of verified CPUs. 
*/ 140 139 static const struct x86_cpu_id pl4_support_ids[] = { 141 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY }, 142 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY }, 143 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY }, 144 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_N, X86_FEATURE_ANY }, 145 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY }, 146 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE_P, X86_FEATURE_ANY }, 147 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_METEORLAKE, X86_FEATURE_ANY }, 148 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_METEORLAKE_L, X86_FEATURE_ANY }, 140 + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL), 141 + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL), 142 + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL), 143 + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL), 144 + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL), 145 + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL), 146 + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, NULL), 147 + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, NULL), 149 148 {} 150 149 }; 151 150 ··· 170 169 rapl_msr_priv->write_raw = rapl_msr_write_raw; 171 170 172 171 if (id) { 173 - rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] = 3; 172 + rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] |= BIT(POWER_LIMIT4); 174 173 rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4] = 175 174 MSR_VR_CURRENT_CONFIG; 176 175 pr_info("PL4 support detected.\n");
+325
drivers/powercap/intel_rapl_tpmi.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_rapl_tpmi: Intel RAPL driver via TPMI interface
 *
 * Registers one RAPL "package" per TPMI-RAPL auxiliary device and exposes
 * its power-limit/energy registers to the common intel_rapl core through
 * the rapl_if_priv read_raw/write_raw callbacks (MMIO readq/writeq).
 *
 * Copyright (c) 2023, Intel Corporation.
 * All Rights Reserved.
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/auxiliary_bus.h>
#include <linux/io.h>
#include <linux/intel_tpmi.h>
#include <linux/intel_rapl.h>
#include <linux/module.h>
#include <linux/slab.h>

/* Only TPMI-RAPL feature version 1 layouts are understood by this driver. */
#define TPMI_RAPL_VERSION 1

/* 1 header + 10 registers + 5 reserved. 8 bytes for each. */
#define TPMI_RAPL_DOMAIN_SIZE 128

/* Domain type as encoded in bits 8-15 of the per-domain header register. */
enum tpmi_rapl_domain_type {
	TPMI_RAPL_DOMAIN_INVALID,
	TPMI_RAPL_DOMAIN_SYSTEM,
	TPMI_RAPL_DOMAIN_PACKAGE,
	TPMI_RAPL_DOMAIN_RESERVED,
	TPMI_RAPL_DOMAIN_MEMORY,
	TPMI_RAPL_DOMAIN_MAX,
};

/*
 * Index of each 8-byte register within a 128-byte TPMI RAPL domain.
 * The same indices are used as bit positions in the domain-header
 * "flags" field to advertise which registers are implemented.
 */
enum tpmi_rapl_register {
	TPMI_RAPL_REG_HEADER,
	TPMI_RAPL_REG_UNIT,
	TPMI_RAPL_REG_PL1,
	TPMI_RAPL_REG_PL2,
	TPMI_RAPL_REG_PL3,
	TPMI_RAPL_REG_PL4,
	TPMI_RAPL_REG_RESERVED,
	TPMI_RAPL_REG_ENERGY_STATUS,
	TPMI_RAPL_REG_PERF_STATUS,
	TPMI_RAPL_REG_POWER_INFO,
	TPMI_RAPL_REG_INTERRUPT,
	TPMI_RAPL_REG_MAX = 15,
};

/*
 * Per-auxiliary-device (per physical package) state.
 * @priv:      interface descriptor handed to the intel_rapl core
 * @tpmi_info: platform info from the TPMI parent (provides package_id)
 * @rp:        package registered with the RAPL core
 * @base:      ioremapped base of the device's single TPMI resource
 * @node:      link on tpmi_rapl_packages, protected by tpmi_rapl_lock
 */
struct tpmi_rapl_package {
	struct rapl_if_priv priv;
	struct intel_tpmi_plat_info *tpmi_info;
	struct rapl_package *rp;
	void __iomem *base;
	struct list_head node;
};

/* All known packages; list and tpmi_control_type guarded by tpmi_rapl_lock. */
static LIST_HEAD(tpmi_rapl_packages);
static DEFINE_MUTEX(tpmi_rapl_lock);

/* Shared powercap control type, created when the first package is added. */
static struct powercap_control_type *tpmi_control_type;

/*
 * Read one RAPL register. ra->reg carries the MMIO address of the register
 * (stored as u64 by parse_one_domain); ra->mask selects the field of
 * interest. The @id argument is unused for TPMI (no per-CPU addressing).
 */
static int tpmi_rapl_read_raw(int id, struct reg_action *ra)
{
	if (!ra->reg)
		return -EINVAL;

	ra->value = readq((void __iomem *)ra->reg);

	ra->value &= ra->mask;
	return 0;
}

/*
 * Read-modify-write one RAPL register: only the bits in ra->mask are
 * replaced with ra->value, all other bits are preserved.
 */
static int tpmi_rapl_write_raw(int id, struct reg_action *ra)
{
	u64 val;

	if (!ra->reg)
		return -EINVAL;

	val = readq((void __iomem *)ra->reg);

	val &= ~ra->mask;
	val |= ra->value;

	writeq(val, (void __iomem *)ra->reg);
	return 0;
}

/*
 * Allocate a package entry and add it to the global list. The powercap
 * control type is registered lazily when the list transitions from empty,
 * and torn down again on the error path if this allocation was the first.
 * NOTE(review): @pkg_id is currently unused here — duplicate-package
 * detection happens later via rapl_find_package_domain() in probe.
 */
static struct tpmi_rapl_package *trp_alloc(int pkg_id)
{
	struct tpmi_rapl_package *trp;
	int ret;

	mutex_lock(&tpmi_rapl_lock);

	if (list_empty(&tpmi_rapl_packages)) {
		tpmi_control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
		if (IS_ERR(tpmi_control_type)) {
			ret = PTR_ERR(tpmi_control_type);
			goto err_unlock;
		}
	}

	trp = kzalloc(sizeof(*trp), GFP_KERNEL);
	if (!trp) {
		ret = -ENOMEM;
		goto err_del_powercap;
	}

	list_add(&trp->node, &tpmi_rapl_packages);

	mutex_unlock(&tpmi_rapl_lock);
	return trp;

err_del_powercap:
	/* Undo the lazy registration only if no other package holds it. */
	if (list_empty(&tpmi_rapl_packages))
		powercap_unregister_control_type(tpmi_control_type);
err_unlock:
	mutex_unlock(&tpmi_rapl_lock);
	return ERR_PTR(ret);
}

/*
 * Unlink and free a package entry; unregister the shared powercap control
 * type when the last package goes away. Mirror image of trp_alloc().
 */
static void trp_release(struct tpmi_rapl_package *trp)
{
	mutex_lock(&tpmi_rapl_lock);
	list_del(&trp->node);

	if (list_empty(&tpmi_rapl_packages))
		powercap_unregister_control_type(tpmi_control_type);

	kfree(trp);
	mutex_unlock(&tpmi_rapl_lock);
}

/*
 * Decode one 128-byte TPMI RAPL domain at @offset from trp->base and
 * populate trp->priv with the MMIO addresses of its registers.
 *
 * The 64-bit domain header is laid out as (per the shifts below):
 *   bits  0-7   version (must be TPMI_RAPL_VERSION)
 *   bits  8-15  domain type (enum tpmi_rapl_domain_type)
 *   bits 16-23  domain size in 128-byte units (must be 1)
 *   bits 32-47  flags: BIT(reg_index) set for each implemented register
 *
 * Returns 0 on success, -ENODEV/-EINVAL on malformed firmware tables.
 */
static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
{
	u8 tpmi_domain_version;
	enum rapl_domain_type domain_type;
	enum tpmi_rapl_domain_type tpmi_domain_type;
	enum tpmi_rapl_register reg_index;
	enum rapl_domain_reg_id reg_id;
	int tpmi_domain_size, tpmi_domain_flags;
	u64 *tpmi_rapl_regs = trp->base + offset;
	u64 tpmi_domain_header = readq((void __iomem *)tpmi_rapl_regs);

	/* Domain Parent bits are ignored for now */
	tpmi_domain_version = tpmi_domain_header & 0xff;
	tpmi_domain_type = tpmi_domain_header >> 8 & 0xff;
	tpmi_domain_size = tpmi_domain_header >> 16 & 0xff;
	tpmi_domain_flags = tpmi_domain_header >> 32 & 0xffff;

	if (tpmi_domain_version != TPMI_RAPL_VERSION) {
		pr_warn(FW_BUG "Unsupported version:%d\n", tpmi_domain_version);
		return -ENODEV;
	}

	/* Domain size: in unit of 128 Bytes */
	if (tpmi_domain_size != 1) {
		pr_warn(FW_BUG "Invalid Domain size %d\n", tpmi_domain_size);
		return -EINVAL;
	}

	/* Unit register and Energy Status register are mandatory for each domain */
	if (!(tpmi_domain_flags & BIT(TPMI_RAPL_REG_UNIT)) ||
	    !(tpmi_domain_flags & BIT(TPMI_RAPL_REG_ENERGY_STATUS))) {
		pr_warn(FW_BUG "Invalid Domain flag 0x%x\n", tpmi_domain_flags);
		return -EINVAL;
	}

	/* Map the TPMI domain type onto the generic RAPL domain type. */
	switch (tpmi_domain_type) {
	case TPMI_RAPL_DOMAIN_PACKAGE:
		domain_type = RAPL_DOMAIN_PACKAGE;
		break;
	case TPMI_RAPL_DOMAIN_SYSTEM:
		domain_type = RAPL_DOMAIN_PLATFORM;
		break;
	case TPMI_RAPL_DOMAIN_MEMORY:
		domain_type = RAPL_DOMAIN_DRAM;
		break;
	default:
		pr_warn(FW_BUG "Unsupported Domain type %d\n", tpmi_domain_type);
		return -EINVAL;
	}

	/* A mandatory UNIT reg already recorded means this type was seen before. */
	if (trp->priv.regs[domain_type][RAPL_DOMAIN_REG_UNIT]) {
		pr_warn(FW_BUG "Duplicate Domain type %d\n", tpmi_domain_type);
		return -EINVAL;
	}

	/*
	 * Walk every advertised register after the header and record its MMIO
	 * address. Registers with no RAPL-core counterpart (e.g. PL3,
	 * INTERRUPT) fall through to default and are skipped.
	 */
	reg_index = TPMI_RAPL_REG_HEADER;
	while (++reg_index != TPMI_RAPL_REG_MAX) {
		if (!(tpmi_domain_flags & BIT(reg_index)))
			continue;

		switch (reg_index) {
		case TPMI_RAPL_REG_UNIT:
			reg_id = RAPL_DOMAIN_REG_UNIT;
			break;
		case TPMI_RAPL_REG_PL1:
			reg_id = RAPL_DOMAIN_REG_LIMIT;
			trp->priv.limits[domain_type] |= BIT(POWER_LIMIT1);
			break;
		case TPMI_RAPL_REG_PL2:
			reg_id = RAPL_DOMAIN_REG_PL2;
			trp->priv.limits[domain_type] |= BIT(POWER_LIMIT2);
			break;
		case TPMI_RAPL_REG_PL4:
			reg_id = RAPL_DOMAIN_REG_PL4;
			trp->priv.limits[domain_type] |= BIT(POWER_LIMIT4);
			break;
		case TPMI_RAPL_REG_ENERGY_STATUS:
			reg_id = RAPL_DOMAIN_REG_STATUS;
			break;
		case TPMI_RAPL_REG_PERF_STATUS:
			reg_id = RAPL_DOMAIN_REG_PERF;
			break;
		case TPMI_RAPL_REG_POWER_INFO:
			reg_id = RAPL_DOMAIN_REG_INFO;
			break;
		default:
			continue;
		}
		/* Address stashed as u64; read_raw/write_raw cast it back to __iomem. */
		trp->priv.regs[domain_type][reg_id] = (u64)&tpmi_rapl_regs[reg_index];
	}

	return 0;
}

/*
 * Probe one TPMI-RAPL auxiliary device: map its single resource, parse
 * every 128-byte domain in it, then register the package with the RAPL
 * core. All failures funnel through trp_release() for cleanup.
 */
static int intel_rapl_tpmi_probe(struct auxiliary_device *auxdev,
				 const struct auxiliary_device_id *id)
{
	struct tpmi_rapl_package *trp;
	struct intel_tpmi_plat_info *info;
	struct resource *res;
	u32 offset;
	int ret;

	info = tpmi_get_platform_data(auxdev);
	if (!info)
		return -ENODEV;

	trp = trp_alloc(info->package_id);
	if (IS_ERR(trp))
		return PTR_ERR(trp);

	if (tpmi_get_resource_count(auxdev) > 1) {
		dev_err(&auxdev->dev, "does not support multiple resources\n");
		ret = -EINVAL;
		goto err;
	}

	res = tpmi_get_resource_at_index(auxdev, 0);
	if (!res) {
		dev_err(&auxdev->dev, "can't fetch device resource info\n");
		ret = -EIO;
		goto err;
	}

	trp->base = devm_ioremap_resource(&auxdev->dev, res);
	if (IS_ERR(trp->base)) {
		ret = PTR_ERR(trp->base);
		goto err;
	}

	/* The resource is a dense array of TPMI_RAPL_DOMAIN_SIZE domains. */
	for (offset = 0; offset < resource_size(res); offset += TPMI_RAPL_DOMAIN_SIZE) {
		ret = parse_one_domain(trp, offset);
		if (ret)
			goto err;
	}

	trp->tpmi_info = info;
	trp->priv.type = RAPL_IF_TPMI;
	trp->priv.read_raw = tpmi_rapl_read_raw;
	trp->priv.write_raw = tpmi_rapl_write_raw;
	trp->priv.control_type = tpmi_control_type;

	/* RAPL TPMI I/F is per physical package */
	trp->rp = rapl_find_package_domain(info->package_id, &trp->priv, false);
	if (trp->rp) {
		dev_err(&auxdev->dev, "Domain for Package%d already exists\n", info->package_id);
		ret = -EEXIST;
		goto err;
	}

	trp->rp = rapl_add_package(info->package_id, &trp->priv, false);
	if (IS_ERR(trp->rp)) {
		dev_err(&auxdev->dev, "Failed to add RAPL Domain for Package%d, %ld\n",
			info->package_id, PTR_ERR(trp->rp));
		ret = PTR_ERR(trp->rp);
		goto err;
	}

	auxiliary_set_drvdata(auxdev, trp);

	return 0;
err:
	trp_release(trp);
	return ret;
}

/* Tear down in reverse probe order: RAPL core first, then local state. */
static void intel_rapl_tpmi_remove(struct auxiliary_device *auxdev)
{
	struct tpmi_rapl_package *trp = auxiliary_get_drvdata(auxdev);

	rapl_remove_package(trp->rp);
	trp_release(trp);
}

static const struct auxiliary_device_id intel_rapl_tpmi_ids[] = {
	{.name = "intel_vsec.tpmi-rapl" },
	{ }
};

MODULE_DEVICE_TABLE(auxiliary, intel_rapl_tpmi_ids);

static struct auxiliary_driver intel_rapl_tpmi_driver = {
	.probe = intel_rapl_tpmi_probe,
	.remove = intel_rapl_tpmi_remove,
	.id_table = intel_rapl_tpmi_ids,
};

module_auxiliary_driver(intel_rapl_tpmi_driver)

MODULE_IMPORT_NS(INTEL_TPMI);

MODULE_DESCRIPTION("Intel RAPL TPMI Driver");
MODULE_LICENSE("GPL");
+6 -5
drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
··· 15 15 .reg_unit = 0x5938, 16 16 .regs[RAPL_DOMAIN_PACKAGE] = { 0x59a0, 0x593c, 0x58f0, 0, 0x5930}, 17 17 .regs[RAPL_DOMAIN_DRAM] = { 0x58e0, 0x58e8, 0x58ec, 0, 0}, 18 - .limits[RAPL_DOMAIN_PACKAGE] = 2, 19 - .limits[RAPL_DOMAIN_DRAM] = 2, 18 + .limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2), 19 + .limits[RAPL_DOMAIN_DRAM] = BIT(POWER_LIMIT2), 20 20 }; 21 21 22 22 static int rapl_mmio_cpu_online(unsigned int cpu) ··· 27 27 if (topology_physical_package_id(cpu)) 28 28 return 0; 29 29 30 - rp = rapl_find_package_domain(cpu, &rapl_mmio_priv); 30 + rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true); 31 31 if (!rp) { 32 - rp = rapl_add_package(cpu, &rapl_mmio_priv); 32 + rp = rapl_add_package(cpu, &rapl_mmio_priv, true); 33 33 if (IS_ERR(rp)) 34 34 return PTR_ERR(rp); 35 35 } ··· 42 42 struct rapl_package *rp; 43 43 int lead_cpu; 44 44 45 - rp = rapl_find_package_domain(cpu, &rapl_mmio_priv); 45 + rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true); 46 46 if (!rp) 47 47 return 0; 48 48 ··· 97 97 rapl_regs->regs[domain][reg]; 98 98 rapl_mmio_priv.limits[domain] = rapl_regs->limits[domain]; 99 99 } 100 + rapl_mmio_priv.type = RAPL_IF_MMIO; 100 101 rapl_mmio_priv.reg_unit = (u64)proc_priv->mmio_base + rapl_regs->reg_unit; 101 102 102 103 rapl_mmio_priv.read_raw = rapl_mmio_read_raw;
+2 -1
include/acpi/actbl.h
··· 307 307 PM_SOHO_SERVER = 5, 308 308 PM_APPLIANCE_PC = 6, 309 309 PM_PERFORMANCE_SERVER = 7, 310 - PM_TABLET = 8 310 + PM_TABLET = 8, 311 + NR_PM_PROFILES = 9 311 312 }; 312 313 313 314 /* Values for sleep_status and sleep_control registers (V5+ FADT) */
+3 -1
include/linux/amd-pstate.h
··· 94 94 * enum amd_pstate_mode - driver working mode of amd pstate 95 95 */ 96 96 enum amd_pstate_mode { 97 - AMD_PSTATE_DISABLE = 0, 97 + AMD_PSTATE_UNDEFINED = 0, 98 + AMD_PSTATE_DISABLE, 98 99 AMD_PSTATE_PASSIVE, 99 100 AMD_PSTATE_ACTIVE, 100 101 AMD_PSTATE_GUIDED, ··· 103 102 }; 104 103 105 104 static const char * const amd_pstate_mode_string[] = { 105 + [AMD_PSTATE_UNDEFINED] = "undefined", 106 106 [AMD_PSTATE_DISABLE] = "disable", 107 107 [AMD_PSTATE_PASSIVE] = "passive", 108 108 [AMD_PSTATE_ACTIVE] = "active",
+4 -1
include/linux/cpufreq.h
··· 340 340 /* 341 341 * ->fast_switch() replacement for drivers that use an internal 342 342 * representation of performance levels and can pass hints other than 343 - * the target performance level to the hardware. 343 + * the target performance level to the hardware. This can only be set 344 + * if ->fast_switch is set too, because in those cases (under specific 345 + * conditions) scale invariance can be disabled, which causes the 346 + * schedutil governor to fall back to the latter. 344 347 */ 345 348 void (*adjust_perf)(unsigned int cpu, 346 349 unsigned long min_perf,
+2 -1
include/linux/devfreq.h
··· 108 108 unsigned long initial_freq; 109 109 unsigned int polling_ms; 110 110 enum devfreq_timer timer; 111 - bool is_cooling_device; 112 111 113 112 int (*target)(struct device *dev, unsigned long *freq, u32 flags); 114 113 int (*get_dev_status)(struct device *dev, ··· 117 118 118 119 unsigned long *freq_table; 119 120 unsigned int max_state; 121 + 122 + bool is_cooling_device; 120 123 }; 121 124 122 125 /**
+28 -12
include/linux/intel_rapl.h
··· 14 14 #include <linux/powercap.h> 15 15 #include <linux/cpuhotplug.h> 16 16 17 + enum rapl_if_type { 18 + RAPL_IF_MSR, /* RAPL I/F using MSR registers */ 19 + RAPL_IF_MMIO, /* RAPL I/F using MMIO registers */ 20 + RAPL_IF_TPMI, /* RAPL I/F using TPMI registers */ 21 + }; 22 + 17 23 enum rapl_domain_type { 18 24 RAPL_DOMAIN_PACKAGE, /* entire package/socket */ 19 25 RAPL_DOMAIN_PP0, /* core power plane */ ··· 36 30 RAPL_DOMAIN_REG_POLICY, 37 31 RAPL_DOMAIN_REG_INFO, 38 32 RAPL_DOMAIN_REG_PL4, 33 + RAPL_DOMAIN_REG_UNIT, 34 + RAPL_DOMAIN_REG_PL2, 39 35 RAPL_DOMAIN_REG_MAX, 40 36 }; 41 37 42 38 struct rapl_domain; 43 39 44 40 enum rapl_primitives { 45 - ENERGY_COUNTER, 46 41 POWER_LIMIT1, 47 42 POWER_LIMIT2, 48 43 POWER_LIMIT4, 44 + ENERGY_COUNTER, 49 45 FW_LOCK, 46 + FW_HIGH_LOCK, 47 + PL1_LOCK, 48 + PL2_LOCK, 49 + PL4_LOCK, 50 50 51 51 PL1_ENABLE, /* power limit 1, aka long term */ 52 52 PL1_CLAMP, /* allow frequency to go below OS request */ ··· 86 74 unsigned long timestamp; 87 75 }; 88 76 89 - #define NR_POWER_LIMITS (3) 77 + #define NR_POWER_LIMITS (POWER_LIMIT4 + 1) 78 + 90 79 struct rapl_power_limit { 91 80 struct powercap_zone_constraint *constraint; 92 - int prim_id; /* primitive ID used to enable */ 93 81 struct rapl_domain *domain; 94 82 const char *name; 83 + bool locked; 95 84 u64 last_power_limit; 96 85 }; 97 86 ··· 109 96 struct rapl_power_limit rpl[NR_POWER_LIMITS]; 110 97 u64 attr_map; /* track capabilities */ 111 98 unsigned int state; 112 - unsigned int domain_energy_unit; 99 + unsigned int power_unit; 100 + unsigned int energy_unit; 101 + unsigned int time_unit; 113 102 struct rapl_package *rp; 114 103 }; 115 104 ··· 136 121 * registers. 137 122 * @write_raw: Callback for writing RAPL interface specific 138 123 * registers. 
124 + * @defaults: internal pointer to interface default settings 125 + * @rpi: internal pointer to interface primitive info 139 126 */ 140 127 struct rapl_if_priv { 128 + enum rapl_if_type type; 141 129 struct powercap_control_type *control_type; 142 - struct rapl_domain *platform_rapl_domain; 143 130 enum cpuhp_state pcap_rapl_online; 144 131 u64 reg_unit; 145 132 u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX]; 146 133 int limits[RAPL_DOMAIN_MAX]; 147 - int (*read_raw)(int cpu, struct reg_action *ra); 148 - int (*write_raw)(int cpu, struct reg_action *ra); 134 + int (*read_raw)(int id, struct reg_action *ra); 135 + int (*write_raw)(int id, struct reg_action *ra); 136 + void *defaults; 137 + void *rpi; 149 138 }; 150 139 151 140 /* maximum rapl package domain name: package-%d-die-%d */ ··· 159 140 unsigned int id; /* logical die id, equals physical 1-die systems */ 160 141 unsigned int nr_domains; 161 142 unsigned long domain_map; /* bit map of active domains */ 162 - unsigned int power_unit; 163 - unsigned int energy_unit; 164 - unsigned int time_unit; 165 143 struct rapl_domain *domains; /* array of domains, sized at runtime */ 166 144 struct powercap_zone *power_zone; /* keep track of parent zone */ 167 145 unsigned long power_limit_irq; /* keep track of package power limit ··· 172 156 struct rapl_if_priv *priv; 173 157 }; 174 158 175 - struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv); 176 - struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv); 159 + struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu); 160 + struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu); 177 161 void rapl_remove_package(struct rapl_package *rp); 178 162 179 163 #endif /* __INTEL_RAPL_H__ */
+10 -4
include/linux/suspend.h
··· 202 202 }; 203 203 204 204 #ifdef CONFIG_SUSPEND 205 + extern suspend_state_t pm_suspend_target_state; 205 206 extern suspend_state_t mem_sleep_current; 206 207 extern suspend_state_t mem_sleep_default; 207 208 ··· 337 336 extern bool sync_on_suspend_enabled; 338 337 #else /* !CONFIG_SUSPEND */ 339 338 #define suspend_valid_only_mem NULL 339 + 340 + #define pm_suspend_target_state (PM_SUSPEND_ON) 340 341 341 342 static inline void pm_suspend_clear_flags(void) {} 342 343 static inline void pm_set_suspend_via_firmware(void) {} ··· 475 472 } 476 473 #endif /* CONFIG_HIBERNATION */ 477 474 475 + int arch_resume_nosmt(void); 476 + 478 477 #ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV 479 478 int is_hibernate_resume_dev(dev_t dev); 480 479 #else ··· 512 507 513 508 /* drivers/base/power/wakeup.c */ 514 509 extern bool events_check_enabled; 515 - extern suspend_state_t pm_suspend_target_state; 516 510 517 511 extern bool pm_wakeup_pending(void); 518 512 extern void pm_system_wakeup(void); ··· 559 555 #ifdef CONFIG_PM_SLEEP_DEBUG 560 556 extern bool pm_print_times_enabled; 561 557 extern bool pm_debug_messages_on; 558 + extern bool pm_debug_messages_should_print(void); 562 559 static inline int pm_dyn_debug_messages_on(void) 563 560 { 564 561 #ifdef CONFIG_DYNAMIC_DEBUG ··· 573 568 #endif 574 569 #define __pm_pr_dbg(fmt, ...) \ 575 570 do { \ 576 - if (pm_debug_messages_on) \ 571 + if (pm_debug_messages_should_print()) \ 577 572 printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ 578 573 else if (pm_dyn_debug_messages_on()) \ 579 574 pr_debug(fmt, ##__VA_ARGS__); \ 580 575 } while (0) 581 576 #define __pm_deferred_pr_dbg(fmt, ...) \ 582 577 do { \ 583 - if (pm_debug_messages_on) \ 578 + if (pm_debug_messages_should_print()) \ 584 579 printk_deferred(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ 585 580 } while (0) 586 581 #else ··· 598 593 /** 599 594 * pm_pr_dbg - print pm sleep debug messages 600 595 * 601 - * If pm_debug_messages_on is enabled, print message. 
596 + * If pm_debug_messages_on is enabled and the system is entering/leaving 597 + * suspend, print message. 602 598 * If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is enabled, 603 599 * print message only from instances explicitly enabled on dynamic debug's 604 600 * control.
+6
kernel/power/main.c
··· 556 556 557 557 bool pm_debug_messages_on __read_mostly; 558 558 559 + bool pm_debug_messages_should_print(void) 560 + { 561 + return pm_debug_messages_on && pm_suspend_target_state != PM_SUSPEND_ON; 562 + } 563 + EXPORT_SYMBOL_GPL(pm_debug_messages_should_print); 564 + 559 565 static ssize_t pm_debug_messages_show(struct kobject *kobj, 560 566 struct kobj_attribute *attr, char *buf) 561 567 {
+1 -1
kernel/power/snapshot.c
··· 398 398 unsigned int blocks; /* Number of Bitmap Blocks */ 399 399 }; 400 400 401 - /* strcut bm_position is used for browsing memory bitmaps */ 401 + /* struct bm_position is used for browsing memory bitmaps */ 402 402 403 403 struct bm_position { 404 404 struct mem_zone_bm_rtree *zone;