Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'pm-5.11-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull more power management updates from Rafael Wysocki:
"These update the CPPC cpufreq driver and intel_pstate (which involves
updating the cpufreq core and the schedutil governor) and make
janitorial changes in the ACPI code handling processor objects.

Specifics:

- Rework the passive-mode "fast switch" path in the intel_pstate
driver to allow it to receive the minimum (required) and target
(desired) performance information from the schedutil governor so as
to avoid running some workloads too fast (Rafael Wysocki).

- Make the intel_pstate driver allow the policy max limit to be
increased after the guaranteed performance value for the given CPU
has increased (Rafael Wysocki).

- Clean up the handling of CPU coordination types in the CPPC cpufreq
driver and make it export frequency domains information to user
space via sysfs (Ionela Voinescu).

- Fix the ACPI code handling processor objects to use a correct
coordination type when it fails to map frequency domains and drop a
redundant CPU map initialization from it (Ionela Voinescu, Punit
Agrawal)"

* tag 'pm-5.11-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
cpufreq: intel_pstate: Use most recent guaranteed performance values
cpufreq: intel_pstate: Implement the ->adjust_perf() callback
cpufreq: Add special-purpose fast-switching callback for drivers
cpufreq: schedutil: Add util to struct sg_cpu
cppc_cpufreq: replace per-cpu data array with a list
cppc_cpufreq: expose information on frequency domains
cppc_cpufreq: clarify support for coordination types
cppc_cpufreq: use policy->cpu as driver of frequency setting
ACPI: processor: fix NONE coordination for domain mapping failure

+388 -219
+2 -1
Documentation/ABI/testing/sysfs-devices-system-cpu
··· 264 264 attribute is useful for user space DVFS controllers to get better 265 265 power/performance results for platforms using acpi-cpufreq. 266 266 267 - This file is only present if the acpi-cpufreq driver is in use. 267 + This file is only present if the acpi-cpufreq or the cppc-cpufreq 268 + drivers are in use. 268 269 269 270 270 271 What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
+60 -81
drivers/acpi/cppc_acpi.c
··· 414 414 return result; 415 415 } 416 416 417 + bool acpi_cpc_valid(void) 418 + { 419 + struct cpc_desc *cpc_ptr; 420 + int cpu; 421 + 422 + for_each_possible_cpu(cpu) { 423 + cpc_ptr = per_cpu(cpc_desc_ptr, cpu); 424 + if (!cpc_ptr) 425 + return false; 426 + } 427 + 428 + return true; 429 + } 430 + EXPORT_SYMBOL_GPL(acpi_cpc_valid); 431 + 417 432 /** 418 - * acpi_get_psd_map - Map the CPUs in a common freq domain. 419 - * @all_cpu_data: Ptrs to CPU specific CPPC data including PSD info. 433 + * acpi_get_psd_map - Map the CPUs in the freq domain of a given cpu 434 + * @cpu: Find all CPUs that share a domain with cpu. 435 + * @cpu_data: Pointer to CPU specific CPPC data including PSD info. 420 436 * 421 437 * Return: 0 for success or negative value for err. 422 438 */ 423 - int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data) 439 + int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data) 424 440 { 425 - int count_target; 426 - int retval = 0; 427 - unsigned int i, j; 428 - cpumask_var_t covered_cpus; 429 - struct cppc_cpudata *pr, *match_pr; 430 - struct acpi_psd_package *pdomain; 431 - struct acpi_psd_package *match_pdomain; 432 441 struct cpc_desc *cpc_ptr, *match_cpc_ptr; 433 - 434 - if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)) 435 - return -ENOMEM; 442 + struct acpi_psd_package *match_pdomain; 443 + struct acpi_psd_package *pdomain; 444 + int count_target, i; 436 445 437 446 /* 438 447 * Now that we have _PSD data from all CPUs, let's setup P-state 439 448 * domain info. 
440 449 */ 450 + cpc_ptr = per_cpu(cpc_desc_ptr, cpu); 451 + if (!cpc_ptr) 452 + return -EFAULT; 453 + 454 + pdomain = &(cpc_ptr->domain_info); 455 + cpumask_set_cpu(cpu, cpu_data->shared_cpu_map); 456 + if (pdomain->num_processors <= 1) 457 + return 0; 458 + 459 + /* Validate the Domain info */ 460 + count_target = pdomain->num_processors; 461 + if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) 462 + cpu_data->shared_type = CPUFREQ_SHARED_TYPE_ALL; 463 + else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) 464 + cpu_data->shared_type = CPUFREQ_SHARED_TYPE_HW; 465 + else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) 466 + cpu_data->shared_type = CPUFREQ_SHARED_TYPE_ANY; 467 + 441 468 for_each_possible_cpu(i) { 442 - if (cpumask_test_cpu(i, covered_cpus)) 469 + if (i == cpu) 443 470 continue; 444 471 445 - pr = all_cpu_data[i]; 446 - cpc_ptr = per_cpu(cpc_desc_ptr, i); 447 - if (!cpc_ptr) { 448 - retval = -EFAULT; 449 - goto err_ret; 450 - } 472 + match_cpc_ptr = per_cpu(cpc_desc_ptr, i); 473 + if (!match_cpc_ptr) 474 + goto err_fault; 451 475 452 - pdomain = &(cpc_ptr->domain_info); 453 - cpumask_set_cpu(i, pr->shared_cpu_map); 454 - cpumask_set_cpu(i, covered_cpus); 455 - if (pdomain->num_processors <= 1) 476 + match_pdomain = &(match_cpc_ptr->domain_info); 477 + if (match_pdomain->domain != pdomain->domain) 456 478 continue; 457 479 458 - /* Validate the Domain info */ 459 - count_target = pdomain->num_processors; 460 - if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) 461 - pr->shared_type = CPUFREQ_SHARED_TYPE_ALL; 462 - else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) 463 - pr->shared_type = CPUFREQ_SHARED_TYPE_HW; 464 - else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) 465 - pr->shared_type = CPUFREQ_SHARED_TYPE_ANY; 480 + /* Here i and cpu are in the same domain */ 481 + if (match_pdomain->num_processors != count_target) 482 + goto err_fault; 466 483 467 - for_each_possible_cpu(j) { 468 - if (i == j) 469 - continue; 484 + 
if (pdomain->coord_type != match_pdomain->coord_type) 485 + goto err_fault; 470 486 471 - match_cpc_ptr = per_cpu(cpc_desc_ptr, j); 472 - if (!match_cpc_ptr) { 473 - retval = -EFAULT; 474 - goto err_ret; 475 - } 476 - 477 - match_pdomain = &(match_cpc_ptr->domain_info); 478 - if (match_pdomain->domain != pdomain->domain) 479 - continue; 480 - 481 - /* Here i and j are in the same domain */ 482 - if (match_pdomain->num_processors != count_target) { 483 - retval = -EFAULT; 484 - goto err_ret; 485 - } 486 - 487 - if (pdomain->coord_type != match_pdomain->coord_type) { 488 - retval = -EFAULT; 489 - goto err_ret; 490 - } 491 - 492 - cpumask_set_cpu(j, covered_cpus); 493 - cpumask_set_cpu(j, pr->shared_cpu_map); 494 - } 495 - 496 - for_each_cpu(j, pr->shared_cpu_map) { 497 - if (i == j) 498 - continue; 499 - 500 - match_pr = all_cpu_data[j]; 501 - match_pr->shared_type = pr->shared_type; 502 - cpumask_copy(match_pr->shared_cpu_map, 503 - pr->shared_cpu_map); 504 - } 487 + cpumask_set_cpu(i, cpu_data->shared_cpu_map); 505 488 } 506 - goto out; 507 489 508 - err_ret: 509 - for_each_possible_cpu(i) { 510 - pr = all_cpu_data[i]; 490 + return 0; 511 491 512 - /* Assume no coordination on any error parsing domain info */ 513 - cpumask_clear(pr->shared_cpu_map); 514 - cpumask_set_cpu(i, pr->shared_cpu_map); 515 - pr->shared_type = CPUFREQ_SHARED_TYPE_ALL; 516 - } 517 - out: 518 - free_cpumask_var(covered_cpus); 519 - return retval; 492 + err_fault: 493 + /* Assume no coordination on any error parsing domain info */ 494 + cpumask_clear(cpu_data->shared_cpu_map); 495 + cpumask_set_cpu(cpu, cpu_data->shared_cpu_map); 496 + cpu_data->shared_type = CPUFREQ_SHARED_TYPE_NONE; 497 + 498 + return -EFAULT; 520 499 } 521 500 EXPORT_SYMBOL_GPL(acpi_get_psd_map); 522 501
+1 -1
drivers/acpi/processor_perflib.c
··· 708 708 if (retval) { 709 709 cpumask_clear(pr->performance->shared_cpu_map); 710 710 cpumask_set_cpu(i, pr->performance->shared_cpu_map); 711 - pr->performance->shared_type = CPUFREQ_SHARED_TYPE_ALL; 711 + pr->performance->shared_type = CPUFREQ_SHARED_TYPE_NONE; 712 712 } 713 713 pr->performance = NULL; /* Will be set for real in register */ 714 714 }
+116 -88
drivers/cpufreq/cppc_cpufreq.c
··· 30 30 #define DMI_PROCESSOR_MAX_SPEED 0x14 31 31 32 32 /* 33 - * These structs contain information parsed from per CPU 34 - * ACPI _CPC structures. 35 - * e.g. For each CPU the highest, lowest supported 36 - * performance capabilities, desired performance level 37 - * requested etc. 33 + * This list contains information parsed from per CPU ACPI _CPC and _PSD 34 + * structures: e.g. the highest and lowest supported performance, capabilities, 35 + * desired performance, level requested etc. Depending on the share_type, not 36 + * all CPUs will have an entry in the list. 38 37 */ 39 - static struct cppc_cpudata **all_cpu_data; 38 + static LIST_HEAD(cpu_data_list); 39 + 40 40 static bool boost_supported; 41 41 42 42 struct cppc_workaround_oem_info { ··· 148 148 static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, 149 149 unsigned int target_freq, 150 150 unsigned int relation) 151 + 151 152 { 152 - struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; 153 + struct cppc_cpudata *cpu_data = policy->driver_data; 154 + unsigned int cpu = policy->cpu; 153 155 struct cpufreq_freqs freqs; 154 156 u32 desired_perf; 155 157 int ret = 0; ··· 166 164 freqs.new = target_freq; 167 165 168 166 cpufreq_freq_transition_begin(policy, &freqs); 169 - ret = cppc_set_perf(cpu_data->cpu, &cpu_data->perf_ctrls); 167 + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); 170 168 cpufreq_freq_transition_end(policy, &freqs, ret != 0); 171 169 172 170 if (ret) 173 171 pr_debug("Failed to set target on CPU:%d. 
ret:%d\n", 174 - cpu_data->cpu, ret); 172 + cpu, ret); 175 173 176 174 return ret; 177 175 } ··· 184 182 185 183 static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) 186 184 { 187 - struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; 185 + struct cppc_cpudata *cpu_data = policy->driver_data; 188 186 struct cppc_perf_caps *caps = &cpu_data->perf_caps; 189 187 unsigned int cpu = policy->cpu; 190 188 int ret; ··· 195 193 if (ret) 196 194 pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", 197 195 caps->lowest_perf, cpu, ret); 196 + 197 + /* Remove CPU node from list and free driver data for policy */ 198 + free_cpumask_var(cpu_data->shared_cpu_map); 199 + list_del(&cpu_data->node); 200 + kfree(policy->driver_data); 201 + policy->driver_data = NULL; 198 202 } 199 203 200 204 /* ··· 246 238 } 247 239 #endif 248 240 249 - static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) 241 + 242 + static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) 250 243 { 251 - struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; 252 - struct cppc_perf_caps *caps = &cpu_data->perf_caps; 253 - unsigned int cpu = policy->cpu; 254 - int ret = 0; 244 + struct cppc_cpudata *cpu_data; 245 + int ret; 255 246 256 - cpu_data->cpu = cpu; 257 - ret = cppc_get_perf_caps(cpu, caps); 247 + cpu_data = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL); 248 + if (!cpu_data) 249 + goto out; 258 250 251 + if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL)) 252 + goto free_cpu; 253 + 254 + ret = acpi_get_psd_map(cpu, cpu_data); 259 255 if (ret) { 260 - pr_debug("Err reading CPU%d perf capabilities. 
ret:%d\n", 261 - cpu, ret); 262 - return ret; 256 + pr_debug("Err parsing CPU%d PSD data: ret:%d\n", cpu, ret); 257 + goto free_mask; 258 + } 259 + 260 + ret = cppc_get_perf_caps(cpu, &cpu_data->perf_caps); 261 + if (ret) { 262 + pr_debug("Err reading CPU%d perf caps: ret:%d\n", cpu, ret); 263 + goto free_mask; 263 264 } 264 265 265 266 /* Convert the lowest and nominal freq from MHz to KHz */ 266 - caps->lowest_freq *= 1000; 267 - caps->nominal_freq *= 1000; 267 + cpu_data->perf_caps.lowest_freq *= 1000; 268 + cpu_data->perf_caps.nominal_freq *= 1000; 269 + 270 + list_add(&cpu_data->node, &cpu_data_list); 271 + 272 + return cpu_data; 273 + 274 + free_mask: 275 + free_cpumask_var(cpu_data->shared_cpu_map); 276 + free_cpu: 277 + kfree(cpu_data); 278 + out: 279 + return NULL; 280 + } 281 + 282 + static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) 283 + { 284 + unsigned int cpu = policy->cpu; 285 + struct cppc_cpudata *cpu_data; 286 + struct cppc_perf_caps *caps; 287 + int ret; 288 + 289 + cpu_data = cppc_cpufreq_get_cpu_data(cpu); 290 + if (!cpu_data) { 291 + pr_err("Error in acquiring _CPC/_PSD data for CPU%d.\n", cpu); 292 + return -ENODEV; 293 + } 294 + caps = &cpu_data->perf_caps; 295 + policy->driver_data = cpu_data; 268 296 269 297 /* 270 298 * Set min to lowest nonlinear perf to avoid any efficiency penalty (see ··· 324 280 policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); 325 281 policy->shared_type = cpu_data->shared_type; 326 282 327 - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { 328 - int i; 329 - 283 + switch (policy->shared_type) { 284 + case CPUFREQ_SHARED_TYPE_HW: 285 + case CPUFREQ_SHARED_TYPE_NONE: 286 + /* Nothing to be done - we'll have a policy for each CPU */ 287 + break; 288 + case CPUFREQ_SHARED_TYPE_ANY: 289 + /* 290 + * All CPUs in the domain will share a policy and all cpufreq 291 + * operations will use a single cppc_cpudata structure stored 292 + * in policy->driver_data. 
293 + */ 330 294 cpumask_copy(policy->cpus, cpu_data->shared_cpu_map); 331 - 332 - for_each_cpu(i, policy->cpus) { 333 - if (unlikely(i == cpu)) 334 - continue; 335 - 336 - memcpy(&all_cpu_data[i]->perf_caps, caps, 337 - sizeof(cpu_data->perf_caps)); 338 - } 339 - } else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) { 340 - /* Support only SW_ANY for now. */ 341 - pr_debug("Unsupported CPU co-ord type\n"); 295 + break; 296 + default: 297 + pr_debug("Unsupported CPU co-ord type: %d\n", 298 + policy->shared_type); 342 299 return -EFAULT; 343 300 } 344 - 345 - cpu_data->cur_policy = policy; 346 301 347 302 /* 348 303 * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost ··· 397 354 static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) 398 355 { 399 356 struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; 400 - struct cppc_cpudata *cpu_data = all_cpu_data[cpu]; 357 + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); 358 + struct cppc_cpudata *cpu_data = policy->driver_data; 401 359 int ret; 360 + 361 + cpufreq_cpu_put(policy); 402 362 403 363 ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0); 404 364 if (ret) ··· 418 372 419 373 static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) 420 374 { 421 - struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu]; 375 + struct cppc_cpudata *cpu_data = policy->driver_data; 422 376 struct cppc_perf_caps *caps = &cpu_data->perf_caps; 423 377 int ret; 424 378 ··· 442 396 return 0; 443 397 } 444 398 399 + static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf) 400 + { 401 + struct cppc_cpudata *cpu_data = policy->driver_data; 402 + 403 + return cpufreq_show_cpus(cpu_data->shared_cpu_map, buf); 404 + } 405 + cpufreq_freq_attr_ro(freqdomain_cpus); 406 + 407 + static struct freq_attr *cppc_cpufreq_attr[] = { 408 + &freqdomain_cpus, 409 + NULL, 410 + }; 411 + 445 412 static struct cpufreq_driver cppc_cpufreq_driver = { 446 413 .flags = CPUFREQ_CONST_LOOPS, 
447 414 .verify = cppc_verify_policy, ··· 463 404 .init = cppc_cpufreq_cpu_init, 464 405 .stop_cpu = cppc_cpufreq_stop_cpu, 465 406 .set_boost = cppc_cpufreq_set_boost, 407 + .attr = cppc_cpufreq_attr, 466 408 .name = "cppc_cpufreq", 467 409 }; 468 410 ··· 475 415 */ 476 416 static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu) 477 417 { 478 - struct cppc_cpudata *cpu_data = all_cpu_data[cpu]; 418 + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); 419 + struct cppc_cpudata *cpu_data = policy->driver_data; 479 420 u64 desired_perf; 480 421 int ret; 422 + 423 + cpufreq_cpu_put(policy); 481 424 482 425 ret = cppc_get_desired_perf(cpu, &desired_perf); 483 426 if (ret < 0) ··· 514 451 515 452 static int __init cppc_cpufreq_init(void) 516 453 { 517 - struct cppc_cpudata *cpu_data; 518 - int i, ret = 0; 519 - 520 - if (acpi_disabled) 454 + if ((acpi_disabled) || !acpi_cpc_valid()) 521 455 return -ENODEV; 522 456 523 - all_cpu_data = kcalloc(num_possible_cpus(), sizeof(void *), 524 - GFP_KERNEL); 525 - if (!all_cpu_data) 526 - return -ENOMEM; 527 - 528 - for_each_possible_cpu(i) { 529 - all_cpu_data[i] = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL); 530 - if (!all_cpu_data[i]) 531 - goto out; 532 - 533 - cpu_data = all_cpu_data[i]; 534 - if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL)) 535 - goto out; 536 - } 537 - 538 - ret = acpi_get_psd_map(all_cpu_data); 539 - if (ret) { 540 - pr_debug("Error parsing PSD data. 
Aborting cpufreq registration.\n"); 541 - goto out; 542 - } 457 + INIT_LIST_HEAD(&cpu_data_list); 543 458 544 459 cppc_check_hisi_workaround(); 545 460 546 - ret = cpufreq_register_driver(&cppc_cpufreq_driver); 547 - if (ret) 548 - goto out; 461 + return cpufreq_register_driver(&cppc_cpufreq_driver); 462 + } 549 463 550 - return ret; 464 + static inline void free_cpu_data(void) 465 + { 466 + struct cppc_cpudata *iter, *tmp; 551 467 552 - out: 553 - for_each_possible_cpu(i) { 554 - cpu_data = all_cpu_data[i]; 555 - if (!cpu_data) 556 - break; 557 - free_cpumask_var(cpu_data->shared_cpu_map); 558 - kfree(cpu_data); 468 + list_for_each_entry_safe(iter, tmp, &cpu_data_list, node) { 469 + free_cpumask_var(iter->shared_cpu_map); 470 + list_del(&iter->node); 471 + kfree(iter); 559 472 } 560 473 561 - kfree(all_cpu_data); 562 - return -ENODEV; 563 474 } 564 475 565 476 static void __exit cppc_cpufreq_exit(void) 566 477 { 567 - struct cppc_cpudata *cpu_data; 568 - int i; 569 - 570 478 cpufreq_unregister_driver(&cppc_cpufreq_driver); 571 479 572 - for_each_possible_cpu(i) { 573 - cpu_data = all_cpu_data[i]; 574 - free_cpumask_var(cpu_data->shared_cpu_map); 575 - kfree(cpu_data); 576 - } 577 - 578 - kfree(all_cpu_data); 480 + free_cpu_data(); 579 481 } 580 482 581 483 module_exit(cppc_cpufreq_exit);
+40
drivers/cpufreq/cpufreq.c
··· 2097 2097 } 2098 2098 EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch); 2099 2099 2100 + /** 2101 + * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go. 2102 + * @cpu: Target CPU. 2103 + * @min_perf: Minimum (required) performance level (units of @capacity). 2104 + * @target_perf: Terget (desired) performance level (units of @capacity). 2105 + * @capacity: Capacity of the target CPU. 2106 + * 2107 + * Carry out a fast performance level switch of @cpu without sleeping. 2108 + * 2109 + * The driver's ->adjust_perf() callback invoked by this function must be 2110 + * suitable for being called from within RCU-sched read-side critical sections 2111 + * and it is expected to select a suitable performance level equal to or above 2112 + * @min_perf and preferably equal to or below @target_perf. 2113 + * 2114 + * This function must not be called if policy->fast_switch_enabled is unset. 2115 + * 2116 + * Governors calling this function must guarantee that it will never be invoked 2117 + * twice in parallel for the same CPU and that it will never be called in 2118 + * parallel with either ->target() or ->target_index() or ->fast_switch() for 2119 + * the same CPU. 2120 + */ 2121 + void cpufreq_driver_adjust_perf(unsigned int cpu, 2122 + unsigned long min_perf, 2123 + unsigned long target_perf, 2124 + unsigned long capacity) 2125 + { 2126 + cpufreq_driver->adjust_perf(cpu, min_perf, target_perf, capacity); 2127 + } 2128 + 2129 + /** 2130 + * cpufreq_driver_has_adjust_perf - Check "direct fast switch" callback. 2131 + * 2132 + * Return 'true' if the ->adjust_perf callback is present for the 2133 + * current driver or 'false' otherwise. 2134 + */ 2135 + bool cpufreq_driver_has_adjust_perf(void) 2136 + { 2137 + return !!cpufreq_driver->adjust_perf; 2138 + } 2139 + 2100 2140 /* Must set freqs->new to intermediate frequency */ 2101 2141 static int __target_intermediate(struct cpufreq_policy *policy, 2102 2142 struct cpufreq_freqs *freqs, int index)
+71 -15
drivers/cpufreq/intel_pstate.c
··· 2207 2207 unsigned int policy_min, 2208 2208 unsigned int policy_max) 2209 2209 { 2210 - int max_freq = intel_pstate_get_max_freq(cpu); 2211 2210 int32_t max_policy_perf, min_policy_perf; 2212 2211 int max_state, turbo_max; 2212 + int max_freq; 2213 2213 2214 2214 /* 2215 2215 * HWP needs some special consideration, because on BDX the ··· 2223 2223 cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; 2224 2224 turbo_max = cpu->pstate.turbo_pstate; 2225 2225 } 2226 + max_freq = max_state * cpu->pstate.scaling; 2226 2227 2227 2228 max_policy_perf = max_state * policy_max / max_freq; 2228 2229 if (policy_max == policy_min) { ··· 2326 2325 static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, 2327 2326 struct cpufreq_policy_data *policy) 2328 2327 { 2328 + int max_freq; 2329 + 2329 2330 update_turbo_state(); 2330 - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, 2331 - intel_pstate_get_max_freq(cpu)); 2331 + if (hwp_active) { 2332 + int max_state, turbo_max; 2333 + 2334 + intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state); 2335 + max_freq = max_state * cpu->pstate.scaling; 2336 + } else { 2337 + max_freq = intel_pstate_get_max_freq(cpu); 2338 + } 2339 + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, max_freq); 2332 2340 2333 2341 intel_pstate_adjust_policy_max(cpu, policy); 2334 2342 } ··· 2536 2526 fp_toint(cpu->iowait_boost * 100)); 2537 2527 } 2538 2528 2539 - static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 target_pstate, 2540 - bool strict, bool fast_switch) 2529 + static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 min, u32 max, 2530 + u32 desired, bool fast_switch) 2541 2531 { 2542 2532 u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev; 2543 2533 2544 2534 value &= ~HWP_MIN_PERF(~0L); 2545 - value |= HWP_MIN_PERF(target_pstate); 2535 + value |= HWP_MIN_PERF(min); 2546 2536 2547 - /* 2548 - * The entire MSR needs to be updated in order to update the HWP min 2549 - * field in it, so 
opportunistically update the max too if needed. 2550 - */ 2551 2537 value &= ~HWP_MAX_PERF(~0L); 2552 - value |= HWP_MAX_PERF(strict ? target_pstate : cpu->max_perf_ratio); 2538 + value |= HWP_MAX_PERF(max); 2539 + 2540 + value &= ~HWP_DESIRED_PERF(~0L); 2541 + value |= HWP_DESIRED_PERF(desired); 2553 2542 2554 2543 if (value == prev) 2555 2544 return; ··· 2578 2569 int old_pstate = cpu->pstate.current_pstate; 2579 2570 2580 2571 target_pstate = intel_pstate_prepare_request(cpu, target_pstate); 2581 - if (hwp_active) 2582 - intel_cpufreq_adjust_hwp(cpu, target_pstate, 2583 - policy->strict_target, fast_switch); 2584 - else if (target_pstate != old_pstate) 2572 + if (hwp_active) { 2573 + int max_pstate = policy->strict_target ? 2574 + target_pstate : cpu->max_perf_ratio; 2575 + 2576 + intel_cpufreq_adjust_hwp(cpu, target_pstate, max_pstate, 0, 2577 + fast_switch); 2578 + } else if (target_pstate != old_pstate) { 2585 2579 intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch); 2580 + } 2586 2581 2587 2582 cpu->pstate.current_pstate = target_pstate; 2588 2583 ··· 2645 2632 target_pstate = intel_cpufreq_update_pstate(policy, target_pstate, true); 2646 2633 2647 2634 return target_pstate * cpu->pstate.scaling; 2635 + } 2636 + 2637 + static void intel_cpufreq_adjust_perf(unsigned int cpunum, 2638 + unsigned long min_perf, 2639 + unsigned long target_perf, 2640 + unsigned long capacity) 2641 + { 2642 + struct cpudata *cpu = all_cpu_data[cpunum]; 2643 + int old_pstate = cpu->pstate.current_pstate; 2644 + int cap_pstate, min_pstate, max_pstate, target_pstate; 2645 + 2646 + update_turbo_state(); 2647 + cap_pstate = global.turbo_disabled ? cpu->pstate.max_pstate : 2648 + cpu->pstate.turbo_pstate; 2649 + 2650 + /* Optimization: Avoid unnecessary divisions. 
*/ 2651 + 2652 + target_pstate = cap_pstate; 2653 + if (target_perf < capacity) 2654 + target_pstate = DIV_ROUND_UP(cap_pstate * target_perf, capacity); 2655 + 2656 + min_pstate = cap_pstate; 2657 + if (min_perf < capacity) 2658 + min_pstate = DIV_ROUND_UP(cap_pstate * min_perf, capacity); 2659 + 2660 + if (min_pstate < cpu->pstate.min_pstate) 2661 + min_pstate = cpu->pstate.min_pstate; 2662 + 2663 + if (min_pstate < cpu->min_perf_ratio) 2664 + min_pstate = cpu->min_perf_ratio; 2665 + 2666 + max_pstate = min(cap_pstate, cpu->max_perf_ratio); 2667 + if (max_pstate < min_pstate) 2668 + max_pstate = min_pstate; 2669 + 2670 + target_pstate = clamp_t(int, target_pstate, min_pstate, max_pstate); 2671 + 2672 + intel_cpufreq_adjust_hwp(cpu, min_pstate, max_pstate, target_pstate, true); 2673 + 2674 + cpu->pstate.current_pstate = target_pstate; 2675 + intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate); 2648 2676 } 2649 2677 2650 2678 static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) ··· 3086 3032 intel_pstate.attr = hwp_cpufreq_attrs; 3087 3033 intel_cpufreq.attr = hwp_cpufreq_attrs; 3088 3034 intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS; 3035 + intel_cpufreq.fast_switch = NULL; 3036 + intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf; 3089 3037 if (!default_driver) 3090 3038 default_driver = &intel_pstate; 3091 3039
+3 -3
include/acpi/cppc_acpi.h
··· 124 124 125 125 /* Per CPU container for runtime CPPC management. */ 126 126 struct cppc_cpudata { 127 - int cpu; 127 + struct list_head node; 128 128 struct cppc_perf_caps perf_caps; 129 129 struct cppc_perf_ctrls perf_ctrls; 130 130 struct cppc_perf_fb_ctrs perf_fb_ctrs; 131 - struct cpufreq_policy *cur_policy; 132 131 unsigned int shared_type; 133 132 cpumask_var_t shared_cpu_map; 134 133 }; ··· 136 137 extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); 137 138 extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); 138 139 extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); 139 - extern int acpi_get_psd_map(struct cppc_cpudata **); 140 + extern bool acpi_cpc_valid(void); 141 + extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); 140 142 extern unsigned int cppc_get_transition_latency(int cpu); 141 143 extern bool cpc_ffh_supported(void); 142 144 extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val);
+14
include/linux/cpufreq.h
··· 320 320 unsigned int index); 321 321 unsigned int (*fast_switch)(struct cpufreq_policy *policy, 322 322 unsigned int target_freq); 323 + /* 324 + * ->fast_switch() replacement for drivers that use an internal 325 + * representation of performance levels and can pass hints other than 326 + * the target performance level to the hardware. 327 + */ 328 + void (*adjust_perf)(unsigned int cpu, 329 + unsigned long min_perf, 330 + unsigned long target_perf, 331 + unsigned long capacity); 323 332 324 333 /* 325 334 * Caches and returns the lowest driver-supported frequency greater than ··· 597 588 /* Pass a target to the cpufreq driver */ 598 589 unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, 599 590 unsigned int target_freq); 591 + void cpufreq_driver_adjust_perf(unsigned int cpu, 592 + unsigned long min_perf, 593 + unsigned long target_perf, 594 + unsigned long capacity); 595 + bool cpufreq_driver_has_adjust_perf(void); 600 596 int cpufreq_driver_target(struct cpufreq_policy *policy, 601 597 unsigned int target_freq, 602 598 unsigned int relation);
+5
include/linux/sched/cpufreq.h
··· 28 28 { 29 29 return (freq + (freq >> 2)) * util / cap; 30 30 } 31 + 32 + static inline unsigned long map_util_perf(unsigned long util) 33 + { 34 + return util + (util >> 2); 35 + } 31 36 #endif /* CONFIG_CPU_FREQ */ 32 37 33 38 #endif /* _LINUX_SCHED_CPUFREQ_H */
+76 -30
kernel/sched/cpufreq_schedutil.c
··· 53 53 unsigned int iowait_boost; 54 54 u64 last_update; 55 55 56 + unsigned long util; 56 57 unsigned long bw_dl; 57 58 unsigned long max; 58 59 ··· 277 276 return min(max, util); 278 277 } 279 278 280 - static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) 279 + static void sugov_get_util(struct sugov_cpu *sg_cpu) 281 280 { 282 281 struct rq *rq = cpu_rq(sg_cpu->cpu); 283 - unsigned long util = cpu_util_cfs(rq); 284 282 unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); 285 283 286 284 sg_cpu->max = max; 287 285 sg_cpu->bw_dl = cpu_bw_dl(rq); 288 - 289 - return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); 286 + sg_cpu->util = schedutil_cpu_util(sg_cpu->cpu, cpu_util_cfs(rq), max, 287 + FREQUENCY_UTIL, NULL); 290 288 } 291 289 292 290 /** ··· 362 362 * sugov_iowait_apply() - Apply the IO boost to a CPU. 363 363 * @sg_cpu: the sugov data for the cpu to boost 364 364 * @time: the update time from the caller 365 - * @util: the utilization to (eventually) boost 366 - * @max: the maximum value the utilization can be boosted to 367 365 * 368 366 * A CPU running a task which woken up after an IO operation can have its 369 367 * utilization boosted to speed up the completion of those IO operations. ··· 375 377 * This mechanism is designed to boost high frequently IO waiting tasks, while 376 378 * being more conservative on tasks which does sporadic IO operations. 
377 379 */ 378 - static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, 379 - unsigned long util, unsigned long max) 380 + static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) 380 381 { 381 382 unsigned long boost; 382 383 383 384 /* No boost currently required */ 384 385 if (!sg_cpu->iowait_boost) 385 - return util; 386 + return; 386 387 387 388 /* Reset boost if the CPU appears to have been idle enough */ 388 389 if (sugov_iowait_reset(sg_cpu, time, false)) 389 - return util; 390 + return; 390 391 391 392 if (!sg_cpu->iowait_boost_pending) { 392 393 /* ··· 394 397 sg_cpu->iowait_boost >>= 1; 395 398 if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) { 396 399 sg_cpu->iowait_boost = 0; 397 - return util; 400 + return; 398 401 } 399 402 } 400 403 401 404 sg_cpu->iowait_boost_pending = false; 402 405 403 406 /* 404 - * @util is already in capacity scale; convert iowait_boost 407 + * sg_cpu->util is already in capacity scale; convert iowait_boost 405 408 * into the same scale so we can compare. 
406 409 */ 407 - boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT; 408 - return max(boost, util); 410 + boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; 411 + if (sg_cpu->util < boost) 412 + sg_cpu->util = boost; 409 413 } 410 414 411 415 #ifdef CONFIG_NO_HZ_COMMON ··· 432 434 sg_policy->limits_changed = true; 433 435 } 434 436 435 - static void sugov_update_single(struct update_util_data *hook, u64 time, 436 - unsigned int flags) 437 + static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, 438 + u64 time, unsigned int flags) 437 439 { 438 - struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); 439 440 struct sugov_policy *sg_policy = sg_cpu->sg_policy; 440 - unsigned long util, max; 441 - unsigned int next_f; 442 - unsigned int cached_freq = sg_policy->cached_raw_freq; 443 441 444 442 sugov_iowait_boost(sg_cpu, time, flags); 445 443 sg_cpu->last_update = time; ··· 443 449 ignore_dl_rate_limit(sg_cpu, sg_policy); 444 450 445 451 if (!sugov_should_update_freq(sg_policy, time)) 452 + return false; 453 + 454 + sugov_get_util(sg_cpu); 455 + sugov_iowait_apply(sg_cpu, time); 456 + 457 + return true; 458 + } 459 + 460 + static void sugov_update_single_freq(struct update_util_data *hook, u64 time, 461 + unsigned int flags) 462 + { 463 + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); 464 + struct sugov_policy *sg_policy = sg_cpu->sg_policy; 465 + unsigned int cached_freq = sg_policy->cached_raw_freq; 466 + unsigned int next_f; 467 + 468 + if (!sugov_update_single_common(sg_cpu, time, flags)) 446 469 return; 447 470 448 - util = sugov_get_util(sg_cpu); 449 - max = sg_cpu->max; 450 - util = sugov_iowait_apply(sg_cpu, time, util, max); 451 - next_f = get_next_freq(sg_policy, util, max); 471 + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); 452 472 /* 453 473 * Do not reduce the frequency if the CPU has not been idle 454 474 * recently, as the reduction is 
likely to be premature then. ··· 488 480 } 489 481 } 490 482 483 + static void sugov_update_single_perf(struct update_util_data *hook, u64 time, 484 + unsigned int flags) 485 + { 486 + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); 487 + unsigned long prev_util = sg_cpu->util; 488 + 489 + /* 490 + * Fall back to the "frequency" path if frequency invariance is not 491 + * supported, because the direct mapping between the utilization and 492 + * the performance levels depends on the frequency invariance. 493 + */ 494 + if (!arch_scale_freq_invariant()) { 495 + sugov_update_single_freq(hook, time, flags); 496 + return; 497 + } 498 + 499 + if (!sugov_update_single_common(sg_cpu, time, flags)) 500 + return; 501 + 502 + /* 503 + * Do not reduce the target performance level if the CPU has not been 504 + * idle recently, as the reduction is likely to be premature then. 505 + */ 506 + if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util) 507 + sg_cpu->util = prev_util; 508 + 509 + cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), 510 + map_util_perf(sg_cpu->util), sg_cpu->max); 511 + 512 + sg_cpu->sg_policy->last_freq_update_time = time; 513 + } 514 + 491 515 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) 492 516 { 493 517 struct sugov_policy *sg_policy = sg_cpu->sg_policy; ··· 531 491 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); 532 492 unsigned long j_util, j_max; 533 493 534 - j_util = sugov_get_util(j_sg_cpu); 494 + sugov_get_util(j_sg_cpu); 495 + sugov_iowait_apply(j_sg_cpu, time); 496 + j_util = j_sg_cpu->util; 535 497 j_max = j_sg_cpu->max; 536 - j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max); 537 498 538 499 if (j_util * max > j_max * util) { 539 500 util = j_util; ··· 858 817 static int sugov_start(struct cpufreq_policy *policy) 859 818 { 860 819 struct sugov_policy *sg_policy = policy->governor_data; 820 + void (*uu)(struct update_util_data *data, u64 time, 
unsigned int flags); 861 821 unsigned int cpu; 862 822 863 823 sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; ··· 878 836 sg_cpu->sg_policy = sg_policy; 879 837 } 880 838 839 + if (policy_is_shared(policy)) 840 + uu = sugov_update_shared; 841 + else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf()) 842 + uu = sugov_update_single_perf; 843 + else 844 + uu = sugov_update_single_freq; 845 + 881 846 for_each_cpu(cpu, policy->cpus) { 882 847 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); 883 848 884 - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, 885 - policy_is_shared(policy) ? 886 - sugov_update_shared : 887 - sugov_update_single); 849 + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu); 888 850 } 889 851 return 0; 890 852 }