at v4.2-rc2 1032 lines 32 kB view raw
1/* 2 * linux/drivers/thermal/cpu_cooling.c 3 * 4 * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com) 5 * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org> 6 * 7 * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org> 8 * 9 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 10 * This program is free software; you can redistribute it and/or modify 11 * it under the terms of the GNU General Public License as published by 12 * the Free Software Foundation; version 2 of the License. 13 * 14 * This program is distributed in the hope that it will be useful, but 15 * WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License along 20 * with this program; if not, write to the Free Software Foundation, Inc., 21 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 22 * 23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 24 */ 25#include <linux/module.h> 26#include <linux/thermal.h> 27#include <linux/cpufreq.h> 28#include <linux/err.h> 29#include <linux/pm_opp.h> 30#include <linux/slab.h> 31#include <linux/cpu.h> 32#include <linux/cpu_cooling.h> 33 34#include <trace/events/thermal.h> 35 36/* 37 * Cooling state <-> CPUFreq frequency 38 * 39 * Cooling states are translated to frequencies throughout this driver and this 40 * is the relation between them. 41 * 42 * Highest cooling state corresponds to lowest possible frequency. 43 * 44 * i.e. 45 * level 0 --> 1st Max Freq 46 * level 1 --> 2nd Max Freq 47 * ... 48 */ 49 50/** 51 * struct power_table - frequency to power conversion 52 * @frequency: frequency in KHz 53 * @power: power in mW 54 * 55 * This structure is built when the cooling device registers and helps 56 * in translating frequency to power and viceversa. 57 */ 58struct power_table { 59 u32 frequency; 60 u32 power; 61}; 62 63/** 64 * struct cpufreq_cooling_device - data for cooling device with cpufreq 65 * @id: unique integer value corresponding to each cpufreq_cooling_device 66 * registered. 67 * @cool_dev: thermal_cooling_device pointer to keep track of the 68 * registered cooling device. 69 * @cpufreq_state: integer value representing the current state of cpufreq 70 * cooling devices. 71 * @cpufreq_val: integer value representing the absolute value of the clipped 72 * frequency. 73 * @max_level: maximum cooling level. One less than total number of valid 74 * cpufreq frequencies. 75 * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device. 76 * @node: list_head to link all cpufreq_cooling_device together. 77 * @last_load: load measured by the latest call to cpufreq_get_actual_power() 78 * @time_in_idle: previous reading of the absolute time that this cpu was idle 79 * @time_in_idle_timestamp: wall time of the last invocation of 80 * get_cpu_idle_time_us() 81 * @dyn_power_table: array of struct power_table for frequency to power 82 * conversion, sorted in ascending order. 83 * @dyn_power_table_entries: number of entries in the @dyn_power_table array 84 * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered 85 * @plat_get_static_power: callback to calculate the static power 86 * 87 * This structure is required for keeping information of each registered 88 * cpufreq_cooling_device. 89 */ 90struct cpufreq_cooling_device { 91 int id; 92 struct thermal_cooling_device *cool_dev; 93 unsigned int cpufreq_state; 94 unsigned int cpufreq_val; 95 unsigned int max_level; 96 unsigned int *freq_table; /* In descending order */ 97 struct cpumask allowed_cpus; 98 struct list_head node; 99 u32 last_load; 100 u64 *time_in_idle; 101 u64 *time_in_idle_timestamp; 102 struct power_table *dyn_power_table; 103 int dyn_power_table_entries; 104 struct device *cpu_dev; 105 get_static_t plat_get_static_power; 106}; 107static DEFINE_IDR(cpufreq_idr); 108static DEFINE_MUTEX(cooling_cpufreq_lock); 109 110static LIST_HEAD(cpufreq_dev_list); 111 112/** 113 * get_idr - function to get a unique id. 114 * @idr: struct idr * handle used to create a id. 115 * @id: int * value generated by this function. 116 * 117 * This function will populate @id with an unique 118 * id, using the idr API. 119 * 120 * Return: 0 on success, an error code on failure. 121 */ 122static int get_idr(struct idr *idr, int *id) 123{ 124 int ret; 125 126 mutex_lock(&cooling_cpufreq_lock); 127 ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); 128 mutex_unlock(&cooling_cpufreq_lock); 129 if (unlikely(ret < 0)) 130 return ret; 131 *id = ret; 132 133 return 0; 134} 135 136/** 137 * release_idr - function to free the unique id. 138 * @idr: struct idr * handle used for creating the id. 139 * @id: int value representing the unique id. 140 */ 141static void release_idr(struct idr *idr, int id) 142{ 143 mutex_lock(&cooling_cpufreq_lock); 144 idr_remove(idr, id); 145 mutex_unlock(&cooling_cpufreq_lock); 146} 147 148/* Below code defines functions to be used for cpufreq as cooling device */ 149 150/** 151 * get_level: Find the level for a particular frequency 152 * @cpufreq_dev: cpufreq_dev for which the property is required 153 * @freq: Frequency 154 * 155 * Return: level on success, THERMAL_CSTATE_INVALID on error. 156 */ 157static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev, 158 unsigned int freq) 159{ 160 unsigned long level; 161 162 for (level = 0; level <= cpufreq_dev->max_level; level++) { 163 if (freq == cpufreq_dev->freq_table[level]) 164 return level; 165 166 if (freq > cpufreq_dev->freq_table[level]) 167 break; 168 } 169 170 return THERMAL_CSTATE_INVALID; 171} 172 173/** 174 * cpufreq_cooling_get_level - for a given cpu, return the cooling level. 175 * @cpu: cpu for which the level is required 176 * @freq: the frequency of interest 177 * 178 * This function will match the cooling level corresponding to the 179 * requested @freq and return it. 180 * 181 * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID 182 * otherwise. 183 */ 184unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) 185{ 186 struct cpufreq_cooling_device *cpufreq_dev; 187 188 mutex_lock(&cooling_cpufreq_lock); 189 list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { 190 if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) { 191 mutex_unlock(&cooling_cpufreq_lock); 192 return get_level(cpufreq_dev, freq); 193 } 194 } 195 mutex_unlock(&cooling_cpufreq_lock); 196 197 pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu); 198 return THERMAL_CSTATE_INVALID; 199} 200EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level); 201 202/** 203 * cpufreq_thermal_notifier - notifier callback for cpufreq policy change. 204 * @nb: struct notifier_block * with callback info. 205 * @event: value showing cpufreq event for which this function invoked. 206 * @data: callback-specific data 207 * 208 * Callback to hijack the notification on cpufreq policy transition. 209 * Every time there is a change in policy, we will intercept and 210 * update the cpufreq policy with thermal constraints. 211 * 212 * Return: 0 (success) 213 */ 214static int cpufreq_thermal_notifier(struct notifier_block *nb, 215 unsigned long event, void *data) 216{ 217 struct cpufreq_policy *policy = data; 218 unsigned long max_freq = 0; 219 struct cpufreq_cooling_device *cpufreq_dev; 220 221 switch (event) { 222 223 case CPUFREQ_ADJUST: 224 mutex_lock(&cooling_cpufreq_lock); 225 list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { 226 if (!cpumask_test_cpu(policy->cpu, 227 &cpufreq_dev->allowed_cpus)) 228 continue; 229 230 max_freq = cpufreq_dev->cpufreq_val; 231 232 if (policy->max != max_freq) 233 cpufreq_verify_within_limits(policy, 0, 234 max_freq); 235 } 236 mutex_unlock(&cooling_cpufreq_lock); 237 break; 238 default: 239 return NOTIFY_DONE; 240 } 241 242 return NOTIFY_OK; 243} 244 245/** 246 * build_dyn_power_table() - create a dynamic power to frequency table 247 * @cpufreq_device: the cpufreq cooling device in which to store the table 248 * @capacitance: dynamic power coefficient for these cpus 249 * 250 * Build a dynamic power to frequency table for this cpu and store it 251 * in @cpufreq_device. This table will be used in cpu_power_to_freq() and 252 * cpu_freq_to_power() to convert between power and frequency 253 * efficiently. Power is stored in mW, frequency in KHz. The 254 * resulting table is in ascending order. 255 * 256 * Return: 0 on success, -E* on error. 257 */ 258static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, 259 u32 capacitance) 260{ 261 struct power_table *power_table; 262 struct dev_pm_opp *opp; 263 struct device *dev = NULL; 264 int num_opps = 0, cpu, i, ret = 0; 265 unsigned long freq; 266 267 rcu_read_lock(); 268 269 for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { 270 dev = get_cpu_device(cpu); 271 if (!dev) { 272 dev_warn(&cpufreq_device->cool_dev->device, 273 "No cpu device for cpu %d\n", cpu); 274 continue; 275 } 276 277 num_opps = dev_pm_opp_get_opp_count(dev); 278 if (num_opps > 0) { 279 break; 280 } else if (num_opps < 0) { 281 ret = num_opps; 282 goto unlock; 283 } 284 } 285 286 if (num_opps == 0) { 287 ret = -EINVAL; 288 goto unlock; 289 } 290 291 power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL); 292 if (!power_table) { 293 ret = -ENOMEM; 294 goto unlock; 295 } 296 297 for (freq = 0, i = 0; 298 opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp); 299 freq++, i++) { 300 u32 freq_mhz, voltage_mv; 301 u64 power; 302 303 freq_mhz = freq / 1000000; 304 voltage_mv = dev_pm_opp_get_voltage(opp) / 1000; 305 306 /* 307 * Do the multiplication with MHz and millivolt so as 308 * to not overflow. 309 */ 310 power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv; 311 do_div(power, 1000000000); 312 313 /* frequency is stored in power_table in KHz */ 314 power_table[i].frequency = freq / 1000; 315 316 /* power is stored in mW */ 317 power_table[i].power = power; 318 } 319 320 if (i == 0) { 321 ret = PTR_ERR(opp); 322 goto unlock; 323 } 324 325 cpufreq_device->cpu_dev = dev; 326 cpufreq_device->dyn_power_table = power_table; 327 cpufreq_device->dyn_power_table_entries = i; 328 329unlock: 330 rcu_read_unlock(); 331 return ret; 332} 333 334static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device, 335 u32 freq) 336{ 337 int i; 338 struct power_table *pt = cpufreq_device->dyn_power_table; 339 340 for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++) 341 if (freq < pt[i].frequency) 342 break; 343 344 return pt[i - 1].power; 345} 346 347static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device, 348 u32 power) 349{ 350 int i; 351 struct power_table *pt = cpufreq_device->dyn_power_table; 352 353 for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++) 354 if (power < pt[i].power) 355 break; 356 357 return pt[i - 1].frequency; 358} 359 360/** 361 * get_load() - get load for a cpu since last updated 362 * @cpufreq_device: &struct cpufreq_cooling_device for this cpu 363 * @cpu: cpu number 364 * 365 * Return: The average load of cpu @cpu in percentage since this 366 * function was last called. 367 */ 368static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu) 369{ 370 u32 load; 371 u64 now, now_idle, delta_time, delta_idle; 372 373 now_idle = get_cpu_idle_time(cpu, &now, 0); 374 delta_idle = now_idle - cpufreq_device->time_in_idle[cpu]; 375 delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu]; 376 377 if (delta_time <= delta_idle) 378 load = 0; 379 else 380 load = div64_u64(100 * (delta_time - delta_idle), delta_time); 381 382 cpufreq_device->time_in_idle[cpu] = now_idle; 383 cpufreq_device->time_in_idle_timestamp[cpu] = now; 384 385 return load; 386} 387 388/** 389 * get_static_power() - calculate the static power consumed by the cpus 390 * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev 391 * @tz: thermal zone device in which we're operating 392 * @freq: frequency in KHz 393 * @power: pointer in which to store the calculated static power 394 * 395 * Calculate the static power consumed by the cpus described by 396 * @cpu_actor running at frequency @freq. This function relies on a 397 * platform specific function that should have been provided when the 398 * actor was registered. If it wasn't, the static power is assumed to 399 * be negligible. The calculated static power is stored in @power. 400 * 401 * Return: 0 on success, -E* on failure. 402 */ 403static int get_static_power(struct cpufreq_cooling_device *cpufreq_device, 404 struct thermal_zone_device *tz, unsigned long freq, 405 u32 *power) 406{ 407 struct dev_pm_opp *opp; 408 unsigned long voltage; 409 struct cpumask *cpumask = &cpufreq_device->allowed_cpus; 410 unsigned long freq_hz = freq * 1000; 411 412 if (!cpufreq_device->plat_get_static_power || 413 !cpufreq_device->cpu_dev) { 414 *power = 0; 415 return 0; 416 } 417 418 rcu_read_lock(); 419 420 opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz, 421 true); 422 voltage = dev_pm_opp_get_voltage(opp); 423 424 rcu_read_unlock(); 425 426 if (voltage == 0) { 427 dev_warn_ratelimited(cpufreq_device->cpu_dev, 428 "Failed to get voltage for frequency %lu: %ld\n", 429 freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0); 430 return -EINVAL; 431 } 432 433 return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay, 434 voltage, power); 435} 436 437/** 438 * get_dynamic_power() - calculate the dynamic power 439 * @cpufreq_device: &cpufreq_cooling_device for this cdev 440 * @freq: current frequency 441 * 442 * Return: the dynamic power consumed by the cpus described by 443 * @cpufreq_device. 444 */ 445static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device, 446 unsigned long freq) 447{ 448 u32 raw_cpu_power; 449 450 raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq); 451 return (raw_cpu_power * cpufreq_device->last_load) / 100; 452} 453 454/* cpufreq cooling device callback functions are defined below */ 455 456/** 457 * cpufreq_get_max_state - callback function to get the max cooling state. 458 * @cdev: thermal cooling device pointer. 459 * @state: fill this variable with the max cooling state. 460 * 461 * Callback for the thermal cooling device to return the cpufreq 462 * max cooling state. 463 * 464 * Return: 0 on success, an error code otherwise. 465 */ 466static int cpufreq_get_max_state(struct thermal_cooling_device *cdev, 467 unsigned long *state) 468{ 469 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 470 471 *state = cpufreq_device->max_level; 472 return 0; 473} 474 475/** 476 * cpufreq_get_cur_state - callback function to get the current cooling state. 477 * @cdev: thermal cooling device pointer. 478 * @state: fill this variable with the current cooling state. 479 * 480 * Callback for the thermal cooling device to return the cpufreq 481 * current cooling state. 482 * 483 * Return: 0 on success, an error code otherwise. 484 */ 485static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev, 486 unsigned long *state) 487{ 488 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 489 490 *state = cpufreq_device->cpufreq_state; 491 492 return 0; 493} 494 495/** 496 * cpufreq_set_cur_state - callback function to set the current cooling state. 497 * @cdev: thermal cooling device pointer. 498 * @state: set this variable to the current cooling state. 499 * 500 * Callback for the thermal cooling device to change the cpufreq 501 * current cooling state. 502 * 503 * Return: 0 on success, an error code otherwise. 504 */ 505static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, 506 unsigned long state) 507{ 508 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 509 unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus); 510 unsigned int clip_freq; 511 512 /* Request state should be less than max_level */ 513 if (WARN_ON(state > cpufreq_device->max_level)) 514 return -EINVAL; 515 516 /* Check if the old cooling action is same as new cooling action */ 517 if (cpufreq_device->cpufreq_state == state) 518 return 0; 519 520 clip_freq = cpufreq_device->freq_table[state]; 521 cpufreq_device->cpufreq_state = state; 522 cpufreq_device->cpufreq_val = clip_freq; 523 524 cpufreq_update_policy(cpu); 525 526 return 0; 527} 528 529/** 530 * cpufreq_get_requested_power() - get the current power 531 * @cdev: &thermal_cooling_device pointer 532 * @tz: a valid thermal zone device pointer 533 * @power: pointer in which to store the resulting power 534 * 535 * Calculate the current power consumption of the cpus in milliwatts 536 * and store it in @power. This function should actually calculate 537 * the requested power, but it's hard to get the frequency that 538 * cpufreq would have assigned if there were no thermal limits. 539 * Instead, we calculate the current power on the assumption that the 540 * immediate future will look like the immediate past. 541 * 542 * We use the current frequency and the average load since this 543 * function was last called. In reality, there could have been 544 * multiple opps since this function was last called and that affects 545 * the load calculation. While it's not perfectly accurate, this 546 * simplification is good enough and works. REVISIT this, as more 547 * complex code may be needed if experiments show that it's not 548 * accurate enough. 549 * 550 * Return: 0 on success, -E* if getting the static power failed. 551 */ 552static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, 553 struct thermal_zone_device *tz, 554 u32 *power) 555{ 556 unsigned long freq; 557 int i = 0, cpu, ret; 558 u32 static_power, dynamic_power, total_load = 0; 559 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 560 u32 *load_cpu = NULL; 561 562 cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask); 563 564 /* 565 * All the CPUs are offline, thus the requested power by 566 * the cdev is 0 567 */ 568 if (cpu >= nr_cpu_ids) { 569 *power = 0; 570 return 0; 571 } 572 573 freq = cpufreq_quick_get(cpu); 574 575 if (trace_thermal_power_cpu_get_power_enabled()) { 576 u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus); 577 578 load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu), 579 GFP_KERNEL); 580 } 581 582 for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { 583 u32 load; 584 585 if (cpu_online(cpu)) 586 load = get_load(cpufreq_device, cpu); 587 else 588 load = 0; 589 590 total_load += load; 591 if (trace_thermal_power_cpu_limit_enabled() && load_cpu) 592 load_cpu[i] = load; 593 594 i++; 595 } 596 597 cpufreq_device->last_load = total_load; 598 599 dynamic_power = get_dynamic_power(cpufreq_device, freq); 600 ret = get_static_power(cpufreq_device, tz, freq, &static_power); 601 if (ret) { 602 if (load_cpu) 603 devm_kfree(&cdev->device, load_cpu); 604 return ret; 605 } 606 607 if (load_cpu) { 608 trace_thermal_power_cpu_get_power( 609 &cpufreq_device->allowed_cpus, 610 freq, load_cpu, i, dynamic_power, static_power); 611 612 devm_kfree(&cdev->device, load_cpu); 613 } 614 615 *power = static_power + dynamic_power; 616 return 0; 617} 618 619/** 620 * cpufreq_state2power() - convert a cpu cdev state to power consumed 621 * @cdev: &thermal_cooling_device pointer 622 * @tz: a valid thermal zone device pointer 623 * @state: cooling device state to be converted 624 * @power: pointer in which to store the resulting power 625 * 626 * Convert cooling device state @state into power consumption in 627 * milliwatts assuming 100% load. Store the calculated power in 628 * @power. 629 * 630 * Return: 0 on success, -EINVAL if the cooling device state could not 631 * be converted into a frequency or other -E* if there was an error 632 * when calculating the static power. 633 */ 634static int cpufreq_state2power(struct thermal_cooling_device *cdev, 635 struct thermal_zone_device *tz, 636 unsigned long state, u32 *power) 637{ 638 unsigned int freq, num_cpus; 639 cpumask_t cpumask; 640 u32 static_power, dynamic_power; 641 int ret; 642 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 643 644 cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask); 645 num_cpus = cpumask_weight(&cpumask); 646 647 /* None of our cpus are online, so no power */ 648 if (num_cpus == 0) { 649 *power = 0; 650 return 0; 651 } 652 653 freq = cpufreq_device->freq_table[state]; 654 if (!freq) 655 return -EINVAL; 656 657 dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus; 658 ret = get_static_power(cpufreq_device, tz, freq, &static_power); 659 if (ret) 660 return ret; 661 662 *power = static_power + dynamic_power; 663 return 0; 664} 665 666/** 667 * cpufreq_power2state() - convert power to a cooling device state 668 * @cdev: &thermal_cooling_device pointer 669 * @tz: a valid thermal zone device pointer 670 * @power: power in milliwatts to be converted 671 * @state: pointer in which to store the resulting state 672 * 673 * Calculate a cooling device state for the cpus described by @cdev 674 * that would allow them to consume at most @power mW and store it in 675 * @state. Note that this calculation depends on external factors 676 * such as the cpu load or the current static power. Calling this 677 * function with the same power as input can yield different cooling 678 * device states depending on those external factors. 679 * 680 * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if 681 * the calculated frequency could not be converted to a valid state. 682 * The latter should not happen unless the frequencies available to 683 * cpufreq have changed since the initialization of the cpu cooling 684 * device. 685 */ 686static int cpufreq_power2state(struct thermal_cooling_device *cdev, 687 struct thermal_zone_device *tz, u32 power, 688 unsigned long *state) 689{ 690 unsigned int cpu, cur_freq, target_freq; 691 int ret; 692 s32 dyn_power; 693 u32 last_load, normalised_power, static_power; 694 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 695 696 cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask); 697 698 /* None of our cpus are online */ 699 if (cpu >= nr_cpu_ids) 700 return -ENODEV; 701 702 cur_freq = cpufreq_quick_get(cpu); 703 ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power); 704 if (ret) 705 return ret; 706 707 dyn_power = power - static_power; 708 dyn_power = dyn_power > 0 ? dyn_power : 0; 709 last_load = cpufreq_device->last_load ?: 1; 710 normalised_power = (dyn_power * 100) / last_load; 711 target_freq = cpu_power_to_freq(cpufreq_device, normalised_power); 712 713 *state = cpufreq_cooling_get_level(cpu, target_freq); 714 if (*state == THERMAL_CSTATE_INVALID) { 715 dev_warn_ratelimited(&cdev->device, 716 "Failed to convert %dKHz for cpu %d into a cdev state\n", 717 target_freq, cpu); 718 return -EINVAL; 719 } 720 721 trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus, 722 target_freq, *state, power); 723 return 0; 724} 725 726/* Bind cpufreq callbacks to thermal cooling device ops */ 727static struct thermal_cooling_device_ops cpufreq_cooling_ops = { 728 .get_max_state = cpufreq_get_max_state, 729 .get_cur_state = cpufreq_get_cur_state, 730 .set_cur_state = cpufreq_set_cur_state, 731}; 732 733/* Notifier for cpufreq policy change */ 734static struct notifier_block thermal_cpufreq_notifier_block = { 735 .notifier_call = cpufreq_thermal_notifier, 736}; 737 738static unsigned int find_next_max(struct cpufreq_frequency_table *table, 739 unsigned int prev_max) 740{ 741 struct cpufreq_frequency_table *pos; 742 unsigned int max = 0; 743 744 cpufreq_for_each_valid_entry(pos, table) { 745 if (pos->frequency > max && pos->frequency < prev_max) 746 max = pos->frequency; 747 } 748 749 return max; 750} 751 752/** 753 * __cpufreq_cooling_register - helper function to create cpufreq cooling device 754 * @np: a valid struct device_node to the cooling device device tree node 755 * @clip_cpus: cpumask of cpus where the frequency constraints will happen. 756 * Normally this should be same as cpufreq policy->related_cpus. 757 * @capacitance: dynamic power coefficient for these cpus 758 * @plat_static_func: function to calculate the static power consumed by these 759 * cpus (optional) 760 * 761 * This interface function registers the cpufreq cooling device with the name 762 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq 763 * cooling devices. It also gives the opportunity to link the cooling device 764 * with a device tree node, in order to bind it via the thermal DT code. 765 * 766 * Return: a valid struct thermal_cooling_device pointer on success, 767 * on failure, it returns a corresponding ERR_PTR(). 768 */ 769static struct thermal_cooling_device * 770__cpufreq_cooling_register(struct device_node *np, 771 const struct cpumask *clip_cpus, u32 capacitance, 772 get_static_t plat_static_func) 773{ 774 struct thermal_cooling_device *cool_dev; 775 struct cpufreq_cooling_device *cpufreq_dev; 776 char dev_name[THERMAL_NAME_LENGTH]; 777 struct cpufreq_frequency_table *pos, *table; 778 unsigned int freq, i, num_cpus; 779 int ret; 780 781 table = cpufreq_frequency_get_table(cpumask_first(clip_cpus)); 782 if (!table) { 783 pr_debug("%s: CPUFreq table not found\n", __func__); 784 return ERR_PTR(-EPROBE_DEFER); 785 } 786 787 cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL); 788 if (!cpufreq_dev) 789 return ERR_PTR(-ENOMEM); 790 791 num_cpus = cpumask_weight(clip_cpus); 792 cpufreq_dev->time_in_idle = kcalloc(num_cpus, 793 sizeof(*cpufreq_dev->time_in_idle), 794 GFP_KERNEL); 795 if (!cpufreq_dev->time_in_idle) { 796 cool_dev = ERR_PTR(-ENOMEM); 797 goto free_cdev; 798 } 799 800 cpufreq_dev->time_in_idle_timestamp = 801 kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp), 802 GFP_KERNEL); 803 if (!cpufreq_dev->time_in_idle_timestamp) { 804 cool_dev = ERR_PTR(-ENOMEM); 805 goto free_time_in_idle; 806 } 807 808 /* Find max levels */ 809 cpufreq_for_each_valid_entry(pos, table) 810 cpufreq_dev->max_level++; 811 812 cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) * 813 cpufreq_dev->max_level, GFP_KERNEL); 814 if (!cpufreq_dev->freq_table) { 815 cool_dev = ERR_PTR(-ENOMEM); 816 goto free_time_in_idle_timestamp; 817 } 818 819 /* max_level is an index, not a counter */ 820 cpufreq_dev->max_level--; 821 822 cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus); 823 824 if (capacitance) { 825 cpufreq_cooling_ops.get_requested_power = 826 cpufreq_get_requested_power; 827 cpufreq_cooling_ops.state2power = cpufreq_state2power; 828 cpufreq_cooling_ops.power2state = cpufreq_power2state; 829 cpufreq_dev->plat_get_static_power = plat_static_func; 830 831 ret = build_dyn_power_table(cpufreq_dev, capacitance); 832 if (ret) { 833 cool_dev = ERR_PTR(ret); 834 goto free_table; 835 } 836 } 837 838 ret = get_idr(&cpufreq_idr, &cpufreq_dev->id); 839 if (ret) { 840 cool_dev = ERR_PTR(ret); 841 goto free_table; 842 } 843 844 snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d", 845 cpufreq_dev->id); 846 847 cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev, 848 &cpufreq_cooling_ops); 849 if (IS_ERR(cool_dev)) 850 goto remove_idr; 851 852 /* Fill freq-table in descending order of frequencies */ 853 for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) { 854 freq = find_next_max(table, freq); 855 cpufreq_dev->freq_table[i] = freq; 856 857 /* Warn for duplicate entries */ 858 if (!freq) 859 pr_warn("%s: table has duplicate entries\n", __func__); 860 else 861 pr_debug("%s: freq:%u KHz\n", __func__, freq); 862 } 863 864 cpufreq_dev->cpufreq_val = cpufreq_dev->freq_table[0]; 865 cpufreq_dev->cool_dev = cool_dev; 866 867 mutex_lock(&cooling_cpufreq_lock); 868 869 /* Register the notifier for first cpufreq cooling device */ 870 if (list_empty(&cpufreq_dev_list)) 871 cpufreq_register_notifier(&thermal_cpufreq_notifier_block, 872 CPUFREQ_POLICY_NOTIFIER); 873 list_add(&cpufreq_dev->node, &cpufreq_dev_list); 874 875 mutex_unlock(&cooling_cpufreq_lock); 876 877 return cool_dev; 878 879remove_idr: 880 release_idr(&cpufreq_idr, cpufreq_dev->id); 881free_table: 882 kfree(cpufreq_dev->freq_table); 883free_time_in_idle_timestamp: 884 kfree(cpufreq_dev->time_in_idle_timestamp); 885free_time_in_idle: 886 kfree(cpufreq_dev->time_in_idle); 887free_cdev: 888 kfree(cpufreq_dev); 889 890 return cool_dev; 891} 892 893/** 894 * cpufreq_cooling_register - function to create cpufreq cooling device. 895 * @clip_cpus: cpumask of cpus where the frequency constraints will happen. 896 * 897 * This interface function registers the cpufreq cooling device with the name 898 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq 899 * cooling devices. 900 * 901 * Return: a valid struct thermal_cooling_device pointer on success, 902 * on failure, it returns a corresponding ERR_PTR(). 903 */ 904struct thermal_cooling_device * 905cpufreq_cooling_register(const struct cpumask *clip_cpus) 906{ 907 return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL); 908} 909EXPORT_SYMBOL_GPL(cpufreq_cooling_register); 910 911/** 912 * of_cpufreq_cooling_register - function to create cpufreq cooling device. 913 * @np: a valid struct device_node to the cooling device device tree node 914 * @clip_cpus: cpumask of cpus where the frequency constraints will happen. 915 * 916 * This interface function registers the cpufreq cooling device with the name 917 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq 918 * cooling devices. Using this API, the cpufreq cooling device will be 919 * linked to the device tree node provided. 920 * 921 * Return: a valid struct thermal_cooling_device pointer on success, 922 * on failure, it returns a corresponding ERR_PTR(). 923 */ 924struct thermal_cooling_device * 925of_cpufreq_cooling_register(struct device_node *np, 926 const struct cpumask *clip_cpus) 927{ 928 if (!np) 929 return ERR_PTR(-EINVAL); 930 931 return __cpufreq_cooling_register(np, clip_cpus, 0, NULL); 932} 933EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register); 934 935/** 936 * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions 937 * @clip_cpus: cpumask of cpus where the frequency constraints will happen 938 * @capacitance: dynamic power coefficient for these cpus 939 * @plat_static_func: function to calculate the static power consumed by these 940 * cpus (optional) 941 * 942 * This interface function registers the cpufreq cooling device with 943 * the name "thermal-cpufreq-%x". This api can support multiple 944 * instances of cpufreq cooling devices. Using this function, the 945 * cooling device will implement the power extensions by using a 946 * simple cpu power model. The cpus must have registered their OPPs 947 * using the OPP library. 948 * 949 * An optional @plat_static_func may be provided to calculate the 950 * static power consumed by these cpus. If the platform's static 951 * power consumption is unknown or negligible, make it NULL. 952 * 953 * Return: a valid struct thermal_cooling_device pointer on success, 954 * on failure, it returns a corresponding ERR_PTR(). 955 */ 956struct thermal_cooling_device * 957cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance, 958 get_static_t plat_static_func) 959{ 960 return __cpufreq_cooling_register(NULL, clip_cpus, capacitance, 961 plat_static_func); 962} 963EXPORT_SYMBOL(cpufreq_power_cooling_register); 964 965/** 966 * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions 967 * @np: a valid struct device_node to the cooling device device tree node 968 * @clip_cpus: cpumask of cpus where the frequency constraints will happen 969 * @capacitance: dynamic power coefficient for these cpus 970 * @plat_static_func: function to calculate the static power consumed by these 971 * cpus (optional) 972 * 973 * This interface function registers the cpufreq cooling device with 974 * the name "thermal-cpufreq-%x". This api can support multiple 975 * instances of cpufreq cooling devices. Using this API, the cpufreq 976 * cooling device will be linked to the device tree node provided. 977 * Using this function, the cooling device will implement the power 978 * extensions by using a simple cpu power model. The cpus must have 979 * registered their OPPs using the OPP library. 980 * 981 * An optional @plat_static_func may be provided to calculate the 982 * static power consumed by these cpus. If the platform's static 983 * power consumption is unknown or negligible, make it NULL. 984 * 985 * Return: a valid struct thermal_cooling_device pointer on success, 986 * on failure, it returns a corresponding ERR_PTR(). 987 */ 988struct thermal_cooling_device * 989of_cpufreq_power_cooling_register(struct device_node *np, 990 const struct cpumask *clip_cpus, 991 u32 capacitance, 992 get_static_t plat_static_func) 993{ 994 if (!np) 995 return ERR_PTR(-EINVAL); 996 997 return __cpufreq_cooling_register(np, clip_cpus, capacitance, 998 plat_static_func); 999} 1000EXPORT_SYMBOL(of_cpufreq_power_cooling_register); 1001 1002/** 1003 * cpufreq_cooling_unregister - function to remove cpufreq cooling device. 1004 * @cdev: thermal cooling device pointer. 1005 * 1006 * This interface function unregisters the "thermal-cpufreq-%x" cooling device. 1007 */ 1008void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) 1009{ 1010 struct cpufreq_cooling_device *cpufreq_dev; 1011 1012 if (!cdev) 1013 return; 1014 1015 cpufreq_dev = cdev->devdata; 1016 mutex_lock(&cooling_cpufreq_lock); 1017 list_del(&cpufreq_dev->node); 1018 1019 /* Unregister the notifier for the last cpufreq cooling device */ 1020 if (list_empty(&cpufreq_dev_list)) 1021 cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block, 1022 CPUFREQ_POLICY_NOTIFIER); 1023 mutex_unlock(&cooling_cpufreq_lock); 1024 1025 thermal_cooling_device_unregister(cpufreq_dev->cool_dev); 1026 release_idr(&cpufreq_idr, cpufreq_dev->id); 1027 kfree(cpufreq_dev->time_in_idle_timestamp); 1028 kfree(cpufreq_dev->time_in_idle); 1029 kfree(cpufreq_dev->freq_table); 1030 kfree(cpufreq_dev); 1031} 1032EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);