Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v4.4-rc8 1055 lines 33 kB view raw
/*
 * linux/drivers/thermal/cpu_cooling.c
 *
 * Copyright (C) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
 * Copyright (C) 2012 Amit Daniel <amit.kachhap@linaro.org>
 *
 * Copyright (C) 2014 Viresh Kumar <viresh.kumar@linaro.org>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
#include <linux/module.h>
#include <linux/thermal.h>
#include <linux/cpufreq.h>
#include <linux/err.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>

#include <trace/events/thermal.h>

/*
 * Cooling state <-> CPUFreq frequency
 *
 * Cooling states are translated to frequencies throughout this driver and this
 * is the relation between them.
 *
 * Highest cooling state corresponds to lowest possible frequency.
 *
 * i.e.
 *	level 0 --> 1st Max Freq
 *	level 1 --> 2nd Max Freq
 *	...
 */

/**
 * struct power_table - frequency to power conversion
 * @frequency:	frequency in KHz
 * @power:	power in mW
 *
 * This structure is built when the cooling device registers and helps
 * in translating frequency to power and vice versa.
 */
struct power_table {
	u32 frequency;
	u32 power;
};

/**
 * struct cpufreq_cooling_device - data for cooling device with cpufreq
 * @id: unique integer value corresponding to each cpufreq_cooling_device
 *	registered.
 * @cool_dev: thermal_cooling_device pointer to keep track of the
 *	registered cooling device.
 * @cpufreq_state: integer value representing the current state of cpufreq
 *	cooling devices.
 * @clipped_freq: integer value representing the absolute value of the clipped
 *	frequency.
 * @max_level: maximum cooling level. One less than total number of valid
 *	cpufreq frequencies.
 * @freq_table: array of frequencies, in descending order, indexed by
 *	cooling level.
 * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
 * @node: list_head to link all cpufreq_cooling_device together.
 * @last_load: load measured by the latest call to
 *	cpufreq_get_requested_power()
 * @time_in_idle: previous reading of the absolute time that this cpu was idle
 * @time_in_idle_timestamp: wall time of the last invocation of
 *	get_cpu_idle_time_us()
 * @dyn_power_table: array of struct power_table for frequency to power
 *	conversion, sorted in ascending order.
 * @dyn_power_table_entries: number of entries in the @dyn_power_table array
 * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
 * @plat_get_static_power: callback to calculate the static power
 *
 * This structure is required for keeping information of each registered
 * cpufreq_cooling_device.
89 */ 90struct cpufreq_cooling_device { 91 int id; 92 struct thermal_cooling_device *cool_dev; 93 unsigned int cpufreq_state; 94 unsigned int clipped_freq; 95 unsigned int max_level; 96 unsigned int *freq_table; /* In descending order */ 97 struct cpumask allowed_cpus; 98 struct list_head node; 99 u32 last_load; 100 u64 *time_in_idle; 101 u64 *time_in_idle_timestamp; 102 struct power_table *dyn_power_table; 103 int dyn_power_table_entries; 104 struct device *cpu_dev; 105 get_static_t plat_get_static_power; 106}; 107static DEFINE_IDR(cpufreq_idr); 108static DEFINE_MUTEX(cooling_cpufreq_lock); 109 110static unsigned int cpufreq_dev_count; 111 112static DEFINE_MUTEX(cooling_list_lock); 113static LIST_HEAD(cpufreq_dev_list); 114 115/** 116 * get_idr - function to get a unique id. 117 * @idr: struct idr * handle used to create a id. 118 * @id: int * value generated by this function. 119 * 120 * This function will populate @id with an unique 121 * id, using the idr API. 122 * 123 * Return: 0 on success, an error code on failure. 124 */ 125static int get_idr(struct idr *idr, int *id) 126{ 127 int ret; 128 129 mutex_lock(&cooling_cpufreq_lock); 130 ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); 131 mutex_unlock(&cooling_cpufreq_lock); 132 if (unlikely(ret < 0)) 133 return ret; 134 *id = ret; 135 136 return 0; 137} 138 139/** 140 * release_idr - function to free the unique id. 141 * @idr: struct idr * handle used for creating the id. 142 * @id: int value representing the unique id. 143 */ 144static void release_idr(struct idr *idr, int id) 145{ 146 mutex_lock(&cooling_cpufreq_lock); 147 idr_remove(idr, id); 148 mutex_unlock(&cooling_cpufreq_lock); 149} 150 151/* Below code defines functions to be used for cpufreq as cooling device */ 152 153/** 154 * get_level: Find the level for a particular frequency 155 * @cpufreq_dev: cpufreq_dev for which the property is required 156 * @freq: Frequency 157 * 158 * Return: level on success, THERMAL_CSTATE_INVALID on error. 
159 */ 160static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev, 161 unsigned int freq) 162{ 163 unsigned long level; 164 165 for (level = 0; level <= cpufreq_dev->max_level; level++) { 166 if (freq == cpufreq_dev->freq_table[level]) 167 return level; 168 169 if (freq > cpufreq_dev->freq_table[level]) 170 break; 171 } 172 173 return THERMAL_CSTATE_INVALID; 174} 175 176/** 177 * cpufreq_cooling_get_level - for a given cpu, return the cooling level. 178 * @cpu: cpu for which the level is required 179 * @freq: the frequency of interest 180 * 181 * This function will match the cooling level corresponding to the 182 * requested @freq and return it. 183 * 184 * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID 185 * otherwise. 186 */ 187unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) 188{ 189 struct cpufreq_cooling_device *cpufreq_dev; 190 191 mutex_lock(&cooling_list_lock); 192 list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { 193 if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) { 194 mutex_unlock(&cooling_list_lock); 195 return get_level(cpufreq_dev, freq); 196 } 197 } 198 mutex_unlock(&cooling_list_lock); 199 200 pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu); 201 return THERMAL_CSTATE_INVALID; 202} 203EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level); 204 205/** 206 * cpufreq_thermal_notifier - notifier callback for cpufreq policy change. 207 * @nb: struct notifier_block * with callback info. 208 * @event: value showing cpufreq event for which this function invoked. 209 * @data: callback-specific data 210 * 211 * Callback to hijack the notification on cpufreq policy transition. 212 * Every time there is a change in policy, we will intercept and 213 * update the cpufreq policy with thermal constraints. 
214 * 215 * Return: 0 (success) 216 */ 217static int cpufreq_thermal_notifier(struct notifier_block *nb, 218 unsigned long event, void *data) 219{ 220 struct cpufreq_policy *policy = data; 221 unsigned long clipped_freq; 222 struct cpufreq_cooling_device *cpufreq_dev; 223 224 if (event != CPUFREQ_ADJUST) 225 return NOTIFY_DONE; 226 227 mutex_lock(&cooling_list_lock); 228 list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { 229 if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus)) 230 continue; 231 232 /* 233 * policy->max is the maximum allowed frequency defined by user 234 * and clipped_freq is the maximum that thermal constraints 235 * allow. 236 * 237 * If clipped_freq is lower than policy->max, then we need to 238 * readjust policy->max. 239 * 240 * But, if clipped_freq is greater than policy->max, we don't 241 * need to do anything. 242 */ 243 clipped_freq = cpufreq_dev->clipped_freq; 244 245 if (policy->max > clipped_freq) 246 cpufreq_verify_within_limits(policy, 0, clipped_freq); 247 break; 248 } 249 mutex_unlock(&cooling_list_lock); 250 251 return NOTIFY_OK; 252} 253 254/** 255 * build_dyn_power_table() - create a dynamic power to frequency table 256 * @cpufreq_device: the cpufreq cooling device in which to store the table 257 * @capacitance: dynamic power coefficient for these cpus 258 * 259 * Build a dynamic power to frequency table for this cpu and store it 260 * in @cpufreq_device. This table will be used in cpu_power_to_freq() and 261 * cpu_freq_to_power() to convert between power and frequency 262 * efficiently. Power is stored in mW, frequency in KHz. The 263 * resulting table is in ascending order. 264 * 265 * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs, 266 * -ENOMEM if we run out of memory or -EAGAIN if an OPP was 267 * added/enabled while the function was executing. 
268 */ 269static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, 270 u32 capacitance) 271{ 272 struct power_table *power_table; 273 struct dev_pm_opp *opp; 274 struct device *dev = NULL; 275 int num_opps = 0, cpu, i, ret = 0; 276 unsigned long freq; 277 278 for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { 279 dev = get_cpu_device(cpu); 280 if (!dev) { 281 dev_warn(&cpufreq_device->cool_dev->device, 282 "No cpu device for cpu %d\n", cpu); 283 continue; 284 } 285 286 num_opps = dev_pm_opp_get_opp_count(dev); 287 if (num_opps > 0) 288 break; 289 else if (num_opps < 0) 290 return num_opps; 291 } 292 293 if (num_opps == 0) 294 return -EINVAL; 295 296 power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL); 297 if (!power_table) 298 return -ENOMEM; 299 300 rcu_read_lock(); 301 302 for (freq = 0, i = 0; 303 opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp); 304 freq++, i++) { 305 u32 freq_mhz, voltage_mv; 306 u64 power; 307 308 if (i >= num_opps) { 309 rcu_read_unlock(); 310 ret = -EAGAIN; 311 goto free_power_table; 312 } 313 314 freq_mhz = freq / 1000000; 315 voltage_mv = dev_pm_opp_get_voltage(opp) / 1000; 316 317 /* 318 * Do the multiplication with MHz and millivolt so as 319 * to not overflow. 
320 */ 321 power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv; 322 do_div(power, 1000000000); 323 324 /* frequency is stored in power_table in KHz */ 325 power_table[i].frequency = freq / 1000; 326 327 /* power is stored in mW */ 328 power_table[i].power = power; 329 } 330 331 rcu_read_unlock(); 332 333 if (i != num_opps) { 334 ret = PTR_ERR(opp); 335 goto free_power_table; 336 } 337 338 cpufreq_device->cpu_dev = dev; 339 cpufreq_device->dyn_power_table = power_table; 340 cpufreq_device->dyn_power_table_entries = i; 341 342 return 0; 343 344free_power_table: 345 kfree(power_table); 346 347 return ret; 348} 349 350static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device, 351 u32 freq) 352{ 353 int i; 354 struct power_table *pt = cpufreq_device->dyn_power_table; 355 356 for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++) 357 if (freq < pt[i].frequency) 358 break; 359 360 return pt[i - 1].power; 361} 362 363static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device, 364 u32 power) 365{ 366 int i; 367 struct power_table *pt = cpufreq_device->dyn_power_table; 368 369 for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++) 370 if (power < pt[i].power) 371 break; 372 373 return pt[i - 1].frequency; 374} 375 376/** 377 * get_load() - get load for a cpu since last updated 378 * @cpufreq_device: &struct cpufreq_cooling_device for this cpu 379 * @cpu: cpu number 380 * 381 * Return: The average load of cpu @cpu in percentage since this 382 * function was last called. 
383 */ 384static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu) 385{ 386 u32 load; 387 u64 now, now_idle, delta_time, delta_idle; 388 389 now_idle = get_cpu_idle_time(cpu, &now, 0); 390 delta_idle = now_idle - cpufreq_device->time_in_idle[cpu]; 391 delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu]; 392 393 if (delta_time <= delta_idle) 394 load = 0; 395 else 396 load = div64_u64(100 * (delta_time - delta_idle), delta_time); 397 398 cpufreq_device->time_in_idle[cpu] = now_idle; 399 cpufreq_device->time_in_idle_timestamp[cpu] = now; 400 401 return load; 402} 403 404/** 405 * get_static_power() - calculate the static power consumed by the cpus 406 * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev 407 * @tz: thermal zone device in which we're operating 408 * @freq: frequency in KHz 409 * @power: pointer in which to store the calculated static power 410 * 411 * Calculate the static power consumed by the cpus described by 412 * @cpu_actor running at frequency @freq. This function relies on a 413 * platform specific function that should have been provided when the 414 * actor was registered. If it wasn't, the static power is assumed to 415 * be negligible. The calculated static power is stored in @power. 416 * 417 * Return: 0 on success, -E* on failure. 
418 */ 419static int get_static_power(struct cpufreq_cooling_device *cpufreq_device, 420 struct thermal_zone_device *tz, unsigned long freq, 421 u32 *power) 422{ 423 struct dev_pm_opp *opp; 424 unsigned long voltage; 425 struct cpumask *cpumask = &cpufreq_device->allowed_cpus; 426 unsigned long freq_hz = freq * 1000; 427 428 if (!cpufreq_device->plat_get_static_power || 429 !cpufreq_device->cpu_dev) { 430 *power = 0; 431 return 0; 432 } 433 434 rcu_read_lock(); 435 436 opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz, 437 true); 438 voltage = dev_pm_opp_get_voltage(opp); 439 440 rcu_read_unlock(); 441 442 if (voltage == 0) { 443 dev_warn_ratelimited(cpufreq_device->cpu_dev, 444 "Failed to get voltage for frequency %lu: %ld\n", 445 freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0); 446 return -EINVAL; 447 } 448 449 return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay, 450 voltage, power); 451} 452 453/** 454 * get_dynamic_power() - calculate the dynamic power 455 * @cpufreq_device: &cpufreq_cooling_device for this cdev 456 * @freq: current frequency 457 * 458 * Return: the dynamic power consumed by the cpus described by 459 * @cpufreq_device. 460 */ 461static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device, 462 unsigned long freq) 463{ 464 u32 raw_cpu_power; 465 466 raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq); 467 return (raw_cpu_power * cpufreq_device->last_load) / 100; 468} 469 470/* cpufreq cooling device callback functions are defined below */ 471 472/** 473 * cpufreq_get_max_state - callback function to get the max cooling state. 474 * @cdev: thermal cooling device pointer. 475 * @state: fill this variable with the max cooling state. 476 * 477 * Callback for the thermal cooling device to return the cpufreq 478 * max cooling state. 479 * 480 * Return: 0 on success, an error code otherwise. 
481 */ 482static int cpufreq_get_max_state(struct thermal_cooling_device *cdev, 483 unsigned long *state) 484{ 485 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 486 487 *state = cpufreq_device->max_level; 488 return 0; 489} 490 491/** 492 * cpufreq_get_cur_state - callback function to get the current cooling state. 493 * @cdev: thermal cooling device pointer. 494 * @state: fill this variable with the current cooling state. 495 * 496 * Callback for the thermal cooling device to return the cpufreq 497 * current cooling state. 498 * 499 * Return: 0 on success, an error code otherwise. 500 */ 501static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev, 502 unsigned long *state) 503{ 504 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 505 506 *state = cpufreq_device->cpufreq_state; 507 508 return 0; 509} 510 511/** 512 * cpufreq_set_cur_state - callback function to set the current cooling state. 513 * @cdev: thermal cooling device pointer. 514 * @state: set this variable to the current cooling state. 515 * 516 * Callback for the thermal cooling device to change the cpufreq 517 * current cooling state. 518 * 519 * Return: 0 on success, an error code otherwise. 
520 */ 521static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, 522 unsigned long state) 523{ 524 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 525 unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus); 526 unsigned int clip_freq; 527 528 /* Request state should be less than max_level */ 529 if (WARN_ON(state > cpufreq_device->max_level)) 530 return -EINVAL; 531 532 /* Check if the old cooling action is same as new cooling action */ 533 if (cpufreq_device->cpufreq_state == state) 534 return 0; 535 536 clip_freq = cpufreq_device->freq_table[state]; 537 cpufreq_device->cpufreq_state = state; 538 cpufreq_device->clipped_freq = clip_freq; 539 540 cpufreq_update_policy(cpu); 541 542 return 0; 543} 544 545/** 546 * cpufreq_get_requested_power() - get the current power 547 * @cdev: &thermal_cooling_device pointer 548 * @tz: a valid thermal zone device pointer 549 * @power: pointer in which to store the resulting power 550 * 551 * Calculate the current power consumption of the cpus in milliwatts 552 * and store it in @power. This function should actually calculate 553 * the requested power, but it's hard to get the frequency that 554 * cpufreq would have assigned if there were no thermal limits. 555 * Instead, we calculate the current power on the assumption that the 556 * immediate future will look like the immediate past. 557 * 558 * We use the current frequency and the average load since this 559 * function was last called. In reality, there could have been 560 * multiple opps since this function was last called and that affects 561 * the load calculation. While it's not perfectly accurate, this 562 * simplification is good enough and works. REVISIT this, as more 563 * complex code may be needed if experiments show that it's not 564 * accurate enough. 565 * 566 * Return: 0 on success, -E* if getting the static power failed. 
567 */ 568static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, 569 struct thermal_zone_device *tz, 570 u32 *power) 571{ 572 unsigned long freq; 573 int i = 0, cpu, ret; 574 u32 static_power, dynamic_power, total_load = 0; 575 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 576 u32 *load_cpu = NULL; 577 578 cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask); 579 580 /* 581 * All the CPUs are offline, thus the requested power by 582 * the cdev is 0 583 */ 584 if (cpu >= nr_cpu_ids) { 585 *power = 0; 586 return 0; 587 } 588 589 freq = cpufreq_quick_get(cpu); 590 591 if (trace_thermal_power_cpu_get_power_enabled()) { 592 u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus); 593 594 load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL); 595 } 596 597 for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { 598 u32 load; 599 600 if (cpu_online(cpu)) 601 load = get_load(cpufreq_device, cpu); 602 else 603 load = 0; 604 605 total_load += load; 606 if (trace_thermal_power_cpu_limit_enabled() && load_cpu) 607 load_cpu[i] = load; 608 609 i++; 610 } 611 612 cpufreq_device->last_load = total_load; 613 614 dynamic_power = get_dynamic_power(cpufreq_device, freq); 615 ret = get_static_power(cpufreq_device, tz, freq, &static_power); 616 if (ret) { 617 kfree(load_cpu); 618 return ret; 619 } 620 621 if (load_cpu) { 622 trace_thermal_power_cpu_get_power( 623 &cpufreq_device->allowed_cpus, 624 freq, load_cpu, i, dynamic_power, static_power); 625 626 kfree(load_cpu); 627 } 628 629 *power = static_power + dynamic_power; 630 return 0; 631} 632 633/** 634 * cpufreq_state2power() - convert a cpu cdev state to power consumed 635 * @cdev: &thermal_cooling_device pointer 636 * @tz: a valid thermal zone device pointer 637 * @state: cooling device state to be converted 638 * @power: pointer in which to store the resulting power 639 * 640 * Convert cooling device state @state into power consumption in 641 * milliwatts assuming 100% load. 
Store the calculated power in 642 * @power. 643 * 644 * Return: 0 on success, -EINVAL if the cooling device state could not 645 * be converted into a frequency or other -E* if there was an error 646 * when calculating the static power. 647 */ 648static int cpufreq_state2power(struct thermal_cooling_device *cdev, 649 struct thermal_zone_device *tz, 650 unsigned long state, u32 *power) 651{ 652 unsigned int freq, num_cpus; 653 cpumask_t cpumask; 654 u32 static_power, dynamic_power; 655 int ret; 656 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 657 658 cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask); 659 num_cpus = cpumask_weight(&cpumask); 660 661 /* None of our cpus are online, so no power */ 662 if (num_cpus == 0) { 663 *power = 0; 664 return 0; 665 } 666 667 freq = cpufreq_device->freq_table[state]; 668 if (!freq) 669 return -EINVAL; 670 671 dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus; 672 ret = get_static_power(cpufreq_device, tz, freq, &static_power); 673 if (ret) 674 return ret; 675 676 *power = static_power + dynamic_power; 677 return 0; 678} 679 680/** 681 * cpufreq_power2state() - convert power to a cooling device state 682 * @cdev: &thermal_cooling_device pointer 683 * @tz: a valid thermal zone device pointer 684 * @power: power in milliwatts to be converted 685 * @state: pointer in which to store the resulting state 686 * 687 * Calculate a cooling device state for the cpus described by @cdev 688 * that would allow them to consume at most @power mW and store it in 689 * @state. Note that this calculation depends on external factors 690 * such as the cpu load or the current static power. Calling this 691 * function with the same power as input can yield different cooling 692 * device states depending on those external factors. 693 * 694 * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if 695 * the calculated frequency could not be converted to a valid state. 
696 * The latter should not happen unless the frequencies available to 697 * cpufreq have changed since the initialization of the cpu cooling 698 * device. 699 */ 700static int cpufreq_power2state(struct thermal_cooling_device *cdev, 701 struct thermal_zone_device *tz, u32 power, 702 unsigned long *state) 703{ 704 unsigned int cpu, cur_freq, target_freq; 705 int ret; 706 s32 dyn_power; 707 u32 last_load, normalised_power, static_power; 708 struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; 709 710 cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask); 711 712 /* None of our cpus are online */ 713 if (cpu >= nr_cpu_ids) 714 return -ENODEV; 715 716 cur_freq = cpufreq_quick_get(cpu); 717 ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power); 718 if (ret) 719 return ret; 720 721 dyn_power = power - static_power; 722 dyn_power = dyn_power > 0 ? dyn_power : 0; 723 last_load = cpufreq_device->last_load ?: 1; 724 normalised_power = (dyn_power * 100) / last_load; 725 target_freq = cpu_power_to_freq(cpufreq_device, normalised_power); 726 727 *state = cpufreq_cooling_get_level(cpu, target_freq); 728 if (*state == THERMAL_CSTATE_INVALID) { 729 dev_warn_ratelimited(&cdev->device, 730 "Failed to convert %dKHz for cpu %d into a cdev state\n", 731 target_freq, cpu); 732 return -EINVAL; 733 } 734 735 trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus, 736 target_freq, *state, power); 737 return 0; 738} 739 740/* Bind cpufreq callbacks to thermal cooling device ops */ 741static struct thermal_cooling_device_ops cpufreq_cooling_ops = { 742 .get_max_state = cpufreq_get_max_state, 743 .get_cur_state = cpufreq_get_cur_state, 744 .set_cur_state = cpufreq_set_cur_state, 745}; 746 747/* Notifier for cpufreq policy change */ 748static struct notifier_block thermal_cpufreq_notifier_block = { 749 .notifier_call = cpufreq_thermal_notifier, 750}; 751 752static unsigned int find_next_max(struct cpufreq_frequency_table *table, 753 unsigned int 
prev_max) 754{ 755 struct cpufreq_frequency_table *pos; 756 unsigned int max = 0; 757 758 cpufreq_for_each_valid_entry(pos, table) { 759 if (pos->frequency > max && pos->frequency < prev_max) 760 max = pos->frequency; 761 } 762 763 return max; 764} 765 766/** 767 * __cpufreq_cooling_register - helper function to create cpufreq cooling device 768 * @np: a valid struct device_node to the cooling device device tree node 769 * @clip_cpus: cpumask of cpus where the frequency constraints will happen. 770 * Normally this should be same as cpufreq policy->related_cpus. 771 * @capacitance: dynamic power coefficient for these cpus 772 * @plat_static_func: function to calculate the static power consumed by these 773 * cpus (optional) 774 * 775 * This interface function registers the cpufreq cooling device with the name 776 * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq 777 * cooling devices. It also gives the opportunity to link the cooling device 778 * with a device tree node, in order to bind it via the thermal DT code. 779 * 780 * Return: a valid struct thermal_cooling_device pointer on success, 781 * on failure, it returns a corresponding ERR_PTR(). 
782 */ 783static struct thermal_cooling_device * 784__cpufreq_cooling_register(struct device_node *np, 785 const struct cpumask *clip_cpus, u32 capacitance, 786 get_static_t plat_static_func) 787{ 788 struct thermal_cooling_device *cool_dev; 789 struct cpufreq_cooling_device *cpufreq_dev; 790 char dev_name[THERMAL_NAME_LENGTH]; 791 struct cpufreq_frequency_table *pos, *table; 792 unsigned int freq, i, num_cpus; 793 int ret; 794 795 table = cpufreq_frequency_get_table(cpumask_first(clip_cpus)); 796 if (!table) { 797 pr_debug("%s: CPUFreq table not found\n", __func__); 798 return ERR_PTR(-EPROBE_DEFER); 799 } 800 801 cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL); 802 if (!cpufreq_dev) 803 return ERR_PTR(-ENOMEM); 804 805 num_cpus = cpumask_weight(clip_cpus); 806 cpufreq_dev->time_in_idle = kcalloc(num_cpus, 807 sizeof(*cpufreq_dev->time_in_idle), 808 GFP_KERNEL); 809 if (!cpufreq_dev->time_in_idle) { 810 cool_dev = ERR_PTR(-ENOMEM); 811 goto free_cdev; 812 } 813 814 cpufreq_dev->time_in_idle_timestamp = 815 kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp), 816 GFP_KERNEL); 817 if (!cpufreq_dev->time_in_idle_timestamp) { 818 cool_dev = ERR_PTR(-ENOMEM); 819 goto free_time_in_idle; 820 } 821 822 /* Find max levels */ 823 cpufreq_for_each_valid_entry(pos, table) 824 cpufreq_dev->max_level++; 825 826 cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) * 827 cpufreq_dev->max_level, GFP_KERNEL); 828 if (!cpufreq_dev->freq_table) { 829 cool_dev = ERR_PTR(-ENOMEM); 830 goto free_time_in_idle_timestamp; 831 } 832 833 /* max_level is an index, not a counter */ 834 cpufreq_dev->max_level--; 835 836 cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus); 837 838 if (capacitance) { 839 cpufreq_cooling_ops.get_requested_power = 840 cpufreq_get_requested_power; 841 cpufreq_cooling_ops.state2power = cpufreq_state2power; 842 cpufreq_cooling_ops.power2state = cpufreq_power2state; 843 cpufreq_dev->plat_get_static_power = plat_static_func; 844 845 ret 
= build_dyn_power_table(cpufreq_dev, capacitance); 846 if (ret) { 847 cool_dev = ERR_PTR(ret); 848 goto free_table; 849 } 850 } 851 852 ret = get_idr(&cpufreq_idr, &cpufreq_dev->id); 853 if (ret) { 854 cool_dev = ERR_PTR(ret); 855 goto free_power_table; 856 } 857 858 snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d", 859 cpufreq_dev->id); 860 861 cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev, 862 &cpufreq_cooling_ops); 863 if (IS_ERR(cool_dev)) 864 goto remove_idr; 865 866 /* Fill freq-table in descending order of frequencies */ 867 for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) { 868 freq = find_next_max(table, freq); 869 cpufreq_dev->freq_table[i] = freq; 870 871 /* Warn for duplicate entries */ 872 if (!freq) 873 pr_warn("%s: table has duplicate entries\n", __func__); 874 else 875 pr_debug("%s: freq:%u KHz\n", __func__, freq); 876 } 877 878 cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0]; 879 cpufreq_dev->cool_dev = cool_dev; 880 881 mutex_lock(&cooling_cpufreq_lock); 882 883 mutex_lock(&cooling_list_lock); 884 list_add(&cpufreq_dev->node, &cpufreq_dev_list); 885 mutex_unlock(&cooling_list_lock); 886 887 /* Register the notifier for first cpufreq cooling device */ 888 if (!cpufreq_dev_count++) 889 cpufreq_register_notifier(&thermal_cpufreq_notifier_block, 890 CPUFREQ_POLICY_NOTIFIER); 891 mutex_unlock(&cooling_cpufreq_lock); 892 893 return cool_dev; 894 895remove_idr: 896 release_idr(&cpufreq_idr, cpufreq_dev->id); 897free_power_table: 898 kfree(cpufreq_dev->dyn_power_table); 899free_table: 900 kfree(cpufreq_dev->freq_table); 901free_time_in_idle_timestamp: 902 kfree(cpufreq_dev->time_in_idle_timestamp); 903free_time_in_idle: 904 kfree(cpufreq_dev->time_in_idle); 905free_cdev: 906 kfree(cpufreq_dev); 907 908 return cool_dev; 909} 910 911/** 912 * cpufreq_cooling_register - function to create cpufreq cooling device. 913 * @clip_cpus: cpumask of cpus where the frequency constraints will happen. 
*
 * This interface function registers the cpufreq cooling device with the name
 * "thermal-cpufreq-%d". This api can support multiple instances of cpufreq
 * cooling devices.
 *
 * The device is registered without a device tree node, without a
 * capacitance and without a static power callback.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
cpufreq_cooling_register(const struct cpumask *clip_cpus)
{
	return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);

/**
 * of_cpufreq_cooling_register - function to create cpufreq cooling device.
 * @np: a valid struct device_node to the cooling device device tree node
 * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
 *
 * This interface function registers the cpufreq cooling device with the name
 * "thermal-cpufreq-%d". This api can support multiple instances of cpufreq
 * cooling devices. Using this API, the cpufreq cooling device will be
 * linked to the device tree node provided.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR(). A NULL @np is rejected
 * with ERR_PTR(-EINVAL).
 */
struct thermal_cooling_device *
of_cpufreq_cooling_register(struct device_node *np,
			    const struct cpumask *clip_cpus)
{
	if (!np)
		return ERR_PTR(-EINVAL);

	return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
}
EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);

/**
 * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
 * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
 * @capacitance:	dynamic power coefficient for these cpus
 * @plat_static_func:	function to calculate the static power consumed by these
 *			cpus (optional)
 *
 * This interface function registers the cpufreq cooling device with
 * the name "thermal-cpufreq-%d".
* This api can support multiple
 * instances of cpufreq cooling devices.  Using this function, the
 * cooling device will implement the power extensions by using a
 * simple cpu power model.  The cpus must have registered their OPPs
 * using the OPP library.
 *
 * An optional @plat_static_func may be provided to calculate the
 * static power consumed by these cpus.  If the platform's static
 * power consumption is unknown or negligible, make it NULL.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR().
 */
struct thermal_cooling_device *
cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
			       get_static_t plat_static_func)
{
	/* No device tree node is associated with this cooling device */
	return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
				plat_static_func);
}
EXPORT_SYMBOL(cpufreq_power_cooling_register);

/**
 * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
 * @np:	a valid struct device_node to the cooling device device tree node
 * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
 * @capacitance:	dynamic power coefficient for these cpus
 * @plat_static_func:	function to calculate the static power consumed by these
 *			cpus (optional)
 *
 * This interface function registers the cpufreq cooling device with
 * the name "thermal-cpufreq-%d".  This api can support multiple
 * instances of cpufreq cooling devices.  Using this API, the cpufreq
 * cooling device will be linked to the device tree node provided.
 * Using this function, the cooling device will implement the power
 * extensions by using a simple cpu power model.  The cpus must have
 * registered their OPPs using the OPP library.
 *
 * An optional @plat_static_func may be provided to calculate the
 * static power consumed by these cpus.
* If the platform's static
 * power consumption is unknown or negligible, make it NULL.
 *
 * Return: a valid struct thermal_cooling_device pointer on success,
 * on failure, it returns a corresponding ERR_PTR(). A NULL @np is rejected
 * with ERR_PTR(-EINVAL).
 */
struct thermal_cooling_device *
of_cpufreq_power_cooling_register(struct device_node *np,
				  const struct cpumask *clip_cpus,
				  u32 capacitance,
				  get_static_t plat_static_func)
{
	if (!np)
		return ERR_PTR(-EINVAL);

	return __cpufreq_cooling_register(np, clip_cpus, capacitance,
				plat_static_func);
}
EXPORT_SYMBOL(of_cpufreq_power_cooling_register);

/**
 * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
 * @cdev: thermal cooling device pointer.
 *
 * This interface function unregisters the "thermal-cpufreq-%d" cooling device
 * and frees everything that was allocated for it at registration time.
 * A NULL @cdev is ignored.
 */
void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
	struct cpufreq_cooling_device *cpufreq_dev;

	if (!cdev)
		return;

	cpufreq_dev = cdev->devdata;

	/* Unregister the notifier for the last cpufreq cooling device */
	mutex_lock(&cooling_cpufreq_lock);
	if (!--cpufreq_dev_count)
		cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
					    CPUFREQ_POLICY_NOTIFIER);

	/* Take the device off the list before any of its memory is freed */
	mutex_lock(&cooling_list_lock);
	list_del(&cpufreq_dev->node);
	mutex_unlock(&cooling_list_lock);

	mutex_unlock(&cooling_cpufreq_lock);

	/*
	 * Unregister from the thermal core first, then release the id and
	 * the per-device allocations, and the device structure last.
	 */
	thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
	release_idr(&cpufreq_idr, cpufreq_dev->id);
	kfree(cpufreq_dev->dyn_power_table);
	kfree(cpufreq_dev->time_in_idle_timestamp);
	kfree(cpufreq_dev->time_in_idle);
	kfree(cpufreq_dev->freq_table);
	kfree(cpufreq_dev);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);