thermal: devfreq_cooling: add new interface for direct power read

This patch introduces a new interface for device drivers connected to
devfreq_cooling in the thermal framework: get_real_power().

Some devices have more sophisticated methods (like power counters)
to approximate the actual power that they use.
In the previous implementation we had a pre-calculated power
table which was then scaled by 'utilization' (the ratio of
'busy_time' to 'total_time' taken from devfreq 'last_status').

With this new interface the driver can provide more precise data
regarding actual power to the thermal governor every time the power
budget is calculated. We then use this value to calculate the real
resource utilization scaling factor.

Reviewed-by: Chris Diamand <chris.diamand@arm.com>
Acked-by: Javi Merino <javi.merino@kernel.org>
Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>

authored by Lukasz Luba and committed by Zhang Rui 2be83da8 e34cab4c

+101 -23
+82 -23
drivers/thermal/devfreq_cooling.c
··· 28 28 29 29 #include <trace/events/thermal.h> 30 30 31 + #define SCALE_ERROR_MITIGATION 100 32 + 31 33 static DEFINE_IDA(devfreq_ida); 32 34 33 35 /** ··· 47 45 * @freq_table_size: Size of the @freq_table and @power_table 48 46 * @power_ops: Pointer to devfreq_cooling_power, used to generate the 49 47 * @power_table. 48 + * @res_util: Resource utilization scaling factor for the power. 49 + * It is multiplied by 100 to minimize the error. It is used 50 + * for estimation of the power budget instead of using 51 + * 'utilization' (which is 'busy_time / 'total_time'). 52 + * The 'res_util' range is from 100 to (power_table[state] * 100) 53 + * for the corresponding 'state'. 50 54 */ 51 55 struct devfreq_cooling_device { 52 56 int id; ··· 63 55 u32 *freq_table; 64 56 size_t freq_table_size; 65 57 struct devfreq_cooling_power *power_ops; 58 + u32 res_util; 59 + int capped_state; 66 60 }; 67 61 68 62 /** ··· 260 250 return power; 261 251 } 262 252 253 + 254 + static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc, 255 + unsigned long freq, 256 + unsigned long voltage) 257 + { 258 + return get_static_power(dfc, freq) + get_dynamic_power(dfc, freq, 259 + voltage); 260 + } 261 + 262 + 263 263 static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, 264 264 struct thermal_zone_device *tz, 265 265 u32 *power) ··· 279 259 struct devfreq_dev_status *status = &df->last_status; 280 260 unsigned long state; 281 261 unsigned long freq = status->current_frequency; 282 - u32 dyn_power, static_power; 262 + unsigned long voltage; 263 + u32 dyn_power = 0; 264 + u32 static_power = 0; 265 + int res; 283 266 284 - /* Get dynamic power for state */ 285 267 state = freq_get_state(dfc, freq); 286 - if (state == THERMAL_CSTATE_INVALID) 287 - return -EAGAIN; 268 + if (state == THERMAL_CSTATE_INVALID) { 269 + res = -EAGAIN; 270 + goto fail; 271 + } 288 272 289 - dyn_power = dfc->power_table[state]; 273 + if (dfc->power_ops->get_real_power) { 
274 + voltage = get_voltage(df, freq); 275 + if (voltage == 0) { 276 + res = -EINVAL; 277 + goto fail; 278 + } 290 279 291 - /* Scale dynamic power for utilization */ 292 - dyn_power = (dyn_power * status->busy_time) / status->total_time; 280 + res = dfc->power_ops->get_real_power(df, power, freq, voltage); 281 + if (!res) { 282 + state = dfc->capped_state; 283 + dfc->res_util = dfc->power_table[state]; 284 + dfc->res_util *= SCALE_ERROR_MITIGATION; 293 285 294 - /* Get static power */ 295 - static_power = get_static_power(dfc, freq); 286 + if (*power > 1) 287 + dfc->res_util /= *power; 288 + } else { 289 + goto fail; 290 + } 291 + } else { 292 + dyn_power = dfc->power_table[state]; 293 + 294 + /* Scale dynamic power for utilization */ 295 + dyn_power *= status->busy_time; 296 + dyn_power /= status->total_time; 297 + /* Get static power */ 298 + static_power = get_static_power(dfc, freq); 299 + 300 + *power = dyn_power + static_power; 301 + } 296 302 297 303 trace_thermal_power_devfreq_get_power(cdev, status, freq, dyn_power, 298 304 static_power); 299 305 300 - *power = dyn_power + static_power; 301 - 302 306 return 0; 307 + fail: 308 + /* It is safe to set max in this case */ 309 + dfc->res_util = SCALE_ERROR_MITIGATION; 310 + return res; 303 311 } 304 312 305 313 static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, ··· 360 312 unsigned long busy_time; 361 313 s32 dyn_power; 362 314 u32 static_power; 315 + s32 est_power; 363 316 int i; 364 317 365 - static_power = get_static_power(dfc, freq); 318 + if (dfc->power_ops->get_real_power) { 319 + /* Scale for resource utilization */ 320 + est_power = power * dfc->res_util; 321 + est_power /= SCALE_ERROR_MITIGATION; 322 + } else { 323 + static_power = get_static_power(dfc, freq); 366 324 367 - dyn_power = power - static_power; 368 - dyn_power = dyn_power > 0 ? dyn_power : 0; 325 + dyn_power = power - static_power; 326 + dyn_power = dyn_power > 0 ? 
dyn_power : 0; 369 327 370 - /* Scale dynamic power for utilization */ 371 - busy_time = status->busy_time ?: 1; 372 - dyn_power = (dyn_power * status->total_time) / busy_time; 328 + /* Scale dynamic power for utilization */ 329 + busy_time = status->busy_time ?: 1; 330 + est_power = (dyn_power * status->total_time) / busy_time; 331 + } 373 332 374 333 /* 375 334 * Find the first cooling state that is within the power 376 335 * budget for dynamic power. 377 336 */ 378 337 for (i = 0; i < dfc->freq_table_size - 1; i++) 379 - if (dyn_power >= dfc->power_table[i]) 338 + if (est_power >= dfc->power_table[i]) 380 339 break; 381 340 382 341 *state = i; 342 + dfc->capped_state = i; 383 343 trace_thermal_power_devfreq_limit(cdev, freq, *state, power); 384 344 return 0; 385 345 } ··· 443 387 } 444 388 445 389 for (i = 0, freq = ULONG_MAX; i < num_opps; i++, freq--) { 446 - unsigned long power_dyn, voltage; 390 + unsigned long power, voltage; 447 391 struct dev_pm_opp *opp; 448 392 449 393 opp = dev_pm_opp_find_freq_floor(dev, &freq); ··· 456 400 dev_pm_opp_put(opp); 457 401 458 402 if (dfc->power_ops) { 459 - power_dyn = get_dynamic_power(dfc, freq, voltage); 403 + if (dfc->power_ops->get_real_power) 404 + power = get_total_power(dfc, freq, voltage); 405 + else 406 + power = get_dynamic_power(dfc, freq, voltage); 460 407 461 - dev_dbg(dev, "Dynamic power table: %lu MHz @ %lu mV: %lu = %lu mW\n", 462 - freq / 1000000, voltage, power_dyn, power_dyn); 408 + dev_dbg(dev, "Power table: %lu MHz @ %lu mV: %lu = %lu mW\n", 409 + freq / 1000000, voltage, power, power); 463 410 464 - power_table[i] = power_dyn; 411 + power_table[i] = power; 465 412 } 466 413 467 414 freq_table[i] = freq;
+19
include/linux/devfreq_cooling.h
··· 34 34 * If get_dynamic_power() is NULL, then the 35 35 * dynamic power is calculated as 36 36 * @dyn_power_coeff * frequency * voltage^2 37 + * @get_real_power: When this is set, the framework uses it to ask the 38 + * device driver for the actual power. 39 + * Some devices have more sophisticated methods 40 + * (like power counters) to approximate the actual power 41 + * that they use. 42 + * This function provides more accurate data to the 43 + * thermal governor. When the driver does not provide 44 + * such function, framework just uses pre-calculated 45 + * table and scale the power by 'utilization' 46 + * (based on 'busy_time' and 'total_time' taken from 47 + * devfreq 'last_status'). 48 + * The value returned by this function must be lower 49 + * or equal than the maximum power value 50 + * for the current state 51 + * (which can be found in power_table[state]). 52 + * When this interface is used, the power_table holds 53 + * max total (static + dynamic) power value for each OPP. 37 54 */ 38 55 struct devfreq_cooling_power { 39 56 unsigned long (*get_static_power)(struct devfreq *devfreq, ··· 58 41 unsigned long (*get_dynamic_power)(struct devfreq *devfreq, 59 42 unsigned long freq, 60 43 unsigned long voltage); 44 + int (*get_real_power)(struct devfreq *df, u32 *power, 45 + unsigned long freq, unsigned long voltage); 61 46 unsigned long dyn_power_coeff; 62 47 }; 63 48