Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powercap / idle_inject: Add an idle injection framework

Initially, the cpu_cooling device for ARM was changed by adding a new
policy inserting idle cycles. The intel_powerclamp driver does a
similar action.

Instead of implementing idle injections privately in the cpu_cooling
device, move the idle injection code into a dedicated framework and give
other frameworks the opportunity to make use of it.

The framework relies on the smpboot kthreads, whose main loop handles
the common code for hotplugging and [un]parking.

This code was previously tested with the cpu cooling device and went
through several iterations. It now results in split code, with the API
exported in the header file. It was tested with the cpu cooling device
with success.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Rewrite of all comments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

authored by

Daniel Lezcano and committed by
Rafael J. Wysocki
88763a5c 021c9179

+396
+10
drivers/powercap/Kconfig
··· 29 29 controller, CPU core (Power Plane 0), graphics uncore (Power Plane 30 30 1), etc. 31 31 32 + config IDLE_INJECT 33 + bool "Idle injection framework" 34 + depends on CPU_IDLE 35 + default n 36 + help 37 + This enables support for the idle injection framework. It 38 + provides a way to force idle periods on a set of specified 39 + CPUs for power capping. Idle periods can be injected 40 + synchronously on a set of specified CPUs or alternatively 41 + on a per CPU basis. 32 42 endif
+1
drivers/powercap/Makefile
··· 1 1 obj-$(CONFIG_POWERCAP) += powercap_sys.o 2 2 obj-$(CONFIG_INTEL_RAPL) += intel_rapl.o 3 + obj-$(CONFIG_IDLE_INJECT) += idle_inject.o
+356
drivers/powercap/idle_inject.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright 2018 Linaro Limited 4 + * 5 + * Author: Daniel Lezcano <daniel.lezcano@linaro.org> 6 + * 7 + * The idle injection framework provides a way to force CPUs to enter idle 8 + * states for a specified fraction of time over a specified period. 9 + * 10 + * It relies on the smpboot kthreads feature providing common code for CPU 11 + * hotplug and thread [un]parking. 12 + * 13 + * All of the kthreads used for idle injection are created at init time. 14 + * 15 + * Next, the users of the the idle injection framework provide a cpumask via 16 + * its register function. The kthreads will be synchronized with respect to 17 + * this cpumask. 18 + * 19 + * The idle + run duration is specified via separate helpers and that allows 20 + * idle injection to be started. 21 + * 22 + * The idle injection kthreads will call play_idle() with the idle duration 23 + * specified as per the above. 24 + * 25 + * After all of them have been woken up, a timer is set to start the next idle 26 + * injection cycle. 27 + * 28 + * The timer interrupt handler will wake up the idle injection kthreads for 29 + * all of the CPUs in the cpumask provided by the user. 30 + * 31 + * Idle injection is stopped synchronously and no leftover idle injection 32 + * kthread activity after its completion is guaranteed. 33 + * 34 + * It is up to the user of this framework to provide a lock for higher-level 35 + * synchronization to prevent race conditions like starting idle injection 36 + * while unregistering from the framework. 
37 + */ 38 + #define pr_fmt(fmt) "ii_dev: " fmt 39 + 40 + #include <linux/cpu.h> 41 + #include <linux/hrtimer.h> 42 + #include <linux/kthread.h> 43 + #include <linux/sched.h> 44 + #include <linux/slab.h> 45 + #include <linux/smpboot.h> 46 + 47 + #include <uapi/linux/sched/types.h> 48 + 49 + /** 50 + * struct idle_inject_thread - task on/off switch structure 51 + * @tsk: task injecting the idle cycles 52 + * @should_run: whether or not to run the task (for the smpboot kthread API) 53 + */ 54 + struct idle_inject_thread { 55 + struct task_struct *tsk; 56 + int should_run; 57 + }; 58 + 59 + /** 60 + * struct idle_inject_device - idle injection data 61 + * @timer: idle injection period timer 62 + * @idle_duration_ms: duration of CPU idle time to inject 63 + * @run_duration_ms: duration of CPU run time to allow 64 + * @cpumask: mask of CPUs affected by idle injection 65 + */ 66 + struct idle_inject_device { 67 + struct hrtimer timer; 68 + unsigned int idle_duration_ms; 69 + unsigned int run_duration_ms; 70 + unsigned long int cpumask[0]; 71 + }; 72 + 73 + static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread); 74 + static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device); 75 + 76 + /** 77 + * idle_inject_wakeup - Wake up idle injection threads 78 + * @ii_dev: target idle injection device 79 + * 80 + * Every idle injection task associated with the given idle injection device 81 + * and running on an online CPU will be woken up. 
82 + */ 83 + static void idle_inject_wakeup(struct idle_inject_device *ii_dev) 84 + { 85 + struct idle_inject_thread *iit; 86 + unsigned int cpu; 87 + 88 + for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) { 89 + iit = per_cpu_ptr(&idle_inject_thread, cpu); 90 + iit->should_run = 1; 91 + wake_up_process(iit->tsk); 92 + } 93 + } 94 + 95 + /** 96 + * idle_inject_timer_fn - idle injection timer function 97 + * @timer: idle injection hrtimer 98 + * 99 + * This function is called when the idle injection timer expires. It wakes up 100 + * idle injection tasks associated with the timer and they, in turn, invoke 101 + * play_idle() to inject a specified amount of CPU idle time. 102 + * 103 + * Return: HRTIMER_RESTART. 104 + */ 105 + static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) 106 + { 107 + unsigned int duration_ms; 108 + struct idle_inject_device *ii_dev = 109 + container_of(timer, struct idle_inject_device, timer); 110 + 111 + duration_ms = READ_ONCE(ii_dev->run_duration_ms); 112 + duration_ms += READ_ONCE(ii_dev->idle_duration_ms); 113 + 114 + idle_inject_wakeup(ii_dev); 115 + 116 + hrtimer_forward_now(timer, ms_to_ktime(duration_ms)); 117 + 118 + return HRTIMER_RESTART; 119 + } 120 + 121 + /** 122 + * idle_inject_fn - idle injection work function 123 + * @cpu: the CPU owning the task 124 + * 125 + * This function calls play_idle() to inject a specified amount of CPU idle 126 + * time. 127 + */ 128 + static void idle_inject_fn(unsigned int cpu) 129 + { 130 + struct idle_inject_device *ii_dev; 131 + struct idle_inject_thread *iit; 132 + 133 + ii_dev = per_cpu(idle_inject_device, cpu); 134 + iit = per_cpu_ptr(&idle_inject_thread, cpu); 135 + 136 + /* 137 + * Let the smpboot main loop know that the task should not run again. 
138 + */ 139 + iit->should_run = 0; 140 + 141 + play_idle(READ_ONCE(ii_dev->idle_duration_ms)); 142 + } 143 + 144 + /** 145 + * idle_inject_set_duration - idle and run duration update helper 146 + * @run_duration_ms: CPU run time to allow in milliseconds 147 + * @idle_duration_ms: CPU idle time to inject in milliseconds 148 + */ 149 + void idle_inject_set_duration(struct idle_inject_device *ii_dev, 150 + unsigned int run_duration_ms, 151 + unsigned int idle_duration_ms) 152 + { 153 + if (run_duration_ms && idle_duration_ms) { 154 + WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms); 155 + WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms); 156 + } 157 + } 158 + 159 + /** 160 + * idle_inject_get_duration - idle and run duration retrieval helper 161 + * @run_duration_ms: memory location to store the current CPU run time 162 + * @idle_duration_ms: memory location to store the current CPU idle time 163 + */ 164 + void idle_inject_get_duration(struct idle_inject_device *ii_dev, 165 + unsigned int *run_duration_ms, 166 + unsigned int *idle_duration_ms) 167 + { 168 + *run_duration_ms = READ_ONCE(ii_dev->run_duration_ms); 169 + *idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms); 170 + } 171 + 172 + /** 173 + * idle_inject_start - start idle injections 174 + * @ii_dev: idle injection control device structure 175 + * 176 + * The function starts idle injection by first waking up all of the idle 177 + * injection kthreads associated with @ii_dev to let them inject CPU idle time 178 + * sets up a timer to start the next idle injection period. 179 + * 180 + * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success. 
181 + */ 182 + int idle_inject_start(struct idle_inject_device *ii_dev) 183 + { 184 + unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms); 185 + unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms); 186 + 187 + if (!idle_duration_ms || !run_duration_ms) 188 + return -EINVAL; 189 + 190 + pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", 191 + cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 192 + 193 + idle_inject_wakeup(ii_dev); 194 + 195 + hrtimer_start(&ii_dev->timer, 196 + ms_to_ktime(idle_duration_ms + run_duration_ms), 197 + HRTIMER_MODE_REL); 198 + 199 + return 0; 200 + } 201 + 202 + /** 203 + * idle_inject_stop - stops idle injections 204 + * @ii_dev: idle injection control device structure 205 + * 206 + * The function stops idle injection and waits for the threads to finish work. 207 + * If CPU idle time is being injected when this function runs, then it will 208 + * wait until the end of the cycle. 209 + * 210 + * When it returns, there is no more idle injection kthread activity. The 211 + * kthreads are scheduled out and the periodic timer is off. 212 + */ 213 + void idle_inject_stop(struct idle_inject_device *ii_dev) 214 + { 215 + struct idle_inject_thread *iit; 216 + unsigned int cpu; 217 + 218 + pr_debug("Stopping idle injection on CPUs '%*pbl'\n", 219 + cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 220 + 221 + hrtimer_cancel(&ii_dev->timer); 222 + 223 + /* 224 + * Stopping idle injection requires all of the idle injection kthreads 225 + * associated with the given cpumask to be parked and stay that way, so 226 + * prevent CPUs from going online at this point. Any CPUs going online 227 + * after the loop below will be covered by clearing the should_run flag 228 + * that will cause the smpboot main loop to schedule them out. 
229 + */ 230 + cpu_hotplug_disable(); 231 + 232 + /* 233 + * Iterate over all (online + offline) CPUs here in case one of them 234 + * goes offline with the should_run flag set so as to prevent its idle 235 + * injection kthread from running when the CPU goes online again after 236 + * the ii_dev has been freed. 237 + */ 238 + for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 239 + iit = per_cpu_ptr(&idle_inject_thread, cpu); 240 + iit->should_run = 0; 241 + 242 + wait_task_inactive(iit->tsk, 0); 243 + } 244 + 245 + cpu_hotplug_enable(); 246 + } 247 + 248 + /** 249 + * idle_inject_setup - prepare the current task for idle injection 250 + * @cpu: not used 251 + * 252 + * Called once, this function is in charge of setting the current task's 253 + * scheduler parameters to make it an RT task. 254 + */ 255 + static void idle_inject_setup(unsigned int cpu) 256 + { 257 + struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 }; 258 + 259 + sched_setscheduler(current, SCHED_FIFO, &param); 260 + } 261 + 262 + /** 263 + * idle_inject_should_run - function helper for the smpboot API 264 + * @cpu: CPU the kthread is running on 265 + * 266 + * Return: whether or not the thread can run. 267 + */ 268 + static int idle_inject_should_run(unsigned int cpu) 269 + { 270 + struct idle_inject_thread *iit = 271 + per_cpu_ptr(&idle_inject_thread, cpu); 272 + 273 + return iit->should_run; 274 + } 275 + 276 + /** 277 + * idle_inject_register - initialize idle injection on a set of CPUs 278 + * @cpumask: CPUs to be affected by idle injection 279 + * 280 + * This function creates an idle injection control device structure for the 281 + * given set of CPUs and initializes the timer associated with it. It does not 282 + * start any injection cycles. 283 + * 284 + * Return: NULL if memory allocation fails, idle injection control device 285 + * pointer on success. 
286 + */ 287 + struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) 288 + { 289 + struct idle_inject_device *ii_dev; 290 + int cpu, cpu_rb; 291 + 292 + ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL); 293 + if (!ii_dev) 294 + return NULL; 295 + 296 + cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask); 297 + hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 298 + ii_dev->timer.function = idle_inject_timer_fn; 299 + 300 + for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 301 + 302 + if (per_cpu(idle_inject_device, cpu)) { 303 + pr_err("cpu%d is already registered\n", cpu); 304 + goto out_rollback; 305 + } 306 + 307 + per_cpu(idle_inject_device, cpu) = ii_dev; 308 + } 309 + 310 + return ii_dev; 311 + 312 + out_rollback: 313 + for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) { 314 + if (cpu == cpu_rb) 315 + break; 316 + per_cpu(idle_inject_device, cpu_rb) = NULL; 317 + } 318 + 319 + kfree(ii_dev); 320 + 321 + return NULL; 322 + } 323 + 324 + /** 325 + * idle_inject_unregister - unregister idle injection control device 326 + * @ii_dev: idle injection control device to unregister 327 + * 328 + * The function stops idle injection for the given control device, 329 + * unregisters its kthreads and frees memory allocated when that device was 330 + * created. 
331 + */ 332 + void idle_inject_unregister(struct idle_inject_device *ii_dev) 333 + { 334 + unsigned int cpu; 335 + 336 + idle_inject_stop(ii_dev); 337 + 338 + for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) 339 + per_cpu(idle_inject_device, cpu) = NULL; 340 + 341 + kfree(ii_dev); 342 + } 343 + 344 + static struct smp_hotplug_thread idle_inject_threads = { 345 + .store = &idle_inject_thread.tsk, 346 + .setup = idle_inject_setup, 347 + .thread_fn = idle_inject_fn, 348 + .thread_comm = "idle_inject/%u", 349 + .thread_should_run = idle_inject_should_run, 350 + }; 351 + 352 + static int __init idle_inject_init(void) 353 + { 354 + return smpboot_register_percpu_thread(&idle_inject_threads); 355 + } 356 + early_initcall(idle_inject_init);
+29
include/linux/idle_inject.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2018 Linaro Ltd
 *
 * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
 *
 */
#ifndef __IDLE_INJECT_H__
#define __IDLE_INJECT_H__

/*
 * Forward declaration so that the prototype below refers to the file-scope
 * type even when this header is included before the cpumask definition.
 */
struct cpumask;

/* private idle injection device structure */
struct idle_inject_device;

/*
 * Create an idle injection control device for the CPUs in @cpumask.
 * Returns NULL on failure.
 */
struct idle_inject_device *idle_inject_register(struct cpumask *cpumask);

/* Stop idle injection on @ii_dev and free it. */
void idle_inject_unregister(struct idle_inject_device *ii_dev);

/*
 * Start idle injection on @ii_dev. Returns 0 on success or -EINVAL if the
 * run or idle duration has not been set.
 */
int idle_inject_start(struct idle_inject_device *ii_dev);

/* Stop idle injection on @ii_dev. */
void idle_inject_stop(struct idle_inject_device *ii_dev);

/* Set the run and idle durations, both in milliseconds. */
void idle_inject_set_duration(struct idle_inject_device *ii_dev,
			      unsigned int run_duration_ms,
			      unsigned int idle_duration_ms);

/* Read back the current run and idle durations, both in milliseconds. */
void idle_inject_get_duration(struct idle_inject_device *ii_dev,
			      unsigned int *run_duration_ms,
			      unsigned int *idle_duration_ms);
#endif /* __IDLE_INJECT_H__ */