Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * intel_idle.c - native hardware idle loop for modern Intel processors
4 *
5 * Copyright (c) 2013 - 2020, Intel Corporation.
6 * Len Brown <len.brown@intel.com>
7 * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8 */
9
10/*
11 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12 * in lieu of the legacy ACPI processor_idle driver. The intent is to
13 * make Linux more efficient on these processors, as intel_idle knows
14 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15 */
16
17/*
18 * Design Assumptions
19 *
20 * All CPUs have same idle states as boot CPU
21 *
22 * Chipset BM_STS (bus master status) bit is a NOP
23 * for preventing entry into deep C-states
24 *
25 * CPU will flush caches as needed when entering a C-state via MWAIT
26 * (in contrast to entering ACPI C3, in which case the WBINVD
27 * instruction needs to be executed to flush the caches)
28 */
29
30/*
31 * Known limitations
32 *
 * ACPI has a .suspend hack to turn off deep c-states during suspend
34 * to avoid complications with the lapic timer workaround.
35 * Have not seen issues with suspend, but may need same workaround here.
36 *
37 */
38
39/* un-comment DEBUG to enable pr_debug() statements */
40/* #define DEBUG */
41
42#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44#include <linux/acpi.h>
45#include <linux/kernel.h>
46#include <linux/cpuidle.h>
47#include <linux/tick.h>
48#include <trace/events/power.h>
49#include <linux/sched.h>
50#include <linux/sched/smt.h>
51#include <linux/notifier.h>
52#include <linux/cpu.h>
53#include <linux/moduleparam.h>
54#include <asm/cpu_device_id.h>
55#include <asm/intel-family.h>
56#include <asm/nospec-branch.h>
57#include <asm/mwait.h>
58#include <asm/msr.h>
59#include <asm/fpu/api.h>
60
61#define INTEL_IDLE_VERSION "0.5.1"
62
/* The cpuidle driver object registered with the cpuidle core. */
static struct cpuidle_driver intel_idle_driver = {
	.name = "intel_idle",
	.owner = THIS_MODULE,
};
/* intel_idle.max_cstate=0 disables driver */
static int max_cstate = CPUIDLE_STATE_MAX - 1;
/* Bitmask of state indices to register as disabled (module parameter). */
static unsigned int disabled_states_mask;
/* Bitmask selecting which "preferred" states to keep enabled (module parameter). */
static unsigned int preferred_states_mask;

/* Per-CPU cpuidle device structures, allocated at driver init. */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
73
/* MSR bits to clear in order to disable HW C-state auto-demotion (per platform). */
static unsigned long auto_demotion_disable_flags;

/*
 * What to do about the "C1E promotion" bit in MSR_IA32_POWER_CTL:
 * leave it alone, set it, or clear it.
 */
static enum {
	C1E_PROMOTION_PRESERVE,
	C1E_PROMOTION_ENABLE,
	C1E_PROMOTION_DISABLE
} c1e_promotion = C1E_PROMOTION_PRESERVE;
81
/* Per-CPU-model configuration matched via the CPU device ID tables. */
struct idle_cpu {
	struct cpuidle_state *state_table;

	/*
	 * Hardware C-state auto-demotion may not always be optimal.
	 * Indicate which enable bits to clear here.
	 */
	unsigned long auto_demotion_disable_flags;
	/* Bay Trail/Cherry Trail have an extra per-module demotion control. */
	bool byt_auto_demotion_disable_flag;
	/* Clear the "C1E promotion" bit so C1 requests are not promoted to C1E. */
	bool disable_promotion_to_c1e;
	/* Also consult ACPI _CST when building the state list. */
	bool use_acpi;
};
94
/* Configuration matched for the boot CPU; only used during init. */
static const struct idle_cpu *icpu __initdata;
/* The C-state table selected for this CPU model; only used during init. */
static struct cpuidle_state *cpuidle_state_table __initdata;

/* MWAIT sub-state support mask from CPUID, captured at init time. */
static unsigned int mwait_substates __initdata;
99
/*
 * Driver-private cpuidle state flags. These occupy bits above the generic
 * CPUIDLE_FLAG_* values and below the MWAIT hint stored in bits 31:24.
 */

/*
 * Enable interrupts before entering the C-state. On some platforms and for
 * some C-states, this may measurably decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * Enable this state by default even if the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
 * above.
 */
#define CPUIDLE_FLAG_IBRS		BIT(16)

/*
 * Initialize large xstate for the C6-state entrance.
 */
#define CPUIDLE_FLAG_INIT_XSTATE	BIT(17)
121
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state.
 *
 * Both macros fully parenthesize their argument so that expression
 * arguments (e.g. "a | b") expand with the intended precedence.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
131
132static __always_inline int __intel_idle(struct cpuidle_device *dev,
133 struct cpuidle_driver *drv, int index)
134{
135 struct cpuidle_state *state = &drv->states[index];
136 unsigned long eax = flg2MWAIT(state->flags);
137 unsigned long ecx = 1; /* break on interrupt flag */
138
139 mwait_idle_with_hints(eax, ecx);
140
141 return index;
142}
143
/**
 * intel_idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * If the local APIC timer is not known to be reliable in the target idle state,
 * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
 *
 * Must be called under local_irq_disable().
 */
static __cpuidle int intel_idle(struct cpuidle_device *dev,
				struct cpuidle_driver *drv, int index)
{
	return __intel_idle(dev, drv, index);
}
163
/*
 * Idle entry for states flagged CPUIDLE_FLAG_IRQ_ENABLE: run MWAIT with
 * interrupts enabled, then restore the disabled-IRQ state the cpuidle
 * core expects on return.
 */
static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
				    struct cpuidle_driver *drv, int index)
{
	int ret;

	raw_local_irq_enable();
	ret = __intel_idle(dev, drv, index);
	raw_local_irq_disable();

	return ret;
}
175
/*
 * Idle entry for states flagged CPUIDLE_FLAG_IBRS: when SMT is active,
 * clear MSR_IA32_SPEC_CTRL (disabling IBRS) for the duration of MWAIT and
 * restore the saved value afterwards. NOTE(review): presumably this avoids
 * the idle sibling's IBRS setting penalizing the running sibling — confirm
 * against the KERNEL_IBRS mitigation documentation.
 */
static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
				     struct cpuidle_driver *drv, int index)
{
	bool smt_active = sched_smt_active();
	u64 spec_ctrl = spec_ctrl_current();
	int ret;

	if (smt_active)
		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);

	ret = __intel_idle(dev, drv, index);

	if (smt_active)
		native_wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);

	return ret;
}
193
/*
 * Idle entry for states flagged CPUIDLE_FLAG_INIT_XSTATE: initialize the
 * large xstate (see the flag's comment) via fpu_idle_fpregs() before the
 * common MWAIT entry.
 */
static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	fpu_idle_fpregs();
	return __intel_idle(dev, drv, index);
}
200
/**
 * intel_idle_s2idle - Ask the processor to enter the given idle state.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the MWAIT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
 * scheduler tick and suspended scheduler clock on the target CPU.
 */
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv, int index)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	struct cpuidle_state *state = &drv->states[index];
	unsigned long eax = flg2MWAIT(state->flags);

	/* Handle CPUIDLE_FLAG_INIT_XSTATE here: the xstate-aware entry
	 * callback is not used on the s2idle path. */
	if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
		fpu_idle_fpregs();

	mwait_idle_with_hints(eax, ecx);

	/* s2idle callbacks return 0, not a state index. */
	return 0;
}
227
/*
 * States are indexed by the cstate number,
 * which is also the index into the MWAIT hint array.
 * Thus C0 is a dummy.
 */
/* Nehalem/Westmere-class C-states. exit_latency/target_residency in usec. */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 3,
		.target_residency = 6,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
269
/* Sandy Bridge C-states. exit_latency/target_residency in usec. */
static struct cpuidle_state snb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 211,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 104,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 109,
		.target_residency = 345,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
314
/* Bay Trail (Atom) C-states; module-level sub-states use non-trivial hints. */
static struct cpuidle_state byt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 500,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
359
/* Cherry Trail (Atom) C-states; same hints as Bay Trail, tuned latencies. */
static struct cpuidle_state cht_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6N",
		.desc = "MWAIT 0x58",
		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 275,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6S",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7S",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
404
/* Ivy Bridge (client) C-states. exit_latency/target_residency in usec. */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 80,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 87,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
449
/* Ivy Bridge Xeon (Ivy Town) C-states, 1-2 socket variant. */
static struct cpuidle_state ivt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 156,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 82,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
486
/* Ivy Town 4-socket variant: larger residencies than the 1-2 socket table. */
static struct cpuidle_state ivt_cstates_4s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 250,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 300,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 84,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
523
/* Ivy Town 8+ socket variant: largest residencies of the three IVT tables. */
static struct cpuidle_state ivt_cstates_8s[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 59,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 88,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
560
/* Haswell C-states, including the deep C8/C9/C10 package states. */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 33,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
/* Broadwell C-states: same hints as Haswell, retuned C3 exit latency. */
static struct cpuidle_state bdw_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 40,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x32",
		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 166,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 300,
		.target_residency = 900,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 600,
		.target_residency = 1800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2600,
		.target_residency = 7700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
697
/*
 * Skylake (client) C-states. Deep states carry CPUIDLE_FLAG_IBRS so that
 * IBRS is cleared across idle (see intel_idle_ibrs()).
 */
static struct cpuidle_state skl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C3",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 70,
		.target_residency = 100,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 85,
		.target_residency = 200,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x33",
		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 124,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 200,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 480,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 890,
		.target_residency = 5000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
766
/* Skylake server (SKX) C-states: C1 enters with IRQs enabled, C6 drops IBRS. */
static struct cpuidle_state skx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
		.exit_latency = 133,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
795
/* Ice Lake Xeon (ICX) C-states. */
static struct cpuidle_state icx_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 4,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
824
/*
 * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
 * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
 * But in this case there is effectively no C1, because C1 requests are
 * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
 * and C1E requests end up with C1, so there is effectively no C1E.
 *
 * By default we enable C1E and disable C1 by marking it with
 * 'CPUIDLE_FLAG_UNUSABLE'.
 */
static struct cpuidle_state adl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 220,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 280,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 680,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
879
/* Alder Lake-L: same C1/C1E exclusivity as adl_cstates above. */
static struct cpuidle_state adl_l_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 170,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 200,
		.target_residency = 600,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 230,
		.target_residency = 700,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
924
/* Alder Lake-N: same C1/C1E exclusivity as adl_cstates above. */
static struct cpuidle_state adl_n_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 195,
		.target_residency = 585,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 260,
		.target_residency = 1040,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 660,
		.target_residency = 1980,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
969
/*
 * Sapphire Rapids Xeon (SPR) C-states. C6 carries CPUIDLE_FLAG_INIT_XSTATE
 * (see intel_idle_xstate()/intel_idle_s2idle()).
 */
static struct cpuidle_state spr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 1,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 2,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
					   CPUIDLE_FLAG_INIT_XSTATE,
		.exit_latency = 290,
		.target_residency = 800,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
999
/* Legacy Atom C-states. Note: hint 0x00 behaves as C1E on these parts. */
static struct cpuidle_state atom_cstates[] __initdata = {
	{
		.name = "C1E",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C2",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10),
		.exit_latency = 20,
		.target_residency = 80,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
/* Tangier (Merrifield) Atom C-states. */
static struct cpuidle_state tangier_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 4,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C4",
		.desc = "MWAIT 0x30",
		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 100,
		.target_residency = 400,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x52",
		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 140,
		.target_residency = 560,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1200,
		.target_residency = 4000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x64",
		.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 20000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
/* Avoton/Rangeley (Atom server) C-states. */
static struct cpuidle_state avn_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x51",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
/* Knights Landing C-states; note C6 uses hint 0x10 on this part (per .desc). */
static struct cpuidle_state knl_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 1,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.name = "C6",
		.desc = "MWAIT 0x10",
		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 120,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle },
	{
		.enter = NULL }	/* sentinel */
};
1120
/* Broxton/Apollo Lake C-states. */
static struct cpuidle_state bxt_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 133,
		.target_residency = 133,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C7s",
		.desc = "MWAIT 0x31",
		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 155,
		.target_residency = 155,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C8",
		.desc = "MWAIT 0x40",
		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 1000,
		.target_residency = 1000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C9",
		.desc = "MWAIT 0x50",
		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 2000,
		.target_residency = 2000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C10",
		.desc = "MWAIT 0x60",
		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 10000,
		.target_residency = 10000,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
1181
/*
 * Idle states for Denverton (Goldmont-based Atom server).
 * exit_latency and target_residency are in microseconds.
 */
static struct cpuidle_state dnv_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 10,
		.target_residency = 20,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 50,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
1210
1211/*
1212 * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1213 * C6, and this is indicated in the CPUID mwait leaf.
1214 */
/*
 * Idle states for Snow Ridge (Tremont-based Atom server).
 * Latencies/residencies are in microseconds.  If the CPUID mwait leaf does
 * not report C6 sub-states, intel_idle_verify_cstate() will drop the C6
 * entry at init time.
 */
static struct cpuidle_state snr_cstates[] __initdata = {
	{
		.name = "C1",
		.desc = "MWAIT 0x00",
		.flags = MWAIT2flg(0x00),
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C1E",
		.desc = "MWAIT 0x01",
		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency = 15,
		.target_residency = 25,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.name = "C6",
		.desc = "MWAIT 0x20",
		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 130,
		.target_residency = 500,
		.enter = &intel_idle,
		.enter_s2idle = intel_idle_s2idle, },
	{
		.enter = NULL }	/* sentinel */
};
1243
/*
 * Per-model driver configuration descriptors.  Each one selects a C-state
 * table and a few MSR tweaks applied at CPU init:
 *  - auto_demotion_disable_flags: bits cleared in MSR_PKG_CST_CONFIG_CONTROL
 *    (see auto_demotion_disable())
 *  - disable_promotion_to_c1e: clear the C1E-promotion bit in
 *    MSR_IA32_POWER_CTL (see c1e_promotion_disable())
 *  - byt_auto_demotion_disable_flag: zero the BYT/CHT demotion policy MSRs
 *  - use_acpi: additionally consult ACPI _CST and disable by default any
 *    state not listed there (see intel_idle_off_by_default())
 *
 * The "*x" variants (nhx, snx, hsx, bdx, ...) are the server parts, which
 * also set use_acpi.
 */
static const struct idle_cpu idle_cpu_nehalem __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_nhx __initconst = {
	.state_table = nehalem_cstates,
	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_atom __initconst = {
	.state_table = atom_cstates,
};

static const struct idle_cpu idle_cpu_tangier __initconst = {
	.state_table = tangier_cstates,
};

static const struct idle_cpu idle_cpu_lincroft __initconst = {
	.state_table = atom_cstates,
	.auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
};

static const struct idle_cpu idle_cpu_snb __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_snx __initconst = {
	.state_table = snb_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_byt __initconst = {
	.state_table = byt_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_cht __initconst = {
	.state_table = cht_cstates,
	.disable_promotion_to_c1e = true,
	.byt_auto_demotion_disable_flag = true,
};

static const struct idle_cpu idle_cpu_ivb __initconst = {
	.state_table = ivb_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_ivt __initconst = {
	.state_table = ivt_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_hsw __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_hsx __initconst = {
	.state_table = hsw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bdw __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_bdx __initconst = {
	.state_table = bdw_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_skl __initconst = {
	.state_table = skl_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_skx __initconst = {
	.state_table = skx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_icx __initconst = {
	.state_table = icx_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_adl __initconst = {
	.state_table = adl_cstates,
};

static const struct idle_cpu idle_cpu_adl_l __initconst = {
	.state_table = adl_l_cstates,
};

static const struct idle_cpu idle_cpu_adl_n __initconst = {
	.state_table = adl_n_cstates,
};

static const struct idle_cpu idle_cpu_spr __initconst = {
	.state_table = spr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_avn __initconst = {
	.state_table = avn_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_knl __initconst = {
	.state_table = knl_cstates,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_bxt __initconst = {
	.state_table = bxt_cstates,
	.disable_promotion_to_c1e = true,
};

static const struct idle_cpu idle_cpu_dnv __initconst = {
	.state_table = dnv_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};

static const struct idle_cpu idle_cpu_snr __initconst = {
	.state_table = snr_cstates,
	.disable_promotion_to_c1e = true,
	.use_acpi = true,
};
1388
/*
 * CPU model match table.  driver_data points at the idle_cpu descriptor
 * providing the C-state table and MSR tweaks for that model.
 */
static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&idle_cpu_nehalem),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,	&idle_cpu_lincroft),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&idle_cpu_nhx),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&idle_cpu_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&idle_cpu_snx),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,	&idle_cpu_atom),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&idle_cpu_byt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID,	&idle_cpu_tangier),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&idle_cpu_cht),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&idle_cpu_ivb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&idle_cpu_ivt),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&idle_cpu_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&idle_cpu_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&idle_cpu_avn),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&idle_cpu_bdw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&idle_cpu_bdx),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&idle_cpu_adl),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&idle_cpu_adl_l),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&idle_cpu_adl_n),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_snr),
	{}
};
1436
/*
 * Fallback match: any family 6 Intel CPU with MWAIT.  When intel_idle_ids
 * has no entry for the model, the idle states come from ACPI _CST instead
 * (see intel_idle_init()).
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};
1441
1442static bool __init intel_idle_max_cstate_reached(int cstate)
1443{
1444 if (cstate + 1 > max_cstate) {
1445 pr_info("max_cstate %d reached\n", max_cstate);
1446 return true;
1447 }
1448 return false;
1449}
1450
1451static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1452{
1453 unsigned long eax = flg2MWAIT(state->flags);
1454
1455 if (boot_cpu_has(X86_FEATURE_ARAT))
1456 return false;
1457
1458 /*
1459 * Switch over to one-shot tick broadcast if the target C-state
1460 * is deeper than C1.
1461 */
1462 return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1463}
1464
1465#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1466#include <acpi/processor.h>
1467
1468static bool no_acpi __read_mostly;
1469module_param(no_acpi, bool, 0444);
1470MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1471
1472static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1473module_param_named(use_acpi, force_use_acpi, bool, 0444);
1474MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1475
1476static struct acpi_processor_power acpi_state_table __initdata;
1477
1478/**
1479 * intel_idle_cst_usable - Check if the _CST information can be used.
1480 *
1481 * Check if all of the C-states listed by _CST in the max_cstate range are
1482 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1483 */
1484static bool __init intel_idle_cst_usable(void)
1485{
1486 int cstate, limit;
1487
1488 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1489 acpi_state_table.count);
1490
1491 for (cstate = 1; cstate < limit; cstate++) {
1492 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1493
1494 if (cx->entry_method != ACPI_CSTATE_FFH)
1495 return false;
1496 }
1497
1498 return true;
1499}
1500
/**
 * intel_idle_acpi_cst_extract - Evaluate ACPI _CST and cache the result.
 *
 * Walk the possible CPUs until one yields a usable (all-FFH) _CST list,
 * store that list in acpi_state_table and claim _CST control from the
 * platform firmware.  Returns 'true' on success; on failure the cached
 * table is invalidated (count = 0).
 */
static bool __init intel_idle_acpi_cst_extract(void)
{
	unsigned int cpu;

	if (no_acpi) {
		pr_debug("Not allowed to use ACPI _CST\n");
		return false;
	}

	for_each_possible_cpu(cpu) {
		struct acpi_processor *pr = per_cpu(processors, cpu);

		if (!pr)
			continue;

		if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
			continue;

		/* states[] is used starting at index 1 (entry 0 is unused),
		 * so bump the count accordingly - see intel_idle_cst_usable().
		 */
		acpi_state_table.count++;

		if (!intel_idle_cst_usable())
			continue;

		if (!acpi_processor_claim_cst_control())
			break;

		return true;
	}

	/* No usable _CST data - invalidate any partially filled table. */
	acpi_state_table.count = 0;
	pr_debug("ACPI _CST not found or not usable\n");
	return false;
}
1534
/**
 * intel_idle_init_cstates_acpi - Build the idle states list from ACPI _CST.
 * @drv: cpuidle driver to populate (drv->state_count already covers the
 *       POLL state installed by intel_idle_cpuidle_driver_init()).
 */
static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
{
	int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);

	/*
	 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
	 * the interesting states are ACPI_CSTATE_FFH.
	 */
	for (cstate = 1; cstate < limit; cstate++) {
		struct acpi_processor_cx *cx;
		struct cpuidle_state *state;

		if (intel_idle_max_cstate_reached(cstate - 1))
			break;

		cx = &acpi_state_table.states[cstate];

		state = &drv->states[drv->state_count++];

		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
		strscpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
		state->exit_latency = cx->latency;
		/*
		 * For C1-type C-states use the same number for both the exit
		 * latency and target residency, because that is the case for
		 * C1 in the majority of the static C-states tables above.
		 * For the other types of C-states, however, set the target
		 * residency to 3 times the exit latency which should lead to
		 * a reasonable balance between energy-efficiency and
		 * performance in the majority of interesting cases.
		 */
		state->target_residency = cx->latency;
		if (cx->type > ACPI_STATE_C1)
			state->target_residency *= 3;

		/* The FFH "address" from _CST is the MWAIT hint. */
		state->flags = MWAIT2flg(cx->address);
		if (cx->type > ACPI_STATE_C2)
			state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;

		/* Honor the "states_off" module parameter. */
		if (disabled_states_mask & BIT(cstate))
			state->flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(state))
			state->flags |= CPUIDLE_FLAG_TIMER_STOP;

		state->enter = intel_idle;
		state->enter_s2idle = intel_idle_s2idle;
	}
}
1584
1585static bool __init intel_idle_off_by_default(u32 mwait_hint)
1586{
1587 int cstate, limit;
1588
1589 /*
1590 * If there are no _CST C-states, do not disable any C-states by
1591 * default.
1592 */
1593 if (!acpi_state_table.count)
1594 return false;
1595
1596 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1597 /*
1598 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1599 * the interesting states are ACPI_CSTATE_FFH.
1600 */
1601 for (cstate = 1; cstate < limit; cstate++) {
1602 if (acpi_state_table.states[cstate].address == mwait_hint)
1603 return false;
1604 }
1605 return true;
1606}
1607#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1608#define force_use_acpi (false)
1609
1610static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1611static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1612static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1613#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1614
/**
 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
 *
 * Tune IVT multi-socket targets.
 * Assumption: num_sockets == (max_package_num + 1).
 */
static void __init ivt_idle_state_table_update(void)
{
	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
	int cpu, package_num, num_sockets = 1;

	for_each_online_cpu(cpu) {
		package_num = topology_physical_package_id(cpu);
		if (package_num + 1 > num_sockets) {
			num_sockets = package_num + 1;

			/* > 4 sockets: pick the 8-socket table and stop scanning. */
			if (num_sockets > 4) {
				cpuidle_state_table = ivt_cstates_8s;
				return;
			}
		}
	}

	if (num_sockets > 2)
		cpuidle_state_table = ivt_cstates_4s;

	/* else, 1 and 2 socket systems use default ivt_cstates */
}
1643
1644/**
1645 * irtl_2_usec - IRTL to microseconds conversion.
1646 * @irtl: IRTL MSR value.
1647 *
1648 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1649 */
1650static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1651{
1652 static const unsigned int irtl_ns_units[] __initconst = {
1653 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1654 };
1655 unsigned long long ns;
1656
1657 if (!irtl)
1658 return 0;
1659
1660 ns = irtl_ns_units[(irtl >> 10) & 0x7];
1661
1662 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1663}
1664
1665/**
1666 * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1667 *
1668 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1669 * definitive maximum latency and use the same value for target_residency.
1670 */
1671static void __init bxt_idle_state_table_update(void)
1672{
1673 unsigned long long msr;
1674 unsigned int usec;
1675
1676 rdmsrl(MSR_PKGC6_IRTL, msr);
1677 usec = irtl_2_usec(msr);
1678 if (usec) {
1679 bxt_cstates[2].exit_latency = usec;
1680 bxt_cstates[2].target_residency = usec;
1681 }
1682
1683 rdmsrl(MSR_PKGC7_IRTL, msr);
1684 usec = irtl_2_usec(msr);
1685 if (usec) {
1686 bxt_cstates[3].exit_latency = usec;
1687 bxt_cstates[3].target_residency = usec;
1688 }
1689
1690 rdmsrl(MSR_PKGC8_IRTL, msr);
1691 usec = irtl_2_usec(msr);
1692 if (usec) {
1693 bxt_cstates[4].exit_latency = usec;
1694 bxt_cstates[4].target_residency = usec;
1695 }
1696
1697 rdmsrl(MSR_PKGC9_IRTL, msr);
1698 usec = irtl_2_usec(msr);
1699 if (usec) {
1700 bxt_cstates[5].exit_latency = usec;
1701 bxt_cstates[5].target_residency = usec;
1702 }
1703
1704 rdmsrl(MSR_PKGC10_IRTL, msr);
1705 usec = irtl_2_usec(msr);
1706 if (usec) {
1707 bxt_cstates[6].exit_latency = usec;
1708 bxt_cstates[6].target_residency = usec;
1709 }
1710
1711}
1712
/**
 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
 *
 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
 */
static void __init sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;


	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit (bits 3:0 must be 8) */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present (CPUID.(EAX=7,ECX=0):EBX bit 2) */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEAT_CTL, msr);

		/* if SGX is enabled (bit 18 of IA32_FEATURE_CONTROL) */
		if (msr & (1 << 18))
			return;
	}

	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
}
1754
/**
 * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
 * idle states table.
 */
static void __init skx_idle_state_table_update(void)
{
	unsigned long long msr;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/*
	 * Package C-state limit (bits 2:0 of MSR_PKG_CST_CONFIG_CONTROL):
	 * 000b: C0/C1 (no package C-state support)
	 * 001b: C2
	 * 010b: C6 (non-retention)
	 * 011b: C6 (retention)
	 * 111b: No Package C state limits.
	 */
	if ((msr & 0x7) < 2) {
		/*
		 * Uses the CC6 + PC0 latency and 3 times of
		 * latency for target_residency if the PC6
		 * is disabled in BIOS. This is consistent
		 * with how intel_idle driver uses _CST
		 * to set the target_residency.
		 */
		skx_cstates[2].exit_latency = 92;
		skx_cstates[2].target_residency = 276;
	}
}
1784
1785/**
1786 * adl_idle_state_table_update - Adjust AlderLake idle states table.
1787 */
1788static void __init adl_idle_state_table_update(void)
1789{
1790 /* Check if user prefers C1 over C1E. */
1791 if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
1792 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1793 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
1794
1795 /* Disable C1E by clearing the "C1E promotion" bit. */
1796 c1e_promotion = C1E_PROMOTION_DISABLE;
1797 return;
1798 }
1799
1800 /* Make sure C1E is enabled by default */
1801 c1e_promotion = C1E_PROMOTION_ENABLE;
1802}
1803
/**
 * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
 */
static void __init spr_idle_state_table_update(void)
{
	unsigned long long msr;

	/*
	 * By default, the C6 state assumes the worst-case scenario of package
	 * C6. However, if PC6 is disabled, we update the numbers to match
	 * core C6.
	 */
	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* Limit value 2 and above (bits 2:0) allow for PC6. */
	if ((msr & 0x7) < 2) {
		spr_cstates[2].exit_latency = 190;
		spr_cstates[2].target_residency = 600;
	}
}
1824
1825static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1826{
1827 unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1828 unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1829 MWAIT_SUBSTATE_MASK;
1830
1831 /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1832 if (num_substates == 0)
1833 return false;
1834
1835 if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1836 mark_tsc_unstable("TSC halts in idle states deeper than C2");
1837
1838 return true;
1839}
1840
1841static bool force_irq_on __read_mostly;
1842module_param(force_irq_on, bool, 0444);
1843
1844static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1845{
1846 int cstate;
1847
1848 switch (boot_cpu_data.x86_model) {
1849 case INTEL_FAM6_IVYBRIDGE_X:
1850 ivt_idle_state_table_update();
1851 break;
1852 case INTEL_FAM6_ATOM_GOLDMONT:
1853 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1854 bxt_idle_state_table_update();
1855 break;
1856 case INTEL_FAM6_SKYLAKE:
1857 sklh_idle_state_table_update();
1858 break;
1859 case INTEL_FAM6_SKYLAKE_X:
1860 skx_idle_state_table_update();
1861 break;
1862 case INTEL_FAM6_SAPPHIRERAPIDS_X:
1863 case INTEL_FAM6_EMERALDRAPIDS_X:
1864 spr_idle_state_table_update();
1865 break;
1866 case INTEL_FAM6_ALDERLAKE:
1867 case INTEL_FAM6_ALDERLAKE_L:
1868 case INTEL_FAM6_ALDERLAKE_N:
1869 adl_idle_state_table_update();
1870 break;
1871 }
1872
1873 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1874 unsigned int mwait_hint;
1875
1876 if (intel_idle_max_cstate_reached(cstate))
1877 break;
1878
1879 if (!cpuidle_state_table[cstate].enter &&
1880 !cpuidle_state_table[cstate].enter_s2idle)
1881 break;
1882
1883 /* If marked as unusable, skip this state. */
1884 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1885 pr_debug("state %s is disabled\n",
1886 cpuidle_state_table[cstate].name);
1887 continue;
1888 }
1889
1890 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1891 if (!intel_idle_verify_cstate(mwait_hint))
1892 continue;
1893
1894 /* Structure copy. */
1895 drv->states[drv->state_count] = cpuidle_state_table[cstate];
1896
1897 if ((cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) || force_irq_on) {
1898 printk("intel_idle: forced intel_idle_irq for state %d\n", cstate);
1899 drv->states[drv->state_count].enter = intel_idle_irq;
1900 }
1901
1902 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1903 cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
1904 WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
1905 drv->states[drv->state_count].enter = intel_idle_ibrs;
1906 }
1907
1908 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_INIT_XSTATE)
1909 drv->states[drv->state_count].enter = intel_idle_xstate;
1910
1911 if ((disabled_states_mask & BIT(drv->state_count)) ||
1912 ((icpu->use_acpi || force_use_acpi) &&
1913 intel_idle_off_by_default(mwait_hint) &&
1914 !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1915 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1916
1917 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1918 drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1919
1920 drv->state_count++;
1921 }
1922
1923 if (icpu->byt_auto_demotion_disable_flag) {
1924 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1925 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1926 }
1927}
1928
/**
 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
 * @drv: cpuidle driver structure to initialize.
 */
static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
{
	cpuidle_poll_state_init(drv);

	/* Bit 0 of the "states_off" mask disables the POLL state. */
	if (disabled_states_mask & BIT(0))
		drv->states[0].flags |= CPUIDLE_FLAG_OFF;

	drv->state_count = 1;

	/* Prefer the model-specific tables; fall back to ACPI _CST data. */
	if (icpu)
		intel_idle_init_cstates_icpu(drv);
	else
		intel_idle_init_cstates_acpi(drv);
}
1947
/*
 * Clear the auto-demotion bits given by the platform descriptor
 * (auto_demotion_disable_flags) in MSR_PKG_CST_CONFIG_CONTROL.
 */
static void auto_demotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
	msr_bits &= ~auto_demotion_disable_flags;
	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}
1956
/* Set bit 1 (C1E promotion) of MSR_IA32_POWER_CTL. */
static void c1e_promotion_enable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits |= 0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}
1965
/* Clear bit 1 (C1E promotion) of MSR_IA32_POWER_CTL. */
static void c1e_promotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits &= ~0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}
1974
/**
 * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
 * @cpu: CPU to initialize.
 *
 * Register a cpuidle device object for @cpu and update its MSRs in accordance
 * with the processor model flags.  Returns 0 on success, -EIO if the cpuidle
 * device could not be registered.
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	/* Apply the MSR tweaks requested by the idle_cpu descriptor. */
	if (auto_demotion_disable_flags)
		auto_demotion_disable();

	if (c1e_promotion == C1E_PROMOTION_ENABLE)
		c1e_promotion_enable();
	else if (c1e_promotion == C1E_PROMOTION_DISABLE)
		c1e_promotion_disable();
	/* else C1E_PROMOTION_PRESERVE: leave the MSR alone */

	return 0;
}
2004
2005static int intel_idle_cpu_online(unsigned int cpu)
2006{
2007 struct cpuidle_device *dev;
2008
2009 if (!boot_cpu_has(X86_FEATURE_ARAT))
2010 tick_broadcast_enable();
2011
2012 /*
2013 * Some systems can hotplug a cpu at runtime after
2014 * the kernel has booted, we have to initialize the
2015 * driver in this case
2016 */
2017 dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
2018 if (!dev->registered)
2019 return intel_idle_cpu_init(cpu);
2020
2021 return 0;
2022}
2023
2024/**
2025 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
2026 */
2027static void __init intel_idle_cpuidle_devices_uninit(void)
2028{
2029 int i;
2030
2031 for_each_online_cpu(i)
2032 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
2033}
2034
/*
 * intel_idle_init - Driver entry point (device initcall).
 *
 * Validate the CPU (MWAIT support, usable CPUID.MWAIT leaf), pick the
 * per-model configuration or fall back to ACPI _CST, then register the
 * cpuidle driver and the CPU hotplug "online" callback.
 */
static int __init intel_idle_init(void)
{
	const struct x86_cpu_id *id;
	unsigned int eax, ebx, ecx;
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (id) {
		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
			pr_debug("Please enable MWAIT in BIOS SETUP\n");
			return -ENODEV;
		}
	} else {
		/* Unknown model: accept any family 6 CPU with MWAIT. */
		id = x86_match_cpu(intel_mwait_ids);
		if (!id)
			return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	/* The MWAIT leaf must report extensions, interrupt break and substates. */
	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
		return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	if (icpu) {
		cpuidle_state_table = icpu->state_table;
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		if (icpu->disable_promotion_to_c1e)
			c1e_promotion = C1E_PROMOTION_DISABLE;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		/* No model descriptor and no usable _CST: nothing to offer. */
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
2121device_initcall(intel_idle_init);
2122
2123/*
2124 * We are not really modular, but we used to support that. Meaning we also
2125 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
2126 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
2127 * is the easiest way (currently) to continue doing that.
2128 */
2129module_param(max_cstate, int, 0444);
2130/*
2131 * The positions of the bits that are set in this number are the indices of the
2132 * idle states to be disabled by default (as reflected by the names of the
2133 * corresponding idle state directories in sysfs, "state0", "state1" ...
2134 * "state<i>" ..., where <i> is the index of the given state).
2135 */
2136module_param_named(states_off, disabled_states_mask, uint, 0444);
2137MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
2138/*
2139 * Some platforms come with mutually exclusive C-states, so that if one is
2140 * enabled, the other C-states must not be used. Example: C1 and C1E on
2141 * Sapphire Rapids platform. This parameter allows for selecting the
2142 * preferred C-states among the groups of mutually exclusive C-states - the
2143 * selected C-states will be registered, the other C-states from the mutually
2144 * exclusive group won't be registered. If the platform has no mutually
2145 * exclusive C-states, this parameter has no effect.
2146 */
2147module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
2148MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");