Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

intel_idle: disable NHM/WSM HW C-state auto-demotion

Hardware C-state auto-demotion is a mechanism where the HW overrides
the OS C-state request and enters a shallower state instead. The
shallower state is cheaper to enter and exit, but saves less power.

Modern Linux should generally get exactly the states it requests.
In particular, when a CPU is taken off-line, its C-state request must
not be demoted; otherwise that CPU can prevent the entire package from
reaching deep C-states.

https://bugzilla.kernel.org/show_bug.cgi?id=25252

Signed-off-by: Len Brown <len.brown@intel.com>

Len Brown 14796fca 100b33c8

+24
+4
arch/x86/include/asm/msr-index.h
··· 36 36 #define MSR_IA32_PERFCTR1 0x000000c2 37 37 #define MSR_FSB_FREQ 0x000000cd 38 38 39 + #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 40 + #define NHM_C3_AUTO_DEMOTE (1UL << 25) 41 + #define NHM_C1_AUTO_DEMOTE (1UL << 26) 42 + 39 43 #define MSR_MTRRcap 0x000000fe 40 44 #define MSR_IA32_BBL_CR_CTL 0x00000119 41 45
+20
drivers/idle/intel_idle.c
··· 62 62 #include <linux/notifier.h> 63 63 #include <linux/cpu.h> 64 64 #include <asm/mwait.h> 65 + #include <asm/msr.h> 65 66 66 67 #define INTEL_IDLE_VERSION "0.4" 67 68 #define PREFIX "intel_idle: " ··· 84 83 static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state); 85 84 86 85 static struct cpuidle_state *cpuidle_state_table; 86 + 87 + /* 88 + * Hardware C-state auto-demotion may not always be optimal. 89 + * Indicate which enable bits to clear here. 90 + */ 91 + static unsigned long long auto_demotion_disable_flags; 87 92 88 93 /* 89 94 * Set this flag for states where the HW flushes the TLB for us ··· 288 281 .notifier_call = setup_broadcast_cpuhp_notify, 289 282 }; 290 283 284 + static void auto_demotion_disable(void *dummy) 285 + { 286 + unsigned long long msr_bits; 287 + 288 + rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); 289 + msr_bits &= ~auto_demotion_disable_flags; 290 + wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); 291 + } 292 + 291 293 /* 292 294 * intel_idle_probe() 293 295 */ ··· 340 324 case 0x25: /* Westmere */ 341 325 case 0x2C: /* Westmere */ 342 326 cpuidle_state_table = nehalem_cstates; 327 + auto_demotion_disable_flags = 328 + (NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE); 343 329 break; 344 330 345 331 case 0x1C: /* 28 - Atom Processor */ ··· 454 436 return -EIO; 455 437 } 456 438 } 439 + if (auto_demotion_disable_flags) 440 + smp_call_function(auto_demotion_disable, NULL, 1); 457 441 458 442 return 0; 459 443 }