Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

thermal: intel: Avoid updating unsupported THERM_STATUS_CLEAR mask bits

Some older processors don't allow BIT(13) and BIT(15) in the current
mask set by "THERM_STATUS_CLEAR_CORE_MASK". This results in:

unchecked MSR access error: WRMSR to 0x19c (tried to
write 0x000000000000aaa8) at rIP: 0xffffffff816f66a6
(throttle_active_work+0xa6/0x1d0)

To avoid unchecked MSR issues, check CPUID for each relevant feature and
use that information to set the supported feature bits only in the
"clear" mask for cores. Do the same for the analogous package mask set
by "THERM_STATUS_CLEAR_PKG_MASK".

Introduce functions thermal_intr_init_core_clear_mask() and
thermal_intr_init_pkg_clear_mask() to set core and package mask bits,
respectively. These functions are called during initialization.

Fixes: 6fe1e64b6026 ("thermal: intel: Prevent accidental clearing of HFI status")
Reported-by: Rui Salvaterra <rsalvaterra@gmail.com>
Link: https://lore.kernel.org/lkml/cdf43fb423368ee3994124a9e8c9b4f8d00712c6.camel@linux.intel.com/T/
Tested-by: Rui Salvaterra <rsalvaterra@gmail.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: 6.2+ <stable@kernel.org> # 6.2+
[ rjw: Renamed 2 functions and 2 static variables, edited subject and
changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

authored by

Srinivas Pandruvada and committed by
Rafael J. Wysocki
117e4e5b 09a9639e

+66 -7
+66 -7
drivers/thermal/intel/therm_throt.c
··· 193 193 #define THERM_THROT_POLL_INTERVAL HZ 194 194 #define THERM_STATUS_PROCHOT_LOG BIT(1) 195 195 196 - #define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15)) 197 - #define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)) 196 + static u64 therm_intr_core_clear_mask; 197 + static u64 therm_intr_pkg_clear_mask; 198 + 199 + static void thermal_intr_init_core_clear_mask(void) 200 + { 201 + if (therm_intr_core_clear_mask) 202 + return; 203 + 204 + /* 205 + * Reference: Intel SDM Volume 4 206 + * "Table 2-2. IA-32 Architectural MSRs", MSR 0x19C 207 + * IA32_THERM_STATUS. 208 + */ 209 + 210 + /* 211 + * Bit 1, 3, 5: CPUID.01H:EDX[22] = 1. This driver will not 212 + * enable interrupts, when 0 as it checks for X86_FEATURE_ACPI. 213 + */ 214 + therm_intr_core_clear_mask = (BIT(1) | BIT(3) | BIT(5)); 215 + 216 + /* 217 + * Bit 7 and 9: Thermal Threshold #1 and #2 log 218 + * If CPUID.01H:ECX[8] = 1 219 + */ 220 + if (boot_cpu_has(X86_FEATURE_TM2)) 221 + therm_intr_core_clear_mask |= (BIT(7) | BIT(9)); 222 + 223 + /* Bit 11: Power Limitation log (R/WC0) If CPUID.06H:EAX[4] = 1 */ 224 + if (boot_cpu_has(X86_FEATURE_PLN)) 225 + therm_intr_core_clear_mask |= BIT(11); 226 + 227 + /* 228 + * Bit 13: Current Limit log (R/WC0) If CPUID.06H:EAX[7] = 1 229 + * Bit 15: Cross Domain Limit log (R/WC0) If CPUID.06H:EAX[7] = 1 230 + */ 231 + if (boot_cpu_has(X86_FEATURE_HWP)) 232 + therm_intr_core_clear_mask |= (BIT(13) | BIT(15)); 233 + } 234 + 235 + static void thermal_intr_init_pkg_clear_mask(void) 236 + { 237 + if (therm_intr_pkg_clear_mask) 238 + return; 239 + 240 + /* 241 + * Reference: Intel SDM Volume 4 242 + * "Table 2-2. IA-32 Architectural MSRs", MSR 0x1B1 243 + * IA32_PACKAGE_THERM_STATUS. 
244 + */ 245 + 246 + /* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */ 247 + if (boot_cpu_has(X86_FEATURE_PTS)) 248 + therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)); 249 + 250 + /* 251 + * Intel SDM Volume 2A: Thermal and Power Management Leaf 252 + * Bit 26: CPUID.06H: EAX[19] = 1 253 + */ 254 + if (boot_cpu_has(X86_FEATURE_HFI)) 255 + therm_intr_pkg_clear_mask |= BIT(26); 256 + } 198 257 199 258 /* 200 259 * Clear the bits in package thermal status register for bit = 1 ··· 266 207 267 208 if (level == CORE_LEVEL) { 268 209 msr = MSR_IA32_THERM_STATUS; 269 - msr_val = THERM_STATUS_CLEAR_CORE_MASK; 210 + msr_val = therm_intr_core_clear_mask; 270 211 } else { 271 212 msr = MSR_IA32_PACKAGE_THERM_STATUS; 272 - msr_val = THERM_STATUS_CLEAR_PKG_MASK; 273 - if (boot_cpu_has(X86_FEATURE_HFI)) 274 - msr_val |= BIT(26); 275 - 213 + msr_val = therm_intr_pkg_clear_mask; 276 214 } 277 215 278 216 msr_val &= ~bit_mask; ··· 763 707 /* We'll mask the thermal vector in the lapic till we're ready: */ 764 708 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; 765 709 apic_write(APIC_LVTTHMR, h); 710 + 711 + thermal_intr_init_core_clear_mask(); 712 + thermal_intr_init_pkg_clear_mask(); 766 713 767 714 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); 768 715 if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)