Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: Decouple PAT and MTRR handling

Today, PAT is usable only with MTRR being active, with some nasty tweaks
to make PAT usable when running as a Xen PV guest which doesn't support
MTRR.

The reason for this coupling is that both PAT MSR changes and MTRR
changes require a similar sequence and so full PAT support was added
using the already available MTRR handling.

Xen PV PAT handling can work without MTRR, as it just needs to consume
the PAT MSR setting done by the hypervisor without the ability or need
to change it. This in turn has resulted in a convoluted initialization
sequence and wrong decisions regarding cache mode availability due to
misleading PAT availability flags.

Fix all of that by allowing PAT to be used without MTRR and by reworking
the current PAT initialization sequence to better match the newly
introduced generic cache initialization.

This removes the need for the recently added pat_force_disabled flag, so
remove the remnants of the patch that added it.

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20221102074713.21493-14-jgross@suse.com
Signed-off-by: Borislav Petkov <bp@suse.de>

authored by

Juergen Gross and committed by
Borislav Petkov
adfe7512 0b9a6a8b

+57 -128
+2 -3
arch/x86/include/asm/memtype.h
··· 6 6 #include <asm/pgtable_types.h> 7 7 8 8 extern bool pat_enabled(void); 9 - extern void pat_disable(const char *reason); 10 - extern void pat_init(void); 11 - extern void init_cache_modes(void); 9 + extern void pat_bp_init(void); 10 + extern void pat_cpu_init(void); 12 11 13 12 extern int memtype_reserve(u64 start, u64 end, 14 13 enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+2 -1
arch/x86/kernel/cpu/cacheinfo.c
··· 1133 1133 mtrr_generic_set_state(); 1134 1134 1135 1135 if (memory_caching_control & CACHE_PAT) 1136 - pat_init(); 1136 + pat_cpu_init(); 1137 1137 1138 1138 cache_enable(); 1139 1139 local_irq_restore(flags); ··· 1162 1162 void __init cache_bp_init(void) 1163 1163 { 1164 1164 mtrr_bp_init(); 1165 + pat_bp_init(); 1165 1166 1166 1167 if (memory_caching_control) 1167 1168 cache_cpu_init();
+2 -10
arch/x86/kernel/cpu/mtrr/mtrr.c
··· 725 725 if (mtrr_if == &generic_mtrr_ops) { 726 726 /* BIOS may override */ 727 727 if (get_mtrr_state()) { 728 - memory_caching_control |= CACHE_MTRR | CACHE_PAT; 728 + memory_caching_control |= CACHE_MTRR; 729 729 changed_by_mtrr_cleanup = mtrr_cleanup(phys_addr); 730 730 } else { 731 731 mtrr_if = NULL; ··· 733 733 } 734 734 } 735 735 736 - if (!mtrr_enabled()) { 736 + if (!mtrr_enabled()) 737 737 pr_info("Disabled\n"); 738 - 739 - /* 740 - * PAT initialization relies on MTRR's rendezvous handler. 741 - * Skip PAT init until the handler can initialize both 742 - * features independently. 743 - */ 744 - pat_disable("MTRRs disabled, skipping PAT initialization too."); 745 - } 746 738 } 747 739 748 740 /**
+1 -12
arch/x86/kernel/setup.c
··· 1075 1075 max_pfn = e820__end_of_ram_pfn(); 1076 1076 1077 1077 /* update e820 for memory not covered by WB MTRRs */ 1078 - if (IS_ENABLED(CONFIG_MTRR)) 1079 - cache_bp_init(); 1080 - else 1081 - pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel."); 1082 - 1078 + cache_bp_init(); 1083 1079 if (mtrr_trim_uncached_memory(max_pfn)) 1084 1080 max_pfn = e820__end_of_ram_pfn(); 1085 1081 1086 1082 max_possible_pfn = max_pfn; 1087 - 1088 - /* 1089 - * This call is required when the CPU does not support PAT. If 1090 - * mtrr_bp_init() invoked it already via pat_init() the call has no 1091 - * effect. 1092 - */ 1093 - init_cache_modes(); 1094 1083 1095 1084 /* 1096 1085 * Define random base addresses for memory sections after max_pfn is
+50 -102
arch/x86/mm/pat/memtype.c
··· 43 43 #include <linux/rbtree.h> 44 44 45 45 #include <asm/cacheflush.h> 46 + #include <asm/cacheinfo.h> 46 47 #include <asm/processor.h> 47 48 #include <asm/tlbflush.h> 48 49 #include <asm/x86_init.h> ··· 61 60 #undef pr_fmt 62 61 #define pr_fmt(fmt) "" fmt 63 62 64 - static bool __read_mostly pat_bp_initialized; 65 63 static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT); 66 - static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT); 67 - static bool __read_mostly pat_bp_enabled; 68 - static bool __read_mostly pat_cm_initialized; 64 + static u64 __ro_after_init pat_msr_val; 69 65 70 66 /* 71 67 * PAT support is enabled by default, but can be disabled for 72 68 * various user-requested or hardware-forced reasons: 73 69 */ 74 - void pat_disable(const char *msg_reason) 70 + static void __init pat_disable(const char *msg_reason) 75 71 { 76 72 if (pat_disabled) 77 73 return; 78 74 79 - if (pat_bp_initialized) { 80 - WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); 81 - return; 82 - } 83 - 84 75 pat_disabled = true; 85 76 pr_info("x86/PAT: %s\n", msg_reason); 77 + 78 + memory_caching_control &= ~CACHE_PAT; 86 79 } 87 80 88 81 static int __init nopat(char *str) 89 82 { 90 83 pat_disable("PAT support disabled via boot option."); 91 - pat_force_disabled = true; 92 84 return 0; 93 85 } 94 86 early_param("nopat", nopat); 95 87 96 88 bool pat_enabled(void) 97 89 { 98 - return pat_bp_enabled; 90 + return !pat_disabled; 99 91 } 100 92 EXPORT_SYMBOL_GPL(pat_enabled); 101 93 ··· 186 192 187 193 #define CM(c) (_PAGE_CACHE_MODE_ ## c) 188 194 189 - static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) 195 + static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val, 196 + char *msg) 190 197 { 191 198 enum page_cache_mode cache; 192 199 char *cache_mode; ··· 214 219 * configuration. 215 220 * Using lower indices is preferred, so we start with highest index. 
216 221 */ 217 - static void __init_cache_modes(u64 pat) 222 + static void __init init_cache_modes(u64 pat) 218 223 { 219 224 enum page_cache_mode cache; 220 225 char pat_msg[33]; 221 226 int i; 222 - 223 - WARN_ON_ONCE(pat_cm_initialized); 224 227 225 228 pat_msg[32] = 0; 226 229 for (i = 7; i >= 0; i--) { ··· 227 234 update_cache_mode_entry(i, cache); 228 235 } 229 236 pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg); 230 - 231 - pat_cm_initialized = true; 232 237 } 233 238 234 - #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) 235 - 236 - static void pat_bp_init(u64 pat) 237 - { 238 - u64 tmp_pat; 239 - 240 - if (!boot_cpu_has(X86_FEATURE_PAT)) { 241 - pat_disable("PAT not supported by the CPU."); 242 - return; 243 - } 244 - 245 - rdmsrl(MSR_IA32_CR_PAT, tmp_pat); 246 - if (!tmp_pat) { 247 - pat_disable("PAT support disabled by the firmware."); 248 - return; 249 - } 250 - 251 - wrmsrl(MSR_IA32_CR_PAT, pat); 252 - pat_bp_enabled = true; 253 - 254 - __init_cache_modes(pat); 255 - } 256 - 257 - static void pat_ap_init(u64 pat) 239 + void pat_cpu_init(void) 258 240 { 259 241 if (!boot_cpu_has(X86_FEATURE_PAT)) { 260 242 /* ··· 239 271 panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n"); 240 272 } 241 273 242 - wrmsrl(MSR_IA32_CR_PAT, pat); 274 + wrmsrl(MSR_IA32_CR_PAT, pat_msr_val); 243 275 } 244 276 245 - void __init init_cache_modes(void) 277 + /** 278 + * pat_bp_init - Initialize the PAT MSR value and PAT table 279 + * 280 + * This function initializes PAT MSR value and PAT table with an OS-defined 281 + * value to enable additional cache attributes, WC, WT and WP. 282 + * 283 + * This function prepares the calls of pat_cpu_init() via cache_cpu_init() 284 + * on all CPUs. 
285 + */ 286 + void __init pat_bp_init(void) 246 287 { 247 - u64 pat = 0; 288 + struct cpuinfo_x86 *c = &boot_cpu_data; 289 + #define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \ 290 + (((u64)PAT_ ## p0) | ((u64)PAT_ ## p1 << 8) | \ 291 + ((u64)PAT_ ## p2 << 16) | ((u64)PAT_ ## p3 << 24) | \ 292 + ((u64)PAT_ ## p4 << 32) | ((u64)PAT_ ## p5 << 40) | \ 293 + ((u64)PAT_ ## p6 << 48) | ((u64)PAT_ ## p7 << 56)) 248 294 249 - if (pat_cm_initialized) 250 - return; 251 295 252 - if (boot_cpu_has(X86_FEATURE_PAT)) { 253 - /* 254 - * CPU supports PAT. Set PAT table to be consistent with 255 - * PAT MSR. This case supports "nopat" boot option, and 256 - * virtual machine environments which support PAT without 257 - * MTRRs. In specific, Xen has unique setup to PAT MSR. 258 - * 259 - * If PAT MSR returns 0, it is considered invalid and emulates 260 - * as No PAT. 261 - */ 262 - rdmsrl(MSR_IA32_CR_PAT, pat); 263 - } 296 + if (!IS_ENABLED(CONFIG_X86_PAT)) 297 + pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); 264 298 265 - if (!pat) { 299 + if (!cpu_feature_enabled(X86_FEATURE_PAT)) 300 + pat_disable("PAT not supported by the CPU."); 301 + else 302 + rdmsrl(MSR_IA32_CR_PAT, pat_msr_val); 303 + 304 + if (!pat_msr_val) { 305 + pat_disable("PAT support disabled by the firmware."); 306 + 266 307 /* 267 308 * No PAT. Emulate the PAT table that corresponds to the two 268 309 * cache bits, PWT (Write Through) and PCD (Cache Disable). ··· 290 313 * NOTE: When WC or WP is used, it is redirected to UC- per 291 314 * the default setup in __cachemode2pte_tbl[]. 292 315 */ 293 - pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | 294 - PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); 295 - } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) { 296 - /* 297 - * Clearly PAT is enabled underneath. Allow pat_enabled() to 298 - * reflect this. 
299 - */ 300 - pat_bp_enabled = true; 316 + pat_msr_val = PAT(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC); 301 317 } 302 318 303 - __init_cache_modes(pat); 304 - } 305 - 306 - /** 307 - * pat_init - Initialize the PAT MSR and PAT table on the current CPU 308 - * 309 - * This function initializes PAT MSR and PAT table with an OS-defined value 310 - * to enable additional cache attributes, WC, WT and WP. 311 - * 312 - * This function must be called on all CPUs using the specific sequence of 313 - * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this 314 - * procedure for PAT. 315 - */ 316 - void pat_init(void) 317 - { 318 - u64 pat; 319 - struct cpuinfo_x86 *c = &boot_cpu_data; 320 - 321 - #ifndef CONFIG_X86_PAT 322 - pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); 323 - #endif 324 - 325 - if (pat_disabled) 319 + /* 320 + * Xen PV doesn't allow to set PAT MSR, but all cache modes are 321 + * supported. 322 + */ 323 + if (pat_disabled || cpu_feature_enabled(X86_FEATURE_XENPV)) { 324 + init_cache_modes(pat_msr_val); 326 325 return; 326 + } 327 327 328 328 if ((c->x86_vendor == X86_VENDOR_INTEL) && 329 329 (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || ··· 325 371 * NOTE: When WT or WP is used, it is redirected to UC- per 326 372 * the default setup in __cachemode2pte_tbl[]. 327 373 */ 328 - pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | 329 - PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); 374 + pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC); 330 375 } else { 331 376 /* 332 377 * Full PAT support. We put WT in slot 7 to improve ··· 353 400 * The reserved slots are unused, but mapped to their 354 401 * corresponding types in the presence of PAT errata. 
355 402 */ 356 - pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | 357 - PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT); 403 + pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT); 358 404 } 359 405 360 - if (!pat_bp_initialized) { 361 - pat_bp_init(pat); 362 - pat_bp_initialized = true; 363 - } else { 364 - pat_ap_init(pat); 365 - } 366 - } 406 + memory_caching_control |= CACHE_PAT; 367 407 408 + init_cache_modes(pat_msr_val); 368 409 #undef PAT 410 + } 369 411 370 412 static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ 371 413