Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/mm/pat: Emulate PAT when it is disabled

When PAT is disabled on the command line with "nopat", or when the
virtualization environment doesn't support PAT (or doesn't emulate
it correctly) - see

  9d34cfdf4796 ("x86: Don't rely on VMWare emulating PAT MSR correctly")

- we emulate it using the PWT and PCD cache attribute bits. Get
rid of boot_pat_state while at it.

Based on a conglomerate patch from Toshi Kani.

Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Toshi Kani <toshi.kani@hp.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Elliott@hp.com
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: hch@lst.de
Cc: hmh@hmh.eng.br
Cc: konrad.wilk@oracle.com
Cc: linux-mm <linux-mm@kvack.org>
Cc: linux-nvdimm@lists.01.org
Cc: stefan.bader@canonical.com
Cc: yigal@plexistor.com
Link: http://lkml.kernel.org/r/1433436928-31903-3-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Borislav Petkov and committed by
Ingo Molnar
9cd25aac 9dac6290

+60 -34
+1 -1
arch/x86/include/asm/pat.h
··· 6 6 7 7 bool pat_enabled(void); 8 8 extern void pat_init(void); 9 - void pat_init_cache_modes(void); 9 + void pat_init_cache_modes(u64); 10 10 11 11 extern int reserve_memtype(u64 start, u64 end, 12 12 enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
+3 -3
arch/x86/mm/init.c
··· 40 40 */ 41 41 uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { 42 42 [_PAGE_CACHE_MODE_WB ] = 0 | 0 , 43 - [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 , 43 + [_PAGE_CACHE_MODE_WC ] = 0 | _PAGE_PCD, 44 44 [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, 45 45 [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, 46 46 [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, ··· 50 50 51 51 uint8_t __pte2cachemode_tbl[8] = { 52 52 [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, 53 - [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC, 53 + [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, 54 54 [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, 55 55 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, 56 56 [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, 57 - [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, 57 + [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, 58 58 [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, 59 59 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, 60 60 };
+53 -28
arch/x86/mm/pat.c
··· 68 68 } 69 69 __setup("debugpat", pat_debug_setup); 70 70 71 - static u64 __read_mostly boot_pat_state; 72 - 73 71 #ifdef CONFIG_X86_PAT 74 72 /* 75 73 * X86 PAT uses page flags WC and Uncached together to keep track of ··· 175 177 * configuration. 176 178 * Using lower indices is preferred, so we start with highest index. 177 179 */ 178 - void pat_init_cache_modes(void) 180 + void pat_init_cache_modes(u64 pat) 179 181 { 180 - int i; 181 182 enum page_cache_mode cache; 182 183 char pat_msg[33]; 183 - u64 pat; 184 + int i; 184 185 185 - rdmsrl(MSR_IA32_CR_PAT, pat); 186 186 pat_msg[32] = 0; 187 187 for (i = 7; i >= 0; i--) { 188 188 cache = pat_get_cache_mode((pat >> (i * 8)) & 7, ··· 194 198 195 199 static void pat_bsp_init(u64 pat) 196 200 { 201 + u64 tmp_pat; 202 + 197 203 if (!cpu_has_pat) { 198 204 pat_disable("PAT not supported by CPU."); 199 205 return; 200 206 } 201 207 202 - rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); 203 - if (!boot_pat_state) { 208 + if (!pat_enabled()) 209 + goto done; 210 + 211 + rdmsrl(MSR_IA32_CR_PAT, tmp_pat); 212 + if (!tmp_pat) { 204 213 pat_disable("PAT MSR is 0, disabled."); 205 214 return; 206 215 } 207 216 208 217 wrmsrl(MSR_IA32_CR_PAT, pat); 209 218 210 - pat_init_cache_modes(); 219 + done: 220 + pat_init_cache_modes(pat); 211 221 } 212 222 213 223 static void pat_ap_init(u64 pat) 214 224 { 225 + if (!pat_enabled()) 226 + return; 227 + 215 228 if (!cpu_has_pat) { 216 229 /* 217 230 * If this happens we are on a secondary CPU, but switched to ··· 236 231 { 237 232 u64 pat; 238 233 239 - if (!pat_enabled()) 240 - return; 241 - 242 - /* 243 - * Set PWT to Write-Combining. 
All other bits stay the same: 244 - * 245 - * PTE encoding used in Linux: 246 - * PAT 247 - * |PCD 248 - * ||PWT 249 - * ||| 250 - * 000 WB _PAGE_CACHE_WB 251 - * 001 WC _PAGE_CACHE_WC 252 - * 010 UC- _PAGE_CACHE_UC_MINUS 253 - * 011 UC _PAGE_CACHE_UC 254 - * PAT bit unused 255 - */ 256 - pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | 257 - PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); 234 + if (!pat_enabled()) { 235 + /* 236 + * No PAT. Emulate the PAT table that corresponds to the two 237 + * cache bits, PWT (Write Through) and PCD (Cache Disable). This 238 + * setup is the same as the BIOS default setup when the system 239 + * has PAT but the "nopat" boot option has been specified. This 240 + * emulated PAT table is used when MSR_IA32_CR_PAT returns 0. 241 + * 242 + * PTE encoding used: 243 + * 244 + * PCD 245 + * |PWT PAT 246 + * || slot 247 + * 00 0 WB : _PAGE_CACHE_MODE_WB 248 + * 01 1 WT : _PAGE_CACHE_MODE_WT 249 + * 10 2 UC-: _PAGE_CACHE_MODE_UC_MINUS 250 + * 11 3 UC : _PAGE_CACHE_MODE_UC 251 + * 252 + * NOTE: When WC or WP is used, it is redirected to UC- per 253 + * the default setup in __cachemode2pte_tbl[]. 254 + */ 255 + pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | 256 + PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); 257 + } else { 258 + /* 259 + * PTE encoding used in Linux: 260 + * PAT 261 + * |PCD 262 + * ||PWT 263 + * ||| 264 + * 000 WB _PAGE_CACHE_WB 265 + * 001 WC _PAGE_CACHE_WC 266 + * 010 UC- _PAGE_CACHE_UC_MINUS 267 + * 011 UC _PAGE_CACHE_UC 268 + * PAT bit unused 269 + */ 270 + pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | 271 + PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); 272 + } 258 273 259 274 if (!boot_cpu_done) { 260 275 pat_bsp_init(pat);
+3 -2
arch/x86/xen/enlighten.c
··· 1467 1467 { 1468 1468 struct physdev_set_iopl set_iopl; 1469 1469 unsigned long initrd_start = 0; 1470 + u64 pat; 1470 1471 int rc; 1471 1472 1472 1473 if (!xen_start_info) ··· 1575 1574 * Modify the cache mode translation tables to match Xen's PAT 1576 1575 * configuration. 1577 1576 */ 1578 - 1579 - pat_init_cache_modes(); 1577 + rdmsrl(MSR_IA32_CR_PAT, pat); 1578 + pat_init_cache_modes(pat); 1580 1579 1581 1580 /* keep using Xen gdt for now; no urgent need to change it */ 1582 1581