"Das U-Boot" Source Tree

x86: Add support for newer CAR schemes

Newer Intel SoCs have different ways of setting up cache-as-ram (CAR).
Add support for these along with suitable configuration options.

To make the code cleaner, adjust a few definitions in processor.h so that
they can be used from assembler.

Signed-off-by: Simon Glass <sjg@chromium.org>
Reviewed-by: Bin Meng <bmeng.cn@gmail.com>

authored by

Simon Glass and committed by
Bin Meng
f45e747d 2e2a0035

+564 -7
+16
arch/x86/Kconfig
··· 879 879 Increase it if the default size does not fit the board's needs. 880 880 This is most likely because a large ACPI DSDT table is used. 881 881 882 + config INTEL_CAR_CQOS 883 + bool "Support Intel Cache Quality of Service" 884 + help 885 + Cache Quality of Service allows more fine-grained control of cache 886 + usage. As a result, it is possible to set up a portion of L2 cache for 887 + CAR and use the remainder for actual caching. 888 + 889 + # 890 + # Each bit in QOS mask controls this many bytes. This is calculated as: 891 + # (CACHE_WAYS / CACHE_BITS_PER_MASK) * CACHE_LINE_SIZE * CACHE_SETS 892 + # 893 + config CACHE_QOS_SIZE_PER_BIT 894 + hex 895 + depends on INTEL_CAR_CQOS 896 + default 0x20000 # 128 KB 897 + 882 898 endmenu
+8
arch/x86/cpu/intel_common/Makefile
··· 8 8 obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += report_platform.o 9 9 obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += mrc.o 10 10 endif 11 + 12 + ifdef CONFIG_INTEL_CAR_CQOS 13 + obj-$(CONFIG_TPL_BUILD) += car2.o 14 + ifndef CONFIG_SPL_BUILD 15 + obj-y += car2_uninit.o 16 + endif 17 + endif 18 + 11 19 obj-y += cpu.o 12 20 obj-y += fast_spi.o 13 21 obj-y += lpc.o
+448
arch/x86/cpu/intel_common/car2.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * This file was modified from the coreboot version.
 *
 * Copyright (C) 2015-2016 Intel Corp.
 */

#include <config.h>
#include <asm/msr-index.h>
#include <asm/mtrr.h>
#include <asm/post.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>

#define KiB			1024

#define IS_POWER_OF_2(x)	(!((x) & ((x) - 1)))

/*
 * car_init - set up cache-as-RAM (CAR)
 *
 * Syntax: GNU as, AT&T operand order, 32-bit x86, preprocessed by cpp.
 *
 * Sequence:
 *   1. Issue a warm reset through IO_PORT_RESET if the MTRR default-type
 *      MSR already has its enable bits set.
 *   2. Zero every fixed MTRR in fixed_mtrr_list and every variable MTRR
 *      reported by MTRR_CAP_MSR.
 *   3. Set the default memory type to uncacheable with MTRRs disabled.
 *   4. Program variable MTRR 0 (and MTRR 1 for the 768 KiB layout) so that
 *      CONFIG_DCACHE_RAM_BASE .. +CONFIG_DCACHE_RAM_SIZE is write-back.
 *   5. Enable variable MTRRs, clear CR0.CD/CR0.NW, and jump to the
 *      mechanism selected by Kconfig (car_nem, car_cqos or
 *      car_nem_enhanced), which zero-fills the CAR region.
 *
 * Exit:   jumps to car_init_ret, which is not defined in this file.
 * Stack:  no push/pop/call is issued by the code in this file.
 * Writes: eax, ebx, ecx, edx, esi, edi and flags; the CQOS variant also
 *         uses mm3 as scratch.
 * esi holds MTRR_PHYS_MASK_HIGH from step 4 until the mask MSRs are written.
 */
.global car_init
car_init:
	post_code(POST_CAR_START)

	/*
	 * Use the MTRR default type MSR as a proxy for detecting INIT#.
	 * Reset the system if any known bits are set in that MSR. That is
	 * an indication of the CPU not being properly reset.
	 */
check_for_clean_reset:
	mov	$MTRR_DEF_TYPE_MSR, %ecx
	rdmsr
	and	$(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN), %eax
	cmp	$0, %eax
	jz	no_reset
	/* perform warm reset */
	movw	$IO_PORT_RESET, %dx
	movb	$(SYS_RST | RST_CPU), %al
	outb	%al, %dx

no_reset:
	post_code(POST_CAR_SIPI)

	/* Clear/disable fixed MTRRs */
	mov	$fixed_mtrr_list_size, %ebx	/* byte offset past last entry */
	xor	%eax, %eax
	xor	%edx, %edx			/* edx:eax = 0 for every wrmsr */

clear_fixed_mtrr:
	add	$-2, %ebx			/* step back one 16-bit entry */
	movzwl	fixed_mtrr_list(%ebx), %ecx
	wrmsr
	/*
	 * ZF here is the one produced by the add above: the loop relies on
	 * movzwl and wrmsr leaving the flags untouched, and ends once the
	 * entry at offset 0 has been written.
	 */
	jnz	clear_fixed_mtrr

	post_code(POST_CAR_MTRR)

	/* Figure out how many MTRRs we have, and clear them out */
	mov	$MTRR_CAP_MSR, %ecx
	rdmsr
	movzb	%al, %ebx		/* Number of variable MTRRs */
	mov	$MTRR_PHYS_BASE_MSR(0), %ecx
	xor	%eax, %eax
	xor	%edx, %edx

clear_var_mtrr:
	wrmsr				/* base MSR of this pair */
	inc	%ecx
	wrmsr				/* mask MSR of this pair */
	inc	%ecx
	dec	%ebx
	jnz	clear_var_mtrr

	post_code(POST_CAR_UNCACHEABLE)

	/* Configure default memory type to uncacheable (UC) */
	mov	$MTRR_DEF_TYPE_MSR, %ecx
	rdmsr
	/* Clear enable bits and set default type to UC */
	and	$~(MTRR_DEF_TYPE_MASK | MTRR_DEF_TYPE_EN | \
		 MTRR_DEF_TYPE_FIX_EN), %eax
	wrmsr

	/*
	 * Configure MTRR_PHYS_MASK_HIGH for proper addressing above 4GB
	 * based on the physical address size supported for this processor
	 * This is based on read from CPUID EAX = 080000008h, EAX bits [7:0]
	 *
	 * Examples:
	 *  MTRR_PHYS_MASK_HIGH = 00000000Fh  For 36 bit addressing
	 *  MTRR_PHYS_MASK_HIGH = 0000000FFh  For 40 bit addressing
	 */

	movl	$0x80000008, %eax	/* Address sizes leaf */
	cpuid
	sub	$32, %al		/* bits above the low 32 */
	movzx	%al, %eax
	xorl	%esi, %esi
	bts	%eax, %esi
	dec	%esi			/* esi <- MTRR_PHYS_MASK_HIGH */

	post_code(POST_CAR_BASE_ADDRESS)

#if IS_POWER_OF_2(CONFIG_DCACHE_RAM_SIZE)
	/* Configure CAR region as write-back (WB) */
	mov	$MTRR_PHYS_BASE_MSR(0), %ecx
	mov	$CONFIG_DCACHE_RAM_BASE, %eax
	or	$MTRR_TYPE_WRBACK, %eax
	xor	%edx,%edx
	wrmsr

	/* Configure the MTRR mask for the size region */
	mov	$MTRR_PHYS_MASK_MSR(0), %ecx
	mov	$CONFIG_DCACHE_RAM_SIZE, %eax	/* size mask */
	dec	%eax
	not	%eax
	or	$MTRR_PHYS_MASK_VALID, %eax
	movl	%esi, %edx	/* edx <- MTRR_PHYS_MASK_HIGH */
	wrmsr
#elif (CONFIG_DCACHE_RAM_SIZE == 768 * KiB) /* 768 KiB */
	/*
	 * 768 KiB is not a power of two, so it is covered by two variable
	 * MTRRs: 512 KiB at the base followed by 256 KiB.
	 */

	/* Configure CAR region as write-back (WB) */
	mov	$MTRR_PHYS_BASE_MSR(0), %ecx
	mov	$CONFIG_DCACHE_RAM_BASE, %eax
	or	$MTRR_TYPE_WRBACK, %eax
	xor	%edx,%edx
	wrmsr

	mov	$MTRR_PHYS_MASK_MSR(0), %ecx
	mov	$(512 * KiB), %eax	/* size mask */
	dec	%eax
	not	%eax
	or	$MTRR_PHYS_MASK_VALID, %eax
	movl	%esi, %edx	/* edx <- MTRR_PHYS_MASK_HIGH */
	wrmsr

	mov	$MTRR_PHYS_BASE_MSR(1), %ecx
	mov	$(CONFIG_DCACHE_RAM_BASE + 512 * KiB), %eax
	or	$MTRR_TYPE_WRBACK, %eax
	xor	%edx,%edx
	wrmsr

	mov	$MTRR_PHYS_MASK_MSR(1), %ecx
	mov	$(256 * KiB), %eax	/* size mask */
	dec	%eax
	not	%eax
	or	$MTRR_PHYS_MASK_VALID, %eax
	movl	%esi, %edx	/* edx <- MTRR_PHYS_MASK_HIGH */
	wrmsr
#else
#error "DCACHE_RAM_SIZE is not a power of 2 and setup code is missing"
#endif
	post_code(POST_CAR_FILL)

	/* Enable variable MTRRs */
	mov	$MTRR_DEF_TYPE_MSR, %ecx
	rdmsr
	or	$MTRR_DEF_TYPE_EN, %eax
	wrmsr

	/* Enable caching */
	mov	%cr0, %eax
	and	$~(X86_CR0_CD | X86_CR0_NW), %eax
	invd
	mov	%eax, %cr0

#if IS_ENABLED(CONFIG_INTEL_CAR_NEM)
	jmp	car_nem
#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS)
	jmp	car_cqos
#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED)
	jmp	car_nem_enhanced
#else
#error "No CAR mechanism selected"
#endif
	jmp	car_init_ret

/* MSR numbers of all fixed-range MTRRs, one 16-bit word each */
fixed_mtrr_list:
	.word	MTRR_FIX_64K_00000_MSR
	.word	MTRR_FIX_16K_80000_MSR
	.word	MTRR_FIX_16K_A0000_MSR
	.word	MTRR_FIX_4K_C0000_MSR
	.word	MTRR_FIX_4K_C8000_MSR
	.word	MTRR_FIX_4K_D0000_MSR
	.word	MTRR_FIX_4K_D8000_MSR
	.word	MTRR_FIX_4K_E0000_MSR
	.word	MTRR_FIX_4K_E8000_MSR
	.word	MTRR_FIX_4K_F0000_MSR
	.word	MTRR_FIX_4K_F8000_MSR
fixed_mtrr_list_size = . - fixed_mtrr_list

#if IS_ENABLED(CONFIG_INTEL_CAR_NEM)
/*
 * car_nem - no-eviction mode: set the setup bit in MSR_EVICT_CTL, zero-fill
 * the CAR region, set the run bit, then jump to car_init_ret.
 */
.global car_nem
car_nem:
	/* Disable cache eviction (setup stage) */
	mov	$MSR_EVICT_CTL, %ecx
	rdmsr
	or	$0x1, %eax
	wrmsr

	post_code(0x26)

	/* Clear the cache memory region. This will also fill up the cache */
	movl	$CONFIG_DCACHE_RAM_BASE, %edi
	movl	$CONFIG_DCACHE_RAM_SIZE, %ecx
	shr	$0x02, %ecx		/* bytes -> 32-bit words */
	xor	%eax, %eax
	cld
	rep	stosl

	post_code(0x27)

	/* Disable cache eviction (run stage) */
	mov	$MSR_EVICT_CTL, %ecx
	rdmsr
	or	$0x2, %eax
	wrmsr

	post_code(0x28)

	jmp	car_init_ret

#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS)
/*
 * car_cqos - Cache QoS based CAR: fill the CAR region while L2 QoS mask 0
 * selects the CAR bits, then switch to mask 1 (the complement, limited to
 * the CBM length) so that the filled region is no longer evicted.
 * Uses mm3 to carry CBM_LEN_MASK. Jumps to car_init_ret when done.
 */
.global car_cqos
car_cqos:
	/*
	 * Create CBM_LEN_MASK based on CBM_LEN
	 * Get CPUID.(EAX=10H, ECX=2H):EAX.CBM_LEN[bits 4:0]
	 */
	mov	$0x10, %eax
	mov	$0x2, %ecx
	cpuid
	and	$0x1f, %eax
	add	$1, %al

	mov	$1, %ebx
	mov	%al, %cl
	shl	%cl, %ebx
	sub	$1, %ebx		/* ebx = (1 << (CBM_LEN + 1)) - 1 */

	/* Store the CBM_LEN_MASK in mm3 for later use */
	movd	%ebx, %mm3

	/*
	 * Disable both L1 and L2 prefetcher. For yet-to-be-understood reason,
	 * prefetchers slow down filling cache with rep stos in CQOS mode.
	 */
	mov	$MSR_PREFETCH_CTL, %ecx
	rdmsr
	or	$(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax
	wrmsr

#if (CONFIG_DCACHE_RAM_SIZE == CONFIG_L2_CACHE_SIZE)
/*
 * If CAR size is set to full L2 size, mask is calculated as all-zeros.
 * This is not supported by the CPU/uCode.
 */
#error "CQOS CAR may not use whole L2 cache area"
#endif

	/* Calculate how many bits to be used for CAR */
	xor	%edx, %edx		/* high half of the dividend */
	mov	$CONFIG_DCACHE_RAM_SIZE, %eax	/* dividend */
	mov	$CONFIG_CACHE_QOS_SIZE_PER_BIT, %ecx	/* divisor */
	div	%ecx			/* result is in eax */
	mov	%eax, %ecx		/* save to ecx */
	mov	$1, %ebx
	shl	%cl, %ebx
	sub	$1, %ebx		/* resulting mask is in ebx */

	/* Set this mask for initial cache fill */
	mov	$MSR_L2_QOS_MASK(0), %ecx
	rdmsr				/* keep edx as read */
	mov	%ebx, %eax
	wrmsr

	/* Set CLOS selector to 0 */
	mov	$MSR_IA32_PQR_ASSOC, %ecx
	rdmsr
	and	$~MSR_IA32_PQR_ASSOC_MASK, %edx	/* select mask 0 */
	wrmsr

	/* We will need to block CAR region from evicts */
	mov	$MSR_L2_QOS_MASK(1), %ecx
	rdmsr
	/* Invert bits that are to be used for cache */
	mov	%ebx, %eax
	xor	$~0, %eax			/* invert 32 bits */

	/*
	 * Use CBM_LEN_MASK stored in mm3 to set bits based on Capacity Bit
	 * Mask Length.
	 */
	movd	%mm3, %ebx
	and	%ebx, %eax
	wrmsr

	post_code(0x26)

	/* Clear the cache memory region. This will also fill up the cache */
	movl	$CONFIG_DCACHE_RAM_BASE, %edi
	movl	$CONFIG_DCACHE_RAM_SIZE, %ecx
	shr	$0x02, %ecx		/* bytes -> 32-bit words */
	xor	%eax, %eax
	cld
	rep	stosl

	post_code(0x27)

	/* Cache is populated. Use mask 1 that will block evicts */
	mov	$MSR_IA32_PQR_ASSOC, %ecx
	rdmsr
	and	$~MSR_IA32_PQR_ASSOC_MASK, %edx	/* clear index bits first */
	or	$1, %edx			/* select mask 1 */
	wrmsr

	/* Enable prefetchers */
	mov	$MSR_PREFETCH_CTL, %ecx
	rdmsr
	and	$~(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax
	wrmsr

	post_code(0x28)

	jmp	car_init_ret

#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED)
/*
 * car_nem_enhanced - no-eviction mode combined with LLC way masks: program
 * IA32_L3_MASK_1/2, fill the CAR region under way mask 2, switch to way
 * mask 1, then set the NEM run bit. Jumps to car_init_ret when done.
 */
.global car_nem_enhanced
car_nem_enhanced:
	/* Disable cache eviction (setup stage) */
	mov	$MSR_EVICT_CTL, %ecx
	rdmsr
	or	$0x1, %eax
	wrmsr
	post_code(0x26)

	/* Create n-way set associativity of cache */
	xorl	%edi, %edi		/* edi = CPUID.4 sub-leaf index */
find_llc_subleaf:
	movl	%edi, %ecx
	movl	$0x04, %eax
	cpuid
	inc	%edi
	and	$0xe0, %al	/* EAX[7:5] = Cache Level */
	cmp	$0x60, %al	/* Check to see if it is LLC */
	jnz	find_llc_subleaf

	/*
	 * Set MSR 0xC91 IA32_L3_MASK_1 = 0xE/0xFE/0xFFE/0xFFFE
	 * for 4/8/12/16 way of LLC
	 */
	shr	$22, %ebx
	inc	%ebx
	/* Calculate n-way associativity of LLC */
	mov	%bl, %cl

	/*
	 * Maximizing RO cacheability while locking in the CAR to a
	 * single way since that particular way won't be victim candidate
	 * for evictions.
	 * This has been done after programming LLC_WAY_MASK_1 MSR
	 * with desired LLC way as mentioned below.
	 *
	 * Hence create Code and Data Size as per request
	 * Code Size (RO) : Up to 16M
	 * Data Size (RW) : Up to 256K
	 */
	movl	$0x01, %eax
	/*
	 * LLC Ways -> LLC_WAY_MASK_1:
	 *  4: 0x000E
	 *  8: 0x00FE
	 * 12: 0x0FFE
	 * 16: 0xFFFE
	 *
	 * These MSRs contain one bit per each way of LLC
	 * - If this bit is '0' - the way is protected from eviction
	 * - If this bit is '1' - the way is not protected from eviction
	 */
	shl	%cl, %eax
	subl	$0x02, %eax		/* (1 << ways) - 2: all ways but way 0 */
	movl	$MSR_IA32_L3_MASK_1, %ecx
	xorl	%edx, %edx
	wrmsr
	/*
	 * Set MSR 0xC92 IA32_L3_MASK_2 = 0x1
	 *
	 * For SKL SOC, data size remains 256K consistently.
	 * Hence, creating 1-way associative cache for Data
	 */
	mov	$MSR_IA32_L3_MASK_2, %ecx
	mov	$0x01, %eax
	xorl	%edx, %edx
	wrmsr
	/*
	 * Set MSR_IA32_PQR_ASSOC = 0x02
	 *
	 * Possible values:
	 * 0: Default value, no way mask should be applied
	 * 1: Apply way mask 1 to LLC
	 * 2: Apply way mask 2 to LLC
	 * 3: Shouldn't be used in NEM Mode
	 */
	movl	$MSR_IA32_PQR_ASSOC, %ecx
	movl	$0x02, %eax
	xorl	%edx, %edx
	wrmsr

	/* Zero-fill the CAR region while way mask 2 is in effect */
	movl	$CONFIG_DCACHE_RAM_BASE, %edi
	movl	$CONFIG_DCACHE_RAM_SIZE, %ecx
	shr	$0x02, %ecx		/* bytes -> 32-bit words */
	xor	%eax, %eax
	cld
	rep	stosl
	/*
	 * Set MSR_IA32_PQR_ASSOC = 0x01
	 * At this stage we apply LLC_WAY_MASK_1 to the cache.
	 * i.e. way 0 is protected from eviction.
	 */
	movl	$MSR_IA32_PQR_ASSOC, %ecx
	movl	$0x01, %eax
	xorl	%edx, %edx
	wrmsr

	post_code(0x27)
	/*
	 * Enable No-Eviction Mode Run State by setting
	 * NO_EVICT_MODE MSR 2E0h bit [1] = '1'.
	 */

	movl	$MSR_EVICT_CTL, %ecx
	rdmsr
	orl	$0x02, %eax
	wrmsr

	post_code(0x28)

	jmp	car_init_ret
#endif

#if CONFIG_IS_ENABLED(X86_16BIT_INIT)
_dt_ucode_base_size:
	/* These next two fields are filled in by binman */
.globl ucode_base
ucode_base:	/* Declared in microcode.h */
	.long	0			/* microcode base */
.globl ucode_size
ucode_size:	/* Declared in microcode.h */
	.long	0			/* microcode size */
	.long	CONFIG_SYS_MONITOR_BASE	/* code region base */
	.long	CONFIG_SYS_MONITOR_LEN	/* code region size */
#endif
+87
arch/x86/cpu/intel_common/car2_uninit.S
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright 2017 Intel Corp.
 * Copyright 2019 Google LLC
 * Taken from coreboot file exit_car.S
 */

#include <config.h>
#include <asm/msr-index.h>
#include <asm/mtrr.h>

/*
 * car_uninit - tear down cache-as-RAM
 *
 * Syntax: GNU as, AT&T operand order, 32-bit x86, preprocessed by cpp.
 *
 * Entry:  reached with a return address on top of the stack; it is popped
 *         into ebx immediately.
 * Action: clears the MTRR enable bits in MTRR_DEF_TYPE_MSR, then undoes the
 *         CAR mechanism selected by Kconfig (NEM, CQOS or NEM enhanced).
 * Exit:   indirect jump through ebx rather than ret, since the stack slot
 *         that held the return address may no longer be valid.
 * Writes: eax, ebx, ecx, edx and flags. Note that ebx is overwritten and
 *         not restored.
 */
.text
.global car_uninit
car_uninit:

	/*
	 * Retrieve return address from stack as it will get trashed below if
	 * execution is utilizing the cache-as-ram stack.
	 */
	pop	%ebx

	/* Disable MTRRs */
	mov	$(MTRR_DEF_TYPE_MSR), %ecx
	rdmsr
	and	$(~(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN)), %eax
	wrmsr

#if IS_ENABLED(CONFIG_INTEL_CAR_NEM)
.global car_nem_teardown
car_nem_teardown:

	/* invalidate cache contents */
	invd

	/* Knock down bit 1 then bit 0 of NEM control not combining steps */
	mov	$(MSR_EVICT_CTL), %ecx
	rdmsr
	and	$(~(1 << 1)), %eax
	wrmsr
	and	$(~(1 << 0)), %eax
	wrmsr

#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS)
.global car_cqos_teardown
car_cqos_teardown:

	/*
	 * Go back to all-evicting mode, set both masks to all-1s
	 *
	 * Only the low byte of eax is overwritten; the remaining bits keep
	 * the value just read by rdmsr.
	 * NOTE(review): this presumably relies on the capacity bit mask being
	 * at most 8 bits wide - confirm for the target SoC.
	 */
	mov	$MSR_L2_QOS_MASK(0), %ecx
	rdmsr
	mov	$~0, %al
	wrmsr

	mov	$MSR_L2_QOS_MASK(1), %ecx
	rdmsr
	mov	$~0, %al
	wrmsr

	/* Reset CLOS selector to 0 */
	mov	$MSR_IA32_PQR_ASSOC, %ecx
	rdmsr
	and	$~MSR_IA32_PQR_ASSOC_MASK, %edx
	wrmsr

#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED)
.global car_nem_enhanced_teardown
car_nem_enhanced_teardown:

	/* invalidate cache contents */
	invd

	/* Knock down bit 1 then bit 0 of NEM control not combining steps */
	mov	$(MSR_EVICT_CTL), %ecx
	rdmsr
	and	$(~(1 << 1)), %eax
	wrmsr
	and	$(~(1 << 0)), %eax
	wrmsr

	/* Reset CLOS selector to 0 */
	mov	$MSR_IA32_PQR_ASSOC, %ecx
	rdmsr
	and	$~MSR_IA32_PQR_ASSOC_MASK, %edx
	wrmsr
#endif

	/* Return to caller */
	jmp	*%ebx
+5 -7
arch/x86/include/asm/processor.h
··· 25 25 /* Length of the public header on Intel microcode blobs */ 26 26 #define UCODE_HEADER_LEN 0x30 27 27 28 - #ifndef __ASSEMBLY__ 29 - 30 28 /* 31 29 * This register is documented in (for example) the Intel Atom Processor E3800 32 30 * Product Family Datasheet in "PCU - Power Management Controller (PMC)". ··· 37 35 */ 38 36 #define IO_PORT_RESET 0xcf9 39 37 40 - enum { 41 - SYS_RST = 1 << 1, /* 0 for soft reset, 1 for hard reset */ 42 - RST_CPU = 1 << 2, /* initiate reset */ 43 - FULL_RST = 1 << 3, /* full power cycle */ 44 - }; 38 + #define SYS_RST (1 << 1) /* 0 for soft reset, 1 for hard reset */ 39 + #define RST_CPU (1 << 2) /* initiate reset */ 40 + #define FULL_RST (1 << 3) /* full power cycle */ 41 + 42 + #ifndef __ASSEMBLY__ 45 43 46 44 static inline __attribute__((always_inline)) void cpu_hlt(void) 47 45 {