Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'x86_microcode_for_v6.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 microcode loading updates from Borislav Petkov:

- Add microcode staging support on Intel: it moves the slow microcode
blobs loading to a non-critical path so that microcode loading
latencies are kept at minimum. The actual "directing" the hardware to
load microcode is the only step which is done on the critical path.

This scheme is also opportunistic as in: on a failure, the machinery
falls back to normal loading

- Add the capability to the AMD side of the loader to select one of two
per-family/model/stepping patches: one is pre-Entrysign and the other
is post-Entrysign; with the goal to take care of machines which
haven't updated their BIOS yet - something they should absolutely do
as this is the only proper Entrysign fix

- Other small cleanups and fixlets

* tag 'x86_microcode_for_v6.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/microcode: Mark early_parse_cmdline() as __init
x86/microcode/AMD: Select which microcode patch to load
x86/microcode/intel: Enable staging when available
x86/microcode/intel: Support mailbox transfer
x86/microcode/intel: Implement staging handler
x86/microcode/intel: Define staging state struct
x86/microcode/intel: Establish staging control logic
x86/microcode: Introduce staging step to reduce late-loading time
x86/cpu/topology: Make primary thread mask available with SMP=n

+468 -56
+10
arch/x86/include/asm/msr-index.h
··· 166 166 * Processor MMIO stale data 167 167 * vulnerabilities. 168 168 */ 169 + #define ARCH_CAP_MCU_ENUM BIT(16) /* 170 + * Indicates the presence of microcode update 171 + * feature enumeration and status information. 172 + */ 169 173 #define ARCH_CAP_FB_CLEAR BIT(17) /* 170 174 * VERW clears CPU fill buffer 171 175 * even on MDS_NO CPUs. ··· 953 949 #define MSR_IA32_APICBASE_BASE (0xfffff<<12) 954 950 955 951 #define MSR_IA32_UCODE_WRITE 0x00000079 952 + 953 + #define MSR_IA32_MCU_ENUMERATION 0x0000007b 954 + #define MCU_STAGING BIT(4) 955 + 956 956 #define MSR_IA32_UCODE_REV 0x0000008b 957 957 958 958 /* Intel SGX Launch Enclave Public Key Hash MSRs */ ··· 1253 1245 #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 1254 1246 #define MSR_IA32_VMX_VMFUNC 0x00000491 1255 1247 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 1248 + 1249 + #define MSR_IA32_MCU_STAGING_MBOX_ADDR 0x000007a5 1256 1250 1257 1251 /* Resctrl MSRs: */ 1258 1252 /* - Intel: */
+6 -6
arch/x86/include/asm/topology.h
··· 218 218 return __amd_nodes_per_pkg; 219 219 } 220 220 221 + #else /* CONFIG_SMP */ 222 + static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } 223 + static inline int topology_max_smt_threads(void) { return 1; } 224 + static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; } 225 + #endif /* !CONFIG_SMP */ 226 + 221 227 extern struct cpumask __cpu_primary_thread_mask; 222 228 #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) 223 229 ··· 246 240 return pcpu >= 0 ? cpu_online(pcpu) : false; 247 241 } 248 242 #define topology_is_core_online topology_is_core_online 249 - 250 - #else /* CONFIG_SMP */ 251 - static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } 252 - static inline int topology_max_smt_threads(void) { return 1; } 253 - static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; } 254 - #endif /* !CONFIG_SMP */ 255 243 256 244 static inline void arch_fix_phys_package_id(int num, u32 slot) 257 245 {
+72 -41
arch/x86/kernel/cpu/microcode/amd.c
··· 186 186 return p.ucode_rev; 187 187 } 188 188 189 + static u32 get_cutoff_revision(u32 rev) 190 + { 191 + switch (rev >> 8) { 192 + case 0x80012: return 0x8001277; break; 193 + case 0x80082: return 0x800820f; break; 194 + case 0x83010: return 0x830107c; break; 195 + case 0x86001: return 0x860010e; break; 196 + case 0x86081: return 0x8608108; break; 197 + case 0x87010: return 0x8701034; break; 198 + case 0x8a000: return 0x8a0000a; break; 199 + case 0xa0010: return 0xa00107a; break; 200 + case 0xa0011: return 0xa0011da; break; 201 + case 0xa0012: return 0xa001243; break; 202 + case 0xa0082: return 0xa00820e; break; 203 + case 0xa1011: return 0xa101153; break; 204 + case 0xa1012: return 0xa10124e; break; 205 + case 0xa1081: return 0xa108109; break; 206 + case 0xa2010: return 0xa20102f; break; 207 + case 0xa2012: return 0xa201212; break; 208 + case 0xa4041: return 0xa404109; break; 209 + case 0xa5000: return 0xa500013; break; 210 + case 0xa6012: return 0xa60120a; break; 211 + case 0xa7041: return 0xa704109; break; 212 + case 0xa7052: return 0xa705208; break; 213 + case 0xa7080: return 0xa708009; break; 214 + case 0xa70c0: return 0xa70C009; break; 215 + case 0xaa001: return 0xaa00116; break; 216 + case 0xaa002: return 0xaa00218; break; 217 + case 0xb0021: return 0xb002146; break; 218 + case 0xb0081: return 0xb008111; break; 219 + case 0xb1010: return 0xb101046; break; 220 + case 0xb2040: return 0xb204031; break; 221 + case 0xb4040: return 0xb404031; break; 222 + case 0xb4041: return 0xb404101; break; 223 + case 0xb6000: return 0xb600031; break; 224 + case 0xb6080: return 0xb608031; break; 225 + case 0xb7000: return 0xb700031; break; 226 + default: break; 227 + 228 + } 229 + return 0; 230 + } 231 + 189 232 static bool need_sha_check(u32 cur_rev) 190 233 { 234 + u32 cutoff; 235 + 191 236 if (!cur_rev) { 192 237 cur_rev = cpuid_to_ucode_rev(bsp_cpuid_1_eax); 193 238 pr_info_once("No current revision, generating the lowest one: 0x%x\n", cur_rev); 194 239 } 195 240 196 - 
switch (cur_rev >> 8) { 197 - case 0x80012: return cur_rev <= 0x8001277; break; 198 - case 0x80082: return cur_rev <= 0x800820f; break; 199 - case 0x83010: return cur_rev <= 0x830107c; break; 200 - case 0x86001: return cur_rev <= 0x860010e; break; 201 - case 0x86081: return cur_rev <= 0x8608108; break; 202 - case 0x87010: return cur_rev <= 0x8701034; break; 203 - case 0x8a000: return cur_rev <= 0x8a0000a; break; 204 - case 0xa0010: return cur_rev <= 0xa00107a; break; 205 - case 0xa0011: return cur_rev <= 0xa0011da; break; 206 - case 0xa0012: return cur_rev <= 0xa001243; break; 207 - case 0xa0082: return cur_rev <= 0xa00820e; break; 208 - case 0xa1011: return cur_rev <= 0xa101153; break; 209 - case 0xa1012: return cur_rev <= 0xa10124e; break; 210 - case 0xa1081: return cur_rev <= 0xa108109; break; 211 - case 0xa2010: return cur_rev <= 0xa20102f; break; 212 - case 0xa2012: return cur_rev <= 0xa201212; break; 213 - case 0xa4041: return cur_rev <= 0xa404109; break; 214 - case 0xa5000: return cur_rev <= 0xa500013; break; 215 - case 0xa6012: return cur_rev <= 0xa60120a; break; 216 - case 0xa7041: return cur_rev <= 0xa704109; break; 217 - case 0xa7052: return cur_rev <= 0xa705208; break; 218 - case 0xa7080: return cur_rev <= 0xa708009; break; 219 - case 0xa70c0: return cur_rev <= 0xa70C009; break; 220 - case 0xaa001: return cur_rev <= 0xaa00116; break; 221 - case 0xaa002: return cur_rev <= 0xaa00218; break; 222 - case 0xb0021: return cur_rev <= 0xb002146; break; 223 - case 0xb0081: return cur_rev <= 0xb008111; break; 224 - case 0xb1010: return cur_rev <= 0xb101046; break; 225 - case 0xb2040: return cur_rev <= 0xb204031; break; 226 - case 0xb4040: return cur_rev <= 0xb404031; break; 227 - case 0xb4041: return cur_rev <= 0xb404101; break; 228 - case 0xb6000: return cur_rev <= 0xb600031; break; 229 - case 0xb6080: return cur_rev <= 0xb608031; break; 230 - case 0xb7000: return cur_rev <= 0xb700031; break; 231 - default: break; 232 - } 241 + cutoff = 
get_cutoff_revision(cur_rev); 242 + if (cutoff) 243 + return cur_rev <= cutoff; 233 244 234 245 pr_info("You should not be seeing this. Please send the following couple of lines to x86-<at>-kernel.org\n"); 235 246 pr_info("CPUID(1).EAX: 0x%x, current revision: 0x%x\n", bsp_cpuid_1_eax, cur_rev); ··· 505 494 { 506 495 u8 family = x86_family(bsp_cpuid_1_eax); 507 496 struct microcode_header_amd *mc_hdr; 497 + u32 cur_rev, cutoff, patch_rev; 508 498 u32 sh_psize; 509 499 u16 proc_id; 510 500 u8 patch_fam; ··· 545 533 proc_id = mc_hdr->processor_rev_id; 546 534 patch_fam = 0xf + (proc_id >> 12); 547 535 548 - ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam); 549 - 550 536 if (patch_fam != family) 551 537 return 1; 538 + 539 + cur_rev = get_patch_level(); 540 + 541 + /* No cutoff revision means old/unaffected by signing algorithm weakness => matches */ 542 + cutoff = get_cutoff_revision(cur_rev); 543 + if (!cutoff) 544 + goto ok; 545 + 546 + patch_rev = mc_hdr->patch_id; 547 + 548 + ucode_dbg("cur_rev: 0x%x, cutoff: 0x%x, patch_rev: 0x%x\n", 549 + cur_rev, cutoff, patch_rev); 550 + 551 + if (cur_rev <= cutoff && patch_rev <= cutoff) 552 + goto ok; 553 + 554 + if (cur_rev > cutoff && patch_rev > cutoff) 555 + goto ok; 556 + 557 + return 1; 558 + 559 + ok: 560 + ucode_dbg("Patch-ID 0x%08x: family: 0x%x\n", mc_hdr->patch_id, patch_fam); 552 561 553 562 return 0; 554 563 } ··· 638 605 } 639 606 640 607 mc = (struct microcode_amd *)(buf + SECTION_HDR_SIZE); 641 - 642 - ucode_dbg("patch_id: 0x%x\n", mc->hdr.patch_id); 643 608 644 609 if (mc_patch_matches(mc, eq_id)) { 645 610 desc->psize = patch_size;
+12 -1
arch/x86/kernel/cpu/microcode/core.c
··· 136 136 return dis_ucode_ldr; 137 137 } 138 138 139 - static void early_parse_cmdline(void) 139 + static void __init early_parse_cmdline(void) 140 140 { 141 141 char cmd_buf[64] = {}; 142 142 char *s, *p = cmd_buf; ··· 588 588 pr_err("Late microcode loading without minimal revision check.\n"); 589 589 pr_err("You should switch to early loading, if possible.\n"); 590 590 } 591 + 592 + /* 593 + * Pre-load the microcode image into a staging device. This 594 + * process is preemptible and does not require stopping CPUs. 595 + * Successful staging simplifies the subsequent late-loading 596 + * process, reducing rendezvous time. 597 + * 598 + * Even if the transfer fails, the update will proceed as usual. 599 + */ 600 + if (microcode_ops->use_staging) 601 + microcode_ops->stage_microcode(); 591 602 592 603 atomic_set(&late_cpus_in, num_online_cpus()); 593 604 atomic_set(&offline_in_nmi, 0);
+362
arch/x86/kernel/cpu/microcode/intel.c
··· 13 13 #define pr_fmt(fmt) "microcode: " fmt 14 14 #include <linux/earlycpio.h> 15 15 #include <linux/firmware.h> 16 + #include <linux/pci_ids.h> 16 17 #include <linux/uaccess.h> 17 18 #include <linux/initrd.h> 18 19 #include <linux/kernel.h> 20 + #include <linux/delay.h> 19 21 #include <linux/slab.h> 20 22 #include <linux/cpu.h> 21 23 #include <linux/uio.h> 24 + #include <linux/io.h> 22 25 #include <linux/mm.h> 23 26 24 27 #include <asm/cpu_device_id.h> ··· 35 32 static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; 36 33 37 34 #define UCODE_BSP_LOADED ((struct microcode_intel *)0x1UL) 35 + 36 + /* Defines for the microcode staging mailbox interface */ 37 + #define MBOX_REG_NUM 4 38 + #define MBOX_REG_SIZE sizeof(u32) 39 + 40 + #define MBOX_CONTROL_OFFSET 0x0 41 + #define MBOX_STATUS_OFFSET 0x4 42 + #define MBOX_WRDATA_OFFSET 0x8 43 + #define MBOX_RDDATA_OFFSET 0xc 44 + 45 + #define MASK_MBOX_CTRL_ABORT BIT(0) 46 + #define MASK_MBOX_CTRL_GO BIT(31) 47 + 48 + #define MASK_MBOX_STATUS_ERROR BIT(2) 49 + #define MASK_MBOX_STATUS_READY BIT(31) 50 + 51 + #define MASK_MBOX_RESP_SUCCESS BIT(0) 52 + #define MASK_MBOX_RESP_PROGRESS BIT(1) 53 + #define MASK_MBOX_RESP_ERROR BIT(2) 54 + 55 + #define MBOX_CMD_LOAD 0x3 56 + #define MBOX_OBJ_STAGING 0xb 57 + #define MBOX_HEADER(size) ((PCI_VENDOR_ID_INTEL) | \ 58 + (MBOX_OBJ_STAGING << 16) | \ 59 + ((u64)((size) / sizeof(u32)) << 32)) 60 + 61 + /* The size of each mailbox header */ 62 + #define MBOX_HEADER_SIZE sizeof(u64) 63 + /* The size of staging hardware response */ 64 + #define MBOX_RESPONSE_SIZE sizeof(u64) 65 + 66 + #define MBOX_XACTION_TIMEOUT_MS (10 * MSEC_PER_SEC) 38 67 39 68 /* Current microcode patch used in early patching on the APs. 
*/ 40 69 static struct microcode_intel *ucode_patch_va __read_mostly; ··· 87 52 unsigned int cksum; 88 53 unsigned int reserved[3]; 89 54 struct extended_signature sigs[]; 55 + }; 56 + 57 + /** 58 + * struct staging_state - Track the current staging process state 59 + * 60 + * @mmio_base: MMIO base address for staging 61 + * @ucode_len: Total size of the microcode image 62 + * @chunk_size: Size of each data piece 63 + * @bytes_sent: Total bytes transmitted so far 64 + * @offset: Current offset in the microcode image 65 + */ 66 + struct staging_state { 67 + void __iomem *mmio_base; 68 + unsigned int ucode_len; 69 + unsigned int chunk_size; 70 + unsigned int bytes_sent; 71 + unsigned int offset; 90 72 }; 91 73 92 74 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) ··· 349 297 } 350 298 351 299 return size ? NULL : patch; 300 + } 301 + 302 + static inline u32 read_mbox_dword(void __iomem *mmio_base) 303 + { 304 + u32 dword = readl(mmio_base + MBOX_RDDATA_OFFSET); 305 + 306 + /* Acknowledge read completion to the staging hardware */ 307 + writel(0, mmio_base + MBOX_RDDATA_OFFSET); 308 + return dword; 309 + } 310 + 311 + static inline void write_mbox_dword(void __iomem *mmio_base, u32 dword) 312 + { 313 + writel(dword, mmio_base + MBOX_WRDATA_OFFSET); 314 + } 315 + 316 + static inline u64 read_mbox_header(void __iomem *mmio_base) 317 + { 318 + u32 high, low; 319 + 320 + low = read_mbox_dword(mmio_base); 321 + high = read_mbox_dword(mmio_base); 322 + 323 + return ((u64)high << 32) | low; 324 + } 325 + 326 + static inline void write_mbox_header(void __iomem *mmio_base, u64 value) 327 + { 328 + write_mbox_dword(mmio_base, value); 329 + write_mbox_dword(mmio_base, value >> 32); 330 + } 331 + 332 + static void write_mbox_data(void __iomem *mmio_base, u32 *chunk, unsigned int chunk_bytes) 333 + { 334 + int i; 335 + 336 + /* 337 + * The MMIO space is mapped as Uncached (UC). 
Each write arrives 338 + * at the device as an individual transaction in program order. 339 + * The device can then reassemble the sequence accordingly. 340 + */ 341 + for (i = 0; i < chunk_bytes / sizeof(u32); i++) 342 + write_mbox_dword(mmio_base, chunk[i]); 343 + } 344 + 345 + /* 346 + * Prepare for a new microcode transfer: reset hardware and record the 347 + * image size. 348 + */ 349 + static void init_stage(struct staging_state *ss) 350 + { 351 + ss->ucode_len = get_totalsize(&ucode_patch_late->hdr); 352 + 353 + /* 354 + * Abort any ongoing process, effectively resetting the device. 355 + * Unlike regular mailbox data processing requests, this 356 + * operation does not require a status check. 357 + */ 358 + writel(MASK_MBOX_CTRL_ABORT, ss->mmio_base + MBOX_CONTROL_OFFSET); 359 + } 360 + 361 + /* 362 + * Update the chunk size and decide whether another chunk can be sent. 363 + * This accounts for remaining data and retry limits. 364 + */ 365 + static bool can_send_next_chunk(struct staging_state *ss, int *err) 366 + { 367 + /* A page size or remaining bytes if this is the final chunk */ 368 + ss->chunk_size = min(PAGE_SIZE, ss->ucode_len - ss->offset); 369 + 370 + /* 371 + * Each microcode image is divided into chunks, each at most 372 + * one page size. A 10-chunk image would typically require 10 373 + * transactions. 374 + * 375 + * However, the hardware managing the mailbox has limited 376 + * resources and may not cache the entire image, potentially 377 + * requesting the same chunk multiple times. 378 + * 379 + * To tolerate this behavior, allow up to twice the expected 380 + * number of transactions (i.e., a 10-chunk image can take up to 381 + * 20 attempts). 382 + * 383 + * If the number of attempts exceeds this limit, treat it as 384 + * exceeding the maximum allowed transfer size. 
385 + */ 386 + if (ss->bytes_sent + ss->chunk_size > ss->ucode_len * 2) { 387 + *err = -EMSGSIZE; 388 + return false; 389 + } 390 + 391 + *err = 0; 392 + return true; 393 + } 394 + 395 + /* 396 + * The hardware indicates completion by returning a sentinel end offset. 397 + */ 398 + static inline bool is_end_offset(u32 offset) 399 + { 400 + return offset == UINT_MAX; 401 + } 402 + 403 + /* 404 + * Determine whether staging is complete: either the hardware signaled 405 + * the end offset, or no more transactions are permitted (retry limit 406 + * reached). 407 + */ 408 + static inline bool staging_is_complete(struct staging_state *ss, int *err) 409 + { 410 + return is_end_offset(ss->offset) || !can_send_next_chunk(ss, err); 411 + } 412 + 413 + /* 414 + * Wait for the hardware to complete a transaction. 415 + * Return 0 on success, or an error code on failure. 416 + */ 417 + static int wait_for_transaction(struct staging_state *ss) 418 + { 419 + u32 timeout, status; 420 + 421 + /* Allow time for hardware to complete the operation: */ 422 + for (timeout = 0; timeout < MBOX_XACTION_TIMEOUT_MS; timeout++) { 423 + msleep(1); 424 + 425 + status = readl(ss->mmio_base + MBOX_STATUS_OFFSET); 426 + /* Break out early if the hardware is ready: */ 427 + if (status & MASK_MBOX_STATUS_READY) 428 + break; 429 + } 430 + 431 + /* Check for explicit error response */ 432 + if (status & MASK_MBOX_STATUS_ERROR) 433 + return -EIO; 434 + 435 + /* 436 + * Hardware has neither responded to the action nor signaled any 437 + * error. Treat this as a timeout. 438 + */ 439 + if (!(status & MASK_MBOX_STATUS_READY)) 440 + return -ETIMEDOUT; 441 + 442 + return 0; 443 + } 444 + 445 + /* 446 + * Transmit a chunk of the microcode image to the hardware. 447 + * Return 0 on success, or an error code on failure. 
448 + */ 449 + static int send_data_chunk(struct staging_state *ss, void *ucode_ptr) 450 + { 451 + u32 *src_chunk = ucode_ptr + ss->offset; 452 + u16 mbox_size; 453 + 454 + /* 455 + * Write a 'request' mailbox object in this order: 456 + * 1. Mailbox header includes total size 457 + * 2. Command header specifies the load operation 458 + * 3. Data section contains a microcode chunk 459 + * 460 + * Thus, the mailbox size is two headers plus the chunk size. 461 + */ 462 + mbox_size = MBOX_HEADER_SIZE * 2 + ss->chunk_size; 463 + write_mbox_header(ss->mmio_base, MBOX_HEADER(mbox_size)); 464 + write_mbox_header(ss->mmio_base, MBOX_CMD_LOAD); 465 + write_mbox_data(ss->mmio_base, src_chunk, ss->chunk_size); 466 + ss->bytes_sent += ss->chunk_size; 467 + 468 + /* Notify the hardware that the mailbox is ready for processing. */ 469 + writel(MASK_MBOX_CTRL_GO, ss->mmio_base + MBOX_CONTROL_OFFSET); 470 + 471 + return wait_for_transaction(ss); 472 + } 473 + 474 + /* 475 + * Retrieve the next offset from the hardware response. 476 + * Return 0 on success, or an error code on failure. 477 + */ 478 + static int fetch_next_offset(struct staging_state *ss) 479 + { 480 + const u64 expected_header = MBOX_HEADER(MBOX_HEADER_SIZE + MBOX_RESPONSE_SIZE); 481 + u32 offset, status; 482 + u64 header; 483 + 484 + /* 485 + * The 'response' mailbox returns three fields, in order: 486 + * 1. Header 487 + * 2. Next offset in the microcode image 488 + * 3. Status flags 489 + */ 490 + header = read_mbox_header(ss->mmio_base); 491 + offset = read_mbox_dword(ss->mmio_base); 492 + status = read_mbox_dword(ss->mmio_base); 493 + 494 + /* All valid responses must start with the expected header. */ 495 + if (header != expected_header) { 496 + pr_err_once("staging: invalid response header (0x%llx)\n", header); 497 + return -EBADR; 498 + } 499 + 500 + /* 501 + * Verify the offset: If not at the end marker, it must not 502 + * exceed the microcode image length. 
503 + */ 504 + if (!is_end_offset(offset) && offset > ss->ucode_len) { 505 + pr_err_once("staging: invalid offset (%u) past the image end (%u)\n", 506 + offset, ss->ucode_len); 507 + return -EINVAL; 508 + } 509 + 510 + /* Hardware may report errors explicitly in the status field */ 511 + if (status & MASK_MBOX_RESP_ERROR) 512 + return -EPROTO; 513 + 514 + ss->offset = offset; 515 + return 0; 516 + } 517 + 518 + /* 519 + * Handle the staging process using the mailbox MMIO interface. The 520 + * microcode image is transferred in chunks until completion. 521 + * Return 0 on success or an error code on failure. 522 + */ 523 + static int do_stage(u64 mmio_pa) 524 + { 525 + struct staging_state ss = {}; 526 + int err; 527 + 528 + ss.mmio_base = ioremap(mmio_pa, MBOX_REG_NUM * MBOX_REG_SIZE); 529 + if (WARN_ON_ONCE(!ss.mmio_base)) 530 + return -EADDRNOTAVAIL; 531 + 532 + init_stage(&ss); 533 + 534 + /* Perform the staging process while within the retry limit */ 535 + while (!staging_is_complete(&ss, &err)) { 536 + /* Send a chunk of microcode each time: */ 537 + err = send_data_chunk(&ss, ucode_patch_late); 538 + if (err) 539 + break; 540 + /* 541 + * Then, ask the hardware which piece of the image it 542 + * needs next. The same piece may be sent more than once. 543 + */ 544 + err = fetch_next_offset(&ss); 545 + if (err) 546 + break; 547 + } 548 + 549 + iounmap(ss.mmio_base); 550 + 551 + return err; 552 + } 553 + 554 + static void stage_microcode(void) 555 + { 556 + unsigned int pkg_id = UINT_MAX; 557 + int cpu, err; 558 + u64 mmio_pa; 559 + 560 + if (!IS_ALIGNED(get_totalsize(&ucode_patch_late->hdr), sizeof(u32))) { 561 + pr_err("Microcode image 32-bit misaligned (0x%x), staging failed.\n", 562 + get_totalsize(&ucode_patch_late->hdr)); 563 + return; 564 + } 565 + 566 + lockdep_assert_cpus_held(); 567 + 568 + /* 569 + * The MMIO address is unique per package, and all the SMT 570 + * primary threads are online here. 
Find each MMIO space by 571 + * their package IDs to avoid duplicate staging. 572 + */ 573 + for_each_cpu(cpu, cpu_primary_thread_mask) { 574 + if (topology_logical_package_id(cpu) == pkg_id) 575 + continue; 576 + 577 + pkg_id = topology_logical_package_id(cpu); 578 + 579 + err = rdmsrq_on_cpu(cpu, MSR_IA32_MCU_STAGING_MBOX_ADDR, &mmio_pa); 580 + if (WARN_ON_ONCE(err)) 581 + return; 582 + 583 + err = do_stage(mmio_pa); 584 + if (err) { 585 + pr_err("Error: staging failed (%d) for CPU%d at package %u.\n", 586 + err, cpu, pkg_id); 587 + return; 588 + } 589 + } 590 + 591 + pr_info("Staging of patch revision 0x%x succeeded.\n", ucode_patch_late->hdr.rev); 352 592 } 353 593 354 594 static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci, ··· 971 627 .collect_cpu_info = collect_cpu_info, 972 628 .apply_microcode = apply_microcode_late, 973 629 .finalize_late_load = finalize_late_load, 630 + .stage_microcode = stage_microcode, 974 631 .use_nmi = IS_ENABLED(CONFIG_X86_64), 975 632 }; 976 633 ··· 983 638 llc_size_per_core = (unsigned int)llc_size; 984 639 } 985 640 641 + static __init bool staging_available(void) 642 + { 643 + u64 val; 644 + 645 + val = x86_read_arch_cap_msr(); 646 + if (!(val & ARCH_CAP_MCU_ENUM)) 647 + return false; 648 + 649 + rdmsrq(MSR_IA32_MCU_ENUMERATION, val); 650 + return !!(val & MCU_STAGING); 651 + } 652 + 986 653 struct microcode_ops * __init init_intel_microcode(void) 987 654 { 988 655 struct cpuinfo_x86 *c = &boot_cpu_data; ··· 1003 646 cpu_has(c, X86_FEATURE_IA64)) { 1004 647 pr_err("Intel CPU family 0x%x not supported\n", c->x86); 1005 648 return NULL; 649 + } 650 + 651 + if (staging_available()) { 652 + microcode_intel_ops.use_staging = true; 653 + pr_info("Enabled staging feature.\n"); 1006 654 } 1007 655 1008 656 calc_llc_size_per_core(c);
+3 -1
arch/x86/kernel/cpu/microcode/internal.h
··· 31 31 * See also the "Synchronization" section in microcode_core.c. 32 32 */ 33 33 enum ucode_state (*apply_microcode)(int cpu); 34 + void (*stage_microcode)(void); 34 35 int (*collect_cpu_info)(int cpu, struct cpu_signature *csig); 35 36 void (*finalize_late_load)(int result); 36 37 unsigned int nmi_safe : 1, 37 - use_nmi : 1; 38 + use_nmi : 1, 39 + use_staging : 1; 38 40 }; 39 41 40 42 struct early_load_data {
-4
arch/x86/kernel/cpu/topology.c
··· 75 75 return phys_id == (u64)cpuid_to_apicid[cpu]; 76 76 } 77 77 78 - #ifdef CONFIG_SMP 79 78 static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) 80 79 { 81 80 if (!(apicid & (__max_threads_per_core - 1))) 82 81 cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); 83 82 } 84 - #else 85 - static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } 86 - #endif 87 83 88 84 /* 89 85 * Convert the APIC ID to a domain level ID by masking out the low bits
+3
arch/x86/kernel/cpu/topology_common.c
··· 16 16 unsigned int __amd_nodes_per_pkg __ro_after_init; 17 17 EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg); 18 18 19 + /* CPUs which are the primary SMT threads */ 20 + struct cpumask __cpu_primary_thread_mask __read_mostly; 21 + 19 22 void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, 20 23 unsigned int shift, unsigned int ncpus) 21 24 {
-3
arch/x86/kernel/smpboot.c
··· 103 103 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); 104 104 EXPORT_PER_CPU_SYMBOL(cpu_die_map); 105 105 106 - /* CPUs which are the primary SMT threads */ 107 - struct cpumask __cpu_primary_thread_mask __read_mostly; 108 - 109 106 /* Representing CPUs for which sibling maps can be computed */ 110 107 static cpumask_var_t cpu_sibling_setup_mask; 111 108