x86/microcode: Fix CPU synchronization routine

Emanuel reported an issue with a hang during microcode update because my
dumb idea to use one atomic synchronization variable for both rendezvous
points - before and after the update - was simply bollocks:

microcode: microcode_reload_late: late_cpus: 4
microcode: __reload_late: cpu 2 entered
microcode: __reload_late: cpu 1 entered
microcode: __reload_late: cpu 3 entered
microcode: __reload_late: cpu 0 entered
microcode: __reload_late: cpu 1 left
microcode: Timeout while waiting for CPUs rendezvous, remaining: 1

CPU1 above would finish and leave, while the others would still be
spinning, waiting for it to join.
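
Schematically, the old single-counter flow looked like this (a condensed
model for illustration, not the verbatim old code; the entry loop's
timeout handling is omitted):

/* Condensed model of the old, broken scheme: one counter, two phases. */
static atomic_t late_cpus;	/* preset to num_online_cpus() */

static int broken_reload_late(void)
{
	atomic_dec(&late_cpus);			/* entry: count down towards zero */
	while (atomic_read(&late_cpus))		/* wait until everyone has arrived */
		cpu_relax();

	/* ... apply the microcode update, serialized under update_lock ... */

	atomic_inc(&late_cpus);			/* exit: reuse the same counter */
	while (atomic_read(&late_cpus) != num_online_cpus())
		cpu_relax();

	return 0;
}

The first CPU to finish bumps the counter back to non-zero, so every CPU
that has not yet observed the zero window keeps spinning in the entry
loop (the "remaining: 1" in the log above is that re-incremented
counter), while the finished CPU waits for a full count that can no
longer be reached.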

So do two synchronization atomics instead, which makes the code a lot more
straightforward.

Also, since the update is serialized and takes quite some time per
microcode engine, scale the exit timeout by the number of CPUs on the
system.

That's ok because the moment all CPUs are done, that timeout will be cut
short.
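
As a worked example (the CPU count here is hypothetical): with 64 CPUs
online, the exit rendezvous gets NSEC_PER_SEC * 64, a 64 second total
budget averaging one second of serialized update time per engine, while
the entry rendezvous keeps a plain second since arriving costs nothing:

/* Per-phase budgets handed to __wait_for_cpus(); hypothetical 64-CPU box. */
#define NSEC_PER_SEC	1000000000LL

long long entry_timeout = NSEC_PER_SEC;      /*  1 s: CPUs only have to show up */
long long exit_timeout  = NSEC_PER_SEC * 64; /* 64 s: updates run one at a time */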

Furthermore, panic when some of the CPUs time out when returning from a
microcode update: we can't allow a system where only some of the cores
are updated.

Also, as an optimization, do not do the exit sync if microcode wasn't
updated.

Reported-by: Emanuel Czirai <xftroxgpx@protonmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Emanuel Czirai <xftroxgpx@protonmail.com>
Tested-by: Ashok Raj <ashok.raj@intel.com>
Tested-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lkml.kernel.org/r/20180314183615.17629-2-bp@alien8.de

Authored by Borislav Petkov, committed by Thomas Gleixner (bb8c13d6, 2613f36e)

Changed files: arch/x86/kernel/cpu/microcode/core.c (+41 -27)
···
 	return -EINVAL;
 }
 
-static atomic_t late_cpus;
+static atomic_t late_cpus_in;
+static atomic_t late_cpus_out;
+
+static int __wait_for_cpus(atomic_t *t, long long timeout)
+{
+	int all_cpus = num_online_cpus();
+
+	atomic_inc(t);
+
+	while (atomic_read(t) < all_cpus) {
+		if (timeout < SPINUNIT) {
+			pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+				all_cpus - atomic_read(t));
+			return 1;
+		}
+
+		ndelay(SPINUNIT);
+		timeout -= SPINUNIT;
+
+		touch_nmi_watchdog();
+	}
+	return 0;
+}
 
 /*
  * Returns:
···
  */
 static int __reload_late(void *info)
 {
-	unsigned int timeout = NSEC_PER_SEC;
-	int all_cpus = num_online_cpus();
 	int cpu = smp_processor_id();
 	enum ucode_state err;
 	int ret = 0;
-
-	atomic_dec(&late_cpus);
 
 	/*
 	 * Wait for all CPUs to arrive. A load will not be attempted unless all
 	 * CPUs show up.
 	 * */
-	while (atomic_read(&late_cpus)) {
-		if (timeout < SPINUNIT) {
-			pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
-				atomic_read(&late_cpus));
-			return -1;
-		}
-
-		ndelay(SPINUNIT);
-		timeout -= SPINUNIT;
-
-		touch_nmi_watchdog();
-	}
+	if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
+		return -1;
 
 	spin_lock(&update_lock);
 	apply_microcode_local(&err);
···
 
 	if (err > UCODE_NFOUND) {
 		pr_warn("Error reloading microcode on CPU %d\n", cpu);
-		ret = -1;
-	} else if (err == UCODE_UPDATED) {
+		return -1;
+	/* siblings return UCODE_OK because their engine got updated already */
+	} else if (err == UCODE_UPDATED || err == UCODE_OK) {
 		ret = 1;
+	} else {
+		return ret;
 	}
 
-	atomic_inc(&late_cpus);
-
-	while (atomic_read(&late_cpus) != all_cpus)
-		cpu_relax();
+	/*
+	 * Increase the wait timeout to a safe value here since we're
+	 * serializing the microcode update and that could take a while on a
+	 * large number of CPUs. And that is fine as the *actual* timeout will
+	 * be determined by the last CPU finished updating and thus cut short.
+	 */
+	if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
+		panic("Timeout during microcode update!\n");
 
 	return ret;
 }
···
 {
 	int ret;
 
-	atomic_set(&late_cpus, num_online_cpus());
+	atomic_set(&late_cpus_in, 0);
+	atomic_set(&late_cpus_out, 0);
 
 	ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
-	if (ret < 0)
-		return ret;
-	else if (ret > 0)
+	if (ret > 0)
 		microcode_check();
 
 	return ret;
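
To play with the pattern outside the kernel, here is a minimal
user-space model of the two-counter rendezvous in C11 (illustration
only: the names and thread count are made up, and the kernel's timeout
and NMI-watchdog handling is left out):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

#define NTHREADS 4

static atomic_int phase_in;	/* counts threads entering the "update" */
static atomic_int phase_out;	/* counts threads leaving the "update" */

static void wait_for_all(atomic_int *t)
{
	atomic_fetch_add(t, 1);
	while (atomic_load(t) < NTHREADS)
		;	/* spin; the kernel version ticks a timeout here */
}

static void *worker(void *arg)
{
	long id = (long)arg;

	wait_for_all(&phase_in);	/* rendezvous before the update */
	printf("thread %ld: updating\n", id);
	wait_for_all(&phase_out);	/* rendezvous after the update */

	return NULL;
}

int main(void)
{
	pthread_t th[NTHREADS];
	long i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&th[i], NULL, worker, (void *)i);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(th[i], NULL);

	return 0;
}

Build with e.g. "cc -std=c11 -pthread". Because each phase has its own
counter, a thread that races through the update and bumps phase_out can
never make a straggler in the phase_in loop miss its exit condition,
which is exactly the property the single shared counter lacked.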