Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "x86/smpboot: Initialize secondary CPU only if master CPU will wait for it"

This reverts commit 3e1a878b7ccdb31da6d9d2b855c72ad87afeba3f.

It came in very late, and already has one reported failure: Sitsofe
reports that the current tree fails to boot on his EeePC, and bisected
it down to this. Rather than waste time trying to figure out what's
wrong, just revert it.

Reported-by: Sitsofe Wheeler <sitsofe@gmail.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

+81 -49
+11 -16
arch/x86/kernel/cpu/common.c
··· 1221 #define dbg_restore_debug_regs() 1222 #endif /* ! CONFIG_KGDB */ 1223 1224 - static void wait_for_master_cpu(int cpu) 1225 - { 1226 - /* 1227 - * wait for ACK from master CPU before continuing 1228 - * with AP initialization 1229 - */ 1230 - WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); 1231 - while (!cpumask_test_cpu(cpu, cpu_callout_mask)) 1232 - cpu_relax(); 1233 - } 1234 - 1235 /* 1236 * cpu_init() initializes state that is per-CPU. Some data is already 1237 * initialized (naturally) in the bootstrap process, such as the GDT ··· 1236 struct task_struct *me; 1237 struct tss_struct *t; 1238 unsigned long v; 1239 - int cpu = stack_smp_processor_id(); 1240 int i; 1241 - 1242 - wait_for_master_cpu(cpu); 1243 1244 /* 1245 * Load microcode on this cpu if a valid microcode is available. ··· 1245 */ 1246 load_ucode_ap(); 1247 1248 t = &per_cpu(init_tss, cpu); 1249 oist = &per_cpu(orig_ist, cpu); 1250 ··· 1256 #endif 1257 1258 me = current; 1259 1260 pr_debug("Initializing CPU#%d\n", cpu); 1261 ··· 1336 struct tss_struct *t = &per_cpu(init_tss, cpu); 1337 struct thread_struct *thread = &curr->thread; 1338 1339 - wait_for_master_cpu(cpu); 1340 - 1341 show_ucode_info_early(); 1342 1343 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1344
··· 1221 #define dbg_restore_debug_regs() 1222 #endif /* ! CONFIG_KGDB */ 1223 1224 /* 1225 * cpu_init() initializes state that is per-CPU. Some data is already 1226 * initialized (naturally) in the bootstrap process, such as the GDT ··· 1247 struct task_struct *me; 1248 struct tss_struct *t; 1249 unsigned long v; 1250 + int cpu; 1251 int i; 1252 1253 /* 1254 * Load microcode on this cpu if a valid microcode is available. ··· 1258 */ 1259 load_ucode_ap(); 1260 1261 + cpu = stack_smp_processor_id(); 1262 t = &per_cpu(init_tss, cpu); 1263 oist = &per_cpu(orig_ist, cpu); 1264 ··· 1268 #endif 1269 1270 me = current; 1271 + 1272 + if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) 1273 + panic("CPU#%d already initialized!\n", cpu); 1274 1275 pr_debug("Initializing CPU#%d\n", cpu); 1276 ··· 1345 struct tss_struct *t = &per_cpu(init_tss, cpu); 1346 struct thread_struct *thread = &curr->thread; 1347 1348 show_ucode_info_early(); 1349 + 1350 + if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { 1351 + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 1352 + for (;;) 1353 + local_irq_enable(); 1354 + } 1355 1356 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1357
+70 -33
arch/x86/kernel/smpboot.c
··· 111 static void smp_callin(void) 112 { 113 int cpuid, phys_id; 114 115 /* 116 * If waken up by an INIT in an 82489DX configuration ··· 130 * (This works even if the APIC is not enabled.) 131 */ 132 phys_id = read_apic_id(); 133 134 /* 135 * the boot CPU has finished the init stage and is spinning ··· 750 unsigned long start_ip = real_mode_header->trampoline_start; 751 752 unsigned long boot_error = 0; 753 int cpu0_nmi_registered = 0; 754 - unsigned long timeout; 755 756 /* Just in case we booted with a single CPU. */ 757 alternatives_enable_smp(); ··· 799 } 800 801 /* 802 - * AP might wait on cpu_callout_mask in cpu_init() with 803 - * cpu_initialized_mask set if previous attempt to online 804 - * it timed-out. Clear cpu_initialized_mask so that after 805 - * INIT/SIPI it could start with a clean state. 806 - */ 807 - cpumask_clear_cpu(cpu, cpu_initialized_mask); 808 - smp_mb(); 809 - 810 - /* 811 * Wake up a CPU in difference cases: 812 * - Use the method in the APIC driver if it's defined 813 * Otherwise, ··· 810 boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, 811 &cpu0_nmi_registered); 812 813 - 814 if (!boot_error) { 815 /* 816 - * Wait 10s total for a response from AP 817 */ 818 - boot_error = -1; 819 - timeout = jiffies + 10*HZ; 820 - while (time_before(jiffies, timeout)) { 821 - if (cpumask_test_cpu(cpu, cpu_initialized_mask)) { 822 - /* 823 - * Tell AP to proceed with initialization 824 - */ 825 - cpumask_set_cpu(cpu, cpu_callout_mask); 826 - boot_error = 0; 827 - break; 828 - } 829 udelay(100); 830 - schedule(); 831 - } 832 - } 833 - 834 - if (!boot_error) { 835 - /* 836 - * Wait till AP completes initial initialization 837 - */ 838 - while (!cpumask_test_cpu(cpu, cpu_callin_mask)) { 839 /* 840 * Allow other tasks to run while we wait for the 841 * AP to come online. This also gives a chance 842 * for the MTRR work(triggered by the AP coming online) 843 * to be completed in the stop machine context. 844 */ 845 - udelay(100); 846 schedule(); 847 } 848 } 849 850 /* mark "stuck" area as not stuck */
··· 111 static void smp_callin(void) 112 { 113 int cpuid, phys_id; 114 + unsigned long timeout; 115 116 /* 117 * If waken up by an INIT in an 82489DX configuration ··· 129 * (This works even if the APIC is not enabled.) 130 */ 131 phys_id = read_apic_id(); 132 + if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { 133 + panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 134 + phys_id, cpuid); 135 + } 136 + pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); 137 + 138 + /* 139 + * STARTUP IPIs are fragile beasts as they might sometimes 140 + * trigger some glue motherboard logic. Complete APIC bus 141 + * silence for 1 second, this overestimates the time the 142 + * boot CPU is spending to send the up to 2 STARTUP IPIs 143 + * by a factor of two. This should be enough. 144 + */ 145 + 146 + /* 147 + * Waiting 2s total for startup (udelay is not yet working) 148 + */ 149 + timeout = jiffies + 2*HZ; 150 + while (time_before(jiffies, timeout)) { 151 + /* 152 + * Has the boot CPU finished it's STARTUP sequence? 153 + */ 154 + if (cpumask_test_cpu(cpuid, cpu_callout_mask)) 155 + break; 156 + cpu_relax(); 157 + } 158 + 159 + if (!time_before(jiffies, timeout)) { 160 + panic("%s: CPU%d started up but did not get a callout!\n", 161 + __func__, cpuid); 162 + } 163 164 /* 165 * the boot CPU has finished the init stage and is spinning ··· 718 unsigned long start_ip = real_mode_header->trampoline_start; 719 720 unsigned long boot_error = 0; 721 + int timeout; 722 int cpu0_nmi_registered = 0; 723 724 /* Just in case we booted with a single CPU. */ 725 alternatives_enable_smp(); ··· 767 } 768 769 /* 770 * Wake up a CPU in difference cases: 771 * - Use the method in the APIC driver if it's defined 772 * Otherwise, ··· 787 boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, 788 &cpu0_nmi_registered); 789 790 if (!boot_error) { 791 /* 792 + * allow APs to start initializing. 793 */ 794 + pr_debug("Before Callout %d\n", cpu); 795 + cpumask_set_cpu(cpu, cpu_callout_mask); 796 + pr_debug("After Callout %d\n", cpu); 797 + 798 + /* 799 + * Wait 5s total for a response 800 + */ 801 + for (timeout = 0; timeout < 50000; timeout++) { 802 + if (cpumask_test_cpu(cpu, cpu_callin_mask)) 803 + break; /* It has booted */ 804 udelay(100); 805 /* 806 * Allow other tasks to run while we wait for the 807 * AP to come online. This also gives a chance 808 * for the MTRR work(triggered by the AP coming online) 809 * to be completed in the stop machine context. 810 */ 811 schedule(); 812 } 813 + 814 + if (cpumask_test_cpu(cpu, cpu_callin_mask)) { 815 + print_cpu_msr(&cpu_data(cpu)); 816 + pr_debug("CPU%d: has booted.\n", cpu); 817 + } else { 818 + boot_error = 1; 819 + if (*trampoline_status == 0xA5A5A5A5) 820 + /* trampoline started but...? */ 821 + pr_err("CPU%d: Stuck ??\n", cpu); 822 + else 823 + /* trampoline code not run */ 824 + pr_err("CPU%d: Not responding\n", cpu); 825 + if (apic->inquire_remote_apic) 826 + apic->inquire_remote_apic(apicid); 827 + } 828 + } 829 + 830 + if (boot_error) { 831 + /* Try to put things back the way they were before ... */ 832 + numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 833 + 834 + /* was set by do_boot_cpu() */ 835 + cpumask_clear_cpu(cpu, cpu_callout_mask); 836 + 837 + /* was set by cpu_init() */ 838 + cpumask_clear_cpu(cpu, cpu_initialized_mask); 839 } 840 841 /* mark "stuck" area as not stuck */