Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: Use common outgoing-CPU-notification code

This commit replaces the open-coded CPU-offline notification with new
common code. Among other things, this change avoids calling scheduler
code using RCU from an offline CPU that RCU is ignoring. It also allows
Xen to notice at online time that the CPU did not go offline correctly.
Note that Xen has the surviving CPU carry out some cleanup operations,
so if the surviving CPU times out, these cleanup operations might have
been carried out while the outgoing CPU was still running. It might
therefore be unwise to bring this CPU back online, and this commit
avoids doing so.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: <x86@kernel.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: <xen-devel@lists.xenproject.org>

+44 -45
-2
arch/x86/include/asm/cpu.h
··· 34 34 #endif 35 35 #endif 36 36 37 - DECLARE_PER_CPU(int, cpu_state); 38 - 39 37 int mwait_usable(const struct cpuinfo_x86 *); 40 38 41 39 #endif /* _ASM_X86_CPU_H */
+1 -1
arch/x86/include/asm/smp.h
··· 150 150 } 151 151 152 152 void cpu_disable_common(void); 153 - void cpu_die_common(unsigned int cpu); 154 153 void native_smp_prepare_boot_cpu(void); 155 154 void native_smp_prepare_cpus(unsigned int max_cpus); 156 155 void native_smp_cpus_done(unsigned int max_cpus); 157 156 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); 158 157 int native_cpu_disable(void); 158 + int common_cpu_die(unsigned int cpu); 159 159 void native_cpu_die(unsigned int cpu); 160 160 void native_play_dead(void); 161 161 void play_dead_common(void);
+18 -21
arch/x86/kernel/smpboot.c
··· 77 77 #include <asm/realmode.h> 78 78 #include <asm/misc.h> 79 79 80 - /* State of each CPU */ 81 - DEFINE_PER_CPU(int, cpu_state) = { 0 }; 82 - 83 80 /* Number of siblings per CPU package */ 84 81 int smp_num_siblings = 1; 85 82 EXPORT_SYMBOL(smp_num_siblings); ··· 254 257 lock_vector_lock(); 255 258 set_cpu_online(smp_processor_id(), true); 256 259 unlock_vector_lock(); 257 - per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 260 + cpu_set_state_online(smp_processor_id()); 258 261 x86_platform.nmi_init(); 259 262 260 263 /* enable local interrupts */ ··· 945 948 */ 946 949 mtrr_save_state(); 947 950 948 - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 951 + /* x86 CPUs take themselves offline, so delayed offline is OK. */ 952 + err = cpu_check_up_prepare(cpu); 953 + if (err && err != -EBUSY) 954 + return err; 949 955 950 956 /* the FPU context is blank, nobody can own it */ 951 957 __cpu_disable_lazy_restore(cpu); ··· 1191 1191 switch_to_new_gdt(me); 1192 1192 /* already set me in cpu_online_mask in boot_cpu_init() */ 1193 1193 cpumask_set_cpu(me, cpu_callout_mask); 1194 - per_cpu(cpu_state, me) = CPU_ONLINE; 1194 + cpu_set_state_online(me); 1195 1195 } 1196 1196 1197 1197 void __init native_smp_cpus_done(unsigned int max_cpus) ··· 1318 1318 numa_remove_cpu(cpu); 1319 1319 } 1320 1320 1321 - static DEFINE_PER_CPU(struct completion, die_complete); 1322 - 1323 1321 void cpu_disable_common(void) 1324 1322 { 1325 1323 int cpu = smp_processor_id(); 1326 - 1327 - init_completion(&per_cpu(die_complete, smp_processor_id())); 1328 1324 1329 1325 remove_siblinginfo(cpu); 1330 1326 ··· 1345 1349 return 0; 1346 1350 } 1347 1351 1348 - void cpu_die_common(unsigned int cpu) 1352 + int common_cpu_die(unsigned int cpu) 1349 1353 { 1350 - wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ); 1351 - } 1354 + int ret = 0; 1352 1355 1353 - void native_cpu_die(unsigned int cpu) 1354 - { 1355 1356 /* We don't do anything here: idle task is faking death itself. 
*/ 1356 1357 1357 - cpu_die_common(cpu); 1358 - 1359 1358 /* They ack this in play_dead() by setting CPU_DEAD */ 1360 - if (per_cpu(cpu_state, cpu) == CPU_DEAD) { 1359 + if (cpu_wait_death(cpu, 5)) { 1361 1360 if (system_state == SYSTEM_RUNNING) 1362 1361 pr_info("CPU %u is now offline\n", cpu); 1363 1362 } else { 1364 1363 pr_err("CPU %u didn't die...\n", cpu); 1364 + ret = -1; 1365 1365 } 1366 + 1367 + return ret; 1368 + } 1369 + 1370 + void native_cpu_die(unsigned int cpu) 1371 + { 1372 + common_cpu_die(cpu); 1366 1373 } 1367 1374 1368 1375 void play_dead_common(void) ··· 1374 1375 reset_lazy_tlbstate(); 1375 1376 amd_e400_remove_cpu(raw_smp_processor_id()); 1376 1377 1377 - mb(); 1378 1378 /* Ack it */ 1379 - __this_cpu_write(cpu_state, CPU_DEAD); 1380 - complete(&per_cpu(die_complete, smp_processor_id())); 1379 + (void)cpu_report_death(); 1381 1380 1382 1381 /* 1383 1382 * With physical CPU hotplug, we should halt the cpu
+25 -21
arch/x86/xen/smp.c
··· 90 90 91 91 set_cpu_online(cpu, true); 92 92 93 - this_cpu_write(cpu_state, CPU_ONLINE); 94 - 95 - wmb(); 93 + cpu_set_state_online(cpu); /* Implies full memory barrier. */ 96 94 97 95 /* We can take interrupts now: we're officially "up". */ 98 96 local_irq_enable(); 99 - 100 - wmb(); /* make sure everything is out */ 101 97 } 102 98 103 99 /* ··· 455 459 xen_setup_timer(cpu); 456 460 xen_init_lock_cpu(cpu); 457 461 458 - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 462 + /* 463 + * PV VCPUs are always successfully taken down (see 'while' loop 464 + * in xen_cpu_die()), so -EBUSY is an error. 465 + */ 466 + rc = cpu_check_up_prepare(cpu); 467 + if (rc) 468 + return rc; 459 469 460 470 /* make sure interrupts start blocked */ 461 471 per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; ··· 481 479 rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); 482 480 BUG_ON(rc); 483 481 484 - while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { 482 + while (cpu_report_state(cpu) != CPU_ONLINE) 485 483 HYPERVISOR_sched_op(SCHEDOP_yield, NULL); 486 - barrier(); 487 - } 488 484 489 485 return 0; 490 486 } ··· 511 511 schedule_timeout(HZ/10); 512 512 } 513 513 514 - cpu_die_common(cpu); 515 - 516 - xen_smp_intr_free(cpu); 517 - xen_uninit_lock_cpu(cpu); 518 - xen_teardown_timer(cpu); 514 + if (common_cpu_die(cpu) == 0) { 515 + xen_smp_intr_free(cpu); 516 + xen_uninit_lock_cpu(cpu); 517 + xen_teardown_timer(cpu); 518 + } 519 519 } 520 520 521 521 static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ ··· 747 747 static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) 748 748 { 749 749 int rc; 750 + 751 + /* 752 + * This can happen if CPU was offlined earlier and 753 + * offlining timed out in common_cpu_die(). 
754 + */ 755 + if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) { 756 + xen_smp_intr_free(cpu); 757 + xen_uninit_lock_cpu(cpu); 758 + } 759 + 750 760 /* 751 761 * xen_smp_intr_init() needs to run before native_cpu_up() 752 762 * so that IPI vectors are set up on the booting CPU before ··· 778 768 return rc; 779 769 } 780 770 781 - static void xen_hvm_cpu_die(unsigned int cpu) 782 - { 783 - xen_cpu_die(cpu); 784 - native_cpu_die(cpu); 785 - } 786 - 787 771 void __init xen_hvm_smp_init(void) 788 772 { 789 773 if (!xen_have_vector_callback) ··· 785 781 smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; 786 782 smp_ops.smp_send_reschedule = xen_smp_send_reschedule; 787 783 smp_ops.cpu_up = xen_hvm_cpu_up; 788 - smp_ops.cpu_die = xen_hvm_cpu_die; 784 + smp_ops.cpu_die = xen_cpu_die; 789 785 smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; 790 786 smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; 791 787 smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu;