x86 MCE: Fix CPU hotplug problem with multiple multicore AMD CPUs

During CPU hot-remove the sysfs directory created by
threshold_create_bank(), defined in
arch/x86/kernel/cpu/mcheck/mce_amd_64.c, has to be removed before
its parent directory, created by mce_create_device(), defined in
arch/x86/kernel/cpu/mcheck/mce_64.c. Moreover, when the CPU in
question is hotplugged again, obviously the latter has to be created
before the former. At present, the right ordering is not enforced,
because all of these operations are carried out by CPU hotplug
notifiers which are not appropriately ordered with respect to each
other. This leads to serious problems on systems with two or more
multicore AMD CPUs, including during suspend and hibernation.

Fix the problem by placing threshold bank CPU hotplug callbacks in
mce_cpu_callback(), so that they are invoked at the right places,
if defined. Additionally, use kobject_del() to remove the sysfs
directory associated with the kobject created by
kobject_create_and_add() in threshold_create_bank(), to prevent the
kernel from crashing during CPU hotplug operations on systems with
two or more multicore AMD CPUs.

This patch fixes bug #11337.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Andi Kleen <andi@firstfloor.org>
Tested-by: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

authored by Rafael J. Wysocki and committed by Ingo Molnar 8735728e 91ede005

+11 -13
+5
arch/x86/kernel/cpu/mcheck/mce_64.c
··· 759 759 }; 760 760 761 761 DEFINE_PER_CPU(struct sys_device, device_mce); 762 + void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; 762 763 763 764 /* Why are there no generic functions for this? */ 764 765 #define ACCESSOR(name, var, start) \ ··· 884 883 case CPU_ONLINE: 885 884 case CPU_ONLINE_FROZEN: 886 885 mce_create_device(cpu); 886 + if (threshold_cpu_callback) 887 + threshold_cpu_callback(action, cpu); 887 888 break; 888 889 case CPU_DEAD: 889 890 case CPU_DEAD_FROZEN: 891 + if (threshold_cpu_callback) 892 + threshold_cpu_callback(action, cpu); 890 893 mce_remove_device(cpu); 891 894 break; 892 895 }
+5 -13
arch/x86/kernel/cpu/mcheck/mce_amd_64.c
··· 628 628 deallocate_threshold_block(cpu, bank); 629 629 630 630 free_out: 631 + kobject_del(b->kobj); 631 632 kobject_put(b->kobj); 632 633 kfree(b); 633 634 per_cpu(threshold_banks, cpu)[bank] = NULL; ··· 646 645 } 647 646 648 647 /* get notified when a cpu comes on/off */ 649 - static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, 650 - unsigned long action, void *hcpu) 648 + static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, 649 + unsigned int cpu) 651 650 { 652 - /* cpu was unsigned int to begin with */ 653 - unsigned int cpu = (unsigned long)hcpu; 654 - 655 651 if (cpu >= NR_CPUS) 656 - goto out; 652 + return; 657 653 658 654 switch (action) { 659 655 case CPU_ONLINE: ··· 664 666 default: 665 667 break; 666 668 } 667 - out: 668 - return NOTIFY_OK; 669 669 } 670 - 671 - static struct notifier_block threshold_cpu_notifier __cpuinitdata = { 672 - .notifier_call = threshold_cpu_callback, 673 - }; 674 670 675 671 static __init int threshold_init_device(void) 676 672 { ··· 676 684 if (err) 677 685 return err; 678 686 } 679 - register_hotcpu_notifier(&threshold_cpu_notifier); 687 + threshold_cpu_callback = amd_64_threshold_cpu_callback; 680 688 return 0; 681 689 } 682 690
+1
include/asm-x86/mce.h
··· 92 92 93 93 void mce_log(struct mce *m); 94 94 DECLARE_PER_CPU(struct sys_device, device_mce); 95 + extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 95 96 96 97 #ifdef CONFIG_X86_MCE_INTEL 97 98 void mce_intel_feature_init(struct cpuinfo_x86 *c);