work_on_cpu(): rewrite it to create a kernel thread on demand

Impact: circular locking bugfix

The various implementations (and proposed implementations) of
work_on_cpu() are vulnerable to deadlock because they all use queues of
some form.

Unrelated pieces of kernel code thus gain dependencies on each other:
if one work_on_cpu() caller holds a lock which some other work_on_cpu()
callback also takes, the kernel can (rarely) deadlock.
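To make the dependency concrete, here is a minimal sketch of the failure
mode (some_lock, callback_a, callback_b and caller_a are hypothetical
names invented for this illustration, not from the kernel source):

#include <linux/mutex.h>
#include <linux/workqueue.h>

static DEFINE_MUTEX(some_lock);

/* Subsystem B's callback takes some_lock. */
static long callback_b(void *arg)
{
	mutex_lock(&some_lock);
	/* ... */
	mutex_unlock(&some_lock);
	return 0;
}

/* Subsystem A's callback needs no locks at all. */
static long callback_a(void *arg)
{
	return 0;
}

/* Subsystem A, unrelated to B, calls work_on_cpu() under some_lock. */
static void caller_a(void)
{
	mutex_lock(&some_lock);
	/*
	 * With a queue-based implementation: if callback_b was already
	 * queued ahead of us on this CPU's single-threaded queue, it
	 * runs first and blocks on some_lock, which we hold.  Our own
	 * work item then never runs, the flush inside work_on_cpu()
	 * never returns, and A and B deadlock on each other.
	 */
	work_on_cpu(0, callback_a, NULL);
	mutex_unlock(&some_lock);
}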

Fix this by creating a short-lived kernel thread for each work_on_cpu()
invocation.

This is not terribly fast, but the only current caller of work_on_cpu() is
pci_call_probe().
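For reference, pci_call_probe() uses work_on_cpu() roughly as follows;
this is a simplified sketch of the drivers/pci/pci-driver.c logic of
this era, not the verbatim source:

#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/topology.h>
#include <linux/workqueue.h>

struct drv_dev_and_id {
	struct pci_driver *drv;
	struct pci_dev *dev;
	const struct pci_device_id *id;
};

static long local_pci_probe(void *_ddi)
{
	struct drv_dev_and_id *ddi = _ddi;

	return ddi->drv->probe(ddi->dev, ddi->id);
}

static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
			  const struct pci_device_id *id)
{
	int error, node, cpu;
	struct drv_dev_and_id ddi = { drv, dev, id };

	/*
	 * Run the probe on a CPU local to the device's NUMA node, so
	 * that the driver's probe-time allocations are node-local.
	 */
	node = dev_to_node(&dev->dev);
	if (node >= 0) {
		get_online_cpus();
		cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
		if (cpu < nr_cpu_ids)
			error = work_on_cpu(cpu, local_pci_probe, &ddi);
		else
			error = local_pci_probe(&ddi);
		put_online_cpus();
	} else
		error = local_pci_probe(&ddi);
	return error;
}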

It would be nice to find some other way of doing the node-local
allocations in the PCI probe code so that we can zap work_on_cpu()
altogether. The code there is rather nasty. I can't think of anything
simple at this time...

Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

Authored by Andrew Morton and committed by Rusty Russell (6b44003e, 1c99315b)

+18 -16
kernel/workqueue.c
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -966,20 +966,20 @@
 }
 
 #ifdef CONFIG_SMP
-static struct workqueue_struct *work_on_cpu_wq __read_mostly;
 
 struct work_for_cpu {
-	struct work_struct work;
+	struct completion completion;
 	long (*fn)(void *);
 	void *arg;
 	long ret;
 };
 
-static void do_work_for_cpu(struct work_struct *w)
+static int do_work_for_cpu(void *_wfc)
 {
-	struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
-
+	struct work_for_cpu *wfc = _wfc;
 	wfc->ret = wfc->fn(wfc->arg);
+	complete(&wfc->completion);
+	return 0;
 }
 
 /**
@@ -990,17 +990,23 @@
  *
  * This will return the value @fn returns.
  * It is up to the caller to ensure that the cpu doesn't go offline.
+ * The caller must not hold any locks which would prevent @fn from completing.
  */
 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 {
-	struct work_for_cpu wfc;
+	struct task_struct *sub_thread;
+	struct work_for_cpu wfc = {
+		.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
+		.fn = fn,
+		.arg = arg,
+	};
 
-	INIT_WORK(&wfc.work, do_work_for_cpu);
-	wfc.fn = fn;
-	wfc.arg = arg;
-	queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
-	flush_work(&wfc.work);
-
+	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
+	if (IS_ERR(sub_thread))
+		return PTR_ERR(sub_thread);
+	kthread_bind(sub_thread, cpu);
+	wake_up_process(sub_thread);
+	wait_for_completion(&wfc.completion);
 	return wfc.ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu);
@@ -1022,8 +1016,4 @@
 	hotcpu_notifier(workqueue_cpu_callback, 0);
 	keventd_wq = create_workqueue("events");
 	BUG_ON(!keventd_wq);
-#ifdef CONFIG_SMP
-	work_on_cpu_wq = create_workqueue("work_on_cpu");
-	BUG_ON(!work_on_cpu_wq);
-#endif
 }