Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kernel/workqueue: Use dynamic lockdep keys for workqueues

The following commit:

87915adc3f0a ("workqueue: re-add lockdep dependencies for flushing")

improved deadlock checking in the workqueue implementation. Unfortunately
that patch also introduced a few false positive lockdep complaints.

This patch suppresses these false positives by allocating the workqueue mutex
lockdep key dynamically.

An example of a false positive lockdep complaint suppressed by this patch
can be found below. The root cause of the lockdep complaint shown below
is that the direct I/O code can call alloc_workqueue() from inside a work
item created by another alloc_workqueue() call and that both workqueues
share the same lockdep key. This patch avoids triggering that lockdep
complaint by allocating the workqueue lockdep keys dynamically.

In other words, this patch guarantees that a unique lockdep key is
associated with each workqueue mutex.

======================================================
WARNING: possible circular locking dependency detected
4.19.0-dbg+ #1 Not tainted
fio/4129 is trying to acquire lock:
00000000a01cfe1a ((wq_completion)"dio/%s"sb->s_id){+.+.}, at: flush_workqueue+0xd0/0x970

but task is already holding lock:
00000000a0acecf9 (&sb->s_type->i_mutex_key#14){+.+.}, at: ext4_file_write_iter+0x154/0x710

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&sb->s_type->i_mutex_key#14){+.+.}:
down_write+0x3d/0x80
__generic_file_fsync+0x77/0xf0
ext4_sync_file+0x3c9/0x780
vfs_fsync_range+0x66/0x100
dio_complete+0x2f5/0x360
dio_aio_complete_work+0x1c/0x20
process_one_work+0x481/0x9f0
worker_thread+0x63/0x5a0
kthread+0x1cf/0x1f0
ret_from_fork+0x24/0x30

-> #1 ((work_completion)(&dio->complete_work)){+.+.}:
process_one_work+0x447/0x9f0
worker_thread+0x63/0x5a0
kthread+0x1cf/0x1f0
ret_from_fork+0x24/0x30

-> #0 ((wq_completion)"dio/%s"sb->s_id){+.+.}:
lock_acquire+0xc5/0x200
flush_workqueue+0xf3/0x970
drain_workqueue+0xec/0x220
destroy_workqueue+0x23/0x350
sb_init_dio_done_wq+0x6a/0x80
do_blockdev_direct_IO+0x1f33/0x4be0
__blockdev_direct_IO+0x79/0x86
ext4_direct_IO+0x5df/0xbb0
generic_file_direct_write+0x119/0x220
__generic_file_write_iter+0x131/0x2d0
ext4_file_write_iter+0x3fa/0x710
aio_write+0x235/0x330
io_submit_one+0x510/0xeb0
__x64_sys_io_submit+0x122/0x340
do_syscall_64+0x71/0x220
entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

Chain exists of:
(wq_completion)"dio/%s"sb->s_id --> (work_completion)(&dio->complete_work) --> &sb->s_type->i_mutex_key#14

Possible unsafe locking scenario:

CPU0 CPU1
---- ----
lock(&sb->s_type->i_mutex_key#14);
lock((work_completion)(&dio->complete_work));
lock(&sb->s_type->i_mutex_key#14);
lock((wq_completion)"dio/%s"sb->s_id);

*** DEADLOCK ***

1 lock held by fio/4129:
#0: 00000000a0acecf9 (&sb->s_type->i_mutex_key#14){+.+.}, at: ext4_file_write_iter+0x154/0x710

stack backtrace:
CPU: 3 PID: 4129 Comm: fio Not tainted 4.19.0-dbg+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
Call Trace:
dump_stack+0x86/0xc5
print_circular_bug.isra.32+0x20a/0x218
__lock_acquire+0x1c68/0x1cf0
lock_acquire+0xc5/0x200
flush_workqueue+0xf3/0x970
drain_workqueue+0xec/0x220
destroy_workqueue+0x23/0x350
sb_init_dio_done_wq+0x6a/0x80
do_blockdev_direct_IO+0x1f33/0x4be0
__blockdev_direct_IO+0x79/0x86
ext4_direct_IO+0x5df/0xbb0
generic_file_direct_write+0x119/0x220
__generic_file_write_iter+0x131/0x2d0
ext4_file_write_iter+0x3fa/0x710
aio_write+0x235/0x330
io_submit_one+0x510/0xeb0
__x64_sys_io_submit+0x122/0x340
do_syscall_64+0x71/0x220
entry_SYSCALL_64_after_hwframe+0x49/0xbe

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Berg <johannes.berg@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/r/20190214230058.196511-20-bvanassche@acm.org
[ Reworked the changelog a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Bart Van Assche and committed by
Ingo Molnar
669de8bd 108c1485

+54 -33
+4 -24
include/linux/workqueue.h
··· 390 390 extern struct workqueue_struct *system_power_efficient_wq; 391 391 extern struct workqueue_struct *system_freezable_power_efficient_wq; 392 392 393 - extern struct workqueue_struct * 394 - __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, 395 - struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6); 396 - 397 393 /** 398 394 * alloc_workqueue - allocate a workqueue 399 395 * @fmt: printf format for the name of the workqueue 400 396 * @flags: WQ_* flags 401 397 * @max_active: max in-flight work items, 0 for default 402 - * @args...: args for @fmt 398 + * remaining args: args for @fmt 403 399 * 404 400 * Allocate a workqueue with the specified parameters. For detailed 405 401 * information on WQ_* flags, please refer to 406 402 * Documentation/core-api/workqueue.rst. 407 403 * 408 - * The __lock_name macro dance is to guarantee that single lock_class_key 409 - * doesn't end up with different namesm, which isn't allowed by lockdep. 410 - * 411 404 * RETURNS: 412 405 * Pointer to the allocated workqueue on success, %NULL on failure. 413 406 */ 414 - #ifdef CONFIG_LOCKDEP 415 - #define alloc_workqueue(fmt, flags, max_active, args...) \ 416 - ({ \ 417 - static struct lock_class_key __key; \ 418 - const char *__lock_name; \ 419 - \ 420 - __lock_name = "(wq_completion)"#fmt#args; \ 421 - \ 422 - __alloc_workqueue_key((fmt), (flags), (max_active), \ 423 - &__key, __lock_name, ##args); \ 424 - }) 425 - #else 426 - #define alloc_workqueue(fmt, flags, max_active, args...) \ 427 - __alloc_workqueue_key((fmt), (flags), (max_active), \ 428 - NULL, NULL, ##args) 429 - #endif 407 + struct workqueue_struct *alloc_workqueue(const char *fmt, 408 + unsigned int flags, 409 + int max_active, ...); 430 410 431 411 /** 432 412 * alloc_ordered_workqueue - allocate an ordered workqueue
+50 -9
kernel/workqueue.c
··· 259 259 struct wq_device *wq_dev; /* I: for sysfs interface */ 260 260 #endif 261 261 #ifdef CONFIG_LOCKDEP 262 + char *lock_name; 263 + struct lock_class_key key; 262 264 struct lockdep_map lockdep_map; 263 265 #endif 264 266 char name[WQ_NAME_LEN]; /* I: workqueue name */ ··· 3339 3337 return 0; 3340 3338 } 3341 3339 3340 + #ifdef CONFIG_LOCKDEP 3341 + static void wq_init_lockdep(struct workqueue_struct *wq) 3342 + { 3343 + char *lock_name; 3344 + 3345 + lockdep_register_key(&wq->key); 3346 + lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name); 3347 + if (!lock_name) 3348 + lock_name = wq->name; 3349 + lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0); 3350 + } 3351 + 3352 + static void wq_unregister_lockdep(struct workqueue_struct *wq) 3353 + { 3354 + lockdep_unregister_key(&wq->key); 3355 + } 3356 + 3357 + static void wq_free_lockdep(struct workqueue_struct *wq) 3358 + { 3359 + if (wq->lock_name != wq->name) 3360 + kfree(wq->lock_name); 3361 + } 3362 + #else 3363 + static void wq_init_lockdep(struct workqueue_struct *wq) 3364 + { 3365 + } 3366 + 3367 + static void wq_unregister_lockdep(struct workqueue_struct *wq) 3368 + { 3369 + } 3370 + 3371 + static void wq_free_lockdep(struct workqueue_struct *wq) 3372 + { 3373 + } 3374 + #endif 3375 + 3342 3376 static void rcu_free_wq(struct rcu_head *rcu) 3343 3377 { 3344 3378 struct workqueue_struct *wq = 3345 3379 container_of(rcu, struct workqueue_struct, rcu); 3380 + 3381 + wq_free_lockdep(wq); 3346 3382 3347 3383 if (!(wq->flags & WQ_UNBOUND)) 3348 3384 free_percpu(wq->cpu_pwqs); ··· 3572 3532 * If we're the last pwq going away, @wq is already dead and no one 3573 3533 * is gonna access it anymore. Schedule RCU free. 
3574 3534 */ 3575 - if (is_last) 3535 + if (is_last) { 3536 + wq_unregister_lockdep(wq); 3576 3537 call_rcu(&wq->rcu, rcu_free_wq); 3538 + } 3577 3539 } 3578 3540 3579 3541 /** ··· 4109 4067 return 0; 4110 4068 } 4111 4069 4112 - struct workqueue_struct *__alloc_workqueue_key(const char *fmt, 4113 - unsigned int flags, 4114 - int max_active, 4115 - struct lock_class_key *key, 4116 - const char *lock_name, ...) 4070 + struct workqueue_struct *alloc_workqueue(const char *fmt, 4071 + unsigned int flags, 4072 + int max_active, ...) 4117 4073 { 4118 4074 size_t tbl_size = 0; 4119 4075 va_list args; ··· 4146 4106 goto err_free_wq; 4147 4107 } 4148 4108 4149 - va_start(args, lock_name); 4109 + va_start(args, max_active); 4150 4110 vsnprintf(wq->name, sizeof(wq->name), fmt, args); 4151 4111 va_end(args); 4152 4112 ··· 4163 4123 INIT_LIST_HEAD(&wq->flusher_overflow); 4164 4124 INIT_LIST_HEAD(&wq->maydays); 4165 4125 4166 - lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); 4126 + wq_init_lockdep(wq); 4167 4127 INIT_LIST_HEAD(&wq->list); 4168 4128 4169 4129 if (alloc_and_link_pwqs(wq) < 0) ··· 4201 4161 destroy_workqueue(wq); 4202 4162 return NULL; 4203 4163 } 4204 - EXPORT_SYMBOL_GPL(__alloc_workqueue_key); 4164 + EXPORT_SYMBOL_GPL(alloc_workqueue); 4205 4165 4206 4166 /** 4207 4167 * destroy_workqueue - safely terminate a workqueue ··· 4254 4214 kthread_stop(wq->rescuer->task); 4255 4215 4256 4216 if (!(wq->flags & WQ_UNBOUND)) { 4217 + wq_unregister_lockdep(wq); 4257 4218 /* 4258 4219 * The base ref is never dropped on per-cpu pwqs. Directly 4259 4220 * schedule RCU free.