Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

workqueue: dump workqueues on sysrq-t

Workqueues are used extensively throughout the kernel but sometimes
it's difficult to debug stalls involving work items because visibility
into their inner workings is fairly limited. Although the sysrq-t task dump
annotates each active worker task with the information on the work
item being executed, it is challenging to find out which work items
are pending or delayed on which queues and how pools are being
managed.

This patch implements show_workqueue_state() which dumps all busy
workqueues and pools and is called from the sysrq-t handler. At the
end of sysrq-t dump, something like the following is printed.

 Showing busy workqueues and worker pools:
 ...
 workqueue filler_wq: flags=0x0
   pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=2/256
     in-flight: 491:filler_workfn, 507:filler_workfn
   pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=2/256
     in-flight: 501:filler_workfn
     pending: filler_workfn
 ...
 workqueue test_wq: flags=0x8
   pwq 2: cpus=1 node=0 flags=0x0 nice=0 active=1/1
     in-flight: 510(RESCUER):test_workfn BAR(69) BAR(500)
     delayed: test_workfn1 BAR(492), test_workfn2
 ...
 pool 0: cpus=0 node=0 flags=0x0 nice=0 workers=2 manager: 137
 pool 2: cpus=1 node=0 flags=0x0 nice=0 workers=3 manager: 469
 pool 3: cpus=1 node=0 flags=0x0 nice=-20 workers=2 idle: 16
 pool 8: cpus=0-3 flags=0x4 nice=0 workers=2 manager: 62

The above shows that test_wq is executing test_workfn() on pid 510
which is the rescuer and also that there are two tasks 69 and 500
waiting for the work item to finish in flush_work(). As test_wq has
max_active of 1, there are two work items for test_workfn1() and
test_workfn2() which are delayed till the current work item is
finished. In addition, pid 492 is flushing test_workfn1().

The work item for test_workfn() is being executed on pwq of pool 2
which is the normal priority per-cpu pool for CPU 1. The pool has
three workers, two of which are executing filler_workfn() for
filler_wq and the last one is assuming the manager role trying to
create more workers.

This extra workqueue state dump will hopefully help chase down hangs
involving workqueues.

v3: cpulist_pr_cont() replaced with "%*pbl" printf formatting.

v2: As suggested by Andrew, minor formatting change in pr_cont_work(),
printk()'s replaced with pr_info()'s, and cpumask printing now
uses cpulist_pr_cont().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
CC: Ingo Molnar <mingo@redhat.com>

Tejun Heo 3494fc30 2607d7a6

+162
+1
drivers/tty/sysrq.c
··· 275 275 static void sysrq_handle_showstate(int key) 276 276 { 277 277 show_state(); 278 + show_workqueue_state(); 278 279 } 279 280 static struct sysrq_key_op sysrq_showstate_op = { 280 281 .handler = sysrq_handle_showstate,
+1
include/linux/workqueue.h
··· 454 454 extern unsigned int work_busy(struct work_struct *work); 455 455 extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); 456 456 extern void print_worker_info(const char *log_lvl, struct task_struct *task); 457 + extern void show_workqueue_state(void); 457 458 458 459 /** 459 460 * queue_work - queue work on a workqueue
+160
kernel/workqueue.c
··· 4457 4457 } 4458 4458 } 4459 4459 4460 + static void pr_cont_pool_info(struct worker_pool *pool) 4461 + { 4462 + pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask); 4463 + if (pool->node != NUMA_NO_NODE) 4464 + pr_cont(" node=%d", pool->node); 4465 + pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice); 4466 + } 4467 + 4468 + static void pr_cont_work(bool comma, struct work_struct *work) 4469 + { 4470 + if (work->func == wq_barrier_func) { 4471 + struct wq_barrier *barr; 4472 + 4473 + barr = container_of(work, struct wq_barrier, work); 4474 + 4475 + pr_cont("%s BAR(%d)", comma ? "," : "", 4476 + task_pid_nr(barr->task)); 4477 + } else { 4478 + pr_cont("%s %pf", comma ? "," : "", work->func); 4479 + } 4480 + } 4481 + 4482 + static void show_pwq(struct pool_workqueue *pwq) 4483 + { 4484 + struct worker_pool *pool = pwq->pool; 4485 + struct work_struct *work; 4486 + struct worker *worker; 4487 + bool has_in_flight = false, has_pending = false; 4488 + int bkt; 4489 + 4490 + pr_info(" pwq %d:", pool->id); 4491 + pr_cont_pool_info(pool); 4492 + 4493 + pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active, 4494 + !list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); 4495 + 4496 + hash_for_each(pool->busy_hash, bkt, worker, hentry) { 4497 + if (worker->current_pwq == pwq) { 4498 + has_in_flight = true; 4499 + break; 4500 + } 4501 + } 4502 + if (has_in_flight) { 4503 + bool comma = false; 4504 + 4505 + pr_info(" in-flight:"); 4506 + hash_for_each(pool->busy_hash, bkt, worker, hentry) { 4507 + if (worker->current_pwq != pwq) 4508 + continue; 4509 + 4510 + pr_cont("%s %d%s:%pf", comma ? "," : "", 4511 + task_pid_nr(worker->task), 4512 + worker == pwq->wq->rescuer ? 
"(RESCUER)" : "", 4513 + worker->current_func); 4514 + list_for_each_entry(work, &worker->scheduled, entry) 4515 + pr_cont_work(false, work); 4516 + comma = true; 4517 + } 4518 + pr_cont("\n"); 4519 + } 4520 + 4521 + list_for_each_entry(work, &pool->worklist, entry) { 4522 + if (get_work_pwq(work) == pwq) { 4523 + has_pending = true; 4524 + break; 4525 + } 4526 + } 4527 + if (has_pending) { 4528 + bool comma = false; 4529 + 4530 + pr_info(" pending:"); 4531 + list_for_each_entry(work, &pool->worklist, entry) { 4532 + if (get_work_pwq(work) != pwq) 4533 + continue; 4534 + 4535 + pr_cont_work(comma, work); 4536 + comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED); 4537 + } 4538 + pr_cont("\n"); 4539 + } 4540 + 4541 + if (!list_empty(&pwq->delayed_works)) { 4542 + bool comma = false; 4543 + 4544 + pr_info(" delayed:"); 4545 + list_for_each_entry(work, &pwq->delayed_works, entry) { 4546 + pr_cont_work(comma, work); 4547 + comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED); 4548 + } 4549 + pr_cont("\n"); 4550 + } 4551 + } 4552 + 4553 + /** 4554 + * show_workqueue_state - dump workqueue state 4555 + * 4556 + * Called from a sysrq handler and prints out all busy workqueues and 4557 + * pools. 
4558 + */ 4559 + void show_workqueue_state(void) 4560 + { 4561 + struct workqueue_struct *wq; 4562 + struct worker_pool *pool; 4563 + unsigned long flags; 4564 + int pi; 4565 + 4566 + rcu_read_lock_sched(); 4567 + 4568 + pr_info("Showing busy workqueues and worker pools:\n"); 4569 + 4570 + list_for_each_entry_rcu(wq, &workqueues, list) { 4571 + struct pool_workqueue *pwq; 4572 + bool idle = true; 4573 + 4574 + for_each_pwq(pwq, wq) { 4575 + if (pwq->nr_active || !list_empty(&pwq->delayed_works)) { 4576 + idle = false; 4577 + break; 4578 + } 4579 + } 4580 + if (idle) 4581 + continue; 4582 + 4583 + pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); 4584 + 4585 + for_each_pwq(pwq, wq) { 4586 + spin_lock_irqsave(&pwq->pool->lock, flags); 4587 + if (pwq->nr_active || !list_empty(&pwq->delayed_works)) 4588 + show_pwq(pwq); 4589 + spin_unlock_irqrestore(&pwq->pool->lock, flags); 4590 + } 4591 + } 4592 + 4593 + for_each_pool(pool, pi) { 4594 + struct worker *worker; 4595 + bool first = true; 4596 + 4597 + spin_lock_irqsave(&pool->lock, flags); 4598 + if (pool->nr_workers == pool->nr_idle) 4599 + goto next_pool; 4600 + 4601 + pr_info("pool %d:", pool->id); 4602 + pr_cont_pool_info(pool); 4603 + pr_cont(" workers=%d", pool->nr_workers); 4604 + if (pool->manager) 4605 + pr_cont(" manager: %d", 4606 + task_pid_nr(pool->manager->task)); 4607 + list_for_each_entry(worker, &pool->idle_list, entry) { 4608 + pr_cont(" %s%d", first ? "idle: " : "", 4609 + task_pid_nr(worker->task)); 4610 + first = false; 4611 + } 4612 + pr_cont("\n"); 4613 + next_pool: 4614 + spin_unlock_irqrestore(&pool->lock, flags); 4615 + } 4616 + 4617 + rcu_read_unlock_sched(); 4618 + } 4619 + 4460 4620 /* 4461 4621 * CPU hotplug. 4462 4622 *