Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue updates from Tejun Heo:
"Lai has been doing a lot of cleanups of workqueue and kthread_work.
No significant behavior change. Just a lot of cleanups all over the
place. Some are a bit invasive but overall nothing too dangerous"

* 'for-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
kthread_work: remove the unused wait_queue_head
kthread_work: wake up worker only when the worker is idle
workqueue: use nr_node_ids instead of wq_numa_tbl_len
workqueue: remove the misnamed out_unlock label in get_unbound_pool()
workqueue: remove the stale comment in pwq_unbound_release_workfn()
workqueue: move rescuer pool detachment to the end
workqueue: unfold start_worker() into create_worker()
workqueue: remove @wakeup from worker_set_flags()
workqueue: remove an unneeded UNBOUND test before waking up the next worker
workqueue: wake regular worker if need_more_worker() when rescuer leave the pool
workqueue: alloc struct worker on its local node
workqueue: reuse the already calculated pwq in try_to_grab_pending()
workqueue: stronger test in process_one_work()
workqueue: clear POOL_DISASSOCIATED in rebind_workers()
workqueue: sanity check pool->cpu in wq_worker_sleeping()
workqueue: clear leftover flags when detached
workqueue: remove useless WARN_ON_ONCE()
workqueue: use schedule_timeout_interruptible() instead of open code
workqueue: remove the empty check in too_many_workers()
workqueue: use "pool->cpu < 0" to stand for an unbound pool

+64 -151
+2 -11
include/linux/kthread.h
··· 73 73 struct kthread_work { 74 74 struct list_head node; 75 75 kthread_work_func_t func; 76 - wait_queue_head_t done; 77 76 struct kthread_worker *worker; 78 77 }; 79 78 ··· 84 85 #define KTHREAD_WORK_INIT(work, fn) { \ 85 86 .node = LIST_HEAD_INIT((work).node), \ 86 87 .func = (fn), \ 87 - .done = __WAIT_QUEUE_HEAD_INITIALIZER((work).done), \ 88 88 } 89 89 90 90 #define DEFINE_KTHREAD_WORKER(worker) \ ··· 93 95 struct kthread_work work = KTHREAD_WORK_INIT(work, fn) 94 96 95 97 /* 96 - * kthread_worker.lock and kthread_work.done need their own lockdep class 97 - * keys if they are defined on stack with lockdep enabled. Use the 98 - * following macros when defining them on stack. 98 + * kthread_worker.lock needs its own lockdep class key when defined on 99 + * stack with lockdep enabled. Use the following macros in such cases. 99 100 */ 100 101 #ifdef CONFIG_LOCKDEP 101 102 # define KTHREAD_WORKER_INIT_ONSTACK(worker) \ 102 103 ({ init_kthread_worker(&worker); worker; }) 103 104 # define DEFINE_KTHREAD_WORKER_ONSTACK(worker) \ 104 105 struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker) 105 - # define KTHREAD_WORK_INIT_ONSTACK(work, fn) \ 106 - ({ init_kthread_work((&work), fn); work; }) 107 - # define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) \ 108 - struct kthread_work work = KTHREAD_WORK_INIT_ONSTACK(work, fn) 109 106 #else 110 107 # define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker) 111 - # define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) DEFINE_KTHREAD_WORK(work, fn) 112 108 #endif 113 109 114 110 extern void __init_kthread_worker(struct kthread_worker *worker, ··· 119 127 memset((work), 0, sizeof(struct kthread_work)); \ 120 128 INIT_LIST_HEAD(&(work)->node); \ 121 129 (work)->func = (fn); \ 122 - init_waitqueue_head(&(work)->done); \ 123 130 } while (0) 124 131 125 132 int kthread_worker_fn(void *worker_ptr);
+1 -1
kernel/kthread.c
··· 591 591 592 592 list_add_tail(&work->node, pos); 593 593 work->worker = worker; 594 - if (likely(worker->task)) 594 + if (!worker->current_work && likely(worker->task)) 595 595 wake_up_process(worker->task); 596 596 } 597 597
+61 -139
kernel/workqueue.c
··· 265 265 266 266 static struct kmem_cache *pwq_cache; 267 267 268 - static int wq_numa_tbl_len; /* highest possible NUMA node id + 1 */ 269 268 static cpumask_var_t *wq_numa_possible_cpumask; 270 269 /* possible CPUs of each node */ 271 270 ··· 757 758 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ 758 759 int nr_busy = pool->nr_workers - nr_idle; 759 760 760 - /* 761 - * nr_idle and idle_list may disagree if idle rebinding is in 762 - * progress. Never return %true if idle_list is empty. 763 - */ 764 - if (list_empty(&pool->idle_list)) 765 - return false; 766 - 767 761 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; 768 762 } 769 763 ··· 842 850 pool = worker->pool; 843 851 844 852 /* this can only happen on the local cpu */ 845 - if (WARN_ON_ONCE(cpu != raw_smp_processor_id())) 853 + if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu)) 846 854 return NULL; 847 855 848 856 /* ··· 866 874 * worker_set_flags - set worker flags and adjust nr_running accordingly 867 875 * @worker: self 868 876 * @flags: flags to set 869 - * @wakeup: wakeup an idle worker if necessary 870 877 * 871 - * Set @flags in @worker->flags and adjust nr_running accordingly. If 872 - * nr_running becomes zero and @wakeup is %true, an idle worker is 873 - * woken up. 878 + * Set @flags in @worker->flags and adjust nr_running accordingly. 874 879 * 875 880 * CONTEXT: 876 881 * spin_lock_irq(pool->lock) 877 882 */ 878 - static inline void worker_set_flags(struct worker *worker, unsigned int flags, 879 - bool wakeup) 883 + static inline void worker_set_flags(struct worker *worker, unsigned int flags) 880 884 { 881 885 struct worker_pool *pool = worker->pool; 882 886 883 887 WARN_ON_ONCE(worker->task != current); 884 888 885 - /* 886 - * If transitioning into NOT_RUNNING, adjust nr_running and 887 - * wake up an idle worker as necessary if requested by 888 - * @wakeup. 
889 - */ 889 + /* If transitioning into NOT_RUNNING, adjust nr_running. */ 890 890 if ((flags & WORKER_NOT_RUNNING) && 891 891 !(worker->flags & WORKER_NOT_RUNNING)) { 892 - if (wakeup) { 893 - if (atomic_dec_and_test(&pool->nr_running) && 894 - !list_empty(&pool->worklist)) 895 - wake_up_worker(pool); 896 - } else 897 - atomic_dec(&pool->nr_running); 892 + atomic_dec(&pool->nr_running); 898 893 } 899 894 900 895 worker->flags |= flags; ··· 1211 1232 pwq_activate_delayed_work(work); 1212 1233 1213 1234 list_del_init(&work->entry); 1214 - pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work)); 1235 + pwq_dec_nr_in_flight(pwq, get_work_color(work)); 1215 1236 1216 1237 /* work->data points to pwq iff queued, point to pool */ 1217 1238 set_work_pool_and_keep_pending(work, pool->id); ··· 1539 1560 (worker->hentry.next || worker->hentry.pprev))) 1540 1561 return; 1541 1562 1542 - /* can't use worker_set_flags(), also called from start_worker() */ 1563 + /* can't use worker_set_flags(), also called from create_worker() */ 1543 1564 worker->flags |= WORKER_IDLE; 1544 1565 pool->nr_idle++; 1545 1566 worker->last_active = jiffies; ··· 1581 1602 list_del_init(&worker->entry); 1582 1603 } 1583 1604 1584 - static struct worker *alloc_worker(void) 1605 + static struct worker *alloc_worker(int node) 1585 1606 { 1586 1607 struct worker *worker; 1587 1608 1588 - worker = kzalloc(sizeof(*worker), GFP_KERNEL); 1609 + worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node); 1589 1610 if (worker) { 1590 1611 INIT_LIST_HEAD(&worker->entry); 1591 1612 INIT_LIST_HEAD(&worker->scheduled); ··· 1649 1670 detach_completion = pool->detach_completion; 1650 1671 mutex_unlock(&pool->attach_mutex); 1651 1672 1673 + /* clear leftover flags without pool->lock after it is detached */ 1674 + worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND); 1675 + 1652 1676 if (detach_completion) 1653 1677 complete(detach_completion); 1654 1678 } ··· 1660 1678 * create_worker - create a new workqueue 
worker 1661 1679 * @pool: pool the new worker will belong to 1662 1680 * 1663 - * Create a new worker which is attached to @pool. The new worker must be 1664 - * started by start_worker(). 1681 + * Create and start a new worker which is attached to @pool. 1665 1682 * 1666 1683 * CONTEXT: 1667 1684 * Might sleep. Does GFP_KERNEL allocations. ··· 1679 1698 if (id < 0) 1680 1699 goto fail; 1681 1700 1682 - worker = alloc_worker(); 1701 + worker = alloc_worker(pool->node); 1683 1702 if (!worker) 1684 1703 goto fail; 1685 1704 ··· 1705 1724 /* successful, attach the worker to the pool */ 1706 1725 worker_attach_to_pool(worker, pool); 1707 1726 1727 + /* start the newly created worker */ 1728 + spin_lock_irq(&pool->lock); 1729 + worker->pool->nr_workers++; 1730 + worker_enter_idle(worker); 1731 + wake_up_process(worker->task); 1732 + spin_unlock_irq(&pool->lock); 1733 + 1708 1734 return worker; 1709 1735 1710 1736 fail: ··· 1719 1731 ida_simple_remove(&pool->worker_ida, id); 1720 1732 kfree(worker); 1721 1733 return NULL; 1722 - } 1723 - 1724 - /** 1725 - * start_worker - start a newly created worker 1726 - * @worker: worker to start 1727 - * 1728 - * Make the pool aware of @worker and start it. 1729 - * 1730 - * CONTEXT: 1731 - * spin_lock_irq(pool->lock). 1732 - */ 1733 - static void start_worker(struct worker *worker) 1734 - { 1735 - worker->pool->nr_workers++; 1736 - worker_enter_idle(worker); 1737 - wake_up_process(worker->task); 1738 - } 1739 - 1740 - /** 1741 - * create_and_start_worker - create and start a worker for a pool 1742 - * @pool: the target pool 1743 - * 1744 - * Grab the managership of @pool and create and start a new worker for it. 1745 - * 1746 - * Return: 0 on success. A negative error code otherwise. 
1747 - */ 1748 - static int create_and_start_worker(struct worker_pool *pool) 1749 - { 1750 - struct worker *worker; 1751 - 1752 - worker = create_worker(pool); 1753 - if (worker) { 1754 - spin_lock_irq(&pool->lock); 1755 - start_worker(worker); 1756 - spin_unlock_irq(&pool->lock); 1757 - } 1758 - 1759 - return worker ? 0 : -ENOMEM; 1760 1734 } 1761 1735 1762 1736 /** ··· 1859 1909 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); 1860 1910 1861 1911 while (true) { 1862 - struct worker *worker; 1863 - 1864 - worker = create_worker(pool); 1865 - if (worker) { 1866 - del_timer_sync(&pool->mayday_timer); 1867 - spin_lock_irq(&pool->lock); 1868 - start_worker(worker); 1869 - if (WARN_ON_ONCE(need_to_create_worker(pool))) 1870 - goto restart; 1871 - return true; 1872 - } 1873 - 1874 - if (!need_to_create_worker(pool)) 1912 + if (create_worker(pool) || !need_to_create_worker(pool)) 1875 1913 break; 1876 1914 1877 - __set_current_state(TASK_INTERRUPTIBLE); 1878 - schedule_timeout(CREATE_COOLDOWN); 1915 + schedule_timeout_interruptible(CREATE_COOLDOWN); 1879 1916 1880 1917 if (!need_to_create_worker(pool)) 1881 1918 break; ··· 1870 1933 1871 1934 del_timer_sync(&pool->mayday_timer); 1872 1935 spin_lock_irq(&pool->lock); 1936 + /* 1937 + * This is necessary even after a new worker was just successfully 1938 + * created as @pool->lock was dropped and the new worker might have 1939 + * already become busy. 1940 + */ 1873 1941 if (need_to_create_worker(pool)) 1874 1942 goto restart; 1875 1943 return true; ··· 1962 2020 1963 2021 lockdep_copy_map(&lockdep_map, &work->lockdep_map); 1964 2022 #endif 1965 - /* 1966 - * Ensure we're on the correct CPU. DISASSOCIATED test is 1967 - * necessary to avoid spurious warnings from rescuers servicing the 1968 - * unbound or a disassociated pool. 
1969 - */ 1970 - WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && 1971 - !(pool->flags & POOL_DISASSOCIATED) && 2023 + WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && 1972 2024 raw_smp_processor_id() != pool->cpu); 1973 2025 1974 2026 /* ··· 1988 2052 list_del_init(&work->entry); 1989 2053 1990 2054 /* 1991 - * CPU intensive works don't participate in concurrency 1992 - * management. They're the scheduler's responsibility. 2055 + * CPU intensive works don't participate in concurrency management. 2056 + * They're the scheduler's responsibility. This takes @worker out 2057 + * of concurrency management and the next code block will chain 2058 + * execution of the pending work items. 1993 2059 */ 1994 2060 if (unlikely(cpu_intensive)) 1995 - worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); 2061 + worker_set_flags(worker, WORKER_CPU_INTENSIVE); 1996 2062 1997 2063 /* 1998 - * Unbound pool isn't concurrency managed and work items should be 1999 - * executed ASAP. Wake up another worker if necessary. 2064 + * Wake up another worker if necessary. The condition is always 2065 + * false for normal per-cpu workers since nr_running would always 2066 + * be >= 1 at this point. This is used to chain execution of the 2067 + * pending work items for WORKER_NOT_RUNNING workers such as the 2068 + * UNBOUND and CPU_INTENSIVE ones. 
2000 2069 */ 2001 - if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) 2070 + if (need_more_worker(pool)) 2002 2071 wake_up_worker(pool); 2003 2072 2004 2073 /* ··· 2159 2218 } 2160 2219 } while (keep_working(pool)); 2161 2220 2162 - worker_set_flags(worker, WORKER_PREP, false); 2221 + worker_set_flags(worker, WORKER_PREP); 2163 2222 sleep: 2164 2223 /* 2165 2224 * pool->lock is held and there's no work to process and no need to ··· 2252 2311 move_linked_works(work, scheduled, &n); 2253 2312 2254 2313 process_scheduled_works(rescuer); 2255 - spin_unlock_irq(&pool->lock); 2256 - 2257 - worker_detach_from_pool(rescuer, pool); 2258 - 2259 - spin_lock_irq(&pool->lock); 2260 2314 2261 2315 /* 2262 2316 * Put the reference grabbed by send_mayday(). @pool won't 2263 - * go away while we're holding its lock. 2317 + * go away while we're still attached to it. 2264 2318 */ 2265 2319 put_pwq(pwq); 2266 2320 2267 2321 /* 2268 - * Leave this pool. If keep_working() is %true, notify a 2322 + * Leave this pool. If need_more_worker() is %true, notify a 2269 2323 * regular worker; otherwise, we end up with 0 concurrency 2270 2324 * and stalling the execution. 
2271 2325 */ 2272 - if (keep_working(pool)) 2326 + if (need_more_worker(pool)) 2273 2327 wake_up_worker(pool); 2274 2328 2275 2329 rescuer->pool = NULL; 2276 - spin_unlock(&pool->lock); 2277 - spin_lock(&wq_mayday_lock); 2330 + spin_unlock_irq(&pool->lock); 2331 + 2332 + worker_detach_from_pool(rescuer, pool); 2333 + 2334 + spin_lock_irq(&wq_mayday_lock); 2278 2335 } 2279 2336 2280 2337 spin_unlock_irq(&wq_mayday_lock); ··· 3397 3458 return; 3398 3459 3399 3460 /* sanity checks */ 3400 - if (WARN_ON(!(pool->flags & POOL_DISASSOCIATED)) || 3461 + if (WARN_ON(!(pool->cpu < 0)) || 3401 3462 WARN_ON(!list_empty(&pool->worklist))) 3402 3463 return; 3403 3464 ··· 3463 3524 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) { 3464 3525 if (wqattrs_equal(pool->attrs, attrs)) { 3465 3526 pool->refcnt++; 3466 - goto out_unlock; 3527 + return pool; 3467 3528 } 3468 3529 } 3469 3530 ··· 3496 3557 goto fail; 3497 3558 3498 3559 /* create and start the initial worker */ 3499 - if (create_and_start_worker(pool) < 0) 3560 + if (!create_worker(pool)) 3500 3561 goto fail; 3501 3562 3502 3563 /* install */ 3503 3564 hash_add(unbound_pool_hash, &pool->hash_node, hash); 3504 - out_unlock: 3565 + 3505 3566 return pool; 3506 3567 fail: 3507 3568 if (pool) ··· 3530 3591 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) 3531 3592 return; 3532 3593 3533 - /* 3534 - * Unlink @pwq. Synchronization against wq->mutex isn't strictly 3535 - * necessary on release but do it anyway. It's easier to verify 3536 - * and consistent with the linking path. 3537 - */ 3538 3594 mutex_lock(&wq->mutex); 3539 3595 list_del_rcu(&pwq->pwqs_node); 3540 3596 is_last = list_empty(&wq->pwqs); ··· 3626 3692 if (!list_empty(&pwq->pwqs_node)) 3627 3693 return; 3628 3694 3629 - /* 3630 - * Set the matching work_color. This is synchronized with 3631 - * wq->mutex to avoid confusing flush_workqueue(). 
3632 - */ 3695 + /* set the matching work_color */ 3633 3696 pwq->work_color = wq->work_color; 3634 3697 3635 3698 /* sync max_active to the current setting */ ··· 3763 3832 if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) 3764 3833 return -EINVAL; 3765 3834 3766 - pwq_tbl = kzalloc(wq_numa_tbl_len * sizeof(pwq_tbl[0]), GFP_KERNEL); 3835 + pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL); 3767 3836 new_attrs = alloc_workqueue_attrs(GFP_KERNEL); 3768 3837 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); 3769 3838 if (!pwq_tbl || !new_attrs || !tmp_attrs) ··· 4011 4080 4012 4081 /* allocate wq and format name */ 4013 4082 if (flags & WQ_UNBOUND) 4014 - tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]); 4083 + tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]); 4015 4084 4016 4085 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL); 4017 4086 if (!wq) ··· 4053 4122 if (flags & WQ_MEM_RECLAIM) { 4054 4123 struct worker *rescuer; 4055 4124 4056 - rescuer = alloc_worker(); 4125 + rescuer = alloc_worker(NUMA_NO_NODE); 4057 4126 if (!rescuer) 4058 4127 goto err_destroy; 4059 4128 ··· 4401 4470 struct worker *worker; 4402 4471 4403 4472 for_each_cpu_worker_pool(pool, cpu) { 4404 - WARN_ON_ONCE(cpu != smp_processor_id()); 4405 - 4406 4473 mutex_lock(&pool->attach_mutex); 4407 4474 spin_lock_irq(&pool->lock); 4408 4475 ··· 4472 4543 pool->attrs->cpumask) < 0); 4473 4544 4474 4545 spin_lock_irq(&pool->lock); 4546 + pool->flags &= ~POOL_DISASSOCIATED; 4475 4547 4476 4548 for_each_pool_worker(worker, pool) { 4477 4549 unsigned int worker_flags = worker->flags; ··· 4562 4632 for_each_cpu_worker_pool(pool, cpu) { 4563 4633 if (pool->nr_workers) 4564 4634 continue; 4565 - if (create_and_start_worker(pool) < 0) 4635 + if (!create_worker(pool)) 4566 4636 return NOTIFY_BAD; 4567 4637 } 4568 4638 break; ··· 4575 4645 mutex_lock(&pool->attach_mutex); 4576 4646 4577 4647 if (pool->cpu == cpu) { 4578 - spin_lock_irq(&pool->lock); 4579 - pool->flags &= 
~POOL_DISASSOCIATED; 4580 - spin_unlock_irq(&pool->lock); 4581 - 4582 4648 rebind_workers(pool); 4583 4649 } else if (pool->cpu < 0) { 4584 4650 restore_unbound_workers_cpumask(pool, cpu); ··· 4782 4856 cpumask_var_t *tbl; 4783 4857 int node, cpu; 4784 4858 4785 - /* determine NUMA pwq table len - highest node id + 1 */ 4786 - for_each_node(node) 4787 - wq_numa_tbl_len = max(wq_numa_tbl_len, node + 1); 4788 - 4789 4859 if (num_possible_nodes() <= 1) 4790 4860 return; 4791 4861 ··· 4798 4876 * available. Build one from cpu_to_node() which should have been 4799 4877 * fully initialized by now. 4800 4878 */ 4801 - tbl = kzalloc(wq_numa_tbl_len * sizeof(tbl[0]), GFP_KERNEL); 4879 + tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL); 4802 4880 BUG_ON(!tbl); 4803 4881 4804 4882 for_each_node(node) ··· 4858 4936 4859 4937 for_each_cpu_worker_pool(pool, cpu) { 4860 4938 pool->flags &= ~POOL_DISASSOCIATED; 4861 - BUG_ON(create_and_start_worker(pool) < 0); 4939 + BUG_ON(!create_worker(pool)); 4862 4940 } 4863 4941 } 4864 4942