Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-4.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue fixes from Tejun Heo:
"Two notable fixes.

- While adding NUMA affinity support to unbound workqueues, the
assumption that an unbound workqueue with max_active == 1 is
ordered was broken.

The plan was to use explicit alloc_ordered_workqueue() for those
cases. Unfortunately, I forgot to update the documentation properly
and we grew a handful of use cases which depend on that assumption.

While we want to convert them to alloc_ordered_workqueue(), we
don't really lose anything by enforcing ordered execution on
unbound max_active == 1 workqueues and it doesn't make sense to
risk subtle bugs. Restore the assumption.

- Workqueue assumes that CPU <-> NUMA node mapping remains static.

This is a general assumption - we don't have any synchronization
mechanism around CPU <-> node mapping. Unfortunately, powerpc may
change the mapping dynamically leading to crashes. Michael added a
workaround so that we at least don't crash while powerpc hotplug
code gets updated"

* 'for-4.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
workqueue: Work around edge cases for calc of pool's cpumask
workqueue: implicit ordered attribute should be overridable
workqueue: restore WQ_UNBOUND/max_active==1 to be ordered

+29 -5
+3 -1
include/linux/workqueue.h
··· 323 323 324 324 __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ 325 325 __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ 326 + __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ 326 327 __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ 327 328 328 329 WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ ··· 423 422 * Pointer to the allocated workqueue on success, %NULL on failure. 424 423 */ 425 424 #define alloc_ordered_workqueue(fmt, flags, args...) \ 426 - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) 425 + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ 426 + __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) 427 427 428 428 #define create_workqueue(name) \ 429 429 alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
+26 -4
kernel/workqueue.c
··· 3577 3577 3578 3578 /* yeap, return possible CPUs in @node that @attrs wants */ 3579 3579 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]); 3580 + 3581 + if (cpumask_empty(cpumask)) { 3582 + pr_warn_once("WARNING: workqueue cpumask: online intersect > " 3583 + "possible intersect\n"); 3584 + return false; 3585 + } 3586 + 3580 3587 return !cpumask_equal(cpumask, attrs->cpumask); 3581 3588 3582 3589 use_dfl: ··· 3751 3744 return -EINVAL; 3752 3745 3753 3746 /* creating multiple pwqs breaks ordering guarantee */ 3754 - if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) 3755 - return -EINVAL; 3747 + if (!list_empty(&wq->pwqs)) { 3748 + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) 3749 + return -EINVAL; 3750 + 3751 + wq->flags &= ~__WQ_ORDERED; 3752 + } 3756 3753 3757 3754 ctx = apply_wqattrs_prepare(wq, attrs); 3758 3755 if (!ctx) ··· 3939 3928 va_list args; 3940 3929 struct workqueue_struct *wq; 3941 3930 struct pool_workqueue *pwq; 3931 + 3932 + /* 3933 + * Unbound && max_active == 1 used to imply ordered, which is no 3934 + * longer the case on NUMA machines due to per-node pools. While 3935 + * alloc_ordered_workqueue() is the right way to create an ordered 3936 + * workqueue, keep the previous behavior to avoid subtle breakages 3937 + * on NUMA. 
3938 + */ 3939 + if ((flags & WQ_UNBOUND) && max_active == 1) 3940 + flags |= __WQ_ORDERED; 3942 3941 3943 3942 /* see the comment above the definition of WQ_POWER_EFFICIENT */ 3944 3943 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient) ··· 4140 4119 struct pool_workqueue *pwq; 4141 4120 4142 4121 /* disallow meddling with max_active for ordered workqueues */ 4143 - if (WARN_ON(wq->flags & __WQ_ORDERED)) 4122 + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) 4144 4123 return; 4145 4124 4146 4125 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); 4147 4126 4148 4127 mutex_lock(&wq->mutex); 4149 4128 4129 + wq->flags &= ~__WQ_ORDERED; 4150 4130 wq->saved_max_active = max_active; 4151 4131 4152 4132 for_each_pwq(pwq, wq) ··· 5275 5253 * attributes breaks ordering guarantee. Disallow exposing ordered 5276 5254 * workqueues. 5277 5255 */ 5278 - if (WARN_ON(wq->flags & __WQ_ORDERED)) 5256 + if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT)) 5279 5257 return -EINVAL; 5280 5258 5281 5259 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);