Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cgroup: Merge branch 'for-6.17-fixes' into for-6.18

Pull for-6.17-fixes to receive 79f919a89c9d ("cgroup: split
cgroup_destroy_wq into 3 workqueues") to resolve its conflict with
7fa33aa3b001 ("cgroup: WQ_PERCPU added to alloc_workqueue users"). The
latter adds WQ_PERCPU when creating cgroup_destroy_wq and the former splits
the workqueue into three. Resolve by applying WQ_PERCPU to the three split
workqueues.

Signed-off-by: Tejun Heo <tj@kernel.org>

Tejun Heo 4a3e62df 7fa33aa3

+39 -9
+2 -2
Documentation/admin-guide/cgroup-v2.rst
··· 435 435 Controlling Controllers 436 436 ----------------------- 437 437 438 - Availablity 439 - ~~~~~~~~~~~ 438 + Availability 439 + ~~~~~~~~~~~~ 440 440 441 441 A controller is available in a cgroup when it is supported by the kernel (i.e., 442 442 compiled in, not disabled and not attached to a v1 hierarchy) and listed in the
+37 -7
kernel/cgroup/cgroup.c
··· 126 126 * of concurrent destructions. Use a separate workqueue so that cgroup 127 127 * destruction work items don't end up filling up max_active of system_percpu_wq 128 128 * which may lead to deadlock. 129 + * 130 + * A cgroup destruction should enqueue work sequentially to: 131 + * cgroup_offline_wq: use for css offline work 132 + * cgroup_release_wq: use for css release work 133 + * cgroup_free_wq: use for free work 134 + * 135 + * Rationale for using separate workqueues: 136 + * The cgroup root free work may depend on completion of other css offline 137 + * operations. If all tasks were enqueued to a single workqueue, this could 138 + * create a deadlock scenario where: 139 + * - Free work waits for other css offline work to complete. 140 + * - But other css offline work is queued after free work in the same queue. 141 + * 142 + * Example deadlock scenario with single workqueue (cgroup_destroy_wq): 143 + * 1. umount net_prio 144 + * 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx) 145 + * 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx) 146 + * 4. net_prio cgroup_destroy_root->cgroup_lock_and_drain_offline. 147 + * 5. net_prio root destruction blocks waiting for perf_event CSS A offline, 148 + * which can never complete as it's behind in the same queue and 149 + * workqueue's max_active is 1. 129 150 */ 130 - static struct workqueue_struct *cgroup_destroy_wq; 151 + static struct workqueue_struct *cgroup_offline_wq; 152 + static struct workqueue_struct *cgroup_release_wq; 153 + static struct workqueue_struct *cgroup_free_wq; 131 154 132 155 /* generate an array of cgroup subsystem pointers */ 133 156 #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys, ··· 4196 4173 cft->release(of); 4197 4174 put_cgroup_ns(ctx->ns); 4198 4175 kfree(ctx); 4176 + of->priv = NULL; 4199 4177 } 4200 4178 4201 4179 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, ··· 5601 5577 cgroup_unlock(); 5602 5578 5603 5579 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); 5604 - queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); 5580 + queue_rcu_work(cgroup_free_wq, &css->destroy_rwork); 5605 5581 } 5606 5582 5607 5583 static void css_release(struct percpu_ref *ref) ··· 5610 5586 container_of(ref, struct cgroup_subsys_state, refcnt); 5611 5587 5612 5588 INIT_WORK(&css->destroy_work, css_release_work_fn); 5613 - queue_work(cgroup_destroy_wq, &css->destroy_work); 5589 + queue_work(cgroup_release_wq, &css->destroy_work); 5614 5590 } 5615 5591 5616 5592 static void init_and_link_css(struct cgroup_subsys_state *css, ··· 5744 5720 list_del_rcu(&css->sibling); 5745 5721 err_free_css: 5746 5722 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); 5747 - queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); 5723 + queue_rcu_work(cgroup_free_wq, &css->destroy_rwork); 5748 5724 return ERR_PTR(err); 5749 5725 } 5750 5726 ··· 5984 5960 5985 5961 if (atomic_dec_and_test(&css->online_cnt)) { 5986 5962 INIT_WORK(&css->destroy_work, css_killed_work_fn); 5987 - queue_work(cgroup_destroy_wq, &css->destroy_work); 5963 + queue_work(cgroup_offline_wq, &css->destroy_work); 5988 5964 } 5989 5965 } 5990 5966 ··· 6370 6346 * We would prefer to do this in cgroup_init() above, but that 6371 6347 * is called before init_workqueues(): so leave this until after. 6372 6348 */ 6373 - cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", WQ_PERCPU, 1); 6374 - BUG_ON(!cgroup_destroy_wq); 6349 + cgroup_offline_wq = alloc_workqueue("cgroup_offline", WQ_PERCPU, 1); 6350 + BUG_ON(!cgroup_offline_wq); 6351 + 6352 + cgroup_release_wq = alloc_workqueue("cgroup_release", WQ_PERCPU, 1); 6353 + BUG_ON(!cgroup_release_wq); 6354 + 6355 + cgroup_free_wq = alloc_workqueue("cgroup_free", WQ_PERCPU, 1); 6356 + BUG_ON(!cgroup_free_wq); 6375 6357 return 0; 6376 6358 } 6377 6359 core_initcall(cgroup_wq_init);