Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull more cgroup fixes from Tejun Heo:
"Three more patches to fix cgroup_freezer breakage due to the recent
cgroup internal locking changes - an operation cgroup_freezer was
using now requires sleepable context and cgroup_freezer was invoking
that while holding a spin lock. cgroup_freezer was using an overly
elaborate hierarchical locking scheme.

While it's possible to convert the hierarchical spinlocks directly to
mutexes, this patch simplifies the overall locking so that it uses a
global mutex. This has the added benefit of avoiding iterating
potentially huge number of tasks under a spinlock. While the patch is
on the larger side in the devel cycle, the changes made are mostly
straight-forward and the locking logic is a lot simpler afterwards"

* 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: fix rcu_read_lock() leak in update_if_frozen()
cgroup_freezer: replace freezer->lock with freezer_mutex
cgroup: introduce task_css_is_root()

+65 -68
+15
include/linux/cgroup.h
··· 473 473 }; 474 474 475 475 extern struct cgroup_root cgrp_dfl_root; 476 + extern struct css_set init_css_set; 476 477 477 478 static inline bool cgroup_on_dfl(const struct cgroup *cgrp) 478 479 { ··· 699 698 int subsys_id) 700 699 { 701 700 return task_css_check(task, subsys_id, false); 701 + } 702 + 703 + /** 704 + * task_css_is_root - test whether a task belongs to the root css 705 + * @task: the target task 706 + * @subsys_id: the target subsystem ID 707 + * 708 + * Test whether @task belongs to the root css on the specified subsystem. 709 + * May be invoked in any context. 710 + */ 711 + static inline bool task_css_is_root(struct task_struct *task, int subsys_id) 712 + { 713 + return task_css_check(task, subsys_id, true) == 714 + init_css_set.subsys[subsys_id]; 702 715 } 703 716 704 717 static inline struct cgroup *task_cgroup(struct task_struct *task,
+1 -1
kernel/cgroup.c
··· 348 348 * reference-counted, to improve performance when child cgroups 349 349 * haven't been created. 350 350 */ 351 - static struct css_set init_css_set = { 351 + struct css_set init_css_set = { 352 352 .refcount = ATOMIC_INIT(1), 353 353 .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), 354 354 .tasks = LIST_HEAD_INIT(init_css_set.tasks),
+49 -67
kernel/cgroup_freezer.c
··· 21 21 #include <linux/uaccess.h> 22 22 #include <linux/freezer.h> 23 23 #include <linux/seq_file.h> 24 + #include <linux/mutex.h> 24 25 25 26 /* 26 27 * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is ··· 43 42 struct freezer { 44 43 struct cgroup_subsys_state css; 45 44 unsigned int state; 46 - spinlock_t lock; 47 45 }; 46 + 47 + static DEFINE_MUTEX(freezer_mutex); 48 48 49 49 static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) 50 50 { ··· 95 93 if (!freezer) 96 94 return ERR_PTR(-ENOMEM); 97 95 98 - spin_lock_init(&freezer->lock); 99 96 return &freezer->css; 100 97 } 101 98 ··· 111 110 struct freezer *freezer = css_freezer(css); 112 111 struct freezer *parent = parent_freezer(freezer); 113 112 114 - /* 115 - * The following double locking and freezing state inheritance 116 - * guarantee that @cgroup can never escape ancestors' freezing 117 - * states. See css_for_each_descendant_pre() for details. 118 - */ 119 - if (parent) 120 - spin_lock_irq(&parent->lock); 121 - spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING); 113 + mutex_lock(&freezer_mutex); 122 114 123 115 freezer->state |= CGROUP_FREEZER_ONLINE; 124 116 ··· 120 126 atomic_inc(&system_freezing_cnt); 121 127 } 122 128 123 - spin_unlock(&freezer->lock); 124 - if (parent) 125 - spin_unlock_irq(&parent->lock); 126 - 129 + mutex_unlock(&freezer_mutex); 127 130 return 0; 128 131 } 129 132 ··· 135 144 { 136 145 struct freezer *freezer = css_freezer(css); 137 146 138 - spin_lock_irq(&freezer->lock); 147 + mutex_lock(&freezer_mutex); 139 148 140 149 if (freezer->state & CGROUP_FREEZING) 141 150 atomic_dec(&system_freezing_cnt); 142 151 143 152 freezer->state = 0; 144 153 145 - spin_unlock_irq(&freezer->lock); 154 + mutex_unlock(&freezer_mutex); 146 155 } 147 156 148 157 static void freezer_css_free(struct cgroup_subsys_state *css) ··· 166 175 struct task_struct *task; 167 176 bool clear_frozen = false; 168 177 169 - spin_lock_irq(&freezer->lock); 178 + mutex_lock(&freezer_mutex);
170 179 171 180 /* 172 181 * Make the new tasks conform to the current state of @new_css. ··· 188 197 } 189 198 } 190 199 191 - spin_unlock_irq(&freezer->lock); 192 - 193 - /* 194 - * Propagate FROZEN clearing upwards. We may race with 195 - * update_if_frozen(), but as long as both work bottom-up, either 196 - * update_if_frozen() sees child's FROZEN cleared or we clear the 197 - * parent's FROZEN later. No parent w/ !FROZEN children can be 198 - * left FROZEN. 199 - */ 200 + /* propagate FROZEN clearing upwards */ 200 201 while (clear_frozen && (freezer = parent_freezer(freezer))) { 201 - spin_lock_irq(&freezer->lock); 202 202 freezer->state &= ~CGROUP_FROZEN; 203 203 clear_frozen = freezer->state & CGROUP_FREEZING; 204 - spin_unlock_irq(&freezer->lock); 205 204 } 205 + 206 + mutex_unlock(&freezer_mutex); 206 207 } 207 208 208 209 /** ··· 211 228 { 212 229 struct freezer *freezer; 213 230 214 - rcu_read_lock(); 215 - freezer = task_freezer(task); 216 - 217 231 /* 218 232 * The root cgroup is non-freezable, so we can skip locking the 219 233 * freezer. This is safe regardless of race with task migration. ··· 218 238 * to do. If we lost and root is the new cgroup, noop is still the 219 239 * right thing to do. 220 240 */ 221 - if (!parent_freezer(freezer)) 222 - goto out; 241 + if (task_css_is_root(task, freezer_cgrp_id)) 242 + return; 223 243 224 - /* 225 - * Grab @freezer->lock and freeze @task after verifying @task still 226 - * belongs to @freezer and it's freezing. The former is for the 227 - * case where we have raced against task migration and lost and 228 - * @task is already in a different cgroup which may not be frozen. 229 - * This isn't strictly necessary as freeze_task() is allowed to be 230 - * called spuriously but let's do it anyway for, if nothing else, 231 - * documentation.
232 - */ 233 - spin_lock_irq(&freezer->lock); 234 - if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING)) 244 + mutex_lock(&freezer_mutex); 245 + rcu_read_lock(); 246 + 247 + freezer = task_freezer(task); 248 + if (freezer->state & CGROUP_FREEZING) 235 249 freeze_task(task); 236 - spin_unlock_irq(&freezer->lock); 237 - out: 250 + 238 251 rcu_read_unlock(); 252 + mutex_unlock(&freezer_mutex); 239 253 } 240 254 241 255 /** ··· 255 281 struct css_task_iter it; 256 282 struct task_struct *task; 257 283 258 - WARN_ON_ONCE(!rcu_read_lock_held()); 259 - 260 - spin_lock_irq(&freezer->lock); 284 + lockdep_assert_held(&freezer_mutex); 261 285 262 286 if (!(freezer->state & CGROUP_FREEZING) || 263 287 (freezer->state & CGROUP_FROZEN)) 264 - goto out_unlock; 288 + return; 265 289 266 290 /* are all (live) children frozen? */ 291 + rcu_read_lock(); 267 292 css_for_each_child(pos, css) { 268 293 struct freezer *child = css_freezer(pos); 269 294 270 295 if ((child->state & CGROUP_FREEZER_ONLINE) && 271 - !(child->state & CGROUP_FROZEN)) 272 - goto out_unlock; 296 + !(child->state & CGROUP_FROZEN)) { 297 + rcu_read_unlock(); 298 + return; 299 + } 273 300 } 301 + rcu_read_unlock(); 274 302 275 303 /* are all tasks frozen? */
276 304 css_task_iter_start(css, &it); ··· 293 317 freezer->state |= CGROUP_FROZEN; 294 318 out_iter_end: 295 319 css_task_iter_end(&it); 296 - out_unlock: 297 - spin_unlock_irq(&freezer->lock); 298 320 } 299 321 300 322 static int freezer_read(struct seq_file *m, void *v) 301 323 { 302 324 struct cgroup_subsys_state *css = seq_css(m), *pos; 303 325 326 + mutex_lock(&freezer_mutex); 304 327 rcu_read_lock(); 305 328 306 329 /* update states bottom-up */ 307 - css_for_each_descendant_post(pos, css) 330 + css_for_each_descendant_post(pos, css) { 331 + if (!css_tryget(pos)) 332 + continue; 333 + rcu_read_unlock(); 334 + 308 335 update_if_frozen(pos); 309 336 337 + rcu_read_lock(); 338 + css_put(pos); 339 + } 340 + 310 341 rcu_read_unlock(); 342 + mutex_unlock(&freezer_mutex); 311 343 312 344 seq_puts(m, freezer_state_strs(css_freezer(css)->state)); 313 345 seq_putc(m, '\n'); ··· 357 373 unsigned int state) 358 374 { 359 375 /* also synchronizes against task migration, see freezer_attach() */ 360 - lockdep_assert_held(&freezer->lock); 376 + lockdep_assert_held(&freezer_mutex); 361 377 362 378 if (!(freezer->state & CGROUP_FREEZER_ONLINE)) 363 379 return; ··· 398 414 * descendant will try to inherit its parent's FREEZING state as 399 415 * CGROUP_FREEZING_PARENT. 400 416 */ 417 + mutex_lock(&freezer_mutex); 401 418 rcu_read_lock(); 402 419 css_for_each_descendant_pre(pos, &freezer->css) { 403 420 struct freezer *pos_f = css_freezer(pos); 404 421 struct freezer *parent = parent_freezer(pos_f); 405 422 406 - spin_lock_irq(&pos_f->lock); 423 + if (!css_tryget(pos)) 424 + continue; 425 + rcu_read_unlock(); 407 426 408 - if (pos_f == freezer) { 427 + if (pos_f == freezer) 409 428 freezer_apply_state(pos_f, freeze, 410 429 CGROUP_FREEZING_SELF); 411 - } else { 412 - /* 413 - * Our update to @parent->state is already visible 414 - * which is all we need. No need to lock @parent. 415 - * For more info on synchronization, see 416 - * freezer_post_create().
417 - */ 430 + else 418 431 freezer_apply_state(pos_f, 419 432 parent->state & CGROUP_FREEZING, 420 433 CGROUP_FREEZING_PARENT); 421 - } 422 434 423 - spin_unlock_irq(&pos_f->lock); 435 + rcu_read_lock(); 436 + css_put(pos); 424 437 } 425 438 rcu_read_unlock(); 439 + mutex_unlock(&freezer_mutex); 426 440 } 427 441 428 442 static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,