/* include/linux/cgroup.h — Linux v3.10 (27 kB) */
1#ifndef _LINUX_CGROUP_H 2#define _LINUX_CGROUP_H 3/* 4 * cgroup interface 5 * 6 * Copyright (C) 2003 BULL SA 7 * Copyright (C) 2004-2006 Silicon Graphics, Inc. 8 * 9 */ 10 11#include <linux/sched.h> 12#include <linux/cpumask.h> 13#include <linux/nodemask.h> 14#include <linux/rcupdate.h> 15#include <linux/rculist.h> 16#include <linux/cgroupstats.h> 17#include <linux/prio_heap.h> 18#include <linux/rwsem.h> 19#include <linux/idr.h> 20#include <linux/workqueue.h> 21#include <linux/xattr.h> 22#include <linux/fs.h> 23 24#ifdef CONFIG_CGROUPS 25 26struct cgroupfs_root; 27struct cgroup_subsys; 28struct inode; 29struct cgroup; 30struct css_id; 31struct eventfd_ctx; 32 33extern int cgroup_init_early(void); 34extern int cgroup_init(void); 35extern void cgroup_fork(struct task_struct *p); 36extern void cgroup_post_fork(struct task_struct *p); 37extern void cgroup_exit(struct task_struct *p, int run_callbacks); 38extern int cgroupstats_build(struct cgroupstats *stats, 39 struct dentry *dentry); 40extern int cgroup_load_subsys(struct cgroup_subsys *ss); 41extern void cgroup_unload_subsys(struct cgroup_subsys *ss); 42 43extern int proc_cgroup_show(struct seq_file *, void *); 44 45/* 46 * Define the enumeration of all cgroup subsystems. 47 * 48 * We define ids for builtin subsystems and then modular ones. 49 */ 50#define SUBSYS(_x) _x ## _subsys_id, 51enum cgroup_subsys_id { 52#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) 53#include <linux/cgroup_subsys.h> 54#undef IS_SUBSYS_ENABLED 55 CGROUP_BUILTIN_SUBSYS_COUNT, 56 57 __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1, 58 59#define IS_SUBSYS_ENABLED(option) IS_MODULE(option) 60#include <linux/cgroup_subsys.h> 61#undef IS_SUBSYS_ENABLED 62 CGROUP_SUBSYS_COUNT, 63}; 64#undef SUBSYS 65 66/* Per-subsystem/per-cgroup state maintained by the system. */ 67struct cgroup_subsys_state { 68 /* 69 * The cgroup that this subsystem is attached to. 
Useful 70 * for subsystems that want to know about the cgroup 71 * hierarchy structure 72 */ 73 struct cgroup *cgroup; 74 75 /* 76 * State maintained by the cgroup system to allow subsystems 77 * to be "busy". Should be accessed via css_get(), 78 * css_tryget() and css_put(). 79 */ 80 81 atomic_t refcnt; 82 83 unsigned long flags; 84 /* ID for this css, if possible */ 85 struct css_id __rcu *id; 86 87 /* Used to put @cgroup->dentry on the last css_put() */ 88 struct work_struct dput_work; 89}; 90 91/* bits in struct cgroup_subsys_state flags field */ 92enum { 93 CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */ 94 CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ 95}; 96 97/* Caller must verify that the css is not for root cgroup */ 98static inline void __css_get(struct cgroup_subsys_state *css, int count) 99{ 100 atomic_add(count, &css->refcnt); 101} 102 103/* 104 * Call css_get() to hold a reference on the css; it can be used 105 * for a reference obtained via: 106 * - an existing ref-counted reference to the css 107 * - task->cgroups for a locked task 108 */ 109 110static inline void css_get(struct cgroup_subsys_state *css) 111{ 112 /* We don't need to reference count the root state */ 113 if (!(css->flags & CSS_ROOT)) 114 __css_get(css, 1); 115} 116 117/* 118 * Call css_tryget() to take a reference on a css if your existing 119 * (known-valid) reference isn't already ref-counted. Returns false if 120 * the css has been destroyed. 
121 */ 122 123extern bool __css_tryget(struct cgroup_subsys_state *css); 124static inline bool css_tryget(struct cgroup_subsys_state *css) 125{ 126 if (css->flags & CSS_ROOT) 127 return true; 128 return __css_tryget(css); 129} 130 131/* 132 * css_put() should be called to release a reference taken by 133 * css_get() or css_tryget() 134 */ 135 136extern void __css_put(struct cgroup_subsys_state *css); 137static inline void css_put(struct cgroup_subsys_state *css) 138{ 139 if (!(css->flags & CSS_ROOT)) 140 __css_put(css); 141} 142 143/* bits in struct cgroup flags field */ 144enum { 145 /* Control Group is dead */ 146 CGRP_REMOVED, 147 /* 148 * Control Group has previously had a child cgroup or a task, 149 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) 150 */ 151 CGRP_RELEASABLE, 152 /* Control Group requires release notifications to userspace */ 153 CGRP_NOTIFY_ON_RELEASE, 154 /* 155 * Clone the parent's configuration when creating a new child 156 * cpuset cgroup. For historical reasons, this option can be 157 * specified at mount time and thus is implemented here. 158 */ 159 CGRP_CPUSET_CLONE_CHILDREN, 160 /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */ 161 CGRP_SANE_BEHAVIOR, 162}; 163 164struct cgroup_name { 165 struct rcu_head rcu_head; 166 char name[]; 167}; 168 169struct cgroup { 170 unsigned long flags; /* "unsigned long" so bitops work */ 171 172 /* 173 * count users of this cgroup. >0 means busy, but doesn't 174 * necessarily indicate the number of tasks in the cgroup 175 */ 176 atomic_t count; 177 178 int id; /* ida allocated in-hierarchy ID */ 179 180 /* 181 * We link our 'sibling' struct into our parent's 'children'. 182 * Our children link their 'sibling' into our 'children'. 
183 */ 184 struct list_head sibling; /* my parent's children */ 185 struct list_head children; /* my children */ 186 struct list_head files; /* my files */ 187 188 struct cgroup *parent; /* my parent */ 189 struct dentry *dentry; /* cgroup fs entry, RCU protected */ 190 191 /* 192 * This is a copy of dentry->d_name, and it's needed because 193 * we can't use dentry->d_name in cgroup_path(). 194 * 195 * You must acquire rcu_read_lock() to access cgrp->name, and 196 * the only place that can change it is rename(), which is 197 * protected by parent dir's i_mutex. 198 * 199 * Normally you should use cgroup_name() wrapper rather than 200 * access it directly. 201 */ 202 struct cgroup_name __rcu *name; 203 204 /* Private pointers for each registered subsystem */ 205 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 206 207 struct cgroupfs_root *root; 208 209 /* 210 * List of cg_cgroup_links pointing at css_sets with 211 * tasks in this cgroup. Protected by css_set_lock 212 */ 213 struct list_head css_sets; 214 215 struct list_head allcg_node; /* cgroupfs_root->allcg_list */ 216 struct list_head cft_q_node; /* used during cftype add/rm */ 217 218 /* 219 * Linked list running through all cgroups that can 220 * potentially be reaped by the release agent. Protected by 221 * release_list_lock 222 */ 223 struct list_head release_list; 224 225 /* 226 * list of pidlists, up to two for each namespace (one for procs, one 227 * for tasks); created on demand. 
228 */ 229 struct list_head pidlists; 230 struct mutex pidlist_mutex; 231 232 /* For RCU-protected deletion */ 233 struct rcu_head rcu_head; 234 struct work_struct free_work; 235 236 /* List of events which userspace want to receive */ 237 struct list_head event_list; 238 spinlock_t event_list_lock; 239 240 /* directory xattrs */ 241 struct simple_xattrs xattrs; 242}; 243 244#define MAX_CGROUP_ROOT_NAMELEN 64 245 246/* cgroupfs_root->flags */ 247enum { 248 /* 249 * Unfortunately, cgroup core and various controllers are riddled 250 * with idiosyncrasies and pointless options. The following flag, 251 * when set, will force sane behavior - some options are forced on, 252 * others are disallowed, and some controllers will change their 253 * hierarchical or other behaviors. 254 * 255 * The set of behaviors affected by this flag are still being 256 * determined and developed and the mount option for this flag is 257 * prefixed with __DEVEL__. The prefix will be dropped once we 258 * reach the point where all behaviors are compatible with the 259 * planned unified hierarchy, which will automatically turn on this 260 * flag. 261 * 262 * The followings are the behaviors currently affected this flag. 263 * 264 * - Mount options "noprefix" and "clone_children" are disallowed. 265 * Also, cgroupfs file cgroup.clone_children is not created. 266 * 267 * - When mounting an existing superblock, mount options should 268 * match. 269 * 270 * - Remount is disallowed. 271 * 272 * - memcg: use_hierarchy is on by default and the cgroup file for 273 * the flag is not created. 274 * 275 * The followings are planned changes. 276 * 277 * - release_agent will be disallowed once replacement notification 278 * mechanism is implemented. 
279 */ 280 CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), 281 282 CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ 283 CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ 284}; 285 286/* 287 * A cgroupfs_root represents the root of a cgroup hierarchy, and may be 288 * associated with a superblock to form an active hierarchy. This is 289 * internal to cgroup core. Don't access directly from controllers. 290 */ 291struct cgroupfs_root { 292 struct super_block *sb; 293 294 /* 295 * The bitmask of subsystems intended to be attached to this 296 * hierarchy 297 */ 298 unsigned long subsys_mask; 299 300 /* Unique id for this hierarchy. */ 301 int hierarchy_id; 302 303 /* The bitmask of subsystems currently attached to this hierarchy */ 304 unsigned long actual_subsys_mask; 305 306 /* A list running through the attached subsystems */ 307 struct list_head subsys_list; 308 309 /* The root cgroup for this hierarchy */ 310 struct cgroup top_cgroup; 311 312 /* Tracks how many cgroups are currently defined in hierarchy.*/ 313 int number_of_cgroups; 314 315 /* A list running through the active hierarchies */ 316 struct list_head root_list; 317 318 /* All cgroups on this root, cgroup_mutex protected */ 319 struct list_head allcg_list; 320 321 /* Hierarchy-specific flags */ 322 unsigned long flags; 323 324 /* IDs for cgroups in this hierarchy */ 325 struct ida cgroup_ida; 326 327 /* The path to use for release notifications. */ 328 char release_agent_path[PATH_MAX]; 329 330 /* The name for this hierarchy - may be empty */ 331 char name[MAX_CGROUP_ROOT_NAMELEN]; 332}; 333 334/* 335 * A css_set is a structure holding pointers to a set of 336 * cgroup_subsys_state objects. This saves space in the task struct 337 * object and speeds up fork()/exit(), since a single inc/dec and a 338 * list_add()/del() can bump the reference count on the entire cgroup 339 * set for a task. 
340 */ 341 342struct css_set { 343 344 /* Reference count */ 345 atomic_t refcount; 346 347 /* 348 * List running through all cgroup groups in the same hash 349 * slot. Protected by css_set_lock 350 */ 351 struct hlist_node hlist; 352 353 /* 354 * List running through all tasks using this cgroup 355 * group. Protected by css_set_lock 356 */ 357 struct list_head tasks; 358 359 /* 360 * List of cg_cgroup_link objects on link chains from 361 * cgroups referenced from this css_set. Protected by 362 * css_set_lock 363 */ 364 struct list_head cg_links; 365 366 /* 367 * Set of subsystem states, one for each subsystem. This array 368 * is immutable after creation apart from the init_css_set 369 * during subsystem registration (at boot time) and modular subsystem 370 * loading/unloading. 371 */ 372 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 373 374 /* For RCU-protected deletion */ 375 struct rcu_head rcu_head; 376}; 377 378/* 379 * cgroup_map_cb is an abstract callback API for reporting map-valued 380 * control files 381 */ 382 383struct cgroup_map_cb { 384 int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value); 385 void *state; 386}; 387 388/* 389 * struct cftype: handler definitions for cgroup control files 390 * 391 * When reading/writing to a file: 392 * - the cgroup to use is file->f_dentry->d_parent->d_fsdata 393 * - the 'cftype' of the file is file->f_dentry->d_fsdata 394 */ 395 396/* cftype->flags */ 397#define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ 398#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ 399#define CFTYPE_INSANE (1U << 2) /* don't create if sane_behavior */ 400 401#define MAX_CFTYPE_NAME 64 402 403struct cftype { 404 /* 405 * By convention, the name should begin with the name of the 406 * subsystem, followed by a period. Zero length string indicates 407 * end of cftype array. 
408 */ 409 char name[MAX_CFTYPE_NAME]; 410 int private; 411 /* 412 * If not 0, file mode is set to this value, otherwise it will 413 * be figured out automatically 414 */ 415 umode_t mode; 416 417 /* 418 * If non-zero, defines the maximum length of string that can 419 * be passed to write_string; defaults to 64 420 */ 421 size_t max_write_len; 422 423 /* CFTYPE_* flags */ 424 unsigned int flags; 425 426 int (*open)(struct inode *inode, struct file *file); 427 ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, 428 struct file *file, 429 char __user *buf, size_t nbytes, loff_t *ppos); 430 /* 431 * read_u64() is a shortcut for the common case of returning a 432 * single integer. Use it in place of read() 433 */ 434 u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft); 435 /* 436 * read_s64() is a signed version of read_u64() 437 */ 438 s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft); 439 /* 440 * read_map() is used for defining a map of key/value 441 * pairs. It should call cb->fill(cb, key, value) for each 442 * entry. The key/value pairs (and their ordering) should not 443 * change between reboots. 444 */ 445 int (*read_map)(struct cgroup *cont, struct cftype *cft, 446 struct cgroup_map_cb *cb); 447 /* 448 * read_seq_string() is used for outputting a simple sequence 449 * using seqfile. 450 */ 451 int (*read_seq_string)(struct cgroup *cont, struct cftype *cft, 452 struct seq_file *m); 453 454 ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft, 455 struct file *file, 456 const char __user *buf, size_t nbytes, loff_t *ppos); 457 458 /* 459 * write_u64() is a shortcut for the common case of accepting 460 * a single integer (as parsed by simple_strtoull) from 461 * userspace. Use in place of write(); return 0 or error. 
462 */ 463 int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val); 464 /* 465 * write_s64() is a signed version of write_u64() 466 */ 467 int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val); 468 469 /* 470 * write_string() is passed a nul-terminated kernelspace 471 * buffer of maximum length determined by max_write_len. 472 * Returns 0 or -ve error code. 473 */ 474 int (*write_string)(struct cgroup *cgrp, struct cftype *cft, 475 const char *buffer); 476 /* 477 * trigger() callback can be used to get some kick from the 478 * userspace, when the actual string written is not important 479 * at all. The private field can be used to determine the 480 * kick type for multiplexing. 481 */ 482 int (*trigger)(struct cgroup *cgrp, unsigned int event); 483 484 int (*release)(struct inode *inode, struct file *file); 485 486 /* 487 * register_event() callback will be used to add new userspace 488 * waiter for changes related to the cftype. Implement it if 489 * you want to provide this functionality. Use eventfd_signal() 490 * on eventfd to send notification to userspace. 491 */ 492 int (*register_event)(struct cgroup *cgrp, struct cftype *cft, 493 struct eventfd_ctx *eventfd, const char *args); 494 /* 495 * unregister_event() callback will be called when userspace 496 * closes the eventfd or on cgroup removing. 497 * This callback must be implemented, if you want provide 498 * notification functionality. 499 */ 500 void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft, 501 struct eventfd_ctx *eventfd); 502}; 503 504/* 505 * cftype_sets describe cftypes belonging to a subsystem and are chained at 506 * cgroup_subsys->cftsets. Each cftset points to an array of cftypes 507 * terminated by zero length name. 
508 */ 509struct cftype_set { 510 struct list_head node; /* chained at subsys->cftsets */ 511 struct cftype *cfts; 512}; 513 514struct cgroup_scanner { 515 struct cgroup *cg; 516 int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan); 517 void (*process_task)(struct task_struct *p, 518 struct cgroup_scanner *scan); 519 struct ptr_heap *heap; 520 void *data; 521}; 522 523/* 524 * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This 525 * function can be called as long as @cgrp is accessible. 526 */ 527static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) 528{ 529 return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; 530} 531 532/* Caller should hold rcu_read_lock() */ 533static inline const char *cgroup_name(const struct cgroup *cgrp) 534{ 535 return rcu_dereference(cgrp->name)->name; 536} 537 538int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 539int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 540 541int cgroup_is_removed(const struct cgroup *cgrp); 542bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); 543 544int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); 545 546int cgroup_task_count(const struct cgroup *cgrp); 547 548/* 549 * Control Group taskset, used to pass around set of tasks to cgroup_subsys 550 * methods. 
551 */ 552struct cgroup_taskset; 553struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); 554struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); 555struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset); 556int cgroup_taskset_size(struct cgroup_taskset *tset); 557 558/** 559 * cgroup_taskset_for_each - iterate cgroup_taskset 560 * @task: the loop cursor 561 * @skip_cgrp: skip if task's cgroup matches this, %NULL to iterate through all 562 * @tset: taskset to iterate 563 */ 564#define cgroup_taskset_for_each(task, skip_cgrp, tset) \ 565 for ((task) = cgroup_taskset_first((tset)); (task); \ 566 (task) = cgroup_taskset_next((tset))) \ 567 if (!(skip_cgrp) || \ 568 cgroup_taskset_cur_cgroup((tset)) != (skip_cgrp)) 569 570/* 571 * Control Group subsystem type. 572 * See Documentation/cgroups/cgroups.txt for details 573 */ 574 575struct cgroup_subsys { 576 struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp); 577 int (*css_online)(struct cgroup *cgrp); 578 void (*css_offline)(struct cgroup *cgrp); 579 void (*css_free)(struct cgroup *cgrp); 580 581 int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 582 void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 583 void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); 584 void (*fork)(struct task_struct *task); 585 void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, 586 struct task_struct *task); 587 void (*bind)(struct cgroup *root); 588 589 int subsys_id; 590 int disabled; 591 int early_init; 592 /* 593 * True if this subsys uses ID. ID is not available before cgroup_init() 594 * (not available in early_init time.) 595 */ 596 bool use_id; 597 598 /* 599 * If %false, this subsystem is properly hierarchical - 600 * configuration, resource accounting and restriction on a parent 601 * cgroup cover those of its children. 
If %true, hierarchy support 602 * is broken in some ways - some subsystems ignore hierarchy 603 * completely while others are only implemented half-way. 604 * 605 * It's now disallowed to create nested cgroups if the subsystem is 606 * broken and cgroup core will emit a warning message on such 607 * cases. Eventually, all subsystems will be made properly 608 * hierarchical and this will go away. 609 */ 610 bool broken_hierarchy; 611 bool warned_broken_hierarchy; 612 613#define MAX_CGROUP_TYPE_NAMELEN 32 614 const char *name; 615 616 /* 617 * Link to parent, and list entry in parent's children. 618 * Protected by cgroup_lock() 619 */ 620 struct cgroupfs_root *root; 621 struct list_head sibling; 622 /* used when use_id == true */ 623 struct idr idr; 624 spinlock_t id_lock; 625 626 /* list of cftype_sets */ 627 struct list_head cftsets; 628 629 /* base cftypes, automatically [de]registered with subsys itself */ 630 struct cftype *base_cftypes; 631 struct cftype_set base_cftset; 632 633 /* should be defined only by modular subsystems */ 634 struct module *module; 635}; 636 637#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; 638#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) 639#include <linux/cgroup_subsys.h> 640#undef IS_SUBSYS_ENABLED 641#undef SUBSYS 642 643static inline struct cgroup_subsys_state *cgroup_subsys_state( 644 struct cgroup *cgrp, int subsys_id) 645{ 646 return cgrp->subsys[subsys_id]; 647} 648 649/* 650 * function to get the cgroup_subsys_state which allows for extra 651 * rcu_dereference_check() conditions, such as locks used during the 652 * cgroup_subsys::attach() methods. 
653 */ 654#ifdef CONFIG_PROVE_RCU 655extern struct mutex cgroup_mutex; 656#define task_subsys_state_check(task, subsys_id, __c) \ 657 rcu_dereference_check((task)->cgroups->subsys[(subsys_id)], \ 658 lockdep_is_held(&(task)->alloc_lock) || \ 659 lockdep_is_held(&cgroup_mutex) || (__c)) 660#else 661#define task_subsys_state_check(task, subsys_id, __c) \ 662 rcu_dereference((task)->cgroups->subsys[(subsys_id)]) 663#endif 664 665static inline struct cgroup_subsys_state * 666task_subsys_state(struct task_struct *task, int subsys_id) 667{ 668 return task_subsys_state_check(task, subsys_id, false); 669} 670 671static inline struct cgroup* task_cgroup(struct task_struct *task, 672 int subsys_id) 673{ 674 return task_subsys_state(task, subsys_id)->cgroup; 675} 676 677/** 678 * cgroup_for_each_child - iterate through children of a cgroup 679 * @pos: the cgroup * to use as the loop cursor 680 * @cgroup: cgroup whose children to walk 681 * 682 * Walk @cgroup's children. Must be called under rcu_read_lock(). A child 683 * cgroup which hasn't finished ->css_online() or already has finished 684 * ->css_offline() may show up during traversal and it's each subsystem's 685 * responsibility to verify that each @pos is alive. 686 * 687 * If a subsystem synchronizes against the parent in its ->css_online() and 688 * before starting iterating, a cgroup which finished ->css_online() is 689 * guaranteed to be visible in the future iterations. 690 */ 691#define cgroup_for_each_child(pos, cgroup) \ 692 list_for_each_entry_rcu(pos, &(cgroup)->children, sibling) 693 694struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, 695 struct cgroup *cgroup); 696struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); 697 698/** 699 * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants 700 * @pos: the cgroup * to use as the loop cursor 701 * @cgroup: cgroup whose descendants to walk 702 * 703 * Walk @cgroup's descendants. Must be called under rcu_read_lock(). 
A 704 * descendant cgroup which hasn't finished ->css_online() or already has 705 * finished ->css_offline() may show up during traversal and it's each 706 * subsystem's responsibility to verify that each @pos is alive. 707 * 708 * If a subsystem synchronizes against the parent in its ->css_online() and 709 * before starting iterating, and synchronizes against @pos on each 710 * iteration, any descendant cgroup which finished ->css_online() is 711 * guaranteed to be visible in the future iterations. 712 * 713 * In other words, the following guarantees that a descendant can't escape 714 * state updates of its ancestors. 715 * 716 * my_online(@cgrp) 717 * { 718 * Lock @cgrp->parent and @cgrp; 719 * Inherit state from @cgrp->parent; 720 * Unlock both. 721 * } 722 * 723 * my_update_state(@cgrp) 724 * { 725 * Lock @cgrp; 726 * Update @cgrp's state; 727 * Unlock @cgrp; 728 * 729 * cgroup_for_each_descendant_pre(@pos, @cgrp) { 730 * Lock @pos; 731 * Verify @pos is alive and inherit state from @pos->parent; 732 * Unlock @pos; 733 * } 734 * } 735 * 736 * As long as the inheriting step, including checking the parent state, is 737 * enclosed inside @pos locking, double-locking the parent isn't necessary 738 * while inheriting. The state update to the parent is guaranteed to be 739 * visible by walking order and, as long as inheriting operations to the 740 * same @pos are atomic to each other, multiple updates racing each other 741 * still result in the correct state. It's guaranateed that at least one 742 * inheritance happens for any cgroup after the latest update to its 743 * parent. 744 * 745 * If checking parent's state requires locking the parent, each inheriting 746 * iteration should lock and unlock both @pos->parent and @pos. 747 * 748 * Alternatively, a subsystem may choose to use a single global lock to 749 * synchronize ->css_online() and ->css_offline() against tree-walking 750 * operations. 
751 */ 752#define cgroup_for_each_descendant_pre(pos, cgroup) \ 753 for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \ 754 pos = cgroup_next_descendant_pre((pos), (cgroup))) 755 756struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, 757 struct cgroup *cgroup); 758 759/** 760 * cgroup_for_each_descendant_post - post-order walk of a cgroup's descendants 761 * @pos: the cgroup * to use as the loop cursor 762 * @cgroup: cgroup whose descendants to walk 763 * 764 * Similar to cgroup_for_each_descendant_pre() but performs post-order 765 * traversal instead. Note that the walk visibility guarantee described in 766 * pre-order walk doesn't apply the same to post-order walks. 767 */ 768#define cgroup_for_each_descendant_post(pos, cgroup) \ 769 for (pos = cgroup_next_descendant_post(NULL, (cgroup)); (pos); \ 770 pos = cgroup_next_descendant_post((pos), (cgroup))) 771 772/* A cgroup_iter should be treated as an opaque object */ 773struct cgroup_iter { 774 struct list_head *cg_link; 775 struct list_head *task; 776}; 777 778/* 779 * To iterate across the tasks in a cgroup: 780 * 781 * 1) call cgroup_iter_start to initialize an iterator 782 * 783 * 2) call cgroup_iter_next() to retrieve member tasks until it 784 * returns NULL or until you want to end the iteration 785 * 786 * 3) call cgroup_iter_end() to destroy the iterator. 787 * 788 * Or, call cgroup_scan_tasks() to iterate through every task in a 789 * cgroup - cgroup_scan_tasks() holds the css_set_lock when calling 790 * the test_task() callback, but not while calling the process_task() 791 * callback. 
792 */ 793void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it); 794struct task_struct *cgroup_iter_next(struct cgroup *cgrp, 795 struct cgroup_iter *it); 796void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); 797int cgroup_scan_tasks(struct cgroup_scanner *scan); 798int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); 799int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); 800 801/* 802 * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works 803 * if cgroup_subsys.use_id == true. It can be used for looking up and scanning. 804 * CSS ID is assigned at cgroup allocation (create) automatically 805 * and removed when subsys calls free_css_id() function. This is because 806 * the lifetime of cgroup_subsys_state is subsys's matter. 807 * 808 * Looking up and scanning function should be called under rcu_read_lock(). 809 * Taking cgroup_mutex is not necessary for following calls. 810 * But the css returned by this routine can be "not populated yet" or "being 811 * destroyed". The caller should check css and cgroup's status. 812 */ 813 814/* 815 * Typically Called at ->destroy(), or somewhere the subsys frees 816 * cgroup_subsys_state. 
817 */ 818void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css); 819 820/* Find a cgroup_subsys_state which has given ID */ 821 822struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id); 823 824/* Returns true if root is ancestor of cg */ 825bool css_is_ancestor(struct cgroup_subsys_state *cg, 826 const struct cgroup_subsys_state *root); 827 828/* Get id and depth of css */ 829unsigned short css_id(struct cgroup_subsys_state *css); 830unsigned short css_depth(struct cgroup_subsys_state *css); 831struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); 832 833#else /* !CONFIG_CGROUPS */ 834 835static inline int cgroup_init_early(void) { return 0; } 836static inline int cgroup_init(void) { return 0; } 837static inline void cgroup_fork(struct task_struct *p) {} 838static inline void cgroup_post_fork(struct task_struct *p) {} 839static inline void cgroup_exit(struct task_struct *p, int callbacks) {} 840 841static inline void cgroup_lock(void) {} 842static inline void cgroup_unlock(void) {} 843static inline int cgroupstats_build(struct cgroupstats *stats, 844 struct dentry *dentry) 845{ 846 return -EINVAL; 847} 848 849/* No cgroups - nothing to do */ 850static inline int cgroup_attach_task_all(struct task_struct *from, 851 struct task_struct *t) 852{ 853 return 0; 854} 855 856#endif /* !CONFIG_CGROUPS */ 857 858#endif /* _LINUX_CGROUP_H */