/* include/linux/cgroup.h — as of Linux v3.13 */
1#ifndef _LINUX_CGROUP_H 2#define _LINUX_CGROUP_H 3/* 4 * cgroup interface 5 * 6 * Copyright (C) 2003 BULL SA 7 * Copyright (C) 2004-2006 Silicon Graphics, Inc. 8 * 9 */ 10 11#include <linux/sched.h> 12#include <linux/cpumask.h> 13#include <linux/nodemask.h> 14#include <linux/rcupdate.h> 15#include <linux/rculist.h> 16#include <linux/cgroupstats.h> 17#include <linux/prio_heap.h> 18#include <linux/rwsem.h> 19#include <linux/idr.h> 20#include <linux/workqueue.h> 21#include <linux/xattr.h> 22#include <linux/fs.h> 23#include <linux/percpu-refcount.h> 24 25#ifdef CONFIG_CGROUPS 26 27struct cgroupfs_root; 28struct cgroup_subsys; 29struct inode; 30struct cgroup; 31struct css_id; 32struct eventfd_ctx; 33 34extern int cgroup_init_early(void); 35extern int cgroup_init(void); 36extern void cgroup_fork(struct task_struct *p); 37extern void cgroup_post_fork(struct task_struct *p); 38extern void cgroup_exit(struct task_struct *p, int run_callbacks); 39extern int cgroupstats_build(struct cgroupstats *stats, 40 struct dentry *dentry); 41extern int cgroup_load_subsys(struct cgroup_subsys *ss); 42extern void cgroup_unload_subsys(struct cgroup_subsys *ss); 43 44extern int proc_cgroup_show(struct seq_file *, void *); 45 46/* 47 * Define the enumeration of all cgroup subsystems. 48 * 49 * We define ids for builtin subsystems and then modular ones. 50 */ 51#define SUBSYS(_x) _x ## _subsys_id, 52enum cgroup_subsys_id { 53#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) 54#include <linux/cgroup_subsys.h> 55#undef IS_SUBSYS_ENABLED 56 CGROUP_BUILTIN_SUBSYS_COUNT, 57 58 __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1, 59 60#define IS_SUBSYS_ENABLED(option) IS_MODULE(option) 61#include <linux/cgroup_subsys.h> 62#undef IS_SUBSYS_ENABLED 63 CGROUP_SUBSYS_COUNT, 64}; 65#undef SUBSYS 66 67/* Per-subsystem/per-cgroup state maintained by the system. 
*/ 68struct cgroup_subsys_state { 69 /* the cgroup that this css is attached to */ 70 struct cgroup *cgroup; 71 72 /* the cgroup subsystem that this css is attached to */ 73 struct cgroup_subsys *ss; 74 75 /* reference count - access via css_[try]get() and css_put() */ 76 struct percpu_ref refcnt; 77 78 /* the parent css */ 79 struct cgroup_subsys_state *parent; 80 81 unsigned long flags; 82 /* ID for this css, if possible */ 83 struct css_id __rcu *id; 84 85 /* percpu_ref killing and RCU release */ 86 struct rcu_head rcu_head; 87 struct work_struct destroy_work; 88}; 89 90/* bits in struct cgroup_subsys_state flags field */ 91enum { 92 CSS_ROOT = (1 << 0), /* this CSS is the root of the subsystem */ 93 CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ 94}; 95 96/** 97 * css_get - obtain a reference on the specified css 98 * @css: target css 99 * 100 * The caller must already have a reference. 101 */ 102static inline void css_get(struct cgroup_subsys_state *css) 103{ 104 /* We don't need to reference count the root state */ 105 if (!(css->flags & CSS_ROOT)) 106 percpu_ref_get(&css->refcnt); 107} 108 109/** 110 * css_tryget - try to obtain a reference on the specified css 111 * @css: target css 112 * 113 * Obtain a reference on @css if it's alive. The caller naturally needs to 114 * ensure that @css is accessible but doesn't have to be holding a 115 * reference on it - IOW, RCU protected access is good enough for this 116 * function. Returns %true if a reference count was successfully obtained; 117 * %false otherwise. 118 */ 119static inline bool css_tryget(struct cgroup_subsys_state *css) 120{ 121 if (css->flags & CSS_ROOT) 122 return true; 123 return percpu_ref_tryget(&css->refcnt); 124} 125 126/** 127 * css_put - put a css reference 128 * @css: target css 129 * 130 * Put a reference obtained via css_get() and css_tryget(). 
131 */ 132static inline void css_put(struct cgroup_subsys_state *css) 133{ 134 if (!(css->flags & CSS_ROOT)) 135 percpu_ref_put(&css->refcnt); 136} 137 138/* bits in struct cgroup flags field */ 139enum { 140 /* Control Group is dead */ 141 CGRP_DEAD, 142 /* 143 * Control Group has previously had a child cgroup or a task, 144 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) 145 */ 146 CGRP_RELEASABLE, 147 /* Control Group requires release notifications to userspace */ 148 CGRP_NOTIFY_ON_RELEASE, 149 /* 150 * Clone the parent's configuration when creating a new child 151 * cpuset cgroup. For historical reasons, this option can be 152 * specified at mount time and thus is implemented here. 153 */ 154 CGRP_CPUSET_CLONE_CHILDREN, 155 /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */ 156 CGRP_SANE_BEHAVIOR, 157}; 158 159struct cgroup_name { 160 struct rcu_head rcu_head; 161 char name[]; 162}; 163 164struct cgroup { 165 unsigned long flags; /* "unsigned long" so bitops work */ 166 167 /* 168 * idr allocated in-hierarchy ID. 169 * 170 * The ID of the root cgroup is always 0, and a new cgroup 171 * will be assigned with a smallest available ID. 172 */ 173 int id; 174 175 /* the number of attached css's */ 176 int nr_css; 177 178 /* 179 * We link our 'sibling' struct into our parent's 'children'. 180 * Our children link their 'sibling' into our 'children'. 181 */ 182 struct list_head sibling; /* my parent's children */ 183 struct list_head children; /* my children */ 184 struct list_head files; /* my files */ 185 186 struct cgroup *parent; /* my parent */ 187 struct dentry *dentry; /* cgroup fs entry, RCU protected */ 188 189 /* 190 * Monotonically increasing unique serial number which defines a 191 * uniform order among all cgroups. It's guaranteed that all 192 * ->children lists are in the ascending order of ->serial_nr. 193 * It's used to allow interrupting and resuming iterations. 
194 */ 195 u64 serial_nr; 196 197 /* 198 * This is a copy of dentry->d_name, and it's needed because 199 * we can't use dentry->d_name in cgroup_path(). 200 * 201 * You must acquire rcu_read_lock() to access cgrp->name, and 202 * the only place that can change it is rename(), which is 203 * protected by parent dir's i_mutex. 204 * 205 * Normally you should use cgroup_name() wrapper rather than 206 * access it directly. 207 */ 208 struct cgroup_name __rcu *name; 209 210 /* Private pointers for each registered subsystem */ 211 struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; 212 213 struct cgroupfs_root *root; 214 215 /* 216 * List of cgrp_cset_links pointing at css_sets with tasks in this 217 * cgroup. Protected by css_set_lock. 218 */ 219 struct list_head cset_links; 220 221 /* 222 * Linked list running through all cgroups that can 223 * potentially be reaped by the release agent. Protected by 224 * release_list_lock 225 */ 226 struct list_head release_list; 227 228 /* 229 * list of pidlists, up to two for each namespace (one for procs, one 230 * for tasks); created on demand. 231 */ 232 struct list_head pidlists; 233 struct mutex pidlist_mutex; 234 235 /* dummy css with NULL ->ss, points back to this cgroup */ 236 struct cgroup_subsys_state dummy_css; 237 238 /* For css percpu_ref killing and RCU-protected deletion */ 239 struct rcu_head rcu_head; 240 struct work_struct destroy_work; 241 242 /* List of events which userspace want to receive */ 243 struct list_head event_list; 244 spinlock_t event_list_lock; 245 246 /* directory xattrs */ 247 struct simple_xattrs xattrs; 248}; 249 250#define MAX_CGROUP_ROOT_NAMELEN 64 251 252/* cgroupfs_root->flags */ 253enum { 254 /* 255 * Unfortunately, cgroup core and various controllers are riddled 256 * with idiosyncrasies and pointless options. 
The following flag, 257 * when set, will force sane behavior - some options are forced on, 258 * others are disallowed, and some controllers will change their 259 * hierarchical or other behaviors. 260 * 261 * The set of behaviors affected by this flag are still being 262 * determined and developed and the mount option for this flag is 263 * prefixed with __DEVEL__. The prefix will be dropped once we 264 * reach the point where all behaviors are compatible with the 265 * planned unified hierarchy, which will automatically turn on this 266 * flag. 267 * 268 * The followings are the behaviors currently affected this flag. 269 * 270 * - Mount options "noprefix" and "clone_children" are disallowed. 271 * Also, cgroupfs file cgroup.clone_children is not created. 272 * 273 * - When mounting an existing superblock, mount options should 274 * match. 275 * 276 * - Remount is disallowed. 277 * 278 * - rename(2) is disallowed. 279 * 280 * - "tasks" is removed. Everything should be at process 281 * granularity. Use "cgroup.procs" instead. 282 * 283 * - "release_agent" and "notify_on_release" are removed. 284 * Replacement notification mechanism will be implemented. 285 * 286 * - cpuset: tasks will be kept in empty cpusets when hotplug happens 287 * and take masks of ancestors with non-empty cpus/mems, instead of 288 * being moved to an ancestor. 289 * 290 * - cpuset: a task can be moved into an empty cpuset, and again it 291 * takes masks of ancestors. 292 * 293 * - memcg: use_hierarchy is on by default and the cgroup file for 294 * the flag is not created. 295 * 296 * - blkcg: blk-throttle becomes properly hierarchical. 
297 */ 298 CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), 299 300 CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ 301 CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ 302 303 /* mount options live below bit 16 */ 304 CGRP_ROOT_OPTION_MASK = (1 << 16) - 1, 305 306 CGRP_ROOT_SUBSYS_BOUND = (1 << 16), /* subsystems finished binding */ 307}; 308 309/* 310 * A cgroupfs_root represents the root of a cgroup hierarchy, and may be 311 * associated with a superblock to form an active hierarchy. This is 312 * internal to cgroup core. Don't access directly from controllers. 313 */ 314struct cgroupfs_root { 315 struct super_block *sb; 316 317 /* The bitmask of subsystems attached to this hierarchy */ 318 unsigned long subsys_mask; 319 320 /* Unique id for this hierarchy. */ 321 int hierarchy_id; 322 323 /* A list running through the attached subsystems */ 324 struct list_head subsys_list; 325 326 /* The root cgroup for this hierarchy */ 327 struct cgroup top_cgroup; 328 329 /* Tracks how many cgroups are currently defined in hierarchy.*/ 330 int number_of_cgroups; 331 332 /* A list running through the active hierarchies */ 333 struct list_head root_list; 334 335 /* Hierarchy-specific flags */ 336 unsigned long flags; 337 338 /* IDs for cgroups in this hierarchy */ 339 struct idr cgroup_idr; 340 341 /* The path to use for release notifications. */ 342 char release_agent_path[PATH_MAX]; 343 344 /* The name for this hierarchy - may be empty */ 345 char name[MAX_CGROUP_ROOT_NAMELEN]; 346}; 347 348/* 349 * A css_set is a structure holding pointers to a set of 350 * cgroup_subsys_state objects. This saves space in the task struct 351 * object and speeds up fork()/exit(), since a single inc/dec and a 352 * list_add()/del() can bump the reference count on the entire cgroup 353 * set for a task. 
354 */ 355 356struct css_set { 357 358 /* Reference count */ 359 atomic_t refcount; 360 361 /* 362 * List running through all cgroup groups in the same hash 363 * slot. Protected by css_set_lock 364 */ 365 struct hlist_node hlist; 366 367 /* 368 * List running through all tasks using this cgroup 369 * group. Protected by css_set_lock 370 */ 371 struct list_head tasks; 372 373 /* 374 * List of cgrp_cset_links pointing at cgroups referenced from this 375 * css_set. Protected by css_set_lock. 376 */ 377 struct list_head cgrp_links; 378 379 /* 380 * Set of subsystem states, one for each subsystem. This array 381 * is immutable after creation apart from the init_css_set 382 * during subsystem registration (at boot time) and modular subsystem 383 * loading/unloading. 384 */ 385 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 386 387 /* For RCU-protected deletion */ 388 struct rcu_head rcu_head; 389}; 390 391/* 392 * cgroup_map_cb is an abstract callback API for reporting map-valued 393 * control files 394 */ 395 396struct cgroup_map_cb { 397 int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value); 398 void *state; 399}; 400 401/* 402 * struct cftype: handler definitions for cgroup control files 403 * 404 * When reading/writing to a file: 405 * - the cgroup to use is file->f_dentry->d_parent->d_fsdata 406 * - the 'cftype' of the file is file->f_dentry->d_fsdata 407 */ 408 409/* cftype->flags */ 410enum { 411 CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ 412 CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ 413 CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */ 414 CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ 415}; 416 417#define MAX_CFTYPE_NAME 64 418 419struct cftype { 420 /* 421 * By convention, the name should begin with the name of the 422 * subsystem, followed by a period. Zero length string indicates 423 * end of cftype array. 
424 */ 425 char name[MAX_CFTYPE_NAME]; 426 int private; 427 /* 428 * If not 0, file mode is set to this value, otherwise it will 429 * be figured out automatically 430 */ 431 umode_t mode; 432 433 /* 434 * If non-zero, defines the maximum length of string that can 435 * be passed to write_string; defaults to 64 436 */ 437 size_t max_write_len; 438 439 /* CFTYPE_* flags */ 440 unsigned int flags; 441 442 /* 443 * The subsys this file belongs to. Initialized automatically 444 * during registration. NULL for cgroup core files. 445 */ 446 struct cgroup_subsys *ss; 447 448 int (*open)(struct inode *inode, struct file *file); 449 ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft, 450 struct file *file, 451 char __user *buf, size_t nbytes, loff_t *ppos); 452 /* 453 * read_u64() is a shortcut for the common case of returning a 454 * single integer. Use it in place of read() 455 */ 456 u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft); 457 /* 458 * read_s64() is a signed version of read_u64() 459 */ 460 s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft); 461 /* 462 * read_map() is used for defining a map of key/value 463 * pairs. It should call cb->fill(cb, key, value) for each 464 * entry. The key/value pairs (and their ordering) should not 465 * change between reboots. 466 */ 467 int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft, 468 struct cgroup_map_cb *cb); 469 /* 470 * read_seq_string() is used for outputting a simple sequence 471 * using seqfile. 472 */ 473 int (*read_seq_string)(struct cgroup_subsys_state *css, 474 struct cftype *cft, struct seq_file *m); 475 476 ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft, 477 struct file *file, 478 const char __user *buf, size_t nbytes, loff_t *ppos); 479 480 /* 481 * write_u64() is a shortcut for the common case of accepting 482 * a single integer (as parsed by simple_strtoull) from 483 * userspace. 
Use in place of write(); return 0 or error. 484 */ 485 int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft, 486 u64 val); 487 /* 488 * write_s64() is a signed version of write_u64() 489 */ 490 int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft, 491 s64 val); 492 493 /* 494 * write_string() is passed a nul-terminated kernelspace 495 * buffer of maximum length determined by max_write_len. 496 * Returns 0 or -ve error code. 497 */ 498 int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft, 499 const char *buffer); 500 /* 501 * trigger() callback can be used to get some kick from the 502 * userspace, when the actual string written is not important 503 * at all. The private field can be used to determine the 504 * kick type for multiplexing. 505 */ 506 int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); 507 508 int (*release)(struct inode *inode, struct file *file); 509 510 /* 511 * register_event() callback will be used to add new userspace 512 * waiter for changes related to the cftype. Implement it if 513 * you want to provide this functionality. Use eventfd_signal() 514 * on eventfd to send notification to userspace. 515 */ 516 int (*register_event)(struct cgroup_subsys_state *css, 517 struct cftype *cft, struct eventfd_ctx *eventfd, 518 const char *args); 519 /* 520 * unregister_event() callback will be called when userspace 521 * closes the eventfd or on cgroup removing. 522 * This callback must be implemented, if you want provide 523 * notification functionality. 524 */ 525 void (*unregister_event)(struct cgroup_subsys_state *css, 526 struct cftype *cft, 527 struct eventfd_ctx *eventfd); 528}; 529 530/* 531 * cftype_sets describe cftypes belonging to a subsystem and are chained at 532 * cgroup_subsys->cftsets. Each cftset points to an array of cftypes 533 * terminated by zero length name. 
534 */ 535struct cftype_set { 536 struct list_head node; /* chained at subsys->cftsets */ 537 struct cftype *cfts; 538}; 539 540/* 541 * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This 542 * function can be called as long as @cgrp is accessible. 543 */ 544static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) 545{ 546 return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; 547} 548 549/* Caller should hold rcu_read_lock() */ 550static inline const char *cgroup_name(const struct cgroup *cgrp) 551{ 552 return rcu_dereference(cgrp->name)->name; 553} 554 555int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 556int cgroup_rm_cftypes(struct cftype *cfts); 557 558bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); 559 560int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); 561int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); 562 563int cgroup_task_count(const struct cgroup *cgrp); 564 565/* 566 * Control Group taskset, used to pass around set of tasks to cgroup_subsys 567 * methods. 568 */ 569struct cgroup_taskset; 570struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); 571struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); 572struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset, 573 int subsys_id); 574int cgroup_taskset_size(struct cgroup_taskset *tset); 575 576/** 577 * cgroup_taskset_for_each - iterate cgroup_taskset 578 * @task: the loop cursor 579 * @skip_css: skip if task's css matches this, %NULL to iterate through all 580 * @tset: taskset to iterate 581 */ 582#define cgroup_taskset_for_each(task, skip_css, tset) \ 583 for ((task) = cgroup_taskset_first((tset)); (task); \ 584 (task) = cgroup_taskset_next((tset))) \ 585 if (!(skip_css) || \ 586 cgroup_taskset_cur_css((tset), \ 587 (skip_css)->ss->subsys_id) != (skip_css)) 588 589/* 590 * Control Group subsystem type. 
591 * See Documentation/cgroups/cgroups.txt for details 592 */ 593 594struct cgroup_subsys { 595 struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); 596 int (*css_online)(struct cgroup_subsys_state *css); 597 void (*css_offline)(struct cgroup_subsys_state *css); 598 void (*css_free)(struct cgroup_subsys_state *css); 599 600 int (*can_attach)(struct cgroup_subsys_state *css, 601 struct cgroup_taskset *tset); 602 void (*cancel_attach)(struct cgroup_subsys_state *css, 603 struct cgroup_taskset *tset); 604 void (*attach)(struct cgroup_subsys_state *css, 605 struct cgroup_taskset *tset); 606 void (*fork)(struct task_struct *task); 607 void (*exit)(struct cgroup_subsys_state *css, 608 struct cgroup_subsys_state *old_css, 609 struct task_struct *task); 610 void (*bind)(struct cgroup_subsys_state *root_css); 611 612 int subsys_id; 613 int disabled; 614 int early_init; 615 616 /* 617 * If %false, this subsystem is properly hierarchical - 618 * configuration, resource accounting and restriction on a parent 619 * cgroup cover those of its children. If %true, hierarchy support 620 * is broken in some ways - some subsystems ignore hierarchy 621 * completely while others are only implemented half-way. 622 * 623 * It's now disallowed to create nested cgroups if the subsystem is 624 * broken and cgroup core will emit a warning message on such 625 * cases. Eventually, all subsystems will be made properly 626 * hierarchical and this will go away. 627 */ 628 bool broken_hierarchy; 629 bool warned_broken_hierarchy; 630 631#define MAX_CGROUP_TYPE_NAMELEN 32 632 const char *name; 633 634 /* 635 * Link to parent, and list entry in parent's children. 
636 * Protected by cgroup_lock() 637 */ 638 struct cgroupfs_root *root; 639 struct list_head sibling; 640 641 /* list of cftype_sets */ 642 struct list_head cftsets; 643 644 /* base cftypes, automatically [de]registered with subsys itself */ 645 struct cftype *base_cftypes; 646 struct cftype_set base_cftset; 647 648 /* should be defined only by modular subsystems */ 649 struct module *module; 650}; 651 652#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; 653#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) 654#include <linux/cgroup_subsys.h> 655#undef IS_SUBSYS_ENABLED 656#undef SUBSYS 657 658/** 659 * css_parent - find the parent css 660 * @css: the target cgroup_subsys_state 661 * 662 * Return the parent css of @css. This function is guaranteed to return 663 * non-NULL parent as long as @css isn't the root. 664 */ 665static inline 666struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css) 667{ 668 return css->parent; 669} 670 671/** 672 * task_css_set_check - obtain a task's css_set with extra access conditions 673 * @task: the task to obtain css_set for 674 * @__c: extra condition expression to be passed to rcu_dereference_check() 675 * 676 * A task's css_set is RCU protected, initialized and exited while holding 677 * task_lock(), and can only be modified while holding both cgroup_mutex 678 * and task_lock() while the task is alive. This macro verifies that the 679 * caller is inside proper critical section and returns @task's css_set. 680 * 681 * The caller can also specify additional allowed conditions via @__c, such 682 * as locks used during the cgroup_subsys::attach() methods. 
683 */ 684#ifdef CONFIG_PROVE_RCU 685extern struct mutex cgroup_mutex; 686#define task_css_set_check(task, __c) \ 687 rcu_dereference_check((task)->cgroups, \ 688 lockdep_is_held(&(task)->alloc_lock) || \ 689 lockdep_is_held(&cgroup_mutex) || (__c)) 690#else 691#define task_css_set_check(task, __c) \ 692 rcu_dereference((task)->cgroups) 693#endif 694 695/** 696 * task_css_check - obtain css for (task, subsys) w/ extra access conds 697 * @task: the target task 698 * @subsys_id: the target subsystem ID 699 * @__c: extra condition expression to be passed to rcu_dereference_check() 700 * 701 * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The 702 * synchronization rules are the same as task_css_set_check(). 703 */ 704#define task_css_check(task, subsys_id, __c) \ 705 task_css_set_check((task), (__c))->subsys[(subsys_id)] 706 707/** 708 * task_css_set - obtain a task's css_set 709 * @task: the task to obtain css_set for 710 * 711 * See task_css_set_check(). 712 */ 713static inline struct css_set *task_css_set(struct task_struct *task) 714{ 715 return task_css_set_check(task, false); 716} 717 718/** 719 * task_css - obtain css for (task, subsys) 720 * @task: the target task 721 * @subsys_id: the target subsystem ID 722 * 723 * See task_css_check(). 
724 */ 725static inline struct cgroup_subsys_state *task_css(struct task_struct *task, 726 int subsys_id) 727{ 728 return task_css_check(task, subsys_id, false); 729} 730 731static inline struct cgroup *task_cgroup(struct task_struct *task, 732 int subsys_id) 733{ 734 return task_css(task, subsys_id)->cgroup; 735} 736 737struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, 738 struct cgroup_subsys_state *parent); 739 740struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); 741 742/** 743 * css_for_each_child - iterate through children of a css 744 * @pos: the css * to use as the loop cursor 745 * @parent: css whose children to walk 746 * 747 * Walk @parent's children. Must be called under rcu_read_lock(). A child 748 * css which hasn't finished ->css_online() or already has finished 749 * ->css_offline() may show up during traversal and it's each subsystem's 750 * responsibility to verify that each @pos is alive. 751 * 752 * If a subsystem synchronizes against the parent in its ->css_online() and 753 * before starting iterating, a css which finished ->css_online() is 754 * guaranteed to be visible in the future iterations. 755 * 756 * It is allowed to temporarily drop RCU read lock during iteration. The 757 * caller is responsible for ensuring that @pos remains accessible until 758 * the start of the next iteration by, for example, bumping the css refcnt. 
759 */ 760#define css_for_each_child(pos, parent) \ 761 for ((pos) = css_next_child(NULL, (parent)); (pos); \ 762 (pos) = css_next_child((pos), (parent))) 763 764struct cgroup_subsys_state * 765css_next_descendant_pre(struct cgroup_subsys_state *pos, 766 struct cgroup_subsys_state *css); 767 768struct cgroup_subsys_state * 769css_rightmost_descendant(struct cgroup_subsys_state *pos); 770 771/** 772 * css_for_each_descendant_pre - pre-order walk of a css's descendants 773 * @pos: the css * to use as the loop cursor 774 * @root: css whose descendants to walk 775 * 776 * Walk @root's descendants. @root is included in the iteration and the 777 * first node to be visited. Must be called under rcu_read_lock(). A 778 * descendant css which hasn't finished ->css_online() or already has 779 * finished ->css_offline() may show up during traversal and it's each 780 * subsystem's responsibility to verify that each @pos is alive. 781 * 782 * If a subsystem synchronizes against the parent in its ->css_online() and 783 * before starting iterating, and synchronizes against @pos on each 784 * iteration, any descendant css which finished ->css_online() is 785 * guaranteed to be visible in the future iterations. 786 * 787 * In other words, the following guarantees that a descendant can't escape 788 * state updates of its ancestors. 789 * 790 * my_online(@css) 791 * { 792 * Lock @css's parent and @css; 793 * Inherit state from the parent; 794 * Unlock both. 795 * } 796 * 797 * my_update_state(@css) 798 * { 799 * css_for_each_descendant_pre(@pos, @css) { 800 * Lock @pos; 801 * if (@pos == @css) 802 * Update @css's state; 803 * else 804 * Verify @pos is alive and inherit state from its parent; 805 * Unlock @pos; 806 * } 807 * } 808 * 809 * As long as the inheriting step, including checking the parent state, is 810 * enclosed inside @pos locking, double-locking the parent isn't necessary 811 * while inheriting. 
The state update to the parent is guaranteed to be 812 * visible by walking order and, as long as inheriting operations to the 813 * same @pos are atomic to each other, multiple updates racing each other 814 * still result in the correct state. It's guaranateed that at least one 815 * inheritance happens for any css after the latest update to its parent. 816 * 817 * If checking parent's state requires locking the parent, each inheriting 818 * iteration should lock and unlock both @pos->parent and @pos. 819 * 820 * Alternatively, a subsystem may choose to use a single global lock to 821 * synchronize ->css_online() and ->css_offline() against tree-walking 822 * operations. 823 * 824 * It is allowed to temporarily drop RCU read lock during iteration. The 825 * caller is responsible for ensuring that @pos remains accessible until 826 * the start of the next iteration by, for example, bumping the css refcnt. 827 */ 828#define css_for_each_descendant_pre(pos, css) \ 829 for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ 830 (pos) = css_next_descendant_pre((pos), (css))) 831 832struct cgroup_subsys_state * 833css_next_descendant_post(struct cgroup_subsys_state *pos, 834 struct cgroup_subsys_state *css); 835 836/** 837 * css_for_each_descendant_post - post-order walk of a css's descendants 838 * @pos: the css * to use as the loop cursor 839 * @css: css whose descendants to walk 840 * 841 * Similar to css_for_each_descendant_pre() but performs post-order 842 * traversal instead. @root is included in the iteration and the last 843 * node to be visited. Note that the walk visibility guarantee described 844 * in pre-order walk doesn't apply the same to post-order walks. 
845 */ 846#define css_for_each_descendant_post(pos, css) \ 847 for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ 848 (pos) = css_next_descendant_post((pos), (css))) 849 850/* A css_task_iter should be treated as an opaque object */ 851struct css_task_iter { 852 struct cgroup_subsys_state *origin_css; 853 struct list_head *cset_link; 854 struct list_head *task; 855}; 856 857void css_task_iter_start(struct cgroup_subsys_state *css, 858 struct css_task_iter *it); 859struct task_struct *css_task_iter_next(struct css_task_iter *it); 860void css_task_iter_end(struct css_task_iter *it); 861 862int css_scan_tasks(struct cgroup_subsys_state *css, 863 bool (*test)(struct task_struct *, void *), 864 void (*process)(struct task_struct *, void *), 865 void *data, struct ptr_heap *heap); 866 867int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); 868int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); 869 870struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, 871 struct cgroup_subsys *ss); 872 873#else /* !CONFIG_CGROUPS */ 874 875static inline int cgroup_init_early(void) { return 0; } 876static inline int cgroup_init(void) { return 0; } 877static inline void cgroup_fork(struct task_struct *p) {} 878static inline void cgroup_post_fork(struct task_struct *p) {} 879static inline void cgroup_exit(struct task_struct *p, int callbacks) {} 880 881static inline int cgroupstats_build(struct cgroupstats *stats, 882 struct dentry *dentry) 883{ 884 return -EINVAL; 885} 886 887/* No cgroups - nothing to do */ 888static inline int cgroup_attach_task_all(struct task_struct *from, 889 struct task_struct *t) 890{ 891 return 0; 892} 893 894#endif /* !CONFIG_CGROUPS */ 895 896#endif /* _LINUX_CGROUP_H */