/* at v4.6 21 kB view raw */
1#ifndef _LINUX_CGROUP_H 2#define _LINUX_CGROUP_H 3/* 4 * cgroup interface 5 * 6 * Copyright (C) 2003 BULL SA 7 * Copyright (C) 2004-2006 Silicon Graphics, Inc. 8 * 9 */ 10 11#include <linux/sched.h> 12#include <linux/cpumask.h> 13#include <linux/nodemask.h> 14#include <linux/rculist.h> 15#include <linux/cgroupstats.h> 16#include <linux/fs.h> 17#include <linux/seq_file.h> 18#include <linux/kernfs.h> 19#include <linux/jump_label.h> 20#include <linux/nsproxy.h> 21#include <linux/types.h> 22#include <linux/ns_common.h> 23#include <linux/nsproxy.h> 24#include <linux/user_namespace.h> 25 26#include <linux/cgroup-defs.h> 27 28#ifdef CONFIG_CGROUPS 29 30/* 31 * All weight knobs on the default hierarhcy should use the following min, 32 * default and max values. The default value is the logarithmic center of 33 * MIN and MAX and allows 100x to be expressed in both directions. 34 */ 35#define CGROUP_WEIGHT_MIN 1 36#define CGROUP_WEIGHT_DFL 100 37#define CGROUP_WEIGHT_MAX 10000 38 39/* a css_task_iter should be treated as an opaque object */ 40struct css_task_iter { 41 struct cgroup_subsys *ss; 42 43 struct list_head *cset_pos; 44 struct list_head *cset_head; 45 46 struct list_head *task_pos; 47 struct list_head *tasks_head; 48 struct list_head *mg_tasks_head; 49 50 struct css_set *cur_cset; 51 struct task_struct *cur_task; 52 struct list_head iters_node; /* css_set->task_iters */ 53}; 54 55extern struct cgroup_root cgrp_dfl_root; 56extern struct css_set init_css_set; 57 58#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; 59#include <linux/cgroup_subsys.h> 60#undef SUBSYS 61 62#define SUBSYS(_x) \ 63 extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \ 64 extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key; 65#include <linux/cgroup_subsys.h> 66#undef SUBSYS 67 68/** 69 * cgroup_subsys_enabled - fast test on whether a subsys is enabled 70 * @ss: subsystem in question 71 */ 72#define cgroup_subsys_enabled(ss) \ 73 static_branch_likely(&ss ## 
_enabled_key) 74 75/** 76 * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy 77 * @ss: subsystem in question 78 */ 79#define cgroup_subsys_on_dfl(ss) \ 80 static_branch_likely(&ss ## _on_dfl_key) 81 82bool css_has_online_children(struct cgroup_subsys_state *css); 83struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); 84struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, 85 struct cgroup_subsys *ss); 86struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, 87 struct cgroup_subsys *ss); 88 89struct cgroup *cgroup_get_from_path(const char *path); 90 91int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); 92int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); 93 94int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 95int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 96int cgroup_rm_cftypes(struct cftype *cfts); 97void cgroup_file_notify(struct cgroup_file *cfile); 98 99char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); 100int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); 101int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, 102 struct pid *pid, struct task_struct *tsk); 103 104void cgroup_fork(struct task_struct *p); 105extern int cgroup_can_fork(struct task_struct *p); 106extern void cgroup_cancel_fork(struct task_struct *p); 107extern void cgroup_post_fork(struct task_struct *p); 108void cgroup_exit(struct task_struct *p); 109void cgroup_free(struct task_struct *p); 110 111int cgroup_init_early(void); 112int cgroup_init(void); 113 114/* 115 * Iteration helpers and macros. 
116 */ 117 118struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, 119 struct cgroup_subsys_state *parent); 120struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos, 121 struct cgroup_subsys_state *css); 122struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos); 123struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos, 124 struct cgroup_subsys_state *css); 125 126struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, 127 struct cgroup_subsys_state **dst_cssp); 128struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, 129 struct cgroup_subsys_state **dst_cssp); 130 131void css_task_iter_start(struct cgroup_subsys_state *css, 132 struct css_task_iter *it); 133struct task_struct *css_task_iter_next(struct css_task_iter *it); 134void css_task_iter_end(struct css_task_iter *it); 135 136/** 137 * css_for_each_child - iterate through children of a css 138 * @pos: the css * to use as the loop cursor 139 * @parent: css whose children to walk 140 * 141 * Walk @parent's children. Must be called under rcu_read_lock(). 142 * 143 * If a subsystem synchronizes ->css_online() and the start of iteration, a 144 * css which finished ->css_online() is guaranteed to be visible in the 145 * future iterations and will stay visible until the last reference is put. 146 * A css which hasn't finished ->css_online() or already finished 147 * ->css_offline() may show up during traversal. It's each subsystem's 148 * responsibility to synchronize against on/offlining. 149 * 150 * It is allowed to temporarily drop RCU read lock during iteration. The 151 * caller is responsible for ensuring that @pos remains accessible until 152 * the start of the next iteration by, for example, bumping the css refcnt. 
153 */ 154#define css_for_each_child(pos, parent) \ 155 for ((pos) = css_next_child(NULL, (parent)); (pos); \ 156 (pos) = css_next_child((pos), (parent))) 157 158/** 159 * css_for_each_descendant_pre - pre-order walk of a css's descendants 160 * @pos: the css * to use as the loop cursor 161 * @root: css whose descendants to walk 162 * 163 * Walk @root's descendants. @root is included in the iteration and the 164 * first node to be visited. Must be called under rcu_read_lock(). 165 * 166 * If a subsystem synchronizes ->css_online() and the start of iteration, a 167 * css which finished ->css_online() is guaranteed to be visible in the 168 * future iterations and will stay visible until the last reference is put. 169 * A css which hasn't finished ->css_online() or already finished 170 * ->css_offline() may show up during traversal. It's each subsystem's 171 * responsibility to synchronize against on/offlining. 172 * 173 * For example, the following guarantees that a descendant can't escape 174 * state updates of its ancestors. 175 * 176 * my_online(@css) 177 * { 178 * Lock @css's parent and @css; 179 * Inherit state from the parent; 180 * Unlock both. 181 * } 182 * 183 * my_update_state(@css) 184 * { 185 * css_for_each_descendant_pre(@pos, @css) { 186 * Lock @pos; 187 * if (@pos == @css) 188 * Update @css's state; 189 * else 190 * Verify @pos is alive and inherit state from its parent; 191 * Unlock @pos; 192 * } 193 * } 194 * 195 * As long as the inheriting step, including checking the parent state, is 196 * enclosed inside @pos locking, double-locking the parent isn't necessary 197 * while inheriting. The state update to the parent is guaranteed to be 198 * visible by walking order and, as long as inheriting operations to the 199 * same @pos are atomic to each other, multiple updates racing each other 200 * still result in the correct state. It's guaranateed that at least one 201 * inheritance happens for any css after the latest update to its parent. 
202 * 203 * If checking parent's state requires locking the parent, each inheriting 204 * iteration should lock and unlock both @pos->parent and @pos. 205 * 206 * Alternatively, a subsystem may choose to use a single global lock to 207 * synchronize ->css_online() and ->css_offline() against tree-walking 208 * operations. 209 * 210 * It is allowed to temporarily drop RCU read lock during iteration. The 211 * caller is responsible for ensuring that @pos remains accessible until 212 * the start of the next iteration by, for example, bumping the css refcnt. 213 */ 214#define css_for_each_descendant_pre(pos, css) \ 215 for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \ 216 (pos) = css_next_descendant_pre((pos), (css))) 217 218/** 219 * css_for_each_descendant_post - post-order walk of a css's descendants 220 * @pos: the css * to use as the loop cursor 221 * @css: css whose descendants to walk 222 * 223 * Similar to css_for_each_descendant_pre() but performs post-order 224 * traversal instead. @root is included in the iteration and the last 225 * node to be visited. 226 * 227 * If a subsystem synchronizes ->css_online() and the start of iteration, a 228 * css which finished ->css_online() is guaranteed to be visible in the 229 * future iterations and will stay visible until the last reference is put. 230 * A css which hasn't finished ->css_online() or already finished 231 * ->css_offline() may show up during traversal. It's each subsystem's 232 * responsibility to synchronize against on/offlining. 233 * 234 * Note that the walk visibility guarantee example described in pre-order 235 * walk doesn't apply the same to post-order walks. 
236 */ 237#define css_for_each_descendant_post(pos, css) \ 238 for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ 239 (pos) = css_next_descendant_post((pos), (css))) 240 241/** 242 * cgroup_taskset_for_each - iterate cgroup_taskset 243 * @task: the loop cursor 244 * @dst_css: the destination css 245 * @tset: taskset to iterate 246 * 247 * @tset may contain multiple tasks and they may belong to multiple 248 * processes. 249 * 250 * On the v2 hierarchy, there may be tasks from multiple processes and they 251 * may not share the source or destination csses. 252 * 253 * On traditional hierarchies, when there are multiple tasks in @tset, if a 254 * task of a process is in @tset, all tasks of the process are in @tset. 255 * Also, all are guaranteed to share the same source and destination csses. 256 * 257 * Iteration is not in any specific order. 258 */ 259#define cgroup_taskset_for_each(task, dst_css, tset) \ 260 for ((task) = cgroup_taskset_first((tset), &(dst_css)); \ 261 (task); \ 262 (task) = cgroup_taskset_next((tset), &(dst_css))) 263 264/** 265 * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset 266 * @leader: the loop cursor 267 * @dst_css: the destination css 268 * @tset: takset to iterate 269 * 270 * Iterate threadgroup leaders of @tset. For single-task migrations, @tset 271 * may not contain any. 272 */ 273#define cgroup_taskset_for_each_leader(leader, dst_css, tset) \ 274 for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \ 275 (leader); \ 276 (leader) = cgroup_taskset_next((tset), &(dst_css))) \ 277 if ((leader) != (leader)->group_leader) \ 278 ; \ 279 else 280 281/* 282 * Inline functions. 283 */ 284 285/** 286 * css_get - obtain a reference on the specified css 287 * @css: target css 288 * 289 * The caller must already have a reference. 
290 */ 291static inline void css_get(struct cgroup_subsys_state *css) 292{ 293 if (!(css->flags & CSS_NO_REF)) 294 percpu_ref_get(&css->refcnt); 295} 296 297/** 298 * css_get_many - obtain references on the specified css 299 * @css: target css 300 * @n: number of references to get 301 * 302 * The caller must already have a reference. 303 */ 304static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n) 305{ 306 if (!(css->flags & CSS_NO_REF)) 307 percpu_ref_get_many(&css->refcnt, n); 308} 309 310/** 311 * css_tryget - try to obtain a reference on the specified css 312 * @css: target css 313 * 314 * Obtain a reference on @css unless it already has reached zero and is 315 * being released. This function doesn't care whether @css is on or 316 * offline. The caller naturally needs to ensure that @css is accessible 317 * but doesn't have to be holding a reference on it - IOW, RCU protected 318 * access is good enough for this function. Returns %true if a reference 319 * count was successfully obtained; %false otherwise. 320 */ 321static inline bool css_tryget(struct cgroup_subsys_state *css) 322{ 323 if (!(css->flags & CSS_NO_REF)) 324 return percpu_ref_tryget(&css->refcnt); 325 return true; 326} 327 328/** 329 * css_tryget_online - try to obtain a reference on the specified css if online 330 * @css: target css 331 * 332 * Obtain a reference on @css if it's online. The caller naturally needs 333 * to ensure that @css is accessible but doesn't have to be holding a 334 * reference on it - IOW, RCU protected access is good enough for this 335 * function. Returns %true if a reference count was successfully obtained; 336 * %false otherwise. 
337 */ 338static inline bool css_tryget_online(struct cgroup_subsys_state *css) 339{ 340 if (!(css->flags & CSS_NO_REF)) 341 return percpu_ref_tryget_live(&css->refcnt); 342 return true; 343} 344 345/** 346 * css_put - put a css reference 347 * @css: target css 348 * 349 * Put a reference obtained via css_get() and css_tryget_online(). 350 */ 351static inline void css_put(struct cgroup_subsys_state *css) 352{ 353 if (!(css->flags & CSS_NO_REF)) 354 percpu_ref_put(&css->refcnt); 355} 356 357/** 358 * css_put_many - put css references 359 * @css: target css 360 * @n: number of references to put 361 * 362 * Put references obtained via css_get() and css_tryget_online(). 363 */ 364static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) 365{ 366 if (!(css->flags & CSS_NO_REF)) 367 percpu_ref_put_many(&css->refcnt, n); 368} 369 370static inline void cgroup_put(struct cgroup *cgrp) 371{ 372 css_put(&cgrp->self); 373} 374 375/** 376 * task_css_set_check - obtain a task's css_set with extra access conditions 377 * @task: the task to obtain css_set for 378 * @__c: extra condition expression to be passed to rcu_dereference_check() 379 * 380 * A task's css_set is RCU protected, initialized and exited while holding 381 * task_lock(), and can only be modified while holding both cgroup_mutex 382 * and task_lock() while the task is alive. This macro verifies that the 383 * caller is inside proper critical section and returns @task's css_set. 384 * 385 * The caller can also specify additional allowed conditions via @__c, such 386 * as locks used during the cgroup_subsys::attach() methods. 
387 */ 388#ifdef CONFIG_PROVE_RCU 389extern struct mutex cgroup_mutex; 390extern spinlock_t css_set_lock; 391#define task_css_set_check(task, __c) \ 392 rcu_dereference_check((task)->cgroups, \ 393 lockdep_is_held(&cgroup_mutex) || \ 394 lockdep_is_held(&css_set_lock) || \ 395 ((task)->flags & PF_EXITING) || (__c)) 396#else 397#define task_css_set_check(task, __c) \ 398 rcu_dereference((task)->cgroups) 399#endif 400 401/** 402 * task_css_check - obtain css for (task, subsys) w/ extra access conds 403 * @task: the target task 404 * @subsys_id: the target subsystem ID 405 * @__c: extra condition expression to be passed to rcu_dereference_check() 406 * 407 * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The 408 * synchronization rules are the same as task_css_set_check(). 409 */ 410#define task_css_check(task, subsys_id, __c) \ 411 task_css_set_check((task), (__c))->subsys[(subsys_id)] 412 413/** 414 * task_css_set - obtain a task's css_set 415 * @task: the task to obtain css_set for 416 * 417 * See task_css_set_check(). 418 */ 419static inline struct css_set *task_css_set(struct task_struct *task) 420{ 421 return task_css_set_check(task, false); 422} 423 424/** 425 * task_css - obtain css for (task, subsys) 426 * @task: the target task 427 * @subsys_id: the target subsystem ID 428 * 429 * See task_css_check(). 430 */ 431static inline struct cgroup_subsys_state *task_css(struct task_struct *task, 432 int subsys_id) 433{ 434 return task_css_check(task, subsys_id, false); 435} 436 437/** 438 * task_get_css - find and get the css for (task, subsys) 439 * @task: the target task 440 * @subsys_id: the target subsystem ID 441 * 442 * Find the css for the (@task, @subsys_id) combination, increment a 443 * reference on and return it. This function is guaranteed to return a 444 * valid css. 
445 */ 446static inline struct cgroup_subsys_state * 447task_get_css(struct task_struct *task, int subsys_id) 448{ 449 struct cgroup_subsys_state *css; 450 451 rcu_read_lock(); 452 while (true) { 453 css = task_css(task, subsys_id); 454 if (likely(css_tryget_online(css))) 455 break; 456 cpu_relax(); 457 } 458 rcu_read_unlock(); 459 return css; 460} 461 462/** 463 * task_css_is_root - test whether a task belongs to the root css 464 * @task: the target task 465 * @subsys_id: the target subsystem ID 466 * 467 * Test whether @task belongs to the root css on the specified subsystem. 468 * May be invoked in any context. 469 */ 470static inline bool task_css_is_root(struct task_struct *task, int subsys_id) 471{ 472 return task_css_check(task, subsys_id, true) == 473 init_css_set.subsys[subsys_id]; 474} 475 476static inline struct cgroup *task_cgroup(struct task_struct *task, 477 int subsys_id) 478{ 479 return task_css(task, subsys_id)->cgroup; 480} 481 482/** 483 * cgroup_is_descendant - test ancestry 484 * @cgrp: the cgroup to be tested 485 * @ancestor: possible ancestor of @cgrp 486 * 487 * Test whether @cgrp is a descendant of @ancestor. It also returns %true 488 * if @cgrp == @ancestor. This function is safe to call as long as @cgrp 489 * and @ancestor are accessible. 
490 */ 491static inline bool cgroup_is_descendant(struct cgroup *cgrp, 492 struct cgroup *ancestor) 493{ 494 if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) 495 return false; 496 return cgrp->ancestor_ids[ancestor->level] == ancestor->id; 497} 498 499/* no synchronization, the result can only be used as a hint */ 500static inline bool cgroup_is_populated(struct cgroup *cgrp) 501{ 502 return cgrp->populated_cnt; 503} 504 505/* returns ino associated with a cgroup */ 506static inline ino_t cgroup_ino(struct cgroup *cgrp) 507{ 508 return cgrp->kn->ino; 509} 510 511/* cft/css accessors for cftype->write() operation */ 512static inline struct cftype *of_cft(struct kernfs_open_file *of) 513{ 514 return of->kn->priv; 515} 516 517struct cgroup_subsys_state *of_css(struct kernfs_open_file *of); 518 519/* cft/css accessors for cftype->seq_*() operations */ 520static inline struct cftype *seq_cft(struct seq_file *seq) 521{ 522 return of_cft(seq->private); 523} 524 525static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq) 526{ 527 return of_css(seq->private); 528} 529 530/* 531 * Name / path handling functions. All are thin wrappers around the kernfs 532 * counterparts and can be called under any context. 
533 */ 534 535static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) 536{ 537 return kernfs_name(cgrp->kn, buf, buflen); 538} 539 540static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf, 541 size_t buflen) 542{ 543 return kernfs_path(cgrp->kn, buf, buflen); 544} 545 546static inline void pr_cont_cgroup_name(struct cgroup *cgrp) 547{ 548 pr_cont_kernfs_name(cgrp->kn); 549} 550 551static inline void pr_cont_cgroup_path(struct cgroup *cgrp) 552{ 553 pr_cont_kernfs_path(cgrp->kn); 554} 555 556#else /* !CONFIG_CGROUPS */ 557 558struct cgroup_subsys_state; 559 560static inline void css_put(struct cgroup_subsys_state *css) {} 561static inline int cgroup_attach_task_all(struct task_struct *from, 562 struct task_struct *t) { return 0; } 563static inline int cgroupstats_build(struct cgroupstats *stats, 564 struct dentry *dentry) { return -EINVAL; } 565 566static inline void cgroup_fork(struct task_struct *p) {} 567static inline int cgroup_can_fork(struct task_struct *p) { return 0; } 568static inline void cgroup_cancel_fork(struct task_struct *p) {} 569static inline void cgroup_post_fork(struct task_struct *p) {} 570static inline void cgroup_exit(struct task_struct *p) {} 571static inline void cgroup_free(struct task_struct *p) {} 572 573static inline int cgroup_init_early(void) { return 0; } 574static inline int cgroup_init(void) { return 0; } 575 576#endif /* !CONFIG_CGROUPS */ 577 578/* 579 * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data 580 * definition in cgroup-defs.h. 
581 */ 582#ifdef CONFIG_SOCK_CGROUP_DATA 583 584#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) 585extern spinlock_t cgroup_sk_update_lock; 586#endif 587 588void cgroup_sk_alloc_disable(void); 589void cgroup_sk_alloc(struct sock_cgroup_data *skcd); 590void cgroup_sk_free(struct sock_cgroup_data *skcd); 591 592static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) 593{ 594#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) 595 unsigned long v; 596 597 /* 598 * @skcd->val is 64bit but the following is safe on 32bit too as we 599 * just need the lower ulong to be written and read atomically. 600 */ 601 v = READ_ONCE(skcd->val); 602 603 if (v & 1) 604 return &cgrp_dfl_root.cgrp; 605 606 return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; 607#else 608 return (struct cgroup *)(unsigned long)skcd->val; 609#endif 610} 611 612#else /* CONFIG_CGROUP_DATA */ 613 614static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} 615static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} 616 617#endif /* CONFIG_CGROUP_DATA */ 618 619struct cgroup_namespace { 620 atomic_t count; 621 struct ns_common ns; 622 struct user_namespace *user_ns; 623 struct css_set *root_cset; 624}; 625 626extern struct cgroup_namespace init_cgroup_ns; 627 628#ifdef CONFIG_CGROUPS 629 630void free_cgroup_ns(struct cgroup_namespace *ns); 631 632struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, 633 struct user_namespace *user_ns, 634 struct cgroup_namespace *old_ns); 635 636char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, 637 struct cgroup_namespace *ns); 638 639#else /* !CONFIG_CGROUPS */ 640 641static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } 642static inline struct cgroup_namespace * 643copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, 644 struct cgroup_namespace *old_ns) 645{ 646 return old_ns; 647} 648 649#endif /* !CONFIG_CGROUPS */ 650 
651static inline void get_cgroup_ns(struct cgroup_namespace *ns) 652{ 653 if (ns) 654 atomic_inc(&ns->count); 655} 656 657static inline void put_cgroup_ns(struct cgroup_namespace *ns) 658{ 659 if (ns && atomic_dec_and_test(&ns->count)) 660 free_cgroup_ns(ns); 661} 662 663#endif /* _LINUX_CGROUP_H */