Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-3.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
"cgroup file type addition / removal is updated so that file types are
added and removed instead of individual files so that dynamic file
type addition / removal can be implemented by cgroup and used by
controllers. blkio controller changes which will come through block
tree are dependent on this. Other changes include res_counter cleanup
and disallowing kthreadd / PF_THREAD_BOUND threads from being attached to
non-root cgroups.

There's a reported bug with the file type addition / removal handling
which can lead to oops on cgroup umount. The issue is being looked
into. It shouldn't cause problems for most setups and isn't a
security concern."

Fix up trivial conflict in Documentation/feature-removal-schedule.txt

* 'for-3.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (21 commits)
res_counter: Account max_usage when calling res_counter_charge_nofail()
res_counter: Merge res_counter_charge and res_counter_charge_nofail
cgroups: disallow attaching kthreadd or PF_THREAD_BOUND threads
cgroup: remove cgroup_subsys->populate()
cgroup: get rid of populate for memcg
cgroup: pass struct mem_cgroup instead of struct cgroup to socket memcg
cgroup: make css->refcnt clearing on cgroup removal optional
cgroup: use negative bias on css->refcnt to block css_tryget()
cgroup: implement cgroup_rm_cftypes()
cgroup: introduce struct cfent
cgroup: relocate __d_cgrp() and __d_cft()
cgroup: remove cgroup_add_file[s]()
cgroup: convert memcg controller to the new cftype interface
memcg: always create memsw files if CONFIG_CGROUP_MEM_RES_CTLR_SWAP
cgroup: convert all non-memcg controllers to the new cftype interface
cgroup: relocate cftype and cgroup_subsys definitions in controllers
cgroup: merge cft_release_agent cftype array into the base files array
cgroup: implement cgroup_add_cftypes() and friends
cgroup: build list of all cgroups under a given cgroupfs_root
cgroup: move cgroup_clear_directory() call out of cgroup_populate_dir()
...

+688 -439
+2 -2
Documentation/cgroups/resource_counter.txt
··· 77 77 where the charging failed. 78 78 79 79 d. int res_counter_charge_locked 80 - (struct res_counter *rc, unsigned long val) 80 + (struct res_counter *rc, unsigned long val, bool force) 81 81 82 82 The same as res_counter_charge(), but it must not acquire/release the 83 83 res_counter->lock internally (it must be called with res_counter->lock 84 - held). 84 + held). The force parameter indicates whether we can bypass the limit. 85 85 86 86 e. void res_counter_uncharge[_locked] 87 87 (struct res_counter *rc, unsigned long val)
+11
Documentation/feature-removal-schedule.txt
··· 556 556 There are newer controls (V4L2_CID_PAN*, V4L2_CID_TILT*) that provide 557 557 similar functionality. 558 558 Who: Sylwester Nawrocki <sylvester.nawrocki@gmail.com> 559 + 560 + ---------------------------- 561 + 562 + What: cgroup option updates via remount 563 + When: March 2013 564 + Why: Remount currently allows changing bound subsystems and 565 + release_agent. Rebinding is hardly useful as it only works 566 + when the hierarchy is empty and release_agent itself should be 567 + replaced with conventional fsnotify. 568 + 569 + ----------------------------
+17 -28
block/blk-cgroup.c
··· 28 28 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; 29 29 EXPORT_SYMBOL_GPL(blkio_root_cgroup); 30 30 31 - static struct cgroup_subsys_state *blkiocg_create(struct cgroup *); 32 - static int blkiocg_can_attach(struct cgroup *, struct cgroup_taskset *); 33 - static void blkiocg_attach(struct cgroup *, struct cgroup_taskset *); 34 - static void blkiocg_destroy(struct cgroup *); 35 - static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); 36 - 37 31 /* for encoding cft->private value on file */ 38 32 #define BLKIOFILE_PRIVATE(x, val) (((x) << 16) | (val)) 39 33 /* What policy owns the file, proportional or throttle */ 40 34 #define BLKIOFILE_POLICY(val) (((val) >> 16) & 0xffff) 41 35 #define BLKIOFILE_ATTR(val) ((val) & 0xffff) 42 - 43 - struct cgroup_subsys blkio_subsys = { 44 - .name = "blkio", 45 - .create = blkiocg_create, 46 - .can_attach = blkiocg_can_attach, 47 - .attach = blkiocg_attach, 48 - .destroy = blkiocg_destroy, 49 - .populate = blkiocg_populate, 50 - #ifdef CONFIG_BLK_CGROUP 51 - /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */ 52 - .subsys_id = blkio_subsys_id, 53 - #endif 54 - .use_id = 1, 55 - .module = THIS_MODULE, 56 - }; 57 - EXPORT_SYMBOL_GPL(blkio_subsys); 58 36 59 37 static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg, 60 38 struct blkio_policy_node *pn) ··· 1515 1537 .read_map = blkiocg_file_read_map, 1516 1538 }, 1517 1539 #endif 1540 + { } /* terminate */ 1518 1541 }; 1519 - 1520 - static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) 1521 - { 1522 - return cgroup_add_files(cgroup, subsys, blkio_files, 1523 - ARRAY_SIZE(blkio_files)); 1524 - } 1525 1542 1526 1543 static void blkiocg_destroy(struct cgroup *cgroup) 1527 1544 { ··· 1630 1657 } 1631 1658 } 1632 1659 } 1660 + 1661 + struct cgroup_subsys blkio_subsys = { 1662 + .name = "blkio", 1663 + .create = blkiocg_create, 1664 + .can_attach = blkiocg_can_attach, 1665 + .attach = 
blkiocg_attach, 1666 + .destroy = blkiocg_destroy, 1667 + #ifdef CONFIG_BLK_CGROUP 1668 + /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */ 1669 + .subsys_id = blkio_subsys_id, 1670 + #endif 1671 + .base_cftypes = blkio_files, 1672 + .use_id = 1, 1673 + .module = THIS_MODULE, 1674 + }; 1675 + EXPORT_SYMBOL_GPL(blkio_subsys); 1633 1676 1634 1677 void blkio_policy_register(struct blkio_policy_type *blkiop) 1635 1678 {
+55 -26
include/linux/cgroup.h
··· 16 16 #include <linux/prio_heap.h> 17 17 #include <linux/rwsem.h> 18 18 #include <linux/idr.h> 19 + #include <linux/workqueue.h> 19 20 20 21 #ifdef CONFIG_CGROUPS 21 22 ··· 77 76 unsigned long flags; 78 77 /* ID for this css, if possible */ 79 78 struct css_id __rcu *id; 79 + 80 + /* Used to put @cgroup->dentry on the last css_put() */ 81 + struct work_struct dput_work; 80 82 }; 81 83 82 84 /* bits in struct cgroup_subsys_state flags field */ 83 85 enum { 84 86 CSS_ROOT, /* This CSS is the root of the subsystem */ 85 87 CSS_REMOVED, /* This CSS is dead */ 88 + CSS_CLEAR_CSS_REFS, /* @ss->__DEPRECATED_clear_css_refs */ 86 89 }; 87 90 88 91 /* Caller must verify that the css is not for root cgroup */ ··· 120 115 * the css has been destroyed. 121 116 */ 122 117 118 + extern bool __css_tryget(struct cgroup_subsys_state *css); 123 119 static inline bool css_tryget(struct cgroup_subsys_state *css) 124 120 { 125 121 if (test_bit(CSS_ROOT, &css->flags)) 126 122 return true; 127 - while (!atomic_inc_not_zero(&css->refcnt)) { 128 - if (test_bit(CSS_REMOVED, &css->flags)) 129 - return false; 130 - cpu_relax(); 131 - } 132 - return true; 123 + return __css_tryget(css); 133 124 } 134 125 135 126 /* ··· 133 132 * css_get() or css_tryget() 134 133 */ 135 134 136 - extern void __css_put(struct cgroup_subsys_state *css, int count); 135 + extern void __css_put(struct cgroup_subsys_state *css); 137 136 static inline void css_put(struct cgroup_subsys_state *css) 138 137 { 139 138 if (!test_bit(CSS_ROOT, &css->flags)) 140 - __css_put(css, 1); 139 + __css_put(css); 141 140 } 142 141 143 142 /* bits in struct cgroup flags field */ ··· 176 175 */ 177 176 struct list_head sibling; /* my parent's children */ 178 177 struct list_head children; /* my children */ 178 + struct list_head files; /* my files */ 179 179 180 180 struct cgroup *parent; /* my parent */ 181 181 struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ ··· 192 190 * tasks in this cgroup. 
Protected by css_set_lock 193 191 */ 194 192 struct list_head css_sets; 193 + 194 + struct list_head allcg_node; /* cgroupfs_root->allcg_list */ 195 + struct list_head cft_q_node; /* used during cftype add/rm */ 195 196 196 197 /* 197 198 * Linked list running through all cgroups that can ··· 280 275 * - the 'cftype' of the file is file->f_dentry->d_fsdata 281 276 */ 282 277 283 - #define MAX_CFTYPE_NAME 64 278 + /* cftype->flags */ 279 + #define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ 280 + #define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ 281 + 282 + #define MAX_CFTYPE_NAME 64 283 + 284 284 struct cftype { 285 285 /* 286 286 * By convention, the name should begin with the name of the 287 - * subsystem, followed by a period 287 + * subsystem, followed by a period. Zero length string indicates 288 + * end of cftype array. 288 289 */ 289 290 char name[MAX_CFTYPE_NAME]; 290 291 int private; ··· 305 294 * be passed to write_string; defaults to 64 306 295 */ 307 296 size_t max_write_len; 297 + 298 + /* CFTYPE_* flags */ 299 + unsigned int flags; 308 300 309 301 int (*open)(struct inode *inode, struct file *file); 310 302 ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, ··· 387 373 struct eventfd_ctx *eventfd); 388 374 }; 389 375 376 + /* 377 + * cftype_sets describe cftypes belonging to a subsystem and are chained at 378 + * cgroup_subsys->cftsets. Each cftset points to an array of cftypes 379 + * terminated by zero length name. 380 + */ 381 + struct cftype_set { 382 + struct list_head node; /* chained at subsys->cftsets */ 383 + const struct cftype *cfts; 384 + }; 385 + 390 386 struct cgroup_scanner { 391 387 struct cgroup *cg; 392 388 int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan); ··· 406 382 void *data; 407 383 }; 408 384 409 - /* 410 - * Add a new file to the given cgroup directory. 
Should only be 411 - * called by subsystems from within a populate() method 412 - */ 413 - int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, 414 - const struct cftype *cft); 415 - 416 - /* 417 - * Add a set of new files to the given cgroup directory. Should 418 - * only be called by subsystems from within a populate() method 419 - */ 420 - int cgroup_add_files(struct cgroup *cgrp, 421 - struct cgroup_subsys *subsys, 422 - const struct cftype cft[], 423 - int count); 385 + int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); 386 + int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); 424 387 425 388 int cgroup_is_removed(const struct cgroup *cgrp); 426 389 ··· 472 461 void (*fork)(struct task_struct *task); 473 462 void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, 474 463 struct task_struct *task); 475 - int (*populate)(struct cgroup_subsys *ss, struct cgroup *cgrp); 476 464 void (*post_clone)(struct cgroup *cgrp); 477 465 void (*bind)(struct cgroup *root); 478 466 ··· 484 474 * (not available in early_init time.) 485 475 */ 486 476 bool use_id; 477 + 478 + /* 479 + * If %true, cgroup removal will try to clear css refs by retrying 480 + * ss->pre_destroy() until there's no css ref left. This behavior 481 + * is strictly for backward compatibility and will be removed as 482 + * soon as the current user (memcg) is updated. 483 + * 484 + * If %false, ss->pre_destroy() can't fail and cgroup removal won't 485 + * wait for css refs to drop to zero before proceeding. 
486 + */ 487 + bool __DEPRECATED_clear_css_refs; 488 + 487 489 #define MAX_CGROUP_TYPE_NAMELEN 32 488 490 const char *name; 489 491 ··· 521 499 /* used when use_id == true */ 522 500 struct idr idr; 523 501 spinlock_t id_lock; 502 + 503 + /* list of cftype_sets */ 504 + struct list_head cftsets; 505 + 506 + /* base cftypes, automatically [de]registered with subsys itself */ 507 + struct cftype *base_cftypes; 508 + struct cftype_set base_cftset; 524 509 525 510 /* should be defined only by modular subsystems */ 526 511 struct module *module;
+1 -1
include/linux/res_counter.h
··· 116 116 */ 117 117 118 118 int __must_check res_counter_charge_locked(struct res_counter *counter, 119 - unsigned long val); 119 + unsigned long val, bool force); 120 120 int __must_check res_counter_charge(struct res_counter *counter, 121 121 unsigned long val, struct res_counter **limit_fail_at); 122 122 int __must_check res_counter_charge_nofail(struct res_counter *counter,
+6 -6
include/net/sock.h
··· 70 70 struct cgroup; 71 71 struct cgroup_subsys; 72 72 #ifdef CONFIG_NET 73 - int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss); 74 - void mem_cgroup_sockets_destroy(struct cgroup *cgrp); 73 + int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss); 74 + void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg); 75 75 #else 76 76 static inline 77 - int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) 77 + int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 78 78 { 79 79 return 0; 80 80 } 81 81 static inline 82 - void mem_cgroup_sockets_destroy(struct cgroup *cgrp) 82 + void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) 83 83 { 84 84 } 85 85 #endif ··· 914 914 * This function has to setup any files the protocol want to 915 915 * appear in the kmem cgroup filesystem. 916 916 */ 917 - int (*init_cgroup)(struct cgroup *cgrp, 917 + int (*init_cgroup)(struct mem_cgroup *memcg, 918 918 struct cgroup_subsys *ss); 919 - void (*destroy_cgroup)(struct cgroup *cgrp); 919 + void (*destroy_cgroup)(struct mem_cgroup *memcg); 920 920 struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); 921 921 #endif 922 922 };
+2 -2
include/net/tcp_memcontrol.h
··· 12 12 }; 13 13 14 14 struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg); 15 - int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); 16 - void tcp_destroy_cgroup(struct cgroup *cgrp); 15 + int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss); 16 + void tcp_destroy_cgroup(struct mem_cgroup *memcg); 17 17 unsigned long long tcp_max_memory(const struct mem_cgroup *memcg); 18 18 void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx); 19 19 #endif /* _TCP_MEMCG_H */
+421 -137
kernel/cgroup.c
··· 60 60 #include <linux/eventfd.h> 61 61 #include <linux/poll.h> 62 62 #include <linux/flex_array.h> /* used in cgroup_attach_proc */ 63 + #include <linux/kthread.h> 63 64 64 65 #include <linux/atomic.h> 66 + 67 + /* css deactivation bias, makes css->refcnt negative to deny new trygets */ 68 + #define CSS_DEACT_BIAS INT_MIN 65 69 66 70 /* 67 71 * cgroup_mutex is the master lock. Any modification to cgroup or its ··· 131 127 /* A list running through the active hierarchies */ 132 128 struct list_head root_list; 133 129 130 + /* All cgroups on this root, cgroup_mutex protected */ 131 + struct list_head allcg_list; 132 + 134 133 /* Hierarchy-specific flags */ 135 134 unsigned long flags; 136 135 ··· 150 143 * single cgroup, and all tasks are part of that cgroup. 151 144 */ 152 145 static struct cgroupfs_root rootnode; 146 + 147 + /* 148 + * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. 149 + */ 150 + struct cfent { 151 + struct list_head node; 152 + struct dentry *dentry; 153 + struct cftype *type; 154 + }; 153 155 154 156 /* 155 157 * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when ··· 255 239 256 240 EXPORT_SYMBOL_GPL(cgroup_lock_is_held); 257 241 242 + /* the current nr of refs, always >= 0 whether @css is deactivated or not */ 243 + static int css_refcnt(struct cgroup_subsys_state *css) 244 + { 245 + int v = atomic_read(&css->refcnt); 246 + 247 + return v >= 0 ? 
v : v - CSS_DEACT_BIAS; 248 + } 249 + 258 250 /* convenient tests for these bits */ 259 251 inline int cgroup_is_removed(const struct cgroup *cgrp) 260 252 { ··· 302 278 /* for_each_active_root() allows you to iterate across the active hierarchies */ 303 279 #define for_each_active_root(_root) \ 304 280 list_for_each_entry(_root, &roots, root_list) 281 + 282 + static inline struct cgroup *__d_cgrp(struct dentry *dentry) 283 + { 284 + return dentry->d_fsdata; 285 + } 286 + 287 + static inline struct cfent *__d_cfe(struct dentry *dentry) 288 + { 289 + return dentry->d_fsdata; 290 + } 291 + 292 + static inline struct cftype *__d_cft(struct dentry *dentry) 293 + { 294 + return __d_cfe(dentry)->type; 295 + } 305 296 306 297 /* the list of cgroups eligible for automatic release. Protected by 307 298 * release_list_lock */ ··· 855 816 struct cgroup_subsys *ss; 856 817 int ret = 0; 857 818 858 - for_each_subsys(cgrp->root, ss) 859 - if (ss->pre_destroy) { 860 - ret = ss->pre_destroy(cgrp); 861 - if (ret) 862 - break; 819 + for_each_subsys(cgrp->root, ss) { 820 + if (!ss->pre_destroy) 821 + continue; 822 + 823 + ret = ss->pre_destroy(cgrp); 824 + if (ret) { 825 + /* ->pre_destroy() failure is being deprecated */ 826 + WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs); 827 + break; 863 828 } 829 + } 864 830 865 831 return ret; 866 832 } ··· 908 864 BUG_ON(!list_empty(&cgrp->pidlists)); 909 865 910 866 kfree_rcu(cgrp, rcu_head); 867 + } else { 868 + struct cfent *cfe = __d_cfe(dentry); 869 + struct cgroup *cgrp = dentry->d_parent->d_fsdata; 870 + 871 + WARN_ONCE(!list_empty(&cfe->node) && 872 + cgrp != &cgrp->root->top_cgroup, 873 + "cfe still linked for %s\n", cfe->type->name); 874 + kfree(cfe); 911 875 } 912 876 iput(inode); 913 877 } ··· 934 882 dput(parent); 935 883 } 936 884 937 - static void cgroup_clear_directory(struct dentry *dentry) 885 + static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) 938 886 { 939 - struct list_head *node; 887 + struct cfent 
*cfe; 940 888 941 - BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 942 - spin_lock(&dentry->d_lock); 943 - node = dentry->d_subdirs.next; 944 - while (node != &dentry->d_subdirs) { 945 - struct dentry *d = list_entry(node, struct dentry, d_u.d_child); 889 + lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); 890 + lockdep_assert_held(&cgroup_mutex); 946 891 947 - spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); 948 - list_del_init(node); 949 - if (d->d_inode) { 950 - /* This should never be called on a cgroup 951 - * directory with child cgroups */ 952 - BUG_ON(d->d_inode->i_mode & S_IFDIR); 953 - dget_dlock(d); 954 - spin_unlock(&d->d_lock); 955 - spin_unlock(&dentry->d_lock); 956 - d_delete(d); 957 - simple_unlink(dentry->d_inode, d); 958 - dput(d); 959 - spin_lock(&dentry->d_lock); 960 - } else 961 - spin_unlock(&d->d_lock); 962 - node = dentry->d_subdirs.next; 892 + list_for_each_entry(cfe, &cgrp->files, node) { 893 + struct dentry *d = cfe->dentry; 894 + 895 + if (cft && cfe->type != cft) 896 + continue; 897 + 898 + dget(d); 899 + d_delete(d); 900 + simple_unlink(d->d_inode, d); 901 + list_del_init(&cfe->node); 902 + dput(d); 903 + 904 + return 0; 963 905 } 964 - spin_unlock(&dentry->d_lock); 906 + return -ENOENT; 907 + } 908 + 909 + static void cgroup_clear_directory(struct dentry *dir) 910 + { 911 + struct cgroup *cgrp = __d_cgrp(dir); 912 + 913 + while (!list_empty(&cgrp->files)) 914 + cgroup_rm_file(cgrp, NULL); 965 915 } 966 916 967 917 /* ··· 1348 1294 if (ret) 1349 1295 goto out_unlock; 1350 1296 1297 + /* See feature-removal-schedule.txt */ 1298 + if (opts.subsys_bits != root->actual_subsys_bits || opts.release_agent) 1299 + pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", 1300 + task_tgid_nr(current), current->comm); 1301 + 1351 1302 /* Don't allow flags or name to change at remount */ 1352 1303 if (opts.flags != root->flags || 1353 1304 (opts.name && strcmp(opts.name, root->name))) { ··· 1367 1308 goto 
out_unlock; 1368 1309 } 1369 1310 1370 - /* (re)populate subsystem files */ 1311 + /* clear out any existing files and repopulate subsystem files */ 1312 + cgroup_clear_directory(cgrp->dentry); 1371 1313 cgroup_populate_dir(cgrp); 1372 1314 1373 1315 if (opts.release_agent) ··· 1393 1333 { 1394 1334 INIT_LIST_HEAD(&cgrp->sibling); 1395 1335 INIT_LIST_HEAD(&cgrp->children); 1336 + INIT_LIST_HEAD(&cgrp->files); 1396 1337 INIT_LIST_HEAD(&cgrp->css_sets); 1397 1338 INIT_LIST_HEAD(&cgrp->release_list); 1398 1339 INIT_LIST_HEAD(&cgrp->pidlists); ··· 1405 1344 static void init_cgroup_root(struct cgroupfs_root *root) 1406 1345 { 1407 1346 struct cgroup *cgrp = &root->top_cgroup; 1347 + 1408 1348 INIT_LIST_HEAD(&root->subsys_list); 1409 1349 INIT_LIST_HEAD(&root->root_list); 1350 + INIT_LIST_HEAD(&root->allcg_list); 1410 1351 root->number_of_cgroups = 1; 1411 1352 cgrp->root = root; 1412 1353 cgrp->top_cgroup = cgrp; 1354 + list_add_tail(&cgrp->allcg_node, &root->allcg_list); 1413 1355 init_cgroup_housekeeping(cgrp); 1414 1356 } 1415 1357 ··· 1755 1691 }; 1756 1692 1757 1693 static struct kobject *cgroup_kobj; 1758 - 1759 - static inline struct cgroup *__d_cgrp(struct dentry *dentry) 1760 - { 1761 - return dentry->d_fsdata; 1762 - } 1763 - 1764 - static inline struct cftype *__d_cft(struct dentry *dentry) 1765 - { 1766 - return dentry->d_fsdata; 1767 - } 1768 1694 1769 1695 /** 1770 1696 * cgroup_path - generate the path of a cgroup ··· 2226 2172 2227 2173 if (threadgroup) 2228 2174 tsk = tsk->group_leader; 2175 + 2176 + /* 2177 + * Workqueue threads may acquire PF_THREAD_BOUND and become 2178 + * trapped in a cpuset, or RT worker may be born in a cgroup 2179 + * with no rt_runtime allocated. Just say no. 
2180 + */ 2181 + if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) { 2182 + ret = -EINVAL; 2183 + rcu_read_unlock(); 2184 + goto out_unlock_cgroup; 2185 + } 2186 + 2229 2187 get_task_struct(tsk); 2230 2188 rcu_read_unlock(); 2231 2189 ··· 2669 2603 return mode; 2670 2604 } 2671 2605 2672 - int cgroup_add_file(struct cgroup *cgrp, 2673 - struct cgroup_subsys *subsys, 2674 - const struct cftype *cft) 2606 + static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, 2607 + const struct cftype *cft) 2675 2608 { 2676 2609 struct dentry *dir = cgrp->dentry; 2610 + struct cgroup *parent = __d_cgrp(dir); 2677 2611 struct dentry *dentry; 2612 + struct cfent *cfe; 2678 2613 int error; 2679 2614 umode_t mode; 2680 - 2681 2615 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; 2616 + 2617 + /* does @cft->flags tell us to skip creation on @cgrp? */ 2618 + if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) 2619 + return 0; 2620 + if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) 2621 + return 0; 2622 + 2682 2623 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { 2683 2624 strcpy(name, subsys->name); 2684 2625 strcat(name, "."); 2685 2626 } 2686 2627 strcat(name, cft->name); 2628 + 2687 2629 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); 2630 + 2631 + cfe = kzalloc(sizeof(*cfe), GFP_KERNEL); 2632 + if (!cfe) 2633 + return -ENOMEM; 2634 + 2688 2635 dentry = lookup_one_len(name, dir, strlen(name)); 2689 - if (!IS_ERR(dentry)) { 2690 - mode = cgroup_file_mode(cft); 2691 - error = cgroup_create_file(dentry, mode | S_IFREG, 2692 - cgrp->root->sb); 2693 - if (!error) 2694 - dentry->d_fsdata = (void *)cft; 2695 - dput(dentry); 2696 - } else 2636 + if (IS_ERR(dentry)) { 2697 2637 error = PTR_ERR(dentry); 2638 + goto out; 2639 + } 2640 + 2641 + mode = cgroup_file_mode(cft); 2642 + error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb); 2643 + if (!error) { 2644 + cfe->type = (void *)cft; 2645 + cfe->dentry = 
dentry; 2646 + dentry->d_fsdata = cfe; 2647 + list_add_tail(&cfe->node, &parent->files); 2648 + cfe = NULL; 2649 + } 2650 + dput(dentry); 2651 + out: 2652 + kfree(cfe); 2698 2653 return error; 2699 2654 } 2700 - EXPORT_SYMBOL_GPL(cgroup_add_file); 2701 2655 2702 - int cgroup_add_files(struct cgroup *cgrp, 2703 - struct cgroup_subsys *subsys, 2704 - const struct cftype cft[], 2705 - int count) 2656 + static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, 2657 + const struct cftype cfts[], bool is_add) 2706 2658 { 2707 - int i, err; 2708 - for (i = 0; i < count; i++) { 2709 - err = cgroup_add_file(cgrp, subsys, &cft[i]); 2710 - if (err) 2711 - return err; 2659 + const struct cftype *cft; 2660 + int err, ret = 0; 2661 + 2662 + for (cft = cfts; cft->name[0] != '\0'; cft++) { 2663 + if (is_add) 2664 + err = cgroup_add_file(cgrp, subsys, cft); 2665 + else 2666 + err = cgroup_rm_file(cgrp, cft); 2667 + if (err) { 2668 + pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n", 2669 + is_add ? "add" : "remove", cft->name, err); 2670 + ret = err; 2671 + } 2712 2672 } 2673 + return ret; 2674 + } 2675 + 2676 + static DEFINE_MUTEX(cgroup_cft_mutex); 2677 + 2678 + static void cgroup_cfts_prepare(void) 2679 + __acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex) 2680 + { 2681 + /* 2682 + * Thanks to the entanglement with vfs inode locking, we can't walk 2683 + * the existing cgroups under cgroup_mutex and create files. 2684 + * Instead, we increment reference on all cgroups and build list of 2685 + * them using @cgrp->cft_q_node. Grab cgroup_cft_mutex to ensure 2686 + * exclusive access to the field. 
2687 + */ 2688 + mutex_lock(&cgroup_cft_mutex); 2689 + mutex_lock(&cgroup_mutex); 2690 + } 2691 + 2692 + static void cgroup_cfts_commit(struct cgroup_subsys *ss, 2693 + const struct cftype *cfts, bool is_add) 2694 + __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) 2695 + { 2696 + LIST_HEAD(pending); 2697 + struct cgroup *cgrp, *n; 2698 + 2699 + /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ 2700 + if (cfts && ss->root != &rootnode) { 2701 + list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) { 2702 + dget(cgrp->dentry); 2703 + list_add_tail(&cgrp->cft_q_node, &pending); 2704 + } 2705 + } 2706 + 2707 + mutex_unlock(&cgroup_mutex); 2708 + 2709 + /* 2710 + * All new cgroups will see @cfts update on @ss->cftsets. Add/rm 2711 + * files for all cgroups which were created before. 2712 + */ 2713 + list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) { 2714 + struct inode *inode = cgrp->dentry->d_inode; 2715 + 2716 + mutex_lock(&inode->i_mutex); 2717 + mutex_lock(&cgroup_mutex); 2718 + if (!cgroup_is_removed(cgrp)) 2719 + cgroup_addrm_files(cgrp, ss, cfts, is_add); 2720 + mutex_unlock(&cgroup_mutex); 2721 + mutex_unlock(&inode->i_mutex); 2722 + 2723 + list_del_init(&cgrp->cft_q_node); 2724 + dput(cgrp->dentry); 2725 + } 2726 + 2727 + mutex_unlock(&cgroup_cft_mutex); 2728 + } 2729 + 2730 + /** 2731 + * cgroup_add_cftypes - add an array of cftypes to a subsystem 2732 + * @ss: target cgroup subsystem 2733 + * @cfts: zero-length name terminated array of cftypes 2734 + * 2735 + * Register @cfts to @ss. Files described by @cfts are created for all 2736 + * existing cgroups to which @ss is attached and all future cgroups will 2737 + * have them too. This function can be called anytime whether @ss is 2738 + * attached or not. 2739 + * 2740 + * Returns 0 on successful registration, -errno on failure. 
Note that this 2741 + * function currently returns 0 as long as @cfts registration is successful 2742 + * even if some file creation attempts on existing cgroups fail. 2743 + */ 2744 + int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) 2745 + { 2746 + struct cftype_set *set; 2747 + 2748 + set = kzalloc(sizeof(*set), GFP_KERNEL); 2749 + if (!set) 2750 + return -ENOMEM; 2751 + 2752 + cgroup_cfts_prepare(); 2753 + set->cfts = cfts; 2754 + list_add_tail(&set->node, &ss->cftsets); 2755 + cgroup_cfts_commit(ss, cfts, true); 2756 + 2713 2757 return 0; 2714 2758 } 2715 - EXPORT_SYMBOL_GPL(cgroup_add_files); 2759 + EXPORT_SYMBOL_GPL(cgroup_add_cftypes); 2760 + 2761 + /** 2762 + * cgroup_rm_cftypes - remove an array of cftypes from a subsystem 2763 + * @ss: target cgroup subsystem 2764 + * @cfts: zero-length name terminated array of cftypes 2765 + * 2766 + * Unregister @cfts from @ss. Files described by @cfts are removed from 2767 + * all existing cgroups to which @ss is attached and all future cgroups 2768 + * won't have them either. This function can be called anytime whether @ss 2769 + * is attached or not. 2770 + * 2771 + * Returns 0 on successful unregistration, -ENOENT if @cfts is not 2772 + * registered with @ss. 2773 + */ 2774 + int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) 2775 + { 2776 + struct cftype_set *set; 2777 + 2778 + cgroup_cfts_prepare(); 2779 + 2780 + list_for_each_entry(set, &ss->cftsets, node) { 2781 + if (set->cfts == cfts) { 2782 + list_del_init(&set->node); 2783 + cgroup_cfts_commit(ss, cfts, false); 2784 + return 0; 2785 + } 2786 + } 2787 + 2788 + cgroup_cfts_commit(ss, NULL, false); 2789 + return -ENOENT; 2790 + } 2716 2791 2717 2792 /** 2718 2793 * cgroup_task_count - count the number of tasks in a cgroup. 
··· 3832 3625 .read_u64 = cgroup_clone_children_read, 3833 3626 .write_u64 = cgroup_clone_children_write, 3834 3627 }, 3835 - }; 3836 - 3837 - static struct cftype cft_release_agent = { 3838 - .name = "release_agent", 3839 - .read_seq_string = cgroup_release_agent_show, 3840 - .write_string = cgroup_release_agent_write, 3841 - .max_write_len = PATH_MAX, 3628 + { 3629 + .name = "release_agent", 3630 + .flags = CFTYPE_ONLY_ON_ROOT, 3631 + .read_seq_string = cgroup_release_agent_show, 3632 + .write_string = cgroup_release_agent_write, 3633 + .max_write_len = PATH_MAX, 3634 + }, 3635 + { } /* terminate */ 3842 3636 }; 3843 3637 3844 3638 static int cgroup_populate_dir(struct cgroup *cgrp) ··· 3847 3639 int err; 3848 3640 struct cgroup_subsys *ss; 3849 3641 3850 - /* First clear out any existing files */ 3851 - cgroup_clear_directory(cgrp->dentry); 3852 - 3853 - err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files)); 3642 + err = cgroup_addrm_files(cgrp, NULL, files, true); 3854 3643 if (err < 0) 3855 3644 return err; 3856 3645 3857 - if (cgrp == cgrp->top_cgroup) { 3858 - if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0) 3859 - return err; 3646 + /* process cftsets of each subsystem */ 3647 + for_each_subsys(cgrp->root, ss) { 3648 + struct cftype_set *set; 3649 + 3650 + list_for_each_entry(set, &ss->cftsets, node) 3651 + cgroup_addrm_files(cgrp, ss, set->cfts, true); 3860 3652 } 3861 3653 3862 - for_each_subsys(cgrp->root, ss) { 3863 - if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) 3864 - return err; 3865 - } 3866 3654 /* This cgroup is ready now */ 3867 3655 for_each_subsys(cgrp->root, ss) { 3868 3656 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; ··· 3874 3670 return 0; 3875 3671 } 3876 3672 3673 + static void css_dput_fn(struct work_struct *work) 3674 + { 3675 + struct cgroup_subsys_state *css = 3676 + container_of(work, struct cgroup_subsys_state, dput_work); 3677 + 3678 + dput(css->cgroup->dentry); 3679 + } 3680 + 3877 
3681 static void init_cgroup_css(struct cgroup_subsys_state *css, 3878 3682 struct cgroup_subsys *ss, 3879 3683 struct cgroup *cgrp) ··· 3894 3682 set_bit(CSS_ROOT, &css->flags); 3895 3683 BUG_ON(cgrp->subsys[ss->subsys_id]); 3896 3684 cgrp->subsys[ss->subsys_id] = css; 3685 + 3686 + /* 3687 + * If !clear_css_refs, css holds an extra ref to @cgrp->dentry 3688 + * which is put on the last css_put(). dput() requires process 3689 + * context, which css_put() may be called without. @css->dput_work 3690 + * will be used to invoke dput() asynchronously from css_put(). 3691 + */ 3692 + INIT_WORK(&css->dput_work, css_dput_fn); 3693 + if (ss->__DEPRECATED_clear_css_refs) 3694 + set_bit(CSS_CLEAR_CSS_REFS, &css->flags); 3897 3695 } 3898 3696 3899 3697 static void cgroup_lock_hierarchy(struct cgroupfs_root *root) ··· 4006 3784 if (err < 0) 4007 3785 goto err_remove; 4008 3786 3787 + /* If !clear_css_refs, each css holds a ref to the cgroup's dentry */ 3788 + for_each_subsys(root, ss) 3789 + if (!ss->__DEPRECATED_clear_css_refs) 3790 + dget(dentry); 3791 + 4009 3792 /* The cgroup directory was pre-locked for us */ 4010 3793 BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex)); 3794 + 3795 + list_add_tail(&cgrp->allcg_node, &root->allcg_list); 4011 3796 4012 3797 err = cgroup_populate_dir(cgrp); 4013 3798 /* If err < 0, we have a half-filled directory - oh well ;) */ ··· 4055 3826 return cgroup_create(c_parent, dentry, mode | S_IFDIR); 4056 3827 } 4057 3828 3829 + /* 3830 + * Check the reference count on each subsystem. Since we already 3831 + * established that there are no tasks in the cgroup, if the css refcount 3832 + * is also 1, then there should be no outstanding references, so the 3833 + * subsystem is safe to destroy. 
We scan across all subsystems rather than 3834 + * using the per-hierarchy linked list of mounted subsystems since we can 3835 + * be called via check_for_release() with no synchronization other than 3836 + * RCU, and the subsystem linked list isn't RCU-safe. 3837 + */ 4058 3838 static int cgroup_has_css_refs(struct cgroup *cgrp) 4059 3839 { 4060 - /* Check the reference count on each subsystem. Since we 4061 - * already established that there are no tasks in the 4062 - * cgroup, if the css refcount is also 1, then there should 4063 - * be no outstanding references, so the subsystem is safe to 4064 - * destroy. We scan across all subsystems rather than using 4065 - * the per-hierarchy linked list of mounted subsystems since 4066 - * we can be called via check_for_release() with no 4067 - * synchronization other than RCU, and the subsystem linked 4068 - * list isn't RCU-safe */ 4069 3840 int i; 3841 + 4070 3842 /* 4071 3843 * We won't need to lock the subsys array, because the subsystems 4072 3844 * we're concerned about aren't going anywhere since our cgroup root ··· 4076 3846 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4077 3847 struct cgroup_subsys *ss = subsys[i]; 4078 3848 struct cgroup_subsys_state *css; 3849 + 4079 3850 /* Skip subsystems not present or not in this hierarchy */ 4080 3851 if (ss == NULL || ss->root != cgrp->root) 4081 3852 continue; 3853 + 4082 3854 css = cgrp->subsys[ss->subsys_id]; 4083 - /* When called from check_for_release() it's possible 3855 + /* 3856 + * When called from check_for_release() it's possible 4084 3857 * that by this point the cgroup has been removed 4085 3858 * and the css deleted. But a false-positive doesn't 4086 3859 * matter, since it can only happen if the cgroup 4087 3860 * has been deleted and hence no longer needs the 4088 - * release agent to be called anyway. */ 4089 - if (css && (atomic_read(&css->refcnt) > 1)) 3861 + * release agent to be called anyway. 
3862 + */ 3863 + if (css && css_refcnt(css) > 1) 4090 3864 return 1; 4091 3865 } 4092 3866 return 0; ··· 4100 3866 * Atomically mark all (or else none) of the cgroup's CSS objects as 4101 3867 * CSS_REMOVED. Return true on success, or false if the cgroup has 4102 3868 * busy subsystems. Call with cgroup_mutex held 3869 + * 3870 + * Depending on whether a subsys has __DEPRECATED_clear_css_refs set or 3871 + * not, cgroup removal behaves differently. 3872 + * 3873 + * If clear is set, css refcnt for the subsystem should be zero before 3874 + * cgroup removal can be committed. This is implemented by 3875 + * CGRP_WAIT_ON_RMDIR and retry logic around ->pre_destroy(), which may be 3876 + * called multiple times until all css refcnts reach zero and is allowed to 3877 + * veto removal on any invocation. This behavior is deprecated and will be 3878 + * removed as soon as the existing user (memcg) is updated. 3879 + * 3880 + * If clear is not set, each css holds an extra reference to the cgroup's 3881 + * dentry and cgroup removal proceeds regardless of css refs. 3882 + * ->pre_destroy() will be called at least once and is not allowed to fail. 3883 + * On the last put of each css, whenever that may be, the extra dentry ref 3884 + * is put so that dentry destruction happens only after all css's are 3885 + * released. 4103 3886 */ 4104 - 4105 3887 static int cgroup_clear_css_refs(struct cgroup *cgrp) 4106 3888 { 4107 3889 struct cgroup_subsys *ss; 4108 3890 unsigned long flags; 4109 3891 bool failed = false; 3892 + 4110 3893 local_irq_save(flags); 3894 + 3895 + /* 3896 + * Block new css_tryget() by deactivating refcnt. If all refcnts 3897 + * for subsystems w/ clear_css_refs set were 1 at the moment of 3898 + * deactivation, we succeeded. 
3899 + */ 4111 3900 for_each_subsys(cgrp->root, ss) { 4112 3901 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; 4113 - int refcnt; 4114 - while (1) { 4115 - /* We can only remove a CSS with a refcnt==1 */ 4116 - refcnt = atomic_read(&css->refcnt); 4117 - if (refcnt > 1) { 4118 - failed = true; 4119 - goto done; 4120 - } 4121 - BUG_ON(!refcnt); 4122 - /* 4123 - * Drop the refcnt to 0 while we check other 4124 - * subsystems. This will cause any racing 4125 - * css_tryget() to spin until we set the 4126 - * CSS_REMOVED bits or abort 4127 - */ 4128 - if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt) 4129 - break; 4130 - cpu_relax(); 4131 - } 3902 + 3903 + WARN_ON(atomic_read(&css->refcnt) < 0); 3904 + atomic_add(CSS_DEACT_BIAS, &css->refcnt); 3905 + 3906 + if (ss->__DEPRECATED_clear_css_refs) 3907 + failed |= css_refcnt(css) != 1; 4132 3908 } 4133 - done: 3909 + 3910 + /* 3911 + * If succeeded, set REMOVED and put all the base refs; otherwise, 3912 + * restore refcnts to positive values. Either way, all in-progress 3913 + * css_tryget() will be released. 
3914 + */ 4134 3915 for_each_subsys(cgrp->root, ss) { 4135 3916 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; 4136 - if (failed) { 4137 - /* 4138 - * Restore old refcnt if we previously managed 4139 - * to clear it from 1 to 0 4140 - */ 4141 - if (!atomic_read(&css->refcnt)) 4142 - atomic_set(&css->refcnt, 1); 4143 - } else { 4144 - /* Commit the fact that the CSS is removed */ 3917 + 3918 + if (!failed) { 4145 3919 set_bit(CSS_REMOVED, &css->flags); 3920 + css_put(css); 3921 + } else { 3922 + atomic_sub(CSS_DEACT_BIAS, &css->refcnt); 4146 3923 } 4147 3924 } 3925 + 4148 3926 local_irq_restore(flags); 4149 3927 return !failed; 4150 3928 } ··· 4241 3995 list_del_init(&cgrp->sibling); 4242 3996 cgroup_unlock_hierarchy(cgrp->root); 4243 3997 3998 + list_del_init(&cgrp->allcg_node); 3999 + 4244 4000 d = dget(cgrp->dentry); 4245 4001 4246 4002 cgroup_d_remove_dir(d); ··· 4269 4021 return 0; 4270 4022 } 4271 4023 4024 + static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss) 4025 + { 4026 + INIT_LIST_HEAD(&ss->cftsets); 4027 + 4028 + /* 4029 + * base_cftset is embedded in subsys itself, no need to worry about 4030 + * deregistration. 
4031 + */ 4032 + if (ss->base_cftypes) { 4033 + ss->base_cftset.cfts = ss->base_cftypes; 4034 + list_add_tail(&ss->base_cftset.node, &ss->cftsets); 4035 + } 4036 + } 4037 + 4272 4038 static void __init cgroup_init_subsys(struct cgroup_subsys *ss) 4273 4039 { 4274 4040 struct cgroup_subsys_state *css; 4275 4041 4276 4042 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); 4043 + 4044 + /* init base cftset */ 4045 + cgroup_init_cftsets(ss); 4277 4046 4278 4047 /* Create the top cgroup state for this subsystem */ 4279 4048 list_add(&ss->sibling, &rootnode.subsys_list); ··· 4360 4095 BUG_ON(subsys[ss->subsys_id] != ss); 4361 4096 return 0; 4362 4097 } 4098 + 4099 + /* init base cftset */ 4100 + cgroup_init_cftsets(ss); 4363 4101 4364 4102 /* 4365 4103 * need to register a subsys id before anything else - for example, ··· 4953 4685 } 4954 4686 4955 4687 /* Caller must verify that the css is not for root cgroup */ 4956 - void __css_put(struct cgroup_subsys_state *css, int count) 4688 + bool __css_tryget(struct cgroup_subsys_state *css) 4689 + { 4690 + do { 4691 + int v = css_refcnt(css); 4692 + 4693 + if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v) 4694 + return true; 4695 + cpu_relax(); 4696 + } while (!test_bit(CSS_REMOVED, &css->flags)); 4697 + 4698 + return false; 4699 + } 4700 + EXPORT_SYMBOL_GPL(__css_tryget); 4701 + 4702 + /* Caller must verify that the css is not for root cgroup */ 4703 + void __css_put(struct cgroup_subsys_state *css) 4957 4704 { 4958 4705 struct cgroup *cgrp = css->cgroup; 4959 - int val; 4706 + 4960 4707 rcu_read_lock(); 4961 - val = atomic_sub_return(count, &css->refcnt); 4962 - if (val == 1) { 4708 + atomic_dec(&css->refcnt); 4709 + switch (css_refcnt(css)) { 4710 + case 1: 4963 4711 if (notify_on_release(cgrp)) { 4964 4712 set_bit(CGRP_RELEASABLE, &cgrp->flags); 4965 4713 check_for_release(cgrp); 4966 4714 } 4967 4715 cgroup_wakeup_rmdir_waiter(cgrp); 4716 + break; 4717 + case 0: 4718 + if (!test_bit(CSS_CLEAR_CSS_REFS, 
&css->flags)) 4719 + schedule_work(&css->dput_work); 4720 + break; 4968 4721 } 4969 4722 rcu_read_unlock(); 4970 - WARN_ON_ONCE(val < 1); 4971 4723 } 4972 4724 EXPORT_SYMBOL_GPL(__css_put); 4973 4725 ··· 5106 4818 * on this or this is under rcu_read_lock(). Once css->id is allocated, 5107 4819 * it's unchanged until freed. 5108 4820 */ 5109 - cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); 4821 + cssid = rcu_dereference_check(css->id, css_refcnt(css)); 5110 4822 5111 4823 if (cssid) 5112 4824 return cssid->id; ··· 5118 4830 { 5119 4831 struct css_id *cssid; 5120 4832 5121 - cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); 4833 + cssid = rcu_dereference_check(css->id, css_refcnt(css)); 5122 4834 5123 4835 if (cssid) 5124 4836 return cssid->depth; ··· 5499 5211 .name = "releasable", 5500 5212 .read_u64 = releasable_read, 5501 5213 }, 5502 - }; 5503 5214 5504 - static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) 5505 - { 5506 - return cgroup_add_files(cont, ss, debug_files, 5507 - ARRAY_SIZE(debug_files)); 5508 - } 5215 + { } /* terminate */ 5216 + }; 5509 5217 5510 5218 struct cgroup_subsys debug_subsys = { 5511 5219 .name = "debug", 5512 5220 .create = debug_create, 5513 5221 .destroy = debug_destroy, 5514 - .populate = debug_populate, 5515 5222 .subsys_id = debug_subsys_id, 5223 + .base_cftypes = debug_files, 5516 5224 }; 5517 5225 #endif /* CONFIG_CGROUP_DEBUG */
+3 -8
kernel/cgroup_freezer.c
··· 358 358 static struct cftype files[] = { 359 359 { 360 360 .name = "state", 361 + .flags = CFTYPE_NOT_ON_ROOT, 361 362 .read_seq_string = freezer_read, 362 363 .write_string = freezer_write, 363 364 }, 365 + { } /* terminate */ 364 366 }; 365 - 366 - static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) 367 - { 368 - if (!cgroup->parent) 369 - return 0; 370 - return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); 371 - } 372 367 373 368 struct cgroup_subsys freezer_subsys = { 374 369 .name = "freezer", 375 370 .create = freezer_create, 376 371 .destroy = freezer_destroy, 377 - .populate = freezer_populate, 378 372 .subsys_id = freezer_subsys_id, 379 373 .can_attach = freezer_can_attach, 380 374 .fork = freezer_fork, 375 + .base_cftypes = files, 381 376 };
+11 -22
kernel/cpuset.c
··· 1765 1765 .write_u64 = cpuset_write_u64, 1766 1766 .private = FILE_SPREAD_SLAB, 1767 1767 }, 1768 + 1769 + { 1770 + .name = "memory_pressure_enabled", 1771 + .flags = CFTYPE_ONLY_ON_ROOT, 1772 + .read_u64 = cpuset_read_u64, 1773 + .write_u64 = cpuset_write_u64, 1774 + .private = FILE_MEMORY_PRESSURE_ENABLED, 1775 + }, 1776 + 1777 + { } /* terminate */ 1768 1778 }; 1769 - 1770 - static struct cftype cft_memory_pressure_enabled = { 1771 - .name = "memory_pressure_enabled", 1772 - .read_u64 = cpuset_read_u64, 1773 - .write_u64 = cpuset_write_u64, 1774 - .private = FILE_MEMORY_PRESSURE_ENABLED, 1775 - }; 1776 - 1777 - static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) 1778 - { 1779 - int err; 1780 - 1781 - err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); 1782 - if (err) 1783 - return err; 1784 - /* memory_pressure_enabled is in root cpuset only */ 1785 - if (!cont->parent) 1786 - err = cgroup_add_file(cont, ss, 1787 - &cft_memory_pressure_enabled); 1788 - return err; 1789 - } 1790 1779 1791 1780 /* 1792 1781 * post_clone() is called during cgroup_create() when the ··· 1876 1887 .destroy = cpuset_destroy, 1877 1888 .can_attach = cpuset_can_attach, 1878 1889 .attach = cpuset_attach, 1879 - .populate = cpuset_populate, 1880 1890 .post_clone = cpuset_post_clone, 1881 1891 .subsys_id = cpuset_subsys_id, 1892 + .base_cftypes = files, 1882 1893 .early_init = 1, 1883 1894 }; 1884 1895
+36 -41
kernel/res_counter.c
··· 22 22 counter->parent = parent; 23 23 } 24 24 25 - int res_counter_charge_locked(struct res_counter *counter, unsigned long val) 25 + int res_counter_charge_locked(struct res_counter *counter, unsigned long val, 26 + bool force) 26 27 { 28 + int ret = 0; 29 + 27 30 if (counter->usage + val > counter->limit) { 28 31 counter->failcnt++; 29 - return -ENOMEM; 32 + ret = -ENOMEM; 33 + if (!force) 34 + return ret; 30 35 } 31 36 32 37 counter->usage += val; 33 38 if (counter->usage > counter->max_usage) 34 39 counter->max_usage = counter->usage; 35 - return 0; 36 - } 37 - 38 - int res_counter_charge(struct res_counter *counter, unsigned long val, 39 - struct res_counter **limit_fail_at) 40 - { 41 - int ret; 42 - unsigned long flags; 43 - struct res_counter *c, *u; 44 - 45 - *limit_fail_at = NULL; 46 - local_irq_save(flags); 47 - for (c = counter; c != NULL; c = c->parent) { 48 - spin_lock(&c->lock); 49 - ret = res_counter_charge_locked(c, val); 50 - spin_unlock(&c->lock); 51 - if (ret < 0) { 52 - *limit_fail_at = c; 53 - goto undo; 54 - } 55 - } 56 - ret = 0; 57 - goto done; 58 - undo: 59 - for (u = counter; u != c; u = u->parent) { 60 - spin_lock(&u->lock); 61 - res_counter_uncharge_locked(u, val); 62 - spin_unlock(&u->lock); 63 - } 64 - done: 65 - local_irq_restore(flags); 66 40 return ret; 67 41 } 68 42 69 - int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, 70 - struct res_counter **limit_fail_at) 43 + static int __res_counter_charge(struct res_counter *counter, unsigned long val, 44 + struct res_counter **limit_fail_at, bool force) 71 45 { 72 46 int ret, r; 73 47 unsigned long flags; 74 - struct res_counter *c; 48 + struct res_counter *c, *u; 75 49 76 50 r = ret = 0; 77 51 *limit_fail_at = NULL; 78 52 local_irq_save(flags); 79 53 for (c = counter; c != NULL; c = c->parent) { 80 54 spin_lock(&c->lock); 81 - r = res_counter_charge_locked(c, val); 82 - if (r) 83 - c->usage += val; 55 + r = res_counter_charge_locked(c, val, force); 84 56 
spin_unlock(&c->lock); 85 - if (r < 0 && ret == 0) { 86 - *limit_fail_at = c; 57 + if (r < 0 && !ret) { 87 58 ret = r; 59 + *limit_fail_at = c; 60 + if (!force) 61 + break; 62 + } 63 + } 64 + 65 + if (ret < 0 && !force) { 66 + for (u = counter; u != c; u = u->parent) { 67 + spin_lock(&u->lock); 68 + res_counter_uncharge_locked(u, val); 69 + spin_unlock(&u->lock); 88 70 } 89 71 } 90 72 local_irq_restore(flags); 91 73 92 74 return ret; 93 75 } 76 + 77 + int res_counter_charge(struct res_counter *counter, unsigned long val, 78 + struct res_counter **limit_fail_at) 79 + { 80 + return __res_counter_charge(counter, val, limit_fail_at, false); 81 + } 82 + 83 + int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, 84 + struct res_counter **limit_fail_at) 85 + { 86 + return __res_counter_charge(counter, val, limit_fail_at, true); 87 + } 88 + 94 89 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) 95 90 { 96 91 if (WARN_ON(counter->usage < val))
+4 -12
kernel/sched/core.c
··· 7985 7985 .write_u64 = cpu_rt_period_write_uint, 7986 7986 }, 7987 7987 #endif 7988 + { } /* terminate */ 7988 7989 }; 7989 - 7990 - static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) 7991 - { 7992 - return cgroup_add_files(cont, ss, cpu_files, ARRAY_SIZE(cpu_files)); 7993 - } 7994 7990 7995 7991 struct cgroup_subsys cpu_cgroup_subsys = { 7996 7992 .name = "cpu", ··· 7995 7999 .can_attach = cpu_cgroup_can_attach, 7996 8000 .attach = cpu_cgroup_attach, 7997 8001 .exit = cpu_cgroup_exit, 7998 - .populate = cpu_cgroup_populate, 7999 8002 .subsys_id = cpu_cgroup_subsys_id, 8003 + .base_cftypes = cpu_files, 8000 8004 .early_init = 1, 8001 8005 }; 8002 8006 ··· 8181 8185 .name = "stat", 8182 8186 .read_map = cpuacct_stats_show, 8183 8187 }, 8188 + { } /* terminate */ 8184 8189 }; 8185 - 8186 - static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) 8187 - { 8188 - return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files)); 8189 - } 8190 8190 8191 8191 /* 8192 8192 * charge this task's execution time to its accounting group. ··· 8215 8223 .name = "cpuacct", 8216 8224 .create = cpuacct_create, 8217 8225 .destroy = cpuacct_destroy, 8218 - .populate = cpuacct_populate, 8219 8226 .subsys_id = cpuacct_subsys_id, 8227 + .base_cftypes = files, 8220 8228 }; 8221 8229 #endif /* CONFIG_CGROUP_CPUACCT */
+52 -63
mm/memcontrol.c
··· 3873 3873 return val << PAGE_SHIFT; 3874 3874 } 3875 3875 3876 - static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) 3876 + static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, 3877 + struct file *file, char __user *buf, 3878 + size_t nbytes, loff_t *ppos) 3877 3879 { 3878 3880 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 3881 + char str[64]; 3879 3882 u64 val; 3880 - int type, name; 3883 + int type, name, len; 3881 3884 3882 3885 type = MEMFILE_TYPE(cft->private); 3883 3886 name = MEMFILE_ATTR(cft->private); 3887 + 3888 + if (!do_swap_account && type == _MEMSWAP) 3889 + return -EOPNOTSUPP; 3890 + 3884 3891 switch (type) { 3885 3892 case _MEM: 3886 3893 if (name == RES_USAGE) ··· 3904 3897 default: 3905 3898 BUG(); 3906 3899 } 3907 - return val; 3900 + 3901 + len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); 3902 + return simple_read_from_buffer(buf, nbytes, ppos, str, len); 3908 3903 } 3909 3904 /* 3910 3905 * The user of this function is... 
··· 3922 3913 3923 3914 type = MEMFILE_TYPE(cft->private); 3924 3915 name = MEMFILE_ATTR(cft->private); 3916 + 3917 + if (!do_swap_account && type == _MEMSWAP) 3918 + return -EOPNOTSUPP; 3919 + 3925 3920 switch (name) { 3926 3921 case RES_LIMIT: 3927 3922 if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ ··· 3991 3978 3992 3979 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) 3993 3980 { 3994 - struct mem_cgroup *memcg; 3981 + struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 3995 3982 int type, name; 3996 3983 3997 - memcg = mem_cgroup_from_cont(cont); 3998 3984 type = MEMFILE_TYPE(event); 3999 3985 name = MEMFILE_ATTR(event); 3986 + 3987 + if (!do_swap_account && type == _MEMSWAP) 3988 + return -EOPNOTSUPP; 3989 + 4000 3990 switch (name) { 4001 3991 case RES_MAX_USAGE: 4002 3992 if (type == _MEM) ··· 4640 4624 #endif /* CONFIG_NUMA */ 4641 4625 4642 4626 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 4643 - static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) 4627 + static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 4644 4628 { 4645 - /* 4646 - * Part of this would be better living in a separate allocation 4647 - * function, leaving us with just the cgroup tree population work. 4648 - * We, however, depend on state such as network's proto_list that 4649 - * is only initialized after cgroup creation. 
I found the less 4650 - * cumbersome way to deal with it to defer it all to populate time 4651 - */ 4652 - return mem_cgroup_sockets_init(cont, ss); 4629 + return mem_cgroup_sockets_init(memcg, ss); 4653 4630 }; 4654 4631 4655 - static void kmem_cgroup_destroy(struct cgroup *cont) 4632 + static void kmem_cgroup_destroy(struct mem_cgroup *memcg) 4656 4633 { 4657 - mem_cgroup_sockets_destroy(cont); 4634 + mem_cgroup_sockets_destroy(memcg); 4658 4635 } 4659 4636 #else 4660 - static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) 4637 + static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 4661 4638 { 4662 4639 return 0; 4663 4640 } 4664 4641 4665 - static void kmem_cgroup_destroy(struct cgroup *cont) 4642 + static void kmem_cgroup_destroy(struct mem_cgroup *memcg) 4666 4643 { 4667 4644 } 4668 4645 #endif ··· 4664 4655 { 4665 4656 .name = "usage_in_bytes", 4666 4657 .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), 4667 - .read_u64 = mem_cgroup_read, 4658 + .read = mem_cgroup_read, 4668 4659 .register_event = mem_cgroup_usage_register_event, 4669 4660 .unregister_event = mem_cgroup_usage_unregister_event, 4670 4661 }, ··· 4672 4663 .name = "max_usage_in_bytes", 4673 4664 .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), 4674 4665 .trigger = mem_cgroup_reset, 4675 - .read_u64 = mem_cgroup_read, 4666 + .read = mem_cgroup_read, 4676 4667 }, 4677 4668 { 4678 4669 .name = "limit_in_bytes", 4679 4670 .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), 4680 4671 .write_string = mem_cgroup_write, 4681 - .read_u64 = mem_cgroup_read, 4672 + .read = mem_cgroup_read, 4682 4673 }, 4683 4674 { 4684 4675 .name = "soft_limit_in_bytes", 4685 4676 .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), 4686 4677 .write_string = mem_cgroup_write, 4687 - .read_u64 = mem_cgroup_read, 4678 + .read = mem_cgroup_read, 4688 4679 }, 4689 4680 { 4690 4681 .name = "failcnt", 4691 4682 .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), 4692 4683 .trigger = 
mem_cgroup_reset, 4693 - .read_u64 = mem_cgroup_read, 4684 + .read = mem_cgroup_read, 4694 4685 }, 4695 4686 { 4696 4687 .name = "stat", ··· 4730 4721 .mode = S_IRUGO, 4731 4722 }, 4732 4723 #endif 4733 - }; 4734 - 4735 4724 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4736 - static struct cftype memsw_cgroup_files[] = { 4737 4725 { 4738 4726 .name = "memsw.usage_in_bytes", 4739 4727 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), 4740 - .read_u64 = mem_cgroup_read, 4728 + .read = mem_cgroup_read, 4741 4729 .register_event = mem_cgroup_usage_register_event, 4742 4730 .unregister_event = mem_cgroup_usage_unregister_event, 4743 4731 }, ··· 4742 4736 .name = "memsw.max_usage_in_bytes", 4743 4737 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), 4744 4738 .trigger = mem_cgroup_reset, 4745 - .read_u64 = mem_cgroup_read, 4739 + .read = mem_cgroup_read, 4746 4740 }, 4747 4741 { 4748 4742 .name = "memsw.limit_in_bytes", 4749 4743 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), 4750 4744 .write_string = mem_cgroup_write, 4751 - .read_u64 = mem_cgroup_read, 4745 + .read = mem_cgroup_read, 4752 4746 }, 4753 4747 { 4754 4748 .name = "memsw.failcnt", 4755 4749 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), 4756 4750 .trigger = mem_cgroup_reset, 4757 - .read_u64 = mem_cgroup_read, 4751 + .read = mem_cgroup_read, 4758 4752 }, 4759 - }; 4760 - 4761 - static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) 4762 - { 4763 - if (!do_swap_account) 4764 - return 0; 4765 - return cgroup_add_files(cont, ss, memsw_cgroup_files, 4766 - ARRAY_SIZE(memsw_cgroup_files)); 4767 - }; 4768 - #else 4769 - static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) 4770 - { 4771 - return 0; 4772 - } 4773 4753 #endif 4754 + { }, /* terminate */ 4755 + }; 4774 4756 4775 4757 static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) 4776 4758 { ··· 5010 5016 memcg->move_charge_at_immigrate = 0; 5011 5017 
mutex_init(&memcg->thresholds_lock); 5012 5018 spin_lock_init(&memcg->move_lock); 5019 + 5020 + error = memcg_init_kmem(memcg, &mem_cgroup_subsys); 5021 + if (error) { 5022 + /* 5023 + * We call put now because our (and parent's) refcnts 5024 + * are already in place. mem_cgroup_put() will internally 5025 + * call __mem_cgroup_free, so return directly 5026 + */ 5027 + mem_cgroup_put(memcg); 5028 + return ERR_PTR(error); 5029 + } 5013 5030 return &memcg->css; 5014 5031 free_out: 5015 5032 __mem_cgroup_free(memcg); ··· 5038 5033 { 5039 5034 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 5040 5035 5041 - kmem_cgroup_destroy(cont); 5036 + kmem_cgroup_destroy(memcg); 5042 5037 5043 5038 mem_cgroup_put(memcg); 5044 - } 5045 - 5046 - static int mem_cgroup_populate(struct cgroup_subsys *ss, 5047 - struct cgroup *cont) 5048 - { 5049 - int ret; 5050 - 5051 - ret = cgroup_add_files(cont, ss, mem_cgroup_files, 5052 - ARRAY_SIZE(mem_cgroup_files)); 5053 - 5054 - if (!ret) 5055 - ret = register_memsw_files(cont, ss); 5056 - 5057 - if (!ret) 5058 - ret = register_kmem_files(cont, ss); 5059 - 5060 - return ret; 5061 5039 } 5062 5040 5063 5041 #ifdef CONFIG_MMU ··· 5626 5638 .create = mem_cgroup_create, 5627 5639 .pre_destroy = mem_cgroup_pre_destroy, 5628 5640 .destroy = mem_cgroup_destroy, 5629 - .populate = mem_cgroup_populate, 5630 5641 .can_attach = mem_cgroup_can_attach, 5631 5642 .cancel_attach = mem_cgroup_cancel_attach, 5632 5643 .attach = mem_cgroup_move_task, 5644 + .base_cftypes = mem_cgroup_files, 5633 5645 .early_init = 0, 5634 5646 .use_id = 1, 5647 + .__DEPRECATED_clear_css_refs = true, 5635 5648 }; 5636 5649 5637 5650 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+11 -19
net/core/netprio_cgroup.c
··· 25 25 #include <net/sock.h> 26 26 #include <net/netprio_cgroup.h> 27 27 28 - static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp); 29 - static void cgrp_destroy(struct cgroup *cgrp); 30 - static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); 31 - 32 - struct cgroup_subsys net_prio_subsys = { 33 - .name = "net_prio", 34 - .create = cgrp_create, 35 - .destroy = cgrp_destroy, 36 - .populate = cgrp_populate, 37 - #ifdef CONFIG_NETPRIO_CGROUP 38 - .subsys_id = net_prio_subsys_id, 39 - #endif 40 - .module = THIS_MODULE 41 - }; 42 - 43 28 #define PRIOIDX_SZ 128 44 29 45 30 static unsigned long prioidx_map[PRIOIDX_SZ]; ··· 244 259 .read_map = read_priomap, 245 260 .write_string = write_priomap, 246 261 }, 262 + { } /* terminate */ 247 263 }; 248 264 249 - static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) 250 - { 251 - return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); 252 - } 265 + struct cgroup_subsys net_prio_subsys = { 266 + .name = "net_prio", 267 + .create = cgrp_create, 268 + .destroy = cgrp_destroy, 269 + #ifdef CONFIG_NETPRIO_CGROUP 270 + .subsys_id = net_prio_subsys_id, 271 + #endif 272 + .base_cftypes = ss_files, 273 + .module = THIS_MODULE 274 + }; 253 275 254 276 static int netprio_device_event(struct notifier_block *unused, 255 277 unsigned long event, void *ptr)
+5 -5
net/core/sock.c
··· 143 143 static LIST_HEAD(proto_list); 144 144 145 145 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 146 - int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) 146 + int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 147 147 { 148 148 struct proto *proto; 149 149 int ret = 0; ··· 151 151 mutex_lock(&proto_list_mutex); 152 152 list_for_each_entry(proto, &proto_list, node) { 153 153 if (proto->init_cgroup) { 154 - ret = proto->init_cgroup(cgrp, ss); 154 + ret = proto->init_cgroup(memcg, ss); 155 155 if (ret) 156 156 goto out; 157 157 } ··· 162 162 out: 163 163 list_for_each_entry_continue_reverse(proto, &proto_list, node) 164 164 if (proto->destroy_cgroup) 165 - proto->destroy_cgroup(cgrp); 165 + proto->destroy_cgroup(memcg); 166 166 mutex_unlock(&proto_list_mutex); 167 167 return ret; 168 168 } 169 169 170 - void mem_cgroup_sockets_destroy(struct cgroup *cgrp) 170 + void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) 171 171 { 172 172 struct proto *proto; 173 173 174 174 mutex_lock(&proto_list_mutex); 175 175 list_for_each_entry_reverse(proto, &proto_list, node) 176 176 if (proto->destroy_cgroup) 177 - proto->destroy_cgroup(cgrp); 177 + proto->destroy_cgroup(memcg); 178 178 mutex_unlock(&proto_list_mutex); 179 179 } 180 180 #endif
+38 -39
net/ipv4/tcp_memcontrol.c
··· 6 6 #include <linux/memcontrol.h> 7 7 #include <linux/module.h> 8 8 9 - static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft); 10 - static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, 11 - const char *buffer); 12 - static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event); 13 - 14 - static struct cftype tcp_files[] = { 15 - { 16 - .name = "kmem.tcp.limit_in_bytes", 17 - .write_string = tcp_cgroup_write, 18 - .read_u64 = tcp_cgroup_read, 19 - .private = RES_LIMIT, 20 - }, 21 - { 22 - .name = "kmem.tcp.usage_in_bytes", 23 - .read_u64 = tcp_cgroup_read, 24 - .private = RES_USAGE, 25 - }, 26 - { 27 - .name = "kmem.tcp.failcnt", 28 - .private = RES_FAILCNT, 29 - .trigger = tcp_cgroup_reset, 30 - .read_u64 = tcp_cgroup_read, 31 - }, 32 - { 33 - .name = "kmem.tcp.max_usage_in_bytes", 34 - .private = RES_MAX_USAGE, 35 - .trigger = tcp_cgroup_reset, 36 - .read_u64 = tcp_cgroup_read, 37 - }, 38 - }; 39 - 40 9 static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) 41 10 { 42 11 return container_of(cg_proto, struct tcp_memcontrol, cg_proto); ··· 18 49 } 19 50 EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); 20 51 21 - int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) 52 + int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 22 53 { 23 54 /* 24 55 * The root cgroup does not use res_counters, but rather, ··· 28 59 struct res_counter *res_parent = NULL; 29 60 struct cg_proto *cg_proto, *parent_cg; 30 61 struct tcp_memcontrol *tcp; 31 - struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); 32 62 struct mem_cgroup *parent = parent_mem_cgroup(memcg); 33 63 struct net *net = current->nsproxy->net_ns; 34 64 35 65 cg_proto = tcp_prot.proto_cgroup(memcg); 36 66 if (!cg_proto) 37 - goto create_files; 67 + return 0; 38 68 39 69 tcp = tcp_from_cgproto(cg_proto); 40 70 ··· 56 88 cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; 57 89 cg_proto->memcg = memcg; 58 90 59 - 
create_files: 60 - return cgroup_add_files(cgrp, ss, tcp_files, 61 - ARRAY_SIZE(tcp_files)); 91 + return 0; 62 92 } 63 93 EXPORT_SYMBOL(tcp_init_cgroup); 64 94 65 - void tcp_destroy_cgroup(struct cgroup *cgrp) 95 + void tcp_destroy_cgroup(struct mem_cgroup *memcg) 66 96 { 67 - struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); 68 97 struct cg_proto *cg_proto; 69 98 struct tcp_memcontrol *tcp; 70 99 u64 val; ··· 235 270 236 271 tcp->tcp_prot_mem[idx] = val; 237 272 } 273 + 274 + static struct cftype tcp_files[] = { 275 + { 276 + .name = "kmem.tcp.limit_in_bytes", 277 + .write_string = tcp_cgroup_write, 278 + .read_u64 = tcp_cgroup_read, 279 + .private = RES_LIMIT, 280 + }, 281 + { 282 + .name = "kmem.tcp.usage_in_bytes", 283 + .read_u64 = tcp_cgroup_read, 284 + .private = RES_USAGE, 285 + }, 286 + { 287 + .name = "kmem.tcp.failcnt", 288 + .private = RES_FAILCNT, 289 + .trigger = tcp_cgroup_reset, 290 + .read_u64 = tcp_cgroup_read, 291 + }, 292 + { 293 + .name = "kmem.tcp.max_usage_in_bytes", 294 + .private = RES_MAX_USAGE, 295 + .trigger = tcp_cgroup_reset, 296 + .read_u64 = tcp_cgroup_read, 297 + }, 298 + { } /* terminate */ 299 + }; 300 + 301 + static int __init tcp_memcontrol_init(void) 302 + { 303 + WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files)); 304 + return 0; 305 + } 306 + __initcall(tcp_memcontrol_init);
+11 -20
net/sched/cls_cgroup.c
··· 22 22 #include <net/sock.h> 23 23 #include <net/cls_cgroup.h> 24 24 25 - static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp); 26 - static void cgrp_destroy(struct cgroup *cgrp); 27 - static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); 28 - 29 - struct cgroup_subsys net_cls_subsys = { 30 - .name = "net_cls", 31 - .create = cgrp_create, 32 - .destroy = cgrp_destroy, 33 - .populate = cgrp_populate, 34 - #ifdef CONFIG_NET_CLS_CGROUP 35 - .subsys_id = net_cls_subsys_id, 36 - #endif 37 - .module = THIS_MODULE, 38 - }; 39 - 40 - 41 25 static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp) 42 26 { 43 27 return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id), ··· 70 86 .read_u64 = read_classid, 71 87 .write_u64 = write_classid, 72 88 }, 89 + { } /* terminate */ 73 90 }; 74 91 75 - static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) 76 - { 77 - return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); 78 - } 92 + struct cgroup_subsys net_cls_subsys = { 93 + .name = "net_cls", 94 + .create = cgrp_create, 95 + .destroy = cgrp_destroy, 96 + #ifdef CONFIG_NET_CLS_CGROUP 97 + .subsys_id = net_cls_subsys_id, 98 + #endif 99 + .base_cftypes = ss_files, 100 + .module = THIS_MODULE, 101 + }; 79 102 80 103 struct cls_cgroup_head { 81 104 u32 handle;
+2 -8
security/device_cgroup.c
··· 447 447 .read_seq_string = devcgroup_seq_read, 448 448 .private = DEVCG_LIST, 449 449 }, 450 + { } /* terminate */ 450 451 }; 451 - 452 - static int devcgroup_populate(struct cgroup_subsys *ss, 453 - struct cgroup *cgroup) 454 - { 455 - return cgroup_add_files(cgroup, ss, dev_cgroup_files, 456 - ARRAY_SIZE(dev_cgroup_files)); 457 - } 458 452 459 453 struct cgroup_subsys devices_subsys = { 460 454 .name = "devices", 461 455 .can_attach = devcgroup_can_attach, 462 456 .create = devcgroup_create, 463 457 .destroy = devcgroup_destroy, 464 - .populate = devcgroup_populate, 465 458 .subsys_id = devices_subsys_id, 459 + .base_cftypes = dev_cgroup_files, 466 460 }; 467 461 468 462 int __devcgroup_inode_permission(struct inode *inode, int mask)