Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: hugetlb_cgroup: convert to lockless page counters

Abandon the spinlock-protected byte counters in favor of the unlocked
page counters in the hugetlb controller as well.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Johannes Weiner and committed by Linus Torvalds
71f87bee 3e32cb2e

+63 -50
+1 -1
Documentation/cgroups/hugetlb.txt
··· 29 29 30 30 hugetlb.<hugepagesize>.limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage 31 31 hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded 32 - hugetlb.<hugepagesize>.usage_in_bytes # show current res_counter usage for "hugepagesize" hugetlb 32 + hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb 33 33 hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB limit 34 34 35 35 For a system supporting two hugepage size (16M and 16G) the control
-1
include/linux/hugetlb_cgroup.h
··· 16 16 #define _LINUX_HUGETLB_CGROUP_H 17 17 18 18 #include <linux/mmdebug.h> 19 - #include <linux/res_counter.h> 20 19 21 20 struct hugetlb_cgroup; 22 21 /*
+2 -1
init/Kconfig
··· 1051 1051 1052 1052 config CGROUP_HUGETLB 1053 1053 bool "HugeTLB Resource Controller for Control Groups" 1054 - depends on RESOURCE_COUNTERS && HUGETLB_PAGE 1054 + depends on HUGETLB_PAGE 1055 + select PAGE_COUNTER 1055 1056 default n 1056 1057 help 1057 1058 Provides a cgroup Resource Controller for HugeTLB pages.
+60 -47
mm/hugetlb_cgroup.c
··· 14 14 */ 15 15 16 16 #include <linux/cgroup.h> 17 + #include <linux/page_counter.h> 17 18 #include <linux/slab.h> 18 19 #include <linux/hugetlb.h> 19 20 #include <linux/hugetlb_cgroup.h> ··· 24 23 /* 25 24 * the counter to account for hugepages from hugetlb. 26 25 */ 27 - struct res_counter hugepage[HUGE_MAX_HSTATE]; 26 + struct page_counter hugepage[HUGE_MAX_HSTATE]; 28 27 }; 29 28 30 29 #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) ··· 61 60 int idx; 62 61 63 62 for (idx = 0; idx < hugetlb_max_hstate; idx++) { 64 - if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0) 63 + if (page_counter_read(&h_cg->hugepage[idx])) 65 64 return true; 66 65 } 67 66 return false; ··· 80 79 81 80 if (parent_h_cgroup) { 82 81 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) 83 - res_counter_init(&h_cgroup->hugepage[idx], 84 - &parent_h_cgroup->hugepage[idx]); 82 + page_counter_init(&h_cgroup->hugepage[idx], 83 + &parent_h_cgroup->hugepage[idx]); 85 84 } else { 86 85 root_h_cgroup = h_cgroup; 87 86 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) 88 - res_counter_init(&h_cgroup->hugepage[idx], NULL); 87 + page_counter_init(&h_cgroup->hugepage[idx], NULL); 89 88 } 90 89 return &h_cgroup->css; 91 90 } ··· 109 108 static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, 110 109 struct page *page) 111 110 { 112 - int csize; 113 - struct res_counter *counter; 114 - struct res_counter *fail_res; 111 + unsigned int nr_pages; 112 + struct page_counter *counter; 115 113 struct hugetlb_cgroup *page_hcg; 116 114 struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); 117 115 ··· 123 123 if (!page_hcg || page_hcg != h_cg) 124 124 goto out; 125 125 126 - csize = PAGE_SIZE << compound_order(page); 126 + nr_pages = 1 << compound_order(page); 127 127 if (!parent) { 128 128 parent = root_h_cgroup; 129 129 /* root has no limit */ 130 - res_counter_charge_nofail(&parent->hugepage[idx], 131 - csize, &fail_res); 130 + page_counter_charge(&parent->hugepage[idx], 
nr_pages); 132 131 } 133 132 counter = &h_cg->hugepage[idx]; 134 - res_counter_uncharge_until(counter, counter->parent, csize); 133 + /* Take the pages off the local counter */ 134 + page_counter_cancel(counter, nr_pages); 135 135 136 136 set_hugetlb_cgroup(page, parent); 137 137 out: ··· 166 166 struct hugetlb_cgroup **ptr) 167 167 { 168 168 int ret = 0; 169 - struct res_counter *fail_res; 169 + struct page_counter *counter; 170 170 struct hugetlb_cgroup *h_cg = NULL; 171 - unsigned long csize = nr_pages * PAGE_SIZE; 172 171 173 172 if (hugetlb_cgroup_disabled()) 174 173 goto done; ··· 186 187 } 187 188 rcu_read_unlock(); 188 189 189 - ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res); 190 + ret = page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter); 190 191 css_put(&h_cg->css); 191 192 done: 192 193 *ptr = h_cg; ··· 212 213 struct page *page) 213 214 { 214 215 struct hugetlb_cgroup *h_cg; 215 - unsigned long csize = nr_pages * PAGE_SIZE; 216 216 217 217 if (hugetlb_cgroup_disabled()) 218 218 return; ··· 220 222 if (unlikely(!h_cg)) 221 223 return; 222 224 set_hugetlb_cgroup(page, NULL); 223 - res_counter_uncharge(&h_cg->hugepage[idx], csize); 225 + page_counter_uncharge(&h_cg->hugepage[idx], nr_pages); 224 226 return; 225 227 } 226 228 227 229 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, 228 230 struct hugetlb_cgroup *h_cg) 229 231 { 230 - unsigned long csize = nr_pages * PAGE_SIZE; 231 - 232 232 if (hugetlb_cgroup_disabled() || !h_cg) 233 233 return; 234 234 235 235 if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) 236 236 return; 237 237 238 - res_counter_uncharge(&h_cg->hugepage[idx], csize); 238 + page_counter_uncharge(&h_cg->hugepage[idx], nr_pages); 239 239 return; 240 240 } 241 + 242 + enum { 243 + RES_USAGE, 244 + RES_LIMIT, 245 + RES_MAX_USAGE, 246 + RES_FAILCNT, 247 + }; 241 248 242 249 static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, 243 250 struct cftype *cft) 244 251 
{ 245 - int idx, name; 252 + struct page_counter *counter; 246 253 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); 247 254 248 - idx = MEMFILE_IDX(cft->private); 249 - name = MEMFILE_ATTR(cft->private); 255 + counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)]; 250 256 251 - return res_counter_read_u64(&h_cg->hugepage[idx], name); 257 + switch (MEMFILE_ATTR(cft->private)) { 258 + case RES_USAGE: 259 + return (u64)page_counter_read(counter) * PAGE_SIZE; 260 + case RES_LIMIT: 261 + return (u64)counter->limit * PAGE_SIZE; 262 + case RES_MAX_USAGE: 263 + return (u64)counter->watermark * PAGE_SIZE; 264 + case RES_FAILCNT: 265 + return counter->failcnt; 266 + default: 267 + BUG(); 268 + } 252 269 } 270 + 271 + static DEFINE_MUTEX(hugetlb_limit_mutex); 253 272 254 273 static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, 255 274 char *buf, size_t nbytes, loff_t off) 256 275 { 257 - int idx, name, ret; 258 - unsigned long long val; 276 + int ret, idx; 277 + unsigned long nr_pages; 259 278 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 260 279 261 - buf = strstrip(buf); 262 - idx = MEMFILE_IDX(of_cft(of)->private); 263 - name = MEMFILE_ATTR(of_cft(of)->private); 280 + if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */ 281 + return -EINVAL; 264 282 265 - switch (name) { 283 + buf = strstrip(buf); 284 + ret = page_counter_memparse(buf, &nr_pages); 285 + if (ret) 286 + return ret; 287 + 288 + idx = MEMFILE_IDX(of_cft(of)->private); 289 + 290 + switch (MEMFILE_ATTR(of_cft(of)->private)) { 266 291 case RES_LIMIT: 267 - if (hugetlb_cgroup_is_root(h_cg)) { 268 - /* Can't set limit on root */ 269 - ret = -EINVAL; 270 - break; 271 - } 272 - /* This function does all necessary parse...reuse it */ 273 - ret = res_counter_memparse_write_strategy(buf, &val); 274 - if (ret) 275 - break; 276 - val = ALIGN(val, 1ULL << huge_page_shift(&hstates[idx])); 277 - ret = res_counter_set_limit(&h_cg->hugepage[idx], val); 292 + 
mutex_lock(&hugetlb_limit_mutex); 293 + ret = page_counter_limit(&h_cg->hugepage[idx], nr_pages); 294 + mutex_unlock(&hugetlb_limit_mutex); 278 295 break; 279 296 default: 280 297 ret = -EINVAL; ··· 301 288 static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, 302 289 char *buf, size_t nbytes, loff_t off) 303 290 { 304 - int idx, name, ret = 0; 291 + int ret = 0; 292 + struct page_counter *counter; 305 293 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); 306 294 307 - idx = MEMFILE_IDX(of_cft(of)->private); 308 - name = MEMFILE_ATTR(of_cft(of)->private); 295 + counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)]; 309 296 310 - switch (name) { 297 + switch (MEMFILE_ATTR(of_cft(of)->private)) { 311 298 case RES_MAX_USAGE: 312 - res_counter_reset_max(&h_cg->hugepage[idx]); 299 + page_counter_reset_watermark(counter); 313 300 break; 314 301 case RES_FAILCNT: 315 - res_counter_reset_failcnt(&h_cg->hugepage[idx]); 302 + counter->failcnt = 0; 316 303 break; 317 304 default: 318 305 ret = -EINVAL;