Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: memcg/slab: use a single set of kmem_caches for all allocations

Instead of having two sets of kmem_caches: one for system-wide and
non-accounted allocations and the second one shared by all accounted
allocations, we can use just one.

The idea is simple: space for obj_cgroup metadata can be allocated on
demand and filled only for accounted allocations.

This allows removing a bunch of code that is required to handle kmem_cache
clones for accounted allocations. There is no longer any need to create
them, accumulate statistics, propagate attributes, etc. It's quite a
significant simplification.

Also, because the total number of slab_caches is almost halved (not
all kmem_caches have a memcg clone), some additional memory savings are
expected. On my devvm it additionally saves about 3.5% of slab memory.

[guro@fb.com: fix build on MIPS]
Link: http://lkml.kernel.org/r/20200717214810.3733082-1-guro@fb.com

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
Link: http://lkml.kernel.org/r/20200623174037.3951353-18-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Roman Gushchin; committed by Linus Torvalds.
10befea9 15999eef

+78 -590
-2
include/linux/slab.h
··· 155 155 void kmem_cache_destroy(struct kmem_cache *); 156 156 int kmem_cache_shrink(struct kmem_cache *); 157 157 158 - void memcg_create_kmem_cache(struct kmem_cache *cachep); 159 - 160 158 /* 161 159 * Please use this macro to create slab caches. Simply specify the 162 160 * name of the structure and maybe some flags that are listed above.
-3
include/linux/slab_def.h
··· 72 72 int obj_offset; 73 73 #endif /* CONFIG_DEBUG_SLAB */ 74 74 75 - #ifdef CONFIG_MEMCG 76 - struct memcg_cache_params memcg_params; 77 - #endif 78 75 #ifdef CONFIG_KASAN 79 76 struct kasan_cache kasan_info; 80 77 #endif
-10
include/linux/slub_def.h
··· 108 108 struct list_head list; /* List of slab caches */ 109 109 #ifdef CONFIG_SYSFS 110 110 struct kobject kobj; /* For sysfs */ 111 - struct work_struct kobj_remove_work; 112 111 #endif 113 - #ifdef CONFIG_MEMCG 114 - struct memcg_cache_params memcg_params; 115 - /* For propagation, maximum size of a stored attr */ 116 - unsigned int max_attr_size; 117 - #ifdef CONFIG_SYSFS 118 - struct kset *memcg_kset; 119 - #endif 120 - #endif 121 - 122 112 #ifdef CONFIG_SLAB_FREELIST_HARDENED 123 113 unsigned long random; 124 114 #endif
+24 -1
mm/memcontrol.c
··· 2800 2800 } 2801 2801 2802 2802 #ifdef CONFIG_MEMCG_KMEM 2803 + int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, 2804 + gfp_t gfp) 2805 + { 2806 + unsigned int objects = objs_per_slab_page(s, page); 2807 + void *vec; 2808 + 2809 + vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp, 2810 + page_to_nid(page)); 2811 + if (!vec) 2812 + return -ENOMEM; 2813 + 2814 + if (cmpxchg(&page->obj_cgroups, NULL, 2815 + (struct obj_cgroup **) ((unsigned long)vec | 0x1UL))) 2816 + kfree(vec); 2817 + else 2818 + kmemleak_not_leak(vec); 2819 + 2820 + return 0; 2821 + } 2822 + 2803 2823 /* 2804 2824 * Returns a pointer to the memory cgroup to which the kernel object is charged. 2805 2825 * ··· 2846 2826 2847 2827 off = obj_to_index(page->slab_cache, page, p); 2848 2828 objcg = page_obj_cgroups(page)[off]; 2849 - return obj_cgroup_memcg(objcg); 2829 + if (objcg) 2830 + return obj_cgroup_memcg(objcg); 2831 + 2832 + return NULL; 2850 2833 } 2851 2834 2852 2835 /* All other pages use page->mem_cgroup */
+3 -38
mm/slab.c
··· 1379 1379 return NULL; 1380 1380 } 1381 1381 1382 - if (charge_slab_page(page, flags, cachep->gfporder, cachep)) { 1383 - __free_pages(page, cachep->gfporder); 1384 - return NULL; 1385 - } 1386 - 1382 + charge_slab_page(page, flags, cachep->gfporder, cachep); 1387 1383 __SetPageSlab(page); 1388 1384 /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */ 1389 1385 if (sk_memalloc_socks() && page_is_pfmemalloc(page)) ··· 3795 3799 } 3796 3800 3797 3801 /* Always called with the slab_mutex held */ 3798 - static int __do_tune_cpucache(struct kmem_cache *cachep, int limit, 3799 - int batchcount, int shared, gfp_t gfp) 3802 + static int do_tune_cpucache(struct kmem_cache *cachep, int limit, 3803 + int batchcount, int shared, gfp_t gfp) 3800 3804 { 3801 3805 struct array_cache __percpu *cpu_cache, *prev; 3802 3806 int cpu; ··· 3841 3845 return setup_kmem_cache_nodes(cachep, gfp); 3842 3846 } 3843 3847 3844 - static int do_tune_cpucache(struct kmem_cache *cachep, int limit, 3845 - int batchcount, int shared, gfp_t gfp) 3846 - { 3847 - int ret; 3848 - struct kmem_cache *c; 3849 - 3850 - ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp); 3851 - 3852 - if (slab_state < FULL) 3853 - return ret; 3854 - 3855 - if ((ret < 0) || !is_root_cache(cachep)) 3856 - return ret; 3857 - 3858 - lockdep_assert_held(&slab_mutex); 3859 - c = memcg_cache(cachep); 3860 - if (c) { 3861 - /* return value determined by the root cache only */ 3862 - __do_tune_cpucache(c, limit, batchcount, shared, gfp); 3863 - } 3864 - 3865 - return ret; 3866 - } 3867 - 3868 3848 /* Called with slab_mutex held always */ 3869 3849 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) 3870 3850 { ··· 3852 3880 err = cache_random_seq_create(cachep, cachep->num, gfp); 3853 3881 if (err) 3854 3882 goto end; 3855 - 3856 - if (!is_root_cache(cachep)) { 3857 - struct kmem_cache *root = memcg_root_cache(cachep); 3858 - limit = root->limit; 3859 - shared = root->shared; 3860 - 
batchcount = root->batchcount; 3861 - } 3862 3883 3863 3884 if (limit && shared && batchcount) 3864 3885 goto skip_setup;
+42 -152
mm/slab.h
··· 30 30 struct list_head list; /* List of all slab caches on the system */ 31 31 }; 32 32 33 - #else /* !CONFIG_SLOB */ 34 - 35 - /* 36 - * This is the main placeholder for memcg-related information in kmem caches. 37 - * Both the root cache and the child cache will have it. Some fields are used 38 - * in both cases, other are specific to root caches. 39 - * 40 - * @root_cache: Common to root and child caches. NULL for root, pointer to 41 - * the root cache for children. 42 - * 43 - * The following fields are specific to root caches. 44 - * 45 - * @memcg_cache: pointer to memcg kmem cache, used by all non-root memory 46 - * cgroups. 47 - * @work: work struct used to create the non-root cache. 48 - */ 49 - struct memcg_cache_params { 50 - struct kmem_cache *root_cache; 51 - 52 - struct kmem_cache *memcg_cache; 53 - struct work_struct work; 54 - }; 55 33 #endif /* CONFIG_SLOB */ 56 34 57 35 #ifdef CONFIG_SLAB ··· 174 196 void __kmem_cache_release(struct kmem_cache *); 175 197 int __kmem_cache_shrink(struct kmem_cache *); 176 198 void slab_kmem_cache_release(struct kmem_cache *); 177 - void kmem_cache_shrink_all(struct kmem_cache *s); 178 199 179 200 struct seq_file; 180 201 struct file; ··· 240 263 } 241 264 242 265 #ifdef CONFIG_MEMCG_KMEM 243 - static inline bool is_root_cache(struct kmem_cache *s) 244 - { 245 - return !s->memcg_params.root_cache; 246 - } 247 - 248 - static inline bool slab_equal_or_root(struct kmem_cache *s, 249 - struct kmem_cache *p) 250 - { 251 - return p == s || p == s->memcg_params.root_cache; 252 - } 253 - 254 - /* 255 - * We use suffixes to the name in memcg because we can't have caches 256 - * created in the system with the same name. 
But when we print them 257 - * locally, better refer to them with the base name 258 - */ 259 - static inline const char *cache_name(struct kmem_cache *s) 260 - { 261 - if (!is_root_cache(s)) 262 - s = s->memcg_params.root_cache; 263 - return s->name; 264 - } 265 - 266 - static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) 267 - { 268 - if (is_root_cache(s)) 269 - return s; 270 - return s->memcg_params.root_cache; 271 - } 272 - 273 - static inline struct kmem_cache *memcg_cache(struct kmem_cache *s) 274 - { 275 - if (is_root_cache(s)) 276 - return s->memcg_params.memcg_cache; 277 - return NULL; 278 - } 279 - 280 266 static inline struct obj_cgroup **page_obj_cgroups(struct page *page) 281 267 { 282 268 /* ··· 257 317 return ((unsigned long)page->obj_cgroups & 0x1UL); 258 318 } 259 319 260 - static inline int memcg_alloc_page_obj_cgroups(struct page *page, 261 - struct kmem_cache *s, gfp_t gfp) 262 - { 263 - unsigned int objects = objs_per_slab_page(s, page); 264 - void *vec; 265 - 266 - vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp, 267 - page_to_nid(page)); 268 - if (!vec) 269 - return -ENOMEM; 270 - 271 - kmemleak_not_leak(vec); 272 - page->obj_cgroups = (struct obj_cgroup **) ((unsigned long)vec | 0x1UL); 273 - return 0; 274 - } 320 + int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, 321 + gfp_t gfp); 275 322 276 323 static inline void memcg_free_page_obj_cgroups(struct page *page) 277 324 { ··· 275 348 return s->size + sizeof(struct obj_cgroup *); 276 349 } 277 350 278 - static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s, 279 - struct obj_cgroup **objcgp, 280 - size_t objects, gfp_t flags) 351 + static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s, 352 + size_t objects, 353 + gfp_t flags) 281 354 { 282 - struct kmem_cache *cachep; 283 355 struct obj_cgroup *objcg; 284 356 285 357 if (memcg_kmem_bypass()) 286 - return s; 287 - 288 - cachep = 
READ_ONCE(s->memcg_params.memcg_cache); 289 - if (unlikely(!cachep)) { 290 - /* 291 - * If memcg cache does not exist yet, we schedule it's 292 - * asynchronous creation and let the current allocation 293 - * go through with the root cache. 294 - */ 295 - queue_work(system_wq, &s->memcg_params.work); 296 - return s; 297 - } 358 + return NULL; 298 359 299 360 objcg = get_obj_cgroup_from_current(); 300 361 if (!objcg) 301 - return s; 362 + return NULL; 302 363 303 364 if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) { 304 365 obj_cgroup_put(objcg); 305 - cachep = NULL; 366 + return NULL; 306 367 } 307 368 308 - *objcgp = objcg; 309 - return cachep; 369 + return objcg; 310 370 } 311 371 312 372 static inline void mod_objcg_state(struct obj_cgroup *objcg, ··· 312 398 313 399 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, 314 400 struct obj_cgroup *objcg, 315 - size_t size, void **p) 401 + gfp_t flags, size_t size, 402 + void **p) 316 403 { 317 404 struct page *page; 318 405 unsigned long off; 319 406 size_t i; 320 407 408 + if (!objcg) 409 + return; 410 + 411 + flags &= ~__GFP_ACCOUNT; 321 412 for (i = 0; i < size; i++) { 322 413 if (likely(p[i])) { 323 414 page = virt_to_head_page(p[i]); 415 + 416 + if (!page_has_obj_cgroups(page) && 417 + memcg_alloc_page_obj_cgroups(page, s, flags)) { 418 + obj_cgroup_uncharge(objcg, obj_full_size(s)); 419 + continue; 420 + } 421 + 324 422 off = obj_to_index(s, page, p[i]); 325 423 obj_cgroup_get(objcg); 326 424 page_obj_cgroups(page)[off] = objcg; ··· 351 425 struct obj_cgroup *objcg; 352 426 unsigned int off; 353 427 354 - if (!memcg_kmem_enabled() || is_root_cache(s)) 428 + if (!memcg_kmem_enabled()) 429 + return; 430 + 431 + if (!page_has_obj_cgroups(page)) 355 432 return; 356 433 357 434 off = obj_to_index(s, page, p); 358 435 objcg = page_obj_cgroups(page)[off]; 359 436 page_obj_cgroups(page)[off] = NULL; 437 + 438 + if (!objcg) 439 + return; 360 440 361 441 obj_cgroup_uncharge(objcg, 
obj_full_size(s)); 362 442 mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), ··· 371 439 obj_cgroup_put(objcg); 372 440 } 373 441 374 - extern void slab_init_memcg_params(struct kmem_cache *); 375 - 376 442 #else /* CONFIG_MEMCG_KMEM */ 377 - static inline bool is_root_cache(struct kmem_cache *s) 378 - { 379 - return true; 380 - } 381 - 382 - static inline bool slab_equal_or_root(struct kmem_cache *s, 383 - struct kmem_cache *p) 384 - { 385 - return s == p; 386 - } 387 - 388 - static inline const char *cache_name(struct kmem_cache *s) 389 - { 390 - return s->name; 391 - } 392 - 393 - static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) 394 - { 395 - return s; 396 - } 397 - 398 - static inline struct kmem_cache *memcg_cache(struct kmem_cache *s) 399 - { 400 - return NULL; 401 - } 402 - 403 443 static inline bool page_has_obj_cgroups(struct page *page) 404 444 { 405 445 return false; ··· 392 488 { 393 489 } 394 490 395 - static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s, 396 - struct obj_cgroup **objcgp, 397 - size_t objects, gfp_t flags) 491 + static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s, 492 + size_t objects, 493 + gfp_t flags) 398 494 { 399 495 return NULL; 400 496 } 401 497 402 498 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, 403 499 struct obj_cgroup *objcg, 404 - size_t size, void **p) 500 + gfp_t flags, size_t size, 501 + void **p) 405 502 { 406 503 } 407 504 ··· 410 505 void *p) 411 506 { 412 507 } 413 - 414 - static inline void slab_init_memcg_params(struct kmem_cache *s) 415 - { 416 - } 417 - 418 508 #endif /* CONFIG_MEMCG_KMEM */ 419 509 420 510 static inline struct kmem_cache *virt_to_cache(const void *obj) ··· 423 523 return page->slab_cache; 424 524 } 425 525 426 - static __always_inline int charge_slab_page(struct page *page, 427 - gfp_t gfp, int order, 428 - struct kmem_cache *s) 526 + static __always_inline void 
charge_slab_page(struct page *page, 527 + gfp_t gfp, int order, 528 + struct kmem_cache *s) 429 529 { 430 - if (memcg_kmem_enabled() && !is_root_cache(s)) { 431 - int ret; 432 - 433 - ret = memcg_alloc_page_obj_cgroups(page, s, gfp); 434 - if (ret) 435 - return ret; 436 - } 437 - 438 530 mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), 439 531 PAGE_SIZE << order); 440 - return 0; 441 532 } 442 533 443 534 static __always_inline void uncharge_slab_page(struct page *page, int order, 444 535 struct kmem_cache *s) 445 536 { 446 - if (memcg_kmem_enabled() && !is_root_cache(s)) 537 + if (memcg_kmem_enabled()) 447 538 memcg_free_page_obj_cgroups(page); 448 539 449 540 mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), ··· 446 555 struct kmem_cache *cachep; 447 556 448 557 if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) && 449 - !memcg_kmem_enabled() && 450 558 !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) 451 559 return s; 452 560 453 561 cachep = virt_to_cache(x); 454 - if (WARN(cachep && !slab_equal_or_root(cachep, s), 562 + if (WARN(cachep && cachep != s, 455 563 "%s: Wrong slab cache. %s but object is from %s\n", 456 564 __func__, s->name, cachep->name)) 457 565 print_tracking(cachep, x); ··· 503 613 504 614 if (memcg_kmem_enabled() && 505 615 ((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT))) 506 - return memcg_slab_pre_alloc_hook(s, objcgp, size, flags); 616 + *objcgp = memcg_slab_pre_alloc_hook(s, size, flags); 507 617 508 618 return s; 509 619 } ··· 522 632 s->flags, flags); 523 633 } 524 634 525 - if (memcg_kmem_enabled() && !is_root_cache(s)) 526 - memcg_slab_post_alloc_hook(s, objcg, size, p); 635 + if (memcg_kmem_enabled()) 636 + memcg_slab_post_alloc_hook(s, objcg, flags, size, p); 527 637 } 528 638 529 639 #ifndef CONFIG_SLOB
+5 -225
mm/slab_common.c
··· 130 130 return i; 131 131 } 132 132 133 - #ifdef CONFIG_MEMCG_KMEM 134 - static void memcg_kmem_cache_create_func(struct work_struct *work) 135 - { 136 - struct kmem_cache *cachep = container_of(work, struct kmem_cache, 137 - memcg_params.work); 138 - memcg_create_kmem_cache(cachep); 139 - } 140 - 141 - void slab_init_memcg_params(struct kmem_cache *s) 142 - { 143 - s->memcg_params.root_cache = NULL; 144 - s->memcg_params.memcg_cache = NULL; 145 - INIT_WORK(&s->memcg_params.work, memcg_kmem_cache_create_func); 146 - } 147 - 148 - static void init_memcg_params(struct kmem_cache *s, 149 - struct kmem_cache *root_cache) 150 - { 151 - if (root_cache) 152 - s->memcg_params.root_cache = root_cache; 153 - else 154 - slab_init_memcg_params(s); 155 - } 156 - #else 157 - static inline void init_memcg_params(struct kmem_cache *s, 158 - struct kmem_cache *root_cache) 159 - { 160 - } 161 - #endif /* CONFIG_MEMCG_KMEM */ 162 - 163 133 /* 164 134 * Figure out what the alignment of the objects will be given a set of 165 135 * flags, a user specified alignment and the size of the objects. ··· 165 195 int slab_unmergeable(struct kmem_cache *s) 166 196 { 167 197 if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE)) 168 - return 1; 169 - 170 - if (!is_root_cache(s)) 171 198 return 1; 172 199 173 200 if (s->ctor) ··· 253 286 s->useroffset = useroffset; 254 287 s->usersize = usersize; 255 288 256 - init_memcg_params(s, root_cache); 257 289 err = __kmem_cache_create(s, flags); 258 290 if (err) 259 291 goto out_free_cache; ··· 310 344 311 345 get_online_cpus(); 312 346 get_online_mems(); 313 - memcg_get_cache_ids(); 314 347 315 348 mutex_lock(&slab_mutex); 316 349 ··· 359 394 out_unlock: 360 395 mutex_unlock(&slab_mutex); 361 396 362 - memcg_put_cache_ids(); 363 397 put_online_mems(); 364 398 put_online_cpus(); 365 399 ··· 471 507 return 0; 472 508 } 473 509 474 - #ifdef CONFIG_MEMCG_KMEM 475 - /* 476 - * memcg_create_kmem_cache - Create a cache for non-root memory cgroups. 
477 - * @root_cache: The parent of the new cache. 478 - * 479 - * This function attempts to create a kmem cache that will serve allocation 480 - * requests going all non-root memory cgroups to @root_cache. The new cache 481 - * inherits properties from its parent. 482 - */ 483 - void memcg_create_kmem_cache(struct kmem_cache *root_cache) 484 - { 485 - struct kmem_cache *s = NULL; 486 - char *cache_name; 487 - 488 - get_online_cpus(); 489 - get_online_mems(); 490 - 491 - mutex_lock(&slab_mutex); 492 - 493 - if (root_cache->memcg_params.memcg_cache) 494 - goto out_unlock; 495 - 496 - cache_name = kasprintf(GFP_KERNEL, "%s-memcg", root_cache->name); 497 - if (!cache_name) 498 - goto out_unlock; 499 - 500 - s = create_cache(cache_name, root_cache->object_size, 501 - root_cache->align, 502 - root_cache->flags & CACHE_CREATE_MASK, 503 - root_cache->useroffset, root_cache->usersize, 504 - root_cache->ctor, root_cache); 505 - /* 506 - * If we could not create a memcg cache, do not complain, because 507 - * that's not critical at all as we can always proceed with the root 508 - * cache. 509 - */ 510 - if (IS_ERR(s)) { 511 - kfree(cache_name); 512 - goto out_unlock; 513 - } 514 - 515 - /* 516 - * Since readers won't lock (see memcg_slab_pre_alloc_hook()), we need a 517 - * barrier here to ensure nobody will see the kmem_cache partially 518 - * initialized. 
519 - */ 520 - smp_wmb(); 521 - root_cache->memcg_params.memcg_cache = s; 522 - 523 - out_unlock: 524 - mutex_unlock(&slab_mutex); 525 - 526 - put_online_mems(); 527 - put_online_cpus(); 528 - } 529 - 530 - static int shutdown_memcg_caches(struct kmem_cache *s) 531 - { 532 - BUG_ON(!is_root_cache(s)); 533 - 534 - if (s->memcg_params.memcg_cache) 535 - WARN_ON(shutdown_cache(s->memcg_params.memcg_cache)); 536 - 537 - return 0; 538 - } 539 - 540 - static void cancel_memcg_cache_creation(struct kmem_cache *s) 541 - { 542 - cancel_work_sync(&s->memcg_params.work); 543 - } 544 - #else 545 - static inline int shutdown_memcg_caches(struct kmem_cache *s) 546 - { 547 - return 0; 548 - } 549 - 550 - static inline void cancel_memcg_cache_creation(struct kmem_cache *s) 551 - { 552 - } 553 - #endif /* CONFIG_MEMCG_KMEM */ 554 - 555 510 void slab_kmem_cache_release(struct kmem_cache *s) 556 511 { 557 512 __kmem_cache_release(s); ··· 485 602 if (unlikely(!s)) 486 603 return; 487 604 488 - cancel_memcg_cache_creation(s); 489 - 490 605 get_online_cpus(); 491 606 get_online_mems(); 492 607 ··· 494 613 if (s->refcount) 495 614 goto out_unlock; 496 615 497 - err = shutdown_memcg_caches(s); 498 - if (!err) 499 - err = shutdown_cache(s); 500 - 616 + err = shutdown_cache(s); 501 617 if (err) { 502 618 pr_err("kmem_cache_destroy %s: Slab cache still has objects\n", 503 619 s->name); ··· 531 653 } 532 654 EXPORT_SYMBOL(kmem_cache_shrink); 533 655 534 - /** 535 - * kmem_cache_shrink_all - shrink root and memcg caches 536 - * @s: The cache pointer 537 - */ 538 - void kmem_cache_shrink_all(struct kmem_cache *s) 539 - { 540 - struct kmem_cache *c; 541 - 542 - if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || !is_root_cache(s)) { 543 - kmem_cache_shrink(s); 544 - return; 545 - } 546 - 547 - get_online_cpus(); 548 - get_online_mems(); 549 - kasan_cache_shrink(s); 550 - __kmem_cache_shrink(s); 551 - 552 - c = memcg_cache(s); 553 - if (c) { 554 - kasan_cache_shrink(c); 555 - __kmem_cache_shrink(c); 556 - } 
557 - put_online_mems(); 558 - put_online_cpus(); 559 - } 560 - 561 656 bool slab_is_available(void) 562 657 { 563 658 return slab_state >= UP; ··· 558 707 559 708 s->useroffset = useroffset; 560 709 s->usersize = usersize; 561 - 562 - slab_init_memcg_params(s); 563 710 564 711 err = __kmem_cache_create(s, flags); 565 712 ··· 947 1098 mutex_unlock(&slab_mutex); 948 1099 } 949 1100 950 - static void 951 - memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info) 952 - { 953 - struct kmem_cache *c; 954 - struct slabinfo sinfo; 955 - 956 - c = memcg_cache(s); 957 - if (c) { 958 - memset(&sinfo, 0, sizeof(sinfo)); 959 - get_slabinfo(c, &sinfo); 960 - 961 - info->active_slabs += sinfo.active_slabs; 962 - info->num_slabs += sinfo.num_slabs; 963 - info->shared_avail += sinfo.shared_avail; 964 - info->active_objs += sinfo.active_objs; 965 - info->num_objs += sinfo.num_objs; 966 - } 967 - } 968 - 969 1101 static void cache_show(struct kmem_cache *s, struct seq_file *m) 970 1102 { 971 1103 struct slabinfo sinfo; ··· 954 1124 memset(&sinfo, 0, sizeof(sinfo)); 955 1125 get_slabinfo(s, &sinfo); 956 1126 957 - memcg_accumulate_slabinfo(s, &sinfo); 958 - 959 1127 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", 960 - cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size, 1128 + s->name, sinfo.active_objs, sinfo.num_objs, s->size, 961 1129 sinfo.objects_per_slab, (1 << sinfo.cache_order)); 962 1130 963 1131 seq_printf(m, " : tunables %4u %4u %4u", ··· 972 1144 973 1145 if (p == slab_caches.next) 974 1146 print_slabinfo_header(m); 975 - if (is_root_cache(s)) 976 - cache_show(s, m); 1147 + cache_show(s, m); 977 1148 return 0; 978 1149 } 979 1150 ··· 997 1170 pr_info("Name Used Total\n"); 998 1171 999 1172 list_for_each_entry_safe(s, s2, &slab_caches, list) { 1000 - if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT)) 1173 + if (s->flags & SLAB_RECLAIM_ACCOUNT) 1001 1174 continue; 1002 1175 1003 1176 get_slabinfo(s, &sinfo); 1004 1177 1005 1178 if (sinfo.num_objs 
> 0) 1006 - pr_info("%-17s %10luKB %10luKB\n", cache_name(s), 1179 + pr_info("%-17s %10luKB %10luKB\n", s->name, 1007 1180 (sinfo.active_objs * s->size) / 1024, 1008 1181 (sinfo.num_objs * s->size) / 1024); 1009 1182 } ··· 1062 1235 } 1063 1236 module_init(slab_proc_init); 1064 1237 1065 - #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_MEMCG_KMEM) 1066 - /* 1067 - * Display information about kmem caches that have memcg cache. 1068 - */ 1069 - static int memcg_slabinfo_show(struct seq_file *m, void *unused) 1070 - { 1071 - struct kmem_cache *s, *c; 1072 - struct slabinfo sinfo; 1073 - 1074 - mutex_lock(&slab_mutex); 1075 - seq_puts(m, "# <name> <css_id[:dead|deact]> <active_objs> <num_objs>"); 1076 - seq_puts(m, " <active_slabs> <num_slabs>\n"); 1077 - list_for_each_entry(s, &slab_caches, list) { 1078 - /* 1079 - * Skip kmem caches that don't have the memcg cache. 1080 - */ 1081 - if (!s->memcg_params.memcg_cache) 1082 - continue; 1083 - 1084 - memset(&sinfo, 0, sizeof(sinfo)); 1085 - get_slabinfo(s, &sinfo); 1086 - seq_printf(m, "%-17s root %6lu %6lu %6lu %6lu\n", 1087 - cache_name(s), sinfo.active_objs, sinfo.num_objs, 1088 - sinfo.active_slabs, sinfo.num_slabs); 1089 - 1090 - c = s->memcg_params.memcg_cache; 1091 - memset(&sinfo, 0, sizeof(sinfo)); 1092 - get_slabinfo(c, &sinfo); 1093 - seq_printf(m, "%-17s %4d %6lu %6lu %6lu %6lu\n", 1094 - cache_name(c), root_mem_cgroup->css.id, 1095 - sinfo.active_objs, sinfo.num_objs, 1096 - sinfo.active_slabs, sinfo.num_slabs); 1097 - } 1098 - mutex_unlock(&slab_mutex); 1099 - return 0; 1100 - } 1101 - DEFINE_SHOW_ATTRIBUTE(memcg_slabinfo); 1102 - 1103 - static int __init memcg_slabinfo_init(void) 1104 - { 1105 - debugfs_create_file("memcg_slabinfo", S_IFREG | S_IRUGO, 1106 - NULL, NULL, &memcg_slabinfo_fops); 1107 - return 0; 1108 - } 1109 - 1110 - late_initcall(memcg_slabinfo_init); 1111 - #endif /* CONFIG_DEBUG_FS && CONFIG_MEMCG_KMEM */ 1112 1238 #endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */ 1113 1239 1114 1240 static 
__always_inline void *__do_krealloc(const void *p, size_t new_size,
+4 -159
mm/slub.c
··· 218 218 #ifdef CONFIG_SYSFS 219 219 static int sysfs_slab_add(struct kmem_cache *); 220 220 static int sysfs_slab_alias(struct kmem_cache *, const char *); 221 - static void memcg_propagate_slab_attrs(struct kmem_cache *s); 222 - static void sysfs_slab_remove(struct kmem_cache *s); 223 221 #else 224 222 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } 225 223 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) 226 224 { return 0; } 227 - static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { } 228 - static inline void sysfs_slab_remove(struct kmem_cache *s) { } 229 225 #endif 230 226 231 227 static inline void stat(const struct kmem_cache *s, enum stat_item si) ··· 1620 1624 else 1621 1625 page = __alloc_pages_node(node, flags, order); 1622 1626 1623 - if (page && charge_slab_page(page, flags, order, s)) { 1624 - __free_pages(page, order); 1625 - page = NULL; 1626 - } 1627 + if (page) 1628 + charge_slab_page(page, flags, order, s); 1627 1629 1628 1630 return page; 1629 1631 } ··· 3914 3920 if (n->nr_partial || slabs_node(s, node)) 3915 3921 return 1; 3916 3922 } 3917 - sysfs_slab_remove(s); 3918 3923 return 0; 3919 3924 } 3920 3925 ··· 4351 4358 p->slab_cache = s; 4352 4359 #endif 4353 4360 } 4354 - slab_init_memcg_params(s); 4355 4361 list_add(&s->list, &slab_caches); 4356 4362 return s; 4357 4363 } ··· 4406 4414 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, 4407 4415 slab_flags_t flags, void (*ctor)(void *)) 4408 4416 { 4409 - struct kmem_cache *s, *c; 4417 + struct kmem_cache *s; 4410 4418 4411 4419 s = find_mergeable(size, align, flags, name, ctor); 4412 4420 if (s) { ··· 4418 4426 */ 4419 4427 s->object_size = max(s->object_size, size); 4420 4428 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *))); 4421 - 4422 - c = memcg_cache(s); 4423 - if (c) { 4424 - c->object_size = s->object_size; 4425 - c->inuse = max(c->inuse, ALIGN(size, sizeof(void *))); 4426 - } 4427 4429 4428 
4430 if (sysfs_slab_alias(s, name)) { 4429 4431 s->refcount--; ··· 4440 4454 if (slab_state <= UP) 4441 4455 return 0; 4442 4456 4443 - memcg_propagate_slab_attrs(s); 4444 4457 err = sysfs_slab_add(s); 4445 4458 if (err) 4446 4459 __kmem_cache_release(s); ··· 5297 5312 const char *buf, size_t length) 5298 5313 { 5299 5314 if (buf[0] == '1') 5300 - kmem_cache_shrink_all(s); 5315 + kmem_cache_shrink(s); 5301 5316 else 5302 5317 return -EINVAL; 5303 5318 return length; ··· 5521 5536 return -EIO; 5522 5537 5523 5538 err = attribute->store(s, buf, len); 5524 - #ifdef CONFIG_MEMCG 5525 - if (slab_state >= FULL && err >= 0 && is_root_cache(s)) { 5526 - struct kmem_cache *c; 5527 - 5528 - mutex_lock(&slab_mutex); 5529 - if (s->max_attr_size < len) 5530 - s->max_attr_size = len; 5531 - 5532 - /* 5533 - * This is a best effort propagation, so this function's return 5534 - * value will be determined by the parent cache only. This is 5535 - * basically because not all attributes will have a well 5536 - * defined semantics for rollbacks - most of the actions will 5537 - * have permanent effects. 5538 - * 5539 - * Returning the error value of any of the children that fail 5540 - * is not 100 % defined, in the sense that users seeing the 5541 - * error code won't be able to know anything about the state of 5542 - * the cache. 5543 - * 5544 - * Only returning the error code for the parent cache at least 5545 - * has well defined semantics. The cache being written to 5546 - * directly either failed or succeeded, in which case we loop 5547 - * through the descendants with best-effort propagation. 
5548 - */ 5549 - c = memcg_cache(s); 5550 - if (c) 5551 - attribute->store(c, buf, len); 5552 - mutex_unlock(&slab_mutex); 5553 - } 5554 - #endif 5555 5539 return err; 5556 - } 5557 - 5558 - static void memcg_propagate_slab_attrs(struct kmem_cache *s) 5559 - { 5560 - #ifdef CONFIG_MEMCG 5561 - int i; 5562 - char *buffer = NULL; 5563 - struct kmem_cache *root_cache; 5564 - 5565 - if (is_root_cache(s)) 5566 - return; 5567 - 5568 - root_cache = s->memcg_params.root_cache; 5569 - 5570 - /* 5571 - * This mean this cache had no attribute written. Therefore, no point 5572 - * in copying default values around 5573 - */ 5574 - if (!root_cache->max_attr_size) 5575 - return; 5576 - 5577 - for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) { 5578 - char mbuf[64]; 5579 - char *buf; 5580 - struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); 5581 - ssize_t len; 5582 - 5583 - if (!attr || !attr->store || !attr->show) 5584 - continue; 5585 - 5586 - /* 5587 - * It is really bad that we have to allocate here, so we will 5588 - * do it only as a fallback. If we actually allocate, though, 5589 - * we can just use the allocated buffer until the end. 5590 - * 5591 - * Most of the slub attributes will tend to be very small in 5592 - * size, but sysfs allows buffers up to a page, so they can 5593 - * theoretically happen. 
5594 - */ 5595 - if (buffer) 5596 - buf = buffer; 5597 - else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf) && 5598 - !IS_ENABLED(CONFIG_SLUB_STATS)) 5599 - buf = mbuf; 5600 - else { 5601 - buffer = (char *) get_zeroed_page(GFP_KERNEL); 5602 - if (WARN_ON(!buffer)) 5603 - continue; 5604 - buf = buffer; 5605 - } 5606 - 5607 - len = attr->show(root_cache, buf); 5608 - if (len > 0) 5609 - attr->store(s, buf, len); 5610 - } 5611 - 5612 - if (buffer) 5613 - free_page((unsigned long)buffer); 5614 - #endif /* CONFIG_MEMCG */ 5615 5540 } 5616 5541 5617 5542 static void kmem_cache_release(struct kobject *k) ··· 5543 5648 5544 5649 static inline struct kset *cache_kset(struct kmem_cache *s) 5545 5650 { 5546 - #ifdef CONFIG_MEMCG 5547 - if (!is_root_cache(s)) 5548 - return s->memcg_params.root_cache->memcg_kset; 5549 - #endif 5550 5651 return slab_kset; 5551 5652 } 5552 5653 ··· 5585 5694 return name; 5586 5695 } 5587 5696 5588 - static void sysfs_slab_remove_workfn(struct work_struct *work) 5589 - { 5590 - struct kmem_cache *s = 5591 - container_of(work, struct kmem_cache, kobj_remove_work); 5592 - 5593 - if (!s->kobj.state_in_sysfs) 5594 - /* 5595 - * For a memcg cache, this may be called during 5596 - * deactivation and again on shutdown. Remove only once. 5597 - * A cache is never shut down before deactivation is 5598 - * complete, so no need to worry about synchronization. 
5599 - */ 5600 - goto out; 5601 - 5602 - #ifdef CONFIG_MEMCG 5603 - kset_unregister(s->memcg_kset); 5604 - #endif 5605 - out: 5606 - kobject_put(&s->kobj); 5607 - } 5608 - 5609 5697 static int sysfs_slab_add(struct kmem_cache *s) 5610 5698 { 5611 5699 int err; 5612 5700 const char *name; 5613 5701 struct kset *kset = cache_kset(s); 5614 5702 int unmergeable = slab_unmergeable(s); 5615 - 5616 - INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn); 5617 5703 5618 5704 if (!kset) { 5619 5705 kobject_init(&s->kobj, &slab_ktype); ··· 5628 5760 if (err) 5629 5761 goto out_del_kobj; 5630 5762 5631 - #ifdef CONFIG_MEMCG 5632 - if (is_root_cache(s) && memcg_sysfs_enabled) { 5633 - s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj); 5634 - if (!s->memcg_kset) { 5635 - err = -ENOMEM; 5636 - goto out_del_kobj; 5637 - } 5638 - } 5639 - #endif 5640 - 5641 5763 if (!unmergeable) { 5642 5764 /* Setup first alias */ 5643 5765 sysfs_slab_alias(s, s->name); ··· 5639 5781 out_del_kobj: 5640 5782 kobject_del(&s->kobj); 5641 5783 goto out; 5642 - } 5643 - 5644 - static void sysfs_slab_remove(struct kmem_cache *s) 5645 - { 5646 - if (slab_state < FULL) 5647 - /* 5648 - * Sysfs has not been setup yet so no need to remove the 5649 - * cache from sysfs. 5650 - */ 5651 - return; 5652 - 5653 - kobject_get(&s->kobj); 5654 - schedule_work(&s->kobj_remove_work); 5655 5784 } 5656 5785 5657 5786 void sysfs_slab_unlink(struct kmem_cache *s)