/* SPDX-License-Identifier: GPL-2.0-or-later */
/* memcontrol.h - Memory Controller
 *
 * Copyright IBM Corporation, 2007
 * Author Balbir Singh <balbir@linux.vnet.ibm.com>
 *
 * Copyright 2007 OpenVZ SWsoft Inc
 * Author: Pavel Emelianov <xemul@openvz.org>
 */

#ifndef _LINUX_MEMCONTROL_H
#define _LINUX_MEMCONTROL_H
#include <linux/cgroup.h>
#include <linux/vm_event_item.h>
#include <linux/hardirq.h>
#include <linux/jump_label.h>
#include <linux/page_counter.h>
#include <linux/vmpressure.h>
#include <linux/eventfd.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/writeback.h>
#include <linux/page-flags.h>

struct mem_cgroup;
struct obj_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;

/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
	MEMCG_SOCK,
	MEMCG_PERCPU_B,
	MEMCG_VMALLOC,
	MEMCG_NR_STAT,
};

enum memcg_memory_event {
	MEMCG_LOW,
	MEMCG_HIGH,
	MEMCG_MAX,
	MEMCG_OOM,
	MEMCG_OOM_KILL,
	MEMCG_OOM_GROUP_KILL,
	MEMCG_SWAP_HIGH,
	MEMCG_SWAP_MAX,
	MEMCG_SWAP_FAIL,
	MEMCG_NR_MEMORY_EVENTS,
};

struct mem_cgroup_reclaim_cookie {
	pg_data_t *pgdat;
	unsigned int generation;
};

#ifdef CONFIG_MEMCG

#define MEM_CGROUP_ID_SHIFT	16
#define MEM_CGROUP_ID_MAX	USHRT_MAX

struct mem_cgroup_id {
	int id;
	refcount_t ref;
};

/*
 * The per-memcg event counter is incremented at every pagein/pageout. With THP,
 * it will be incremented by the number of pages. This counter is used
 * to trigger some periodic events. This is straightforward and better
 * than using jiffies etc. to handle periodic memcg events.
 */
enum mem_cgroup_events_target {
	MEM_CGROUP_TARGET_THRESH,
	MEM_CGROUP_TARGET_SOFTLIMIT,
	MEM_CGROUP_NTARGETS,
};

struct memcg_vmstats_percpu {
	/* Local (CPU and cgroup) page state & events */
	long state[MEMCG_NR_STAT];
	unsigned long events[NR_VM_EVENT_ITEMS];

	/* Delta calculation for lockless upward propagation */
	long state_prev[MEMCG_NR_STAT];
	unsigned long events_prev[NR_VM_EVENT_ITEMS];

	/* Cgroup1: threshold notifications & softlimit tree updates */
	unsigned long nr_page_events;
	unsigned long targets[MEM_CGROUP_NTARGETS];
};

struct memcg_vmstats {
	/* Aggregated (CPU and subtree) page state & events */
	long state[MEMCG_NR_STAT];
	unsigned long events[NR_VM_EVENT_ITEMS];

	/* Pending child counts during tree propagation */
	long state_pending[MEMCG_NR_STAT];
	unsigned long events_pending[NR_VM_EVENT_ITEMS];
};

struct mem_cgroup_reclaim_iter {
	struct mem_cgroup *position;
	/* scan generation, increased every round-trip */
	unsigned int generation;
};

/*
 * Bitmap and deferred work of shrinker::id corresponding to memcg-aware
 * shrinkers, which have elements charged to this memcg.
 */
struct shrinker_info {
	struct rcu_head rcu;
	atomic_long_t *nr_deferred;
	unsigned long *map;
};

struct lruvec_stats_percpu {
	/* Local (CPU and cgroup) state */
	long state[NR_VM_NODE_STAT_ITEMS];

	/* Delta calculation for lockless upward propagation */
	long state_prev[NR_VM_NODE_STAT_ITEMS];
};

struct lruvec_stats {
	/* Aggregated (CPU and subtree) state */
	long state[NR_VM_NODE_STAT_ITEMS];

	/* Pending child counts during tree propagation */
	long state_pending[NR_VM_NODE_STAT_ITEMS];
};

/*
 * per-node information in memory controller.
 */
struct mem_cgroup_per_node {
	struct lruvec lruvec;

	struct lruvec_stats_percpu __percpu *lruvec_stats_percpu;
	struct lruvec_stats lruvec_stats;

	unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];

	struct mem_cgroup_reclaim_iter iter;

	struct shrinker_info __rcu *shrinker_info;

	struct rb_node tree_node;	/* RB tree node */
	unsigned long usage_in_excess;	/* Set to the value by which */
					/* the soft limit is exceeded */
	bool on_tree;
	struct mem_cgroup *memcg;	/* Back pointer, we cannot */
					/* use container_of */
};

struct mem_cgroup_threshold {
	struct eventfd_ctx *eventfd;
	unsigned long threshold;
};

/* For threshold */
struct mem_cgroup_threshold_ary {
	/* An array index points to threshold just below or equal to usage. */
	int current_threshold;
	/* Size of entries[] */
	unsigned int size;
	/* Array of thresholds */
	struct mem_cgroup_threshold entries[];
};

struct mem_cgroup_thresholds {
	/* Primary thresholds array */
	struct mem_cgroup_threshold_ary *primary;
	/*
	 * Spare threshold array.
	 * This is needed to make mem_cgroup_unregister_event() "never fail".
	 * It must be able to store at least primary->size - 1 entries.
	 */
	struct mem_cgroup_threshold_ary *spare;
};

#if defined(CONFIG_SMP)
struct memcg_padding {
	char x[0];
} ____cacheline_internodealigned_in_smp;
#define MEMCG_PADDING(name)	struct memcg_padding name
#else
#define MEMCG_PADDING(name)
#endif

/*
 * Remember four most recent foreign writebacks with dirty pages in this
 * cgroup. Inode sharing is expected to be uncommon and, even if we miss
 * one in a given round, we're likely to catch it later if it keeps
 * foreign-dirtying, so a fairly low count should be enough.
 *
 * See mem_cgroup_track_foreign_dirty_slowpath() for details.
 */
#define MEMCG_CGWB_FRN_CNT	4

struct memcg_cgwb_frn {
	u64 bdi_id;			/* bdi->id of the foreign inode */
	int memcg_id;			/* memcg->css.id of foreign inode */
	u64 at;				/* jiffies_64 at the time of dirtying */
	struct wb_completion done;	/* tracks in-flight foreign writebacks */
};

/*
 * Bucket for arbitrarily byte-sized objects charged to a memory
 * cgroup. The bucket can be reparented in one piece when the cgroup
 * is destroyed, without having to round up the individual references
 * of all live memory objects in the wild.
 */
struct obj_cgroup {
	struct percpu_ref refcnt;
	struct mem_cgroup *memcg;
	atomic_t nr_charged_bytes;
	union {
		struct list_head list; /* protected by objcg_lock */
		struct rcu_head rcu;
	};
};

/*
 * The memory controller data structure. The memory controller controls both
 * page cache and RSS per cgroup. We would eventually like to provide
 * statistics based on the statistics developed by Rik Van Riel for clock-pro,
 * to help the administrator determine what knobs to tune.
 */
struct mem_cgroup {
	struct cgroup_subsys_state css;

	/* Private memcg ID. Used to ID objects that outlive the cgroup */
	struct mem_cgroup_id id;

	/* Accounted resources */
	struct page_counter memory;		/* Both v1 & v2 */

	union {
		struct page_counter swap;	/* v2 only */
		struct page_counter memsw;	/* v1 only */
	};

	/* Legacy consumer-oriented counters */
	struct page_counter kmem;		/* v1 only */
	struct page_counter tcpmem;		/* v1 only */

	/* Range enforcement for interrupt charges */
	struct work_struct high_work;

	unsigned long soft_limit;

	/* vmpressure notifications */
	struct vmpressure vmpressure;

	/*
	 * Should the OOM killer kill all tasks belonging to this cgroup,
	 * if it has to kill one?
	 */
	bool oom_group;

	/* protected by memcg_oom_lock */
	bool oom_lock;
	int under_oom;

	int swappiness;
	/* OOM-Killer disable */
	int oom_kill_disable;

	/* memory.events and memory.events.local */
	struct cgroup_file events_file;
	struct cgroup_file events_local_file;

	/* handle for "memory.swap.events" */
	struct cgroup_file swap_events_file;

	/* protect arrays of thresholds */
	struct mutex thresholds_lock;

	/* thresholds for memory usage. RCU-protected */
	struct mem_cgroup_thresholds thresholds;

	/* thresholds for mem+swap usage. RCU-protected */
	struct mem_cgroup_thresholds memsw_thresholds;

	/* For oom notifier event fd */
	struct list_head oom_notify;

	/*
	 * Should we move charges of a task when a task is moved into this
	 * mem_cgroup? And what type of charges should we move?
	 */
	unsigned long move_charge_at_immigrate;
	/* taken only while moving_account > 0 */
	spinlock_t move_lock;
	unsigned long move_lock_flags;

	MEMCG_PADDING(_pad1_);

	/* memory.stat */
	struct memcg_vmstats vmstats;

	/* memory.events */
	atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
	atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS];

	unsigned long socket_pressure;

	/* Legacy tcp memory accounting */
	bool tcpmem_active;
	int tcpmem_pressure;

#ifdef CONFIG_MEMCG_KMEM
	int kmemcg_id;
	struct obj_cgroup __rcu *objcg;
	/* list of inherited objcgs, protected by objcg_lock */
	struct list_head objcg_list;
#endif

	MEMCG_PADDING(_pad2_);

	/*
	 * set > 0 if pages under this cgroup are moving to another cgroup.
	 */
	atomic_t moving_account;
	struct task_struct *move_lock_task;

	struct memcg_vmstats_percpu __percpu *vmstats_percpu;

#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head cgwb_list;
	struct wb_domain cgwb_domain;
	struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT];
#endif

	/* List of events which userspace wants to receive */
	struct list_head event_list;
	spinlock_t event_list_lock;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct deferred_split deferred_split_queue;
#endif

	struct mem_cgroup_per_node *nodeinfo[];
};

/*
 * Size of the first charge trial. "32" comes from vmscan.c's magic value.
 * TODO: larger batch sizes may be necessary on very large machines.
 */
#define MEMCG_CHARGE_BATCH 32U

extern struct mem_cgroup *root_mem_cgroup;

enum page_memcg_data_flags {
	/* page->memcg_data is a pointer to an objcgs vector */
	MEMCG_DATA_OBJCGS = (1UL << 0),
	/* page has been accounted as a non-slab kernel page */
	MEMCG_DATA_KMEM = (1UL << 1),
	/* the next bit after the last actual flag */
	__NR_MEMCG_DATA_FLAGS = (1UL << 2),
};

#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)

static inline bool folio_memcg_kmem(struct folio *folio);

/*
 * After initialization, objcg->memcg always points at a valid memcg,
 * but it can be atomically swapped to the parent memcg.
 *
 * The caller must ensure that the returned memcg won't be released:
 * e.g. acquire the rcu_read_lock or css_set_lock.
 */
static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
{
	return READ_ONCE(objcg->memcg);
}

/*
 * __folio_memcg - Get the memory cgroup associated with a non-kmem folio
 * @folio: Pointer to the folio.
 *
 * Returns a pointer to the memory cgroup associated with the folio,
 * or NULL. This function assumes that the folio is known to have a
 * proper memory cgroup pointer. It's not safe to call this function
 * against some types of folios, e.g. slab folios or ex-slab folios or
 * kmem folios.
 */
static inline struct mem_cgroup *__folio_memcg(struct folio *folio)
{
	unsigned long memcg_data = folio->memcg_data;

	VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
	VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
	VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio);

	return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}

/*
 * __folio_objcg - get the object cgroup associated with a kmem folio.
 * @folio: Pointer to the folio.
 *
 * Returns a pointer to the object cgroup associated with the folio,
 * or NULL. This function assumes that the folio is known to have a
 * proper object cgroup pointer. It's not safe to call this function
 * against some types of folios, e.g. slab folios or ex-slab folios or
 * LRU folios.
 */
static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
{
	unsigned long memcg_data = folio->memcg_data;

	VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
	VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
	VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio);

	return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}

/*
 * folio_memcg - Get the memory cgroup associated with a folio.
 * @folio: Pointer to the folio.
 *
 * Returns a pointer to the memory cgroup associated with the folio,
 * or NULL. This function assumes that the folio is known to have a
 * proper memory cgroup pointer. It's not safe to call this function
 * against some types of folios, e.g. slab folios or ex-slab folios.
 *
 * For a non-kmem folio any of the following ensures folio and memcg binding
 * stability:
 *
 * - the folio lock
 * - LRU isolation
 * - lock_page_memcg()
 * - exclusive reference
 *
 * For a kmem folio a caller should hold an rcu read lock to protect the memcg
 * associated with the kmem folio from being released.
 */
static inline struct mem_cgroup *folio_memcg(struct folio *folio)
{
	if (folio_memcg_kmem(folio))
		return obj_cgroup_memcg(__folio_objcg(folio));
	return __folio_memcg(folio);
}

static inline struct mem_cgroup *page_memcg(struct page *page)
{
	return folio_memcg(page_folio(page));
}

/**
 * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
 * @folio: Pointer to the folio.
 *
 * This function assumes that the folio is known to have a
 * proper memory cgroup pointer. It's not safe to call this function
 * against some types of folios, e.g. slab folios or ex-slab folios.
 *
 * Return: A pointer to the memory cgroup associated with the folio,
 * or NULL.
 */
static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
{
	unsigned long memcg_data = READ_ONCE(folio->memcg_data);

	VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (memcg_data & MEMCG_DATA_KMEM) {
		struct obj_cgroup *objcg;

		objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
		return obj_cgroup_memcg(objcg);
	}

	return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}

/*
 * page_memcg_check - get the memory cgroup associated with a page
 * @page: a pointer to the page struct
 *
 * Returns a pointer to the memory cgroup associated with the page,
 * or NULL. Unlike page_memcg(), this function can take any page
 * as an argument. It has to be used in cases when it's not known if a page
 * has an associated memory cgroup pointer or an object cgroups vector or
 * an object cgroup.
 *
 * For a non-kmem page any of the following ensures page and memcg binding
 * stability:
 *
 * - the page lock
 * - LRU isolation
 * - lock_page_memcg()
 * - exclusive reference
 *
 * For a kmem page a caller should hold an rcu read lock to protect the memcg
 * associated with the kmem page from being released.
 */
static inline struct mem_cgroup *page_memcg_check(struct page *page)
{
	/*
	 * Because page->memcg_data might be changed asynchronously
	 * for slab pages, READ_ONCE() should be used here.
	 */
	unsigned long memcg_data = READ_ONCE(page->memcg_data);

	if (memcg_data & MEMCG_DATA_OBJCGS)
		return NULL;

	if (memcg_data & MEMCG_DATA_KMEM) {
		struct obj_cgroup *objcg;

		objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
		return obj_cgroup_memcg(objcg);
	}

	return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
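
/*
 * Illustrative sketch, not part of the upstream header: the helper below only
 * restates how the MEMCG_DATA_* flag bits and MEMCG_DATA_FLAGS_MASK defined
 * above partition the memcg_data word into a pointer plus low-bit tags. The
 * helper name is hypothetical.
 */
static inline void *memcg_data_ptr_example(unsigned long memcg_data)
{
	/*
	 * Bit 0 (MEMCG_DATA_OBJCGS) marks a slab page whose word points at an
	 * obj_cgroup vector; bit 1 (MEMCG_DATA_KMEM) marks a non-slab kernel
	 * page whose word points at a single obj_cgroup; with neither flag
	 * set the word points at a mem_cgroup. Masking the flag bits off
	 * always yields the underlying pointer, as the accessors above do.
	 */
	return (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}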

#ifdef CONFIG_MEMCG_KMEM
/*
 * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set.
 * @folio: Pointer to the folio.
 *
 * Checks if the folio has the MemcgKmem flag set. The caller must ensure
 * that the folio has an associated memory cgroup. It's not safe to call
 * this function against some types of folios, e.g. slab folios.
 */
static inline bool folio_memcg_kmem(struct folio *folio)
{
	VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page);
	VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio);
	return folio->memcg_data & MEMCG_DATA_KMEM;
}

#else
static inline bool folio_memcg_kmem(struct folio *folio)
{
	return false;
}

#endif

static inline bool PageMemcgKmem(struct page *page)
{
	return folio_memcg_kmem(page_folio(page));
}

static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
	return (memcg == root_mem_cgroup);
}

static inline bool mem_cgroup_disabled(void)
{
	return !cgroup_subsys_enabled(memory_cgrp_subsys);
}

static inline void mem_cgroup_protection(struct mem_cgroup *root,
					 struct mem_cgroup *memcg,
					 unsigned long *min,
					 unsigned long *low)
{
	*min = *low = 0;

	if (mem_cgroup_disabled())
		return;

	/*
	 * There is no reclaim protection applied to a targeted reclaim.
	 * We special-case it here because the mem_cgroup_protected
	 * calculation is not robust enough to keep the protection invariant
	 * for calculated effective values for parallel reclaimers with
	 * different reclaim targets. This is especially a problem for tail
	 * memcgs (as they have pages on LRU) which would want to have
	 * effective values 0 for targeted reclaim but a different value for
	 * external reclaim.
	 *
	 * Example
	 * Let's have global and A's reclaim in parallel:
	 *  |
	 *  A (low=2G, usage = 3G, max = 3G, children_low_usage = 1.5G)
	 *  |\
	 *  | C (low = 1G, usage = 2.5G)
	 *  B (low = 1G, usage = 0.5G)
	 *
	 * For the global reclaim
	 * A.elow = A.low
	 * B.elow = min(B.usage, B.low) because children_low_usage <= A.elow
	 * C.elow = min(C.usage, C.low)
	 *
	 * With the effective values resetting we have A reclaim
	 * A.elow = 0
	 * B.elow = B.low
	 * C.elow = C.low
	 *
	 * If the global reclaim races with A's reclaim then
	 * B.elow = C.elow = 0 because children_low_usage > A.elow
	 * is possible and reclaiming B would violate the protection.
	 */
	if (root == memcg)
		return;

	*min = READ_ONCE(memcg->memory.emin);
	*low = READ_ONCE(memcg->memory.elow);
}

void mem_cgroup_calculate_protection(struct mem_cgroup *root,
				     struct mem_cgroup *memcg);

static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg)
{
	/*
	 * The root memcg doesn't account charges, and doesn't support
	 * protection.
	 */
	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg);
}

static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
{
	if (!mem_cgroup_supports_protection(memcg))
		return false;

	return READ_ONCE(memcg->memory.elow) >=
		page_counter_read(&memcg->memory);
}

static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
{
	if (!mem_cgroup_supports_protection(memcg))
		return false;

	return READ_ONCE(memcg->memory.emin) >=
		page_counter_read(&memcg->memory);
}
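
/*
 * Illustrative sketch, not part of the upstream header: how a reclaimer might
 * combine the protection helpers above. mem_cgroup_below_min() describes hard
 * protection; mem_cgroup_below_low() describes best-effort protection that an
 * over-pressured reclaimer may choose to override. The function and parameter
 * names are hypothetical.
 */
static inline bool reclaim_skip_memcg_example(struct mem_cgroup *memcg,
					      bool ignore_low_protection)
{
	if (mem_cgroup_below_min(memcg))
		return true;	/* never reclaim below memory.min */

	if (mem_cgroup_below_low(memcg) && !ignore_low_protection)
		return true;	/* respect memory.low on this pass */

	return false;
}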

int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);

/**
 * mem_cgroup_charge - Charge a newly allocated folio to a cgroup.
 * @folio: Folio to charge.
 * @mm: mm context of the allocating task.
 * @gfp: Reclaim mode.
 *
 * Try to charge @folio to the memcg that @mm belongs to, reclaiming
 * pages according to @gfp if necessary. If @mm is NULL, try to
 * charge to the active memcg.
 *
 * Do not use this for folios allocated for swapin.
 *
 * Return: 0 on success. Otherwise, an error code is returned.
 */
static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
				    gfp_t gfp)
{
	if (mem_cgroup_disabled())
		return 0;
	return __mem_cgroup_charge(folio, mm, gfp);
}

int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
				  gfp_t gfp, swp_entry_t entry);
void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);

void __mem_cgroup_uncharge(struct folio *folio);

/**
 * mem_cgroup_uncharge - Uncharge a folio.
 * @folio: Folio to uncharge.
 *
 * Uncharge a folio previously charged with mem_cgroup_charge().
 */
static inline void mem_cgroup_uncharge(struct folio *folio)
{
	if (mem_cgroup_disabled())
		return;
	__mem_cgroup_uncharge(folio);
}

void __mem_cgroup_uncharge_list(struct list_head *page_list);
static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
	if (mem_cgroup_disabled())
		return;
	__mem_cgroup_uncharge_list(page_list);
}

void mem_cgroup_migrate(struct folio *old, struct folio *new);

/**
 * mem_cgroup_lruvec - get the lru list vector for a memcg & node
 * @memcg: memcg of the wanted lruvec
 * @pgdat: pglist_data
 *
 * Returns the lru list vector holding pages for a given @memcg &
 * @pgdat combination. This can be the node lruvec, if the memory
 * controller is disabled.
 */
static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
					       struct pglist_data *pgdat)
{
	struct mem_cgroup_per_node *mz;
	struct lruvec *lruvec;

	if (mem_cgroup_disabled()) {
		lruvec = &pgdat->__lruvec;
		goto out;
	}

	if (!memcg)
		memcg = root_mem_cgroup;

	mz = memcg->nodeinfo[pgdat->node_id];
	lruvec = &mz->lruvec;
out:
	/*
	 * Since a node can be onlined after the mem_cgroup was created,
	 * we have to be prepared to initialize lruvec->pgdat here;
	 * and if offlined then reonlined, we need to reinitialize it.
	 */
	if (unlikely(lruvec->pgdat != pgdat))
		lruvec->pgdat = pgdat;
	return lruvec;
}

/**
 * folio_lruvec - return lruvec for isolating/putting an LRU folio
 * @folio: Pointer to the folio.
 *
 * This function relies on folio->mem_cgroup being stable.
 */
static inline struct lruvec *folio_lruvec(struct folio *folio)
{
	struct mem_cgroup *memcg = folio_memcg(folio);

	VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio);
	return mem_cgroup_lruvec(memcg, folio_pgdat(folio));
}

struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);

struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);

struct lruvec *folio_lruvec_lock(struct folio *folio);
struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
					 unsigned long *flags);

#ifdef CONFIG_DEBUG_VM
void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
#else
static inline
void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
{
}
#endif

static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct mem_cgroup, css) : NULL;
}

static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
{
	return percpu_ref_tryget(&objcg->refcnt);
}

static inline void obj_cgroup_get(struct obj_cgroup *objcg)
{
	percpu_ref_get(&objcg->refcnt);
}

static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
				       unsigned long nr)
{
	percpu_ref_get_many(&objcg->refcnt, nr);
}

static inline void obj_cgroup_put(struct obj_cgroup *objcg)
{
	percpu_ref_put(&objcg->refcnt);
}

static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
	if (memcg)
		css_put(&memcg->css);
}

#define mem_cgroup_from_counter(counter, member)	\
	container_of(counter, struct mem_cgroup, member)

struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
				   struct mem_cgroup *,
				   struct mem_cgroup_reclaim_cookie *);
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
int mem_cgroup_scan_tasks(struct mem_cgroup *,
			  int (*)(struct task_struct *, void *), void *);

static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
	if (mem_cgroup_disabled())
		return 0;

	return memcg->id.id;
}
struct mem_cgroup *mem_cgroup_from_id(unsigned short id);

static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
{
	return mem_cgroup_from_css(seq_css(m));
}

static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
{
	struct mem_cgroup_per_node *mz;

	if (mem_cgroup_disabled())
		return NULL;

	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
	return mz->memcg;
}

/**
 * parent_mem_cgroup - find the accounting parent of a memcg
 * @memcg: memcg whose parent to find
 *
 * Returns the parent memcg, or NULL if this is the root or the memory
 * controller is in legacy no-hierarchy mode.
 */
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
	if (!memcg->memory.parent)
		return NULL;
	return mem_cgroup_from_counter(memcg->memory.parent, memory);
}

static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
					    struct mem_cgroup *root)
{
	if (root == memcg)
		return true;
	return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
}

static inline bool mm_match_cgroup(struct mm_struct *mm,
				   struct mem_cgroup *memcg)
{
	struct mem_cgroup *task_memcg;
	bool match = false;

	rcu_read_lock();
	task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
	if (task_memcg)
		match = mem_cgroup_is_descendant(task_memcg, memcg);
	rcu_read_unlock();
	return match;
}

struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page);
ino_t page_cgroup_ino(struct page *page);

static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
{
	if (mem_cgroup_disabled())
		return true;
	return !!(memcg->css.flags & CSS_ONLINE);
}

void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
				int zid, int nr_pages);

static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
					   enum lru_list lru, int zone_idx)
{
	struct mem_cgroup_per_node *mz;

	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
	return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
}

void mem_cgroup_handle_over_high(void);

unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);

unsigned long mem_cgroup_size(struct mem_cgroup *memcg);

void mem_cgroup_print_oom_context(struct mem_cgroup *memcg,
				  struct task_struct *p);

void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg);

static inline void mem_cgroup_enter_user_fault(void)
{
	WARN_ON(current->in_user_fault);
	current->in_user_fault = 1;
}

static inline void mem_cgroup_exit_user_fault(void)
{
	WARN_ON(!current->in_user_fault);
	current->in_user_fault = 0;
}

static inline bool task_in_memcg_oom(struct task_struct *p)
{
	return p->memcg_in_oom;
}

bool mem_cgroup_oom_synchronize(bool wait);
struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
					    struct mem_cgroup *oom_domain);
void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);

#ifdef CONFIG_MEMCG_SWAP
extern bool cgroup_memory_noswap;
#endif

void folio_memcg_lock(struct folio *folio);
void folio_memcg_unlock(struct folio *folio);
void lock_page_memcg(struct page *page);
void unlock_page_memcg(struct page *page);

void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);

/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
				   int idx, int val)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_memcg_state(memcg, idx, val);
	local_irq_restore(flags);
}

static inline void mod_memcg_page_state(struct page *page,
					int idx, int val)
{
	struct mem_cgroup *memcg;

	if (mem_cgroup_disabled())
		return;

	rcu_read_lock();
	memcg = page_memcg(page);
	if (memcg)
		mod_memcg_state(memcg, idx, val);
	rcu_read_unlock();
}

static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
	return READ_ONCE(memcg->vmstats.state[idx]);
}
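
/*
 * Illustrative sketch, not part of the upstream header: reading one of the
 * memcg-only counters through memcg_page_state(). The index space stacks
 * enum memcg_stat_item on top of node_stat_item, as noted above, so both
 * kinds of index are accepted by the same accessors. The helper name is
 * hypothetical.
 */
static inline unsigned long memcg_socket_pages_example(struct mem_cgroup *memcg)
{
	/* MEMCG_SOCK lives past NR_VM_NODE_STAT_ITEMS in the same array. */
	return memcg_page_state(memcg, MEMCG_SOCK);
}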

static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
					      enum node_stat_item idx)
{
	struct mem_cgroup_per_node *pn;

	if (mem_cgroup_disabled())
		return node_page_state(lruvec_pgdat(lruvec), idx);

	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
	return READ_ONCE(pn->lruvec_stats.state[idx]);
}

static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
						    enum node_stat_item idx)
{
	struct mem_cgroup_per_node *pn;
	long x = 0;
	int cpu;

	if (mem_cgroup_disabled())
		return node_page_state(lruvec_pgdat(lruvec), idx);

	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
	for_each_possible_cpu(cpu)
		x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu);
#ifdef CONFIG_SMP
	if (x < 0)
		x = 0;
#endif
	return x;
}

void mem_cgroup_flush_stats(void);

void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
			      int val);
void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);

static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
					 int val)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_lruvec_kmem_state(p, idx, val);
	local_irq_restore(flags);
}

static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
					  enum node_stat_item idx, int val)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_memcg_lruvec_state(lruvec, idx, val);
	local_irq_restore(flags);
}

void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
			  unsigned long count);

static inline void count_memcg_events(struct mem_cgroup *memcg,
				      enum vm_event_item idx,
				      unsigned long count)
{
	unsigned long flags;

	local_irq_save(flags);
	__count_memcg_events(memcg, idx, count);
	local_irq_restore(flags);
}

static inline void count_memcg_page_event(struct page *page,
					  enum vm_event_item idx)
{
	struct mem_cgroup *memcg = page_memcg(page);

	if (memcg)
		count_memcg_events(memcg, idx, 1);
}

static inline void count_memcg_event_mm(struct mm_struct *mm,
					enum vm_event_item idx)
{
	struct mem_cgroup *memcg;

	if (mem_cgroup_disabled())
		return;

	rcu_read_lock();
	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
	if (likely(memcg))
		count_memcg_events(memcg, idx, 1);
	rcu_read_unlock();
}

static inline void memcg_memory_event(struct mem_cgroup *memcg,
				      enum memcg_memory_event event)
{
	bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX ||
			  event == MEMCG_SWAP_FAIL;

	atomic_long_inc(&memcg->memory_events_local[event]);
	if (!swap_event)
		cgroup_file_notify(&memcg->events_local_file);

	do {
		atomic_long_inc(&memcg->memory_events[event]);
		if (swap_event)
			cgroup_file_notify(&memcg->swap_events_file);
		else
			cgroup_file_notify(&memcg->events_file);

		if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
			break;
		if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
			break;
	} while ((memcg = parent_mem_cgroup(memcg)) &&
		 !mem_cgroup_is_root(memcg));
}

static inline void memcg_memory_event_mm(struct mm_struct *mm,
					 enum memcg_memory_event event)
{
	struct mem_cgroup *memcg;

	if (mem_cgroup_disabled())
		return;

	rcu_read_lock();
	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
	if (likely(memcg))
		memcg_memory_event(memcg, event);
	rcu_read_unlock();
}

void split_page_memcg(struct page *head, unsigned int nr);

unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
					    gfp_t gfp_mask,
					    unsigned long *total_scanned);

#else /* CONFIG_MEMCG */

#define MEM_CGROUP_ID_SHIFT	0
#define MEM_CGROUP_ID_MAX	0

static inline struct mem_cgroup *folio_memcg(struct folio *folio)
{
	return NULL;
}

static inline struct mem_cgroup *page_memcg(struct page *page)
{
	return NULL;
}

static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return NULL;
}

static inline struct mem_cgroup *page_memcg_check(struct page *page)
{
	return NULL;
}

static inline bool folio_memcg_kmem(struct folio *folio)
{
	return false;
}

static inline bool PageMemcgKmem(struct page *page)
{
	return false;
}

static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
	return true;
}

static inline bool mem_cgroup_disabled(void)
{
	return true;
}

static inline void memcg_memory_event(struct mem_cgroup *memcg,
				      enum memcg_memory_event event)
{
}

static inline void memcg_memory_event_mm(struct mm_struct *mm,
					 enum memcg_memory_event event)
{
}

static inline void mem_cgroup_protection(struct mem_cgroup *root,
					 struct mem_cgroup *memcg,
					 unsigned long *min,
					 unsigned long *low)
{
	*min = *low = 0;
}

static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
						   struct mem_cgroup *memcg)
{
}

static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
{
	return false;
}

static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
{
	return false;
}

static inline int mem_cgroup_charge(struct folio *folio,
				    struct mm_struct *mm, gfp_t gfp)
{
	return 0;
}

static inline int mem_cgroup_swapin_charge_page(struct page *page,
			struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
{
	return 0;
}

static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry)
{
}

static inline void mem_cgroup_uncharge(struct folio *folio)
{
}

static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}

static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
{
}

static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
					       struct pglist_data *pgdat)
{
	return &pgdat->__lruvec;
}

static inline struct lruvec *folio_lruvec(struct folio *folio)
{
	struct pglist_data *pgdat = folio_pgdat(folio);
	return &pgdat->__lruvec;
}

static inline
void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
{
}

static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
	return NULL;
}

static inline bool mm_match_cgroup(struct mm_struct *mm,
				   struct mem_cgroup *memcg)
{
	return true;
}

static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
{
	return NULL;
}

static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
{
	return NULL;
}

static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}

static inline struct lruvec *folio_lruvec_lock(struct folio *folio)
{
	struct pglist_data *pgdat = folio_pgdat(folio);

	spin_lock(&pgdat->__lruvec.lru_lock);
	return &pgdat->__lruvec;
}

static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
{
	struct pglist_data *pgdat = folio_pgdat(folio);

	spin_lock_irq(&pgdat->__lruvec.lru_lock);
	return &pgdat->__lruvec;
}

static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
						       unsigned long *flagsp)
{
	struct pglist_data *pgdat = folio_pgdat(folio);

	spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
	return &pgdat->__lruvec;
}

static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
		struct mem_cgroup *prev,
		struct mem_cgroup_reclaim_cookie *reclaim)
{
	return NULL;
}

static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
					 struct mem_cgroup *prev)
{
}

static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
		int (*fn)(struct task_struct *, void *), void *arg)
{
	return 0;
}

static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
{
	return 0;
}

static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
{
	WARN_ON_ONCE(id);
	/* XXX: This should always return root_mem_cgroup */
	return NULL;
}

static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
{
	return NULL;
}

static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
{
	return NULL;
}

static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
{
	return true;
}

static inline
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
					   enum lru_list lru, int zone_idx)
{
	return 0;
}

static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
{
	return 0;
}

static inline unsigned long mem_cgroup_size(struct mem_cgroup *memcg)
{
	return 0;
}

static inline void
mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p)
{
}

static inline void
mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
{
}

static inline void lock_page_memcg(struct page *page)
{
}

static inline void unlock_page_memcg(struct page *page)
{
}

static inline void folio_memcg_lock(struct folio *folio)
{
}

static inline void folio_memcg_unlock(struct folio *folio)
{
}

static inline void mem_cgroup_handle_over_high(void)
{
}

static inline void mem_cgroup_enter_user_fault(void)
{
}

static inline void mem_cgroup_exit_user_fault(void)
{
}

static inline bool task_in_memcg_oom(struct task_struct *p)
{
	return false;
}

static inline bool mem_cgroup_oom_synchronize(bool wait)
{
	return false;
}

static inline struct mem_cgroup *mem_cgroup_get_oom_group(
	struct task_struct *victim, struct mem_cgroup *oom_domain)
{
	return NULL;
}

static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
{
}

static inline void __mod_memcg_state(struct mem_cgroup *memcg,
				     int idx,
				     int nr)
{
}

static inline void mod_memcg_state(struct mem_cgroup *memcg,
				   int idx,
				   int nr)
{
}

static inline void mod_memcg_page_state(struct page *page,
					int idx, int val)
{
}

static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
	return 0;
}

static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
					      enum node_stat_item idx)
{
	return node_page_state(lruvec_pgdat(lruvec), idx);
}

static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
						    enum node_stat_item idx)
{
	return node_page_state(lruvec_pgdat(lruvec), idx);
}

static inline void mem_cgroup_flush_stats(void)
{
}

static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
					    enum node_stat_item idx, int val)
{
}

static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
					   int val)
{
	struct page *page = virt_to_head_page(p);

	__mod_node_page_state(page_pgdat(page), idx, val);
}

static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
					 int val)
{
	struct page *page = virt_to_head_page(p);

	mod_node_page_state(page_pgdat(page), idx, val);
}

static inline void count_memcg_events(struct mem_cgroup *memcg,
				      enum vm_event_item idx,
				      unsigned long count)
{
}

static inline void __count_memcg_events(struct mem_cgroup *memcg,
					enum vm_event_item idx,
					unsigned long count)
{
}

static inline void count_memcg_page_event(struct page *page,
					  int idx)
{
}

static inline
void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
}

static inline void split_page_memcg(struct page *head, unsigned int nr)
{
}

static inline
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
					    gfp_t gfp_mask,
					    unsigned long *total_scanned)
{
	return 0;
}
#endif /* CONFIG_MEMCG */

static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
{
	__mod_lruvec_kmem_state(p, idx, 1);
}

static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx)
{
	__mod_lruvec_kmem_state(p, idx, -1);
}

static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
{
	struct mem_cgroup *memcg;

	memcg = lruvec_memcg(lruvec);
	if (!memcg)
		return NULL;
	memcg = parent_mem_cgroup(memcg);
	if (!memcg)
		return NULL;
	return mem_cgroup_lruvec(memcg, lruvec_pgdat(lruvec));
}

static inline void unlock_page_lruvec(struct lruvec *lruvec)
{
	spin_unlock(&lruvec->lru_lock);
}

static inline void unlock_page_lruvec_irq(struct lruvec *lruvec)
{
	spin_unlock_irq(&lruvec->lru_lock);
}

static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
						 unsigned long flags)
{
	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
}

/* Test requires a stable page->memcg binding, see page_memcg() */
static inline bool folio_matches_lruvec(struct folio *folio,
					struct lruvec *lruvec)
{
	return lruvec_pgdat(lruvec) == folio_pgdat(folio) &&
	       lruvec_memcg(lruvec) == folio_memcg(folio);
}

/* Don't lock again iff page's lruvec locked */
static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio,
						     struct lruvec *locked_lruvec)
{
	if (locked_lruvec) {
		if (folio_matches_lruvec(folio, locked_lruvec))
			return locked_lruvec;

		unlock_page_lruvec_irq(locked_lruvec);
	}

	return folio_lruvec_lock_irq(folio);
}

/* Don't lock again iff page's lruvec locked */
static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio,
		struct lruvec *locked_lruvec, unsigned long *flags)
{
	if (locked_lruvec) {
		if (folio_matches_lruvec(folio, locked_lruvec))
			return locked_lruvec;

		unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
	}

	return folio_lruvec_lock_irqsave(folio, flags);
}
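
/*
 * Illustrative sketch, not part of the upstream header: the relock helpers
 * above are meant for loops that touch folios which may live on different
 * lruvecs, keeping the current lru_lock held for as long as consecutive
 * folios share it. Function and variable names are hypothetical.
 */
static inline void walk_folio_list_example(struct list_head *folios)
{
	struct lruvec *lruvec = NULL;
	unsigned long flags;
	struct folio *folio;

	list_for_each_entry(folio, folios, lru) {
		/* Re-lock only when this folio belongs to another lruvec. */
		lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags);
		/* ... operate on @folio under lruvec->lru_lock ... */
	}
	if (lruvec)
		unlock_page_lruvec_irqrestore(lruvec, flags);
}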

#ifdef CONFIG_CGROUP_WRITEBACK

struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
			 unsigned long *pheadroom, unsigned long *pdirty,
			 unsigned long *pwriteback);

void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
					     struct bdi_writeback *wb);

static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
						  struct bdi_writeback *wb)
{
	if (mem_cgroup_disabled())
		return;

	if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
		mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}

void mem_cgroup_flush_foreign(struct bdi_writeback *wb);

#else	/* CONFIG_CGROUP_WRITEBACK */

static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
{
	return NULL;
}

static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
				       unsigned long *pfilepages,
				       unsigned long *pheadroom,
				       unsigned long *pdirty,
				       unsigned long *pwriteback)
{
}

static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
						  struct bdi_writeback *wb)
{
}

static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
{
}

#endif	/* CONFIG_CGROUP_WRITEBACK */

struct sock;
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
			     gfp_t gfp_mask);
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
#ifdef CONFIG_MEMCG
extern struct static_key_false memcg_sockets_enabled_key;
#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure)
		return true;
	do {
		if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
			return true;
	} while ((memcg = parent_mem_cgroup(memcg)));
	return false;
}

int alloc_shrinker_info(struct mem_cgroup *memcg);
void free_shrinker_info(struct mem_cgroup *memcg);
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
void reparent_shrinker_deferred(struct mem_cgroup *memcg);
#else
#define mem_cgroup_sockets_enabled 0
static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
static inline void mem_cgroup_sk_free(struct sock *sk) { };
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
	return false;
}

static inline void set_shrinker_bit(struct mem_cgroup *memcg,
				    int nid, int shrinker_id)
{
}
#endif

#ifdef CONFIG_MEMCG_KMEM
bool mem_cgroup_kmem_disabled(void);
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge_page(struct page *page, int order);

struct obj_cgroup *get_obj_cgroup_from_current(void);

int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);

extern struct static_key_false memcg_kmem_enabled_key;

extern int memcg_nr_cache_ids;
void memcg_get_cache_ids(void);
void memcg_put_cache_ids(void);

/*
 * Helper macro to loop through all memcg-specific caches. Callers must still
 * check if the cache is valid (it is either valid or NULL).
 * The slab_mutex must be held when looping through those caches.
 */
#define for_each_memcg_cache_index(_idx)	\
	for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; (_idx)++)

static inline bool memcg_kmem_enabled(void)
{
	return static_branch_likely(&memcg_kmem_enabled_key);
}

static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
					 int order)
{
	if (memcg_kmem_enabled())
		return __memcg_kmem_charge_page(page, gfp, order);
	return 0;
}

static inline void memcg_kmem_uncharge_page(struct page *page, int order)
{
	if (memcg_kmem_enabled())
		__memcg_kmem_uncharge_page(page, order);
}

/*
 * A helper for accessing memcg's kmem_id, used for getting
 * corresponding LRU lists.
 */
static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
	return memcg ? memcg->kmemcg_id : -1;
}

struct mem_cgroup *mem_cgroup_from_obj(void *p);

#else
static inline bool mem_cgroup_kmem_disabled(void)
{
	return true;
}

static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
					 int order)
{
	return 0;
}

static inline void memcg_kmem_uncharge_page(struct page *page, int order)
{
}

static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp,
					   int order)
{
	return 0;
}

static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
{
}

#define for_each_memcg_cache_index(_idx)	\
	for (; NULL; )

static inline bool memcg_kmem_enabled(void)
{
	return false;
}

static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
	return -1;
}

static inline void memcg_get_cache_ids(void)
{
}

static inline void memcg_put_cache_ids(void)
{
}

static inline struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
	return NULL;
}

#endif /* CONFIG_MEMCG_KMEM */

#endif /* _LINUX_MEMCONTROL_H */
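
/*
 * Appended usage sketch, not part of the upstream header: the intended calling
 * pattern for mem_cgroup_iter()/mem_cgroup_iter_break() when walking a memcg
 * subtree. Passing a NULL cookie iterates the whole subtree once; each step
 * drops the reference on the previous position, so mem_cgroup_iter_break() is
 * only needed when leaving the walk early. Function and variable names are
 * hypothetical.
 */
static inline unsigned long count_subtree_memcgs_example(struct mem_cgroup *root)
{
	struct mem_cgroup *iter;
	unsigned long nr = 0;

	for (iter = mem_cgroup_iter(root, NULL, NULL); iter;
	     iter = mem_cgroup_iter(root, iter, NULL))
		nr++;

	return nr;
}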