include/linux/memcontrol.h at v5.3 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / linux / memcontrol.h
at v5.3 34 kB view raw
   1/* SPDX-License-Identifier: GPL-2.0-or-later */
   2/* memcontrol.h - Memory Controller
   3 *
   4 * Copyright IBM Corporation, 2007
   5 * Author Balbir Singh <balbir@linux.vnet.ibm.com>
   6 *
   7 * Copyright 2007 OpenVZ SWsoft Inc
   8 * Author: Pavel Emelianov <xemul@openvz.org>
   9 */
  10
  11#ifndef _LINUX_MEMCONTROL_H
  12#define _LINUX_MEMCONTROL_H
  13#include <linux/cgroup.h>
  14#include <linux/vm_event_item.h>
  15#include <linux/hardirq.h>
  16#include <linux/jump_label.h>
  17#include <linux/page_counter.h>
  18#include <linux/vmpressure.h>
  19#include <linux/eventfd.h>
  20#include <linux/mm.h>
  21#include <linux/vmstat.h>
  22#include <linux/writeback.h>
  23#include <linux/page-flags.h>
  24
  25struct mem_cgroup;
  26struct page;
  27struct mm_struct;
  28struct kmem_cache;
  29
  30/* Cgroup-specific page state, on top of universal node page state */
  31enum memcg_stat_item {
  32	MEMCG_CACHE = NR_VM_NODE_STAT_ITEMS,
  33	MEMCG_RSS,
  34	MEMCG_RSS_HUGE,
  35	MEMCG_SWAP,
  36	MEMCG_SOCK,
  37	/* XXX: why are these zone and not node counters? */
  38	MEMCG_KERNEL_STACK_KB,
  39	MEMCG_NR_STAT,
  40};
  41
  42enum memcg_memory_event {
  43	MEMCG_LOW,
  44	MEMCG_HIGH,
  45	MEMCG_MAX,
  46	MEMCG_OOM,
  47	MEMCG_OOM_KILL,
  48	MEMCG_SWAP_MAX,
  49	MEMCG_SWAP_FAIL,
  50	MEMCG_NR_MEMORY_EVENTS,
  51};
  52
  53enum mem_cgroup_protection {
  54	MEMCG_PROT_NONE,
  55	MEMCG_PROT_LOW,
  56	MEMCG_PROT_MIN,
  57};
  58
  59struct mem_cgroup_reclaim_cookie {
  60	pg_data_t *pgdat;
  61	int priority;
  62	unsigned int generation;
  63};
  64
  65#ifdef CONFIG_MEMCG
  66
  67#define MEM_CGROUP_ID_SHIFT	16
  68#define MEM_CGROUP_ID_MAX	USHRT_MAX
  69
  70struct mem_cgroup_id {
  71	int id;
  72	refcount_t ref;
  73};
  74
/*
 * Per memcg event counter is incremented at every pagein/pageout. With THP,
 * it will be incremented by the number of pages. This counter is used to
 * trigger some periodic events. This is straightforward and better
 * than using jiffies etc. to handle periodic memcg events.
 */
  81enum mem_cgroup_events_target {
  82	MEM_CGROUP_TARGET_THRESH,
  83	MEM_CGROUP_TARGET_SOFTLIMIT,
  84	MEM_CGROUP_TARGET_NUMAINFO,
  85	MEM_CGROUP_NTARGETS,
  86};
  87
  88struct memcg_vmstats_percpu {
  89	long stat[MEMCG_NR_STAT];
  90	unsigned long events[NR_VM_EVENT_ITEMS];
  91	unsigned long nr_page_events;
  92	unsigned long targets[MEM_CGROUP_NTARGETS];
  93};
  94
  95struct mem_cgroup_reclaim_iter {
  96	struct mem_cgroup *position;
  97	/* scan generation, increased every round-trip */
  98	unsigned int generation;
  99};
 100
 101struct lruvec_stat {
 102	long count[NR_VM_NODE_STAT_ITEMS];
 103};
 104
 105/*
 106 * Bitmap of shrinker::id corresponding to memcg-aware shrinkers,
 107 * which have elements charged to this memcg.
 108 */
 109struct memcg_shrinker_map {
 110	struct rcu_head rcu;
 111	unsigned long map[0];
 112};
 113
/*
 * Per-node information in the memory controller.
 */
 117struct mem_cgroup_per_node {
 118	struct lruvec		lruvec;
 119
 120	/* Legacy local VM stats */
 121	struct lruvec_stat __percpu *lruvec_stat_local;
 122
 123	/* Subtree VM stats (batched updates) */
 124	struct lruvec_stat __percpu *lruvec_stat_cpu;
 125	atomic_long_t		lruvec_stat[NR_VM_NODE_STAT_ITEMS];
 126
 127	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 128
 129	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
 130
 131#ifdef CONFIG_MEMCG_KMEM
 132	struct memcg_shrinker_map __rcu	*shrinker_map;
 133#endif
 134	struct rb_node		tree_node;	/* RB tree node */
 135	unsigned long		usage_in_excess;/* Set to the value by which */
 136						/* the soft limit is exceeded*/
 137	bool			on_tree;
 138	bool			congested;	/* memcg has many dirty pages */
 139						/* backed by a congested BDI */
 140
 141	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
 142						/* use container_of	   */
 143};
 144
 145struct mem_cgroup_threshold {
 146	struct eventfd_ctx *eventfd;
 147	unsigned long threshold;
 148};
 149
 150/* For threshold */
 151struct mem_cgroup_threshold_ary {
 152	/* An array index points to threshold just below or equal to usage. */
 153	int current_threshold;
 154	/* Size of entries[] */
 155	unsigned int size;
 156	/* Array of thresholds */
 157	struct mem_cgroup_threshold entries[0];
 158};
 159
 160struct mem_cgroup_thresholds {
 161	/* Primary thresholds array */
 162	struct mem_cgroup_threshold_ary *primary;
 163	/*
 164	 * Spare threshold array.
 165	 * This is needed to make mem_cgroup_unregister_event() "never fail".
 166	 * It must be able to store at least primary->size - 1 entries.
 167	 */
 168	struct mem_cgroup_threshold_ary *spare;
 169};
 170
 171enum memcg_kmem_state {
 172	KMEM_NONE,
 173	KMEM_ALLOCATED,
 174	KMEM_ONLINE,
 175};
 176
 177#if defined(CONFIG_SMP)
 178struct memcg_padding {
 179	char x[0];
 180} ____cacheline_internodealigned_in_smp;
 181#define MEMCG_PADDING(name)      struct memcg_padding name;
 182#else
 183#define MEMCG_PADDING(name)
 184#endif
 185
 186/*
 187 * The memory controller data structure. The memory controller controls both
 188 * page cache and RSS per cgroup. We would eventually like to provide
 189 * statistics based on the statistics developed by Rik Van Riel for clock-pro,
 190 * to help the administrator determine what knobs to tune.
 191 */
 192struct mem_cgroup {
 193	struct cgroup_subsys_state css;
 194
 195	/* Private memcg ID. Used to ID objects that outlive the cgroup */
 196	struct mem_cgroup_id id;
 197
 198	/* Accounted resources */
 199	struct page_counter memory;
 200	struct page_counter swap;
 201
 202	/* Legacy consumer-oriented counters */
 203	struct page_counter memsw;
 204	struct page_counter kmem;
 205	struct page_counter tcpmem;
 206
 207	/* Upper bound of normal memory consumption range */
 208	unsigned long high;
 209
 210	/* Range enforcement for interrupt charges */
 211	struct work_struct high_work;
 212
 213	unsigned long soft_limit;
 214
 215	/* vmpressure notifications */
 216	struct vmpressure vmpressure;
 217
 218	/*
 219	 * Should the accounting and control be hierarchical, per subtree?
 220	 */
 221	bool use_hierarchy;
 222
 223	/*
 224	 * Should the OOM killer kill all belonging tasks, had it kill one?
 225	 */
 226	bool oom_group;
 227
 228	/* protected by memcg_oom_lock */
 229	bool		oom_lock;
 230	int		under_oom;
 231
 232	int	swappiness;
 233	/* OOM-Killer disable */
 234	int		oom_kill_disable;
 235
 236	/* memory.events and memory.events.local */
 237	struct cgroup_file events_file;
 238	struct cgroup_file events_local_file;
 239
 240	/* handle for "memory.swap.events" */
 241	struct cgroup_file swap_events_file;
 242
 243	/* protect arrays of thresholds */
 244	struct mutex thresholds_lock;
 245
 246	/* thresholds for memory usage. RCU-protected */
 247	struct mem_cgroup_thresholds thresholds;
 248
 249	/* thresholds for mem+swap usage. RCU-protected */
 250	struct mem_cgroup_thresholds memsw_thresholds;
 251
 252	/* For oom notifier event fd */
 253	struct list_head oom_notify;
 254
 255	/*
 256	 * Should we move charges of a task when a task is moved into this
 257	 * mem_cgroup ? And what type of charges should we move ?
 258	 */
 259	unsigned long move_charge_at_immigrate;
 260	/* taken only while moving_account > 0 */
 261	spinlock_t		move_lock;
 262	unsigned long		move_lock_flags;
 263
 264	MEMCG_PADDING(_pad1_);
 265
 266	/*
 267	 * set > 0 if pages under this cgroup are moving to other cgroup.
 268	 */
 269	atomic_t		moving_account;
 270	struct task_struct	*move_lock_task;
 271
 272	/* Legacy local VM stats and events */
 273	struct memcg_vmstats_percpu __percpu *vmstats_local;
 274
 275	/* Subtree VM stats and events (batched updates) */
 276	struct memcg_vmstats_percpu __percpu *vmstats_percpu;
 277
 278	MEMCG_PADDING(_pad2_);
 279
 280	atomic_long_t		vmstats[MEMCG_NR_STAT];
 281	atomic_long_t		vmevents[NR_VM_EVENT_ITEMS];
 282
 283	/* memory.events */
 284	atomic_long_t		memory_events[MEMCG_NR_MEMORY_EVENTS];
 285	atomic_long_t		memory_events_local[MEMCG_NR_MEMORY_EVENTS];
 286
 287	unsigned long		socket_pressure;
 288
 289	/* Legacy tcp memory accounting */
 290	bool			tcpmem_active;
 291	int			tcpmem_pressure;
 292
 293#ifdef CONFIG_MEMCG_KMEM
 294        /* Index in the kmem_cache->memcg_params.memcg_caches array */
 295	int kmemcg_id;
 296	enum memcg_kmem_state kmem_state;
 297	struct list_head kmem_caches;
 298#endif
 299
 300	int last_scanned_node;
 301#if MAX_NUMNODES > 1
 302	nodemask_t	scan_nodes;
 303	atomic_t	numainfo_events;
 304	atomic_t	numainfo_updating;
 305#endif
 306
 307#ifdef CONFIG_CGROUP_WRITEBACK
 308	struct list_head cgwb_list;
 309	struct wb_domain cgwb_domain;
 310#endif
 311
 312	/* List of events which userspace want to receive */
 313	struct list_head event_list;
 314	spinlock_t event_list_lock;
 315
 316	struct mem_cgroup_per_node *nodeinfo[0];
 317	/* WARNING: nodeinfo must be the last member here */
 318};
 319
 320/*
 321 * size of first charge trial. "32" comes from vmscan.c's magic value.
 322 * TODO: maybe necessary to use big numbers in big irons.
 323 */
 324#define MEMCG_CHARGE_BATCH 32U
 325
 326extern struct mem_cgroup *root_mem_cgroup;
 327
 328static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 329{
 330	return (memcg == root_mem_cgroup);
 331}
 332
 333static inline bool mem_cgroup_disabled(void)
 334{
 335	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 336}
 337
 338enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
 339						struct mem_cgroup *memcg);
 340
 341int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 342			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
 343			  bool compound);
 344int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
 345			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
 346			  bool compound);
 347void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 348			      bool lrucare, bool compound);
 349void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
 350		bool compound);
 351void mem_cgroup_uncharge(struct page *page);
 352void mem_cgroup_uncharge_list(struct list_head *page_list);
 353
 354void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
 355
 356static struct mem_cgroup_per_node *
 357mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
 358{
 359	return memcg->nodeinfo[nid];
 360}
 361
 362/**
 363 * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
 364 * @node: node of the wanted lruvec
 365 * @memcg: memcg of the wanted lruvec
 366 *
 367 * Returns the lru list vector holding pages for a given @node or a given
 368 * @memcg and @zone. This can be the node lruvec, if the memory controller
 369 * is disabled.
 370 */
 371static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
 372				struct mem_cgroup *memcg)
 373{
 374	struct mem_cgroup_per_node *mz;
 375	struct lruvec *lruvec;
 376
 377	if (mem_cgroup_disabled()) {
 378		lruvec = node_lruvec(pgdat);
 379		goto out;
 380	}
 381
 382	mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
 383	lruvec = &mz->lruvec;
 384out:
 385	/*
 386	 * Since a node can be onlined after the mem_cgroup was created,
 387	 * we have to be prepared to initialize lruvec->pgdat here;
 388	 * and if offlined then reonlined, we need to reinitialize it.
 389	 */
 390	if (unlikely(lruvec->pgdat != pgdat))
 391		lruvec->pgdat = pgdat;
 392	return lruvec;
 393}
 394
 395struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
 396
 397struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 398
 399struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 400
 401struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
 402
 403static inline
 404struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 405	return css ? container_of(css, struct mem_cgroup, css) : NULL;
 406}
 407
 408static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 409{
 410	if (memcg)
 411		css_put(&memcg->css);
 412}
 413
 414#define mem_cgroup_from_counter(counter, member)	\
 415	container_of(counter, struct mem_cgroup, member)
 416
 417struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
 418				   struct mem_cgroup *,
 419				   struct mem_cgroup_reclaim_cookie *);
 420void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 421int mem_cgroup_scan_tasks(struct mem_cgroup *,
 422			  int (*)(struct task_struct *, void *), void *);
 423
 424static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 425{
 426	if (mem_cgroup_disabled())
 427		return 0;
 428
 429	return memcg->id.id;
 430}
 431struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
 432
 433static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
 434{
 435	return mem_cgroup_from_css(seq_css(m));
 436}
 437
 438static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
 439{
 440	struct mem_cgroup_per_node *mz;
 441
 442	if (mem_cgroup_disabled())
 443		return NULL;
 444
 445	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 446	return mz->memcg;
 447}
 448
 449/**
 450 * parent_mem_cgroup - find the accounting parent of a memcg
 451 * @memcg: memcg whose parent to find
 452 *
 453 * Returns the parent memcg, or NULL if this is the root or the memory
 454 * controller is in legacy no-hierarchy mode.
 455 */
 456static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 457{
 458	if (!memcg->memory.parent)
 459		return NULL;
 460	return mem_cgroup_from_counter(memcg->memory.parent, memory);
 461}
 462
 463static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
 464			      struct mem_cgroup *root)
 465{
 466	if (root == memcg)
 467		return true;
 468	if (!root->use_hierarchy)
 469		return false;
 470	return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
 471}
 472
 473static inline bool mm_match_cgroup(struct mm_struct *mm,
 474				   struct mem_cgroup *memcg)
 475{
 476	struct mem_cgroup *task_memcg;
 477	bool match = false;
 478
 479	rcu_read_lock();
 480	task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
 481	if (task_memcg)
 482		match = mem_cgroup_is_descendant(task_memcg, memcg);
 483	rcu_read_unlock();
 484	return match;
 485}
 486
 487struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page);
 488ino_t page_cgroup_ino(struct page *page);
 489
 490static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 491{
 492	if (mem_cgroup_disabled())
 493		return true;
 494	return !!(memcg->css.flags & CSS_ONLINE);
 495}
 496
 497/*
 498 * For memory reclaim.
 499 */
 500int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 501
 502void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 503		int zid, int nr_pages);
 504
 505static inline
 506unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
 507		enum lru_list lru, int zone_idx)
 508{
 509	struct mem_cgroup_per_node *mz;
 510
 511	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 512	return mz->lru_zone_size[zone_idx][lru];
 513}
 514
 515void mem_cgroup_handle_over_high(void);
 516
 517unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
 518
 519void mem_cgroup_print_oom_context(struct mem_cgroup *memcg,
 520				struct task_struct *p);
 521
 522void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg);
 523
 524static inline void mem_cgroup_enter_user_fault(void)
 525{
 526	WARN_ON(current->in_user_fault);
 527	current->in_user_fault = 1;
 528}
 529
 530static inline void mem_cgroup_exit_user_fault(void)
 531{
 532	WARN_ON(!current->in_user_fault);
 533	current->in_user_fault = 0;
 534}
 535
 536static inline bool task_in_memcg_oom(struct task_struct *p)
 537{
 538	return p->memcg_in_oom;
 539}
 540
 541bool mem_cgroup_oom_synchronize(bool wait);
 542struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
 543					    struct mem_cgroup *oom_domain);
 544void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
 545
 546#ifdef CONFIG_MEMCG_SWAP
 547extern int do_swap_account;
 548#endif
 549
 550struct mem_cgroup *lock_page_memcg(struct page *page);
 551void __unlock_page_memcg(struct mem_cgroup *memcg);
 552void unlock_page_memcg(struct page *page);
 553
 554/*
 555 * idx can be of type enum memcg_stat_item or node_stat_item.
 556 * Keep in sync with memcg_exact_page_state().
 557 */
 558static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 559{
 560	long x = atomic_long_read(&memcg->vmstats[idx]);
 561#ifdef CONFIG_SMP
 562	if (x < 0)
 563		x = 0;
 564#endif
 565	return x;
 566}
 567
 568/*
 569 * idx can be of type enum memcg_stat_item or node_stat_item.
 570 * Keep in sync with memcg_exact_page_state().
 571 */
 572static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
 573						   int idx)
 574{
 575	long x = 0;
 576	int cpu;
 577
 578	for_each_possible_cpu(cpu)
 579		x += per_cpu(memcg->vmstats_local->stat[idx], cpu);
 580#ifdef CONFIG_SMP
 581	if (x < 0)
 582		x = 0;
 583#endif
 584	return x;
 585}
 586
 587void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
 588
 589/* idx can be of type enum memcg_stat_item or node_stat_item */
 590static inline void mod_memcg_state(struct mem_cgroup *memcg,
 591				   int idx, int val)
 592{
 593	unsigned long flags;
 594
 595	local_irq_save(flags);
 596	__mod_memcg_state(memcg, idx, val);
 597	local_irq_restore(flags);
 598}
 599
 600/**
 601 * mod_memcg_page_state - update page state statistics
 602 * @page: the page
 603 * @idx: page state item to account
 604 * @val: number of pages (positive or negative)
 605 *
 606 * The @page must be locked or the caller must use lock_page_memcg()
 607 * to prevent double accounting when the page is concurrently being
 608 * moved to another memcg:
 609 *
 610 *   lock_page(page) or lock_page_memcg(page)
 611 *   if (TestClearPageState(page))
 612 *     mod_memcg_page_state(page, state, -1);
 613 *   unlock_page(page) or unlock_page_memcg(page)
 614 *
 615 * Kernel pages are an exception to this, since they'll never move.
 616 */
 617static inline void __mod_memcg_page_state(struct page *page,
 618					  int idx, int val)
 619{
 620	if (page->mem_cgroup)
 621		__mod_memcg_state(page->mem_cgroup, idx, val);
 622}
 623
 624static inline void mod_memcg_page_state(struct page *page,
 625					int idx, int val)
 626{
 627	if (page->mem_cgroup)
 628		mod_memcg_state(page->mem_cgroup, idx, val);
 629}
 630
 631static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 632					      enum node_stat_item idx)
 633{
 634	struct mem_cgroup_per_node *pn;
 635	long x;
 636
 637	if (mem_cgroup_disabled())
 638		return node_page_state(lruvec_pgdat(lruvec), idx);
 639
 640	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 641	x = atomic_long_read(&pn->lruvec_stat[idx]);
 642#ifdef CONFIG_SMP
 643	if (x < 0)
 644		x = 0;
 645#endif
 646	return x;
 647}
 648
 649static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 650						    enum node_stat_item idx)
 651{
 652	struct mem_cgroup_per_node *pn;
 653	long x = 0;
 654	int cpu;
 655
 656	if (mem_cgroup_disabled())
 657		return node_page_state(lruvec_pgdat(lruvec), idx);
 658
 659	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 660	for_each_possible_cpu(cpu)
 661		x += per_cpu(pn->lruvec_stat_local->count[idx], cpu);
 662#ifdef CONFIG_SMP
 663	if (x < 0)
 664		x = 0;
 665#endif
 666	return x;
 667}
 668
 669void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 670			int val);
 671void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
 672
 673static inline void mod_lruvec_state(struct lruvec *lruvec,
 674				    enum node_stat_item idx, int val)
 675{
 676	unsigned long flags;
 677
 678	local_irq_save(flags);
 679	__mod_lruvec_state(lruvec, idx, val);
 680	local_irq_restore(flags);
 681}
 682
 683static inline void __mod_lruvec_page_state(struct page *page,
 684					   enum node_stat_item idx, int val)
 685{
 686	pg_data_t *pgdat = page_pgdat(page);
 687	struct lruvec *lruvec;
 688
 689	/* Untracked pages have no memcg, no lruvec. Update only the node */
 690	if (!page->mem_cgroup) {
 691		__mod_node_page_state(pgdat, idx, val);
 692		return;
 693	}
 694
 695	lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
 696	__mod_lruvec_state(lruvec, idx, val);
 697}
 698
 699static inline void mod_lruvec_page_state(struct page *page,
 700					 enum node_stat_item idx, int val)
 701{
 702	unsigned long flags;
 703
 704	local_irq_save(flags);
 705	__mod_lruvec_page_state(page, idx, val);
 706	local_irq_restore(flags);
 707}
 708
 709unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 710						gfp_t gfp_mask,
 711						unsigned long *total_scanned);
 712
 713void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 714			  unsigned long count);
 715
 716static inline void count_memcg_events(struct mem_cgroup *memcg,
 717				      enum vm_event_item idx,
 718				      unsigned long count)
 719{
 720	unsigned long flags;
 721
 722	local_irq_save(flags);
 723	__count_memcg_events(memcg, idx, count);
 724	local_irq_restore(flags);
 725}
 726
 727static inline void count_memcg_page_event(struct page *page,
 728					  enum vm_event_item idx)
 729{
 730	if (page->mem_cgroup)
 731		count_memcg_events(page->mem_cgroup, idx, 1);
 732}
 733
 734static inline void count_memcg_event_mm(struct mm_struct *mm,
 735					enum vm_event_item idx)
 736{
 737	struct mem_cgroup *memcg;
 738
 739	if (mem_cgroup_disabled())
 740		return;
 741
 742	rcu_read_lock();
 743	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
 744	if (likely(memcg))
 745		count_memcg_events(memcg, idx, 1);
 746	rcu_read_unlock();
 747}
 748
 749static inline void memcg_memory_event(struct mem_cgroup *memcg,
 750				      enum memcg_memory_event event)
 751{
 752	atomic_long_inc(&memcg->memory_events_local[event]);
 753	cgroup_file_notify(&memcg->events_local_file);
 754
 755	do {
 756		atomic_long_inc(&memcg->memory_events[event]);
 757		cgroup_file_notify(&memcg->events_file);
 758
 759		if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
 760			break;
 761	} while ((memcg = parent_mem_cgroup(memcg)) &&
 762		 !mem_cgroup_is_root(memcg));
 763}
 764
 765static inline void memcg_memory_event_mm(struct mm_struct *mm,
 766					 enum memcg_memory_event event)
 767{
 768	struct mem_cgroup *memcg;
 769
 770	if (mem_cgroup_disabled())
 771		return;
 772
 773	rcu_read_lock();
 774	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
 775	if (likely(memcg))
 776		memcg_memory_event(memcg, event);
 777	rcu_read_unlock();
 778}
 779
 780#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 781void mem_cgroup_split_huge_fixup(struct page *head);
 782#endif
 783
 784#else /* CONFIG_MEMCG */
 785
 786#define MEM_CGROUP_ID_SHIFT	0
 787#define MEM_CGROUP_ID_MAX	0
 788
 789struct mem_cgroup;
 790
 791static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 792{
 793	return true;
 794}
 795
 796static inline bool mem_cgroup_disabled(void)
 797{
 798	return true;
 799}
 800
 801static inline void memcg_memory_event(struct mem_cgroup *memcg,
 802				      enum memcg_memory_event event)
 803{
 804}
 805
 806static inline void memcg_memory_event_mm(struct mm_struct *mm,
 807					 enum memcg_memory_event event)
 808{
 809}
 810
 811static inline enum mem_cgroup_protection mem_cgroup_protected(
 812	struct mem_cgroup *root, struct mem_cgroup *memcg)
 813{
 814	return MEMCG_PROT_NONE;
 815}
 816
 817static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 818					gfp_t gfp_mask,
 819					struct mem_cgroup **memcgp,
 820					bool compound)
 821{
 822	*memcgp = NULL;
 823	return 0;
 824}
 825
 826static inline int mem_cgroup_try_charge_delay(struct page *page,
 827					      struct mm_struct *mm,
 828					      gfp_t gfp_mask,
 829					      struct mem_cgroup **memcgp,
 830					      bool compound)
 831{
 832	*memcgp = NULL;
 833	return 0;
 834}
 835
 836static inline void mem_cgroup_commit_charge(struct page *page,
 837					    struct mem_cgroup *memcg,
 838					    bool lrucare, bool compound)
 839{
 840}
 841
 842static inline void mem_cgroup_cancel_charge(struct page *page,
 843					    struct mem_cgroup *memcg,
 844					    bool compound)
 845{
 846}
 847
 848static inline void mem_cgroup_uncharge(struct page *page)
 849{
 850}
 851
 852static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
 853{
 854}
 855
 856static inline void mem_cgroup_migrate(struct page *old, struct page *new)
 857{
 858}
 859
 860static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
 861				struct mem_cgroup *memcg)
 862{
 863	return node_lruvec(pgdat);
 864}
 865
 866static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
 867						    struct pglist_data *pgdat)
 868{
 869	return &pgdat->lruvec;
 870}
 871
 872static inline bool mm_match_cgroup(struct mm_struct *mm,
 873		struct mem_cgroup *memcg)
 874{
 875	return true;
 876}
 877
 878static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 879{
 880	return NULL;
 881}
 882
 883static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
 884{
 885	return NULL;
 886}
 887
 888static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 889{
 890}
 891
 892static inline struct mem_cgroup *
 893mem_cgroup_iter(struct mem_cgroup *root,
 894		struct mem_cgroup *prev,
 895		struct mem_cgroup_reclaim_cookie *reclaim)
 896{
 897	return NULL;
 898}
 899
 900static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
 901					 struct mem_cgroup *prev)
 902{
 903}
 904
 905static inline int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 906		int (*fn)(struct task_struct *, void *), void *arg)
 907{
 908	return 0;
 909}
 910
 911static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 912{
 913	return 0;
 914}
 915
 916static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 917{
 918	WARN_ON_ONCE(id);
 919	/* XXX: This should always return root_mem_cgroup */
 920	return NULL;
 921}
 922
 923static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
 924{
 925	return NULL;
 926}
 927
 928static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
 929{
 930	return NULL;
 931}
 932
 933static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 934{
 935	return true;
 936}
 937
 938static inline
 939unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
 940		enum lru_list lru, int zone_idx)
 941{
 942	return 0;
 943}
 944
 945static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
 946{
 947	return 0;
 948}
 949
 950static inline void
 951mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p)
 952{
 953}
 954
 955static inline void
 956mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 957{
 958}
 959
 960static inline struct mem_cgroup *lock_page_memcg(struct page *page)
 961{
 962	return NULL;
 963}
 964
 965static inline void __unlock_page_memcg(struct mem_cgroup *memcg)
 966{
 967}
 968
 969static inline void unlock_page_memcg(struct page *page)
 970{
 971}
 972
 973static inline void mem_cgroup_handle_over_high(void)
 974{
 975}
 976
 977static inline void mem_cgroup_enter_user_fault(void)
 978{
 979}
 980
 981static inline void mem_cgroup_exit_user_fault(void)
 982{
 983}
 984
 985static inline bool task_in_memcg_oom(struct task_struct *p)
 986{
 987	return false;
 988}
 989
 990static inline bool mem_cgroup_oom_synchronize(bool wait)
 991{
 992	return false;
 993}
 994
 995static inline struct mem_cgroup *mem_cgroup_get_oom_group(
 996	struct task_struct *victim, struct mem_cgroup *oom_domain)
 997{
 998	return NULL;
 999}
1000
1001static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
1002{
1003}
1004
1005static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
1006{
1007	return 0;
1008}
1009
1010static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
1011						   int idx)
1012{
1013	return 0;
1014}
1015
1016static inline void __mod_memcg_state(struct mem_cgroup *memcg,
1017				     int idx,
1018				     int nr)
1019{
1020}
1021
1022static inline void mod_memcg_state(struct mem_cgroup *memcg,
1023				   int idx,
1024				   int nr)
1025{
1026}
1027
1028static inline void __mod_memcg_page_state(struct page *page,
1029					  int idx,
1030					  int nr)
1031{
1032}
1033
1034static inline void mod_memcg_page_state(struct page *page,
1035					int idx,
1036					int nr)
1037{
1038}
1039
1040static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
1041					      enum node_stat_item idx)
1042{
1043	return node_page_state(lruvec_pgdat(lruvec), idx);
1044}
1045
1046static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
1047						    enum node_stat_item idx)
1048{
1049	return node_page_state(lruvec_pgdat(lruvec), idx);
1050}
1051
1052static inline void __mod_lruvec_state(struct lruvec *lruvec,
1053				      enum node_stat_item idx, int val)
1054{
1055	__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
1056}
1057
1058static inline void mod_lruvec_state(struct lruvec *lruvec,
1059				    enum node_stat_item idx, int val)
1060{
1061	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
1062}
1063
1064static inline void __mod_lruvec_page_state(struct page *page,
1065					   enum node_stat_item idx, int val)
1066{
1067	__mod_node_page_state(page_pgdat(page), idx, val);
1068}
1069
1070static inline void mod_lruvec_page_state(struct page *page,
1071					 enum node_stat_item idx, int val)
1072{
1073	mod_node_page_state(page_pgdat(page), idx, val);
1074}
1075
1076static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
1077					   int val)
1078{
1079	struct page *page = virt_to_head_page(p);
1080
1081	__mod_node_page_state(page_pgdat(page), idx, val);
1082}
1083
1084static inline
1085unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
1086					    gfp_t gfp_mask,
1087					    unsigned long *total_scanned)
1088{
1089	return 0;
1090}
1091
1092static inline void mem_cgroup_split_huge_fixup(struct page *head)
1093{
1094}
1095
1096static inline void count_memcg_events(struct mem_cgroup *memcg,
1097				      enum vm_event_item idx,
1098				      unsigned long count)
1099{
1100}
1101
1102static inline void __count_memcg_events(struct mem_cgroup *memcg,
1103					enum vm_event_item idx,
1104					unsigned long count)
1105{
1106}
1107
1108static inline void count_memcg_page_event(struct page *page,
1109					  int idx)
1110{
1111}
1112
1113static inline
1114void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
1115{
1116}
1117#endif /* CONFIG_MEMCG */
1118
1119/* idx can be of type enum memcg_stat_item or node_stat_item */
1120static inline void __inc_memcg_state(struct mem_cgroup *memcg,
1121				     int idx)
1122{
1123	__mod_memcg_state(memcg, idx, 1);
1124}
1125
1126/* idx can be of type enum memcg_stat_item or node_stat_item */
1127static inline void __dec_memcg_state(struct mem_cgroup *memcg,
1128				     int idx)
1129{
1130	__mod_memcg_state(memcg, idx, -1);
1131}
1132
1133/* idx can be of type enum memcg_stat_item or node_stat_item */
1134static inline void __inc_memcg_page_state(struct page *page,
1135					  int idx)
1136{
1137	__mod_memcg_page_state(page, idx, 1);
1138}
1139
/*
 * Subtract one from stat @idx for @page through __mod_memcg_page_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void __dec_memcg_page_state(struct page *page, int idx)
{
	__mod_memcg_page_state(page, idx, -1);
}
1146
1147static inline void __inc_lruvec_state(struct lruvec *lruvec,
1148				      enum node_stat_item idx)
1149{
1150	__mod_lruvec_state(lruvec, idx, 1);
1151}
1152
1153static inline void __dec_lruvec_state(struct lruvec *lruvec,
1154				      enum node_stat_item idx)
1155{
1156	__mod_lruvec_state(lruvec, idx, -1);
1157}
1158
1159static inline void __inc_lruvec_page_state(struct page *page,
1160					   enum node_stat_item idx)
1161{
1162	__mod_lruvec_page_state(page, idx, 1);
1163}
1164
1165static inline void __dec_lruvec_page_state(struct page *page,
1166					   enum node_stat_item idx)
1167{
1168	__mod_lruvec_page_state(page, idx, -1);
1169}
1170
1171static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx)
1172{
1173	__mod_lruvec_slab_state(p, idx, 1);
1174}
1175
1176static inline void __dec_lruvec_slab_state(void *p, enum node_stat_item idx)
1177{
1178	__mod_lruvec_slab_state(p, idx, -1);
1179}
1180
/*
 * Add one to stat @idx of @memcg through mod_memcg_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void inc_memcg_state(struct mem_cgroup *memcg, int idx)
{
	mod_memcg_state(memcg, idx, 1);
}
1187
/*
 * Subtract one from stat @idx of @memcg through mod_memcg_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void dec_memcg_state(struct mem_cgroup *memcg, int idx)
{
	mod_memcg_state(memcg, idx, -1);
}
1194
/*
 * Add one to stat @idx for @page through mod_memcg_page_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void inc_memcg_page_state(struct page *page, int idx)
{
	mod_memcg_page_state(page, idx, 1);
}
1201
/*
 * Subtract one from stat @idx for @page through mod_memcg_page_state().
 * @idx may be an enum memcg_stat_item or an enum node_stat_item value.
 */
static inline void dec_memcg_page_state(struct page *page, int idx)
{
	mod_memcg_page_state(page, idx, -1);
}
1208
1209static inline void inc_lruvec_state(struct lruvec *lruvec,
1210				    enum node_stat_item idx)
1211{
1212	mod_lruvec_state(lruvec, idx, 1);
1213}
1214
1215static inline void dec_lruvec_state(struct lruvec *lruvec,
1216				    enum node_stat_item idx)
1217{
1218	mod_lruvec_state(lruvec, idx, -1);
1219}
1220
1221static inline void inc_lruvec_page_state(struct page *page,
1222					 enum node_stat_item idx)
1223{
1224	mod_lruvec_page_state(page, idx, 1);
1225}
1226
1227static inline void dec_lruvec_page_state(struct page *page,
1228					 enum node_stat_item idx)
1229{
1230	mod_lruvec_page_state(page, idx, -1);
1231}
1232
1233#ifdef CONFIG_CGROUP_WRITEBACK
1234
/*
 * Cgroup writeback hooks; only declared here, the definitions live outside
 * this header.
 */
struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
void mem_cgroup_wb_stats(struct bdi_writeback *wb,
			 unsigned long *pfilepages,
			 unsigned long *pheadroom,
			 unsigned long *pdirty,
			 unsigned long *pwriteback);
1239
1240#else	/* CONFIG_CGROUP_WRITEBACK */
1241
/* !CONFIG_CGROUP_WRITEBACK stub: @wb is ignored and NULL is returned. */
static inline struct wb_domain *
mem_cgroup_wb_domain(struct bdi_writeback *wb)
{
	return NULL;
}
1246
/*
 * !CONFIG_CGROUP_WRITEBACK stub: does nothing, so none of the four
 * unsigned long locations passed in is written.
 */
static inline void
mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
		    unsigned long *pheadroom, unsigned long *pdirty,
		    unsigned long *pwriteback)
{
}
1254
1255#endif	/* CONFIG_CGROUP_WRITEBACK */
1256
struct sock;

/*
 * Socket memory charge/uncharge for @memcg, counted in pages (@nr_pages).
 * Only declared here; the charge variant reports its outcome as a bool.
 */
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg,
			     unsigned int nr_pages);
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg,
			       unsigned int nr_pages);
1260#ifdef CONFIG_MEMCG
/*
 * Static key behind mem_cgroup_sockets_enabled; the macro evaluates it with
 * static_branch_unlikely().
 */
extern struct static_key_false memcg_sockets_enabled_key;
#define mem_cgroup_sockets_enabled \
	static_branch_unlikely(&memcg_sockets_enabled_key)

/* Per-socket memcg hooks; only declared here. */
void mem_cgroup_sk_alloc(struct sock *sk);
void mem_cgroup_sk_free(struct sock *sk);
1265static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
1266{
1267	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure)
1268		return true;
1269	do {
1270		if (time_before(jiffies, memcg->socket_pressure))
1271			return true;
1272	} while ((memcg = parent_mem_cgroup(memcg)));
1273	return false;
1274}
1275#else
/* Socket accounting is compiled out: the enable check is a constant 0. */
#define mem_cgroup_sockets_enabled 0

/*
 * No-op socket hooks; @sk is ignored.  There is deliberately no ';' after
 * the bodies: a stray semicolon after a function definition is an empty
 * file-scope declaration, which ISO C does not allow.
 */
static inline void mem_cgroup_sk_alloc(struct sock *sk) { }
static inline void mem_cgroup_sk_free(struct sock *sk) { }
/* Stub: @memcg is ignored and socket pressure is never reported. */
static inline bool
mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
{
	return false;
}
1283#endif
1284
/*
 * Get/put pair for kmem caches.  Only declared here; the get side takes a
 * cache pointer and hands one back, the put side takes a cache pointer.
 */
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);

void memcg_kmem_put_cache(struct kmem_cache *cachep);
1287
1288#ifdef CONFIG_MEMCG_KMEM
1289int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
1290void __memcg_kmem_uncharge(struct page *page, int order);
1291int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
1292			      struct mem_cgroup *memcg);
1293void __memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg,
1294				 unsigned int nr_pages);
1295
1296extern struct static_key_false memcg_kmem_enabled_key;
1297extern struct workqueue_struct *memcg_kmem_cache_wq;
1298
1299extern int memcg_nr_cache_ids;
1300void memcg_get_cache_ids(void);
1301void memcg_put_cache_ids(void);
1302
/*
 * Iterate @_idx over every memcg cache index, 0 .. memcg_nr_cache_ids - 1.
 * A slot is either a valid cache or NULL, so callers must still check each
 * one.  The slab_mutex must be held while looping through those caches.
 */
#define for_each_memcg_cache_index(_idx) \
	for ((_idx) = 0; (_idx) < memcg_nr_cache_ids; ++(_idx))
1310
1311static inline bool memcg_kmem_enabled(void)
1312{
1313	return static_branch_unlikely(&memcg_kmem_enabled_key);
1314}
1315
1316static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
1317{
1318	if (memcg_kmem_enabled())
1319		return __memcg_kmem_charge(page, gfp, order);
1320	return 0;
1321}
1322
/*
 * Uncharge @page (@order) through __memcg_kmem_uncharge() when
 * memcg_kmem_enabled() is true; otherwise do nothing.
 */
static inline void
memcg_kmem_uncharge(struct page *page, int order)
{
	if (!memcg_kmem_enabled())
		return;

	__memcg_kmem_uncharge(page, order);
}
1328
1329static inline int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp,
1330					  int order, struct mem_cgroup *memcg)
1331{
1332	if (memcg_kmem_enabled())
1333		return __memcg_kmem_charge_memcg(page, gfp, order, memcg);
1334	return 0;
1335}
1336
/*
 * Uncharge 1 << @order pages from @memcg through
 * __memcg_kmem_uncharge_memcg() when memcg_kmem_enabled() is true;
 * otherwise do nothing.  @page is accepted but not used here.
 */
static inline void
memcg_kmem_uncharge_memcg(struct page *page, int order,
			  struct mem_cgroup *memcg)
{
	if (!memcg_kmem_enabled())
		return;

	__memcg_kmem_uncharge_memcg(memcg, 1 << order);
}
1343
1344/*
1345 * helper for accessing a memcg's index. It will be used as an index in the
1346 * child cache array in kmem_cache, and also to derive its name. This function
1347 * will return -1 when this is not a kmem-limited memcg.
1348 */
1349static inline int memcg_cache_id(struct mem_cgroup *memcg)
1350{
1351	return memcg ? memcg->kmemcg_id : -1;
1352}
1353
/* Shrinker map helpers; only declared here. */
int memcg_expand_shrinker_maps(int new_id);

void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid,
			    int shrinker_id);
1358#else
1359
1360static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
1361{
1362	return 0;
1363}
1364
/* !CONFIG_MEMCG_KMEM stub: no-op, @page and @order are ignored. */
static inline void memcg_kmem_uncharge(struct page *page, int order) { }
1368
1369static inline int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
1370{
1371	return 0;
1372}
1373
/* !CONFIG_MEMCG_KMEM stub: no-op, @page and @order are ignored. */
static inline void __memcg_kmem_uncharge(struct page *page, int order) { }
1377
/*
 * !CONFIG_MEMCG_KMEM variant: the loop body never runs and @_idx is neither
 * read nor written.  The condition is the integer constant 0 rather than
 * NULL, so the macro does not rely on a pointer constant being usable (or
 * even defined) where it is expanded.
 */
#define for_each_memcg_cache_index(_idx)	\
	for (; 0; )
1380
/* !CONFIG_MEMCG_KMEM stub: kmem accounting is never enabled. */
static inline bool
memcg_kmem_enabled(void)
{
	return false;
}
1385
/* !CONFIG_MEMCG_KMEM stub: @memcg is ignored, the id is always -1. */
static inline int
memcg_cache_id(struct mem_cgroup *memcg)
{
	return -1;
}
1390
/* !CONFIG_MEMCG_KMEM stub: no-op. */
static inline void memcg_get_cache_ids(void) { }
1394
/* !CONFIG_MEMCG_KMEM stub: no-op. */
static inline void memcg_put_cache_ids(void) { }
1398
/* !CONFIG_MEMCG_KMEM stub: no-op, all three arguments are ignored. */
static inline void
memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
}
1401#endif /* CONFIG_MEMCG_KMEM */
1402
1403#endif /* _LINUX_MEMCONTROL_H */