Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

memcg: fix numa scan information update to be triggered by memory event

commit 889976dbcb12 ("memcg: reclaim memory from nodes in round-robin
order") adds a NUMA node round-robin for memcg. But the information is
updated only once per 10 seconds.

This patch changes the update trigger from jiffies to memcg's event count.
After this patch, the NUMA scan information will be updated when we see 1024
pagein/pageout events under a memcg.

[akpm@linux-foundation.org: attempt to repair code layout]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Ying Han <yinghan@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

KAMEZAWA Hiroyuki and committed by
Linus Torvalds
453a9bf3 4d0c066d

+27 -6
+27 -6
mm/memcontrol.c
··· 108 108 enum mem_cgroup_events_target { 109 109 MEM_CGROUP_TARGET_THRESH, 110 110 MEM_CGROUP_TARGET_SOFTLIMIT, 111 + MEM_CGROUP_TARGET_NUMAINFO, 111 112 MEM_CGROUP_NTARGETS, 112 113 }; 113 114 #define THRESHOLDS_EVENTS_TARGET (128) 114 115 #define SOFTLIMIT_EVENTS_TARGET (1024) 116 + #define NUMAINFO_EVENTS_TARGET (1024) 115 117 116 118 struct mem_cgroup_stat_cpu { 117 119 long count[MEM_CGROUP_STAT_NSTATS]; ··· 239 237 int last_scanned_node; 240 238 #if MAX_NUMNODES > 1 241 239 nodemask_t scan_nodes; 242 - unsigned long next_scan_node_update; 240 + atomic_t numainfo_events; 241 + atomic_t numainfo_updating; 243 242 #endif 244 243 /* 245 244 * Should the accounting and control be hierarchical, per subtree? ··· 683 680 case MEM_CGROUP_TARGET_SOFTLIMIT: 684 681 next = val + SOFTLIMIT_EVENTS_TARGET; 685 682 break; 683 + case MEM_CGROUP_TARGET_NUMAINFO: 684 + next = val + NUMAINFO_EVENTS_TARGET; 685 + break; 686 686 default: 687 687 return; 688 688 } ··· 704 698 mem_cgroup_threshold(mem); 705 699 __mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH); 706 700 if (unlikely(__memcg_event_check(mem, 707 - MEM_CGROUP_TARGET_SOFTLIMIT))){ 701 + MEM_CGROUP_TARGET_SOFTLIMIT))) { 708 702 mem_cgroup_update_tree(mem, page); 709 703 __mem_cgroup_target_update(mem, 710 - MEM_CGROUP_TARGET_SOFTLIMIT); 704 + MEM_CGROUP_TARGET_SOFTLIMIT); 711 705 } 706 + #if MAX_NUMNODES > 1 707 + if (unlikely(__memcg_event_check(mem, 708 + MEM_CGROUP_TARGET_NUMAINFO))) { 709 + atomic_inc(&mem->numainfo_events); 710 + __mem_cgroup_target_update(mem, 711 + MEM_CGROUP_TARGET_NUMAINFO); 712 + } 713 + #endif 712 714 } 713 715 } 714 716 ··· 1596 1582 static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) 1597 1583 { 1598 1584 int nid; 1599 - 1600 - if (time_after(mem->next_scan_node_update, jiffies)) 1585 + /* 1586 + * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET 1587 + * pagein/pageout changes since the last update. 
1588 + */ 1589 + if (!atomic_read(&mem->numainfo_events)) 1590 + return; 1591 + if (atomic_inc_return(&mem->numainfo_updating) > 1) 1601 1592 return; 1602 1593 1603 - mem->next_scan_node_update = jiffies + 10*HZ; 1604 1594 /* make a nodemask where this memcg uses memory from */ 1605 1595 mem->scan_nodes = node_states[N_HIGH_MEMORY]; 1606 1596 ··· 1613 1595 if (!test_mem_cgroup_node_reclaimable(mem, nid, false)) 1614 1596 node_clear(nid, mem->scan_nodes); 1615 1597 } 1598 + 1599 + atomic_set(&mem->numainfo_events, 0); 1600 + atomic_set(&mem->numainfo_updating, 0); 1616 1601 } 1617 1602 1618 1603 /*