Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

memcg: fix reclaimable lru check in memcg

Currently, in mem_cgroup_hierarchical_reclaim(), mem_cgroup_local_usage() is
used to check whether the memcg contains reclaimable pages or not. If there
are no pages in it, the routine skips it.

But mem_cgroup_local_usage() includes unevictable pages in its count and
cannot handle the "noswap" condition correctly, so it doesn't work on a
swapless system.

This patch adds test_mem_cgroup_reclaimable() and uses it in place of
mem_cgroup_local_usage(). test_mem_cgroup_reclaimable() checks the LRU
counters and returns the correct answer to the caller. This new function
also takes a "noswap" argument so it can examine only the file LRUs when
necessary.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix kerneldoc layout]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Ying Han <yinghan@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

KAMEZAWA Hiroyuki and committed by
Linus Torvalds
4d0c066d 0b43c3aa

+76 -31
+76 -31
mm/memcontrol.c
··· 577 577 return val; 578 578 } 579 579 580 - static long mem_cgroup_local_usage(struct mem_cgroup *mem) 581 - { 582 - long ret; 583 - 584 - ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); 585 - ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); 586 - return ret; 587 - } 588 - 589 580 static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, 590 581 bool charge) 591 582 { ··· 1120 1129 return MEM_CGROUP_ZSTAT(mz, lru); 1121 1130 } 1122 1131 1123 - #ifdef CONFIG_NUMA 1124 1132 static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg, 1125 1133 int nid) 1126 1134 { ··· 1131 1141 return ret; 1132 1142 } 1133 1143 1144 + static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg, 1145 + int nid) 1146 + { 1147 + unsigned long ret; 1148 + 1149 + ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) + 1150 + mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON); 1151 + return ret; 1152 + } 1153 + 1154 + #if MAX_NUMNODES > 1 1134 1155 static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg) 1135 1156 { 1136 1157 u64 total = 0; ··· 1151 1150 total += mem_cgroup_node_nr_file_lru_pages(memcg, nid); 1152 1151 1153 1152 return total; 1154 - } 1155 - 1156 - static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg, 1157 - int nid) 1158 - { 1159 - unsigned long ret; 1160 - 1161 - ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) + 1162 - mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON); 1163 - 1164 - return ret; 1165 1153 } 1166 1154 1167 1155 static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg) ··· 1549 1559 return ret; 1550 1560 } 1551 1561 1562 + /** 1563 + * test_mem_cgroup_node_reclaimable 1564 + * @mem: the target memcg 1565 + * @nid: the node ID to be checked. 1566 + * @noswap : specify true here if the user wants flle only information. 
1567 + * 1568 + * This function returns whether the specified memcg contains any 1569 + * reclaimable pages on a node. Returns true if there are any reclaimable 1570 + * pages in the node. 1571 + */ 1572 + static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, 1573 + int nid, bool noswap) 1574 + { 1575 + if (mem_cgroup_node_nr_file_lru_pages(mem, nid)) 1576 + return true; 1577 + if (noswap || !total_swap_pages) 1578 + return false; 1579 + if (mem_cgroup_node_nr_anon_lru_pages(mem, nid)) 1580 + return true; 1581 + return false; 1582 + 1583 + } 1552 1584 #if MAX_NUMNODES > 1 1553 1585 1554 1586 /* ··· 1592 1580 1593 1581 for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) { 1594 1582 1595 - if (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_FILE) || 1596 - mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_FILE)) 1597 - continue; 1598 - 1599 - if (total_swap_pages && 1600 - (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_ANON) || 1601 - mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_ANON))) 1602 - continue; 1603 - node_clear(nid, mem->scan_nodes); 1583 + if (!test_mem_cgroup_node_reclaimable(mem, nid, false)) 1584 + node_clear(nid, mem->scan_nodes); 1604 1585 } 1605 1586 } 1606 1587 ··· 1632 1627 return node; 1633 1628 } 1634 1629 1630 + /* 1631 + * Check all nodes whether it contains reclaimable pages or not. 1632 + * For quick scan, we make use of scan_nodes. This will allow us to skip 1633 + * unused nodes. But scan_nodes is lazily updated and may not cotain 1634 + * enough new information. We need to do double check. 1635 + */ 1636 + bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) 1637 + { 1638 + int nid; 1639 + 1640 + /* 1641 + * quick check...making use of scan_node. 1642 + * We can skip unused nodes. 
1643 + */ 1644 + if (!nodes_empty(mem->scan_nodes)) { 1645 + for (nid = first_node(mem->scan_nodes); 1646 + nid < MAX_NUMNODES; 1647 + nid = next_node(nid, mem->scan_nodes)) { 1648 + 1649 + if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) 1650 + return true; 1651 + } 1652 + } 1653 + /* 1654 + * Check rest of nodes. 1655 + */ 1656 + for_each_node_state(nid, N_HIGH_MEMORY) { 1657 + if (node_isset(nid, mem->scan_nodes)) 1658 + continue; 1659 + if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) 1660 + return true; 1661 + } 1662 + return false; 1663 + } 1664 + 1635 1665 #else 1636 1666 int mem_cgroup_select_victim_node(struct mem_cgroup *mem) 1637 1667 { 1638 1668 return 0; 1669 + } 1670 + 1671 + bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) 1672 + { 1673 + return test_mem_cgroup_node_reclaimable(mem, 0, noswap); 1639 1674 } 1640 1675 #endif 1641 1676 ··· 1747 1702 } 1748 1703 } 1749 1704 } 1750 - if (!mem_cgroup_local_usage(victim)) { 1705 + if (!mem_cgroup_reclaimable(victim, noswap)) { 1751 1706 /* this cgroup's local usage == 0 */ 1752 1707 css_put(&victim->css); 1753 1708 continue;