Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vmscan: split LRU lists into anon & file sets

Split the LRU lists in two, one set for pages that are backed by real file
systems ("file") and one for pages that are backed by memory and swap
("anon"). The latter includes tmpfs.

The advantage of doing this is that the VM will not have to scan over lots
of anonymous pages (which we generally do not want to swap out), just to
find the page cache pages that it should evict.

This patch has the infrastructure and a basic policy to balance how much
we scan the anon lists and how much we scan the file lists. The big
policy changes are in separate patches.

[lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset]
[kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru]
[kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page]
[hugh@veritas.com: memcg swapbacked pages active]
[hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED]
[akpm@linux-foundation.org: fix /proc/vmstat units]
[nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration]
[kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo]
[kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Rik van Riel; committed by Linus Torvalds.
4f98a2fe b2e18538

+561 -366
+33 -23
drivers/base/node.c
··· 61 61 si_meminfo_node(&i, nid); 62 62 63 63 n = sprintf(buf, "\n" 64 - "Node %d MemTotal: %8lu kB\n" 65 - "Node %d MemFree: %8lu kB\n" 66 - "Node %d MemUsed: %8lu kB\n" 67 - "Node %d Active: %8lu kB\n" 68 - "Node %d Inactive: %8lu kB\n" 64 + "Node %d MemTotal: %8lu kB\n" 65 + "Node %d MemFree: %8lu kB\n" 66 + "Node %d MemUsed: %8lu kB\n" 67 + "Node %d Active: %8lu kB\n" 68 + "Node %d Inactive: %8lu kB\n" 69 + "Node %d Active(anon): %8lu kB\n" 70 + "Node %d Inactive(anon): %8lu kB\n" 71 + "Node %d Active(file): %8lu kB\n" 72 + "Node %d Inactive(file): %8lu kB\n" 69 73 #ifdef CONFIG_HIGHMEM 70 - "Node %d HighTotal: %8lu kB\n" 71 - "Node %d HighFree: %8lu kB\n" 72 - "Node %d LowTotal: %8lu kB\n" 73 - "Node %d LowFree: %8lu kB\n" 74 + "Node %d HighTotal: %8lu kB\n" 75 + "Node %d HighFree: %8lu kB\n" 76 + "Node %d LowTotal: %8lu kB\n" 77 + "Node %d LowFree: %8lu kB\n" 74 78 #endif 75 - "Node %d Dirty: %8lu kB\n" 76 - "Node %d Writeback: %8lu kB\n" 77 - "Node %d FilePages: %8lu kB\n" 78 - "Node %d Mapped: %8lu kB\n" 79 - "Node %d AnonPages: %8lu kB\n" 80 - "Node %d PageTables: %8lu kB\n" 81 - "Node %d NFS_Unstable: %8lu kB\n" 82 - "Node %d Bounce: %8lu kB\n" 83 - "Node %d WritebackTmp: %8lu kB\n" 84 - "Node %d Slab: %8lu kB\n" 85 - "Node %d SReclaimable: %8lu kB\n" 86 - "Node %d SUnreclaim: %8lu kB\n", 79 + "Node %d Dirty: %8lu kB\n" 80 + "Node %d Writeback: %8lu kB\n" 81 + "Node %d FilePages: %8lu kB\n" 82 + "Node %d Mapped: %8lu kB\n" 83 + "Node %d AnonPages: %8lu kB\n" 84 + "Node %d PageTables: %8lu kB\n" 85 + "Node %d NFS_Unstable: %8lu kB\n" 86 + "Node %d Bounce: %8lu kB\n" 87 + "Node %d WritebackTmp: %8lu kB\n" 88 + "Node %d Slab: %8lu kB\n" 89 + "Node %d SReclaimable: %8lu kB\n" 90 + "Node %d SUnreclaim: %8lu kB\n", 87 91 nid, K(i.totalram), 88 92 nid, K(i.freeram), 89 93 nid, K(i.totalram - i.freeram), 90 - nid, K(node_page_state(nid, NR_ACTIVE)), 91 - nid, K(node_page_state(nid, NR_INACTIVE)), 94 + nid, K(node_page_state(nid, NR_ACTIVE_ANON) + 95 + 
node_page_state(nid, NR_ACTIVE_FILE)), 96 + nid, K(node_page_state(nid, NR_INACTIVE_ANON) + 97 + node_page_state(nid, NR_INACTIVE_FILE)), 98 + nid, K(node_page_state(nid, NR_ACTIVE_ANON)), 99 + nid, K(node_page_state(nid, NR_INACTIVE_ANON)), 100 + nid, K(node_page_state(nid, NR_ACTIVE_FILE)), 101 + nid, K(node_page_state(nid, NR_INACTIVE_FILE)), 92 102 #ifdef CONFIG_HIGHMEM 93 103 nid, K(i.totalhigh), 94 104 nid, K(i.freehigh),
+2 -2
fs/cifs/file.c
··· 1791 1791 SetPageUptodate(page); 1792 1792 unlock_page(page); 1793 1793 if (!pagevec_add(plru_pvec, page)) 1794 - __pagevec_lru_add(plru_pvec); 1794 + __pagevec_lru_add_file(plru_pvec); 1795 1795 data += PAGE_CACHE_SIZE; 1796 1796 } 1797 1797 return; ··· 1925 1925 bytes_read = 0; 1926 1926 } 1927 1927 1928 - pagevec_lru_add(&lru_pvec); 1928 + pagevec_lru_add_file(&lru_pvec); 1929 1929 1930 1930 /* need to free smb_read_data buf before exit */ 1931 1931 if (smb_read_data) {
+1 -1
fs/nfs/dir.c
··· 1517 1517 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, 1518 1518 GFP_KERNEL)) { 1519 1519 pagevec_add(&lru_pvec, page); 1520 - pagevec_lru_add(&lru_pvec); 1520 + pagevec_lru_add_file(&lru_pvec); 1521 1521 SetPageUptodate(page); 1522 1522 unlock_page(page); 1523 1523 } else
+2 -2
fs/ntfs/file.c
··· 439 439 pages[nr] = *cached_page; 440 440 page_cache_get(*cached_page); 441 441 if (unlikely(!pagevec_add(lru_pvec, *cached_page))) 442 - __pagevec_lru_add(lru_pvec); 442 + __pagevec_lru_add_file(lru_pvec); 443 443 *cached_page = NULL; 444 444 } 445 445 index++; ··· 2084 2084 OSYNC_METADATA|OSYNC_DATA); 2085 2085 } 2086 2086 } 2087 - pagevec_lru_add(&lru_pvec); 2087 + pagevec_lru_add_file(&lru_pvec); 2088 2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 2089 2089 written ? "written" : "status", (unsigned long)written, 2090 2090 (long)status);
+45 -32
fs/proc/proc_misc.c
··· 136 136 unsigned long allowed; 137 137 struct vmalloc_info vmi; 138 138 long cached; 139 + unsigned long pages[NR_LRU_LISTS]; 140 + int lru; 139 141 140 142 /* 141 143 * display in kilobytes. ··· 156 154 157 155 get_vmalloc_info(&vmi); 158 156 157 + for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) 158 + pages[lru] = global_page_state(NR_LRU_BASE + lru); 159 + 159 160 /* 160 161 * Tagged format, for easy grepping and expansion. 161 162 */ 162 163 len = sprintf(page, 163 - "MemTotal: %8lu kB\n" 164 - "MemFree: %8lu kB\n" 165 - "Buffers: %8lu kB\n" 166 - "Cached: %8lu kB\n" 167 - "SwapCached: %8lu kB\n" 168 - "Active: %8lu kB\n" 169 - "Inactive: %8lu kB\n" 164 + "MemTotal: %8lu kB\n" 165 + "MemFree: %8lu kB\n" 166 + "Buffers: %8lu kB\n" 167 + "Cached: %8lu kB\n" 168 + "SwapCached: %8lu kB\n" 169 + "Active: %8lu kB\n" 170 + "Inactive: %8lu kB\n" 171 + "Active(anon): %8lu kB\n" 172 + "Inactive(anon): %8lu kB\n" 173 + "Active(file): %8lu kB\n" 174 + "Inactive(file): %8lu kB\n" 170 175 #ifdef CONFIG_HIGHMEM 171 - "HighTotal: %8lu kB\n" 172 - "HighFree: %8lu kB\n" 173 - "LowTotal: %8lu kB\n" 174 - "LowFree: %8lu kB\n" 176 + "HighTotal: %8lu kB\n" 177 + "HighFree: %8lu kB\n" 178 + "LowTotal: %8lu kB\n" 179 + "LowFree: %8lu kB\n" 175 180 #endif 176 - "SwapTotal: %8lu kB\n" 177 - "SwapFree: %8lu kB\n" 178 - "Dirty: %8lu kB\n" 179 - "Writeback: %8lu kB\n" 180 - "AnonPages: %8lu kB\n" 181 - "Mapped: %8lu kB\n" 182 - "Slab: %8lu kB\n" 183 - "SReclaimable: %8lu kB\n" 184 - "SUnreclaim: %8lu kB\n" 185 - "PageTables: %8lu kB\n" 181 + "SwapTotal: %8lu kB\n" 182 + "SwapFree: %8lu kB\n" 183 + "Dirty: %8lu kB\n" 184 + "Writeback: %8lu kB\n" 185 + "AnonPages: %8lu kB\n" 186 + "Mapped: %8lu kB\n" 187 + "Slab: %8lu kB\n" 188 + "SReclaimable: %8lu kB\n" 189 + "SUnreclaim: %8lu kB\n" 190 + "PageTables: %8lu kB\n" 186 191 #ifdef CONFIG_QUICKLIST 187 - "Quicklists: %8lu kB\n" 192 + "Quicklists: %8lu kB\n" 188 193 #endif 189 - "NFS_Unstable: %8lu kB\n" 190 - "Bounce: %8lu kB\n" 191 - 
"WritebackTmp: %8lu kB\n" 192 - "CommitLimit: %8lu kB\n" 193 - "Committed_AS: %8lu kB\n" 194 - "VmallocTotal: %8lu kB\n" 195 - "VmallocUsed: %8lu kB\n" 196 - "VmallocChunk: %8lu kB\n", 194 + "NFS_Unstable: %8lu kB\n" 195 + "Bounce: %8lu kB\n" 196 + "WritebackTmp: %8lu kB\n" 197 + "CommitLimit: %8lu kB\n" 198 + "Committed_AS: %8lu kB\n" 199 + "VmallocTotal: %8lu kB\n" 200 + "VmallocUsed: %8lu kB\n" 201 + "VmallocChunk: %8lu kB\n", 197 202 K(i.totalram), 198 203 K(i.freeram), 199 204 K(i.bufferram), 200 205 K(cached), 201 206 K(total_swapcache_pages), 202 - K(global_page_state(NR_ACTIVE)), 203 - K(global_page_state(NR_INACTIVE)), 207 + K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), 208 + K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), 209 + K(pages[LRU_ACTIVE_ANON]), 210 + K(pages[LRU_INACTIVE_ANON]), 211 + K(pages[LRU_ACTIVE_FILE]), 212 + K(pages[LRU_INACTIVE_FILE]), 204 213 #ifdef CONFIG_HIGHMEM 205 214 K(i.totalhigh), 206 215 K(i.freehigh),
+2 -2
fs/ramfs/file-nommu.c
··· 112 112 goto add_error; 113 113 114 114 if (!pagevec_add(&lru_pvec, page)) 115 - __pagevec_lru_add(&lru_pvec); 115 + __pagevec_lru_add_file(&lru_pvec); 116 116 117 117 unlock_page(page); 118 118 } 119 119 120 - pagevec_lru_add(&lru_pvec); 120 + pagevec_lru_add_file(&lru_pvec); 121 121 return 0; 122 122 123 123 fsize_exceeded:
+13
include/linux/backing-dev.h
··· 175 175 * BDI_CAP_READ_MAP: Can be mapped for reading 176 176 * BDI_CAP_WRITE_MAP: Can be mapped for writing 177 177 * BDI_CAP_EXEC_MAP: Can be mapped for execution 178 + * 179 + * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed. 178 180 */ 179 181 #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 180 182 #define BDI_CAP_NO_WRITEBACK 0x00000002 ··· 186 184 #define BDI_CAP_WRITE_MAP 0x00000020 187 185 #define BDI_CAP_EXEC_MAP 0x00000040 188 186 #define BDI_CAP_NO_ACCT_WB 0x00000080 187 + #define BDI_CAP_SWAP_BACKED 0x00000100 189 188 190 189 #define BDI_CAP_VMFLAGS \ 191 190 (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) ··· 251 248 BDI_CAP_NO_WRITEBACK)); 252 249 } 253 250 251 + static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi) 252 + { 253 + return bdi->capabilities & BDI_CAP_SWAP_BACKED; 254 + } 255 + 254 256 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) 255 257 { 256 258 return bdi_cap_writeback_dirty(mapping->backing_dev_info); ··· 264 256 static inline bool mapping_cap_account_dirty(struct address_space *mapping) 265 257 { 266 258 return bdi_cap_account_dirty(mapping->backing_dev_info); 259 + } 260 + 261 + static inline bool mapping_cap_swap_backed(struct address_space *mapping) 262 + { 263 + return bdi_cap_swap_backed(mapping->backing_dev_info); 267 264 } 268 265 269 266 #endif /* _LINUX_BACKING_DEV_H */
+1 -1
include/linux/memcontrol.h
··· 44 44 unsigned long *scanned, int order, 45 45 int mode, struct zone *z, 46 46 struct mem_cgroup *mem_cont, 47 - int active); 47 + int active, int file); 48 48 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); 49 49 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); 50 50
+38 -12
include/linux/mm_inline.h
··· 5 5 * page_is_file_cache - should the page be on a file LRU or anon LRU? 6 6 * @page: the page to test 7 7 * 8 - * Returns !0 if @page is page cache page backed by a regular filesystem, 8 + * Returns LRU_FILE if @page is page cache page backed by a regular filesystem, 9 9 * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed. 10 10 * Used by functions that manipulate the LRU lists, to sort a page 11 11 * onto the right LRU list. ··· 20 20 return 0; 21 21 22 22 /* The page is page cache backed by a normal filesystem. */ 23 - return 1; 23 + return LRU_FILE; 24 24 } 25 25 26 26 static inline void ··· 38 38 } 39 39 40 40 static inline void 41 - add_page_to_active_list(struct zone *zone, struct page *page) 41 + add_page_to_inactive_anon_list(struct zone *zone, struct page *page) 42 42 { 43 - add_page_to_lru_list(zone, page, LRU_ACTIVE); 43 + add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON); 44 44 } 45 45 46 46 static inline void 47 - add_page_to_inactive_list(struct zone *zone, struct page *page) 47 + add_page_to_active_anon_list(struct zone *zone, struct page *page) 48 48 { 49 - add_page_to_lru_list(zone, page, LRU_INACTIVE); 49 + add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON); 50 50 } 51 51 52 52 static inline void 53 - del_page_from_active_list(struct zone *zone, struct page *page) 53 + add_page_to_inactive_file_list(struct zone *zone, struct page *page) 54 54 { 55 - del_page_from_lru_list(zone, page, LRU_ACTIVE); 55 + add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE); 56 56 } 57 57 58 58 static inline void 59 - del_page_from_inactive_list(struct zone *zone, struct page *page) 59 + add_page_to_active_file_list(struct zone *zone, struct page *page) 60 60 { 61 - del_page_from_lru_list(zone, page, LRU_INACTIVE); 61 + add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE); 62 + } 63 + 64 + static inline void 65 + del_page_from_inactive_anon_list(struct zone *zone, struct page *page) 66 + { 67 + del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON); 
68 + } 69 + 70 + static inline void 71 + del_page_from_active_anon_list(struct zone *zone, struct page *page) 72 + { 73 + del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON); 74 + } 75 + 76 + static inline void 77 + del_page_from_inactive_file_list(struct zone *zone, struct page *page) 78 + { 79 + del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE); 80 + } 81 + 82 + static inline void 83 + del_page_from_active_file_list(struct zone *zone, struct page *page) 84 + { 85 + del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE); 62 86 } 63 87 64 88 static inline void 65 89 del_page_from_lru(struct zone *zone, struct page *page) 66 90 { 67 - enum lru_list l = LRU_INACTIVE; 91 + enum lru_list l = LRU_BASE; 68 92 69 93 list_del(&page->lru); 70 94 if (PageActive(page)) { 71 95 __ClearPageActive(page); 72 - l = LRU_ACTIVE; 96 + l += LRU_ACTIVE; 73 97 } 98 + l += page_is_file_cache(page); 74 99 __dec_zone_state(zone, NR_LRU_BASE + l); 75 100 } 76 101 ··· 112 87 113 88 if (PageActive(page)) 114 89 lru += LRU_ACTIVE; 90 + lru += page_is_file_cache(page); 115 91 116 92 return lru; 117 93 }
+40 -7
include/linux/mmzone.h
··· 82 82 /* First 128 byte cacheline (assuming 64 bit words) */ 83 83 NR_FREE_PAGES, 84 84 NR_LRU_BASE, 85 - NR_INACTIVE = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ 86 - NR_ACTIVE, /* " " " " " */ 85 + NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ 86 + NR_ACTIVE_ANON, /* " " " " " */ 87 + NR_INACTIVE_FILE, /* " " " " " */ 88 + NR_ACTIVE_FILE, /* " " " " " */ 87 89 NR_ANON_PAGES, /* Mapped anonymous pages */ 88 90 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. 89 91 only modified from process context */ 90 92 NR_FILE_PAGES, 91 93 NR_FILE_DIRTY, 92 94 NR_WRITEBACK, 93 - /* Second 128 byte cacheline */ 94 95 NR_SLAB_RECLAIMABLE, 95 96 NR_SLAB_UNRECLAIMABLE, 96 97 NR_PAGETABLE, /* used for pagetables */ 97 98 NR_UNSTABLE_NFS, /* NFS unstable pages */ 98 99 NR_BOUNCE, 99 100 NR_VMSCAN_WRITE, 101 + /* Second 128 byte cacheline */ 100 102 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ 101 103 #ifdef CONFIG_NUMA 102 104 NUMA_HIT, /* allocated in intended node */ ··· 110 108 #endif 111 109 NR_VM_ZONE_STAT_ITEMS }; 112 110 111 + /* 112 + * We do arithmetic on the LRU lists in various places in the code, 113 + * so it is important to keep the active lists LRU_ACTIVE higher in 114 + * the array than the corresponding inactive lists, and to keep 115 + * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists. 
116 + * 117 + * This has to be kept in sync with the statistics in zone_stat_item 118 + * above and the descriptions in vmstat_text in mm/vmstat.c 119 + */ 120 + #define LRU_BASE 0 121 + #define LRU_ACTIVE 1 122 + #define LRU_FILE 2 123 + 113 124 enum lru_list { 114 - LRU_BASE, 115 - LRU_INACTIVE=LRU_BASE, /* must match order of NR_[IN]ACTIVE */ 116 - LRU_ACTIVE, /* " " " " " */ 125 + LRU_INACTIVE_ANON = LRU_BASE, 126 + LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, 127 + LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, 128 + LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, 117 129 NR_LRU_LISTS }; 118 130 119 131 #define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++) 120 132 133 + static inline int is_file_lru(enum lru_list l) 134 + { 135 + return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE); 136 + } 137 + 121 138 static inline int is_active_lru(enum lru_list l) 122 139 { 123 - return (l == LRU_ACTIVE); 140 + return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE); 124 141 } 125 142 126 143 struct per_cpu_pages { ··· 290 269 struct list_head list; 291 270 unsigned long nr_scan; 292 271 } lru[NR_LRU_LISTS]; 272 + 273 + /* 274 + * The pageout code in vmscan.c keeps track of how many of the 275 + * mem/swap backed and file backed pages are refeferenced. 276 + * The higher the rotated/scanned ratio, the more valuable 277 + * that cache is. 278 + * 279 + * The anon LRU stats live in [0], file LRU stats in [1] 280 + */ 281 + unsigned long recent_rotated[2]; 282 + unsigned long recent_scanned[2]; 283 + 293 284 unsigned long pages_scanned; /* since last reclaim */ 294 285 unsigned long flags; /* zone flags, see below */ 295 286
+23 -6
include/linux/pagevec.h
··· 81 81 __pagevec_free(pvec); 82 82 } 83 83 84 - static inline void __pagevec_lru_add(struct pagevec *pvec) 84 + static inline void __pagevec_lru_add_anon(struct pagevec *pvec) 85 85 { 86 - ____pagevec_lru_add(pvec, LRU_INACTIVE); 86 + ____pagevec_lru_add(pvec, LRU_INACTIVE_ANON); 87 87 } 88 88 89 - static inline void __pagevec_lru_add_active(struct pagevec *pvec) 89 + static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec) 90 90 { 91 - ____pagevec_lru_add(pvec, LRU_ACTIVE); 91 + ____pagevec_lru_add(pvec, LRU_ACTIVE_ANON); 92 92 } 93 93 94 - static inline void pagevec_lru_add(struct pagevec *pvec) 94 + static inline void __pagevec_lru_add_file(struct pagevec *pvec) 95 + { 96 + ____pagevec_lru_add(pvec, LRU_INACTIVE_FILE); 97 + } 98 + 99 + static inline void __pagevec_lru_add_active_file(struct pagevec *pvec) 100 + { 101 + ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE); 102 + } 103 + 104 + 105 + static inline void pagevec_lru_add_file(struct pagevec *pvec) 95 106 { 96 107 if (pagevec_count(pvec)) 97 - __pagevec_lru_add(pvec); 108 + __pagevec_lru_add_file(pvec); 109 + } 110 + 111 + static inline void pagevec_lru_add_anon(struct pagevec *pvec) 112 + { 113 + if (pagevec_count(pvec)) 114 + __pagevec_lru_add_anon(pvec); 98 115 } 99 116 100 117 #endif /* _LINUX_PAGEVEC_H */
+15 -5
include/linux/swap.h
··· 184 184 * lru_cache_add: add a page to the page lists 185 185 * @page: the page to add 186 186 */ 187 - static inline void lru_cache_add(struct page *page) 187 + static inline void lru_cache_add_anon(struct page *page) 188 188 { 189 - __lru_cache_add(page, LRU_INACTIVE); 189 + __lru_cache_add(page, LRU_INACTIVE_ANON); 190 190 } 191 191 192 - static inline void lru_cache_add_active(struct page *page) 192 + static inline void lru_cache_add_active_anon(struct page *page) 193 193 { 194 - __lru_cache_add(page, LRU_ACTIVE); 194 + __lru_cache_add(page, LRU_ACTIVE_ANON); 195 + } 196 + 197 + static inline void lru_cache_add_file(struct page *page) 198 + { 199 + __lru_cache_add(page, LRU_INACTIVE_FILE); 200 + } 201 + 202 + static inline void lru_cache_add_active_file(struct page *page) 203 + { 204 + __lru_cache_add(page, LRU_ACTIVE_FILE); 195 205 } 196 206 197 207 /* linux/mm/vmscan.c */ ··· 209 199 gfp_t gfp_mask); 210 200 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, 211 201 gfp_t gfp_mask); 212 - extern int __isolate_lru_page(struct page *page, int mode); 202 + extern int __isolate_lru_page(struct page *page, int mode, int file); 213 203 extern unsigned long shrink_all_memory(unsigned long nr_pages); 214 204 extern int vm_swappiness; 215 205 extern int remove_mapping(struct address_space *mapping, struct page *page);
+10
include/linux/vmstat.h
··· 159 159 return x; 160 160 } 161 161 162 + extern unsigned long global_lru_pages(void); 163 + 164 + static inline unsigned long zone_lru_pages(struct zone *zone) 165 + { 166 + return (zone_page_state(zone, NR_ACTIVE_ANON) 167 + + zone_page_state(zone, NR_ACTIVE_FILE) 168 + + zone_page_state(zone, NR_INACTIVE_ANON) 169 + + zone_page_state(zone, NR_INACTIVE_FILE)); 170 + } 171 + 162 172 #ifdef CONFIG_NUMA 163 173 /* 164 174 * Determine the per node value of a stat item. This function
+19 -3
mm/filemap.c
··· 33 33 #include <linux/cpuset.h> 34 34 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 35 35 #include <linux/memcontrol.h> 36 + #include <linux/mm_inline.h> /* for page_is_file_cache() */ 36 37 #include "internal.h" 37 38 38 39 /* ··· 493 492 int add_to_page_cache_lru(struct page *page, struct address_space *mapping, 494 493 pgoff_t offset, gfp_t gfp_mask) 495 494 { 496 - int ret = add_to_page_cache(page, mapping, offset, gfp_mask); 497 - if (ret == 0) 498 - lru_cache_add(page); 495 + int ret; 496 + 497 + /* 498 + * Splice_read and readahead add shmem/tmpfs pages into the page cache 499 + * before shmem_readpage has a chance to mark them as SwapBacked: they 500 + * need to go on the active_anon lru below, and mem_cgroup_cache_charge 501 + * (called in add_to_page_cache) needs to know where they're going too. 502 + */ 503 + if (mapping_cap_swap_backed(mapping)) 504 + SetPageSwapBacked(page); 505 + 506 + ret = add_to_page_cache(page, mapping, offset, gfp_mask); 507 + if (ret == 0) { 508 + if (page_is_file_cache(page)) 509 + lru_cache_add_file(page); 510 + else 511 + lru_cache_add_active_anon(page); 512 + } 499 513 return ret; 500 514 } 501 515
+5 -5
mm/hugetlb.c
··· 1459 1459 { 1460 1460 struct hstate *h = &default_hstate; 1461 1461 return sprintf(buf, 1462 - "HugePages_Total: %5lu\n" 1463 - "HugePages_Free: %5lu\n" 1464 - "HugePages_Rsvd: %5lu\n" 1465 - "HugePages_Surp: %5lu\n" 1466 - "Hugepagesize: %5lu kB\n", 1462 + "HugePages_Total: %5lu\n" 1463 + "HugePages_Free: %5lu\n" 1464 + "HugePages_Rsvd: %5lu\n" 1465 + "HugePages_Surp: %5lu\n" 1466 + "Hugepagesize: %8lu kB\n", 1467 1467 h->nr_huge_pages, 1468 1468 h->free_huge_pages, 1469 1469 h->resv_huge_pages,
+49 -37
mm/memcontrol.c
··· 162 162 }; 163 163 #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ 164 164 #define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */ 165 + #define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */ 165 166 166 167 static int page_cgroup_nid(struct page_cgroup *pc) 167 168 { ··· 178 177 MEM_CGROUP_CHARGE_TYPE_CACHE = 0, 179 178 MEM_CGROUP_CHARGE_TYPE_MAPPED, 180 179 MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ 180 + MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */ 181 181 }; 182 182 183 183 /* ··· 290 288 static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, 291 289 struct page_cgroup *pc) 292 290 { 293 - int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; 294 - int lru = !!from; 291 + int lru = LRU_BASE; 292 + 293 + if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) 294 + lru += LRU_ACTIVE; 295 + if (pc->flags & PAGE_CGROUP_FLAG_FILE) 296 + lru += LRU_FILE; 295 297 296 298 MEM_CGROUP_ZSTAT(mz, lru) -= 1; 297 299 ··· 306 300 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, 307 301 struct page_cgroup *pc) 308 302 { 309 - int lru = LRU_INACTIVE; 303 + int lru = LRU_BASE; 310 304 311 305 if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) 312 306 lru += LRU_ACTIVE; 307 + if (pc->flags & PAGE_CGROUP_FLAG_FILE) 308 + lru += LRU_FILE; 313 309 314 310 MEM_CGROUP_ZSTAT(mz, lru) += 1; 315 311 list_add(&pc->lru, &mz->lists[lru]); ··· 322 314 static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) 323 315 { 324 316 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); 325 - int lru = LRU_INACTIVE; 326 - 327 - if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE) 328 - lru += LRU_ACTIVE; 317 + int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; 318 + int file = pc->flags & PAGE_CGROUP_FLAG_FILE; 319 + int lru = LRU_FILE * !!file + !!from; 329 320 330 321 MEM_CGROUP_ZSTAT(mz, lru) -= 1; 331 322 ··· 333 326 else 334 327 pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE; 335 328 336 - lru = !!active; 329 + lru 
= LRU_FILE * !!file + !!active; 337 330 MEM_CGROUP_ZSTAT(mz, lru) += 1; 338 331 list_move(&pc->lru, &mz->lists[lru]); 339 332 } ··· 398 391 } 399 392 400 393 /* 401 - * This function is called from vmscan.c. In page reclaiming loop. balance 402 - * between active and inactive list is calculated. For memory controller 403 - * page reclaiming, we should use using mem_cgroup's imbalance rather than 404 - * zone's global lru imbalance. 405 - */ 406 - long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem) 407 - { 408 - unsigned long active, inactive; 409 - /* active and inactive are the number of pages. 'long' is ok.*/ 410 - active = mem_cgroup_get_all_zonestat(mem, LRU_ACTIVE); 411 - inactive = mem_cgroup_get_all_zonestat(mem, LRU_INACTIVE); 412 - return (long) (active / (inactive + 1)); 413 - } 414 - 415 - /* 416 394 * prev_priority control...this will be used in memory reclaim path. 417 395 */ 418 396 int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem) ··· 442 450 unsigned long *scanned, int order, 443 451 int mode, struct zone *z, 444 452 struct mem_cgroup *mem_cont, 445 - int active) 453 + int active, int file) 446 454 { 447 455 unsigned long nr_taken = 0; 448 456 struct page *page; ··· 453 461 int nid = z->zone_pgdat->node_id; 454 462 int zid = zone_idx(z); 455 463 struct mem_cgroup_per_zone *mz; 456 - int lru = !!active; 464 + int lru = LRU_FILE * !!file + !!active; 457 465 458 466 BUG_ON(!mem_cont); 459 467 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); ··· 469 477 if (unlikely(!PageLRU(page))) 470 478 continue; 471 479 480 + /* 481 + * TODO: play better with lumpy reclaim, grabbing anything. 
482 + */ 472 483 if (PageActive(page) && !active) { 473 484 __mem_cgroup_move_lists(pc, true); 474 485 continue; ··· 484 489 scan++; 485 490 list_move(&pc->lru, &pc_list); 486 491 487 - if (__isolate_lru_page(page, mode) == 0) { 492 + if (__isolate_lru_page(page, mode, file) == 0) { 488 493 list_move(&page->lru, dst); 489 494 nr_taken++; 490 495 } ··· 570 575 * If a page is accounted as a page cache, insert to inactive list. 571 576 * If anon, insert to active list. 572 577 */ 573 - if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) 578 + if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) { 574 579 pc->flags = PAGE_CGROUP_FLAG_CACHE; 575 - else 580 + if (page_is_file_cache(page)) 581 + pc->flags |= PAGE_CGROUP_FLAG_FILE; 582 + else 583 + pc->flags |= PAGE_CGROUP_FLAG_ACTIVE; 584 + } else if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) 576 585 pc->flags = PAGE_CGROUP_FLAG_ACTIVE; 586 + else /* MEM_CGROUP_CHARGE_TYPE_SHMEM */ 587 + pc->flags = PAGE_CGROUP_FLAG_CACHE | PAGE_CGROUP_FLAG_ACTIVE; 577 588 578 589 lock_page_cgroup(page); 579 590 if (unlikely(page_get_page_cgroup(page))) { ··· 738 737 if (pc) { 739 738 mem = pc->mem_cgroup; 740 739 css_get(&mem->css); 741 - if (pc->flags & PAGE_CGROUP_FLAG_CACHE) 742 - ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; 740 + if (pc->flags & PAGE_CGROUP_FLAG_CACHE) { 741 + if (page_is_file_cache(page)) 742 + ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; 743 + else 744 + ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; 745 + } 743 746 } 744 747 unlock_page_cgroup(page); 745 748 if (mem) { ··· 987 982 } 988 983 /* showing # of active pages */ 989 984 { 990 - unsigned long active, inactive; 985 + unsigned long active_anon, inactive_anon; 986 + unsigned long active_file, inactive_file; 991 987 992 - inactive = mem_cgroup_get_all_zonestat(mem_cont, 993 - LRU_INACTIVE); 994 - active = mem_cgroup_get_all_zonestat(mem_cont, 995 - LRU_ACTIVE); 996 - cb->fill(cb, "active", (active) * PAGE_SIZE); 997 - cb->fill(cb, "inactive", (inactive) * PAGE_SIZE); 988 + inactive_anon = 
mem_cgroup_get_all_zonestat(mem_cont, 989 + LRU_INACTIVE_ANON); 990 + active_anon = mem_cgroup_get_all_zonestat(mem_cont, 991 + LRU_ACTIVE_ANON); 992 + inactive_file = mem_cgroup_get_all_zonestat(mem_cont, 993 + LRU_INACTIVE_FILE); 994 + active_file = mem_cgroup_get_all_zonestat(mem_cont, 995 + LRU_ACTIVE_FILE); 996 + cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE); 997 + cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE); 998 + cb->fill(cb, "active_file", (active_file) * PAGE_SIZE); 999 + cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE); 998 1000 } 999 1001 return 0; 1000 1002 }
+3 -3
mm/memory.c
··· 1889 1889 set_pte_at(mm, address, page_table, entry); 1890 1890 update_mmu_cache(vma, address, entry); 1891 1891 SetPageSwapBacked(new_page); 1892 - lru_cache_add_active(new_page); 1892 + lru_cache_add_active_anon(new_page); 1893 1893 page_add_new_anon_rmap(new_page, vma, address); 1894 1894 1895 1895 if (old_page) { ··· 2384 2384 goto release; 2385 2385 inc_mm_counter(mm, anon_rss); 2386 2386 SetPageSwapBacked(page); 2387 - lru_cache_add_active(page); 2387 + lru_cache_add_active_anon(page); 2388 2388 page_add_new_anon_rmap(page, vma, address); 2389 2389 set_pte_at(mm, address, page_table, entry); 2390 2390 ··· 2526 2526 if (anon) { 2527 2527 inc_mm_counter(mm, anon_rss); 2528 2528 SetPageSwapBacked(page); 2529 - lru_cache_add_active(page); 2529 + lru_cache_add_active_anon(page); 2530 2530 page_add_new_anon_rmap(page, vma, address); 2531 2531 } else { 2532 2532 inc_mm_counter(mm, file_rss);
+2 -6
mm/page-writeback.c
··· 329 329 struct zone *z = 330 330 &NODE_DATA(node)->node_zones[ZONE_HIGHMEM]; 331 331 332 - x += zone_page_state(z, NR_FREE_PAGES) 333 - + zone_page_state(z, NR_INACTIVE) 334 - + zone_page_state(z, NR_ACTIVE); 332 + x += zone_page_state(z, NR_FREE_PAGES) + zone_lru_pages(z); 335 333 } 336 334 /* 337 335 * Make sure that the number of highmem pages is never larger ··· 353 355 { 354 356 unsigned long x; 355 357 356 - x = global_page_state(NR_FREE_PAGES) 357 - + global_page_state(NR_INACTIVE) 358 - + global_page_state(NR_ACTIVE); 358 + x = global_page_state(NR_FREE_PAGES) + global_lru_pages(); 359 359 360 360 if (!vm_highmem_is_dirtyable) 361 361 x -= highmem_dirtyable_memory(x);
+18 -7
mm/page_alloc.c
··· 1864 1864 } 1865 1865 } 1866 1866 1867 - printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n" 1867 + printk("Active_anon:%lu active_file:%lu inactive_anon%lu\n" 1868 + " inactive_file:%lu dirty:%lu writeback:%lu unstable:%lu\n" 1868 1869 " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", 1869 - global_page_state(NR_ACTIVE), 1870 - global_page_state(NR_INACTIVE), 1870 + global_page_state(NR_ACTIVE_ANON), 1871 + global_page_state(NR_ACTIVE_FILE), 1872 + global_page_state(NR_INACTIVE_ANON), 1873 + global_page_state(NR_INACTIVE_FILE), 1871 1874 global_page_state(NR_FILE_DIRTY), 1872 1875 global_page_state(NR_WRITEBACK), 1873 1876 global_page_state(NR_UNSTABLE_NFS), ··· 1893 1890 " min:%lukB" 1894 1891 " low:%lukB" 1895 1892 " high:%lukB" 1896 - " active:%lukB" 1897 - " inactive:%lukB" 1893 + " active_anon:%lukB" 1894 + " inactive_anon:%lukB" 1895 + " active_file:%lukB" 1896 + " inactive_file:%lukB" 1898 1897 " present:%lukB" 1899 1898 " pages_scanned:%lu" 1900 1899 " all_unreclaimable? %s" ··· 1906 1901 K(zone->pages_min), 1907 1902 K(zone->pages_low), 1908 1903 K(zone->pages_high), 1909 - K(zone_page_state(zone, NR_ACTIVE)), 1910 - K(zone_page_state(zone, NR_INACTIVE)), 1904 + K(zone_page_state(zone, NR_ACTIVE_ANON)), 1905 + K(zone_page_state(zone, NR_INACTIVE_ANON)), 1906 + K(zone_page_state(zone, NR_ACTIVE_FILE)), 1907 + K(zone_page_state(zone, NR_INACTIVE_FILE)), 1911 1908 K(zone->present_pages), 1912 1909 zone->pages_scanned, 1913 1910 (zone_is_all_unreclaimable(zone) ? "yes" : "no") ··· 3479 3472 INIT_LIST_HEAD(&zone->lru[l].list); 3480 3473 zone->lru[l].nr_scan = 0; 3481 3474 } 3475 + zone->recent_rotated[0] = 0; 3476 + zone->recent_rotated[1] = 0; 3477 + zone->recent_scanned[0] = 0; 3478 + zone->recent_scanned[1] = 0; 3482 3479 zap_zone_vm_stats(zone); 3483 3480 zone->flags = 0; 3484 3481 if (!size)
+1 -1
mm/readahead.c
··· 229 229 */ 230 230 unsigned long max_sane_readahead(unsigned long nr) 231 231 { 232 - return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE) 232 + return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE) 233 233 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); 234 234 } 235 235
+1 -1
mm/shmem.c
··· 199 199 200 200 static struct backing_dev_info shmem_backing_dev_info __read_mostly = { 201 201 .ra_pages = 0, /* No readahead */ 202 - .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 202 + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, 203 203 .unplug_io_fn = default_unplug_io_fn, 204 204 }; 205 205
+11 -3
mm/swap.c
··· 116 116 spin_lock(&zone->lru_lock); 117 117 } 118 118 if (PageLRU(page) && !PageActive(page)) { 119 - list_move_tail(&page->lru, &zone->lru[LRU_INACTIVE].list); 119 + int lru = page_is_file_cache(page); 120 + list_move_tail(&page->lru, &zone->lru[lru].list); 120 121 pgmoved++; 121 122 } 122 123 } ··· 158 157 159 158 spin_lock_irq(&zone->lru_lock); 160 159 if (PageLRU(page) && !PageActive(page)) { 161 - del_page_from_inactive_list(zone, page); 160 + int file = page_is_file_cache(page); 161 + int lru = LRU_BASE + file; 162 + del_page_from_lru_list(zone, page, lru); 163 + 162 164 SetPageActive(page); 163 - add_page_to_active_list(zone, page); 165 + lru += LRU_ACTIVE; 166 + add_page_to_lru_list(zone, page, lru); 164 167 __count_vm_event(PGACTIVATE); 165 168 mem_cgroup_move_lists(page, true); 169 + 170 + zone->recent_rotated[!!file]++; 171 + zone->recent_scanned[!!file]++; 166 172 } 167 173 spin_unlock_irq(&zone->lru_lock); 168 174 }
+2 -2
mm/swap_state.c
··· 33 33 }; 34 34 35 35 static struct backing_dev_info swap_backing_dev_info = { 36 - .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 36 + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, 37 37 .unplug_io_fn = swap_unplug_io_fn, 38 38 }; 39 39 ··· 310 310 /* 311 311 * Initiate read into locked page and return. 312 312 */ 313 - lru_cache_add_active(new_page); 313 + lru_cache_add_active_anon(new_page); 314 314 swap_readpage(NULL, new_page); 315 315 return new_page; 316 316 }
+216 -200
mm/vmscan.c
··· 78 78 unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst, 79 79 unsigned long *scanned, int order, int mode, 80 80 struct zone *z, struct mem_cgroup *mem_cont, 81 - int active); 81 + int active, int file); 82 82 }; 83 83 84 84 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) ··· 680 680 * 681 681 * returns 0 on success, -ve errno on failure. 682 682 */ 683 - int __isolate_lru_page(struct page *page, int mode) 683 + int __isolate_lru_page(struct page *page, int mode, int file) 684 684 { 685 685 int ret = -EINVAL; 686 686 ··· 694 694 * of each. 695 695 */ 696 696 if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) 697 + return ret; 698 + 699 + if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file)) 697 700 return ret; 698 701 699 702 ret = -EBUSY; ··· 729 726 * @scanned: The number of pages that were scanned. 730 727 * @order: The caller's attempted allocation order 731 728 * @mode: One of the LRU isolation modes 729 + * @file: True [1] if isolating file [!anon] pages 732 730 * 733 731 * returns how many pages were moved onto *@dst. 734 732 */ 735 733 static unsigned long isolate_lru_pages(unsigned long nr_to_scan, 736 734 struct list_head *src, struct list_head *dst, 737 - unsigned long *scanned, int order, int mode) 735 + unsigned long *scanned, int order, int mode, int file) 738 736 { 739 737 unsigned long nr_taken = 0; 740 738 unsigned long scan; ··· 752 748 753 749 VM_BUG_ON(!PageLRU(page)); 754 750 755 - switch (__isolate_lru_page(page, mode)) { 751 + switch (__isolate_lru_page(page, mode, file)) { 756 752 case 0: 757 753 list_move(&page->lru, dst); 758 754 nr_taken++; ··· 795 791 break; 796 792 797 793 cursor_page = pfn_to_page(pfn); 794 + 798 795 /* Check that we have not crossed a zone boundary. 
*/ 799 796 if (unlikely(page_zone_id(cursor_page) != zone_id)) 800 797 continue; 801 - switch (__isolate_lru_page(cursor_page, mode)) { 798 + switch (__isolate_lru_page(cursor_page, mode, file)) { 802 799 case 0: 803 800 list_move(&cursor_page->lru, dst); 804 801 nr_taken++; ··· 824 819 unsigned long *scanned, int order, 825 820 int mode, struct zone *z, 826 821 struct mem_cgroup *mem_cont, 827 - int active) 822 + int active, int file) 828 823 { 824 + int lru = LRU_BASE; 829 825 if (active) 830 - return isolate_lru_pages(nr, &z->lru[LRU_ACTIVE].list, dst, 831 - scanned, order, mode); 832 - else 833 - return isolate_lru_pages(nr, &z->lru[LRU_INACTIVE].list, dst, 834 - scanned, order, mode); 826 + lru += LRU_ACTIVE; 827 + if (file) 828 + lru += LRU_FILE; 829 + return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order, 830 + mode, !!file); 835 831 } 836 832 837 833 /* 838 834 * clear_active_flags() is a helper for shrink_active_list(), clearing 839 835 * any active bits from the pages in the list. 
840 836 */ 841 - static unsigned long clear_active_flags(struct list_head *page_list) 837 + static unsigned long clear_active_flags(struct list_head *page_list, 838 + unsigned int *count) 842 839 { 843 840 int nr_active = 0; 841 + int lru; 844 842 struct page *page; 845 843 846 - list_for_each_entry(page, page_list, lru) 844 + list_for_each_entry(page, page_list, lru) { 845 + lru = page_is_file_cache(page); 847 846 if (PageActive(page)) { 847 + lru += LRU_ACTIVE; 848 848 ClearPageActive(page); 849 849 nr_active++; 850 850 } 851 + count[lru]++; 852 + } 851 853 852 854 return nr_active; 853 855 } ··· 892 880 893 881 spin_lock_irq(&zone->lru_lock); 894 882 if (PageLRU(page) && get_page_unless_zero(page)) { 883 + int lru = LRU_BASE; 895 884 ret = 0; 896 885 ClearPageLRU(page); 897 - if (PageActive(page)) 898 - del_page_from_active_list(zone, page); 899 - else 900 - del_page_from_inactive_list(zone, page); 886 + 887 + lru += page_is_file_cache(page) + !!PageActive(page); 888 + del_page_from_lru_list(zone, page, lru); 901 889 } 902 890 spin_unlock_irq(&zone->lru_lock); 903 891 } ··· 909 897 * of reclaimed pages 910 898 */ 911 899 static unsigned long shrink_inactive_list(unsigned long max_scan, 912 - struct zone *zone, struct scan_control *sc) 900 + struct zone *zone, struct scan_control *sc, int file) 913 901 { 914 902 LIST_HEAD(page_list); 915 903 struct pagevec pvec; ··· 926 914 unsigned long nr_scan; 927 915 unsigned long nr_freed; 928 916 unsigned long nr_active; 917 + unsigned int count[NR_LRU_LISTS] = { 0, }; 918 + int mode = (sc->order > PAGE_ALLOC_COSTLY_ORDER) ? 919 + ISOLATE_BOTH : ISOLATE_INACTIVE; 929 920 930 921 nr_taken = sc->isolate_pages(sc->swap_cluster_max, 931 - &page_list, &nr_scan, sc->order, 932 - (sc->order > PAGE_ALLOC_COSTLY_ORDER)? 
933 - ISOLATE_BOTH : ISOLATE_INACTIVE, 934 - zone, sc->mem_cgroup, 0); 935 - nr_active = clear_active_flags(&page_list); 922 + &page_list, &nr_scan, sc->order, mode, 923 + zone, sc->mem_cgroup, 0, file); 924 + nr_active = clear_active_flags(&page_list, count); 936 925 __count_vm_events(PGDEACTIVATE, nr_active); 937 926 938 - __mod_zone_page_state(zone, NR_ACTIVE, -nr_active); 939 - __mod_zone_page_state(zone, NR_INACTIVE, 940 - -(nr_taken - nr_active)); 941 - if (scan_global_lru(sc)) 927 + __mod_zone_page_state(zone, NR_ACTIVE_FILE, 928 + -count[LRU_ACTIVE_FILE]); 929 + __mod_zone_page_state(zone, NR_INACTIVE_FILE, 930 + -count[LRU_INACTIVE_FILE]); 931 + __mod_zone_page_state(zone, NR_ACTIVE_ANON, 932 + -count[LRU_ACTIVE_ANON]); 933 + __mod_zone_page_state(zone, NR_INACTIVE_ANON, 934 + -count[LRU_INACTIVE_ANON]); 935 + 936 + if (scan_global_lru(sc)) { 942 937 zone->pages_scanned += nr_scan; 938 + zone->recent_scanned[0] += count[LRU_INACTIVE_ANON]; 939 + zone->recent_scanned[0] += count[LRU_ACTIVE_ANON]; 940 + zone->recent_scanned[1] += count[LRU_INACTIVE_FILE]; 941 + zone->recent_scanned[1] += count[LRU_ACTIVE_FILE]; 942 + } 943 943 spin_unlock_irq(&zone->lru_lock); 944 944 945 945 nr_scanned += nr_scan; ··· 971 947 * The attempt at page out may have made some 972 948 * of the pages active, mark them inactive again. 
973 949 */ 974 - nr_active = clear_active_flags(&page_list); 950 + nr_active = clear_active_flags(&page_list, count); 975 951 count_vm_events(PGDEACTIVATE, nr_active); 976 952 977 953 nr_freed += shrink_page_list(&page_list, sc, ··· 1001 977 SetPageLRU(page); 1002 978 list_del(&page->lru); 1003 979 add_page_to_lru_list(zone, page, page_lru(page)); 980 + if (PageActive(page) && scan_global_lru(sc)) { 981 + int file = !!page_is_file_cache(page); 982 + zone->recent_rotated[file]++; 983 + } 1004 984 if (!pagevec_add(&pvec, page)) { 1005 985 spin_unlock_irq(&zone->lru_lock); 1006 986 __pagevec_release(&pvec); ··· 1035 1007 1036 1008 static inline int zone_is_near_oom(struct zone *zone) 1037 1009 { 1038 - return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE) 1039 - + zone_page_state(zone, NR_INACTIVE))*3; 1040 - } 1041 - 1042 - /* 1043 - * Determine we should try to reclaim mapped pages. 1044 - * This is called only when sc->mem_cgroup is NULL. 1045 - */ 1046 - static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone, 1047 - int priority) 1048 - { 1049 - long mapped_ratio; 1050 - long distress; 1051 - long swap_tendency; 1052 - long imbalance; 1053 - int reclaim_mapped = 0; 1054 - int prev_priority; 1055 - 1056 - if (scan_global_lru(sc) && zone_is_near_oom(zone)) 1057 - return 1; 1058 - /* 1059 - * `distress' is a measure of how much trouble we're having 1060 - * reclaiming pages. 0 -> no problems. 100 -> great trouble. 1061 - */ 1062 - if (scan_global_lru(sc)) 1063 - prev_priority = zone->prev_priority; 1064 - else 1065 - prev_priority = mem_cgroup_get_reclaim_priority(sc->mem_cgroup); 1066 - 1067 - distress = 100 >> min(prev_priority, priority); 1068 - 1069 - /* 1070 - * The point of this algorithm is to decide when to start 1071 - * reclaiming mapped memory instead of just pagecache. Work out 1072 - * how much memory 1073 - * is mapped. 
1074 - */ 1075 - if (scan_global_lru(sc)) 1076 - mapped_ratio = ((global_page_state(NR_FILE_MAPPED) + 1077 - global_page_state(NR_ANON_PAGES)) * 100) / 1078 - vm_total_pages; 1079 - else 1080 - mapped_ratio = mem_cgroup_calc_mapped_ratio(sc->mem_cgroup); 1081 - 1082 - /* 1083 - * Now decide how much we really want to unmap some pages. The 1084 - * mapped ratio is downgraded - just because there's a lot of 1085 - * mapped memory doesn't necessarily mean that page reclaim 1086 - * isn't succeeding. 1087 - * 1088 - * The distress ratio is important - we don't want to start 1089 - * going oom. 1090 - * 1091 - * A 100% value of vm_swappiness overrides this algorithm 1092 - * altogether. 1093 - */ 1094 - swap_tendency = mapped_ratio / 2 + distress + sc->swappiness; 1095 - 1096 - /* 1097 - * If there's huge imbalance between active and inactive 1098 - * (think active 100 times larger than inactive) we should 1099 - * become more permissive, or the system will take too much 1100 - * cpu before it start swapping during memory pressure. 1101 - * Distress is about avoiding early-oom, this is about 1102 - * making swappiness graceful despite setting it to low 1103 - * values. 1104 - * 1105 - * Avoid div by zero with nr_inactive+1, and max resulting 1106 - * value is vm_total_pages. 1107 - */ 1108 - if (scan_global_lru(sc)) { 1109 - imbalance = zone_page_state(zone, NR_ACTIVE); 1110 - imbalance /= zone_page_state(zone, NR_INACTIVE) + 1; 1111 - } else 1112 - imbalance = mem_cgroup_reclaim_imbalance(sc->mem_cgroup); 1113 - 1114 - /* 1115 - * Reduce the effect of imbalance if swappiness is low, 1116 - * this means for a swappiness very low, the imbalance 1117 - * must be much higher than 100 for this logic to make 1118 - * the difference. 1119 - * 1120 - * Max temporary value is vm_total_pages*100. 
1121 - */ 1122 - imbalance *= (vm_swappiness + 1); 1123 - imbalance /= 100; 1124 - 1125 - /* 1126 - * If not much of the ram is mapped, makes the imbalance 1127 - * less relevant, it's high priority we refill the inactive 1128 - * list with mapped pages only in presence of high ratio of 1129 - * mapped pages. 1130 - * 1131 - * Max temporary value is vm_total_pages*100. 1132 - */ 1133 - imbalance *= mapped_ratio; 1134 - imbalance /= 100; 1135 - 1136 - /* apply imbalance feedback to swap_tendency */ 1137 - swap_tendency += imbalance; 1138 - 1139 - /* 1140 - * Now use this metric to decide whether to start moving mapped 1141 - * memory onto the inactive list. 1142 - */ 1143 - if (swap_tendency >= 100) 1144 - reclaim_mapped = 1; 1145 - 1146 - return reclaim_mapped; 1010 + return zone->pages_scanned >= (zone_lru_pages(zone) * 3); 1147 1011 } 1148 1012 1149 1013 /* ··· 1058 1138 1059 1139 1060 1140 static void shrink_active_list(unsigned long nr_pages, struct zone *zone, 1061 - struct scan_control *sc, int priority) 1141 + struct scan_control *sc, int priority, int file) 1062 1142 { 1063 1143 unsigned long pgmoved; 1064 1144 int pgdeactivate = 0; ··· 1068 1148 LIST_HEAD(l_inactive); 1069 1149 struct page *page; 1070 1150 struct pagevec pvec; 1071 - int reclaim_mapped = 0; 1072 - 1073 - if (sc->may_swap) 1074 - reclaim_mapped = calc_reclaim_mapped(sc, zone, priority); 1151 + enum lru_list lru; 1075 1152 1076 1153 lru_add_drain(); 1077 1154 spin_lock_irq(&zone->lru_lock); 1078 1155 pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order, 1079 1156 ISOLATE_ACTIVE, zone, 1080 - sc->mem_cgroup, 1); 1157 + sc->mem_cgroup, 1, file); 1081 1158 /* 1082 1159 * zone->pages_scanned is used for detect zone's oom 1083 1160 * mem_cgroup remembers nr_scan by itself. 
1084 1161 */ 1085 - if (scan_global_lru(sc)) 1162 + if (scan_global_lru(sc)) { 1086 1163 zone->pages_scanned += pgscanned; 1164 + zone->recent_scanned[!!file] += pgmoved; 1165 + } 1087 1166 1088 - __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved); 1167 + if (file) 1168 + __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved); 1169 + else 1170 + __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved); 1089 1171 spin_unlock_irq(&zone->lru_lock); 1090 1172 1091 1173 while (!list_empty(&l_hold)) { 1092 1174 cond_resched(); 1093 1175 page = lru_to_page(&l_hold); 1094 1176 list_del(&page->lru); 1095 - if (page_mapped(page)) { 1096 - if (!reclaim_mapped || 1097 - (total_swap_pages == 0 && PageAnon(page)) || 1098 - page_referenced(page, 0, sc->mem_cgroup)) { 1099 - list_add(&page->lru, &l_active); 1100 - continue; 1101 - } 1102 - } 1103 1177 list_add(&page->lru, &l_inactive); 1104 1178 } 1105 1179 1180 + /* 1181 + * Now put the pages back on the appropriate [file or anon] inactive 1182 + * and active lists. 
1183 + */ 1106 1184 pagevec_init(&pvec, 1); 1107 1185 pgmoved = 0; 1186 + lru = LRU_BASE + file * LRU_FILE; 1108 1187 spin_lock_irq(&zone->lru_lock); 1109 1188 while (!list_empty(&l_inactive)) { 1110 1189 page = lru_to_page(&l_inactive); ··· 1113 1194 VM_BUG_ON(!PageActive(page)); 1114 1195 ClearPageActive(page); 1115 1196 1116 - list_move(&page->lru, &zone->lru[LRU_INACTIVE].list); 1197 + list_move(&page->lru, &zone->lru[lru].list); 1117 1198 mem_cgroup_move_lists(page, false); 1118 1199 pgmoved++; 1119 1200 if (!pagevec_add(&pvec, page)) { 1120 - __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); 1201 + __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); 1121 1202 spin_unlock_irq(&zone->lru_lock); 1122 1203 pgdeactivate += pgmoved; 1123 1204 pgmoved = 0; ··· 1127 1208 spin_lock_irq(&zone->lru_lock); 1128 1209 } 1129 1210 } 1130 - __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); 1211 + __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); 1131 1212 pgdeactivate += pgmoved; 1132 1213 if (buffer_heads_over_limit) { 1133 1214 spin_unlock_irq(&zone->lru_lock); ··· 1136 1217 } 1137 1218 1138 1219 pgmoved = 0; 1220 + lru = LRU_ACTIVE + file * LRU_FILE; 1139 1221 while (!list_empty(&l_active)) { 1140 1222 page = lru_to_page(&l_active); 1141 1223 prefetchw_prev_lru_page(page, &l_active, flags); ··· 1144 1224 SetPageLRU(page); 1145 1225 VM_BUG_ON(!PageActive(page)); 1146 1226 1147 - list_move(&page->lru, &zone->lru[LRU_ACTIVE].list); 1227 + list_move(&page->lru, &zone->lru[lru].list); 1148 1228 mem_cgroup_move_lists(page, true); 1149 1229 pgmoved++; 1150 1230 if (!pagevec_add(&pvec, page)) { 1151 - __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); 1231 + __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); 1152 1232 pgmoved = 0; 1153 1233 spin_unlock_irq(&zone->lru_lock); 1154 1234 if (vm_swap_full()) ··· 1157 1237 spin_lock_irq(&zone->lru_lock); 1158 1238 } 1159 1239 } 1160 - __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); 1240 + __mod_zone_page_state(zone, 
NR_LRU_BASE + lru, pgmoved); 1241 + zone->recent_rotated[!!file] += pgmoved; 1161 1242 1162 1243 __count_zone_vm_events(PGREFILL, zone, pgscanned); 1163 1244 __count_vm_events(PGDEACTIVATE, pgdeactivate); ··· 1169 1248 pagevec_release(&pvec); 1170 1249 } 1171 1250 1172 - static unsigned long shrink_list(enum lru_list l, unsigned long nr_to_scan, 1251 + static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, 1173 1252 struct zone *zone, struct scan_control *sc, int priority) 1174 1253 { 1175 - if (l == LRU_ACTIVE) { 1176 - shrink_active_list(nr_to_scan, zone, sc, priority); 1254 + int file = is_file_lru(lru); 1255 + 1256 + if (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE) { 1257 + shrink_active_list(nr_to_scan, zone, sc, priority, file); 1177 1258 return 0; 1178 1259 } 1179 - return shrink_inactive_list(nr_to_scan, zone, sc); 1260 + return shrink_inactive_list(nr_to_scan, zone, sc, file); 1180 1261 } 1262 + 1263 + /* 1264 + * Determine how aggressively the anon and file LRU lists should be 1265 + * scanned. The relative value of each set of LRU lists is determined 1266 + * by looking at the fraction of the pages scanned we did rotate back 1267 + * onto the active list instead of evict. 1268 + * 1269 + * percent[0] specifies how much pressure to put on ram/swap backed 1270 + * memory, while percent[1] determines pressure on the file LRUs. 1271 + */ 1272 + static void get_scan_ratio(struct zone *zone, struct scan_control *sc, 1273 + unsigned long *percent) 1274 + { 1275 + unsigned long anon, file, free; 1276 + unsigned long anon_prio, file_prio; 1277 + unsigned long ap, fp; 1278 + 1279 + anon = zone_page_state(zone, NR_ACTIVE_ANON) + 1280 + zone_page_state(zone, NR_INACTIVE_ANON); 1281 + file = zone_page_state(zone, NR_ACTIVE_FILE) + 1282 + zone_page_state(zone, NR_INACTIVE_FILE); 1283 + free = zone_page_state(zone, NR_FREE_PAGES); 1284 + 1285 + /* If we have no swap space, do not bother scanning anon pages. 
*/ 1286 + if (nr_swap_pages <= 0) { 1287 + percent[0] = 0; 1288 + percent[1] = 100; 1289 + return; 1290 + } 1291 + 1292 + /* If we have very few page cache pages, force-scan anon pages. */ 1293 + if (unlikely(file + free <= zone->pages_high)) { 1294 + percent[0] = 100; 1295 + percent[1] = 0; 1296 + return; 1297 + } 1298 + 1299 + /* 1300 + * OK, so we have swap space and a fair amount of page cache 1301 + * pages. We use the recently rotated / recently scanned 1302 + * ratios to determine how valuable each cache is. 1303 + * 1304 + * Because workloads change over time (and to avoid overflow) 1305 + * we keep these statistics as a floating average, which ends 1306 + * up weighing recent references more than old ones. 1307 + * 1308 + * anon in [0], file in [1] 1309 + */ 1310 + if (unlikely(zone->recent_scanned[0] > anon / 4)) { 1311 + spin_lock_irq(&zone->lru_lock); 1312 + zone->recent_scanned[0] /= 2; 1313 + zone->recent_rotated[0] /= 2; 1314 + spin_unlock_irq(&zone->lru_lock); 1315 + } 1316 + 1317 + if (unlikely(zone->recent_scanned[1] > file / 4)) { 1318 + spin_lock_irq(&zone->lru_lock); 1319 + zone->recent_scanned[1] /= 2; 1320 + zone->recent_rotated[1] /= 2; 1321 + spin_unlock_irq(&zone->lru_lock); 1322 + } 1323 + 1324 + /* 1325 + * With swappiness at 100, anonymous and file have the same priority. 1326 + * This scanning priority is essentially the inverse of IO cost. 
1327 + */ 1328 + anon_prio = sc->swappiness; 1329 + file_prio = 200 - sc->swappiness; 1330 + 1331 + /* 1332 + * anon recent_rotated[0] 1333 + * %anon = 100 * ----------- / ----------------- * IO cost 1334 + * anon + file rotate_sum 1335 + */ 1336 + ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); 1337 + ap /= zone->recent_rotated[0] + 1; 1338 + 1339 + fp = (file_prio + 1) * (zone->recent_scanned[1] + 1); 1340 + fp /= zone->recent_rotated[1] + 1; 1341 + 1342 + /* Normalize to percentages */ 1343 + percent[0] = 100 * ap / (ap + fp + 1); 1344 + percent[1] = 100 - percent[0]; 1345 + } 1346 + 1181 1347 1182 1348 /* 1183 1349 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. ··· 1275 1267 unsigned long nr[NR_LRU_LISTS]; 1276 1268 unsigned long nr_to_scan; 1277 1269 unsigned long nr_reclaimed = 0; 1270 + unsigned long percent[2]; /* anon @ 0; file @ 1 */ 1278 1271 enum lru_list l; 1279 1272 1280 - if (scan_global_lru(sc)) { 1281 - /* 1282 - * Add one to nr_to_scan just to make sure that the kernel 1283 - * will slowly sift through the active list. 1284 - */ 1285 - for_each_lru(l) { 1286 - zone->lru[l].nr_scan += (zone_page_state(zone, 1287 - NR_LRU_BASE + l) >> priority) + 1; 1273 + get_scan_ratio(zone, sc, percent); 1274 + 1275 + for_each_lru(l) { 1276 + if (scan_global_lru(sc)) { 1277 + int file = is_file_lru(l); 1278 + int scan; 1279 + /* 1280 + * Add one to nr_to_scan just to make sure that the 1281 + * kernel will slowly sift through each list. 
1282 + */ 1283 + scan = zone_page_state(zone, NR_LRU_BASE + l); 1284 + if (priority) { 1285 + scan >>= priority; 1286 + scan = (scan * percent[file]) / 100; 1287 + } 1288 + zone->lru[l].nr_scan += scan + 1; 1288 1289 nr[l] = zone->lru[l].nr_scan; 1289 1290 if (nr[l] >= sc->swap_cluster_max) 1290 1291 zone->lru[l].nr_scan = 0; 1291 1292 else 1292 1293 nr[l] = 0; 1294 + } else { 1295 + /* 1296 + * This reclaim occurs not because zone memory shortage 1297 + * but because memory controller hits its limit. 1298 + * Don't modify zone reclaim related data. 1299 + */ 1300 + nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone, 1301 + priority, l); 1293 1302 } 1294 - } else { 1295 - /* 1296 - * This reclaim occurs not because zone memory shortage but 1297 - * because memory controller hits its limit. 1298 - * Then, don't modify zone reclaim related data. 1299 - */ 1300 - nr[LRU_ACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup, 1301 - zone, priority, LRU_ACTIVE); 1302 - 1303 - nr[LRU_INACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup, 1304 - zone, priority, LRU_INACTIVE); 1305 1303 } 1306 1304 1307 - while (nr[LRU_ACTIVE] || nr[LRU_INACTIVE]) { 1305 + while (nr[LRU_ACTIVE_ANON] || nr[LRU_INACTIVE_ANON] || 1306 + nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { 1308 1307 for_each_lru(l) { 1309 1308 if (nr[l]) { 1310 1309 nr_to_scan = min(nr[l], ··· 1384 1369 1385 1370 return nr_reclaimed; 1386 1371 } 1387 - 1372 + 1388 1373 /* 1389 1374 * This is the main entry point to direct page reclaim. 
1390 1375 * ··· 1427 1412 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) 1428 1413 continue; 1429 1414 1430 - lru_pages += zone_page_state(zone, NR_ACTIVE) 1431 - + zone_page_state(zone, NR_INACTIVE); 1415 + lru_pages += zone_lru_pages(zone); 1432 1416 } 1433 1417 } 1434 1418 ··· 1629 1615 for (i = 0; i <= end_zone; i++) { 1630 1616 struct zone *zone = pgdat->node_zones + i; 1631 1617 1632 - lru_pages += zone_page_state(zone, NR_ACTIVE) 1633 - + zone_page_state(zone, NR_INACTIVE); 1618 + lru_pages += zone_lru_pages(zone); 1634 1619 } 1635 1620 1636 1621 /* ··· 1673 1660 if (zone_is_all_unreclaimable(zone)) 1674 1661 continue; 1675 1662 if (nr_slab == 0 && zone->pages_scanned >= 1676 - (zone_page_state(zone, NR_ACTIVE) 1677 - + zone_page_state(zone, NR_INACTIVE)) * 6) 1663 + (zone_lru_pages(zone) * 6)) 1678 1664 zone_set_flag(zone, 1679 1665 ZONE_ALL_UNRECLAIMABLE); 1680 1666 /* ··· 1727 1715 1728 1716 /* 1729 1717 * The background pageout daemon, started as a kernel thread 1730 - * from the init process. 1718 + * from the init process. 1731 1719 * 1732 1720 * This basically trickles out pages so that we have _some_ 1733 1721 * free memory available even if there is no other activity ··· 1821 1809 wake_up_interruptible(&pgdat->kswapd_wait); 1822 1810 } 1823 1811 1812 + unsigned long global_lru_pages(void) 1813 + { 1814 + return global_page_state(NR_ACTIVE_ANON) 1815 + + global_page_state(NR_ACTIVE_FILE) 1816 + + global_page_state(NR_INACTIVE_ANON) 1817 + + global_page_state(NR_INACTIVE_FILE); 1818 + } 1819 + 1824 1820 #ifdef CONFIG_PM 1825 1821 /* 1826 1822 * Helper function for shrink_all_memory(). 
Tries to reclaim 'nr_pages' pages ··· 1854 1834 1855 1835 for_each_lru(l) { 1856 1836 /* For pass = 0 we don't shrink the active list */ 1857 - if (pass == 0 && l == LRU_ACTIVE) 1837 + if (pass == 0 && 1838 + (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) 1858 1839 continue; 1859 1840 1860 1841 zone->lru[l].nr_scan += ··· 1875 1854 } 1876 1855 1877 1856 return ret; 1878 - } 1879 - 1880 - static unsigned long count_lru_pages(void) 1881 - { 1882 - return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE); 1883 1857 } 1884 1858 1885 1859 /* ··· 1902 1886 1903 1887 current->reclaim_state = &reclaim_state; 1904 1888 1905 - lru_pages = count_lru_pages(); 1889 + lru_pages = global_lru_pages(); 1906 1890 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); 1907 1891 /* If slab caches are huge, it's better to hit them first */ 1908 1892 while (nr_slab >= lru_pages) { ··· 1945 1929 1946 1930 reclaim_state.reclaimed_slab = 0; 1947 1931 shrink_slab(sc.nr_scanned, sc.gfp_mask, 1948 - count_lru_pages()); 1932 + global_lru_pages()); 1949 1933 ret += reclaim_state.reclaimed_slab; 1950 1934 if (ret >= nr_pages) 1951 1935 goto out; ··· 1962 1946 if (!ret) { 1963 1947 do { 1964 1948 reclaim_state.reclaimed_slab = 0; 1965 - shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages()); 1949 + shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages()); 1966 1950 ret += reclaim_state.reclaimed_slab; 1967 1951 } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0); 1968 1952 }
+9 -5
mm/vmstat.c
··· 619 619 static const char * const vmstat_text[] = { 620 620 /* Zoned VM counters */ 621 621 "nr_free_pages", 622 - "nr_inactive", 623 - "nr_active", 622 + "nr_inactive_anon", 623 + "nr_active_anon", 624 + "nr_inactive_file", 625 + "nr_active_file", 624 626 "nr_anon_pages", 625 627 "nr_mapped", 626 628 "nr_file_pages", ··· 690 688 "\n min %lu" 691 689 "\n low %lu" 692 690 "\n high %lu" 693 - "\n scanned %lu (a: %lu i: %lu)" 691 + "\n scanned %lu (aa: %lu ia: %lu af: %lu if: %lu)" 694 692 "\n spanned %lu" 695 693 "\n present %lu", 696 694 zone_page_state(zone, NR_FREE_PAGES), ··· 698 696 zone->pages_low, 699 697 zone->pages_high, 700 698 zone->pages_scanned, 701 - zone->lru[LRU_ACTIVE].nr_scan, 702 - zone->lru[LRU_INACTIVE].nr_scan, 699 + zone->lru[LRU_ACTIVE_ANON].nr_scan, 700 + zone->lru[LRU_INACTIVE_ANON].nr_scan, 701 + zone->lru[LRU_ACTIVE_FILE].nr_scan, 702 + zone->lru[LRU_INACTIVE_FILE].nr_scan, 703 703 zone->spanned_pages, 704 704 zone->present_pages); 705 705