[PATCH] temporarily disable swap token on memory pressure

Some users (hi Zwane) have seen a problem when running a workload that
eats nearly all of physical memory - the system does an OOM kill, even
when there is still a lot of swap free.

The problem appears to be a very big task that is holding the swap
token, and the VM has a very hard time finding any other page in the
system that is swappable.
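
For context, the swap token works by making page_referenced() pretend the
token holder's pages were recently referenced while that task is in the
middle of a page fault (see the mm/rmap.c hunk below), so reclaim keeps
skipping them. A rough stand-alone sketch of that effect;
pretend_referenced() and the in_page_fault field are made-up stand-ins for
page_referenced_one() and the mmap_sem check:

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified stand-ins; the real check lives in page_referenced_one(). */
    struct mm_struct {
            bool in_page_fault;     /* stands in for rwsem_is_locked(&mm->mmap_sem) */
    };

    static struct mm_struct *swap_token_mm;

    static bool has_swap_token(struct mm_struct *mm)
    {
            return mm == swap_token_mm;
    }

    /*
     * The token holder's pages look "referenced" whenever it is faulting, so
     * reclaim keeps skipping them; once the holder owns most of memory there
     * is almost nothing else left to swap, and the OOM killer fires instead.
     */
    static int pretend_referenced(struct mm_struct *mm, struct mm_struct *curr)
    {
            return mm != curr && has_swap_token(mm) && mm->in_page_fault;
    }

    int main(void)
    {
            struct mm_struct hog = { .in_page_fault = true };
            struct mm_struct reclaimer = { .in_page_fault = false };

            swap_token_mm = &hog;   /* the memory hog holds the token */
            printf("hog page treated as referenced: %d\n",
                   pretend_referenced(&hog, &reclaimer));
            return 0;
    }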

Instead of ignoring the swap token when sc->priority reaches 0, we could
simply take the swap token away from the memory hog and make sure we
don't give it back to the memory hog for a few seconds.
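
A minimal user-space sketch of that policy; put_swap_token(), swap_token_mm
and SWAP_TOKEN_CHECK_INTERVAL mirror the names in the patch, while the
jiffies value and the rest of the scaffolding are made up for illustration:

    #include <stdio.h>

    /* Simplified stand-ins for the kernel pieces touched below. */
    struct mm_struct {
            unsigned long swap_token_time;  /* earliest time the mm may re-take the token */
    };

    #define SWAP_TOKEN_CHECK_INTERVAL 100UL /* "a few seconds", in pretend jiffies */

    static unsigned long jiffies;
    static struct mm_struct init_mm;
    static struct mm_struct *swap_token_mm = &init_mm;

    /* Drop the token and make the previous holder wait before re-taking it. */
    static void put_swap_token(struct mm_struct *mm)
    {
            if (mm == swap_token_mm) {
                    mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
                    swap_token_mm = &init_mm;
            }
    }

    /* Called when reclaim reaches priority 0: nobody keeps swapout protection. */
    static void disable_swap_token(void)
    {
            put_swap_token(swap_token_mm);
    }

    int main(void)
    {
            struct mm_struct hog = { 0 };

            swap_token_mm = &hog;           /* the memory hog holds the token */
            jiffies = 1000;
            disable_swap_token();           /* direct reclaim hit priority 0 */

            printf("holder now: %s\n", swap_token_mm == &init_mm ?
                   "init_mm (nobody)" : "the hog");
            printf("hog may re-take the token at jiffies >= %lu (now %lu)\n",
                   hog.swap_token_time, jiffies);
            return 0;
    }

In the patch itself this is disable_swap_token() in include/linux/swap.h,
with the cooldown added to __put_swap_token() in mm/thrash.c and the
priority-0 callers in mm/vmscan.c.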

This patch resolves the problem Zwane ran into.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Rik van Riel and committed by Linus Torvalds
f7b7fd8f a93a117e

5 files changed, 34 insertions(+), 23 deletions(-)

include/linux/rmap.h (+2 -2)
···
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-int page_referenced(struct page *, int is_locked, int ignore_token);
+int page_referenced(struct page *, int is_locked);
 int try_to_unmap(struct page *);
 
 /*
···
 #define anon_vma_prepare(vma) (0)
 #define anon_vma_link(vma) do {} while (0)
 
-#define page_referenced(page,l,i) TestClearPageReferenced(page)
+#define page_referenced(page,l) TestClearPageReferenced(page)
 #define try_to_unmap(page) SWAP_FAIL
 
 #endif /* CONFIG_MMU */
include/linux/swap.h (+6)
···
                 __put_swap_token(mm);
 }
 
+static inline void disable_swap_token(void)
+{
+        put_swap_token(swap_token_mm);
+}
+
 #else /* CONFIG_SWAP */
 
 #define total_swap_pages 0
···
 #define put_swap_token(x) do { } while(0)
 #define grab_swap_token() do { } while(0)
 #define has_swap_token(x) 0
+#define disable_swap_token() do { } while(0)
 
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
mm/rmap.c (+10 -16)
···
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
 static int page_referenced_one(struct page *page,
-        struct vm_area_struct *vma, unsigned int *mapcount, int ignore_token)
+        struct vm_area_struct *vma, unsigned int *mapcount)
 {
         struct mm_struct *mm = vma->vm_mm;
         unsigned long address;
···
 
         /* Pretend the page is referenced if the task has the
            swap token and is in the middle of a page fault. */
-        if (mm != current->mm && !ignore_token && has_swap_token(mm) &&
+        if (mm != current->mm && has_swap_token(mm) &&
                         rwsem_is_locked(&mm->mmap_sem))
                 referenced++;
 
···
         return referenced;
 }
 
-static int page_referenced_anon(struct page *page, int ignore_token)
+static int page_referenced_anon(struct page *page)
 {
         unsigned int mapcount;
         struct anon_vma *anon_vma;
···
 
         mapcount = page_mapcount(page);
         list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-                referenced += page_referenced_one(page, vma, &mapcount,
-                                                  ignore_token);
+                referenced += page_referenced_one(page, vma, &mapcount);
                 if (!mapcount)
                         break;
         }
···
  *
  * This function is only called from page_referenced for object-based pages.
  */
-static int page_referenced_file(struct page *page, int ignore_token)
+static int page_referenced_file(struct page *page)
 {
         unsigned int mapcount;
         struct address_space *mapping = page->mapping;
···
                         referenced++;
                         break;
                 }
-                referenced += page_referenced_one(page, vma, &mapcount,
-                                                  ignore_token);
+                referenced += page_referenced_one(page, vma, &mapcount);
                 if (!mapcount)
                         break;
         }
···
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
  */
-int page_referenced(struct page *page, int is_locked, int ignore_token)
+int page_referenced(struct page *page, int is_locked)
 {
         int referenced = 0;
-
-        if (!swap_token_default_timeout)
-                ignore_token = 1;
 
         if (page_test_and_clear_young(page))
                 referenced++;
···
 
         if (page_mapped(page) && page->mapping) {
                 if (PageAnon(page))
-                        referenced += page_referenced_anon(page, ignore_token);
+                        referenced += page_referenced_anon(page);
                 else if (is_locked)
-                        referenced += page_referenced_file(page, ignore_token);
+                        referenced += page_referenced_file(page);
                 else if (TestSetPageLocked(page))
                         referenced++;
                 else {
                         if (page->mapping)
-                                referenced += page_referenced_file(page,
-                                                                ignore_token);
+                                referenced += page_referenced_file(page);
                         unlock_page(page);
                 }
         }
mm/thrash.c (+7 -3)
···
         /* We have the token. Let others know we still need it. */
         if (has_swap_token(current->mm)) {
                 current->mm->recent_pagein = 1;
+                if (unlikely(!swap_token_default_timeout))
+                        disable_swap_token();
                 return;
         }
 
         if (time_after(jiffies, swap_token_check)) {
 
-                /* Can't get swapout protection if we exceed our RSS limit. */
-                // if (current->mm->rss > current->mm->rlimit_rss)
-                //        return;
+                if (!swap_token_default_timeout) {
+                        swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
+                        return;
+                }
 
                 /* ... or if we recently held the token. */
                 if (time_before(jiffies, current->mm->swap_token_time))
···
 {
         spin_lock(&swap_token_lock);
         if (likely(mm == swap_token_mm)) {
+                mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
                 swap_token_mm = &init_mm;
                 swap_token_check = jiffies;
         }
mm/vmscan.c (+9 -2)
···
                 if (PageWriteback(page))
                         goto keep_locked;
 
-                referenced = page_referenced(page, 1, sc->priority <= 0);
+                referenced = page_referenced(page, 1);
                 /* In active use or really unfreeable? Activate it. */
                 if (referenced && page_mapping_inuse(page))
                         goto activate_locked;
···
                 if (page_mapped(page)) {
                         if (!reclaim_mapped ||
                             (total_swap_pages == 0 && PageAnon(page)) ||
-                            page_referenced(page, 0, sc->priority <= 0)) {
+                            page_referenced(page, 0)) {
                                 list_add(&page->lru, &l_active);
                                 continue;
                         }
···
                 sc.nr_reclaimed = 0;
                 sc.priority = priority;
                 sc.swap_cluster_max = SWAP_CLUSTER_MAX;
+                if (!priority)
+                        disable_swap_token();
                 shrink_caches(zones, &sc);
                 shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
                 if (reclaim_state) {
···
         for (priority = DEF_PRIORITY; priority >= 0; priority--) {
                 int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
                 unsigned long lru_pages = 0;
+
+                /* The swap token gets in the way of swapout... */
+                if (!priority)
+                        disable_swap_token();
 
                 all_zones_ok = 1;
 
···
         sc.nr_reclaimed = 0;
         /* scan at the highest priority */
         sc.priority = 0;
+        disable_swap_token();
 
         if (nr_pages > SWAP_CLUSTER_MAX)
                 sc.swap_cluster_max = nr_pages;