[PATCH] temporarily disable swap token on memory pressure

Some users (hi Zwane) have seen a problem when running a workload that
eats nearly all of physical memory - the system does an OOM kill, even
when there is still a lot of swap free.

The problem appears to be a very big task that is holding the swap
token, and the VM has a very hard time finding any other page in the
system that is swappable.

Instead of ignoring the swap token when sc->priority reaches 0, simply
take the swap token away from the memory hog and make sure it is not
given back to that process for a few seconds.
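
In outline (a simplified sketch pieced together from the hunks below,
not the literal diff):

	/* Hand the token back to init_mm, whoever currently holds it. */
	static inline void disable_swap_token(void)
	{
		put_swap_token(swap_token_mm);
	}

	/* In each reclaim loop, once scanning reaches maximum pressure: */
	if (!priority)		/* sc->priority has fallen to 0 */
		disable_swap_token();

The "don't give it back for a few seconds" half lives in mm/thrash.c,
in __put_swap_token() and grab_swap_token().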

This patch resolves the problem Zwane ran into.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Rik van Riel and committed by Linus Torvalds
(f7b7fd8f, a93a117e)

5 files changed, 34 insertions(+), 23 deletions(-)
include/linux/rmap.h (+2 -2)
···
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-int page_referenced(struct page *, int is_locked, int ignore_token);
+int page_referenced(struct page *, int is_locked);
 int try_to_unmap(struct page *);

 /*
···
 #define anon_vma_prepare(vma) (0)
 #define anon_vma_link(vma) do {} while (0)

-#define page_referenced(page,l,i) TestClearPageReferenced(page)
+#define page_referenced(page,l) TestClearPageReferenced(page)
 #define try_to_unmap(page) SWAP_FAIL

 #endif /* CONFIG_MMU */
include/linux/swap.h (+6)
···
 		__put_swap_token(mm);
 }

+static inline void disable_swap_token(void)
+{
+	put_swap_token(swap_token_mm);
+}
+
 #else /* CONFIG_SWAP */

 #define total_swap_pages 0
···
 #define put_swap_token(x) do { } while(0)
 #define grab_swap_token() do { } while(0)
 #define has_swap_token(x) 0
+#define disable_swap_token() do { } while(0)

 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
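
Why put_swap_token(swap_token_mm) suffices: put_swap_token() only
forwards to __put_swap_token() when its argument currently holds the
token, which is true of swap_token_mm by definition. For context, the
pre-existing helpers look roughly like this (reconstructed from the
2.6-era header, so treat the exact bodies as an approximation):

	static inline int has_swap_token(struct mm_struct *mm)
	{
		return (mm == swap_token_mm);
	}

	static inline void put_swap_token(struct mm_struct *mm)
	{
		if (has_swap_token(mm))	/* trivially true for swap_token_mm */
			__put_swap_token(mm);
	}

Note that the !CONFIG_SWAP stub expands to a no-op, so the new callers
in mm/vmscan.c need no #ifdef.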
mm/rmap.c (+10 -16)
···
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
 static int page_referenced_one(struct page *page,
-	struct vm_area_struct *vma, unsigned int *mapcount, int ignore_token)
+	struct vm_area_struct *vma, unsigned int *mapcount)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
···
 	/* Pretend the page is referenced if the task has the
 	   swap token and is in the middle of a page fault. */
-	if (mm != current->mm && !ignore_token && has_swap_token(mm) &&
+	if (mm != current->mm && has_swap_token(mm) &&
 			rwsem_is_locked(&mm->mmap_sem))
 		referenced++;
···
 	return referenced;
 }

-static int page_referenced_anon(struct page *page, int ignore_token)
+static int page_referenced_anon(struct page *page)
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
···
 	mapcount = page_mapcount(page);
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		referenced += page_referenced_one(page, vma, &mapcount,
-						  ignore_token);
+		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
 			break;
 	}
···
  *
  * This function is only called from page_referenced for object-based pages.
  */
-static int page_referenced_file(struct page *page, int ignore_token)
+static int page_referenced_file(struct page *page)
 {
 	unsigned int mapcount;
 	struct address_space *mapping = page->mapping;
···
 			referenced++;
 			break;
 		}
-		referenced += page_referenced_one(page, vma, &mapcount,
-						  ignore_token);
+		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
 			break;
 	}
···
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
  */
-int page_referenced(struct page *page, int is_locked, int ignore_token)
+int page_referenced(struct page *page, int is_locked)
 {
 	int referenced = 0;
-
-	if (!swap_token_default_timeout)
-		ignore_token = 1;

 	if (page_test_and_clear_young(page))
 		referenced++;
···
 	if (page_mapped(page) && page->mapping) {
 		if (PageAnon(page))
-			referenced += page_referenced_anon(page, ignore_token);
+			referenced += page_referenced_anon(page);
 		else if (is_locked)
-			referenced += page_referenced_file(page, ignore_token);
+			referenced += page_referenced_file(page);
 		else if (TestSetPageLocked(page))
 			referenced++;
 		else {
 			if (page->mapping)
-				referenced += page_referenced_file(page,
-								ignore_token);
+				referenced += page_referenced_file(page);
 			unlock_page(page);
 		}
 	}
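
The swap-token test in page_referenced_one() is the piece that
survives; an annotated copy of the patched check (comments here are
mine, not part of the patch):

	/* A task that holds the swap token and is mid-fault (mmap_sem
	 * held) gets its pages counted as referenced, shielding them
	 * from reclaim. Once disable_swap_token() has parked the token
	 * on init_mm, no user mm passes has_swap_token(), so the
	 * memory hog's pages become evictable again. */
	if (mm != current->mm && has_swap_token(mm) &&
			rwsem_is_locked(&mm->mmap_sem))
		referenced++;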
mm/thrash.c (+7 -3)
···
 	/* We have the token. Let others know we still need it. */
 	if (has_swap_token(current->mm)) {
 		current->mm->recent_pagein = 1;
+		if (unlikely(!swap_token_default_timeout))
+			disable_swap_token();
 		return;
 	}

 	if (time_after(jiffies, swap_token_check)) {

-		/* Can't get swapout protection if we exceed our RSS limit. */
-		// if (current->mm->rss > current->mm->rlimit_rss)
-		//	return;
+		if (!swap_token_default_timeout) {
+			swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
+			return;
+		}

 		/* ... or if we recently held the token. */
 		if (time_before(jiffies, current->mm->swap_token_time))
···
 {
 	spin_lock(&swap_token_lock);
 	if (likely(mm == swap_token_mm)) {
+		mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
 		swap_token_mm = &init_mm;
 		swap_token_check = jiffies;
 	}
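
These two hunks implement the "don't give it back for a few seconds"
part. __put_swap_token() now stamps the departing holder as if it had
held the token until one check interval from now, and the pre-existing
"recently held" test in grab_swap_token() then refuses that mm.
Roughly:

	/* __put_swap_token(): mm loses the token, with a cooldown. */
	mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL;

	/* grab_swap_token(), pre-existing test: the evicted hog cannot
	 * retake the token until the interval has elapsed. */
	if (time_before(jiffies, current->mm->swap_token_time))
		return;

The swap_token_default_timeout == 0 branches handle the case where the
administrator has disabled the mechanism via the swap token timeout
tunable: the current holder drops the token on its next grab attempt,
and no new holder is selected.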
mm/vmscan.c (+9 -2)
···
 		if (PageWriteback(page))
 			goto keep_locked;

-		referenced = page_referenced(page, 1, sc->priority <= 0);
+		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable? Activate it. */
 		if (referenced && page_mapping_inuse(page))
 			goto activate_locked;
···
 		if (page_mapped(page)) {
 			if (!reclaim_mapped ||
 			    (total_swap_pages == 0 && PageAnon(page)) ||
-			    page_referenced(page, 0, sc->priority <= 0)) {
+			    page_referenced(page, 0)) {
 				list_add(&page->lru, &l_active);
 				continue;
 			}
···
 		sc.nr_reclaimed = 0;
 		sc.priority = priority;
 		sc.swap_cluster_max = SWAP_CLUSTER_MAX;
+		if (!priority)
+			disable_swap_token();
 		shrink_caches(zones, &sc);
 		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
 		if (reclaim_state) {
···
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
+
+		/* The swap token gets in the way of swapout... */
+		if (!priority)
+			disable_swap_token();

 		all_zones_ok = 1;
···
 	sc.nr_reclaimed = 0;
 	/* scan at the highest priority */
 	sc.priority = 0;
+	disable_swap_token();

 	if (nr_pages > SWAP_CLUSTER_MAX)
 		sc.swap_cluster_max = nr_pages;
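
All three reclaim entry points now drop the token once they scan at
the highest pressure (priority 0). Summarized, with enclosing function
names taken from the mm/vmscan.c of this era (contextual, not part of
the diff):

	/* try_to_free_pages(): direct reclaim, priority counts down */
	if (!priority)
		disable_swap_token();

	/* balance_pgdat(): the kswapd loop, same condition */
	if (!priority)
		disable_swap_token();

	/* zone_reclaim(): always scans at priority 0, so always drops it */
	disable_swap_token();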