Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: remove swap token code

The swap token code no longer fits in with the current VM model. It
does not play well with cgroups or the better NUMA placement code in
development, since we have only one swap token globally.

It also has the potential to mess with scalability of the system, by
increasing the number of non-reclaimable pages on the active and
inactive anon LRU lists.

Last but not least, the swap token code has been broken for a year
without complaints, as reported by Konstantin Khlebnikov. This suggests
we no longer have much use for it.

The days of sub-1G memory systems with heavy use of swap are over. If
we ever need thrashing-reducing code in the future, we will have to
implement something that does scale.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Bob Picco <bpicco@meloft.net>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Rik van Riel and committed by
Linus Torvalds
e709ffd6 edad9d2c

+2 -307
-11
include/linux/mm_types.h
··· 345 345 /* Architecture-specific MM context */ 346 346 mm_context_t context; 347 347 348 - /* Swap token stuff */ 349 - /* 350 - * Last value of global fault stamp as seen by this process. 351 - * In other words, this value gives an indication of how long 352 - * it has been since this task got the token. 353 - * Look at mm/thrash.c 354 - */ 355 - unsigned int faultstamp; 356 - unsigned int token_priority; 357 - unsigned int last_interval; 358 - 359 348 unsigned long flags; /* Must use atomic bitops to access the bits */ 360 349 361 350 struct core_state *core_state; /* coredumping support */
-35
include/linux/swap.h
··· 355 355 extern int try_to_free_swap(struct page *); 356 356 struct backing_dev_info; 357 357 358 - /* linux/mm/thrash.c */ 359 - extern struct mm_struct *swap_token_mm; 360 - extern void grab_swap_token(struct mm_struct *); 361 - extern void __put_swap_token(struct mm_struct *); 362 - extern void disable_swap_token(struct mem_cgroup *memcg); 363 - 364 - static inline int has_swap_token(struct mm_struct *mm) 365 - { 366 - return (mm == swap_token_mm); 367 - } 368 - 369 - static inline void put_swap_token(struct mm_struct *mm) 370 - { 371 - if (has_swap_token(mm)) 372 - __put_swap_token(mm); 373 - } 374 - 375 358 #ifdef CONFIG_CGROUP_MEM_RES_CTLR 376 359 extern void 377 360 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); ··· 457 474 swp_entry_t entry; 458 475 entry.val = 0; 459 476 return entry; 460 - } 461 - 462 - /* linux/mm/thrash.c */ 463 - static inline void put_swap_token(struct mm_struct *mm) 464 - { 465 - } 466 - 467 - static inline void grab_swap_token(struct mm_struct *mm) 468 - { 469 - } 470 - 471 - static inline int has_swap_token(struct mm_struct *mm) 472 - { 473 - return 0; 474 - } 475 - 476 - static inline void disable_swap_token(struct mem_cgroup *memcg) 477 - { 478 477 } 479 478 480 479 static inline void
-82
include/trace/events/vmscan.h
··· 395 395 show_reclaim_flags(__entry->reclaim_flags)) 396 396 ); 397 397 398 - TRACE_EVENT(replace_swap_token, 399 - TP_PROTO(struct mm_struct *old_mm, 400 - struct mm_struct *new_mm), 401 - 402 - TP_ARGS(old_mm, new_mm), 403 - 404 - TP_STRUCT__entry( 405 - __field(struct mm_struct*, old_mm) 406 - __field(unsigned int, old_prio) 407 - __field(struct mm_struct*, new_mm) 408 - __field(unsigned int, new_prio) 409 - ), 410 - 411 - TP_fast_assign( 412 - __entry->old_mm = old_mm; 413 - __entry->old_prio = old_mm ? old_mm->token_priority : 0; 414 - __entry->new_mm = new_mm; 415 - __entry->new_prio = new_mm->token_priority; 416 - ), 417 - 418 - TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u", 419 - __entry->old_mm, __entry->old_prio, 420 - __entry->new_mm, __entry->new_prio) 421 - ); 422 - 423 - DECLARE_EVENT_CLASS(put_swap_token_template, 424 - TP_PROTO(struct mm_struct *swap_token_mm), 425 - 426 - TP_ARGS(swap_token_mm), 427 - 428 - TP_STRUCT__entry( 429 - __field(struct mm_struct*, swap_token_mm) 430 - ), 431 - 432 - TP_fast_assign( 433 - __entry->swap_token_mm = swap_token_mm; 434 - ), 435 - 436 - TP_printk("token_mm=%p", __entry->swap_token_mm) 437 - ); 438 - 439 - DEFINE_EVENT(put_swap_token_template, put_swap_token, 440 - TP_PROTO(struct mm_struct *swap_token_mm), 441 - TP_ARGS(swap_token_mm) 442 - ); 443 - 444 - DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token, 445 - TP_PROTO(struct mm_struct *swap_token_mm), 446 - TP_ARGS(swap_token_mm), 447 - TP_CONDITION(swap_token_mm != NULL) 448 - ); 449 - 450 - TRACE_EVENT_CONDITION(update_swap_token_priority, 451 - TP_PROTO(struct mm_struct *mm, 452 - unsigned int old_prio, 453 - struct mm_struct *swap_token_mm), 454 - 455 - TP_ARGS(mm, old_prio, swap_token_mm), 456 - 457 - TP_CONDITION(mm->token_priority != old_prio), 458 - 459 - TP_STRUCT__entry( 460 - __field(struct mm_struct*, mm) 461 - __field(unsigned int, old_prio) 462 - __field(unsigned int, new_prio) 463 - __field(struct 
mm_struct*, swap_token_mm) 464 - __field(unsigned int, swap_token_prio) 465 - ), 466 - 467 - TP_fast_assign( 468 - __entry->mm = mm; 469 - __entry->old_prio = old_prio; 470 - __entry->new_prio = mm->token_priority; 471 - __entry->swap_token_mm = swap_token_mm; 472 - __entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0; 473 - ), 474 - 475 - TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u", 476 - __entry->mm, __entry->old_prio, __entry->new_prio, 477 - __entry->swap_token_mm, __entry->swap_token_prio) 478 - ); 479 - 480 398 #endif /* _TRACE_VMSCAN_H */ 481 399 482 400 /* This part must be outside protection */
-9
kernel/fork.c
··· 614 614 list_del(&mm->mmlist); 615 615 spin_unlock(&mmlist_lock); 616 616 } 617 - put_swap_token(mm); 618 617 if (mm->binfmt) 619 618 module_put(mm->binfmt->module); 620 619 mmdrop(mm); ··· 830 831 memcpy(mm, oldmm, sizeof(*mm)); 831 832 mm_init_cpumask(mm); 832 833 833 - /* Initializing for Swap token stuff */ 834 - mm->token_priority = 0; 835 - mm->last_interval = 0; 836 - 837 834 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 838 835 mm->pmd_huge_pte = NULL; 839 836 #endif ··· 908 913 goto fail_nomem; 909 914 910 915 good_mm: 911 - /* Initializing for Swap token stuff */ 912 - mm->token_priority = 0; 913 - mm->last_interval = 0; 914 - 915 916 tsk->mm = mm; 916 917 tsk->active_mm = mm; 917 918 return 0;
+1 -1
mm/Makefile
··· 25 25 obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 26 26 27 27 obj-$(CONFIG_BOUNCE) += bounce.o 28 - obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 28 + obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o 29 29 obj-$(CONFIG_HAS_DMA) += dmapool.o 30 30 obj-$(CONFIG_HUGETLBFS) += hugetlb.o 31 31 obj-$(CONFIG_NUMA) += mempolicy.o
-1
mm/memcontrol.c
··· 5598 5598 if (mm) { 5599 5599 if (mc.to) 5600 5600 mem_cgroup_move_charge(mm); 5601 - put_swap_token(mm); 5602 5601 mmput(mm); 5603 5602 } 5604 5603 if (mc.to)
+1 -1
mm/memory.c
··· 2908 2908 delayacct_set_flag(DELAYACCT_PF_SWAPIN); 2909 2909 page = lookup_swap_cache(entry); 2910 2910 if (!page) { 2911 - grab_swap_token(mm); /* Contend for token _before_ read-in */ 2912 2911 page = swapin_readahead(entry, 2913 2912 GFP_HIGHUSER_MOVABLE, vma, address); 2914 2913 if (!page) { ··· 2937 2938 } 2938 2939 2939 2940 locked = lock_page_or_retry(page, mm, flags); 2941 + 2940 2942 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2941 2943 if (!locked) { 2942 2944 ret |= VM_FAULT_RETRY;
-6
mm/rmap.c
··· 755 755 pte_unmap_unlock(pte, ptl); 756 756 } 757 757 758 - /* Pretend the page is referenced if the task has the 759 - swap token and is in the middle of a page fault. */ 760 - if (mm != current->mm && has_swap_token(mm) && 761 - rwsem_is_locked(&mm->mmap_sem)) 762 - referenced++; 763 - 764 758 (*mapcount)--; 765 759 766 760 if (referenced)
-155
mm/thrash.c
··· 1 - /* 2 - * mm/thrash.c 3 - * 4 - * Copyright (C) 2004, Red Hat, Inc. 5 - * Copyright (C) 2004, Rik van Riel <riel@redhat.com> 6 - * Released under the GPL, see the file COPYING for details. 7 - * 8 - * Simple token based thrashing protection, using the algorithm 9 - * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html 10 - * 11 - * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com> 12 - * Improved algorithm to pass token: 13 - * Each task has a priority which is incremented if it contended 14 - * for the token in an interval less than its previous attempt. 15 - * If the token is acquired, that task's priority is boosted to prevent 16 - * the token from bouncing around too often and to let the task make 17 - * some progress in its execution. 18 - */ 19 - 20 - #include <linux/jiffies.h> 21 - #include <linux/mm.h> 22 - #include <linux/sched.h> 23 - #include <linux/swap.h> 24 - #include <linux/memcontrol.h> 25 - 26 - #include <trace/events/vmscan.h> 27 - 28 - #define TOKEN_AGING_INTERVAL (0xFF) 29 - 30 - static DEFINE_SPINLOCK(swap_token_lock); 31 - struct mm_struct *swap_token_mm; 32 - static struct mem_cgroup *swap_token_memcg; 33 - 34 - #ifdef CONFIG_CGROUP_MEM_RES_CTLR 35 - static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) 36 - { 37 - struct mem_cgroup *memcg; 38 - 39 - memcg = try_get_mem_cgroup_from_mm(mm); 40 - if (memcg) 41 - css_put(mem_cgroup_css(memcg)); 42 - 43 - return memcg; 44 - } 45 - #else 46 - static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm) 47 - { 48 - return NULL; 49 - } 50 - #endif 51 - 52 - void grab_swap_token(struct mm_struct *mm) 53 - { 54 - int current_interval; 55 - unsigned int old_prio = mm->token_priority; 56 - static unsigned int global_faults; 57 - static unsigned int last_aging; 58 - 59 - global_faults++; 60 - 61 - current_interval = global_faults - mm->faultstamp; 62 - 63 - if (!spin_trylock(&swap_token_lock)) 64 - return; 65 - 66 - /* First 
come first served */ 67 - if (!swap_token_mm) 68 - goto replace_token; 69 - 70 - /* 71 - * Usually, we don't need priority aging because long interval faults 72 - * makes priority decrease quickly. But there is one exception. If the 73 - * token owner task is sleeping, it never make long interval faults. 74 - * Thus, we need a priority aging mechanism instead. The requirements 75 - * of priority aging are 76 - * 1) An aging interval is reasonable enough long. Too short aging 77 - * interval makes quick swap token lost and decrease performance. 78 - * 2) The swap token owner task have to get priority aging even if 79 - * it's under sleep. 80 - */ 81 - if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) { 82 - swap_token_mm->token_priority /= 2; 83 - last_aging = global_faults; 84 - } 85 - 86 - if (mm == swap_token_mm) { 87 - mm->token_priority += 2; 88 - goto update_priority; 89 - } 90 - 91 - if (current_interval < mm->last_interval) 92 - mm->token_priority++; 93 - else { 94 - if (likely(mm->token_priority > 0)) 95 - mm->token_priority--; 96 - } 97 - 98 - /* Check if we deserve the token */ 99 - if (mm->token_priority > swap_token_mm->token_priority) 100 - goto replace_token; 101 - 102 - update_priority: 103 - trace_update_swap_token_priority(mm, old_prio, swap_token_mm); 104 - 105 - out: 106 - mm->faultstamp = global_faults; 107 - mm->last_interval = current_interval; 108 - spin_unlock(&swap_token_lock); 109 - return; 110 - 111 - replace_token: 112 - mm->token_priority += 2; 113 - trace_replace_swap_token(swap_token_mm, mm); 114 - swap_token_mm = mm; 115 - swap_token_memcg = swap_token_memcg_from_mm(mm); 116 - last_aging = global_faults; 117 - goto out; 118 - } 119 - 120 - /* Called on process exit. 
*/ 121 - void __put_swap_token(struct mm_struct *mm) 122 - { 123 - spin_lock(&swap_token_lock); 124 - if (likely(mm == swap_token_mm)) { 125 - trace_put_swap_token(swap_token_mm); 126 - swap_token_mm = NULL; 127 - swap_token_memcg = NULL; 128 - } 129 - spin_unlock(&swap_token_lock); 130 - } 131 - 132 - static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b) 133 - { 134 - if (!a) 135 - return true; 136 - if (!b) 137 - return true; 138 - if (a == b) 139 - return true; 140 - return false; 141 - } 142 - 143 - void disable_swap_token(struct mem_cgroup *memcg) 144 - { 145 - /* memcg reclaim don't disable unrelated mm token. */ 146 - if (match_memcg(memcg, swap_token_memcg)) { 147 - spin_lock(&swap_token_lock); 148 - if (match_memcg(memcg, swap_token_memcg)) { 149 - trace_disable_swap_token(swap_token_mm); 150 - swap_token_mm = NULL; 151 - swap_token_memcg = NULL; 152 - } 153 - spin_unlock(&swap_token_lock); 154 - } 155 - }
-6
mm/vmscan.c
··· 2352 2352 2353 2353 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 2354 2354 sc->nr_scanned = 0; 2355 - if (!priority) 2356 - disable_swap_token(sc->target_mem_cgroup); 2357 2355 aborted_reclaim = shrink_zones(priority, zonelist, sc); 2358 2356 2359 2357 /* ··· 2701 2703 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 2702 2704 unsigned long lru_pages = 0; 2703 2705 int has_under_min_watermark_zone = 0; 2704 - 2705 - /* The swap token gets in the way of swapout... */ 2706 - if (!priority) 2707 - disable_swap_token(NULL); 2708 2706 2709 2707 all_zones_ok = 1; 2710 2708 balanced = 0;