Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git — tags: kernel, os, linux

mm: remove vmacache

By using the maple tree and the maple tree state, the vmacache is no
longer beneficial and is complicating the VMA code. Remove the vmacache
to reduce the work in keeping it up to date and code complexity.

Link: https://lkml.kernel.org/r/20220906194824.2110408-26-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Liam R. Howlett; committed by Andrew Morton.
Commit 7964cf8c (parent 4dd1b841)

+9 -267
-3
fs/exec.c
··· 28 28 #include <linux/file.h> 29 29 #include <linux/fdtable.h> 30 30 #include <linux/mm.h> 31 - #include <linux/vmacache.h> 32 31 #include <linux/stat.h> 33 32 #include <linux/fcntl.h> 34 33 #include <linux/swap.h> ··· 1026 1027 activate_mm(active_mm, mm); 1027 1028 if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) 1028 1029 local_irq_enable(); 1029 - tsk->mm->vmacache_seqnum = 0; 1030 - vmacache_flush(tsk); 1031 1030 task_unlock(tsk); 1032 1031 lru_gen_use_mm(mm); 1033 1032
-1
fs/proc/task_mmu.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/pagewalk.h> 3 - #include <linux/vmacache.h> 4 3 #include <linux/mm_inline.h> 5 4 #include <linux/hugetlb.h> 6 5 #include <linux/huge_mm.h>
-1
include/linux/mm_types.h
··· 475 475 struct { 476 476 struct vm_area_struct *mmap; /* list of VMAs */ 477 477 struct maple_tree mm_mt; 478 - u64 vmacache_seqnum; /* per-thread vmacache */ 479 478 #ifdef CONFIG_MMU 480 479 unsigned long (*get_unmapped_area) (struct file *filp, 481 480 unsigned long addr, unsigned long len,
-12
include/linux/mm_types_task.h
··· 25 25 #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) 26 26 27 27 /* 28 - * The per task VMA cache array: 29 - */ 30 - #define VMACACHE_BITS 2 31 - #define VMACACHE_SIZE (1U << VMACACHE_BITS) 32 - #define VMACACHE_MASK (VMACACHE_SIZE - 1) 33 - 34 - struct vmacache { 35 - u64 seqnum; 36 - struct vm_area_struct *vmas[VMACACHE_SIZE]; 37 - }; 38 - 39 - /* 40 28 * When updating this, please also update struct resident_page_types[] in 41 29 * kernel/fork.c 42 30 */
-1
include/linux/sched.h
··· 861 861 struct mm_struct *active_mm; 862 862 863 863 /* Per-thread vma caching: */ 864 - struct vmacache vmacache; 865 864 866 865 #ifdef SPLIT_RSS_COUNTING 867 866 struct task_rss_stat rss_stat;
-4
include/linux/vm_event_item.h
··· 129 129 NR_TLB_LOCAL_FLUSH_ALL, 130 130 NR_TLB_LOCAL_FLUSH_ONE, 131 131 #endif /* CONFIG_DEBUG_TLBFLUSH */ 132 - #ifdef CONFIG_DEBUG_VM_VMACACHE 133 - VMACACHE_FIND_CALLS, 134 - VMACACHE_FIND_HITS, 135 - #endif 136 132 #ifdef CONFIG_SWAP 137 133 SWAP_RA, 138 134 SWAP_RA_HIT,
-28
include/linux/vmacache.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - #ifndef __LINUX_VMACACHE_H 3 - #define __LINUX_VMACACHE_H 4 - 5 - #include <linux/sched.h> 6 - #include <linux/mm.h> 7 - 8 - static inline void vmacache_flush(struct task_struct *tsk) 9 - { 10 - memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); 11 - } 12 - 13 - extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); 14 - extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, 15 - unsigned long addr); 16 - 17 - #ifndef CONFIG_MMU 18 - extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, 19 - unsigned long start, 20 - unsigned long end); 21 - #endif 22 - 23 - static inline void vmacache_invalidate(struct mm_struct *mm) 24 - { 25 - mm->vmacache_seqnum++; 26 - } 27 - 28 - #endif /* __LINUX_VMACACHE_H */
-6
include/linux/vmstat.h
··· 125 125 #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) 126 126 #endif 127 127 128 - #ifdef CONFIG_DEBUG_VM_VMACACHE 129 - #define count_vm_vmacache_event(x) count_vm_event(x) 130 - #else 131 - #define count_vm_vmacache_event(x) do {} while (0) 132 - #endif 133 - 134 128 #define __count_zid_vm_events(item, zid, delta) \ 135 129 __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) 136 130
-12
kernel/debug/debug_core.c
··· 50 50 #include <linux/pid.h> 51 51 #include <linux/smp.h> 52 52 #include <linux/mm.h> 53 - #include <linux/vmacache.h> 54 53 #include <linux/rcupdate.h> 55 54 #include <linux/irq.h> 56 55 #include <linux/security.h> ··· 281 282 { 282 283 if (!CACHE_FLUSH_IS_SAFE) 283 284 return; 284 - 285 - if (current->mm) { 286 - int i; 287 - 288 - for (i = 0; i < VMACACHE_SIZE; i++) { 289 - if (!current->vmacache.vmas[i]) 290 - continue; 291 - flush_cache_range(current->vmacache.vmas[i], 292 - addr, addr + BREAK_INSTR_SIZE); 293 - } 294 - } 295 285 296 286 /* Force flush instruction cache if it was outside the mm */ 297 287 flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
-5
kernel/fork.c
··· 43 43 #include <linux/fs.h> 44 44 #include <linux/mm.h> 45 45 #include <linux/mm_inline.h> 46 - #include <linux/vmacache.h> 47 46 #include <linux/nsproxy.h> 48 47 #include <linux/capability.h> 49 48 #include <linux/cpu.h> ··· 1127 1128 mm->mmap = NULL; 1128 1129 mt_init_flags(&mm->mm_mt, MM_MT_FLAGS); 1129 1130 mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock); 1130 - mm->vmacache_seqnum = 0; 1131 1131 atomic_set(&mm->mm_users, 1); 1132 1132 atomic_set(&mm->mm_count, 1); 1133 1133 seqcount_init(&mm->write_protect_seq); ··· 1582 1584 oldmm = current->mm; 1583 1585 if (!oldmm) 1584 1586 return 0; 1585 - 1586 - /* initialize the new vmacache entries */ 1587 - vmacache_flush(tsk); 1588 1587 1589 1588 if (clone_flags & CLONE_VM) { 1590 1589 mmget(oldmm);
-8
lib/Kconfig.debug
··· 812 812 813 813 If unsure, say N. 814 814 815 - config DEBUG_VM_VMACACHE 816 - bool "Debug VMA caching" 817 - depends on DEBUG_VM 818 - help 819 - Enable this to turn on VMA caching debug information. Doing so 820 - can cause significant overhead, so only enable it in non-production 821 - environments. 822 - 823 815 config DEBUG_VM_MAPLE_TREE 824 816 bool "Debug VM maple trees" 825 817 depends on DEBUG_VM
+1 -1
mm/Makefile
··· 52 52 readahead.o swap.o truncate.o vmscan.o shmem.o \ 53 53 util.o mmzone.o vmstat.o backing-dev.o \ 54 54 mm_init.o percpu.o slab_common.o \ 55 - compaction.o vmacache.o \ 55 + compaction.o \ 56 56 interval_tree.o list_lru.o workingset.o \ 57 57 debug.o gup.o mmap_lock.o $(mmu-y) 58 58
+2 -2
mm/debug.c
··· 155 155 156 156 void dump_mm(const struct mm_struct *mm) 157 157 { 158 - pr_emerg("mm %px mmap %px seqnum %llu task_size %lu\n" 158 + pr_emerg("mm %px mmap %px task_size %lu\n" 159 159 #ifdef CONFIG_MMU 160 160 "get_unmapped_area %px\n" 161 161 #endif ··· 183 183 "tlb_flush_pending %d\n" 184 184 "def_flags: %#lx(%pGv)\n", 185 185 186 - mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size, 186 + mm, mm->mmap, mm->task_size, 187 187 #ifdef CONFIG_MMU 188 188 mm->get_unmapped_area, 189 189 #endif
+2 -29
mm/mmap.c
··· 14 14 #include <linux/backing-dev.h> 15 15 #include <linux/mm.h> 16 16 #include <linux/mm_inline.h> 17 - #include <linux/vmacache.h> 18 17 #include <linux/shm.h> 19 18 #include <linux/mman.h> 20 19 #include <linux/pagemap.h> ··· 679 680 /* Remove from mm linked list - also updates highest_vm_end */ 680 681 __vma_unlink_list(mm, next); 681 682 682 - /* Kill the cache */ 683 - vmacache_invalidate(mm); 684 - 685 683 if (file) 686 684 __remove_shared_vm_struct(next, file, mapping); 687 685 ··· 919 923 __vma_unlink_list(mm, next); 920 924 if (remove_next == 2) 921 925 __vma_unlink_list(mm, next_next); 922 - /* Kill the cache */ 923 - vmacache_invalidate(mm); 924 926 925 927 if (file) { 926 928 __remove_shared_vm_struct(next, file, mapping); ··· 2227 2233 unsigned long start_addr, 2228 2234 unsigned long end_addr) 2229 2235 { 2230 - struct vm_area_struct *vma; 2231 2236 unsigned long index = start_addr; 2232 2237 2233 2238 mmap_assert_locked(mm); 2234 - /* Check the cache first. */ 2235 - vma = vmacache_find(mm, start_addr); 2236 - if (likely(vma)) 2237 - return vma; 2238 - 2239 - vma = mt_find(&mm->mm_mt, &index, end_addr - 1); 2240 - if (vma) 2241 - vmacache_update(start_addr, vma); 2242 - return vma; 2239 + return mt_find(&mm->mm_mt, &index, end_addr - 1); 2243 2240 } 2244 2241 EXPORT_SYMBOL(find_vma_intersection); 2245 2242 ··· 2244 2259 */ 2245 2260 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 2246 2261 { 2247 - struct vm_area_struct *vma; 2248 2262 unsigned long index = addr; 2249 2263 2250 2264 mmap_assert_locked(mm); 2251 - /* Check the cache first. */ 2252 - vma = vmacache_find(mm, addr); 2253 - if (likely(vma)) 2254 - return vma; 2255 - 2256 - vma = mt_find(&mm->mm_mt, &index, ULONG_MAX); 2257 - if (vma) 2258 - vmacache_update(addr, vma); 2259 - return vma; 2265 + return mt_find(&mm->mm_mt, &index, ULONG_MAX); 2260 2266 } 2261 2267 EXPORT_SYMBOL(find_vma); 2262 2268 ··· 2635 2659 else 2636 2660 mm->highest_vm_end = prev ? 
vm_end_gap(prev) : 0; 2637 2661 tail_vma->vm_next = NULL; 2638 - 2639 - /* Kill the cache */ 2640 - vmacache_invalidate(mm); 2641 2662 2642 2663 /* 2643 2664 * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
+4 -33
mm/nommu.c
··· 19 19 #include <linux/export.h> 20 20 #include <linux/mm.h> 21 21 #include <linux/sched/mm.h> 22 - #include <linux/vmacache.h> 23 22 #include <linux/mman.h> 24 23 #include <linux/swap.h> 25 24 #include <linux/file.h> ··· 597 598 */ 598 599 static void delete_vma_from_mm(struct vm_area_struct *vma) 599 600 { 600 - int i; 601 - struct address_space *mapping; 602 - struct mm_struct *mm = vma->vm_mm; 603 - struct task_struct *curr = current; 604 601 MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0); 605 602 606 - mm->map_count--; 607 - for (i = 0; i < VMACACHE_SIZE; i++) { 608 - /* if the vma is cached, invalidate the entire cache */ 609 - if (curr->vmacache.vmas[i] == vma) { 610 - vmacache_invalidate(mm); 611 - break; 612 - } 613 - } 614 - 603 + vma->vm_mm->map_count--; 615 604 /* remove the VMA from the mapping */ 616 605 if (vma->vm_file) { 606 + struct address_space *mapping; 617 607 mapping = vma->vm_file->f_mapping; 618 608 619 609 i_mmap_lock_write(mapping); ··· 614 626 615 627 /* remove from the MM's tree and list */ 616 628 vma_mas_remove(vma, &mas); 617 - __vma_unlink_list(mm, vma); 629 + __vma_unlink_list(vma->vm_mm, vma); 618 630 } 619 631 620 632 /* ··· 647 659 */ 648 660 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 649 661 { 650 - struct vm_area_struct *vma; 651 662 MA_STATE(mas, &mm->mm_mt, addr, addr); 652 663 653 - /* check the cache first */ 654 - vma = vmacache_find(mm, addr); 655 - if (likely(vma)) 656 - return vma; 657 - 658 - vma = mas_walk(&mas); 659 - 660 - if (vma) 661 - vmacache_update(addr, vma); 662 - 663 - return vma; 664 + return mas_walk(&mas); 664 665 } 665 666 EXPORT_SYMBOL(find_vma); 666 667 ··· 683 706 unsigned long end = addr + len; 684 707 MA_STATE(mas, &mm->mm_mt, addr, addr); 685 708 686 - /* check the cache first */ 687 - vma = vmacache_find_exact(mm, addr, end); 688 - if (vma) 689 - return vma; 690 - 691 709 vma = mas_walk(&mas); 692 710 if (!vma) 693 711 return NULL; ··· 691 719 if (vma->vm_end != end) 
692 720 return NULL; 693 721 694 - vmacache_update(addr, vma); 695 722 return vma; 696 723 } 697 724
-117
mm/vmacache.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * Copyright (C) 2014 Davidlohr Bueso. 4 - */ 5 - #include <linux/sched/signal.h> 6 - #include <linux/sched/task.h> 7 - #include <linux/mm.h> 8 - #include <linux/vmacache.h> 9 - 10 - /* 11 - * Hash based on the pmd of addr if configured with MMU, which provides a good 12 - * hit rate for workloads with spatial locality. Otherwise, use pages. 13 - */ 14 - #ifdef CONFIG_MMU 15 - #define VMACACHE_SHIFT PMD_SHIFT 16 - #else 17 - #define VMACACHE_SHIFT PAGE_SHIFT 18 - #endif 19 - #define VMACACHE_HASH(addr) ((addr >> VMACACHE_SHIFT) & VMACACHE_MASK) 20 - 21 - /* 22 - * This task may be accessing a foreign mm via (for example) 23 - * get_user_pages()->find_vma(). The vmacache is task-local and this 24 - * task's vmacache pertains to a different mm (ie, its own). There is 25 - * nothing we can do here. 26 - * 27 - * Also handle the case where a kernel thread has adopted this mm via 28 - * kthread_use_mm(). That kernel thread's vmacache is not applicable to this mm. 29 - */ 30 - static inline bool vmacache_valid_mm(struct mm_struct *mm) 31 - { 32 - return current->mm == mm && !(current->flags & PF_KTHREAD); 33 - } 34 - 35 - void vmacache_update(unsigned long addr, struct vm_area_struct *newvma) 36 - { 37 - if (vmacache_valid_mm(newvma->vm_mm)) 38 - current->vmacache.vmas[VMACACHE_HASH(addr)] = newvma; 39 - } 40 - 41 - static bool vmacache_valid(struct mm_struct *mm) 42 - { 43 - struct task_struct *curr; 44 - 45 - if (!vmacache_valid_mm(mm)) 46 - return false; 47 - 48 - curr = current; 49 - if (mm->vmacache_seqnum != curr->vmacache.seqnum) { 50 - /* 51 - * First attempt will always be invalid, initialize 52 - * the new cache for this task here. 
53 - */ 54 - curr->vmacache.seqnum = mm->vmacache_seqnum; 55 - vmacache_flush(curr); 56 - return false; 57 - } 58 - return true; 59 - } 60 - 61 - struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr) 62 - { 63 - int idx = VMACACHE_HASH(addr); 64 - int i; 65 - 66 - count_vm_vmacache_event(VMACACHE_FIND_CALLS); 67 - 68 - if (!vmacache_valid(mm)) 69 - return NULL; 70 - 71 - for (i = 0; i < VMACACHE_SIZE; i++) { 72 - struct vm_area_struct *vma = current->vmacache.vmas[idx]; 73 - 74 - if (vma) { 75 - #ifdef CONFIG_DEBUG_VM_VMACACHE 76 - if (WARN_ON_ONCE(vma->vm_mm != mm)) 77 - break; 78 - #endif 79 - if (vma->vm_start <= addr && vma->vm_end > addr) { 80 - count_vm_vmacache_event(VMACACHE_FIND_HITS); 81 - return vma; 82 - } 83 - } 84 - if (++idx == VMACACHE_SIZE) 85 - idx = 0; 86 - } 87 - 88 - return NULL; 89 - } 90 - 91 - #ifndef CONFIG_MMU 92 - struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, 93 - unsigned long start, 94 - unsigned long end) 95 - { 96 - int idx = VMACACHE_HASH(start); 97 - int i; 98 - 99 - count_vm_vmacache_event(VMACACHE_FIND_CALLS); 100 - 101 - if (!vmacache_valid(mm)) 102 - return NULL; 103 - 104 - for (i = 0; i < VMACACHE_SIZE; i++) { 105 - struct vm_area_struct *vma = current->vmacache.vmas[idx]; 106 - 107 - if (vma && vma->vm_start == start && vma->vm_end == end) { 108 - count_vm_vmacache_event(VMACACHE_FIND_HITS); 109 - return vma; 110 - } 111 - if (++idx == VMACACHE_SIZE) 112 - idx = 0; 113 - } 114 - 115 - return NULL; 116 - } 117 - #endif
-4
mm/vmstat.c
··· 1389 1389 "nr_tlb_local_flush_one", 1390 1390 #endif /* CONFIG_DEBUG_TLBFLUSH */ 1391 1391 1392 - #ifdef CONFIG_DEBUG_VM_VMACACHE 1393 - "vmacache_find_calls", 1394 - "vmacache_find_hits", 1395 - #endif 1396 1392 #ifdef CONFIG_SWAP 1397 1393 "swap_ra", 1398 1394 "swap_ra_hit",