Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: Convert i_mmap_lock to a mutex

Straightforward conversion of i_mmap_lock to a mutex.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Peter Zijlstra; committed by Linus Torvalds.
3d48ae45 97a89413

+58 -58
+1 -1
Documentation/lockstat.txt
··· 136 136 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 137 137 &inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74 138 138 &zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06 139 - &inode->i_data.i_mmap_lock: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 139 + &inode->i_data.i_mmap_mutex: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 140 140 &q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52 141 141 &rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62 142 142 &rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63
+1 -1
Documentation/vm/locking
··· 66 66 expand_stack(), it is hard to come up with a destructive scenario without 67 67 having the vmlist protection in this case. 68 68 69 - The page_table_lock nests with the inode i_mmap_lock and the kmem cache 69 + The page_table_lock nests with the inode i_mmap_mutex and the kmem cache 70 70 c_spinlock spinlocks. This is okay, since the kmem code asks for pages after 71 71 dropping c_spinlock. The page_table_lock also nests with pagecache_lock and 72 72 pagemap_lru_lock spinlocks, and no code asks for memory with these locks
+2 -2
arch/x86/mm/hugetlbpage.c
··· 72 72 if (!vma_shareable(vma, addr)) 73 73 return; 74 74 75 - spin_lock(&mapping->i_mmap_lock); 75 + mutex_lock(&mapping->i_mmap_mutex); 76 76 vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { 77 77 if (svma == vma) 78 78 continue; ··· 97 97 put_page(virt_to_page(spte)); 98 98 spin_unlock(&mm->page_table_lock); 99 99 out: 100 - spin_unlock(&mapping->i_mmap_lock); 100 + mutex_unlock(&mapping->i_mmap_mutex); 101 101 } 102 102 103 103 /*
+2 -2
fs/hugetlbfs/inode.c
··· 412 412 pgoff = offset >> PAGE_SHIFT; 413 413 414 414 i_size_write(inode, offset); 415 - spin_lock(&mapping->i_mmap_lock); 415 + mutex_lock(&mapping->i_mmap_mutex); 416 416 if (!prio_tree_empty(&mapping->i_mmap)) 417 417 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 418 - spin_unlock(&mapping->i_mmap_lock); 418 + mutex_unlock(&mapping->i_mmap_mutex); 419 419 truncate_hugepages(inode, offset); 420 420 return 0; 421 421 }
+1 -1
fs/inode.c
··· 326 326 memset(mapping, 0, sizeof(*mapping)); 327 327 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); 328 328 spin_lock_init(&mapping->tree_lock); 329 - spin_lock_init(&mapping->i_mmap_lock); 329 + mutex_init(&mapping->i_mmap_mutex); 330 330 INIT_LIST_HEAD(&mapping->private_list); 331 331 spin_lock_init(&mapping->private_lock); 332 332 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+1 -1
include/linux/fs.h
··· 634 634 unsigned int i_mmap_writable;/* count VM_SHARED mappings */ 635 635 struct prio_tree_root i_mmap; /* tree of private and shared mappings */ 636 636 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ 637 - spinlock_t i_mmap_lock; /* protect tree, count, list */ 637 + struct mutex i_mmap_mutex; /* protect tree, count, list */ 638 638 unsigned long nrpages; /* number of total pages */ 639 639 pgoff_t writeback_index;/* writeback starts here */ 640 640 const struct address_space_operations *a_ops; /* methods */
+1 -1
include/linux/mmu_notifier.h
··· 150 150 * Therefore notifier chains can only be traversed when either 151 151 * 152 152 * 1. mmap_sem is held. 153 - * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock). 153 + * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->lock). 154 154 * 3. No other concurrent thread can access the list (release) 155 155 */ 156 156 struct mmu_notifier {
+2 -2
kernel/fork.c
··· 383 383 get_file(file); 384 384 if (tmp->vm_flags & VM_DENYWRITE) 385 385 atomic_dec(&inode->i_writecount); 386 - spin_lock(&mapping->i_mmap_lock); 386 + mutex_lock(&mapping->i_mmap_mutex); 387 387 if (tmp->vm_flags & VM_SHARED) 388 388 mapping->i_mmap_writable++; 389 389 flush_dcache_mmap_lock(mapping); 390 390 /* insert tmp into the share list, just after mpnt */ 391 391 vma_prio_tree_add(tmp, mpnt); 392 392 flush_dcache_mmap_unlock(mapping); 393 - spin_unlock(&mapping->i_mmap_lock); 393 + mutex_unlock(&mapping->i_mmap_mutex); 394 394 } 395 395 396 396 /*
+5 -5
mm/filemap.c
··· 58 58 /* 59 59 * Lock ordering: 60 60 * 61 - * ->i_mmap_lock (truncate_pagecache) 61 + * ->i_mmap_mutex (truncate_pagecache) 62 62 * ->private_lock (__free_pte->__set_page_dirty_buffers) 63 63 * ->swap_lock (exclusive_swap_page, others) 64 64 * ->mapping->tree_lock 65 65 * 66 66 * ->i_mutex 67 - * ->i_mmap_lock (truncate->unmap_mapping_range) 67 + * ->i_mmap_mutex (truncate->unmap_mapping_range) 68 68 * 69 69 * ->mmap_sem 70 - * ->i_mmap_lock 70 + * ->i_mmap_mutex 71 71 * ->page_table_lock or pte_lock (various, mainly in memory.c) 72 72 * ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock) 73 73 * ··· 84 84 * sb_lock (fs/fs-writeback.c) 85 85 * ->mapping->tree_lock (__sync_single_inode) 86 86 * 87 - * ->i_mmap_lock 87 + * ->i_mmap_mutex 88 88 * ->anon_vma.lock (vma_adjust) 89 89 * 90 90 * ->anon_vma.lock ··· 106 106 * 107 107 * (code doesn't rely on that order, so you could switch it around) 108 108 * ->tasklist_lock (memory_failure, collect_procs_ao) 109 - * ->i_mmap_lock 109 + * ->i_mmap_mutex 110 110 */ 111 111 112 112 /*
+2 -2
mm/filemap_xip.c
··· 183 183 return; 184 184 185 185 retry: 186 - spin_lock(&mapping->i_mmap_lock); 186 + mutex_lock(&mapping->i_mmap_mutex); 187 187 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 188 188 mm = vma->vm_mm; 189 189 address = vma->vm_start + ··· 201 201 page_cache_release(page); 202 202 } 203 203 } 204 - spin_unlock(&mapping->i_mmap_lock); 204 + mutex_unlock(&mapping->i_mmap_mutex); 205 205 206 206 if (locked) { 207 207 mutex_unlock(&xip_sparse_mutex);
+2 -2
mm/fremap.c
··· 211 211 } 212 212 goto out; 213 213 } 214 - spin_lock(&mapping->i_mmap_lock); 214 + mutex_lock(&mapping->i_mmap_mutex); 215 215 flush_dcache_mmap_lock(mapping); 216 216 vma->vm_flags |= VM_NONLINEAR; 217 217 vma_prio_tree_remove(vma, &mapping->i_mmap); 218 218 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); 219 219 flush_dcache_mmap_unlock(mapping); 220 - spin_unlock(&mapping->i_mmap_lock); 220 + mutex_unlock(&mapping->i_mmap_mutex); 221 221 } 222 222 223 223 if (vma->vm_flags & VM_LOCKED) {
+7 -7
mm/hugetlb.c
··· 2205 2205 unsigned long sz = huge_page_size(h); 2206 2206 2207 2207 /* 2208 - * A page gathering list, protected by per file i_mmap_lock. The 2208 + * A page gathering list, protected by per file i_mmap_mutex. The 2209 2209 * lock is used to avoid list corruption from multiple unmapping 2210 2210 * of the same page since we are using page->lru. 2211 2211 */ ··· 2274 2274 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, 2275 2275 unsigned long end, struct page *ref_page) 2276 2276 { 2277 - spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); 2277 + mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); 2278 2278 __unmap_hugepage_range(vma, start, end, ref_page); 2279 - spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); 2279 + mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); 2280 2280 } 2281 2281 2282 2282 /* ··· 2308 2308 * this mapping should be shared between all the VMAs, 2309 2309 * __unmap_hugepage_range() is called as the lock is already held 2310 2310 */ 2311 - spin_lock(&mapping->i_mmap_lock); 2311 + mutex_lock(&mapping->i_mmap_mutex); 2312 2312 vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 2313 2313 /* Do not unmap the current VMA */ 2314 2314 if (iter_vma == vma) ··· 2326 2326 address, address + huge_page_size(h), 2327 2327 page); 2328 2328 } 2329 - spin_unlock(&mapping->i_mmap_lock); 2329 + mutex_unlock(&mapping->i_mmap_mutex); 2330 2330 2331 2331 return 1; 2332 2332 } ··· 2810 2810 BUG_ON(address >= end); 2811 2811 flush_cache_range(vma, address, end); 2812 2812 2813 - spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); 2813 + mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); 2814 2814 spin_lock(&mm->page_table_lock); 2815 2815 for (; address < end; address += huge_page_size(h)) { 2816 2816 ptep = huge_pte_offset(mm, address); ··· 2825 2825 } 2826 2826 } 2827 2827 spin_unlock(&mm->page_table_lock); 2828 - spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); 2828 + mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); 2829 2829 2830 2830 flush_tlb_range(vma, start, end); 2831 2831 }
+2 -2
mm/memory-failure.c
··· 429 429 */ 430 430 431 431 read_lock(&tasklist_lock); 432 - spin_lock(&mapping->i_mmap_lock); 432 + mutex_lock(&mapping->i_mmap_mutex); 433 433 for_each_process(tsk) { 434 434 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 435 435 ··· 449 449 add_to_kill(tsk, page, vma, to_kill, tkc); 450 450 } 451 451 } 452 - spin_unlock(&mapping->i_mmap_lock); 452 + mutex_unlock(&mapping->i_mmap_mutex); 453 453 read_unlock(&tasklist_lock); 454 454 } 455 455
+2 -2
mm/memory.c
··· 2667 2667 details.last_index = ULONG_MAX; 2668 2668 2669 2669 2670 - spin_lock(&mapping->i_mmap_lock); 2670 + mutex_lock(&mapping->i_mmap_mutex); 2671 2671 if (unlikely(!prio_tree_empty(&mapping->i_mmap))) 2672 2672 unmap_mapping_range_tree(&mapping->i_mmap, &details); 2673 2673 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) 2674 2674 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); 2675 - spin_unlock(&mapping->i_mmap_lock); 2675 + mutex_unlock(&mapping->i_mmap_mutex); 2676 2676 } 2677 2677 EXPORT_SYMBOL(unmap_mapping_range); 2678 2678
+11 -11
mm/mmap.c
··· 194 194 } 195 195 196 196 /* 197 - * Requires inode->i_mapping->i_mmap_lock 197 + * Requires inode->i_mapping->i_mmap_mutex 198 198 */ 199 199 static void __remove_shared_vm_struct(struct vm_area_struct *vma, 200 200 struct file *file, struct address_space *mapping) ··· 222 222 223 223 if (file) { 224 224 struct address_space *mapping = file->f_mapping; 225 - spin_lock(&mapping->i_mmap_lock); 225 + mutex_lock(&mapping->i_mmap_mutex); 226 226 __remove_shared_vm_struct(vma, file, mapping); 227 - spin_unlock(&mapping->i_mmap_lock); 227 + mutex_unlock(&mapping->i_mmap_mutex); 228 228 } 229 229 } 230 230 ··· 446 446 mapping = vma->vm_file->f_mapping; 447 447 448 448 if (mapping) 449 - spin_lock(&mapping->i_mmap_lock); 449 + mutex_lock(&mapping->i_mmap_mutex); 450 450 451 451 __vma_link(mm, vma, prev, rb_link, rb_parent); 452 452 __vma_link_file(vma); 453 453 454 454 if (mapping) 455 - spin_unlock(&mapping->i_mmap_lock); 455 + mutex_unlock(&mapping->i_mmap_mutex); 456 456 457 457 mm->map_count++; 458 458 validate_mm(mm); ··· 555 555 mapping = file->f_mapping; 556 556 if (!(vma->vm_flags & VM_NONLINEAR)) 557 557 root = &mapping->i_mmap; 558 - spin_lock(&mapping->i_mmap_lock); 558 + mutex_lock(&mapping->i_mmap_mutex); 559 559 if (insert) { 560 560 /* 561 561 * Put into prio_tree now, so instantiated pages ··· 622 622 if (anon_vma) 623 623 anon_vma_unlock(anon_vma); 624 624 if (mapping) 625 - spin_unlock(&mapping->i_mmap_lock); 625 + mutex_unlock(&mapping->i_mmap_mutex); 626 626 627 627 if (remove_next) { 628 628 if (file) { ··· 2290 2290 2291 2291 /* Insert vm structure into process list sorted by address 2292 2292 * and into the inode's i_mmap tree. If vm_file is non-NULL 2293 - * then i_mmap_lock is taken here. 2293 + * then i_mmap_mutex is taken here. 2294 2294 */ 2295 2295 int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) ··· 2532 2532 */ 2533 2533 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) 2534 2534 BUG(); 2535 - spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem); 2535 + mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem); 2536 2536 } 2537 2537 } 2538 2538 ··· 2559 2559 * vma in this mm is backed by the same anon_vma or address_space. 2560 2560 * 2561 2561 * We can take all the locks in random order because the VM code 2562 - * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never 2562 + * taking i_mmap_mutex or anon_vma->lock outside the mmap_sem never 2563 2563 * takes more than one of them in a row. Secondly we're protected 2564 2564 * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. 2565 2565 * ··· 2631 2631 * AS_MM_ALL_LOCKS can't change to 0 from under us 2632 2632 * because we hold the mm_all_locks_mutex. 2633 2633 */ 2634 - spin_unlock(&mapping->i_mmap_lock); 2634 + mutex_unlock(&mapping->i_mmap_mutex); 2635 2635 if (!test_and_clear_bit(AS_MM_ALL_LOCKS, 2636 2636 &mapping->flags)) 2637 2637 BUG();
+2 -2
mm/mremap.c
··· 93 93 * and we propagate stale pages into the dst afterward. 94 94 */ 95 95 mapping = vma->vm_file->f_mapping; 96 - spin_lock(&mapping->i_mmap_lock); 96 + mutex_lock(&mapping->i_mmap_mutex); 97 97 } 98 98 99 99 /* ··· 122 122 pte_unmap(new_pte - 1); 123 123 pte_unmap_unlock(old_pte - 1, old_ptl); 124 124 if (mapping) 125 - spin_unlock(&mapping->i_mmap_lock); 125 + mutex_unlock(&mapping->i_mmap_mutex); 126 126 mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end); 127 127 } 128 128
+14 -14
mm/rmap.c
··· 24 24 * inode->i_alloc_sem (vmtruncate_range) 25 25 * mm->mmap_sem 26 26 * page->flags PG_locked (lock_page) 27 - * mapping->i_mmap_lock 27 + * mapping->i_mmap_mutex 28 28 * anon_vma->lock 29 29 * mm->page_table_lock or pte_lock 30 30 * zone->lru_lock (in mark_page_accessed, isolate_lru_page) ··· 646 646 * The page lock not only makes sure that page->mapping cannot 647 647 * suddenly be NULLified by truncation, it makes sure that the 648 648 * structure at mapping cannot be freed and reused yet, 649 - * so we can safely take mapping->i_mmap_lock. 649 + * so we can safely take mapping->i_mmap_mutex. 650 650 */ 651 651 BUG_ON(!PageLocked(page)); 652 652 653 - spin_lock(&mapping->i_mmap_lock); 653 + mutex_lock(&mapping->i_mmap_mutex); 654 654 655 655 /* 656 - * i_mmap_lock does not stabilize mapcount at all, but mapcount 656 + * i_mmap_mutex does not stabilize mapcount at all, but mapcount 657 657 * is more likely to be accurate if we note it after spinning. 658 658 */ 659 659 mapcount = page_mapcount(page); ··· 675 675 break; 676 676 } 677 677 678 - spin_unlock(&mapping->i_mmap_lock); 678 + mutex_unlock(&mapping->i_mmap_mutex); 679 679 return referenced; 680 680 } 681 681 ··· 762 762 763 763 BUG_ON(PageAnon(page)); 764 764 765 - spin_lock(&mapping->i_mmap_lock); 765 + mutex_lock(&mapping->i_mmap_mutex); 766 766 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 767 767 if (vma->vm_flags & VM_SHARED) { 768 768 unsigned long address = vma_address(page, vma); ··· 771 771 ret += page_mkclean_one(page, vma, address); 772 772 } 773 773 } 774 - spin_unlock(&mapping->i_mmap_lock); 774 + mutex_unlock(&mapping->i_mmap_mutex); 775 775 return ret; 776 776 } ··· 1119 1119 /* 1120 1120 * We need mmap_sem locking, Otherwise VM_LOCKED check makes 1121 1121 * unstable result and race. Plus, We can't wait here because 1122 - * we now hold anon_vma->lock or mapping->i_mmap_lock. 1122 + * we now hold anon_vma->lock or mapping->i_mmap_mutex. 1123 1123 * if trylock failed, the page remain in evictable lru and later 1124 1124 * vmscan could retry to move the page to unevictable lru if the 1125 1125 * page is actually mlocked. ··· 1345 1345 unsigned long max_nl_size = 0; 1346 1346 unsigned int mapcount; 1347 1347 1348 - spin_lock(&mapping->i_mmap_lock); 1348 + mutex_lock(&mapping->i_mmap_mutex); 1349 1349 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 1350 1350 unsigned long address = vma_address(page, vma); 1351 1351 if (address == -EFAULT) ··· 1391 1391 mapcount = page_mapcount(page); 1392 1392 if (!mapcount) 1393 1393 goto out; 1394 - cond_resched_lock(&mapping->i_mmap_lock); 1394 + cond_resched(); 1395 1395 1396 1396 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; 1397 1397 if (max_nl_cursor == 0) ··· 1413 1413 } 1414 1414 vma->vm_private_data = (void *) max_nl_cursor; 1415 1415 } 1416 - cond_resched_lock(&mapping->i_mmap_lock); 1416 + cond_resched(); 1417 1417 max_nl_cursor += CLUSTER_SIZE; 1418 1418 } while (max_nl_cursor <= max_nl_size); 1419 1419 ··· 1425 1425 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) 1426 1426 vma->vm_private_data = NULL; 1427 1427 out: 1428 - spin_unlock(&mapping->i_mmap_lock); 1428 + mutex_unlock(&mapping->i_mmap_mutex); 1429 1429 return ret; 1430 1430 } ··· 1544 1544 1545 1545 if (!mapping) 1546 1546 return ret; 1547 - spin_lock(&mapping->i_mmap_lock); 1547 + mutex_lock(&mapping->i_mmap_mutex); 1548 1548 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 1549 1549 unsigned long address = vma_address(page, vma); 1550 1550 if (address == -EFAULT) ··· 1558 1558 * never contain migration ptes. Decide what to do about this 1559 1559 * limitation to linear when we need rmap_walk() on nonlinear. 1560 1560 */ 1561 - spin_unlock(&mapping->i_mmap_lock); 1561 + mutex_unlock(&mapping->i_mmap_mutex); 1562 1562 return ret; 1563 1563 }