Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm: adjust_pte() use pte_offset_map_rw_nolock()

In do_adjust_pte(), we may modify the pte entry. The corresponding pmd
entry may have been modified concurrently. Therefore, in order to ensure
the stability of the pmd entry, use pte_offset_map_rw_nolock() to replace
pte_offset_map_nolock(), and do a pmd_same() check after holding the PTL.

All callers of update_mmu_cache_range() hold the vmf->ptl, so we can
determine whether split PTE locks are being used by doing the following
check, just as we do elsewhere in the kernel:

ptl != vmf->ptl

And then we can delete the do_pte_lock() and do_pte_unlock().

Link: https://lkml.kernel.org/r/0eaf6b69aeb2fe35092a633fed12537efe645303.1727332572.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Qi Zheng and committed by
Andrew Morton
fc9c45b7 c8550785

+22 -31
+22 -31
arch/arm/mm/fault-armv.c
··· 61 61 return ret; 62 62 } 63 63 64 - #if defined(CONFIG_SPLIT_PTE_PTLOCKS) 65 - /* 66 - * If we are using split PTE locks, then we need to take the page 67 - * lock here. Otherwise we are using shared mm->page_table_lock 68 - * which is already locked, thus cannot take it. 69 - */ 70 - static inline void do_pte_lock(spinlock_t *ptl) 71 - { 72 - /* 73 - * Use nested version here to indicate that we are already 74 - * holding one similar spinlock. 75 - */ 76 - spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); 77 - } 78 - 79 - static inline void do_pte_unlock(spinlock_t *ptl) 80 - { 81 - spin_unlock(ptl); 82 - } 83 - #else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */ 84 - static inline void do_pte_lock(spinlock_t *ptl) {} 85 - static inline void do_pte_unlock(spinlock_t *ptl) {} 86 - #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ 87 - 88 64 static int adjust_pte(struct vm_area_struct *vma, unsigned long address, 89 - unsigned long pfn) 65 + unsigned long pfn, struct vm_fault *vmf) 90 66 { 91 67 spinlock_t *ptl; 92 68 pgd_t *pgd; ··· 70 94 pud_t *pud; 71 95 pmd_t *pmd; 72 96 pte_t *pte; 97 + pmd_t pmdval; 73 98 int ret; 74 99 75 100 pgd = pgd_offset(vma->vm_mm, address); ··· 89 112 if (pmd_none_or_clear_bad(pmd)) 90 113 return 0; 91 114 115 + again: 92 116 /* 93 117 * This is called while another page table is mapped, so we 94 118 * must use the nested version. This also means we need to 95 119 * open-code the spin-locking. 96 120 */ 97 - pte = pte_offset_map_nolock(vma->vm_mm, pmd, address, &ptl); 121 + pte = pte_offset_map_rw_nolock(vma->vm_mm, pmd, address, &pmdval, &ptl); 98 122 if (!pte) 99 123 return 0; 100 124 101 - do_pte_lock(ptl); 125 + /* 126 + * If we are using split PTE locks, then we need to take the page 127 + * lock here. Otherwise we are using shared mm->page_table_lock 128 + * which is already locked, thus cannot take it. 
129 + */ 130 + if (ptl != vmf->ptl) { 131 + spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); 132 + if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) { 133 + pte_unmap_unlock(pte, ptl); 134 + goto again; 135 + } 136 + } 102 137 103 138 ret = do_adjust_pte(vma, address, pfn, pte); 104 139 105 - do_pte_unlock(ptl); 140 + if (ptl != vmf->ptl) 141 + spin_unlock(ptl); 106 142 pte_unmap(pte); 107 143 108 144 return ret; ··· 123 133 124 134 static void 125 135 make_coherent(struct address_space *mapping, struct vm_area_struct *vma, 126 - unsigned long addr, pte_t *ptep, unsigned long pfn) 136 + unsigned long addr, pte_t *ptep, unsigned long pfn, 137 + struct vm_fault *vmf) 127 138 { 128 139 struct mm_struct *mm = vma->vm_mm; 129 140 struct vm_area_struct *mpnt; ··· 151 160 if (!(mpnt->vm_flags & VM_MAYSHARE)) 152 161 continue; 153 162 offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; 154 - aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn); 163 + aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf); 155 164 } 156 165 flush_dcache_mmap_unlock(mapping); 157 166 if (aliases) ··· 194 203 __flush_dcache_folio(mapping, folio); 195 204 if (mapping) { 196 205 if (cache_is_vivt()) 197 - make_coherent(mapping, vma, addr, ptep, pfn); 206 + make_coherent(mapping, vma, addr, ptep, pfn, vmf); 198 207 else if (vma->vm_flags & VM_EXEC) 199 208 __flush_icache_all(); 200 209 }