Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
mm/mmap_lock.c at v6.16-rc2, 317 lines, 8.4 kB
// SPDX-License-Identifier: GPL-2.0
#define CREATE_TRACE_POINTS
#include <trace/events/mmap_lock.h>

#include <linux/mm.h>
#include <linux/cgroup.h>
#include <linux/memcontrol.h>
#include <linux/mmap_lock.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/trace_events.h>
#include <linux/local_lock.h>

EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);

#ifdef CONFIG_TRACING
/*
 * Trace calls must be in a separate file, as otherwise there's a circular
 * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
 */

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write)
{
	trace_mmap_lock_start_locking(mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking);

void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success)
{
	trace_mmap_lock_acquire_returned(mm, write, success);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned);

void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write)
{
	trace_mmap_lock_released(mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_released);
#endif /* CONFIG_TRACING */

#ifdef CONFIG_MMU
#ifdef CONFIG_PER_VMA_LOCK
static inline bool __vma_enter_locked(struct vm_area_struct *vma, bool detaching)
{
	unsigned int tgt_refcnt = VMA_LOCK_OFFSET;

	/* Additional refcnt if the vma is attached. */
	if (!detaching)
		tgt_refcnt++;

	/*
	 * If vma is detached then only vma_mark_attached() can raise the
	 * vm_refcnt. mmap_write_lock prevents racing with vma_mark_attached().
	 */
	if (!refcount_add_not_zero(VMA_LOCK_OFFSET, &vma->vm_refcnt))
		return false;

	rwsem_acquire(&vma->vmlock_dep_map, 0, 0, _RET_IP_);
	rcuwait_wait_event(&vma->vm_mm->vma_writer_wait,
			   refcount_read(&vma->vm_refcnt) == tgt_refcnt,
			   TASK_UNINTERRUPTIBLE);
	lock_acquired(&vma->vmlock_dep_map, _RET_IP_);

	return true;
}

static inline void __vma_exit_locked(struct vm_area_struct *vma, bool *detached)
{
	*detached = refcount_sub_and_test(VMA_LOCK_OFFSET, &vma->vm_refcnt);
	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
}

void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq)
{
	bool locked;

	/*
	 * __vma_enter_locked() returns false immediately if the vma is not
	 * attached, otherwise it waits until refcnt is indicating that vma
	 * is attached with no readers.
	 */
	locked = __vma_enter_locked(vma, false);

	/*
	 * We should use WRITE_ONCE() here because we can have concurrent reads
	 * from the early lockless pessimistic check in vma_start_read().
	 * We don't really care about the correctness of that early check, but
	 * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy.
	 */
	WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);

	if (locked) {
		bool detached;

		__vma_exit_locked(vma, &detached);
		WARN_ON_ONCE(detached); /* vma should remain attached */
	}
}
EXPORT_SYMBOL_GPL(__vma_start_write);
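
/*
 * Illustrative sketch, not part of the file above: __vma_start_write() is
 * normally reached through the vma_start_write() wrapper, which callers
 * invoke while holding the mmap lock for writing, before modifying a VMA.
 * The surrounding code and variable names below are assumptions for
 * illustration only:
 *
 *	mmap_write_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma) {
 *		vma_start_write(vma);
 *		... modify the vma ...
 *	}
 *	mmap_write_unlock(mm);
 *
 * There is no per-VMA unlock for writers: the write lock is dropped for all
 * VMAs at once when mmap_write_unlock() advances mm->mm_lock_seq.
 */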
void vma_mark_detached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_attached(vma);

	/*
	 * We are the only writer, so no need to use vma_refcount_put().
	 * The condition below is unlikely because the vma has already been
	 * write-locked and readers can increment vm_refcnt only temporarily
	 * before they check vm_lock_seq, realize the vma is locked and drop
	 * the vm_refcnt back. That is a narrow window for observing a raised
	 * vm_refcnt.
	 */
	if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
		/* Wait until vma is detached with no readers. */
		if (__vma_enter_locked(vma, true)) {
			bool detached;

			__vma_exit_locked(vma, &detached);
			WARN_ON_ONCE(!detached);
		}
	}
}

/*
 * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
 * stable and not isolated. If the VMA is not found or is being modified, the
 * function returns NULL.
 */
struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
					  unsigned long address)
{
	MA_STATE(mas, &mm->mm_mt, address, address);
	struct vm_area_struct *vma;

	rcu_read_lock();
retry:
	vma = mas_walk(&mas);
	if (!vma)
		goto inval;

	vma = vma_start_read(mm, vma);
	if (IS_ERR_OR_NULL(vma)) {
		/* Check if the VMA got isolated after we found it */
		if (PTR_ERR(vma) == -EAGAIN) {
			count_vm_vma_lock_event(VMA_LOCK_MISS);
			/* The area was replaced with another one */
			goto retry;
		}

		/* Failed to lock the VMA */
		goto inval;
	}
	/*
	 * At this point, we have a stable reference to a VMA: The VMA is
	 * locked and we know it hasn't already been isolated.
	 * From here on, we can access the VMA without worrying about which
	 * fields are accessible for RCU readers.
	 */

	/* Check if the vma we locked is the right one. */
	if (unlikely(vma->vm_mm != mm ||
		     address < vma->vm_start || address >= vma->vm_end))
		goto inval_end_read;

	rcu_read_unlock();
	return vma;

inval_end_read:
	vma_end_read(vma);
inval:
	rcu_read_unlock();
	count_vm_vma_lock_event(VMA_LOCK_ABORT);
	return NULL;
}
#endif /* CONFIG_PER_VMA_LOCK */
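
/*
 * Illustrative sketch, not part of the file above: lock_vma_under_rcu() is
 * meant for page fault handlers that attempt a per-VMA-locked fault before
 * falling back to the mmap lock. A rough caller shape, with the fallback
 * label, fault flags and permission checks assumed or omitted for
 * illustration:
 *
 *	vma = lock_vma_under_rcu(mm, address);
 *	if (!vma)
 *		goto lock_mmap;
 *	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
 *	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
 *		vma_end_read(vma);
 *
 * When handle_mm_fault() returns VM_FAULT_RETRY or VM_FAULT_COMPLETED it has
 * already dropped the per-VMA lock, which is why vma_end_read() is skipped
 * in those cases.
 */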
#ifdef CONFIG_LOCK_MM_AND_FIND_VMA
#include <linux/extable.h>

static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
{
	if (likely(mmap_read_trylock(mm)))
		return true;

	if (regs && !user_mode(regs)) {
		unsigned long ip = exception_ip(regs);
		if (!search_exception_tables(ip))
			return false;
	}

	return !mmap_read_lock_killable(mm);
}

static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
{
	/*
	 * We don't have this operation yet.
	 *
	 * It should be easy enough to do: it's basically an
	 * atomic_long_try_cmpxchg_acquire()
	 * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
	 * it also needs the proper lockdep magic etc.
	 */
	return false;
}

static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
{
	mmap_read_unlock(mm);
	if (regs && !user_mode(regs)) {
		unsigned long ip = exception_ip(regs);
		if (!search_exception_tables(ip))
			return false;
	}
	return !mmap_write_lock_killable(mm);
}

/*
 * Helper for page fault handling.
 *
 * This is kind of equivalent to "mmap_read_lock()" followed
 * by "find_extend_vma()", except it's a lot more careful about
 * the locking (and will drop the lock on failure).
 *
 * For example, if we have a kernel bug that causes a page
 * fault, we don't want to just use mmap_read_lock() to get
 * the mm lock, because that would deadlock if the bug were
 * to happen while we're holding the mm lock for writing.
 *
 * So this checks the exception tables on kernel faults in
 * order to only do this all for instructions that are actually
 * expected to fault.
 *
 * We can also actually take the mm lock for writing if we
 * need to extend the vma, which helps the VM layer a lot.
 */
struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
			unsigned long addr, struct pt_regs *regs)
{
	struct vm_area_struct *vma;

	if (!get_mmap_lock_carefully(mm, regs))
		return NULL;

	vma = find_vma(mm, addr);
	if (likely(vma && (vma->vm_start <= addr)))
		return vma;

	/*
	 * Well, dang. We might still be successful, but only
	 * if we can extend a vma to do so.
	 */
	if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
		mmap_read_unlock(mm);
		return NULL;
	}

	/*
	 * We can try to upgrade the mmap lock atomically,
	 * in which case we can continue to use the vma
	 * we already looked up.
	 *
	 * Otherwise we'll have to drop the mmap lock and
	 * re-take it, and also look up the vma again,
	 * re-checking it.
	 */
	if (!mmap_upgrade_trylock(mm)) {
		if (!upgrade_mmap_lock_carefully(mm, regs))
			return NULL;

		vma = find_vma(mm, addr);
		if (!vma)
			goto fail;
		if (vma->vm_start <= addr)
			goto success;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto fail;
	}

	if (expand_stack_locked(vma, addr))
		goto fail;

success:
	mmap_write_downgrade(mm);
	return vma;

fail:
	mmap_write_unlock(mm);
	return NULL;
}
#endif /* CONFIG_LOCK_MM_AND_FIND_VMA */

#else /* CONFIG_MMU */

/*
 * At least xtensa ends up having protection faults even with no
 * MMU.. No stack expansion, at least.
 */
struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
			unsigned long addr, struct pt_regs *regs)
{
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	vma = vma_lookup(mm, addr);
	if (!vma)
		mmap_read_unlock(mm);
	return vma;
}

#endif /* CONFIG_MMU */
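
/*
 * Illustrative sketch, not part of the file above: architectures that select
 * CONFIG_LOCK_MM_AND_FIND_VMA use lock_mm_and_find_vma() from their fault
 * handlers instead of open-coding mmap_read_lock() + find_vma() + stack
 * expansion. A rough caller shape, with names and error handling assumed for
 * illustration:
 *
 *	vma = lock_mm_and_find_vma(mm, address, regs);
 *	if (unlikely(!vma)) {
 *		... report the bad address; the mmap lock is not held here ...
 *		return;
 *	}
 *	fault = handle_mm_fault(vma, address, flags, regs);
 *	...
 *	mmap_read_unlock(mm);
 *
 * On success the mmap lock is held for read (lock_mm_and_find_vma()
 * downgrades after a write-locked stack expansion), so the caller pairs it
 * with mmap_read_unlock(); on every failure path the lock has already been
 * released.
 */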