/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>

#define MMAP_LOCK_INITIALIZER(name) \
	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
	if (tracepoint_enabled(mmap_lock_start_locking))
		__mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
	if (tracepoint_enabled(mmap_lock_acquire_returned))
		__mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
	if (tracepoint_enabled(mmap_lock_released))
		__mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
	rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
	rwsem_assert_held_write(&mm->mmap_lock);
}

#ifdef CONFIG_PER_VMA_LOCK

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
	seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
	do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	/*
	 * Since mmap_lock is a sleeping lock, and waiting for it to become
	 * unlocked is more or less equivalent to taking it ourselves, don't
	 * bother with the speculative path if mmap_lock is already write-locked
	 * and take the slow path, which takes the lock.
	 */
	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return read_seqcount_retry(&mm->mm_lock_seq, seq);
}

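/*
 * Illustrative sketch (not part of the upstream header): the two speculative
 * helpers above are intended to be used as a sample/validate pair, with the
 * real mmap_lock as the fallback. Everything except the helpers themselves
 * and mmap_read_lock()/mmap_read_unlock() is made up for the example.
 *
 *	unsigned int seq;
 *
 *	if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *		val = READ_ONCE(some_mm_state);
 *		if (!mmap_lock_speculate_retry(mm, seq))
 *			return val;	// nothing changed, speculation held
 *	}
 *	// Speculation refused (writer active) or invalidated: take the lock.
 *	mmap_read_lock(mm);
 *	val = some_mm_state;
 *	mmap_read_unlock(mm);
 *	return val;
 */
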
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	static struct lock_class_key lockdep_key;

	lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
#endif
	if (reset_refcnt)
		refcount_set(&vma->vm_refcnt, 0);
	vma->vm_lock_seq = UINT_MAX;
}

static inline bool is_vma_writer_only(int refcnt)
{
	/*
	 * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
	 * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
	 * a detached vma happens only in vma_mark_detached() and is a rare
	 * case, therefore most of the time there will be no unnecessary wakeup.
	 */
	return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
}

static inline void vma_refcount_put(struct vm_area_struct *vma)
{
	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
	struct mm_struct *mm = vma->vm_mm;
	int oldcnt;

	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
	if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {

		if (is_vma_writer_only(oldcnt - 1))
			rcuwait_wake_up(&mm->vma_writer_wait);
	}
}

/*
 * Try to read-lock a vma. The function is allowed to occasionally yield false
 * locked result to avoid performance overhead, in which case we fall back to
 * using mmap_lock. The function should never yield false unlocked result.
 * False locked result is possible if mm_lock_seq overflows or if vma gets
 * reused and attached to a different mm before we lock it.
 * Returns the vma on success, NULL on failure to lock and ERR_PTR(-EAGAIN) if
 * the vma got detached.
 */
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
{
	int oldcnt;

	/*
	 * Check before locking. A race might cause false locked result.
	 * We can use READ_ONCE() for the mm_lock_seq here, and don't need
	 * ACQUIRE semantics, because this is just a lockless check whose result
	 * we don't rely on for anything - the mm_lock_seq read against which we
	 * need ordering is below.
	 */
	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
		return NULL;

	/*
	 * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
	 * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
	 * Acquire fence is required here to avoid reordering against later
	 * vm_lock_seq check and checks inside lock_vma_under_rcu().
	 */
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT))) {
		/* return EAGAIN if vma got detached from under us */
		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
	}

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
	/*
	 * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
	 * False unlocked result is impossible because we modify and check
	 * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
	 * modification invalidates all existing locks.
	 *
	 * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
	 * racing with vma_end_write_all(), we only start reading from the VMA
	 * after it has been unlocked.
	 * This pairs with RELEASE semantics in vma_end_write_all().
	 */
	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
		vma_refcount_put(vma);
		return NULL;
	}

	return vma;
}

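/*
 * Illustrative sketch (not part of the upstream header): the typical caller
 * of vma_start_read() is the RCU-protected VMA lookup done by
 * lock_vma_under_rcu(), declared later in this file. Roughly, the caller
 * looks the vma up under rcu_read_lock(), tries to read-lock it and then
 * interprets the three possible results; the lookup and re-validation steps
 * are elided here.
 *
 *	rcu_read_lock();
 *	vma = <look up the vma covering address>;
 *	vma = vma_start_read(mm, vma);
 *	if (IS_ERR_OR_NULL(vma)) {
 *		// NULL: could not lock, fall back to taking mmap_lock.
 *		// ERR_PTR(-EAGAIN): the vma was detached, retry the lookup.
 *	} else {
 *		rcu_read_unlock();
 *		// vma is read-locked and pinned; use it, then drop the lock:
 *		vma_end_read(vma);
 *	}
 */
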
/*
 * Use only while holding mmap read lock which guarantees that locking will not
 * fail (nobody can concurrently write-lock the vma). vma_start_read() should
 * not be used in such cases because it might fail due to mm_lock_seq overflow.
 * This functionality is used to obtain vma read lock and drop the mmap read lock.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
	int oldcnt;

	mmap_assert_locked(vma->vm_mm);
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT)))
		return false;

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
	return true;
}

/*
 * Use only while holding mmap read lock which guarantees that locking will not
 * fail (nobody can concurrently write-lock the vma). vma_start_read() should
 * not be used in such cases because it might fail due to mm_lock_seq overflow.
 * This functionality is used to obtain vma read lock and drop the mmap read lock.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
	return vma_start_read_locked_nested(vma, 0);
}

static inline void vma_end_read(struct vm_area_struct *vma)
{
	vma_refcount_put(vma);
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
	mmap_assert_write_locked(vma->vm_mm);

	/*
	 * Since the current task is holding mmap_write_lock, both
	 * vma->vm_lock_seq and mm->mm_lock_seq can't be concurrently modified.
	 */
	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
	return (vma->vm_lock_seq == *mm_lock_seq);
}

void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return;

	__vma_start_write(vma, mm_lock_seq);
}

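/*
 * Illustrative sketch (not part of the upstream header): a writer already
 * holding mmap_lock for writing marks the individual VMA it is about to
 * modify. Note there is no per-VMA "write unlock"; the write lock is dropped
 * for all write-locked VMAs by vma_end_write_all() when the mmap_lock is
 * released or downgraded. The modification step below is a placeholder.
 *
 *	mmap_write_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma) {
 *		vma_start_write(vma);	// exclude per-VMA lock readers
 *		<modify the vma>
 *	}
 *	mmap_write_unlock(mm);		// implies vma_end_write_all()
 */
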
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
		      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_detached(vma);
	refcount_set_release(&vma->vm_refcnt, 1);
}

void vma_mark_detached(struct vm_area_struct *vma);

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
					  unsigned long address);

#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return true;
}
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
		{ return NULL; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
		{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
							unsigned long address)
{
	return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	if (!ret)
		mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
	vma_end_write_all(mm);
	up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
	__mmap_lock_trace_acquire_returned(mm, false, true);
	vma_end_write_all(mm);
	downgrade_write(&mm->mmap_lock);
}

static inline void mmap_read_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, false);
	down_read(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, ret == 0);
	return ret;
}

static inline bool mmap_read_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, false, ret);
	return ret;
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read(&mm->mmap_lock);
}

DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
	     mmap_read_lock(_T), mmap_read_unlock(_T))

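/*
 * Illustrative sketch (not part of the upstream header): the guard defined
 * above hooks mmap_read_lock()/mmap_read_unlock() into the scope-based
 * cleanup helpers from <linux/cleanup.h>, so callers can write:
 *
 *	guard(mmap_read_lock)(mm);	// dropped automatically at end of scope
 *
 * or
 *
 *	scoped_guard(mmap_read_lock, mm) {
 *		// read-side work with mmap_lock held
 *	}
 */
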
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
	return rwsem_is_contended(&mm->mmap_lock);
}

#endif /* _LINUX_MMAP_LOCK_H */