/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>
#include <linux/sched/mm.h>

#define MMAP_LOCK_INITIALIZER(name) \
	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
	if (tracepoint_enabled(mmap_lock_start_locking))
		__mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
	if (tracepoint_enabled(mmap_lock_acquire_returned))
		__mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
	if (tracepoint_enabled(mmap_lock_released))
		__mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
	rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
	rwsem_assert_held_write(&mm->mmap_lock);
}

#ifdef CONFIG_PER_VMA_LOCK

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
	seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
	do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	/*
	 * Since mmap_lock is a sleeping lock, and waiting for it to become
	 * unlocked is more or less equivalent with taking it ourselves, don't
	 * bother with the speculative path if mmap_lock is already write-locked
	 * and take the slow path, which takes the lock.
	 */
	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return read_seqcount_retry(&mm->mm_lock_seq, seq);
}

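/*
 * Illustrative sketch (not part of this header): a typical lockless read
 * pattern built on the two speculation helpers above. A hypothetical caller
 * snapshots per-mm state without taking mmap_lock, then validates the
 * snapshot; on failure it falls back to the sleeping lock. The helper name
 * read_mm_state_speculatively() and its use of total_vm are made up purely
 * for the example.
 *
 *	static bool read_mm_state_speculatively(struct mm_struct *mm,
 *						unsigned long *out)
 *	{
 *		unsigned int seq;
 *
 *		if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *			*out = READ_ONCE(mm->total_vm);	// speculative read
 *			if (!mmap_lock_speculate_retry(mm, seq))
 *				return true;		// snapshot is consistent
 *		}
 *		// A writer held the lock or raced with us: the caller should
 *		// take mmap_read_lock(mm) and reread under the lock instead.
 *		return false;
 *	}
 */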

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	static struct lock_class_key lockdep_key;

	lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
#endif
	if (reset_refcnt)
		refcount_set(&vma->vm_refcnt, 0);
	vma->vm_lock_seq = UINT_MAX;
}

static inline bool is_vma_writer_only(int refcnt)
{
	/*
	 * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
	 * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
	 * a detached vma happens only in vma_mark_detached() and is a rare
	 * case, therefore most of the time there will be no unnecessary wakeup.
	 */
	return (refcnt & VMA_LOCK_OFFSET) && refcnt <= VMA_LOCK_OFFSET + 1;
}

static inline void vma_refcount_put(struct vm_area_struct *vma)
{
	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
	struct mm_struct *mm = vma->vm_mm;
	int oldcnt;

	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
	if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {

		if (is_vma_writer_only(oldcnt - 1))
			rcuwait_wake_up(&mm->vma_writer_wait);
	}
}

/*
 * Use only while holding mmap read lock which guarantees that locking will not
 * fail (nobody can concurrently write-lock the vma). vma_start_read() should
 * not be used in such cases because it might fail due to mm_lock_seq overflow.
 * This functionality is used to obtain vma read lock and drop the mmap read lock.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
	int oldcnt;

	mmap_assert_locked(vma->vm_mm);
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT)))
		return false;

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
	return true;
}

/*
 * Use only while holding mmap read lock which guarantees that locking will not
 * fail (nobody can concurrently write-lock the vma). vma_start_read() should
 * not be used in such cases because it might fail due to mm_lock_seq overflow.
 * This functionality is used to obtain vma read lock and drop the mmap read lock.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
	return vma_start_read_locked_nested(vma, 0);
}

static inline void vma_end_read(struct vm_area_struct *vma)
{
	vma_refcount_put(vma);
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static inline bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
	mmap_assert_write_locked(vma->vm_mm);

	/*
	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
	 * mm->mm_lock_seq can't be concurrently modified.
	 */
	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
	return (vma->vm_lock_seq == *mm_lock_seq);
}

int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq,
		      int state);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return;

	__vma_start_write(vma, mm_lock_seq, TASK_UNINTERRUPTIBLE);
}

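/*
 * Illustrative sketch (not part of this header): the usual write-side
 * ordering. The caller below is hypothetical; it only demonstrates that
 * vma_start_write() requires mmap_lock held for write, and that the per-VMA
 * write locks are dropped implicitly when mmap_write_unlock() (or a
 * downgrade) calls vma_end_write_all().
 *
 *	static void update_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 *	{
 *		mmap_write_lock(mm);
 *		vma_start_write(vma);	// block new per-VMA readers
 *		// ... modify the VMA (e.g. its flags or range) ...
 *		mmap_write_unlock(mm);	// also ends all VMA write locks
 *	}
 */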

/**
 * vma_start_write_killable - Begin writing to a VMA.
 * @vma: The VMA we are going to modify.
 *
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 *
 * Context: May sleep while waiting for readers to drop the vma read lock.
 * Caller must already hold the mmap_lock for write.
 *
 * Return: 0 for a successful acquisition. -EINTR if a fatal signal was
 * received.
 */
static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return 0;
	return __vma_start_write(vma, mm_lock_seq, TASK_KILLABLE);
}

static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
		      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_detached(vma);
	refcount_set_release(&vma->vm_refcnt, 1);
}

void vma_mark_detached(struct vm_area_struct *vma);

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
					  unsigned long address);

/*
 * Locks next vma pointed by the iterator. Confirms the locked vma has not
 * been modified and will retry under mmap_lock protection if modification
 * was detected. Should be called from read RCU section.
 * Returns either a valid locked VMA, NULL if no more VMAs or -EINTR if the
 * process was interrupted.
 */
struct vm_area_struct *lock_next_vma(struct mm_struct *mm,
				     struct vma_iterator *iter,
				     unsigned long address);

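/*
 * Illustrative sketch (not part of this header): the per-VMA fast-path lookup
 * used by page-fault style code. The function below is hypothetical; it shows
 * that a VMA returned by lock_vma_under_rcu() is read-locked and must later
 * be released with vma_end_read(), and that callers fall back to the coarse
 * mmap_lock when the fast path does not succeed.
 *
 *	static struct vm_area_struct *find_vma_fast(struct mm_struct *mm,
 *						    unsigned long addr)
 *	{
 *		struct vm_area_struct *vma;
 *
 *		vma = lock_vma_under_rcu(mm, addr);	// NULL if it cannot lock
 *		if (vma)
 *			return vma;	// caller ends with vma_end_read(vma)
 *
 *		// Slow path: take mmap_read_lock(mm) and look the VMA up
 *		// with the usual mmap_lock-based helpers instead.
 *		return NULL;
 *	}
 */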

#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return true;
}
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma) { return 0; }
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
	{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
							 unsigned long address)
{
	return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	if (!ret)
		mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}

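/*
 * Illustrative sketch (not part of this header): the killable variant lets a
 * fatal signal abort the wait for the write lock. The caller below is
 * hypothetical and only shows the expected error handling; when the call
 * fails, no lock is held.
 *
 *	static int do_write_locked_work(struct mm_struct *mm)
 *	{
 *		if (mmap_write_lock_killable(mm))
 *			return -EINTR;		// interrupted, lock not taken
 *
 *		// ... mm is write-locked here ...
 *
 *		mmap_write_unlock(mm);
 *		return 0;
 *	}
 */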

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
	vma_end_write_all(mm);
	up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
	__mmap_lock_trace_acquire_returned(mm, false, true);
	vma_end_write_all(mm);
	downgrade_write(&mm->mmap_lock);
}

static inline void mmap_read_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, false);
	down_read(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, ret == 0);
	return ret;
}

static inline bool mmap_read_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, false, ret);
	return ret;
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read(&mm->mmap_lock);
}

DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
	     mmap_read_lock(_T), mmap_read_unlock(_T))

static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
	return rwsem_is_contended(&mm->mmap_lock);
}

#endif /* _LINUX_MMAP_LOCK_H */
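/*
 * Illustrative sketch (not part of this header): the DEFINE_GUARD() above
 * hooks mmap_read_lock()/mmap_read_unlock() into the <linux/cleanup.h>
 * scope-based helpers, so the read lock is dropped automatically when the
 * enclosing scope is left. The function below is hypothetical.
 *
 *	static int probe_mm(struct mm_struct *mm)
 *	{
 *		guard(mmap_read_lock)(mm);	// mmap_read_lock(mm) now,
 *						// mmap_read_unlock(mm) on return
 *
 *		if (mmap_lock_is_contended(mm))
 *			return -EAGAIN;		// unlock happens automatically
 *		return 0;
 *	}
 */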