/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>
#include <linux/sched/mm.h>

#define MMAP_LOCK_INITIALIZER(name) \
	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
	if (tracepoint_enabled(mmap_lock_start_locking))
		__mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
	if (tracepoint_enabled(mmap_lock_acquire_returned))
		__mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
	if (tracepoint_enabled(mmap_lock_released))
		__mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
	rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
	rwsem_assert_held_write(&mm->mmap_lock);
}

#ifdef CONFIG_PER_VMA_LOCK

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
	seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
	do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	/*
	 * Since mmap_lock is a sleeping lock, and waiting for it to become
	 * unlocked is more or less equivalent to taking it ourselves, don't
	 * bother with the speculative path if mmap_lock is already
	 * write-locked; take the slow path, which takes the lock, instead.
	 */
	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return read_seqcount_retry(&mm->mm_lock_seq, seq);
}
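
/*
 * Usage sketch (illustrative, not part of this header): a typical
 * speculative read built on the two helpers above. The reader snapshots
 * the sequence count, does lockless work, then falls back to the mmap
 * read lock if a writer raced with it. do_speculative_lookup() and
 * do_locked_lookup() are hypothetical placeholders.
 *
 *	unsigned int seq;
 *	int err;
 *
 *	if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *		err = do_speculative_lookup(mm);
 *		if (!mmap_lock_speculate_retry(mm, seq) && !err)
 *			return 0;	// snapshot stayed stable, result valid
 *	}
 *	mmap_read_lock(mm);
 *	err = do_locked_lookup(mm);
 *	mmap_read_unlock(mm);
 *	return err;
 */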

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	static struct lock_class_key lockdep_key;

	lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
#endif
	if (reset_refcnt)
		refcount_set(&vma->vm_refcnt, 0);
	vma->vm_lock_seq = UINT_MAX;
}

static inline bool is_vma_writer_only(int refcnt)
{
	/*
	 * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
	 * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
	 * a detached vma happens only in vma_mark_detached() and is a rare
	 * case, therefore most of the time there will be no unnecessary wakeup.
	 */
	return (refcnt & VMA_LOCK_OFFSET) && refcnt <= VMA_LOCK_OFFSET + 1;
}

static inline void vma_refcount_put(struct vm_area_struct *vma)
{
	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
	struct mm_struct *mm = vma->vm_mm;
	int oldcnt;

	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
	if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {

		if (is_vma_writer_only(oldcnt - 1))
			rcuwait_wake_up(&mm->vma_writer_wait);
	}
}

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma).
 * vma_start_read() should not be used in such cases because it might fail
 * due to mm_lock_seq overflow. This function is used to obtain a vma read
 * lock so that the mmap read lock can then be dropped.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
	int oldcnt;

	mmap_assert_locked(vma->vm_mm);
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT)))
		return false;

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
	return true;
}

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma).
 * vma_start_read() should not be used in such cases because it might fail
 * due to mm_lock_seq overflow. This function is used to obtain a vma read
 * lock so that the mmap read lock can then be dropped.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
	return vma_start_read_locked_nested(vma, 0);
}
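
/*
 * Usage sketch (illustrative, not part of this header): hand off from the
 * mmap read lock to a per-VMA read lock so the coarse lock can be dropped
 * early. do_something_with() is a hypothetical placeholder.
 *
 *	mmap_read_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma_start_read_locked(vma)) {
 *		mmap_read_unlock(mm);
 *		// work on this vma under its per-VMA read lock only
 *		do_something_with(vma);
 *		vma_end_read(vma);
 *	} else {
 *		mmap_read_unlock(mm);
 *	}
 */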

static inline void vma_end_read(struct vm_area_struct *vma)
{
	vma_refcount_put(vma);
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static inline bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
	mmap_assert_write_locked(vma->vm_mm);

	/*
	 * Since the current task holds mmap_write_lock, neither
	 * vma->vm_lock_seq nor mm->mm_lock_seq can be concurrently modified.
	 */
	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
	return (vma->vm_lock_seq == *mm_lock_seq);
}

int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq,
		      int state);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return;

	__vma_start_write(vma, mm_lock_seq, TASK_UNINTERRUPTIBLE);
}
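
/*
 * Usage sketch (illustrative, not part of this header): vma_start_write()
 * is called, with mmap_lock held for write, on each VMA about to be
 * modified, so per-VMA-lock readers are excluded until the mmap write
 * lock is released or downgraded. modify_vma_fields() is a hypothetical
 * placeholder.
 *
 *	mmap_write_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma) {
 *		vma_start_write(vma);
 *		modify_vma_fields(vma);
 *	}
 *	mmap_write_unlock(mm);	// also drops all VMA write locks
 */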

/**
 * vma_start_write_killable - Begin writing to a VMA.
 * @vma: The VMA we are going to modify.
 *
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 *
 * Context: May sleep while waiting for readers to drop the vma read lock.
 * Caller must already hold the mmap_lock for write.
 *
 * Return: 0 for a successful acquisition. -EINTR if a fatal signal was
 * received.
 */
static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return 0;
	return __vma_start_write(vma, mm_lock_seq, TASK_KILLABLE);
}
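
/*
 * Usage sketch (illustrative, not part of this header): the killable
 * variant for paths that must stay responsive to fatal signals while
 * waiting for readers. Error handling beyond -EINTR is elided.
 *
 *	mmap_write_lock(mm);
 *	ret = vma_start_write_killable(vma);
 *	if (ret) {
 *		mmap_write_unlock(mm);
 *		return ret;	// -EINTR: a fatal signal arrived while waiting
 *	}
 *	// ... modify the VMA ...
 *	mmap_write_unlock(mm);
 */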

static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
		      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_detached(vma);
	refcount_set_release(&vma->vm_refcnt, 1);
}

void vma_mark_detached(struct vm_area_struct *vma);

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
					  unsigned long address);
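
/*
 * Usage sketch (illustrative, not part of this header): the fast-path /
 * fallback pattern for lockless lookups. lock_vma_under_rcu() returns the
 * VMA covering @address with its per-VMA read lock held, or NULL, in which
 * case the caller falls back to the mmap read lock. do_lockless_work() and
 * do_locked_work() are hypothetical placeholders.
 *
 *	vma = lock_vma_under_rcu(mm, address);
 *	if (vma) {
 *		// per-VMA read lock held, mmap_lock not taken
 *		do_lockless_work(vma);
 *		vma_end_read(vma);
 *	} else {
 *		mmap_read_lock(mm);
 *		vma = vma_lookup(mm, address);
 *		if (vma)
 *			do_locked_work(vma);
 *		mmap_read_unlock(mm);
 *	}
 */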

/*
 * Locks the next vma pointed to by the iterator. Confirms the locked vma has
 * not been modified and will retry under mmap_lock protection if a
 * modification was detected. Should be called from within an RCU read-side
 * critical section.
 * Returns a valid locked VMA, NULL if there are no more VMAs, or -EINTR if
 * the process was interrupted.
 */
struct vm_area_struct *lock_next_vma(struct mm_struct *mm,
				     struct vma_iterator *iter,
				     unsigned long address);
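
/*
 * Usage sketch (illustrative, not part of this header): walking VMAs
 * locklessly with lock_next_vma(). The iterator setup, the IS_ERR()-based
 * error handling, and how @address is advanced between calls (if at all)
 * are caller-specific assumptions and mostly elided here;
 * do_per_vma_work() is a hypothetical placeholder.
 *
 *	rcu_read_lock();
 *	for (;;) {
 *		vma = lock_next_vma(mm, &iter, address);
 *		if (!vma)
 *			break;			// no more VMAs
 *		if (IS_ERR(vma)) {
 *			err = PTR_ERR(vma);	// e.g. -EINTR
 *			break;
 *		}
 *		do_per_vma_work(vma);
 *		vma_end_read(vma);
 *	}
 *	rcu_read_unlock();
 */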

#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return true;
}
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma) { return 0; }
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
	{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
							unsigned long address)
{
	return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	if (!ret)
		mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}
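
/*
 * Usage sketch (illustrative, not part of this header): the killable
 * write-lock pattern, commonly used on syscall paths so a fatal signal
 * does not leave the task stuck waiting for mmap_lock.
 *
 *	if (mmap_write_lock_killable(mm))
 *		return -EINTR;
 *	// ... update the address space ...
 *	mmap_write_unlock(mm);
 *	return 0;
 */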

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
	vma_end_write_all(mm);
	up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
	__mmap_lock_trace_acquire_returned(mm, false, true);
	vma_end_write_all(mm);
	downgrade_write(&mm->mmap_lock);
}
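
/*
 * Usage sketch (illustrative, not part of this header): downgrading keeps
 * the address space stable for a read-mostly tail while letting other
 * readers in, and (per vma_end_write_all() above) also releases the VMA
 * write locks taken while the lock was held for write.
 *
 *	mmap_write_lock(mm);
 *	// ... insert or resize mappings ...
 *	mmap_write_downgrade(mm);
 *	// ... read-only work, e.g. populating the new range ...
 *	mmap_read_unlock(mm);
 */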

static inline void mmap_read_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, false);
	down_read(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, ret == 0);
	return ret;
}

static inline bool mmap_read_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, false, ret);
	return ret;
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read(&mm->mmap_lock);
}

DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
	     mmap_read_lock(_T), mmap_read_unlock(_T))
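
/*
 * Usage sketch (illustrative, not part of this header): the guard defined
 * above gives scope-based locking via <linux/cleanup.h>, so early returns
 * cannot leak the mmap read lock. hypothetical_inspect() is a placeholder.
 *
 *	static int hypothetical_inspect(struct mm_struct *mm, unsigned long addr)
 *	{
 *		guard(mmap_read_lock)(mm);	// unlocked automatically at scope exit
 *
 *		if (!vma_lookup(mm, addr))
 *			return -ENOENT;
 *		return 0;
 *	}
 */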

static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
	return rwsem_is_contended(&mm->mmap_lock);
}

#endif /* _LINUX_MMAP_LOCK_H */