/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_MM_H
#define _LINUX_SCHED_MM_H

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>

/*
 * Routines for handling mm_structs
 */
extern struct mm_struct *mm_alloc(void);

/**
 * mmgrab() - Pin a &struct mm_struct.
 * @mm: The &struct mm_struct to pin.
 *
 * Make sure that @mm will not get freed even after the owning task
 * exits. This doesn't guarantee that the associated address space
 * will still exist later on and mmget_not_zero() has to be used before
 * accessing it.
 *
 * This is the preferred way to pin @mm for a longer/unbounded amount
 * of time.
 *
 * Use mmdrop() to release the reference acquired by mmgrab().
 *
 * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmgrab(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_count);
}

extern void __mmdrop(struct mm_struct *mm);

static inline void mmdrop(struct mm_struct *mm)
{
	/*
	 * The implicit full barrier implied by atomic_dec_and_test() is
	 * required by the membarrier system call before returning to
	 * user-space, after storing to rq->curr.
	 */
	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
		__mmdrop(mm);
}
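
/*
 * Illustrative sketch (not part of the upstream header): a caller that
 * already has a valid mm pins the bare structure with mmgrab() so the
 * mm_struct stays allocated even after the owning task exits; a later
 * consumer upgrades to a full address-space reference with
 * mmget_not_zero() (declared below) before touching it, then drops both
 * references.  my_saved_mm, my_save_mm(), my_use_and_release_saved_mm()
 * and my_inspect_address_space() are hypothetical names.
 *
 *	struct mm_struct *my_saved_mm;
 *
 *	void my_save_mm(struct mm_struct *mm)
 *	{
 *		mmgrab(mm);
 *		my_saved_mm = mm;
 *	}
 *
 *	void my_use_and_release_saved_mm(void)
 *	{
 *		struct mm_struct *mm = my_saved_mm;
 *
 *		if (mmget_not_zero(mm)) {
 *			my_inspect_address_space(mm);
 *			mmput(mm);
 *		}
 *		mmdrop(mm);
 *		my_saved_mm = NULL;
 *	}
 */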
/*
 * This has to be called after a get_task_mm()/mmget_not_zero()
 * followed by taking the mmap_lock for writing before modifying the
 * vmas or anything the coredump pretends not to change from under it.
 *
 * It also has to be called when mmgrab() is used in the context of
 * the process, but then the mm_count refcount is transferred outside
 * the context of the process to run down_write() on that pinned mm.
 *
 * NOTE: find_extend_vma() called from GUP context is the only place
 * that can modify the "mm" (notably the vm_start/end) under mmap_lock
 * for reading and outside the context of the process, so it is also
 * the only case that holds the mmap_lock for reading that must call
 * this function. Generally if the mmap_lock is held for reading
 * there's no need for this check after get_task_mm()/mmget_not_zero().
 *
 * This function can be obsoleted and the check can be removed once the
 * coredump code holds the mmap_lock for writing before invoking the
 * ->core_dump methods.
 */
static inline bool mmget_still_valid(struct mm_struct *mm)
{
	return likely(!mm->core_state);
}

/**
 * mmget() - Pin the address space associated with a &struct mm_struct.
 * @mm: The address space to pin.
 *
 * Make sure that the address space of the given &struct mm_struct doesn't
 * go away. This does not protect against parts of the address space being
 * modified or freed, however.
 *
 * Never use this function to pin this address space for an
 * unbounded/indefinite amount of time.
 *
 * Use mmput() to release the reference acquired by mmget().
 *
 * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmget(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_users);
}

static inline bool mmget_not_zero(struct mm_struct *mm)
{
	return atomic_inc_not_zero(&mm->mm_users);
}

/* mmput gets rid of the mappings and all user-space */
extern void mmput(struct mm_struct *);
#ifdef CONFIG_MMU
/* same as above but performs the slow path from the async context. Can
 * be called from the atomic context as well
 */
void mmput_async(struct mm_struct *);
#endif

/* Grab a reference to a task's mm, if it is not already going away */
extern struct mm_struct *get_task_mm(struct task_struct *task);
/*
 * Grab a reference to a task's mm, if it is not already going away
 * and ptrace_may_access with the mode parameter passed to it
 * succeeds.
 */
extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
/* Remove the current task's stale references to the old mm_struct on exit() */
extern void exit_mm_release(struct task_struct *, struct mm_struct *);
/* Remove the current task's stale references to the old mm_struct on exec() */
extern void exec_mm_release(struct task_struct *, struct mm_struct *);
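
/*
 * Illustrative sketch (not part of the upstream header): the common
 * pattern for looking at another task's address space takes an mm_users
 * reference with get_task_mm(), walks the VMAs under mmap_read_lock(),
 * and releases the reference with mmput() when done.  my_count_vmas()
 * is a hypothetical helper.
 *
 *	static int my_count_vmas(struct task_struct *task)
 *	{
 *		struct mm_struct *mm = get_task_mm(task);
 *		struct vm_area_struct *vma;
 *		int nr = 0;
 *
 *		if (!mm)
 *			return -ESRCH;
 *
 *		mmap_read_lock(mm);
 *		for (vma = mm->mmap; vma; vma = vma->vm_next)
 *			nr++;
 *		mmap_read_unlock(mm);
 *
 *		mmput(mm);
 *		return nr;
 *	}
 */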
#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
#else
static inline void mm_update_next_owner(struct mm_struct *mm)
{
}
#endif /* CONFIG_MEMCG */

#ifdef CONFIG_MMU
extern void arch_pick_mmap_layout(struct mm_struct *mm,
				  struct rlimit *rlim_stack);
extern unsigned long
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
		       unsigned long, unsigned long);
extern unsigned long
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
			       unsigned long len, unsigned long pgoff,
			       unsigned long flags);
#else
static inline void arch_pick_mmap_layout(struct mm_struct *mm,
					 struct rlimit *rlim_stack) {}
#endif

static inline bool in_vfork(struct task_struct *tsk)
{
	bool ret;

	/*
	 * need RCU to access ->real_parent if CLONE_VM was used along with
	 * CLONE_PARENT.
	 *
	 * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
	 * imply CLONE_VM
	 *
	 * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
	 * ->real_parent is not necessarily the task doing vfork(), so in
	 * theory we can't rely on task_lock() if we want to dereference it.
	 *
	 * And in this case we can't trust the real_parent->mm == tsk->mm
	 * check, it can be a false negative. But we do not care, if init or
	 * another oom-unkillable task does this it should blame itself.
	 */
	rcu_read_lock();
	ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
	rcu_read_unlock();

	return ret;
}

/*
 * Applies per-task gfp context to the given allocation flags.
 * PF_MEMALLOC_NOIO implies GFP_NOIO
 * PF_MEMALLOC_NOFS implies GFP_NOFS
 * PF_MEMALLOC_NOCMA implies no allocation from CMA region.
 */
static inline gfp_t current_gfp_context(gfp_t flags)
{
	if (unlikely(current->flags &
		     (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_NOCMA))) {
		/*
		 * NOIO implies both NOIO and NOFS and it is a weaker context
		 * so always make sure it takes precedence
		 */
		if (current->flags & PF_MEMALLOC_NOIO)
			flags &= ~(__GFP_IO | __GFP_FS);
		else if (current->flags & PF_MEMALLOC_NOFS)
			flags &= ~__GFP_FS;
#ifdef CONFIG_CMA
		if (current->flags & PF_MEMALLOC_NOCMA)
			flags &= ~__GFP_MOVABLE;
#endif
	}
	return flags;
}

#ifdef CONFIG_LOCKDEP
extern void __fs_reclaim_acquire(void);
extern void __fs_reclaim_release(void);
extern void fs_reclaim_acquire(gfp_t gfp_mask);
extern void fs_reclaim_release(gfp_t gfp_mask);
#else
static inline void __fs_reclaim_acquire(void) { }
static inline void __fs_reclaim_release(void) { }
static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif

/**
 * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
 *
 * This function marks the beginning of the GFP_NOIO allocation scope.
 * All further allocations will implicitly drop the __GFP_IO flag and so
 * they are safe for the IO critical section from the allocation recursion
 * point of view. Use memalloc_noio_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_noio_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
	current->flags |= PF_MEMALLOC_NOIO;
	return flags;
}

/**
 * memalloc_noio_restore - Ends the implicit GFP_NOIO scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function.
 * Always make sure that the given flags is the return value from the
 * pairing memalloc_noio_save call.
 */
static inline void memalloc_noio_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
}

/**
 * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope.
 *
 * This function marks the beginning of the GFP_NOFS allocation scope.
 * All further allocations will implicitly drop the __GFP_FS flag and so
 * they are safe for the FS critical section from the allocation recursion
 * point of view. Use memalloc_nofs_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_nofs_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
	current->flags |= PF_MEMALLOC_NOFS;
	return flags;
}

/**
 * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function.
 * Always make sure that the given flags is the return value from the
 * pairing memalloc_nofs_save call.
 */
static inline void memalloc_nofs_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
}

static inline unsigned int memalloc_noreclaim_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	return flags;
}

static inline void memalloc_noreclaim_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
}

#ifdef CONFIG_CMA
static inline unsigned int memalloc_nocma_save(void)
{
	unsigned int flags = current->flags & PF_MEMALLOC_NOCMA;

	current->flags |= PF_MEMALLOC_NOCMA;
	return flags;
}

static inline void memalloc_nocma_restore(unsigned int flags)
{
	current->flags = (current->flags & ~PF_MEMALLOC_NOCMA) | flags;
}
#else
static inline unsigned int memalloc_nocma_save(void)
{
	return 0;
}

static inline void memalloc_nocma_restore(unsigned int flags)
{
}
#endif
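
/*
 * Illustrative sketch (not part of the upstream header): a filesystem
 * path that must not recurse into the FS during reclaim opens an
 * implicit GFP_NOFS scope, so even a plain GFP_KERNEL allocation inside
 * the scope behaves as GFP_NOFS via current_gfp_context().  The
 * memalloc_noio_save()/memalloc_noio_restore() pair is used the same
 * way for code on the IO/block path.  my_fs_transaction_alloc() and
 * "nitems" are hypothetical; the caller is assumed to include
 * <linux/slab.h>.
 *
 *	static void *my_fs_transaction_alloc(size_t nitems)
 *	{
 *		unsigned int nofs_flags;
 *		void *p;
 *
 *		nofs_flags = memalloc_nofs_save();
 *		p = kmalloc_array(nitems, sizeof(u64), GFP_KERNEL);
 *		memalloc_nofs_restore(nofs_flags);
 *
 *		return p;
 *	}
 */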
#ifdef CONFIG_MEMCG
/**
 * memalloc_use_memcg - Starts the remote memcg charging scope.
 * @memcg: memcg to charge.
 *
 * This function marks the beginning of the remote memcg charging scope. All the
 * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
 * given memcg.
 *
 * NOTE: This function is not nesting safe.
 */
static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
{
	WARN_ON_ONCE(current->active_memcg);
	current->active_memcg = memcg;
}

/**
 * memalloc_unuse_memcg - Ends the remote memcg charging scope.
 *
 * This function marks the end of the remote memcg charging scope started by
 * memalloc_use_memcg().
 */
static inline void memalloc_unuse_memcg(void)
{
	current->active_memcg = NULL;
}
#else
static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
{
}

static inline void memalloc_unuse_memcg(void)
{
}
#endif

#ifdef CONFIG_MEMBARRIER
enum {
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED			= (1U << 1),
	MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY			= (1U << 2),
	MEMBARRIER_STATE_GLOBAL_EXPEDITED			= (1U << 3),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY	= (1U << 4),
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE		= (1U << 5),
};

enum {
	MEMBARRIER_FLAG_SYNC_CORE	= (1U << 0),
};

#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
#include <asm/membarrier.h>
#endif

static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
	if (current->mm != mm)
		return;
	if (likely(!(atomic_read(&mm->membarrier_state) &
		     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
		return;
	sync_core_before_usermode();
}

extern void membarrier_exec_mmap(struct mm_struct *mm);

#else
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
					     struct mm_struct *next,
					     struct task_struct *tsk)
{
}
#endif
static inline void membarrier_exec_mmap(struct mm_struct *mm)
{
}
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
}
#endif

#endif /* _LINUX_SCHED_MM_H */