/* SPDX-License-Identifier: GPL-2.0 */
/*
 * include/linux/userfaultfd_k.h
 *
 * Copyright (C) 2015 Red Hat, Inc.
 *
 */

#ifndef _LINUX_USERFAULTFD_K_H
#define _LINUX_USERFAULTFD_K_H

#ifdef CONFIG_USERFAULTFD

#include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */

#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/leafops.h>
#include <asm-generic/pgtable_uffd.h>
#include <linux/hugetlb_inline.h>

/* The set of all possible UFFD-related VM flags. */
#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR)

/*
 * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
 * new flags, since they might collide with O_* ones. We want
 * to re-use O_* flags that couldn't possibly have a meaning
 * from userfaultfd, in order to leave a free define-space for
 * shared O_* flags.
 */
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)

/*
 * Start with fault_pending_wqh and fault_wqh so they're more likely
 * to be in the same cacheline.
 *
 * Locking order:
 *	fd_wqh.lock
 *		fault_pending_wqh.lock
 *			fault_wqh.lock
 *		event_wqh.lock
 *
 * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
 * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
 * also taken in IRQ context.
 */
struct userfaultfd_ctx {
	/* waitqueue head for the pending (i.e. not read) userfaults */
	wait_queue_head_t fault_pending_wqh;
	/* waitqueue head for the userfaults */
	wait_queue_head_t fault_wqh;
	/* waitqueue head for the pseudo fd to wakeup poll/read */
	wait_queue_head_t fd_wqh;
	/* waitqueue head for events */
	wait_queue_head_t event_wqh;
	/* a refile sequence protected by fault_pending_wqh lock */
	seqcount_spinlock_t refile_seq;
	/* pseudo fd refcounting */
	refcount_t refcount;
	/* userfaultfd syscall flags */
	unsigned int flags;
	/* features requested from userspace */
	unsigned int features;
	/* released */
	bool released;
	/*
	 * Prevents userfaultfd operations (fill/move/wp) from happening while
	 * some non-cooperative event(s) is taking place. Increments are done
	 * in write-mode, whereas userfaultfd operations, which include
	 * reading mmap_changing, are done in read-mode.
	 */
	struct rw_semaphore map_changing_lock;
	/* memory mappings are changing because of non-cooperative event */
	atomic_t mmap_changing;
	/* mm with one or more vmas attached to this userfaultfd_ctx */
	struct mm_struct *mm;
};
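
/*
 * Illustrative sketch (not part of this header): per the order above,
 * the waitqueue locks nest and must be taken with IRQs disabled, so a
 * waker would typically do something along the lines of:
 *
 *	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 *	// wake or refile pending userfaults here
 *	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 *
 * Taking one of these locks with IRQs enabled risks a deadlock, because
 * fd_wqh.lock participates in a lock chain that is also entered from
 * IRQ context (via aio_poll()).
 */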

extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);

/* A combined operation mode + behavior flags. */
typedef unsigned int __bitwise uffd_flags_t;

/* Mutually exclusive modes of operation. */
enum mfill_atomic_mode {
	MFILL_ATOMIC_COPY,
	MFILL_ATOMIC_ZEROPAGE,
	MFILL_ATOMIC_CONTINUE,
	MFILL_ATOMIC_POISON,
	NR_MFILL_ATOMIC_MODES,
};

#define MFILL_ATOMIC_MODE_BITS (const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1)
#define MFILL_ATOMIC_BIT(nr) BIT(MFILL_ATOMIC_MODE_BITS + (nr))
#define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr))
#define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1))

static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected)
{
	return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected);
}

static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode)
{
	flags &= ~MFILL_ATOMIC_MODE_MASK;
	return flags | ((__force uffd_flags_t) mode);
}

/* Flags controlling behavior. These behavior changes are mode-independent. */
#define MFILL_ATOMIC_WP MFILL_ATOMIC_FLAG(0)
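
/*
 * Worked example of the encoding above (illustrative, derived from the
 * macros): with four modes, NR_MFILL_ATOMIC_MODES - 1 == 3, so
 * MFILL_ATOMIC_MODE_BITS == const_ilog2(3) + 1 == 2 and the mode occupies
 * bits [1:0] (MFILL_ATOMIC_MODE_MASK == 0x3).  Behavior flags start at
 * bit 2, hence MFILL_ATOMIC_WP == BIT(2) == 0x4.  A write-protected copy
 * would be requested as:
 *
 *	uffd_flags_t flags = uffd_flags_set_mode(0, MFILL_ATOMIC_COPY);
 *
 *	flags |= MFILL_ATOMIC_WP;
 *	// uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY) remains true:
 *	// the WP bit lives outside MFILL_ATOMIC_MODE_MASK.
 */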

extern int mfill_atomic_install_pte(pmd_t *dst_pmd,
				    struct vm_area_struct *dst_vma,
				    unsigned long dst_addr, struct page *page,
				    bool newly_allocated, uffd_flags_t flags);

extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start,
				 unsigned long src_start, unsigned long len,
				 uffd_flags_t flags);
extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx,
				     unsigned long dst_start,
				     unsigned long len);
extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start,
				     unsigned long len, uffd_flags_t flags);
extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start,
				   unsigned long len, uffd_flags_t flags);
extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
			       unsigned long len, bool enable_wp);
extern long uffd_wp_range(struct vm_area_struct *vma,
			  unsigned long start, unsigned long len, bool enable_wp);

/* move_pages */
void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2);
void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2);
ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
		   unsigned long src_start, unsigned long len, __u64 flags);
int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval,
			struct vm_area_struct *dst_vma,
			struct vm_area_struct *src_vma,
			unsigned long dst_addr, unsigned long src_addr);

/* mm helpers */
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
					struct vm_userfaultfd_ctx vm_ctx)
{
	return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
}

/*
 * Never enable huge pmd sharing on certain uffd-registered vmas:
 *
 * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry.
 *
 * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults
 *   for VMAs which share huge pmds.  (If you have two mappings to the same
 *   underlying pages, and fault in the non-UFFD-registered one with a write,
 *   with huge pmd sharing this would *also* setup the second UFFD-registered
 *   mapping, and we'd not get minor faults.)
 */
static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
{
	return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR);
}

/*
 * Don't do fault around for either WP or MINOR registered uffd ranges.  For
 * a MINOR registered range, fault around would be a total disaster: ptes
 * could be installed without notifications.  For WP it should mostly be fine
 * as long as fault around checks for pte_none() before the installation,
 * but to be super safe we just forbid it.
 */
static inline bool uffd_disable_fault_around(struct vm_area_struct *vma)
{
	return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR);
}

static inline bool userfaultfd_missing(struct vm_area_struct *vma)
{
	return vma->vm_flags & VM_UFFD_MISSING;
}

static inline bool userfaultfd_wp(struct vm_area_struct *vma)
{
	return vma->vm_flags & VM_UFFD_WP;
}

static inline bool userfaultfd_minor(struct vm_area_struct *vma)
{
	return vma->vm_flags & VM_UFFD_MINOR;
}

static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
				      pte_t pte)
{
	return userfaultfd_wp(vma) && pte_uffd_wp(pte);
}

static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
					   pmd_t pmd)
{
	return userfaultfd_wp(vma) && pmd_uffd_wp(pmd);
}

static inline bool userfaultfd_armed(struct vm_area_struct *vma)
{
	return vma->vm_flags & __VM_UFFD_FLAGS;
}
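
/*
 * Illustrative sketch (assumed caller, not defined here): the predicates
 * above are cheap vm_flags tests meant for the fault paths.  The anonymous
 * fault path in mm/memory.c, for example, does roughly:
 *
 *	if (userfaultfd_missing(vma)) {
 *		// release the pte lock, then hand the fault to userspace
 *		return handle_userfault(vmf, VM_UFFD_MISSING);
 *	}
 */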

static inline bool vma_can_userfault(struct vm_area_struct *vma,
				     vm_flags_t vm_flags,
				     bool wp_async)
{
	vm_flags &= __VM_UFFD_FLAGS;

	if (vma->vm_flags & VM_DROPPABLE)
		return false;

	if ((vm_flags & VM_UFFD_MINOR) &&
	    (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
		return false;

	/*
	 * If wp async is enabled, and WP is the only mode enabled, allow any
	 * memory type.
	 */
	if (wp_async && (vm_flags == VM_UFFD_WP))
		return true;

	/*
	 * If the user requested uffd-wp but pte markers for uffd-wp are not
	 * enabled, then only anonymous memory is supported, not shmem or
	 * hugetlbfs.
	 */
	if (!uffd_supports_wp_marker() && (vm_flags & VM_UFFD_WP) &&
	    !vma_is_anonymous(vma))
		return false;

	/* By default, allow any of anon|shmem|hugetlb */
	return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
	       vma_is_shmem(vma);
}

static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
{
	struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx;

	return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0;
}

extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
extern void dup_userfaultfd_complete(struct list_head *);
void dup_userfaultfd_fail(struct list_head *);

extern void mremap_userfaultfd_prep(struct vm_area_struct *,
				    struct vm_userfaultfd_ctx *);
extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
					unsigned long from, unsigned long to,
					unsigned long len);
void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *);

extern bool userfaultfd_remove(struct vm_area_struct *vma,
			       unsigned long start,
			       unsigned long end);

extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
		unsigned long start, unsigned long end, struct list_head *uf);
extern void userfaultfd_unmap_complete(struct mm_struct *mm,
				       struct list_head *uf);
extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma);
extern bool userfaultfd_wp_async(struct vm_area_struct *vma);

void userfaultfd_reset_ctx(struct vm_area_struct *vma);

struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi,
					     struct vm_area_struct *prev,
					     struct vm_area_struct *vma,
					     unsigned long start,
					     unsigned long end);

int userfaultfd_register_range(struct userfaultfd_ctx *ctx,
			       struct vm_area_struct *vma,
			       vm_flags_t vm_flags,
			       unsigned long start, unsigned long end,
			       bool wp_async);

void userfaultfd_release_new(struct userfaultfd_ctx *ctx);

void userfaultfd_release_all(struct mm_struct *mm,
			     struct userfaultfd_ctx *ctx);

static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
{
	/* Only wr-protect mode uses pte markers */
	if (!userfaultfd_wp(vma))
		return false;

	/* File-based uffd-wp always needs markers */
	if (!vma_is_anonymous(vma))
		return true;

	/*
	 * Anonymous uffd-wp only needs the markers if WP_UNPOPULATED is
	 * enabled (to apply markers on zero pages).
	 */
	return userfaultfd_wp_unpopulated(vma);
}
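
/*
 * Example outcomes of the marker policy above (illustrative):
 *
 *   - anon VMA, uffd-wp, WP_UNPOPULATED off -> no markers; a
 *     never-populated pte simply stays pte_none().
 *   - anon VMA, uffd-wp, WP_UNPOPULATED on  -> markers, so that
 *     unpopulated ranges still report wr-protection on first touch.
 *   - shmem/hugetlb VMA, uffd-wp            -> markers always, since
 *     file ptes can be zapped and the wp bit must survive that.
 */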

/*
 * Returns true if this is a swap pte and was uffd-wp wr-protected in either
 * form (pte marker or a normal swap pte), false otherwise.
 */
static inline bool pte_swp_uffd_wp_any(pte_t pte)
{
	if (!uffd_supports_wp_marker())
		return false;

	if (pte_present(pte))
		return false;

	if (pte_swp_uffd_wp(pte))
		return true;

	if (pte_is_uffd_wp_marker(pte))
		return true;

	return false;
}
#else /* CONFIG_USERFAULTFD */

/* mm helpers */
static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
				unsigned long reason)
{
	return VM_FAULT_SIGBUS;
}

static inline long uffd_wp_range(struct vm_area_struct *vma,
				 unsigned long start, unsigned long len,
				 bool enable_wp)
{
	return false;
}

static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
					struct vm_userfaultfd_ctx vm_ctx)
{
	return true;
}

static inline bool userfaultfd_missing(struct vm_area_struct *vma)
{
	return false;
}

static inline bool userfaultfd_wp(struct vm_area_struct *vma)
{
	return false;
}

static inline bool userfaultfd_minor(struct vm_area_struct *vma)
{
	return false;
}

static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
				      pte_t pte)
{
	return false;
}

static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
					   pmd_t pmd)
{
	return false;
}

static inline bool userfaultfd_armed(struct vm_area_struct *vma)
{
	return false;
}

static inline int dup_userfaultfd(struct vm_area_struct *vma,
				  struct list_head *l)
{
	return 0;
}

static inline void dup_userfaultfd_complete(struct list_head *l)
{
}

static inline void dup_userfaultfd_fail(struct list_head *l)
{
}

static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
					   struct vm_userfaultfd_ctx *ctx)
{
}

static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
					       unsigned long from,
					       unsigned long to,
					       unsigned long len)
{
}

static inline void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *ctx)
{
}

static inline bool userfaultfd_remove(struct vm_area_struct *vma,
				      unsigned long start,
				      unsigned long end)
{
	return true;
}

static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
					 unsigned long start, unsigned long end,
					 struct list_head *uf)
{
	return 0;
}

static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
					      struct list_head *uf)
{
}

static inline bool uffd_disable_fault_around(struct vm_area_struct *vma)
{
	return false;
}

static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma)
{
	return false;
}

static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
{
	return false;
}

static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
{
	return false;
}

static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
{
	return false;
}

/*
 * Returns true if this is a swap pte and was uffd-wp wr-protected in either
 * form (pte marker or a normal swap pte), false otherwise.
 */
static inline bool pte_swp_uffd_wp_any(pte_t pte)
{
	return false;
}
#endif /* CONFIG_USERFAULTFD */
#endif /* _LINUX_USERFAULTFD_K_H */