/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_RMAP_H
#define _LINUX_RMAP_H
/*
 * Declarations for Reverse Mapping functions in mm/rmap.c
 */

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/memcontrol.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/memremap.h>

/*
 * The anon_vma heads a list of private "related" vmas, to scan if
 * an anonymous page pointing to this anon_vma needs to be unmapped:
 * the vmas on the list will be related by forking, or by splitting.
 *
 * Since vmas come and go as they are split and merged (particularly
 * in mprotect), the mapping field of an anonymous page cannot point
 * directly to a vma: instead it points to an anon_vma, on whose list
 * the related vmas can be easily linked or unlinked.
 *
 * After unlinking the last vma on the list, we must garbage collect
 * the anon_vma object itself: we're guaranteed no page can be
 * pointing to this anon_vma once its vma list is empty.
 */
struct anon_vma {
	struct anon_vma *root;		/* Root of this anon_vma tree */
	struct rw_semaphore rwsem;	/* W: modification, R: walking the list */
	/*
	 * The refcount is taken on an anon_vma when there is no
	 * guarantee that the vma of page tables will exist for
	 * the duration of the operation. A caller that takes
	 * the reference is responsible for clearing up the
	 * anon_vma if they are the last user on release.
	 */
	atomic_t refcount;

	/*
	 * Count of child anon_vmas. Equal to the count of all anon_vmas that
	 * have ->parent pointing to this one, including itself.
	 *
	 * This counter is used when deciding whether to reuse an anon_vma
	 * instead of forking a new one. See the comments in anon_vma_clone().
	 */
	unsigned long num_children;
	/* Count of VMAs whose ->anon_vma pointer points to this object. */
	unsigned long num_active_vmas;

	struct anon_vma *parent;	/* Parent of this anon_vma */

	/*
	 * NOTE: the LSB of the rb_root.rb_node is set by
	 * mm_take_all_locks() _after_ taking the above lock. So the
	 * rb_root must only be read/written after taking the above lock
	 * to be sure to see a valid next pointer. The LSB bit itself
	 * is serialized by a system wide lock only visible to
	 * mm_take_all_locks() (mm_all_locks_mutex).
	 */

	/* Interval tree of private "related" vmas */
	struct rb_root_cached rb_root;
};

/*
 * The copy-on-write semantics of fork mean that an anon_vma
 * can become associated with multiple processes. Furthermore,
 * each child process will have its own anon_vma, where new
 * pages for that process are instantiated.
 *
 * This structure allows us to find the anon_vmas associated
 * with a VMA, or the VMAs associated with an anon_vma.
 * The "same_vma" list contains the anon_vma_chains linking
 * all the anon_vmas associated with this VMA.
 * The "rb" field indexes on an interval tree the anon_vma_chains
 * which link all the VMAs associated with this anon_vma.
 */
struct anon_vma_chain {
	struct vm_area_struct *vma;
	struct anon_vma *anon_vma;
	struct list_head same_vma;	/* locked by mmap_lock & page_table_lock */
	struct rb_node rb;		/* locked by anon_vma->rwsem */
	unsigned long rb_subtree_last;
#ifdef CONFIG_DEBUG_VM_RB
	unsigned long cached_vma_start, cached_vma_last;
#endif
};
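
/*
 * Informal overview of how the two structures relate: each
 * anon_vma_chain joins exactly one vma with exactly one anon_vma. It
 * hangs off the vma's ->anon_vma_chain list via "same_vma" and sits in
 * the anon_vma's ->rb_root interval tree via "rb", so rmap code can go
 * from a vma to all anon_vmas it is associated with, and from an
 * anon_vma to all vmas that might map its pages. All anon_vmas of one
 * fork tree share ->root and therefore serialize on the root's rwsem.
 */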

enum ttu_flags {
	TTU_SPLIT_HUGE_PMD	= 0x4,	/* split huge PMD if any */
	TTU_IGNORE_MLOCK	= 0x8,	/* ignore mlock */
	TTU_SYNC		= 0x10,	/* avoid racy checks with PVMW_SYNC */
	TTU_HWPOISON		= 0x20,	/* do convert pte to hwpoison entry */
	TTU_BATCH_FLUSH		= 0x40,	/* Batch TLB flushes where possible
					 * and caller guarantees they will
					 * do a final flush if necessary */
	TTU_RMAP_LOCKED		= 0x80,	/* do not grab rmap lock:
					 * caller holds it */
};

#ifdef CONFIG_MMU
static inline void get_anon_vma(struct anon_vma *anon_vma)
{
	atomic_inc(&anon_vma->refcount);
}

void __put_anon_vma(struct anon_vma *anon_vma);

static inline void put_anon_vma(struct anon_vma *anon_vma)
{
	if (atomic_dec_and_test(&anon_vma->refcount))
		__put_anon_vma(anon_vma);
}

static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
{
	down_write(&anon_vma->root->rwsem);
}

static inline int anon_vma_trylock_write(struct anon_vma *anon_vma)
{
	return down_write_trylock(&anon_vma->root->rwsem);
}

static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
{
	up_write(&anon_vma->root->rwsem);
}

static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
{
	down_read(&anon_vma->root->rwsem);
}

static inline int anon_vma_trylock_read(struct anon_vma *anon_vma)
{
	return down_read_trylock(&anon_vma->root->rwsem);
}

static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
{
	up_read(&anon_vma->root->rwsem);
}

/*
 * anon_vma helper functions.
 */
void anon_vma_init(void);	/* create anon_vma_cachep */
int __anon_vma_prepare(struct vm_area_struct *);
void unlink_anon_vmas(struct vm_area_struct *);
int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);

static inline int anon_vma_prepare(struct vm_area_struct *vma)
{
	if (likely(vma->anon_vma))
		return 0;

	return __anon_vma_prepare(vma);
}

static inline void anon_vma_merge(struct vm_area_struct *vma,
		struct vm_area_struct *next)
{
	VM_BUG_ON_VMA(vma->anon_vma != next->anon_vma, vma);
	unlink_anon_vmas(next);
}

struct anon_vma *folio_get_anon_vma(struct folio *folio);
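
/*
 * Typical usage (illustrative sketch, not taken verbatim from mm/):
 * rmap walkers pin the anon_vma with a reference and then take the
 * root rwsem for reading, while modifications of the tree take it for
 * writing. Fault handlers call anon_vma_prepare() before installing
 * the first anonymous page into a vma, e.g.:
 *
 *	struct anon_vma *anon_vma = folio_get_anon_vma(folio);
 *
 *	if (anon_vma) {
 *		anon_vma_lock_read(anon_vma);
 *		... walk anon_vma->rb_root ...
 *		anon_vma_unlock_read(anon_vma);
 *		put_anon_vma(anon_vma);
 *	}
 *
 * and, in a fault handler, before folio_add_new_anon_rmap():
 *
 *	if (unlikely(anon_vma_prepare(vma)))
 *		return VM_FAULT_OOM;
 */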

/* RMAP flags, currently only relevant for some anon rmap operations. */
typedef int __bitwise rmap_t;

/*
 * No special request: A mapped anonymous (sub)page is possibly shared between
 * processes.
 */
#define RMAP_NONE		((__force rmap_t)0)

/* The anonymous (sub)page is exclusive to a single process. */
#define RMAP_EXCLUSIVE		((__force rmap_t)BIT(0))

/*
 * Internally, we're using an enum to specify the granularity. We make the
 * compiler emit specialized code for each granularity.
 */
enum rmap_level {
	RMAP_LEVEL_PTE = 0,
	RMAP_LEVEL_PMD,
};

static inline void __folio_rmap_sanity_checks(struct folio *folio,
		struct page *page, int nr_pages, enum rmap_level level)
{
	/* hugetlb folios are handled separately. */
	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);

	/*
	 * TODO: we get driver-allocated folios that have nothing to do with
	 * the rmap using vm_insert_page(); therefore, we cannot assume that
	 * folio_test_large_rmappable() holds for large folios. We should
	 * handle any desired mapcount+stats accounting for these folios in
	 * VM_MIXEDMAP VMAs separately, and then sanity-check here that
	 * we really only get rmappable folios.
	 */

	VM_WARN_ON_ONCE(nr_pages <= 0);
	VM_WARN_ON_FOLIO(page_folio(page) != folio, folio);
	VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio);

	switch (level) {
	case RMAP_LEVEL_PTE:
		break;
	case RMAP_LEVEL_PMD:
		/*
		 * We don't support folios larger than a single PMD yet. So
		 * when RMAP_LEVEL_PMD is set, we assume that we are creating
		 * a single "entire" mapping of the folio.
		 */
		VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio);
		VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio);
		break;
	default:
		VM_WARN_ON_ONCE(true);
	}
}

/*
 * rmap interfaces called when adding or removing the pte of a page
 */
void folio_move_anon_rmap(struct folio *, struct vm_area_struct *);
void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages,
		struct vm_area_struct *, unsigned long address, rmap_t flags);
#define folio_add_anon_rmap_pte(folio, page, vma, address, flags) \
	folio_add_anon_rmap_ptes(folio, page, 1, vma, address, flags)
void folio_add_anon_rmap_pmd(struct folio *, struct page *,
		struct vm_area_struct *, unsigned long address, rmap_t flags);
void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
		unsigned long address);
void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
		struct vm_area_struct *);
#define folio_add_file_rmap_pte(folio, page, vma) \
	folio_add_file_rmap_ptes(folio, page, 1, vma)
void folio_add_file_rmap_pmd(struct folio *, struct page *,
		struct vm_area_struct *);
void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages,
		struct vm_area_struct *);
#define folio_remove_rmap_pte(folio, page, vma) \
	folio_remove_rmap_ptes(folio, page, 1, vma)
void folio_remove_rmap_pmd(struct folio *, struct page *,
		struct vm_area_struct *);

void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
		unsigned long address, rmap_t flags);
void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
		unsigned long address);
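
/*
 * Illustrative sketch (not lifted from a real caller): the add/remove
 * functions above are paired around the page table update, with the
 * page table lock held. Mapping a file folio into one PTE and later
 * zapping it looks roughly like:
 *
 *	folio_add_file_rmap_pte(folio, page, vma);
 *	set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, vma->vm_page_prot));
 *	...
 *	pte_clear(vma->vm_mm, addr, pte);
 *	folio_remove_rmap_pte(folio, page, vma);
 *
 * The anon variants additionally take the faulting address and rmap_t
 * flags (e.g. RMAP_EXCLUSIVE).
 */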

/* See folio_try_dup_anon_rmap_*() */
static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
		struct vm_area_struct *vma)
{
	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);

	if (PageAnonExclusive(&folio->page)) {
		if (unlikely(folio_needs_cow_for_dma(vma, folio)))
			return -EBUSY;
		ClearPageAnonExclusive(&folio->page);
	}
	atomic_inc(&folio->_entire_mapcount);
	atomic_inc(&folio->_large_mapcount);
	return 0;
}

/* See folio_try_share_anon_rmap_*() */
static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
{
	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
	VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio);

	/* Paired with the memory barrier in try_grab_folio(). */
	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
		smp_mb();

	if (unlikely(folio_maybe_dma_pinned(folio)))
		return -EBUSY;
	ClearPageAnonExclusive(&folio->page);

	/*
	 * This is conceptually a smp_wmb() paired with the smp_rmb() in
	 * gup_must_unshare().
	 */
	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
		smp_mb__after_atomic();
	return 0;
}

static inline void hugetlb_add_file_rmap(struct folio *folio)
{
	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);

	atomic_inc(&folio->_entire_mapcount);
	atomic_inc(&folio->_large_mapcount);
}

static inline void hugetlb_remove_rmap(struct folio *folio)
{
	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);

	atomic_dec(&folio->_entire_mapcount);
	atomic_dec(&folio->_large_mapcount);
}

static __always_inline void __folio_dup_file_rmap(struct folio *folio,
		struct page *page, int nr_pages, enum rmap_level level)
{
	const int orig_nr_pages = nr_pages;

	__folio_rmap_sanity_checks(folio, page, nr_pages, level);

	switch (level) {
	case RMAP_LEVEL_PTE:
		if (!folio_test_large(folio)) {
			atomic_inc(&page->_mapcount);
			break;
		}

		do {
			atomic_inc(&page->_mapcount);
		} while (page++, --nr_pages > 0);
		atomic_add(orig_nr_pages, &folio->_large_mapcount);
		break;
	case RMAP_LEVEL_PMD:
		atomic_inc(&folio->_entire_mapcount);
		atomic_inc(&folio->_large_mapcount);
		break;
	}
}

/**
 * folio_dup_file_rmap_ptes - duplicate PTE mappings of a page range of a folio
 * @folio:	The folio to duplicate the mappings of
 * @page:	The first page to duplicate the mappings of
 * @nr_pages:	The number of pages of which the mapping will be duplicated
 *
 * The page range of the folio is defined by [page, page + nr_pages)
 *
 * The caller needs to hold the page table lock.
 */
static inline void folio_dup_file_rmap_ptes(struct folio *folio,
		struct page *page, int nr_pages)
{
	__folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE);
}

static __always_inline void folio_dup_file_rmap_pte(struct folio *folio,
		struct page *page)
{
	__folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE);
}

/**
 * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio
 * @folio:	The folio to duplicate the mapping of
 * @page:	The first page to duplicate the mapping of
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock.
 */
static inline void folio_dup_file_rmap_pmd(struct folio *folio,
		struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	__folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PMD);
#else
	WARN_ON_ONCE(true);
#endif
}
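
/*
 * Illustrative sketch (not a real call site): when fork() copies a page
 * table range that maps a file folio, it duplicates the existing PTE
 * mappings for the child instead of re-establishing them one by one.
 * With "nr" consecutive PTEs mapping consecutive pages of the same
 * folio, that is roughly:
 *
 *	folio_dup_file_rmap_ptes(folio, page, nr);
 *	... copy the nr PTEs into the child's page table ...
 *
 * folio_dup_file_rmap_pmd() is the equivalent for a PMD-mapped THP.
 */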

static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
		struct page *page, int nr_pages, struct vm_area_struct *src_vma,
		enum rmap_level level)
{
	const int orig_nr_pages = nr_pages;
	bool maybe_pinned;
	int i;

	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
	__folio_rmap_sanity_checks(folio, page, nr_pages, level);

	/*
	 * If this folio may have been pinned by the parent process,
	 * don't allow the mappings to be duplicated; instead require that,
	 * e.g., the subpage is copied immediately for the child, so that we
	 * always guarantee the pinned folio won't be randomly replaced in the
	 * future on write faults.
	 */
	maybe_pinned = likely(!folio_is_device_private(folio)) &&
		       unlikely(folio_needs_cow_for_dma(src_vma, folio));

	/*
	 * No need to check+clear for already shared PTEs/PMDs of the
	 * folio. But if any page is PageAnonExclusive, we must fall back to
	 * copying if the folio may be pinned.
	 */
	switch (level) {
	case RMAP_LEVEL_PTE:
		if (unlikely(maybe_pinned)) {
			for (i = 0; i < nr_pages; i++)
				if (PageAnonExclusive(page + i))
					return -EBUSY;
		}

		if (!folio_test_large(folio)) {
			if (PageAnonExclusive(page))
				ClearPageAnonExclusive(page);
			atomic_inc(&page->_mapcount);
			break;
		}

		do {
			if (PageAnonExclusive(page))
				ClearPageAnonExclusive(page);
			atomic_inc(&page->_mapcount);
		} while (page++, --nr_pages > 0);
		atomic_add(orig_nr_pages, &folio->_large_mapcount);
		break;
	case RMAP_LEVEL_PMD:
		if (PageAnonExclusive(page)) {
			if (unlikely(maybe_pinned))
				return -EBUSY;
			ClearPageAnonExclusive(page);
		}
		atomic_inc(&folio->_entire_mapcount);
		atomic_inc(&folio->_large_mapcount);
		break;
	}
	return 0;
}

/**
 * folio_try_dup_anon_rmap_ptes - try duplicating PTE mappings of a page range
 *				  of a folio
 * @folio:	The folio to duplicate the mappings of
 * @page:	The first page to duplicate the mappings of
 * @nr_pages:	The number of pages of which the mapping will be duplicated
 * @src_vma:	The vm area from which the mappings are duplicated
 *
 * The page range of the folio is defined by [page, page + nr_pages)
 *
 * The caller needs to hold the page table lock and the
 * vma->vm_mm->write_protect_seq.
 *
 * Duplicating the mappings can only fail if the folio may be pinned; device
 * private folios cannot get pinned and consequently this function cannot fail
 * for them.
 *
 * If duplicating the mappings succeeds, the duplicated PTEs have to be R/O in
 * the parent and the child. They must *not* be writable after this call
 * succeeds.
 *
 * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise.
 */
static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio,
		struct page *page, int nr_pages, struct vm_area_struct *src_vma)
{
	return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma,
					 RMAP_LEVEL_PTE);
}

static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio,
		struct page *page, struct vm_area_struct *src_vma)
{
	return __folio_try_dup_anon_rmap(folio, page, 1, src_vma,
					 RMAP_LEVEL_PTE);
}

/**
 * folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range
 *				 of a folio
 * @folio:	The folio to duplicate the mapping of
 * @page:	The first page to duplicate the mapping of
 * @src_vma:	The vm area from which the mapping is duplicated
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock and the
 * vma->vm_mm->write_protect_seq.
 *
 * Duplicating the mapping can only fail if the folio may be pinned; device
 * private folios cannot get pinned and consequently this function cannot fail
 * for them.
 *
 * If duplicating the mapping succeeds, the duplicated PMD has to be R/O in
 * the parent and the child. It must *not* be writable after this call
 * succeeds.
 *
 * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise.
 */
static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio,
		struct page *page, struct vm_area_struct *src_vma)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma,
					 RMAP_LEVEL_PMD);
#else
	WARN_ON_ONCE(true);
	return -EBUSY;
#endif
}
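
/*
 * Illustrative sketch (a simplification of what fork's page table copy
 * conceptually does, not copied from mm/memory.c): try to duplicate the
 * anon mapping; if the folio may be pinned, fall back to copying the
 * page contents for the child:
 *
 *	if (folio_try_dup_anon_rmap_pte(folio, page, src_vma)) {
 *		... allocate a new folio and copy the page for the child,
 *		    leaving the parent's pinned page in place ...
 *	} else {
 *		... write-protect the parent's PTE and install a R/O
 *		    copy of it in the child ...
 *	}
 */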

static __always_inline int __folio_try_share_anon_rmap(struct folio *folio,
		struct page *page, int nr_pages, enum rmap_level level)
{
	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
	VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio);
	__folio_rmap_sanity_checks(folio, page, nr_pages, level);

	/* device private folios cannot get pinned via GUP. */
	if (unlikely(folio_is_device_private(folio))) {
		ClearPageAnonExclusive(page);
		return 0;
	}

	/*
	 * We have to make sure that when we clear PageAnonExclusive, the
	 * page is not pinned and that concurrent GUP-fast won't succeed in
	 * pinning the page.
	 *
	 * Conceptually, PageAnonExclusive clearing consists of:
	 * (A1) Clear PTE
	 * (A2) Check if the page is pinned; back off if so.
	 * (A3) Clear PageAnonExclusive
	 * (A4) Restore PTE (optional, but certainly not writable)
	 *
	 * When clearing PageAnonExclusive, we cannot possibly map the page
	 * writable again, because anon pages that may be shared must never
	 * be writable. So in any case, if the PTE was writable it cannot
	 * be writable anymore afterwards and there would be a PTE change.
	 * Only if the PTE wasn't writable might there not be a PTE change.
	 *
	 * Conceptually, GUP-fast pinning of an anon page consists of:
	 * (B1) Read the PTE
	 * (B2) FOLL_WRITE: check if the PTE is not writable; back off if so.
	 * (B3) Pin the mapped page
	 * (B4) Check if the PTE changed by re-reading it; back off if so.
	 * (B5) If the original PTE is not writable, check if
	 *	PageAnonExclusive is not set; back off if so.
	 *
	 * If the PTE was writable, we only have to make sure that GUP-fast
	 * observes a PTE change and properly backs off.
	 *
	 * If the PTE was not writable, we have to make sure that GUP-fast
	 * either detects a (temporary) PTE change or observes that
	 * PageAnonExclusive got cleared, and properly backs off.
	 *
	 * Consequently, when clearing PageAnonExclusive(), we have to make
	 * sure that (A1), (A2)/(A3) and (A4) happen in the right memory
	 * order. In GUP-fast pinning code, we have to make sure that (B3),(B4)
	 * and (B5) happen in the right memory order.
	 *
	 * We assume that there might not be a memory barrier after
	 * clearing/invalidating the PTE (A1) and before restoring the PTE (A4),
	 * so we use explicit ones here.
	 */

	/* Paired with the memory barrier in try_grab_folio(). */
	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
		smp_mb();

	if (unlikely(folio_maybe_dma_pinned(folio)))
		return -EBUSY;
	ClearPageAnonExclusive(page);

	/*
	 * This is conceptually a smp_wmb() paired with the smp_rmb() in
	 * gup_must_unshare().
	 */
	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
		smp_mb__after_atomic();
	return 0;
}
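
/*
 * Illustrative example of the race the barriers above close (informal,
 * added for clarity): without the smp_mb(), the pin check (A2) could be
 * reordered before the caller's PTE clear (A1) became visible. GUP-fast
 * could then read the still-present PTE (B1), pin the page (B3) after
 * we already sampled folio_maybe_dma_pinned(), and re-check an
 * unchanged PTE (B4), while we go on to clear PageAnonExclusive (A3):
 * the page would end up pinned but no longer exclusive, which is
 * exactly what must not happen.
 */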

/**
 * folio_try_share_anon_rmap_pte - try marking an exclusive anonymous page
 *				   mapped by a PTE possibly shared to prepare
 *				   for KSM or temporary unmapping
 * @folio:	The folio to share a mapping of
 * @page:	The mapped exclusive page
 *
 * The caller needs to hold the page table lock and has to have the page table
 * entries cleared/invalidated.
 *
 * This is similar to folio_try_dup_anon_rmap_pte(); however, it is not used
 * during fork() to duplicate mappings, but instead to prepare for KSM or for
 * temporarily unmapping parts of a folio (swap, migration) via
 * folio_remove_rmap_pte().
 *
 * Marking the mapped page shared can only fail if the folio may be pinned;
 * device private folios cannot get pinned and consequently this function
 * cannot fail.
 *
 * Returns 0 if marking the mapped page possibly shared succeeded. Returns
 * -EBUSY otherwise.
 */
static inline int folio_try_share_anon_rmap_pte(struct folio *folio,
		struct page *page)
{
	return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE);
}

/**
 * folio_try_share_anon_rmap_pmd - try marking an exclusive anonymous page
 *				   range mapped by a PMD possibly shared to
 *				   prepare for temporary unmapping
 * @folio:	The folio to share the mapping of
 * @page:	The first page to share the mapping of
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock and has to have the page table
 * entries cleared/invalidated.
 *
 * This is similar to folio_try_dup_anon_rmap_pmd(); however, it is not used
 * during fork() to duplicate a mapping, but instead to prepare for temporarily
 * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pmd().
 *
 * Marking the mapped pages shared can only fail if the folio may be pinned;
 * device private folios cannot get pinned and consequently this function
 * cannot fail.
 *
 * Returns 0 if marking the mapped pages possibly shared succeeded. Returns
 * -EBUSY otherwise.
 */
static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
		struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR,
					   RMAP_LEVEL_PMD);
#else
	WARN_ON_ONCE(true);
	return -EBUSY;
#endif
}
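
/*
 * Illustrative sketch of the "temporary unmapping" case mentioned above
 * (a simplification of what the swap-out path in try_to_unmap_one()
 * does; "entry" stands for the folio's swap entry): the PTE is cleared
 * first, then the page is marked possibly shared, and we back off by
 * restoring the PTE if the folio may be pinned:
 *
 *	pteval = ptep_get_and_clear(mm, address, pte);
 *	if (folio_try_share_anon_rmap_pte(folio, page)) {
 *		set_pte_at(mm, address, pte, pteval);
 *		return false;
 *	}
 *	set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 *	folio_remove_rmap_pte(folio, page, vma);
 */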

/*
 * Called from mm/vmscan.c to handle paging out
 */
int folio_referenced(struct folio *, int is_locked,
			struct mem_cgroup *memcg, unsigned long *vm_flags);

void try_to_migrate(struct folio *folio, enum ttu_flags flags);
void try_to_unmap(struct folio *, enum ttu_flags flags);

int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, struct page **pages,
				void *arg);

/* Avoid racy checks */
#define PVMW_SYNC		(1 << 0)
/* Look for migration entries rather than present PTEs */
#define PVMW_MIGRATION		(1 << 1)

struct page_vma_mapped_walk {
	unsigned long pfn;
	unsigned long nr_pages;
	pgoff_t pgoff;
	struct vm_area_struct *vma;
	unsigned long address;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;
	unsigned int flags;
};

#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags)	\
	struct page_vma_mapped_walk name = {				\
		.pfn = page_to_pfn(_page),				\
		.nr_pages = compound_nr(_page),				\
		.pgoff = page_to_pgoff(_page),				\
		.vma = _vma,						\
		.address = _address,					\
		.flags = _flags,					\
	}

#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags)	\
	struct page_vma_mapped_walk name = {				\
		.pfn = folio_pfn(_folio),				\
		.nr_pages = folio_nr_pages(_folio),			\
		.pgoff = folio_pgoff(_folio),				\
		.vma = _vma,						\
		.address = _address,					\
		.flags = _flags,					\
	}

static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
{
	/*
	 * HugeTLB ptes are returned directly, without pte_map(), so there is
	 * nothing to unmap for them.
	 */
	if (pvmw->pte && !is_vm_hugetlb_page(pvmw->vma))
		pte_unmap(pvmw->pte);
	if (pvmw->ptl)
		spin_unlock(pvmw->ptl);
}

bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);

/*
 * Used by swapoff to help locate where a page is expected in a vma.
 */
unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);

/*
 * Cleans the PTEs of shared mappings (and since clean PTEs should also be
 * read-only, write-protects them too).
 *
 * Returns the number of cleaned PTEs.
 */
int folio_mkclean(struct folio *);

int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
		      struct vm_area_struct *vma);

void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);

unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);

/*
 * rmap_walk_control: To control the rmap traversal for specific needs
 *
 * arg: passed to rmap_one() and invalid_vma()
 * try_lock: bail out if the rmap lock is contended
 * contended: indicate the rmap traversal bailed out due to lock contention
 * rmap_one: executed on each vma where the folio is mapped
 * done: for checking the traversal termination condition
 * anon_lock: for taking the anon_vma lock in an optimized way rather than
 *	      the default
 * invalid_vma: for skipping uninteresting vmas
 */
struct rmap_walk_control {
	void *arg;
	bool try_lock;
	bool contended;
	/*
	 * Return false if page table scanning in rmap_walk should be stopped.
	 * Otherwise, return true.
	 */
	bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma,
					unsigned long addr, void *arg);
	int (*done)(struct folio *folio);
	struct anon_vma *(*anon_lock)(struct folio *folio,
				      struct rmap_walk_control *rwc);
	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
};

void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc);
void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc);
struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
					  struct rmap_walk_control *rwc);
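
/*
 * Illustrative sketch of how the pieces above fit together (modelled on
 * the walkers in mm/rmap.c, not copied from them; "my_rmap_one" is a
 * made-up name): a caller fills in an rmap_walk_control whose
 * rmap_one() callback uses page_vma_mapped_walk() to visit every
 * PTE/PMD of the folio within one vma:
 *
 *	static bool my_rmap_one(struct folio *folio, struct vm_area_struct *vma,
 *			unsigned long address, void *arg)
 *	{
 *		DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
 *
 *		while (page_vma_mapped_walk(&pvmw)) {
 *			... inspect or modify *pvmw.pte / *pvmw.pmd,
 *			    with pvmw.ptl held by the walk ...
 *		}
 *		return true;
 *	}
 *
 *	struct rmap_walk_control rwc = {
 *		.rmap_one = my_rmap_one,
 *		.anon_lock = folio_lock_anon_vma_read,
 *	};
 *
 *	rmap_walk(folio, &rwc);
 *
 * Real users include the folio_referenced(), folio_mkclean() and
 * try_to_unmap() walkers in mm/rmap.c.
 */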

#else	/* !CONFIG_MMU */

#define anon_vma_init()		do {} while (0)
#define anon_vma_prepare(vma)	(0)

static inline int folio_referenced(struct folio *folio, int is_locked,
				  struct mem_cgroup *memcg,
				  unsigned long *vm_flags)
{
	*vm_flags = 0;
	return 0;
}

static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags)
{
}

static inline int folio_mkclean(struct folio *folio)
{
	return 0;
}
#endif	/* CONFIG_MMU */

static inline int page_mkclean(struct page *page)
{
	return folio_mkclean(page_folio(page));
}
#endif	/* _LINUX_RMAP_H */