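/*
 * Editor's note: this header supplies generic fallback implementations of
 * page table accessor/maintenance helpers.  An architecture overrides an
 * individual helper by defining the corresponding __HAVE_ARCH_* symbol or
 * helper macro before this header is included (usually from asm/pgtable.h).
 */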
#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#include <linux/pfn.h>

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>

#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
        defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE.  However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING	0UL
#endif

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pte_t *ptep,
                                 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp,
                                 pmd_t entry, int dirty);
extern int pudp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pud_t *pudp,
                                 pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
                                        unsigned long address, pmd_t *pmdp,
                                        pmd_t entry, int dirty)
{
        BUILD_BUG();
        return 0;
}
static inline int pudp_set_access_flags(struct vm_area_struct *vma,
                                        unsigned long address, pud_t *pudp,
                                        pud_t entry, int dirty)
{
        BUILD_BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pte_t *ptep)
{
        pte_t pte = *ptep;
        int r = 1;
        if (!pte_young(pte))
                r = 0;
        else
                set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
        return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        pmd_t pmd = *pmdp;
        int r = 1;
        if (!pmd_young(pmd))
                r = 0;
        else
                set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
        return r;
}
#else
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        BUILD_BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
                           unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
                                  unsigned long address, pmd_t *pmdp);
#else
/*
 * Although this API is relevant to THP only, it is called from generic rmap
 * code under PageTransHuge(), and hence needs a dummy implementation for !THP.
 */
static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
                                         unsigned long address, pmd_t *pmdp)
{
        BUILD_BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
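/*
 * Editor's illustration (a sketch, not part of this header's API contract):
 * reference-bit harvesting in rmap-style aging code typically uses the
 * flushing variant when the mapping may be live on other CPUs:
 *
 *	if (ptep_clear_flush_young(vma, address, ptep))
 *		referenced++;
 *
 * ptep_test_and_clear_young() is the non-flushing form, for callers that do
 * not need the TLB flushed (e.g. when approximate aging is acceptable).
 */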
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
                                       unsigned long address,
                                       pte_t *ptep)
{
        pte_t pte = *ptep;
        pte_clear(mm, address, ptep);
        return pte;
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        pmd_t pmd = *pmdp;
        pmd_clear(pmdp);
        return pmd;
}
#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
                                            unsigned long address,
                                            pud_t *pudp)
{
        pud_t pud = *pudp;

        pud_clear(pudp);
        return pud;
}
#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long address, pmd_t *pmdp,
                                            int full)
{
        return pmdp_huge_get_and_clear(mm, address, pmdp);
}
#endif

#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long address, pud_t *pudp,
                                            int full)
{
        return pudp_huge_get_and_clear(mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long address, pte_t *ptep,
                                            int full)
{
        pte_t pte;
        pte = ptep_get_and_clear(mm, address, ptep);
        return pte;
}
#endif

/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTEs which are already
 * not present, or in the process of an address space destruction.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
                                              unsigned long address,
                                              pte_t *ptep,
                                              int full)
{
        pte_clear(mm, address, ptep);
}
#endif
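/*
 * Editor's illustration (a sketch of the intended calling convention, not a
 * definition made by this header): teardown paths pass the "full" hint when
 * the whole mm is going away, which lets an architecture skip per-pte
 * synchronization.  A zap loop conceptually does:
 *
 *	ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
 *	...accumulate the page for TLB flushing and freeing...
 *
 * and, for entries known to be non-present (e.g. swap entries):
 *
 *	pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 */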
#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
                              unsigned long address,
                              pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
                                   unsigned long address,
                                   pmd_t *pmdp);
extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
                                   unsigned long address,
                                   pud_t *pudp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
        pte_t old_pte = *ptep;
        set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef pte_savedwrite
#define pte_savedwrite pte_write
#endif

#ifndef pte_mk_savedwrite
#define pte_mk_savedwrite pte_mkwrite
#endif

#ifndef pte_clear_savedwrite
#define pte_clear_savedwrite pte_wrprotect
#endif

#ifndef pmd_savedwrite
#define pmd_savedwrite pmd_write
#endif

#ifndef pmd_mk_savedwrite
#define pmd_mk_savedwrite pmd_mkwrite
#endif

#ifndef pmd_clear_savedwrite
#define pmd_clear_savedwrite pmd_wrprotect
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long address, pmd_t *pmdp)
{
        pmd_t old_pmd = *pmdp;
        set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long address, pmd_t *pmdp)
{
        BUILD_BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline void pudp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long address, pud_t *pudp)
{
        pud_t old_pud = *pudp;

        set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
}
#else
static inline void pudp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long address, pud_t *pudp)
{
        BUILD_BUG();
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif

#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp);
#else
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
                                        unsigned long address,
                                        pmd_t *pmdp)
{
        BUILD_BUG();
        return *pmdp;
}
#define pmdp_collapse_flush pmdp_collapse_flush
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                       pgtable_t pgtable);
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
                            pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
                                           unsigned long address, pmd_t *pmdp)
{

}
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
        return pte_val(pte_a) == pte_val(pte_b);
}
#endif
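/*
 * Editor's illustration (a sketch, assuming the usual fault-handling
 * convention rather than anything defined here): pte_same() is what fault
 * handlers use to revalidate a pte after dropping and retaking the pte lock.
 *
 *	spin_lock(ptl);
 *	if (!pte_same(*ptep, orig_pte)) {
 *		spin_unlock(ptl);	(someone else changed the pte: bail out)
 *		return 0;
 *	}
 *	...apply the update with set_pte_at()/ptep_set_access_flags()...
 */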
#ifndef __HAVE_ARCH_PTE_UNUSED
/*
 * Some architectures provide facilities to virtualization guests
 * so that they can flag allocated pages as unused. This allows the
 * host to transparently reclaim unused pages. This function returns
 * whether the pte's page is unused.
 */
static inline int pte_unused(pte_t pte)
{
        return 0;
}
#endif

#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
        return pmd_val(pmd_a) == pmd_val(pmd_b);
}

static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
        return pud_val(pud_a) == pud_val(pud_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
        BUILD_BUG();
        return 0;
}

static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
        BUILD_BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

#ifndef pte_accessible
# define pte_accessible(mm, pte)	((void)(pte), 1)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

#ifndef pgprot_writethrough
#define pgprot_writethrough pgprot_noncached
#endif

#ifndef pgprot_device
#define pgprot_device pgprot_noncached
#endif

#ifndef pgprot_modify
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
        if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
                newprot = pgprot_noncached(newprot);
        if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
                newprot = pgprot_writecombine(newprot);
        if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
                newprot = pgprot_device(newprot);
        return newprot;
}
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier.  Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 */

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})

#ifndef p4d_addr_end
#define p4d_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pud_addr_end
#define pud_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif
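/*
 * Editor's illustration (a sketch of the canonical walking pattern used
 * throughout mm/, not something defined by this header): the *_addr_end()
 * macros clamp each step of a page table walk to the range being operated
 * on; the p?d_none_or_clear_bad() helpers used here are declared just below.
 *
 *	pgd = pgd_offset(mm, addr);
 *	do {
 *		next = pgd_addr_end(addr, end);
 *		if (pgd_none_or_clear_bad(pgd))
 *			continue;
 *		...descend into p4d/pud/pmd/pte for [addr, next)...
 *	} while (pgd++, addr = next, addr != end);
 */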
/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void p4d_clear_bad(p4d_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
        if (pgd_none(*pgd))
                return 1;
        if (unlikely(pgd_bad(*pgd))) {
                pgd_clear_bad(pgd);
                return 1;
        }
        return 0;
}

static inline int p4d_none_or_clear_bad(p4d_t *p4d)
{
        if (p4d_none(*p4d))
                return 1;
        if (unlikely(p4d_bad(*p4d))) {
                p4d_clear_bad(p4d);
                return 1;
        }
        return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
        if (pud_none(*pud))
                return 1;
        if (unlikely(pud_bad(*pud))) {
                pud_clear_bad(pud);
                return 1;
        }
        return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
        if (pmd_none(*pmd))
                return 1;
        if (unlikely(pmd_bad(*pmd))) {
                pmd_clear_bad(pmd);
                return 1;
        }
        return 0;
}

static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
                                             unsigned long addr,
                                             pte_t *ptep)
{
        /*
         * Get the current pte state, but zero it out to make it
         * non-present, preventing the hardware from asynchronously
         * updating it.
         */
        return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
                                             unsigned long addr,
                                             pte_t *ptep, pte_t pte)
{
        /*
         * The pte is non-present, so there's no hardware state to
         * preserve.
         */
        set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time.  The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
                                           unsigned long addr,
                                           pte_t *ptep)
{
        return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
                                           unsigned long addr,
                                           pte_t *ptep, pte_t pte)
{
        __ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
#endif /* CONFIG_MMU */
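/*
 * Editor's illustration (a sketch of the intended caller pattern, assuming
 * the usual change_protection()-style usage; the local names are
 * illustrative): the transaction brackets a read-modify-write of a pte
 * under the pte lock:
 *
 *	oldpte = ptep_modify_prot_start(mm, addr, ptep);
 *	newpte = pte_modify(oldpte, newprot);
 *	ptep_modify_prot_commit(mm, addr, ptep, newpte);
 *
 * On a paravirtualized guest the start/commit pair may be batched, but the
 * commit must take effect before the pte lock is dropped.
 */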
/*
 * A facility to provide lazy MMU batching.  This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued.  Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window.  Note that using this
 * interface requires that read hazards be removed from the code.  A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date.  This mode can only be entered and left under the protection
 * of the page table locks for all page tables which may be modified.  In
 * the UP case, this is required so that preemption is disabled, and in the
 * SMP case, it must synchronize the delayed page table writes properly on
 * other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif

/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests.  By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entry and exits should always be
 * paired.  This is for sanity of maintaining and reasoning about the
 * kernel code.  In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif

#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
        return 0;
}

static inline int pmd_soft_dirty(pmd_t pmd)
{
        return 0;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
        return pte;
}

static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
        return pmd;
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
        return pte;
}

static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
{
        return pmd;
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
        return pte;
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
        return 0;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
        return pte;
}
#endif

#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interfaces that can be used by architecture code to keep track of
 * the memory type of pfn mappings specified by remap_pfn_range() and
 * vm_insert_pfn().
 */

/*
 * track_pfn_remap is called when a _new_ pfn mapping is being established
 * by remap_pfn_range() for the physical range indicated by pfn and size.
 */
static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
                                  unsigned long pfn, unsigned long addr,
                                  unsigned long size)
{
        return 0;
}

/*
 * track_pfn_insert is called when a _new_ single pfn is established
 * by vm_insert_pfn().
 */
static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
                                    pfn_t pfn)
{
}

/*
 * track_pfn_copy is called when a vma that covers a pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_copy(struct vm_area_struct *vma)
{
        return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case pfn and size are zero).
 */
static inline void untrack_pfn(struct vm_area_struct *vma,
                               unsigned long pfn, unsigned long size)
{
}

/*
 * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
 */
static inline void untrack_pfn_moved(struct vm_area_struct *vma)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
                           unsigned long pfn, unsigned long addr,
                           unsigned long size);
extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
                             pfn_t pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
                        unsigned long size);
extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif
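/*
 * Editor's illustration (a sketch, assuming the remap_pfn_range()-style call
 * sequence; error handling is abbreviated): a pfn mapping is bracketed by
 * the tracking hooks so the architecture (e.g. x86 PAT) can record and later
 * release the memory type of the range:
 *
 *	err = track_pfn_remap(vma, &prot, pfn, addr, size);
 *	if (err)
 *		return err;
 *	...populate the page tables for [addr, addr + size)...
 *
 * On unmap (or on failure to populate), the range is released again with
 * untrack_pfn(vma, pfn, size).
 */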
#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
        extern unsigned long zero_pfn;
        unsigned long offset_from_zero_pfn = pfn - zero_pfn;
        return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))

#else
static inline int is_zero_pfn(unsigned long pfn)
{
        extern unsigned long zero_pfn;
        return pfn == zero_pfn;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
        extern unsigned long zero_pfn;
        return zero_pfn;
}
#endif

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
        return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
        BUG();
        return 0;
}
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
        (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
         !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
static inline int pud_trans_huge(pud_t pud)
{
        return 0;
}
#endif

#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
        /*
         * Depend on the compiler for an atomic pmd read.  NOTE: this is
         * only going to work if pmdval_t isn't larger than an
         * unsigned long.
         */
        return *pmdp;
}
#endif

#ifndef arch_needs_pgtable_deposit
#define arch_needs_pgtable_deposit() (false)
#endif
/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults.  MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails).  While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd.  When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined, so behaving as if the pmd were none is safe (because it
 * can return none anyway).  The compiler level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 *
 * For 32bit kernels with a 64bit large pmd_t this automatically takes
 * care of reading the pmd atomically to avoid SMP race conditions
 * against pmd_populate() when the mmap_sem is held for reading by the
 * caller (a special atomic read not done by "gcc" as in the generic
 * version above is also needed when THP is disabled because the page
 * fault can populate the pmd from under us).
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
        pmd_t pmdval = pmd_read_atomic(pmd);
        /*
         * The barrier will stabilize the pmdval in a register or on
         * the stack so that it will stop changing under the code.
         *
         * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
         * pmd_read_atomic is allowed to return a not atomic pmdval
         * (for example pointing to a hugepage that has never been
         * mapped in the pmd).  The below checks will only care about
         * the low part of the pmd with 32bit PAE x86 anyway, with the
         * exception of pmd_none().  So the important thing is that if
         * the low part of the pmd is found null, the high part will
         * be also null or the pmd_none() check below would be
         * confused.
         */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        barrier();
#endif
        if (pmd_none(pmdval) || pmd_trans_huge(pmdval))
                return 1;
        if (unlikely(pmd_bad(pmdval))) {
                pmd_clear_bad(pmd);
                return 1;
        }
        return 0;
}

/*
 * This is a noop if Transparent Hugepage Support is not built into
 * the kernel.  Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that already verified the pmd is not none and they want to
 * walk ptes while holding the mmap_sem in read mode (write mode doesn't
 * need this).  If THP is not enabled, the pmd can't go away under the
 * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 * run a pmd_trans_unstable before walking the ptes after
 * split_huge_page_pmd returns (because it may have run when the pmd
 * became null, but then a page fault can map in a THP and not a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
        return 0;
#endif
}
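/*
 * Editor's illustration (a sketch of the expected caller pattern, not a
 * definition made here): pagetable walkers holding mmap_sem for read check
 * the pmd before mapping its pte page, and treat an unstable pmd as "skip;
 * the caller will retry or refault":
 *
 *	if (pmd_trans_unstable(pmd))
 *		return 0;
 *	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 *	...walk the ptes for [addr, end), then pte_unmap_unlock()...
 */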
#ifndef CONFIG_NUMA_BALANCING
/*
 * Technically a PTE can be PROTNONE even when not doing NUMA balancing, but
 * the only case the kernel cares about is NUMA balancing, and PROTNONE is
 * only ever set when the VMA is accessible.  For PROT_NONE VMAs, the PTEs
 * are not marked _PAGE_PROTNONE, so by default implement the helper as
 * "always no".  It is the responsibility of the caller to distinguish
 * between PROT_NONE protections and NUMA hinting fault protections.
 */
static inline int pte_protnone(pte_t pte)
{
        return 0;
}

static inline int pmd_protnone(pmd_t pmd)
{
        return 0;
}
#endif /* CONFIG_NUMA_BALANCING */

#endif /* CONFIG_MMU */

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP

#ifndef __PAGETABLE_P4D_FOLDED
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
int p4d_clear_huge(p4d_t *p4d);
#else
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
        return 0;
}
static inline int p4d_clear_huge(p4d_t *p4d)
{
        return 0;
}
#endif /* !__PAGETABLE_P4D_FOLDED */

int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
        return 0;
}
static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
        return 0;
}
static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
        return 0;
}
static inline int p4d_clear_huge(p4d_t *p4d)
{
        return 0;
}
static inline int pud_clear_huge(pud_t *pud)
{
        return 0;
}
static inline int pmd_clear_huge(pmd_t *pmd)
{
        return 0;
}
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */

#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * ARCHes with special requirements for evicting THP backing TLB entries can
 * implement this.  It can also help optimize the normal TLB flush in the THP
 * regime: stock flush_tlb_range() typically has an optimization to nuke the
 * entire TLB if the flush span is greater than a threshold, which will
 * likely be true for a single huge page.  Thus a single THP flush will
 * invalidate the entire TLB, which is not desirable.
 * e.g. see arch/arc: flush_pmd_tlb_range
 */
#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#else
#define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
#define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
#endif
#endif

struct file;
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                                 unsigned long size, pgprot_t *vma_prot);
#endif /* !__ASSEMBLY__ */

#ifndef io_remap_pfn_range
#define io_remap_pfn_range remap_pfn_range
#endif

#ifndef has_transparent_hugepage
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define has_transparent_hugepage() 1
#else
#define has_transparent_hugepage() 0
#endif
#endif

#endif /* _ASM_GENERIC_PGTABLE_H */