/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#include <linux/pfn.h>

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>

#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE. However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING	0UL
#endif

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
extern int pudp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pud_t *pudp,
				 pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
static inline int pudp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pud_t *pudp,
					pud_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	pte_t pte = *ptep;
	int r = 1;
	if (!pte_young(pte))
		r = 0;
	else
		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
	return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	int r = 1;
	if (!pmd_young(pmd))
		r = 0;
	else
		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
	return r;
}
#else
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
				  unsigned long address, pmd_t *pmdp);
#else
/*
 * Despite being relevant to THP only, this API is called from generic rmap
 * code under PageTransHuge(), hence it needs a dummy implementation for !THP.
 */
static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

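/*
 * Illustrative sketch (editor's addition, not part of the original header):
 * how an aging/reference-bit scanner might consume the young-bit helpers
 * above for a single address. pte_offset_map_lock()/pte_unmap_unlock() are
 * assumed from <linux/mm.h>, and the pmd is assumed to come from a prior
 * page-table walk. Kept under #if 0 so it is never compiled.
 */
#if 0
static int example_test_and_clear_young(struct vm_area_struct *vma,
					pmd_t *pmd, unsigned long address)
{
	spinlock_t *ptl;
	pte_t *ptep;
	int young;

	/* Hold the pte lock so the entry cannot change under us. */
	ptep = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl);
	/* Clear the accessed bit and flush the TLB so it can be set again. */
	young = ptep_clear_flush_young(vma, address, ptep);
	pte_unmap_unlock(ptep, ptl);

	return young;
}
#endif
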
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pte_t *ptep)
{
	pte_t pte = *ptep;
	pte_clear(mm, address, ptep);
	return pte;
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	pmd_clear(pmdp);
	return pmd;
}
#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pud_t *pudp)
{
	pud_t pud = *pudp;

	pud_clear(pudp);
	return pud;
}
#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pmd_t *pmdp,
					    int full)
{
	return pmdp_huge_get_and_clear(mm, address, pmdp);
}
#endif

#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pud_t *pudp,
					    int full)
{
	return pudp_huge_get_and_clear(mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pte_t *ptep,
					    int full)
{
	pte_t pte;
	pte = ptep_get_and_clear(mm, address, ptep);
	return pte;
}
#endif

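/*
 * Illustrative sketch (editor's addition): a zap-style loop that tears down
 * a range of ptes, passing full == 1 to hint that the whole address space
 * is going away. pte_offset_map_lock(), pte_none() and PAGE_SIZE are assumed
 * from the usual core-mm environment. Kept under #if 0; never compiled.
 */
#if 0
static void example_zap_pte_range(struct mm_struct *mm, pmd_t *pmd,
				  unsigned long addr, unsigned long end)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		pte_t ptent = *pte;

		if (pte_none(ptent))
			continue;
		/* Atomically fetch and clear; the old pte tells us what to release. */
		ptent = ptep_get_and_clear_full(mm, addr, pte, 1);
		/* ... drop the reference on the page described by ptent here ... */
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}
#endif
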
/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTEs which are already
 * not present, or while the address space is being destroyed.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
					      unsigned long address,
					      pte_t *ptep,
					      int full)
{
	pte_clear(mm, address, ptep);
}
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
				   unsigned long address,
				   pmd_t *pmdp);
extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
				   unsigned long address,
				   pud_t *pudp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = *ptep;
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef pte_savedwrite
#define pte_savedwrite pte_write
#endif

#ifndef pte_mk_savedwrite
#define pte_mk_savedwrite pte_mkwrite
#endif

#ifndef pte_clear_savedwrite
#define pte_clear_savedwrite pte_wrprotect
#endif

#ifndef pmd_savedwrite
#define pmd_savedwrite pmd_write
#endif

#ifndef pmd_mk_savedwrite
#define pmd_mk_savedwrite pmd_mkwrite
#endif

#ifndef pmd_clear_savedwrite
#define pmd_clear_savedwrite pmd_wrprotect
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	pud_t old_pud = *pudp;

	set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
}
#else
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	BUILD_BUG();
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif

#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp);
#else
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
					unsigned long address,
					pmd_t *pmdp)
{
	BUILD_BUG();
	return *pmdp;
}
#define pmdp_collapse_flush pmdp_collapse_flush
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable);
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif

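/*
 * Illustrative sketch (editor's addition): write-protecting a source pte
 * with ptep_set_wrprotect() (defined earlier in this file) during a
 * fork-style copy, so that later writes fault and trigger COW. This mirrors
 * what copy_page_range() does per pte; the locking and the actual copy of
 * the entry are left out. Kept under #if 0; never compiled.
 */
#if 0
static void example_cow_protect_pte(struct mm_struct *src_mm,
				    unsigned long addr, pte_t *src_pte,
				    bool cow)
{
	/*
	 * For a COW mapping, write-protect the parent's pte now; the child
	 * inherits a read-only copy and both sides fault on write.
	 */
	if (cow && pte_write(*src_pte))
		ptep_set_wrprotect(src_mm, addr, src_pte);
}
#endif
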
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * This is an implementation of pmdp_establish() that is only suitable for an
 * architecture that doesn't have hardware dirty/accessed bits. In this case
 * we can't race with the CPU setting those bits, so a non-atomic approach is
 * fine.
 */
static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
	return old_pmd;
}
#endif

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
			     pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}
#endif

#ifndef __HAVE_ARCH_PTE_UNUSED
/*
 * Some architectures provide facilities to virtualization guests
 * so that they can flag allocated pages as unused. This allows the
 * host to transparently reclaim unused pages. This function returns
 * whether the pte's page is unused.
 */
static inline int pte_unused(pte_t pte)
{
	return 0;
}
#endif

#ifndef pte_access_permitted
#define pte_access_permitted(pte, write) \
	(pte_present(pte) && (!(write) || pte_write(pte)))
#endif

#ifndef pmd_access_permitted
#define pmd_access_permitted(pmd, write) \
	(pmd_present(pmd) && (!(write) || pmd_write(pmd)))
#endif

#ifndef pud_access_permitted
#define pud_access_permitted(pud, write) \
	(pud_present(pud) && (!(write) || pud_write(pud)))
#endif

#ifndef p4d_access_permitted
#define p4d_access_permitted(p4d, write) \
	(p4d_present(p4d) && (!(write) || p4d_write(p4d)))
#endif

#ifndef pgd_access_permitted
#define pgd_access_permitted(pgd, write) \
	(pgd_present(pgd) && (!(write) || pgd_write(pgd)))
#endif

#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return pmd_val(pmd_a) == pmd_val(pmd_b);
}

static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
	return pud_val(pud_a) == pud_val(pud_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	BUILD_BUG();
	return 0;
}

static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

#ifndef pte_accessible
# define pte_accessible(mm, pte)	((void)(pte), 1)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

#ifndef pgprot_writethrough
#define pgprot_writethrough pgprot_noncached
#endif

#ifndef pgprot_device
#define pgprot_device pgprot_noncached
#endif

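/*
 * Illustrative sketch (editor's addition): how a get_user_pages()-fast style
 * path might use the pte_access_permitted() helper defined above to decide
 * whether a pte grants the requested access before touching the page.
 * Purely an example; kept under #if 0 and never compiled.
 */
#if 0
static bool example_pte_allows(pte_t pte, bool write)
{
	/* Present, and writable if a write was asked for. */
	if (!pte_access_permitted(pte, write))
		return false;

	/* Soft-dirty, protnone, etc. would be checked by the real caller. */
	return true;
}
#endif
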
#ifndef pgprot_modify
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
		newprot = pgprot_noncached(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
		newprot = pgprot_writecombine(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
		newprot = pgprot_device(newprot);
	return newprot;
}
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier. Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 */

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})

#ifndef p4d_addr_end
#define p4d_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pud_addr_end
#define pud_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void p4d_clear_bad(p4d_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
	if (pgd_none(*pgd))
		return 1;
	if (unlikely(pgd_bad(*pgd))) {
		pgd_clear_bad(pgd);
		return 1;
	}
	return 0;
}

static inline int p4d_none_or_clear_bad(p4d_t *p4d)
{
	if (p4d_none(*p4d))
		return 1;
	if (unlikely(p4d_bad(*p4d))) {
		p4d_clear_bad(p4d);
		return 1;
	}
	return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
	if (pud_none(*pud))
		return 1;
	if (unlikely(pud_bad(*pud))) {
		pud_clear_bad(pud);
		return 1;
	}
	return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 1;
	if (unlikely(pmd_bad(*pmd))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

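/*
 * Illustrative sketch (editor's addition): the canonical walker shape that
 * the two comment blocks above describe. At each level, p?d_addr_end()
 * clamps the step to the range end, and p?d_none_or_clear_bad() skips empty
 * or corrupt entries. pmd_offset() is assumed from the arch page-table
 * headers; the pte-level work is elided. Kept under #if 0; never compiled.
 */
#if 0
static void example_walk_pmd_range(struct mm_struct *mm, pud_t *pud,
				   unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		/* Next pmd boundary, or 'end' if that comes first. */
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		/* ... walk the pte level for [addr, next) here ... */
	} while (pmd++, addr = next, addr != end);
}
#endif
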
static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep)
{
	/*
	 * Get the current pte state, but zero it out to make it
	 * non-present, preventing the hardware from asynchronously
	 * updating it.
	 */
	return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep, pte_t pte)
{
	/*
	 * The pte is non-present, so there's no hardware state to
	 * preserve.
	 */
	set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time. The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep)
{
	return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	__ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
#endif /* CONFIG_MMU */

/*
 * No-op macros that just return the current protection value. Defined here
 * because these macros can be used even if CONFIG_MMU is not defined.
 */
#ifndef pgprot_encrypted
#define pgprot_encrypted(prot)	(prot)
#endif

#ifndef pgprot_decrypted
#define pgprot_decrypted(prot)	(prot)
#endif

/*
 * A facility to provide lazy MMU batching. This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued. Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window. Note that using this
 * interface requires that read hazards be removed from the code. A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date. This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified. In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif

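/*
 * Illustrative sketch (editor's addition): a change_pte_range()-style loop
 * combining the modify_prot transaction above with lazy MMU batching. Each
 * present pte is started (made non-present so hardware A/D updates cannot
 * be lost), has its protections changed, and is committed, all under the
 * pte lock; the lazy MMU hooks let a hypervisor batch the writes.
 * pte_modify() and pte_offset_map_lock() are assumed from the arch/core-mm
 * headers. Kept under #if 0; never compiled.
 */
#if 0
static void example_change_protection(struct mm_struct *mm, pmd_t *pmd,
				      unsigned long addr, unsigned long end,
				      pgprot_t newprot)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		pte_t ptent;

		if (!pte_present(*pte))
			continue;

		/* Begin the read-modify-write transaction on this pte. */
		ptent = ptep_modify_prot_start(mm, addr, pte);
		ptent = pte_modify(ptent, newprot);
		/* Commit; this must happen before the pte lock is dropped. */
		ptep_modify_prot_commit(mm, addr, pte, ptent);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);
}
#endif
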
/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests. By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entries and exits should always be
 * paired. This is for sanity of maintaining and reasoning about the
 * kernel code. In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif
#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
static inline int pte_soft_dirty(pte_t pte)
{
	return 0;
}

static inline int pmd_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return 0;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif

#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interfaces that can be used by architecture code to keep track of
 * the memory type of pfn mappings specified by remap_pfn_range() and
 * vm_insert_pfn().
 */

/*
 * track_pfn_remap is called when a _new_ pfn mapping is being established
 * by remap_pfn_range() for the physical range indicated by pfn and size.
 */
static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
				  unsigned long pfn, unsigned long addr,
				  unsigned long size)
{
	return 0;
}

/*
 * track_pfn_insert is called when a _new_ single pfn is established
 * by vm_insert_pfn().
 */
static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
				    pfn_t pfn)
{
}

/*
 * track_pfn_copy is called when a vma that is covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_copy(struct vm_area_struct *vma)
{
	return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case pfn, size are zero).
 */
static inline void untrack_pfn(struct vm_area_struct *vma,
			       unsigned long pfn, unsigned long size)
{
}

/*
 * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
 */
static inline void untrack_pfn_moved(struct vm_area_struct *vma)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
			   unsigned long pfn, unsigned long addr,
			   unsigned long size);
extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
			     pfn_t pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
			unsigned long size);
extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif

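/*
 * Illustrative sketch (editor's addition): the sort of driver ->mmap()
 * handler whose call to remap_pfn_range() ends up invoking the
 * track_pfn_remap() hook above on architectures that implement PAT-style
 * tracking. 'example_phys_addr' is a made-up placeholder for the device's
 * physical base address. Kept under #if 0; never compiled.
 */
#if 0
static int example_driver_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = example_phys_addr >> PAGE_SHIFT;	/* placeholder */

	/* remap_pfn_range() calls track_pfn_remap() before installing ptes. */
	return remap_pfn_range(vma, vma->vm_start, pfn, size,
			       vma->vm_page_prot);
}
#endif
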
#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))

#else
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	return pfn == zero_pfn;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	extern unsigned long zero_pfn;
	return zero_pfn;
}
#endif

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return 0;
}
#ifndef pmd_write
static inline int pmd_write(pmd_t pmd)
{
	BUG();
	return 0;
}
#endif /* pmd_write */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pud_write
static inline int pud_write(pud_t pud)
{
	BUG();
	return 0;
}
#endif /* pud_write */

#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
	(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
	 !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
static inline int pud_trans_huge(pud_t pud)
{
	return 0;
}
#endif

#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
	/*
	 * Depend on the compiler for an atomic pmd read. NOTE: this is
	 * only going to work if pmdval_t isn't larger than
	 * an unsigned long.
	 */
	return *pmdp;
}
#endif

#ifndef arch_needs_pgtable_deposit
#define arch_needs_pgtable_deposit() (false)
#endif
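
/*
 * Illustrative sketch (editor's addition): the is_zero_pfn() helper above is
 * what paths like vm_normal_page() use to recognize ptes that map the shared
 * zero page and therefore have no normal struct page to pin or dirty.
 * pte_pfn() is assumed from the arch headers. Kept under #if 0; never
 * compiled.
 */
#if 0
static int example_pte_is_zero_page(pte_t pte)
{
	return is_zero_pfn(pte_pfn(pte));
}
#endif
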
/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails). While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd. When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined, so behaving as if the pmd was none is safe (because it
 * can return none anyway). The compiler level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 *
 * For 32bit kernels with a 64bit large pmd_t this automatically takes
 * care of reading the pmd atomically to avoid SMP race conditions
 * against pmd_populate() when the mmap_sem is held for reading by the
 * caller (a special atomic read not done by "gcc" as in the generic
 * version above, is also needed when THP is disabled because the page
 * fault can populate the pmd from under us).
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
	pmd_t pmdval = pmd_read_atomic(pmd);
	/*
	 * The barrier will stabilize the pmdval in a register or on
	 * the stack so that it will stop changing under the code.
	 *
	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
	 * pmd_read_atomic is allowed to return a not atomic pmdval
	 * (for example pointing to a hugepage that has never been
	 * mapped in the pmd). The below checks will only care about
	 * the low part of the pmd with 32bit PAE x86 anyway, with the
	 * exception of pmd_none(). So the important thing is that if
	 * the low part of the pmd is found null, the high part will
	 * be also null or the pmd_none() check below would be
	 * confused.
	 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	barrier();
#endif
	/*
	 * !pmd_present() checks for pmd migration entries.
	 *
	 * The complete check uses is_pmd_migration_entry() in linux/swapops.h.
	 * But using that requires moving the current function and
	 * pmd_trans_unstable() to linux/swapops.h to resolve the dependency,
	 * which is too much code movement.
	 *
	 * !pmd_present() is equivalent to is_pmd_migration_entry() currently,
	 * because !pmd_present() pages can only be under migration, not
	 * swapped out.
	 *
	 * pmd_none() is preserved for future condition checks on pmd migration
	 * entries and to avoid confusion with this function's name, although
	 * it is redundant with !pmd_present().
	 */
	if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
		(IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval)))
		return 1;
	if (unlikely(pmd_bad(pmdval))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

/*
 * This is a no-op if Transparent Hugepage Support is not built into
 * the kernel. Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that already verified the pmd is not none and that want to
 * walk ptes while holding the mmap_sem in read mode (write mode doesn't
 * need this). If THP is not enabled, the pmd can't go away under the
 * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 * run pmd_trans_unstable before walking the ptes after
 * split_huge_page_pmd returns (because it may have run while the pmd
 * became null, but then a page fault can map in a THP and not a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
	return 0;
#endif
}

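/*
 * Illustrative sketch (editor's addition): how a pte walker that holds the
 * mmap_sem only for reading is expected to use pmd_trans_unstable() above
 * before mapping the pte level, treating an unstable pmd as if it were
 * empty. pte_offset_map_lock() and PAGE_SIZE are assumed from the usual
 * core-mm environment. Kept under #if 0; never compiled.
 */
#if 0
static int example_walk_pte_range(struct mm_struct *mm, pmd_t *pmd,
				  unsigned long addr, unsigned long end)
{
	spinlock_t *ptl;
	pte_t *pte;

	/*
	 * The pmd may be changing under us (MADV_DONTNEED, THP faults);
	 * if it is none, huge or being torn down, behave as if empty.
	 */
	if (pmd_trans_unstable(pmd))
		return 0;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		/* ... inspect *pte for [addr, addr + PAGE_SIZE) here ... */
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);

	return 0;
}
#endif
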
#ifndef CONFIG_NUMA_BALANCING
/*
 * Technically a PTE can be PROTNONE even when not doing NUMA balancing, but
 * the only case the kernel cares about is NUMA balancing, and PROTNONE is
 * only ever set when the VMA is accessible. For PROT_NONE VMAs, the PTEs are
 * not marked _PAGE_PROTNONE, so by default implement the helper as "always
 * no". It is the responsibility of the caller to distinguish between
 * PROT_NONE protections and NUMA hinting fault protections.
 */
static inline int pte_protnone(pte_t pte)
{
	return 0;
}

static inline int pmd_protnone(pmd_t pmd)
{
	return 0;
}
#endif /* CONFIG_NUMA_BALANCING */

#endif /* CONFIG_MMU */

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP

#ifndef __PAGETABLE_P4D_FOLDED
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
int p4d_clear_huge(p4d_t *p4d);
#else
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int p4d_clear_huge(p4d_t *p4d)
{
	return 0;
}
#endif /* !__PAGETABLE_P4D_FOLDED */

int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
int pud_free_pmd_page(pud_t *pud);
int pmd_free_pte_page(pmd_t *pmd);
#else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int p4d_clear_huge(p4d_t *p4d)
{
	return 0;
}
static inline int pud_clear_huge(pud_t *pud)
{
	return 0;
}
static inline int pmd_clear_huge(pmd_t *pmd)
{
	return 0;
}
static inline int pud_free_pmd_page(pud_t *pud)
{
	return 0;
}
static inline int pmd_free_pte_page(pmd_t *pmd)
{
	return 0;
}
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */

#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * ARCHes with special requirements for evicting THP backing TLB entries can
 * implement this. It can also help optimize normal TLB flushes in the THP
 * regime: the stock flush_tlb_range() typically has an optimization to nuke
 * the entire TLB if the flush span is greater than a threshold, which will
 * likely be true for a single huge page. Thus a single THP flush would
 * invalidate the entire TLB, which is not desirable.
 * e.g. see arch/arc: flush_pmd_tlb_range
 */
#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#else
#define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
#define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
#endif
#endif

struct file;
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
			unsigned long size, pgprot_t *vma_prot);

#ifndef CONFIG_X86_ESPFIX64
static inline void init_espfix_bsp(void) { }
#endif

#endif /* !__ASSEMBLY__ */

#ifndef io_remap_pfn_range
#define io_remap_pfn_range remap_pfn_range
#endif

#ifndef has_transparent_hugepage
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define has_transparent_hugepage() 1
#else
#define has_transparent_hugepage() 0
#endif
#endif

#endif /* _ASM_GENERIC_PGTABLE_H */