#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE.  However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING	0UL
#endif

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	pte_t pte = *ptep;
	int r = 1;
	if (!pte_young(pte))
		r = 0;
	else
		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
	return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	int r = 1;
	if (!pmd_young(pmd))
		r = 0;
	else
		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
	return r;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pte_t *ptep)
{
	pte_t pte = *ptep;
	pte_clear(mm, address, ptep);
	return pte;
}
#endif

#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	pmd_clear(pmdp);
	return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pmd_t *pmdp,
					    int full)
{
	return pmdp_get_and_clear(mm, address, pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pte_t *ptep,
					    int full)
{
	pte_t pte;
	pte = ptep_get_and_clear(mm, address, ptep);
	return pte;
}
#endif

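/*
 * Note on the __HAVE_ARCH_* convention used throughout this header: the
 * helpers here are only fallbacks.  An architecture that can do better
 * defines the matching __HAVE_ARCH_* symbol in its own <asm/pgtable.h>
 * (which includes this file last) and supplies its own implementation.
 * A rough sketch of what an override looks like; the arch name and the
 * hardware helper below are made-up placeholders:
 *
 *	// arch/foo/include/asm/pgtable.h
 *	#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 *	static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 *						    unsigned long address,
 *						    pte_t *ptep)
 *	{
 *		// e.g. atomically test-and-clear the hardware accessed bit
 *		return foo_test_and_clear_accessed(ptep);
 *	}
 *	...
 *	#include <asm-generic/pgtable.h>
 */
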
/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTE's which are already
 * not present, or in the process of an address space destruction.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
					      unsigned long address,
					      pte_t *ptep,
					      int full)
{
	pte_clear(mm, address, ptep);
}
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH
extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = *ptep;
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
extern void pmdp_splitting_flush(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable);
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
			    pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}
#endif

#ifndef __HAVE_ARCH_PTE_UNUSED
/*
 * Some architectures provide facilities to virtualization guests
 * so that they can flag allocated pages as unused. This allows the
 * host to transparently reclaim unused pages. This function returns
 * whether the pte's page is unused.
 */
static inline int pte_unused(pte_t pte)
{
	return 0;
}
#endif

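/*
 * For reference, a sketch of how the deposit/withdraw pair declared above
 * is intended to be used by the generic THP code (locking and the
 * surrounding details are omitted; huge_pmd and pgtable are placeholders):
 *
 *	// installing a huge pmd: park the preallocated pte page table
 *	pgtable_trans_huge_deposit(mm, pmdp, pgtable);
 *	set_pmd_at(mm, address, pmdp, huge_pmd);
 *	...
 *	// tearing the huge pmd back down into ptes: take it back
 *	pgtable = pgtable_trans_huge_withdraw(mm, pmdp);
 *	pmd_populate(mm, pmdp, pgtable);
 */
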
#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return pmd_val(pmd_a) == pmd_val(pmd_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

#ifndef pte_accessible
# define pte_accessible(mm, pte)	((void)(pte), 1)
#endif

#ifndef pte_present_nonuma
#define pte_present_nonuma(pte) pte_present(pte)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

#ifndef pgprot_device
#define pgprot_device pgprot_noncached
#endif

#ifndef pgprot_modify
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
		newprot = pgprot_noncached(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
		newprot = pgprot_writecombine(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
		newprot = pgprot_device(newprot);
	return newprot;
}
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier.  Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 */

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})

#ifndef pud_addr_end
#define pud_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

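/*
 * The p?d_addr_end() macros above, together with the p?d_none_or_clear_bad()
 * helpers below, form the usual page table walking loop.  A sketch of the
 * common shape at the pmd level (do_pte_range() is a placeholder for the
 * per-range work):
 *
 *	pmd = pmd_offset(pud, addr);
 *	do {
 *		next = pmd_addr_end(addr, end);
 *		if (pmd_none_or_clear_bad(pmd))
 *			continue;
 *		do_pte_range(mm, pmd, addr, next);
 *	} while (pmd++, addr = next, addr != end);
 */
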
/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
	if (pgd_none(*pgd))
		return 1;
	if (unlikely(pgd_bad(*pgd))) {
		pgd_clear_bad(pgd);
		return 1;
	}
	return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
	if (pud_none(*pud))
		return 1;
	if (unlikely(pud_bad(*pud))) {
		pud_clear_bad(pud);
		return 1;
	}
	return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 1;
	if (unlikely(pmd_bad(*pmd))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep)
{
	/*
	 * Get the current pte state, but zero it out to make it
	 * non-present, preventing the hardware from asynchronously
	 * updating it.
	 */
	return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep, pte_t pte)
{
	/*
	 * The pte is non-present, so there's no hardware state to
	 * preserve.
	 */
	set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time.  The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep)
{
	return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	__ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
#endif /* CONFIG_MMU */

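/*
 * A sketch of how the modify_prot transaction above is typically used
 * when changing the protections of a present pte (the caller must
 * already hold the pte lock; newprot stands for whatever protections
 * are being applied):
 *
 *	ptent = ptep_modify_prot_start(mm, addr, ptep);
 *	ptent = pte_modify(ptent, newprot);
 *	ptep_modify_prot_commit(mm, addr, ptep, ptent);
 */
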
/*
 * A facility to provide lazy MMU batching.  This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued.  Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window.  Note that using this
 * interface requires that read hazards be removed from the code.  A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date.  This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified.  In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif

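/*
 * A sketch of the batching pattern the lazy MMU hooks above are meant
 * for (ptes[], addrs[] and nr are placeholders; the page table lock must
 * be held across the whole window):
 *
 *	arch_enter_lazy_mmu_mode();
 *	for (i = 0; i < nr; i++)
 *		set_pte_at(mm, addrs[i], ptep + i, ptes[i]);
 *	arch_leave_lazy_mmu_mode();
 *
 * Per the read-hazard rule above, nothing in the window may read the
 * ptes back through a raw pointer and expect to see the new values.
 */
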
/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests.  By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entries and exits should always be
 * paired.  This is for sanity of maintaining and reasoning about the
 * kernel code.  In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif

#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
	return 0;
}

static inline int pmd_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return 0;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pte_t pte_file_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline int pte_file_soft_dirty(pte_t pte)
{
	return 0;
}
#endif

#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interfaces that can be used by architecture code to keep track of
 * the memory type of pfn mappings established by remap_pfn_range()
 * and vm_insert_pfn().
 */

/*
 * track_pfn_remap is called when a _new_ pfn mapping is being established
 * by remap_pfn_range() for the physical range indicated by pfn and size.
 */
static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
				  unsigned long pfn, unsigned long addr,
				  unsigned long size)
{
	return 0;
}

/*
 * track_pfn_insert is called when a _new_ single pfn is established
 * by vm_insert_pfn().
 */
static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
				   unsigned long pfn)
{
	return 0;
}

/*
 * track_pfn_copy is called when a vma covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_copy(struct vm_area_struct *vma)
{
	return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case pfn, size are zero).
 */
static inline void untrack_pfn(struct vm_area_struct *vma,
			       unsigned long pfn, unsigned long size)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
			   unsigned long pfn, unsigned long addr,
			   unsigned long size);
extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
			    unsigned long pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
			unsigned long size);
#endif

#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))

#else
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	return pfn == zero_pfn;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	extern unsigned long zero_pfn;
	return zero_pfn;
}
#endif

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return 0;
}
static inline int pmd_trans_splitting(pmd_t pmd)
{
	return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
	BUG();
	return 0;
}
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
	/*
	 * Depend on the compiler for an atomic pmd read.  NOTE: this is
	 * only going to work if the pmdval_t isn't larger than
	 * an unsigned long.
	 */
	return *pmdp;
}
#endif

#ifndef pmd_move_must_withdraw
static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
					 spinlock_t *old_pmd_ptl)
{
	/*
	 * With split pmd lock we also need to move the preallocated
	 * PTE page table if new_pmd is on a different PMD page table.
	 */
	return new_pmd_ptl != old_pmd_ptl;
}
#endif

/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults.  MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails).  While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd.  When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined so behaving as if the pmd was none is safe (because it
 * can return none anyway).  The compiler level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 *
 * For 32bit kernels with a 64bit large pmd_t this automatically takes
 * care of reading the pmd atomically to avoid SMP race conditions
 * against pmd_populate() when the mmap_sem is held for reading by the
 * caller (a special atomic read not done by "gcc" as in the generic
 * version above is also needed when THP is disabled because the page
 * fault can populate the pmd from under us).
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
	pmd_t pmdval = pmd_read_atomic(pmd);
	/*
	 * The barrier will stabilize the pmdval in a register or on
	 * the stack so that it will stop changing under the code.
	 *
	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
	 * pmd_read_atomic is allowed to return a not atomic pmdval
	 * (for example pointing to a hugepage that has never been
	 * mapped in the pmd).  The below checks will only care about
	 * the low part of the pmd with 32bit PAE x86 anyway, with the
	 * exception of pmd_none().  So the important thing is that if
	 * the low part of the pmd is found null, the high part will
	 * be also null or the pmd_none() check below would be
	 * confused.
	 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	barrier();
#endif
	if (pmd_none(pmdval) || pmd_trans_huge(pmdval))
		return 1;
	if (unlikely(pmd_bad(pmdval))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

/*
 * This is a noop if Transparent Hugepage Support is not built into
 * the kernel.  Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that already verified the pmd is not none and they want to
 * walk ptes while holding the mmap sem in read mode (write mode doesn't
 * need this).  If THP is not enabled, the pmd can't go away under the
 * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 * run a pmd_trans_unstable before walking the ptes after
 * split_huge_page_pmd returns (because it may have run when the pmd
 * became null, but then a page fault can map in a THP and not a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
	return 0;
#endif
}

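/*
 * A sketch of the expected pmd_trans_unstable() usage in a pte walker
 * that holds mmap_sem for read (the loop body is a placeholder):
 *
 *	if (pmd_trans_unstable(pmd))
 *		return 0;	// treat the range as empty, as described above
 *	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 *	do {
 *		// examine or update *pte here
 *	} while (pte++, addr += PAGE_SIZE, addr != end);
 *	pte_unmap_unlock(pte - 1, ptl);
 */
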
#ifdef CONFIG_NUMA_BALANCING
/*
 * _PAGE_NUMA distinguishes between an unmapped page table entry, an entry that
 * is protected for PROT_NONE and a NUMA hinting fault entry.  If the
 * architecture defines __PAGE_PROTNONE then it should take that into account
 * but those that do not can rely on the fact that the NUMA hinting scanner
 * skips inaccessible VMAs.
 *
 * pte/pmd_present() returns true if pte/pmd_numa returns true.  Page
 * fault triggers on those regions if pte/pmd_numa returns true
 * (because _PAGE_PRESENT is not set).
 */
#ifndef pte_numa
static inline int pte_numa(pte_t pte)
{
	return ptenuma_flags(pte) == _PAGE_NUMA;
}
#endif

#ifndef pmd_numa
static inline int pmd_numa(pmd_t pmd)
{
	return pmdnuma_flags(pmd) == _PAGE_NUMA;
}
#endif

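/*
 * Rough life cycle of a NUMA hinting pte built from the helpers above and
 * below (heavily simplified; the real scanner and fault paths live in mm/):
 *
 *	// scanner side: arm the hinting fault on a present pte
 *	ptep_set_numa(mm, addr, ptep);		// pte_numa() is now true
 *	...
 *	// fault side: make the pte present again, noting the access
 *	pte = pte_mknonnuma(pte);
 *	set_pte_at(mm, addr, ptep, pte);
 */
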
/*
 * pte/pmd_mknuma sets the _PAGE_ACCESSED bitflag automatically
 * because they're called by the NUMA hinting minor page fault.  If we
 * didn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler
 * would be forced to set it later while filling the TLB after we
 * return to userland.  That would trigger a second write to memory
 * that we optimize away by setting _PAGE_ACCESSED here.
 */
#ifndef pte_mknonnuma
static inline pte_t pte_mknonnuma(pte_t pte)
{
	pteval_t val = pte_val(pte);

	val &= ~_PAGE_NUMA;
	val |= (_PAGE_PRESENT|_PAGE_ACCESSED);
	return __pte(val);
}
#endif

#ifndef pmd_mknonnuma
static inline pmd_t pmd_mknonnuma(pmd_t pmd)
{
	pmdval_t val = pmd_val(pmd);

	val &= ~_PAGE_NUMA;
	val |= (_PAGE_PRESENT|_PAGE_ACCESSED);

	return __pmd(val);
}
#endif

#ifndef pte_mknuma
static inline pte_t pte_mknuma(pte_t pte)
{
	pteval_t val = pte_val(pte);

	VM_BUG_ON(!(val & _PAGE_PRESENT));

	val &= ~_PAGE_PRESENT;
	val |= _PAGE_NUMA;

	return __pte(val);
}
#endif

#ifndef ptep_set_numa
static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
				 pte_t *ptep)
{
	pte_t ptent = *ptep;

	ptent = pte_mknuma(ptent);
	set_pte_at(mm, addr, ptep, ptent);
	return;
}
#endif

#ifndef pmd_mknuma
static inline pmd_t pmd_mknuma(pmd_t pmd)
{
	pmdval_t val = pmd_val(pmd);

	val &= ~_PAGE_PRESENT;
	val |= _PAGE_NUMA;

	return __pmd(val);
}
#endif

#ifndef pmdp_set_numa
static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
				 pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;

	pmd = pmd_mknuma(pmd);
	set_pmd_at(mm, addr, pmdp, pmd);
	return;
}
#endif
#else
static inline int pmd_numa(pmd_t pmd)
{
	return 0;
}

static inline int pte_numa(pte_t pte)
{
	return 0;
}

static inline pte_t pte_mknonnuma(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_mknonnuma(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_mknuma(pte_t pte)
{
	return pte;
}

static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
				 pte_t *ptep)
{
	return;
}

static inline pmd_t pmd_mknuma(pmd_t pmd)
{
	return pmd;
}

static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
				 pmd_t *pmdp)
{
	return;
}
#endif /* CONFIG_NUMA_BALANCING */

#endif /* CONFIG_MMU */

#endif /* !__ASSEMBLY__ */

#ifndef io_remap_pfn_range
#define io_remap_pfn_range remap_pfn_range
#endif

#endif /* _ASM_GENERIC_PGTABLE_H */