/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#include <linux/pfn.h>

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>
#include <linux/errno.h>

#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \
	defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS
#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED
#endif

/*
 * On almost all architectures and configurations, 0 can be used as the
 * upper ceiling to free_pgtables(): on many architectures it has the same
 * effect as using TASK_SIZE. However, there is one configuration which
 * must impose a more careful limit, to avoid freeing kernel pgtables.
 */
#ifndef USER_PGTABLES_CEILING
#define USER_PGTABLES_CEILING	0UL
#endif

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
extern int pudp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pud_t *pudp,
				 pud_t entry, int dirty);
#else
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
static inline int pudp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pud_t *pudp,
					pud_t entry, int dirty)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	pte_t pte = *ptep;
	int r = 1;
	if (!pte_young(pte))
		r = 0;
	else
		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
	return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	int r = 1;
	if (!pmd_young(pmd))
		r = 0;
	else
		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
	return r;
}
#else
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
				  unsigned long address, pmd_t *pmdp);
#else
/*
 * Despite being relevant to THP only, this API is called from generic rmap
 * code under PageTransHuge(), hence it needs a dummy implementation for !THP.
 */
static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

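/*
 * Illustrative sketch (editor's addition, not part of the original header):
 * how an aging/reference-bit scanner might consume the young-bit helpers
 * above for a single address. pte_offset_map_lock()/pte_unmap_unlock() are
 * assumed from <linux/mm.h>, and the pmd is assumed to come from a prior
 * page-table walk. Kept under #if 0 so it is never compiled.
 */
#if 0
static int example_test_and_clear_young(struct vm_area_struct *vma,
					pmd_t *pmd, unsigned long address)
{
	spinlock_t *ptl;
	pte_t *ptep;
	int young;

	/* Hold the pte lock so the entry cannot change under us. */
	ptep = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl);
	/* Clear the accessed bit and flush the TLB so it can be set again. */
	young = ptep_clear_flush_young(vma, address, ptep);
	pte_unmap_unlock(ptep, ptl);

	return young;
}
#endif
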
#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pte_t *ptep)
{
	pte_t pte = *ptep;
	pte_clear(mm, address, ptep);
	return pte;
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	pmd_clear(pmdp);
	return pmd;
}
#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */
#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address,
					    pud_t *pudp)
{
	pud_t pud = *pudp;

	pud_clear(pudp);
	return pud;
}
#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pmd_t *pmdp,
					    int full)
{
	return pmdp_huge_get_and_clear(mm, address, pmdp);
}
#endif

#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pud_t *pudp,
					    int full)
{
	return pudp_huge_get_and_clear(mm, address, pudp);
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pte_t *ptep,
					    int full)
{
	pte_t pte;
	pte = ptep_get_and_clear(mm, address, ptep);
	return pte;
}
#endif

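/*
 * Illustrative sketch (editor's addition): a zap-style loop that tears down
 * a range of ptes, passing full == 1 to hint that the whole address space
 * is going away. pte_offset_map_lock(), pte_none() and PAGE_SIZE are assumed
 * from the usual core-mm environment. Kept under #if 0; never compiled.
 */
#if 0
static void example_zap_pte_range(struct mm_struct *mm, pmd_t *pmd,
				  unsigned long addr, unsigned long end)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		pte_t ptent = *pte;

		if (pte_none(ptent))
			continue;
		/* Atomically fetch and clear; the old pte tells us what to release. */
		ptent = ptep_get_and_clear_full(mm, addr, pte, 1);
		/* ... drop the reference on the page described by ptent here ... */
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}
#endif
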
/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTEs which are already
 * not present, or while the address space is being destroyed.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
					      unsigned long address,
					      pte_t *ptep,
					      int full)
{
	pte_clear(mm, address, ptep);
}
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
				   unsigned long address,
				   pmd_t *pmdp);
extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma,
				   unsigned long address,
				   pud_t *pudp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = *ptep;
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef pte_savedwrite
#define pte_savedwrite pte_write
#endif

#ifndef pte_mk_savedwrite
#define pte_mk_savedwrite pte_mkwrite
#endif

#ifndef pte_clear_savedwrite
#define pte_clear_savedwrite pte_wrprotect
#endif

#ifndef pmd_savedwrite
#define pmd_savedwrite pmd_write
#endif

#ifndef pmd_mk_savedwrite
#define pmd_mk_savedwrite pmd_mkwrite
#endif

#ifndef pmd_clear_savedwrite
#define pmd_clear_savedwrite pmd_wrprotect
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	BUILD_BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	pud_t old_pud = *pudp;

	set_pud_at(mm, address, pudp, pud_wrprotect(old_pud));
}
#else
static inline void pudp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pud_t *pudp)
{
	BUILD_BUG();
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#endif

#ifndef pmdp_collapse_flush
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp);
#else
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
					unsigned long address,
					pmd_t *pmdp)
{
	BUILD_BUG();
	return *pmdp;
}
#define pmdp_collapse_flush pmdp_collapse_flush
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable);
#endif

#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif

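/*
 * Illustrative sketch (editor's addition): write-protecting a source pte
 * with ptep_set_wrprotect() (defined earlier in this file) during a
 * fork-style copy, so that later writes fault and trigger COW. This mirrors
 * what copy_page_range() does per pte; the locking and the actual copy of
 * the entry are left out. Kept under #if 0; never compiled.
 */
#if 0
static void example_cow_protect_pte(struct mm_struct *src_mm,
				    unsigned long addr, pte_t *src_pte,
				    bool cow)
{
	/*
	 * For a COW mapping, write-protect the parent's pte now; the child
	 * inherits a read-only copy and both sides fault on write.
	 */
	if (cow && pte_write(*src_pte))
		ptep_set_wrprotect(src_mm, addr, src_pte);
}
#endif
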
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * This is an implementation of pmdp_establish() that is only suitable for an
 * architecture that doesn't have hardware dirty/accessed bits. In this case
 * we can't race with the CPU setting those bits, so a non-atomic approach is
 * fine.
 */
static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
	return old_pmd;
}
#endif

#ifndef __HAVE_ARCH_PMDP_INVALIDATE
extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
			     pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}
#endif

#ifndef __HAVE_ARCH_PTE_UNUSED
/*
 * Some architectures provide facilities to virtualization guests
 * so that they can flag allocated pages as unused. This allows the
 * host to transparently reclaim unused pages. This function returns
 * whether the pte's page is unused.
 */
static inline int pte_unused(pte_t pte)
{
	return 0;
}
#endif

#ifndef pte_access_permitted
#define pte_access_permitted(pte, write) \
	(pte_present(pte) && (!(write) || pte_write(pte)))
#endif

#ifndef pmd_access_permitted
#define pmd_access_permitted(pmd, write) \
	(pmd_present(pmd) && (!(write) || pmd_write(pmd)))
#endif

#ifndef pud_access_permitted
#define pud_access_permitted(pud, write) \
	(pud_present(pud) && (!(write) || pud_write(pud)))
#endif

#ifndef p4d_access_permitted
#define p4d_access_permitted(p4d, write) \
	(p4d_present(p4d) && (!(write) || p4d_write(p4d)))
#endif

#ifndef pgd_access_permitted
#define pgd_access_permitted(pgd, write) \
	(pgd_present(pgd) && (!(write) || pgd_write(pgd)))
#endif

#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return pmd_val(pmd_a) == pmd_val(pmd_b);
}

static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
	return pud_val(pud_a) == pud_val(pud_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	BUILD_BUG();
	return 0;
}

static inline int pud_same(pud_t pud_a, pud_t pud_b)
{
	BUILD_BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

#ifndef pte_accessible
# define pte_accessible(mm, pte)	((void)(pte), 1)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

#ifndef pgprot_writethrough
#define pgprot_writethrough pgprot_noncached
#endif

#ifndef pgprot_device
#define pgprot_device pgprot_noncached
#endif

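/*
 * Illustrative sketch (editor's addition): how a get_user_pages()-fast style
 * path might use the pte_access_permitted() helper defined above to decide
 * whether a pte grants the requested access before touching the page.
 * Purely an example; kept under #if 0 and never compiled.
 */
#if 0
static bool example_pte_allows(pte_t pte, bool write)
{
	/* Present, and writable if a write was asked for. */
	if (!pte_access_permitted(pte, write))
		return false;

	/* Soft-dirty, protnone, etc. would be checked by the real caller. */
	return true;
}
#endif
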
#ifndef pgprot_modify
#define pgprot_modify pgprot_modify
static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
{
	if (pgprot_val(oldprot) == pgprot_val(pgprot_noncached(oldprot)))
		newprot = pgprot_noncached(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_writecombine(oldprot)))
		newprot = pgprot_writecombine(newprot);
	if (pgprot_val(oldprot) == pgprot_val(pgprot_device(oldprot)))
		newprot = pgprot_device(newprot);
	return newprot;
}
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier. Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 */

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})

#ifndef p4d_addr_end
#define p4d_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pud_addr_end
#define pud_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void p4d_clear_bad(p4d_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
	if (pgd_none(*pgd))
		return 1;
	if (unlikely(pgd_bad(*pgd))) {
		pgd_clear_bad(pgd);
		return 1;
	}
	return 0;
}

static inline int p4d_none_or_clear_bad(p4d_t *p4d)
{
	if (p4d_none(*p4d))
		return 1;
	if (unlikely(p4d_bad(*p4d))) {
		p4d_clear_bad(p4d);
		return 1;
	}
	return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
	if (pud_none(*pud))
		return 1;
	if (unlikely(pud_bad(*pud))) {
		pud_clear_bad(pud);
		return 1;
	}
	return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 1;
	if (unlikely(pmd_bad(*pmd))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

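/*
 * Illustrative sketch (editor's addition): the canonical walker shape that
 * the two comment blocks above describe. At each level, p?d_addr_end()
 * clamps the step to the range end, and p?d_none_or_clear_bad() skips empty
 * or corrupt entries. pmd_offset() is assumed from the arch page-table
 * headers; the pte-level work is elided. Kept under #if 0; never compiled.
 */
#if 0
static void example_walk_pmd_range(struct mm_struct *mm, pud_t *pud,
				   unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		/* Next pmd boundary, or 'end' if that comes first. */
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		/* ... walk the pte level for [addr, next) here ... */
	} while (pmd++, addr = next, addr != end);
}
#endif
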
static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep)
{
	/*
	 * Get the current pte state, but zero it out to make it
	 * non-present, preventing the hardware from asynchronously
	 * updating it.
	 */
	return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep, pte_t pte)
{
	/*
	 * The pte is non-present, so there's no hardware state to
	 * preserve.
	 */
	set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time. The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep)
{
	return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	__ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
#endif /* CONFIG_MMU */

/*
 * No-op macros that just return the current protection value. Defined here
 * because these macros can be used even if CONFIG_MMU is not defined.
 */
#ifndef pgprot_encrypted
#define pgprot_encrypted(prot)	(prot)
#endif

#ifndef pgprot_decrypted
#define pgprot_decrypted(prot)	(prot)
#endif

/*
 * A facility to provide lazy MMU batching. This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued. Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window. Note that using this
 * interface requires that read hazards be removed from the code. A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date. This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified. In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif

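/*
 * Illustrative sketch (editor's addition): a change_pte_range()-style loop
 * combining the modify_prot transaction above with lazy MMU batching. Each
 * present pte is started (made non-present so hardware A/D updates cannot
 * be lost), has its protections changed, and is committed, all under the
 * pte lock; the lazy MMU hooks let a hypervisor batch the writes.
 * pte_modify() and pte_offset_map_lock() are assumed from the arch/core-mm
 * headers. Kept under #if 0; never compiled.
 */
#if 0
static void example_change_protection(struct mm_struct *mm, pmd_t *pmd,
				      unsigned long addr, unsigned long end,
				      pgprot_t newprot)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();
	do {
		pte_t ptent;

		if (!pte_present(*pte))
			continue;

		/* Begin the read-modify-write transaction on this pte. */
		ptent = ptep_modify_prot_start(mm, addr, pte);
		ptent = pte_modify(ptent, newprot);
		/* Commit; this must happen before the pte lock is dropped. */
		ptep_modify_prot_commit(mm, addr, pte, ptent);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);
}
#endif
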
/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests. By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entries and exits should always be
 * paired. This is for sanity of maintaining and reasoning about the
 * kernel code. In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif
#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
static inline int pte_soft_dirty(pte_t pte)
{
	return 0;
}

static inline int pmd_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
	return pte;
}

static inline int pte_swp_soft_dirty(pte_t pte)
{
	return 0;
}

static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
	return pte;
}

static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
	return pmd;
}

static inline int pmd_swp_soft_dirty(pmd_t pmd)
{
	return 0;
}

static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
	return pmd;
}
#endif

#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interfaces that can be used by architecture code to keep track of
 * the memory type of pfn mappings specified by remap_pfn_range() and
 * vm_insert_pfn().
 */

/*
 * track_pfn_remap is called when a _new_ pfn mapping is being established
 * by remap_pfn_range() for the physical range indicated by pfn and size.
 */
static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
				  unsigned long pfn, unsigned long addr,
				  unsigned long size)
{
	return 0;
}

/*
 * track_pfn_insert is called when a _new_ single pfn is established
 * by vm_insert_pfn().
 */
static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
				    pfn_t pfn)
{
}

/*
 * track_pfn_copy is called when a vma that is covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_copy(struct vm_area_struct *vma)
{
	return 0;
}

/*
 * untrack_pfn is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case pfn, size are zero).
 */
static inline void untrack_pfn(struct vm_area_struct *vma,
			       unsigned long pfn, unsigned long size)
{
}

/*
 * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
 */
static inline void untrack_pfn_moved(struct vm_area_struct *vma)
{
}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
			   unsigned long pfn, unsigned long addr,
			   unsigned long size);
extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
			     pfn_t pfn);
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
			unsigned long size);
extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif

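/*
 * Illustrative sketch (editor's addition): the sort of driver ->mmap()
 * handler whose call to remap_pfn_range() ends up invoking the
 * track_pfn_remap() hook above on architectures that implement PAT-style
 * tracking. 'example_phys_addr' is a made-up placeholder for the device's
 * physical base address. Kept under #if 0; never compiled.
 */
#if 0
static int example_driver_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = example_phys_addr >> PAGE_SHIFT;	/* placeholder */

	/* remap_pfn_range() calls track_pfn_remap() before installing ptes. */
	return remap_pfn_range(vma, vma->vm_start, pfn, size,
			       vma->vm_page_prot);
}
#endif
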
#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	unsigned long offset_from_zero_pfn = pfn - zero_pfn;
	return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT);
}

#define my_zero_pfn(addr)	page_to_pfn(ZERO_PAGE(addr))

#else
static inline int is_zero_pfn(unsigned long pfn)
{
	extern unsigned long zero_pfn;
	return pfn == zero_pfn;
}

static inline unsigned long my_zero_pfn(unsigned long addr)
{
	extern unsigned long zero_pfn;
	return zero_pfn;
}
#endif

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return 0;
}
#ifndef pmd_write
static inline int pmd_write(pmd_t pmd)
{
	BUG();
	return 0;
}
#endif /* pmd_write */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pud_write
static inline int pud_write(pud_t pud)
{
	BUG();
	return 0;
}
#endif /* pud_write */

#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
	(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
	 !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
static inline int pud_trans_huge(pud_t pud)
{
	return 0;
}
#endif

#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
	/*
	 * Depend on the compiler for an atomic pmd read. NOTE: this is
	 * only going to work if pmdval_t isn't larger than
	 * an unsigned long.
	 */
	return *pmdp;
}
#endif

#ifndef arch_needs_pgtable_deposit
#define arch_needs_pgtable_deposit() (false)
#endif
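
/*
 * Illustrative sketch (editor's addition): the is_zero_pfn() helper above is
 * what paths like vm_normal_page() use to recognize ptes that map the shared
 * zero page and therefore have no normal struct page to pin or dirty.
 * pte_pfn() is assumed from the arch headers. Kept under #if 0; never
 * compiled.
 */
#if 0
static int example_pte_is_zero_page(pte_t pte)
{
	return is_zero_pfn(pte_pfn(pte));
}
#endif
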
/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails). While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd. When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined, so behaving as if the pmd was none is safe (because it
 * can return none anyway). The compiler level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 *
 * For 32bit kernels with a 64bit large pmd_t this automatically takes
 * care of reading the pmd atomically to avoid SMP race conditions
 * against pmd_populate() when the mmap_sem is held for reading by the
 * caller (a special atomic read not done by "gcc" as in the generic
 * version above, is also needed when THP is disabled because the page
 * fault can populate the pmd from under us).
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
	pmd_t pmdval = pmd_read_atomic(pmd);
	/*
	 * The barrier will stabilize the pmdval in a register or on
	 * the stack so that it will stop changing under the code.
	 *
	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
	 * pmd_read_atomic is allowed to return a not atomic pmdval
	 * (for example pointing to a hugepage that has never been
	 * mapped in the pmd). The below checks will only care about
	 * the low part of the pmd with 32bit PAE x86 anyway, with the
	 * exception of pmd_none(). So the important thing is that if
	 * the low part of the pmd is found null, the high part will
	 * be also null or the pmd_none() check below would be
	 * confused.
	 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	barrier();
#endif
	/*
	 * !pmd_present() checks for pmd migration entries.
	 *
	 * The complete check uses is_pmd_migration_entry() in linux/swapops.h.
	 * But using that requires moving the current function and
	 * pmd_trans_unstable() to linux/swapops.h to resolve the dependency,
	 * which is too much code movement.
	 *
	 * !pmd_present() is equivalent to is_pmd_migration_entry() currently,
	 * because !pmd_present() pages can only be under migration, not
	 * swapped out.
	 *
	 * pmd_none() is preserved for future condition checks on pmd migration
	 * entries and to avoid confusion with this function's name, although
	 * it is redundant with !pmd_present().
	 */
	if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
		(IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval)))
		return 1;
	if (unlikely(pmd_bad(pmdval))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

/*
 * This is a no-op if Transparent Hugepage Support is not built into
 * the kernel. Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that already verified the pmd is not none and that want to
 * walk ptes while holding the mmap_sem in read mode (write mode doesn't
 * need this). If THP is not enabled, the pmd can't go away under the
 * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 * run pmd_trans_unstable before walking the ptes after
 * split_huge_page_pmd returns (because it may have run while the pmd
 * became null, but then a page fault can map in a THP and not a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
	return 0;
#endif
}

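/*
 * Illustrative sketch (editor's addition): how a pte walker that holds the
 * mmap_sem only for reading is expected to use pmd_trans_unstable() above
 * before mapping the pte level, treating an unstable pmd as if it were
 * empty. pte_offset_map_lock() and PAGE_SIZE are assumed from the usual
 * core-mm environment. Kept under #if 0; never compiled.
 */
#if 0
static int example_walk_pte_range(struct mm_struct *mm, pmd_t *pmd,
				  unsigned long addr, unsigned long end)
{
	spinlock_t *ptl;
	pte_t *pte;

	/*
	 * The pmd may be changing under us (MADV_DONTNEED, THP faults);
	 * if it is none, huge or being torn down, behave as if empty.
	 */
	if (pmd_trans_unstable(pmd))
		return 0;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		/* ... inspect *pte for [addr, addr + PAGE_SIZE) here ... */
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);

	return 0;
}
#endif
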
#ifndef CONFIG_NUMA_BALANCING
/*
 * Technically a PTE can be PROTNONE even when not doing NUMA balancing, but
 * the only case the kernel cares about is NUMA balancing, and PROTNONE is
 * only ever set when the VMA is accessible. For PROT_NONE VMAs, the PTEs are
 * not marked _PAGE_PROTNONE, so by default implement the helper as "always
 * no". It is the responsibility of the caller to distinguish between
 * PROT_NONE protections and NUMA hinting fault protections.
 */
static inline int pte_protnone(pte_t pte)
{
	return 0;
}

static inline int pmd_protnone(pmd_t pmd)
{
	return 0;
}
#endif /* CONFIG_NUMA_BALANCING */

#endif /* CONFIG_MMU */

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP

#ifndef __PAGETABLE_P4D_FOLDED
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot);
int p4d_clear_huge(p4d_t *p4d);
#else
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int p4d_clear_huge(p4d_t *p4d)
{
	return 0;
}
#endif /* !__PAGETABLE_P4D_FOLDED */

int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
int pud_clear_huge(pud_t *pud);
int pmd_clear_huge(pmd_t *pmd);
int pud_free_pmd_page(pud_t *pud);
int pmd_free_pte_page(pmd_t *pmd);
#else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
static inline int p4d_clear_huge(p4d_t *p4d)
{
	return 0;
}
static inline int pud_clear_huge(pud_t *pud)
{
	return 0;
}
static inline int pmd_clear_huge(pmd_t *pmd)
{
	return 0;
}
static inline int pud_free_pmd_page(pud_t *pud)
{
	return 0;
}
static inline int pmd_free_pte_page(pmd_t *pmd)
{
	return 0;
}
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */

#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * ARCHes with special requirements for evicting THP backing TLB entries can
 * implement this. It can also help optimize normal TLB flushes in the THP
 * regime: the stock flush_tlb_range() typically has an optimization to nuke
 * the entire TLB if the flush span is greater than a threshold, which will
 * likely be true for a single huge page. Thus a single THP flush would
 * invalidate the entire TLB, which is not desirable.
 * e.g. see arch/arc: flush_pmd_tlb_range
 */
#define flush_pmd_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#define flush_pud_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#else
#define flush_pmd_tlb_range(vma, addr, end)	BUILD_BUG()
#define flush_pud_tlb_range(vma, addr, end)	BUILD_BUG()
#endif
#endif

struct file;
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
			unsigned long size, pgprot_t *vma_prot);

#ifndef CONFIG_X86_ESPFIX64
static inline void init_espfix_bsp(void) { }
#endif

#endif /* !__ASSEMBLY__ */

#ifndef io_remap_pfn_range
#define io_remap_pfn_range remap_pfn_range
#endif

#ifndef has_transparent_hugepage
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define has_transparent_hugepage() 1
#else
#define has_transparent_hugepage() 0
#endif
#endif

#endif /* _ASM_GENERIC_PGTABLE_H */