#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	pte_t pte = *ptep;
	int r = 1;
	if (!pte_young(pte))
		r = 0;
	else
		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
	return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	int r = 1;
	if (!pmd_young(pmd))
		r = 0;
	else
		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
	return r;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pte_t *ptep)
{
	pte_t pte = *ptep;
	pte_clear(mm, address, ptep);
	return pte;
}
#endif

#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
				       unsigned long address,
				       pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;
	pmd_clear(mm, address, pmdp);
	return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address, pte_t *ptep,
					    int full)
{
	pte_t pte;
	pte = ptep_get_and_clear(mm, address, ptep);
	return pte;
}
#endif

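/*
 * Example sketch, not part of the upstream header: how generic mm code
 * typically consumes the accessed-bit primitives above.  The name
 * "example_age_pte" is hypothetical and used only for illustration; the
 * caller is assumed to hold the page table lock for *ptep.
 */
static inline int example_age_pte(struct vm_area_struct *vma,
				  unsigned long address, pte_t *ptep,
				  int need_flush)
{
	/*
	 * The non-flushing variant is cheaper; the flushing variant also
	 * shoots down any TLB entry that may still have the old accessed
	 * bit cached.
	 */
	if (need_flush)
		return ptep_clear_flush_young(vma, address, ptep);
	return ptep_test_and_clear_young(vma, address, ptep);
}
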
/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTEs which are already
 * not present, or in the process of an address space destruction.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
					      unsigned long address,
					      pte_t *ptep,
					      int full)
{
	pte_clear(mm, address, ptep);
}
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH
extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
			      unsigned long address,
			      pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = *ptep;
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	pmd_t old_pmd = *pmdp;
	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
extern void pmdp_splitting_flush(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}
#endif

#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	return pmd_val(pmd_a) == pmd_val(pmd_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	BUG();
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
#define page_test_and_clear_dirty(pfn, mapped)	(0)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
#define pte_maybe_dirty(pte)		pte_dirty(pte)
#else
#define pte_maybe_dirty(pte)		(1)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
#define page_test_and_clear_young(pfn) (0)
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)	(prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

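/*
 * Example sketch, not part of the upstream header: the copy-on-write
 * style use of ptep_set_wrprotect() and pte_same().  The name
 * "example_wrprotect_pte" is hypothetical; the caller is assumed to hold
 * the page table lock and to flush the TLB for the range afterwards.
 */
static inline int example_wrprotect_pte(struct mm_struct *mm,
					unsigned long address,
					pte_t *ptep, pte_t orig_pte)
{
	/*
	 * Bail out if the pte changed since it was sampled (for example
	 * by a hardware dirty/accessed update); pte_same() compares the
	 * raw pte values.
	 */
	if (!pte_same(*ptep, orig_pte))
		return 0;
	ptep_set_wrprotect(mm, address, ptep);
	return 1;
}
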
/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier. Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 */

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})

#ifndef pud_addr_end
#define pud_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#endif

/*
 * When walking page tables, we usually want to skip any p?d_none entries,
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
	if (pgd_none(*pgd))
		return 1;
	if (unlikely(pgd_bad(*pgd))) {
		pgd_clear_bad(pgd);
		return 1;
	}
	return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
	if (pud_none(*pud))
		return 1;
	if (unlikely(pud_bad(*pud))) {
		pud_clear_bad(pud);
		return 1;
	}
	return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 1;
	if (unlikely(pmd_bad(*pmd))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep)
{
	/*
	 * Get the current pte state, but zero it out to make it
	 * non-present, preventing the hardware from asynchronously
	 * updating it.
	 */
	return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
					     unsigned long addr,
					     pte_t *ptep, pte_t pte)
{
	/*
	 * The pte is non-present, so there's no hardware state to
	 * preserve.
	 */
	set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time. The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep)
{
	return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
					   unsigned long addr,
					   pte_t *ptep, pte_t pte)
{
	__ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
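
/*
 * Example sketch, not part of the upstream header: changing the
 * protection of a live pte with the transaction helpers above, in the
 * style of an mprotect-like pte loop.  "example_modify_prot" is a
 * hypothetical name; the pte lock is assumed to be held and the TLB
 * flushed by the caller afterwards.
 */
static inline void example_modify_prot(struct mm_struct *mm,
				       unsigned long addr, pte_t *ptep,
				       pgprot_t newprot)
{
	pte_t pte;

	/* Make the pte non-present so hardware A/D updates cannot be lost. */
	pte = ptep_modify_prot_start(mm, addr, ptep);
	pte = pte_modify(pte, newprot);
	/* Reinstall the pte; the architecture may batch this write. */
	ptep_modify_prot_commit(mm, addr, ptep, pte);
}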
#endif /* CONFIG_MMU */

/*
 * A facility to provide lazy MMU batching. This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued. Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window. Note that using this
 * interface requires that read hazards be removed from the code. A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date. This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified. In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif

/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests. By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entry should never be nested, and entry and exits should always be
 * paired. This is for sanity of maintaining and reasoning about the
 * kernel code. In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev)	do {} while (0)
#endif

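/*
 * Example sketch, not part of the upstream header: the canonical shape
 * of a batched pte update under lazy MMU mode.  "example_clear_range" is
 * a hypothetical illustration; the page table lock is assumed to be held
 * around the whole batch, as the comment above requires.  Guarded by
 * CONFIG_MMU because the pte helpers it uses only exist there.
 */
#ifdef CONFIG_MMU
static inline void example_clear_range(struct mm_struct *mm,
				       unsigned long addr,
				       unsigned long end, pte_t *ptep)
{
	arch_enter_lazy_mmu_mode();
	for (; addr < end; addr += PAGE_SIZE, ptep++)
		pte_clear(mm, addr, ptep);
	/* Any batched updates are flushed out here at the latest. */
	arch_leave_lazy_mmu_mode();
}
#endif /* CONFIG_MMU (example only) */
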
#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interface that can be used by architecture code to keep track of
 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 *
 * track_pfn_vma_new is called when a _new_ pfn mapping is being established
 * for the physical range indicated by pfn and size.
 */
static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
				    unsigned long pfn, unsigned long size)
{
	return 0;
}

/*
 * Interface that can be used by architecture code to keep track of
 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 *
 * track_pfn_vma_copy is called when a vma that is covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
{
	return 0;
}

/*
 * Interface that can be used by architecture code to keep track of
 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 *
 * untrack_pfn_vma is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case size can be zero).
 */
static inline void untrack_pfn_vma(struct vm_area_struct *vma,
				   unsigned long pfn, unsigned long size)
{
}
#else
extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
			     unsigned long pfn, unsigned long size);
extern int track_pfn_vma_copy(struct vm_area_struct *vma);
extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
			    unsigned long size);
#endif

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	return 0;
}
static inline int pmd_trans_splitting(pmd_t pmd)
{
	return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
	BUG();
	return 0;
}
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifndef pmd_read_atomic
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
	/*
	 * Depend on the compiler for an atomic pmd read. NOTE: this is
	 * only going to work if the pmdval_t isn't larger than
	 * an unsigned long.
	 */
	return *pmdp;
}
#endif

/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails). While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd. When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined, so behaving as if the pmd were none is safe (because it
 * can return none anyway). The compiler level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 *
 * For 32bit kernels with a 64bit large pmd_t this automatically takes
 * care of reading the pmd atomically to avoid SMP race conditions
 * against pmd_populate() when the mmap_sem is held for reading by the
 * caller (a special atomic read not done by "gcc" as in the generic
 * version above is also needed when THP is disabled, because the page
 * fault can populate the pmd from under us).
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
	pmd_t pmdval = pmd_read_atomic(pmd);
	/*
	 * The barrier will stabilize the pmdval in a register or on
	 * the stack so that it will stop changing under the code.
	 *
	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
	 * pmd_read_atomic is allowed to return a non-atomic pmdval
	 * (for example pointing to a hugepage that has never been
	 * mapped in the pmd). The checks below only care about
	 * the low part of the pmd with 32bit PAE x86 anyway, with the
	 * exception of pmd_none(). So the important thing is that if
	 * the low part of the pmd is found null, the high part will
	 * also be null or the pmd_none() check below would be
	 * confused.
	 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	barrier();
#endif
	if (pmd_none(pmdval))
		return 1;
	if (unlikely(pmd_bad(pmdval))) {
		if (!pmd_trans_huge(pmdval))
			pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}

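/*
 * Example sketch, not part of the upstream header: the pattern used by
 * page table walkers that hold mmap_sem in read mode, combining
 * pmd_addr_end() with pmd_none_or_trans_huge_or_clear_bad() as described
 * above.  "example_walk_pmd_range" is a hypothetical name.
 */
static inline void example_walk_pmd_range(pmd_t *pmd, unsigned long addr,
					  unsigned long end)
{
	unsigned long next;

	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
			continue;	/* skip: none, bad, or unstable THP */
		/* ... map and walk the pte page for [addr, next) here ... */
	} while (pmd++, addr = next, addr != end);
}
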
/*
 * This is a noop if Transparent Hugepage Support is not built into
 * the kernel. Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that already verified the pmd is not none and they want to
 * walk ptes while holding the mmap sem in read mode (write mode doesn't
 * need this). If THP is not enabled, the pmd can't go away under the
 * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 * run a pmd_trans_unstable before walking the ptes after
 * split_huge_page_pmd returns (because it may have run when the pmd
 * became null, but then a page fault can map in a THP and not a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
	return 0;
#endif
}

#endif /* CONFIG_MMU */

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_GENERIC_PGTABLE_H */