/* include/asm-generic/pgtable.h, as of Linux v3.4 */
#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#include <linux/mm_types.h>
#include <linux/bug.h>

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pte_t *ptep,
                                 pte_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp,
                                 pmd_t entry, int dirty);
#endif

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pte_t *ptep)
{
        pte_t pte = *ptep;
        int r = 1;
        if (!pte_young(pte))
                r = 0;
        else
                set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
        return r;
}
#endif

#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        pmd_t pmd = *pmdp;
        int r = 1;
        if (!pmd_young(pmd))
                r = 0;
        else
                set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
        return r;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pmd_t *pmdp)
{
        BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
                           unsigned long address, pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
int pmdp_clear_flush_young(struct vm_area_struct *vma,
                           unsigned long address, pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
                                       unsigned long address,
                                       pte_t *ptep)
{
        pte_t pte = *ptep;
        pte_clear(mm, address, ptep);
        return pte;
}
#endif

#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
                                       unsigned long address,
                                       pmd_t *pmdp)
{
        pmd_t pmd = *pmdp;
        pmd_clear(mm, address, pmdp);
        return pmd;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long address, pte_t *ptep,
                                            int full)
{
        pte_t pte;
        pte = ptep_get_and_clear(mm, address, ptep);
        return pte;
}
#endif

/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTE's which are already
 * not present, or in the process of an address space destruction.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
static inline void pte_clear_not_present_full(struct mm_struct *mm,
                                              unsigned long address,
                                              pte_t *ptep,
                                              int full)
{
        pte_clear(mm, address, ptep);
}
#endif
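
/*
 * Illustrative sketch (not part of the upstream header): an architecture
 * that defines __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL in its own
 * <asm/pgtable.h> could use the "full" hint to skip synchronization that
 * is pointless while the whole address space is being torn down.  This is
 * only an assumption about what such an override might look like, not the
 * generic implementation above.
 */
#if 0   /* example only, not compiled */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
                                            unsigned long address,
                                            pte_t *ptep, int full)
{
        pte_t pte = *ptep;

        if (full) {
                /*
                 * Full mm teardown (exit_mmap()): no other user can see
                 * these ptes any more, so a plain clear is enough.
                 */
                pte_clear(mm, address, ptep);
        } else {
                /* otherwise fall back to the regular, safer variant */
                pte = ptep_get_and_clear(mm, address, ptep);
        }
        return pte;
}
#endif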

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
                              unsigned long address,
                              pte_t *ptep);
#endif

#ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH
extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
                              unsigned long address,
                              pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
        pte_t old_pte = *ptep;
        set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long address, pmd_t *pmdp)
{
        pmd_t old_pmd = *pmdp;
        set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long address, pmd_t *pmdp)
{
        BUG();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
extern pmd_t pmdp_splitting_flush(struct vm_area_struct *vma,
                                  unsigned long address,
                                  pmd_t *pmdp);
#endif

#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
        return pte_val(pte_a) == pte_val(pte_b);
}
#endif

#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
        return pmd_val(pmd_a) == pmd_val(pmd_b);
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
        BUG();
        return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
#define page_test_and_clear_dirty(pfn, mapped) (0)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
#define pte_maybe_dirty(pte)            pte_dirty(pte)
#else
#define pte_maybe_dirty(pte)            (1)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
#define page_test_and_clear_young(pfn) (0)
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)       pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr) (pte)
#endif

#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif

#ifndef pgprot_noncached
#define pgprot_noncached(prot)  (prot)
#endif

#ifndef pgprot_writecombine
#define pgprot_writecombine pgprot_noncached
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier.  Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 */

#define pgd_addr_end(addr, end)                                         \
({      unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;  \
        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
})

#ifndef pud_addr_end
#define pud_addr_end(addr, end)                                         \
({      unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;      \
        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end)                                         \
({      unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;      \
        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
})
#endif

/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
        if (pgd_none(*pgd))
                return 1;
        if (unlikely(pgd_bad(*pgd))) {
                pgd_clear_bad(pgd);
                return 1;
        }
        return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
        if (pud_none(*pud))
                return 1;
        if (unlikely(pud_bad(*pud))) {
                pud_clear_bad(pud);
                return 1;
        }
        return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
        if (pmd_none(*pmd))
                return 1;
        if (unlikely(pmd_bad(*pmd))) {
                pmd_clear_bad(pmd);
                return 1;
        }
        return 0;
}
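
/*
 * Illustrative sketch (not part of the upstream header): the walk pattern
 * that the p?d_addr_end() macros and p?d_none_or_clear_bad() helpers above
 * are designed for, as used throughout mm/.  The function name
 * example_walk_pmd_range() is hypothetical.
 */
#if 0   /* example only, not compiled */
static void example_walk_pmd_range(struct mm_struct *mm, pud_t *pud,
                                   unsigned long addr, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, addr);
        unsigned long next;

        do {
                /* clamp to the next pmd boundary or the caller's end */
                next = pmd_addr_end(addr, end);
                /* skip holes; bad entries are reported and reset to none */
                if (pmd_none_or_clear_bad(pmd))
                        continue;
                /* ... visit the ptes mapped by this pmd ... */
        } while (pmd++, addr = next, addr != end);
}
#endif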

static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
                                             unsigned long addr,
                                             pte_t *ptep)
{
        /*
         * Get the current pte state, but zero it out to make it
         * non-present, preventing the hardware from asynchronously
         * updating it.
         */
        return ptep_get_and_clear(mm, addr, ptep);
}

static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
                                             unsigned long addr,
                                             pte_t *ptep, pte_t pte)
{
        /*
         * The pte is non-present, so there's no hardware state to
         * preserve.
         */
        set_pte_at(mm, addr, ptep, pte);
}

#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
/*
 * Start a pte protection read-modify-write transaction, which
 * protects against asynchronous hardware modifications to the pte.
 * The intention is not to prevent the hardware from making pte
 * updates, but to prevent any updates it may make from being lost.
 *
 * This does not protect against other software modifications of the
 * pte; the appropriate pte lock must be held over the transaction.
 *
 * Note that this interface is intended to be batchable, meaning that
 * ptep_modify_prot_commit may not actually update the pte, but merely
 * queue the update to be done at some later time.  The update must be
 * actually committed before the pte lock is released, however.
 */
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
                                           unsigned long addr,
                                           pte_t *ptep)
{
        return __ptep_modify_prot_start(mm, addr, ptep);
}

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 */
static inline void ptep_modify_prot_commit(struct mm_struct *mm,
                                           unsigned long addr,
                                           pte_t *ptep, pte_t pte)
{
        __ptep_modify_prot_commit(mm, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
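
/*
 * Illustrative sketch (not part of the upstream header): how a caller is
 * expected to use the transaction, with the pte lock held for its whole
 * duration (compare change_pte_range() in mm/mprotect.c).  The function
 * name example_change_protection_one() is hypothetical.
 */
#if 0   /* example only, not compiled */
static void example_change_protection_one(struct mm_struct *mm,
                                          unsigned long addr, pte_t *ptep,
                                          pgprot_t newprot)
{
        pte_t ptent;

        /* caller holds the pte lock covering *ptep */
        ptent = ptep_modify_prot_start(mm, addr, ptep);
        ptent = pte_modify(ptent, newprot);
        /* must be committed before the pte lock is dropped */
        ptep_modify_prot_commit(mm, addr, ptep, ptent);
}
#endif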

#endif /* CONFIG_MMU */

/*
 * A facility to provide lazy MMU batching.  This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued.  Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window.  Note that using this
 * interface requires that read hazards be removed from the code.  A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date.  This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified.  In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()      do {} while (0)
#define arch_leave_lazy_mmu_mode()      do {} while (0)
#define arch_flush_lazy_mmu_mode()      do {} while (0)
#endif
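
/*
 * Illustrative sketch (not part of the upstream header): the intended
 * bracketing of a batch of pte updates with lazy MMU mode, entered and
 * left under the page table lock as the comment above requires, and with
 * no reads through the raw pte pointers inside the window.  The function
 * name example_wrprotect_ptes() is hypothetical.
 */
#if 0   /* example only, not compiled */
static void example_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
                                   pte_t *ptep, int nr)
{
        int i;

        /* caller holds the page table lock covering these ptes */
        arch_enter_lazy_mmu_mode();
        for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE)
                ptep_set_wrprotect(mm, addr, ptep);
        /* do not read *ptep here: the writes may still be queued */
        arch_leave_lazy_mmu_mode();
}
#endif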

/*
 * A facility to provide batching of the reload of page tables and
 * other process state with the actual context switch code for
 * paravirtualized guests.  By convention, only one of the batched
 * update (lazy) modes (CPU, MMU) should be active at any given time,
 * entries should never be nested, and entries and exits should always be
 * paired.  This is for sanity of maintaining and reasoning about the
 * kernel code.  In this case, the exit (end of the context switch) is
 * in architecture-specific code, and so doesn't need a generic
 * definition.
 */
#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
#define arch_start_context_switch(prev) do {} while (0)
#endif

#ifndef __HAVE_PFNMAP_TRACKING
/*
 * Interface that can be used by architecture code to keep track of
 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 *
 * track_pfn_vma_new is called when a _new_ pfn mapping is being established
 * for the physical range indicated by pfn and size.
 */
static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
                                    unsigned long pfn, unsigned long size)
{
        return 0;
}

/*
 * Interface that can be used by architecture code to keep track of
 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 *
 * track_pfn_vma_copy is called when a vma that is covering the pfnmap gets
 * copied through copy_page_range().
 */
static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
{
        return 0;
}

/*
 * Interface that can be used by architecture code to keep track of
 * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
 *
 * untrack_pfn_vma is called while unmapping a pfnmap for a region.
 * untrack can be called for a specific region indicated by pfn and size or
 * can be for the entire vma (in which case size can be zero).
 */
static inline void untrack_pfn_vma(struct vm_area_struct *vma,
                                   unsigned long pfn, unsigned long size)
{
}
#else
extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
                             unsigned long pfn, unsigned long size);
extern int track_pfn_vma_copy(struct vm_area_struct *vma);
extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
                            unsigned long size);
#endif

#ifdef CONFIG_MMU

#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
        return 0;
}
static inline int pmd_trans_splitting(pmd_t pmd)
{
        return 0;
}
#ifndef __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
        BUG();
        return 0;
}
#endif /* __HAVE_ARCH_PMD_WRITE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * This function is meant to be used by sites walking pagetables with
 * the mmap_sem held in read mode to protect against MADV_DONTNEED and
 * transhuge page faults.  MADV_DONTNEED can convert a transhuge pmd
 * into a null pmd and the transhuge page fault can convert a null pmd
 * into a hugepmd or into a regular pmd (if the hugepage allocation
 * fails).  While holding the mmap_sem in read mode the pmd becomes
 * stable and stops changing under us only if it's not null and not a
 * transhuge pmd.  When those races occur and this function makes a
 * difference vs the standard pmd_none_or_clear_bad, the result is
 * undefined, so behaving as if the pmd were none is safe (because it
 * can return none anyway).  The compiler-level barrier() is critically
 * important to compute the two checks atomically on the same pmdval.
 */
static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
        /* depend on compiler for an atomic pmd read */
        pmd_t pmdval = *pmd;
        /*
         * The barrier will stabilize the pmdval in a register or on
         * the stack so that it will stop changing under the code.
         */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        barrier();
#endif
        if (pmd_none(pmdval))
                return 1;
        if (unlikely(pmd_bad(pmdval))) {
                if (!pmd_trans_huge(pmdval))
                        pmd_clear_bad(pmd);
                return 1;
        }
        return 0;
}

/*
 * This is a noop if Transparent Hugepage Support is not built into
 * the kernel.  Otherwise it is equivalent to
 * pmd_none_or_trans_huge_or_clear_bad(), and shall only be called in
 * places that already verified the pmd is not none and that want to
 * walk ptes while holding the mmap sem in read mode (write mode doesn't
 * need this).  If THP is not enabled, the pmd can't go away under the
 * code even if MADV_DONTNEED runs, but if THP is enabled we need to
 * run a pmd_trans_unstable before walking the ptes after
 * split_huge_page_pmd returns (because it may have run when the pmd
 * became null, but then a page fault can map in a THP and not a
 * regular page).
 */
static inline int pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        return pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
        return 0;
#endif
}

#endif /* CONFIG_MMU */

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_GENERIC_PGTABLE_H */
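
/*
 * Illustrative sketch (not part of the upstream header): a pte walker that
 * holds mmap_sem only in read mode and therefore must check
 * pmd_trans_unstable() (or pmd_none_or_trans_huge_or_clear_bad()) before
 * touching the pte page, as the comments above describe.  The function
 * name example_walk_ptes() is hypothetical.
 */
#if 0   /* example only, not compiled */
static void example_walk_ptes(struct vm_area_struct *vma, pmd_t *pmd,
                              unsigned long addr, unsigned long end)
{
        spinlock_t *ptl;
        pte_t *pte;

        /*
         * With mmap_sem held only for read, a racing MADV_DONTNEED or a
         * transhuge page fault may still change this pmd under us; treat
         * an unstable pmd as if it were none and bail out.
         */
        if (pmd_trans_unstable(pmd))
                return;

        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE) {
                /* ... examine *pte ... */
        }
        pte_unmap_unlock(pte - 1, ptl);
}
#endif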