Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v2.6.13-rc2

#ifndef _I386_PGTABLE_H
#define _I386_PGTABLE_H

#include <linux/config.h>

/*
 * The Linux memory management assumes a three-level page table setup. On
 * the i386, we use that, but "fold" the mid level into the top-level page
 * table, so that we physically have the same two-level page table as the
 * i386 mmu expects.
 *
 * This file contains the functions and defines necessary to modify and use
 * the i386 page table tree.
 */
#ifndef __ASSEMBLY__
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>

#ifndef _I386_BITOPS_H
#include <asm/bitops.h>
#endif

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>

/*
 * ZERO_PAGE is a global shared page that is always zero: used
 * for zero-mapped memory areas etc..
 */
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
extern unsigned long empty_zero_page[1024];
extern pgd_t swapper_pg_dir[1024];
extern kmem_cache_t *pgd_cache;
extern kmem_cache_t *pmd_cache;
extern spinlock_t pgd_lock;
extern struct page *pgd_list;

void pmd_ctor(void *, kmem_cache_t *, unsigned long);
void pgd_ctor(void *, kmem_cache_t *, unsigned long);
void pgd_dtor(void *, kmem_cache_t *, unsigned long);
void pgtable_cache_init(void);
void paging_init(void);

/*
 * The Linux x86 paging architecture is 'compile-time dual-mode': it
 * implements both the traditional 2-level x86 page tables and the
 * newer 3-level PAE-mode page tables.
 */
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level-defs.h>
# define PMD_SIZE	(1UL << PMD_SHIFT)
# define PMD_MASK	(~(PMD_SIZE-1))
#else
# include <asm/pgtable-2level-defs.h>
#endif

#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))

#define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
#define FIRST_USER_ADDRESS	0

#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)

#define TWOLEVEL_PGDIR_SHIFT	22
#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)

/* Just any arbitrary offset to the start of the vmalloc VM area: the
 * current 8MB value just means that there will be an 8MB "hole" after the
 * physical memory until the kernel virtual memory starts. That means that
 * any out-of-bounds memory accesses will hopefully be caught.
 * The vmalloc() routines leave a hole of 4kB between each vmalloced
 * area for the same reason. ;)
 */
#define VMALLOC_OFFSET	(8*1024*1024)
#define VMALLOC_START	(((unsigned long) high_memory + vmalloc_earlyreserve + \
			2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
#else
# define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
#endif
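
/*
 * Editor's illustration (not part of the original header): the rounding
 * in VMALLOC_START aligns the vmalloc area to an 8MB boundary while
 * guaranteeing a hole of at least VMALLOC_OFFSET (and less than twice
 * that) past low memory. A minimal userspace sketch of the same
 * arithmetic, with hypothetical end-of-lowmem addresses:
 */
#if 0
#include <assert.h>
#define OFF (8*1024*1024UL)

static unsigned long vmalloc_base(unsigned long end_of_lowmem)
{
	/* same expression as VMALLOC_START above */
	return (end_of_lowmem + 2*OFF - 1) & ~(OFF - 1);
}

int main(void)
{
	/* an already aligned end still gets exactly one 8MB hole... */
	assert(vmalloc_base(0xC8800000UL) == 0xC9000000UL);
	/* ...and one byte of misalignment rounds up to the next boundary */
	assert(vmalloc_base(0xC8800001UL) == 0xC9800000UL);
	return 0;
}
#endif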

/*
 * The 4MB page is guesswork.. Detailed in the infamous "Chapter H"
 * of the Pentium details, but assuming Intel did the straightforward
 * thing, this bit set in the page directory entry just means that
 * the page directory entry points directly to a 4MB-aligned block of
 * memory.
 */
#define _PAGE_BIT_PRESENT	0
#define _PAGE_BIT_RW		1
#define _PAGE_BIT_USER		2
#define _PAGE_BIT_PWT		3
#define _PAGE_BIT_PCD		4
#define _PAGE_BIT_ACCESSED	5
#define _PAGE_BIT_DIRTY		6
#define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page, Pentium+, if present.. */
#define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
#define _PAGE_BIT_UNUSED1	9	/* available for programmer */
#define _PAGE_BIT_UNUSED2	10
#define _PAGE_BIT_UNUSED3	11
#define _PAGE_BIT_NX		63

#define _PAGE_PRESENT	0x001
#define _PAGE_RW	0x002
#define _PAGE_USER	0x004
#define _PAGE_PWT	0x008
#define _PAGE_PCD	0x010
#define _PAGE_ACCESSED	0x020
#define _PAGE_DIRTY	0x040
#define _PAGE_PSE	0x080	/* 4 MB (or 2MB) page, Pentium+, if present.. */
#define _PAGE_GLOBAL	0x100	/* Global TLB entry PPro+ */
#define _PAGE_UNUSED1	0x200	/* available for programmer */
#define _PAGE_UNUSED2	0x400
#define _PAGE_UNUSED3	0x800

#define _PAGE_FILE	0x040	/* set:pagecache unset:swap */
#define _PAGE_PROTNONE	0x080	/* If not present */
#ifdef CONFIG_X86_PAE
#define _PAGE_NX	(1ULL<<_PAGE_BIT_NX)
#else
#define _PAGE_NX	0
#endif

#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)

#define PAGE_NONE \
	__pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
#define PAGE_SHARED \
	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)

#define PAGE_SHARED_EXEC \
	__pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_COPY_NOEXEC \
	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
#define PAGE_COPY_EXEC \
	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_COPY \
	PAGE_COPY_NOEXEC
#define PAGE_READONLY \
	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
#define PAGE_READONLY_EXEC \
	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)

#define _PAGE_KERNEL \
	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
#define _PAGE_KERNEL_EXEC \
	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)

extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
#define __PAGE_KERNEL_RO		(__PAGE_KERNEL & ~_PAGE_RW)
#define __PAGE_KERNEL_NOCACHE		(__PAGE_KERNEL | _PAGE_PCD)
#define __PAGE_KERNEL_LARGE		(__PAGE_KERNEL | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC	(__PAGE_KERNEL_EXEC | _PAGE_PSE)

#define PAGE_KERNEL		__pgprot(__PAGE_KERNEL)
#define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO)
#define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC)
#define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE)
#define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE)
#define PAGE_KERNEL_LARGE_EXEC	__pgprot(__PAGE_KERNEL_LARGE_EXEC)

/*
 * The i386 can't do page protection for execute, and treats it the
 * same as read. Also, write permissions imply read permissions.
 * This is the closest we can get..
 */
#define __P000	PAGE_NONE
#define __P001	PAGE_READONLY
#define __P010	PAGE_COPY
#define __P011	PAGE_COPY
#define __P100	PAGE_READONLY_EXEC
#define __P101	PAGE_READONLY_EXEC
#define __P110	PAGE_COPY_EXEC
#define __P111	PAGE_COPY_EXEC

#define __S000	PAGE_NONE
#define __S001	PAGE_READONLY
#define __S010	PAGE_SHARED
#define __S011	PAGE_SHARED
#define __S100	PAGE_READONLY_EXEC
#define __S101	PAGE_READONLY_EXEC
#define __S110	PAGE_SHARED_EXEC
#define __S111	PAGE_SHARED_EXEC
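
/*
 * Editor's illustration (not part of the original header): each digit of
 * the table index encodes, from most to least significant, VM_EXEC,
 * VM_WRITE and VM_READ, so a private PROT_READ|PROT_WRITE mapping
 * resolves to __P011 (PAGE_COPY, i.e. copy-on-write) and a shared one to
 * __S011 (PAGE_SHARED). A hedged sketch of the lookup the generic mm
 * code performs with these macros (protection_map is the array built
 * from them in mm/mmap.c):
 */
#if 0
static pgprot_t prot_for(unsigned long vm_flags)
{
	/* VM_READ, VM_WRITE, VM_EXEC are the low bits; VM_SHARED selects
	   the __S half of the table instead of the __P half */
	return protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
}
#endif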

/*
 * Define this if things work differently on an i386 and an i486:
 * it will (on an i486) warn about kernel memory accesses that are
 * done without an 'access_ok(VERIFY_WRITE,..)'
 */
#undef TEST_ACCESS_OK

/* The boot page tables (all created as a single array) */
extern unsigned long pg0[];

#define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)

#define pmd_none(x)	(!pmd_val(x))
#define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
#define pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)

#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))

/*
 * The following only work if pte_present() is true.
 * Undefined behaviour if not..
 */
static inline int pte_user(pte_t pte)		{ return (pte).pte_low & _PAGE_USER; }
static inline int pte_read(pte_t pte)		{ return (pte).pte_low & _PAGE_USER; }
static inline int pte_dirty(pte_t pte)		{ return (pte).pte_low & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte)		{ return (pte).pte_low & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte)		{ return (pte).pte_low & _PAGE_RW; }

/*
 * The following only works if pte_present() is not true.
 */
static inline int pte_file(pte_t pte)		{ return (pte).pte_low & _PAGE_FILE; }

static inline pte_t pte_rdprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_USER; return pte; }
static inline pte_t pte_exprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_USER; return pte; }
static inline pte_t pte_mkclean(pte_t pte)	{ (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
static inline pte_t pte_mkold(pte_t pte)	{ (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
static inline pte_t pte_wrprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_RW; return pte; }
static inline pte_t pte_mkread(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
static inline pte_t pte_mkexec(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
static inline pte_t pte_mkdirty(pte_t pte)	{ (pte).pte_low |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte)	{ (pte).pte_low |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
static inline pte_t pte_mkhuge(pte_t pte)	{ (pte).pte_low |= _PAGE_PRESENT | _PAGE_PSE; return pte; }

#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else
# include <asm/pgtable-2level.h>
#endif
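
/*
 * Editor's illustration (not part of the original header): the pte_mk*
 * and pte_*protect helpers above take a pte by value and return the
 * modified copy, so updates compose and nothing touches the page table
 * until the result is written back. A hedged sketch, assuming a valid
 * ptep held under the appropriate page table lock:
 */
#if 0
pte_t pte = *ptep;			/* snapshot the entry		*/
pte = pte_mkdirty(pte_mkyoung(pte));	/* set ACCESSED and DIRTY	*/
pte = pte_wrprotect(pte);		/* clear RW, e.g. for COW	*/
set_pte(ptep, pte);			/* publish the new entry	*/
#endif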

static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	if (!pte_dirty(*ptep))
		return 0;
	return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
}

static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	if (!pte_young(*ptep))
		return 0;
	return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
}

static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
}

/*
 * Macro to mark a page protection value as "uncacheable". On processors
 * which do not support it, this is a no-op.
 */
#define pgprot_noncached(prot)	((boot_cpu_data.x86 > 3)				\
				 ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))	\
				 : (prot))

/*
 * Conversion functions: convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 */

#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))

static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	pte.pte_low &= _PAGE_CHG_MASK;
	pte.pte_low |= pgprot_val(newprot);
#ifdef CONFIG_X86_PAE
	/*
	 * Chop off the NX bit (if present), and add the NX portion of
	 * the newprot (if present):
	 */
	pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
	pte.pte_high |= (pgprot_val(newprot) >> 32) & \
					(__supported_pte_mask >> 32);
#endif
	return pte;
}

#define page_pte(page) page_pte_prot(page, __pgprot(0))

#define pmd_large(pmd) \
((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
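
/*
 * Editor's illustration (not part of the original header): mk_pte()
 * combines a struct page's frame number with a protection value, and
 * pgprot_noncached() strengthens that protection for device-style
 * memory. A hedged sketch of building an uncached kernel mapping entry,
 * assuming 'page' and 'ptep' are valid:
 */
#if 0
pte_t pte = mk_pte(page, pgprot_noncached(PAGE_KERNEL));
set_pte(ptep, pte);	/* the caller still owns TLB flushing */
#endif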

/*
 * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
 *
 * this macro returns the index of the entry in the pgd page which would
 * control the given virtual address
 */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define pgd_index_k(addr) pgd_index(addr)

/*
 * pgd_offset() returns a (pgd_t *)
 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
 */
#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))

/*
 * a shortcut which implies the use of the kernel's pgd, instead
 * of a process's
 */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)

/*
 * the pmd page can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
 *
 * this macro returns the index of the entry in the pmd page which would
 * control the given virtual address
 */
#define pmd_index(address) \
		(((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))

/*
 * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
 *
 * this macro returns the index of the entry in the pte page which would
 * control the given virtual address
 */
#define pte_index(address) \
		(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) \
	((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
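
/*
 * Editor's illustration (not part of the original header): the index and
 * offset macros above compose into a full top-down walk of the kernel
 * page tables. A hedged sketch (the pud level is folded away on i386,
 * and the pmd folds into the pgd without PAE, but the generic walk still
 * names both); 'addr' is a hypothetical kernel virtual address:
 */
#if 0
pgd_t *pgd = pgd_offset_k(addr);	/* index swapper_pg_dir		*/
pud_t *pud = pud_offset(pgd, addr);	/* folded level on i386		*/
pmd_t *pmd = pmd_offset(pud, addr);	/* folded too without PAE	*/
pte_t *pte = NULL;

if (pmd_present(*pmd) && !pmd_large(*pmd))
	pte = pte_offset_kernel(pmd, addr);	/* the final 4kB entry	*/
#endif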

/*
 * Helper function that returns the kernel pagetable entry controlling
 * the virtual address 'address'. NULL means no pagetable entry present.
 * NOTE: the return type is pte_t but if the pmd is PSE then we return it
 * as a pte too.
 */
extern pte_t *lookup_address(unsigned long address);

/*
 * Make a given kernel text page executable/non-executable.
 * Returns the previous executability setting of that page (which
 * is used to restore the previous state). Used by the SMP bootup code.
 * NOTE: this is an __init function for security reasons.
 */
#ifdef CONFIG_X86_PAE
 extern int set_kernel_exec(unsigned long vaddr, int enable);
#else
 static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
#endif

extern void noexec_setup(const char *str);

#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address) \
	((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
#define pte_offset_map_nested(dir, address) \
	((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
#else
#define pte_offset_map(dir, address) \
	((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
#define pte_unmap(pte) do { } while (0)
#define pte_unmap_nested(pte) do { } while (0)
#endif

/*
 * The i386 doesn't have any external MMU info: the kernel page
 * tables contain all the necessary information.
 *
 * Also, we only update the dirty/accessed state if we set
 * the dirty bit by hand in the kernel, since the hardware
 * will do the accessed bit for us, and we don't want to
 * race with other CPUs that might be updating the dirty
 * bit at the same time.
 */
#define update_mmu_cache(vma,address,pte) do { } while (0)
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
	do {								  \
		if (__dirty) {						  \
			(__ptep)->pte_low = (__entry).pte_low;		  \
			flush_tlb_page(__vma, __address);		  \
		}							  \
	} while (0)

#endif /* !__ASSEMBLY__ */

#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr)	(1)
#endif /* CONFIG_FLATMEM */

#define io_remap_page_range(vma, vaddr, paddr, size, prot)		\
		remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)

#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)			\
		remap_pfn_range(vma, vaddr, pfn, size, prot)

#define MK_IOSPACE_PFN(space, pfn)	(pfn)
#define GET_IOSPACE(pfn)		0
#define GET_PFN(pfn)			(pfn)

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define __HAVE_ARCH_PTE_SAME
#include <asm-generic/pgtable.h>

#endif /* _I386_PGTABLE_H */
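
/*
 * Editor's illustration (not part of the original header): with
 * CONFIG_HIGHPTE the pte page may live in highmem, so pte_offset_map()
 * kmaps it atomically and every map must be paired with a pte_unmap()
 * with no sleeping in between. A hedged sketch, assuming a valid pmd
 * entry for a user address:
 */
#if 0
pte_t *ptep = pte_offset_map(pmd, addr);	/* kmap_atomic under HIGHPTE */
pte_t entry = *ptep;				/* copy out while mapped     */
pte_unmap(ptep);				/* drop the atomic kmap      */
#endif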