[PATCH] Four level pagetables for ppc64

Implement 4-level pagetables for ppc64

This patch implements full four-level page tables for ppc64, thereby
extending the usable user address range to 44 bits (16T).

The patch uses a full page for the tables at the bottom and top level,
and a quarter page for the intermediate levels. It uses full 64-bit
pointers at every level, thus also increasing the addressable range of
physical memory. This patch also tweaks the VSID allocation to allow
matching range for user addresses (this halves the number of available
contexts) and adds some #if and BUILD_BUG sanity checks.

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

Authored by David Gibson, committed by Paul Mackerras · e28f7faf decd300b
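For reference, the 44-bit (16T) figure quoted above follows directly from the page-table index sizes this patch introduces in include/asm-ppc64/pgtable.h. A minimal sketch of the arithmetic, assuming the usual 4K base page (PAGE_SHIFT = 12); the values are the ones defined by the patch, the sketch itself is illustrative only:

	/* Illustrative only: index sizes as defined by this patch, 4K base pages. */
	#define PAGE_SHIFT	12
	#define PTE_INDEX_SIZE	9	/* bottom level: 512 entries * 8 bytes = full page    */
	#define PMD_INDEX_SIZE	7	/* intermediate: 128 entries * 8 bytes = quarter page */
	#define PUD_INDEX_SIZE	7	/* intermediate: 128 entries * 8 bytes = quarter page */
	#define PGD_INDEX_SIZE	9	/* top level:    512 entries * 8 bytes = full page    */

	/* 12 + 9 + 7 + 7 + 9 = 44 address bits, i.e. 1UL << 44 = 16TB of range. */
	#define PGTABLE_EADDR_SIZE	(PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
					 PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
	#define PGTABLE_RANGE		(1UL << PGTABLE_EADDR_SIZE)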

+305 -289
+1 -1
arch/ppc64/mm/hash_utils.c
···
 	int local = 0;
 	cpumask_t tmp;

-	if ((ea & ~REGION_MASK) > EADDR_MASK)
+	if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
 		return 1;

 	switch (REGION_ID(ea)) {
+69 -138
arch/ppc64/mm/hugetlbpage.c
···

 #include <linux/sysctl.h>

-#define HUGEPGDIR_SHIFT	(HPAGE_SHIFT + PAGE_SHIFT - 3)
-#define HUGEPGDIR_SIZE	(1UL << HUGEPGDIR_SHIFT)
-#define HUGEPGDIR_MASK	(~(HUGEPGDIR_SIZE-1))
-
-#define HUGEPTE_INDEX_SIZE	9
-#define HUGEPGD_INDEX_SIZE	10
-
-#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
-#define PTRS_PER_HUGEPGD	(1 << HUGEPGD_INDEX_SIZE)
-
-static inline int hugepgd_index(unsigned long addr)
+/* Modelled after find_linux_pte() */
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
-	return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
-}
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;

-static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
-{
-	int index;
-
-	if (! mm->context.huge_pgdir)
-		return NULL;
-
-
-	index = hugepgd_index(addr);
-	BUG_ON(index >= PTRS_PER_HUGEPGD);
-	return (pud_t *)(mm->context.huge_pgdir + index);
-}
-
-static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr)
-{
-	int index;
-
-	if (pud_none(*dir))
-		return NULL;
-
-	index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
-	return (pte_t *)pud_page(*dir) + index;
-}
-
-static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
-{
 	BUG_ON(! in_hugepage_area(mm->context, addr));

-	if (! mm->context.huge_pgdir) {
-		pgd_t *new;
-		spin_unlock(&mm->page_table_lock);
-		/* Don't use pgd_alloc(), because we want __GFP_REPEAT */
-		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-		spin_lock(&mm->page_table_lock);
+	addr &= HPAGE_MASK;

-		/*
-		 * Because we dropped the lock, we should re-check the
-		 * entry, as somebody else could have populated it..
-		 */
-		if (mm->context.huge_pgdir)
-			pgd_free(new);
-		else
-			mm->context.huge_pgdir = new;
-	}
-	return hugepgd_offset(mm, addr);
-}
-
-static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr)
-{
-	if (! pud_present(*dir)) {
-		pte_t *new;
-
-		spin_unlock(&mm->page_table_lock);
-		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-		spin_lock(&mm->page_table_lock);
-		/*
-		 * Because we dropped the lock, we should re-check the
-		 * entry, as somebody else could have populated it..
-		 */
-		if (pud_present(*dir)) {
-			if (new)
-				kmem_cache_free(zero_cache, new);
-		} else {
-			struct page *ptepage;
-
-			if (! new)
-				return NULL;
-			ptepage = virt_to_page(new);
-			ptepage->mapping = (void *) mm;
-			ptepage->index = addr & HUGEPGDIR_MASK;
-			pud_populate(mm, dir, new);
+	pg = pgd_offset(mm, addr);
+	if (!pgd_none(*pg)) {
+		pu = pud_offset(pg, addr);
+		if (!pud_none(*pu)) {
+			pm = pmd_offset(pu, addr);
+			pt = (pte_t *)pm;
+			BUG_ON(!pmd_none(*pm)
+			       && !(pte_present(*pt) && pte_huge(*pt)));
+			return pt;
 		}
 	}

-	return hugepte_offset(dir, addr);
-}
-
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pud_t *pud;
-
-	BUG_ON(! in_hugepage_area(mm->context, addr));
-
-	pud = hugepgd_offset(mm, addr);
-	if (! pud)
-		return NULL;
-
-	return hugepte_offset(pud, addr);
+	return NULL;
 }

 pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
-	pud_t *pud;
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;

 	BUG_ON(! in_hugepage_area(mm->context, addr));

-	pud = hugepgd_alloc(mm, addr);
-	if (! pud)
-		return NULL;
+	addr &= HPAGE_MASK;

-	return hugepte_alloc(mm, pud, addr);
+	pg = pgd_offset(mm, addr);
+	pu = pud_alloc(mm, pg, addr);
+
+	if (pu) {
+		pm = pmd_alloc(mm, pu, addr);
+		if (pm) {
+			pt = (pte_t *)pm;
+			BUG_ON(!pmd_none(*pm)
+			       && !(pte_present(*pt) && pte_huge(*pt)));
+			return pt;
+		}
+	}
+
+	return NULL;
+}
+
+#define HUGEPTE_BATCH_SIZE	(HPAGE_SIZE / PMD_SIZE)
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte)
+{
+	int i;
+
+	if (pte_present(*ptep)) {
+		pte_clear(mm, addr, ptep);
+		flush_tlb_pending();
+	}
+
+	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
+		*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+		ptep++;
+	}
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
+{
+	unsigned long old = pte_update(ptep, ~0UL);
+	int i;
+
+	if (old & _PAGE_HASHPTE)
+		hpte_update(mm, addr, old, 0);
+
+	for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
+		ptep[i] = __pte(0);
+
+	return __pte(old);
 }

 /*
···
 	} else {
 		return htlb_get_high_area(len);
 	}
-}
-
-void hugetlb_mm_free_pgd(struct mm_struct *mm)
-{
-	int i;
-	pgd_t *pgdir;
-
-	spin_lock(&mm->page_table_lock);
-
-	pgdir = mm->context.huge_pgdir;
-	if (! pgdir)
-		goto out;
-
-	mm->context.huge_pgdir = NULL;
-
-	/* cleanup any hugepte pages leftover */
-	for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
-		pud_t *pud = (pud_t *)(pgdir + i);
-
-		if (! pud_none(*pud)) {
-			pte_t *pte = (pte_t *)pud_page(*pud);
-			struct page *ptepage = virt_to_page(pte);
-
-			ptepage->mapping = NULL;
-
-			BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
-			kmem_cache_free(zero_cache, pte);
-		}
-		pud_clear(pud);
-	}
-
-	BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
-	kmem_cache_free(zero_cache, pgdir);
-
-out:
-	spin_unlock(&mm->page_table_lock);
 }

 int hash_huge_page(struct mm_struct *mm, unsigned long access,
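A side note on the new hugepage helpers above: huge PTEs now live at the pmd level, and because a ppc64 huge page covers more address space than one pmd entry maps, set_huge_pte_at() and huge_ptep_get_and_clear() replicate or clear the entry across HUGEPTE_BATCH_SIZE consecutive slots. A rough sketch of that batch size, assuming the usual 16MB ppc64 huge page (HPAGE_SHIFT = 24):

	/* Illustrative only: HPAGE_SHIFT = 24 (16MB huge pages) is assumed here. */
	#define PAGE_SHIFT		12
	#define PTE_INDEX_SIZE		9
	#define PMD_SHIFT		(PAGE_SHIFT + PTE_INDEX_SIZE)	/* 21: one pmd entry maps 2MB */
	#define HPAGE_SHIFT		24				/* one huge page is 16MB */
	#define PMD_SIZE		(1UL << PMD_SHIFT)
	#define HPAGE_SIZE		(1UL << HPAGE_SHIFT)

	/* 16MB / 2MB = 8 pmd entries written or cleared per huge page. */
	#define HUGEPTE_BATCH_SIZE	(HPAGE_SIZE / PMD_SIZE)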
+1 -1
arch/ppc64/mm/imalloc.c
···
 			break;
 		if ((unsigned long)tmp->addr >= ioremap_bot)
 			addr = tmp->size + (unsigned long) tmp->addr;
-		if (addr > IMALLOC_END-size)
+		if (addr >= IMALLOC_END-size)
 			return 1;
 	}
 	*im_addr = addr;
+41 -21
arch/ppc64/mm/init.c
···
 #include <asm/vdso.h>
 #include <asm/imalloc.h>

+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
 int mem_init_done;
 unsigned long ioremap_bot = IMALLOC_BASE;
 static unsigned long phbs_io_bot = PHBS_IO_BASE;
···
 	 * Before that, we map using addresses going
 	 * up from ioremap_bot.  imalloc will use
 	 * the addresses from ioremap_bot through
-	 * IMALLOC_END (0xE000001fffffffff)
+	 * IMALLOC_END
 	 *
 	 */
 	pa = addr & PAGE_MASK;
···
 	int index;
 	int err;

-#ifdef CONFIG_HUGETLB_PAGE
-	/* We leave htlb_segs as it was, but for a fork, we need to
-	 * clear the huge_pgdir. */
-	mm->context.huge_pgdir = NULL;
-#endif
-
 again:
 	if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
 		return -ENOMEM;
···
 	spin_unlock(&mmu_context_lock);

 	mm->context.id = NO_CONTEXT;
-
-	hugetlb_mm_free_pgd(mm);
 }

 /*
···
 	return virt_addr;
 }

-kmem_cache_t *zero_cache;
-
-static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
 {
-	memset(pte, 0, PAGE_SIZE);
+	memset(addr, 0, kmem_cache_size(cache));
 }
+
+static const int pgtable_cache_size[2] = {
+	PTE_TABLE_SIZE, PMD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+	"pgd_pte_cache", "pud_pmd_cache",
+};
+
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];

 void pgtable_cache_init(void)
 {
-	zero_cache = kmem_cache_create("zero",
-				       PAGE_SIZE,
-				       0,
-				       SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
-				       zero_ctor,
-				       NULL);
-	if (!zero_cache)
-		panic("pgtable_cache_init(): could not create zero_cache!\n");
+	int i;
+
+	BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
+	BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
+	BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
+	BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
+
+	for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
+		int size = pgtable_cache_size[i];
+		const char *name = pgtable_cache_name[i];
+
+		pgtable_cache[i] = kmem_cache_create(name,
+						     size, size,
+						     SLAB_HWCACHE_ALIGN
+						     | SLAB_MUST_HWCACHE_ALIGN,
+						     zero_ctor,
+						     NULL);
+		if (! pgtable_cache[i])
+			panic("pgtable_cache_init(): could not create %s!\n",
+			      name);
+	}
 }

 pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
+1 -1
arch/ppc64/mm/slb_low.S
···
 0:	/* user address: proto-VSID = context<<15 | ESID */
 	li	r11,SLB_VSID_USER

-	srdi.	r9,r3,13
+	srdi.	r9,r3,USER_ESID_BITS
 	bne-	8f			/* invalid ea bits set */

 #ifdef CONFIG_HUGETLB_PAGE
+55 -40
arch/ppc64/mm/tlb.c
···
 DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;

-void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
+struct pte_freelist_batch
+{
+	struct rcu_head	rcu;
+	unsigned int	index;
+	pgtable_free_t	tables[0];
+};
+
+DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+unsigned long pte_freelist_forced_free;
+
+#define PTE_FREELIST_SIZE \
+	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
+	  / sizeof(pgtable_free_t))
+
+#ifdef CONFIG_SMP
+static void pte_free_smp_sync(void *arg)
+{
+	/* Do nothing, just ensure we sync with all CPUs */
+}
+#endif
+
+/* This is only called when we are critically out of memory
+ * (and fail to get a page in pte_free_tlb).
+ */
+static void pgtable_free_now(pgtable_free_t pgf)
+{
+	pte_freelist_forced_free++;
+
+	smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+
+	pgtable_free(pgf);
+}
+
+static void pte_free_rcu_callback(struct rcu_head *head)
+{
+	struct pte_freelist_batch *batch =
+		container_of(head, struct pte_freelist_batch, rcu);
+	unsigned int i;
+
+	for (i = 0; i < batch->index; i++)
+		pgtable_free(batch->tables[i]);
+
+	free_page((unsigned long)batch);
+}
+
+static void pte_free_submit(struct pte_freelist_batch *batch)
+{
+	INIT_RCU_HEAD(&batch->rcu);
+	call_rcu(&batch->rcu, pte_free_rcu_callback);
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
 {
 	/* This is safe as we are holding page_table_lock */
 	cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
···

 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
-		pte_free(ptepage);
+		pgtable_free(pgf);
 		return;
 	}

 	if (*batchp == NULL) {
 		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
 		if (*batchp == NULL) {
-			pte_free_now(ptepage);
+			pgtable_free_now(pgf);
 			return;
 		}
 		(*batchp)->index = 0;
 	}
-	(*batchp)->pages[(*batchp)->index++] = ptepage;
+	(*batchp)->tables[(*batchp)->index++] = pgf;
 	if ((*batchp)->index == PTE_FREELIST_SIZE) {
 		pte_free_submit(*batchp);
 		*batchp = NULL;
···
 	flush_hash_range(batch->context, i, local);
 	batch->index = 0;
 	put_cpu();
-}
-
-#ifdef CONFIG_SMP
-static void pte_free_smp_sync(void *arg)
-{
-	/* Do nothing, just ensure we sync with all CPUs */
-}
-#endif
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-void pte_free_now(struct page *ptepage)
-{
-	pte_freelist_forced_free++;
-
-	smp_call_function(pte_free_smp_sync, NULL, 0, 1);
-
-	pte_free(ptepage);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
-	struct pte_freelist_batch *batch =
-		container_of(head, struct pte_freelist_batch, rcu);
-	unsigned int i;
-
-	for (i = 0; i < batch->index; i++)
-		pte_free(batch->pages[i]);
-	free_page((unsigned long)batch);
-}
-
-void pte_free_submit(struct pte_freelist_batch *batch)
-{
-	INIT_RCU_HEAD(&batch->rcu);
-	call_rcu(&batch->rcu, pte_free_rcu_callback);
 }

 void pte_free_finish(void)
+1 -1
include/asm-ppc64/imalloc.h
···
  */
 #define PHBS_IO_BASE	VMALLOC_END
 #define IMALLOC_BASE	(PHBS_IO_BASE + 0x80000000ul)	/* Reserve 2 gigs for PHBs */
-#define IMALLOC_END	(VMALLOC_START + EADDR_MASK)
+#define IMALLOC_END	(VMALLOC_START + PGTABLE_RANGE)


 /* imalloc region types */
+4 -3
include/asm-ppc64/mmu.h
···
 #define VSID_BITS	36
 #define VSID_MODULUS	((1UL<<VSID_BITS)-1)

-#define CONTEXT_BITS	20
-#define USER_ESID_BITS	15
+#define CONTEXT_BITS	19
+#define USER_ESID_BITS	16
+
+#define USER_VSID_RANGE	(1UL << (USER_ESID_BITS + SID_SHIFT))

 /*
  * This macro generates asm code to compute the VSID scramble
···
 typedef struct {
 	mm_context_id_t id;
 #ifdef CONFIG_HUGETLB_PAGE
-	pgd_t *huge_pgdir;
 	u16 htlb_segs; /* bitmask */
 #endif
 } mm_context_t;
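This is where the halving of available contexts mentioned in the description comes from: one bit moves from the context field into the ESID field, so that the segment-addressable user range matches the new 16TB pagetable range. A rough sketch of the arithmetic, assuming 256MB segments (SID_SHIFT = 28):

	/* Illustrative only: SID_SHIFT = 28 (256MB segments) is assumed here.
	 *
	 * Before: CONTEXT_BITS = 20, USER_ESID_BITS = 15
	 *         -> 2^20 contexts, user ESID range 1UL << (15 + 28) = 8TB
	 * After:  CONTEXT_BITS = 19, USER_ESID_BITS = 16
	 *         -> 2^19 contexts (halved), user ESID range 1UL << (16 + 28) = 16TB
	 *
	 * Either way the proto-VSID (context << USER_ESID_BITS | ESID) stays
	 * at 35 bits, inside the 36-bit VSID space shown above.
	 */
	#define SID_SHIFT	28
	#define CONTEXT_BITS	19
	#define USER_ESID_BITS	16
	#define USER_VSID_RANGE	(1UL << (USER_ESID_BITS + SID_SHIFT))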
+15 -11
include/asm-ppc64/page.h
···

 #define ARCH_HAS_HUGEPAGE_ONLY_RANGE
 #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
+#define ARCH_HAS_SETCLEAR_HUGE_PTE

 #define touches_hugepage_low_range(mm, addr, len) \
 	(LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs)
···
  * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
  */
 typedef struct { unsigned long pte; } pte_t;
-typedef struct { unsigned int pmd; } pmd_t;
-typedef struct { unsigned int pgd; } pgd_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;

 #define pte_val(x)	((x).pte)
 #define pmd_val(x)	((x).pmd)
+#define pud_val(x)	((x).pud)
 #define pgd_val(x)	((x).pgd)
 #define pgprot_val(x)	((x).pgprot)

-#define __pte(x)	((pte_t) { (x) } )
-#define __pmd(x)	((pmd_t) { (x) } )
-#define __pgd(x)	((pgd_t) { (x) } )
-#define __pgprot(x)	((pgprot_t) { (x) } )
+#define __pte(x)	((pte_t) { (x) })
+#define __pmd(x)	((pmd_t) { (x) })
+#define __pud(x)	((pud_t) { (x) })
+#define __pgd(x)	((pgd_t) { (x) })
+#define __pgprot(x)	((pgprot_t) { (x) })

 #else
 /*
  * .. while these make it easier on the compiler
  */
 typedef unsigned long pte_t;
-typedef unsigned int pmd_t;
-typedef unsigned int pgd_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
+typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;

 #define pte_val(x)	(x)
 #define pmd_val(x)	(x)
+#define pud_val(x)	(x)
 #define pgd_val(x)	(x)
 #define pgprot_val(x)	(x)

 #define __pte(x)	(x)
 #define __pmd(x)	(x)
+#define __pud(x)	(x)
 #define __pgd(x)	(x)
 #define __pgprot(x)	(x)

···
 #define KERNEL_REGION_ID	(KERNELBASE >> REGION_SHIFT)
 #define USER_REGION_ID		(0UL)
 #define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE)
-#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT)

 #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE))

+61 -32
include/asm-ppc64/pgalloc.h
···
 #include <linux/cpumask.h>
 #include <linux/percpu.h>

-extern kmem_cache_t *zero_cache;
+extern kmem_cache_t *pgtable_cache[];
+
+#define PTE_CACHE_NUM	0
+#define PMD_CACHE_NUM	1
+#define PUD_CACHE_NUM	1
+#define PGD_CACHE_NUM	0

 /*
  * This program is free software; you can redistribute it and/or
···
  * 2 of the License, or (at your option) any later version.
  */

-static inline pgd_t *
-pgd_alloc(struct mm_struct *mm)
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_alloc(zero_cache, GFP_KERNEL);
+	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
 }

-static inline void
-pgd_free(pgd_t *pgd)
+static inline void pgd_free(pgd_t *pgd)
 {
-	kmem_cache_free(zero_cache, pgd);
+	kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
+}
+
+#define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, PUD)
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
+				GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(pud_t *pud)
+{
+	kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
 }

 #define pud_populate(MM, PUD, PMD)	pud_set(PUD, PMD)

-static inline pmd_t *
-pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
+				GFP_KERNEL|__GFP_REPEAT);
 }

-static inline void
-pmd_free(pmd_t *pmd)
+static inline void pmd_free(pmd_t *pmd)
 {
-	kmem_cache_free(zero_cache, pmd);
+	kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
 }

 #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte)
···

 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
+				GFP_KERNEL|__GFP_REPEAT);
 }

 static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
-	if (pte)
-		return virt_to_page(pte);
-	return NULL;
+	return virt_to_page(pte_alloc_one_kernel(mm, address));
 }

 static inline void pte_free_kernel(pte_t *pte)
 {
-	kmem_cache_free(zero_cache, pte);
+	kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte);
 }

 static inline void pte_free(struct page *ptepage)
 {
-	kmem_cache_free(zero_cache, page_address(ptepage));
+	pte_free_kernel(page_address(ptepage));
 }

-struct pte_freelist_batch
+#define PGF_CACHENUM_MASK	0xf
+
+typedef struct pgtable_free {
+	unsigned long val;
+} pgtable_free_t;
+
+static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
+						unsigned long mask)
 {
-	struct rcu_head	rcu;
-	unsigned int	index;
-	struct page *	pages[0];
-};
+	BUG_ON(cachenum > PGF_CACHENUM_MASK);

-#define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \
-			   sizeof(struct page *))
+	return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum};
+}

-extern void pte_free_now(struct page *ptepage);
-extern void pte_free_submit(struct pte_freelist_batch *batch);
+static inline void pgtable_free(pgtable_free_t pgf)
+{
+	void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
+	int cachenum = pgf.val & PGF_CACHENUM_MASK;

-DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+	kmem_cache_free(pgtable_cache[cachenum], p);
+}

-void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage);
-#define __pmd_free_tlb(tlb, pmd)	__pte_free_tlb(tlb, virt_to_page(pmd))
+void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
+
+#define __pte_free_tlb(tlb, ptepage)	\
+	pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
+		PTE_CACHE_NUM, PTE_TABLE_SIZE-1))
+#define __pmd_free_tlb(tlb, pmd)	\
+	pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
+		PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
+#define __pud_free_tlb(tlb, pmd)	\
+	pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
+		PUD_CACHE_NUM, PUD_TABLE_SIZE-1))

 #define check_pgt_cache()	do { } while (0)

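The pgtable_free_t encoding above works because each table comes from a slab whose alignment equals its size, so the low bits of the table's address are guaranteed to be zero and can carry the owning cache's index until the deferred free runs. A small stand-alone illustration of that packing (not kernel code; the names mirror the patch and the 1KB value assumes the new pmd/pud table size):

	/* Illustrative only: the low-bit packing done by pgtable_free_cache()
	 * and pgtable_free(), demonstrated with a size-aligned allocation. */
	#include <assert.h>
	#include <stdlib.h>

	#define PGF_CACHENUM_MASK 0xf

	int main(void)
	{
		unsigned long mask = 1024 - 1;		/* PMD_TABLE_SIZE - 1 */
		unsigned long cachenum = 1;		/* PMD_CACHE_NUM */
		void *table = aligned_alloc(1024, 1024);	/* size-aligned, like the slab */
		unsigned long val;

		/* pgtable_free_cache(): pointer and cache number share one word */
		val = ((unsigned long)table & ~mask) | cachenum;

		/* pgtable_free(): both pieces are recovered again */
		assert((void *)(val & ~PGF_CACHENUM_MASK) == table);
		assert((val & PGF_CACHENUM_MASK) == cachenum);

		free(table);
		return 0;
	}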
+54 -38
include/asm-ppc64/pgtable.h
···
 #include <asm/tlbflush.h>
 #endif /* __ASSEMBLY__ */

-#include <asm-generic/pgtable-nopud.h>
-
 /*
  * Entries per page directory level.  The PTE level must use a 64b record
  * for each page table entry.  The PMD and PGD level use a 32b record for
  * each entry by assuming that each entry is page aligned.
  */
 #define PTE_INDEX_SIZE	9
-#define PMD_INDEX_SIZE	10
-#define PGD_INDEX_SIZE	10
+#define PMD_INDEX_SIZE	7
+#define PUD_INDEX_SIZE	7
+#define PGD_INDEX_SIZE	9
+
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)

 #define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
 #define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD	(1 << PMD_INDEX_SIZE)
 #define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)

 /* PMD_SHIFT determines what a second-level page table entry can map */
···
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))

-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))

···
 /*
  * Size of EA range mapped by our pagetables.
  */
-#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
-		    PGD_INDEX_SIZE + PAGE_SHIFT)
-#define EADDR_MASK ((1UL << EADDR_SIZE) - 1)
+#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+			    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE)
+
+#if TASK_SIZE_USER64 > PGTABLE_RANGE
+#error TASK_SIZE_USER64 exceeds pagetable range
+#endif
+
+#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
+#error TASK_SIZE_USER64 exceeds user VSID range
+#endif

 /*
  * Define the address range of the vmalloc VM area.
  */
 #define VMALLOC_START (0xD000000000000000ul)
-#define VMALLOC_SIZE  (0x10000000000UL)
+#define VMALLOC_SIZE  (0x80000000000UL)
 #define VMALLOC_END   (VMALLOC_START + VMALLOC_SIZE)

 /*
···
 #ifndef __ASSEMBLY__
 int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		   unsigned long ea, unsigned long vsid, int local);
-
-void hugetlb_mm_free_pgd(struct mm_struct *mm);
 #endif /* __ASSEMBLY__ */

 #define HAVE_ARCH_UNMAPPED_AREA
···
 #else

 #define hash_huge_page(mm,a,ea,vsid,local)	-1
-#define hugetlb_mm_free_pgd(mm)			do {} while (0)

 #endif

···
 #define pte_pfn(x)		((unsigned long)((pte_val(x) >> PTE_SHIFT)))
 #define pte_page(x)		pfn_to_page(pte_pfn(x))

-#define pmd_set(pmdp, ptep) 	\
-	(pmd_val(*(pmdp)) = __ba_to_bpn(ptep))
+#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);})
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define pmd_bad(pmd)		(pmd_val(pmd) == 0)
 #define pmd_present(pmd)	(pmd_val(pmd) != 0)
 #define pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0)
-#define pmd_page_kernel(pmd)	(__bpn_to_ba(pmd_val(pmd)))
+#define pmd_page_kernel(pmd)	(pmd_val(pmd))
 #define pmd_page(pmd)		virt_to_page(pmd_page_kernel(pmd))

-#define pud_set(pudp, pmdp)	(pud_val(*(pudp)) = (__ba_to_bpn(pmdp)))
+#define pud_set(pudp, pmdp)	(pud_val(*(pudp)) = (unsigned long)(pmdp))
 #define pud_none(pud)		(!pud_val(pud))
-#define pud_bad(pud)		((pud_val(pud)) == 0UL)
-#define pud_present(pud)	(pud_val(pud) != 0UL)
-#define pud_clear(pudp)		(pud_val(*(pudp)) = 0UL)
-#define pud_page(pud)		(__bpn_to_ba(pud_val(pud)))
+#define pud_bad(pud)		((pud_val(pud)) == 0)
+#define pud_present(pud)	(pud_val(pud) != 0)
+#define pud_clear(pudp)		(pud_val(*(pudp)) = 0)
+#define pud_page(pud)		(pud_val(pud))
+
+#define pgd_set(pgdp, pudp)	({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
+#define pgd_none(pgd)		(!pgd_val(pgd))
+#define pgd_bad(pgd)		(pgd_val(pgd) == 0)
+#define pgd_present(pgd)	(pgd_val(pgd) != 0)
+#define pgd_clear(pgdp)		(pgd_val(*(pgdp)) = 0)
+#define pgd_page(pgd)		(pgd_val(pgd))

 /*
  * Find an entry in a page-table-directory.  We combine the address region
  * (the high order N bits) and the pgd portion of the address.
  */
 /* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff)
+#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff)

 #define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))

-/* Find an entry in the second-level page table.. */
-#define pmd_offset(pudp,addr) \
-  ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+#define pud_offset(pgdp, addr)	\
+  (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))

-/* Find an entry in the third-level page table.. */
+#define pmd_offset(pudp,addr) \
+  (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+
 #define pte_offset_kernel(dir,addr) \
-  ((pte_t *) pmd_page_kernel(*(dir)) \
-  + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+  (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))

 #define pte_offset_map(dir,addr)		pte_offset_kernel((dir), (addr))
 #define pte_offset_map_nested(dir,addr)		pte_offset_kernel((dir), (addr))
···
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)

 #define pmd_ERROR(e) \
-	printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e))
+	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e))
 #define pgd_ERROR(e) \
-	printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e))
+	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))

 extern pgd_t swapper_pg_dir[];

 extern void paging_init(void);

-/*
- * Because the huge pgtables are only 2 level, they can take
- * at most around 4M, much less than one hugepage which the
- * process is presumably entitled to use.  So we don't bother
- * freeing up the pagetables on unmap, and wait until
- * destroy_context() to clean up the lot.
- */
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
-	do { } while (0)
+	free_pgd_range(tlb, addr, end, floor, ceiling)

 /*
  * This gets called at the end of handling a page fault, when
+2 -2
include/asm-ppc64/processor.h
···
 extern struct task_struct *last_task_used_math;
 extern struct task_struct *last_task_used_altivec;

-/* 64-bit user address space is 41-bits (2TBs user VM) */
-#define TASK_SIZE_USER64 (0x0000020000000000UL)
+/* 64-bit user address space is 44-bits (16TB user VM) */
+#define TASK_SIZE_USER64 (0x0000100000000000UL)

 /*
  * 32-bit user address space is 4GB - 1 page