Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc: Hugetlb for BookE

Enable hugepages on Freescale BookE processors. This allows the kernel to
use huge TLB entries to map pages, which can greatly reduce the number of
TLB misses and the amount of TLB thrashing experienced by applications with
large memory footprints. Care should be taken when using this on FSL
processors, as the number of large TLB entries supported by the core is low
(16-64) on current processors.

The supported set of hugepage sizes includes 4m, 16m, 64m, 256m, and 1g.
Page sizes larger than the max zone size are called "gigantic" pages and
must be allocated on the command line (and cannot be deallocated).

This is currently only fully implemented for Freescale 32-bit BookE
processors, but there is some infrastructure in the code for
64-bit BookE.

Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

authored by

Becky Bruce and committed by
Benjamin Herrenschmidt
41151e77 7df5659e

+765 -105
+1 -2
arch/powerpc/Kconfig
··· 429 429 def_bool y 430 430 431 431 config SYS_SUPPORTS_HUGETLBFS 432 - def_bool y 433 - depends on PPC_BOOK3S_64 432 + bool 434 433 435 434 source "mm/Kconfig" 436 435
+61 -2
arch/powerpc/include/asm/hugetlb.h
··· 1 1 #ifndef _ASM_POWERPC_HUGETLB_H 2 2 #define _ASM_POWERPC_HUGETLB_H 3 3 4 + #ifdef CONFIG_HUGETLB_PAGE 4 5 #include <asm/page.h> 6 + 7 + extern struct kmem_cache *hugepte_cache; 8 + extern void __init reserve_hugetlb_gpages(void); 9 + 10 + static inline pte_t *hugepd_page(hugepd_t hpd) 11 + { 12 + BUG_ON(!hugepd_ok(hpd)); 13 + return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE); 14 + } 15 + 16 + static inline unsigned int hugepd_shift(hugepd_t hpd) 17 + { 18 + return hpd.pd & HUGEPD_SHIFT_MASK; 19 + } 20 + 21 + static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, 22 + unsigned pdshift) 23 + { 24 + /* 25 + * On 32-bit, we have multiple higher-level table entries that point to 26 + * the same hugepte. Just use the first one since they're all 27 + * identical. So for that case, idx=0. 28 + */ 29 + unsigned long idx = 0; 30 + 31 + pte_t *dir = hugepd_page(*hpdp); 32 + #ifdef CONFIG_PPC64 33 + idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); 34 + #endif 35 + 36 + return dir + idx; 37 + } 5 38 6 39 pte_t *huge_pte_offset_and_shift(struct mm_struct *mm, 7 40 unsigned long addr, unsigned *shift); 8 41 9 42 void flush_dcache_icache_hugepage(struct page *page); 10 43 44 + #if defined(CONFIG_PPC_MM_SLICES) || defined(CONFIG_PPC_SUBPAGE_PROT) 11 45 int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, 12 46 unsigned long len); 47 + #else 48 + static inline int is_hugepage_only_range(struct mm_struct *mm, 49 + unsigned long addr, 50 + unsigned long len) 51 + { 52 + return 0; 53 + } 54 + #endif 55 + 56 + void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte); 57 + void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); 13 58 14 59 void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, 15 60 unsigned long end, unsigned long floor, ··· 95 50 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, 96 51 unsigned long addr, pte_t *ptep) 97 52 { 98 - 
unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1); 99 - return __pte(old); 53 + #ifdef CONFIG_PPC64 54 + return __pte(pte_update(mm, addr, ptep, ~0UL, 1)); 55 + #else 56 + return __pte(pte_update(ptep, ~0UL, 0)); 57 + #endif 100 58 } 101 59 102 60 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, ··· 140 92 static inline void arch_release_hugepage(struct page *page) 141 93 { 142 94 } 95 + 96 + #else /* ! CONFIG_HUGETLB_PAGE */ 97 + static inline void reserve_hugetlb_gpages(void) 98 + { 99 + pr_err("Cannot reserve gpages without hugetlb enabled\n"); 100 + } 101 + static inline void flush_hugetlb_page(struct vm_area_struct *vma, 102 + unsigned long vmaddr) 103 + { 104 + } 105 + #endif 143 106 144 107 #endif /* _ASM_POWERPC_HUGETLB_H */
+7
arch/powerpc/include/asm/mmu-book3e.h
··· 66 66 #define MAS2_M 0x00000004 67 67 #define MAS2_G 0x00000002 68 68 #define MAS2_E 0x00000001 69 + #define MAS2_WIMGE_MASK 0x0000001f 69 70 #define MAS2_EPN_MASK(size) (~0 << (size + 10)) 70 71 #define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags)) 71 72 ··· 81 80 #define MAS3_SW 0x00000004 82 81 #define MAS3_UR 0x00000002 83 82 #define MAS3_SR 0x00000001 83 + #define MAS3_BAP_MASK 0x0000003f 84 84 #define MAS3_SPSIZE 0x0000003e 85 85 #define MAS3_SPSIZE_SHIFT 1 86 86 ··· 214 212 unsigned int id; 215 213 unsigned int active; 216 214 unsigned long vdso_base; 215 + #ifdef CONFIG_PPC_MM_SLICES 216 + u64 low_slices_psize; /* SLB page size encodings */ 217 + u64 high_slices_psize; /* 4 bits per slice for now */ 218 + u16 user_psize; /* page size index */ 219 + #endif 217 220 } mm_context_t; 218 221 219 222 /* Page size definitions, common between 32 and 64-bit
+1 -2
arch/powerpc/include/asm/mmu-hash64.h
··· 262 262 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, 263 263 unsigned long pstart, unsigned long prot, 264 264 int psize, int ssize); 265 - extern void add_gpage(unsigned long addr, unsigned long page_size, 266 - unsigned long number_of_pages); 265 + extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); 267 266 extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); 268 267 269 268 extern void hpte_init_native(void);
+9 -7
arch/powerpc/include/asm/mmu.h
··· 175 175 #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ 176 176 #define MMU_PAGE_256K 4 177 177 #define MMU_PAGE_1M 5 178 - #define MMU_PAGE_8M 6 179 - #define MMU_PAGE_16M 7 180 - #define MMU_PAGE_256M 8 181 - #define MMU_PAGE_1G 9 182 - #define MMU_PAGE_16G 10 183 - #define MMU_PAGE_64G 11 184 - #define MMU_PAGE_COUNT 12 178 + #define MMU_PAGE_4M 6 179 + #define MMU_PAGE_8M 7 180 + #define MMU_PAGE_16M 8 181 + #define MMU_PAGE_64M 9 182 + #define MMU_PAGE_256M 10 183 + #define MMU_PAGE_1G 11 184 + #define MMU_PAGE_16G 12 185 + #define MMU_PAGE_64G 13 185 186 187 + #define MMU_PAGE_COUNT 14 186 188 187 189 #if defined(CONFIG_PPC_STD_MMU_64) 188 190 /* 64-bit classic hash table MMU */
+30 -1
arch/powerpc/include/asm/page.h
··· 36 36 37 37 #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) 38 38 39 + #ifndef __ASSEMBLY__ 40 + #ifdef CONFIG_HUGETLB_PAGE 41 + extern unsigned int HPAGE_SHIFT; 42 + #else 43 + #define HPAGE_SHIFT PAGE_SHIFT 44 + #endif 45 + #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) 46 + #define HPAGE_MASK (~(HPAGE_SIZE - 1)) 47 + #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 48 + #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) 49 + #endif 50 + 39 51 /* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ 40 52 #define __HAVE_ARCH_GATE_AREA 1 41 53 ··· 170 158 #define is_kernel_addr(x) ((x) >= PAGE_OFFSET) 171 159 #endif 172 160 161 + /* 162 + * Use the top bit of the higher-level page table entries to indicate whether 163 + * the entries we point to contain hugepages. This works because we know that 164 + * the page tables live in kernel space. If we ever decide to support having 165 + * page tables at arbitrary addresses, this breaks and will have to change. 166 + */ 167 + #ifdef CONFIG_PPC64 168 + #define PD_HUGE 0x8000000000000000 169 + #else 170 + #define PD_HUGE 0x80000000 171 + #endif 172 + 173 + /* 174 + * Some number of bits at the level of the page table that points to 175 + * a hugepte are used to encode the size. This masks those bits. 176 + */ 177 + #define HUGEPD_SHIFT_MASK 0x3f 178 + 173 179 #ifndef __ASSEMBLY__ 174 180 175 181 #undef STRICT_MM_TYPECHECKS ··· 273 243 #endif 274 244 275 245 typedef struct { signed long pd; } hugepd_t; 276 - #define HUGEPD_SHIFT_MASK 0x3f 277 246 278 247 #ifdef CONFIG_HUGETLB_PAGE 279 248 static inline int hugepd_ok(hugepd_t hpd)
-11
arch/powerpc/include/asm/page_64.h
··· 64 64 /* Log 2 of page table size */ 65 65 extern u64 ppc64_pft_size; 66 66 67 - /* Large pages size */ 68 - #ifdef CONFIG_HUGETLB_PAGE 69 - extern unsigned int HPAGE_SHIFT; 70 - #else 71 - #define HPAGE_SHIFT PAGE_SHIFT 72 - #endif 73 - #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) 74 - #define HPAGE_MASK (~(HPAGE_SIZE - 1)) 75 - #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 76 - #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) 77 - 78 67 #endif /* __ASSEMBLY__ */ 79 68 80 69 #ifdef CONFIG_PPC_MM_SLICES
+3
arch/powerpc/include/asm/pte-book3e.h
··· 72 72 #define PTE_RPN_SHIFT (24) 73 73 #endif 74 74 75 + #define PTE_WIMGE_SHIFT (19) 76 + #define PTE_BAP_SHIFT (2) 77 + 75 78 /* On 32-bit, we never clear the top part of the PTE */ 76 79 #ifdef CONFIG_PPC32 77 80 #define _PTE_NONE_MASK 0xffffffff00000000ULL
+114 -19
arch/powerpc/kernel/head_fsl_booke.S
··· 236 236 * if we find the pte (fall through): 237 237 * r11 is low pte word 238 238 * r12 is pointer to the pte 239 + * r10 is the pshift from the PGD, if we're a hugepage 239 240 */ 240 241 #ifdef CONFIG_PTE_64BIT 242 + #ifdef CONFIG_HUGETLB_PAGE 243 + #define FIND_PTE \ 244 + rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ 245 + lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ 246 + rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ 247 + blt 1000f; /* Normal non-huge page */ \ 248 + beq 2f; /* Bail if no table */ \ 249 + oris r11, r11, PD_HUGE@h; /* Put back address bit */ \ 250 + andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \ 251 + xor r12, r10, r11; /* drop size bits from pointer */ \ 252 + b 1001f; \ 253 + 1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ 254 + li r10, 0; /* clear r10 */ \ 255 + 1001: lwz r11, 4(r12); /* Get pte entry */ 256 + #else 241 257 #define FIND_PTE \ 242 258 rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ 243 259 lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ ··· 261 245 beq 2f; /* Bail if no table */ \ 262 246 rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ 263 247 lwz r11, 4(r12); /* Get pte entry */ 264 - #else 248 + #endif /* HUGEPAGE */ 249 + #else /* !PTE_64BIT */ 265 250 #define FIND_PTE \ 266 251 rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ 267 252 lwz r11, 0(r11); /* Get L1 entry */ \ ··· 419 402 420 403 #ifdef CONFIG_PTE_64BIT 421 404 #ifdef CONFIG_SMP 422 - subf r10,r11,r12 /* create false data dep */ 423 - lwzx r13,r11,r10 /* Get upper pte bits */ 405 + subf r13,r11,r12 /* create false data dep */ 406 + lwzx r13,r11,r13 /* Get upper pte bits */ 424 407 #else 425 408 lwz r13,0(r12) /* Get upper pte bits */ 426 409 #endif ··· 500 483 501 484 #ifdef CONFIG_PTE_64BIT 502 485 #ifdef CONFIG_SMP 503 - subf r10,r11,r12 /* create false data dep */ 504 - lwzx r13,r11,r10 /* Get upper pte bits */ 486 + subf r13,r11,r12 /* create false 
data dep */ 487 + lwzx r13,r11,r13 /* Get upper pte bits */ 505 488 #else 506 489 lwz r13,0(r12) /* Get upper pte bits */ 507 490 #endif ··· 565 548 /* 566 549 * Both the instruction and data TLB miss get to this 567 550 * point to load the TLB. 568 - * r10 - available to use 551 + * r10 - tsize encoding (if HUGETLB_PAGE) or available to use 569 552 * r11 - TLB (info from Linux PTE) 570 553 * r12 - available to use 571 554 * r13 - upper bits of PTE (if PTE_64BIT) or available to use ··· 575 558 * Upon exit, we reload everything and RFI. 576 559 */ 577 560 finish_tlb_load: 561 + #ifdef CONFIG_HUGETLB_PAGE 562 + cmpwi 6, r10, 0 /* check for huge page */ 563 + beq 6, finish_tlb_load_cont /* !huge */ 564 + 565 + /* Alas, we need more scratch registers for hugepages */ 566 + mfspr r12, SPRN_SPRG_THREAD 567 + stw r14, THREAD_NORMSAVE(4)(r12) 568 + stw r15, THREAD_NORMSAVE(5)(r12) 569 + stw r16, THREAD_NORMSAVE(6)(r12) 570 + stw r17, THREAD_NORMSAVE(7)(r12) 571 + 572 + /* Get the next_tlbcam_idx percpu var */ 573 + #ifdef CONFIG_SMP 574 + lwz r12, THREAD_INFO-THREAD(r12) 575 + lwz r15, TI_CPU(r12) 576 + lis r14, __per_cpu_offset@h 577 + ori r14, r14, __per_cpu_offset@l 578 + rlwinm r15, r15, 2, 0, 29 579 + lwzx r16, r14, r15 580 + #else 581 + li r16, 0 582 + #endif 583 + lis r17, next_tlbcam_idx@h 584 + ori r17, r17, next_tlbcam_idx@l 585 + add r17, r17, r16 /* r17 = *next_tlbcam_idx */ 586 + lwz r15, 0(r17) /* r15 = next_tlbcam_idx */ 587 + 588 + lis r14, MAS0_TLBSEL(1)@h /* select TLB1 (TLBCAM) */ 589 + rlwimi r14, r15, 16, 4, 15 /* next_tlbcam_idx entry */ 590 + mtspr SPRN_MAS0, r14 591 + 592 + /* Extract TLB1CFG(NENTRY) */ 593 + mfspr r16, SPRN_TLB1CFG 594 + andi. 
r16, r16, 0xfff 595 + 596 + /* Update next_tlbcam_idx, wrapping when necessary */ 597 + addi r15, r15, 1 598 + cmpw r15, r16 599 + blt 100f 600 + lis r14, tlbcam_index@h 601 + ori r14, r14, tlbcam_index@l 602 + lwz r15, 0(r14) 603 + 100: stw r15, 0(r17) 604 + 605 + /* 606 + * Calc MAS1_TSIZE from r10 (which has pshift encoded) 607 + * tlb_enc = (pshift - 10). 608 + */ 609 + subi r15, r10, 10 610 + mfspr r16, SPRN_MAS1 611 + rlwimi r16, r15, 7, 20, 24 612 + mtspr SPRN_MAS1, r16 613 + 614 + /* copy the pshift for use later */ 615 + mr r14, r10 616 + 617 + /* fall through */ 618 + 619 + #endif /* CONFIG_HUGETLB_PAGE */ 620 + 578 621 /* 579 622 * We set execute, because we don't have the granularity to 580 623 * properly set this at the page level (Linux problem). 581 624 * Many of these bits are software only. Bits we don't set 582 625 * here we (properly should) assume have the appropriate value. 583 626 */ 584 - 585 - mfspr r12, SPRN_MAS2 586 - #ifdef CONFIG_PTE_64BIT 587 - rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ 588 - #else 589 - rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ 590 - #endif 591 - mtspr SPRN_MAS2, r12 592 - 627 + finish_tlb_load_cont: 593 628 #ifdef CONFIG_PTE_64BIT 594 629 rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */ 595 630 andi. r10, r11, _PAGE_DIRTY ··· 650 581 andc r12, r12, r10 651 582 1: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */ 652 583 rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */ 653 - mtspr SPRN_MAS3, r12 584 + 2: mtspr SPRN_MAS3, r12 654 585 BEGIN_MMU_FTR_SECTION 655 586 srwi r10, r13, 12 /* grab RPN[12:31] */ 656 587 mtspr SPRN_MAS7, r10 657 588 END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) 658 589 #else 659 590 li r10, (_PAGE_EXEC | _PAGE_PRESENT) 591 + mr r13, r11 660 592 rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ 661 593 and r12, r11, r10 662 594 andi. 
r10, r11, _PAGE_USER /* Test for _PAGE_USER */ 663 595 slwi r10, r12, 1 664 596 or r10, r10, r12 665 597 iseleq r12, r12, r10 666 - rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ 667 - mtspr SPRN_MAS3, r11 598 + rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ 599 + mtspr SPRN_MAS3, r13 668 600 #endif 601 + 602 + mfspr r12, SPRN_MAS2 603 + #ifdef CONFIG_PTE_64BIT 604 + rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ 605 + #else 606 + rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ 607 + #endif 608 + #ifdef CONFIG_HUGETLB_PAGE 609 + beq 6, 3f /* don't mask if page isn't huge */ 610 + li r13, 1 611 + slw r13, r13, r14 612 + subi r13, r13, 1 613 + rlwinm r13, r13, 0, 0, 19 /* bottom bits used for WIMGE/etc */ 614 + andc r12, r12, r13 /* mask off ea bits within the page */ 615 + #endif 616 + 3: mtspr SPRN_MAS2, r12 617 + 669 618 #ifdef CONFIG_E200 670 619 /* Round robin TLB1 entries assignment */ 671 620 mfspr r12, SPRN_MAS0 ··· 709 622 mtspr SPRN_MAS0,r12 710 623 #endif /* CONFIG_E200 */ 711 624 625 + tlb_write_entry: 712 626 tlbwe 713 627 714 628 /* Done...restore registers and get out of here. */ 715 629 mfspr r10, SPRN_SPRG_THREAD 716 - lwz r11, THREAD_NORMSAVE(3)(r10) 630 + #ifdef CONFIG_HUGETLB_PAGE 631 + beq 6, 8f /* skip restore for 4k page faults */ 632 + lwz r14, THREAD_NORMSAVE(4)(r10) 633 + lwz r15, THREAD_NORMSAVE(5)(r10) 634 + lwz r16, THREAD_NORMSAVE(6)(r10) 635 + lwz r17, THREAD_NORMSAVE(7)(r10) 636 + #endif 637 + 8: lwz r11, THREAD_NORMSAVE(3)(r10) 717 638 mtcr r11 718 639 lwz r13, THREAD_NORMSAVE(2)(r10) 719 640 lwz r12, THREAD_NORMSAVE(1)(r10)
+1
arch/powerpc/mm/Makefile
··· 29 29 ifeq ($(CONFIG_HUGETLB_PAGE),y) 30 30 obj-y += hugetlbpage.o 31 31 obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o 32 + obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o 32 33 endif 33 34 obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o 34 35 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
-3
arch/powerpc/mm/hash_utils_64.c
··· 105 105 int mmu_highuser_ssize = MMU_SEGSIZE_256M; 106 106 u16 mmu_slb_size = 64; 107 107 EXPORT_SYMBOL_GPL(mmu_slb_size); 108 - #ifdef CONFIG_HUGETLB_PAGE 109 - unsigned int HPAGE_SHIFT; 110 - #endif 111 108 #ifdef CONFIG_PPC_64K_PAGES 112 109 int mmu_ci_restrictions; 113 110 #endif
+121
arch/powerpc/mm/hugetlbpage-book3e.c
··· 1 + /* 2 + * PPC Huge TLB Page Support for Book3E MMU 3 + * 4 + * Copyright (C) 2009 David Gibson, IBM Corporation. 5 + * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor 6 + * 7 + */ 8 + #include <linux/mm.h> 9 + #include <linux/hugetlb.h> 10 + 11 + static inline int mmu_get_tsize(int psize) 12 + { 13 + return mmu_psize_defs[psize].enc; 14 + } 15 + 16 + static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) 17 + { 18 + int found = 0; 19 + 20 + mtspr(SPRN_MAS6, pid << 16); 21 + if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { 22 + asm volatile( 23 + "li %0,0\n" 24 + "tlbsx. 0,%1\n" 25 + "bne 1f\n" 26 + "li %0,1\n" 27 + "1:\n" 28 + : "=&r"(found) : "r"(ea)); 29 + } else { 30 + asm volatile( 31 + "tlbsx 0,%1\n" 32 + "mfspr %0,0x271\n" 33 + "srwi %0,%0,31\n" 34 + : "=&r"(found) : "r"(ea)); 35 + } 36 + 37 + return found; 38 + } 39 + 40 + void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte) 41 + { 42 + unsigned long mas1, mas2; 43 + u64 mas7_3; 44 + unsigned long psize, tsize, shift; 45 + unsigned long flags; 46 + 47 + #ifdef CONFIG_PPC_FSL_BOOK3E 48 + int index, lz, ncams; 49 + struct vm_area_struct *vma; 50 + #endif 51 + 52 + if (unlikely(is_kernel_addr(ea))) 53 + return; 54 + 55 + #ifdef CONFIG_MM_SLICES 56 + psize = mmu_get_tsize(get_slice_psize(mm, ea)); 57 + tsize = mmu_get_psize(psize); 58 + shift = mmu_psize_defs[psize].shift; 59 + #else 60 + vma = find_vma(mm, ea); 61 + psize = vma_mmu_pagesize(vma); /* returns actual size in bytes */ 62 + asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize)); 63 + shift = 31 - lz; 64 + tsize = 21 - lz; 65 + #endif 66 + 67 + /* 68 + * We can't be interrupted while we're setting up the MAS 69 + * regusters or after we've confirmed that no tlb exists. 
70 + */ 71 + local_irq_save(flags); 72 + 73 + if (unlikely(book3e_tlb_exists(ea, mm->context.id))) { 74 + local_irq_restore(flags); 75 + return; 76 + } 77 + 78 + #ifdef CONFIG_PPC_FSL_BOOK3E 79 + ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; 80 + 81 + /* We have to use the CAM(TLB1) on FSL parts for hugepages */ 82 + index = __get_cpu_var(next_tlbcam_idx); 83 + mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); 84 + 85 + /* Just round-robin the entries and wrap when we hit the end */ 86 + if (unlikely(index == ncams - 1)) 87 + __get_cpu_var(next_tlbcam_idx) = tlbcam_index; 88 + else 89 + __get_cpu_var(next_tlbcam_idx)++; 90 + #endif 91 + mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize); 92 + mas2 = ea & ~((1UL << shift) - 1); 93 + mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; 94 + mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT; 95 + mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK; 96 + if (!pte_dirty(pte)) 97 + mas7_3 &= ~(MAS3_SW|MAS3_UW); 98 + 99 + mtspr(SPRN_MAS1, mas1); 100 + mtspr(SPRN_MAS2, mas2); 101 + 102 + if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { 103 + mtspr(SPRN_MAS7_MAS3, mas7_3); 104 + } else { 105 + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); 106 + mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); 107 + } 108 + 109 + asm volatile ("tlbwe"); 110 + 111 + local_irq_restore(flags); 112 + } 113 + 114 + void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 115 + { 116 + struct hstate *hstate = hstate_file(vma->vm_file); 117 + unsigned long tsize = huge_page_shift(hstate) - 10; 118 + 119 + __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, tsize, 0); 120 + 121 + }
+337 -42
arch/powerpc/mm/hugetlbpage.c
··· 1 1 /* 2 - * PPC64 (POWER4) Huge TLB Page Support for Kernel. 2 + * PPC Huge TLB Page Support for Kernel. 3 3 * 4 4 * Copyright (C) 2003 David Gibson, IBM Corporation. 5 + * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor 5 6 * 6 7 * Based on the IA-32 version: 7 8 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> ··· 12 11 #include <linux/io.h> 13 12 #include <linux/slab.h> 14 13 #include <linux/hugetlb.h> 14 + #include <linux/of_fdt.h> 15 + #include <linux/memblock.h> 16 + #include <linux/bootmem.h> 15 17 #include <asm/pgtable.h> 16 18 #include <asm/pgalloc.h> 17 19 #include <asm/tlb.h> 20 + #include <asm/setup.h> 18 21 19 22 #define PAGE_SHIFT_64K 16 20 23 #define PAGE_SHIFT_16M 24 21 24 #define PAGE_SHIFT_16G 34 22 25 26 + unsigned int HPAGE_SHIFT; 27 + 28 + /* 29 + * Tracks gpages after the device tree is scanned and before the 30 + * huge_boot_pages list is ready. On 64-bit implementations, this is 31 + * just used to track 16G pages and so is a single array. 32-bit 32 + * implementations may have more than one gpage size due to limitations 33 + * of the memory allocators, so we need multiple arrays 34 + */ 35 + #ifdef CONFIG_PPC64 23 36 #define MAX_NUMBER_GPAGES 1024 24 - 25 - /* Tracks the 16G pages after the device tree is scanned and before the 26 - * huge_boot_pages list is ready. */ 27 - static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; 37 + static u64 gpage_freearray[MAX_NUMBER_GPAGES]; 28 38 static unsigned nr_gpages; 29 - 30 - /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() 31 - * will choke on pointers to hugepte tables, which is handy for 32 - * catching screwups early. 
*/ 39 + #else 40 + #define MAX_NUMBER_GPAGES 128 41 + struct psize_gpages { 42 + u64 gpage_list[MAX_NUMBER_GPAGES]; 43 + unsigned int nr_gpages; 44 + }; 45 + static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT]; 46 + #endif 33 47 34 48 static inline int shift_to_mmu_psize(unsigned int shift) 35 49 { ··· 64 48 } 65 49 66 50 #define hugepd_none(hpd) ((hpd).pd == 0) 67 - 68 - static inline pte_t *hugepd_page(hugepd_t hpd) 69 - { 70 - BUG_ON(!hugepd_ok(hpd)); 71 - return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000); 72 - } 73 - 74 - static inline unsigned int hugepd_shift(hugepd_t hpd) 75 - { 76 - return hpd.pd & HUGEPD_SHIFT_MASK; 77 - } 78 - 79 - static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift) 80 - { 81 - unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); 82 - pte_t *dir = hugepd_page(*hpdp); 83 - 84 - return dir + idx; 85 - } 86 51 87 52 pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) 88 53 { ··· 90 93 if (is_hugepd(pm)) 91 94 hpdp = (hugepd_t *)pm; 92 95 else if (!pmd_none(*pm)) { 93 - return pte_offset_map(pm, ea); 96 + return pte_offset_kernel(pm, ea); 94 97 } 95 98 } 96 99 } ··· 111 114 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, 112 115 unsigned long address, unsigned pdshift, unsigned pshift) 113 116 { 114 - pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), 115 - GFP_KERNEL|__GFP_REPEAT); 117 + struct kmem_cache *cachep; 118 + pte_t *new; 119 + 120 + #ifdef CONFIG_PPC64 121 + cachep = PGT_CACHE(pdshift - pshift); 122 + #else 123 + int i; 124 + int num_hugepd = 1 << (pshift - pdshift); 125 + cachep = hugepte_cache; 126 + #endif 127 + 128 + new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); 116 129 117 130 BUG_ON(pshift > HUGEPD_SHIFT_MASK); 118 131 BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); ··· 131 124 return -ENOMEM; 132 125 133 126 spin_lock(&mm->page_table_lock); 127 + #ifdef CONFIG_PPC64 134 
128 if (!hugepd_none(*hpdp)) 135 - kmem_cache_free(PGT_CACHE(pdshift - pshift), new); 129 + kmem_cache_free(cachep, new); 136 130 else 137 - hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; 131 + hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; 132 + #else 133 + /* 134 + * We have multiple higher-level entries that point to the same 135 + * actual pte location. Fill in each as we go and backtrack on error. 136 + * We need all of these so the DTLB pgtable walk code can find the 137 + * right higher-level entry without knowing if it's a hugepage or not. 138 + */ 139 + for (i = 0; i < num_hugepd; i++, hpdp++) { 140 + if (unlikely(!hugepd_none(*hpdp))) 141 + break; 142 + else 143 + hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; 144 + } 145 + /* If we bailed from the for loop early, an error occurred, clean up */ 146 + if (i < num_hugepd) { 147 + for (i = i - 1 ; i >= 0; i--, hpdp--) 148 + hpdp->pd = 0; 149 + kmem_cache_free(cachep, new); 150 + } 151 + #endif 138 152 spin_unlock(&mm->page_table_lock); 139 153 return 0; 140 154 } ··· 197 169 return hugepte_offset(hpdp, addr, pdshift); 198 170 } 199 171 172 + #ifdef CONFIG_PPC32 200 173 /* Build list of addresses of gigantic pages. This function is used in early 201 174 * boot before the buddy or bootmem allocator is setup. 202 175 */ 203 - void add_gpage(unsigned long addr, unsigned long page_size, 204 - unsigned long number_of_pages) 176 + void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) 177 + { 178 + unsigned int idx = shift_to_mmu_psize(__ffs(page_size)); 179 + int i; 180 + 181 + if (addr == 0) 182 + return; 183 + 184 + gpage_freearray[idx].nr_gpages = number_of_pages; 185 + 186 + for (i = 0; i < number_of_pages; i++) { 187 + gpage_freearray[idx].gpage_list[i] = addr; 188 + addr += page_size; 189 + } 190 + } 191 + 192 + /* 193 + * Moves the gigantic page addresses from the temporary list to the 194 + * huge_boot_pages list. 
195 + */ 196 + int alloc_bootmem_huge_page(struct hstate *hstate) 197 + { 198 + struct huge_bootmem_page *m; 199 + int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT); 200 + int nr_gpages = gpage_freearray[idx].nr_gpages; 201 + 202 + if (nr_gpages == 0) 203 + return 0; 204 + 205 + #ifdef CONFIG_HIGHMEM 206 + /* 207 + * If gpages can be in highmem we can't use the trick of storing the 208 + * data structure in the page; allocate space for this 209 + */ 210 + m = alloc_bootmem(sizeof(struct huge_bootmem_page)); 211 + m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; 212 + #else 213 + m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); 214 + #endif 215 + 216 + list_add(&m->list, &huge_boot_pages); 217 + gpage_freearray[idx].nr_gpages = nr_gpages; 218 + gpage_freearray[idx].gpage_list[nr_gpages] = 0; 219 + m->hstate = hstate; 220 + 221 + return 1; 222 + } 223 + /* 224 + * Scan the command line hugepagesz= options for gigantic pages; store those in 225 + * a list that we use to allocate the memory once all options are parsed. 226 + */ 227 + 228 + unsigned long gpage_npages[MMU_PAGE_COUNT]; 229 + 230 + static int __init do_gpage_early_setup(char *param, char *val) 231 + { 232 + static phys_addr_t size; 233 + unsigned long npages; 234 + 235 + /* 236 + * The hugepagesz and hugepages cmdline options are interleaved. We 237 + * use the size variable to keep track of whether or not this was done 238 + * properly and skip over instances where it is incorrect. Other 239 + * command-line parsing code will issue warnings, so we don't need to. 
240 + * 241 + */ 242 + if ((strcmp(param, "default_hugepagesz") == 0) || 243 + (strcmp(param, "hugepagesz") == 0)) { 244 + size = memparse(val, NULL); 245 + } else if (strcmp(param, "hugepages") == 0) { 246 + if (size != 0) { 247 + if (sscanf(val, "%lu", &npages) <= 0) 248 + npages = 0; 249 + gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; 250 + size = 0; 251 + } 252 + } 253 + return 0; 254 + } 255 + 256 + 257 + /* 258 + * This function allocates physical space for pages that are larger than the 259 + * buddy allocator can handle. We want to allocate these in highmem because 260 + * the amount of lowmem is limited. This means that this function MUST be 261 + * called before lowmem_end_addr is set up in MMU_init() in order for the lmb 262 + * allocate to grab highmem. 263 + */ 264 + void __init reserve_hugetlb_gpages(void) 265 + { 266 + static __initdata char cmdline[COMMAND_LINE_SIZE]; 267 + phys_addr_t size, base; 268 + int i; 269 + 270 + strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE); 271 + parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup); 272 + 273 + /* 274 + * Walk gpage list in reverse, allocating larger page sizes first. 275 + * Skip over unsupported sizes, or sizes that have 0 gpages allocated. 276 + * When we reach the point in the list where pages are no longer 277 + * considered gpages, we're done. 278 + */ 279 + for (i = MMU_PAGE_COUNT-1; i >= 0; i--) { 280 + if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0) 281 + continue; 282 + else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT)) 283 + break; 284 + 285 + size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i)); 286 + base = memblock_alloc_base(size * gpage_npages[i], size, 287 + MEMBLOCK_ALLOC_ANYWHERE); 288 + add_gpage(base, size, gpage_npages[i]); 289 + } 290 + } 291 + 292 + #else /* PPC64 */ 293 + 294 + /* Build list of addresses of gigantic pages. This function is used in early 295 + * boot before the buddy or bootmem allocator is setup. 
296 + */ 297 + void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) 205 298 { 206 299 if (!addr) 207 300 return; ··· 348 199 m->hstate = hstate; 349 200 return 1; 350 201 } 202 + #endif 351 203 352 204 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) 353 205 { 354 206 return 0; 355 207 } 356 208 209 + #ifdef CONFIG_PPC32 210 + #define HUGEPD_FREELIST_SIZE \ 211 + ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) 212 + 213 + struct hugepd_freelist { 214 + struct rcu_head rcu; 215 + unsigned int index; 216 + void *ptes[0]; 217 + }; 218 + 219 + static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); 220 + 221 + static void hugepd_free_rcu_callback(struct rcu_head *head) 222 + { 223 + struct hugepd_freelist *batch = 224 + container_of(head, struct hugepd_freelist, rcu); 225 + unsigned int i; 226 + 227 + for (i = 0; i < batch->index; i++) 228 + kmem_cache_free(hugepte_cache, batch->ptes[i]); 229 + 230 + free_page((unsigned long)batch); 231 + } 232 + 233 + static void hugepd_free(struct mmu_gather *tlb, void *hugepte) 234 + { 235 + struct hugepd_freelist **batchp; 236 + 237 + batchp = &__get_cpu_var(hugepd_freelist_cur); 238 + 239 + if (atomic_read(&tlb->mm->mm_users) < 2 || 240 + cpumask_equal(mm_cpumask(tlb->mm), 241 + cpumask_of(smp_processor_id()))) { 242 + kmem_cache_free(hugepte_cache, hugepte); 243 + return; 244 + } 245 + 246 + if (*batchp == NULL) { 247 + *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC); 248 + (*batchp)->index = 0; 249 + } 250 + 251 + (*batchp)->ptes[(*batchp)->index++] = hugepte; 252 + if ((*batchp)->index == HUGEPD_FREELIST_SIZE) { 253 + call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); 254 + *batchp = NULL; 255 + } 256 + } 257 + #endif 258 + 357 259 static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, 358 260 unsigned long start, unsigned long end, 359 261 unsigned long floor, unsigned long ceiling) 360 262 { 361 263 
pte_t *hugepte = hugepd_page(*hpdp); 362 - unsigned shift = hugepd_shift(*hpdp); 264 + int i; 265 + 363 266 unsigned long pdmask = ~((1UL << pdshift) - 1); 267 + unsigned int num_hugepd = 1; 268 + 269 + #ifdef CONFIG_PPC64 270 + unsigned int shift = hugepd_shift(*hpdp); 271 + #else 272 + /* Note: On 32-bit the hpdp may be the first of several */ 273 + num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift)); 274 + #endif 364 275 365 276 start &= pdmask; 366 277 if (start < floor) ··· 433 224 if (end - 1 > ceiling - 1) 434 225 return; 435 226 436 - hpdp->pd = 0; 227 + for (i = 0; i < num_hugepd; i++, hpdp++) 228 + hpdp->pd = 0; 229 + 437 230 tlb->need_flush = 1; 231 + #ifdef CONFIG_PPC64 438 232 pgtable_free_tlb(tlb, hugepte, pdshift - shift); 233 + #else 234 + hugepd_free(tlb, hugepte); 235 + #endif 439 236 } 440 237 441 238 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, ··· 546 331 * too. 547 332 */ 548 333 549 - pgd = pgd_offset(tlb->mm, addr); 550 334 do { 551 335 next = pgd_addr_end(addr, end); 336 + pgd = pgd_offset(tlb->mm, addr); 552 337 if (!is_hugepd(pgd)) { 553 338 if (pgd_none_or_clear_bad(pgd)) 554 339 continue; 555 340 hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); 556 341 } else { 342 + #ifdef CONFIG_PPC32 343 + /* 344 + * Increment next by the size of the huge mapping since 345 + * on 32-bit there may be more than one entry at the pgd 346 + * level for a single hugepage, but all of them point to 347 + * the same kmem cache that holds the hugepte. 
348 + */ 349 + next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); 350 + #endif 557 351 free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, 558 352 addr, next, floor, ceiling); 559 353 } 560 - } while (pgd++, addr = next, addr != end); 354 + } while (addr = next, addr != end); 561 355 } 562 356 563 357 struct page * ··· 690 466 unsigned long len, unsigned long pgoff, 691 467 unsigned long flags) 692 468 { 469 + #ifdef CONFIG_MM_SLICES 693 470 struct hstate *hstate = hstate_file(file); 694 471 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); 695 472 696 473 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); 474 + #else 475 + return get_unmapped_area(file, addr, len, pgoff, flags); 476 + #endif 697 477 } 698 478 699 479 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) 700 480 { 481 + #ifdef CONFIG_MM_SLICES 701 482 unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); 702 483 703 484 return 1UL << mmu_psize_to_shift(psize); 485 + #else 486 + if (!is_vm_hugetlb_page(vma)) 487 + return PAGE_SIZE; 488 + 489 + return huge_page_size(hstate_vma(vma)); 490 + #endif 491 + } 492 + 493 + static inline bool is_power_of_4(unsigned long x) 494 + { 495 + if (is_power_of_2(x)) 496 + return (__ilog2(x) % 2) ? false : true; 497 + return false; 704 498 } 705 499 706 500 static int __init add_huge_page_size(unsigned long long size) ··· 728 486 729 487 /* Check that it is a page size supported by the hardware and 730 488 * that it fits within pagetable and slice limits. 
*/ 489 + #ifdef CONFIG_PPC_FSL_BOOK3E 490 + if ((size < PAGE_SIZE) || !is_power_of_4(size)) 491 + return -EINVAL; 492 + #else 731 493 if (!is_power_of_2(size) 732 494 || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) 733 495 return -EINVAL; 496 + #endif 734 497 735 498 if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) 736 499 return -EINVAL; ··· 772 525 } 773 526 __setup("hugepagesz=", hugepage_setup_sz); 774 527 528 + #ifdef CONFIG_FSL_BOOKE 529 + struct kmem_cache *hugepte_cache; 530 + static int __init hugetlbpage_init(void) 531 + { 532 + int psize; 533 + 534 + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 535 + unsigned shift; 536 + 537 + if (!mmu_psize_defs[psize].shift) 538 + continue; 539 + 540 + shift = mmu_psize_to_shift(psize); 541 + 542 + /* Don't treat normal page sizes as huge... */ 543 + if (shift != PAGE_SHIFT) 544 + if (add_huge_page_size(1ULL << shift) < 0) 545 + continue; 546 + } 547 + 548 + /* 549 + * Create a kmem cache for hugeptes. The bottom bits in the pte have 550 + * size information encoded in them, so align them to allow this 551 + */ 552 + hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t), 553 + HUGEPD_SHIFT_MASK + 1, 0, NULL); 554 + if (hugepte_cache == NULL) 555 + panic("%s: Unable to create kmem cache for hugeptes\n", 556 + __func__); 557 + 558 + /* Default hpage size = 4M */ 559 + if (mmu_psize_defs[MMU_PAGE_4M].shift) 560 + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; 561 + else 562 + panic("%s: Unable to set default huge page size\n", __func__); 563 + 564 + 565 + return 0; 566 + } 567 + #else 775 568 static int __init hugetlbpage_init(void) 776 569 { 777 570 int psize; ··· 854 567 855 568 return 0; 856 569 } 857 - 570 + #endif 858 571 module_init(hugetlbpage_init); 859 572 860 573 void flush_dcache_icache_hugepage(struct page *page) 861 574 { 862 575 int i; 576 + void *start; 863 577 864 578 BUG_ON(!PageCompound(page)); 865 579 866 - for (i = 0; i < (1UL << compound_order(page)); i++) 867 - 
__flush_dcache_icache(page_address(page+i)); 580 + for (i = 0; i < (1UL << compound_order(page)); i++) { 581 + if (!PageHighMem(page)) { 582 + __flush_dcache_icache(page_address(page+i)); 583 + } else { 584 + start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE); 585 + __flush_dcache_icache(start); 586 + kunmap_atomic(start, KM_PPC_SYNC_ICACHE); 587 + } 588 + } 868 589 }
+9
arch/powerpc/mm/init_32.c
··· 32 32 #include <linux/pagemap.h> 33 33 #include <linux/memblock.h> 34 34 #include <linux/gfp.h> 35 + #include <linux/slab.h> 36 + #include <linux/hugetlb.h> 35 37 36 38 #include <asm/pgalloc.h> 37 39 #include <asm/prom.h> ··· 46 44 #include <asm/tlb.h> 47 45 #include <asm/sections.h> 48 46 #include <asm/system.h> 47 + #include <asm/hugetlb.h> 49 48 50 49 #include "mmu_decl.h" 51 50 ··· 125 122 126 123 /* parse args from command line */ 127 124 MMU_setup(); 125 + 126 + /* 127 + * Reserve gigantic pages for hugetlb. This MUST occur before 128 + * lowmem_end_addr is initialized below. 129 + */ 130 + reserve_hugetlb_gpages(); 128 131 129 132 if (memblock.memory.cnt > 1) { 130 133 #ifndef CONFIG_WII
+5
arch/powerpc/mm/mem.c
··· 548 548 return; 549 549 hash_preload(vma->vm_mm, address, access, trap); 550 550 #endif /* CONFIG_PPC_STD_MMU */ 551 + #if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \ 552 + && defined(CONFIG_HUGETLB_PAGE) 553 + if (is_vm_hugetlb_page(vma)) 554 + book3e_hugetlb_preload(vma->vm_mm, address, *ptep); 555 + #endif 551 556 }
+5
arch/powerpc/mm/mmu_context_nohash.c
··· 292 292 mm->context.id = MMU_NO_CONTEXT; 293 293 mm->context.active = 0; 294 294 295 + #ifdef CONFIG_PPC_MM_SLICES 296 + if (slice_mm_new_context(mm)) 297 + slice_set_user_psize(mm, mmu_virtual_psize); 298 + #endif 299 + 295 300 return 0; 296 301 } 297 302
+2 -1
arch/powerpc/mm/pgtable.c
··· 27 27 #include <linux/init.h> 28 28 #include <linux/percpu.h> 29 29 #include <linux/hardirq.h> 30 + #include <linux/hugetlb.h> 30 31 #include <asm/pgalloc.h> 31 32 #include <asm/tlbflush.h> 32 33 #include <asm/tlb.h> ··· 213 212 entry = set_access_flags_filter(entry, vma, dirty); 214 213 changed = !pte_same(*(ptep), entry); 215 214 if (changed) { 216 - if (!(vma->vm_flags & VM_HUGETLB)) 215 + if (!is_vm_hugetlb_page(vma)) 217 216 assert_pte_locked(vma->vm_mm, address); 218 217 __ptep_set_access_flags(ptep, entry); 219 218 flush_tlb_page_nohash(vma, address);
+12 -12
arch/powerpc/mm/tlb_low_64e.S
··· 553 553 rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3 554 554 clrrdi r10,r11,3 555 555 ldx r15,r10,r15 556 - cmpldi cr0,r15,0 557 - beq virt_page_table_tlb_miss_fault 556 + cmpdi cr0,r15,0 557 + bge virt_page_table_tlb_miss_fault 558 558 559 559 #ifndef CONFIG_PPC_64K_PAGES 560 560 /* Get to PUD entry */ 561 561 rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 562 562 clrrdi r10,r11,3 563 563 ldx r15,r10,r15 564 - cmpldi cr0,r15,0 565 - beq virt_page_table_tlb_miss_fault 564 + cmpdi cr0,r15,0 565 + bge virt_page_table_tlb_miss_fault 566 566 #endif /* CONFIG_PPC_64K_PAGES */ 567 567 568 568 /* Get to PMD entry */ 569 569 rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 570 570 clrrdi r10,r11,3 571 571 ldx r15,r10,r15 572 - cmpldi cr0,r15,0 573 - beq virt_page_table_tlb_miss_fault 572 + cmpdi cr0,r15,0 573 + bge virt_page_table_tlb_miss_fault 574 574 575 575 /* Ok, we're all right, we can now create a kernel translation for 576 576 * a 4K or 64K page from r16 -> r15. ··· 802 802 rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 803 803 clrrdi r10,r11,3 804 804 ldx r15,r10,r15 805 - cmpldi cr0,r15,0 806 - beq htw_tlb_miss_fault 805 + cmpdi cr0,r15,0 806 + bge htw_tlb_miss_fault 807 807 808 808 #ifndef CONFIG_PPC_64K_PAGES 809 809 /* Get to PUD entry */ 810 810 rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 811 811 clrrdi r10,r11,3 812 812 ldx r15,r10,r15 813 - cmpldi cr0,r15,0 814 - beq htw_tlb_miss_fault 813 + cmpdi cr0,r15,0 814 + bge htw_tlb_miss_fault 815 815 #endif /* CONFIG_PPC_64K_PAGES */ 816 816 817 817 /* Get to PMD entry */ 818 818 rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 819 819 clrrdi r10,r11,3 820 820 ldx r15,r10,r15 821 - cmpldi cr0,r15,0 822 - beq htw_tlb_miss_fault 821 + cmpdi cr0,r15,0 822 + bge htw_tlb_miss_fault 823 823 824 824 /* Ok, we're all right, we can now create an indirect entry for 825 825 * a 1M or 256M page.
+44 -2
arch/powerpc/mm/tlb_nohash.c
··· 36 36 #include <linux/spinlock.h> 37 37 #include <linux/memblock.h> 38 38 #include <linux/of_fdt.h> 39 + #include <linux/hugetlb.h> 39 40 40 41 #include <asm/tlbflush.h> 41 42 #include <asm/tlb.h> 42 43 #include <asm/code-patching.h> 44 + #include <asm/hugetlb.h> 43 45 44 46 #include "mmu_decl.h" 45 47 46 - #ifdef CONFIG_PPC_BOOK3E 48 + /* 49 + * This struct lists the sw-supported page sizes. The hardawre MMU may support 50 + * other sizes not listed here. The .ind field is only used on MMUs that have 51 + * indirect page table entries. 52 + */ 53 + #ifdef CONFIG_PPC_BOOK3E_MMU 54 + #ifdef CONFIG_FSL_BOOKE 55 + struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { 56 + [MMU_PAGE_4K] = { 57 + .shift = 12, 58 + .enc = BOOK3E_PAGESZ_4K, 59 + }, 60 + [MMU_PAGE_4M] = { 61 + .shift = 22, 62 + .enc = BOOK3E_PAGESZ_4M, 63 + }, 64 + [MMU_PAGE_16M] = { 65 + .shift = 24, 66 + .enc = BOOK3E_PAGESZ_16M, 67 + }, 68 + [MMU_PAGE_64M] = { 69 + .shift = 26, 70 + .enc = BOOK3E_PAGESZ_64M, 71 + }, 72 + [MMU_PAGE_256M] = { 73 + .shift = 28, 74 + .enc = BOOK3E_PAGESZ_256M, 75 + }, 76 + [MMU_PAGE_1G] = { 77 + .shift = 30, 78 + .enc = BOOK3E_PAGESZ_1GB, 79 + }, 80 + }; 81 + #else 47 82 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { 48 83 [MMU_PAGE_4K] = { 49 84 .shift = 12, ··· 112 77 .enc = BOOK3E_PAGESZ_1GB, 113 78 }, 114 79 }; 80 + #endif /* CONFIG_FSL_BOOKE */ 81 + 115 82 static inline int mmu_get_tsize(int psize) 116 83 { 117 84 return mmu_psize_defs[psize].enc; ··· 124 87 /* This isn't used on !Book3E for now */ 125 88 return 0; 126 89 } 127 - #endif 90 + #endif /* CONFIG_PPC_BOOK3E_MMU */ 128 91 129 92 /* The variables below are currently only used on 64-bit Book3E 130 93 * though this will probably be made common with other nohash ··· 303 266 304 267 void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 305 268 { 269 + #ifdef CONFIG_HUGETLB_PAGE 270 + if (is_vm_hugetlb_page(vma)) 271 + flush_hugetlb_page(vma, vmaddr); 272 + #endif 273 + 306 274 
__flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, 307 275 mmu_get_tsize(mmu_virtual_psize), 0); 308 276 }
+3 -1
arch/powerpc/platforms/Kconfig.cputype
··· 69 69 bool "Server processors" 70 70 select PPC_FPU 71 71 select PPC_HAVE_PMU_SUPPORT 72 + select SYS_SUPPORTS_HUGETLBFS 72 73 73 74 config PPC_BOOK3E_64 74 75 bool "Embedded processors" ··· 174 173 config FSL_BOOKE 175 174 bool 176 175 depends on (E200 || E500) && PPC32 176 + select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT 177 177 default y 178 178 179 179 # this is for common code between PPC32 & PPC64 FSL BOOKE ··· 298 296 299 297 config PPC_MM_SLICES 300 298 bool 301 - default y if HUGETLB_PAGE || (PPC_STD_MMU_64 && PPC_64K_PAGES) 299 + default y if (PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES) 302 300 default n 303 301 304 302 config VIRT_CPU_ACCOUNTING