Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc

+11 -13

arch/powerpc/Kconfig

··· 120 120 config SYS_SUPPORTS_APM_EMULATION 121 121 bool 122 122 123 - # 124 - # Powerpc uses the slab allocator to manage its ptes and the 125 - # page structs of ptes are used for splitting the page table 126 - # lock for configurations supporting more than SPLIT_PTLOCK_CPUS. 127 - # 128 - # In that special configuration the page structs of slabs are modified. 129 - # This setting disables the selection of SLUB as a slab allocator. 130 - # 131 - config ARCH_USES_SLAB_PAGE_STRUCT 132 - bool 133 - default y 134 - depends on SPLIT_PTLOCK_CPUS <= NR_CPUS 135 - 136 123 config DEFAULT_UIMAGE 137 124 bool 138 125 help ··· 339 352 def_bool y 340 353 depends on PPC_STD_MMU && PPC32 341 354 355 + config PPC_MM_SLICES 356 + bool 357 + default y if HUGETLB_PAGE 358 + default n 359 + 342 360 config VIRT_CPU_ACCOUNTING 343 361 bool "Deterministic task and CPU time accounting" 344 362 depends on PPC64 ··· 533 541 def_bool y 534 542 depends on NEED_MULTIPLE_NODES 535 543 544 + config PPC_HAS_HASH_64K 545 + bool 546 + depends on PPC64 547 + default n 548 + 536 549 config PPC_64K_PAGES 537 550 bool "64k page size" 538 551 depends on PPC64 552 + select PPC_HAS_HASH_64K 539 553 help 540 554 This option changes the kernel logical page size to 64k. On machines 541 555 without processor support for 64k pages, the kernel will simulate

+11 -5

arch/powerpc/kernel/asm-offsets.c

··· 122 122 DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); 123 123 DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); 124 124 DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); 125 - DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); 126 125 DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); 127 - #ifdef CONFIG_HUGETLB_PAGE 128 - DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); 129 - DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); 130 - #endif /* CONFIG_HUGETLB_PAGE */ 126 + #ifdef CONFIG_PPC_MM_SLICES 127 + DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, 128 + context.low_slices_psize)); 129 + DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, 130 + context.high_slices_psize)); 131 + DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); 132 + DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); 133 + #else 134 + DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); 135 + 136 + #endif /* CONFIG_PPC_MM_SLICES */ 131 137 DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); 132 138 DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); 133 139 DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));

+2 -1

arch/powerpc/kernel/lparmap.c

··· 10 10 #include <asm/pgtable.h> 11 11 #include <asm/iseries/lpar_map.h> 12 12 13 - const struct LparMap __attribute__((__section__(".text"))) xLparMap = { 13 + /* The # is to stop gcc trying to make .text nonexecutable */ 14 + const struct LparMap __attribute__((__section__(".text #"))) xLparMap = { 14 15 .xNumberEsids = HvEsidsToMap, 15 16 .xNumberRanges = HvRangesToMap, 16 17 .xSegmentTableOffs = STAB0_PAGE,

+1

arch/powerpc/mm/Makefile

··· 18 18 obj-$(CONFIG_44x) += 44x_mmu.o 19 19 obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o 20 20 obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o 21 + obj-$(CONFIG_PPC_MM_SLICES) += slice.o 21 22 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o

+4 -1

arch/powerpc/mm/hash_low_64.S

··· 615 615 li r3,-1 616 616 b htab_bail 617 617 618 + #endif /* CONFIG_PPC_64K_PAGES */ 619 + 620 + #ifdef CONFIG_PPC_HAS_HASH_64K 618 621 619 622 /***************************************************************************** 620 623 * * ··· 873 870 b ht64_bail 874 871 875 872 876 - #endif /* CONFIG_PPC_64K_PAGES */ 873 + #endif /* CONFIG_PPC_HAS_HASH_64K */ 877 874 878 875 879 876 /*****************************************************************************

+87 -55

arch/powerpc/mm/hash_utils_64.c

··· 51 51 #include <asm/cputable.h> 52 52 #include <asm/abs_addr.h> 53 53 #include <asm/sections.h> 54 + #include <asm/spu.h> 54 55 55 56 #ifdef DEBUG 56 57 #define DBG(fmt...) udbg_printf(fmt) ··· 420 419 extern unsigned int *htab_call_hpte_remove; 421 420 extern unsigned int *htab_call_hpte_updatepp; 422 421 423 - #ifdef CONFIG_PPC_64K_PAGES 422 + #ifdef CONFIG_PPC_HAS_HASH_64K 424 423 extern unsigned int *ht64_call_hpte_insert1; 425 424 extern unsigned int *ht64_call_hpte_insert2; 426 425 extern unsigned int *ht64_call_hpte_remove; ··· 597 596 * Demote a segment to using 4k pages. 598 597 * For now this makes the whole process use 4k pages. 599 598 */ 600 - void demote_segment_4k(struct mm_struct *mm, unsigned long addr) 601 - { 602 599 #ifdef CONFIG_PPC_64K_PAGES 600 + static void demote_segment_4k(struct mm_struct *mm, unsigned long addr) 601 + { 603 602 if (mm->context.user_psize == MMU_PAGE_4K) 604 603 return; 604 + #ifdef CONFIG_PPC_MM_SLICES 605 + slice_set_user_psize(mm, MMU_PAGE_4K); 606 + #else /* CONFIG_PPC_MM_SLICES */ 605 607 mm->context.user_psize = MMU_PAGE_4K; 606 608 mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp; 607 - get_paca()->context = mm->context; 608 - slb_flush_and_rebolt(); 609 + #endif /* CONFIG_PPC_MM_SLICES */ 610 + 609 611 #ifdef CONFIG_SPE_BASE 610 612 spu_flush_all_slbs(mm); 611 613 #endif 612 - #endif 613 614 } 614 - 615 - EXPORT_SYMBOL_GPL(demote_segment_4k); 615 + #endif /* CONFIG_PPC_64K_PAGES */ 616 616 617 617 /* Result code is: 618 618 * 0 - handled ··· 648 646 return 1; 649 647 } 650 648 vsid = get_vsid(mm->context.id, ea); 649 + #ifdef CONFIG_PPC_MM_SLICES 650 + psize = get_slice_psize(mm, ea); 651 + #else 651 652 psize = mm->context.user_psize; 653 + #endif 652 654 break; 653 655 case VMALLOC_REGION_ID: 654 656 mm = &init_mm; ··· 680 674 if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) 681 675 local = 1; 682 676 677 + #ifdef CONFIG_HUGETLB_PAGE 683 678 /* Handle hugepage regions */ 684 - if 
(unlikely(in_hugepage_area(mm->context, ea))) { 679 + if (HPAGE_SHIFT && psize == mmu_huge_psize) { 685 680 DBG_LOW(" -> huge page !\n"); 686 681 return hash_huge_page(mm, access, ea, vsid, local, trap); 687 682 } 683 + #endif /* CONFIG_HUGETLB_PAGE */ 684 + 685 + #ifndef CONFIG_PPC_64K_PAGES 686 + /* If we use 4K pages and our psize is not 4K, then we are hitting 687 + * a special driver mapping, we need to align the address before 688 + * we fetch the PTE 689 + */ 690 + if (psize != MMU_PAGE_4K) 691 + ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); 692 + #endif /* CONFIG_PPC_64K_PAGES */ 688 693 689 694 /* Get PTE and page size from page tables */ 690 695 ptep = find_linux_pte(pgdir, ea); ··· 719 702 } 720 703 721 704 /* Do actual hashing */ 722 - #ifndef CONFIG_PPC_64K_PAGES 723 - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); 724 - #else 705 + #ifdef CONFIG_PPC_64K_PAGES 725 706 /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ 726 707 if (pte_val(*ptep) & _PAGE_4K_PFN) { 727 708 demote_segment_4k(mm, ea); 728 709 psize = MMU_PAGE_4K; 729 710 } 730 711 731 - if (mmu_ci_restrictions) { 732 - /* If this PTE is non-cacheable, switch to 4k */ 733 - if (psize == MMU_PAGE_64K && 734 - (pte_val(*ptep) & _PAGE_NO_CACHE)) { 735 - if (user_region) { 736 - demote_segment_4k(mm, ea); 737 - psize = MMU_PAGE_4K; 738 - } else if (ea < VMALLOC_END) { 739 - /* 740 - * some driver did a non-cacheable mapping 741 - * in vmalloc space, so switch vmalloc 742 - * to 4k pages 743 - */ 744 - printk(KERN_ALERT "Reducing vmalloc segment " 745 - "to 4kB pages because of " 746 - "non-cacheable mapping\n"); 747 - psize = mmu_vmalloc_psize = MMU_PAGE_4K; 748 - } 712 + /* If this PTE is non-cacheable and we have restrictions on 713 + * using non cacheable large pages, then we switch to 4k 714 + */ 715 + if (mmu_ci_restrictions && psize == MMU_PAGE_64K && 716 + (pte_val(*ptep) & _PAGE_NO_CACHE)) { 717 + if (user_region) { 718 + demote_segment_4k(mm, ea); 719 + psize = 
MMU_PAGE_4K; 720 + } else if (ea < VMALLOC_END) { 721 + /* 722 + * some driver did a non-cacheable mapping 723 + * in vmalloc space, so switch vmalloc 724 + * to 4k pages 725 + */ 726 + printk(KERN_ALERT "Reducing vmalloc segment " 727 + "to 4kB pages because of " 728 + "non-cacheable mapping\n"); 729 + psize = mmu_vmalloc_psize = MMU_PAGE_4K; 749 730 #ifdef CONFIG_SPE_BASE 750 731 spu_flush_all_slbs(mm); 751 732 #endif 752 733 } 753 - if (user_region) { 754 - if (psize != get_paca()->context.user_psize) { 755 - get_paca()->context = mm->context; 756 - slb_flush_and_rebolt(); 757 - } 758 - } else if (get_paca()->vmalloc_sllp != 759 - mmu_psize_defs[mmu_vmalloc_psize].sllp) { 760 - get_paca()->vmalloc_sllp = 761 - mmu_psize_defs[mmu_vmalloc_psize].sllp; 734 + } 735 + if (user_region) { 736 + if (psize != get_paca()->context.user_psize) { 737 + get_paca()->context.user_psize = 738 + mm->context.user_psize; 762 739 slb_flush_and_rebolt(); 763 740 } 741 + } else if (get_paca()->vmalloc_sllp != 742 + mmu_psize_defs[mmu_vmalloc_psize].sllp) { 743 + get_paca()->vmalloc_sllp = 744 + mmu_psize_defs[mmu_vmalloc_psize].sllp; 745 + slb_flush_and_rebolt(); 764 746 } 747 + #endif /* CONFIG_PPC_64K_PAGES */ 748 + 749 + #ifdef CONFIG_PPC_HAS_HASH_64K 765 750 if (psize == MMU_PAGE_64K) 766 751 rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); 767 752 else 753 + #endif /* CONFIG_PPC_HAS_HASH_64K */ 768 754 rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); 769 - #endif /* CONFIG_PPC_64K_PAGES */ 770 755 771 756 #ifndef CONFIG_PPC_64K_PAGES 772 757 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); ··· 791 772 unsigned long flags; 792 773 int local = 0; 793 774 794 - /* We don't want huge pages prefaulted for now 795 - */ 796 - if (unlikely(in_hugepage_area(mm->context, ea))) 775 + BUG_ON(REGION_ID(ea) != USER_REGION_ID); 776 + 777 + #ifdef CONFIG_PPC_MM_SLICES 778 + /* We only prefault standard pages for now */ 779 + if (unlikely(get_slice_psize(mm, ea) != 
mm->context.user_psize)); 797 780 return; 781 + #endif 798 782 799 783 DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," 800 784 " trap=%lx\n", mm, mm->pgd, ea, access, trap); 801 785 802 - /* Get PTE, VSID, access mask */ 786 + /* Get Linux PTE if available */ 803 787 pgdir = mm->pgd; 804 788 if (pgdir == NULL) 805 789 return; 806 790 ptep = find_linux_pte(pgdir, ea); 807 791 if (!ptep) 808 792 return; 793 + 794 + #ifdef CONFIG_PPC_64K_PAGES 795 + /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on 796 + * a 64K kernel), then we don't preload, hash_page() will take 797 + * care of it once we actually try to access the page. 798 + * That way we don't have to duplicate all of the logic for segment 799 + * page size demotion here 800 + */ 801 + if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) 802 + return; 803 + #endif /* CONFIG_PPC_64K_PAGES */ 804 + 805 + /* Get VSID */ 809 806 vsid = get_vsid(mm->context.id, ea); 810 807 811 - /* Hash it in */ 808 + /* Hash doesn't like irqs */ 812 809 local_irq_save(flags); 810 + 811 + /* Is that local to this CPU ? */ 813 812 mask = cpumask_of_cpu(smp_processor_id()); 814 813 if (cpus_equal(mm->cpu_vm_mask, mask)) 815 814 local = 1; 816 - #ifndef CONFIG_PPC_64K_PAGES 817 - __hash_page_4K(ea, access, vsid, ptep, trap, local); 818 - #else 819 - if (mmu_ci_restrictions) { 820 - /* If this PTE is non-cacheable, switch to 4k */ 821 - if (mm->context.user_psize == MMU_PAGE_64K && 822 - (pte_val(*ptep) & _PAGE_NO_CACHE)) 823 - demote_segment_4k(mm, ea); 824 - } 815 + 816 + /* Hash it in */ 817 + #ifdef CONFIG_PPC_HAS_HASH_64K 825 818 if (mm->context.user_psize == MMU_PAGE_64K) 826 819 __hash_page_64K(ea, access, vsid, ptep, trap, local); 827 820 else 828 - __hash_page_4K(ea, access, vsid, ptep, trap, local); 829 821 #endif /* CONFIG_PPC_64K_PAGES */ 822 + __hash_page_4K(ea, access, vsid, ptep, trap, local); 823 + 830 824 local_irq_restore(flags); 831 825 } 832 826

+6 -542

arch/powerpc/mm/hugetlbpage.c

··· 91 91 pgd_t *pg; 92 92 pud_t *pu; 93 93 94 - BUG_ON(! in_hugepage_area(mm->context, addr)); 94 + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); 95 95 96 96 addr &= HPAGE_MASK; 97 97 ··· 119 119 pud_t *pu; 120 120 hugepd_t *hpdp = NULL; 121 121 122 - BUG_ON(! in_hugepage_area(mm->context, addr)); 122 + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); 123 123 124 124 addr &= HPAGE_MASK; 125 125 ··· 302 302 start = addr; 303 303 pgd = pgd_offset((*tlb)->mm, addr); 304 304 do { 305 - BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr)); 305 + BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); 306 306 next = pgd_addr_end(addr, end); 307 307 if (pgd_none_or_clear_bad(pgd)) 308 308 continue; ··· 331 331 return __pte(old); 332 332 } 333 333 334 - struct slb_flush_info { 335 - struct mm_struct *mm; 336 - u16 newareas; 337 - }; 338 - 339 - static void flush_low_segments(void *parm) 340 - { 341 - struct slb_flush_info *fi = parm; 342 - unsigned long i; 343 - 344 - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); 345 - 346 - if (current->active_mm != fi->mm) 347 - return; 348 - 349 - /* Only need to do anything if this CPU is working in the same 350 - * mm as the one which has changed */ 351 - 352 - /* update the paca copy of the context struct */ 353 - get_paca()->context = current->active_mm->context; 354 - 355 - asm volatile("isync" : : : "memory"); 356 - for (i = 0; i < NUM_LOW_AREAS; i++) { 357 - if (! 
(fi->newareas & (1U << i))) 358 - continue; 359 - asm volatile("slbie %0" 360 - : : "r" ((i << SID_SHIFT) | SLBIE_C)); 361 - } 362 - asm volatile("isync" : : : "memory"); 363 - } 364 - 365 - static void flush_high_segments(void *parm) 366 - { 367 - struct slb_flush_info *fi = parm; 368 - unsigned long i, j; 369 - 370 - 371 - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); 372 - 373 - if (current->active_mm != fi->mm) 374 - return; 375 - 376 - /* Only need to do anything if this CPU is working in the same 377 - * mm as the one which has changed */ 378 - 379 - /* update the paca copy of the context struct */ 380 - get_paca()->context = current->active_mm->context; 381 - 382 - asm volatile("isync" : : : "memory"); 383 - for (i = 0; i < NUM_HIGH_AREAS; i++) { 384 - if (! (fi->newareas & (1U << i))) 385 - continue; 386 - for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) 387 - asm volatile("slbie %0" 388 - :: "r" (((i << HTLB_AREA_SHIFT) 389 - + (j << SID_SHIFT)) | SLBIE_C)); 390 - } 391 - asm volatile("isync" : : : "memory"); 392 - } 393 - 394 - static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) 395 - { 396 - unsigned long start = area << SID_SHIFT; 397 - unsigned long end = (area+1) << SID_SHIFT; 398 - struct vm_area_struct *vma; 399 - 400 - BUG_ON(area >= NUM_LOW_AREAS); 401 - 402 - /* Check no VMAs are in the region */ 403 - vma = find_vma(mm, start); 404 - if (vma && (vma->vm_start < end)) 405 - return -EBUSY; 406 - 407 - return 0; 408 - } 409 - 410 - static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) 411 - { 412 - unsigned long start = area << HTLB_AREA_SHIFT; 413 - unsigned long end = (area+1) << HTLB_AREA_SHIFT; 414 - struct vm_area_struct *vma; 415 - 416 - BUG_ON(area >= NUM_HIGH_AREAS); 417 - 418 - /* Hack, so that each addresses is controlled by exactly one 419 - * of the high or low area bitmaps, the first high area starts 420 - * at 4GB, not 0 */ 421 - if (start == 0) 422 - start = 
0x100000000UL; 423 - 424 - /* Check no VMAs are in the region */ 425 - vma = find_vma(mm, start); 426 - if (vma && (vma->vm_start < end)) 427 - return -EBUSY; 428 - 429 - return 0; 430 - } 431 - 432 - static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) 433 - { 434 - unsigned long i; 435 - struct slb_flush_info fi; 436 - 437 - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); 438 - BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); 439 - 440 - newareas &= ~(mm->context.low_htlb_areas); 441 - if (! newareas) 442 - return 0; /* The segments we want are already open */ 443 - 444 - for (i = 0; i < NUM_LOW_AREAS; i++) 445 - if ((1 << i) & newareas) 446 - if (prepare_low_area_for_htlb(mm, i) != 0) 447 - return -EBUSY; 448 - 449 - mm->context.low_htlb_areas |= newareas; 450 - 451 - /* the context change must make it to memory before the flush, 452 - * so that further SLB misses do the right thing. */ 453 - mb(); 454 - 455 - fi.mm = mm; 456 - fi.newareas = newareas; 457 - on_each_cpu(flush_low_segments, &fi, 0, 1); 458 - 459 - return 0; 460 - } 461 - 462 - static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) 463 - { 464 - struct slb_flush_info fi; 465 - unsigned long i; 466 - 467 - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); 468 - BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) 469 - != NUM_HIGH_AREAS); 470 - 471 - newareas &= ~(mm->context.high_htlb_areas); 472 - if (! newareas) 473 - return 0; /* The areas we want are already open */ 474 - 475 - for (i = 0; i < NUM_HIGH_AREAS; i++) 476 - if ((1 << i) & newareas) 477 - if (prepare_high_area_for_htlb(mm, i) != 0) 478 - return -EBUSY; 479 - 480 - mm->context.high_htlb_areas |= newareas; 481 - 482 - /* the context change must make it to memory before the flush, 483 - * so that further SLB misses do the right thing. 
*/ 484 - mb(); 485 - 486 - fi.mm = mm; 487 - fi.newareas = newareas; 488 - on_each_cpu(flush_high_segments, &fi, 0, 1); 489 - 490 - return 0; 491 - } 492 - 493 - int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) 494 - { 495 - int err = 0; 496 - 497 - if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) 498 - return -EINVAL; 499 - if (len & ~HPAGE_MASK) 500 - return -EINVAL; 501 - if (addr & ~HPAGE_MASK) 502 - return -EINVAL; 503 - 504 - if (addr < 0x100000000UL) 505 - err = open_low_hpage_areas(current->mm, 506 - LOW_ESID_MASK(addr, len)); 507 - if ((addr + len) > 0x100000000UL) 508 - err = open_high_hpage_areas(current->mm, 509 - HTLB_AREA_MASK(addr, len)); 510 - #ifdef CONFIG_SPE_BASE 511 - spu_flush_all_slbs(current->mm); 512 - #endif 513 - if (err) { 514 - printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" 515 - " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", 516 - addr, len, 517 - LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); 518 - return err; 519 - } 520 - 521 - return 0; 522 - } 523 - 524 334 struct page * 525 335 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) 526 336 { 527 337 pte_t *ptep; 528 338 struct page *page; 529 339 530 - if (! in_hugepage_area(mm->context, address)) 340 + if (get_slice_psize(mm, address) != mmu_huge_psize) 531 341 return ERR_PTR(-EINVAL); 532 342 533 343 ptep = huge_pte_offset(mm, address); ··· 361 551 return NULL; 362 552 } 363 553 364 - /* Because we have an exclusive hugepage region which lies within the 365 - * normal user address space, we have to take special measures to make 366 - * non-huge mmap()s evade the hugepage reserved regions. 
*/ 367 - unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, 368 - unsigned long len, unsigned long pgoff, 369 - unsigned long flags) 370 - { 371 - struct mm_struct *mm = current->mm; 372 - struct vm_area_struct *vma; 373 - unsigned long start_addr; 374 - 375 - if (len > TASK_SIZE) 376 - return -ENOMEM; 377 - 378 - /* handle fixed mapping: prevent overlap with huge pages */ 379 - if (flags & MAP_FIXED) { 380 - if (is_hugepage_only_range(mm, addr, len)) 381 - return -EINVAL; 382 - return addr; 383 - } 384 - 385 - if (addr) { 386 - addr = PAGE_ALIGN(addr); 387 - vma = find_vma(mm, addr); 388 - if (((TASK_SIZE - len) >= addr) 389 - && (!vma || (addr+len) <= vma->vm_start) 390 - && !is_hugepage_only_range(mm, addr,len)) 391 - return addr; 392 - } 393 - if (len > mm->cached_hole_size) { 394 - start_addr = addr = mm->free_area_cache; 395 - } else { 396 - start_addr = addr = TASK_UNMAPPED_BASE; 397 - mm->cached_hole_size = 0; 398 - } 399 - 400 - full_search: 401 - vma = find_vma(mm, addr); 402 - while (TASK_SIZE - len >= addr) { 403 - BUG_ON(vma && (addr >= vma->vm_end)); 404 - 405 - if (touches_hugepage_low_range(mm, addr, len)) { 406 - addr = ALIGN(addr+1, 1<<SID_SHIFT); 407 - vma = find_vma(mm, addr); 408 - continue; 409 - } 410 - if (touches_hugepage_high_range(mm, addr, len)) { 411 - addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); 412 - vma = find_vma(mm, addr); 413 - continue; 414 - } 415 - if (!vma || addr + len <= vma->vm_start) { 416 - /* 417 - * Remember the place where we stopped the search: 418 - */ 419 - mm->free_area_cache = addr + len; 420 - return addr; 421 - } 422 - if (addr + mm->cached_hole_size < vma->vm_start) 423 - mm->cached_hole_size = vma->vm_start - addr; 424 - addr = vma->vm_end; 425 - vma = vma->vm_next; 426 - } 427 - 428 - /* Make sure we didn't miss any holes */ 429 - if (start_addr != TASK_UNMAPPED_BASE) { 430 - start_addr = addr = TASK_UNMAPPED_BASE; 431 - mm->cached_hole_size = 0; 432 - goto full_search; 433 - } 434 - 
return -ENOMEM; 435 - } 436 - 437 - /* 438 - * This mmap-allocator allocates new areas top-down from below the 439 - * stack's low limit (the base): 440 - * 441 - * Because we have an exclusive hugepage region which lies within the 442 - * normal user address space, we have to take special measures to make 443 - * non-huge mmap()s evade the hugepage reserved regions. 444 - */ 445 - unsigned long 446 - arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, 447 - const unsigned long len, const unsigned long pgoff, 448 - const unsigned long flags) 449 - { 450 - struct vm_area_struct *vma, *prev_vma; 451 - struct mm_struct *mm = current->mm; 452 - unsigned long base = mm->mmap_base, addr = addr0; 453 - unsigned long largest_hole = mm->cached_hole_size; 454 - int first_time = 1; 455 - 456 - /* requested length too big for entire address space */ 457 - if (len > TASK_SIZE) 458 - return -ENOMEM; 459 - 460 - /* handle fixed mapping: prevent overlap with huge pages */ 461 - if (flags & MAP_FIXED) { 462 - if (is_hugepage_only_range(mm, addr, len)) 463 - return -EINVAL; 464 - return addr; 465 - } 466 - 467 - /* dont allow allocations above current base */ 468 - if (mm->free_area_cache > base) 469 - mm->free_area_cache = base; 470 - 471 - /* requesting a specific address */ 472 - if (addr) { 473 - addr = PAGE_ALIGN(addr); 474 - vma = find_vma(mm, addr); 475 - if (TASK_SIZE - len >= addr && 476 - (!vma || addr + len <= vma->vm_start) 477 - && !is_hugepage_only_range(mm, addr,len)) 478 - return addr; 479 - } 480 - 481 - if (len <= largest_hole) { 482 - largest_hole = 0; 483 - mm->free_area_cache = base; 484 - } 485 - try_again: 486 - /* make sure it can fit in the remaining address space */ 487 - if (mm->free_area_cache < len) 488 - goto fail; 489 - 490 - /* either no address requested or cant fit in requested address hole */ 491 - addr = (mm->free_area_cache - len) & PAGE_MASK; 492 - do { 493 - hugepage_recheck: 494 - if (touches_hugepage_low_range(mm, 
addr, len)) { 495 - addr = (addr & ((~0) << SID_SHIFT)) - len; 496 - goto hugepage_recheck; 497 - } else if (touches_hugepage_high_range(mm, addr, len)) { 498 - addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; 499 - goto hugepage_recheck; 500 - } 501 - 502 - /* 503 - * Lookup failure means no vma is above this address, 504 - * i.e. return with success: 505 - */ 506 - if (!(vma = find_vma_prev(mm, addr, &prev_vma))) 507 - return addr; 508 - 509 - /* 510 - * new region fits between prev_vma->vm_end and 511 - * vma->vm_start, use it: 512 - */ 513 - if (addr+len <= vma->vm_start && 514 - (!prev_vma || (addr >= prev_vma->vm_end))) { 515 - /* remember the address as a hint for next time */ 516 - mm->cached_hole_size = largest_hole; 517 - return (mm->free_area_cache = addr); 518 - } else { 519 - /* pull free_area_cache down to the first hole */ 520 - if (mm->free_area_cache == vma->vm_end) { 521 - mm->free_area_cache = vma->vm_start; 522 - mm->cached_hole_size = largest_hole; 523 - } 524 - } 525 - 526 - /* remember the largest hole we saw so far */ 527 - if (addr + largest_hole < vma->vm_start) 528 - largest_hole = vma->vm_start - addr; 529 - 530 - /* try just below the current vma->vm_start */ 531 - addr = vma->vm_start-len; 532 - } while (len <= vma->vm_start); 533 - 534 - fail: 535 - /* 536 - * if hint left us with no space for the requested 537 - * mapping then try again: 538 - */ 539 - if (first_time) { 540 - mm->free_area_cache = base; 541 - largest_hole = 0; 542 - first_time = 0; 543 - goto try_again; 544 - } 545 - /* 546 - * A failed mmap() very likely causes application failure, 547 - * so fall back to the bottom-up function here. This scenario 548 - * can happen with large stack limits and large mmap() 549 - * allocations. 
550 - */ 551 - mm->free_area_cache = TASK_UNMAPPED_BASE; 552 - mm->cached_hole_size = ~0UL; 553 - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); 554 - /* 555 - * Restore the topdown base: 556 - */ 557 - mm->free_area_cache = base; 558 - mm->cached_hole_size = ~0UL; 559 - 560 - return addr; 561 - } 562 - 563 - static int htlb_check_hinted_area(unsigned long addr, unsigned long len) 564 - { 565 - struct vm_area_struct *vma; 566 - 567 - vma = find_vma(current->mm, addr); 568 - if (TASK_SIZE - len >= addr && 569 - (!vma || ((addr + len) <= vma->vm_start))) 570 - return 0; 571 - 572 - return -ENOMEM; 573 - } 574 - 575 - static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) 576 - { 577 - unsigned long addr = 0; 578 - struct vm_area_struct *vma; 579 - 580 - vma = find_vma(current->mm, addr); 581 - while (addr + len <= 0x100000000UL) { 582 - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ 583 - 584 - if (! __within_hugepage_low_range(addr, len, segmask)) { 585 - addr = ALIGN(addr+1, 1<<SID_SHIFT); 586 - vma = find_vma(current->mm, addr); 587 - continue; 588 - } 589 - 590 - if (!vma || (addr + len) <= vma->vm_start) 591 - return addr; 592 - addr = ALIGN(vma->vm_end, HPAGE_SIZE); 593 - /* Depending on segmask this might not be a confirmed 594 - * hugepage region, so the ALIGN could have skipped 595 - * some VMAs */ 596 - vma = find_vma(current->mm, addr); 597 - } 598 - 599 - return -ENOMEM; 600 - } 601 - 602 - static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) 603 - { 604 - unsigned long addr = 0x100000000UL; 605 - struct vm_area_struct *vma; 606 - 607 - vma = find_vma(current->mm, addr); 608 - while (addr + len <= TASK_SIZE_USER64) { 609 - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ 610 - 611 - if (! 
__within_hugepage_high_range(addr, len, areamask)) { 612 - addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); 613 - vma = find_vma(current->mm, addr); 614 - continue; 615 - } 616 - 617 - if (!vma || (addr + len) <= vma->vm_start) 618 - return addr; 619 - addr = ALIGN(vma->vm_end, HPAGE_SIZE); 620 - /* Depending on segmask this might not be a confirmed 621 - * hugepage region, so the ALIGN could have skipped 622 - * some VMAs */ 623 - vma = find_vma(current->mm, addr); 624 - } 625 - 626 - return -ENOMEM; 627 - } 628 554 629 555 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 630 556 unsigned long len, unsigned long pgoff, 631 557 unsigned long flags) 632 558 { 633 - int lastshift; 634 - u16 areamask, curareas; 635 - 636 - if (HPAGE_SHIFT == 0) 637 - return -EINVAL; 638 - if (len & ~HPAGE_MASK) 639 - return -EINVAL; 640 - if (len > TASK_SIZE) 641 - return -ENOMEM; 642 - 643 - if (!cpu_has_feature(CPU_FTR_16M_PAGE)) 644 - return -EINVAL; 645 - 646 - /* Paranoia, caller should have dealt with this */ 647 - BUG_ON((addr + len) < addr); 648 - 649 - /* Handle MAP_FIXED */ 650 - if (flags & MAP_FIXED) { 651 - if (prepare_hugepage_range(addr, len, pgoff)) 652 - return -EINVAL; 653 - return addr; 654 - } 655 - 656 - if (test_thread_flag(TIF_32BIT)) { 657 - curareas = current->mm->context.low_htlb_areas; 658 - 659 - /* First see if we can use the hint address */ 660 - if (addr && (htlb_check_hinted_area(addr, len) == 0)) { 661 - areamask = LOW_ESID_MASK(addr, len); 662 - if (open_low_hpage_areas(current->mm, areamask) == 0) 663 - return addr; 664 - } 665 - 666 - /* Next see if we can map in the existing low areas */ 667 - addr = htlb_get_low_area(len, curareas); 668 - if (addr != -ENOMEM) 669 - return addr; 670 - 671 - /* Finally go looking for areas to open */ 672 - lastshift = 0; 673 - for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); 674 - ! 
lastshift; areamask >>=1) { 675 - if (areamask & 1) 676 - lastshift = 1; 677 - 678 - addr = htlb_get_low_area(len, curareas | areamask); 679 - if ((addr != -ENOMEM) 680 - && open_low_hpage_areas(current->mm, areamask) == 0) 681 - return addr; 682 - } 683 - } else { 684 - curareas = current->mm->context.high_htlb_areas; 685 - 686 - /* First see if we can use the hint address */ 687 - /* We discourage 64-bit processes from doing hugepage 688 - * mappings below 4GB (must use MAP_FIXED) */ 689 - if ((addr >= 0x100000000UL) 690 - && (htlb_check_hinted_area(addr, len) == 0)) { 691 - areamask = HTLB_AREA_MASK(addr, len); 692 - if (open_high_hpage_areas(current->mm, areamask) == 0) 693 - return addr; 694 - } 695 - 696 - /* Next see if we can map in the existing high areas */ 697 - addr = htlb_get_high_area(len, curareas); 698 - if (addr != -ENOMEM) 699 - return addr; 700 - 701 - /* Finally go looking for areas to open */ 702 - lastshift = 0; 703 - for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); 704 - ! lastshift; areamask >>=1) { 705 - if (areamask & 1) 706 - lastshift = 1; 707 - 708 - addr = htlb_get_high_area(len, curareas | areamask); 709 - if ((addr != -ENOMEM) 710 - && open_high_hpage_areas(current->mm, areamask) == 0) 711 - return addr; 712 - } 713 - } 714 - printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" 715 - " enough areas\n"); 716 - return -ENOMEM; 559 + return slice_get_unmapped_area(addr, len, flags, 560 + mmu_huge_psize, 1, 0); 717 561 } 718 562 719 563 /*

+6 -11

arch/powerpc/mm/init_64.c

··· 146 146 memset(addr, 0, kmem_cache_size(cache)); 147 147 } 148 148 149 - #ifdef CONFIG_PPC_64K_PAGES 150 - static const unsigned int pgtable_cache_size[3] = { 151 - PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE 152 - }; 153 - static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { 154 - "pte_pmd_cache", "pmd_cache", "pgd_cache", 155 - }; 156 - #else 157 149 static const unsigned int pgtable_cache_size[2] = { 158 - PTE_TABLE_SIZE, PMD_TABLE_SIZE 150 + PGD_TABLE_SIZE, PMD_TABLE_SIZE 159 151 }; 160 152 static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { 161 - "pgd_pte_cache", "pud_pmd_cache", 162 - }; 153 + #ifdef CONFIG_PPC_64K_PAGES 154 + "pgd_cache", "pmd_cache", 155 + #else 156 + "pgd_cache", "pud_pmd_cache", 163 157 #endif /* CONFIG_PPC_64K_PAGES */ 158 + }; 164 159 165 160 #ifdef CONFIG_HUGETLB_PAGE 166 161 /* Hugepages need one extra cache, initialized in hugetlbpage.c. We

+25

arch/powerpc/mm/mem.c

··· 31 31 #include <linux/highmem.h> 32 32 #include <linux/initrd.h> 33 33 #include <linux/pagemap.h> 34 + #include <linux/suspend.h> 34 35 35 36 #include <asm/pgalloc.h> 36 37 #include <asm/prom.h> ··· 277 276 init_bootmem_done = 1; 278 277 } 279 278 279 + /* mark pages that don't exist as nosave */ 280 + static int __init mark_nonram_nosave(void) 281 + { 282 + unsigned long lmb_next_region_start_pfn, 283 + lmb_region_max_pfn; 284 + int i; 285 + 286 + for (i = 0; i < lmb.memory.cnt - 1; i++) { 287 + lmb_region_max_pfn = 288 + (lmb.memory.region[i].base >> PAGE_SHIFT) + 289 + (lmb.memory.region[i].size >> PAGE_SHIFT); 290 + lmb_next_region_start_pfn = 291 + lmb.memory.region[i+1].base >> PAGE_SHIFT; 292 + 293 + if (lmb_region_max_pfn < lmb_next_region_start_pfn) 294 + register_nosave_region(lmb_region_max_pfn, 295 + lmb_next_region_start_pfn); 296 + } 297 + 298 + return 0; 299 + } 300 + 280 301 /* 281 302 * paging_init() sets up the page tables - in fact we've already done this. 282 303 */ ··· 330 307 max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; 331 308 #endif 332 309 free_area_init_nodes(max_zone_pfns); 310 + 311 + mark_nonram_nosave(); 333 312 } 334 313 #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ 335 314

+10

arch/powerpc/mm/mmu_context_64.c

··· 28 28 { 29 29 int index; 30 30 int err; 31 + int new_context = (mm->context.id == 0); 31 32 32 33 again: 33 34 if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) ··· 51 50 } 52 51 53 52 mm->context.id = index; 53 + #ifdef CONFIG_PPC_MM_SLICES 54 + /* The old code would re-promote on fork, we don't do that 55 + * when using slices as it could cause problem promoting slices 56 + * that have been forced down to 4K 57 + */ 58 + if (new_context) 59 + slice_set_user_psize(mm, mmu_virtual_psize); 60 + #else 54 61 mm->context.user_psize = mmu_virtual_psize; 55 62 mm->context.sllp = SLB_VSID_USER | 56 63 mmu_psize_defs[mmu_virtual_psize].sllp; 64 + #endif 57 65 58 66 return 0; 59 67 }

+1 -1

arch/powerpc/mm/ppc_mmu_32.c

··· 185 185 186 186 if (Hash == 0) 187 187 return; 188 - pmd = pmd_offset(pgd_offset(mm, ea), ea); 188 + pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); 189 189 if (!pmd_none(*pmd)) 190 190 add_hash_page(mm->context.id, ea, pmd_val(*pmd)); 191 191 }

-11

arch/powerpc/mm/slb.c

··· 198 198 static int slb_encoding_inited; 199 199 extern unsigned int *slb_miss_kernel_load_linear; 200 200 extern unsigned int *slb_miss_kernel_load_io; 201 - #ifdef CONFIG_HUGETLB_PAGE 202 - extern unsigned int *slb_miss_user_load_huge; 203 - unsigned long huge_llp; 204 - 205 - huge_llp = mmu_psize_defs[mmu_huge_psize].sllp; 206 - #endif 207 201 208 202 /* Prepare our SLB miss handler based on our page size */ 209 203 linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; ··· 214 220 215 221 DBG("SLB: linear LLP = %04x\n", linear_llp); 216 222 DBG("SLB: io LLP = %04x\n", io_llp); 217 - #ifdef CONFIG_HUGETLB_PAGE 218 - patch_slb_encoding(slb_miss_user_load_huge, 219 - SLB_VSID_USER | huge_llp); 220 - DBG("SLB: huge LLP = %04x\n", huge_llp); 221 - #endif 222 223 } 223 224 224 225 get_paca()->stab_rr = SLB_NUM_BOLTED;

+33 -19

arch/powerpc/mm/slb_low.S

··· 82 82 srdi. r9,r10,USER_ESID_BITS 83 83 bne- 8f /* invalid ea bits set */ 84 84 85 - /* Figure out if the segment contains huge pages */ 86 - #ifdef CONFIG_HUGETLB_PAGE 87 - BEGIN_FTR_SECTION 88 - b 1f 89 - END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) 85 + 86 + /* when using slices, we extract the psize off the slice bitmaps 87 + * and then we need to get the sllp encoding off the mmu_psize_defs 88 + * array. 89 + * 90 + * XXX This is a bit inefficient especially for the normal case, 91 + * so we should try to implement a fast path for the standard page 92 + * size using the old sllp value so we avoid the array. We cannot 93 + * really do dynamic patching unfortunately as processes might flip 94 + * between 4k and 64k standard page size 95 + */ 96 + #ifdef CONFIG_PPC_MM_SLICES 90 97 cmpldi r10,16 91 98 92 - lhz r9,PACALOWHTLBAREAS(r13) 93 - mr r11,r10 99 + /* Get the slice index * 4 in r11 and matching slice size mask in r9 */ 100 + ld r9,PACALOWSLICESPSIZE(r13) 101 + sldi r11,r10,2 94 102 blt 5f 103 + ld r9,PACAHIGHSLICEPSIZE(r13) 104 + srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2) 105 + andi. r11,r11,0x3c 95 106 96 - lhz r9,PACAHIGHHTLBAREAS(r13) 97 - srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT) 107 + 5: /* Extract the psize and multiply to get an array offset */ 108 + srd r9,r9,r11 109 + andi. r9,r9,0xf 110 + mulli r9,r9,MMUPSIZEDEFSIZE 98 111 99 - 5: srd r9,r9,r11 100 - andi. 
r9,r9,1 101 - beq 1f 102 - _GLOBAL(slb_miss_user_load_huge) 103 - li r11,0 104 - b 2f 105 - 1: 106 - #endif /* CONFIG_HUGETLB_PAGE */ 107 - 112 + /* Now get to the array and obtain the sllp 113 + */ 114 + ld r11,PACATOC(r13) 115 + ld r11,mmu_psize_defs@got(r11) 116 + add r11,r11,r9 117 + ld r11,MMUPSIZESLLP(r11) 118 + ori r11,r11,SLB_VSID_USER 119 + #else 120 + /* paca context sllp already contains the SLB_VSID_USER bits */ 108 121 lhz r11,PACACONTEXTSLLP(r13) 109 - 2: 122 + #endif /* CONFIG_PPC_MM_SLICES */ 123 + 110 124 ld r9,PACACONTEXTID(r13) 111 125 rldimi r10,r9,USER_ESID_BITS,0 112 126 b slb_finish_load

+633

arch/powerpc/mm/slice.c

··· 1 + /* 2 + * address space "slices" (meta-segments) support 3 + * 4 + * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation. 5 + * 6 + * Based on hugetlb implementation 7 + * 8 + * Copyright (C) 2003 David Gibson, IBM Corporation. 9 + * 10 + * This program is free software; you can redistribute it and/or modify 11 + * it under the terms of the GNU General Public License as published by 12 + * the Free Software Foundation; either version 2 of the License, or 13 + * (at your option) any later version. 14 + * 15 + * This program is distributed in the hope that it will be useful, 16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 + * GNU General Public License for more details. 19 + * 20 + * You should have received a copy of the GNU General Public License 21 + * along with this program; if not, write to the Free Software 22 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 + */ 24 + 25 + #undef DEBUG 26 + 27 + #include <linux/kernel.h> 28 + #include <linux/mm.h> 29 + #include <linux/pagemap.h> 30 + #include <linux/err.h> 31 + #include <linux/spinlock.h> 32 + #include <linux/module.h> 33 + #include <asm/mman.h> 34 + #include <asm/mmu.h> 35 + #include <asm/spu.h> 36 + 37 + static spinlock_t slice_convert_lock = SPIN_LOCK_UNLOCKED; 38 + 39 + 40 + #ifdef DEBUG 41 + int _slice_debug = 1; 42 + 43 + static void slice_print_mask(const char *label, struct slice_mask mask) 44 + { 45 + char *p, buf[16 + 3 + 16 + 1]; 46 + int i; 47 + 48 + if (!_slice_debug) 49 + return; 50 + p = buf; 51 + for (i = 0; i < SLICE_NUM_LOW; i++) 52 + *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0'; 53 + *(p++) = ' '; 54 + *(p++) = '-'; 55 + *(p++) = ' '; 56 + for (i = 0; i < SLICE_NUM_HIGH; i++) 57 + *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0'; 58 + *(p++) = 0; 59 + 60 + printk(KERN_DEBUG "%s:%s\n", label, buf); 61 + } 62 + 63 + #define slice_dbg(fmt...) 
do { if (_slice_debug) pr_debug(fmt); } while(0) 64 + 65 + #else 66 + 67 + static void slice_print_mask(const char *label, struct slice_mask mask) {} 68 + #define slice_dbg(fmt...) 69 + 70 + #endif 71 + 72 + static struct slice_mask slice_range_to_mask(unsigned long start, 73 + unsigned long len) 74 + { 75 + unsigned long end = start + len - 1; 76 + struct slice_mask ret = { 0, 0 }; 77 + 78 + if (start < SLICE_LOW_TOP) { 79 + unsigned long mend = min(end, SLICE_LOW_TOP); 80 + unsigned long mstart = min(start, SLICE_LOW_TOP); 81 + 82 + ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) 83 + - (1u << GET_LOW_SLICE_INDEX(mstart)); 84 + } 85 + 86 + if ((start + len) > SLICE_LOW_TOP) 87 + ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1)) 88 + - (1u << GET_HIGH_SLICE_INDEX(start)); 89 + 90 + return ret; 91 + } 92 + 93 + static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, 94 + unsigned long len) 95 + { 96 + struct vm_area_struct *vma; 97 + 98 + if ((mm->task_size - len) < addr) 99 + return 0; 100 + vma = find_vma(mm, addr); 101 + return (!vma || (addr + len) <= vma->vm_start); 102 + } 103 + 104 + static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) 105 + { 106 + return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT, 107 + 1ul << SLICE_LOW_SHIFT); 108 + } 109 + 110 + static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) 111 + { 112 + unsigned long start = slice << SLICE_HIGH_SHIFT; 113 + unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); 114 + 115 + /* Hack, so that each addresses is controlled by exactly one 116 + * of the high or low area bitmaps, the first high area starts 117 + * at 4GB, not 0 */ 118 + if (start == 0) 119 + start = SLICE_LOW_TOP; 120 + 121 + return !slice_area_is_free(mm, start, end - start); 122 + } 123 + 124 + static struct slice_mask slice_mask_for_free(struct mm_struct *mm) 125 + { 126 + struct slice_mask ret = { 0, 0 }; 127 + unsigned long i; 128 + 129 + for (i = 0; i < 
SLICE_NUM_LOW; i++) 130 + if (!slice_low_has_vma(mm, i)) 131 + ret.low_slices |= 1u << i; 132 + 133 + if (mm->task_size <= SLICE_LOW_TOP) 134 + return ret; 135 + 136 + for (i = 0; i < SLICE_NUM_HIGH; i++) 137 + if (!slice_high_has_vma(mm, i)) 138 + ret.high_slices |= 1u << i; 139 + 140 + return ret; 141 + } 142 + 143 + static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize) 144 + { 145 + struct slice_mask ret = { 0, 0 }; 146 + unsigned long i; 147 + u64 psizes; 148 + 149 + psizes = mm->context.low_slices_psize; 150 + for (i = 0; i < SLICE_NUM_LOW; i++) 151 + if (((psizes >> (i * 4)) & 0xf) == psize) 152 + ret.low_slices |= 1u << i; 153 + 154 + psizes = mm->context.high_slices_psize; 155 + for (i = 0; i < SLICE_NUM_HIGH; i++) 156 + if (((psizes >> (i * 4)) & 0xf) == psize) 157 + ret.high_slices |= 1u << i; 158 + 159 + return ret; 160 + } 161 + 162 + static int slice_check_fit(struct slice_mask mask, struct slice_mask available) 163 + { 164 + return (mask.low_slices & available.low_slices) == mask.low_slices && 165 + (mask.high_slices & available.high_slices) == mask.high_slices; 166 + } 167 + 168 + static void slice_flush_segments(void *parm) 169 + { 170 + struct mm_struct *mm = parm; 171 + unsigned long flags; 172 + 173 + if (mm != current->active_mm) 174 + return; 175 + 176 + /* update the paca copy of the context struct */ 177 + get_paca()->context = current->active_mm->context; 178 + 179 + local_irq_save(flags); 180 + slb_flush_and_rebolt(); 181 + local_irq_restore(flags); 182 + } 183 + 184 + static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) 185 + { 186 + /* Write the new slice psize bits */ 187 + u64 lpsizes, hpsizes; 188 + unsigned long i, flags; 189 + 190 + slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); 191 + slice_print_mask(" mask", mask); 192 + 193 + /* We need to use a spinlock here to protect against 194 + * concurrent 64k -> 4k demotion ... 
195 + */ 196 + spin_lock_irqsave(&slice_convert_lock, flags); 197 + 198 + lpsizes = mm->context.low_slices_psize; 199 + for (i = 0; i < SLICE_NUM_LOW; i++) 200 + if (mask.low_slices & (1u << i)) 201 + lpsizes = (lpsizes & ~(0xful << (i * 4))) | 202 + (((unsigned long)psize) << (i * 4)); 203 + 204 + hpsizes = mm->context.high_slices_psize; 205 + for (i = 0; i < SLICE_NUM_HIGH; i++) 206 + if (mask.high_slices & (1u << i)) 207 + hpsizes = (hpsizes & ~(0xful << (i * 4))) | 208 + (((unsigned long)psize) << (i * 4)); 209 + 210 + mm->context.low_slices_psize = lpsizes; 211 + mm->context.high_slices_psize = hpsizes; 212 + 213 + slice_dbg(" lsps=%lx, hsps=%lx\n", 214 + mm->context.low_slices_psize, 215 + mm->context.high_slices_psize); 216 + 217 + spin_unlock_irqrestore(&slice_convert_lock, flags); 218 + mb(); 219 + 220 + /* XXX this is sub-optimal but will do for now */ 221 + on_each_cpu(slice_flush_segments, mm, 0, 1); 222 + #ifdef CONFIG_SPU_BASE 223 + spu_flush_all_slbs(mm); 224 + #endif 225 + } 226 + 227 + static unsigned long slice_find_area_bottomup(struct mm_struct *mm, 228 + unsigned long len, 229 + struct slice_mask available, 230 + int psize, int use_cache) 231 + { 232 + struct vm_area_struct *vma; 233 + unsigned long start_addr, addr; 234 + struct slice_mask mask; 235 + int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); 236 + 237 + if (use_cache) { 238 + if (len <= mm->cached_hole_size) { 239 + start_addr = addr = TASK_UNMAPPED_BASE; 240 + mm->cached_hole_size = 0; 241 + } else 242 + start_addr = addr = mm->free_area_cache; 243 + } else 244 + start_addr = addr = TASK_UNMAPPED_BASE; 245 + 246 + full_search: 247 + for (;;) { 248 + addr = _ALIGN_UP(addr, 1ul << pshift); 249 + if ((TASK_SIZE - len) < addr) 250 + break; 251 + vma = find_vma(mm, addr); 252 + BUG_ON(vma && (addr >= vma->vm_end)); 253 + 254 + mask = slice_range_to_mask(addr, len); 255 + if (!slice_check_fit(mask, available)) { 256 + if (addr < SLICE_LOW_TOP) 257 + addr = _ALIGN_UP(addr + 
1, 1ul << SLICE_LOW_SHIFT); 258 + else 259 + addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT); 260 + continue; 261 + } 262 + if (!vma || addr + len <= vma->vm_start) { 263 + /* 264 + * Remember the place where we stopped the search: 265 + */ 266 + if (use_cache) 267 + mm->free_area_cache = addr + len; 268 + return addr; 269 + } 270 + if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) 271 + mm->cached_hole_size = vma->vm_start - addr; 272 + addr = vma->vm_end; 273 + } 274 + 275 + /* Make sure we didn't miss any holes */ 276 + if (use_cache && start_addr != TASK_UNMAPPED_BASE) { 277 + start_addr = addr = TASK_UNMAPPED_BASE; 278 + mm->cached_hole_size = 0; 279 + goto full_search; 280 + } 281 + return -ENOMEM; 282 + } 283 + 284 + static unsigned long slice_find_area_topdown(struct mm_struct *mm, 285 + unsigned long len, 286 + struct slice_mask available, 287 + int psize, int use_cache) 288 + { 289 + struct vm_area_struct *vma; 290 + unsigned long addr; 291 + struct slice_mask mask; 292 + int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); 293 + 294 + /* check if free_area_cache is useful for us */ 295 + if (use_cache) { 296 + if (len <= mm->cached_hole_size) { 297 + mm->cached_hole_size = 0; 298 + mm->free_area_cache = mm->mmap_base; 299 + } 300 + 301 + /* either no address requested or can't fit in requested 302 + * address hole 303 + */ 304 + addr = mm->free_area_cache; 305 + 306 + /* make sure it can fit in the remaining address space */ 307 + if (addr > len) { 308 + addr = _ALIGN_DOWN(addr - len, 1ul << pshift); 309 + mask = slice_range_to_mask(addr, len); 310 + if (slice_check_fit(mask, available) && 311 + slice_area_is_free(mm, addr, len)) 312 + /* remember the address as a hint for 313 + * next time 314 + */ 315 + return (mm->free_area_cache = addr); 316 + } 317 + } 318 + 319 + addr = mm->mmap_base; 320 + while (addr > len) { 321 + /* Go down by chunk size */ 322 + addr = _ALIGN_DOWN(addr - len, 1ul << pshift); 323 + 324 + /* Check 
for hit with different page size */ 325 + mask = slice_range_to_mask(addr, len); 326 + if (!slice_check_fit(mask, available)) { 327 + if (addr < SLICE_LOW_TOP) 328 + addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT); 329 + else if (addr < (1ul << SLICE_HIGH_SHIFT)) 330 + addr = SLICE_LOW_TOP; 331 + else 332 + addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT); 333 + continue; 334 + } 335 + 336 + /* 337 + * Lookup failure means no vma is above this address, 338 + * else if new region fits below vma->vm_start, 339 + * return with success: 340 + */ 341 + vma = find_vma(mm, addr); 342 + if (!vma || (addr + len) <= vma->vm_start) { 343 + /* remember the address as a hint for next time */ 344 + if (use_cache) 345 + mm->free_area_cache = addr; 346 + return addr; 347 + } 348 + 349 + /* remember the largest hole we saw so far */ 350 + if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) 351 + mm->cached_hole_size = vma->vm_start - addr; 352 + 353 + /* try just below the current vma->vm_start */ 354 + addr = vma->vm_start; 355 + } 356 + 357 + /* 358 + * A failed mmap() very likely causes application failure, 359 + * so fall back to the bottom-up function here. This scenario 360 + * can happen with large stack limits and large mmap() 361 + * allocations. 
362 + */ 363 + addr = slice_find_area_bottomup(mm, len, available, psize, 0); 364 + 365 + /* 366 + * Restore the topdown base: 367 + */ 368 + if (use_cache) { 369 + mm->free_area_cache = mm->mmap_base; 370 + mm->cached_hole_size = ~0UL; 371 + } 372 + 373 + return addr; 374 + } 375 + 376 + 377 + static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, 378 + struct slice_mask mask, int psize, 379 + int topdown, int use_cache) 380 + { 381 + if (topdown) 382 + return slice_find_area_topdown(mm, len, mask, psize, use_cache); 383 + else 384 + return slice_find_area_bottomup(mm, len, mask, psize, use_cache); 385 + } 386 + 387 + unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, 388 + unsigned long flags, unsigned int psize, 389 + int topdown, int use_cache) 390 + { 391 + struct slice_mask mask; 392 + struct slice_mask good_mask; 393 + struct slice_mask potential_mask = {0,0} /* silence stupid warning */; 394 + int pmask_set = 0; 395 + int fixed = (flags & MAP_FIXED); 396 + int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); 397 + struct mm_struct *mm = current->mm; 398 + 399 + /* Sanity checks */ 400 + BUG_ON(mm->task_size == 0); 401 + 402 + slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); 403 + slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n", 404 + addr, len, flags, topdown, use_cache); 405 + 406 + if (len > mm->task_size) 407 + return -ENOMEM; 408 + if (fixed && (addr & ((1ul << pshift) - 1))) 409 + return -EINVAL; 410 + if (fixed && addr > (mm->task_size - len)) 411 + return -EINVAL; 412 + 413 + /* If hint, make sure it matches our alignment restrictions */ 414 + if (!fixed && addr) { 415 + addr = _ALIGN_UP(addr, 1ul << pshift); 416 + slice_dbg(" aligned addr=%lx\n", addr); 417 + } 418 + 419 + /* First makeup a "good" mask of slices that have the right size 420 + * already 421 + */ 422 + good_mask = slice_mask_for_size(mm, psize); 423 + slice_print_mask(" good_mask", 
good_mask); 424 + 425 + /* First check hint if it's valid or if we have MAP_FIXED */ 426 + if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) { 427 + 428 + /* Don't bother with hint if it overlaps a VMA */ 429 + if (!fixed && !slice_area_is_free(mm, addr, len)) 430 + goto search; 431 + 432 + /* Build a mask for the requested range */ 433 + mask = slice_range_to_mask(addr, len); 434 + slice_print_mask(" mask", mask); 435 + 436 + /* Check if we fit in the good mask. If we do, we just return, 437 + * nothing else to do 438 + */ 439 + if (slice_check_fit(mask, good_mask)) { 440 + slice_dbg(" fits good !\n"); 441 + return addr; 442 + } 443 + 444 + /* We don't fit in the good mask, check what other slices are 445 + * empty and thus can be converted 446 + */ 447 + potential_mask = slice_mask_for_free(mm); 448 + potential_mask.low_slices |= good_mask.low_slices; 449 + potential_mask.high_slices |= good_mask.high_slices; 450 + pmask_set = 1; 451 + slice_print_mask(" potential", potential_mask); 452 + if (slice_check_fit(mask, potential_mask)) { 453 + slice_dbg(" fits potential !\n"); 454 + goto convert; 455 + } 456 + } 457 + 458 + /* If we have MAP_FIXED and failed the above step, then error out */ 459 + if (fixed) 460 + return -EBUSY; 461 + 462 + search: 463 + slice_dbg(" search...\n"); 464 + 465 + /* Now let's see if we can find something in the existing slices 466 + * for that size 467 + */ 468 + addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache); 469 + if (addr != -ENOMEM) { 470 + /* Found within the good mask, we don't have to setup, 471 + * we thus return directly 472 + */ 473 + slice_dbg(" found area at 0x%lx\n", addr); 474 + return addr; 475 + } 476 + 477 + /* Won't fit, check what can be converted */ 478 + if (!pmask_set) { 479 + potential_mask = slice_mask_for_free(mm); 480 + potential_mask.low_slices |= good_mask.low_slices; 481 + potential_mask.high_slices |= good_mask.high_slices; 482 + pmask_set = 1; 483 + slice_print_mask(" 
potential", potential_mask); 484 + } 485 + 486 + /* Now let's see if we can find something in the existing slices 487 + * for that size 488 + */ 489 + addr = slice_find_area(mm, len, potential_mask, psize, topdown, 490 + use_cache); 491 + if (addr == -ENOMEM) 492 + return -ENOMEM; 493 + 494 + mask = slice_range_to_mask(addr, len); 495 + slice_dbg(" found potential area at 0x%lx\n", addr); 496 + slice_print_mask(" mask", mask); 497 + 498 + convert: 499 + slice_convert(mm, mask, psize); 500 + return addr; 501 + 502 + } 503 + EXPORT_SYMBOL_GPL(slice_get_unmapped_area); 504 + 505 + unsigned long arch_get_unmapped_area(struct file *filp, 506 + unsigned long addr, 507 + unsigned long len, 508 + unsigned long pgoff, 509 + unsigned long flags) 510 + { 511 + return slice_get_unmapped_area(addr, len, flags, 512 + current->mm->context.user_psize, 513 + 0, 1); 514 + } 515 + 516 + unsigned long arch_get_unmapped_area_topdown(struct file *filp, 517 + const unsigned long addr0, 518 + const unsigned long len, 519 + const unsigned long pgoff, 520 + const unsigned long flags) 521 + { 522 + return slice_get_unmapped_area(addr0, len, flags, 523 + current->mm->context.user_psize, 524 + 1, 1); 525 + } 526 + 527 + unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) 528 + { 529 + u64 psizes; 530 + int index; 531 + 532 + if (addr < SLICE_LOW_TOP) { 533 + psizes = mm->context.low_slices_psize; 534 + index = GET_LOW_SLICE_INDEX(addr); 535 + } else { 536 + psizes = mm->context.high_slices_psize; 537 + index = GET_HIGH_SLICE_INDEX(addr); 538 + } 539 + 540 + return (psizes >> (index * 4)) & 0xf; 541 + } 542 + EXPORT_SYMBOL_GPL(get_slice_psize); 543 + 544 + /* 545 + * This is called by hash_page when it needs to do a lazy conversion of 546 + * an address space from real 64K pages to combo 4K pages (typically 547 + * when hitting a non cacheable mapping on a processor or hypervisor 548 + * that won't allow them for 64K pages). 
549 + * 550 + * This is also called in init_new_context() to change back the user 551 + * psize from whatever the parent context had it set to 552 + * 553 + * This function will only change the content of the {low,high)_slice_psize 554 + * masks, it will not flush SLBs as this shall be handled lazily by the 555 + * caller. 556 + */ 557 + void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) 558 + { 559 + unsigned long flags, lpsizes, hpsizes; 560 + unsigned int old_psize; 561 + int i; 562 + 563 + slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize); 564 + 565 + spin_lock_irqsave(&slice_convert_lock, flags); 566 + 567 + old_psize = mm->context.user_psize; 568 + slice_dbg(" old_psize=%d\n", old_psize); 569 + if (old_psize == psize) 570 + goto bail; 571 + 572 + mm->context.user_psize = psize; 573 + wmb(); 574 + 575 + lpsizes = mm->context.low_slices_psize; 576 + for (i = 0; i < SLICE_NUM_LOW; i++) 577 + if (((lpsizes >> (i * 4)) & 0xf) == old_psize) 578 + lpsizes = (lpsizes & ~(0xful << (i * 4))) | 579 + (((unsigned long)psize) << (i * 4)); 580 + 581 + hpsizes = mm->context.high_slices_psize; 582 + for (i = 0; i < SLICE_NUM_HIGH; i++) 583 + if (((hpsizes >> (i * 4)) & 0xf) == old_psize) 584 + hpsizes = (hpsizes & ~(0xful << (i * 4))) | 585 + (((unsigned long)psize) << (i * 4)); 586 + 587 + mm->context.low_slices_psize = lpsizes; 588 + mm->context.high_slices_psize = hpsizes; 589 + 590 + slice_dbg(" lsps=%lx, hsps=%lx\n", 591 + mm->context.low_slices_psize, 592 + mm->context.high_slices_psize); 593 + 594 + bail: 595 + spin_unlock_irqrestore(&slice_convert_lock, flags); 596 + } 597 + 598 + /* 599 + * is_hugepage_only_range() is used by generic code to verify wether 600 + * a normal mmap mapping (non hugetlbfs) is valid on a given area. 
601 + * 602 + * until the generic code provides a more generic hook and/or starts 603 + * calling arch get_unmapped_area for MAP_FIXED (which our implementation 604 + * here knows how to deal with), we hijack it to keep standard mappings 605 + * away from us. 606 + * 607 + * because of that generic code limitation, MAP_FIXED mapping cannot 608 + * "convert" back a slice with no VMAs to the standard page size, only 609 + * get_unmapped_area() can. It would be possible to fix it here but I 610 + * prefer working on fixing the generic code instead. 611 + * 612 + * WARNING: This will not work if hugetlbfs isn't enabled since the 613 + * generic code will redefine that function as 0 in that. This is ok 614 + * for now as we only use slices with hugetlbfs enabled. This should 615 + * be fixed as the generic code gets fixed. 616 + */ 617 + int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, 618 + unsigned long len) 619 + { 620 + struct slice_mask mask, available; 621 + 622 + mask = slice_range_to_mask(addr, len); 623 + available = slice_mask_for_size(mm, mm->context.user_psize); 624 + 625 + #if 0 /* too verbose */ 626 + slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n", 627 + mm, addr, len); 628 + slice_print_mask(" mask", mask); 629 + slice_print_mask(" available", available); 630 + #endif 631 + return !slice_check_fit(mask, available); 632 + } 633 +

+2 -2

arch/powerpc/mm/tlb_32.c

··· 111 111 if (start >= end) 112 112 return; 113 113 end = (end - 1) | ~PAGE_MASK; 114 - pmd = pmd_offset(pgd_offset(mm, start), start); 114 + pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); 115 115 for (;;) { 116 116 pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; 117 117 if (pmd_end > end) ··· 169 169 return; 170 170 } 171 171 mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; 172 - pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); 172 + pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); 173 173 if (!pmd_none(*pmd)) 174 174 flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); 175 175 FINISH_FLUSH;

+9 -3

arch/powerpc/mm/tlb_64.c

··· 143 143 */ 144 144 addr &= PAGE_MASK; 145 145 146 - /* Get page size (maybe move back to caller) */ 146 + /* Get page size (maybe move back to caller). 147 + * 148 + * NOTE: when using special 64K mappings in 4K environment like 149 + * for SPEs, we obtain the page size from the slice, which thus 150 + * must still exist (and thus the VMA not reused) at the time 151 + * of this call 152 + */ 147 153 if (huge) { 148 154 #ifdef CONFIG_HUGETLB_PAGE 149 155 psize = mmu_huge_psize; 150 156 #else 151 157 BUG(); 152 - psize = pte_pagesize_index(pte); /* shutup gcc */ 158 + psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ 153 159 #endif 154 160 } else 155 - psize = pte_pagesize_index(pte); 161 + psize = pte_pagesize_index(mm, addr, pte); 156 162 157 163 /* Build full vaddr */ 158 164 if (!is_kernel_addr(addr)) {

+1 -1

arch/powerpc/platforms/86xx/mpc86xx_smp.c

··· 15 15 #include <linux/init.h> 16 16 #include <linux/delay.h> 17 17 18 - #include <asm/pgtable.h> 19 18 #include <asm/page.h> 19 + #include <asm/pgtable.h> 20 20 #include <asm/pci-bridge.h> 21 21 #include <asm-powerpc/mpic.h> 22 22 #include <asm/mpc86xx.h>

+15

arch/powerpc/platforms/cell/Kconfig

··· 35 35 Units on machines implementing the Broadband Processor 36 36 Architecture. 37 37 38 + config SPU_FS_64K_LS 39 + bool "Use 64K pages to map SPE local store" 40 + # we depend on PPC_MM_SLICES for now rather than selecting 41 + # it because we depend on hugetlbfs hooks being present. We 42 + # will fix that when the generic code has been improved to 43 + # not require hijacking hugetlbfs hooks. 44 + depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES 45 + default y 46 + select PPC_HAS_HASH_64K 47 + help 48 + This option causes SPE local stores to be mapped in process 49 + address spaces using 64K pages while the rest of the kernel 50 + uses 4K pages. This can improve performances of applications 51 + using multiple SPEs by lowering the TLB pressure on them. 52 + 38 53 config SPU_BASE 39 54 bool 40 55 default n

+4 -5

arch/powerpc/platforms/cell/spu_base.c

··· 144 144 145 145 switch(REGION_ID(ea)) { 146 146 case USER_REGION_ID: 147 - #ifdef CONFIG_HUGETLB_PAGE 148 - if (in_hugepage_area(mm->context, ea)) 149 - psize = mmu_huge_psize; 150 - else 147 + #ifdef CONFIG_PPC_MM_SLICES 148 + psize = get_slice_psize(mm, ea); 149 + #else 150 + psize = mm->context.user_psize; 151 151 #endif 152 - psize = mm->context.user_psize; 153 152 vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | 154 153 SLB_VSID_USER; 155 154 break;

+1 -1

arch/powerpc/platforms/cell/spufs/Makefile

··· 1 - obj-y += switch.o fault.o 1 + obj-y += switch.o fault.o lscsa_alloc.o 2 2 3 3 obj-$(CONFIG_SPU_FS) += spufs.o 4 4 spufs-y += inode.o file.o context.o syscalls.o coredump.o

+1 -3

arch/powerpc/platforms/cell/spufs/context.c

··· 36 36 /* Binding to physical processor deferred 37 37 * until spu_activate(). 38 38 */ 39 - spu_init_csa(&ctx->csa); 40 - if (!ctx->csa.lscsa) { 39 + if (spu_init_csa(&ctx->csa)) 41 40 goto out_free; 42 - } 43 41 spin_lock_init(&ctx->mmio_lock); 44 42 spin_lock_init(&ctx->mapping_lock); 45 43 kref_init(&ctx->kref);

+65 -11

arch/powerpc/platforms/cell/spufs/file.c

··· 118 118 static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma, 119 119 unsigned long address) 120 120 { 121 - struct spu_context *ctx = vma->vm_file->private_data; 122 - unsigned long pfn, offset = address - vma->vm_start; 121 + struct spu_context *ctx = vma->vm_file->private_data; 122 + unsigned long pfn, offset, addr0 = address; 123 + #ifdef CONFIG_SPU_FS_64K_LS 124 + struct spu_state *csa = &ctx->csa; 125 + int psize; 123 126 124 - offset += vma->vm_pgoff << PAGE_SHIFT; 127 + /* Check what page size we are using */ 128 + psize = get_slice_psize(vma->vm_mm, address); 125 129 130 + /* Some sanity checking */ 131 + BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K)); 132 + 133 + /* Wow, 64K, cool, we need to align the address though */ 134 + if (csa->use_big_pages) { 135 + BUG_ON(vma->vm_start & 0xffff); 136 + address &= ~0xfffful; 137 + } 138 + #endif /* CONFIG_SPU_FS_64K_LS */ 139 + 140 + offset = (address - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT); 126 141 if (offset >= LS_SIZE) 127 142 return NOPFN_SIGBUS; 143 + 144 + pr_debug("spufs_mem_mmap_nopfn address=0x%lx -> 0x%lx, offset=0x%lx\n", 145 + addr0, address, offset); 128 146 129 147 spu_acquire(ctx); 130 148 ··· 167 149 .nopfn = spufs_mem_mmap_nopfn, 168 150 }; 169 151 170 - static int 171 - spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) 152 + static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) 172 153 { 154 + #ifdef CONFIG_SPU_FS_64K_LS 155 + struct spu_context *ctx = file->private_data; 156 + struct spu_state *csa = &ctx->csa; 157 + 158 + /* Sanity check VMA alignment */ 159 + if (csa->use_big_pages) { 160 + pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx," 161 + " pgoff=0x%lx\n", vma->vm_start, vma->vm_end, 162 + vma->vm_pgoff); 163 + if (vma->vm_start & 0xffff) 164 + return -EINVAL; 165 + if (vma->vm_pgoff & 0xf) 166 + return -EINVAL; 167 + } 168 + #endif /* CONFIG_SPU_FS_64K_LS */ 169 + 173 170 if (!(vma->vm_flags & VM_SHARED)) 174 171 
return -EINVAL; 175 172 ··· 196 163 return 0; 197 164 } 198 165 166 + #ifdef CONFIG_SPU_FS_64K_LS 167 + unsigned long spufs_get_unmapped_area(struct file *file, unsigned long addr, 168 + unsigned long len, unsigned long pgoff, 169 + unsigned long flags) 170 + { 171 + struct spu_context *ctx = file->private_data; 172 + struct spu_state *csa = &ctx->csa; 173 + 174 + /* If not using big pages, fallback to normal MM g_u_a */ 175 + if (!csa->use_big_pages) 176 + return current->mm->get_unmapped_area(file, addr, len, 177 + pgoff, flags); 178 + 179 + /* Else, try to obtain a 64K pages slice */ 180 + return slice_get_unmapped_area(addr, len, flags, 181 + MMU_PAGE_64K, 1, 0); 182 + } 183 + #endif /* CONFIG_SPU_FS_64K_LS */ 184 + 199 185 static const struct file_operations spufs_mem_fops = { 200 - .open = spufs_mem_open, 201 - .release = spufs_mem_release, 202 - .read = spufs_mem_read, 203 - .write = spufs_mem_write, 204 - .llseek = generic_file_llseek, 205 - .mmap = spufs_mem_mmap, 186 + .open = spufs_mem_open, 187 + .read = spufs_mem_read, 188 + .write = spufs_mem_write, 189 + .llseek = generic_file_llseek, 190 + .mmap = spufs_mem_mmap, 191 + #ifdef CONFIG_SPU_FS_64K_LS 192 + .get_unmapped_area = spufs_get_unmapped_area, 193 + #endif 206 194 }; 207 195 208 196 static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,

+181

arch/powerpc/platforms/cell/spufs/lscsa_alloc.c

··· 1 + /* 2 + * SPU local store allocation routines 3 + * 4 + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License as published by 8 + * the Free Software Foundation; either version 2, or (at your option) 9 + * any later version. 10 + * 11 + * This program is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write to the Free Software 18 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 19 + */ 20 + 21 + #undef DEBUG 22 + 23 + #include <linux/kernel.h> 24 + #include <linux/mm.h> 25 + #include <linux/vmalloc.h> 26 + 27 + #include <asm/spu.h> 28 + #include <asm/spu_csa.h> 29 + #include <asm/mmu.h> 30 + 31 + static int spu_alloc_lscsa_std(struct spu_state *csa) 32 + { 33 + struct spu_lscsa *lscsa; 34 + unsigned char *p; 35 + 36 + lscsa = vmalloc(sizeof(struct spu_lscsa)); 37 + if (!lscsa) 38 + return -ENOMEM; 39 + memset(lscsa, 0, sizeof(struct spu_lscsa)); 40 + csa->lscsa = lscsa; 41 + 42 + /* Set LS pages reserved to allow for user-space mapping. */ 43 + for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) 44 + SetPageReserved(vmalloc_to_page(p)); 45 + 46 + return 0; 47 + } 48 + 49 + static void spu_free_lscsa_std(struct spu_state *csa) 50 + { 51 + /* Clear reserved bit before vfree. 
*/ 52 + unsigned char *p; 53 + 54 + if (csa->lscsa == NULL) 55 + return; 56 + 57 + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) 58 + ClearPageReserved(vmalloc_to_page(p)); 59 + 60 + vfree(csa->lscsa); 61 + } 62 + 63 + #ifdef CONFIG_SPU_FS_64K_LS 64 + 65 + #define SPU_64K_PAGE_SHIFT 16 66 + #define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT) 67 + #define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER) 68 + 69 + int spu_alloc_lscsa(struct spu_state *csa) 70 + { 71 + struct page **pgarray; 72 + unsigned char *p; 73 + int i, j, n_4k; 74 + 75 + /* Check availability of 64K pages */ 76 + if (mmu_psize_defs[MMU_PAGE_64K].shift == 0) 77 + goto fail; 78 + 79 + csa->use_big_pages = 1; 80 + 81 + pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n", 82 + csa); 83 + 84 + /* First try to allocate our 64K pages. We need 5 of them 85 + * with the current implementation. In the future, we should try 86 + * to separate the lscsa with the actual local store image, thus 87 + * allowing us to require only 4 64K pages per context 88 + */ 89 + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) { 90 + /* XXX This is likely to fail, we should use a special pool 91 + * similar to what hugetlbfs does. 92 + */ 93 + csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL, 94 + SPU_64K_PAGE_ORDER); 95 + if (csa->lscsa_pages[i] == NULL) 96 + goto fail; 97 + } 98 + 99 + pr_debug(" success ! creating vmap...\n"); 100 + 101 + /* Now we need to create a vmalloc mapping of these for the kernel 102 + * and SPU context switch code to use. 
Currently, we stick to a 103 + * normal kernel vmalloc mapping, which in our case will be 4K 104 + */ 105 + n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES; 106 + pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL); 107 + if (pgarray == NULL) 108 + goto fail; 109 + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) 110 + for (j = 0; j < SPU_64K_PAGE_COUNT; j++) 111 + /* We assume all the struct page's are contiguous 112 + * which should be hopefully the case for an order 4 113 + * allocation.. 114 + */ 115 + pgarray[i * SPU_64K_PAGE_COUNT + j] = 116 + csa->lscsa_pages[i] + j; 117 + csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL); 118 + kfree(pgarray); 119 + if (csa->lscsa == NULL) 120 + goto fail; 121 + 122 + memset(csa->lscsa, 0, sizeof(struct spu_lscsa)); 123 + 124 + /* Set LS pages reserved to allow for user-space mapping. 125 + * 126 + * XXX isn't that a bit obsolete ? I think we should just 127 + * make sure the page count is high enough. Anyway, won't harm 128 + * for now 129 + */ 130 + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) 131 + SetPageReserved(vmalloc_to_page(p)); 132 + 133 + pr_debug(" all good !\n"); 134 + 135 + return 0; 136 + fail: 137 + pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n"); 138 + spu_free_lscsa(csa); 139 + return spu_alloc_lscsa_std(csa); 140 + } 141 + 142 + void spu_free_lscsa(struct spu_state *csa) 143 + { 144 + unsigned char *p; 145 + int i; 146 + 147 + if (!csa->use_big_pages) { 148 + spu_free_lscsa_std(csa); 149 + return; 150 + } 151 + csa->use_big_pages = 0; 152 + 153 + if (csa->lscsa == NULL) 154 + goto free_pages; 155 + 156 + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) 157 + ClearPageReserved(vmalloc_to_page(p)); 158 + 159 + vunmap(csa->lscsa); 160 + csa->lscsa = NULL; 161 + 162 + free_pages: 163 + 164 + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) 165 + if (csa->lscsa_pages[i]) 166 + __free_pages(csa->lscsa_pages[i], 
SPU_64K_PAGE_ORDER); 167 + } 168 + 169 + #else /* CONFIG_SPU_FS_64K_LS */ 170 + 171 + int spu_alloc_lscsa(struct spu_state *csa) 172 + { 173 + return spu_alloc_lscsa_std(csa); 174 + } 175 + 176 + void spu_free_lscsa(struct spu_state *csa) 177 + { 178 + spu_free_lscsa_std(csa); 179 + } 180 + 181 + #endif /* !defined(CONFIG_SPU_FS_64K_LS) */

+9 -19

arch/powerpc/platforms/cell/spufs/switch.c

··· 2188 2188 * as it is by far the largest of the context save regions, 2189 2189 * and may need to be pinned or otherwise specially aligned. 2190 2190 */ 2191 - void spu_init_csa(struct spu_state *csa) 2191 + int spu_init_csa(struct spu_state *csa) 2192 2192 { 2193 - struct spu_lscsa *lscsa; 2194 - unsigned char *p; 2193 + int rc; 2195 2194 2196 2195 if (!csa) 2197 - return; 2196 + return -EINVAL; 2198 2197 memset(csa, 0, sizeof(struct spu_state)); 2199 2198 2200 - lscsa = vmalloc(sizeof(struct spu_lscsa)); 2201 - if (!lscsa) 2202 - return; 2199 + rc = spu_alloc_lscsa(csa); 2200 + if (rc) 2201 + return rc; 2203 2202 2204 - memset(lscsa, 0, sizeof(struct spu_lscsa)); 2205 - csa->lscsa = lscsa; 2206 2203 spin_lock_init(&csa->register_lock); 2207 - 2208 - /* Set LS pages reserved to allow for user-space mapping. */ 2209 - for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) 2210 - SetPageReserved(vmalloc_to_page(p)); 2211 2204 2212 2205 init_prob(csa); 2213 2206 init_priv1(csa); 2214 2207 init_priv2(csa); 2208 + 2209 + return 0; 2215 2210 } 2216 2211 EXPORT_SYMBOL_GPL(spu_init_csa); 2217 2212 2218 2213 void spu_fini_csa(struct spu_state *csa) 2219 2214 { 2220 - /* Clear reserved bit before vfree. */ 2221 - unsigned char *p; 2222 - for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) 2223 - ClearPageReserved(vmalloc_to_page(p)); 2224 - 2225 - vfree(csa->lscsa); 2215 + spu_free_lscsa(csa); 2226 2216 } 2227 2217 EXPORT_SYMBOL_GPL(spu_fini_csa);

+3 -1

arch/powerpc/platforms/iseries/Kconfig

··· 7 7 depends on PPC_ISERIES 8 8 9 9 config VIOCONS 10 - tristate "iSeries Virtual Console Support (Obsolete)" 10 + bool "iSeries Virtual Console Support (Obsolete)" 11 + depends on !HVC_ISERIES 12 + default n 11 13 help 12 14 This is the old virtual console driver for legacy iSeries. 13 15 You should use the iSeries Hypervisor Virtual Console

+85 -2

arch/powerpc/platforms/pseries/eeh.c

··· 100 100 static DEFINE_SPINLOCK(slot_errbuf_lock); 101 101 static int eeh_error_buf_size; 102 102 103 + #define EEH_PCI_REGS_LOG_LEN 4096 104 + static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; 105 + 103 106 /* System monitoring statistics */ 104 107 static unsigned long no_device; 105 108 static unsigned long no_dn; ··· 118 115 /* --------------------------------------------------------------- */ 119 116 /* Below lies the EEH event infrastructure */ 120 117 121 - void eeh_slot_error_detail (struct pci_dn *pdn, int severity) 118 + static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, 119 + char *driver_log, size_t loglen) 122 120 { 123 121 int config_addr; 124 122 unsigned long flags; ··· 137 133 rc = rtas_call(ibm_slot_error_detail, 138 134 8, 1, NULL, config_addr, 139 135 BUID_HI(pdn->phb->buid), 140 - BUID_LO(pdn->phb->buid), NULL, 0, 136 + BUID_LO(pdn->phb->buid), 137 + virt_to_phys(driver_log), loglen, 141 138 virt_to_phys(slot_errbuf), 142 139 eeh_error_buf_size, 143 140 severity); ··· 146 141 if (rc == 0) 147 142 log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); 148 143 spin_unlock_irqrestore(&slot_errbuf_lock, flags); 144 + } 145 + 146 + /** 147 + * gather_pci_data - copy assorted PCI config space registers to buff 148 + * @pdn: device to report data for 149 + * @buf: point to buffer in which to log 150 + * @len: amount of room in buffer 151 + * 152 + * This routine captures assorted PCI configuration space data, 153 + * and puts them into a buffer for RTAS error logging. 
154 + */ 155 + static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) 156 + { 157 + u32 cfg; 158 + int cap, i; 159 + int n = 0; 160 + 161 + n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name); 162 + printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name); 163 + 164 + rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg); 165 + n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 166 + printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); 167 + 168 + rtas_read_config(pdn, PCI_COMMAND, 4, &cfg); 169 + n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 170 + printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); 171 + 172 + /* Dump out the PCI-X command and status regs */ 173 + cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_PCIX); 174 + if (cap) { 175 + rtas_read_config(pdn, cap, 4, &cfg); 176 + n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 177 + printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); 178 + 179 + rtas_read_config(pdn, cap+4, 4, &cfg); 180 + n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 181 + printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); 182 + } 183 + 184 + /* If PCI-E capable, dump PCI-E cap 10, and the AER */ 185 + cap = pci_find_capability(pdn->pcidev, PCI_CAP_ID_EXP); 186 + if (cap) { 187 + n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 188 + printk(KERN_WARNING 189 + "EEH: PCI-E capabilities and status follow:\n"); 190 + 191 + for (i=0; i<=8; i++) { 192 + rtas_read_config(pdn, cap+4*i, 4, &cfg); 193 + n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 194 + printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); 195 + } 196 + 197 + cap = pci_find_ext_capability(pdn->pcidev,PCI_EXT_CAP_ID_ERR); 198 + if (cap) { 199 + n += scnprintf(buf+n, len-n, "pci-e AER:\n"); 200 + printk(KERN_WARNING 201 + "EEH: PCI-E AER capability register set follows:\n"); 202 + 203 + for (i=0; i<14; i++) { 204 + rtas_read_config(pdn, cap+4*i, 4, &cfg); 205 + n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, 
cfg); 206 + printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); 207 + } 208 + } 209 + } 210 + return n; 211 + } 212 + 213 + void eeh_slot_error_detail(struct pci_dn *pdn, int severity) 214 + { 215 + size_t loglen = 0; 216 + memset(pci_regs_buf, 0, EEH_PCI_REGS_LOG_LEN); 217 + 218 + rtas_pci_enable(pdn, EEH_THAW_MMIO); 219 + loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); 220 + 221 + rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); 149 222 } 150 223 151 224 /**

+10 -4

arch/powerpc/platforms/pseries/eeh_driver.c

··· 361 361 goto hard_fail; 362 362 } 363 363 364 - eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); 365 364 printk(KERN_WARNING 366 - "EEH: This PCI device has failed %d times since last reboot: " 367 - "location=%s driver=%s pci addr=%s\n", 368 - frozen_pdn->eeh_freeze_count, location, drv_str, pci_str); 365 + "EEH: This PCI device has failed %d times in the last hour:\n", 366 + frozen_pdn->eeh_freeze_count); 367 + printk(KERN_WARNING 368 + "EEH: location=%s driver=%s pci addr=%s\n", 369 + location, drv_str, pci_str); 369 370 370 371 /* Walk the various device drivers attached to this slot through 371 372 * a reset sequence, giving each an opportunity to do what it needs ··· 375 374 * slot is dlpar removed and added. 376 375 */ 377 376 pci_walk_bus(frozen_bus, eeh_report_error, &result); 377 + 378 + /* Since rtas may enable MMIO when posting the error log, 379 + * don't post the error log until after all dev drivers 380 + * have been informed. */ 381 + eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); 378 382 379 383 /* If all device drivers were EEH-unaware, then shut 380 384 * down all of the device drivers, and hope they

+1 -1

arch/powerpc/sysdev/fsl_soc.c

··· 907 907 struct fs_platform_info fs_enet_data; 908 908 const unsigned int *id; 909 909 const unsigned int *phy_addr; 910 - void *mac_addr; 910 + const void *mac_addr; 911 911 const phandle *ph; 912 912 const char *model; 913 913

+2 -1

drivers/char/Kconfig

··· 631 631 632 632 config HVC_ISERIES 633 633 bool "iSeries Hypervisor Virtual Console support" 634 - depends on PPC_ISERIES && !VIOCONS 634 + depends on PPC_ISERIES 635 + default y 635 636 select HVC_DRIVER 636 637 help 637 638 iSeries machines support a hypervisor virtual console.

+7 -4

include/asm-powerpc/mmu-hash64.h

··· 350 350 351 351 typedef struct { 352 352 mm_context_id_t id; 353 - u16 user_psize; /* page size index */ 354 - u16 sllp; /* SLB entry page size encoding */ 355 - #ifdef CONFIG_HUGETLB_PAGE 356 - u16 low_htlb_areas, high_htlb_areas; 353 + u16 user_psize; /* page size index */ 354 + 355 + #ifdef CONFIG_PPC_MM_SLICES 356 + u64 low_slices_psize; /* SLB page size encodings */ 357 + u64 high_slices_psize; /* 4 bits per slice for now */ 358 + #else 359 + u16 sllp; /* SLB page size encoding */ 357 360 #endif 358 361 unsigned long vdso_base; 359 362 } mm_context_t;

+1 -1

include/asm-powerpc/paca.h

··· 83 83 84 84 mm_context_t context; 85 85 u16 vmalloc_sllp; 86 - u16 slb_cache[SLB_CACHE_ENTRIES]; 87 86 u16 slb_cache_ptr; 87 + u16 slb_cache[SLB_CACHE_ENTRIES]; 88 88 89 89 /* 90 90 * then miscellaneous read-write fields

+42 -44

include/asm-powerpc/page_64.h

··· 88 88 89 89 #endif /* __ASSEMBLY__ */ 90 90 91 - #ifdef CONFIG_HUGETLB_PAGE 91 + #ifdef CONFIG_PPC_MM_SLICES 92 92 93 - #define HTLB_AREA_SHIFT 40 94 - #define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) 95 - #define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) 93 + #define SLICE_LOW_SHIFT 28 94 + #define SLICE_HIGH_SHIFT 40 96 95 97 - #define LOW_ESID_MASK(addr, len) \ 98 - (((1U << (GET_ESID(min((addr)+(len)-1, 0x100000000UL))+1)) \ 99 - - (1U << GET_ESID(min((addr), 0x100000000UL)))) & 0xffff) 100 - #define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ 101 - - (1U << GET_HTLB_AREA(addr))) & 0xffff) 96 + #define SLICE_LOW_TOP (0x100000000ul) 97 + #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) 98 + #define SLICE_NUM_HIGH (PGTABLE_RANGE >> SLICE_HIGH_SHIFT) 99 + 100 + #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) 101 + #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) 102 + 103 + #ifndef __ASSEMBLY__ 104 + 105 + struct slice_mask { 106 + u16 low_slices; 107 + u16 high_slices; 108 + }; 109 + 110 + struct mm_struct; 111 + 112 + extern unsigned long slice_get_unmapped_area(unsigned long addr, 113 + unsigned long len, 114 + unsigned long flags, 115 + unsigned int psize, 116 + int topdown, 117 + int use_cache); 118 + 119 + extern unsigned int get_slice_psize(struct mm_struct *mm, 120 + unsigned long addr); 121 + 122 + extern void slice_init_context(struct mm_struct *mm, unsigned int psize); 123 + extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); 102 124 103 125 #define ARCH_HAS_HUGEPAGE_ONLY_RANGE 126 + extern int is_hugepage_only_range(struct mm_struct *m, 127 + unsigned long addr, 128 + unsigned long len); 129 + 130 + #endif /* __ASSEMBLY__ */ 131 + #else 132 + #define slice_init() 133 + #endif /* CONFIG_PPC_MM_SLICES */ 134 + 135 + #ifdef CONFIG_HUGETLB_PAGE 136 + 104 137 #define ARCH_HAS_HUGETLB_FREE_PGD_RANGE 105 - #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE 106 138 #define 
ARCH_HAS_SETCLEAR_HUGE_PTE 107 - 108 - #define touches_hugepage_low_range(mm, addr, len) \ 109 - (((addr) < 0x100000000UL) \ 110 - && (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas)) 111 - #define touches_hugepage_high_range(mm, addr, len) \ 112 - ((((addr) + (len)) > 0x100000000UL) \ 113 - && (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas)) 114 - 115 - #define __within_hugepage_low_range(addr, len, segmask) \ 116 - ( (((addr)+(len)) <= 0x100000000UL) \ 117 - && ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask))) 118 - #define within_hugepage_low_range(addr, len) \ 119 - __within_hugepage_low_range((addr), (len), \ 120 - current->mm->context.low_htlb_areas) 121 - #define __within_hugepage_high_range(addr, len, zonemask) \ 122 - ( ((addr) >= 0x100000000UL) \ 123 - && ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask))) 124 - #define within_hugepage_high_range(addr, len) \ 125 - __within_hugepage_high_range((addr), (len), \ 126 - current->mm->context.high_htlb_areas) 127 - 128 - #define is_hugepage_only_range(mm, addr, len) \ 129 - (touches_hugepage_high_range((mm), (addr), (len)) || \ 130 - touches_hugepage_low_range((mm), (addr), (len))) 131 139 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA 132 - 133 - #define in_hugepage_area(context, addr) \ 134 - (cpu_has_feature(CPU_FTR_16M_PAGE) && \ 135 - ( ( (addr) >= 0x100000000UL) \ 136 - ? ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) \ 137 - : ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) 138 - 139 - #else /* !CONFIG_HUGETLB_PAGE */ 140 - 141 - #define in_hugepage_area(mm, addr) 0 142 140 143 141 #endif /* !CONFIG_HUGETLB_PAGE */ 144 142

+13 -18

include/asm-powerpc/pgalloc-64.h

··· 14 14 15 15 extern struct kmem_cache *pgtable_cache[]; 16 16 17 - #ifdef CONFIG_PPC_64K_PAGES 18 - #define PTE_CACHE_NUM 0 19 - #define PMD_CACHE_NUM 1 20 - #define PGD_CACHE_NUM 2 21 - #define HUGEPTE_CACHE_NUM 3 22 - #else 23 - #define PTE_CACHE_NUM 0 24 - #define PMD_CACHE_NUM 1 25 - #define PUD_CACHE_NUM 1 26 - #define PGD_CACHE_NUM 0 27 - #define HUGEPTE_CACHE_NUM 2 28 - #endif 17 + #define PGD_CACHE_NUM 0 18 + #define PUD_CACHE_NUM 1 19 + #define PMD_CACHE_NUM 1 20 + #define HUGEPTE_CACHE_NUM 2 21 + #define PTE_NONCACHE_NUM 3 /* from GFP rather than kmem_cache */ 29 22 30 23 static inline pgd_t *pgd_alloc(struct mm_struct *mm) 31 24 { ··· 84 91 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, 85 92 unsigned long address) 86 93 { 87 - return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], 88 - GFP_KERNEL|__GFP_REPEAT); 94 + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); 89 95 } 90 96 91 97 static inline struct page *pte_alloc_one(struct mm_struct *mm, ··· 95 103 96 104 static inline void pte_free_kernel(pte_t *pte) 97 105 { 98 - kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte); 106 + free_page((unsigned long)pte); 99 107 } 100 108 101 109 static inline void pte_free(struct page *ptepage) 102 110 { 103 - pte_free_kernel(page_address(ptepage)); 111 + __free_page(ptepage); 104 112 } 105 113 106 114 #define PGF_CACHENUM_MASK 0x3 ··· 122 130 void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK); 123 131 int cachenum = pgf.val & PGF_CACHENUM_MASK; 124 132 125 - kmem_cache_free(pgtable_cache[cachenum], p); 133 + if (cachenum == PTE_NONCACHE_NUM) 134 + free_page((unsigned long)p); 135 + else 136 + kmem_cache_free(pgtable_cache[cachenum], p); 126 137 } 127 138 128 139 extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); 129 140 130 141 #define __pte_free_tlb(tlb, ptepage) \ 131 142 pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ 132 - PTE_CACHE_NUM, PTE_TABLE_SIZE-1)) 143 + 
PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)) 133 144 #define __pmd_free_tlb(tlb, pmd) \ 134 145 pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ 135 146 PMD_CACHE_NUM, PMD_TABLE_SIZE-1))

+5 -1

include/asm-powerpc/pgtable-4k.h

··· 80 80 81 81 #define pte_iterate_hashed_end() } while(0) 82 82 83 - #define pte_pagesize_index(pte) MMU_PAGE_4K 83 + #ifdef CONFIG_PPC_HAS_HASH_64K 84 + #define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr) 85 + #else 86 + #define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K 87 + #endif 84 88 85 89 /* 86 90 * 4-level page tables related bits

+6 -1

include/asm-powerpc/pgtable-64k.h

··· 35 35 #define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */ 36 36 #define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */ 37 37 #define _PAGE_4K_PFN 0x20000000 /* PFN is for a single 4k page */ 38 + 39 + /* Note the full page bits must be in the same location as for normal 40 + * 4k pages as the same assembly will be used to insert 64K pages 41 + * whether the kernel has CONFIG_PPC_64K_PAGES or not 42 + */ 38 43 #define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */ 39 44 #define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */ 40 45 ··· 93 88 94 89 #define pte_iterate_hashed_end() } while(0); } } while(0) 95 90 96 - #define pte_pagesize_index(pte) \ 91 + #define pte_pagesize_index(mm, addr, pte) \ 97 92 (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) 98 93 99 94 #define remap_4k_pfn(vma, addr, pfn, prot) \

+9 -1

include/asm-powerpc/spu_csa.h

··· 235 235 */ 236 236 struct spu_state { 237 237 struct spu_lscsa *lscsa; 238 + #ifdef CONFIG_SPU_FS_64K_LS 239 + int use_big_pages; 240 + /* One struct page per 64k page */ 241 + #define SPU_LSCSA_NUM_BIG_PAGES (sizeof(struct spu_lscsa) / 0x10000) 242 + struct page *lscsa_pages[SPU_LSCSA_NUM_BIG_PAGES]; 243 + #endif 238 244 struct spu_problem_collapsed prob; 239 245 struct spu_priv1_collapsed priv1; 240 246 struct spu_priv2_collapsed priv2; ··· 253 247 spinlock_t register_lock; 254 248 }; 255 249 256 - extern void spu_init_csa(struct spu_state *csa); 250 + extern int spu_init_csa(struct spu_state *csa); 257 251 extern void spu_fini_csa(struct spu_state *csa); 258 252 extern int spu_save(struct spu_state *prev, struct spu *spu); 259 253 extern int spu_restore(struct spu_state *new, struct spu *spu); 260 254 extern int spu_switch(struct spu_state *prev, struct spu_state *new, 261 255 struct spu *spu); 256 + extern int spu_alloc_lscsa(struct spu_state *csa); 257 + extern void spu_free_lscsa(struct spu_state *csa); 262 258 263 259 #endif /* !__SPU__ */ 264 260 #endif /* __KERNEL__ */

+10 -1

include/linux/suspend.h

··· 52 52 53 53 #if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) 54 54 /* kernel/power/snapshot.c */ 55 - extern void __init register_nosave_region(unsigned long, unsigned long); 55 + extern void __register_nosave_region(unsigned long b, unsigned long e, int km); 56 + static inline void register_nosave_region(unsigned long b, unsigned long e) 57 + { 58 + __register_nosave_region(b, e, 0); 59 + } 60 + static inline void register_nosave_region_late(unsigned long b, unsigned long e) 61 + { 62 + __register_nosave_region(b, e, 1); 63 + } 56 64 extern int swsusp_page_is_forbidden(struct page *); 57 65 extern void swsusp_set_page_free(struct page *); 58 66 extern void swsusp_unset_page_free(struct page *); ··· 70 62 extern int hibernate(void); 71 63 #else 72 64 static inline void register_nosave_region(unsigned long b, unsigned long e) {} 65 + static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} 73 66 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } 74 67 static inline void swsusp_set_page_free(struct page *p) {} 75 68 static inline void swsusp_unset_page_free(struct page *p) {}

+9 -3

kernel/power/snapshot.c

··· 607 607 */ 608 608 609 609 void __init 610 - register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) 610 + __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, 611 + int use_kmalloc) 611 612 { 612 613 struct nosave_region *region; 613 614 ··· 624 623 goto Report; 625 624 } 626 625 } 627 - /* This allocation cannot fail */ 628 - region = alloc_bootmem_low(sizeof(struct nosave_region)); 626 + if (use_kmalloc) { 627 + /* during init, this shouldn't fail */ 628 + region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); 629 + BUG_ON(!region); 630 + } else 631 + /* This allocation cannot fail */ 632 + region = alloc_bootmem_low(sizeof(struct nosave_region)); 629 633 region->start_pfn = start_pfn; 630 634 region->end_pfn = end_pfn; 631 635 list_add_tail(&region->list, &nosave_regions);