Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v3.0-rc2
/*
 * PowerPC64 port by Mike Corrigan and Dave Engebretsen
 *   {mikejc|engebret}@us.ibm.com
 *
 *    Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 *    Module name: htab.c
 *
 *    Description:
 *      PowerPC Hashed Page Table functions
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG
#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/memblock.h>

#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/types.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/abs_addr.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/eeh.h>
#include <asm/tlb.h>
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/spu.h>
#include <asm/udbg.h>
#include <asm/code-patching.h>

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define KB (1024)
#define MB (1024*KB)
#define GB (1024L*MB)

/*
 * Note:  pte   --> Linux PTE
 *        HPTE  --> PowerPC Hashed Page Table Entry
 *
 * Execution context:
 *   htab_initialize is called with the MMU off (of course), but
 *   the kernel has been copied down to zero so it can directly
 *   reference global data.  At this point it is very difficult
 *   to print debug info.
 *
 */

#ifdef CONFIG_U3_DART
extern unsigned long dart_tablebase;
#endif /* CONFIG_U3_DART */

static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];

struct hash_pte *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
EXPORT_SYMBOL_GPL(htab_hash_mask);
int mmu_linear_psize = MMU_PAGE_4K;
int mmu_virtual_psize = MMU_PAGE_4K;
int mmu_vmalloc_psize = MMU_PAGE_4K;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int mmu_vmemmap_psize = MMU_PAGE_4K;
#endif
int mmu_io_psize = MMU_PAGE_4K;
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
int mmu_highuser_ssize = MMU_SEGSIZE_256M;
u16 mmu_slb_size = 64;
EXPORT_SYMBOL_GPL(mmu_slb_size);
#ifdef CONFIG_HUGETLB_PAGE
unsigned int HPAGE_SHIFT;
#endif
#ifdef CONFIG_PPC_64K_PAGES
int mmu_ci_restrictions;
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
static u8 *linear_map_hash_slots;
static unsigned long linear_map_hash_count;
static DEFINE_SPINLOCK(linear_map_hash_lock);
#endif /* CONFIG_DEBUG_PAGEALLOC */

/* These are the definitions of the page size arrays to be used when none
 * is provided by the firmware.
 */

/* Pre-POWER4 CPUs (4k pages only)
 */
static struct mmu_psize_def mmu_psize_defaults_old[] = {
        [MMU_PAGE_4K] = {
                .shift  = 12,
                .sllp   = 0,
                .penc   = 0,
                .avpnm  = 0,
                .tlbiel = 0,
        },
};

/* POWER4, GPUL, POWER5
 *
 * Support for 16Mb large pages
 */
static struct mmu_psize_def mmu_psize_defaults_gp[] = {
        [MMU_PAGE_4K] = {
                .shift  = 12,
                .sllp   = 0,
                .penc   = 0,
                .avpnm  = 0,
                .tlbiel = 1,
        },
        [MMU_PAGE_16M] = {
                .shift  = 24,
                .sllp   = SLB_VSID_L,
                .penc   = 0,
                .avpnm  = 0x1UL,
                .tlbiel = 0,
        },
};

static unsigned long htab_convert_pte_flags(unsigned long pteflags)
{
        unsigned long rflags = pteflags & 0x1fa;

        /* _PAGE_EXEC -> NOEXEC */
        if ((pteflags & _PAGE_EXEC) == 0)
                rflags |= HPTE_R_N;

        /* PP bits. PAGE_USER is already PP bit 0x2, so we only
         * need to add in 0x1 if it's a read-only user page
         */
        if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) &&
                                         (pteflags & _PAGE_DIRTY)))
                rflags |= 1;

        /* Always add C */
        return rflags | HPTE_R_C;
}

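/*
 * Illustrative note (editorial, not from the original source): assuming the
 * usual hash-MMU PP encoding, the scheme above roughly works out to:
 *
 *   kernel-only page        -> PP = 0 (supervisor RW, no user access)
 *   user page, RW and DIRTY -> PP = 2 (user and supervisor RW)
 *   user page, RO or clean  -> PP = 3 (read-only)
 *
 * Hashing a writable-but-clean user page as read-only means the first store
 * takes a protection fault, giving the generic code a chance to set
 * _PAGE_DIRTY before the page ever becomes writable through the hash table.
 */
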
int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
                      unsigned long pstart, unsigned long prot,
                      int psize, int ssize)
{
        unsigned long vaddr, paddr;
        unsigned int step, shift;
        int ret = 0;

        shift = mmu_psize_defs[psize].shift;
        step = 1 << shift;

        prot = htab_convert_pte_flags(prot);

        DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
            vstart, vend, pstart, prot, psize, ssize);

        for (vaddr = vstart, paddr = pstart; vaddr < vend;
             vaddr += step, paddr += step) {
                unsigned long hash, hpteg;
                unsigned long vsid = get_kernel_vsid(vaddr, ssize);
                unsigned long va = hpt_va(vaddr, vsid, ssize);
                unsigned long tprot = prot;

                /* Make kernel text executable */
                if (overlaps_kernel_text(vaddr, vaddr + step))
                        tprot &= ~HPTE_R_N;

                hash = hpt_hash(va, shift, ssize);
                hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);

                BUG_ON(!ppc_md.hpte_insert);
                ret = ppc_md.hpte_insert(hpteg, va, paddr, tprot,
                                         HPTE_V_BOLTED, psize, ssize);

                if (ret < 0)
                        break;
#ifdef CONFIG_DEBUG_PAGEALLOC
                if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
                        linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
#endif /* CONFIG_DEBUG_PAGEALLOC */
        }
        return ret < 0 ? ret : 0;
}

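/*
 * Worked example (editorial, illustrative): the hash table is an array of
 * PTE groups (PTEGs) of HPTES_PER_GROUP (8) entries each. "hash &
 * htab_hash_mask" selects a group, and multiplying by HPTES_PER_GROUP turns
 * that into the index of the group's first slot, which is what
 * ppc_md.hpte_insert() expects; the platform code then picks a free slot
 * within the group. With a 128MB hash table (2^20 PTEGs), htab_hash_mask is
 * 0xfffff and hpteg takes the values 0, 8, 16, ... up to (2^20 - 1) * 8.
 */
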
#ifdef CONFIG_MEMORY_HOTPLUG
static int htab_remove_mapping(unsigned long vstart, unsigned long vend,
                               int psize, int ssize)
{
        unsigned long vaddr;
        unsigned int step, shift;

        shift = mmu_psize_defs[psize].shift;
        step = 1 << shift;

        if (!ppc_md.hpte_removebolted) {
                printk(KERN_WARNING "Platform doesn't implement "
                                "hpte_removebolted\n");
                return -EINVAL;
        }

        for (vaddr = vstart; vaddr < vend; vaddr += step)
                ppc_md.hpte_removebolted(vaddr, psize, ssize);

        return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

static int __init htab_dt_scan_seg_sizes(unsigned long node,
                                         const char *uname, int depth,
                                         void *data)
{
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        u32 *prop;
        unsigned long size = 0;

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        prop = (u32 *)of_get_flat_dt_prop(node, "ibm,processor-segment-sizes",
                                          &size);
        if (prop == NULL)
                return 0;
        for (; size >= 4; size -= 4, ++prop) {
                if (prop[0] == 40) {
                        DBG("1T segment support detected\n");
                        cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
                        return 1;
                }
        }
        cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
        return 0;
}

static void __init htab_init_seg_sizes(void)
{
        of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
}

static int __init htab_dt_scan_page_sizes(unsigned long node,
                                          const char *uname, int depth,
                                          void *data)
{
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        u32 *prop;
        unsigned long size = 0;

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        prop = (u32 *)of_get_flat_dt_prop(node,
                                          "ibm,segment-page-sizes", &size);
        if (prop != NULL) {
                DBG("Page sizes from device-tree:\n");
                size /= 4;
                cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
                while(size > 0) {
                        unsigned int shift = prop[0];
                        unsigned int slbenc = prop[1];
                        unsigned int lpnum = prop[2];
                        unsigned int lpenc = 0;
                        struct mmu_psize_def *def;
                        int idx = -1;

                        size -= 3; prop += 3;
                        while(size > 0 && lpnum) {
                                if (prop[0] == shift)
                                        lpenc = prop[1];
                                prop += 2; size -= 2;
                                lpnum--;
                        }
                        switch(shift) {
                        case 0xc:
                                idx = MMU_PAGE_4K;
                                break;
                        case 0x10:
                                idx = MMU_PAGE_64K;
                                break;
                        case 0x14:
                                idx = MMU_PAGE_1M;
                                break;
                        case 0x18:
                                idx = MMU_PAGE_16M;
                                cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
                                break;
                        case 0x22:
                                idx = MMU_PAGE_16G;
                                break;
                        }
                        if (idx < 0)
                                continue;
                        def = &mmu_psize_defs[idx];
                        def->shift = shift;
                        if (shift <= 23)
                                def->avpnm = 0;
                        else
                                def->avpnm = (1 << (shift - 23)) - 1;
                        def->sllp = slbenc;
                        def->penc = lpenc;
                        /* We don't know for sure what's up with tlbiel, so
                         * for now we only set it for 4K and 64K pages
                         */
                        if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
                                def->tlbiel = 1;
                        else
                                def->tlbiel = 0;

                        DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, "
                            "tlbiel=%d, penc=%d\n",
                            idx, shift, def->sllp, def->avpnm, def->tlbiel,
                            def->penc);
                }
                return 1;
        }
        return 0;
}

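/*
 * Editorial note on the property layout, as implied by the parser above:
 * "ibm,segment-page-sizes" is a flat list of 32-bit cells, repeated once per
 * supported base (segment) page size:
 *
 *   { base-shift, slb-encoding, N,
 *     actual-shift[0], penc[0], ..., actual-shift[N-1], penc[N-1] }
 *
 * e.g. a 16M entry would look like { 0x18, <sllp>, 1, 0x18, <penc> }. This
 * version of the code only records the HPTE encoding (penc) for the case
 * where the actual page size equals the base page size.
 */
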
#ifdef CONFIG_HUGETLB_PAGE
/* Scan for 16G memory blocks that have been set aside for huge pages
 * and reserve those blocks for 16G huge pages.
 */
static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
                                        const char *uname, int depth,
                                        void *data) {
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        unsigned long *addr_prop;
        u32 *page_count_prop;
        unsigned int expected_pages;
        long unsigned int phys_addr;
        long unsigned int block_size;

        /* We are scanning "memory" nodes only */
        if (type == NULL || strcmp(type, "memory") != 0)
                return 0;

        /* This property is the log base 2 of the number of virtual pages that
         * will represent this memory block. */
        page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
        if (page_count_prop == NULL)
                return 0;
        expected_pages = (1 << page_count_prop[0]);
        addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
        if (addr_prop == NULL)
                return 0;
        phys_addr = addr_prop[0];
        block_size = addr_prop[1];
        if (block_size != (16 * GB))
                return 0;
        printk(KERN_INFO "Huge page(16GB) memory: "
                        "addr = 0x%lX size = 0x%lX pages = %d\n",
                        phys_addr, block_size, expected_pages);
        if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) {
                memblock_reserve(phys_addr, block_size * expected_pages);
                add_gpage(phys_addr, block_size, expected_pages);
        }
        return 0;
}
#endif /* CONFIG_HUGETLB_PAGE */

static void __init htab_init_page_sizes(void)
{
        int rc;

        /* Default to 4K pages only */
        memcpy(mmu_psize_defs, mmu_psize_defaults_old,
               sizeof(mmu_psize_defaults_old));

        /*
         * Try to find the available page sizes in the device-tree
         */
        rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
        if (rc != 0)  /* Found */
                goto found;

        /*
         * Not in the device-tree, let's fall back on the known size
         * list for 16M capable GP & GR
         */
        if (mmu_has_feature(MMU_FTR_16M_PAGE))
                memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
                       sizeof(mmu_psize_defaults_gp));
 found:
#ifndef CONFIG_DEBUG_PAGEALLOC
        /*
         * Pick a size for the linear mapping. Currently, we only support
         * 16M, 1M and 4K which is the default
         */
        if (mmu_psize_defs[MMU_PAGE_16M].shift)
                mmu_linear_psize = MMU_PAGE_16M;
        else if (mmu_psize_defs[MMU_PAGE_1M].shift)
                mmu_linear_psize = MMU_PAGE_1M;
#endif /* CONFIG_DEBUG_PAGEALLOC */

#ifdef CONFIG_PPC_64K_PAGES
        /*
         * Pick a size for the ordinary pages. Default is 4K, we support
         * 64K for user mappings and vmalloc if supported by the processor.
         * We only use 64k for ioremap if the processor
         * (and firmware) support cache-inhibited large pages.
         * If not, we use 4k and set mmu_ci_restrictions so that
         * hash_page knows to switch processes that use cache-inhibited
         * mappings to 4k pages.
         */
        if (mmu_psize_defs[MMU_PAGE_64K].shift) {
                mmu_virtual_psize = MMU_PAGE_64K;
                mmu_vmalloc_psize = MMU_PAGE_64K;
                if (mmu_linear_psize == MMU_PAGE_4K)
                        mmu_linear_psize = MMU_PAGE_64K;
                if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
                        /*
                         * Don't use 64k pages for ioremap on pSeries, since
                         * that would stop us accessing the HEA ethernet.
                         */
                        if (!machine_is(pseries))
                                mmu_io_psize = MMU_PAGE_64K;
                } else
                        mmu_ci_restrictions = 1;
        }
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
        /* We try to use 16M pages for vmemmap if that is supported
         * and we have at least 1G of RAM at boot
         */
        if (mmu_psize_defs[MMU_PAGE_16M].shift &&
            memblock_phys_mem_size() >= 0x40000000)
                mmu_vmemmap_psize = MMU_PAGE_16M;
        else if (mmu_psize_defs[MMU_PAGE_64K].shift)
                mmu_vmemmap_psize = MMU_PAGE_64K;
        else
                mmu_vmemmap_psize = MMU_PAGE_4K;
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

        printk(KERN_DEBUG "Page orders: linear mapping = %d, "
               "virtual = %d, io = %d"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
               ", vmemmap = %d"
#endif
               "\n",
               mmu_psize_defs[mmu_linear_psize].shift,
               mmu_psize_defs[mmu_virtual_psize].shift,
               mmu_psize_defs[mmu_io_psize].shift
#ifdef CONFIG_SPARSEMEM_VMEMMAP
               ,mmu_psize_defs[mmu_vmemmap_psize].shift
#endif
               );

#ifdef CONFIG_HUGETLB_PAGE
        /* Reserve 16G huge page memory sections for huge pages */
        of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
#endif /* CONFIG_HUGETLB_PAGE */
}

static int __init htab_dt_scan_pftsize(unsigned long node,
                                       const char *uname, int depth,
                                       void *data)
{
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        u32 *prop;

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
        if (prop != NULL) {
                /* pft_size[0] is the NUMA CEC cookie */
                ppc64_pft_size = prop[1];
                return 1;
        }
        return 0;
}

static unsigned long __init htab_get_table_size(void)
{
        unsigned long mem_size, rnd_mem_size, pteg_count, psize;

        /* If hash size isn't already provided by the platform, we try to
         * retrieve it from the device-tree. If it's not there either, we
         * calculate it now based on the total RAM size
         */
        if (ppc64_pft_size == 0)
                of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
        if (ppc64_pft_size)
                return 1UL << ppc64_pft_size;

        /* round mem_size up to next power of 2 */
        mem_size = memblock_phys_mem_size();
        rnd_mem_size = 1UL << __ilog2(mem_size);
        if (rnd_mem_size < mem_size)
                rnd_mem_size <<= 1;

        /* # pages / 2 */
        psize = mmu_psize_defs[mmu_virtual_psize].shift;
        pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11);

        return pteg_count << 7;
}

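/*
 * Worked example (editorial): with 8GB of RAM and 4K base pages,
 * rnd_mem_size = 2^33, so pteg_count = max(2^33 >> 13, 1UL << 11) = 2^20,
 * i.e. one PTEG for every two 4K pages. Each PTEG holds 8 HPTEs of 16 bytes
 * (128 bytes, hence the << 7), giving a 2^27 byte (128MB) hash table. The
 * 1UL << 11 floor corresponds to the architectural minimum of 2^11 PTEGs
 * (a 256KB table).
 */
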
#ifdef CONFIG_MEMORY_HOTPLUG
void create_section_mapping(unsigned long start, unsigned long end)
{
        BUG_ON(htab_bolt_mapping(start, end, __pa(start),
                                 pgprot_val(PAGE_KERNEL), mmu_linear_psize,
                                 mmu_kernel_ssize));
}

int remove_section_mapping(unsigned long start, unsigned long end)
{
        return htab_remove_mapping(start, end, mmu_linear_psize,
                                   mmu_kernel_ssize);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#define FUNCTION_TEXT(A)        ((*(unsigned long *)(A)))

static void __init htab_finish_init(void)
{
        extern unsigned int *htab_call_hpte_insert1;
        extern unsigned int *htab_call_hpte_insert2;
        extern unsigned int *htab_call_hpte_remove;
        extern unsigned int *htab_call_hpte_updatepp;

#ifdef CONFIG_PPC_HAS_HASH_64K
        extern unsigned int *ht64_call_hpte_insert1;
        extern unsigned int *ht64_call_hpte_insert2;
        extern unsigned int *ht64_call_hpte_remove;
        extern unsigned int *ht64_call_hpte_updatepp;

        patch_branch(ht64_call_hpte_insert1,
                FUNCTION_TEXT(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(ht64_call_hpte_insert2,
                FUNCTION_TEXT(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(ht64_call_hpte_remove,
                FUNCTION_TEXT(ppc_md.hpte_remove),
                BRANCH_SET_LINK);
        patch_branch(ht64_call_hpte_updatepp,
                FUNCTION_TEXT(ppc_md.hpte_updatepp),
                BRANCH_SET_LINK);

#endif /* CONFIG_PPC_HAS_HASH_64K */

        patch_branch(htab_call_hpte_insert1,
                FUNCTION_TEXT(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(htab_call_hpte_insert2,
                FUNCTION_TEXT(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(htab_call_hpte_remove,
                FUNCTION_TEXT(ppc_md.hpte_remove),
                BRANCH_SET_LINK);
        patch_branch(htab_call_hpte_updatepp,
                FUNCTION_TEXT(ppc_md.hpte_updatepp),
                BRANCH_SET_LINK);
}

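/*
 * Editorial note: the htab_call_* / ht64_call_* symbols above are branch
 * instructions in the low-level assembly (hash_low_64.S). Rather than have
 * that code load ppc_md.hpte_* pointers at run time, the branches are
 * patched once at boot to jump directly to the platform's routines.
 * FUNCTION_TEXT() dereferences the first word of the ppc64 function
 * descriptor to obtain the actual code address to branch to.
 */
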
static void __init htab_initialize(void)
{
        unsigned long table;
        unsigned long pteg_count;
        unsigned long prot;
        unsigned long base = 0, size = 0, limit;
        struct memblock_region *reg;

        DBG(" -> htab_initialize()\n");

        /* Initialize segment sizes */
        htab_init_seg_sizes();

        /* Initialize page sizes */
        htab_init_page_sizes();

        if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
                mmu_kernel_ssize = MMU_SEGSIZE_1T;
                mmu_highuser_ssize = MMU_SEGSIZE_1T;
                printk(KERN_INFO "Using 1TB segments\n");
        }

        /*
         * Calculate the required size of the htab.  We want the number of
         * PTEGs to equal one half the number of real pages.
         */
        htab_size_bytes = htab_get_table_size();
        pteg_count = htab_size_bytes >> 7;

        htab_hash_mask = pteg_count - 1;

        if (firmware_has_feature(FW_FEATURE_LPAR)) {
                /* Using a hypervisor which owns the htab */
                htab_address = NULL;
                _SDR1 = 0;
        } else {
                /* Find storage for the HPT.  Must be contiguous in
                 * the absolute address space. On cell we want it to be
                 * in the first 2 Gig so we can use it for IOMMU hacks.
                 */
                if (machine_is(cell))
                        limit = 0x80000000;
                else
                        limit = MEMBLOCK_ALLOC_ANYWHERE;

                table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);

                DBG("Hash table allocated at %lx, size: %lx\n", table,
                    htab_size_bytes);

                htab_address = abs_to_virt(table);

                /* htab absolute addr + encoded htabsize */
                _SDR1 = table + __ilog2(pteg_count) - 11;
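                /*
                 * Editorial note: SDR1 holds the real address of the hash
                 * table in its upper bits and an HTABSIZE field in its low
                 * bits, where HTABSIZE = log2(number of PTEGs) - 11 (the
                 * minimum table is 2^11 PTEGs). The table is allocated
                 * aligned to its own size, so its low bits are zero and the
                 * addition above effectively ORs the size encoding into the
                 * address. E.g. a 128MB table (2^20 PTEGs) encodes
                 * HTABSIZE = 9.
                 */
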
                /* Initialize the HPT with no entries */
                memset((void *)table, 0, htab_size_bytes);

                /* Set SDR1 */
                mtspr(SPRN_SDR1, _SDR1);
        }

        prot = pgprot_val(PAGE_KERNEL);

#ifdef CONFIG_DEBUG_PAGEALLOC
        linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
        linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
                                                    1, ppc64_rma_size));
        memset(linear_map_hash_slots, 0, linear_map_hash_count);
#endif /* CONFIG_DEBUG_PAGEALLOC */

        /* On U3 based machines, we need to reserve the DART area and
         * _NOT_ map it to avoid cache paradoxes as it's remapped non
         * cacheable later on
         */

        /* create the bolted linear mapping in the hash table */
        for_each_memblock(memory, reg) {
                base = (unsigned long)__va(reg->base);
                size = reg->size;

                DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
                    base, size, prot);

#ifdef CONFIG_U3_DART
                /* Do not map the DART space. Fortunately, it will be aligned
                 * in such a way that it will not cross two memblock regions and
                 * will fit within a single 16Mb page.
                 * The DART space is assumed to be a full 16Mb region even if
                 * we only use 2Mb of that space. We will use more of it later
                 * for AGP GART. We have to use a full 16Mb large page.
                 */
                DBG("DART base: %lx\n", dart_tablebase);

                if (dart_tablebase != 0 && dart_tablebase >= base
                    && dart_tablebase < (base + size)) {
                        unsigned long dart_table_end = dart_tablebase + 16 * MB;
                        if (base != dart_tablebase)
                                BUG_ON(htab_bolt_mapping(base, dart_tablebase,
                                                        __pa(base), prot,
                                                        mmu_linear_psize,
                                                        mmu_kernel_ssize));
                        if ((base + size) > dart_table_end)
                                BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
                                                        base + size,
                                                        __pa(dart_table_end),
                                                        prot,
                                                        mmu_linear_psize,
                                                        mmu_kernel_ssize));
                        continue;
                }
#endif /* CONFIG_U3_DART */
                BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
                                prot, mmu_linear_psize, mmu_kernel_ssize));
        }
        memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

        /*
         * If we have a memory_limit and we've allocated TCEs then we need to
         * explicitly map the TCE area at the top of RAM. We also cope with the
         * case that the TCEs start below memory_limit.
         * tce_alloc_start/end are 16MB aligned so the mapping should work
         * for either 4K or 16MB pages.
         */
        if (tce_alloc_start) {
                tce_alloc_start = (unsigned long)__va(tce_alloc_start);
                tce_alloc_end = (unsigned long)__va(tce_alloc_end);

                if (base + size >= tce_alloc_start)
                        tce_alloc_start = base + size + 1;

                BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
                                         __pa(tce_alloc_start), prot,
                                         mmu_linear_psize, mmu_kernel_ssize));
        }

        htab_finish_init();

        DBG(" <- htab_initialize()\n");
}
#undef KB
#undef MB

void __init early_init_mmu(void)
{
        /* Setup initial STAB address in the PACA */
        get_paca()->stab_real = __pa((u64)&initial_stab);
        get_paca()->stab_addr = (u64)&initial_stab;

        /* Initialize the MMU Hash table and create the linear mapping
         * of memory. Has to be done before stab/slb initialization as
         * this is currently where the page size encoding is obtained
         */
        htab_initialize();

        /* Initialize stab / SLB management except on iSeries
         */
        if (mmu_has_feature(MMU_FTR_SLB))
                slb_initialize();
        else if (!firmware_has_feature(FW_FEATURE_ISERIES))
                stab_initialize(get_paca()->stab_real);
}

#ifdef CONFIG_SMP
void __cpuinit early_init_mmu_secondary(void)
{
        /* Initialize hash table for that CPU */
        if (!firmware_has_feature(FW_FEATURE_LPAR))
                mtspr(SPRN_SDR1, _SDR1);

        /* Initialize STAB/SLB. We use a virtual address as it works
         * in real mode on pSeries and we want a virtual address on
         * iSeries anyway
         */
        if (mmu_has_feature(MMU_FTR_SLB))
                slb_initialize();
        else
                stab_initialize(get_paca()->stab_addr);
}
#endif /* CONFIG_SMP */

/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
{
        struct page *page;

        if (!pfn_valid(pte_pfn(pte)))
                return pp;

        page = pte_page(pte);

        /* page is dirty */
        if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
                if (trap == 0x400) {
                        flush_dcache_icache_page(page);
                        set_bit(PG_arch_1, &page->flags);
                } else
                        pp |= HPTE_R_N;
        }
        return pp;
}

#ifdef CONFIG_PPC_MM_SLICES
unsigned int get_paca_psize(unsigned long addr)
{
        unsigned long index, slices;

        if (addr < SLICE_LOW_TOP) {
                slices = get_paca()->context.low_slices_psize;
                index = GET_LOW_SLICE_INDEX(addr);
        } else {
                slices = get_paca()->context.high_slices_psize;
                index = GET_HIGH_SLICE_INDEX(addr);
        }
        return (slices >> (index * 4)) & 0xF;
}

#else
unsigned int get_paca_psize(unsigned long addr)
{
        return get_paca()->context.user_psize;
}
#endif

/*
 * Demote a segment to using 4k pages.
 * For now this makes the whole process use 4k pages.
 */
#ifdef CONFIG_PPC_64K_PAGES
void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
{
        if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
                return;
        slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
#ifdef CONFIG_SPU_BASE
        spu_flush_all_slbs(mm);
#endif
        if (get_paca_psize(addr) != MMU_PAGE_4K) {
                get_paca()->context = mm->context;
                slb_flush_and_rebolt();
        }
}
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_PPC_SUBPAGE_PROT
/*
 * This looks up a 2-bit protection code for a 4k subpage of a 64k page.
 * Userspace sets the subpage permissions using the subpage_prot system call.
 *
 * Result is 0: full permissions, _PAGE_RW: read-only,
 * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
 */
static int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
        struct subpage_prot_table *spt = &mm->context.spt;
        u32 spp = 0;
        u32 **sbpm, *sbpp;

        if (ea >= spt->maxaddr)
                return 0;
        if (ea < 0x100000000) {
                /* addresses below 4GB use spt->low_prot */
                sbpm = spt->low_prot;
        } else {
                sbpm = spt->protptrs[ea >> SBP_L3_SHIFT];
                if (!sbpm)
                        return 0;
        }
        sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
        if (!sbpp)
                return 0;
        spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];

        /* extract 2-bit bitfield for this 4k subpage */
        spp >>= 30 - 2 * ((ea >> 12) & 0xf);

        /* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
        spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
        return spp;
}

#else /* CONFIG_PPC_SUBPAGE_PROT */
static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
        return 0;
}
#endif

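/*
 * Editorial note on the lookup above: each 32-bit word in the subpage
 * protection tables packs sixteen 2-bit fields, one per 4K subpage of a 64K
 * page, with subpage 0 in the most significant bits. For subpage index
 * i = (ea >> 12) & 0xf, shifting right by 30 - 2*i brings field i into the
 * low two bits. The result is returned as "extra" permission bits that the
 * faulting access must not require: hash_page() fails the access with -2
 * when (access & spp) is non-zero.
 */
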
void hash_failure_debug(unsigned long ea, unsigned long access,
                        unsigned long vsid, unsigned long trap,
                        int ssize, int psize, unsigned long pte)
{
        if (!printk_ratelimit())
                return;
        pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
                ea, access, current->comm);
        pr_info("    trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n",
                trap, vsid, ssize, psize, pte);
}

/* Result code is:
 *  0 - handled
 *  1 - normal page fault
 * -1 - critical hash insertion error
 * -2 - access not permitted by subpage protection mechanism
 */
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
        pgd_t *pgdir;
        unsigned long vsid;
        struct mm_struct *mm;
        pte_t *ptep;
        unsigned hugeshift;
        const struct cpumask *tmp;
        int rc, user_region = 0, local = 0;
        int psize, ssize;

        DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
                ea, access, trap);

        if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
                DBG_LOW(" out of pgtable range !\n");
                return 1;
        }

        /* Get region & vsid */
        switch (REGION_ID(ea)) {
        case USER_REGION_ID:
                user_region = 1;
                mm = current->mm;
                if (! mm) {
                        DBG_LOW(" user region with no mm !\n");
                        return 1;
                }
                psize = get_slice_psize(mm, ea);
                ssize = user_segment_size(ea);
                vsid = get_vsid(mm->context.id, ea, ssize);
                break;
        case VMALLOC_REGION_ID:
                mm = &init_mm;
                vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
                if (ea < VMALLOC_END)
                        psize = mmu_vmalloc_psize;
                else
                        psize = mmu_io_psize;
                ssize = mmu_kernel_ssize;
                break;
        default:
                /* Not a valid range
                 * Send the problem up to do_page_fault
                 */
                return 1;
        }
        DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);

        /* Get pgdir */
        pgdir = mm->pgd;
        if (pgdir == NULL)
                return 1;

        /* Check CPU locality */
        tmp = cpumask_of(smp_processor_id());
        if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
                local = 1;
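        /*
         * Editorial note: "local" means this mm has only ever been active
         * on the current CPU, so no other CPU's TLB can hold translations
         * for it. The low-level hash code uses this to invalidate with the
         * local tlbiel instruction instead of a broadcast tlbie.
         */
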
#ifndef CONFIG_PPC_64K_PAGES
        /* If we use 4K pages and our psize is not 4K, then we might
         * be hitting a special driver mapping, and need to align the
         * address before we fetch the PTE.
         *
         * It could also be a hugepage mapping, in which case this is
         * not necessary, but it's not harmful, either.
         */
        if (psize != MMU_PAGE_4K)
                ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
#endif /* CONFIG_PPC_64K_PAGES */

        /* Get PTE and page size from page tables */
        ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
        if (ptep == NULL || !pte_present(*ptep)) {
                DBG_LOW(" no PTE !\n");
                return 1;
        }

        /* Add _PAGE_PRESENT to the required access perm */
        access |= _PAGE_PRESENT;

        /* Pre-check access permissions (will be re-checked atomically
         * in __hash_page_XX but this pre-check is a fast path)
         */
        if (access & ~pte_val(*ptep)) {
                DBG_LOW(" no access !\n");
                return 1;
        }

#ifdef CONFIG_HUGETLB_PAGE
        if (hugeshift)
                return __hash_page_huge(ea, access, vsid, ptep, trap, local,
                                        ssize, hugeshift, psize);
#endif /* CONFIG_HUGETLB_PAGE */

#ifndef CONFIG_PPC_64K_PAGES
        DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
#else
        DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
                pte_val(*(ptep + PTRS_PER_PTE)));
#endif
        /* Do actual hashing */
#ifdef CONFIG_PPC_64K_PAGES
        /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
        if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
                demote_segment_4k(mm, ea);
                psize = MMU_PAGE_4K;
        }

        /* If this PTE is non-cacheable and we have restrictions on
         * using non cacheable large pages, then we switch to 4k
         */
        if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
            (pte_val(*ptep) & _PAGE_NO_CACHE)) {
                if (user_region) {
                        demote_segment_4k(mm, ea);
                        psize = MMU_PAGE_4K;
                } else if (ea < VMALLOC_END) {
                        /*
                         * some driver did a non-cacheable mapping
                         * in vmalloc space, so switch vmalloc
                         * to 4k pages
                         */
                        printk(KERN_ALERT "Reducing vmalloc segment "
                               "to 4kB pages because of "
                               "non-cacheable mapping\n");
                        psize = mmu_vmalloc_psize = MMU_PAGE_4K;
#ifdef CONFIG_SPU_BASE
                        spu_flush_all_slbs(mm);
#endif
                }
        }
        if (user_region) {
                if (psize != get_paca_psize(ea)) {
                        get_paca()->context = mm->context;
                        slb_flush_and_rebolt();
                }
        } else if (get_paca()->vmalloc_sllp !=
                   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
                get_paca()->vmalloc_sllp =
                        mmu_psize_defs[mmu_vmalloc_psize].sllp;
                slb_vmalloc_update();
        }
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_PPC_HAS_HASH_64K
        if (psize == MMU_PAGE_64K)
                rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
        else
#endif /* CONFIG_PPC_HAS_HASH_64K */
        {
                int spp = subpage_protection(mm, ea);
                if (access & spp)
                        rc = -2;
                else
                        rc = __hash_page_4K(ea, access, vsid, ptep, trap,
                                            local, ssize, spp);
        }

        /* Dump some info in case of hash insertion failure, they should
         * never happen so it is really useful to know if/when they do
         */
        if (rc == -1)
                hash_failure_debug(ea, access, vsid, trap, ssize, psize,
                                   pte_val(*ptep));
#ifndef CONFIG_PPC_64K_PAGES
        DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else
        DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
                pte_val(*(ptep + PTRS_PER_PTE)));
#endif
        DBG_LOW(" -> rc=%d\n", rc);
        return rc;
}
EXPORT_SYMBOL_GPL(hash_page);

void hash_preload(struct mm_struct *mm, unsigned long ea,
                  unsigned long access, unsigned long trap)
{
        unsigned long vsid;
        pgd_t *pgdir;
        pte_t *ptep;
        unsigned long flags;
        int rc, ssize, local = 0;

        BUG_ON(REGION_ID(ea) != USER_REGION_ID);

#ifdef CONFIG_PPC_MM_SLICES
        /* We only prefault standard pages for now */
        if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize))
                return;
#endif

        DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
                " trap=%lx\n", mm, mm->pgd, ea, access, trap);

        /* Get Linux PTE if available */
        pgdir = mm->pgd;
        if (pgdir == NULL)
                return;
        ptep = find_linux_pte(pgdir, ea);
        if (!ptep)
                return;

#ifdef CONFIG_PPC_64K_PAGES
        /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
         * a 64K kernel), then we don't preload, hash_page() will take
         * care of it once we actually try to access the page.
         * That way we don't have to duplicate all of the logic for segment
         * page size demotion here
         */
        if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
                return;
#endif /* CONFIG_PPC_64K_PAGES */

        /* Get VSID */
        ssize = user_segment_size(ea);
        vsid = get_vsid(mm->context.id, ea, ssize);

        /* Hash doesn't like irqs */
        local_irq_save(flags);

        /* Is that local to this CPU ? */
        if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                local = 1;

        /* Hash it in */
#ifdef CONFIG_PPC_HAS_HASH_64K
        if (mm->context.user_psize == MMU_PAGE_64K)
                rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
        else
#endif /* CONFIG_PPC_HAS_HASH_64K */
                rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
                                    subpage_protection(mm, ea));

        /* Dump some info in case of hash insertion failure, they should
         * never happen so it is really useful to know if/when they do
         */
        if (rc == -1)
                hash_failure_debug(ea, access, vsid, trap, ssize,
                                   mm->context.user_psize, pte_val(*ptep));

        local_irq_restore(flags);
}

/* WARNING: This is called from hash_low_64.S, if you change this prototype,
 *          do not forget to update the assembly call site !
 */
void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize,
                     int local)
{
        unsigned long hash, index, shift, hidx, slot;

        DBG_LOW("flush_hash_page(va=%016lx)\n", va);
        pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
                hash = hpt_hash(va, shift, ssize);
                hidx = __rpte_to_hidx(pte, index);
                if (hidx & _PTEIDX_SECONDARY)
                        hash = ~hash;
                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += hidx & _PTEIDX_GROUP_IX;
                DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
                ppc_md.hpte_invalidate(slot, va, psize, ssize, local);
        } pte_iterate_hashed_end();
}

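/*
 * Editorial note on flush_hash_page() above: the hidx value saved in the
 * Linux PTE records which slot within a PTEG the HPTE went into and whether
 * the primary or secondary hash was used. When _PTEIDX_SECONDARY is set, the
 * group is recomputed with the complemented hash (~hash), mirroring what the
 * insertion path did, so the invalidate hits the same slot without having to
 * search the table.
 */
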
void flush_hash_range(unsigned long number, int local)
{
        if (ppc_md.flush_hash_range)
                ppc_md.flush_hash_range(number, local);
        else {
                int i;
                struct ppc64_tlb_batch *batch =
                        &__get_cpu_var(ppc64_tlb_batch);

                for (i = 0; i < number; i++)
                        flush_hash_page(batch->vaddr[i], batch->pte[i],
                                        batch->psize, batch->ssize, local);
        }
}

/*
 * low_hash_fault is called when the low level hash code failed
 * to insert a PTE due to a hypervisor error
 */
void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
{
        if (user_mode(regs)) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
                if (rc == -2)
                        _exception(SIGSEGV, regs, SEGV_ACCERR, address);
                else
#endif
                        _exception(SIGBUS, regs, BUS_ADRERR, address);
        } else
                bad_page_fault(regs, address, SIGBUS);
}

#ifdef CONFIG_DEBUG_PAGEALLOC
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
        unsigned long hash, hpteg;
        unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
        unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
        unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
        int ret;

        hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
        hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);

        ret = ppc_md.hpte_insert(hpteg, va, __pa(vaddr),
                                 mode, HPTE_V_BOLTED,
                                 mmu_linear_psize, mmu_kernel_ssize);
        BUG_ON (ret < 0);
        spin_lock(&linear_map_hash_lock);
        BUG_ON(linear_map_hash_slots[lmi] & 0x80);
        linear_map_hash_slots[lmi] = ret | 0x80;
        spin_unlock(&linear_map_hash_lock);
}

static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
{
        unsigned long hash, hidx, slot;
        unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
        unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);

        hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
        spin_lock(&linear_map_hash_lock);
        BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
        hidx = linear_map_hash_slots[lmi] & 0x7f;
        linear_map_hash_slots[lmi] = 0;
        spin_unlock(&linear_map_hash_lock);
        if (hidx & _PTEIDX_SECONDARY)
                hash = ~hash;
        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        slot += hidx & _PTEIDX_GROUP_IX;
        ppc_md.hpte_invalidate(slot, va, mmu_linear_psize, mmu_kernel_ssize, 0);
}

void kernel_map_pages(struct page *page, int numpages, int enable)
{
        unsigned long flags, vaddr, lmi;
        int i;

        local_irq_save(flags);
        for (i = 0; i < numpages; i++, page++) {
                vaddr = (unsigned long)page_address(page);
                lmi = __pa(vaddr) >> PAGE_SHIFT;
                if (lmi >= linear_map_hash_count)
                        continue;
                if (enable)
                        kernel_map_linear_page(vaddr, lmi);
                else
                        kernel_unmap_linear_page(vaddr, lmi);
        }
        local_irq_restore(flags);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */

void setup_initial_memory_limit(phys_addr_t first_memblock_base,
                                phys_addr_t first_memblock_size)
{
        /* We don't currently support the first MEMBLOCK not mapping 0
         * physical on those processors
         */
        BUG_ON(first_memblock_base != 0);

        /* On LPAR systems, the first entry is our RMA region,
         * non-LPAR 64-bit hash MMU systems don't have a limitation
         * on real mode access, but using the first entry works well
         * enough. We also clamp it to 1G to avoid some funky things
         * such as RTAS bugs etc...
         */
        ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);

        /* Finally limit subsequent allocations */
        memblock_set_current_limit(ppc64_rma_size);
}