Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v3.4-rc4, 1285 lines, 35 kB
/*
 * PowerPC64 port by Mike Corrigan and Dave Engebretsen
 *   {mikejc|engebret}@us.ibm.com
 *
 *    Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 *    Module name: htab.c
 *
 *    Description:
 *      PowerPC Hashed Page Table functions
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG
#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/sysctl.h>
#include <linux/export.h>
#include <linux/ctype.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/memblock.h>

#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/types.h>
#include <asm/uaccess.h>
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/abs_addr.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/eeh.h>
#include <asm/tlb.h>
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/spu.h>
#include <asm/udbg.h>
#include <asm/code-patching.h>
#include <asm/fadump.h>
#include <asm/firmware.h>

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define KB (1024)
#define MB (1024*KB)
#define GB (1024L*MB)

/*
 * Note:  pte   --> Linux PTE
 *        HPTE  --> PowerPC Hashed Page Table Entry
 *
 * Execution context:
 *   htab_initialize is called with the MMU off (of course), but
 *   the kernel has been copied down to zero so it can directly
 *   reference global data.  At this point it is very difficult
 *   to print debug info.
 *
 */

#ifdef CONFIG_U3_DART
extern unsigned long dart_tablebase;
#endif /* CONFIG_U3_DART */

static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];

struct hash_pte *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
EXPORT_SYMBOL_GPL(htab_hash_mask);
int mmu_linear_psize = MMU_PAGE_4K;
int mmu_virtual_psize = MMU_PAGE_4K;
int mmu_vmalloc_psize = MMU_PAGE_4K;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int mmu_vmemmap_psize = MMU_PAGE_4K;
#endif
int mmu_io_psize = MMU_PAGE_4K;
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
int mmu_highuser_ssize = MMU_SEGSIZE_256M;
u16 mmu_slb_size = 64;
EXPORT_SYMBOL_GPL(mmu_slb_size);
#ifdef CONFIG_PPC_64K_PAGES
int mmu_ci_restrictions;
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
static u8 *linear_map_hash_slots;
static unsigned long linear_map_hash_count;
static DEFINE_SPINLOCK(linear_map_hash_lock);
#endif /* CONFIG_DEBUG_PAGEALLOC */

/* There are definitions of page sizes arrays to be used when none
 * is provided by the firmware.
 */

/* Pre-POWER4 CPUs (4k pages only)
 */
static struct mmu_psize_def mmu_psize_defaults_old[] = {
        [MMU_PAGE_4K] = {
                .shift  = 12,
                .sllp   = 0,
                .penc   = 0,
                .avpnm  = 0,
                .tlbiel = 0,
        },
};

/* POWER4, GPUL, POWER5
 *
 * Support for 16Mb large pages
 */
static struct mmu_psize_def mmu_psize_defaults_gp[] = {
        [MMU_PAGE_4K] = {
                .shift  = 12,
                .sllp   = 0,
                .penc   = 0,
                .avpnm  = 0,
                .tlbiel = 1,
        },
        [MMU_PAGE_16M] = {
                .shift  = 24,
                .sllp   = SLB_VSID_L,
                .penc   = 0,
                .avpnm  = 0x1UL,
                .tlbiel = 0,
        },
};

static unsigned long htab_convert_pte_flags(unsigned long pteflags)
{
        unsigned long rflags = pteflags & 0x1fa;

        /* _PAGE_EXEC -> NOEXEC */
        if ((pteflags & _PAGE_EXEC) == 0)
                rflags |= HPTE_R_N;

        /* PP bits. PAGE_USER is already PP bit 0x2, so we only
         * need to add in 0x1 if it's a read-only user page
         */
        if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) &&
                                         (pteflags & _PAGE_DIRTY)))
                rflags |= 1;

        /* Always add C */
        return rflags | HPTE_R_C;
}

int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
                      unsigned long pstart, unsigned long prot,
                      int psize, int ssize)
{
        unsigned long vaddr, paddr;
        unsigned int step, shift;
        int ret = 0;

        shift = mmu_psize_defs[psize].shift;
        step = 1 << shift;

        prot = htab_convert_pte_flags(prot);

        DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
            vstart, vend, pstart, prot, psize, ssize);

        for (vaddr = vstart, paddr = pstart; vaddr < vend;
             vaddr += step, paddr += step) {
                unsigned long hash, hpteg;
                unsigned long vsid = get_kernel_vsid(vaddr, ssize);
                unsigned long va = hpt_va(vaddr, vsid, ssize);
                unsigned long tprot = prot;

                /* Make kernel text executable */
                if (overlaps_kernel_text(vaddr, vaddr + step))
                        tprot &= ~HPTE_R_N;

                hash = hpt_hash(va, shift, ssize);
                hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);

                BUG_ON(!ppc_md.hpte_insert);
                ret = ppc_md.hpte_insert(hpteg, va, paddr, tprot,
                                         HPTE_V_BOLTED, psize, ssize);

                if (ret < 0)
                        break;
#ifdef CONFIG_DEBUG_PAGEALLOC
                if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
                        linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
#endif /* CONFIG_DEBUG_PAGEALLOC */
        }
        return ret < 0 ? ret : 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
static int htab_remove_mapping(unsigned long vstart, unsigned long vend,
                      int psize, int ssize)
{
        unsigned long vaddr;
        unsigned int step, shift;

        shift = mmu_psize_defs[psize].shift;
        step = 1 << shift;

        if (!ppc_md.hpte_removebolted) {
                printk(KERN_WARNING "Platform doesn't implement "
                                "hpte_removebolted\n");
                return -EINVAL;
        }

        for (vaddr = vstart; vaddr < vend; vaddr += step)
                ppc_md.hpte_removebolted(vaddr, psize, ssize);

        return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

static int __init htab_dt_scan_seg_sizes(unsigned long node,
                                         const char *uname, int depth,
                                         void *data)
{
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        u32 *prop;
        unsigned long size = 0;

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        prop = (u32 *)of_get_flat_dt_prop(node, "ibm,processor-segment-sizes",
                                          &size);
        if (prop == NULL)
                return 0;
        for (; size >= 4; size -= 4, ++prop) {
                if (prop[0] == 40) {
                        DBG("1T segment support detected\n");
                        cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
                        return 1;
                }
        }
        cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
        return 0;
}

static void __init htab_init_seg_sizes(void)
{
        of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
}

static int __init htab_dt_scan_page_sizes(unsigned long node,
                                          const char *uname, int depth,
                                          void *data)
{
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        u32 *prop;
        unsigned long size = 0;

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        prop = (u32 *)of_get_flat_dt_prop(node,
                                          "ibm,segment-page-sizes", &size);
        if (prop != NULL) {
                DBG("Page sizes from device-tree:\n");
                size /= 4;
                cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
                while(size > 0) {
                        unsigned int shift = prop[0];
                        unsigned int slbenc = prop[1];
                        unsigned int lpnum = prop[2];
                        unsigned int lpenc = 0;
                        struct mmu_psize_def *def;
                        int idx = -1;

                        size -= 3; prop += 3;
                        while(size > 0 && lpnum) {
                                if (prop[0] == shift)
                                        lpenc = prop[1];
                                prop += 2; size -= 2;
                                lpnum--;
                        }
                        switch(shift) {
                        case 0xc:
                                idx = MMU_PAGE_4K;
                                break;
                        case 0x10:
                                idx = MMU_PAGE_64K;
                                break;
                        case 0x14:
                                idx = MMU_PAGE_1M;
                                break;
                        case 0x18:
                                idx = MMU_PAGE_16M;
                                cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
                                break;
                        case 0x22:
                                idx = MMU_PAGE_16G;
                                break;
                        }
                        if (idx < 0)
                                continue;
                        def = &mmu_psize_defs[idx];
                        def->shift = shift;
                        if (shift <= 23)
                                def->avpnm = 0;
                        else
                                def->avpnm = (1 << (shift - 23)) - 1;
                        def->sllp = slbenc;
                        def->penc = lpenc;
                        /* We don't know for sure what's up with tlbiel, so
                         * for now we only set it for 4K and 64K pages
                         */
                        if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
                                def->tlbiel = 1;
                        else
                                def->tlbiel = 0;

                        DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, "
                            "tlbiel=%d, penc=%d\n",
                            idx, shift, def->sllp, def->avpnm, def->tlbiel,
                            def->penc);
                }
                return 1;
        }
        return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
/* Scan for 16G memory blocks that have been set aside for huge pages
 * and reserve those blocks for 16G huge pages.
 */
static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
                                        const char *uname, int depth,
                                        void *data) {
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        unsigned long *addr_prop;
        u32 *page_count_prop;
        unsigned int expected_pages;
        long unsigned int phys_addr;
        long unsigned int block_size;

        /* We are scanning "memory" nodes only */
        if (type == NULL || strcmp(type, "memory") != 0)
                return 0;

        /* This property is the log base 2 of the number of virtual pages that
         * will represent this memory block. */
        page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
        if (page_count_prop == NULL)
                return 0;
        expected_pages = (1 << page_count_prop[0]);
        addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
        if (addr_prop == NULL)
                return 0;
        phys_addr = addr_prop[0];
        block_size = addr_prop[1];
        if (block_size != (16 * GB))
                return 0;
        printk(KERN_INFO "Huge page(16GB) memory: "
                        "addr = 0x%lX size = 0x%lX pages = %d\n",
                        phys_addr, block_size, expected_pages);
        if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) {
                memblock_reserve(phys_addr, block_size * expected_pages);
                add_gpage(phys_addr, block_size, expected_pages);
        }
        return 0;
}
#endif /* CONFIG_HUGETLB_PAGE */

static void __init htab_init_page_sizes(void)
{
        int rc;

        /* Default to 4K pages only */
        memcpy(mmu_psize_defs, mmu_psize_defaults_old,
               sizeof(mmu_psize_defaults_old));

        /*
         * Try to find the available page sizes in the device-tree
         */
        rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
        if (rc != 0)  /* Found */
                goto found;

        /*
         * Not in the device-tree, let's fallback on known size
         * list for 16M capable GP & GR
         */
        if (mmu_has_feature(MMU_FTR_16M_PAGE))
                memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
                       sizeof(mmu_psize_defaults_gp));
 found:
#ifndef CONFIG_DEBUG_PAGEALLOC
        /*
         * Pick a size for the linear mapping. Currently, we only support
         * 16M, 1M and 4K which is the default
         */
        if (mmu_psize_defs[MMU_PAGE_16M].shift)
                mmu_linear_psize = MMU_PAGE_16M;
        else if (mmu_psize_defs[MMU_PAGE_1M].shift)
                mmu_linear_psize = MMU_PAGE_1M;
#endif /* CONFIG_DEBUG_PAGEALLOC */

#ifdef CONFIG_PPC_64K_PAGES
        /*
         * Pick a size for the ordinary pages. Default is 4K, we support
         * 64K for user mappings and vmalloc if supported by the processor.
         * We only use 64k for ioremap if the processor
         * (and firmware) support cache-inhibited large pages.
         * If not, we use 4k and set mmu_ci_restrictions so that
         * hash_page knows to switch processes that use cache-inhibited
         * mappings to 4k pages.
         */
        if (mmu_psize_defs[MMU_PAGE_64K].shift) {
                mmu_virtual_psize = MMU_PAGE_64K;
                mmu_vmalloc_psize = MMU_PAGE_64K;
                if (mmu_linear_psize == MMU_PAGE_4K)
                        mmu_linear_psize = MMU_PAGE_64K;
                if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
                        /*
                         * Don't use 64k pages for ioremap on pSeries, since
                         * that would stop us accessing the HEA ethernet.
                         */
                        if (!machine_is(pseries))
                                mmu_io_psize = MMU_PAGE_64K;
                } else
                        mmu_ci_restrictions = 1;
        }
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
        /* We try to use 16M pages for vmemmap if that is supported
         * and we have at least 1G of RAM at boot
         */
        if (mmu_psize_defs[MMU_PAGE_16M].shift &&
            memblock_phys_mem_size() >= 0x40000000)
                mmu_vmemmap_psize = MMU_PAGE_16M;
        else if (mmu_psize_defs[MMU_PAGE_64K].shift)
                mmu_vmemmap_psize = MMU_PAGE_64K;
        else
                mmu_vmemmap_psize = MMU_PAGE_4K;
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

        printk(KERN_DEBUG "Page orders: linear mapping = %d, "
               "virtual = %d, io = %d"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
               ", vmemmap = %d"
#endif
               "\n",
               mmu_psize_defs[mmu_linear_psize].shift,
               mmu_psize_defs[mmu_virtual_psize].shift,
               mmu_psize_defs[mmu_io_psize].shift
#ifdef CONFIG_SPARSEMEM_VMEMMAP
               ,mmu_psize_defs[mmu_vmemmap_psize].shift
#endif
               );

#ifdef CONFIG_HUGETLB_PAGE
        /* Reserve 16G huge page memory sections for huge pages */
        of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
#endif /* CONFIG_HUGETLB_PAGE */
}

static int __init htab_dt_scan_pftsize(unsigned long node,
                                       const char *uname, int depth,
                                       void *data)
{
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        u32 *prop;

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
        if (prop != NULL) {
                /* pft_size[0] is the NUMA CEC cookie */
                ppc64_pft_size = prop[1];
                return 1;
        }
        return 0;
}

static unsigned long __init htab_get_table_size(void)
{
        unsigned long mem_size, rnd_mem_size, pteg_count, psize;

        /* If hash size isn't already provided by the platform, we try to
         * retrieve it from the device-tree. If it's not there either, we
         * calculate it now based on the total RAM size
         */
        if (ppc64_pft_size == 0)
                of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
        if (ppc64_pft_size)
                return 1UL << ppc64_pft_size;

        /* round mem_size up to next power of 2 */
        mem_size = memblock_phys_mem_size();
        rnd_mem_size = 1UL << __ilog2(mem_size);
        if (rnd_mem_size < mem_size)
                rnd_mem_size <<= 1;

        /* # pages / 2 */
        psize = mmu_psize_defs[mmu_virtual_psize].shift;
        pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11);

        return pteg_count << 7;
}

#ifdef CONFIG_MEMORY_HOTPLUG
int create_section_mapping(unsigned long start, unsigned long end)
{
        return htab_bolt_mapping(start, end, __pa(start),
                                 pgprot_val(PAGE_KERNEL), mmu_linear_psize,
                                 mmu_kernel_ssize);
}

int remove_section_mapping(unsigned long start, unsigned long end)
{
        return htab_remove_mapping(start, end, mmu_linear_psize,
                        mmu_kernel_ssize);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#define FUNCTION_TEXT(A)        ((*(unsigned long *)(A)))

static void __init htab_finish_init(void)
{
        extern unsigned int *htab_call_hpte_insert1;
        extern unsigned int *htab_call_hpte_insert2;
        extern unsigned int *htab_call_hpte_remove;
        extern unsigned int *htab_call_hpte_updatepp;

#ifdef CONFIG_PPC_HAS_HASH_64K
        extern unsigned int *ht64_call_hpte_insert1;
        extern unsigned int *ht64_call_hpte_insert2;
        extern unsigned int *ht64_call_hpte_remove;
        extern unsigned int *ht64_call_hpte_updatepp;

        patch_branch(ht64_call_hpte_insert1,
                FUNCTION_TEXT(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(ht64_call_hpte_insert2,
                FUNCTION_TEXT(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(ht64_call_hpte_remove,
                FUNCTION_TEXT(ppc_md.hpte_remove),
                BRANCH_SET_LINK);
        patch_branch(ht64_call_hpte_updatepp,
                FUNCTION_TEXT(ppc_md.hpte_updatepp),
                BRANCH_SET_LINK);

#endif /* CONFIG_PPC_HAS_HASH_64K */

        patch_branch(htab_call_hpte_insert1,
                FUNCTION_TEXT(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(htab_call_hpte_insert2,
                FUNCTION_TEXT(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(htab_call_hpte_remove,
                FUNCTION_TEXT(ppc_md.hpte_remove),
                BRANCH_SET_LINK);
        patch_branch(htab_call_hpte_updatepp,
                FUNCTION_TEXT(ppc_md.hpte_updatepp),
                BRANCH_SET_LINK);
}

static void __init htab_initialize(void)
{
        unsigned long table;
        unsigned long pteg_count;
        unsigned long prot;
        unsigned long base = 0, size = 0, limit;
        struct memblock_region *reg;

        DBG(" -> htab_initialize()\n");

        /* Initialize segment sizes */
        htab_init_seg_sizes();

        /* Initialize page sizes */
        htab_init_page_sizes();

        if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
                mmu_kernel_ssize = MMU_SEGSIZE_1T;
                mmu_highuser_ssize = MMU_SEGSIZE_1T;
                printk(KERN_INFO "Using 1TB segments\n");
        }

        /*
         * Calculate the required size of the htab.  We want the number of
         * PTEGs to equal one half the number of real pages.
         */
        htab_size_bytes = htab_get_table_size();
        pteg_count = htab_size_bytes >> 7;

        htab_hash_mask = pteg_count - 1;

        if (firmware_has_feature(FW_FEATURE_LPAR)) {
                /* Using a hypervisor which owns the htab */
                htab_address = NULL;
                _SDR1 = 0;
#ifdef CONFIG_FA_DUMP
                /*
                 * If firmware assisted dump is active firmware preserves
                 * the contents of htab along with entire partition memory.
                 * Clear the htab if firmware assisted dump is active so
                 * that we don't end up using old mappings.
                 */
                if (is_fadump_active() && ppc_md.hpte_clear_all)
                        ppc_md.hpte_clear_all();
#endif
        } else {
                /* Find storage for the HPT.  Must be contiguous in
                 * the absolute address space. On cell we want it to be
                 * in the first 2 Gig so we can use it for IOMMU hacks.
                 */
                if (machine_is(cell))
                        limit = 0x80000000;
                else
                        limit = MEMBLOCK_ALLOC_ANYWHERE;

                table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);

                DBG("Hash table allocated at %lx, size: %lx\n", table,
                    htab_size_bytes);

                htab_address = abs_to_virt(table);

                /* htab absolute addr + encoded htabsize */
                _SDR1 = table + __ilog2(pteg_count) - 11;

                /* Initialize the HPT with no entries */
                memset((void *)table, 0, htab_size_bytes);

                /* Set SDR1 */
                mtspr(SPRN_SDR1, _SDR1);
        }

        prot = pgprot_val(PAGE_KERNEL);

#ifdef CONFIG_DEBUG_PAGEALLOC
        linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
        linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
                                                    1, ppc64_rma_size));
        memset(linear_map_hash_slots, 0, linear_map_hash_count);
#endif /* CONFIG_DEBUG_PAGEALLOC */

        /* On U3 based machines, we need to reserve the DART area and
         * _NOT_ map it to avoid cache paradoxes as it's remapped non
         * cacheable later on
         */

        /* create the bolted linear mapping in the hash table */
        for_each_memblock(memory, reg) {
                base = (unsigned long)__va(reg->base);
                size = reg->size;

                DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
                    base, size, prot);

#ifdef CONFIG_U3_DART
                /* Do not map the DART space. Fortunately, it will be aligned
                 * in such a way that it will not cross two memblock regions and
                 * will fit within a single 16Mb page.
                 * The DART space is assumed to be a full 16Mb region even if
                 * we only use 2Mb of that space. We will use more of it later
                 * for AGP GART. We have to use a full 16Mb large page.
                 */
                DBG("DART base: %lx\n", dart_tablebase);

                if (dart_tablebase != 0 && dart_tablebase >= base
                    && dart_tablebase < (base + size)) {
                        unsigned long dart_table_end = dart_tablebase + 16 * MB;
                        if (base != dart_tablebase)
                                BUG_ON(htab_bolt_mapping(base, dart_tablebase,
                                                        __pa(base), prot,
                                                        mmu_linear_psize,
                                                        mmu_kernel_ssize));
                        if ((base + size) > dart_table_end)
                                BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
                                                        base + size,
                                                        __pa(dart_table_end),
                                                        prot,
                                                        mmu_linear_psize,
                                                        mmu_kernel_ssize));
                        continue;
                }
#endif /* CONFIG_U3_DART */
                BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
                                prot, mmu_linear_psize, mmu_kernel_ssize));
        }
        memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

        /*
         * If we have a memory_limit and we've allocated TCEs then we need to
         * explicitly map the TCE area at the top of RAM. We also cope with the
         * case that the TCEs start below memory_limit.
         * tce_alloc_start/end are 16MB aligned so the mapping should work
         * for either 4K or 16MB pages.
         */
        if (tce_alloc_start) {
                tce_alloc_start = (unsigned long)__va(tce_alloc_start);
                tce_alloc_end = (unsigned long)__va(tce_alloc_end);

                if (base + size >= tce_alloc_start)
                        tce_alloc_start = base + size + 1;

                BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
                                         __pa(tce_alloc_start), prot,
                                         mmu_linear_psize, mmu_kernel_ssize));
        }

        htab_finish_init();

        DBG(" <- htab_initialize()\n");
}
#undef KB
#undef MB

void __init early_init_mmu(void)
{
        /* Setup initial STAB address in the PACA */
        get_paca()->stab_real = __pa((u64)&initial_stab);
        get_paca()->stab_addr = (u64)&initial_stab;

        /* Initialize the MMU Hash table and create the linear mapping
         * of memory. Has to be done before stab/slb initialization as
         * this is currently where the page size encoding is obtained
         */
        htab_initialize();

        /* Initialize stab / SLB management */
        if (mmu_has_feature(MMU_FTR_SLB))
                slb_initialize();
}

#ifdef CONFIG_SMP
void __cpuinit early_init_mmu_secondary(void)
{
        /* Initialize hash table for that CPU */
        if (!firmware_has_feature(FW_FEATURE_LPAR))
                mtspr(SPRN_SDR1, _SDR1);

        /* Initialize STAB/SLB. We use a virtual address as it works
         * in real mode on pSeries.
         */
        if (mmu_has_feature(MMU_FTR_SLB))
                slb_initialize();
        else
                stab_initialize(get_paca()->stab_addr);
}
#endif /* CONFIG_SMP */

/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
{
        struct page *page;

        if (!pfn_valid(pte_pfn(pte)))
                return pp;

        page = pte_page(pte);

        /* page is dirty */
        if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
                if (trap == 0x400) {
                        flush_dcache_icache_page(page);
                        set_bit(PG_arch_1, &page->flags);
                } else
                        pp |= HPTE_R_N;
        }
        return pp;
}

#ifdef CONFIG_PPC_MM_SLICES
unsigned int get_paca_psize(unsigned long addr)
{
        unsigned long index, slices;

        if (addr < SLICE_LOW_TOP) {
                slices = get_paca()->context.low_slices_psize;
                index = GET_LOW_SLICE_INDEX(addr);
        } else {
                slices = get_paca()->context.high_slices_psize;
                index = GET_HIGH_SLICE_INDEX(addr);
        }
        return (slices >> (index * 4)) & 0xF;
}

#else
unsigned int get_paca_psize(unsigned long addr)
{
        return get_paca()->context.user_psize;
}
#endif

/*
 * Demote a segment to using 4k pages.
 * For now this makes the whole process use 4k pages.
 */
#ifdef CONFIG_PPC_64K_PAGES
void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
{
        if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
                return;
        slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
#ifdef CONFIG_SPU_BASE
        spu_flush_all_slbs(mm);
#endif
        if (get_paca_psize(addr) != MMU_PAGE_4K) {
                get_paca()->context = mm->context;
                slb_flush_and_rebolt();
        }
}
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_PPC_SUBPAGE_PROT
/*
 * This looks up a 2-bit protection code for a 4k subpage of a 64k page.
 * Userspace sets the subpage permissions using the subpage_prot system call.
 *
 * Result is 0: full permissions, _PAGE_RW: read-only,
 * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
 */
static int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
        struct subpage_prot_table *spt = &mm->context.spt;
        u32 spp = 0;
        u32 **sbpm, *sbpp;

        if (ea >= spt->maxaddr)
                return 0;
        if (ea < 0x100000000) {
                /* addresses below 4GB use spt->low_prot */
                sbpm = spt->low_prot;
        } else {
                sbpm = spt->protptrs[ea >> SBP_L3_SHIFT];
                if (!sbpm)
                        return 0;
        }
        sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
        if (!sbpp)
                return 0;
        spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];

        /* extract 2-bit bitfield for this 4k subpage */
        spp >>= 30 - 2 * ((ea >> 12) & 0xf);

        /* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
        spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
        return spp;
}

#else /* CONFIG_PPC_SUBPAGE_PROT */
static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
        return 0;
}
#endif

void hash_failure_debug(unsigned long ea, unsigned long access,
                        unsigned long vsid, unsigned long trap,
                        int ssize, int psize, unsigned long pte)
{
        if (!printk_ratelimit())
                return;
        pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
                ea, access, current->comm);
        pr_info("    trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n",
                trap, vsid, ssize, psize, pte);
}

/* Result code is:
 *  0 - handled
 *  1 - normal page fault
 * -1 - critical hash insertion error
 * -2 - access not permitted by subpage protection mechanism
 */
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
        pgd_t *pgdir;
        unsigned long vsid;
        struct mm_struct *mm;
        pte_t *ptep;
        unsigned hugeshift;
        const struct cpumask *tmp;
        int rc, user_region = 0, local = 0;
        int psize, ssize;

        DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
                ea, access, trap);

        if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
                DBG_LOW(" out of pgtable range !\n");
                return 1;
        }

        /* Get region & vsid */
        switch (REGION_ID(ea)) {
        case USER_REGION_ID:
                user_region = 1;
                mm = current->mm;
                if (! mm) {
                        DBG_LOW(" user region with no mm !\n");
                        return 1;
                }
                psize = get_slice_psize(mm, ea);
                ssize = user_segment_size(ea);
                vsid = get_vsid(mm->context.id, ea, ssize);
                break;
        case VMALLOC_REGION_ID:
                mm = &init_mm;
                vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
                if (ea < VMALLOC_END)
                        psize = mmu_vmalloc_psize;
                else
                        psize = mmu_io_psize;
                ssize = mmu_kernel_ssize;
                break;
        default:
                /* Not a valid range
                 * Send the problem up to do_page_fault
                 */
                return 1;
        }
        DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);

        /* Get pgdir */
        pgdir = mm->pgd;
        if (pgdir == NULL)
                return 1;

        /* Check CPU locality */
        tmp = cpumask_of(smp_processor_id());
        if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
                local = 1;

#ifndef CONFIG_PPC_64K_PAGES
        /* If we use 4K pages and our psize is not 4K, then we might
         * be hitting a special driver mapping, and need to align the
         * address before we fetch the PTE.
         *
         * It could also be a hugepage mapping, in which case this is
         * not necessary, but it's not harmful, either.
         */
        if (psize != MMU_PAGE_4K)
                ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
#endif /* CONFIG_PPC_64K_PAGES */

        /* Get PTE and page size from page tables */
        ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
        if (ptep == NULL || !pte_present(*ptep)) {
                DBG_LOW(" no PTE !\n");
                return 1;
        }

        /* Add _PAGE_PRESENT to the required access perm */
        access |= _PAGE_PRESENT;

        /* Pre-check access permissions (will be re-checked atomically
         * in __hash_page_XX but this pre-check is a fast path)
         */
        if (access & ~pte_val(*ptep)) {
                DBG_LOW(" no access !\n");
                return 1;
        }

#ifdef CONFIG_HUGETLB_PAGE
        if (hugeshift)
                return __hash_page_huge(ea, access, vsid, ptep, trap, local,
                                        ssize, hugeshift, psize);
#endif /* CONFIG_HUGETLB_PAGE */

#ifndef CONFIG_PPC_64K_PAGES
        DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
#else
        DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
                pte_val(*(ptep + PTRS_PER_PTE)));
#endif
        /* Do actual hashing */
#ifdef CONFIG_PPC_64K_PAGES
        /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
        if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
                demote_segment_4k(mm, ea);
                psize = MMU_PAGE_4K;
        }

        /* If this PTE is non-cacheable and we have restrictions on
         * using non cacheable large pages, then we switch to 4k
         */
        if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
            (pte_val(*ptep) & _PAGE_NO_CACHE)) {
                if (user_region) {
                        demote_segment_4k(mm, ea);
                        psize = MMU_PAGE_4K;
                } else if (ea < VMALLOC_END) {
                        /*
                         * some driver did a non-cacheable mapping
                         * in vmalloc space, so switch vmalloc
                         * to 4k pages
                         */
                        printk(KERN_ALERT "Reducing vmalloc segment "
                               "to 4kB pages because of "
                               "non-cacheable mapping\n");
                        psize = mmu_vmalloc_psize = MMU_PAGE_4K;
#ifdef CONFIG_SPU_BASE
                        spu_flush_all_slbs(mm);
#endif
                }
        }
        if (user_region) {
                if (psize != get_paca_psize(ea)) {
                        get_paca()->context = mm->context;
                        slb_flush_and_rebolt();
                }
        } else if (get_paca()->vmalloc_sllp !=
                   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
                get_paca()->vmalloc_sllp =
                        mmu_psize_defs[mmu_vmalloc_psize].sllp;
                slb_vmalloc_update();
        }
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_PPC_HAS_HASH_64K
        if (psize == MMU_PAGE_64K)
                rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
        else
#endif /* CONFIG_PPC_HAS_HASH_64K */
        {
                int spp = subpage_protection(mm, ea);
                if (access & spp)
                        rc = -2;
                else
                        rc = __hash_page_4K(ea, access, vsid, ptep, trap,
                                            local, ssize, spp);
        }

        /* Dump some info in case of hash insertion failure, they should
         * never happen so it is really useful to know if/when they do
         */
        if (rc == -1)
                hash_failure_debug(ea, access, vsid, trap, ssize, psize,
                                   pte_val(*ptep));
#ifndef CONFIG_PPC_64K_PAGES
        DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else
        DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
                pte_val(*(ptep + PTRS_PER_PTE)));
#endif
        DBG_LOW(" -> rc=%d\n", rc);
        return rc;
}
EXPORT_SYMBOL_GPL(hash_page);

void hash_preload(struct mm_struct *mm, unsigned long ea,
                  unsigned long access, unsigned long trap)
{
        unsigned long vsid;
        pgd_t *pgdir;
        pte_t *ptep;
        unsigned long flags;
        int rc, ssize, local = 0;

        BUG_ON(REGION_ID(ea) != USER_REGION_ID);

#ifdef CONFIG_PPC_MM_SLICES
        /* We only prefault standard pages for now */
        if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize))
                return;
#endif

        DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
                " trap=%lx\n", mm, mm->pgd, ea, access, trap);

        /* Get Linux PTE if available */
        pgdir = mm->pgd;
        if (pgdir == NULL)
                return;
        ptep = find_linux_pte(pgdir, ea);
        if (!ptep)
                return;

#ifdef CONFIG_PPC_64K_PAGES
        /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
         * a 64K kernel), then we don't preload, hash_page() will take
         * care of it once we actually try to access the page.
         * That way we don't have to duplicate all of the logic for segment
         * page size demotion here
         */
        if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
                return;
#endif /* CONFIG_PPC_64K_PAGES */

        /* Get VSID */
        ssize = user_segment_size(ea);
        vsid = get_vsid(mm->context.id, ea, ssize);

        /* Hash doesn't like irqs */
        local_irq_save(flags);

        /* Is that local to this CPU ? */
        if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                local = 1;

        /* Hash it in */
#ifdef CONFIG_PPC_HAS_HASH_64K
        if (mm->context.user_psize == MMU_PAGE_64K)
                rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
        else
#endif /* CONFIG_PPC_HAS_HASH_64K */
                rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
                                    subpage_protection(mm, ea));

        /* Dump some info in case of hash insertion failure, they should
         * never happen so it is really useful to know if/when they do
         */
        if (rc == -1)
                hash_failure_debug(ea, access, vsid, trap, ssize,
                                   mm->context.user_psize, pte_val(*ptep));

        local_irq_restore(flags);
}

/* WARNING: This is called from hash_low_64.S, if you change this prototype,
 *          do not forget to update the assembly call site !
 */
void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize,
                     int local)
{
        unsigned long hash, index, shift, hidx, slot;

        DBG_LOW("flush_hash_page(va=%016lx)\n", va);
        pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
                hash = hpt_hash(va, shift, ssize);
                hidx = __rpte_to_hidx(pte, index);
                if (hidx & _PTEIDX_SECONDARY)
                        hash = ~hash;
                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += hidx & _PTEIDX_GROUP_IX;
                DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
                ppc_md.hpte_invalidate(slot, va, psize, ssize, local);
        } pte_iterate_hashed_end();
}

void flush_hash_range(unsigned long number, int local)
{
        if (ppc_md.flush_hash_range)
                ppc_md.flush_hash_range(number, local);
        else {
                int i;
                struct ppc64_tlb_batch *batch =
                        &__get_cpu_var(ppc64_tlb_batch);

                for (i = 0; i < number; i++)
                        flush_hash_page(batch->vaddr[i], batch->pte[i],
                                        batch->psize, batch->ssize, local);
        }
}

/*
 * low_hash_fault is called when the low level hash code fails
 * to insert a PTE due to a hypervisor error
 */
void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
{
        if (user_mode(regs)) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
                if (rc == -2)
                        _exception(SIGSEGV, regs, SEGV_ACCERR, address);
                else
#endif
                        _exception(SIGBUS, regs, BUS_ADRERR, address);
        } else
                bad_page_fault(regs, address, SIGBUS);
}

#ifdef CONFIG_DEBUG_PAGEALLOC
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
        unsigned long hash, hpteg;
        unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
        unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
        unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
        int ret;

        hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
        hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);

        ret = ppc_md.hpte_insert(hpteg, va, __pa(vaddr),
                                 mode, HPTE_V_BOLTED,
                                 mmu_linear_psize, mmu_kernel_ssize);
        BUG_ON (ret < 0);
        spin_lock(&linear_map_hash_lock);
        BUG_ON(linear_map_hash_slots[lmi] & 0x80);
        linear_map_hash_slots[lmi] = ret | 0x80;
        spin_unlock(&linear_map_hash_lock);
}

static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
{
        unsigned long hash, hidx, slot;
        unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
        unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);

        hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
        spin_lock(&linear_map_hash_lock);
        BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
        hidx = linear_map_hash_slots[lmi] & 0x7f;
        linear_map_hash_slots[lmi] = 0;
        spin_unlock(&linear_map_hash_lock);
        if (hidx & _PTEIDX_SECONDARY)
                hash = ~hash;
        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        slot += hidx & _PTEIDX_GROUP_IX;
        ppc_md.hpte_invalidate(slot, va, mmu_linear_psize, mmu_kernel_ssize, 0);
}

void kernel_map_pages(struct page *page, int numpages, int enable)
{
        unsigned long flags, vaddr, lmi;
        int i;

        local_irq_save(flags);
        for (i = 0; i < numpages; i++, page++) {
                vaddr = (unsigned long)page_address(page);
                lmi = __pa(vaddr) >> PAGE_SHIFT;
                if (lmi >= linear_map_hash_count)
                        continue;
                if (enable)
                        kernel_map_linear_page(vaddr, lmi);
                else
                        kernel_unmap_linear_page(vaddr, lmi);
        }
        local_irq_restore(flags);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */

void setup_initial_memory_limit(phys_addr_t first_memblock_base,
                                phys_addr_t first_memblock_size)
{
        /* We don't currently support the first MEMBLOCK not mapping 0
         * physical on those processors
         */
        BUG_ON(first_memblock_base != 0);

        /* On LPAR systems, the first entry is our RMA region,
         * non-LPAR 64-bit hash MMU systems don't have a limitation
         * on real mode access, but using the first entry works well
         * enough. We also clamp it to 1G to avoid some funky things
         * such as RTAS bugs etc...
         */
        ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);

        /* Finally limit subsequent allocations */
        memblock_set_current_limit(ppc64_rma_size);
}
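
The sizing rule in htab_get_table_size() above is compact, so here is a standalone, userspace sketch (not part of the kernel file, and the helper name is purely illustrative) that reproduces it for a 4K base page size: memory size is rounded up to the next power of two, one PTEG is allocated per two pages with a floor of 2^11 PTEGs, and each PTEG occupies 128 bytes, hence the final << 7.

/* Illustrative sketch of the htab_get_table_size() sizing rule (4K pages). */
#include <stdio.h>

static unsigned long example_htab_size(unsigned long mem_size)
{
        unsigned long rnd_mem_size = 1UL;
        unsigned long pteg_count;

        /* round mem_size up to the next power of 2 */
        while (rnd_mem_size < mem_size)
                rnd_mem_size <<= 1;

        /* one PTEG per two 4K pages, with a floor of 2^11 PTEGs */
        pteg_count = rnd_mem_size >> (12 + 1);
        if (pteg_count < (1UL << 11))
                pteg_count = 1UL << 11;

        return pteg_count << 7;        /* each PTEG is 128 bytes */
}

int main(void)
{
        /* e.g. 4 GB of RAM -> 2^19 PTEGs -> a 64 MB hash table */
        printf("%lu\n", example_htab_size(4UL << 30));
        return 0;
}

For 4 GB of RAM this yields 2^19 PTEGs, i.e. a 64 MB hash table, which is the same value the kernel's pteg_count << 7 computes before htab_initialize() derives htab_hash_mask from it.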