/*
 * PowerPC64 port by Mike Corrigan and Dave Engebretsen
 *   {mikejc|engebret}@us.ibm.com
 *
 *    Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 *    Module name: htab.c
 *
 *    Description:
 *      PowerPC Hashed Page Table functions
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG
#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/sysctl.h>
#include <linux/export.h>
#include <linux/ctype.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/memblock.h>
#include <linux/context_tracking.h>

#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/types.h>
#include <asm/uaccess.h>
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/eeh.h>
#include <asm/tlb.h>
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/spu.h>
#include <asm/udbg.h>
#include <asm/code-patching.h>
#include <asm/fadump.h>
#include <asm/firmware.h>
#include <asm/tm.h>

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define KB (1024)
#define MB (1024*KB)
#define GB (1024L*MB)

/*
 * Note:  pte   --> Linux PTE
 *        HPTE  --> PowerPC Hashed Page Table Entry
 *
 * Execution context:
 *   htab_initialize is called with the MMU off (of course), but
 *   the kernel has been copied down to zero so it can directly
 *   reference global data.  At this point it is very difficult
 *   to print debug info.
 *
 */

#ifdef CONFIG_U3_DART
extern unsigned long dart_tablebase;
#endif /* CONFIG_U3_DART */

static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];

struct hash_pte *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
EXPORT_SYMBOL_GPL(htab_hash_mask);
int mmu_linear_psize = MMU_PAGE_4K;
int mmu_virtual_psize = MMU_PAGE_4K;
int mmu_vmalloc_psize = MMU_PAGE_4K;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int mmu_vmemmap_psize = MMU_PAGE_4K;
#endif
int mmu_io_psize = MMU_PAGE_4K;
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
int mmu_highuser_ssize = MMU_SEGSIZE_256M;
u16 mmu_slb_size = 64;
EXPORT_SYMBOL_GPL(mmu_slb_size);
#ifdef CONFIG_PPC_64K_PAGES
int mmu_ci_restrictions;
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
static u8 *linear_map_hash_slots;
static unsigned long linear_map_hash_count;
static DEFINE_SPINLOCK(linear_map_hash_lock);
#endif /* CONFIG_DEBUG_PAGEALLOC */

/* These are definitions of page-size arrays to be used when none
 * are provided by the firmware.
 */

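/* Each mmu_psize_def describes one hardware page size:
 *   shift  - log2 of the page size
 *   sllp   - SLB L||LP encoding used in the segment (slbmte) entry
 *   penc   - HPTE "LP" encoding of each actual page size usable within
 *            this base page size (-1 if the combination is unsupported)
 *   avpnm  - bits to mask out of the AVPN in the HPTE
 *   tlbiel - tlbiel (local invalidate) supported for this page size
 */
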
/* Pre-POWER4 CPUs (4k pages only)
 */
static struct mmu_psize_def mmu_psize_defaults_old[] = {
	[MMU_PAGE_4K] = {
		.shift	= 12,
		.sllp	= 0,
		.penc	= {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
		.avpnm	= 0,
		.tlbiel = 0,
	},
};

/* POWER4, GPUL, POWER5
 *
 * Support for 16Mb large pages
 */
static struct mmu_psize_def mmu_psize_defaults_gp[] = {
	[MMU_PAGE_4K] = {
		.shift	= 12,
		.sllp	= 0,
		.penc	= {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
		.avpnm	= 0,
		.tlbiel = 1,
	},
	[MMU_PAGE_16M] = {
		.shift	= 24,
		.sllp	= SLB_VSID_L,
		.penc	= {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0,
			   [MMU_PAGE_16M + 1 ... MMU_PAGE_COUNT - 1] = -1 },
		.avpnm	= 0x1UL,
		.tlbiel = 0,
	},
};

static unsigned long htab_convert_pte_flags(unsigned long pteflags)
{
	unsigned long rflags = pteflags & 0x1fa;

	/* _PAGE_EXEC -> NOEXEC */
	if ((pteflags & _PAGE_EXEC) == 0)
		rflags |= HPTE_R_N;

	/* PP bits. PAGE_USER is already PP bit 0x2, so we only
	 * need to add in 0x1 if it's a read-only user page
	 */
	if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) &&
					 (pteflags & _PAGE_DIRTY)))
		rflags |= 1;

	/* Always add C */
	return rflags | HPTE_R_C;
}

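/* Create bolted HPTEs for a kernel mapping.  Entries inserted with
 * HPTE_V_BOLTED are never evicted when a hash group fills up, so the
 * kernel linear mapping can never take a hash fault.
 */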
int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
		      unsigned long pstart, unsigned long prot,
		      int psize, int ssize)
{
	unsigned long vaddr, paddr;
	unsigned int step, shift;
	int ret = 0;

	shift = mmu_psize_defs[psize].shift;
	step = 1 << shift;

	prot = htab_convert_pte_flags(prot);

	DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
	    vstart, vend, pstart, prot, psize, ssize);

	for (vaddr = vstart, paddr = pstart; vaddr < vend;
	     vaddr += step, paddr += step) {
		unsigned long hash, hpteg;
		unsigned long vsid = get_kernel_vsid(vaddr, ssize);
		unsigned long vpn = hpt_vpn(vaddr, vsid, ssize);
		unsigned long tprot = prot;

		/*
		 * If we hit a bad address return error.
		 */
		if (!vsid)
			return -1;
		/* Make kernel text executable */
		if (overlaps_kernel_text(vaddr, vaddr + step))
			tprot &= ~HPTE_R_N;

		hash = hpt_hash(vpn, shift, ssize);
		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);

		BUG_ON(!ppc_md.hpte_insert);
		ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
					 HPTE_V_BOLTED, psize, psize, ssize);

		if (ret < 0)
			break;
#ifdef CONFIG_DEBUG_PAGEALLOC
		if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
			linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
#endif /* CONFIG_DEBUG_PAGEALLOC */
	}
	return ret < 0 ? ret : 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
static int htab_remove_mapping(unsigned long vstart, unsigned long vend,
			       int psize, int ssize)
{
	unsigned long vaddr;
	unsigned int step, shift;

	shift = mmu_psize_defs[psize].shift;
	step = 1 << shift;

	if (!ppc_md.hpte_removebolted) {
		printk(KERN_WARNING "Platform doesn't implement "
				"hpte_removebolted\n");
		return -EINVAL;
	}

	for (vaddr = vstart; vaddr < vend; vaddr += step)
		ppc_md.hpte_removebolted(vaddr, psize, ssize);

	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

static int __init htab_dt_scan_seg_sizes(unsigned long node,
					 const char *uname, int depth,
					 void *data)
{
	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
	u32 *prop;
	unsigned long size = 0;

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,processor-segment-sizes",
					  &size);
	if (prop == NULL)
		return 0;
	for (; size >= 4; size -= 4, ++prop) {
		if (prop[0] == 40) {
			DBG("1T segment support detected\n");
			cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
			return 1;
		}
	}
	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
	return 0;
}

static void __init htab_init_seg_sizes(void)
{
	of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
}

static int __init get_idx_from_shift(unsigned int shift)
{
	int idx = -1;

	switch (shift) {
	case 0xc:
		idx = MMU_PAGE_4K;
		break;
	case 0x10:
		idx = MMU_PAGE_64K;
		break;
	case 0x14:
		idx = MMU_PAGE_1M;
		break;
	case 0x18:
		idx = MMU_PAGE_16M;
		break;
	case 0x22:
		idx = MMU_PAGE_16G;
		break;
	}
	return idx;
}

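/* Parse the "ibm,segment-page-sizes" property.  It is a list of cells:
 * for each supported base page size the firmware gives the base page
 * shift, the SLB encoding (sllp) and a count N, followed by N pairs of
 * {actual page shift, HPTE penc} describing the page sizes that can be
 * used within a segment of that base size.
 */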
static int __init htab_dt_scan_page_sizes(unsigned long node,
					  const char *uname, int depth,
					  void *data)
{
	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
	u32 *prop;
	unsigned long size = 0;

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	prop = (u32 *)of_get_flat_dt_prop(node,
					  "ibm,segment-page-sizes", &size);
	if (prop != NULL) {
		pr_info("Page sizes from device-tree:\n");
		size /= 4;
		cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
		while(size > 0) {
			unsigned int base_shift = prop[0];
			unsigned int slbenc = prop[1];
			unsigned int lpnum = prop[2];
			struct mmu_psize_def *def;
			int idx, base_idx;

			size -= 3; prop += 3;
			base_idx = get_idx_from_shift(base_shift);
			if (base_idx < 0) {
				/*
				 * skip the pte encoding also
				 */
				prop += lpnum * 2; size -= lpnum * 2;
				continue;
			}
			def = &mmu_psize_defs[base_idx];
			if (base_idx == MMU_PAGE_16M)
				cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;

			def->shift = base_shift;
			if (base_shift <= 23)
				def->avpnm = 0;
			else
				def->avpnm = (1 << (base_shift - 23)) - 1;
			def->sllp = slbenc;
			/*
			 * We don't know for sure what's up with tlbiel, so
			 * for now we only set it for 4K and 64K pages
			 */
			if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
				def->tlbiel = 1;
			else
				def->tlbiel = 0;

			while (size > 0 && lpnum) {
				unsigned int shift = prop[0];
				int penc = prop[1];

				prop += 2; size -= 2;
				lpnum--;

				idx = get_idx_from_shift(shift);
				if (idx < 0)
					continue;

				if (penc == -1)
					pr_err("Invalid penc for base_shift=%d "
					       "shift=%d\n", base_shift, shift);

				def->penc[idx] = penc;
				pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
					" avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
					base_shift, shift, def->sllp,
					def->avpnm, def->tlbiel, def->penc[idx]);
			}
		}
		return 1;
	}
	return 0;
}

#ifdef CONFIG_HUGETLB_PAGE
/* Scan for 16G memory blocks that have been set aside for huge pages
 * and reserve those blocks for 16G huge pages.
 */
static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
					const char *uname, int depth,
					void *data) {
	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
	unsigned long *addr_prop;
	u32 *page_count_prop;
	unsigned int expected_pages;
	long unsigned int phys_addr;
	long unsigned int block_size;

	/* We are scanning "memory" nodes only */
	if (type == NULL || strcmp(type, "memory") != 0)
		return 0;

	/* This property is the log base 2 of the number of virtual pages that
	 * will represent this memory block. */
	page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
	if (page_count_prop == NULL)
		return 0;
	expected_pages = (1 << page_count_prop[0]);
	addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
	if (addr_prop == NULL)
		return 0;
	phys_addr = addr_prop[0];
	block_size = addr_prop[1];
	if (block_size != (16 * GB))
		return 0;
	printk(KERN_INFO "Huge page(16GB) memory: "
			"addr = 0x%lX size = 0x%lX pages = %d\n",
			phys_addr, block_size, expected_pages);
	if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) {
		memblock_reserve(phys_addr, block_size * expected_pages);
		add_gpage(phys_addr, block_size, expected_pages);
	}
	return 0;
}
#endif /* CONFIG_HUGETLB_PAGE */

static void mmu_psize_set_default_penc(void)
{
	int bpsize, apsize;
	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
		for (apsize = 0; apsize < MMU_PAGE_COUNT; apsize++)
			mmu_psize_defs[bpsize].penc[apsize] = -1;
}

static void __init htab_init_page_sizes(void)
{
	int rc;

	/* set the invalid penc to -1 */
	mmu_psize_set_default_penc();

	/* Default to 4K pages only */
	memcpy(mmu_psize_defs, mmu_psize_defaults_old,
	       sizeof(mmu_psize_defaults_old));

	/*
	 * Try to find the available page sizes in the device-tree
	 */
	rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
	if (rc != 0)  /* Found */
		goto found;

	/*
	 * Not in the device-tree, let's fall back on the known size
	 * list for 16M capable GP & GR
	 */
	if (mmu_has_feature(MMU_FTR_16M_PAGE))
		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
		       sizeof(mmu_psize_defaults_gp));
 found:
#ifndef CONFIG_DEBUG_PAGEALLOC
	/*
	 * Pick a size for the linear mapping. Currently, we only support
	 * 16M, 1M and 4K which is the default
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift)
		mmu_linear_psize = MMU_PAGE_16M;
	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
		mmu_linear_psize = MMU_PAGE_1M;
#endif /* CONFIG_DEBUG_PAGEALLOC */

#ifdef CONFIG_PPC_64K_PAGES
	/*
	 * Pick a size for the ordinary pages. Default is 4K, we support
	 * 64K for user mappings and vmalloc if supported by the processor.
	 * We only use 64k for ioremap if the processor
	 * (and firmware) support cache-inhibited large pages.
	 * If not, we use 4k and set mmu_ci_restrictions so that
	 * hash_page knows to switch processes that use cache-inhibited
	 * mappings to 4k pages.
	 */
	if (mmu_psize_defs[MMU_PAGE_64K].shift) {
		mmu_virtual_psize = MMU_PAGE_64K;
		mmu_vmalloc_psize = MMU_PAGE_64K;
		if (mmu_linear_psize == MMU_PAGE_4K)
			mmu_linear_psize = MMU_PAGE_64K;
		if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
			/*
			 * Don't use 64k pages for ioremap on pSeries, since
			 * that would stop us accessing the HEA ethernet.
			 */
			if (!machine_is(pseries))
				mmu_io_psize = MMU_PAGE_64K;
		} else
			mmu_ci_restrictions = 1;
	}
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	/* We try to use 16M pages for vmemmap if that is supported
	 * and we have at least 1G of RAM at boot
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift &&
	    memblock_phys_mem_size() >= 0x40000000)
		mmu_vmemmap_psize = MMU_PAGE_16M;
	else if (mmu_psize_defs[MMU_PAGE_64K].shift)
		mmu_vmemmap_psize = MMU_PAGE_64K;
	else
		mmu_vmemmap_psize = MMU_PAGE_4K;
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

	printk(KERN_DEBUG "Page orders: linear mapping = %d, "
	       "virtual = %d, io = %d"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	       ", vmemmap = %d"
#endif
	       "\n",
	       mmu_psize_defs[mmu_linear_psize].shift,
	       mmu_psize_defs[mmu_virtual_psize].shift,
	       mmu_psize_defs[mmu_io_psize].shift
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	       ,mmu_psize_defs[mmu_vmemmap_psize].shift
#endif
	       );

#ifdef CONFIG_HUGETLB_PAGE
	/* Reserve 16G huge page memory sections for huge pages */
	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
#endif /* CONFIG_HUGETLB_PAGE */
}

static int __init htab_dt_scan_pftsize(unsigned long node,
				       const char *uname, int depth,
				       void *data)
{
	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
	u32 *prop;

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
	if (prop != NULL) {
		/* pft_size[0] is the NUMA CEC cookie */
		ppc64_pft_size = prop[1];
		return 1;
	}
	return 0;
}

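/* Size the hash table so that there is roughly one PTEG (8 HPTEs of
 * 16 bytes each, i.e. 128 bytes) for every two pages of RAM (memory size
 * rounded up to a power of two), with a floor of 2^11 PTEGs (a 256kB
 * table).  For example, 1GB of RAM with 4K pages gives
 * 2^30 >> 13 = 2^17 PTEGs, i.e. a 16MB hash table.
 */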
static unsigned long __init htab_get_table_size(void)
{
	unsigned long mem_size, rnd_mem_size, pteg_count, psize;

	/* If hash size isn't already provided by the platform, we try to
	 * retrieve it from the device-tree. If it's not there either, we
	 * calculate it now based on the total RAM size
	 */
	if (ppc64_pft_size == 0)
		of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
	if (ppc64_pft_size)
		return 1UL << ppc64_pft_size;

	/* round mem_size up to next power of 2 */
	mem_size = memblock_phys_mem_size();
	rnd_mem_size = 1UL << __ilog2(mem_size);
	if (rnd_mem_size < mem_size)
		rnd_mem_size <<= 1;

	/* # pages / 2 */
	psize = mmu_psize_defs[mmu_virtual_psize].shift;
	pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11);

	return pteg_count << 7;
}

#ifdef CONFIG_MEMORY_HOTPLUG
int create_section_mapping(unsigned long start, unsigned long end)
{
	return htab_bolt_mapping(start, end, __pa(start),
				 pgprot_val(PAGE_KERNEL), mmu_linear_psize,
				 mmu_kernel_ssize);
}

int remove_section_mapping(unsigned long start, unsigned long end)
{
	return htab_remove_mapping(start, end, mmu_linear_psize,
				   mmu_kernel_ssize);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

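/* The low level hash assembly (hash_low_64.S) contains call sites that are
 * patched at boot to branch to the platform's hpte_insert/remove/updatepp
 * routines.  ppc_md.hpte_* are function descriptors on ppc64, so
 * FUNCTION_TEXT() dereferences the descriptor to get the actual entry
 * point that patch_branch() should target.
 */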
#define FUNCTION_TEXT(A)	((*(unsigned long *)(A)))

static void __init htab_finish_init(void)
{
	extern unsigned int *htab_call_hpte_insert1;
	extern unsigned int *htab_call_hpte_insert2;
	extern unsigned int *htab_call_hpte_remove;
	extern unsigned int *htab_call_hpte_updatepp;

#ifdef CONFIG_PPC_HAS_HASH_64K
	extern unsigned int *ht64_call_hpte_insert1;
	extern unsigned int *ht64_call_hpte_insert2;
	extern unsigned int *ht64_call_hpte_remove;
	extern unsigned int *ht64_call_hpte_updatepp;

	patch_branch(ht64_call_hpte_insert1,
		FUNCTION_TEXT(ppc_md.hpte_insert),
		BRANCH_SET_LINK);
	patch_branch(ht64_call_hpte_insert2,
		FUNCTION_TEXT(ppc_md.hpte_insert),
		BRANCH_SET_LINK);
	patch_branch(ht64_call_hpte_remove,
		FUNCTION_TEXT(ppc_md.hpte_remove),
		BRANCH_SET_LINK);
	patch_branch(ht64_call_hpte_updatepp,
		FUNCTION_TEXT(ppc_md.hpte_updatepp),
		BRANCH_SET_LINK);

#endif /* CONFIG_PPC_HAS_HASH_64K */

	patch_branch(htab_call_hpte_insert1,
		FUNCTION_TEXT(ppc_md.hpte_insert),
		BRANCH_SET_LINK);
	patch_branch(htab_call_hpte_insert2,
		FUNCTION_TEXT(ppc_md.hpte_insert),
		BRANCH_SET_LINK);
	patch_branch(htab_call_hpte_remove,
		FUNCTION_TEXT(ppc_md.hpte_remove),
		BRANCH_SET_LINK);
	patch_branch(htab_call_hpte_updatepp,
		FUNCTION_TEXT(ppc_md.hpte_updatepp),
		BRANCH_SET_LINK);
}

static void __init htab_initialize(void)
{
	unsigned long table;
	unsigned long pteg_count;
	unsigned long prot;
	unsigned long base = 0, size = 0, limit;
	struct memblock_region *reg;

	DBG(" -> htab_initialize()\n");

	/* Initialize segment sizes */
	htab_init_seg_sizes();

	/* Initialize page sizes */
	htab_init_page_sizes();

	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
		mmu_kernel_ssize = MMU_SEGSIZE_1T;
		mmu_highuser_ssize = MMU_SEGSIZE_1T;
		printk(KERN_INFO "Using 1TB segments\n");
	}

	/*
	 * Calculate the required size of the htab.  We want the number of
	 * PTEGs to equal one half the number of real pages.
	 */
	htab_size_bytes = htab_get_table_size();
	pteg_count = htab_size_bytes >> 7;

	htab_hash_mask = pteg_count - 1;

	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		/* Using a hypervisor which owns the htab */
		htab_address = NULL;
		_SDR1 = 0;
#ifdef CONFIG_FA_DUMP
		/*
		 * If firmware assisted dump is active, firmware preserves
		 * the contents of htab along with entire partition memory.
		 * Clear the htab if firmware assisted dump is active so
		 * that we don't end up using old mappings.
		 */
		if (is_fadump_active() && ppc_md.hpte_clear_all)
			ppc_md.hpte_clear_all();
#endif
	} else {
		/* Find storage for the HPT.  Must be contiguous in
		 * the absolute address space. On cell we want it to be
		 * in the first 2 Gig so we can use it for IOMMU hacks.
		 */
		if (machine_is(cell))
			limit = 0x80000000;
		else
			limit = MEMBLOCK_ALLOC_ANYWHERE;

		table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);

		DBG("Hash table allocated at %lx, size: %lx\n", table,
		    htab_size_bytes);

		htab_address = __va(table);

		/* htab absolute addr + encoded htabsize */
		_SDR1 = table + __ilog2(pteg_count) - 11;

		/* Initialize the HPT with no entries */
		memset((void *)table, 0, htab_size_bytes);

		/* Set SDR1 */
		mtspr(SPRN_SDR1, _SDR1);
	}

	prot = pgprot_val(PAGE_KERNEL);

#ifdef CONFIG_DEBUG_PAGEALLOC
	linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
	linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
						    1, ppc64_rma_size));
	memset(linear_map_hash_slots, 0, linear_map_hash_count);
#endif /* CONFIG_DEBUG_PAGEALLOC */

	/* On U3 based machines, we need to reserve the DART area and
	 * _NOT_ map it to avoid cache paradoxes as it's remapped non
	 * cacheable later on
	 */

	/* create the bolted linear mapping in the hash table */
	for_each_memblock(memory, reg) {
		base = (unsigned long)__va(reg->base);
		size = reg->size;

		DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
		    base, size, prot);

#ifdef CONFIG_U3_DART
		/* Do not map the DART space. Fortunately, it will be aligned
		 * in such a way that it will not cross two memblock regions and
		 * will fit within a single 16Mb page.
		 * The DART space is assumed to be a full 16Mb region even if
		 * we only use 2Mb of that space. We will use more of it later
		 * for AGP GART. We have to use a full 16Mb large page.
		 */
		DBG("DART base: %lx\n", dart_tablebase);

		if (dart_tablebase != 0 && dart_tablebase >= base
		    && dart_tablebase < (base + size)) {
			unsigned long dart_table_end = dart_tablebase + 16 * MB;
			if (base != dart_tablebase)
				BUG_ON(htab_bolt_mapping(base, dart_tablebase,
							__pa(base), prot,
							mmu_linear_psize,
							mmu_kernel_ssize));
			if ((base + size) > dart_table_end)
				BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
							base + size,
							__pa(dart_table_end),
							prot,
							mmu_linear_psize,
							mmu_kernel_ssize));
			continue;
		}
#endif /* CONFIG_U3_DART */
		BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
				prot, mmu_linear_psize, mmu_kernel_ssize));
	}
	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

	/*
	 * If we have a memory_limit and we've allocated TCEs then we need to
	 * explicitly map the TCE area at the top of RAM. We also cope with the
	 * case that the TCEs start below memory_limit.
	 * tce_alloc_start/end are 16MB aligned so the mapping should work
	 * for either 4K or 16MB pages.
	 */
	if (tce_alloc_start) {
		tce_alloc_start = (unsigned long)__va(tce_alloc_start);
		tce_alloc_end = (unsigned long)__va(tce_alloc_end);

		if (base + size >= tce_alloc_start)
			tce_alloc_start = base + size + 1;

		BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
					 __pa(tce_alloc_start), prot,
					 mmu_linear_psize, mmu_kernel_ssize));
	}

	htab_finish_init();

	DBG(" <- htab_initialize()\n");
}
#undef KB
#undef MB

void __init early_init_mmu(void)
{
	/* Setup initial STAB address in the PACA */
	get_paca()->stab_real = __pa((u64)&initial_stab);
	get_paca()->stab_addr = (u64)&initial_stab;

	/* Initialize the MMU Hash table and create the linear mapping
	 * of memory. Has to be done before stab/slb initialization as
	 * this is currently where the page size encoding is obtained
	 */
	htab_initialize();

	/* Initialize stab / SLB management */
	if (mmu_has_feature(MMU_FTR_SLB))
		slb_initialize();
	else
		stab_initialize(get_paca()->stab_real);
}

#ifdef CONFIG_SMP
void __cpuinit early_init_mmu_secondary(void)
{
	/* Initialize hash table for that CPU */
	if (!firmware_has_feature(FW_FEATURE_LPAR))
		mtspr(SPRN_SDR1, _SDR1);

	/* Initialize STAB/SLB. We use a virtual address as it works
	 * in real mode on pSeries.
	 */
	if (mmu_has_feature(MMU_FTR_SLB))
		slb_initialize();
	else
		stab_initialize(get_paca()->stab_addr);
}
#endif /* CONFIG_SMP */

/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
{
	struct page *page;

	if (!pfn_valid(pte_pfn(pte)))
		return pp;

	page = pte_page(pte);

	/* page is dirty */
	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
		if (trap == 0x400) {
			flush_dcache_icache_page(page);
			set_bit(PG_arch_1, &page->flags);
		} else
			pp |= HPTE_R_N;
	}
	return pp;
}

#ifdef CONFIG_PPC_MM_SLICES
unsigned int get_paca_psize(unsigned long addr)
{
	u64 lpsizes;
	unsigned char *hpsizes;
	unsigned long index, mask_index;

	if (addr < SLICE_LOW_TOP) {
		lpsizes = get_paca()->context.low_slices_psize;
		index = GET_LOW_SLICE_INDEX(addr);
		return (lpsizes >> (index * 4)) & 0xF;
	}
	hpsizes = get_paca()->context.high_slices_psize;
	index = GET_HIGH_SLICE_INDEX(addr);
	mask_index = index & 0x1;
	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
}

#else
unsigned int get_paca_psize(unsigned long addr)
{
	return get_paca()->context.user_psize;
}
#endif

/*
 * Demote a segment to using 4k pages.
 * For now this makes the whole process use 4k pages.
 */
#ifdef CONFIG_PPC_64K_PAGES
void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
{
	if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
		return;
	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
#ifdef CONFIG_SPU_BASE
	spu_flush_all_slbs(mm);
#endif
	if (get_paca_psize(addr) != MMU_PAGE_4K) {
		get_paca()->context = mm->context;
		slb_flush_and_rebolt();
	}
}
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_PPC_SUBPAGE_PROT
/*
 * This looks up a 2-bit protection code for a 4k subpage of a 64k page.
 * Userspace sets the subpage permissions using the subpage_prot system call.
 *
 * Result is 0: full permissions, _PAGE_RW: read-only,
 * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
 */
static int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
	struct subpage_prot_table *spt = &mm->context.spt;
	u32 spp = 0;
	u32 **sbpm, *sbpp;

	if (ea >= spt->maxaddr)
		return 0;
	if (ea < 0x100000000) {
		/* addresses below 4GB use spt->low_prot */
		sbpm = spt->low_prot;
	} else {
		sbpm = spt->protptrs[ea >> SBP_L3_SHIFT];
		if (!sbpm)
			return 0;
	}
	sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
	if (!sbpp)
		return 0;
	spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];

	/* extract 2-bit bitfield for this 4k subpage */
	spp >>= 30 - 2 * ((ea >> 12) & 0xf);

	/* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
	spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
	return spp;
}

#else /* CONFIG_PPC_SUBPAGE_PROT */
static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
	return 0;
}
#endif

void hash_failure_debug(unsigned long ea, unsigned long access,
			unsigned long vsid, unsigned long trap,
			int ssize, int psize, int lpsize, unsigned long pte)
{
	if (!printk_ratelimit())
		return;
	pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
		ea, access, current->comm);
	pr_info("    trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n",
		trap, vsid, ssize, psize, lpsize, pte);
}

/* Result code is:
 *  0 - handled
 *  1 - normal page fault
 * -1 - critical hash insertion error
 * -2 - access not permitted by subpage protection mechanism
 */
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
	enum ctx_state prev_state = exception_enter();
	pgd_t *pgdir;
	unsigned long vsid;
	struct mm_struct *mm;
	pte_t *ptep;
	unsigned hugeshift;
	const struct cpumask *tmp;
	int rc, user_region = 0, local = 0;
	int psize, ssize;

	DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
		ea, access, trap);

	/* Get region & vsid */
	switch (REGION_ID(ea)) {
	case USER_REGION_ID:
		user_region = 1;
		mm = current->mm;
		if (! mm) {
			DBG_LOW(" user region with no mm !\n");
			rc = 1;
			goto bail;
		}
		psize = get_slice_psize(mm, ea);
		ssize = user_segment_size(ea);
		vsid = get_vsid(mm->context.id, ea, ssize);
		break;
	case VMALLOC_REGION_ID:
		mm = &init_mm;
		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
		if (ea < VMALLOC_END)
			psize = mmu_vmalloc_psize;
		else
			psize = mmu_io_psize;
		ssize = mmu_kernel_ssize;
		break;
	default:
		/* Not a valid range
		 * Send the problem up to do_page_fault
		 */
		rc = 1;
		goto bail;
	}
	DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);

	/* Bad address. */
	if (!vsid) {
		DBG_LOW("Bad address!\n");
		rc = 1;
		goto bail;
	}
	/* Get pgdir */
	pgdir = mm->pgd;
	if (pgdir == NULL) {
		rc = 1;
		goto bail;
	}

	/* Check CPU locality */
	tmp = cpumask_of(smp_processor_id());
	if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
		local = 1;

#ifndef CONFIG_PPC_64K_PAGES
	/* If we use 4K pages and our psize is not 4K, then we might
	 * be hitting a special driver mapping, and need to align the
	 * address before we fetch the PTE.
	 *
	 * It could also be a hugepage mapping, in which case this is
	 * not necessary, but it's not harmful, either.
	 */
	if (psize != MMU_PAGE_4K)
		ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
#endif /* CONFIG_PPC_64K_PAGES */

	/* Get PTE and page size from page tables */
	ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
	if (ptep == NULL || !pte_present(*ptep)) {
		DBG_LOW(" no PTE !\n");
		rc = 1;
		goto bail;
	}

	/* Add _PAGE_PRESENT to the required access perm */
	access |= _PAGE_PRESENT;

	/* Pre-check access permissions (will be re-checked atomically
	 * in __hash_page_XX but this pre-check is a fast path
	 */
	if (access & ~pte_val(*ptep)) {
		DBG_LOW(" no access !\n");
		rc = 1;
		goto bail;
	}

#ifdef CONFIG_HUGETLB_PAGE
	if (hugeshift) {
		rc = __hash_page_huge(ea, access, vsid, ptep, trap, local,
					ssize, hugeshift, psize);
		goto bail;
	}
#endif /* CONFIG_HUGETLB_PAGE */

#ifndef CONFIG_PPC_64K_PAGES
	DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
#else
	DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
		pte_val(*(ptep + PTRS_PER_PTE)));
#endif
	/* Do actual hashing */
#ifdef CONFIG_PPC_64K_PAGES
	/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
	if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
		demote_segment_4k(mm, ea);
		psize = MMU_PAGE_4K;
	}

	/* If this PTE is non-cacheable and we have restrictions on
	 * using non cacheable large pages, then we switch to 4k
	 */
	if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
	    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
		if (user_region) {
			demote_segment_4k(mm, ea);
			psize = MMU_PAGE_4K;
		} else if (ea < VMALLOC_END) {
			/*
			 * some driver did a non-cacheable mapping
			 * in vmalloc space, so switch vmalloc
			 * to 4k pages
			 */
			printk(KERN_ALERT "Reducing vmalloc segment "
			       "to 4kB pages because of "
			       "non-cacheable mapping\n");
			psize = mmu_vmalloc_psize = MMU_PAGE_4K;
#ifdef CONFIG_SPU_BASE
			spu_flush_all_slbs(mm);
#endif
		}
	}
	if (user_region) {
		if (psize != get_paca_psize(ea)) {
			get_paca()->context = mm->context;
			slb_flush_and_rebolt();
		}
	} else if (get_paca()->vmalloc_sllp !=
		   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
		get_paca()->vmalloc_sllp =
			mmu_psize_defs[mmu_vmalloc_psize].sllp;
		slb_vmalloc_update();
	}
#endif /* CONFIG_PPC_64K_PAGES */

#ifdef CONFIG_PPC_HAS_HASH_64K
	if (psize == MMU_PAGE_64K)
		rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
	else
#endif /* CONFIG_PPC_HAS_HASH_64K */
	{
		int spp = subpage_protection(mm, ea);
		if (access & spp)
			rc = -2;
		else
			rc = __hash_page_4K(ea, access, vsid, ptep, trap,
					    local, ssize, spp);
	}

	/* Dump some info in case of hash insertion failure; such failures
	 * should never happen, so it is really useful to know if/when they do
	 */
	if (rc == -1)
		hash_failure_debug(ea, access, vsid, trap, ssize, psize,
				   psize, pte_val(*ptep));
#ifndef CONFIG_PPC_64K_PAGES
	DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else
	DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
		pte_val(*(ptep + PTRS_PER_PTE)));
#endif
	DBG_LOW(" -> rc=%d\n", rc);

bail:
	exception_exit(prev_state);
	return rc;
}
EXPORT_SYMBOL_GPL(hash_page);

void hash_preload(struct mm_struct *mm, unsigned long ea,
		  unsigned long access, unsigned long trap)
{
	unsigned long vsid;
	pgd_t *pgdir;
	pte_t *ptep;
	unsigned long flags;
	int rc, ssize, local = 0;

	BUG_ON(REGION_ID(ea) != USER_REGION_ID);

#ifdef CONFIG_PPC_MM_SLICES
	/* We only prefault standard pages for now */
	if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize))
		return;
#endif

	DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
		" trap=%lx\n", mm, mm->pgd, ea, access, trap);

	/* Get Linux PTE if available */
	pgdir = mm->pgd;
	if (pgdir == NULL)
		return;
	ptep = find_linux_pte(pgdir, ea);
	if (!ptep)
		return;

#ifdef CONFIG_PPC_64K_PAGES
	/* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
	 * a 64K kernel), then we don't preload, hash_page() will take
	 * care of it once we actually try to access the page.
	 * That way we don't have to duplicate all of the logic for segment
	 * page size demotion here
	 */
	if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
		return;
#endif /* CONFIG_PPC_64K_PAGES */

	/* Get VSID */
	ssize = user_segment_size(ea);
	vsid = get_vsid(mm->context.id, ea, ssize);
	if (!vsid)
		return;

	/* Hash doesn't like irqs */
	local_irq_save(flags);

	/* Is that local to this CPU ? */
	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		local = 1;

	/* Hash it in */
#ifdef CONFIG_PPC_HAS_HASH_64K
	if (mm->context.user_psize == MMU_PAGE_64K)
		rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
	else
#endif /* CONFIG_PPC_HAS_HASH_64K */
		rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
				    subpage_protection(mm, ea));

	/* Dump some info in case of hash insertion failure; such failures
	 * should never happen, so it is really useful to know if/when they do
	 */
	if (rc == -1)
		hash_failure_debug(ea, access, vsid, trap, ssize,
				   mm->context.user_psize,
				   mm->context.user_psize,
				   pte_val(*ptep));

	local_irq_restore(flags);
}

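/* Invalidate the HPTE(s) backing one Linux PTE.  The hash slot index
 * (hidx) cached in the real PTE records which slot within the primary
 * or secondary PTEG holds each entry, so no search of the group is
 * needed; _PTEIDX_SECONDARY means the entry went into the secondary
 * hash bucket (hash = ~hash).
 */
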
/* WARNING: This is called from hash_low_64.S, if you change this prototype,
 *          do not forget to update the assembly call site !
 */
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
		     int local)
{
	unsigned long hash, index, shift, hidx, slot;

	DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
	pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
		hash = hpt_hash(vpn, shift, ssize);
		hidx = __rpte_to_hidx(pte, index);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;
		DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
		ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local);
	} pte_iterate_hashed_end();

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	/* Transactions are not aborted by tlbiel, only tlbie.
	 * Without an abort, syncing a page back to a block device w/ PIO
	 * could pick up transactional data (bad!) so we force an abort here.
	 * Before the sync the page will be made read-only, which will
	 * flush_hash_page.
	 * BIG ISSUE here: if the kernel uses a page from userspace without
	 * unmapping it first, it may see the speculated version.
	 */
	if (local && cpu_has_feature(CPU_FTR_TM) &&
	    current->thread.regs &&
	    MSR_TM_ACTIVE(current->thread.regs->msr)) {
		tm_enable();
		tm_abort(TM_CAUSE_TLBI);
	}
#endif
}

void flush_hash_range(unsigned long number, int local)
{
	if (ppc_md.flush_hash_range)
		ppc_md.flush_hash_range(number, local);
	else {
		int i;
		struct ppc64_tlb_batch *batch =
			&__get_cpu_var(ppc64_tlb_batch);

		for (i = 0; i < number; i++)
			flush_hash_page(batch->vpn[i], batch->pte[i],
					batch->psize, batch->ssize, local);
	}
}

/*
 * low_hash_fault is called when the low level hash code failed
 * to insert a PTE due to a hypervisor error
 */
void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
{
	enum ctx_state prev_state = exception_enter();

	if (user_mode(regs)) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
		if (rc == -2)
			_exception(SIGSEGV, regs, SEGV_ACCERR, address);
		else
#endif
			_exception(SIGBUS, regs, BUS_ADRERR, address);
	} else
		bad_page_fault(regs, address, SIGBUS);

	exception_exit(prev_state);
}

long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
			   unsigned long pa, unsigned long rflags,
			   unsigned long vflags, int psize, int ssize)
{
	unsigned long hpte_group;
	long slot;

repeat:
	hpte_group = ((hash & htab_hash_mask) *
		       HPTES_PER_GROUP) & ~0x7UL;

	/* Insert into the hash table, primary slot */
	slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
				  psize, psize, ssize);

	/* Primary is full, try the secondary */
	if (unlikely(slot == -1)) {
		hpte_group = ((~hash & htab_hash_mask) *
			      HPTES_PER_GROUP) & ~0x7UL;
		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
					  vflags | HPTE_V_SECONDARY,
					  psize, psize, ssize);
		if (slot == -1) {
			if (mftb() & 0x1)
				hpte_group = ((hash & htab_hash_mask) *
					      HPTES_PER_GROUP)&~0x7UL;

			ppc_md.hpte_remove(hpte_group);
			goto repeat;
		}
	}

	return slot;
}

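/* CONFIG_DEBUG_PAGEALLOC: kernel_map_pages() drops the HPTEs of freed
 * pages from the bolted linear mapping so any stray access faults
 * immediately; the slot of each currently-mapped page is remembered in
 * linear_map_hash_slots so it can be invalidated later.
 */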
#ifdef CONFIG_DEBUG_PAGEALLOC
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
	unsigned long hash;
	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
	unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
	long ret;

	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);

	/* Don't create HPTE entries for bad address */
	if (!vsid)
		return;

	ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
				    HPTE_V_BOLTED,
				    mmu_linear_psize, mmu_kernel_ssize);

	BUG_ON (ret < 0);
	spin_lock(&linear_map_hash_lock);
	BUG_ON(linear_map_hash_slots[lmi] & 0x80);
	linear_map_hash_slots[lmi] = ret | 0x80;
	spin_unlock(&linear_map_hash_lock);
}

static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
{
	unsigned long hash, hidx, slot;
	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);

	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
	spin_lock(&linear_map_hash_lock);
	BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
	hidx = linear_map_hash_slots[lmi] & 0x7f;
	linear_map_hash_slots[lmi] = 0;
	spin_unlock(&linear_map_hash_lock);
	if (hidx & _PTEIDX_SECONDARY)
		hash = ~hash;
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	slot += hidx & _PTEIDX_GROUP_IX;
	ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_kernel_ssize, 0);
}

void kernel_map_pages(struct page *page, int numpages, int enable)
{
	unsigned long flags, vaddr, lmi;
	int i;

	local_irq_save(flags);
	for (i = 0; i < numpages; i++, page++) {
		vaddr = (unsigned long)page_address(page);
		lmi = __pa(vaddr) >> PAGE_SHIFT;
		if (lmi >= linear_map_hash_count)
			continue;
		if (enable)
			kernel_map_linear_page(vaddr, lmi);
		else
			kernel_unmap_linear_page(vaddr, lmi);
	}
	local_irq_restore(flags);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */

void setup_initial_memory_limit(phys_addr_t first_memblock_base,
				phys_addr_t first_memblock_size)
{
	/* We don't currently support the first MEMBLOCK not mapping 0
	 * physical on those processors
	 */
	BUG_ON(first_memblock_base != 0);

	/* On LPAR systems, the first entry is our RMA region,
	 * non-LPAR 64-bit hash MMU systems don't have a limitation
	 * on real mode access, but using the first entry works well
	 * enough. We also clamp it to 1G to avoid some funky things
	 * such as RTAS bugs etc...
	 */
	ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);

	/* Finally limit subsequent allocations */
	memblock_set_current_limit(ppc64_rma_size);
}