Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v2.6.18-rc4
/*
 * PowerPC64 port by Mike Corrigan and Dave Engebretsen
 *   {mikejc|engebret}@us.ibm.com
 *
 *    Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 *    Module name: htab.c
 *
 *    Description:
 *      PowerPC Hashed Page Table functions
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG
#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/signal.h>

#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/types.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/machdep.h>
#include <asm/lmb.h>
#include <asm/abs_addr.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/eeh.h>
#include <asm/tlb.h>
#include <asm/cacheflush.h>
#include <asm/cputable.h>
#include <asm/sections.h>

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define KB (1024)
#define MB (1024*KB)

/*
 * Note:  pte   --> Linux PTE
 *        HPTE  --> PowerPC Hashed Page Table Entry
 *
 * Execution context:
 *   htab_initialize is called with the MMU off (of course), but
 *   the kernel has been copied down to zero so it can directly
 *   reference global data.  At this point it is very difficult
 *   to print debug info.
 */

#ifdef CONFIG_U3_DART
extern unsigned long dart_tablebase;
#endif /* CONFIG_U3_DART */

static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];

hpte_t *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
int mmu_linear_psize = MMU_PAGE_4K;
int mmu_virtual_psize = MMU_PAGE_4K;
int mmu_vmalloc_psize = MMU_PAGE_4K;
int mmu_io_psize = MMU_PAGE_4K;
#ifdef CONFIG_HUGETLB_PAGE
int mmu_huge_psize = MMU_PAGE_16M;
unsigned int HPAGE_SHIFT;
#endif
#ifdef CONFIG_PPC_64K_PAGES
int mmu_ci_restrictions;
#endif

/* These are definitions of page size arrays to be used when none
 * is provided by the firmware.
 */

/* Pre-POWER4 CPUs (4k pages only)
 */
struct mmu_psize_def mmu_psize_defaults_old[] = {
        [MMU_PAGE_4K] = {
                .shift  = 12,
                .sllp   = 0,
                .penc   = 0,
                .avpnm  = 0,
                .tlbiel = 0,
        },
};

/* POWER4, GPUL, POWER5
 *
 * Support for 16Mb large pages
 */
struct mmu_psize_def mmu_psize_defaults_gp[] = {
        [MMU_PAGE_4K] = {
                .shift  = 12,
                .sllp   = 0,
                .penc   = 0,
                .avpnm  = 0,
                .tlbiel = 1,
        },
        [MMU_PAGE_16M] = {
                .shift  = 24,
                .sllp   = SLB_VSID_L,
                .penc   = 0,
                .avpnm  = 0x1UL,
                .tlbiel = 0,
        },
};


int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
                      unsigned long pstart, unsigned long mode, int psize)
{
        unsigned long vaddr, paddr;
        unsigned int step, shift;
        unsigned long tmp_mode;
        int ret = 0;

        shift = mmu_psize_defs[psize].shift;
        step = 1 << shift;

        for (vaddr = vstart, paddr = pstart; vaddr < vend;
             vaddr += step, paddr += step) {
                unsigned long vpn, hash, hpteg;
                unsigned long vsid = get_kernel_vsid(vaddr);
                unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);

                vpn = va >> shift;
                tmp_mode = mode;

                /* Make non-kernel text non-executable */
                if (!in_kernel_text(vaddr))
                        tmp_mode = mode | HPTE_R_N;

                hash = hpt_hash(va, shift);
                hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);

                DBG("htab_bolt_mapping: calling %p\n", ppc_md.hpte_insert);

                BUG_ON(!ppc_md.hpte_insert);
                ret = ppc_md.hpte_insert(hpteg, va, paddr,
                                         tmp_mode, HPTE_V_BOLTED, psize);

                if (ret < 0)
                        break;
        }
        return ret < 0 ? ret : 0;
}

static int __init htab_dt_scan_page_sizes(unsigned long node,
                                          const char *uname, int depth,
                                          void *data)
{
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        u32 *prop;
        unsigned long size = 0;

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        prop = (u32 *)of_get_flat_dt_prop(node,
                                          "ibm,segment-page-sizes", &size);
        if (prop != NULL) {
                DBG("Page sizes from device-tree:\n");
                size /= 4;
                cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
                while (size > 0) {
                        unsigned int shift = prop[0];
                        unsigned int slbenc = prop[1];
                        unsigned int lpnum = prop[2];
                        unsigned int lpenc = 0;
                        struct mmu_psize_def *def;
                        int idx = -1;

                        size -= 3; prop += 3;
                        while (size > 0 && lpnum) {
                                if (prop[0] == shift)
                                        lpenc = prop[1];
                                prop += 2; size -= 2;
                                lpnum--;
                        }
                        switch (shift) {
                        case 0xc:
                                idx = MMU_PAGE_4K;
                                break;
                        case 0x10:
                                idx = MMU_PAGE_64K;
                                break;
                        case 0x14:
                                idx = MMU_PAGE_1M;
                                break;
                        case 0x18:
                                idx = MMU_PAGE_16M;
                                cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
                                break;
                        case 0x22:
                                idx = MMU_PAGE_16G;
                                break;
                        }
                        if (idx < 0)
                                continue;
                        def = &mmu_psize_defs[idx];
                        def->shift = shift;
                        if (shift <= 23)
                                def->avpnm = 0;
                        else
                                def->avpnm = (1 << (shift - 23)) - 1;
                        def->sllp = slbenc;
                        def->penc = lpenc;
                        /* We don't know for sure what's up with tlbiel, so
                         * for now we only set it for 4K and 64K pages
                         */
                        if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
                                def->tlbiel = 1;
                        else
                                def->tlbiel = 0;

                        DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
                            "tlbiel=%d, penc=%d\n",
                            idx, shift, def->sllp, def->avpnm, def->tlbiel,
                            def->penc);
                }
                return 1;
        }
        return 0;
}

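/*
 * Illustration (hypothetical property contents, not dumped from real
 * firmware): htab_dt_scan_page_sizes() above walks a flattened array
 * of cells of the form
 *
 *     <shift slbenc lpnum { shift penc } * lpnum>
 *
 * so a CPU node advertising 4K and 16M base pages might carry:
 *
 *     ibm,segment-page-sizes = <0xc 0x0 0x1 0xc 0x0
 *                               0x18 0x100 0x1 0x18 0x0>;
 *
 * i.e. 4K pages (shift 0xc) with SLB encoding 0 and one HPTE penc
 * entry, then 16M pages (shift 0x18) with an SLB_VSID_L-style
 * encoding. Real firmware tables vary by machine.
 */
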
static void __init htab_init_page_sizes(void)
{
        int rc;

        /* Default to 4K pages only */
        memcpy(mmu_psize_defs, mmu_psize_defaults_old,
               sizeof(mmu_psize_defaults_old));

        /*
         * Try to find the available page sizes in the device-tree
         */
        rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
        if (rc != 0)  /* Found */
                goto found;

        /*
         * Not in the device-tree, let's fall back on the known size
         * list for 16M capable GP & GR
         */
        if (cpu_has_feature(CPU_FTR_16M_PAGE) && !machine_is(iseries))
                memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
                       sizeof(mmu_psize_defaults_gp));
 found:
        /*
         * Pick a size for the linear mapping. Currently, we only support
         * 16M, 1M and 4K which is the default
         */
        if (mmu_psize_defs[MMU_PAGE_16M].shift)
                mmu_linear_psize = MMU_PAGE_16M;
        else if (mmu_psize_defs[MMU_PAGE_1M].shift)
                mmu_linear_psize = MMU_PAGE_1M;

#ifdef CONFIG_PPC_64K_PAGES
        /*
         * Pick a size for the ordinary pages. Default is 4K, we support
         * 64K for user mappings and vmalloc if supported by the processor.
         * We only use 64k for ioremap if the processor
         * (and firmware) support cache-inhibited large pages.
         * If not, we use 4k and set mmu_ci_restrictions so that
         * hash_page knows to switch processes that use cache-inhibited
         * mappings to 4k pages.
         */
        if (mmu_psize_defs[MMU_PAGE_64K].shift) {
                mmu_virtual_psize = MMU_PAGE_64K;
                mmu_vmalloc_psize = MMU_PAGE_64K;
                if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
                        mmu_io_psize = MMU_PAGE_64K;
                else
                        mmu_ci_restrictions = 1;
        }
#endif

        printk(KERN_DEBUG "Page orders: linear mapping = %d, "
               "virtual = %d, io = %d\n",
               mmu_psize_defs[mmu_linear_psize].shift,
               mmu_psize_defs[mmu_virtual_psize].shift,
               mmu_psize_defs[mmu_io_psize].shift);

#ifdef CONFIG_HUGETLB_PAGE
        /* Init large page size. Currently, we pick 16M or 1M depending
         * on what is available
         */
        if (mmu_psize_defs[MMU_PAGE_16M].shift)
                mmu_huge_psize = MMU_PAGE_16M;
        /* With 4k/4level pagetables, we can't (for now) cope with a
         * huge page size < PMD_SIZE */
        else if (mmu_psize_defs[MMU_PAGE_1M].shift)
                mmu_huge_psize = MMU_PAGE_1M;

        /* Calculate HPAGE_SHIFT and sanity check it */
        if (mmu_psize_defs[mmu_huge_psize].shift > MIN_HUGEPTE_SHIFT &&
            mmu_psize_defs[mmu_huge_psize].shift < SID_SHIFT)
                HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
        else
                HPAGE_SHIFT = 0; /* No huge pages dude ! */
#endif /* CONFIG_HUGETLB_PAGE */
}

static int __init htab_dt_scan_pftsize(unsigned long node,
                                       const char *uname, int depth,
                                       void *data)
{
        char *type = of_get_flat_dt_prop(node, "device_type", NULL);
        u32 *prop;

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
        if (prop != NULL) {
                /* pft_size[0] is the NUMA CEC cookie */
                ppc64_pft_size = prop[1];
                return 1;
        }
        return 0;
}

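/*
 * For illustration (the values are hypothetical): a CPU node carrying
 *
 *     ibm,pft-size = <0x0 0x1a>;
 *
 * would make htab_dt_scan_pftsize() above set ppc64_pft_size to 0x1a,
 * and htab_get_table_size() below would then return a hash table size
 * of 1UL << 0x1a = 64MB without consulting the RAM-based fallback.
 */
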
static unsigned long __init htab_get_table_size(void)
{
        unsigned long mem_size, rnd_mem_size, pteg_count;

        /* If hash size isn't already provided by the platform, we try to
         * retrieve it from the device-tree. If it's not there either, we
         * calculate it now based on the total RAM size
         */
        if (ppc64_pft_size == 0)
                of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
        if (ppc64_pft_size)
                return 1UL << ppc64_pft_size;

        /* round mem_size up to next power of 2 */
        mem_size = lmb_phys_mem_size();
        rnd_mem_size = 1UL << __ilog2(mem_size);
        if (rnd_mem_size < mem_size)
                rnd_mem_size <<= 1;

        /* # pages / 2 */
        pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11);

        return pteg_count << 7;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void create_section_mapping(unsigned long start, unsigned long end)
{
        BUG_ON(htab_bolt_mapping(start, end, __pa(start),
                _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX,
                mmu_linear_psize));
}
#endif /* CONFIG_MEMORY_HOTPLUG */

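/*
 * Worked example of the fallback sizing above, assuming 1GB of RAM:
 * mem_size = 2^30, already a power of 2, so rnd_mem_size stays 2^30;
 * pteg_count = max(2^30 >> 13, 2^11) = 2^17, i.e. one PTEG per two
 * 4K pages; the returned size is 2^17 << 7 = 16MB, since each PTEG
 * holds HPTES_PER_GROUP = 8 HPTEs of 16 bytes each.
 */
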
static inline void make_bl(unsigned int *insn_addr, void *func)
{
        unsigned long funcp = *((unsigned long *)func);
        int offset = funcp - (unsigned long)insn_addr;

        *insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
        flush_icache_range((unsigned long)insn_addr,
                           (unsigned long)insn_addr + 4);
}

static void __init htab_finish_init(void)
{
        extern unsigned int *htab_call_hpte_insert1;
        extern unsigned int *htab_call_hpte_insert2;
        extern unsigned int *htab_call_hpte_remove;
        extern unsigned int *htab_call_hpte_updatepp;

#ifdef CONFIG_PPC_64K_PAGES
        extern unsigned int *ht64_call_hpte_insert1;
        extern unsigned int *ht64_call_hpte_insert2;
        extern unsigned int *ht64_call_hpte_remove;
        extern unsigned int *ht64_call_hpte_updatepp;

        make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
        make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
        make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
        make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
#endif /* CONFIG_PPC_64K_PAGES */

        make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
        make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
        make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
        make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
}

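/*
 * Sketch of what make_bl() above emits (the encoding is the standard
 * PowerPC I-form branch; the concrete offset is illustrative only):
 * 0x48000001 is "bl" with AA=0 and LK=1, and the 0x03fffffc mask
 * keeps the signed 26-bit byte offset with its low two bits clear.
 * Patching a call site whose target lies 0x1000 bytes ahead would
 * therefore store 0x48000001 | 0x1000 = 0x48001001. func is a ppc64
 * ELF ABI function descriptor, which is why its first doubleword is
 * dereferenced to obtain the actual entry point.
 */
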
void __init htab_initialize(void)
{
        unsigned long table;
        unsigned long pteg_count;
        unsigned long mode_rw;
        unsigned long base = 0, size = 0;
        int i;

        extern unsigned long tce_alloc_start, tce_alloc_end;

        DBG(" -> htab_initialize()\n");

        /* Initialize page sizes */
        htab_init_page_sizes();

        /*
         * Calculate the required size of the htab.  We want the number of
         * PTEGs to equal one half the number of real pages.
         */
        htab_size_bytes = htab_get_table_size();
        pteg_count = htab_size_bytes >> 7;

        htab_hash_mask = pteg_count - 1;

        if (firmware_has_feature(FW_FEATURE_LPAR)) {
                /* Using a hypervisor which owns the htab */
                htab_address = NULL;
                _SDR1 = 0;
        } else {
                /* Find storage for the HPT.  Must be contiguous in
                 * the absolute address space.
                 */
                table = lmb_alloc(htab_size_bytes, htab_size_bytes);

                DBG("Hash table allocated at %lx, size: %lx\n", table,
                    htab_size_bytes);

                htab_address = abs_to_virt(table);

                /* htab absolute addr + encoded htabsize */
                _SDR1 = table + __ilog2(pteg_count) - 11;

                /* Initialize the HPT with no entries */
                memset((void *)table, 0, htab_size_bytes);

                /* Set SDR1 */
                mtspr(SPRN_SDR1, _SDR1);
        }

        mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;

        /* On U3 based machines, we need to reserve the DART area and
         * _NOT_ map it to avoid cache paradoxes as it's remapped non
         * cacheable later on
         */

        /* create the bolted linear mapping in the hash table */
        for (i = 0; i < lmb.memory.cnt; i++) {
                base = (unsigned long)__va(lmb.memory.region[i].base);
                size = lmb.memory.region[i].size;

                DBG("creating mapping for region: %lx : %lx\n", base, size);

#ifdef CONFIG_U3_DART
                /* Do not map the DART space. Fortunately, it will be aligned
                 * in such a way that it will not cross two lmb regions and
                 * will fit within a single 16Mb page.
                 * The DART space is assumed to be a full 16Mb region even if
                 * we only use 2Mb of that space. We will use more of it later
                 * for AGP GART. We have to use a full 16Mb large page.
                 */
                DBG("DART base: %lx\n", dart_tablebase);

                if (dart_tablebase != 0 && dart_tablebase >= base
                    && dart_tablebase < (base + size)) {
                        unsigned long dart_table_end = dart_tablebase + 16 * MB;
                        if (base != dart_tablebase)
                                BUG_ON(htab_bolt_mapping(base, dart_tablebase,
                                                         __pa(base), mode_rw,
                                                         mmu_linear_psize));
                        if ((base + size) > dart_table_end)
                                BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
                                                         base + size,
                                                         __pa(dart_table_end),
                                                         mode_rw,
                                                         mmu_linear_psize));
                        continue;
                }
#endif /* CONFIG_U3_DART */
                BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
                                         mode_rw, mmu_linear_psize));
        }

        /*
         * If we have a memory_limit and we've allocated TCEs then we need to
         * explicitly map the TCE area at the top of RAM. We also cope with the
         * case that the TCEs start below memory_limit.
         * tce_alloc_start/end are 16MB aligned so the mapping should work
         * for either 4K or 16MB pages.
         */
        if (tce_alloc_start) {
                tce_alloc_start = (unsigned long)__va(tce_alloc_start);
                tce_alloc_end = (unsigned long)__va(tce_alloc_end);

                if (base + size >= tce_alloc_start)
                        tce_alloc_start = base + size + 1;

                BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
                                         __pa(tce_alloc_start), mode_rw,
                                         mmu_linear_psize));
        }

        htab_finish_init();

        DBG(" <- htab_initialize()\n");
}
#undef KB
#undef MB

void htab_initialize_secondary(void)
{
        if (!firmware_has_feature(FW_FEATURE_LPAR))
                mtspr(SPRN_SDR1, _SDR1);
}

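/*
 * Example of the SDR1 encoding built in htab_initialize() above
 * (illustrative numbers): for the 16MB table from the sizing example,
 * pteg_count = 2^17, so __ilog2(pteg_count) - 11 = 6. The addition
 * works because the table is allocated aligned to its own size, so
 * its low bits are clear; SDR1 ends up holding the table's physical
 * base with an HTABSIZE field of 6, i.e. 11 + 6 = 17 hash bits used.
 */
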
/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
{
        struct page *page;

        if (!pfn_valid(pte_pfn(pte)))
                return pp;

        page = pte_page(pte);

        /* page is dirty */
        if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
                if (trap == 0x400) {
                        __flush_dcache_icache(page_address(page));
                        set_bit(PG_arch_1, &page->flags);
                } else
                        pp |= HPTE_R_N;
        }
        return pp;
}

/* Result code is:
 *  0 - handled
 *  1 - normal page fault
 * -1 - critical hash insertion error
 */
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
        void *pgdir;
        unsigned long vsid;
        struct mm_struct *mm;
        pte_t *ptep;
        cpumask_t tmp;
        int rc, user_region = 0, local = 0;
        int psize;

        DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx)\n",
                ea, access, trap);

        if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
                DBG_LOW(" out of pgtable range !\n");
                return 1;
        }

        /* Get region & vsid */
        switch (REGION_ID(ea)) {
        case USER_REGION_ID:
                user_region = 1;
                mm = current->mm;
                if (!mm) {
                        DBG_LOW(" user region with no mm !\n");
                        return 1;
                }
                vsid = get_vsid(mm->context.id, ea);
                psize = mm->context.user_psize;
                break;
        case VMALLOC_REGION_ID:
                mm = &init_mm;
                vsid = get_kernel_vsid(ea);
                if (ea < VMALLOC_END)
                        psize = mmu_vmalloc_psize;
                else
                        psize = mmu_io_psize;
                break;
        default:
                /* Not a valid range
                 * Send the problem up to do_page_fault
                 */
                return 1;
        }
        DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);

        /* Get pgdir */
        pgdir = mm->pgd;
        if (pgdir == NULL)
                return 1;

        /* Check CPU locality */
        tmp = cpumask_of_cpu(smp_processor_id());
        if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
                local = 1;

        /* Handle hugepage regions */
        if (unlikely(in_hugepage_area(mm->context, ea))) {
                DBG_LOW(" -> huge page !\n");
                return hash_huge_page(mm, access, ea, vsid, local, trap);
        }

        /* Get PTE and page size from page tables */
        ptep = find_linux_pte(pgdir, ea);
        if (ptep == NULL || !pte_present(*ptep)) {
                DBG_LOW(" no PTE !\n");
                return 1;
        }

#ifndef CONFIG_PPC_64K_PAGES
        DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
#else
        DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
                pte_val(*(ptep + PTRS_PER_PTE)));
#endif
        /* Pre-check access permissions (will be re-checked atomically
         * in __hash_page_XX but this pre-check is a fast path)
         */
        if (access & ~pte_val(*ptep)) {
                DBG_LOW(" no access !\n");
                return 1;
        }

        /* Do actual hashing */
#ifndef CONFIG_PPC_64K_PAGES
        rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
#else
        if (mmu_ci_restrictions) {
                /* If this PTE is non-cacheable, switch to 4k */
                if (psize == MMU_PAGE_64K &&
                    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
                        if (user_region) {
                                psize = MMU_PAGE_4K;
                                mm->context.user_psize = MMU_PAGE_4K;
                                mm->context.sllp = SLB_VSID_USER |
                                        mmu_psize_defs[MMU_PAGE_4K].sllp;
                        } else if (ea < VMALLOC_END) {
                                /*
                                 * some driver did a non-cacheable mapping
                                 * in vmalloc space, so switch vmalloc
                                 * to 4k pages
                                 */
                                printk(KERN_ALERT "Reducing vmalloc segment "
                                       "to 4kB pages because of "
                                       "non-cacheable mapping\n");
                                psize = mmu_vmalloc_psize = MMU_PAGE_4K;
                        }
                }
                if (user_region) {
                        if (psize != get_paca()->context.user_psize) {
                                get_paca()->context = mm->context;
                                slb_flush_and_rebolt();
                        }
                } else if (get_paca()->vmalloc_sllp !=
                           mmu_psize_defs[mmu_vmalloc_psize].sllp) {
                        get_paca()->vmalloc_sllp =
                                mmu_psize_defs[mmu_vmalloc_psize].sllp;
                        slb_flush_and_rebolt();
                }
        }
        if (psize == MMU_PAGE_64K)
                rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
        else
                rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
#endif /* CONFIG_PPC_64K_PAGES */

#ifndef CONFIG_PPC_64K_PAGES
        DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else
        DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
                pte_val(*(ptep + PTRS_PER_PTE)));
#endif
        DBG_LOW(" -> rc=%d\n", rc);
        return rc;
}
EXPORT_SYMBOL_GPL(hash_page);

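/*
 * For reference, the region decode in hash_page() above splits the
 * 64-bit effective address on its top nibble (values as defined in
 * the ppc64 headers of this vintage; quoted here for illustration):
 * REGION_ID(ea) is ea >> 60, so user addresses (0x0...) select
 * USER_REGION_ID, vmalloc/ioremap addresses (0xd...) select
 * VMALLOC_REGION_ID, and anything else falls through to the default
 * case and is pushed up to do_page_fault().
 */
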
void hash_preload(struct mm_struct *mm, unsigned long ea,
                  unsigned long access, unsigned long trap)
{
        unsigned long vsid;
        void *pgdir;
        pte_t *ptep;
        cpumask_t mask;
        unsigned long flags;
        int local = 0;

        /* We don't want huge pages prefaulted for now
         */
        if (unlikely(in_hugepage_area(mm->context, ea)))
                return;

        DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
                " trap=%lx)\n", mm, mm->pgd, ea, access, trap);

        /* Get PTE, VSID, access mask */
        pgdir = mm->pgd;
        if (pgdir == NULL)
                return;
        ptep = find_linux_pte(pgdir, ea);
        if (!ptep)
                return;
        vsid = get_vsid(mm->context.id, ea);

        /* Hash it in */
        local_irq_save(flags);
        mask = cpumask_of_cpu(smp_processor_id());
        if (cpus_equal(mm->cpu_vm_mask, mask))
                local = 1;
#ifndef CONFIG_PPC_64K_PAGES
        __hash_page_4K(ea, access, vsid, ptep, trap, local);
#else
        if (mmu_ci_restrictions) {
                /* If this PTE is non-cacheable, switch to 4k */
                if (mm->context.user_psize == MMU_PAGE_64K &&
                    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
                        mm->context.user_psize = MMU_PAGE_4K;
                        mm->context.sllp = SLB_VSID_USER |
                                mmu_psize_defs[MMU_PAGE_4K].sllp;
                        get_paca()->context = mm->context;
                        slb_flush_and_rebolt();
                }
        }
        if (mm->context.user_psize == MMU_PAGE_64K)
                __hash_page_64K(ea, access, vsid, ptep, trap, local);
        else
                __hash_page_4K(ea, access, vsid, ptep, trap, local);
#endif /* CONFIG_PPC_64K_PAGES */
        local_irq_restore(flags);
}

void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
{
        unsigned long hash, index, shift, hidx, slot;

        DBG_LOW("flush_hash_page(va=%016lx)\n", va);
        pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
                hash = hpt_hash(va, shift);
                hidx = __rpte_to_hidx(pte, index);
                if (hidx & _PTEIDX_SECONDARY)
                        hash = ~hash;
                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += hidx & _PTEIDX_GROUP_IX;
                DBG_LOW(" sub %ld: slot=%lx, hidx=%lx\n", index, slot, hidx);
                ppc_md.hpte_invalidate(slot, va, psize, local);
        } pte_iterate_hashed_end();
}

void flush_hash_range(unsigned long number, int local)
{
        if (ppc_md.flush_hash_range)
                ppc_md.flush_hash_range(number, local);
        else {
                int i;
                struct ppc64_tlb_batch *batch =
                        &__get_cpu_var(ppc64_tlb_batch);

                for (i = 0; i < number; i++)
                        flush_hash_page(batch->vaddr[i], batch->pte[i],
                                        batch->psize, local);
        }
}

/*
 * low_hash_fault is called when the low-level hash code fails
 * to insert a PTE due to a hypervisor error
 */
void low_hash_fault(struct pt_regs *regs, unsigned long address)
{
        if (user_mode(regs)) {
                siginfo_t info;

                info.si_signo = SIGBUS;
                info.si_errno = 0;
                info.si_code = BUS_ADRERR;
                info.si_addr = (void __user *)address;
                force_sig_info(SIGBUS, &info, current);
                return;
        }
        bad_page_fault(regs, address, SIGBUS);
}
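
/*
 * Worked example of the slot arithmetic in flush_hash_page() above
 * (the numbers are illustrative, not taken from real hardware): with
 * htab_hash_mask = 0x1ffff and hpt_hash() returning 0x12345, the
 * primary PTE group starts at slot 0x12345 * 8 = 0x91a28. If the
 * hidx has _PTEIDX_SECONDARY set, the hash is inverted (~0x12345)
 * before masking, selecting the secondary group instead; the low
 * _PTEIDX_GROUP_IX bits of hidx then pick one of the 8 slots within
 * that group.
 */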