Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.38 590 lines 15 kB view raw
1/* 2 * This file contains the routines for TLB flushing. 3 * On machines where the MMU does not use a hash table to store virtual to 4 * physical translations (ie, SW loaded TLBs or Book3E compilant processors, 5 * this does -not- include 603 however which shares the implementation with 6 * hash based processors) 7 * 8 * -- BenH 9 * 10 * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org> 11 * IBM Corp. 12 * 13 * Derived from arch/ppc/mm/init.c: 14 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) 15 * 16 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) 17 * and Cort Dougan (PReP) (cort@cs.nmt.edu) 18 * Copyright (C) 1996 Paul Mackerras 19 * 20 * Derived from "arch/i386/mm/init.c" 21 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 22 * 23 * This program is free software; you can redistribute it and/or 24 * modify it under the terms of the GNU General Public License 25 * as published by the Free Software Foundation; either version 26 * 2 of the License, or (at your option) any later version. 27 * 28 */ 29 30#include <linux/kernel.h> 31#include <linux/mm.h> 32#include <linux/init.h> 33#include <linux/highmem.h> 34#include <linux/pagemap.h> 35#include <linux/preempt.h> 36#include <linux/spinlock.h> 37#include <linux/memblock.h> 38 39#include <asm/tlbflush.h> 40#include <asm/tlb.h> 41#include <asm/code-patching.h> 42 43#include "mmu_decl.h" 44 45#ifdef CONFIG_PPC_BOOK3E 46struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { 47 [MMU_PAGE_4K] = { 48 .shift = 12, 49 .ind = 20, 50 .enc = BOOK3E_PAGESZ_4K, 51 }, 52 [MMU_PAGE_16K] = { 53 .shift = 14, 54 .enc = BOOK3E_PAGESZ_16K, 55 }, 56 [MMU_PAGE_64K] = { 57 .shift = 16, 58 .ind = 28, 59 .enc = BOOK3E_PAGESZ_64K, 60 }, 61 [MMU_PAGE_1M] = { 62 .shift = 20, 63 .enc = BOOK3E_PAGESZ_1M, 64 }, 65 [MMU_PAGE_16M] = { 66 .shift = 24, 67 .ind = 36, 68 .enc = BOOK3E_PAGESZ_16M, 69 }, 70 [MMU_PAGE_256M] = { 71 .shift = 28, 72 .enc = BOOK3E_PAGESZ_256M, 73 }, 74 [MMU_PAGE_1G] = { 75 .shift = 30, 76 .enc = BOOK3E_PAGESZ_1GB, 77 }, 78}; 79static inline int mmu_get_tsize(int psize) 80{ 81 return mmu_psize_defs[psize].enc; 82} 83#else 84static inline int mmu_get_tsize(int psize) 85{ 86 /* This isn't used on !Book3E for now */ 87 return 0; 88} 89#endif 90 91/* The variables below are currently only used on 64-bit Book3E 92 * though this will probably be made common with other nohash 93 * implementations at some point 94 */ 95#ifdef CONFIG_PPC64 96 97int mmu_linear_psize; /* Page size used for the linear mapping */ 98int mmu_pte_psize; /* Page size used for PTE pages */ 99int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ 100int book3e_htw_enabled; /* Is HW tablewalk enabled ? */ 101unsigned long linear_map_top; /* Top of linear mapping */ 102 103#endif /* CONFIG_PPC64 */ 104 105/* 106 * Base TLB flushing operations: 107 * 108 * - flush_tlb_mm(mm) flushes the specified mm context TLB's 109 * - flush_tlb_page(vma, vmaddr) flushes one page 110 * - flush_tlb_range(vma, start, end) flushes a range of pages 111 * - flush_tlb_kernel_range(start, end) flushes kernel pages 112 * 113 * - local_* variants of page and mm only apply to the current 114 * processor 115 */ 116 117/* 118 * These are the base non-SMP variants of page and mm flushing 119 */ 120void local_flush_tlb_mm(struct mm_struct *mm) 121{ 122 unsigned int pid; 123 124 preempt_disable(); 125 pid = mm->context.id; 126 if (pid != MMU_NO_CONTEXT) 127 _tlbil_pid(pid); 128 preempt_enable(); 129} 130EXPORT_SYMBOL(local_flush_tlb_mm); 131 132void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, 133 int tsize, int ind) 134{ 135 unsigned int pid; 136 137 preempt_disable(); 138 pid = mm ? mm->context.id : 0; 139 if (pid != MMU_NO_CONTEXT) 140 _tlbil_va(vmaddr, pid, tsize, ind); 141 preempt_enable(); 142} 143 144void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 145{ 146 __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, 147 mmu_get_tsize(mmu_virtual_psize), 0); 148} 149EXPORT_SYMBOL(local_flush_tlb_page); 150 151/* 152 * And here are the SMP non-local implementations 153 */ 154#ifdef CONFIG_SMP 155 156static DEFINE_RAW_SPINLOCK(tlbivax_lock); 157 158static int mm_is_core_local(struct mm_struct *mm) 159{ 160 return cpumask_subset(mm_cpumask(mm), 161 topology_thread_cpumask(smp_processor_id())); 162} 163 164struct tlb_flush_param { 165 unsigned long addr; 166 unsigned int pid; 167 unsigned int tsize; 168 unsigned int ind; 169}; 170 171static void do_flush_tlb_mm_ipi(void *param) 172{ 173 struct tlb_flush_param *p = param; 174 175 _tlbil_pid(p ? p->pid : 0); 176} 177 178static void do_flush_tlb_page_ipi(void *param) 179{ 180 struct tlb_flush_param *p = param; 181 182 _tlbil_va(p->addr, p->pid, p->tsize, p->ind); 183} 184 185 186/* Note on invalidations and PID: 187 * 188 * We snapshot the PID with preempt disabled. At this point, it can still 189 * change either because: 190 * - our context is being stolen (PID -> NO_CONTEXT) on another CPU 191 * - we are invaliating some target that isn't currently running here 192 * and is concurrently acquiring a new PID on another CPU 193 * - some other CPU is re-acquiring a lost PID for this mm 194 * etc... 195 * 196 * However, this shouldn't be a problem as we only guarantee 197 * invalidation of TLB entries present prior to this call, so we 198 * don't care about the PID changing, and invalidating a stale PID 199 * is generally harmless. 200 */ 201 202void flush_tlb_mm(struct mm_struct *mm) 203{ 204 unsigned int pid; 205 206 preempt_disable(); 207 pid = mm->context.id; 208 if (unlikely(pid == MMU_NO_CONTEXT)) 209 goto no_context; 210 if (!mm_is_core_local(mm)) { 211 struct tlb_flush_param p = { .pid = pid }; 212 /* Ignores smp_processor_id() even if set. */ 213 smp_call_function_many(mm_cpumask(mm), 214 do_flush_tlb_mm_ipi, &p, 1); 215 } 216 _tlbil_pid(pid); 217 no_context: 218 preempt_enable(); 219} 220EXPORT_SYMBOL(flush_tlb_mm); 221 222void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, 223 int tsize, int ind) 224{ 225 struct cpumask *cpu_mask; 226 unsigned int pid; 227 228 preempt_disable(); 229 pid = mm ? mm->context.id : 0; 230 if (unlikely(pid == MMU_NO_CONTEXT)) 231 goto bail; 232 cpu_mask = mm_cpumask(mm); 233 if (!mm_is_core_local(mm)) { 234 /* If broadcast tlbivax is supported, use it */ 235 if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { 236 int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); 237 if (lock) 238 raw_spin_lock(&tlbivax_lock); 239 _tlbivax_bcast(vmaddr, pid, tsize, ind); 240 if (lock) 241 raw_spin_unlock(&tlbivax_lock); 242 goto bail; 243 } else { 244 struct tlb_flush_param p = { 245 .pid = pid, 246 .addr = vmaddr, 247 .tsize = tsize, 248 .ind = ind, 249 }; 250 /* Ignores smp_processor_id() even if set in cpu_mask */ 251 smp_call_function_many(cpu_mask, 252 do_flush_tlb_page_ipi, &p, 1); 253 } 254 } 255 _tlbil_va(vmaddr, pid, tsize, ind); 256 bail: 257 preempt_enable(); 258} 259 260void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 261{ 262 __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, 263 mmu_get_tsize(mmu_virtual_psize), 0); 264} 265EXPORT_SYMBOL(flush_tlb_page); 266 267#endif /* CONFIG_SMP */ 268 269/* 270 * Flush kernel TLB entries in the given range 271 */ 272void flush_tlb_kernel_range(unsigned long start, unsigned long end) 273{ 274#ifdef CONFIG_SMP 275 preempt_disable(); 276 smp_call_function(do_flush_tlb_mm_ipi, NULL, 1); 277 _tlbil_pid(0); 278 preempt_enable(); 279#else 280 _tlbil_pid(0); 281#endif 282} 283EXPORT_SYMBOL(flush_tlb_kernel_range); 284 285/* 286 * Currently, for range flushing, we just do a full mm flush. This should 287 * be optimized based on a threshold on the size of the range, since 288 * some implementation can stack multiple tlbivax before a tlbsync but 289 * for now, we keep it that way 290 */ 291void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 292 unsigned long end) 293 294{ 295 flush_tlb_mm(vma->vm_mm); 296} 297EXPORT_SYMBOL(flush_tlb_range); 298 299void tlb_flush(struct mmu_gather *tlb) 300{ 301 flush_tlb_mm(tlb->mm); 302 303 /* Push out batch of freed page tables */ 304 pte_free_finish(); 305} 306 307/* 308 * Below are functions specific to the 64-bit variant of Book3E though that 309 * may change in the future 310 */ 311 312#ifdef CONFIG_PPC64 313 314/* 315 * Handling of virtual linear page tables or indirect TLB entries 316 * flushing when PTE pages are freed 317 */ 318void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) 319{ 320 int tsize = mmu_psize_defs[mmu_pte_psize].enc; 321 322 if (book3e_htw_enabled) { 323 unsigned long start = address & PMD_MASK; 324 unsigned long end = address + PMD_SIZE; 325 unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; 326 327 /* This isn't the most optimal, ideally we would factor out the 328 * while preempt & CPU mask mucking around, or even the IPI but 329 * it will do for now 330 */ 331 while (start < end) { 332 __flush_tlb_page(tlb->mm, start, tsize, 1); 333 start += size; 334 } 335 } else { 336 unsigned long rmask = 0xf000000000000000ul; 337 unsigned long rid = (address & rmask) | 0x1000000000000000ul; 338 unsigned long vpte = address & ~rmask; 339 340#ifdef CONFIG_PPC_64K_PAGES 341 vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful; 342#else 343 vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; 344#endif 345 vpte |= rid; 346 __flush_tlb_page(tlb->mm, vpte, tsize, 0); 347 } 348} 349 350static void setup_page_sizes(void) 351{ 352 unsigned int tlb0cfg; 353 unsigned int tlb0ps; 354 unsigned int eptcfg; 355 int i, psize; 356 357#ifdef CONFIG_PPC_FSL_BOOK3E 358 unsigned int mmucfg = mfspr(SPRN_MMUCFG); 359 360 if (((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) && 361 (mmu_has_feature(MMU_FTR_TYPE_FSL_E))) { 362 unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG); 363 unsigned int min_pg, max_pg; 364 365 min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT; 366 max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT; 367 368 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 369 struct mmu_psize_def *def; 370 unsigned int shift; 371 372 def = &mmu_psize_defs[psize]; 373 shift = def->shift; 374 375 if (shift == 0) 376 continue; 377 378 /* adjust to be in terms of 4^shift Kb */ 379 shift = (shift - 10) >> 1; 380 381 if ((shift >= min_pg) && (shift <= max_pg)) 382 def->flags |= MMU_PAGE_SIZE_DIRECT; 383 } 384 385 goto no_indirect; 386 } 387#endif 388 389 tlb0cfg = mfspr(SPRN_TLB0CFG); 390 tlb0ps = mfspr(SPRN_TLB0PS); 391 eptcfg = mfspr(SPRN_EPTCFG); 392 393 /* Look for supported direct sizes */ 394 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 395 struct mmu_psize_def *def = &mmu_psize_defs[psize]; 396 397 if (tlb0ps & (1U << (def->shift - 10))) 398 def->flags |= MMU_PAGE_SIZE_DIRECT; 399 } 400 401 /* Indirect page sizes supported ? */ 402 if ((tlb0cfg & TLBnCFG_IND) == 0) 403 goto no_indirect; 404 405 /* Now, we only deal with one IND page size for each 406 * direct size. Hopefully all implementations today are 407 * unambiguous, but we might want to be careful in the 408 * future. 409 */ 410 for (i = 0; i < 3; i++) { 411 unsigned int ps, sps; 412 413 sps = eptcfg & 0x1f; 414 eptcfg >>= 5; 415 ps = eptcfg & 0x1f; 416 eptcfg >>= 5; 417 if (!ps || !sps) 418 continue; 419 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { 420 struct mmu_psize_def *def = &mmu_psize_defs[psize]; 421 422 if (ps == (def->shift - 10)) 423 def->flags |= MMU_PAGE_SIZE_INDIRECT; 424 if (sps == (def->shift - 10)) 425 def->ind = ps + 10; 426 } 427 } 428 no_indirect: 429 430 /* Cleanup array and print summary */ 431 pr_info("MMU: Supported page sizes\n"); 432 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 433 struct mmu_psize_def *def = &mmu_psize_defs[psize]; 434 const char *__page_type_names[] = { 435 "unsupported", 436 "direct", 437 "indirect", 438 "direct & indirect" 439 }; 440 if (def->flags == 0) { 441 def->shift = 0; 442 continue; 443 } 444 pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10), 445 __page_type_names[def->flags & 0x3]); 446 } 447} 448 449static void setup_mmu_htw(void) 450{ 451 extern unsigned int interrupt_base_book3e; 452 extern unsigned int exc_data_tlb_miss_htw_book3e; 453 extern unsigned int exc_instruction_tlb_miss_htw_book3e; 454 455 unsigned int *ibase = &interrupt_base_book3e; 456 457 /* Check if HW tablewalk is present, and if yes, enable it by: 458 * 459 * - patching the TLB miss handlers to branch to the 460 * one dedicates to it 461 * 462 * - setting the global book3e_htw_enabled 463 */ 464 unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); 465 466 if ((tlb0cfg & TLBnCFG_IND) && 467 (tlb0cfg & TLBnCFG_PT)) { 468 /* Our exceptions vectors start with a NOP and -then- a branch 469 * to deal with single stepping from userspace which stops on 470 * the second instruction. Thus we need to patch the second 471 * instruction of the exception, not the first one 472 */ 473 patch_branch(ibase + (0x1c0 / 4) + 1, 474 (unsigned long)&exc_data_tlb_miss_htw_book3e, 0); 475 patch_branch(ibase + (0x1e0 / 4) + 1, 476 (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0); 477 book3e_htw_enabled = 1; 478 } 479 pr_info("MMU: Book3E Page Tables %s\n", 480 book3e_htw_enabled ? "Enabled" : "Disabled"); 481} 482 483/* 484 * Early initialization of the MMU TLB code 485 */ 486static void __early_init_mmu(int boot_cpu) 487{ 488 unsigned int mas4; 489 490 /* XXX This will have to be decided at runtime, but right 491 * now our boot and TLB miss code hard wires it. Ideally 492 * we should find out a suitable page size and patch the 493 * TLB miss code (either that or use the PACA to store 494 * the value we want) 495 */ 496 mmu_linear_psize = MMU_PAGE_1G; 497 498 /* XXX This should be decided at runtime based on supported 499 * page sizes in the TLB, but for now let's assume 16M is 500 * always there and a good fit (which it probably is) 501 */ 502 mmu_vmemmap_psize = MMU_PAGE_16M; 503 504 /* XXX This code only checks for TLB 0 capabilities and doesn't 505 * check what page size combos are supported by the HW. It 506 * also doesn't handle the case where a separate array holds 507 * the IND entries from the array loaded by the PT. 508 */ 509 if (boot_cpu) { 510 /* Look for supported page sizes */ 511 setup_page_sizes(); 512 513 /* Look for HW tablewalk support */ 514 setup_mmu_htw(); 515 } 516 517 /* Set MAS4 based on page table setting */ 518 519 mas4 = 0x4 << MAS4_WIMGED_SHIFT; 520 if (book3e_htw_enabled) { 521 mas4 |= mas4 | MAS4_INDD; 522#ifdef CONFIG_PPC_64K_PAGES 523 mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; 524 mmu_pte_psize = MMU_PAGE_256M; 525#else 526 mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; 527 mmu_pte_psize = MMU_PAGE_1M; 528#endif 529 } else { 530#ifdef CONFIG_PPC_64K_PAGES 531 mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; 532#else 533 mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; 534#endif 535 mmu_pte_psize = mmu_virtual_psize; 536 } 537 mtspr(SPRN_MAS4, mas4); 538 539 /* Set the global containing the top of the linear mapping 540 * for use by the TLB miss code 541 */ 542 linear_map_top = memblock_end_of_DRAM(); 543 544#ifdef CONFIG_PPC_FSL_BOOK3E 545 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { 546 unsigned int num_cams; 547 548 /* use a quarter of the TLBCAM for bolted linear map */ 549 num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; 550 linear_map_top = map_mem_in_cams(linear_map_top, num_cams); 551 552 /* limit memory so we dont have linear faults */ 553 memblock_enforce_memory_limit(linear_map_top); 554 memblock_analyze(); 555 } 556#endif 557 558 /* A sync won't hurt us after mucking around with 559 * the MMU configuration 560 */ 561 mb(); 562 563 memblock_set_current_limit(linear_map_top); 564} 565 566void __init early_init_mmu(void) 567{ 568 __early_init_mmu(1); 569} 570 571void __cpuinit early_init_mmu_secondary(void) 572{ 573 __early_init_mmu(0); 574} 575 576void setup_initial_memory_limit(phys_addr_t first_memblock_base, 577 phys_addr_t first_memblock_size) 578{ 579 /* On Embedded 64-bit, we adjust the RMA size to match 580 * the bolted TLB entry. We know for now that only 1G 581 * entries are supported though that may eventually 582 * change. We crop it to the size of the first MEMBLOCK to 583 * avoid going over total available memory just in case... 584 */ 585 ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); 586 587 /* Finally limit subsequent allocations */ 588 memblock_set_current_limit(first_memblock_base + ppc64_rma_size); 589} 590#endif /* CONFIG_PPC64 */