Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v2.6.35-rc3 (447 lines, 11 kB)
/*
 * This file contains the routines for TLB flushing.
 * On machines where the MMU does not use a hash table to store virtual to
 * physical translations (i.e., SW loaded TLBs or Book3E compliant processors;
 * this does -not- include the 603, however, which shares the implementation
 * with hash based processors)
 *
 *  -- BenH
 *
 * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
 *                     IBM Corp.
 *
 *  Derived from arch/ppc/mm/init.c:
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *    Copyright (C) 1996 Paul Mackerras
 *
 *  Derived from "arch/i386/mm/init.c"
 *    Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>
#include <linux/lmb.h>

#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/code-patching.h>

#include "mmu_decl.h"

#ifdef CONFIG_PPC_BOOK3E
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
        [MMU_PAGE_4K] = {
                .shift  = 12,
                .enc    = BOOK3E_PAGESZ_4K,
        },
        [MMU_PAGE_16K] = {
                .shift  = 14,
                .enc    = BOOK3E_PAGESZ_16K,
        },
        [MMU_PAGE_64K] = {
                .shift  = 16,
                .enc    = BOOK3E_PAGESZ_64K,
        },
        [MMU_PAGE_1M] = {
                .shift  = 20,
                .enc    = BOOK3E_PAGESZ_1M,
        },
        [MMU_PAGE_16M] = {
                .shift  = 24,
                .enc    = BOOK3E_PAGESZ_16M,
        },
        [MMU_PAGE_256M] = {
                .shift  = 28,
                .enc    = BOOK3E_PAGESZ_256M,
        },
        [MMU_PAGE_1G] = {
                .shift  = 30,
                .enc    = BOOK3E_PAGESZ_1GB,
        },
};
static inline int mmu_get_tsize(int psize)
{
        return mmu_psize_defs[psize].enc;
}
#else
static inline int mmu_get_tsize(int psize)
{
        /* This isn't used on !Book3E for now */
        return 0;
}
#endif

/* The variables below are currently only used on 64-bit Book3E
 * though this will probably be made common with other nohash
 * implementations at some point
 */
#ifdef CONFIG_PPC64

int mmu_linear_psize;           /* Page size used for the linear mapping */
int mmu_pte_psize;              /* Page size used for PTE pages */
int mmu_vmemmap_psize;          /* Page size used for the virtual mem map */
int book3e_htw_enabled;         /* Is HW tablewalk enabled ? */
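/* book3e_htw_enabled is set at boot time by __early_init_mmu() below, once
 * TLB0 advertises both TLBnCFG_IND and TLBnCFG_PT; the TLB miss handlers
 * are patched over to their HW tablewalk variants at the same time.
 */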
unsigned long linear_map_top;   /* Top of linear mapping */

#endif /* CONFIG_PPC64 */

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */

/*
 * These are the base non-SMP variants of page and mm flushing
 */
void local_flush_tlb_mm(struct mm_struct *mm)
{
        unsigned int pid;

        preempt_disable();
        pid = mm->context.id;
        if (pid != MMU_NO_CONTEXT)
                _tlbil_pid(pid);
        preempt_enable();
}
EXPORT_SYMBOL(local_flush_tlb_mm);

void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
                            int tsize, int ind)
{
        unsigned int pid;

        preempt_disable();
        pid = mm ? mm->context.id : 0;
        if (pid != MMU_NO_CONTEXT)
                _tlbil_va(vmaddr, pid, tsize, ind);
        preempt_enable();
}

void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
        __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
                               mmu_get_tsize(mmu_virtual_psize), 0);
}
EXPORT_SYMBOL(local_flush_tlb_page);

/*
 * And here are the SMP non-local implementations
 */
#ifdef CONFIG_SMP

static DEFINE_RAW_SPINLOCK(tlbivax_lock);

static int mm_is_core_local(struct mm_struct *mm)
{
        return cpumask_subset(mm_cpumask(mm),
                              topology_thread_cpumask(smp_processor_id()));
}

struct tlb_flush_param {
        unsigned long addr;
        unsigned int pid;
        unsigned int tsize;
        unsigned int ind;
};

static void do_flush_tlb_mm_ipi(void *param)
{
        struct tlb_flush_param *p = param;

        _tlbil_pid(p ? p->pid : 0);
}

static void do_flush_tlb_page_ipi(void *param)
{
        struct tlb_flush_param *p = param;

        _tlbil_va(p->addr, p->pid, p->tsize, p->ind);
}

/* Note on invalidations and PID:
 *
 * We snapshot the PID with preempt disabled. At this point, it can still
 * change either because:
 * - our context is being stolen (PID -> NO_CONTEXT) on another CPU
 * - we are invalidating some target that isn't currently running here
 *   and is concurrently acquiring a new PID on another CPU
 * - some other CPU is re-acquiring a lost PID for this mm
 * etc...
 *
 * However, this shouldn't be a problem as we only guarantee
 * invalidation of TLB entries present prior to this call, so we
 * don't care about the PID changing, and invalidating a stale PID
 * is generally harmless.
 */

void flush_tlb_mm(struct mm_struct *mm)
{
        unsigned int pid;

        preempt_disable();
        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                goto no_context;
        if (!mm_is_core_local(mm)) {
                struct tlb_flush_param p = { .pid = pid };
                /* Ignores smp_processor_id() even if set. */
                smp_call_function_many(mm_cpumask(mm),
                                       do_flush_tlb_mm_ipi, &p, 1);
        }
        _tlbil_pid(pid);
 no_context:
        preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_mm);

void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
                      int tsize, int ind)
{
        struct cpumask *cpu_mask;
        unsigned int pid;

        preempt_disable();
        pid = mm ? mm->context.id : 0;
        if (unlikely(pid == MMU_NO_CONTEXT))
                goto bail;
        cpu_mask = mm_cpumask(mm);
        if (!mm_is_core_local(mm)) {
                /* If broadcast tlbivax is supported, use it */
                if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
                        int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
                        if (lock)
                                raw_spin_lock(&tlbivax_lock);
                        _tlbivax_bcast(vmaddr, pid, tsize, ind);
                        if (lock)
                                raw_spin_unlock(&tlbivax_lock);
                        goto bail;
                } else {
                        struct tlb_flush_param p = {
                                .pid = pid,
                                .addr = vmaddr,
                                .tsize = tsize,
                                .ind = ind,
                        };
                        /* Ignores smp_processor_id() even if set in cpu_mask */
                        smp_call_function_many(cpu_mask,
                                               do_flush_tlb_page_ipi, &p, 1);
                }
        }
        _tlbil_va(vmaddr, pid, tsize, ind);
 bail:
        preempt_enable();
}

void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
        __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
                         mmu_get_tsize(mmu_virtual_psize), 0);
}
EXPORT_SYMBOL(flush_tlb_page);

#endif /* CONFIG_SMP */

/*
 * Flush kernel TLB entries in the given range
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
#ifdef CONFIG_SMP
        preempt_disable();
        smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
        _tlbil_pid(0);
        preempt_enable();
#else
        _tlbil_pid(0);
#endif
}
EXPORT_SYMBOL(flush_tlb_kernel_range);

/*
 * Currently, for range flushing, we just do a full mm flush. This should
 * be optimized based on a threshold on the size of the range, since
 * some implementations can stack multiple tlbivax before a tlbsync, but
 * for now we keep it that way. (A hypothetical sketch of such a
 * threshold-based variant follows this listing.)
 */
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                     unsigned long end)
{
        flush_tlb_mm(vma->vm_mm);
}
EXPORT_SYMBOL(flush_tlb_range);

void tlb_flush(struct mmu_gather *tlb)
{
        flush_tlb_mm(tlb->mm);

        /* Push out batch of freed page tables */
        pte_free_finish();
}

/*
 * Below are functions specific to the 64-bit variant of Book3E, though that
 * may change in the future
 */

#ifdef CONFIG_PPC64

/*
 * Handling of virtual linear page table or indirect TLB entry
 * flushing when PTE pages are freed
 */
void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
{
        int tsize = mmu_psize_defs[mmu_pte_psize].enc;

        if (book3e_htw_enabled) {
                unsigned long start = address & PMD_MASK;
                unsigned long end = address + PMD_SIZE;
                unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;

                /* This isn't the most optimal; ideally we would factor out the
                 * whole preempt & CPU mask mucking around, or even the IPI,
                 * but it will do for now
                 */
                while (start < end) {
                        __flush_tlb_page(tlb->mm, start, tsize, 1);
                        start += size;
                }
        } else {
                unsigned long rmask = 0xf000000000000000ul;
                unsigned long rid = (address & rmask) | 0x1000000000000000ul;
                unsigned long vpte = address & ~rmask;

#ifdef CONFIG_PPC_64K_PAGES
                vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
#else
                vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
#endif
                vpte |= rid;
                __flush_tlb_page(tlb->mm, vpte, tsize, 0);
        }
}

/*
 * Early initialization of the MMU TLB code
 */
static void __early_init_mmu(int boot_cpu)
{
        extern unsigned int interrupt_base_book3e;
        extern unsigned int exc_data_tlb_miss_htw_book3e;
        extern unsigned int exc_instruction_tlb_miss_htw_book3e;

        unsigned int *ibase = &interrupt_base_book3e;
        unsigned int mas4;

        /* XXX This will have to be decided at runtime, but right
         * now our boot and TLB miss code hard wires it. Ideally
         * we should find out a suitable page size and patch the
         * TLB miss code (either that or use the PACA to store
         * the value we want)
         */
        mmu_linear_psize = MMU_PAGE_1G;

        /* XXX This should be decided at runtime based on supported
         * page sizes in the TLB, but for now let's assume 16M is
         * always there and a good fit (which it probably is)
         */
        mmu_vmemmap_psize = MMU_PAGE_16M;

        /* Check if HW tablewalk is present, and if yes, enable it by:
         *
         * - patching the TLB miss handlers to branch to the
         *   one dedicated to it
         *
         * - setting the global book3e_htw_enabled
         *
         * - setting MAS4:INDD and the default page size
         */

        /* XXX This code only checks for TLB 0 capabilities and doesn't
         *     check what page size combos are supported by the HW. It
         *     also doesn't handle the case where a separate array holds
         *     the IND entries from the array loaded by the PT.
         */
        if (boot_cpu) {
                unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);

                /* Check if HW loader is supported */
                if ((tlb0cfg & TLBnCFG_IND) &&
                    (tlb0cfg & TLBnCFG_PT)) {
                        patch_branch(ibase + (0x1c0 / 4),
                                     (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
                        patch_branch(ibase + (0x1e0 / 4),
                                     (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
                        book3e_htw_enabled = 1;
                }
                pr_info("MMU: Book3E Page Tables %s\n",
                        book3e_htw_enabled ? "Enabled" : "Disabled");
        }

        /* Set MAS4 based on page table setting */
        mas4 = 0x4 << MAS4_WIMGED_SHIFT;
        if (book3e_htw_enabled) {
                mas4 |= MAS4_INDD;
#ifdef CONFIG_PPC_64K_PAGES
                mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
                mmu_pte_psize = MMU_PAGE_256M;
#else
                mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
                mmu_pte_psize = MMU_PAGE_1M;
#endif
        } else {
#ifdef CONFIG_PPC_64K_PAGES
                mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
#else
                mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
#endif
                mmu_pte_psize = mmu_virtual_psize;
        }
        mtspr(SPRN_MAS4, mas4);

        /* Set the global containing the top of the linear mapping
         * for use by the TLB miss code
         */
        linear_map_top = lmb_end_of_DRAM();

        /* A sync won't hurt us after mucking around with
         * the MMU configuration
         */
        mb();
}

void __init early_init_mmu(void)
{
        __early_init_mmu(1);
}

void __cpuinit early_init_mmu_secondary(void)
{
        __early_init_mmu(0);
}

#endif /* CONFIG_PPC64 */
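The comment above flush_tlb_range() notes that range flushing currently falls back to a full mm flush and could instead be driven by a threshold on the size of the range. Below is a minimal sketch of that idea, not the kernel's actual implementation: small ranges are flushed page by page through the existing flush_tlb_page() path, and everything else keeps the full-mm flush. The TLB_RANGE_FLUSH_THRESHOLD name and its value are illustrative assumptions, not part of this file.

/* Hypothetical sketch only: the threshold name and value are illustrative,
 * not taken from the kernel source. Small ranges are flushed page by page;
 * larger ones fall back to the existing full-mm flush.
 */
#define TLB_RANGE_FLUSH_THRESHOLD       32      /* pages, illustrative */

static void flush_tlb_range_sketch(struct vm_area_struct *vma,
                                   unsigned long start, unsigned long end)
{
        unsigned long npages = (end - start) >> PAGE_SHIFT;
        unsigned long addr;

        if (npages > TLB_RANGE_FLUSH_THRESHOLD) {
                flush_tlb_mm(vma->vm_mm);
                return;
        }

        for (addr = start; addr < end; addr += PAGE_SIZE)
                flush_tlb_page(vma, addr);
}

A real optimization along these lines would presumably also stack several tlbivax operations before a single tlbsync where the hardware supports it, as the original comment suggests; the per-page loop above still issues one invalidation (or IPI round) per page.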