Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v3.10-rc2 · 661 lines · 17 kB
/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define HPTE_LOCK_BIT 3

DEFINE_RAW_SPINLOCK(native_tlbie_lock);

static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;

	/*
	 * We need 14 to 65 bits of va for a tlbie of a 4K page.
	 * With vpn we ignore the lower VPN_SHIFT bits already.
	 * And the top two bits are already ignored, because we can
	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		va |= mmu_psize_defs[apsize].sllp << 6;
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/* Add AVAL part */
		if (psize != apsize) {
			/*
			 * MPSS, 64K base page size and 16MB large page size
			 * We don't need all the bits, but the rest of the
			 * bits must be ignored by the processor.
			 * vpn covers up to 65 bits of va. (0...65) and we need
			 * 58..64 bits of va.
			 */
			va |= (vpn & 0xfe);
		}
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
}

static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;

	/* VPN_SHIFT can be at most 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64 bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		va |= mmu_psize_defs[apsize].sllp << 6;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/* Add AVAL part */
		if (psize != apsize) {
			/*
			 * MPSS, 64K base page size and 16MB large page size
			 * We don't need all the bits, but the rest of the
			 * bits must be ignored by the processor.
			 * vpn covers up to 65 bits of va. (0...65) and we need
			 * 58..64 bits of va.
			 */
			va |= (vpn & 0xfe);
		}
		va |= 1; /* L */
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}
}
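/*
 * Flush one translation from the TLB: use the per-CPU tlbiel form when
 * the page size supports it and the caller asked for a local flush,
 * otherwise fall back to a global tlbie, serialized by
 * native_tlbie_lock on hardware that cannot handle concurrent tlbies.
 */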
static inline void tlbie(unsigned long vpn, int psize, int apsize,
			 int ssize, int local)
{
	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		raw_spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(vpn, psize, apsize, ssize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(vpn, psize, apsize, ssize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		raw_spin_unlock(&native_tlbie_lock);
}

static inline void native_lock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = &hptep->v;

	while (1) {
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
			break;
		while (test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = &hptep->v;

	clear_bit_unlock(HPTE_LOCK_BIT, word);
}
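/*
 * Insert an HPTE into the given hash group: scan the eight slots for a
 * free (invalid) entry, write the second doubleword first, order it
 * with eieio, then set the first doubleword with the valid bit.
 * Returns the slot index (with bit 3 set for a secondary-group insert),
 * or -1 if the group is full.
 */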
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			       unsigned long pa, unsigned long rflags,
			       unsigned long vflags, int psize, int apsize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, vpn, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (!(hptep->v & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (!(hptep->v & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = hpte_r;
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = hpte_v;

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

static long native_hpte_remove(unsigned long hpte_group)
{
	struct hash_pte *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW(" remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = hptep->v;

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}

static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
	int i, shift;
	unsigned int mask;

	/* start from 1 ignoring MMU_PAGE_4K */
	for (i = 1; i < MMU_PAGE_COUNT; i++) {

		/* invalid penc */
		if (mmu_psize_defs[psize].penc[i] == -1)
			continue;
		/*
		 * encoding bits per actual page size
		 *        PTE LP     actual page size
		 *    rrrr rrrz      >=8KB
		 *    rrrr rrzz      >=16KB
		 *    rrrr rzzz      >=32KB
		 *    rrrr zzzz      >=64KB
		 * .......
		 */
		shift = mmu_psize_defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		mask = (1 << shift) - 1;
		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
			return i;
	}
	return -1;
}

static inline int hpte_actual_psize(struct hash_pte *hptep, int psize)
{
	/* Look at the 8 bit LP value */
	unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (!(hptep->v & HPTE_V_VALID))
		return -1;

	/* First check if it is large page */
	if (!(hptep->v & HPTE_V_LARGE))
		return MMU_PAGE_4K;

	return __hpte_actual_psize(lp, psize);
}
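/*
 * Update the protection (PP) and no-execute bits of an existing HPTE.
 * Returns 0 on a hit, -1 if the slot no longer matches the expected
 * AVPN; the TLB entry is invalidated either way.
 */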
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long vpn, int psize, int ssize,
				 int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0;
	int actual_psize;

	want_v = hpte_encode_avpn(vpn, psize, ssize);

	DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);

	native_lock_hpte(hptep);

	hpte_v = hptep->v;
	actual_psize = hpte_actual_psize(hptep, psize);
	if (actual_psize < 0) {
		native_unlock_hpte(hptep);
		return -1;
	}
	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
	}
	native_unlock_hpte(hptep);

	/* Ensure it is out of the tlb too. */
	tlbie(vpn, psize, actual_psize, ssize, local);

	return ret;
}

static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
	struct hash_pte *hptep;
	unsigned long hash;
	unsigned long i;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_avpn(vpn, psize, ssize);

	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
		hpte_v = hptep->v;

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize, int ssize)
{
	int actual_psize;
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;
	actual_psize = hpte_actual_psize(hptep, psize);
	if (actual_psize < 0)
		return;

	/* Update the HPTE */
	hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N));

	/* Ensure it is out of the tlb too. */
	tlbie(vpn, psize, actual_psize, ssize, 0);
}
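/*
 * Invalidate a single HPTE: if the slot still matches the expected
 * AVPN, clear its valid bit (which also drops the per-entry lock); the
 * TLB entry is flushed whether or not the slot matched. Interrupts are
 * disabled across the whole sequence.
 */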
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
				   int psize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	int actual_psize;

	local_irq_save(flags);

	DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

	want_v = hpte_encode_avpn(vpn, psize, ssize);
	native_lock_hpte(hptep);
	hpte_v = hptep->v;

	actual_psize = hpte_actual_psize(hptep, psize);
	if (actual_psize < 0) {
		native_unlock_hpte(hptep);
		local_irq_restore(flags);
		return;
	}
	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(vpn, psize, actual_psize, ssize, local);

	local_irq_restore(flags);
}
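/*
 * Work backwards from a valid HPTE to the (psize, apsize, ssize, vpn)
 * it maps: the AVPN supplies the high bits of the virtual address, and
 * the PTEG number is used to recover the hash bits that the AVPN does
 * not carry.
 */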
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
	unsigned long avpn, pteg, vpi;
	unsigned long hpte_v = hpte->v;
	unsigned long vsid, seg_off;
	int size, a_size, shift;
	/* Look at the 8 bit LP value */
	unsigned int lp = (hpte->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (!(hpte_v & HPTE_V_LARGE)) {
		size   = MMU_PAGE_4K;
		a_size = MMU_PAGE_4K;
	} else {
		for (size = 0; size < MMU_PAGE_COUNT; size++) {

			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			a_size = __hpte_actual_psize(lp, size);
			if (a_size != -1)
				break;
		}
	}
	/* This works for all page sizes, and for 256M and 1T segments */
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
	shift = mmu_psize_defs[size].shift;

	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid = avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid = avpn >> 17;
		if (shift < 23) {
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	default:
		*vpn = size = 0;
	}
	*psize = size;
	*apsize = a_size;
}

/*
 * clear all mappings on kexec. All cpus are in real mode (or they will
 * be when they isi), and we are the only one left. We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled. remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long vpn = 0;
	unsigned long slot, slots, flags;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;
	int psize, apsize, ssize;

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/*
	 * we take the tlbie lock and hold it. Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
	raw_spin_lock(&native_tlbie_lock);

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running, right? and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = hptep->v;

		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
			hptep->v = 0;
			__tlbie(vpn, psize, apsize, ssize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
	raw_spin_unlock(&native_tlbie_lock);
	local_irq_restore(flags);
}

/*
 * Batched hash table flush: we batch the tlbie's to avoid taking/releasing
 * the lock all the time.
 */
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn;
	unsigned long hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_avpn(vpn, psize, ssize);
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbiel(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbie(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}
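/*
 * Hook the native (bare-metal) hash-table implementations into the
 * machine descriptor; platforms whose hash table is managed by a
 * hypervisor install their own callbacks instead.
 */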
void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate = native_hpte_invalidate;
	ppc_md.hpte_updatepp = native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert = native_hpte_insert;
	ppc_md.hpte_remove = native_hpte_remove;
	ppc_md.hpte_clear_all = native_hpte_clear;
	ppc_md.flush_hash_range = native_flush_hash_range;
}