Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
arch/powerpc/mm/hash_native_64.c at v3.7 (581 lines, 15 kB)
/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define HPTE_LOCK_BIT 3

DEFINE_RAW_SPINLOCK(native_tlbie_lock);

static inline void __tlbie(unsigned long vpn, int psize, int ssize)
{
	unsigned long va;
	unsigned int penc;

	/*
	 * We need 14 to 65 bits of va for a tlbie of 4K page
	 * With vpn we ignore the lower VPN_SHIFT bits already.
	 * And top two bits are already ignored because we can
	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		va |= ssize << 8;
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc;
		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
}
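/*
 * __tlbiel() below is the local (this-CPU-only) variant of __tlbie().
 * The instruction is emitted as a raw opcode: 0x7c000224 is tlbiel
 * (primary opcode 31, extended opcode 274), %0 << 11 places the encoded
 * address in RB, and the << 21 term sets the L field selecting the
 * large-page format -- presumably hand-encoded because older assemblers
 * lack this form of the tlbiel mnemonic.
 */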
static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
{
	unsigned long va;
	unsigned int penc;

	/* VPN_SHIFT can be at most 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64 bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		va |= ssize << 8;
		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
			     : : "r"(va) : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc;
		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		va |= 1; /* L */
		asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
			     : : "r"(va) : "memory");
		break;
	}
}

static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
{
	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		raw_spin_lock(&native_tlbie_lock);
	asm volatile("ptesync": : :"memory");
	if (use_local) {
		__tlbiel(vpn, psize, ssize);
		asm volatile("ptesync": : :"memory");
	} else {
		__tlbie(vpn, psize, ssize);
		asm volatile("eieio; tlbsync; ptesync": : :"memory");
	}
	if (lock_tlbie && !use_local)
		raw_spin_unlock(&native_tlbie_lock);
}

static inline void native_lock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = &hptep->v;

	while (1) {
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
			break;
		while (test_bit(HPTE_LOCK_BIT, word))
			cpu_relax();
	}
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = &hptep->v;

	clear_bit_unlock(HPTE_LOCK_BIT, word);
}
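/*
 * The hash page table is an array of groups of HPTES_PER_GROUP (8)
 * entries; a hash of the VPN selects a primary and a secondary group.
 * native_hpte_insert() scans one group for an invalid slot, checking
 * the valid bit locklessly first and re-checking under the per-entry
 * lock. The returned slot index has bit 3 set when the entry went in
 * via the secondary hash, so callers can record where to find it again.
 */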
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			unsigned long pa, unsigned long rflags,
			unsigned long vflags, int psize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	int i;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, vpn, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (!(hptep->v & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (!(hptep->v & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	hptep->r = hpte_r;
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	hptep->v = hpte_v;

	__asm__ __volatile__ ("ptesync" : : : "memory");

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

static long native_hpte_remove(unsigned long hpte_group)
{
	struct hash_pte *hptep;
	int i;
	int slot_offset;
	unsigned long hpte_v;

	DBG_LOW("    remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = hptep->v;

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP)
		return -1;

	/* Invalidate the hpte. NOTE: this also unlocks it */
	hptep->v = 0;

	return i;
}
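/*
 * native_hpte_updatepp() rewrites the protection bits of an existing
 * entry in place. Returning -1 means the entry is gone (e.g. stolen by
 * a later insert) and the caller must recreate it via hpte_insert.
 * Note the tlbie is issued even on a miss: a stale translation may
 * still be cached from before the entry was replaced.
 */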
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long vpn, int psize, int ssize,
				 int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0;

	want_v = hpte_encode_v(vpn, psize, ssize);

	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);

	native_lock_hpte(hptep);

	hpte_v = hptep->v;

	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		DBG_LOW(" -> hit\n");
		/* Update the HPTE */
		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
	}
	native_unlock_hpte(hptep);

	/* Ensure it is out of the tlb too. */
	tlbie(vpn, psize, ssize, local);

	return ret;
}

static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
	struct hash_pte *hptep;
	unsigned long hash;
	unsigned long i;
	long slot;
	unsigned long want_v, hpte_v;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_v(vpn, psize, ssize);

	/* Bolted mappings are only ever in the primary group */
	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + slot;
		hpte_v = hptep->v;

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize, int ssize)
{
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
		(newpp & (HPTE_R_PP | HPTE_R_N));

	/* Ensure it is out of the tlb too. */
	tlbie(vpn, psize, ssize, 0);
}

static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
				   int psize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

	want_v = hpte_encode_v(vpn, psize, ssize);
	native_lock_hpte(hptep);
	hpte_v = hptep->v;

	/* Even if we miss, we need to invalidate the TLB */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
		native_unlock_hpte(hptep);
	else
		/* Invalidate the hpte. NOTE: this also unlocks it */
		hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(vpn, psize, ssize, local);

	local_irq_restore(flags);
}

#define LP_SHIFT	12
#define LP_BITS		8
#define LP_MASK(i)	((0xFF >> (i)) << LP_SHIFT)
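/*
 * hpte_decode() runs the insert-time encoding backwards: the page size
 * is recovered from the LP field in the second doubleword (large pages
 * only), then the VPN is rebuilt from the AVPN plus the slot's PTEG
 * index, since the low bits of the VA are folded into the hash rather
 * than stored in the entry. Only native_hpte_clear() below needs this.
 */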
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *ssize, unsigned long *vpn)
{
	unsigned long avpn, pteg, vpi;
	unsigned long hpte_r = hpte->r;
	unsigned long hpte_v = hpte->v;
	unsigned long vsid, seg_off;
	int i, size, shift, penc;

	if (!(hpte_v & HPTE_V_LARGE))
		size = MMU_PAGE_4K;
	else {
		for (i = 0; i < LP_BITS; i++) {
			if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
				break;
		}
		penc = LP_MASK(i+1) >> LP_SHIFT;
		for (size = 0; size < MMU_PAGE_COUNT; size++) {

			/* 4K pages are not represented by LP */
			if (size == MMU_PAGE_4K)
				continue;

			/* valid entries have a shift value */
			if (!mmu_psize_defs[size].shift)
				continue;

			if (penc == mmu_psize_defs[size].penc)
				break;
		}
	}

	/* This works for all page sizes, and for 256M and 1T segments */
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
	shift = mmu_psize_defs[size].shift;

	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid = avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid = avpn >> 17;
		if (shift < 23) {
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	default:
		*vpn = size = 0;
	}
	*psize = size;
}

/*
 * clear all mappings on kexec. All cpus are in real mode (or they will
 * be when they take an ISI), and we are the only one left. We rely on
 * our kernel mapping being 0xC0's and the hardware ignoring those two
 * real bits.
 *
 * TODO: add batching support when enabled. remember, no dynamic memory
 * here, although there is the control page available...
 */
static void native_hpte_clear(void)
{
	unsigned long vpn = 0;
	unsigned long slot, slots, flags;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;
	int psize, ssize;

	pteg_count = htab_hash_mask + 1;

	local_irq_save(flags);

	/*
	 * we take the tlbie lock and hold it.  Some hardware will
	 * deadlock if we try to tlbie from two processors at once.
	 */
	raw_spin_lock(&native_tlbie_lock);

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running,  right?  and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = hptep->v;

		/*
		 * Call __tlbie() here rather than tlbie() since we
		 * already hold the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &ssize, &vpn);
			hptep->v = 0;
			__tlbie(vpn, psize, ssize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync":::"memory");
	raw_spin_unlock(&native_tlbie_lock);
	local_irq_restore(flags);
}

/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 */
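/*
 * All the HPTEs are invalidated first, each under its own per-entry
 * lock, and the TLB invalidations are then issued as one ptesync-
 * bracketed sequence. When the flush is CPU-local and the page size
 * supports tlbiel, the global tlbie lock is never taken.
 */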
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn;
	unsigned long hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_v(vpn, psize, ssize);
			native_lock_hpte(hptep);
			hpte_v = hptep->v;
			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
			    !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else
				hptep->v = 0;
		} pte_iterate_hashed_end();
	}

	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
	    mmu_psize_defs[psize].tlbiel && local) {
		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbiel(vpn, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("ptesync":::"memory");
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync":::"memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbie(vpn, psize, ssize);
			} pte_iterate_hashed_end();
		}
		asm volatile("eieio; tlbsync; ptesync":::"memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

void __init hpte_init_native(void)
{
	ppc_md.hpte_invalidate	= native_hpte_invalidate;
	ppc_md.hpte_updatepp	= native_hpte_updatepp;
	ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
	ppc_md.hpte_insert	= native_hpte_insert;
	ppc_md.hpte_remove	= native_hpte_remove;
	ppc_md.hpte_clear_all	= native_hpte_clear;
	ppc_md.flush_hash_range = native_flush_hash_range;
}
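hpte_init_native() wires these routines into the ppc_md machdep vector, through which the generic hash MMU code dispatches without knowing whether it is running bare-metal or paravirtualized (pSeries LPAR installs hcall-based versions instead). A minimal sketch of a caller, assuming the v3.7 ppc_md field layout; example_evict() is a hypothetical helper, not kernel code:

#include <asm/machdep.h>

/*
 * Evict one translation: clear the HPTE, then shoot down the TLB entry.
 * On bare-metal this dispatches to native_hpte_invalidate() above.
 */
static void example_evict(unsigned long slot, unsigned long vpn,
			  int psize, int ssize)
{
	ppc_md.hpte_invalidate(slot, vpn, psize, ssize, 0 /* global flush */);
}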