Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sh: Optimized cache handling for SH-4/SH-4A caches.

This reworks some of the SH-4 cache handling code to more easily
accommodate newer-style caches (particularly for the > direct-mapped
case), as well as optimizing some of the old code.

Signed-off-by: Richard Curnow <richard.curnow@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>

authored by

Richard Curnow and committed by
Paul Mundt
b638d0b9 fdfc74f9

+475 -190
+15 -1
arch/sh/kernel/cpu/init.c
··· 4 4 * CPU init code 5 5 * 6 6 * Copyright (C) 2002, 2003 Paul Mundt 7 + * Copyright (C) 2003 Richard Curnow 7 8 * 8 9 * This file is subject to the terms and conditions of the GNU General Public 9 10 * License. See the file "COPYING" in the main directory of this archive ··· 52 51 ccr = ctrl_inl(CCR); 53 52 54 53 /* 55 - * If the cache is already enabled .. flush it. 54 + * At this point we don't know whether the cache is enabled or not - a 55 + * bootloader may have enabled it. There are at least 2 things that 56 + * could be dirty in the cache at this point: 57 + * 1. kernel command line set up by boot loader 58 + * 2. spilled registers from the prolog of this function 59 + * => before re-initialising the cache, we must do a purge of the whole 60 + * cache out to memory for safety. As long as nothing is spilled 61 + * during the loop to lines that have already been done, this is safe. 62 + * - RPC 56 63 */ 57 64 if (ccr & CCR_CACHE_ENABLE) { 58 65 unsigned long ways, waysize, addrstart; ··· 107 98 /* Force EMODE if possible */ 108 99 if (cpu_data->dcache.ways > 1) 109 100 flags |= CCR_CACHE_EMODE; 101 + else 102 + flags &= ~CCR_CACHE_EMODE; 110 103 #endif 111 104 112 105 #ifdef CONFIG_SH_WRITETHROUGH ··· 123 112 /* Turn on OCRAM -- halve the OC */ 124 113 flags |= CCR_CACHE_ORA; 125 114 cpu_data->dcache.sets >>= 1; 115 + 116 + cpu_data->dcache.way_size = cpu_data->dcache.sets * 117 + cpu_data->dcache.linesz; 126 118 #endif 127 119 128 120 ctrl_outl(flags, CCR);
+11
arch/sh/kernel/cpu/sh4/probe.c
··· 113 113 break; 114 114 } 115 115 116 + #ifdef CONFIG_SH_DIRECT_MAPPED 117 + cpu_data->icache.ways = 1; 118 + cpu_data->dcache.ways = 1; 119 + #endif 120 + 116 121 /* 117 122 * On anything that's not a direct-mapped cache, look to the CVR 118 123 * for I/D-cache specifics. ··· 130 125 (cpu_data->icache.way_incr - (1 << 5)); 131 126 } 132 127 128 + cpu_data->icache.way_size = cpu_data->icache.sets * 129 + cpu_data->icache.linesz; 130 + 133 131 if (cpu_data->dcache.ways > 1) { 134 132 size = sizes[(cvr >> 16) & 0xf]; 135 133 cpu_data->dcache.way_incr = (size >> 1); ··· 140 132 cpu_data->dcache.entry_mask = 141 133 (cpu_data->dcache.way_incr - (1 << 5)); 142 134 } 135 + 136 + cpu_data->dcache.way_size = cpu_data->dcache.sets * 137 + cpu_data->dcache.linesz; 143 138 144 139 return 0; 145 140 }
+430 -87
arch/sh/mm/cache-sh4.c
··· 25 25 #include <asm/mmu_context.h> 26 26 #include <asm/cacheflush.h> 27 27 28 - extern void __flush_cache_4096(unsigned long addr, unsigned long phys, 28 + static void __flush_dcache_segment_1way(unsigned long start, 29 + unsigned long extent); 30 + static void __flush_dcache_segment_2way(unsigned long start, 31 + unsigned long extent); 32 + static void __flush_dcache_segment_4way(unsigned long start, 33 + unsigned long extent); 34 + 35 + static void __flush_cache_4096(unsigned long addr, unsigned long phys, 29 36 unsigned long exec_offset); 30 - extern void __flush_cache_4096_all(unsigned long start); 31 - static void __flush_cache_4096_all_ex(unsigned long start); 32 - extern void __flush_dcache_all(void); 33 - static void __flush_dcache_all_ex(void); 37 + 38 + /* 39 + * This is initialised here to ensure that it is not placed in the BSS. If 40 + * that were to happen, note that cache_init gets called before the BSS is 41 + * cleared, so this would get nulled out which would be hopeless. 
42 + */ 43 + static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) = 44 + (void (*)(unsigned long, unsigned long))0xdeadbeef; 45 + 46 + static void compute_alias(struct cache_info *c) 47 + { 48 + c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1); 49 + c->n_aliases = (c->alias_mask >> PAGE_SHIFT) + 1; 50 + } 51 + 52 + static void __init emit_cache_params(void) 53 + { 54 + printk("PVR=%08x CVR=%08x PRR=%08x\n", 55 + ctrl_inl(CCN_PVR), 56 + ctrl_inl(CCN_CVR), 57 + ctrl_inl(CCN_PRR)); 58 + printk("I-cache : n_ways=%d n_sets=%d way_incr=%d\n", 59 + cpu_data->icache.ways, 60 + cpu_data->icache.sets, 61 + cpu_data->icache.way_incr); 62 + printk("I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", 63 + cpu_data->icache.entry_mask, 64 + cpu_data->icache.alias_mask, 65 + cpu_data->icache.n_aliases); 66 + printk("D-cache : n_ways=%d n_sets=%d way_incr=%d\n", 67 + cpu_data->dcache.ways, 68 + cpu_data->dcache.sets, 69 + cpu_data->dcache.way_incr); 70 + printk("D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", 71 + cpu_data->dcache.entry_mask, 72 + cpu_data->dcache.alias_mask, 73 + cpu_data->dcache.n_aliases); 74 + 75 + if (!__flush_dcache_segment_fn) 76 + panic("unknown number of cache ways\n"); 77 + } 34 78 35 79 /* 36 80 * SH-4 has virtually indexed and physically tagged cache. 37 81 */ 38 82 39 - struct semaphore p3map_sem[4]; 83 + /* Worst case assumed to be 64k cache, direct-mapped i.e. 4 synonym bits. 
*/ 84 + #define MAX_P3_SEMAPHORES 16 85 + 86 + struct semaphore p3map_sem[MAX_P3_SEMAPHORES]; 40 87 41 88 void __init p3_cache_init(void) 42 89 { 43 - if (remap_area_pages(P3SEG, 0, PAGE_SIZE*4, _PAGE_CACHABLE)) 90 + int i; 91 + 92 + compute_alias(&cpu_data->icache); 93 + compute_alias(&cpu_data->dcache); 94 + 95 + switch (cpu_data->dcache.ways) { 96 + case 1: 97 + __flush_dcache_segment_fn = __flush_dcache_segment_1way; 98 + break; 99 + case 2: 100 + __flush_dcache_segment_fn = __flush_dcache_segment_2way; 101 + break; 102 + case 4: 103 + __flush_dcache_segment_fn = __flush_dcache_segment_4way; 104 + break; 105 + default: 106 + __flush_dcache_segment_fn = NULL; 107 + break; 108 + } 109 + 110 + emit_cache_params(); 111 + 112 + if (remap_area_pages(P3SEG, 0, PAGE_SIZE * 4, _PAGE_CACHABLE)) 44 113 panic("%s failed.", __FUNCTION__); 45 114 46 - sema_init (&p3map_sem[0], 1); 47 - sema_init (&p3map_sem[1], 1); 48 - sema_init (&p3map_sem[2], 1); 49 - sema_init (&p3map_sem[3], 1); 115 + for (i = 0; i < cpu_data->dcache.n_aliases; i++) 116 + sema_init(&p3map_sem[i], 1); 50 117 } 51 118 52 119 /* ··· 158 91 } 159 92 } 160 93 161 - 162 94 /* 163 95 * No write back please 164 96 */ ··· 174 108 : /* no output */ 175 109 : "m" (__m(v))); 176 110 } 177 - } 178 - 179 - static void __flush_dcache_all_ex(void) 180 - { 181 - unsigned long addr, end_addr, entry_offset; 182 - 183 - end_addr = CACHE_OC_ADDRESS_ARRAY + 184 - (cpu_data->dcache.sets << cpu_data->dcache.entry_shift) * 185 - cpu_data->dcache.ways; 186 - 187 - entry_offset = 1 << cpu_data->dcache.entry_shift; 188 - for (addr = CACHE_OC_ADDRESS_ARRAY; 189 - addr < end_addr; 190 - addr += entry_offset) { 191 - ctrl_outl(0, addr); 192 - } 193 - } 194 - 195 - static void __flush_cache_4096_all_ex(unsigned long start) 196 - { 197 - unsigned long addr, entry_offset; 198 - int i; 199 - 200 - entry_offset = 1 << cpu_data->dcache.entry_shift; 201 - for (i = 0; i < cpu_data->dcache.ways; 202 - i++, start += cpu_data->dcache.way_incr) 
{ 203 - for (addr = CACHE_OC_ADDRESS_ARRAY + start; 204 - addr < CACHE_OC_ADDRESS_ARRAY + 4096 + start; 205 - addr += entry_offset) { 206 - ctrl_outl(0, addr); 207 - } 208 - } 209 - } 210 - 211 - void flush_cache_4096_all(unsigned long start) 212 - { 213 - if (cpu_data->dcache.ways == 1) 214 - __flush_cache_4096_all(start); 215 - else 216 - __flush_cache_4096_all_ex(start); 217 111 } 218 112 219 113 /* ··· 206 180 207 181 local_irq_save(flags); 208 182 jump_to_P2(); 183 + 209 184 for (i = 0; i < cpu_data->icache.ways; 210 185 i++, index += cpu_data->icache.way_incr) 211 186 ctrl_outl(0, index); /* Clear out Valid-bit */ 187 + 212 188 back_to_P1(); 213 189 wmb(); 214 190 local_irq_restore(flags); ··· 222 194 unsigned long flags; 223 195 224 196 /* 225 - * SH7751, SH7751R, and ST40 have no restriction to handle cache. 226 - * (While SH7750 must do that at P2 area.) 197 + * All types of SH-4 require PC to be in P2 to operate on the I-cache. 198 + * Some types of SH-4 require PC to be in P2 to operate on the D-cache. 
227 199 */ 228 200 if ((cpu_data->flags & CPU_HAS_P2_FLUSH_BUG) 229 201 || start < CACHE_OC_ADDRESS_ARRAY) { ··· 245 217 { 246 218 if (test_bit(PG_mapped, &page->flags)) { 247 219 unsigned long phys = PHYSADDR(page_address(page)); 220 + unsigned long addr = CACHE_OC_ADDRESS_ARRAY; 221 + int i, n; 248 222 249 223 /* Loop all the D-cache */ 250 - flush_cache_4096(CACHE_OC_ADDRESS_ARRAY, phys); 251 - flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys); 252 - flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, phys); 253 - flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys); 224 + n = cpu_data->dcache.n_aliases; 225 + for (i = 0; i < n; i++, addr += PAGE_SIZE) 226 + flush_cache_4096(addr, phys); 254 227 } 255 228 256 229 wmb(); ··· 275 246 276 247 void flush_dcache_all(void) 277 248 { 278 - if (cpu_data->dcache.ways == 1) 279 - __flush_dcache_all(); 280 - else 281 - __flush_dcache_all_ex(); 249 + (*__flush_dcache_segment_fn)(0UL, cpu_data->dcache.way_size); 282 250 wmb(); 283 251 } 284 252 ··· 287 261 288 262 void flush_cache_mm(struct mm_struct *mm) 289 263 { 264 + /* 265 + * Note : (RPC) since the caches are physically tagged, the only point 266 + * of flush_cache_mm for SH-4 is to get rid of aliases from the 267 + * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that 268 + * lines can stay resident so long as the virtual address they were 269 + * accessed with (hence cache set) is in accord with the physical 270 + * address (i.e. tag). It's no different here. So I reckon we don't 271 + * need to flush the I-cache, since aliases don't matter for that. We 272 + * should try that. 
273 + */ 290 274 flush_cache_all(); 291 275 } 292 276 ··· 309 273 void flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long pfn) 310 274 { 311 275 unsigned long phys = pfn << PAGE_SHIFT; 276 + unsigned int alias_mask; 277 + 278 + alias_mask = cpu_data->dcache.alias_mask; 312 279 313 280 /* We only need to flush D-cache when we have alias */ 314 - if ((address^phys) & CACHE_ALIAS) { 281 + if ((address^phys) & alias_mask) { 315 282 /* Loop 4K of the D-cache */ 316 283 flush_cache_4096( 317 - CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS), 284 + CACHE_OC_ADDRESS_ARRAY | (address & alias_mask), 318 285 phys); 319 286 /* Loop another 4K of the D-cache */ 320 287 flush_cache_4096( 321 - CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS), 288 + CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask), 322 289 phys); 323 290 } 324 291 325 - if (vma->vm_flags & VM_EXEC) 326 - /* Loop 4K (half) of the I-cache */ 292 + alias_mask = cpu_data->icache.alias_mask; 293 + if (vma->vm_flags & VM_EXEC) { 294 + /* 295 + * Evict entries from the portion of the cache from which code 296 + * may have been executed at this address (virtual). There's 297 + * no need to evict from the portion corresponding to the 298 + * physical address as for the D-cache, because we know the 299 + * kernel has never executed the code through its identity 300 + * translation. 
301 + */ 327 302 flush_cache_4096( 328 - CACHE_IC_ADDRESS_ARRAY | (address & 0x1000), 303 + CACHE_IC_ADDRESS_ARRAY | (address & alias_mask), 329 304 phys); 305 + } 330 306 } 331 307 332 308 /* ··· 353 305 void flush_cache_range(struct vm_area_struct *vma, unsigned long start, 354 306 unsigned long end) 355 307 { 356 - unsigned long p = start & PAGE_MASK; 308 + unsigned long d = 0, p = start & PAGE_MASK; 309 + unsigned long alias_mask = cpu_data->dcache.alias_mask; 310 + unsigned long n_aliases = cpu_data->dcache.n_aliases; 311 + unsigned long select_bit; 312 + unsigned long all_aliases_mask; 313 + unsigned long addr_offset; 314 + unsigned long phys; 357 315 pgd_t *dir; 358 316 pmd_t *pmd; 359 317 pud_t *pud; 360 318 pte_t *pte; 361 319 pte_t entry; 362 - unsigned long phys; 363 - unsigned long d = 0; 320 + int i; 321 + 322 + /* 323 + * If cache is only 4k-per-way, there are never any 'aliases'. Since 324 + * the cache is physically tagged, the data can just be left in there. 325 + */ 326 + if (n_aliases == 0) 327 + return; 328 + 329 + all_aliases_mask = (1 << n_aliases) - 1; 364 330 365 331 /* 366 332 * Don't bother with the lookup and alias check if we have a ··· 397 335 398 336 do { 399 337 if (pmd_none(*pmd) || pmd_bad(*pmd)) { 400 - p &= ~((1 << PMD_SHIFT) -1); 338 + p &= ~((1 << PMD_SHIFT) - 1); 401 339 p += (1 << PMD_SHIFT); 402 340 pmd++; 341 + 403 342 continue; 404 343 } 344 + 405 345 pte = pte_offset_kernel(pmd, p); 346 + 406 347 do { 407 348 entry = *pte; 349 + 408 350 if ((pte_val(entry) & _PAGE_PRESENT)) { 409 - phys = pte_val(entry)&PTE_PHYS_MASK; 410 - if ((p^phys) & CACHE_ALIAS) { 411 - d |= 1 << ((p & CACHE_ALIAS)>>12); 412 - d |= 1 << ((phys & CACHE_ALIAS)>>12); 413 - if (d == 0x0f) 351 + phys = pte_val(entry) & PTE_PHYS_MASK; 352 + 353 + if ((p ^ phys) & alias_mask) { 354 + d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); 355 + d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); 356 + 357 + if (d == all_aliases_mask) 414 358 goto loop_exit; 415 359 } 416 
360 } 361 + 417 362 pte++; 418 363 p += PAGE_SIZE; 419 364 } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); 420 365 pmd++; 421 366 } while (p < end); 422 - loop_exit: 423 - if (d & 1) 424 - flush_cache_4096_all(0); 425 - if (d & 2) 426 - flush_cache_4096_all(0x1000); 427 - if (d & 4) 428 - flush_cache_4096_all(0x2000); 429 - if (d & 8) 430 - flush_cache_4096_all(0x3000); 431 - if (vma->vm_flags & VM_EXEC) 367 + 368 + loop_exit: 369 + for (i = 0, select_bit = 0x1, addr_offset = 0x0; i < n_aliases; 370 + i++, select_bit <<= 1, addr_offset += PAGE_SIZE) 371 + if (d & select_bit) { 372 + (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); 373 + wmb(); 374 + } 375 + 376 + if (vma->vm_flags & VM_EXEC) { 377 + /* 378 + * TODO: Is this required??? Need to look at how I-cache 379 + * coherency is assured when new programs are loaded to see if 380 + * this matters. 381 + */ 432 382 flush_icache_all(); 383 + } 433 384 } 434 385 435 386 /* ··· 457 382 { 458 383 flush_cache_page(vma, addr, page_to_pfn(page)); 459 384 mb(); 385 + } 386 + 387 + /** 388 + * __flush_cache_4096 389 + * 390 + * @addr: address in memory mapped cache array 391 + * @phys: P1 address to flush (has to match tags if addr has 'A' bit 392 + * set i.e. associative write) 393 + * @exec_offset: set to 0x20000000 if flush has to be executed from P2 394 + * region else 0x0 395 + * 396 + * The offset into the cache array implied by 'addr' selects the 397 + * 'colour' of the virtual address range that will be flushed. The 398 + * operation (purge/write-back) is selected by the lower 2 bits of 399 + * 'phys'. 400 + */ 401 + static void __flush_cache_4096(unsigned long addr, unsigned long phys, 402 + unsigned long exec_offset) 403 + { 404 + int way_count; 405 + unsigned long base_addr = addr; 406 + struct cache_info *dcache; 407 + unsigned long way_incr; 408 + unsigned long a, ea, p; 409 + unsigned long temp_pc; 410 + 411 + dcache = &cpu_data->dcache; 412 + /* Write this way for better assembly. 
*/ 413 + way_count = dcache->ways; 414 + way_incr = dcache->way_incr; 415 + 416 + /* 417 + * Apply exec_offset (i.e. branch to P2 if required.). 418 + * 419 + * FIXME: 420 + * 421 + * If I write "=r" for the (temp_pc), it puts this in r6 hence 422 + * trashing exec_offset before it's been added on - why? Hence 423 + * "=&r" as a 'workaround' 424 + */ 425 + asm volatile("mov.l 1f, %0\n\t" 426 + "add %1, %0\n\t" 427 + "jmp @%0\n\t" 428 + "nop\n\t" 429 + ".balign 4\n\t" 430 + "1: .long 2f\n\t" 431 + "2:\n" : "=&r" (temp_pc) : "r" (exec_offset)); 432 + 433 + /* 434 + * We know there will be >=1 iteration, so write as do-while to avoid 435 + * pointless nead-of-loop check for 0 iterations. 436 + */ 437 + do { 438 + ea = base_addr + PAGE_SIZE; 439 + a = base_addr; 440 + p = phys; 441 + 442 + do { 443 + *(volatile unsigned long *)a = p; 444 + /* 445 + * Next line: intentionally not p+32, saves an add, p 446 + * will do since only the cache tag bits need to 447 + * match. 448 + */ 449 + *(volatile unsigned long *)(a+32) = p; 450 + a += 64; 451 + p += 64; 452 + } while (a < ea); 453 + 454 + base_addr += way_incr; 455 + } while (--way_count != 0); 456 + } 457 + 458 + /* 459 + * Break the 1, 2 and 4 way variants of this out into separate functions to 460 + * avoid nearly all the overhead of having the conditional stuff in the function 461 + * bodies (+ the 1 and 2 way cases avoid saving any registers too). 462 + */ 463 + static void __flush_dcache_segment_1way(unsigned long start, 464 + unsigned long extent_per_way) 465 + { 466 + unsigned long orig_sr, sr_with_bl; 467 + unsigned long base_addr; 468 + unsigned long way_incr, linesz, way_size; 469 + struct cache_info *dcache; 470 + register unsigned long a0, a0e; 471 + 472 + asm volatile("stc sr, %0" : "=r" (orig_sr)); 473 + sr_with_bl = orig_sr | (1<<28); 474 + base_addr = ((unsigned long)&empty_zero_page[0]); 475 + 476 + /* 477 + * The previous code aligned base_addr to 16k, i.e. 
the way_size of all 478 + * existing SH-4 D-caches. Whilst I don't see a need to have this 479 + * aligned to any better than the cache line size (which it will be 480 + * anyway by construction), let's align it to at least the way_size of 481 + * any existing or conceivable SH-4 D-cache. -- RPC 482 + */ 483 + base_addr = ((base_addr >> 16) << 16); 484 + base_addr |= start; 485 + 486 + dcache = &cpu_data->dcache; 487 + linesz = dcache->linesz; 488 + way_incr = dcache->way_incr; 489 + way_size = dcache->way_size; 490 + 491 + a0 = base_addr; 492 + a0e = base_addr + extent_per_way; 493 + do { 494 + asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); 495 + asm volatile("movca.l r0, @%0\n\t" 496 + "ocbi @%0" : : "r" (a0)); 497 + a0 += linesz; 498 + asm volatile("movca.l r0, @%0\n\t" 499 + "ocbi @%0" : : "r" (a0)); 500 + a0 += linesz; 501 + asm volatile("movca.l r0, @%0\n\t" 502 + "ocbi @%0" : : "r" (a0)); 503 + a0 += linesz; 504 + asm volatile("movca.l r0, @%0\n\t" 505 + "ocbi @%0" : : "r" (a0)); 506 + asm volatile("ldc %0, sr" : : "r" (orig_sr)); 507 + a0 += linesz; 508 + } while (a0 < a0e); 509 + } 510 + 511 + static void __flush_dcache_segment_2way(unsigned long start, 512 + unsigned long extent_per_way) 513 + { 514 + unsigned long orig_sr, sr_with_bl; 515 + unsigned long base_addr; 516 + unsigned long way_incr, linesz, way_size; 517 + struct cache_info *dcache; 518 + register unsigned long a0, a1, a0e; 519 + 520 + asm volatile("stc sr, %0" : "=r" (orig_sr)); 521 + sr_with_bl = orig_sr | (1<<28); 522 + base_addr = ((unsigned long)&empty_zero_page[0]); 523 + 524 + /* See comment under 1-way above */ 525 + base_addr = ((base_addr >> 16) << 16); 526 + base_addr |= start; 527 + 528 + dcache = &cpu_data->dcache; 529 + linesz = dcache->linesz; 530 + way_incr = dcache->way_incr; 531 + way_size = dcache->way_size; 532 + 533 + a0 = base_addr; 534 + a1 = a0 + way_incr; 535 + a0e = base_addr + extent_per_way; 536 + do { 537 + asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); 538 + 
asm volatile("movca.l r0, @%0\n\t" 539 + "movca.l r0, @%1\n\t" 540 + "ocbi @%0\n\t" 541 + "ocbi @%1" : : 542 + "r" (a0), "r" (a1)); 543 + a0 += linesz; 544 + a1 += linesz; 545 + asm volatile("movca.l r0, @%0\n\t" 546 + "movca.l r0, @%1\n\t" 547 + "ocbi @%0\n\t" 548 + "ocbi @%1" : : 549 + "r" (a0), "r" (a1)); 550 + a0 += linesz; 551 + a1 += linesz; 552 + asm volatile("movca.l r0, @%0\n\t" 553 + "movca.l r0, @%1\n\t" 554 + "ocbi @%0\n\t" 555 + "ocbi @%1" : : 556 + "r" (a0), "r" (a1)); 557 + a0 += linesz; 558 + a1 += linesz; 559 + asm volatile("movca.l r0, @%0\n\t" 560 + "movca.l r0, @%1\n\t" 561 + "ocbi @%0\n\t" 562 + "ocbi @%1" : : 563 + "r" (a0), "r" (a1)); 564 + asm volatile("ldc %0, sr" : : "r" (orig_sr)); 565 + a0 += linesz; 566 + a1 += linesz; 567 + } while (a0 < a0e); 568 + } 569 + 570 + static void __flush_dcache_segment_4way(unsigned long start, 571 + unsigned long extent_per_way) 572 + { 573 + unsigned long orig_sr, sr_with_bl; 574 + unsigned long base_addr; 575 + unsigned long way_incr, linesz, way_size; 576 + struct cache_info *dcache; 577 + register unsigned long a0, a1, a2, a3, a0e; 578 + 579 + asm volatile("stc sr, %0" : "=r" (orig_sr)); 580 + sr_with_bl = orig_sr | (1<<28); 581 + base_addr = ((unsigned long)&empty_zero_page[0]); 582 + 583 + /* See comment under 1-way above */ 584 + base_addr = ((base_addr >> 16) << 16); 585 + base_addr |= start; 586 + 587 + dcache = &cpu_data->dcache; 588 + linesz = dcache->linesz; 589 + way_incr = dcache->way_incr; 590 + way_size = dcache->way_size; 591 + 592 + a0 = base_addr; 593 + a1 = a0 + way_incr; 594 + a2 = a1 + way_incr; 595 + a3 = a2 + way_incr; 596 + a0e = base_addr + extent_per_way; 597 + do { 598 + asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); 599 + asm volatile("movca.l r0, @%0\n\t" 600 + "movca.l r0, @%1\n\t" 601 + "movca.l r0, @%2\n\t" 602 + "movca.l r0, @%3\n\t" 603 + "ocbi @%0\n\t" 604 + "ocbi @%1\n\t" 605 + "ocbi @%2\n\t" 606 + "ocbi @%3\n\t" : : 607 + "r" (a0), "r" (a1), "r" (a2), "r" (a3)); 608 
+ a0 += linesz; 609 + a1 += linesz; 610 + a2 += linesz; 611 + a3 += linesz; 612 + asm volatile("movca.l r0, @%0\n\t" 613 + "movca.l r0, @%1\n\t" 614 + "movca.l r0, @%2\n\t" 615 + "movca.l r0, @%3\n\t" 616 + "ocbi @%0\n\t" 617 + "ocbi @%1\n\t" 618 + "ocbi @%2\n\t" 619 + "ocbi @%3\n\t" : : 620 + "r" (a0), "r" (a1), "r" (a2), "r" (a3)); 621 + a0 += linesz; 622 + a1 += linesz; 623 + a2 += linesz; 624 + a3 += linesz; 625 + asm volatile("movca.l r0, @%0\n\t" 626 + "movca.l r0, @%1\n\t" 627 + "movca.l r0, @%2\n\t" 628 + "movca.l r0, @%3\n\t" 629 + "ocbi @%0\n\t" 630 + "ocbi @%1\n\t" 631 + "ocbi @%2\n\t" 632 + "ocbi @%3\n\t" : : 633 + "r" (a0), "r" (a1), "r" (a2), "r" (a3)); 634 + a0 += linesz; 635 + a1 += linesz; 636 + a2 += linesz; 637 + a3 += linesz; 638 + asm volatile("movca.l r0, @%0\n\t" 639 + "movca.l r0, @%1\n\t" 640 + "movca.l r0, @%2\n\t" 641 + "movca.l r0, @%3\n\t" 642 + "ocbi @%0\n\t" 643 + "ocbi @%1\n\t" 644 + "ocbi @%2\n\t" 645 + "ocbi @%3\n\t" : : 646 + "r" (a0), "r" (a1), "r" (a2), "r" (a3)); 647 + asm volatile("ldc %0, sr" : : "r" (orig_sr)); 648 + a0 += linesz; 649 + a1 += linesz; 650 + a2 += linesz; 651 + a3 += linesz; 652 + } while (a0 < a0e); 460 653 } 461 654
+1 -98
arch/sh/mm/clear_page.S
··· 193 193 nop 194 194 .L4096: .word 4096 195 195 196 - ENTRY(__flush_cache_4096) 197 - mov.l 1f,r3 198 - add r6,r3 199 - mov r4,r0 200 - mov #64,r2 201 - shll r2 202 - mov #64,r6 203 - jmp @r3 204 - mov #96,r7 205 - .align 2 206 - 1: .long 2f 207 - 2: 208 - .rept 32 209 - mov.l r5,@r0 210 - mov.l r5,@(32,r0) 211 - mov.l r5,@(r0,r6) 212 - mov.l r5,@(r0,r7) 213 - add r2,r5 214 - add r2,r0 215 - .endr 216 - nop 217 - nop 218 - nop 219 - nop 220 - nop 221 - nop 222 - nop 223 - rts 224 - nop 225 - 226 - ENTRY(__flush_dcache_all) 227 - mov.l 2f,r0 228 - mov.l 3f,r4 229 - and r0,r4 ! r4 = (unsigned long)&empty_zero_page[0] & ~0xffffc000 230 - stc sr,r1 ! save SR 231 - mov.l 4f,r2 232 - or r1,r2 233 - mov #32,r3 234 - shll2 r3 235 - 1: 236 - ldc r2,sr ! set BL bit 237 - movca.l r0,@r4 238 - ocbi @r4 239 - add #32,r4 240 - movca.l r0,@r4 241 - ocbi @r4 242 - add #32,r4 243 - movca.l r0,@r4 244 - ocbi @r4 245 - add #32,r4 246 - movca.l r0,@r4 247 - ocbi @r4 248 - ldc r1,sr ! restore SR 249 - dt r3 250 - bf/s 1b 251 - add #32,r4 252 - 253 - rts 254 - nop 255 - .align 2 256 - 2: .long 0xffffc000 257 - 3: .long empty_zero_page 258 - 4: .long 0x10000000 ! BL bit 259 - 260 - /* __flush_cache_4096_all(unsigned long addr) */ 261 - ENTRY(__flush_cache_4096_all) 262 - mov.l 2f,r0 263 - mov.l 3f,r2 264 - and r0,r2 265 - or r2,r4 ! r4 = addr | (unsigned long)&empty_zero_page[0] & ~0x3fff 266 - stc sr,r1 ! save SR 267 - mov.l 4f,r2 268 - or r1,r2 269 - mov #32,r3 270 - 1: 271 - ldc r2,sr ! set BL bit 272 - movca.l r0,@r4 273 - ocbi @r4 274 - add #32,r4 275 - movca.l r0,@r4 276 - ocbi @r4 277 - add #32,r4 278 - movca.l r0,@r4 279 - ocbi @r4 280 - add #32,r4 281 - movca.l r0,@r4 282 - ocbi @r4 283 - ldc r1,sr ! restore SR 284 - dt r3 285 - bf/s 1b 286 - add #32,r4 287 - 288 - rts 289 - nop 290 - .align 2 291 - 2: .long 0xffffc000 292 - 3: .long empty_zero_page 293 - 4: .long 0x10000000 ! BL bit 294 196 #endif 197 +
+18 -4
include/asm-sh/cache.h
··· 23 23 #define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) 24 24 25 25 struct cache_info { 26 - unsigned int ways; 27 - unsigned int sets; 28 - unsigned int linesz; 26 + unsigned int ways; /* Number of cache ways */ 27 + unsigned int sets; /* Number of cache sets */ 28 + unsigned int linesz; /* Cache line size (bytes) */ 29 29 30 + unsigned int way_size; /* sets * line size */ 31 + 32 + /* 33 + * way_incr is the address offset for accessing the next way 34 + * in memory mapped cache array ops. 35 + */ 30 36 unsigned int way_incr; 31 - 32 37 unsigned int entry_shift; 33 38 unsigned int entry_mask; 39 + 40 + /* 41 + * Compute a mask which selects the address bits which overlap between 42 + * 1. those used to select the cache set during indexing 43 + * 2. those in the physical page number. 44 + */ 45 + unsigned int alias_mask; 46 + 47 + unsigned int n_aliases; /* Number of aliases */ 34 48 35 49 unsigned long flags; 36 50 };