Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sh: Support for extended ASIDs on PTEAEX-capable SH-X3 cores.

This adds support for extended ASIDs (up to 16-bits) on newer SH-X3 cores
that implement the PTEAEX register and respective functionality. Presently
this is only supported by the 65nm SH7786 (the 90nm part only supports
legacy 8-bit ASIDs).

The main change is in how the PTE is written out when loading the entry
into the TLB, as well as in how the TLB entry is selectively flushed.

While SH-X2 extended mode splits out the memory-mapped U and I-TLB data
arrays for extra bits, extended ASID mode splits out the address arrays.
While we don't use the memory-mapped data array access, the address
array accesses are necessary for selective TLB flushes, so these are
implemented newly and replace the generic SH-4 implementation.

With this, TLB flushes in switch_mm() are almost non-existent on newer
parts.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

+148 -28
+1
arch/sh/Kconfig
··· 365 365 bool "Support SH7786 processor" 366 366 select CPU_SH4A 367 367 select CPU_SHX3 368 + select CPU_HAS_PTEAEX 368 369 select ARCH_SPARSEMEM_ENABLE 369 370 select SYS_SUPPORTS_NUMA 370 371
+3
arch/sh/Kconfig.cpu
··· 104 104 config CPU_HAS_PTEA 105 105 bool 106 106 107 + config CPU_HAS_PTEAEX 108 + bool 109 + 107 110 config CPU_HAS_DSP 108 111 bool 109 112
+1
arch/sh/include/asm/cpu-features.h
··· 21 21 #define CPU_HAS_LLSC 0x0040 /* movli.l/movco.l */ 22 22 #define CPU_HAS_L2_CACHE 0x0080 /* Secondary cache / URAM */ 23 23 #define CPU_HAS_OP32 0x0100 /* 32-bit instruction support */ 24 + #define CPU_HAS_PTEAEX 0x0200 /* PTE ASID Extension support */ 24 25 25 26 #endif /* __ASM_SH_CPU_FEATURES_H */
+10 -5
arch/sh/include/asm/mmu_context.h
··· 19 19 * (a) TLB cache version (or round, cycle whatever expression you like) 20 20 * (b) ASID (Address Space IDentifier) 21 21 */ 22 + #ifdef CONFIG_CPU_HAS_PTEAEX 23 + #define MMU_CONTEXT_ASID_MASK 0x0000ffff 24 + #else 22 25 #define MMU_CONTEXT_ASID_MASK 0x000000ff 23 - #define MMU_CONTEXT_VERSION_MASK 0xffffff00 24 - #define MMU_CONTEXT_FIRST_VERSION 0x00000100 25 - #define NO_CONTEXT 0UL 26 + #endif 26 27 27 - /* ASID is 8-bit value, so it can't be 0x100 */ 28 - #define MMU_NO_ASID 0x100 28 + #define MMU_CONTEXT_VERSION_MASK (~0UL & ~MMU_CONTEXT_ASID_MASK) 29 + #define MMU_CONTEXT_FIRST_VERSION (MMU_CONTEXT_ASID_MASK + 1) 30 + 31 + /* Impossible ASID value, to differentiate from NO_CONTEXT. */ 32 + #define MMU_NO_ASID MMU_CONTEXT_FIRST_VERSION 33 + #define NO_CONTEXT 0UL 29 34 30 35 #define asid_cache(cpu) (cpu_data[cpu].asid_cache) 31 36
+12
arch/sh/include/asm/mmu_context_32.h
··· 10 10 /* Do nothing */ 11 11 } 12 12 13 + #ifdef CONFIG_CPU_HAS_PTEAEX 14 + static inline void set_asid(unsigned long asid) 15 + { 16 + __raw_writel(asid, MMU_PTEAEX); 17 + } 18 + 19 + static inline unsigned long get_asid(void) 20 + { 21 + return __raw_readl(MMU_PTEAEX) & MMU_CONTEXT_ASID_MASK; 22 + } 23 + #else 13 24 static inline void set_asid(unsigned long asid) 14 25 { 15 26 unsigned long __dummy; ··· 44 33 asid &= MMU_CONTEXT_ASID_MASK; 45 34 return asid; 46 35 } 36 + #endif /* CONFIG_CPU_HAS_PTEAEX */ 47 37 48 38 /* MMU_TTB is used for optimizing the fault handling. */ 49 39 static inline void set_TTB(pgd_t *pgd)
+16 -19
arch/sh/include/cpu-sh4/cpu/mmu_context.h
··· 14 14 #define MMU_PTEL 0xFF000004 /* Page table entry register LOW */ 15 15 #define MMU_TTB 0xFF000008 /* Translation table base register */ 16 16 #define MMU_TEA 0xFF00000C /* TLB Exception Address */ 17 - #define MMU_PTEA 0xFF000034 /* Page table entry assistance register */ 17 + #define MMU_PTEA 0xFF000034 /* PTE assistance register */ 18 + #define MMU_PTEAEX 0xFF00007C /* PTE ASID extension register */ 18 19 19 20 #define MMUCR 0xFF000010 /* MMU Control Register */ 20 21 21 - #define MMU_ITLB_ADDRESS_ARRAY 0xF2000000 22 22 #define MMU_UTLB_ADDRESS_ARRAY 0xF6000000 23 + #define MMU_UTLB_ADDRESS_ARRAY2 0xF6800000 23 24 #define MMU_PAGE_ASSOC_BIT 0x80 24 25 25 26 #define MMUCR_TI (1<<2) 26 - 27 - #ifdef CONFIG_X2TLB 28 - #define MMUCR_ME (1 << 7) 29 - #else 30 - #define MMUCR_ME (0) 31 - #endif 32 27 33 28 #if defined(CONFIG_32BIT) && defined(CONFIG_CPU_SUBTYPE_ST40) 34 29 #define MMUCR_SE (1 << 4) 35 30 #else 36 31 #define MMUCR_SE (0) 32 + #endif 33 + 34 + #ifdef CONFIG_CPU_HAS_PTEAEX 35 + #define MMUCR_AEX (1 << 6) 36 + #else 37 + #define MMUCR_AEX (0) 38 + #endif 39 + 40 + #ifdef CONFIG_X2TLB 41 + #define MMUCR_ME (1 << 7) 42 + #else 43 + #define MMUCR_ME (0) 37 44 #endif 38 45 39 46 #ifdef CONFIG_SH_STORE_QUEUES ··· 50 43 #endif 51 44 52 45 #define MMU_NTLB_ENTRIES 64 53 - #define MMU_CONTROL_INIT (0x05|MMUCR_SQMD|MMUCR_ME|MMUCR_SE) 54 - 55 - #define MMU_ITLB_DATA_ARRAY 0xF3000000 56 - #define MMU_UTLB_DATA_ARRAY 0xF7000000 57 - 58 - #define MMU_UTLB_ENTRIES 64 59 - #define MMU_U_ENTRY_SHIFT 8 60 - #define MMU_UTLB_VALID 0x100 61 - #define MMU_ITLB_ENTRIES 4 62 - #define MMU_I_ENTRY_SHIFT 8 63 - #define MMU_ITLB_VALID 0x100 46 + #define MMU_CONTROL_INIT (0x05|MMUCR_SQMD|MMUCR_ME|MMUCR_SE|MMUCR_AEX) 64 47 65 48 #define TRA 0xff000020 66 49 #define EXPEVT 0xff000024
+1 -1
arch/sh/kernel/cpu/sh4/probe.c
··· 134 134 boot_cpu_data.icache.ways = 4; 135 135 boot_cpu_data.dcache.ways = 4; 136 136 boot_cpu_data.flags |= CPU_HAS_FPU | CPU_HAS_PERF_COUNTER | 137 - CPU_HAS_LLSC; 137 + CPU_HAS_LLSC | CPU_HAS_PTEAEX; 138 138 break; 139 139 case 0x3008: 140 140 boot_cpu_data.icache.ways = 4;
+1 -1
arch/sh/kernel/setup.c
··· 449 449 /* Symbolic CPU flags, keep in sync with asm/cpu-features.h */ 450 450 static const char *cpu_flags[] = { 451 451 "none", "fpu", "p2flush", "mmuassoc", "dsp", "perfctr", 452 - "ptea", "llsc", "l2", "op32", NULL 452 + "ptea", "llsc", "l2", "op32", "pteaex", NULL 453 453 }; 454 454 455 455 static void show_cpuflags(struct seq_file *m, struct sh_cpuinfo *c)
+4 -2
arch/sh/mm/Makefile_32
··· 25 25 endif 26 26 27 27 ifdef CONFIG_MMU 28 - obj-$(CONFIG_CPU_SH3) += tlb-sh3.o 29 - obj-$(CONFIG_CPU_SH4) += tlb-sh4.o 28 + tlb-$(CONFIG_CPU_SH3) := tlb-sh3.o 29 + tlb-$(CONFIG_CPU_SH4) := tlb-sh4.o 30 + tlb-$(CONFIG_CPU_HAS_PTEAEX) := tlb-pteaex.o 31 + obj-y += $(tlb-y) 30 32 ifndef CONFIG_CACHE_OFF 31 33 obj-$(CONFIG_CPU_SH4) += pg-sh4.o 32 34 obj-$(CONFIG_SH7705_CACHE_32KB) += pg-sh7705.o
+99
arch/sh/mm/tlb-pteaex.c
/*
 * arch/sh/mm/tlb-pteaex.c
 *
 * TLB operations for SH-X3 CPUs featuring PTE ASID Extensions.
 *
 * Copyright (C) 2009 Paul Mundt
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>

/*
 * Pre-load the TLB with the translation for @address so the impending
 * access does not re-fault: PTEH/PTEAEX/PTEA/PTEL are programmed and the
 * entry is loaded with an "ldtlb".  Interrupts are disabled around the
 * register writes so the four-register sequence cannot be torn by a TLB
 * miss handler reprogramming the same registers.
 */
void update_mmu_cache(struct vm_area_struct * vma,
		      unsigned long address, pte_t pte)
{
	unsigned long flags;
	unsigned long pteval;
	unsigned long vpn;

	/* Ptrace may call this routine. */
	if (vma && current->active_mm != vma->vm_mm)
		return;

#ifndef CONFIG_CACHE_OFF
	{
		unsigned long pfn = pte_pfn(pte);

		if (pfn_valid(pfn)) {
			struct page *page = pfn_to_page(pfn);

			/*
			 * Write back the page's cache lines (via its P1
			 * mapping) once, before the translation goes live;
			 * PG_mapped records that this has been done.
			 */
			if (!test_bit(PG_mapped, &page->flags)) {
				unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
				__flush_wback_region((void *)P1SEGADDR(phys),
						     PAGE_SIZE);
				__set_bit(PG_mapped, &page->flags);
			}
		}
	}
#endif

	local_irq_save(flags);

	/* Set PTEH register: virtual page number of the faulting address. */
	vpn = address & MMU_VPN_MASK;
	__raw_writel(vpn, MMU_PTEH);

	/*
	 * Set PTEAEX: with extended ASIDs the (up to 16-bit) ASID lives in
	 * its own register rather than in the low bits of PTEH.
	 */
	__raw_writel(get_asid(), MMU_PTEAEX);

	pteval = pte.pte_low;

	/* Set PTEA register */
#ifdef CONFIG_X2TLB
	/*
	 * For the extended mode TLB this is trivial, only the ESZ and
	 * EPR bits need to be written out to PTEA, with the remainder of
	 * the protection bits (with the exception of the compat-mode SZ
	 * and PR bits, which are cleared) being written out in PTEL.
	 */
	__raw_writel(pte.pte_high, MMU_PTEA);
#else
	/* TODO: make this look less hacky */
	/* Compat mode: repack PTE bits 31:28 and bit 0 into PTEA's format. */
	__raw_writel(((pteval >> 28) & 0xe) | (pteval & 0x1), MMU_PTEA);
#endif

	/* Set PTEL register */
	pteval &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
#ifdef CONFIG_CACHE_WRITETHROUGH
	pteval |= _PAGE_WT;
#endif
	/* conveniently, we want all the software flags to be 0 anyway */
	__raw_writel(pteval, MMU_PTEL);

	/* Load the TLB */
	asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
	local_irq_restore(flags);
}

/*
 * While SH-X2 extended TLB mode splits out the memory-mapped I/UTLB
 * data arrays, SH-X3 cores with PTEAEX split out the memory-mapped
 * address arrays. In compat mode the second array is inaccessible, while
 * in extended mode, the legacy 8-bit ASID field in address array 1 has
 * undefined behaviour.
 */
/*
 * Selectively flush one UTLB entry for (@asid, @page) via the two
 * memory-mapped address arrays, using the associative-write bit so the
 * hardware does the matching.  Must run uncached since the arrays are
 * in the P4 control space.
 */
void __uses_jump_to_uncached local_flush_tlb_one(unsigned long asid,
						 unsigned long page)
{
	jump_to_uncached();
	__raw_writel(page, MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT);
	__raw_writel(asid, MMU_UTLB_ADDRESS_ARRAY2 | MMU_PAGE_ASSOC_BIT);
	back_to_cached();
}