Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARC: mm: tlb flush optim: elide repeated uTLB invalidate in loop

The unconditional full TLB flush (on say ASID rollover) iterates over each
entry and uses TLBWrite to zero it out. TLBWrite by design also invalidates
the uTLBs thus we end up invalidating them as many times as number of
entries (512 or 1k)

Optimize this by using a weaker TLBWriteNI cmd in loop, which doesn't
tinker with uTLBs and an explicit one time IVUTLB, outside the loop to
invalidate them all once.

And given the optimization, the IVUTLB is now needed on MMUv4 too where
the uTLBs and JTLBs are otherwise coherent given the TLBInsertEntry /
TLBDeleteEntry commands

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

+29 -45
+29 -45
arch/arc/mm/tlb.c
··· 118 118 write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); 119 119 } 120 120 121 + static void utlb_invalidate(void) 122 + { 123 + #if (CONFIG_ARC_MMU_VER >= 2) 124 + 125 + #if (CONFIG_ARC_MMU_VER == 2) 126 + /* MMU v2 introduced the uTLB Flush command. 127 + * There was however an obscure hardware bug, where uTLB flush would 128 + * fail when a prior probe for J-TLB (both totally unrelated) would 129 + * return lkup err - because the entry didn't exist in MMU. 130 + * The Workround was to set Index reg with some valid value, prior to 131 + * flush. This was fixed in MMU v3 132 + */ 133 + unsigned int idx; 134 + 135 + /* make sure INDEX Reg is valid */ 136 + idx = read_aux_reg(ARC_REG_TLBINDEX); 137 + 138 + /* If not write some dummy val */ 139 + if (unlikely(idx & TLB_LKUP_ERR)) 140 + write_aux_reg(ARC_REG_TLBINDEX, 0xa); 141 + #endif 142 + 143 + write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); 144 + #endif 145 + 146 + } 147 + 121 148 #if (CONFIG_ARC_MMU_VER < 4) 122 149 123 150 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) ··· 174 147 WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n", 175 148 vaddr_n_asid); 176 149 } 177 - } 178 - 179 - /**************************************************************************** 180 - * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs) 181 - * 182 - * New IVUTLB cmd in MMU v2 explictly invalidates the uTLB 183 - * 184 - * utlb_invalidate ( ) 185 - * -For v2 MMU calls Flush uTLB Cmd 186 - * -For v1 MMU does nothing (except for Metal Fix v1 MMU) 187 - * This is because in v1 TLBWrite itself invalidate uTLBs 188 - ***************************************************************************/ 189 - 190 - static void utlb_invalidate(void) 191 - { 192 - #if (CONFIG_ARC_MMU_VER >= 2) 193 - 194 - #if (CONFIG_ARC_MMU_VER == 2) 195 - /* MMU v2 introduced the uTLB Flush command. 
196 - * There was however an obscure hardware bug, where uTLB flush would 197 - * fail when a prior probe for J-TLB (both totally unrelated) would 198 - * return lkup err - because the entry didn't exist in MMU. 199 - * The Workround was to set Index reg with some valid value, prior to 200 - * flush. This was fixed in MMU v3 hence not needed any more 201 - */ 202 - unsigned int idx; 203 - 204 - /* make sure INDEX Reg is valid */ 205 - idx = read_aux_reg(ARC_REG_TLBINDEX); 206 - 207 - /* If not write some dummy val */ 208 - if (unlikely(idx & TLB_LKUP_ERR)) 209 - write_aux_reg(ARC_REG_TLBINDEX, 0xa); 210 - #endif 211 - 212 - write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); 213 - #endif 214 - 215 150 } 216 151 217 152 static void tlb_entry_insert(unsigned int pd0, pte_t pd1) ··· 207 218 } 208 219 209 220 #else /* CONFIG_ARC_MMU_VER >= 4) */ 210 - 211 - static void utlb_invalidate(void) 212 - { 213 - /* No need since uTLB is always in sync with JTLB */ 214 - } 215 221 216 222 static void tlb_entry_erase(unsigned int vaddr_n_asid) 217 223 { ··· 251 267 for (entry = 0; entry < num_tlb; entry++) { 252 268 /* write this entry to the TLB */ 253 269 write_aux_reg(ARC_REG_TLBINDEX, entry); 254 - write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); 270 + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI); 255 271 } 256 272 257 273 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { ··· 262 278 263 279 for (entry = stlb_idx; entry < stlb_idx + 16; entry++) { 264 280 write_aux_reg(ARC_REG_TLBINDEX, entry); 265 - write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); 281 + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI); 266 282 } 267 283 } 268 284