Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARC: [SMP] ASID allocation

- Track a per-CPU ASID counter
- Give each mm a per-CPU ASID (needed when an mm has multiple threads, or when an mm is migrated across CPUs)

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

+37 -23
+1 -1
arch/arc/include/asm/mmu.h
··· 48 48 #ifndef __ASSEMBLY__ 49 49 50 50 typedef struct { 51 - unsigned long asid; /* 8 bit MMU PID + Generation cycle */ 51 + unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ 52 52 } mm_context_t; 53 53 54 54 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
+28 -16
arch/arc/include/asm/mmu_context.h
··· 30 30 * "Fast Context Switch" i.e. no TLB flush on ctxt-switch 31 31 * 32 32 * Linux assigns each task a unique ASID. A simple round-robin allocation 33 - * of H/w ASID is done using software tracker @asid_cache. 33 + * of H/w ASID is done using software tracker @asid_cpu. 34 34 * When it reaches max 255, the allocation cycle starts afresh by flushing 35 35 * the entire TLB and wrapping ASID back to zero. 36 36 * 37 37 * A new allocation cycle, post rollover, could potentially reassign an ASID 38 38 * to a different task. Thus the rule is to refresh the ASID in a new cycle. 39 - * The 32 bit @asid_cache (and mm->asid) have 8 bits MMU PID and rest 24 bits 39 + * The 32 bit @asid_cpu (and mm->asid) have 8 bits MMU PID and rest 24 bits 40 40 * serve as cycle/generation indicator and natural 32 bit unsigned math 41 41 * automagically increments the generation when lower 8 bits rollover. 42 42 */ ··· 47 47 #define MM_CTXT_FIRST_CYCLE (MM_CTXT_ASID_MASK + 1) 48 48 #define MM_CTXT_NO_ASID 0UL 49 49 50 - #define hw_pid(mm) (mm->context.asid & MM_CTXT_ASID_MASK) 50 + #define asid_mm(mm, cpu) mm->context.asid[cpu] 51 + #define hw_pid(mm, cpu) (asid_mm(mm, cpu) & MM_CTXT_ASID_MASK) 51 52 52 - extern unsigned int asid_cache; 53 + DECLARE_PER_CPU(unsigned int, asid_cache); 54 + #define asid_cpu(cpu) per_cpu(asid_cache, cpu) 53 55 54 56 /* 55 57 * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle) ··· 59 57 */ 60 58 static inline void get_new_mmu_context(struct mm_struct *mm) 61 59 { 60 + const unsigned int cpu = smp_processor_id(); 62 61 unsigned long flags; 63 62 64 63 local_irq_save(flags); ··· 74 71 * first need to destroy the context, setting it to invalid 75 72 * value. 
76 73 */ 77 - if (!((mm->context.asid ^ asid_cache) & MM_CTXT_CYCLE_MASK)) 74 + if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK)) 78 75 goto set_hw; 79 76 80 77 /* move to new ASID and handle rollover */ 81 - if (unlikely(!(++asid_cache & MM_CTXT_ASID_MASK))) { 78 + if (unlikely(!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK))) { 82 79 83 80 flush_tlb_all(); 84 81 ··· 87 84 * If the container itself wrapped around, set it to a non zero 88 85 * "generation" to distinguish from no context 89 86 */ 90 - if (!asid_cache) 91 - asid_cache = MM_CTXT_FIRST_CYCLE; 87 + if (!asid_cpu(cpu)) 88 + asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE; 92 89 } 93 90 94 91 /* Assign new ASID to tsk */ 95 - mm->context.asid = asid_cache; 92 + asid_mm(mm, cpu) = asid_cpu(cpu); 96 93 97 94 set_hw: 98 - write_aux_reg(ARC_REG_PID, hw_pid(mm) | MMU_ENABLE); 95 + write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE); 99 96 100 97 local_irq_restore(flags); 101 98 } ··· 107 104 static inline int 108 105 init_new_context(struct task_struct *tsk, struct mm_struct *mm) 109 106 { 110 - mm->context.asid = MM_CTXT_NO_ASID; 107 + int i; 108 + 109 + for_each_possible_cpu(i) 110 + asid_mm(mm, i) = MM_CTXT_NO_ASID; 111 + 111 112 return 0; 113 + } 114 + 115 + static inline void destroy_context(struct mm_struct *mm) 116 + { 117 + unsigned long flags; 118 + 119 + /* Needed to elide CONFIG_DEBUG_PREEMPT warning */ 120 + local_irq_save(flags); 121 + asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID; 122 + local_irq_restore(flags); 112 123 } 113 124 114 125 /* Prepare the MMU for task: setup PID reg with allocated ASID ··· 147 130 * only if it was unallocated 148 131 */ 149 132 #define activate_mm(prev, next) switch_mm(prev, next, NULL) 150 - 151 - static inline void destroy_context(struct mm_struct *mm) 152 - { 153 - mm->context.asid = MM_CTXT_NO_ASID; 154 - } 155 133 156 134 /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping 157 135 * for retiring-mm. 
However destroy_context( ) still needs to do that because
+8 -6
arch/arc/mm/tlb.c
··· 100 100 101 101 102 102 /* A copy of the ASID from the PID reg is kept in asid_cache */ 103 - unsigned int asid_cache = MM_CTXT_FIRST_CYCLE; 103 + DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; 104 104 105 105 /* 106 106 * Utility Routine to erase a J-TLB entry ··· 274 274 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 275 275 unsigned long end) 276 276 { 277 + const unsigned int cpu = smp_processor_id(); 277 278 unsigned long flags; 278 279 279 280 /* If range @start to @end is more than 32 TLB entries deep, ··· 298 297 299 298 local_irq_save(flags); 300 299 301 - if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) { 300 + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { 302 301 while (start < end) { 303 - tlb_entry_erase(start | hw_pid(vma->vm_mm)); 302 + tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu)); 304 303 start += PAGE_SIZE; 305 304 } 306 305 } ··· 347 346 348 347 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) 349 348 { 349 + const unsigned int cpu = smp_processor_id(); 350 350 unsigned long flags; 351 351 352 352 /* Note that it is critical that interrupts are DISABLED between ··· 355 353 */ 356 354 local_irq_save(flags); 357 355 358 - if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) { 359 - tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm)); 356 + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { 357 + tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu)); 360 358 utlb_invalidate(); 361 359 } 362 360 ··· 402 400 403 401 local_irq_save(flags); 404 402 405 - tlb_paranoid_check(vma->vm_mm->context.asid, address); 403 + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address); 406 404 407 405 address &= PAGE_MASK; 408 406