Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[IA64] Use bitmaps for efficient context allocation/free

Corrects the very inefficient method of finding free context_ids in
get_mmu_context(). Instead of walking the task_list of all processes,
two bitmaps are used to efficiently store and look up state: in use and
needs flushing. The entire rid address space is now used before calling
wrap_mmu_context and global tlb flushing.

Special thanks to Ken and Rohit for their review and modifications in
using a bit flushmap.

Signed-off-by: Peter Keilty <peter.keilty@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

authored by

Peter Keilty and committed by
Tony Luck
dcc17d1b f2c84c0e

+44 -33
+1
arch/ia64/kernel/setup.c
··· 454 454 #endif 455 455 456 456 cpu_init(); /* initialize the bootstrap CPU */ 457 + mmu_context_init(); /* initialize context_id bitmap */ 457 458 458 459 #ifdef CONFIG_ACPI 459 460 acpi_boot_init();
+27 -29
arch/ia64/mm/tlb.c
··· 8 8 * Modified RID allocation for SMP 9 9 * Goutham Rao <goutham.rao@intel.com> 10 10 * IPI based ptc implementation and A-step IPI implementation. 11 + * Rohit Seth <rohit.seth@intel.com> 12 + * Ken Chen <kenneth.w.chen@intel.com> 11 13 */ 12 14 #include <linux/config.h> 13 15 #include <linux/module.h> ··· 18 16 #include <linux/sched.h> 19 17 #include <linux/smp.h> 20 18 #include <linux/mm.h> 19 + #include <linux/bootmem.h> 21 20 22 21 #include <asm/delay.h> 23 22 #include <asm/mmu_context.h> 24 23 #include <asm/pgalloc.h> 25 24 #include <asm/pal.h> 26 25 #include <asm/tlbflush.h> 26 + #include <asm/dma.h> 27 27 28 28 static struct { 29 29 unsigned long mask; /* mask of supported purge page-sizes */ ··· 35 31 struct ia64_ctx ia64_ctx = { 36 32 .lock = SPIN_LOCK_UNLOCKED, 37 33 .next = 1, 38 - .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */ 39 34 .max_ctx = ~0U 40 35 }; 41 36 42 37 DEFINE_PER_CPU(u8, ia64_need_tlb_flush); 38 + 39 + /* 40 + * Initializes the ia64_ctx.bitmap array based on max_ctx+1. 41 + * Called after cpu_init() has setup ia64_ctx.max_ctx based on 42 + * maximum RID that is supported by boot CPU. 43 + */ 44 + void __init 45 + mmu_context_init (void) 46 + { 47 + ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); 48 + ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); 49 + } 43 50 44 51 /* 45 52 * Acquire the ia64_ctx.lock before calling this function! 
··· 58 43 void 59 44 wrap_mmu_context (struct mm_struct *mm) 60 45 { 61 - unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx; 62 - struct task_struct *tsk; 63 46 int i; 47 + unsigned long flush_bit; 64 48 65 - if (ia64_ctx.next > max_ctx) 66 - ia64_ctx.next = 300; /* skip daemons */ 67 - ia64_ctx.limit = max_ctx + 1; 68 - 69 - /* 70 - * Scan all the task's mm->context and set proper safe range 71 - */ 72 - 73 - read_lock(&tasklist_lock); 74 - repeat: 75 - for_each_process(tsk) { 76 - if (!tsk->mm) 77 - continue; 78 - tsk_context = tsk->mm->context; 79 - if (tsk_context == ia64_ctx.next) { 80 - if (++ia64_ctx.next >= ia64_ctx.limit) { 81 - /* empty range: reset the range limit and start over */ 82 - if (ia64_ctx.next > max_ctx) 83 - ia64_ctx.next = 300; 84 - ia64_ctx.limit = max_ctx + 1; 85 - goto repeat; 86 - } 87 - } 88 - if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit)) 89 - ia64_ctx.limit = tsk_context; 49 + for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { 50 + flush_bit = xchg(&ia64_ctx.flushmap[i], 0); 51 + ia64_ctx.bitmap[i] ^= flush_bit; 90 52 } 91 - read_unlock(&tasklist_lock); 53 + 54 + /* use offset at 300 to skip daemons */ 55 + ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, 56 + ia64_ctx.max_ctx, 300); 57 + ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, 58 + ia64_ctx.max_ctx, ia64_ctx.next); 59 + 92 60 /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */ 93 61 { 94 62 int cpu = get_cpu(); /* prevent preemption/migration */
+15 -4
include/asm-ia64/mmu_context.h
··· 32 32 struct ia64_ctx { 33 33 spinlock_t lock; 34 34 unsigned int next; /* next context number to use */ 35 - unsigned int limit; /* next >= limit => must call wrap_mmu_context() */ 36 - unsigned int max_ctx; /* max. context value supported by all CPUs */ 35 + unsigned int limit; /* available free range */ 36 + unsigned int max_ctx; /* max. context value supported by all CPUs */ 37 + /* call wrap_mmu_context when next >= max */ 38 + unsigned long *bitmap; /* bitmap size is max_ctx+1 */ 39 + unsigned long *flushmap;/* pending rid to be flushed */ 37 40 }; 38 41 39 42 extern struct ia64_ctx ia64_ctx; 40 43 DECLARE_PER_CPU(u8, ia64_need_tlb_flush); 41 44 45 + extern void mmu_context_init (void); 42 46 extern void wrap_mmu_context (struct mm_struct *mm); 43 47 44 48 static inline void ··· 87 83 context = mm->context; 88 84 if (context == 0) { 89 85 cpus_clear(mm->cpu_vm_mask); 90 - if (ia64_ctx.next >= ia64_ctx.limit) 91 - wrap_mmu_context(mm); 86 + if (ia64_ctx.next >= ia64_ctx.limit) { 87 + ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, 88 + ia64_ctx.max_ctx, ia64_ctx.next); 89 + ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, 90 + ia64_ctx.max_ctx, ia64_ctx.next); 91 + if (ia64_ctx.next >= ia64_ctx.max_ctx) 92 + wrap_mmu_context(mm); 93 + } 92 94 mm->context = context = ia64_ctx.next++; 95 + __set_bit(context, ia64_ctx.bitmap); 93 96 } 94 97 } 95 98 spin_unlock_irqrestore(&ia64_ctx.lock, flags);
+1
include/asm-ia64/tlbflush.h
··· 51 51 if (!mm) 52 52 return; 53 53 54 + set_bit(mm->context, ia64_ctx.flushmap); 54 55 mm->context = 0; 55 56 56 57 if (atomic_read(&mm->mm_users) == 0)