Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kernel: use lockless list for smp_call_function_single

Make smp_call_function_single and friends more efficient by using a
lockless list.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Christoph Hellwig; committed by Linus Torvalds.
6897fc22 0c692d07

+19 -43
+1 -4
include/linux/blkdev.h
··· 95 95 * as well! 96 96 */ 97 97 struct request { 98 - union { 99 - struct list_head queuelist; 100 - struct llist_node ll_list; 101 - }; 98 + struct list_head queuelist; 102 99 union { 103 100 struct call_single_data csd; 104 101 struct work_struct mq_flush_data;
+5 -1
include/linux/smp.h
··· 11 11 #include <linux/list.h> 12 12 #include <linux/cpumask.h> 13 13 #include <linux/init.h> 14 + #include <linux/llist.h> 14 15 15 16 extern void cpu_idle(void); 16 17 17 18 typedef void (*smp_call_func_t)(void *info); 18 19 struct call_single_data { 19 - struct list_head list; 20 + union { 21 + struct list_head list; 22 + struct llist_node llist; 23 + }; 20 24 smp_call_func_t func; 21 25 void *info; 22 26 u16 flags;
+13 -38
kernel/smp.c
··· 28 28 29 29 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); 30 30 31 - struct call_single_queue { 32 - struct list_head list; 33 - raw_spinlock_t lock; 34 - }; 35 - 36 - static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue); 31 + static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); 37 32 38 33 static int 39 34 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) ··· 80 85 void *cpu = (void *)(long)smp_processor_id(); 81 86 int i; 82 87 83 - for_each_possible_cpu(i) { 84 - struct call_single_queue *q = &per_cpu(call_single_queue, i); 85 - 86 - raw_spin_lock_init(&q->lock); 87 - INIT_LIST_HEAD(&q->list); 88 - } 88 + for_each_possible_cpu(i) 89 + init_llist_head(&per_cpu(call_single_queue, i)); 89 90 90 91 hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); 91 92 register_cpu_notifier(&hotplug_cfd_notifier); ··· 132 141 */ 133 142 static void generic_exec_single(int cpu, struct call_single_data *csd, int wait) 134 143 { 135 - struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); 136 - unsigned long flags; 137 - int ipi; 138 - 139 144 if (wait) 140 145 csd->flags |= CSD_FLAG_WAIT; 141 - 142 - raw_spin_lock_irqsave(&dst->lock, flags); 143 - ipi = list_empty(&dst->list); 144 - list_add_tail(&csd->list, &dst->list); 145 - raw_spin_unlock_irqrestore(&dst->lock, flags); 146 146 147 147 /* 148 148 * The list addition should be visible before sending the IPI ··· 146 164 * locking and barrier primitives. Generic code isn't really 147 165 * equipped to do the right thing... 
148 166 */ 149 - if (ipi) 167 + if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) 150 168 arch_send_call_function_single_ipi(cpu); 151 169 152 170 if (wait) ··· 159 177 */ 160 178 void generic_smp_call_function_single_interrupt(void) 161 179 { 162 - struct call_single_queue *q = &__get_cpu_var(call_single_queue); 163 - LIST_HEAD(list); 180 + struct llist_node *entry, *next; 164 181 165 182 /* 166 183 * Shouldn't receive this interrupt on a cpu that is not yet online. 167 184 */ 168 185 WARN_ON_ONCE(!cpu_online(smp_processor_id())); 169 186 170 - raw_spin_lock(&q->lock); 171 - list_replace_init(&q->list, &list); 172 - raw_spin_unlock(&q->lock); 187 + entry = llist_del_all(&__get_cpu_var(call_single_queue)); 188 + entry = llist_reverse_order(entry); 173 189 174 - while (!list_empty(&list)) { 190 + while (entry) { 175 191 struct call_single_data *csd; 176 192 177 - csd = list_entry(list.next, struct call_single_data, list); 178 - list_del(&csd->list); 193 + next = entry->next; 179 194 195 + csd = llist_entry(entry, struct call_single_data, llist); 180 196 csd->func(csd->info); 181 - 182 197 csd_unlock(csd); 198 + 199 + entry = next; 183 200 } 184 201 } 185 202 ··· 392 411 393 412 for_each_cpu(cpu, cfd->cpumask) { 394 413 struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); 395 - struct call_single_queue *dst = 396 - &per_cpu(call_single_queue, cpu); 397 - unsigned long flags; 398 414 399 415 csd_lock(csd); 400 416 csd->func = func; 401 417 csd->info = info; 402 - 403 - raw_spin_lock_irqsave(&dst->lock, flags); 404 - list_add_tail(&csd->list, &dst->list); 405 - raw_spin_unlock_irqrestore(&dst->lock, flags); 418 + llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)); 406 419 } 407 420 408 421 /* Send a message to all CPUs in the map */