Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

smp: Cleanup smp_call_function*()

Get rid of the __call_single_node union and clean up the API a little
to avoid external code relying on the structure layout as much.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>

+61 -95
+3 -2
arch/mips/kernel/process.c
··· 702 702 return sp & ALMASK; 703 703 } 704 704 705 - static DEFINE_PER_CPU(call_single_data_t, backtrace_csd); 706 705 static struct cpumask backtrace_csd_busy; 707 706 708 707 static void handle_backtrace(void *info) ··· 709 710 nmi_cpu_backtrace(get_irq_regs()); 710 711 cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy); 711 712 } 713 + 714 + static DEFINE_PER_CPU(call_single_data_t, backtrace_csd) = 715 + CSD_INIT(handle_backtrace, NULL); 712 716 713 717 static void raise_backtrace(cpumask_t *mask) 714 718 { ··· 732 730 } 733 731 734 732 csd = &per_cpu(backtrace_csd, cpu); 735 - csd->func = handle_backtrace; 736 733 smp_call_function_single_async(cpu, csd); 737 734 } 738 735 }
+7 -20
arch/mips/kernel/smp.c
··· 687 687 688 688 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 689 689 690 - static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd); 690 + static void tick_broadcast_callee(void *info) 691 + { 692 + tick_receive_broadcast(); 693 + } 694 + 695 + static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd) = 696 + CSD_INIT(tick_broadcast_callee, NULL); 691 697 692 698 void tick_broadcast(const struct cpumask *mask) 693 699 { ··· 705 699 smp_call_function_single_async(cpu, csd); 706 700 } 707 701 } 708 - 709 - static void tick_broadcast_callee(void *info) 710 - { 711 - tick_receive_broadcast(); 712 - } 713 - 714 - static int __init tick_broadcast_init(void) 715 - { 716 - call_single_data_t *csd; 717 - int cpu; 718 - 719 - for (cpu = 0; cpu < NR_CPUS; cpu++) { 720 - csd = &per_cpu(tick_broadcast_csd, cpu); 721 - csd->func = tick_broadcast_callee; 722 - } 723 - 724 - return 0; 725 - } 726 - early_initcall(tick_broadcast_init); 727 702 728 703 #endif /* CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
+1 -3
arch/s390/pci/pci_irq.c
··· 178 178 if (atomic_inc_return(&cpu_data->scheduled) > 1) 179 179 continue; 180 180 181 - cpu_data->csd.func = zpci_handle_remote_irq; 182 - cpu_data->csd.info = &cpu_data->scheduled; 183 - cpu_data->csd.flags = 0; 181 + INIT_CSD(&cpu_data->csd, zpci_handle_remote_irq, &cpu_data->scheduled); 184 182 smp_call_function_single_async(cpu, &cpu_data->csd); 185 183 } 186 184 }
+3 -4
arch/x86/kernel/cpuid.c
··· 74 74 75 75 init_completion(&cmd.done); 76 76 for (; count; count -= 16) { 77 - call_single_data_t csd = { 78 - .func = cpuid_smp_cpuid, 79 - .info = &cmd, 80 - }; 77 + call_single_data_t csd; 78 + 79 + INIT_CSD(&csd, cpuid_smp_cpuid, &cmd); 81 80 82 81 cmd.regs.eax = pos; 83 82 cmd.regs.ecx = pos >> 32;
+3 -4
arch/x86/lib/msr-smp.c
··· 169 169 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) 170 170 { 171 171 struct msr_info_completion rv; 172 - call_single_data_t csd = { 173 - .func = __rdmsr_safe_on_cpu, 174 - .info = &rv, 175 - }; 172 + call_single_data_t csd; 176 173 int err; 174 + 175 + INIT_CSD(&csd, __rdmsr_safe_on_cpu, &rv); 177 176 178 177 memset(&rv, 0, sizeof(rv)); 179 178 init_completion(&rv.done);
+1 -3
block/blk-mq.c
··· 671 671 return false; 672 672 673 673 if (blk_mq_complete_need_ipi(rq)) { 674 - rq->csd.func = __blk_mq_complete_request_remote; 675 - rq->csd.info = rq; 676 - rq->csd.flags = 0; 674 + INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); 677 675 smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd); 678 676 } else { 679 677 if (rq->q->nr_hw_queues > 1)
+1 -2
drivers/cpuidle/coupled.c
··· 674 674 coupled->refcnt++; 675 675 676 676 csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu); 677 - csd->func = cpuidle_coupled_handle_poke; 678 - csd->info = (void *)(unsigned long)dev->cpu; 677 + INIT_CSD(csd, cpuidle_coupled_handle_poke, (void *)(unsigned long)dev->cpu); 679 678 680 679 return 0; 681 680 }
+2 -7
drivers/net/ethernet/cavium/liquidio/lio_core.c
··· 729 729 droq->cpu_id == this_cpu) { 730 730 napi_schedule_irqoff(&droq->napi); 731 731 } else { 732 - call_single_data_t *csd = &droq->csd; 733 - 734 - csd->func = napi_schedule_wrapper; 735 - csd->info = &droq->napi; 736 - csd->flags = 0; 737 - 738 - smp_call_function_single_async(droq->cpu_id, csd); 732 + INIT_CSD(&droq->csd, napi_schedule_wrapper, &droq->napi); 733 + smp_call_function_single_async(droq->cpu_id, &droq->csd); 739 734 } 740 735 } 741 736
+9 -10
include/linux/smp.h
··· 21 21 * structure shares (partial) layout with struct irq_work 22 22 */ 23 23 struct __call_single_data { 24 - union { 25 - struct __call_single_node node; 26 - struct { 27 - struct llist_node llist; 28 - unsigned int flags; 29 - #ifdef CONFIG_64BIT 30 - u16 src, dst; 31 - #endif 32 - }; 33 - }; 24 + struct __call_single_node node; 34 25 smp_call_func_t func; 35 26 void *info; 36 27 }; 37 28 29 + #define CSD_INIT(_func, _info) \ 30 + (struct __call_single_data){ .func = (_func), .info = (_info), } 31 + 38 32 /* Use __aligned() to avoid to use 2 cache lines for 1 csd */ 39 33 typedef struct __call_single_data call_single_data_t 40 34 __aligned(sizeof(struct __call_single_data)); 35 + 36 + #define INIT_CSD(_csd, _func, _info) \ 37 + do { \ 38 + *(_csd) = CSD_INIT((_func), (_info)); \ 39 + } while (0) 41 40 42 41 /* 43 42 * Enqueue a llist_node on the call_single_queue; be very careful, read
+3 -3
kernel/debug/debug_core.c
··· 225 225 * Default (weak) implementation for kgdb_roundup_cpus 226 226 */ 227 227 228 - static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd); 229 - 230 228 void __weak kgdb_call_nmi_hook(void *ignored) 231 229 { 232 230 /* ··· 238 240 kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); 239 241 } 240 242 NOKPROBE_SYMBOL(kgdb_call_nmi_hook); 243 + 244 + static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd) = 245 + CSD_INIT(kgdb_call_nmi_hook, NULL); 241 246 242 247 void __weak kgdb_roundup_cpus(void) 243 248 { ··· 268 267 continue; 269 268 kgdb_info[cpu].rounding_up = true; 270 269 271 - csd->func = kgdb_call_nmi_hook; 272 270 ret = smp_call_function_single_async(cpu, csd); 273 271 if (ret) 274 272 kgdb_info[cpu].rounding_up = false;
+2 -10
kernel/sched/core.c
··· 320 320 update_rq_clock_task(rq, delta); 321 321 } 322 322 323 - static inline void 324 - rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func) 325 - { 326 - csd->flags = 0; 327 - csd->func = func; 328 - csd->info = rq; 329 - } 330 - 331 323 #ifdef CONFIG_SCHED_HRTICK 332 324 /* 333 325 * Use HR-timers to deliver accurate preemption points. ··· 420 428 static void hrtick_rq_init(struct rq *rq) 421 429 { 422 430 #ifdef CONFIG_SMP 423 - rq_csd_init(rq, &rq->hrtick_csd, __hrtick_start); 431 + INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq); 424 432 #endif 425 433 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); 426 434 rq->hrtick_timer.function = hrtick; ··· 7766 7774 rq->last_blocked_load_update_tick = jiffies; 7767 7775 atomic_set(&rq->nohz_flags, 0); 7768 7776 7769 - rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); 7777 + INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); 7770 7778 #endif 7771 7779 #ifdef CONFIG_HOTPLUG_CPU 7772 7780 rcuwait_init(&rq->hotplug_wait);
+25 -25
kernel/smp.c
··· 27 27 #include "smpboot.h" 28 28 #include "sched/smp.h" 29 29 30 - #define CSD_TYPE(_csd) ((_csd)->flags & CSD_FLAG_TYPE_MASK) 30 + #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK) 31 31 32 32 struct call_function_data { 33 33 call_single_data_t __percpu *csd; ··· 146 146 bool firsttime; 147 147 u64 ts2, ts_delta; 148 148 call_single_data_t *cpu_cur_csd; 149 - unsigned int flags = READ_ONCE(csd->flags); 149 + unsigned int flags = READ_ONCE(csd->node.u_flags); 150 150 151 151 if (!(flags & CSD_FLAG_LOCK)) { 152 152 if (!unlikely(*bug_id)) ··· 224 224 225 225 static __always_inline void csd_lock_wait(call_single_data_t *csd) 226 226 { 227 - smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); 227 + smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); 228 228 } 229 229 #endif 230 230 231 231 static __always_inline void csd_lock(call_single_data_t *csd) 232 232 { 233 233 csd_lock_wait(csd); 234 - csd->flags |= CSD_FLAG_LOCK; 234 + csd->node.u_flags |= CSD_FLAG_LOCK; 235 235 236 236 /* 237 237 * prevent CPU from reordering the above assignment ··· 243 243 244 244 static __always_inline void csd_unlock(call_single_data_t *csd) 245 245 { 246 - WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); 246 + WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK)); 247 247 248 248 /* 249 249 * ensure we're all done before releasing data: 250 250 */ 251 - smp_store_release(&csd->flags, 0); 251 + smp_store_release(&csd->node.u_flags, 0); 252 252 } 253 253 254 254 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); ··· 300 300 return -ENXIO; 301 301 } 302 302 303 - __smp_call_single_queue(cpu, &csd->llist); 303 + __smp_call_single_queue(cpu, &csd->node.llist); 304 304 305 305 return 0; 306 306 } ··· 353 353 * We don't have to use the _safe() variant here 354 354 * because we are not invoking the IPI handlers yet. 
355 355 */ 356 - llist_for_each_entry(csd, entry, llist) { 356 + llist_for_each_entry(csd, entry, node.llist) { 357 357 switch (CSD_TYPE(csd)) { 358 358 case CSD_TYPE_ASYNC: 359 359 case CSD_TYPE_SYNC: ··· 378 378 * First; run all SYNC callbacks, people are waiting for us. 379 379 */ 380 380 prev = NULL; 381 - llist_for_each_entry_safe(csd, csd_next, entry, llist) { 381 + llist_for_each_entry_safe(csd, csd_next, entry, node.llist) { 382 382 /* Do we wait until *after* callback? */ 383 383 if (CSD_TYPE(csd) == CSD_TYPE_SYNC) { 384 384 smp_call_func_t func = csd->func; 385 385 void *info = csd->info; 386 386 387 387 if (prev) { 388 - prev->next = &csd_next->llist; 388 + prev->next = &csd_next->node.llist; 389 389 } else { 390 - entry = &csd_next->llist; 390 + entry = &csd_next->node.llist; 391 391 } 392 392 393 393 csd_lock_record(csd); ··· 395 395 csd_unlock(csd); 396 396 csd_lock_record(NULL); 397 397 } else { 398 - prev = &csd->llist; 398 + prev = &csd->node.llist; 399 399 } 400 400 } 401 401 ··· 406 406 * Second; run all !SYNC callbacks. 
407 407 */ 408 408 prev = NULL; 409 - llist_for_each_entry_safe(csd, csd_next, entry, llist) { 409 + llist_for_each_entry_safe(csd, csd_next, entry, node.llist) { 410 410 int type = CSD_TYPE(csd); 411 411 412 412 if (type != CSD_TYPE_TTWU) { 413 413 if (prev) { 414 - prev->next = &csd_next->llist; 414 + prev->next = &csd_next->node.llist; 415 415 } else { 416 - entry = &csd_next->llist; 416 + entry = &csd_next->node.llist; 417 417 } 418 418 419 419 if (type == CSD_TYPE_ASYNC) { ··· 429 429 } 430 430 431 431 } else { 432 - prev = &csd->llist; 432 + prev = &csd->node.llist; 433 433 } 434 434 } 435 435 ··· 465 465 { 466 466 call_single_data_t *csd; 467 467 call_single_data_t csd_stack = { 468 - .flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, 468 + .node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, }, 469 469 }; 470 470 int this_cpu; 471 471 int err; ··· 502 502 csd->func = func; 503 503 csd->info = info; 504 504 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 505 - csd->src = smp_processor_id(); 506 - csd->dst = cpu; 505 + csd->node.src = smp_processor_id(); 506 + csd->node.dst = cpu; 507 507 #endif 508 508 509 509 err = generic_exec_single(cpu, csd); ··· 544 544 545 545 preempt_disable(); 546 546 547 - if (csd->flags & CSD_FLAG_LOCK) { 547 + if (csd->node.u_flags & CSD_FLAG_LOCK) { 548 548 err = -EBUSY; 549 549 goto out; 550 550 } 551 551 552 - csd->flags = CSD_FLAG_LOCK; 552 + csd->node.u_flags = CSD_FLAG_LOCK; 553 553 smp_wmb(); 554 554 555 555 err = generic_exec_single(cpu, csd); ··· 667 667 668 668 csd_lock(csd); 669 669 if (wait) 670 - csd->flags |= CSD_TYPE_SYNC; 670 + csd->node.u_flags |= CSD_TYPE_SYNC; 671 671 csd->func = func; 672 672 csd->info = info; 673 673 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 674 - csd->src = smp_processor_id(); 675 - csd->dst = cpu; 674 + csd->node.src = smp_processor_id(); 675 + csd->node.dst = cpu; 676 676 #endif 677 - if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) 677 + if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) 678 
678 __cpumask_set_cpu(cpu, cfd->cpumask_ipi); 679 679 } 680 680
+1 -2
net/core/dev.c
··· 11165 11165 INIT_LIST_HEAD(&sd->poll_list); 11166 11166 sd->output_queue_tailp = &sd->output_queue; 11167 11167 #ifdef CONFIG_RPS 11168 - sd->csd.func = rps_trigger_softirq; 11169 - sd->csd.info = sd; 11168 + INIT_CSD(&sd->csd, rps_trigger_softirq, sd); 11170 11169 sd->cpu = i; 11171 11170 #endif 11172 11171