Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'smp-core-2023-04-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull SMP cross-CPU function-call updates from Ingo Molnar:

- Remove the extended ("ext") CSD lock diagnostics, make csdlock_debug=
resettable, and add a Kconfig option for its boot-time default

- Add a generic IPI-sending tracepoint, as currently there's no easy
way to instrument IPI origins: existing IPI tracing is arch-dependent,
and on some major architectures it's not even consistently available.

* tag 'smp-core-2023-04-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
trace,smp: Trace all smp_function_call*() invocations
trace: Add trace_ipi_send_cpu()
sched, smp: Trace smp callback causing an IPI
smp: reword smp call IPI comment
treewide: Trace IPIs sent via smp_send_reschedule()
irq_work: Trace self-IPIs sent via arch_irq_work_raise()
smp: Trace IPIs sent via arch_send_call_function_ipi_mask()
sched, smp: Trace IPIs sent via send_call_function_single_ipi()
trace: Add trace_ipi_send_cpumask()
kernel/smp: Make csdlock_debug= resettable
locking/csd_lock: Remove per-CPU data indirection from CSD lock debugging
locking/csd_lock: Remove added data from CSD lock debugging
locking/csd_lock: Add Kconfig option for csd_debug default

+216 -280
+8 -9
Documentation/admin-guide/kernel-parameters.txt
··· 912 912 cs89x0_media= [HW,NET] 913 913 Format: { rj45 | aui | bnc } 914 914 915 - csdlock_debug= [KNL] Enable debug add-ons of cross-CPU function call 916 - handling. When switched on, additional debug data is 917 - printed to the console in case a hanging CPU is 918 - detected, and that CPU is pinged again in order to try 919 - to resolve the hang situation. 920 - 0: disable csdlock debugging (default) 921 - 1: enable basic csdlock debugging (minor impact) 922 - ext: enable extended csdlock debugging (more impact, 923 - but more data) 915 + csdlock_debug= [KNL] Enable or disable debug add-ons of cross-CPU 916 + function call handling. When switched on, 917 + additional debug data is printed to the console 918 + in case a hanging CPU is detected, and that 919 + CPU is pinged again in order to try to resolve 920 + the hang situation. The default value of this 921 + option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT 922 + Kconfig option. 924 923 925 924 dasd= [HW,NET] 926 925 See header of drivers/s390/block/dasd_devmap.c.
+1 -1
arch/alpha/kernel/smp.c
··· 562 562 } 563 563 564 564 void 565 - smp_send_reschedule(int cpu) 565 + arch_smp_send_reschedule(int cpu) 566 566 { 567 567 #ifdef DEBUG_IPI_MSG 568 568 if (cpu == hard_smp_processor_id())
+1 -1
arch/arc/kernel/smp.c
··· 292 292 ipi_send_msg_one(cpu, msg); 293 293 } 294 294 295 - void smp_send_reschedule(int cpu) 295 + void arch_smp_send_reschedule(int cpu) 296 296 { 297 297 ipi_send_msg_one(cpu, IPI_RESCHEDULE); 298 298 }
+1 -2
arch/arm/kernel/smp.c
··· 48 48 #include <asm/mach/arch.h> 49 49 #include <asm/mpu.h> 50 50 51 - #define CREATE_TRACE_POINTS 52 51 #include <trace/events/ipi.h> 53 52 54 53 /* ··· 748 749 ipi_setup(smp_processor_id()); 749 750 } 750 751 751 - void smp_send_reschedule(int cpu) 752 + void arch_smp_send_reschedule(int cpu) 752 753 { 753 754 smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); 754 755 }
+2
arch/arm/mach-actions/platsmp.c
··· 20 20 #include <asm/smp_plat.h> 21 21 #include <asm/smp_scu.h> 22 22 23 + #include <trace/events/ipi.h> 24 + 23 25 #define OWL_CPU1_ADDR 0x50 24 26 #define OWL_CPU1_FLAG 0x5c 25 27
+1 -2
arch/arm64/kernel/smp.c
··· 51 51 #include <asm/ptrace.h> 52 52 #include <asm/virt.h> 53 53 54 - #define CREATE_TRACE_POINTS 55 54 #include <trace/events/ipi.h> 56 55 57 56 DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); ··· 978 979 ipi_setup(smp_processor_id()); 979 980 } 980 981 981 - void smp_send_reschedule(int cpu) 982 + void arch_smp_send_reschedule(int cpu) 982 983 { 983 984 smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); 984 985 }
+1 -1
arch/csky/kernel/smp.c
··· 140 140 on_each_cpu(ipi_stop, NULL, 1); 141 141 } 142 142 143 - void smp_send_reschedule(int cpu) 143 + void arch_smp_send_reschedule(int cpu) 144 144 { 145 145 send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); 146 146 }
+1 -1
arch/hexagon/kernel/smp.c
··· 217 217 } 218 218 } 219 219 220 - void smp_send_reschedule(int cpu) 220 + void arch_smp_send_reschedule(int cpu) 221 221 { 222 222 send_ipi(cpumask_of(cpu), IPI_RESCHEDULE); 223 223 }
+2 -2
arch/ia64/kernel/smp.c
··· 220 220 * Called with preemption disabled. 221 221 */ 222 222 void 223 - smp_send_reschedule (int cpu) 223 + arch_smp_send_reschedule (int cpu) 224 224 { 225 225 ia64_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); 226 226 } 227 - EXPORT_SYMBOL_GPL(smp_send_reschedule); 227 + EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); 228 228 229 229 /* 230 230 * Called with preemption disabled.
+2 -2
arch/loongarch/kernel/smp.c
··· 155 155 * it goes straight through and wastes no time serializing 156 156 * anything. Worst case is that we lose a reschedule ... 157 157 */ 158 - void smp_send_reschedule(int cpu) 158 + void arch_smp_send_reschedule(int cpu) 159 159 { 160 160 loongson_send_ipi_single(cpu, SMP_RESCHEDULE); 161 161 } 162 - EXPORT_SYMBOL_GPL(smp_send_reschedule); 162 + EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); 163 163 164 164 irqreturn_t loongson_ipi_interrupt(int irq, void *dev) 165 165 {
+1 -1
arch/mips/include/asm/smp.h
··· 66 66 * it goes straight through and wastes no time serializing 67 67 * anything. Worst case is that we lose a reschedule ... 68 68 */ 69 - static inline void smp_send_reschedule(int cpu) 69 + static inline void arch_smp_send_reschedule(int cpu) 70 70 { 71 71 extern const struct plat_smp_ops *mp_ops; /* private */ 72 72
+1 -1
arch/openrisc/kernel/smp.c
··· 173 173 } 174 174 } 175 175 176 - void smp_send_reschedule(int cpu) 176 + void arch_smp_send_reschedule(int cpu) 177 177 { 178 178 smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); 179 179 }
+2 -2
arch/parisc/kernel/smp.c
··· 246 246 inline void 247 247 smp_send_stop(void) { send_IPI_allbutself(IPI_CPU_STOP); } 248 248 249 - void 250 - smp_send_reschedule(int cpu) { send_IPI_single(cpu, IPI_RESCHEDULE); } 249 + void 250 + arch_smp_send_reschedule(int cpu) { send_IPI_single(cpu, IPI_RESCHEDULE); } 251 251 252 252 void 253 253 smp_send_all_nop(void)
+4 -2
arch/powerpc/kernel/smp.c
··· 61 61 #include <asm/kup.h> 62 62 #include <asm/fadump.h> 63 63 64 + #include <trace/events/ipi.h> 65 + 64 66 #ifdef DEBUG 65 67 #include <asm/udbg.h> 66 68 #define DBG(fmt...) udbg_printf(fmt) ··· 366 364 #endif 367 365 } 368 366 369 - void smp_send_reschedule(int cpu) 367 + void arch_smp_send_reschedule(int cpu) 370 368 { 371 369 if (likely(smp_ops)) 372 370 do_message_pass(cpu, PPC_MSG_RESCHEDULE); 373 371 } 374 - EXPORT_SYMBOL_GPL(smp_send_reschedule); 372 + EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); 375 373 376 374 void arch_send_call_function_single_ipi(int cpu) 377 375 {
+3
arch/powerpc/kvm/book3s_hv.c
··· 43 43 #include <linux/compiler.h> 44 44 #include <linux/of.h> 45 45 #include <linux/irqdomain.h> 46 + #include <linux/smp.h> 46 47 47 48 #include <asm/ftrace.h> 48 49 #include <asm/reg.h> ··· 80 79 #include <asm/ultravisor.h> 81 80 #include <asm/dtl.h> 82 81 #include <asm/plpar_wrappers.h> 82 + 83 + #include <trace/events/ipi.h> 83 84 84 85 #include "book3s.h" 85 86 #include "book3s_hv.h"
+2
arch/powerpc/platforms/powernv/subcore.c
··· 20 20 #include <asm/opal.h> 21 21 #include <asm/smp.h> 22 22 23 + #include <trace/events/ipi.h> 24 + 23 25 #include "subcore.h" 24 26 #include "powernv.h" 25 27
+2 -2
arch/riscv/kernel/smp.c
··· 333 333 } 334 334 #endif 335 335 336 - void smp_send_reschedule(int cpu) 336 + void arch_smp_send_reschedule(int cpu) 337 337 { 338 338 send_ipi_single(cpu, IPI_RESCHEDULE); 339 339 } 340 - EXPORT_SYMBOL_GPL(smp_send_reschedule); 340 + EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
+1 -1
arch/s390/kernel/smp.c
··· 553 553 * it goes straight through and wastes no time serializing 554 554 * anything. Worst case is that we lose a reschedule ... 555 555 */ 556 - void smp_send_reschedule(int cpu) 556 + void arch_smp_send_reschedule(int cpu) 557 557 { 558 558 pcpu_ec_call(pcpu_devices + cpu, ec_schedule); 559 559 }
+1 -1
arch/sh/kernel/smp.c
··· 256 256 (bogosum / (5000/HZ)) % 100); 257 257 } 258 258 259 - void smp_send_reschedule(int cpu) 259 + void arch_smp_send_reschedule(int cpu) 260 260 { 261 261 mp_ops->send_ipi(cpu, SMP_MSG_RESCHEDULE); 262 262 }
+1 -1
arch/sparc/kernel/smp_32.c
··· 120 120 121 121 struct linux_prom_registers smp_penguin_ctable = { 0 }; 122 122 123 - void smp_send_reschedule(int cpu) 123 + void arch_smp_send_reschedule(int cpu) 124 124 { 125 125 /* 126 126 * CPU model dependent way of implementing IPI generation targeting
+1 -1
arch/sparc/kernel/smp_64.c
··· 1430 1430 return hv_err; 1431 1431 } 1432 1432 1433 - void smp_send_reschedule(int cpu) 1433 + void arch_smp_send_reschedule(int cpu) 1434 1434 { 1435 1435 if (cpu == smp_processor_id()) { 1436 1436 WARN_ON_ONCE(preemptible());
+1 -1
arch/x86/include/asm/smp.h
··· 99 99 BUG(); 100 100 } 101 101 102 - static inline void smp_send_reschedule(int cpu) 102 + static inline void arch_smp_send_reschedule(int cpu) 103 103 { 104 104 smp_ops.smp_send_reschedule(cpu); 105 105 }
+4
arch/x86/kvm/svm/svm.c
··· 27 27 #include <linux/swap.h> 28 28 #include <linux/rwsem.h> 29 29 #include <linux/cc_platform.h> 30 + #include <linux/smp.h> 30 31 31 32 #include <asm/apic.h> 32 33 #include <asm/perf_event.h> ··· 42 41 #include <asm/fpu/api.h> 43 42 44 43 #include <asm/virtext.h> 44 + 45 + #include <trace/events/ipi.h> 46 + 45 47 #include "trace.h" 46 48 47 49 #include "svm.h"
+2
arch/x86/kvm/x86.c
··· 60 60 #include <linux/mem_encrypt.h> 61 61 #include <linux/entry-kvm.h> 62 62 #include <linux/suspend.h> 63 + #include <linux/smp.h> 63 64 65 + #include <trace/events/ipi.h> 64 66 #include <trace/events/kvm.h> 65 67 66 68 #include <asm/debugreg.h>
+1 -1
arch/xtensa/kernel/smp.c
··· 391 391 send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC); 392 392 } 393 393 394 - void smp_send_reschedule(int cpu) 394 + void arch_smp_send_reschedule(int cpu) 395 395 { 396 396 send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); 397 397 }
+9 -2
include/linux/smp.h
··· 125 125 /* 126 126 * sends a 'reschedule' event to another CPU: 127 127 */ 128 - extern void smp_send_reschedule(int cpu); 129 - 128 + extern void arch_smp_send_reschedule(int cpu); 129 + /* 130 + * scheduler_ipi() is inline so can't be passed as callback reason, but the 131 + * callsite IP should be sufficient for root-causing IPIs sent from here. 132 + */ 133 + #define smp_send_reschedule(cpu) ({ \ 134 + trace_ipi_send_cpu(cpu, _RET_IP_, NULL); \ 135 + arch_smp_send_reschedule(cpu); \ 136 + }) 130 137 131 138 /* 132 139 * Prepare machine for booting other CPUs.
+44
include/trace/events/ipi.h
··· 35 35 TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) 36 36 ); 37 37 38 + TRACE_EVENT(ipi_send_cpu, 39 + 40 + TP_PROTO(const unsigned int cpu, unsigned long callsite, void *callback), 41 + 42 + TP_ARGS(cpu, callsite, callback), 43 + 44 + TP_STRUCT__entry( 45 + __field(unsigned int, cpu) 46 + __field(void *, callsite) 47 + __field(void *, callback) 48 + ), 49 + 50 + TP_fast_assign( 51 + __entry->cpu = cpu; 52 + __entry->callsite = (void *)callsite; 53 + __entry->callback = callback; 54 + ), 55 + 56 + TP_printk("cpu=%u callsite=%pS callback=%pS", 57 + __entry->cpu, __entry->callsite, __entry->callback) 58 + ); 59 + 60 + TRACE_EVENT(ipi_send_cpumask, 61 + 62 + TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback), 63 + 64 + TP_ARGS(cpumask, callsite, callback), 65 + 66 + TP_STRUCT__entry( 67 + __cpumask(cpumask) 68 + __field(void *, callsite) 69 + __field(void *, callback) 70 + ), 71 + 72 + TP_fast_assign( 73 + __assign_cpumask(cpumask, cpumask_bits(cpumask)); 74 + __entry->callsite = (void *)callsite; 75 + __entry->callback = callback; 76 + ), 77 + 78 + TP_printk("cpumask=%s callsite=%pS callback=%pS", 79 + __get_cpumask(cpumask), __entry->callsite, __entry->callback) 80 + ); 81 + 38 82 DECLARE_EVENT_CLASS(ipi_handler, 39 83 40 84 TP_PROTO(const char *reason),
+11 -1
kernel/irq_work.c
··· 22 22 #include <asm/processor.h> 23 23 #include <linux/kasan.h> 24 24 25 + #include <trace/events/ipi.h> 26 + 25 27 static DEFINE_PER_CPU(struct llist_head, raised_list); 26 28 static DEFINE_PER_CPU(struct llist_head, lazy_list); 27 29 static DEFINE_PER_CPU(struct task_struct *, irq_workd); ··· 76 74 */ 77 75 } 78 76 77 + static __always_inline void irq_work_raise(struct irq_work *work) 78 + { 79 + if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt()) 80 + trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func); 81 + 82 + arch_irq_work_raise(); 83 + } 84 + 79 85 /* Enqueue on current CPU, work must already be claimed and preempt disabled */ 80 86 static void __irq_work_queue_local(struct irq_work *work) 81 87 { ··· 109 99 110 100 /* If the work is "lazy", handle it from next tick if any */ 111 101 if (!lazy_work || tick_nohz_tick_stopped()) 112 - arch_irq_work_raise(); 102 + irq_work_raise(work); 113 103 } 114 104 115 105 /* Enqueue the irq work @work on the current CPU */
+16 -6
kernel/sched/core.c
··· 80 80 #define CREATE_TRACE_POINTS 81 81 #include <linux/sched/rseq_api.h> 82 82 #include <trace/events/sched.h> 83 + #include <trace/events/ipi.h> 83 84 #undef CREATE_TRACE_POINTS 84 85 85 86 #include "sched.h" ··· 95 94 #include "../workqueue_internal.h" 96 95 #include "../../io_uring/io-wq.h" 97 96 #include "../smpboot.h" 97 + 98 + EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); 99 + EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); 98 100 99 101 /* 100 102 * Export tracepoints that act as a bare tracehook (ie: have no trace event ··· 3852 3848 rq_unlock_irqrestore(rq, &rf); 3853 3849 } 3854 3850 3855 - void send_call_function_single_ipi(int cpu) 3851 + /* 3852 + * Prepare the scene for sending an IPI for a remote smp_call 3853 + * 3854 + * Returns true if the caller can proceed with sending the IPI. 3855 + * Returns false otherwise. 3856 + */ 3857 + bool call_function_single_prep_ipi(int cpu) 3856 3858 { 3857 - struct rq *rq = cpu_rq(cpu); 3858 - 3859 - if (!set_nr_if_polling(rq->idle)) 3860 - arch_send_call_function_single_ipi(cpu); 3861 - else 3859 + if (set_nr_if_polling(cpu_rq(cpu)->idle)) { 3862 3860 trace_sched_wake_idle_without_ipi(cpu); 3861 + return false; 3862 + } 3863 + 3864 + return true; 3863 3865 } 3864 3866 3865 3867 /*
+1 -1
kernel/sched/smp.h
··· 6 6 7 7 extern void sched_ttwu_pending(void *arg); 8 8 9 - extern void send_call_function_single_ipi(int cpu); 9 + extern bool call_function_single_prep_ipi(int cpu); 10 10 11 11 #ifdef CONFIG_SMP 12 12 extern void flush_smp_call_function_queue(void);
+76 -235
kernel/smp.c
··· 26 26 #include <linux/sched/debug.h> 27 27 #include <linux/jump_label.h> 28 28 29 + #include <trace/events/ipi.h> 30 + 29 31 #include "smpboot.h" 30 32 #include "sched/smp.h" 31 33 32 34 #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK) 33 35 34 - #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 35 - union cfd_seq_cnt { 36 - u64 val; 37 - struct { 38 - u64 src:16; 39 - u64 dst:16; 40 - #define CFD_SEQ_NOCPU 0xffff 41 - u64 type:4; 42 - #define CFD_SEQ_QUEUE 0 43 - #define CFD_SEQ_IPI 1 44 - #define CFD_SEQ_NOIPI 2 45 - #define CFD_SEQ_PING 3 46 - #define CFD_SEQ_PINGED 4 47 - #define CFD_SEQ_HANDLE 5 48 - #define CFD_SEQ_DEQUEUE 6 49 - #define CFD_SEQ_IDLE 7 50 - #define CFD_SEQ_GOTIPI 8 51 - #define CFD_SEQ_HDLEND 9 52 - u64 cnt:28; 53 - } u; 54 - }; 55 - 56 - static char *seq_type[] = { 57 - [CFD_SEQ_QUEUE] = "queue", 58 - [CFD_SEQ_IPI] = "ipi", 59 - [CFD_SEQ_NOIPI] = "noipi", 60 - [CFD_SEQ_PING] = "ping", 61 - [CFD_SEQ_PINGED] = "pinged", 62 - [CFD_SEQ_HANDLE] = "handle", 63 - [CFD_SEQ_DEQUEUE] = "dequeue (src CPU 0 == empty)", 64 - [CFD_SEQ_IDLE] = "idle", 65 - [CFD_SEQ_GOTIPI] = "gotipi", 66 - [CFD_SEQ_HDLEND] = "hdlend (src CPU 0 == early)", 67 - }; 68 - 69 - struct cfd_seq_local { 70 - u64 ping; 71 - u64 pinged; 72 - u64 handle; 73 - u64 dequeue; 74 - u64 idle; 75 - u64 gotipi; 76 - u64 hdlend; 77 - }; 78 - #endif 79 - 80 - struct cfd_percpu { 81 - call_single_data_t csd; 82 - #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 83 - u64 seq_queue; 84 - u64 seq_ipi; 85 - u64 seq_noipi; 86 - #endif 87 - }; 88 - 89 36 struct call_function_data { 90 - struct cfd_percpu __percpu *pcpu; 37 + call_single_data_t __percpu *csd; 91 38 cpumask_var_t cpumask; 92 39 cpumask_var_t cpumask_ipi; 93 40 }; ··· 57 110 free_cpumask_var(cfd->cpumask); 58 111 return -ENOMEM; 59 112 } 60 - cfd->pcpu = alloc_percpu(struct cfd_percpu); 61 - if (!cfd->pcpu) { 113 + cfd->csd = alloc_percpu(call_single_data_t); 114 + if (!cfd->csd) { 62 115 free_cpumask_var(cfd->cpumask); 63 116 
free_cpumask_var(cfd->cpumask_ipi); 64 117 return -ENOMEM; ··· 73 126 74 127 free_cpumask_var(cfd->cpumask); 75 128 free_cpumask_var(cfd->cpumask_ipi); 76 - free_percpu(cfd->pcpu); 129 + free_percpu(cfd->csd); 77 130 return 0; 78 131 } 79 132 ··· 103 156 smpcfd_prepare_cpu(smp_processor_id()); 104 157 } 105 158 159 + static __always_inline void 160 + send_call_function_single_ipi(int cpu) 161 + { 162 + if (call_function_single_prep_ipi(cpu)) { 163 + trace_ipi_send_cpu(cpu, _RET_IP_, 164 + generic_smp_call_function_single_interrupt); 165 + arch_send_call_function_single_ipi(cpu); 166 + } 167 + } 168 + 169 + static __always_inline void 170 + send_call_function_ipi_mask(struct cpumask *mask) 171 + { 172 + trace_ipi_send_cpumask(mask, _RET_IP_, 173 + generic_smp_call_function_single_interrupt); 174 + arch_send_call_function_ipi_mask(mask); 175 + } 176 + 106 177 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 107 178 108 - static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled); 109 - static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended); 179 + static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled); 110 180 181 + /* 182 + * Parse the csdlock_debug= kernel boot parameter. 
183 + * 184 + * If you need to restore the old "ext" value that once provided 185 + * additional debugging information, reapply the following commits: 186 + * 187 + * de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging") 188 + * a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging") 189 + */ 111 190 static int __init csdlock_debug(char *str) 112 191 { 192 + int ret; 113 193 unsigned int val = 0; 114 194 115 - if (str && !strcmp(str, "ext")) { 116 - val = 1; 117 - static_branch_enable(&csdlock_debug_extended); 118 - } else 119 - get_option(&str, &val); 120 - 121 - if (val) 122 - static_branch_enable(&csdlock_debug_enabled); 195 + ret = get_option(&str, &val); 196 + if (ret) { 197 + if (val) 198 + static_branch_enable(&csdlock_debug_enabled); 199 + else 200 + static_branch_disable(&csdlock_debug_enabled); 201 + } 123 202 124 203 return 1; 125 204 } ··· 154 181 static DEFINE_PER_CPU(call_single_data_t *, cur_csd); 155 182 static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); 156 183 static DEFINE_PER_CPU(void *, cur_csd_info); 157 - static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local); 158 184 159 185 static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. 
*/ 160 186 module_param(csd_lock_timeout, ulong, 0444); 161 187 162 188 static atomic_t csd_bug_count = ATOMIC_INIT(0); 163 - static u64 cfd_seq; 164 - 165 - #define CFD_SEQ(s, d, t, c) \ 166 - (union cfd_seq_cnt){ .u.src = s, .u.dst = d, .u.type = t, .u.cnt = c } 167 - 168 - static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type) 169 - { 170 - union cfd_seq_cnt new, old; 171 - 172 - new = CFD_SEQ(src, dst, type, 0); 173 - 174 - do { 175 - old.val = READ_ONCE(cfd_seq); 176 - new.u.cnt = old.u.cnt + 1; 177 - } while (cmpxchg(&cfd_seq, old.val, new.val) != old.val); 178 - 179 - return old.val; 180 - } 181 - 182 - #define cfd_seq_store(var, src, dst, type) \ 183 - do { \ 184 - if (static_branch_unlikely(&csdlock_debug_extended)) \ 185 - var = cfd_seq_inc(src, dst, type); \ 186 - } while (0) 187 189 188 190 /* Record current CSD work for current CPU, NULL to erase. */ 189 191 static void __csd_lock_record(struct __call_single_data *csd) ··· 190 242 if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC) 191 243 return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */ 192 244 return -1; 193 - } 194 - 195 - static void cfd_seq_data_add(u64 val, unsigned int src, unsigned int dst, 196 - unsigned int type, union cfd_seq_cnt *data, 197 - unsigned int *n_data, unsigned int now) 198 - { 199 - union cfd_seq_cnt new[2]; 200 - unsigned int i, j, k; 201 - 202 - new[0].val = val; 203 - new[1] = CFD_SEQ(src, dst, type, new[0].u.cnt + 1); 204 - 205 - for (i = 0; i < 2; i++) { 206 - if (new[i].u.cnt <= now) 207 - new[i].u.cnt |= 0x80000000U; 208 - for (j = 0; j < *n_data; j++) { 209 - if (new[i].u.cnt == data[j].u.cnt) { 210 - /* Direct read value trumps generated one. 
*/ 211 - if (i == 0) 212 - data[j].val = new[i].val; 213 - break; 214 - } 215 - if (new[i].u.cnt < data[j].u.cnt) { 216 - for (k = *n_data; k > j; k--) 217 - data[k].val = data[k - 1].val; 218 - data[j].val = new[i].val; 219 - (*n_data)++; 220 - break; 221 - } 222 - } 223 - if (j == *n_data) { 224 - data[j].val = new[i].val; 225 - (*n_data)++; 226 - } 227 - } 228 - } 229 - 230 - static const char *csd_lock_get_type(unsigned int type) 231 - { 232 - return (type >= ARRAY_SIZE(seq_type)) ? "?" : seq_type[type]; 233 - } 234 - 235 - static void csd_lock_print_extended(struct __call_single_data *csd, int cpu) 236 - { 237 - struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu); 238 - unsigned int srccpu = csd->node.src; 239 - struct call_function_data *cfd = per_cpu_ptr(&cfd_data, srccpu); 240 - struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); 241 - unsigned int now; 242 - union cfd_seq_cnt data[2 * ARRAY_SIZE(seq_type)]; 243 - unsigned int n_data = 0, i; 244 - 245 - data[0].val = READ_ONCE(cfd_seq); 246 - now = data[0].u.cnt; 247 - 248 - cfd_seq_data_add(pcpu->seq_queue, srccpu, cpu, CFD_SEQ_QUEUE, data, &n_data, now); 249 - cfd_seq_data_add(pcpu->seq_ipi, srccpu, cpu, CFD_SEQ_IPI, data, &n_data, now); 250 - cfd_seq_data_add(pcpu->seq_noipi, srccpu, cpu, CFD_SEQ_NOIPI, data, &n_data, now); 251 - 252 - cfd_seq_data_add(per_cpu(cfd_seq_local.ping, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PING, data, &n_data, now); 253 - cfd_seq_data_add(per_cpu(cfd_seq_local.pinged, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED, data, &n_data, now); 254 - 255 - cfd_seq_data_add(seq->idle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_IDLE, data, &n_data, now); 256 - cfd_seq_data_add(seq->gotipi, CFD_SEQ_NOCPU, cpu, CFD_SEQ_GOTIPI, data, &n_data, now); 257 - cfd_seq_data_add(seq->handle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_HANDLE, data, &n_data, now); 258 - cfd_seq_data_add(seq->dequeue, CFD_SEQ_NOCPU, cpu, CFD_SEQ_DEQUEUE, data, &n_data, now); 259 - cfd_seq_data_add(seq->hdlend, CFD_SEQ_NOCPU, cpu, 
CFD_SEQ_HDLEND, data, &n_data, now); 260 - 261 - for (i = 0; i < n_data; i++) { 262 - pr_alert("\tcsd: cnt(%07x): %04x->%04x %s\n", 263 - data[i].u.cnt & ~0x80000000U, data[i].u.src, 264 - data[i].u.dst, csd_lock_get_type(data[i].u.type)); 265 - } 266 - pr_alert("\tcsd: cnt now: %07x\n", now); 267 245 } 268 246 269 247 /* ··· 242 368 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request"); 243 369 } 244 370 if (cpu >= 0) { 245 - if (static_branch_unlikely(&csdlock_debug_extended)) 246 - csd_lock_print_extended(csd, cpu); 247 371 dump_cpu_task(cpu); 248 372 if (!cpu_cur_csd) { 249 373 pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); ··· 284 412 285 413 smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); 286 414 } 287 - 288 - static void __smp_call_single_queue_debug(int cpu, struct llist_node *node) 289 - { 290 - unsigned int this_cpu = smp_processor_id(); 291 - struct cfd_seq_local *seq = this_cpu_ptr(&cfd_seq_local); 292 - struct call_function_data *cfd = this_cpu_ptr(&cfd_data); 293 - struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); 294 - 295 - cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); 296 - if (llist_add(node, &per_cpu(call_single_queue, cpu))) { 297 - cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); 298 - cfd_seq_store(seq->ping, this_cpu, cpu, CFD_SEQ_PING); 299 - send_call_function_single_ipi(cpu); 300 - cfd_seq_store(seq->pinged, this_cpu, cpu, CFD_SEQ_PINGED); 301 - } else { 302 - cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); 303 - } 304 - } 305 415 #else 306 - #define cfd_seq_store(var, src, dst, type) 307 - 308 416 static void csd_lock_record(struct __call_single_data *csd) 309 417 { 310 418 } ··· 322 470 323 471 void __smp_call_single_queue(int cpu, struct llist_node *node) 324 472 { 325 - #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG 326 - if (static_branch_unlikely(&csdlock_debug_extended)) { 327 - unsigned int type; 473 + /* 
474 + * We have to check the type of the CSD before queueing it, because 475 + * once queued it can have its flags cleared by 476 + * flush_smp_call_function_queue() 477 + * even if we haven't sent the smp_call IPI yet (e.g. the stopper 478 + * executes migration_cpu_stop() on the remote CPU). 479 + */ 480 + if (trace_ipi_send_cpu_enabled()) { 481 + call_single_data_t *csd; 482 + smp_call_func_t func; 328 483 329 - type = CSD_TYPE(container_of(node, call_single_data_t, 330 - node.llist)); 331 - if (type == CSD_TYPE_SYNC || type == CSD_TYPE_ASYNC) { 332 - __smp_call_single_queue_debug(cpu, node); 333 - return; 334 - } 484 + csd = container_of(node, call_single_data_t, node.llist); 485 + func = CSD_TYPE(csd) == CSD_TYPE_TTWU ? 486 + sched_ttwu_pending : csd->func; 487 + 488 + trace_ipi_send_cpu(cpu, _RET_IP_, func); 335 489 } 336 - #endif 337 490 338 491 /* 339 - * The list addition should be visible before sending the IPI 340 - * handler locks the list to pull the entry off it because of 341 - * normal cache coherency rules implied by spinlocks. 492 + * The list addition should be visible to the target CPU when it pops 493 + * the head of the list to pull the entry off it in the IPI handler 494 + * because of normal cache coherency rules implied by the underlying 495 + * llist ops. 
342 496 * 343 497 * If IPIs can go out of order to the cache coherency protocol 344 498 * in an architecture, sufficient synchronisation should be added ··· 399 541 */ 400 542 void generic_smp_call_function_single_interrupt(void) 401 543 { 402 - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU, 403 - smp_processor_id(), CFD_SEQ_GOTIPI); 404 544 __flush_smp_call_function_queue(true); 405 545 } 406 546 ··· 426 570 lockdep_assert_irqs_disabled(); 427 571 428 572 head = this_cpu_ptr(&call_single_queue); 429 - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->handle, CFD_SEQ_NOCPU, 430 - smp_processor_id(), CFD_SEQ_HANDLE); 431 573 entry = llist_del_all(head); 432 - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->dequeue, 433 - /* Special meaning of source cpu: 0 == queue empty */ 434 - entry ? CFD_SEQ_NOCPU : 0, 435 - smp_processor_id(), CFD_SEQ_DEQUEUE); 436 574 entry = llist_reverse_order(entry); 437 575 438 576 /* There shouldn't be any pending callbacks on an offline CPU. */ ··· 485 635 } 486 636 } 487 637 488 - if (!entry) { 489 - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, 490 - 0, smp_processor_id(), 491 - CFD_SEQ_HDLEND); 638 + if (!entry) 492 639 return; 493 - } 494 640 495 641 /* 496 642 * Second; run all !SYNC callbacks. 
··· 524 678 */ 525 679 if (entry) 526 680 sched_ttwu_pending(entry); 527 - 528 - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, CFD_SEQ_NOCPU, 529 - smp_processor_id(), CFD_SEQ_HDLEND); 530 681 } 531 682 532 683 ··· 547 704 if (llist_empty(this_cpu_ptr(&call_single_queue))) 548 705 return; 549 706 550 - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU, 551 - smp_processor_id(), CFD_SEQ_IDLE); 552 707 local_irq_save(flags); 553 708 /* Get the already pending soft interrupts for RT enabled kernels */ 554 709 was_pending = local_softirq_pending(); ··· 728 887 int cpu, last_cpu, this_cpu = smp_processor_id(); 729 888 struct call_function_data *cfd; 730 889 bool wait = scf_flags & SCF_WAIT; 890 + int nr_cpus = 0, nr_queued = 0; 731 891 bool run_remote = false; 732 892 bool run_local = false; 733 - int nr_cpus = 0; 734 893 735 894 lockdep_assert_preemption_disabled(); 736 895 ··· 770 929 771 930 cpumask_clear(cfd->cpumask_ipi); 772 931 for_each_cpu(cpu, cfd->cpumask) { 773 - struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); 774 - call_single_data_t *csd = &pcpu->csd; 932 + call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); 775 933 776 - if (cond_func && !cond_func(cpu, info)) 934 + if (cond_func && !cond_func(cpu, info)) { 935 + __cpumask_clear_cpu(cpu, cfd->cpumask); 777 936 continue; 937 + } 778 938 779 939 csd_lock(csd); 780 940 if (wait) ··· 786 944 csd->node.src = smp_processor_id(); 787 945 csd->node.dst = cpu; 788 946 #endif 789 - cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); 790 947 if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) { 791 948 __cpumask_set_cpu(cpu, cfd->cpumask_ipi); 792 949 nr_cpus++; 793 950 last_cpu = cpu; 794 - 795 - cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); 796 - } else { 797 - cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); 798 951 } 952 + nr_queued++; 799 953 } 800 954 801 - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, CFD_SEQ_NOCPU, 
CFD_SEQ_PING); 955 + /* 956 + * Trace each smp_function_call_*() as an IPI, actual IPIs 957 + * will be traced with func==generic_smp_call_function_single_ipi(). 958 + */ 959 + if (nr_queued) 960 + trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func); 802 961 803 962 /* 804 963 * Choose the most efficient way to send an IPI. Note that the ··· 809 966 if (nr_cpus == 1) 810 967 send_call_function_single_ipi(last_cpu); 811 968 else if (likely(nr_cpus > 1)) 812 - arch_send_call_function_ipi_mask(cfd->cpumask_ipi); 813 - 814 - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED); 969 + send_call_function_ipi_mask(cfd->cpumask_ipi); 815 970 } 816 971 817 972 if (run_local && (!cond_func || cond_func(this_cpu, info))) { ··· 824 983 for_each_cpu(cpu, cfd->cpumask) { 825 984 call_single_data_t *csd; 826 985 827 - csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; 986 + csd = per_cpu_ptr(cfd->csd, cpu); 828 987 csd_lock_wait(csd); 829 988 } 830 989 }
+9
lib/Kconfig.debug
··· 1490 1490 include the IPI handler function currently executing (if any) 1491 1491 and relevant stack traces. 1492 1492 1493 + config CSD_LOCK_WAIT_DEBUG_DEFAULT 1494 + bool "Default csd_lock_wait() debugging on at boot time" 1495 + depends on CSD_LOCK_WAIT_DEBUG 1496 + depends on 64BIT 1497 + default n 1498 + help 1499 + This option causes the csdlock_debug= kernel boot parameter to 1500 + default to 1 (basic debugging) instead of 0 (no debugging). 1501 + 1493 1502 endmenu # lock debugging 1494 1503 1495 1504 config TRACE_IRQFLAGS
+3
virt/kvm/kvm_main.c
··· 62 62 #include "kvm_mm.h" 63 63 #include "vfio.h" 64 64 65 + #include <trace/events/ipi.h> 66 + 65 67 #define CREATE_TRACE_POINTS 66 68 #include <trace/events/kvm.h> 67 69 68 70 #include <linux/kvm_dirty_ring.h> 71 + 69 72 70 73 /* Worst case buffer size needed for holding an integer. */ 71 74 #define ITOA_MAX_LEN 12