Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tracing: Use irq_work for wake ups and remove *_nowake_*() functions

Have the ring buffer commit function use the irq_work infrastructure to
wake up any waiters waiting on the ring buffer for new data. The irq_work
was created for such a purpose, where doing the actual wake up at the
time of adding data is too dangerous, as an event or function trace may
be in the midst of the work queue locks and cause deadlocks. The irq_work
will either delay the action to the next timer interrupt, or trigger an IPI
to itself forcing an interrupt to do the work (in a safe location).

With irq_work, all ring buffer commits can safely do wakeups, removing
the need for the ring buffer commit "nowake" variants, which were used
by events and function tracing. All commits can now safely use the
normal commit, and the "nowake" variants can be removed.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

Authored by Steven Rostedt; committed by Steven Rostedt
0d5c6e1c 02404baf

+84 -73
+7 -7
include/linux/ftrace_event.h
··· 127 127 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, 128 128 struct ring_buffer_event *event, 129 129 unsigned long flags, int pc); 130 - void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, 131 - struct ring_buffer_event *event, 132 - unsigned long flags, int pc); 133 - void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, 134 - struct ring_buffer_event *event, 135 - unsigned long flags, int pc, 136 - struct pt_regs *regs); 130 + void trace_buffer_unlock_commit(struct ring_buffer *buffer, 131 + struct ring_buffer_event *event, 132 + unsigned long flags, int pc); 133 + void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, 134 + struct ring_buffer_event *event, 135 + unsigned long flags, int pc, 136 + struct pt_regs *regs); 137 137 void trace_current_buffer_discard_commit(struct ring_buffer *buffer, 138 138 struct ring_buffer_event *event); 139 139
+1 -2
include/trace/ftrace.h
··· 545 545 { assign; } \ 546 546 \ 547 547 if (!filter_current_check_discard(buffer, event_call, entry, event)) \ 548 - trace_nowake_buffer_unlock_commit(buffer, \ 549 - event, irq_flags, pc); \ 548 + trace_buffer_unlock_commit(buffer, event, irq_flags, pc); \ 550 549 } 551 550 /* 552 551 * The ftrace_test_probe is compiled out, it is only here as a build time check
+1
kernel/trace/Kconfig
··· 119 119 select BINARY_PRINTF 120 120 select EVENT_TRACING 121 121 select TRACE_CLOCK 122 + select IRQ_WORK 122 123 123 124 config GENERIC_TRACER 124 125 bool
+68 -53
kernel/trace/trace.c
··· 19 19 #include <linux/seq_file.h> 20 20 #include <linux/notifier.h> 21 21 #include <linux/irqflags.h> 22 + #include <linux/irq_work.h> 22 23 #include <linux/debugfs.h> 23 24 #include <linux/pagemap.h> 24 25 #include <linux/hardirq.h> ··· 84 83 * occurred. 85 84 */ 86 85 static DEFINE_PER_CPU(bool, trace_cmdline_save); 86 + 87 + /* 88 + * When a reader is waiting for data, then this variable is 89 + * set to true. 90 + */ 91 + static bool trace_wakeup_needed; 92 + 93 + static struct irq_work trace_work_wakeup; 87 94 88 95 /* 89 96 * Kill all tracing for good (never come back). ··· 338 329 static int trace_stop_count; 339 330 static DEFINE_RAW_SPINLOCK(tracing_start_lock); 340 331 341 - static void wakeup_work_handler(struct work_struct *work) 332 + /** 333 + * trace_wake_up - wake up tasks waiting for trace input 334 + * 335 + * Schedules a delayed work to wake up any task that is blocked on the 336 + * trace_wait queue. These is used with trace_poll for tasks polling the 337 + * trace. 338 + */ 339 + static void trace_wake_up(struct irq_work *work) 342 340 { 343 - wake_up(&trace_wait); 344 - } 341 + wake_up_all(&trace_wait); 345 342 346 - static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); 343 + } 347 344 348 345 /** 349 346 * tracing_on - enable tracing buffers ··· 403 388 return !global_trace.buffer_disabled; 404 389 } 405 390 EXPORT_SYMBOL_GPL(tracing_is_on); 406 - 407 - /** 408 - * trace_wake_up - wake up tasks waiting for trace input 409 - * 410 - * Schedules a delayed work to wake up any task that is blocked on the 411 - * trace_wait queue. These is used with trace_poll for tasks polling the 412 - * trace. 
413 - */ 414 - void trace_wake_up(void) 415 - { 416 - const unsigned long delay = msecs_to_jiffies(2); 417 - 418 - if (trace_flags & TRACE_ITER_BLOCK) 419 - return; 420 - schedule_delayed_work(&wakeup_work, delay); 421 - } 422 391 423 392 static int __init set_buf_size(char *str) 424 393 { ··· 751 752 arch_spin_unlock(&ftrace_max_lock); 752 753 } 753 754 #endif /* CONFIG_TRACER_MAX_TRACE */ 755 + 756 + static void default_wait_pipe(struct trace_iterator *iter) 757 + { 758 + DEFINE_WAIT(wait); 759 + 760 + prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); 761 + 762 + /* 763 + * The events can happen in critical sections where 764 + * checking a work queue can cause deadlocks. 765 + * After adding a task to the queue, this flag is set 766 + * only to notify events to try to wake up the queue 767 + * using irq_work. 768 + * 769 + * We don't clear it even if the buffer is no longer 770 + * empty. The flag only causes the next event to run 771 + * irq_work to do the work queue wake up. The worse 772 + * that can happen if we race with !trace_empty() is that 773 + * an event will cause an irq_work to try to wake up 774 + * an empty queue. 775 + * 776 + * There's no reason to protect this flag either, as 777 + * the work queue and irq_work logic will do the necessary 778 + * synchronization for the wake ups. The only thing 779 + * that is necessary is that the wake up happens after 780 + * a task has been queued. It's OK for spurious wake ups. 781 + */ 782 + trace_wakeup_needed = true; 783 + 784 + if (trace_empty(iter)) 785 + schedule(); 786 + 787 + finish_wait(&trace_wait, &wait); 788 + } 754 789 755 790 /** 756 791 * register_tracer - register a tracer with the ftrace system. 
··· 1189 1156 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) 1190 1157 { 1191 1158 __this_cpu_write(trace_cmdline_save, true); 1159 + if (trace_wakeup_needed) { 1160 + trace_wakeup_needed = false; 1161 + /* irq_work_queue() supplies it's own memory barriers */ 1162 + irq_work_queue(&trace_work_wakeup); 1163 + } 1192 1164 ring_buffer_unlock_commit(buffer, event); 1193 1165 } 1194 1166 1195 1167 static inline void 1196 1168 __trace_buffer_unlock_commit(struct ring_buffer *buffer, 1197 1169 struct ring_buffer_event *event, 1198 - unsigned long flags, int pc, 1199 - int wake) 1170 + unsigned long flags, int pc) 1200 1171 { 1201 1172 __buffer_unlock_commit(buffer, event); 1202 1173 1203 1174 ftrace_trace_stack(buffer, flags, 6, pc); 1204 1175 ftrace_trace_userstack(buffer, flags, pc); 1205 - 1206 - if (wake) 1207 - trace_wake_up(); 1208 1176 } 1209 1177 1210 1178 void trace_buffer_unlock_commit(struct ring_buffer *buffer, 1211 1179 struct ring_buffer_event *event, 1212 1180 unsigned long flags, int pc) 1213 1181 { 1214 - __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); 1182 + __trace_buffer_unlock_commit(buffer, event, flags, pc); 1215 1183 } 1184 + EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); 1216 1185 1217 1186 struct ring_buffer_event * 1218 1187 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, ··· 1231 1196 struct ring_buffer_event *event, 1232 1197 unsigned long flags, int pc) 1233 1198 { 1234 - __trace_buffer_unlock_commit(buffer, event, flags, pc, 1); 1199 + __trace_buffer_unlock_commit(buffer, event, flags, pc); 1235 1200 } 1236 1201 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit); 1237 1202 1238 - void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, 1239 - struct ring_buffer_event *event, 1240 - unsigned long flags, int pc) 1241 - { 1242 - __trace_buffer_unlock_commit(buffer, event, flags, pc, 0); 1243 - } 1244 - EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); 1245 - 
1246 - void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer, 1247 - struct ring_buffer_event *event, 1248 - unsigned long flags, int pc, 1249 - struct pt_regs *regs) 1203 + void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, 1204 + struct ring_buffer_event *event, 1205 + unsigned long flags, int pc, 1206 + struct pt_regs *regs) 1250 1207 { 1251 1208 __buffer_unlock_commit(buffer, event); 1252 1209 1253 1210 ftrace_trace_stack_regs(buffer, flags, 0, pc, regs); 1254 1211 ftrace_trace_userstack(buffer, flags, pc); 1255 1212 } 1256 - EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs); 1213 + EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); 1257 1214 1258 1215 void trace_current_buffer_discard_commit(struct ring_buffer *buffer, 1259 1216 struct ring_buffer_event *event) ··· 3381 3354 } 3382 3355 } 3383 3356 3384 - 3385 - void default_wait_pipe(struct trace_iterator *iter) 3386 - { 3387 - DEFINE_WAIT(wait); 3388 - 3389 - prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); 3390 - 3391 - if (trace_empty(iter)) 3392 - schedule(); 3393 - 3394 - finish_wait(&trace_wait, &wait); 3395 - } 3396 - 3397 3357 /* 3398 3358 * This is a make-shift waitqueue. 3399 3359 * A tracer might use this callback on some rare cases: ··· 5121 5107 #endif 5122 5108 5123 5109 trace_init_cmdlines(); 5110 + init_irq_work(&trace_work_wakeup, trace_wake_up); 5124 5111 5125 5112 register_tracer(&nop_trace); 5126 5113 current_trace = &nop_trace;
-5
kernel/trace/trace.h
··· 327 327 328 328 int tracer_init(struct tracer *t, struct trace_array *tr); 329 329 int tracing_is_enabled(void); 330 - void trace_wake_up(void); 331 330 void tracing_reset(struct trace_array *tr, int cpu); 332 331 void tracing_reset_online_cpus(struct trace_array *tr); 333 332 void tracing_reset_current(int cpu); ··· 348 349 unsigned long len, 349 350 unsigned long flags, 350 351 int pc); 351 - void trace_buffer_unlock_commit(struct ring_buffer *buffer, 352 - struct ring_buffer_event *event, 353 - unsigned long flags, int pc); 354 352 355 353 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 356 354 struct trace_array_cpu *data); ··· 366 370 367 371 void tracing_iter_reset(struct trace_iterator *iter, int cpu); 368 372 369 - void default_wait_pipe(struct trace_iterator *iter); 370 373 void poll_wait_pipe(struct trace_iterator *iter); 371 374 372 375 void ftrace(struct trace_array *tr,
+1 -1
kernel/trace/trace_events.c
··· 1760 1760 entry->ip = ip; 1761 1761 entry->parent_ip = parent_ip; 1762 1762 1763 - trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); 1763 + trace_buffer_unlock_commit(buffer, event, flags, pc); 1764 1764 1765 1765 out: 1766 1766 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
+4 -4
kernel/trace/trace_kprobe.c
··· 751 751 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 752 752 753 753 if (!filter_current_check_discard(buffer, call, entry, event)) 754 - trace_nowake_buffer_unlock_commit_regs(buffer, event, 755 - irq_flags, pc, regs); 754 + trace_buffer_unlock_commit_regs(buffer, event, 755 + irq_flags, pc, regs); 756 756 } 757 757 758 758 /* Kretprobe handler */ ··· 784 784 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 785 785 786 786 if (!filter_current_check_discard(buffer, call, entry, event)) 787 - trace_nowake_buffer_unlock_commit_regs(buffer, event, 788 - irq_flags, pc, regs); 787 + trace_buffer_unlock_commit_regs(buffer, event, 788 + irq_flags, pc, regs); 789 789 } 790 790 791 791 /* Event entry printers */
+1 -1
kernel/trace/trace_sched_switch.c
··· 102 102 entry->next_cpu = task_cpu(wakee); 103 103 104 104 if (!filter_check_discard(call, entry, buffer, event)) 105 - trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); 105 + trace_buffer_unlock_commit(buffer, event, flags, pc); 106 106 } 107 107 108 108 static void
+1
kernel/trace/trace_selftest.c
··· 1094 1094 tracing_stop(); 1095 1095 /* check both trace buffers */ 1096 1096 ret = trace_test_buffer(tr, NULL); 1097 + printk("ret = %d\n", ret); 1097 1098 if (!ret) 1098 1099 ret = trace_test_buffer(&max_tr, &count); 1099 1100