Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched: Introduce the 'trace_sched_waking' tracepoint

Mathieu reported that since 317f394160e9 ("sched: Move the second half
of ttwu() to the remote cpu") trace_sched_wakeup() can happen out of
context of the waker.

This is a problem when you want to analyse wakeup paths because it is
now very hard to correlate the wakeup event to whoever issued the
wakeup.

OTOH trace_sched_wakeup() is issued at the point where we set
p->state = TASK_RUNNING, which is right where we hand the task off to
the scheduler, so this is an important point when looking at
scheduling behaviour: up to here it's been the wakeup path; everything
hereafter is due to scheduler policy.

To bridge this gap, introduce a second tracepoint: trace_sched_waking.
It is guaranteed to be called in the waker context.

Reported-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Francis Giraldeau <francis.giraldeau@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150609091336.GQ3644@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Peter Zijlstra and committed by
Ingo Molnar
fbd705a0 9d7fb042

+30 -14
+21 -9
include/trace/events/sched.h
··· 55 55 */ 56 56 DECLARE_EVENT_CLASS(sched_wakeup_template, 57 57 58 - TP_PROTO(struct task_struct *p, int success), 58 + TP_PROTO(struct task_struct *p), 59 59 60 - TP_ARGS(__perf_task(p), success), 60 + TP_ARGS(__perf_task(p)), 61 61 62 62 TP_STRUCT__entry( 63 63 __array( char, comm, TASK_COMM_LEN ) ··· 71 71 memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 72 72 __entry->pid = p->pid; 73 73 __entry->prio = p->prio; 74 - __entry->success = success; 74 + __entry->success = 1; /* rudiment, kill when possible */ 75 75 __entry->target_cpu = task_cpu(p); 76 76 ), 77 77 78 - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", 78 + TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d", 79 79 __entry->comm, __entry->pid, __entry->prio, 80 - __entry->success, __entry->target_cpu) 80 + __entry->target_cpu) 81 81 ); 82 82 83 + /* 84 + * Tracepoint called when waking a task; this tracepoint is guaranteed to be 85 + * called from the waking context. 86 + */ 87 + DEFINE_EVENT(sched_wakeup_template, sched_waking, 88 + TP_PROTO(struct task_struct *p), 89 + TP_ARGS(p)); 90 + 91 + /* 92 + * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG. 93 + * It it not always called from the waking context. 94 + */ 83 95 DEFINE_EVENT(sched_wakeup_template, sched_wakeup, 84 - TP_PROTO(struct task_struct *p, int success), 85 - TP_ARGS(p, success)); 96 + TP_PROTO(struct task_struct *p), 97 + TP_ARGS(p)); 86 98 87 99 /* 88 100 * Tracepoint for waking up a new task: 89 101 */ 90 102 DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, 91 - TP_PROTO(struct task_struct *p, int success), 92 - TP_ARGS(p, success)); 103 + TP_PROTO(struct task_struct *p), 104 + TP_ARGS(p)); 93 105 94 106 #ifdef CREATE_TRACE_POINTS 95 107 static inline long __trace_sched_switch_state(struct task_struct *p)
+7 -3
kernel/sched/core.c
··· 1654 1654 ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) 1655 1655 { 1656 1656 check_preempt_curr(rq, p, wake_flags); 1657 - trace_sched_wakeup(p, true); 1658 - 1659 1657 p->state = TASK_RUNNING; 1658 + trace_sched_wakeup(p); 1659 + 1660 1660 #ifdef CONFIG_SMP 1661 1661 if (p->sched_class->task_woken) { 1662 1662 /* ··· 1874 1874 if (!(p->state & state)) 1875 1875 goto out; 1876 1876 1877 + trace_sched_waking(p); 1878 + 1877 1879 success = 1; /* we're going to change ->state */ 1878 1880 cpu = task_cpu(p); 1879 1881 ··· 1950 1948 1951 1949 if (!(p->state & TASK_NORMAL)) 1952 1950 goto out; 1951 + 1952 + trace_sched_waking(p); 1953 1953 1954 1954 if (!task_on_rq_queued(p)) 1955 1955 ttwu_activate(rq, p, ENQUEUE_WAKEUP); ··· 2311 2307 rq = __task_rq_lock(p); 2312 2308 activate_task(rq, p, 0); 2313 2309 p->on_rq = TASK_ON_RQ_QUEUED; 2314 - trace_sched_wakeup_new(p, true); 2310 + trace_sched_wakeup_new(p); 2315 2311 check_preempt_curr(rq, p, WF_FORK); 2316 2312 #ifdef CONFIG_SMP 2317 2313 if (p->sched_class->task_woken)
+1 -1
kernel/trace/trace_sched_switch.c
··· 26 26 } 27 27 28 28 static void 29 - probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success) 29 + probe_sched_wakeup(void *ignore, struct task_struct *wakee) 30 30 { 31 31 if (unlikely(!sched_ref)) 32 32 return;
+1 -1
kernel/trace/trace_sched_wakeup.c
··· 514 514 } 515 515 516 516 static void 517 - probe_wakeup(void *ignore, struct task_struct *p, int success) 517 + probe_wakeup(void *ignore, struct task_struct *p) 518 518 { 519 519 struct trace_array_cpu *data; 520 520 int cpu = smp_processor_id();