Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched: Add sched tracepoints for RV task model

Add the following tracepoints:
* sched_entry(bool preempt, ip)
Called while entering __schedule
* sched_exit(bool is_switch, ip)
Called while exiting __schedule
* sched_set_state(task, curr_state, state)
Called when a task changes its state (to and from running)

These tracepoints are useful to describe the Linux task model and are
adapted from the patches by Daniel Bristot de Oliveira
(https://bristot.me/linux-task-model/).

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Link: https://lore.kernel.org/20250305140406.350227-2-gmonaco@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

Authored by Gabriele Monaco; committed by Steven Rostedt (Google)
26f80681 41a4d2d3

+53 -3
+1 -1
include/linux/rv.h
··· 7 7 #ifndef _LINUX_RV_H 8 8 #define _LINUX_RV_H 9 9 10 - #define MAX_DA_NAME_LEN 24 10 + #define MAX_DA_NAME_LEN 32 11 11 12 12 #ifdef CONFIG_RV 13 13 /*
+16
include/linux/sched.h
··· 46 46 #include <linux/rv.h> 47 47 #include <linux/livepatch_sched.h> 48 48 #include <linux/uidgid_types.h> 49 + #include <linux/tracepoint-defs.h> 49 50 #include <asm/kmap_size.h> 50 51 51 52 /* task_struct member predeclarations (sorted alphabetically): */ ··· 187 186 # define debug_rtlock_wait_restore_state() do { } while (0) 188 187 #endif 189 188 189 + #define trace_set_current_state(state_value) \ 190 + do { \ 191 + if (tracepoint_enabled(sched_set_state_tp)) \ 192 + __trace_set_current_state(state_value); \ 193 + } while (0) 194 + 190 195 /* 191 196 * set_current_state() includes a barrier so that the write of current->__state 192 197 * is correctly serialised wrt the caller's subsequent test of whether to ··· 233 226 #define __set_current_state(state_value) \ 234 227 do { \ 235 228 debug_normal_state_change((state_value)); \ 229 + trace_set_current_state(state_value); \ 236 230 WRITE_ONCE(current->__state, (state_value)); \ 237 231 } while (0) 238 232 239 233 #define set_current_state(state_value) \ 240 234 do { \ 241 235 debug_normal_state_change((state_value)); \ 236 + trace_set_current_state(state_value); \ 242 237 smp_store_mb(current->__state, (state_value)); \ 243 238 } while (0) 244 239 ··· 256 247 \ 257 248 raw_spin_lock_irqsave(&current->pi_lock, flags); \ 258 249 debug_special_state_change((state_value)); \ 250 + trace_set_current_state(state_value); \ 259 251 WRITE_ONCE(current->__state, (state_value)); \ 260 252 raw_spin_unlock_irqrestore(&current->pi_lock, flags); \ 261 253 } while (0) ··· 292 282 raw_spin_lock(&current->pi_lock); \ 293 283 current->saved_state = current->__state; \ 294 284 debug_rtlock_wait_set_state(); \ 285 + trace_set_current_state(TASK_RTLOCK_WAIT); \ 295 286 WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ 296 287 raw_spin_unlock(&current->pi_lock); \ 297 288 } while (0); ··· 302 291 lockdep_assert_irqs_disabled(); \ 303 292 raw_spin_lock(&current->pi_lock); \ 304 293 debug_rtlock_wait_restore_state(); \ 294 + trace_set_current_state(current->saved_state); \ 305 295 WRITE_ONCE(current->__state, current->saved_state); \ 306 296 current->saved_state = TASK_RUNNING; \ 307 297 raw_spin_unlock(&current->pi_lock); \ ··· 338 326 extern void io_schedule_finish(int token); 339 327 extern long io_schedule_timeout(long timeout); 340 328 extern void io_schedule(void); 329 + 330 + /* wrapper function to trace from this header file */ 331 + DECLARE_TRACEPOINT(sched_set_state_tp); 332 + extern void __trace_set_current_state(int state_value); 341 333 342 334 /** 343 335 * struct prev_cputime - snapshot of system and user cputime
+13
include/trace/events/sched.h
··· 824 824 unsigned long max_util, unsigned long busy_time), 825 825 TP_ARGS(p, dst_cpu, energy, max_util, busy_time)); 826 826 827 + DECLARE_TRACE(sched_entry_tp, 828 + TP_PROTO(bool preempt, unsigned long ip), 829 + TP_ARGS(preempt, ip)); 830 + 831 + DECLARE_TRACE(sched_exit_tp, 832 + TP_PROTO(bool is_switch, unsigned long ip), 833 + TP_ARGS(is_switch, ip)); 834 + 835 + DECLARE_TRACE_CONDITION(sched_set_state_tp, 836 + TP_PROTO(struct task_struct *tsk, int state), 837 + TP_ARGS(tsk, state), 838 + TP_CONDITION(!!(tsk->__state) != !!state)); 839 + 827 840 #endif /* _TRACE_SCHED_H */ 828 841 829 842 /* This part must be outside protection */
+22 -1
kernel/sched/core.c
··· 491 491 492 492 #endif /* CONFIG_SCHED_CORE */ 493 493 494 + /* need a wrapper since we may need to trace from modules */ 495 + EXPORT_TRACEPOINT_SYMBOL(sched_set_state_tp); 496 + 497 + /* Call via the helper macro trace_set_current_state. */ 498 + void __trace_set_current_state(int state_value) 499 + { 500 + trace_sched_set_state_tp(current, state_value); 501 + } 502 + EXPORT_SYMBOL(__trace_set_current_state); 503 + 494 504 /* 495 505 * Serialization rules: 496 506 * ··· 5317 5307 */ 5318 5308 5319 5309 finish_task_switch(prev); 5310 + /* 5311 + * This is a special case: the newly created task has just 5312 + * switched the context for the first time. It is returning from 5313 + * schedule for the first time in this path. 5314 + */ 5315 + trace_sched_exit_tp(true, CALLER_ADDR0); 5320 5316 preempt_enable(); 5321 5317 5322 5318 if (current->set_child_tid) ··· 6666 6650 * as a preemption by schedule_debug() and RCU. 6667 6651 */ 6668 6652 bool preempt = sched_mode > SM_NONE; 6653 + bool is_switch = false; 6669 6654 unsigned long *switch_count; 6670 6655 unsigned long prev_state; 6671 6656 struct rq_flags rf; 6672 6657 struct rq *rq; 6673 6658 int cpu; 6659 + 6660 + trace_sched_entry_tp(preempt, CALLER_ADDR0); 6674 6661 6675 6662 cpu = smp_processor_id(); 6676 6663 rq = cpu_rq(cpu); ··· 6742 6723 rq->last_seen_need_resched_ns = 0; 6743 6724 #endif 6744 6725 6745 - if (likely(prev != next)) { 6726 + is_switch = prev != next; 6727 + if (likely(is_switch)) { 6746 6728 rq->nr_switches++; 6747 6729 /* 6748 6730 * RCU users of rcu_dereference(rq->curr) may not see ··· 6788 6768 __balance_callbacks(rq); 6789 6769 raw_spin_rq_unlock_irq(rq); 6790 6770 } 6771 + trace_sched_exit_tp(is_switch, CALLER_ADDR0); 6791 6772 } 6792 6773 6793 6774 void __noreturn do_task_dead(void)
+1 -1
tools/verification/rv/include/rv.h
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #define MAX_DESCRIPTION 1024 4 - #define MAX_DA_NAME_LEN 24 4 + #define MAX_DA_NAME_LEN 32 5 5 6 6 struct monitor { 7 7 char name[MAX_DA_NAME_LEN];