Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rv: Add opid per-cpu monitor

Add a per-cpu monitor as part of the sched model:
* opid: operations with preemption and irq disabled
Monitor to ensure wakeup and need_resched occur with irq and
preemption disabled or in irq handlers.

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tomas Glozar <tglozar@redhat.com>
Cc: Juri Lelli <jlelli@redhat.com>
Cc: Clark Williams <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Link: https://lore.kernel.org/20250728135022.255578-10-gmonaco@redhat.com
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
Acked-by: Nam Cao <namcao@linutronix.de>
Tested-by: Nam Cao <namcao@linutronix.de>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

authored by

Gabriele Monaco and committed by
Steven Rostedt (Google)
61438453 e8440a88

+399
+55
Documentation/trace/rv/monitor_sched.rst
··· 341 341 | | switch_yield 342 342 +-----------+ wakeup 343 343 344 + Monitor opid 345 + ------------ 346 + 347 + The operations with preemption and irq disabled (opid) monitor ensures 348 + operations like ``wakeup`` and ``need_resched`` occur with interrupts and 349 + preemption disabled or during interrupt context, in which case preemption may 350 + not be disabled explicitly. 351 + ``need_resched`` can be set by some RCU internal functions, in which case it 352 + doesn't match a task wakeup and might occur with only interrupts disabled:: 353 + 354 + | sched_need_resched 355 + | sched_waking 356 + | irq_entry 357 + | +--------------------+ 358 + v v | 359 + +------------------------------------------------------+ 360 + +----------- | disabled | <+ 361 + | +------------------------------------------------------+ | 362 + | | ^ | 363 + | | preempt_disable sched_need_resched | 364 + | preempt_enable | +--------------------+ | 365 + | v | v | | 366 + | +------------------------------------------------------+ | 367 + | | irq_disabled | | 368 + | +------------------------------------------------------+ | 369 + | | | ^ | 370 + | irq_entry irq_entry | | | 371 + | sched_need_resched v | irq_disable | 372 + | sched_waking +--------------+ | | | 373 + | +----- | | irq_enable | | 374 + | | | in_irq | | | | 375 + | +----> | | | | | 376 + | +--------------+ | | irq_disable 377 + | | | | | 378 + | irq_enable | irq_enable | | | 379 + | v v | | 380 + | #======================================================# | 381 + | H enabled H | 382 + | #======================================================# | 383 + | | ^ ^ preempt_enable | | 384 + | preempt_disable preempt_enable +--------------------+ | 385 + | v | | 386 + | +------------------+ | | 387 + +----------> | preempt_disabled | -+ | 388 + +------------------+ | 389 + | | 390 + +-------------------------------------------------------+ 391 + 392 + This monitor is designed to work on ``PREEMPT_RT`` kernels. The special case of 393 
+ events occurring in interrupt context is a shortcut to identify valid scenarios 394 + where the preemption tracepoints might not be visible: during interrupts 395 + preemption is always disabled. On non- ``PREEMPT_RT`` kernels, the interrupts 396 + might invoke a softirq to set ``need_resched`` and wake up a task. This is 397 + another special case that is currently not supported by the monitor. 398 + 344 399 References 345 400 ---------- 346 401
+1
kernel/trace/rv/Kconfig
··· 57 57 source "kernel/trace/rv/monitors/sts/Kconfig" 58 58 source "kernel/trace/rv/monitors/nrp/Kconfig" 59 59 source "kernel/trace/rv/monitors/sssw/Kconfig" 60 + source "kernel/trace/rv/monitors/opid/Kconfig" 60 61 # Add new sched monitors here 61 62 62 63 source "kernel/trace/rv/monitors/rtapp/Kconfig"
+1
kernel/trace/rv/Makefile
··· 16 16 obj-$(CONFIG_RV_MON_STS) += monitors/sts/sts.o 17 17 obj-$(CONFIG_RV_MON_NRP) += monitors/nrp/nrp.o 18 18 obj-$(CONFIG_RV_MON_SSSW) += monitors/sssw/sssw.o 19 + obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o 19 20 # Add new monitors here 20 21 obj-$(CONFIG_RV_REACTORS) += rv_reactors.o 21 22 obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
+19
kernel/trace/rv/monitors/opid/Kconfig
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + # 3 + config RV_MON_OPID 4 + depends on RV 5 + depends on TRACE_IRQFLAGS 6 + depends on TRACE_PREEMPT_TOGGLE 7 + depends on RV_MON_SCHED 8 + default y if PREEMPT_RT 9 + select DA_MON_EVENTS_IMPLICIT 10 + bool "opid monitor" 11 + help 12 + Monitor to ensure operations like wakeup and need resched occur with 13 + interrupts and preemption disabled or during IRQs, where preemption 14 + may not be disabled explicitly. 15 + 16 + This monitor is unstable on !PREEMPT_RT, say N unless you are testing it. 17 + 18 + For further information, see: 19 + Documentation/trace/rv/monitor_sched.rst
+168
kernel/trace/rv/monitors/opid/opid.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/ftrace.h> 3 + #include <linux/tracepoint.h> 4 + #include <linux/kernel.h> 5 + #include <linux/module.h> 6 + #include <linux/init.h> 7 + #include <linux/rv.h> 8 + #include <rv/instrumentation.h> 9 + #include <rv/da_monitor.h> 10 + 11 + #define MODULE_NAME "opid" 12 + 13 + #include <trace/events/sched.h> 14 + #include <trace/events/irq.h> 15 + #include <trace/events/preemptirq.h> 16 + #include <rv_trace.h> 17 + #include <monitors/sched/sched.h> 18 + 19 + #include "opid.h" 20 + 21 + static struct rv_monitor rv_opid; 22 + DECLARE_DA_MON_PER_CPU(opid, unsigned char); 23 + 24 + #ifdef CONFIG_X86_LOCAL_APIC 25 + #include <asm/trace/irq_vectors.h> 26 + 27 + static void handle_vector_irq_entry(void *data, int vector) 28 + { 29 + da_handle_event_opid(irq_entry_opid); 30 + } 31 + 32 + static void attach_vector_irq(void) 33 + { 34 + rv_attach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry); 35 + if (IS_ENABLED(CONFIG_IRQ_WORK)) 36 + rv_attach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry); 37 + if (IS_ENABLED(CONFIG_SMP)) { 38 + rv_attach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry); 39 + rv_attach_trace_probe("opid", call_function_entry, handle_vector_irq_entry); 40 + rv_attach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry); 41 + } 42 + } 43 + 44 + static void detach_vector_irq(void) 45 + { 46 + rv_detach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry); 47 + if (IS_ENABLED(CONFIG_IRQ_WORK)) 48 + rv_detach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry); 49 + if (IS_ENABLED(CONFIG_SMP)) { 50 + rv_detach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry); 51 + rv_detach_trace_probe("opid", call_function_entry, handle_vector_irq_entry); 52 + rv_detach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry); 53 + } 54 + } 55 + 56 + #else 57 + /* We assume irq_entry tracepoints are 
sufficient on other architectures */ 58 + static void attach_vector_irq(void) { } 59 + static void detach_vector_irq(void) { } 60 + #endif 61 + 62 + static void handle_irq_disable(void *data, unsigned long ip, unsigned long parent_ip) 63 + { 64 + da_handle_event_opid(irq_disable_opid); 65 + } 66 + 67 + static void handle_irq_enable(void *data, unsigned long ip, unsigned long parent_ip) 68 + { 69 + da_handle_event_opid(irq_enable_opid); 70 + } 71 + 72 + static void handle_irq_entry(void *data, int irq, struct irqaction *action) 73 + { 74 + da_handle_event_opid(irq_entry_opid); 75 + } 76 + 77 + static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip) 78 + { 79 + da_handle_event_opid(preempt_disable_opid); 80 + } 81 + 82 + static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip) 83 + { 84 + da_handle_event_opid(preempt_enable_opid); 85 + } 86 + 87 + static void handle_sched_need_resched(void *data, struct task_struct *tsk, int cpu, int tif) 88 + { 89 + /* The monitor's intitial state is not in_irq */ 90 + if (this_cpu_read(hardirq_context)) 91 + da_handle_event_opid(sched_need_resched_opid); 92 + else 93 + da_handle_start_event_opid(sched_need_resched_opid); 94 + } 95 + 96 + static void handle_sched_waking(void *data, struct task_struct *p) 97 + { 98 + /* The monitor's intitial state is not in_irq */ 99 + if (this_cpu_read(hardirq_context)) 100 + da_handle_event_opid(sched_waking_opid); 101 + else 102 + da_handle_start_event_opid(sched_waking_opid); 103 + } 104 + 105 + static int enable_opid(void) 106 + { 107 + int retval; 108 + 109 + retval = da_monitor_init_opid(); 110 + if (retval) 111 + return retval; 112 + 113 + rv_attach_trace_probe("opid", irq_disable, handle_irq_disable); 114 + rv_attach_trace_probe("opid", irq_enable, handle_irq_enable); 115 + rv_attach_trace_probe("opid", irq_handler_entry, handle_irq_entry); 116 + rv_attach_trace_probe("opid", preempt_disable, handle_preempt_disable); 117 + 
rv_attach_trace_probe("opid", preempt_enable, handle_preempt_enable); 118 + rv_attach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched); 119 + rv_attach_trace_probe("opid", sched_waking, handle_sched_waking); 120 + attach_vector_irq(); 121 + 122 + return 0; 123 + } 124 + 125 + static void disable_opid(void) 126 + { 127 + rv_opid.enabled = 0; 128 + 129 + rv_detach_trace_probe("opid", irq_disable, handle_irq_disable); 130 + rv_detach_trace_probe("opid", irq_enable, handle_irq_enable); 131 + rv_detach_trace_probe("opid", irq_handler_entry, handle_irq_entry); 132 + rv_detach_trace_probe("opid", preempt_disable, handle_preempt_disable); 133 + rv_detach_trace_probe("opid", preempt_enable, handle_preempt_enable); 134 + rv_detach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched); 135 + rv_detach_trace_probe("opid", sched_waking, handle_sched_waking); 136 + detach_vector_irq(); 137 + 138 + da_monitor_destroy_opid(); 139 + } 140 + 141 + /* 142 + * This is the monitor register section. 143 + */ 144 + static struct rv_monitor rv_opid = { 145 + .name = "opid", 146 + .description = "operations with preemption and irq disabled.", 147 + .enable = enable_opid, 148 + .disable = disable_opid, 149 + .reset = da_monitor_reset_all_opid, 150 + .enabled = 0, 151 + }; 152 + 153 + static int __init register_opid(void) 154 + { 155 + return rv_register_monitor(&rv_opid, &rv_sched); 156 + } 157 + 158 + static void __exit unregister_opid(void) 159 + { 160 + rv_unregister_monitor(&rv_opid); 161 + } 162 + 163 + module_init(register_opid); 164 + module_exit(unregister_opid); 165 + 166 + MODULE_LICENSE("GPL"); 167 + MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>"); 168 + MODULE_DESCRIPTION("opid: operations with preemption and irq disabled.");
+104
kernel/trace/rv/monitors/opid/opid.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Automatically generated C representation of opid automaton
 * For further information about this format, see kernel documentation:
 *   Documentation/trace/rv/deterministic_automata.rst
 */

/* Automaton states; the numeric value is the row index in .function. */
enum states_opid {
	disabled_opid = 0,
	enabled_opid,
	in_irq_opid,
	irq_disabled_opid,
	preempt_disabled_opid,
	state_max_opid
};

/* Marks a (state, event) pair that has no transition in the model. */
#define INVALID_STATE state_max_opid

/* Automaton events; the numeric value is the column index in .function. */
enum events_opid {
	irq_disable_opid = 0,
	irq_enable_opid,
	irq_entry_opid,
	preempt_disable_opid,
	preempt_enable_opid,
	sched_need_resched_opid,
	sched_waking_opid,
	event_max_opid
};

/*
 * state_names/event_names: printable names, indexed by the enums above.
 * function:      next state for [current state][event], or INVALID_STATE.
 * initial_state: state the automaton starts from.
 * final_states:  non-zero for each state that is a final state.
 */
struct automaton_opid {
	char *state_names[state_max_opid];
	char *event_names[event_max_opid];
	unsigned char function[state_max_opid][event_max_opid];
	unsigned char initial_state;
	bool final_states[state_max_opid];
};

static const struct automaton_opid automaton_opid = {
	.state_names = {
		"disabled",
		"enabled",
		"in_irq",
		"irq_disabled",
		"preempt_disabled"
	},
	.event_names = {
		"irq_disable",
		"irq_enable",
		"irq_entry",
		"preempt_disable",
		"preempt_enable",
		"sched_need_resched",
		"sched_waking"
	},
	/*
	 * Column order in every row below:
	 *   irq_disable, irq_enable, irq_entry, preempt_disable,
	 *   preempt_enable, sched_need_resched, sched_waking
	 */
	.function = {
		/* disabled */
		{
			INVALID_STATE,
			preempt_disabled_opid,
			disabled_opid,
			INVALID_STATE,
			irq_disabled_opid,
			disabled_opid,
			disabled_opid
		},
		/* enabled */
		{
			irq_disabled_opid,
			INVALID_STATE,
			INVALID_STATE,
			preempt_disabled_opid,
			enabled_opid,
			INVALID_STATE,
			INVALID_STATE
		},
		/* in_irq */
		{
			INVALID_STATE,
			enabled_opid,
			in_irq_opid,
			INVALID_STATE,
			INVALID_STATE,
			in_irq_opid,
			in_irq_opid
		},
		/* irq_disabled */
		{
			INVALID_STATE,
			enabled_opid,
			in_irq_opid,
			disabled_opid,
			INVALID_STATE,
			irq_disabled_opid,
			INVALID_STATE
		},
		/* preempt_disabled */
		{
			disabled_opid,
			INVALID_STATE,
			INVALID_STATE,
			INVALID_STATE,
			enabled_opid,
			INVALID_STATE,
			INVALID_STATE
		},
	},
	.initial_state = disabled_opid,
	/* Indexed by enum states_opid: only "enabled" is flagged. */
	.final_states = { 0, 1, 0, 0, 0 },
};
+15
kernel/trace/rv/monitors/opid/opid_trace.h
/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Snippet to be included in rv_trace.h
 */

#ifdef CONFIG_RV_MON_OPID
/* event_opid: instance of the event_da_monitor class for the opid monitor. */
DEFINE_EVENT(event_da_monitor, event_opid,
	     TP_PROTO(char *state, char *event, char *next_state, bool final_state),
	     TP_ARGS(state, event, next_state, final_state));

/* error_opid: instance of the error_da_monitor class for the opid monitor. */
DEFINE_EVENT(error_da_monitor, error_opid,
	     TP_PROTO(char *state, char *event),
	     TP_ARGS(state, event));
#endif /* CONFIG_RV_MON_OPID */
+1
kernel/trace/rv/rv_trace.h
··· 62 62 #include <monitors/scpd/scpd_trace.h> 63 63 #include <monitors/snep/snep_trace.h> 64 64 #include <monitors/sts/sts_trace.h> 65 + #include <monitors/opid/opid_trace.h> 65 66 // Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here 66 67 67 68 #endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */
+35
tools/verification/models/sched/opid.dot
··· 1 + digraph state_automaton { 2 + center = true; 3 + size = "7,11"; 4 + {node [shape = plaintext, style=invis, label=""] "__init_disabled"}; 5 + {node [shape = circle] "disabled"}; 6 + {node [shape = doublecircle] "enabled"}; 7 + {node [shape = circle] "enabled"}; 8 + {node [shape = circle] "in_irq"}; 9 + {node [shape = circle] "irq_disabled"}; 10 + {node [shape = circle] "preempt_disabled"}; 11 + "__init_disabled" -> "disabled"; 12 + "disabled" [label = "disabled"]; 13 + "disabled" -> "disabled" [ label = "sched_need_resched\nsched_waking\nirq_entry" ]; 14 + "disabled" -> "irq_disabled" [ label = "preempt_enable" ]; 15 + "disabled" -> "preempt_disabled" [ label = "irq_enable" ]; 16 + "enabled" [label = "enabled", color = green3]; 17 + "enabled" -> "enabled" [ label = "preempt_enable" ]; 18 + "enabled" -> "irq_disabled" [ label = "irq_disable" ]; 19 + "enabled" -> "preempt_disabled" [ label = "preempt_disable" ]; 20 + "in_irq" [label = "in_irq"]; 21 + "in_irq" -> "enabled" [ label = "irq_enable" ]; 22 + "in_irq" -> "in_irq" [ label = "sched_need_resched\nsched_waking\nirq_entry" ]; 23 + "irq_disabled" [label = "irq_disabled"]; 24 + "irq_disabled" -> "disabled" [ label = "preempt_disable" ]; 25 + "irq_disabled" -> "enabled" [ label = "irq_enable" ]; 26 + "irq_disabled" -> "in_irq" [ label = "irq_entry" ]; 27 + "irq_disabled" -> "irq_disabled" [ label = "sched_need_resched" ]; 28 + "preempt_disabled" [label = "preempt_disabled"]; 29 + "preempt_disabled" -> "disabled" [ label = "irq_disable" ]; 30 + "preempt_disabled" -> "enabled" [ label = "preempt_enable" ]; 31 + { rank = min ; 32 + "__init_disabled"; 33 + "disabled"; 34 + } 35 + }