Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ftrace: trace preempt off critical timings

Add preempt off timings. A lot of kernel core code is taken from the RT patch
latency trace that was written by Ingo Molnar.

This adds "preemptoff" and "preemptirqsoff" to /debugfs/tracing/available_tracers

Now instead of just tracing irqs off, preemption off can be selected
to be recorded.

When this is selected, it shares the same files as irqs off timings.
One can trace preemption off only, irqs off only, or both together.

By echoing "preemptoff" into /debugfs/tracing/current_tracer, recording
of preempt off only is performed. "irqsoff" will only record the time
irqs are disabled, but "preemptirqsoff" will take the total time irqs
or preemption are disabled. Runtime switching of these options is now
supported by simply echoing the appropriate trace name into
/debugfs/tracing/current_tracer.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

authored by

Steven Rostedt and committed by
Thomas Gleixner
6cd8a4bb 81d68a96

+197 -53
+3
arch/x86/kernel/process_32.c
··· 185 185 186 186 local_irq_disable(); 187 187 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 188 + /* Don't trace irqs off for idle */ 189 + stop_critical_timings(); 188 190 idle(); 191 + start_critical_timings(); 189 192 } 190 193 tick_nohz_restart_sched_tick(); 191 194 preempt_enable_no_resched();
+8
include/linux/ftrace.h
··· 58 58 # define time_hardirqs_off(a0, a1) do { } while (0) 59 59 #endif 60 60 61 + #ifdef CONFIG_PREEMPT_TRACER 62 + extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); 63 + extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); 64 + #else 65 + # define trace_preempt_on(a0, a1) do { } while (0) 66 + # define trace_preempt_off(a0, a1) do { } while (0) 67 + #endif 68 + 61 69 #endif /* _LINUX_FTRACE_H */
+2 -1
include/linux/irqflags.h
··· 41 41 # define INIT_TRACE_IRQFLAGS 42 42 #endif 43 43 44 - #ifdef CONFIG_IRQSOFF_TRACER 44 + #if defined(CONFIG_IRQSOFF_TRACER) || \ 45 + defined(CONFIG_PREEMPT_TRACER) 45 46 extern void stop_critical_timings(void); 46 47 extern void start_critical_timings(void); 47 48 #else
+1 -1
include/linux/preempt.h
··· 10 10 #include <linux/linkage.h> 11 11 #include <linux/list.h> 12 12 13 - #ifdef CONFIG_DEBUG_PREEMPT 13 + #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) 14 14 extern void add_preempt_count(int val); 15 15 extern void sub_preempt_count(int val); 16 16 #else
+23 -1
kernel/sched.c
··· 70 70 #include <linux/bootmem.h> 71 71 #include <linux/debugfs.h> 72 72 #include <linux/ctype.h> 73 + #include <linux/ftrace.h> 73 74 74 75 #include <asm/tlb.h> 75 76 #include <asm/irq_regs.h> ··· 4366 4365 #endif 4367 4366 } 4368 4367 4369 - #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) 4368 + #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ 4369 + defined(CONFIG_PREEMPT_TRACER)) 4370 + 4371 + static inline unsigned long get_parent_ip(unsigned long addr) 4372 + { 4373 + if (in_lock_functions(addr)) { 4374 + addr = CALLER_ADDR2; 4375 + if (in_lock_functions(addr)) 4376 + addr = CALLER_ADDR3; 4377 + } 4378 + return addr; 4379 + } 4370 4380 4371 4381 void __kprobes add_preempt_count(int val) 4372 4382 { 4383 + #ifdef CONFIG_DEBUG_PREEMPT 4373 4384 /* 4374 4385 * Underflow? 4375 4386 */ 4376 4387 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) 4377 4388 return; 4389 + #endif 4378 4390 preempt_count() += val; 4391 + #ifdef CONFIG_DEBUG_PREEMPT 4379 4392 /* 4380 4393 * Spinlock count overflowing soon? 4381 4394 */ 4382 4395 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= 4383 4396 PREEMPT_MASK - 10); 4397 + #endif 4398 + if (preempt_count() == val) 4399 + trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); 4384 4400 } 4385 4401 EXPORT_SYMBOL(add_preempt_count); 4386 4402 4387 4403 void __kprobes sub_preempt_count(int val) 4388 4404 { 4405 + #ifdef CONFIG_DEBUG_PREEMPT 4389 4406 /* 4390 4407 * Underflow? 4391 4408 */ ··· 4415 4396 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && 4416 4397 !(preempt_count() & PREEMPT_MASK))) 4417 4398 return; 4399 + #endif 4418 4400 4401 + if (preempt_count() == val) 4402 + trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); 4419 4403 preempt_count() -= val; 4420 4404 } 4421 4405 EXPORT_SYMBOL(sub_preempt_count);
+25
kernel/trace/Kconfig
··· 44 44 45 45 echo 0 > /debugfs/tracing/tracing_max_latency 46 46 47 + (Note that kernel size and overhead increases with this option 48 + enabled. This option and the preempt-off timing option can be 49 + used together or separately.) 50 + 51 + config PREEMPT_TRACER 52 + bool "Preemption-off Latency Tracer" 53 + default n 54 + depends on GENERIC_TIME 55 + depends on PREEMPT 56 + select TRACING 57 + select TRACER_MAX_TRACE 58 + help 59 + This option measures the time spent in preemption off critical 60 + sections, with microsecond accuracy. 61 + 62 + The default measurement method is a maximum search, which is 63 + disabled by default and can be runtime (re-)started 64 + via: 65 + 66 + echo 0 > /debugfs/tracing/tracing_max_latency 67 + 68 + (Note that kernel size and overhead increases with this option 69 + enabled. This option and the irqs-off timing option can be 70 + used together or separately.) 71 + 47 72 config SCHED_TRACER 48 73 bool "Scheduling Latency Tracer" 49 74 depends on DEBUG_KERNEL
+1
kernel/trace/Makefile
··· 4 4 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 5 5 obj-$(CONFIG_FTRACE) += trace_functions.o 6 6 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 7 + obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 7 8 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o 8 9 9 10 libftrace-y := ftrace.o
+134 -50
kernel/trace/trace_irqsoff.c
··· 21 21 static struct trace_array *irqsoff_trace __read_mostly; 22 22 static int tracer_enabled __read_mostly; 23 23 24 + static DEFINE_PER_CPU(int, tracing_cpu); 25 + 26 + enum { 27 + TRACER_IRQS_OFF = (1 << 1), 28 + TRACER_PREEMPT_OFF = (1 << 2), 29 + }; 30 + 31 + static int trace_type __read_mostly; 32 + 33 + #ifdef CONFIG_PREEMPT_TRACER 34 + static inline int notrace 35 + preempt_trace(void) 36 + { 37 + return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count()); 38 + } 39 + #else 40 + # define preempt_trace() (0) 41 + #endif 42 + 43 + #ifdef CONFIG_IRQSOFF_TRACER 44 + static inline int notrace 45 + irq_trace(void) 46 + { 47 + return ((trace_type & TRACER_IRQS_OFF) && 48 + irqs_disabled()); 49 + } 50 + #else 51 + # define irq_trace() (0) 52 + #endif 53 + 24 54 /* 25 55 * Sequence count - we record it when starting a measurement and 26 56 * skip the latency if the sequence has changed - some other section ··· 74 44 long disabled; 75 45 int cpu; 76 46 77 - if (likely(!tracer_enabled)) 47 + if (likely(!__get_cpu_var(tracing_cpu))) 78 48 return; 79 49 80 50 local_save_flags(flags); 81 - 82 - if (!irqs_disabled_flags(flags)) 83 - return; 84 51 85 52 cpu = raw_smp_processor_id(); 86 53 data = tr->data[cpu]; ··· 198 171 if (likely(!tracer_enabled)) 199 172 return; 200 173 174 + if (__get_cpu_var(tracing_cpu)) 175 + return; 176 + 201 177 cpu = raw_smp_processor_id(); 202 178 data = tr->data[cpu]; 203 179 204 180 if (unlikely(!data) || unlikely(!data->trace) || 205 - data->critical_start || atomic_read(&data->disabled)) 181 + atomic_read(&data->disabled)) 206 182 return; 207 183 208 184 atomic_inc(&data->disabled); 209 185 210 186 data->critical_sequence = max_sequence; 211 187 data->preempt_timestamp = now(cpu); 212 - data->critical_start = parent_ip; 188 + data->critical_start = parent_ip ? 
: ip; 213 189 tracing_reset(data); 214 190 215 191 local_save_flags(flags); 192 + 216 193 ftrace(tr, data, ip, parent_ip, flags); 194 + 195 + __get_cpu_var(tracing_cpu) = 1; 217 196 218 197 atomic_dec(&data->disabled); 219 198 } ··· 232 199 struct trace_array_cpu *data; 233 200 unsigned long flags; 234 201 235 - if (likely(!tracer_enabled)) 202 + /* Always clear the tracing cpu on stopping the trace */ 203 + if (unlikely(__get_cpu_var(tracing_cpu))) 204 + __get_cpu_var(tracing_cpu) = 0; 205 + else 206 + return; 207 + 208 + if (!tracer_enabled) 236 209 return; 237 210 238 211 cpu = raw_smp_processor_id(); ··· 251 212 atomic_inc(&data->disabled); 252 213 local_save_flags(flags); 253 214 ftrace(tr, data, ip, parent_ip, flags); 254 - check_critical_timing(tr, data, parent_ip, cpu); 215 + check_critical_timing(tr, data, parent_ip ? : ip, cpu); 255 216 data->critical_start = 0; 256 217 atomic_dec(&data->disabled); 257 218 } 258 219 220 + /* start and stop critical timings used to for stoppage (in idle) */ 259 221 void notrace start_critical_timings(void) 260 222 { 261 - unsigned long flags; 262 - 263 - local_save_flags(flags); 264 - 265 - if (irqs_disabled_flags(flags)) 223 + if (preempt_trace() || irq_trace()) 266 224 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); 267 225 } 268 226 269 227 void notrace stop_critical_timings(void) 270 228 { 271 - unsigned long flags; 272 - 273 - local_save_flags(flags); 274 - 275 - if (irqs_disabled_flags(flags)) 229 + if (preempt_trace() || irq_trace()) 276 230 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); 277 231 } 278 232 233 + #ifdef CONFIG_IRQSOFF_TRACER 279 234 #ifdef CONFIG_PROVE_LOCKING 280 235 void notrace time_hardirqs_on(unsigned long a0, unsigned long a1) 281 236 { 282 - unsigned long flags; 283 - 284 - local_save_flags(flags); 285 - 286 - if (irqs_disabled_flags(flags)) 237 + if (!preempt_trace() && irq_trace()) 287 238 stop_critical_timing(a0, a1); 288 239 } 289 240 290 241 void notrace time_hardirqs_off(unsigned 
long a0, unsigned long a1) 291 242 { 292 - unsigned long flags; 293 - 294 - local_save_flags(flags); 295 - 296 - if (irqs_disabled_flags(flags)) 243 + if (!preempt_trace() && irq_trace()) 297 244 start_critical_timing(a0, a1); 298 245 } 299 246 ··· 314 289 */ 315 290 void notrace trace_hardirqs_on(void) 316 291 { 317 - unsigned long flags; 318 - 319 - local_save_flags(flags); 320 - 321 - if (irqs_disabled_flags(flags)) 292 + if (!preempt_trace() && irq_trace()) 322 293 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); 323 294 } 324 295 EXPORT_SYMBOL(trace_hardirqs_on); 325 296 326 297 void notrace trace_hardirqs_off(void) 327 298 { 328 - unsigned long flags; 329 - 330 - local_save_flags(flags); 331 - 332 - if (irqs_disabled_flags(flags)) 299 + if (!preempt_trace() && irq_trace()) 333 300 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); 334 301 } 335 302 EXPORT_SYMBOL(trace_hardirqs_off); 336 303 337 304 void notrace trace_hardirqs_on_caller(unsigned long caller_addr) 338 305 { 339 - unsigned long flags; 340 - 341 - local_save_flags(flags); 342 - 343 - if (irqs_disabled_flags(flags)) 306 + if (!preempt_trace() && irq_trace()) 344 307 stop_critical_timing(CALLER_ADDR0, caller_addr); 345 308 } 346 309 EXPORT_SYMBOL(trace_hardirqs_on_caller); 347 310 348 311 void notrace trace_hardirqs_off_caller(unsigned long caller_addr) 349 312 { 350 - unsigned long flags; 351 - 352 - local_save_flags(flags); 353 - 354 - if (irqs_disabled_flags(flags)) 313 + if (!preempt_trace() && irq_trace()) 355 314 start_critical_timing(CALLER_ADDR0, caller_addr); 356 315 } 357 316 EXPORT_SYMBOL(trace_hardirqs_off_caller); 358 317 359 318 #endif /* CONFIG_PROVE_LOCKING */ 319 + #endif /* CONFIG_IRQSOFF_TRACER */ 320 + 321 + #ifdef CONFIG_PREEMPT_TRACER 322 + void notrace trace_preempt_on(unsigned long a0, unsigned long a1) 323 + { 324 + stop_critical_timing(a0, a1); 325 + } 326 + 327 + void notrace trace_preempt_off(unsigned long a0, unsigned long a1) 328 + { 329 + start_critical_timing(a0, 
a1); 330 + } 331 + #endif /* CONFIG_PREEMPT_TRACER */ 360 332 361 333 static void start_irqsoff_tracer(struct trace_array *tr) 362 334 { ··· 367 345 tracer_enabled = 0; 368 346 } 369 347 370 - static void irqsoff_tracer_init(struct trace_array *tr) 348 + static void __irqsoff_tracer_init(struct trace_array *tr) 371 349 { 372 350 irqsoff_trace = tr; 373 351 /* make sure that the tracer is visibel */ ··· 404 382 start_irqsoff_tracer(iter->tr); 405 383 } 406 384 385 + #ifdef CONFIG_IRQSOFF_TRACER 386 + static void irqsoff_tracer_init(struct trace_array *tr) 387 + { 388 + trace_type = TRACER_IRQS_OFF; 389 + 390 + __irqsoff_tracer_init(tr); 391 + } 407 392 static struct tracer irqsoff_tracer __read_mostly = 408 393 { 409 394 .name = "irqsoff", ··· 421 392 .ctrl_update = irqsoff_tracer_ctrl_update, 422 393 .print_max = 1, 423 394 }; 395 + # define register_irqsoff(trace) register_tracer(&trace) 396 + #else 397 + # define register_irqsoff(trace) do { } while (0) 398 + #endif 399 + 400 + #ifdef CONFIG_PREEMPT_TRACER 401 + static void preemptoff_tracer_init(struct trace_array *tr) 402 + { 403 + trace_type = TRACER_PREEMPT_OFF; 404 + 405 + __irqsoff_tracer_init(tr); 406 + } 407 + 408 + static struct tracer preemptoff_tracer __read_mostly = 409 + { 410 + .name = "preemptoff", 411 + .init = preemptoff_tracer_init, 412 + .reset = irqsoff_tracer_reset, 413 + .open = irqsoff_tracer_open, 414 + .close = irqsoff_tracer_close, 415 + .ctrl_update = irqsoff_tracer_ctrl_update, 416 + .print_max = 1, 417 + }; 418 + # define register_preemptoff(trace) register_tracer(&trace) 419 + #else 420 + # define register_preemptoff(trace) do { } while (0) 421 + #endif 422 + 423 + #if defined(CONFIG_IRQSOFF_TRACER) && \ 424 + defined(CONFIG_PREEMPT_TRACER) 425 + 426 + static void preemptirqsoff_tracer_init(struct trace_array *tr) 427 + { 428 + trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; 429 + 430 + __irqsoff_tracer_init(tr); 431 + } 432 + 433 + static struct tracer preemptirqsoff_tracer 
__read_mostly = 434 + { 435 + .name = "preemptirqsoff", 436 + .init = preemptirqsoff_tracer_init, 437 + .reset = irqsoff_tracer_reset, 438 + .open = irqsoff_tracer_open, 439 + .close = irqsoff_tracer_close, 440 + .ctrl_update = irqsoff_tracer_ctrl_update, 441 + .print_max = 1, 442 + }; 443 + 444 + # define register_preemptirqsoff(trace) register_tracer(&trace) 445 + #else 446 + # define register_preemptirqsoff(trace) do { } while (0) 447 + #endif 424 448 425 449 __init static int init_irqsoff_tracer(void) 426 450 { 427 - register_tracer(&irqsoff_tracer); 451 + register_irqsoff(irqsoff_tracer); 452 + register_preemptoff(preemptoff_tracer); 453 + register_preemptirqsoff(preemptirqsoff_tracer); 428 454 429 455 return 0; 430 456 }