Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'trace-v5.16-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull more tracing updates from Steven Rostedt:

- osnoise and timerlat updates that will work with the RTLA tool
(Real-Time Linux Analysis).

Specifically it disconnects the work load (threads that look for
latency) from the tracing instances attached to them, allowing for
more than one instance to retrieve data from the work load.

- Optimization on division in the trace histogram trigger code to use
shift and multiply when possible. Also added documentation.

- Fix prototype to my_direct_func in direct ftrace trampoline sample
code.

* tag 'trace-v5.16-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
ftrace/samples: Add missing prototype for my_direct_func
tracing/selftests: Add tests for hist trigger expression parsing
tracing/histogram: Document hist trigger variables
tracing/histogram: Update division by 0 documentation
tracing/histogram: Optimize division by constants
tracing/osnoise: Remove PREEMPT_RT ifdefs from inside functions
tracing/osnoise: Remove STACKTRACE ifdefs from inside functions
tracing/osnoise: Allow multiple instances of the same tracer
tracing/osnoise: Remove TIMERLAT ifdefs from inside functions
tracing/osnoise: Support a list of trace_array *tr
tracing/osnoise: Use start/stop_per_cpu_kthreads() on osnoise_cpus_write()
tracing/osnoise: Split workload start from the tracer start
tracing/osnoise: Improve comments about barrier need for NMI callbacks
tracing/osnoise: Do not follow tracing_cpumask

+617 -184
+2 -1
Documentation/trace/histogram.rst
··· 1766 1766 Expressions support the use of addition, subtraction, multiplication and 1767 1767 division operators (+-\*/). 1768 1768 1769 - Note that division by zero always returns -1. 1769 + Note if division by zero cannot be detected at parse time (i.e. the 1770 + divisor is not a constant), the result will be -1. 1770 1771 1771 1772 Numeric constants can also be used directly in an expression:: 1772 1773
+11
kernel/trace/trace.c
··· 5605 5605 #ifdef CONFIG_HIST_TRIGGERS 5606 5606 " hist trigger\t- If set, event hits are aggregated into a hash table\n" 5607 5607 "\t Format: hist:keys=<field1[,field2,...]>\n" 5608 + "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" 5608 5609 "\t [:values=<field1[,field2,...]>]\n" 5609 5610 "\t [:sort=<field1[,field2,...]>]\n" 5610 5611 "\t [:size=#entries]\n" ··· 5616 5615 "\t Note, special fields can be used as well:\n" 5617 5616 "\t common_timestamp - to record current timestamp\n" 5618 5617 "\t common_cpu - to record the CPU the event happened on\n" 5618 + "\n" 5619 + "\t A hist trigger variable can be:\n" 5620 + "\t - a reference to a field e.g. x=current_timestamp,\n" 5621 + "\t - a reference to another variable e.g. y=$x,\n" 5622 + "\t - a numeric literal: e.g. ms_per_sec=1000,\n" 5623 + "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" 5624 + "\n" 5625 + "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n" 5626 + "\t multiplication(*) and division(/) operators. An operand can be either a\n" 5627 + "\t variable reference, field or numeric literal.\n" 5619 5628 "\n" 5620 5629 "\t When a matching event is hit, an entry is added to a hash\n" 5621 5630 "\t table using the key(s) and value(s) named, and the value of a\n"
+104 -1
kernel/trace/trace_events_hist.c
··· 68 68 C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \ 69 69 C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \ 70 70 C(EXPECT_NUMBER, "Expecting numeric literal"), \ 71 - C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), 71 + C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \ 72 + C(DIVISION_BY_ZERO, "Division by zero"), 72 73 73 74 #undef C 74 75 #define C(a, b) HIST_ERR_##a ··· 93 92 #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) 94 93 #define HIST_ACTIONS_MAX 8 95 94 #define HIST_CONST_DIGITS_MAX 21 95 + #define HIST_DIV_SHIFT 20 /* For optimizing division by constants */ 96 96 97 97 enum field_op_id { 98 98 FIELD_OP_NONE, ··· 162 160 163 161 /* Numeric literals are represented as u64 */ 164 162 u64 constant; 163 + /* Used to optimize division by constants */ 164 + u64 div_multiplier; 165 165 }; 166 166 167 167 static u64 hist_field_none(struct hist_field *field, ··· 313 309 return val1 >> __ffs64(val2); 314 310 315 311 return div64_u64(val1, val2); 312 + } 313 + 314 + static u64 div_by_power_of_two(struct hist_field *hist_field, 315 + struct tracing_map_elt *elt, 316 + struct trace_buffer *buffer, 317 + struct ring_buffer_event *rbe, 318 + void *event) 319 + { 320 + struct hist_field *operand1 = hist_field->operands[0]; 321 + struct hist_field *operand2 = hist_field->operands[1]; 322 + 323 + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); 324 + 325 + return val1 >> __ffs64(operand2->constant); 326 + } 327 + 328 + static u64 div_by_not_power_of_two(struct hist_field *hist_field, 329 + struct tracing_map_elt *elt, 330 + struct trace_buffer *buffer, 331 + struct ring_buffer_event *rbe, 332 + void *event) 333 + { 334 + struct hist_field *operand1 = hist_field->operands[0]; 335 + struct hist_field *operand2 = hist_field->operands[1]; 336 + 337 + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); 338 + 339 + return div64_u64(val1, operand2->constant); 340 + } 341 + 342 + static u64 div_by_mult_and_shift(struct hist_field *hist_field, 343 + struct tracing_map_elt *elt, 344 + struct trace_buffer *buffer, 345 + struct ring_buffer_event *rbe, 346 + void *event) 347 + { 348 + struct hist_field *operand1 = hist_field->operands[0]; 349 + struct hist_field *operand2 = hist_field->operands[1]; 350 + 351 + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); 352 + 353 + /* 354 + * If the divisor is a constant, do a multiplication and shift instead. 355 + * 356 + * Choose Z = some power of 2. If Y <= Z, then: 357 + * X / Y = (X * (Z / Y)) / Z 358 + * 359 + * (Z / Y) is a constant (mult) which is calculated at parse time, so: 360 + * X / Y = (X * mult) / Z 361 + * 362 + * The division by Z can be replaced by a shift since Z is a power of 2: 363 + * X / Y = (X * mult) >> HIST_DIV_SHIFT 364 + * 365 + * As long, as X < Z the results will not be off by more than 1. 366 + */ 367 + if (val1 < (1 << HIST_DIV_SHIFT)) { 368 + u64 mult = operand2->div_multiplier; 369 + 370 + return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT; 371 + } 372 + 373 + return div64_u64(val1, operand2->constant); 316 374 } 317 375 318 376 static u64 hist_field_mult(struct hist_field *hist_field, ··· 638 572 struct tracing_map_elt *elt; 639 573 void *key; 640 574 }; 575 + 576 + /* 577 + * Returns the specific division function to use if the divisor 578 + * is constant. This avoids extra branches when the trigger is hit. 579 + */ 580 + static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor) 581 + { 582 + u64 div = divisor->constant; 583 + 584 + if (!(div & (div - 1))) 585 + return div_by_power_of_two; 586 + 587 + /* If the divisor is too large, do a regular division */ 588 + if (div > (1 << HIST_DIV_SHIFT)) 589 + return div_by_not_power_of_two; 590 + 591 + divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div); 592 + return div_by_mult_and_shift; 593 + } 641 594 642 595 static void track_data_free(struct track_data *track_data) 643 596 { ··· 2659 2574 2660 2575 expr->operands[0] = operand1; 2661 2576 expr->operands[1] = operand2; 2577 + 2578 + if (field_op == FIELD_OP_DIV && 2579 + operand2_flags & HIST_FIELD_FL_CONST) { 2580 + u64 divisor = var2 ? var2->constant : operand2->constant; 2581 + 2582 + if (!divisor) { 2583 + hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str)); 2584 + ret = -EDOM; 2585 + goto free; 2586 + } 2587 + 2588 + /* 2589 + * Copy the divisor here so we don't have to look it up 2590 + * later if this is a var ref 2591 + */ 2592 + operand2->constant = divisor; 2593 + op_fn = hist_field_get_div_fn(operand2); 2594 + } 2662 2595 2663 2596 if (combine_consts) { 2664 2597 if (var1)
+435 -182
kernel/trace/trace_osnoise.c
··· 38 38 #define CREATE_TRACE_POINTS 39 39 #include <trace/events/osnoise.h> 40 40 41 - static struct trace_array *osnoise_trace; 42 - 43 41 /* 44 42 * Default values. 45 43 */ ··· 47 49 48 50 #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ 49 51 #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ 52 + 53 + /* 54 + * trace_array of the enabled osnoise/timerlat instances. 55 + */ 56 + struct osnoise_instance { 57 + struct list_head list; 58 + struct trace_array *tr; 59 + }; 60 + struct list_head osnoise_instances; 61 + 62 + static bool osnoise_has_registered_instances(void) 63 + { 64 + return !!list_first_or_null_rcu(&osnoise_instances, 65 + struct osnoise_instance, 66 + list); 67 + } 68 + 69 + /* 70 + * osnoise_instance_registered - check if a tr is already registered 71 + */ 72 + static int osnoise_instance_registered(struct trace_array *tr) 73 + { 74 + struct osnoise_instance *inst; 75 + int found = 0; 76 + 77 + rcu_read_lock(); 78 + list_for_each_entry_rcu(inst, &osnoise_instances, list) { 79 + if (inst->tr == tr) 80 + found = 1; 81 + } 82 + rcu_read_unlock(); 83 + 84 + return found; 85 + } 86 + 87 + /* 88 + * osnoise_register_instance - register a new trace instance 89 + * 90 + * Register a trace_array *tr in the list of instances running 91 + * osnoise/timerlat tracers. 92 + */ 93 + static int osnoise_register_instance(struct trace_array *tr) 94 + { 95 + struct osnoise_instance *inst; 96 + 97 + /* 98 + * register/unregister serialization is provided by trace's 99 + * trace_types_lock. 100 + */ 101 + lockdep_assert_held(&trace_types_lock); 102 + 103 + inst = kmalloc(sizeof(*inst), GFP_KERNEL); 104 + if (!inst) 105 + return -ENOMEM; 106 + 107 + INIT_LIST_HEAD_RCU(&inst->list); 108 + inst->tr = tr; 109 + list_add_tail_rcu(&inst->list, &osnoise_instances); 110 + 111 + return 0; 112 + } 113 + 114 + /* 115 + * osnoise_unregister_instance - unregister a registered trace instance 116 + * 117 + * Remove the trace_array *tr from the list of instances running 118 + * osnoise/timerlat tracers. 119 + */ 120 + static void osnoise_unregister_instance(struct trace_array *tr) 121 + { 122 + struct osnoise_instance *inst; 123 + int found = 0; 124 + 125 + /* 126 + * register/unregister serialization is provided by trace's 127 + * trace_types_lock. 128 + */ 129 + lockdep_assert_held(&trace_types_lock); 130 + 131 + list_for_each_entry_rcu(inst, &osnoise_instances, list) { 132 + if (inst->tr == tr) { 133 + list_del_rcu(&inst->list); 134 + found = 1; 135 + break; 136 + } 137 + } 138 + 139 + if (!found) 140 + return; 141 + 142 + synchronize_rcu(); 143 + kfree(inst); 144 + } 50 145 51 146 /* 52 147 * NMI runtime info. ··· 339 248 #endif 340 249 }; 341 250 342 - /* 343 - * Boolean variable used to inform that the tracer is currently sampling. 344 - */ 345 - static bool osnoise_busy; 251 + #ifdef CONFIG_TIMERLAT_TRACER 252 + static inline bool timerlat_enabled(void) 253 + { 254 + return osnoise_data.timerlat_tracer; 255 + } 256 + 257 + static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 258 + { 259 + struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 260 + /* 261 + * If the timerlat is enabled, but the irq handler did 262 + * not run yet enabling timerlat_tracer, do not trace. 263 + */ 264 + if (!tlat_var->tracing_thread) { 265 + osn_var->softirq.arrival_time = 0; 266 + osn_var->softirq.delta_start = 0; 267 + return 0; 268 + } 269 + return 1; 270 + } 271 + 272 + static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 273 + { 274 + struct timerlat_variables *tlat_var = this_cpu_tmr_var(); 275 + /* 276 + * If the timerlat is enabled, but the irq handler did 277 + * not run yet enabling timerlat_tracer, do not trace. 278 + */ 279 + if (!tlat_var->tracing_thread) { 280 + osn_var->thread.delta_start = 0; 281 + osn_var->thread.arrival_time = 0; 282 + return 0; 283 + } 284 + return 1; 285 + } 286 + #else /* CONFIG_TIMERLAT_TRACER */ 287 + static inline bool timerlat_enabled(void) 288 + { 289 + return false; 290 + } 291 + 292 + static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) 293 + { 294 + return 1; 295 + } 296 + static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) 297 + { 298 + return 1; 299 + } 300 + #endif 346 301 347 302 #ifdef CONFIG_PREEMPT_RT 348 303 /* ··· 452 315 * osnoise_taint - report an osnoise error. 453 316 */ 454 317 #define osnoise_taint(msg) ({ \ 455 - struct trace_array *tr = osnoise_trace; \ 318 + struct osnoise_instance *inst; \ 319 + struct trace_buffer *buffer; \ 456 320 \ 457 - trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg); \ 321 + rcu_read_lock(); \ 322 + list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ 323 + buffer = inst->tr->array_buffer.buffer; \ 324 + trace_array_printk_buf(buffer, _THIS_IP_, msg); \ 325 + } \ 326 + rcu_read_unlock(); \ 458 327 osnoise_data.tainted = true; \ 459 328 }) 460 329 461 330 /* 462 331 * Record an osnoise_sample into the tracer buffer. 463 332 */ 464 - static void trace_osnoise_sample(struct osnoise_sample *sample) 333 + static void 334 + __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) 465 335 { 466 - struct trace_array *tr = osnoise_trace; 467 - struct trace_buffer *buffer = tr->array_buffer.buffer; 468 336 struct trace_event_call *call = &event_osnoise; 469 337 struct ring_buffer_event *event; 470 338 struct osnoise_entry *entry; ··· 490 348 491 349 if (!call_filter_check_discard(call, entry, buffer, event)) 492 350 trace_buffer_unlock_commit_nostack(buffer, event); 351 + } 352 + 353 + /* 354 + * Record an osnoise_sample on all osnoise instances. 355 + */ 356 + static void trace_osnoise_sample(struct osnoise_sample *sample) 357 + { 358 + struct osnoise_instance *inst; 359 + struct trace_buffer *buffer; 360 + 361 + rcu_read_lock(); 362 + list_for_each_entry_rcu(inst, &osnoise_instances, list) { 363 + buffer = inst->tr->array_buffer.buffer; 364 + __trace_osnoise_sample(sample, buffer); 365 + } 366 + rcu_read_unlock(); 493 367 } 494 368 495 369 #ifdef CONFIG_TIMERLAT_TRACER ··· 546 388 } 547 389 #endif /* CONFIG_PREEMPT_RT */ 548 390 549 - /* 550 - * Record an timerlat_sample into the tracer buffer. 551 - */ 552 - static void trace_timerlat_sample(struct timerlat_sample *sample) 391 + static void 392 + __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) 553 393 { 554 - struct trace_array *tr = osnoise_trace; 555 394 struct trace_event_call *call = &event_osnoise; 556 - struct trace_buffer *buffer = tr->array_buffer.buffer; 557 395 struct ring_buffer_event *event; 558 396 struct timerlat_entry *entry; 559 397 ··· 564 410 565 411 if (!call_filter_check_discard(call, entry, buffer, event)) 566 412 trace_buffer_unlock_commit_nostack(buffer, event); 413 + } 414 + 415 + /* 416 + * Record an timerlat_sample into the tracer buffer. 417 + */ 418 + static void trace_timerlat_sample(struct timerlat_sample *sample) 419 + { 420 + struct osnoise_instance *inst; 421 + struct trace_buffer *buffer; 422 + 423 + rcu_read_lock(); 424 + list_for_each_entry_rcu(inst, &osnoise_instances, list) { 425 + buffer = inst->tr->array_buffer.buffer; 426 + __trace_timerlat_sample(sample, buffer); 427 + } 428 + rcu_read_unlock(); 567 429 } 568 430 569 431 #ifdef CONFIG_STACKTRACE ··· 621 451 return; 622 452 623 453 } 624 - /* 625 - * timerlat_dump_stack - dump a stack trace previously saved 626 - * 627 - * Dump a saved stack trace into the trace buffer. 628 - */ 629 - static void timerlat_dump_stack(void) 454 + 455 + static void 456 + __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) 630 457 { 631 458 struct trace_event_call *call = &event_osnoise; 632 - struct trace_array *tr = osnoise_trace; 633 - struct trace_buffer *buffer = tr->array_buffer.buffer; 634 459 struct ring_buffer_event *event; 635 - struct trace_stack *fstack; 636 460 struct stack_entry *entry; 637 - unsigned int size; 638 - 639 - preempt_disable_notrace(); 640 - fstack = this_cpu_ptr(&trace_stack); 641 - size = fstack->stack_size; 642 461 643 462 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, 644 463 tracing_gen_ctx()); 645 464 if (!event) 646 - goto out; 465 + return; 647 466 648 467 entry = ring_buffer_event_data(event); 649 468 ··· 641 482 642 483 if (!call_filter_check_discard(call, entry, buffer, event)) 643 484 trace_buffer_unlock_commit_nostack(buffer, event); 485 + } 644 486 645 - out: 487 + /* 488 + * timerlat_dump_stack - dump a stack trace previously saved 489 + */ 490 + static void timerlat_dump_stack(u64 latency) 491 + { 492 + struct osnoise_instance *inst; 493 + struct trace_buffer *buffer; 494 + struct trace_stack *fstack; 495 + unsigned int size; 496 + 497 + /* 498 + * trace only if latency > print_stack config, if enabled. 499 + */ 500 + if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) 501 + return; 502 + 503 + preempt_disable_notrace(); 504 + fstack = this_cpu_ptr(&trace_stack); 505 + size = fstack->stack_size; 506 + 507 + rcu_read_lock(); 508 + list_for_each_entry_rcu(inst, &osnoise_instances, list) { 509 + buffer = inst->tr->array_buffer.buffer; 510 + __timerlat_dump_stack(buffer, fstack, size); 511 + 512 + } 513 + rcu_read_unlock(); 646 514 preempt_enable_notrace(); 647 515 } 648 - #else 649 - #define timerlat_dump_stack() do {} while (0) 516 + #else /* CONFIG_STACKTRACE */ 517 + #define timerlat_dump_stack(u64 latency) do {} while (0) 650 518 #define timerlat_save_stack(a) do {} while (0) 651 519 #endif /* CONFIG_STACKTRACE */ 652 520 #endif /* CONFIG_TIMERLAT_TRACER */ ··· 1053 867 if (!osn_var->sampling) 1054 868 return; 1055 869 1056 - #ifdef CONFIG_TIMERLAT_TRACER 1057 - /* 1058 - * If the timerlat is enabled, but the irq handler did 1059 - * not run yet enabling timerlat_tracer, do not trace. 1060 - */ 1061 - if (unlikely(osnoise_data.timerlat_tracer)) { 1062 - struct timerlat_variables *tlat_var; 1063 - tlat_var = this_cpu_tmr_var(); 1064 - if (!tlat_var->tracing_thread) { 1065 - osn_var->softirq.arrival_time = 0; 1066 - osn_var->softirq.delta_start = 0; 870 + if (unlikely(timerlat_enabled())) 871 + if (!timerlat_softirq_exit(osn_var)) 1067 872 return; 1068 - } 1069 - } 1070 - #endif 1071 873 1072 874 duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); 1073 875 trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); ··· 1149 975 if (!osn_var->sampling) 1150 976 return; 1151 977 1152 - #ifdef CONFIG_TIMERLAT_TRACER 1153 - if (osnoise_data.timerlat_tracer) { 1154 - struct timerlat_variables *tlat_var; 1155 - tlat_var = this_cpu_tmr_var(); 1156 - if (!tlat_var->tracing_thread) { 1157 - osn_var->thread.delta_start = 0; 1158 - osn_var->thread.arrival_time = 0; 978 + if (unlikely(timerlat_enabled())) 979 + if (!timerlat_thread_exit(osn_var)) 1159 980 return; 1160 - } 1161 - } 1162 - #endif 1163 981 1164 982 duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); 1165 983 ··· 1244 1078 */ 1245 1079 static __always_inline void osnoise_stop_tracing(void) 1246 1080 { 1247 - struct trace_array *tr = osnoise_trace; 1081 + struct osnoise_instance *inst; 1082 + struct trace_array *tr; 1248 1083 1249 - trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1250 - "stop tracing hit on cpu %d\n", smp_processor_id()); 1084 + rcu_read_lock(); 1085 + list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1086 + tr = inst->tr; 1087 + trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, 1088 + "stop tracing hit on cpu %d\n", smp_processor_id()); 1251 1089 1252 - tracer_tracing_off(tr); 1090 + tracer_tracing_off(tr); 1091 + } 1092 + rcu_read_unlock(); 1093 + } 1094 + 1095 + /* 1096 + * notify_new_max_latency - Notify a new max latency via fsnotify interface. 1097 + */ 1098 + static void notify_new_max_latency(u64 latency) 1099 + { 1100 + struct osnoise_instance *inst; 1101 + struct trace_array *tr; 1102 + 1103 + rcu_read_lock(); 1104 + list_for_each_entry_rcu(inst, &osnoise_instances, list) { 1105 + tr = inst->tr; 1106 + if (tr->max_latency < latency) { 1107 + tr->max_latency = latency; 1108 + latency_fsnotify(tr); 1109 + } 1110 + } 1111 + rcu_read_unlock(); 1253 1112 } 1254 1113 1255 1114 /* ··· 1288 1097 static int run_osnoise(void) 1289 1098 { 1290 1099 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1291 - struct trace_array *tr = osnoise_trace; 1292 1100 u64 start, sample, last_sample; 1293 1101 u64 last_int_count, int_count; 1294 1102 s64 noise = 0, max_noise = 0; ··· 1422 1232 1423 1233 trace_osnoise_sample(&s); 1424 1234 1425 - /* Keep a running maximum ever recorded osnoise "latency" */ 1426 - if (max_noise > tr->max_latency) { 1427 - tr->max_latency = max_noise; 1428 - latency_fsnotify(tr); 1429 - } 1235 + notify_new_max_latency(max_noise); 1430 1236 1431 1237 if (osnoise_data.stop_tracing_total) 1432 1238 if (s.noise > osnoise_data.stop_tracing_total) ··· 1480 1294 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) 1481 1295 { 1482 1296 struct osnoise_variables *osn_var = this_cpu_osn_var(); 1483 - struct trace_array *tr = osnoise_trace; 1484 1297 struct timerlat_variables *tlat; 1485 1298 struct timerlat_sample s; 1486 1299 u64 now; ··· 1518 1333 * running, the thread needs to receive the softirq delta_start. The 1519 1334 * reason being is that the softirq will be the last to be unfolded, 1520 1335 * resseting the thread delay to zero. 1336 + * 1337 + * The PREEMPT_RT is a special case, though. As softirqs run as threads 1338 + * on RT, moving the thread is enough. 1521 1339 */ 1522 - #ifndef CONFIG_PREEMPT_RT 1523 - if (osn_var->softirq.delta_start) { 1340 + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { 1524 1341 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1525 1342 &osn_var->softirq.delta_start); 1526 1343 ··· 1532 1345 copy_int_safe_time(osn_var, &osn_var->thread.delta_start, 1533 1346 &osn_var->irq.delta_start); 1534 1347 } 1535 - #else /* CONFIG_PREEMPT_RT */ 1536 - /* 1537 - * The sofirqs run as threads on RT, so there is not need 1538 - * to keep track of it. 1539 - */ 1540 - copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start); 1541 - #endif /* CONFIG_PREEMPT_RT */ 1542 1348 1543 1349 /* 1544 1350 * Compute the current time with the expected time. ··· 1545 1365 1546 1366 trace_timerlat_sample(&s); 1547 1367 1548 - /* Keep a running maximum ever recorded os noise "latency" */ 1549 - if (diff > tr->max_latency) { 1550 - tr->max_latency = diff; 1551 - latency_fsnotify(tr); 1552 - } 1368 + notify_new_max_latency(diff); 1553 1369 1554 1370 if (osnoise_data.stop_tracing) 1555 1371 if (time_to_us(diff) >= osnoise_data.stop_tracing) ··· 1633 1457 1634 1458 trace_timerlat_sample(&s); 1635 1459 1636 - #ifdef CONFIG_STACKTRACE 1637 - if (osnoise_data.print_stack) 1638 - if (osnoise_data.print_stack <= time_to_us(diff)) 1639 - timerlat_dump_stack(); 1640 - #endif /* CONFIG_STACKTRACE */ 1460 + timerlat_dump_stack(time_to_us(diff)); 1641 1461 1642 1462 tlat->tracing_thread = false; 1643 1463 if (osnoise_data.stop_tracing_total) ··· 1644 1472 } 1645 1473 1646 1474 hrtimer_cancel(&tlat->timer); 1475 + return 0; 1476 + } 1477 + #else /* CONFIG_TIMERLAT_TRACER */ 1478 + static int timerlat_main(void *data) 1479 + { 1647 1480 return 0; 1648 1481 } 1649 1482 #endif /* CONFIG_TIMERLAT_TRACER */ ··· 1693 1516 void *main = osnoise_main; 1694 1517 char comm[24]; 1695 1518 1696 - #ifdef CONFIG_TIMERLAT_TRACER 1697 - if (osnoise_data.timerlat_tracer) { 1519 + if (timerlat_enabled()) { 1698 1520 snprintf(comm, 24, "timerlat/%d", cpu); 1699 1521 main = timerlat_main; 1700 1522 } else { 1701 1523 snprintf(comm, 24, "osnoise/%d", cpu); 1702 1524 } 1703 - #else 1704 - snprintf(comm, 24, "osnoise/%d", cpu); 1705 - #endif 1525 + 1706 1526 kthread = kthread_create_on_cpu(main, NULL, cpu, comm); 1707 1527 1708 1528 if (IS_ERR(kthread)) { ··· 1720 1546 * This starts the kernel thread that will look for osnoise on many 1721 1547 * cpus. 1722 1548 */ 1723 - static int start_per_cpu_kthreads(struct trace_array *tr) 1549 + static int start_per_cpu_kthreads(void) 1724 1550 { 1725 1551 struct cpumask *current_mask = &save_cpumask; 1726 1552 int retval = 0; ··· 1728 1554 1729 1555 cpus_read_lock(); 1730 1556 /* 1731 - * Run only on CPUs in which trace and osnoise are allowed to run. 1557 + * Run only on online CPUs in which osnoise is allowed to run. 1732 1558 */ 1733 - cpumask_and(current_mask, tr->tracing_cpumask, &osnoise_cpumask); 1734 - /* 1735 - * And the CPU is online. 1736 - */ 1737 - cpumask_and(current_mask, cpu_online_mask, current_mask); 1559 + cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); 1738 1560 1739 1561 for_each_possible_cpu(cpu) 1740 1562 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; ··· 1751 1581 #ifdef CONFIG_HOTPLUG_CPU 1752 1582 static void osnoise_hotplug_workfn(struct work_struct *dummy) 1753 1583 { 1754 - struct trace_array *tr = osnoise_trace; 1755 1584 unsigned int cpu = smp_processor_id(); 1756 - 1757 1585 1758 1586 mutex_lock(&trace_types_lock); 1759 1587 1760 - if (!osnoise_busy) 1588 + if (!osnoise_has_registered_instances()) 1761 1589 goto out_unlock_trace; 1762 1590 1763 1591 mutex_lock(&interface_lock); 1764 1592 cpus_read_lock(); 1765 1593 1766 1594 if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) 1767 - goto out_unlock; 1768 - 1769 - if (!cpumask_test_cpu(cpu, tr->tracing_cpumask)) 1770 1595 goto out_unlock; 1771 1596 1772 1597 start_kthread(cpu); ··· 1852 1687 return count; 1853 1688 } 1854 1689 1855 - static void osnoise_tracer_start(struct trace_array *tr); 1856 - static void osnoise_tracer_stop(struct trace_array *tr); 1857 - 1858 1690 /* 1859 1691 * osnoise_cpus_write - Write function for "cpus" entry 1860 1692 * @filp: The active open file structure ··· 1863 1701 * interface to the osnoise trace. By default, it lists all CPUs, 1864 1702 * in this way, allowing osnoise threads to run on any online CPU 1865 1703 * of the system. It serves to restrict the execution of osnoise to the 1866 - * set of CPUs writing via this interface. Note that osnoise also 1867 - * respects the "tracing_cpumask." Hence, osnoise threads will run only 1868 - * on the set of CPUs allowed here AND on "tracing_cpumask." Why not 1869 - * have just "tracing_cpumask?" Because the user might be interested 1870 - * in tracing what is running on other CPUs. For instance, one might 1871 - * run osnoise in one HT CPU while observing what is running on the 1872 - * sibling HT CPU. 1704 + * set of CPUs writing via this interface. Why not use "tracing_cpumask"? 1705 + * Because the user might be interested in tracing what is running on 1706 + * other CPUs. For instance, one might run osnoise in one HT CPU 1707 + * while observing what is running on the sibling HT CPU. 1873 1708 */ 1874 1709 static ssize_t 1875 1710 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, 1876 1711 loff_t *ppos) 1877 1712 { 1878 - struct trace_array *tr = osnoise_trace; 1879 1713 cpumask_var_t osnoise_cpumask_new; 1880 1714 int running, err; 1881 1715 char buf[256]; ··· 1890 1732 goto err_free; 1891 1733 1892 1734 /* 1893 - * trace_types_lock is taken to avoid concurrency on start/stop 1894 - * and osnoise_busy. 1735 + * trace_types_lock is taken to avoid concurrency on start/stop. 1895 1736 */ 1896 1737 mutex_lock(&trace_types_lock); 1897 - running = osnoise_busy; 1738 + running = osnoise_has_registered_instances(); 1898 1739 if (running) 1899 - osnoise_tracer_stop(tr); 1740 + stop_per_cpu_kthreads(); 1900 1741 1901 1742 mutex_lock(&interface_lock); 1902 1743 /* ··· 1909 1752 mutex_unlock(&interface_lock); 1910 1753 1911 1754 if (running) 1912 - osnoise_tracer_start(tr); 1755 + start_per_cpu_kthreads(); 1913 1756 mutex_unlock(&trace_types_lock); 1914 1757 1915 1758 free_cpumask_var(osnoise_cpumask_new); ··· 1993 1836 .llseek = generic_file_llseek, 1994 1837 }; 1995 1838 1839 + #ifdef CONFIG_TIMERLAT_TRACER 1840 + #ifdef CONFIG_STACKTRACE 1841 + static int init_timerlat_stack_tracefs(struct dentry *top_dir) 1842 + { 1843 + struct dentry *tmp; 1844 + 1845 + tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 1846 + &osnoise_print_stack, &trace_min_max_fops); 1847 + if (!tmp) 1848 + return -ENOMEM; 1849 + 1850 + return 0; 1851 + } 1852 + #else /* CONFIG_STACKTRACE */ 1853 + static int init_timerlat_stack_tracefs(struct dentry *top_dir) 1854 + { 1855 + return 0; 1856 + } 1857 + #endif /* CONFIG_STACKTRACE */ 1858 + 1859 + /* 1860 + * init_timerlat_tracefs - A function to initialize the timerlat interface files 1861 + */ 1862 + static int init_timerlat_tracefs(struct dentry *top_dir) 1863 + { 1864 + struct dentry *tmp; 1865 + 1866 + tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 1867 + &timerlat_period, &trace_min_max_fops); 1868 + if (!tmp) 1869 + return -ENOMEM; 1870 + 1871 + return init_timerlat_stack_tracefs(top_dir); 1872 + } 1873 + #else /* CONFIG_TIMERLAT_TRACER */ 1874 + static int init_timerlat_tracefs(struct dentry *top_dir) 1875 + { 1876 + return 0; 1877 + } 1878 + #endif /* CONFIG_TIMERLAT_TRACER */ 1879 + 1996 1880 /* 1997 1881 * init_tracefs - A function to initialize the tracefs interface files 1998 1882 * ··· 2078 1880 tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); 2079 1881 if (!tmp) 2080 1882 goto err; 2081 - #ifdef CONFIG_TIMERLAT_TRACER 2082 - #ifdef CONFIG_STACKTRACE 2083 - tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, 2084 - &osnoise_print_stack, &trace_min_max_fops); 2085 - if (!tmp) 2086 - goto err; 2087 - #endif 2088 1883 2089 - tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, 2090 - &timerlat_period, &trace_min_max_fops); 2091 - if (!tmp) 1884 + ret = init_timerlat_tracefs(top_dir); 1885 + if (ret) 2092 1886 goto err; 2093 - #endif 2094 1887 2095 1888 return 0; 2096 1889 ··· 2122 1933 return -EINVAL; 2123 1934 } 2124 1935 2125 - static int __osnoise_tracer_start(struct trace_array *tr) 1936 + /* 1937 + * osnoise_workload_start - start the workload and hook to events 1938 + */ 1939 + static int osnoise_workload_start(void) 2126 1940 { 2127 1941 int retval; 1942 + 1943 + /* 1944 + * Instances need to be registered after calling workload 1945 + * start. Hence, if there is already an instance, the 1946 + * workload was already registered. Otherwise, this 1947 + * code is on the way to register the first instance, 1948 + * and the workload will start. 1949 + */ 1950 + if (osnoise_has_registered_instances()) 1951 + return 0; 2128 1952 2129 1953 osn_var_reset_all(); 2130 1954 2131 1955 retval = osnoise_hook_events(); 2132 1956 if (retval) 2133 1957 return retval; 1958 + 2134 1959 /* 2135 - * Make sure NMIs see reseted values. 1960 + * Make sure that ftrace_nmi_enter/exit() see reset values 1961 + * before enabling trace_osnoise_callback_enabled. 2136 1962 */ 2137 1963 barrier(); 2138 1964 trace_osnoise_callback_enabled = true; 2139 1965 2140 - retval = start_per_cpu_kthreads(tr); 1966 + retval = start_per_cpu_kthreads(); 2141 1967 if (retval) { 2142 1968 unhook_irq_events(); 2143 1969 return retval; 2144 1970 } 2145 1971 2146 - osnoise_busy = true; 2147 - 2148 1972 return 0; 2149 1973 } 2150 1974 2151 - static void osnoise_tracer_start(struct trace_array *tr) 1975 + /* 1976 + * osnoise_workload_stop - stop the workload and unhook the events 1977 + */ 1978 + static void osnoise_workload_stop(void) 2152 1979 { 2153 - int retval; 2154 - 2155 - if (osnoise_busy) 2156 - return; 2157 - 2158 - retval = __osnoise_tracer_start(tr); 2159 - if (retval) 2160 - pr_err(BANNER "Error starting osnoise tracer\n"); 2161 - 2162 - } 2163 - 2164 - static void osnoise_tracer_stop(struct trace_array *tr) 2165 - { 2166 - if (!osnoise_busy) 1980 + /* 1981 + * Instances need to be unregistered before calling 1982 + * stop. Hence, if there is a registered instance, more 1983 + * than one instance is running, and the workload will not 1984 + * yet stop. Otherwise, this code is on the way to disable 1985 + * the last instance, and the workload can stop. 1986 + */ 1987 + if (osnoise_has_registered_instances()) 2167 1988 return; 2168 1989 2169 1990 trace_osnoise_callback_enabled = false; 1991 + /* 1992 + * Make sure that ftrace_nmi_enter/exit() see 1993 + * trace_osnoise_callback_enabled as false before continuing. 1994 + */ 2170 1995 barrier(); 2171 1996 2172 1997 stop_per_cpu_kthreads(); ··· 2188 1985 unhook_irq_events(); 2189 1986 unhook_softirq_events(); 2190 1987 unhook_thread_events(); 1988 + } 2191 1989 2192 - osnoise_busy = false; 1990 + static void osnoise_tracer_start(struct trace_array *tr) 1991 + { 1992 + int retval; 1993 + 1994 + /* 1995 + * If the instance is already registered, there is no need to 1996 + * register it again. 1997 + */ 1998 + if (osnoise_instance_registered(tr)) 1999 + return; 2000 + 2001 + retval = osnoise_workload_start(); 2002 + if (retval) 2003 + pr_err(BANNER "Error starting osnoise tracer\n"); 2004 + 2005 + osnoise_register_instance(tr); 2006 + } 2007 + 2008 + static void osnoise_tracer_stop(struct trace_array *tr) 2009 + { 2010 + osnoise_unregister_instance(tr); 2011 + osnoise_workload_stop(); 2193 2012 } 2194 2013 2195 2014 static int osnoise_tracer_init(struct trace_array *tr) 2196 2015 { 2197 - 2198 - /* Only allow one instance to enable this */ 2199 - if (osnoise_busy) 2016 + /* 2017 + * Only allow osnoise tracer if timerlat tracer is not running 2018 + * already. 2019 + */ 2020 + if (timerlat_enabled()) 2200 2021 return -EBUSY; 2201 2022 2202 - osnoise_trace = tr; 2203 2023 tr->max_latency = 0; 2204 2024 2205 2025 osnoise_tracer_start(tr); 2206 - 2207 2026 return 0; 2208 2027 } 2209 2028 ··· 2249 2024 { 2250 2025 int retval; 2251 2026 2252 - if (osnoise_busy) 2027 + /* 2028 + * If the instance is already registered, there is no need to 2029 + * register it again. 2030 + */ 2031 + if (osnoise_instance_registered(tr)) 2253 2032 return; 2254 2033 2255 - osnoise_data.timerlat_tracer = 1; 2256 - 2257 - retval = __osnoise_tracer_start(tr); 2034 + retval = osnoise_workload_start(); 2258 2035 if (retval) 2259 - goto out_err; 2036 + pr_err(BANNER "Error starting timerlat tracer\n"); 2037 + 2038 + osnoise_register_instance(tr); 2260 2039 2261 2040 return; 2262 - out_err: 2263 - pr_err(BANNER "Error starting timerlat tracer\n"); 2264 2041 } 2265 2042 2266 2043 static void timerlat_tracer_stop(struct trace_array *tr) 2267 2044 { 2268 2045 int cpu; 2269 2046 2270 - if (!osnoise_busy) 2271 - return; 2047 + osnoise_unregister_instance(tr); 2272 2048 2273 - for_each_online_cpu(cpu) 2274 - per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 2049 + /* 2050 + * Instruct the threads to stop only if this is the last instance. 2051 + */ 2052 + if (!osnoise_has_registered_instances()) { 2053 + for_each_online_cpu(cpu) 2054 + per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; 2055 + } 2275 2056 2276 - osnoise_tracer_stop(tr); 2277 - 2278 - osnoise_data.timerlat_tracer = 0; 2057 + osnoise_workload_stop(); 2279 2058 } 2280 2059 2281 2060 static int timerlat_tracer_init(struct trace_array *tr) 2282 2061 { 2283 - /* Only allow one instance to enable this */ 2284 - if (osnoise_busy) 2062 + /* 2063 + * Only allow timerlat tracer if osnoise tracer is not running already. 2064 + */ 2065 + if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) 2285 2066 return -EBUSY; 2286 2067 2287 - osnoise_trace = tr; 2068 + /* 2069 + * If this is the first instance, set timerlat_tracer to block 2070 + * osnoise tracer start. 2071 + */ 2072 + if (!osnoise_has_registered_instances()) 2073 + osnoise_data.timerlat_tracer = 1; 2288 2074 2289 2075 tr->max_latency = 0; 2290 - 2291 2076 timerlat_tracer_start(tr); 2292 2077 2293 2078 return 0; ··· 2306 2071 static void timerlat_tracer_reset(struct trace_array *tr) 2307 2072 { 2308 2073 timerlat_tracer_stop(tr); 2074 + 2075 + /* 2076 + * If this is the last instance, reset timerlat_tracer allowing 2077 + * osnoise to be started. 2078 + */ 2079 + if (!osnoise_has_registered_instances()) 2080 + osnoise_data.timerlat_tracer = 0; 2309 2081 } 2310 2082 2311 2083 static struct tracer timerlat_tracer __read_mostly = { ··· 2324 2082 .print_header = print_timerlat_headers, 2325 2083 .allow_instances = true, 2326 2084 }; 2085 + 2086 + __init static int init_timerlat_tracer(void) 2087 + { 2088 + return register_tracer(&timerlat_tracer); 2089 + } 2090 + #else /* CONFIG_TIMERLAT_TRACER */ 2091 + __init static int init_timerlat_tracer(void) 2092 + { 2093 + return 0; 2094 + } 2327 2095 #endif /* CONFIG_TIMERLAT_TRACER */ 2328 2096 2329 2097 __init static int init_osnoise_tracer(void) ··· 2350 2098 return ret; 2351 2099 } 2352 2100 2353 - #ifdef CONFIG_TIMERLAT_TRACER 2354 - ret = register_tracer(&timerlat_tracer); 2101 + ret = init_timerlat_tracer(); 2355 2102 if (ret) { 2356 - pr_err(BANNER "Error registering timerlat\n"); 2103 + pr_err(BANNER "Error registering timerlat!\n"); 2357 2104 return ret; 2358 2105 } 2359 - #endif 2106 + 2360 2107 osnoise_init_hotplug_support(); 2108 + 2109 + INIT_LIST_HEAD_RCU(&osnoise_instances); 2361 2110 2362 2111 init_tracefs(); 2363 2112
+2
samples/ftrace/ftrace-direct-multi.c
··· 5 5 #include <linux/ftrace.h> 6 6 #include <linux/sched/stat.h> 7 7 8 + extern void my_direct_func(unsigned long ip); 9 + 8 10 void my_direct_func(unsigned long ip) 9 11 { 10 12 trace_printk("ip %lx\n", ip);
+63
tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
··· 1 + #!/bin/sh 2 + # SPDX-License-Identifier: GPL-2.0 3 + # description: event trigger - test histogram expression parsing 4 + # requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README 5 + 6 + 7 + fail() { #msg 8 + echo $1 9 + exit_fail 10 + } 11 + 12 + test_hist_expr() { # test_name expression expected_val 13 + trigger="events/sched/sched_process_fork/trigger" 14 + 15 + reset_trigger_file $trigger 16 + 17 + echo "Test hist trigger expressions - $1" 18 + 19 + echo "hist:keys=common_pid:x=$2" > $trigger 20 + 21 + for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done 22 + 23 + actual=`grep -o 'x=[[:digit:]]*' $trigger | awk -F= '{ print $2 }'` 24 + 25 + if [ $actual != $3 ]; then 26 + fail "Failed hist trigger expression evaluation: Expression: $2 Expected: $3, Actual: $actual" 27 + fi 28 + 29 + reset_trigger_file $trigger 30 + } 31 + 32 + check_error() { # test_name command-with-error-pos-by-^ 33 + trigger="events/sched/sched_process_fork/trigger" 34 + 35 + echo "Test hist trigger expressions - $1" 36 + ftrace_errlog_check 'hist:sched:sched_process_fork' "$2" $trigger 37 + } 38 + 39 + test_hist_expr "Variable assignment" "123" "123" 40 + 41 + test_hist_expr "Subtraction not associative" "16-8-4-2" "2" 42 + 43 + test_hist_expr "Division not associative" "64/8/4/2" "1" 44 + 45 + test_hist_expr "Same precedence operators (+,-) evaluated left to right" "16-8+4+2" "14" 46 + 47 + test_hist_expr "Same precedence operators (*,/) evaluated left to right" "4*3/2*2" "12" 48 + 49 + test_hist_expr "Multiplication evaluated before addition/subtraction" "4+3*2-2" "8" 50 + 51 + test_hist_expr "Division evaluated before addition/subtraction" "4+6/2-2" "5" 52 + 53 + # err pos for "too many subexpressions" is dependent on where 54 + # the last subexpression was detected. This can vary depending 55 + # on how the expression tree was generated. 56 + check_error "Too many subexpressions" 'hist:keys=common_pid:x=32+^10*3/20-4' 57 + check_error "Too many subexpressions" 'hist:keys=common_pid:x=^1+2+3+4+5' 58 + 59 + check_error "Unary minus not supported in subexpression" 'hist:keys=common_pid:x=-(^1)+2' 60 + 61 + check_error "Division by zero" 'hist:keys=common_pid:x=3/^0' 62 + 63 + exit 0