#ifndef _LINUX_TRACE_EVENT_H
#define _LINUX_TRACE_EVENT_H

#include <linux/ring_buffer.h>
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
#include <linux/tracepoint.h>

struct trace_array;
struct trace_buffer;
struct tracer;
struct dentry;
struct bpf_prog;

struct trace_print_flags {
	unsigned long		mask;
	const char		*name;
};

struct trace_print_flags_u64 {
	unsigned long long	mask;
	const char		*name;
};

const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
				  unsigned long flags,
				  const struct trace_print_flags *flag_array);

const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
				    const struct trace_print_flags *symbol_array);

#if BITS_PER_LONG == 32
const char *trace_print_symbols_seq_u64(struct trace_seq *p,
					unsigned long long val,
					const struct trace_print_flags_u64
					*symbol_array);
#endif

const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
				    unsigned int bitmask_size);

const char *trace_print_hex_seq(struct trace_seq *p,
				const unsigned char *buf, int len);

const char *trace_print_array_seq(struct trace_seq *p,
				  const void *buf, int count,
				  size_t el_size);

struct trace_iterator;
struct trace_event;

int trace_raw_output_prep(struct trace_iterator *iter,
			  struct trace_event *event);

/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
 *
 *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
 */
struct trace_entry {
	unsigned short		type;
	unsigned char		flags;
	unsigned char		preempt_count;
	int			pid;
};

#define TRACE_EVENT_TYPE_MAX						\
	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
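/*
 * Illustrative sketch, not part of the original header: a name table of
 * the shape consumed by trace_print_flags_seq().  The flag values and
 * names below are hypothetical; real tables are normally built by the
 * __print_flags() machinery in the generated event code.
 */
static inline const char *
example_show_flags(struct trace_seq *p, unsigned long flags)
{
	static const struct trace_print_flags names[] = {
		{ 0x01,	"READ"	},
		{ 0x02,	"WRITE"	},
		{ 0x04,	"SYNC"	},
		{ -1,	NULL	}	/* a NULL name terminates the scan */
	};

	/* renders e.g. "READ|SYNC" into @p, "|"-delimited */
	return trace_print_flags_seq(p, "|", flags, names);
}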
/*
 * Trace iterator - used by printout routines that present trace
 * results to users; these routines might sleep, etc:
 */
struct trace_iterator {
	struct trace_array	*tr;
	struct tracer		*trace;
	struct trace_buffer	*trace_buffer;
	void			*private;
	int			cpu_file;
	struct mutex		mutex;
	struct ring_buffer_iter	**buffer_iter;
	unsigned long		iter_flags;

	/* trace_seq for __print_flags() and __print_symbolic() etc. */
	struct trace_seq	tmp_seq;

	cpumask_var_t		started;

	/* true when the current open file is a snapshot */
	bool			snapshot;

	/* The below is zeroed out in pipe_read */
	struct trace_seq	seq;
	struct trace_entry	*ent;
	unsigned long		lost_events;
	int			leftover;
	int			ent_size;
	int			cpu;
	u64			ts;

	loff_t			pos;
	long			idx;

	/* All new fields here will be zeroed out in pipe_read */
};

enum trace_iter_flags {
	TRACE_FILE_LAT_FMT	= 1,
	TRACE_FILE_ANNOTATE	= 2,
	TRACE_FILE_TIME_IN_NS	= 4,
};

typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
					      int flags, struct trace_event *event);

struct trace_event_functions {
	trace_print_func	trace;
	trace_print_func	raw;
	trace_print_func	hex;
	trace_print_func	binary;
};

struct trace_event {
	struct hlist_node		node;
	struct list_head		list;
	int				type;
	struct trace_event_functions	*funcs;
};

extern int register_trace_event(struct trace_event *event);
extern int unregister_trace_event(struct trace_event *event);

/* Return values for print_line callback */
enum print_line_t {
	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
	TRACE_TYPE_HANDLED	= 1,
	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
};

/*
 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
 * simplifies those functions and keeps them in sync.
 */
static inline enum print_line_t trace_handle_return(struct trace_seq *s)
{
	return trace_seq_has_overflowed(s) ?
		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
}

void tracing_generic_entry_update(struct trace_entry *entry,
				  unsigned long flags,
				  int pc);
struct trace_event_file;

struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
				struct trace_event_file *trace_file,
				int type, unsigned long len,
				unsigned long flags, int pc);
struct ring_buffer_event *
trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer,
				  int type, unsigned long len,
				  unsigned long flags, int pc);
void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
					struct ring_buffer_event *event,
					unsigned long flags, int pc);
void trace_buffer_unlock_commit(struct ring_buffer *buffer,
				struct ring_buffer_event *event,
				unsigned long flags, int pc);
void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned long flags, int pc,
				     struct pt_regs *regs);
void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
					 struct ring_buffer_event *event);

void tracing_record_cmdline(struct task_struct *tsk);

int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);

struct event_filter;

enum trace_reg {
	TRACE_REG_REGISTER,
	TRACE_REG_UNREGISTER,
#ifdef CONFIG_PERF_EVENTS
	TRACE_REG_PERF_REGISTER,
	TRACE_REG_PERF_UNREGISTER,
	TRACE_REG_PERF_OPEN,
	TRACE_REG_PERF_CLOSE,
	TRACE_REG_PERF_ADD,
	TRACE_REG_PERF_DEL,
#endif
};

struct trace_event_call;
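/*
 * Illustrative sketch, not part of the original header: the typical shape
 * of a trace_print_func as registered via trace_event_functions.  The
 * output format here is made up for demonstration.
 */
static inline enum print_line_t
example_trace_output(struct trace_iterator *iter, int flags,
		     struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example: type=%d pid=%d\n",
			 iter->ent->type, iter->ent->pid);

	/* TRACE_TYPE_PARTIAL_LINE if @s overflowed, else TRACE_TYPE_HANDLED */
	return trace_handle_return(s);
}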
struct trace_event_class {
	const char		*system;
	void			*probe;
#ifdef CONFIG_PERF_EVENTS
	void			*perf_probe;
#endif
	int			(*reg)(struct trace_event_call *event,
				       enum trace_reg type, void *data);
	int			(*define_fields)(struct trace_event_call *);
	struct list_head	*(*get_fields)(struct trace_event_call *);
	struct list_head	fields;
	int			(*raw_init)(struct trace_event_call *);
};

extern int trace_event_reg(struct trace_event_call *event,
			   enum trace_reg type, void *data);

struct trace_event_buffer {
	struct ring_buffer		*buffer;
	struct ring_buffer_event	*event;
	struct trace_event_file		*trace_file;
	void				*entry;
	unsigned long			flags;
	int				pc;
};

void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
				 struct trace_event_file *trace_file,
				 unsigned long len);

void trace_event_buffer_commit(struct trace_event_buffer *fbuffer);

enum {
	TRACE_EVENT_FL_FILTERED_BIT,
	TRACE_EVENT_FL_CAP_ANY_BIT,
	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
	TRACE_EVENT_FL_WAS_ENABLED_BIT,
	TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
	TRACE_EVENT_FL_TRACEPOINT_BIT,
	TRACE_EVENT_FL_KPROBE_BIT,
	TRACE_EVENT_FL_UPROBE_BIT,
};

/*
 * Event flags:
 *  FILTERED	    - The event has a filter attached
 *  CAP_ANY	    - Any user can enable for perf
 *  NO_SET_FILTER   - Set when filter has error and is to be ignored
 *  IGNORE_ENABLE   - For trace internal events, do not enable with debugfs file
 *  WAS_ENABLED	    - Set and stays set when an event was ever enabled
 *		      (used for module unloading; if a module event is enabled,
 *		      it is best to clear the buffers that used it).
 *  USE_CALL_FILTER - For trace internal events, don't use file filter
 *  TRACEPOINT	    - Event is a tracepoint
 *  KPROBE	    - Event is a kprobe
 *  UPROBE	    - Event is a uprobe
 */
enum {
	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
	TRACE_EVENT_FL_USE_CALL_FILTER	= (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
	TRACE_EVENT_FL_UPROBE		= (1 << TRACE_EVENT_FL_UPROBE_BIT),
};

#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)

struct trace_event_call {
	struct list_head	list;
	struct trace_event_class *class;
	union {
		char			*name;
		/* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
		struct tracepoint	*tp;
	};
	struct trace_event	event;
	char			*print_fmt;
	struct event_filter	*filter;
	void			*mod;
	void			*data;
	/*
	 *   bit 0:	filter_active
	 *   bit 1:	allow trace by non root (cap any)
	 *   bit 2:	failed to apply filter
	 *   bit 3:	trace internal event (do not enable)
	 *   bit 4:	Event was enabled by module
	 *   bit 5:	use call filter rather than file filter
	 *   bit 6:	Event is a tracepoint
	 */
	int			flags; /* static flags of different events */

#ifdef CONFIG_PERF_EVENTS
	int				perf_refcount;
	struct hlist_head __percpu	*perf_events;
	struct bpf_prog			*prog;

	int	(*perf_perm)(struct trace_event_call *,
			     struct perf_event *);
#endif
};

static inline const char *
trace_event_name(struct trace_event_call *call)
{
	if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
		return call->tp ? call->tp->name : NULL;
	else
		return call->name;
}
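/*
 * Illustrative sketch, not part of the original header: because of the
 * name/tp union above, an event's name must be read through
 * trace_event_name().  pr_info() is assumed to be available through the
 * includes above and is used only for demonstration.
 */
static inline void example_report_event(struct trace_event_call *call)
{
	pr_info("event %s/%s (flags 0x%x)\n",
		call->class->system, trace_event_name(call), call->flags);
}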
struct trace_array;
struct trace_subsystem_dir;

enum {
	EVENT_FILE_FL_ENABLED_BIT,
	EVENT_FILE_FL_RECORDED_CMD_BIT,
	EVENT_FILE_FL_FILTERED_BIT,
	EVENT_FILE_FL_NO_SET_FILTER_BIT,
	EVENT_FILE_FL_SOFT_MODE_BIT,
	EVENT_FILE_FL_SOFT_DISABLED_BIT,
	EVENT_FILE_FL_TRIGGER_MODE_BIT,
	EVENT_FILE_FL_TRIGGER_COND_BIT,
};

/*
 * Event file flags:
 *  ENABLED	  - The event is enabled
 *  RECORDED_CMD  - The comms should be recorded at sched_switch
 *  FILTERED	  - The event has a filter attached
 *  NO_SET_FILTER - Set when filter has error and is to be ignored
 *  SOFT_MODE	  - The event is enabled/disabled by SOFT_DISABLED
 *  SOFT_DISABLED - When set, do not trace the event (even though its
 *		    tracepoint may be enabled)
 *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
 *  TRIGGER_COND  - When set, one or more triggers has an associated filter
 */
enum {
	EVENT_FILE_FL_ENABLED		= (1 << EVENT_FILE_FL_ENABLED_BIT),
	EVENT_FILE_FL_RECORDED_CMD	= (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
	EVENT_FILE_FL_FILTERED		= (1 << EVENT_FILE_FL_FILTERED_BIT),
	EVENT_FILE_FL_NO_SET_FILTER	= (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
	EVENT_FILE_FL_SOFT_MODE		= (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
	EVENT_FILE_FL_SOFT_DISABLED	= (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
	EVENT_FILE_FL_TRIGGER_MODE	= (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
	EVENT_FILE_FL_TRIGGER_COND	= (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
};

struct trace_event_file {
	struct list_head		list;
	struct trace_event_call		*event_call;
	struct event_filter		*filter;
	struct dentry			*dir;
	struct trace_array		*tr;
	struct trace_subsystem_dir	*system;
	struct list_head		triggers;

	/*
	 * 32 bit flags:
	 *   bit 0:	enabled
	 *   bit 1:	enabled cmd record
	 *   bit 2:	enable/disable with the soft disable bit
	 *   bit 3:	soft disabled
	 *   bit 4:	trigger enabled
	 *
	 * Note: The bits must be set atomically to prevent races
	 * from other writers. Reads of flags do not need to be in
	 * sync as they occur in critical sections. But the way flags
	 * is currently used, these changes do not affect the code
	 * except that when a change is made, it may have a slight
	 * delay in propagating the changes to other CPUs due to
	 * caching and such. Which is mostly OK ;-)
	 */
	unsigned long		flags;
	atomic_t		sm_ref;	/* soft-mode reference counter */
	atomic_t		tm_ref;	/* trigger-mode reference counter */
};

#define __TRACE_EVENT_FLAGS(name, value)				\
	static int __init trace_init_flags_##name(void)		\
	{								\
		event_##name.flags |= value;				\
		return 0;						\
	}								\
	early_initcall(trace_init_flags_##name);
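/*
 * Example (illustrative, for a hypothetical event "foo"): mark the event
 * as perf-enable-able by any user.  The invocation expands to an early
 * initcall that ORs the value into the generated event_foo object:
 *
 *	__TRACE_EVENT_FLAGS(foo, TRACE_EVENT_FL_CAP_ANY)
 */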
#define __TRACE_EVENT_PERF_PERM(name, expr...)				\
	static int perf_perm_##name(struct trace_event_call *tp_event,	\
				    struct perf_event *p_event)		\
	{								\
		return ({ expr; });					\
	}								\
	static int __init trace_init_perf_perm_##name(void)		\
	{								\
		event_##name.perf_perm = &perf_perm_##name;		\
		return 0;						\
	}								\
	early_initcall(trace_init_perf_perm_##name);

#define PERF_MAX_TRACE_SIZE	2048

#define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */

enum event_trigger_type {
	ETT_NONE		= (0),
	ETT_TRACE_ONOFF		= (1 << 0),
	ETT_SNAPSHOT		= (1 << 1),
	ETT_STACKTRACE		= (1 << 2),
	ETT_EVENT_ENABLE	= (1 << 3),
};

extern int filter_match_preds(struct event_filter *filter, void *rec);

extern int filter_check_discard(struct trace_event_file *file, void *rec,
				struct ring_buffer *buffer,
				struct ring_buffer_event *event);
extern int call_filter_check_discard(struct trace_event_call *call, void *rec,
				     struct ring_buffer *buffer,
				     struct ring_buffer_event *event);
extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
						   void *rec);
extern void event_triggers_post_call(struct trace_event_file *file,
				     enum event_trigger_type tt);

/**
 * trace_trigger_soft_disabled - do triggers and test if soft disabled
 * @file: The file pointer of the event to test
 *
 * If any triggers without filters are attached to this event, they
 * will be called here. If the event is soft disabled and has no
 * triggers that require testing the fields, it will return true,
 * otherwise false.
 */
static inline bool
trace_trigger_soft_disabled(struct trace_event_file *file)
{
	unsigned long eflags = file->flags;

	if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
		if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
			event_triggers_call(file, NULL);
		if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
			return true;
	}
	return false;
}
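/*
 * Example (illustrative): generated probe functions typically bail out
 * early when the event is soft disabled and no trigger needs the record:
 *
 *	if (trace_trigger_soft_disabled(trace_file))
 *		return;
 */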
/*
 * Helper function for event_trigger_unlock_commit{_regs}().
 * If there are event triggers attached to this event that require
 * filtering against its fields, then they will be called as the
 * entry already holds the field information of the current event.
 *
 * It also checks if the event should be discarded or not.
 * It is to be discarded if the event is soft disabled and the
 * event was only recorded to process triggers, or if the event
 * filter is active and this event did not match the filters.
 *
 * Returns true if the event is discarded, false otherwise.
 */
static inline bool
__event_trigger_test_discard(struct trace_event_file *file,
			     struct ring_buffer *buffer,
			     struct ring_buffer_event *event,
			     void *entry,
			     enum event_trigger_type *tt)
{
	unsigned long eflags = file->flags;

	if (eflags & EVENT_FILE_FL_TRIGGER_COND)
		*tt = event_triggers_call(file, entry);

	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags))
		ring_buffer_discard_commit(buffer, event);
	else if (!filter_check_discard(file, entry, buffer, event))
		return false;

	return true;
}

/**
 * event_trigger_unlock_commit - handle triggers and finish event commit
 * @file: The file pointer associated with the event
 * @buffer: The ring buffer that the event is being written to
 * @event: The event meta data in the ring buffer
 * @entry: The event itself
 * @irq_flags: The state of the interrupts at the start of the event
 * @pc: The state of the preempt count at the start of the event.
 *
 * This is a helper function to handle triggers that require data
 * from the event itself. It also tests the event against filters and
 * if the event is soft disabled and should be discarded.
 */
static inline void
event_trigger_unlock_commit(struct trace_event_file *file,
			    struct ring_buffer *buffer,
			    struct ring_buffer_event *event,
			    void *entry, unsigned long irq_flags, int pc)
{
	enum event_trigger_type tt = ETT_NONE;

	if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);

	if (tt)
		event_triggers_post_call(file, tt);
}
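/*
 * Example (illustrative, with hypothetical names: event_type, entry->foo):
 * the usual reserve/fill/commit sequence around this helper:
 *
 *	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
 *						event_type, sizeof(*entry),
 *						irq_flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->foo = bar;	(fill in the event's fields)
 *	event_trigger_unlock_commit(trace_file, buffer, event, entry,
 *				    irq_flags, pc);
 */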
/**
 * event_trigger_unlock_commit_regs - handle triggers and finish event commit
 * @file: The file pointer associated with the event
 * @buffer: The ring buffer that the event is being written to
 * @event: The event meta data in the ring buffer
 * @entry: The event itself
 * @irq_flags: The state of the interrupts at the start of the event
 * @pc: The state of the preempt count at the start of the event.
 *
 * This is a helper function to handle triggers that require data
 * from the event itself. It also tests the event against filters and
 * if the event is soft disabled and should be discarded.
 *
 * Same as event_trigger_unlock_commit() but calls
 * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit().
 */
static inline void
event_trigger_unlock_commit_regs(struct trace_event_file *file,
				 struct ring_buffer *buffer,
				 struct ring_buffer_event *event,
				 void *entry, unsigned long irq_flags, int pc,
				 struct pt_regs *regs)
{
	enum event_trigger_type tt = ETT_NONE;

	if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
		trace_buffer_unlock_commit_regs(buffer, event,
						irq_flags, pc, regs);

	if (tt)
		event_triggers_post_call(file, tt);
}

#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
	return 1;
}
#endif

enum {
	FILTER_OTHER = 0,
	FILTER_STATIC_STRING,
	FILTER_DYN_STRING,
	FILTER_PTR_STRING,
	FILTER_TRACE_FN,
};

extern int trace_event_raw_init(struct trace_event_call *call);
extern int trace_define_field(struct trace_event_call *call, const char *type,
			      const char *name, int offset, int size,
			      int is_signed, int filter_type);
extern int trace_add_event_call(struct trace_event_call *call);
extern int trace_remove_event_call(struct trace_event_call *call);

#define is_signed_type(type)	(((type)(-1)) < (type)1)

int trace_set_clr_event(const char *system, const char *event, int set);

/*
 * The double __builtin_constant_p is because gcc will give us an error
 * if we try to allocate the static variable to fmt if it is not a
 * constant. Even with the outer if statement optimized out.
 */
#define event_trace_printk(ip, fmt, args...)				\
do {									\
	__trace_printk_check_format(fmt, ##args);			\
	tracing_record_cmdline(current);				\
	if (__builtin_constant_p(fmt)) {				\
		static const char *trace_printk_fmt			\
		  __attribute__((section("__trace_printk_fmt"))) =	\
			__builtin_constant_p(fmt) ? fmt : NULL;		\
									\
		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
	} else								\
		__trace_printk(ip, fmt, ##args);			\
} while (0)

#ifdef CONFIG_PERF_EVENTS
struct perf_event;

DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);

extern int perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int perf_trace_add(struct perf_event *event, int flags);
extern void perf_trace_del(struct perf_event *event, int flags);
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
				     char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
extern void *perf_trace_buf_prepare(int size, unsigned short type,
				    struct pt_regs **regs, int *rctxp);

static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
		      u64 count, struct pt_regs *regs, void *head,
		      struct task_struct *task)
{
	perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task);
}
#endif

#endif /* _LINUX_TRACE_EVENT_H */