/* at v4.4 20 kB view raw */
1 2#ifndef _LINUX_TRACE_EVENT_H 3#define _LINUX_TRACE_EVENT_H 4 5#include <linux/ring_buffer.h> 6#include <linux/trace_seq.h> 7#include <linux/percpu.h> 8#include <linux/hardirq.h> 9#include <linux/perf_event.h> 10#include <linux/tracepoint.h> 11 12struct trace_array; 13struct trace_buffer; 14struct tracer; 15struct dentry; 16struct bpf_prog; 17 18struct trace_print_flags { 19 unsigned long mask; 20 const char *name; 21}; 22 23struct trace_print_flags_u64 { 24 unsigned long long mask; 25 const char *name; 26}; 27 28const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, 29 unsigned long flags, 30 const struct trace_print_flags *flag_array); 31 32const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val, 33 const struct trace_print_flags *symbol_array); 34 35#if BITS_PER_LONG == 32 36const char *trace_print_symbols_seq_u64(struct trace_seq *p, 37 unsigned long long val, 38 const struct trace_print_flags_u64 39 *symbol_array); 40#endif 41 42const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, 43 unsigned int bitmask_size); 44 45const char *trace_print_hex_seq(struct trace_seq *p, 46 const unsigned char *buf, int len); 47 48const char *trace_print_array_seq(struct trace_seq *p, 49 const void *buf, int count, 50 size_t el_size); 51 52struct trace_iterator; 53struct trace_event; 54 55int trace_raw_output_prep(struct trace_iterator *iter, 56 struct trace_event *event); 57 58/* 59 * The trace entry - the most basic unit of tracing. 
This is what 60 * is printed in the end as a single line in the trace output, such as: 61 * 62 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter 63 */ 64struct trace_entry { 65 unsigned short type; 66 unsigned char flags; 67 unsigned char preempt_count; 68 int pid; 69}; 70 71#define TRACE_EVENT_TYPE_MAX \ 72 ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) 73 74/* 75 * Trace iterator - used by printout routines who present trace 76 * results to users and which routines might sleep, etc: 77 */ 78struct trace_iterator { 79 struct trace_array *tr; 80 struct tracer *trace; 81 struct trace_buffer *trace_buffer; 82 void *private; 83 int cpu_file; 84 struct mutex mutex; 85 struct ring_buffer_iter **buffer_iter; 86 unsigned long iter_flags; 87 88 /* trace_seq for __print_flags() and __print_symbolic() etc. */ 89 struct trace_seq tmp_seq; 90 91 cpumask_var_t started; 92 93 /* it's true when current open file is snapshot */ 94 bool snapshot; 95 96 /* The below is zeroed out in pipe_read */ 97 struct trace_seq seq; 98 struct trace_entry *ent; 99 unsigned long lost_events; 100 int leftover; 101 int ent_size; 102 int cpu; 103 u64 ts; 104 105 loff_t pos; 106 long idx; 107 108 /* All new field here will be zeroed out in pipe_read */ 109}; 110 111enum trace_iter_flags { 112 TRACE_FILE_LAT_FMT = 1, 113 TRACE_FILE_ANNOTATE = 2, 114 TRACE_FILE_TIME_IN_NS = 4, 115}; 116 117 118typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, 119 int flags, struct trace_event *event); 120 121struct trace_event_functions { 122 trace_print_func trace; 123 trace_print_func raw; 124 trace_print_func hex; 125 trace_print_func binary; 126}; 127 128struct trace_event { 129 struct hlist_node node; 130 struct list_head list; 131 int type; 132 struct trace_event_functions *funcs; 133}; 134 135extern int register_trace_event(struct trace_event *event); 136extern int unregister_trace_event(struct trace_event *event); 137 138/* Return values for print_line callback */ 139enum 
print_line_t { 140 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ 141 TRACE_TYPE_HANDLED = 1, 142 TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ 143 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ 144}; 145 146/* 147 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq 148 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function 149 * simplifies those functions and keeps them in sync. 150 */ 151static inline enum print_line_t trace_handle_return(struct trace_seq *s) 152{ 153 return trace_seq_has_overflowed(s) ? 154 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; 155} 156 157void tracing_generic_entry_update(struct trace_entry *entry, 158 unsigned long flags, 159 int pc); 160struct trace_event_file; 161 162struct ring_buffer_event * 163trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, 164 struct trace_event_file *trace_file, 165 int type, unsigned long len, 166 unsigned long flags, int pc); 167struct ring_buffer_event * 168trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, 169 int type, unsigned long len, 170 unsigned long flags, int pc); 171void trace_buffer_unlock_commit(struct trace_array *tr, 172 struct ring_buffer *buffer, 173 struct ring_buffer_event *event, 174 unsigned long flags, int pc); 175void trace_buffer_unlock_commit_regs(struct trace_array *tr, 176 struct ring_buffer *buffer, 177 struct ring_buffer_event *event, 178 unsigned long flags, int pc, 179 struct pt_regs *regs); 180void trace_current_buffer_discard_commit(struct ring_buffer *buffer, 181 struct ring_buffer_event *event); 182 183void tracing_record_cmdline(struct task_struct *tsk); 184 185int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); 186 187struct event_filter; 188 189enum trace_reg { 190 TRACE_REG_REGISTER, 191 TRACE_REG_UNREGISTER, 192#ifdef CONFIG_PERF_EVENTS 193 TRACE_REG_PERF_REGISTER, 194 TRACE_REG_PERF_UNREGISTER, 195 TRACE_REG_PERF_OPEN, 196 
TRACE_REG_PERF_CLOSE, 197 TRACE_REG_PERF_ADD, 198 TRACE_REG_PERF_DEL, 199#endif 200}; 201 202struct trace_event_call; 203 204struct trace_event_class { 205 const char *system; 206 void *probe; 207#ifdef CONFIG_PERF_EVENTS 208 void *perf_probe; 209#endif 210 int (*reg)(struct trace_event_call *event, 211 enum trace_reg type, void *data); 212 int (*define_fields)(struct trace_event_call *); 213 struct list_head *(*get_fields)(struct trace_event_call *); 214 struct list_head fields; 215 int (*raw_init)(struct trace_event_call *); 216}; 217 218extern int trace_event_reg(struct trace_event_call *event, 219 enum trace_reg type, void *data); 220 221struct trace_event_buffer { 222 struct ring_buffer *buffer; 223 struct ring_buffer_event *event; 224 struct trace_event_file *trace_file; 225 void *entry; 226 unsigned long flags; 227 int pc; 228}; 229 230void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, 231 struct trace_event_file *trace_file, 232 unsigned long len); 233 234void trace_event_buffer_commit(struct trace_event_buffer *fbuffer); 235 236enum { 237 TRACE_EVENT_FL_FILTERED_BIT, 238 TRACE_EVENT_FL_CAP_ANY_BIT, 239 TRACE_EVENT_FL_NO_SET_FILTER_BIT, 240 TRACE_EVENT_FL_IGNORE_ENABLE_BIT, 241 TRACE_EVENT_FL_WAS_ENABLED_BIT, 242 TRACE_EVENT_FL_USE_CALL_FILTER_BIT, 243 TRACE_EVENT_FL_TRACEPOINT_BIT, 244 TRACE_EVENT_FL_KPROBE_BIT, 245 TRACE_EVENT_FL_UPROBE_BIT, 246}; 247 248/* 249 * Event flags: 250 * FILTERED - The event has a filter attached 251 * CAP_ANY - Any user can enable for perf 252 * NO_SET_FILTER - Set when filter has error and is to be ignored 253 * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file 254 * WAS_ENABLED - Set and stays set when an event was ever enabled 255 * (used for module unloading, if a module event is enabled, 256 * it is best to clear the buffers that used it). 
257 * USE_CALL_FILTER - For trace internal events, don't use file filter 258 * TRACEPOINT - Event is a tracepoint 259 * KPROBE - Event is a kprobe 260 * UPROBE - Event is a uprobe 261 */ 262enum { 263 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), 264 TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), 265 TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), 266 TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), 267 TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), 268 TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), 269 TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), 270 TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), 271 TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), 272}; 273 274#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) 275 276struct trace_event_call { 277 struct list_head list; 278 struct trace_event_class *class; 279 union { 280 char *name; 281 /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */ 282 struct tracepoint *tp; 283 }; 284 struct trace_event event; 285 char *print_fmt; 286 struct event_filter *filter; 287 void *mod; 288 void *data; 289 /* 290 * bit 0: filter_active 291 * bit 1: allow trace by non root (cap any) 292 * bit 2: failed to apply filter 293 * bit 3: trace internal event (do not enable) 294 * bit 4: Event was enabled by module 295 * bit 5: use call filter rather than file filter 296 * bit 6: Event is a tracepoint 297 */ 298 int flags; /* static flags of different events */ 299 300#ifdef CONFIG_PERF_EVENTS 301 int perf_refcount; 302 struct hlist_head __percpu *perf_events; 303 struct bpf_prog *prog; 304 305 int (*perf_perm)(struct trace_event_call *, 306 struct perf_event *); 307#endif 308}; 309 310static inline const char * 311trace_event_name(struct trace_event_call *call) 312{ 313 if (call->flags & TRACE_EVENT_FL_TRACEPOINT) 314 return call->tp ? 
call->tp->name : NULL; 315 else 316 return call->name; 317} 318 319struct trace_array; 320struct trace_subsystem_dir; 321 322enum { 323 EVENT_FILE_FL_ENABLED_BIT, 324 EVENT_FILE_FL_RECORDED_CMD_BIT, 325 EVENT_FILE_FL_FILTERED_BIT, 326 EVENT_FILE_FL_NO_SET_FILTER_BIT, 327 EVENT_FILE_FL_SOFT_MODE_BIT, 328 EVENT_FILE_FL_SOFT_DISABLED_BIT, 329 EVENT_FILE_FL_TRIGGER_MODE_BIT, 330 EVENT_FILE_FL_TRIGGER_COND_BIT, 331 EVENT_FILE_FL_PID_FILTER_BIT, 332}; 333 334/* 335 * Event file flags: 336 * ENABLED - The event is enabled 337 * RECORDED_CMD - The comms should be recorded at sched_switch 338 * FILTERED - The event has a filter attached 339 * NO_SET_FILTER - Set when filter has error and is to be ignored 340 * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED 341 * SOFT_DISABLED - When set, do not trace the event (even though its 342 * tracepoint may be enabled) 343 * TRIGGER_MODE - When set, invoke the triggers associated with the event 344 * TRIGGER_COND - When set, one or more triggers has an associated filter 345 * PID_FILTER - When set, the event is filtered based on pid 346 */ 347enum { 348 EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), 349 EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT), 350 EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), 351 EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), 352 EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), 353 EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT), 354 EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), 355 EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), 356 EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), 357}; 358 359struct trace_event_file { 360 struct list_head list; 361 struct trace_event_call *event_call; 362 struct event_filter *filter; 363 struct dentry *dir; 364 struct trace_array *tr; 365 struct trace_subsystem_dir *system; 366 struct list_head triggers; 
367 368 /* 369 * 32 bit flags: 370 * bit 0: enabled 371 * bit 1: enabled cmd record 372 * bit 2: enable/disable with the soft disable bit 373 * bit 3: soft disabled 374 * bit 4: trigger enabled 375 * 376 * Note: The bits must be set atomically to prevent races 377 * from other writers. Reads of flags do not need to be in 378 * sync as they occur in critical sections. But the way flags 379 * is currently used, these changes do not affect the code 380 * except that when a change is made, it may have a slight 381 * delay in propagating the changes to other CPUs due to 382 * caching and such. Which is mostly OK ;-) 383 */ 384 unsigned long flags; 385 atomic_t sm_ref; /* soft-mode reference counter */ 386 atomic_t tm_ref; /* trigger-mode reference counter */ 387}; 388 389#define __TRACE_EVENT_FLAGS(name, value) \ 390 static int __init trace_init_flags_##name(void) \ 391 { \ 392 event_##name.flags |= value; \ 393 return 0; \ 394 } \ 395 early_initcall(trace_init_flags_##name); 396 397#define __TRACE_EVENT_PERF_PERM(name, expr...) 
\ 398 static int perf_perm_##name(struct trace_event_call *tp_event, \ 399 struct perf_event *p_event) \ 400 { \ 401 return ({ expr; }); \ 402 } \ 403 static int __init trace_init_perf_perm_##name(void) \ 404 { \ 405 event_##name.perf_perm = &perf_perm_##name; \ 406 return 0; \ 407 } \ 408 early_initcall(trace_init_perf_perm_##name); 409 410#define PERF_MAX_TRACE_SIZE 2048 411 412#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ 413 414enum event_trigger_type { 415 ETT_NONE = (0), 416 ETT_TRACE_ONOFF = (1 << 0), 417 ETT_SNAPSHOT = (1 << 1), 418 ETT_STACKTRACE = (1 << 2), 419 ETT_EVENT_ENABLE = (1 << 3), 420}; 421 422extern int filter_match_preds(struct event_filter *filter, void *rec); 423 424extern int filter_check_discard(struct trace_event_file *file, void *rec, 425 struct ring_buffer *buffer, 426 struct ring_buffer_event *event); 427extern int call_filter_check_discard(struct trace_event_call *call, void *rec, 428 struct ring_buffer *buffer, 429 struct ring_buffer_event *event); 430extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, 431 void *rec); 432extern void event_triggers_post_call(struct trace_event_file *file, 433 enum event_trigger_type tt); 434 435bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); 436 437/** 438 * trace_trigger_soft_disabled - do triggers and test if soft disabled 439 * @file: The file pointer of the event to test 440 * 441 * If any triggers without filters are attached to this event, they 442 * will be called here. If the event is soft disabled and has no 443 * triggers that require testing the fields, it will return true, 444 * otherwise false. 
445 */ 446static inline bool 447trace_trigger_soft_disabled(struct trace_event_file *file) 448{ 449 unsigned long eflags = file->flags; 450 451 if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { 452 if (eflags & EVENT_FILE_FL_TRIGGER_MODE) 453 event_triggers_call(file, NULL); 454 if (eflags & EVENT_FILE_FL_SOFT_DISABLED) 455 return true; 456 if (eflags & EVENT_FILE_FL_PID_FILTER) 457 return trace_event_ignore_this_pid(file); 458 } 459 return false; 460} 461 462/* 463 * Helper function for event_trigger_unlock_commit{_regs}(). 464 * If there are event triggers attached to this event that requires 465 * filtering against its fields, then they wil be called as the 466 * entry already holds the field information of the current event. 467 * 468 * It also checks if the event should be discarded or not. 469 * It is to be discarded if the event is soft disabled and the 470 * event was only recorded to process triggers, or if the event 471 * filter is active and this event did not match the filters. 472 * 473 * Returns true if the event is discarded, false otherwise. 
474 */ 475static inline bool 476__event_trigger_test_discard(struct trace_event_file *file, 477 struct ring_buffer *buffer, 478 struct ring_buffer_event *event, 479 void *entry, 480 enum event_trigger_type *tt) 481{ 482 unsigned long eflags = file->flags; 483 484 if (eflags & EVENT_FILE_FL_TRIGGER_COND) 485 *tt = event_triggers_call(file, entry); 486 487 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags)) 488 ring_buffer_discard_commit(buffer, event); 489 else if (!filter_check_discard(file, entry, buffer, event)) 490 return false; 491 492 return true; 493} 494 495/** 496 * event_trigger_unlock_commit - handle triggers and finish event commit 497 * @file: The file pointer assoctiated to the event 498 * @buffer: The ring buffer that the event is being written to 499 * @event: The event meta data in the ring buffer 500 * @entry: The event itself 501 * @irq_flags: The state of the interrupts at the start of the event 502 * @pc: The state of the preempt count at the start of the event. 503 * 504 * This is a helper function to handle triggers that require data 505 * from the event itself. It also tests the event against filters and 506 * if the event is soft disabled and should be discarded. 
507 */ 508static inline void 509event_trigger_unlock_commit(struct trace_event_file *file, 510 struct ring_buffer *buffer, 511 struct ring_buffer_event *event, 512 void *entry, unsigned long irq_flags, int pc) 513{ 514 enum event_trigger_type tt = ETT_NONE; 515 516 if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) 517 trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); 518 519 if (tt) 520 event_triggers_post_call(file, tt); 521} 522 523/** 524 * event_trigger_unlock_commit_regs - handle triggers and finish event commit 525 * @file: The file pointer assoctiated to the event 526 * @buffer: The ring buffer that the event is being written to 527 * @event: The event meta data in the ring buffer 528 * @entry: The event itself 529 * @irq_flags: The state of the interrupts at the start of the event 530 * @pc: The state of the preempt count at the start of the event. 531 * 532 * This is a helper function to handle triggers that require data 533 * from the event itself. It also tests the event against filters and 534 * if the event is soft disabled and should be discarded. 535 * 536 * Same as event_trigger_unlock_commit() but calls 537 * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). 
538 */ 539static inline void 540event_trigger_unlock_commit_regs(struct trace_event_file *file, 541 struct ring_buffer *buffer, 542 struct ring_buffer_event *event, 543 void *entry, unsigned long irq_flags, int pc, 544 struct pt_regs *regs) 545{ 546 enum event_trigger_type tt = ETT_NONE; 547 548 if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) 549 trace_buffer_unlock_commit_regs(file->tr, buffer, event, 550 irq_flags, pc, regs); 551 552 if (tt) 553 event_triggers_post_call(file, tt); 554} 555 556#ifdef CONFIG_BPF_EVENTS 557unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); 558#else 559static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) 560{ 561 return 1; 562} 563#endif 564 565enum { 566 FILTER_OTHER = 0, 567 FILTER_STATIC_STRING, 568 FILTER_DYN_STRING, 569 FILTER_PTR_STRING, 570 FILTER_TRACE_FN, 571}; 572 573extern int trace_event_raw_init(struct trace_event_call *call); 574extern int trace_define_field(struct trace_event_call *call, const char *type, 575 const char *name, int offset, int size, 576 int is_signed, int filter_type); 577extern int trace_add_event_call(struct trace_event_call *call); 578extern int trace_remove_event_call(struct trace_event_call *call); 579 580#define is_signed_type(type) (((type)(-1)) < (type)1) 581 582int trace_set_clr_event(const char *system, const char *event, int set); 583 584/* 585 * The double __builtin_constant_p is because gcc will give us an error 586 * if we try to allocate the static variable to fmt if it is not a 587 * constant. Even with the outer if statement optimizing out. 588 */ 589#define event_trace_printk(ip, fmt, args...) \ 590do { \ 591 __trace_printk_check_format(fmt, ##args); \ 592 tracing_record_cmdline(current); \ 593 if (__builtin_constant_p(fmt)) { \ 594 static const char *trace_printk_fmt \ 595 __attribute__((section("__trace_printk_fmt"))) = \ 596 __builtin_constant_p(fmt) ? 
fmt : NULL; \ 597 \ 598 __trace_bprintk(ip, trace_printk_fmt, ##args); \ 599 } else \ 600 __trace_printk(ip, fmt, ##args); \ 601} while (0) 602 603#ifdef CONFIG_PERF_EVENTS 604struct perf_event; 605 606DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); 607 608extern int perf_trace_init(struct perf_event *event); 609extern void perf_trace_destroy(struct perf_event *event); 610extern int perf_trace_add(struct perf_event *event, int flags); 611extern void perf_trace_del(struct perf_event *event, int flags); 612extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, 613 char *filter_str); 614extern void ftrace_profile_free_filter(struct perf_event *event); 615extern void *perf_trace_buf_prepare(int size, unsigned short type, 616 struct pt_regs **regs, int *rctxp); 617 618static inline void 619perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, 620 u64 count, struct pt_regs *regs, void *head, 621 struct task_struct *task) 622{ 623 perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task); 624} 625#endif 626 627#endif /* _LINUX_TRACE_EVENT_H */