#ifndef _LINUX_TRACE_EVENT_H
#define _LINUX_TRACE_EVENT_H

#include <linux/ring_buffer.h>
#include <linux/trace_seq.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
#include <linux/tracepoint.h>

struct trace_array;
struct trace_buffer;
struct tracer;
struct dentry;
struct bpf_prog;

struct trace_print_flags {
	unsigned long		mask;
	const char		*name;
};

struct trace_print_flags_u64 {
	unsigned long long	mask;
	const char		*name;
};

const char *trace_print_flags_seq(struct trace_seq *p, const char *delim,
				  unsigned long flags,
				  const struct trace_print_flags *flag_array);

const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
				    const struct trace_print_flags *symbol_array);

#if BITS_PER_LONG == 32
const char *trace_print_symbols_seq_u64(struct trace_seq *p,
					unsigned long long val,
					const struct trace_print_flags_u64
								*symbol_array);
#endif

const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
				    unsigned int bitmask_size);

const char *trace_print_hex_seq(struct trace_seq *p,
				const unsigned char *buf, int len);

const char *trace_print_array_seq(struct trace_seq *p,
				  const void *buf, int count,
				  size_t el_size);

struct trace_iterator;
struct trace_event;

int trace_raw_output_prep(struct trace_iterator *iter,
			  struct trace_event *event);
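/*
 * Illustrative sketch, not part of the original header: the
 * __print_flags() helper used in TP_printk() ultimately calls
 * trace_print_flags_seq() (declared above) with a name/mask table.
 * The table contents, the {-1, NULL} terminator convention and
 * example_show_state() below are assumptions for illustration only.
 */
static const struct trace_print_flags example_state_flags[] = {
	{ 0x01,	"RUNNING" },
	{ 0x02,	"BLOCKED" },
	{ 0x04,	"PINNED"  },
	{ -1,	NULL	  }	/* terminator */
};

static inline const char *example_show_state(struct trace_seq *p,
					     unsigned long state)
{
	/* Would print e.g. "RUNNING|PINNED" for state == 0x05 */
	return trace_print_flags_seq(p, "|", state, example_state_flags);
}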
/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
 *
 *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
 */
struct trace_entry {
	unsigned short		type;
	unsigned char		flags;
	unsigned char		preempt_count;
	int			pid;
};

#define TRACE_EVENT_TYPE_MAX						\
	((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)

/*
 * Trace iterator - used by the printout routines that present trace
 * results to users; these routines may sleep, etc.:
 */
struct trace_iterator {
	struct trace_array	*tr;
	struct tracer		*trace;
	struct trace_buffer	*trace_buffer;
	void			*private;
	int			cpu_file;
	struct mutex		mutex;
	struct ring_buffer_iter	**buffer_iter;
	unsigned long		iter_flags;

	/* trace_seq for __print_flags() and __print_symbolic() etc. */
	struct trace_seq	tmp_seq;

	cpumask_var_t		started;

	/* set when the currently open file is a snapshot */
	bool			snapshot;

	/* The below is zeroed out in pipe_read */
	struct trace_seq	seq;
	struct trace_entry	*ent;
	unsigned long		lost_events;
	int			leftover;
	int			ent_size;
	int			cpu;
	u64			ts;

	loff_t			pos;
	long			idx;

	/* Any new fields added here will be zeroed out in pipe_read */
};

enum trace_iter_flags {
	TRACE_FILE_LAT_FMT	= 1,
	TRACE_FILE_ANNOTATE	= 2,
	TRACE_FILE_TIME_IN_NS	= 4,
};

typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
					      int flags,
					      struct trace_event *event);

struct trace_event_functions {
	trace_print_func	trace;
	trace_print_func	raw;
	trace_print_func	hex;
	trace_print_func	binary;
};

struct trace_event {
	struct hlist_node		node;
	struct list_head		list;
	int				type;
	struct trace_event_functions	*funcs;
};

extern int register_trace_event(struct trace_event *event);
extern int unregister_trace_event(struct trace_event *event);

/* Return values for print_line callback */
enum print_line_t {
	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
	TRACE_TYPE_HANDLED	= 1,
	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
};

/*
 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
 * simplifies those functions and keeps them in sync.
 */
static inline enum print_line_t trace_handle_return(struct trace_seq *s)
{
	return trace_seq_has_overflowed(s) ?
		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
}
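/*
 * Illustrative sketch, not part of the original header: an output
 * callback typically builds its line in iter->seq and ends with
 * trace_handle_return(), so a trace_seq overflow becomes
 * TRACE_TYPE_PARTIAL_LINE. example_trace_output() and the field it
 * prints are hypothetical.
 */
static inline enum print_line_t
example_trace_output(struct trace_iterator *iter, int flags,
		     struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "pid=%d\n", iter->ent->pid);
	return trace_handle_return(&iter->seq);
}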
void tracing_generic_entry_update(struct trace_entry *entry,
				  unsigned long flags,
				  int pc);
struct trace_event_file;

struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
				struct trace_event_file *trace_file,
				int type, unsigned long len,
				unsigned long flags, int pc);
struct ring_buffer_event *
trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer,
				  int type, unsigned long len,
				  unsigned long flags, int pc);
void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
					struct ring_buffer_event *event,
					unsigned long flags, int pc);
void trace_buffer_unlock_commit(struct ring_buffer *buffer,
				struct ring_buffer_event *event,
				unsigned long flags, int pc);
void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned long flags, int pc,
				     struct pt_regs *regs);
void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
					 struct ring_buffer_event *event);

void tracing_record_cmdline(struct task_struct *tsk);

int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);

struct event_filter;

enum trace_reg {
	TRACE_REG_REGISTER,
	TRACE_REG_UNREGISTER,
#ifdef CONFIG_PERF_EVENTS
	TRACE_REG_PERF_REGISTER,
	TRACE_REG_PERF_UNREGISTER,
	TRACE_REG_PERF_OPEN,
	TRACE_REG_PERF_CLOSE,
	TRACE_REG_PERF_ADD,
	TRACE_REG_PERF_DEL,
#endif
};

struct trace_event_call;

struct trace_event_class {
	const char		*system;
	void			*probe;
#ifdef CONFIG_PERF_EVENTS
	void			*perf_probe;
#endif
	int			(*reg)(struct trace_event_call *event,
				       enum trace_reg type, void *data);
	int			(*define_fields)(struct trace_event_call *);
	struct list_head	*(*get_fields)(struct trace_event_call *);
	struct list_head	fields;
	int			(*raw_init)(struct trace_event_call *);
};

extern int trace_event_reg(struct trace_event_call *event,
			   enum trace_reg type, void *data);

struct trace_event_buffer {
	struct ring_buffer		*buffer;
	struct ring_buffer_event	*event;
	struct trace_event_file		*trace_file;
	void				*entry;
	unsigned long			flags;
	int				pc;
};

void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
				 struct trace_event_file *trace_file,
				 unsigned long len);

void trace_event_buffer_commit(struct trace_event_buffer *fbuffer);
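/*
 * Illustrative sketch, not part of the original header: a probe pairs
 * trace_event_buffer_reserve() with trace_event_buffer_commit(),
 * filling the entry in between. struct example_entry, its "value"
 * field and example_probe() are hypothetical.
 */
struct example_entry {
	struct trace_entry	ent;
	int			value;
};

static inline void example_probe(struct trace_event_file *trace_file,
				 int value)
{
	struct trace_event_buffer fbuffer;
	struct example_entry *entry;

	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
					   sizeof(*entry));
	if (!entry)
		return;

	entry->value = value;
	trace_event_buffer_commit(&fbuffer);
}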
enum {
	TRACE_EVENT_FL_FILTERED_BIT,
	TRACE_EVENT_FL_CAP_ANY_BIT,
	TRACE_EVENT_FL_NO_SET_FILTER_BIT,
	TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
	TRACE_EVENT_FL_WAS_ENABLED_BIT,
	TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
	TRACE_EVENT_FL_TRACEPOINT_BIT,
	TRACE_EVENT_FL_KPROBE_BIT,
};

/*
 * Event flags:
 *  FILTERED	  - The event has a filter attached
 *  CAP_ANY	  - Any user can enable for perf
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
 *  WAS_ENABLED	  - Set and stays set when an event was ever enabled
 *		    (used for module unloading; if a module event is enabled,
 *		    it is best to clear the buffers that used it).
 *  USE_CALL_FILTER - For trace internal events, don't use file filter
 *  TRACEPOINT	  - Event is a tracepoint
 *  KPROBE	  - Event is a kprobe
 */
enum {
	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
	TRACE_EVENT_FL_CAP_ANY		= (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
	TRACE_EVENT_FL_NO_SET_FILTER	= (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
	TRACE_EVENT_FL_IGNORE_ENABLE	= (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
	TRACE_EVENT_FL_USE_CALL_FILTER	= (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
};

struct trace_event_call {
	struct list_head	list;
	struct trace_event_class *class;
	union {
		char			*name;
		/* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */
		struct tracepoint	*tp;
	};
	struct trace_event	event;
	char			*print_fmt;
	struct event_filter	*filter;
	void			*mod;
	void			*data;
	/*
	 *   bit 0:	filter_active
	 *   bit 1:	allow trace by non root (cap any)
	 *   bit 2:	failed to apply filter
	 *   bit 3:	trace internal event (do not enable)
	 *   bit 4:	Event was enabled by module
	 *   bit 5:	use call filter rather than file filter
	 *   bit 6:	Event is a tracepoint
	 *   bit 7:	Event is a kprobe
	 */
	int			flags; /* static flags of different events */

#ifdef CONFIG_PERF_EVENTS
	int				perf_refcount;
	struct hlist_head __percpu	*perf_events;
	struct bpf_prog			*prog;

	int	(*perf_perm)(struct trace_event_call *,
			     struct perf_event *);
#endif
};

static inline const char *
trace_event_name(struct trace_event_call *call)
{
	if (call->flags & TRACE_EVENT_FL_TRACEPOINT)
		return call->tp ? call->tp->name : NULL;
	else
		return call->name;
}

struct trace_array;
struct trace_subsystem_dir;

enum {
	EVENT_FILE_FL_ENABLED_BIT,
	EVENT_FILE_FL_RECORDED_CMD_BIT,
	EVENT_FILE_FL_FILTERED_BIT,
	EVENT_FILE_FL_NO_SET_FILTER_BIT,
	EVENT_FILE_FL_SOFT_MODE_BIT,
	EVENT_FILE_FL_SOFT_DISABLED_BIT,
	EVENT_FILE_FL_TRIGGER_MODE_BIT,
	EVENT_FILE_FL_TRIGGER_COND_BIT,
};

/*
 * Event file flags:
 *  ENABLED	  - The event is enabled
 *  RECORDED_CMD  - The comms should be recorded at sched_switch
 *  FILTERED	  - The event has a filter attached
 *  NO_SET_FILTER - Set when the filter has an error and is to be ignored
 *  SOFT_MODE	  - The event is enabled/disabled by SOFT_DISABLED
 *  SOFT_DISABLED - When set, do not trace the event (even though its
 *		    tracepoint may be enabled)
 *  TRIGGER_MODE  - When set, invoke the triggers associated with the event
 *  TRIGGER_COND  - When set, one or more triggers have an associated filter
 */
enum {
	EVENT_FILE_FL_ENABLED		= (1 << EVENT_FILE_FL_ENABLED_BIT),
	EVENT_FILE_FL_RECORDED_CMD	= (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
	EVENT_FILE_FL_FILTERED		= (1 << EVENT_FILE_FL_FILTERED_BIT),
	EVENT_FILE_FL_NO_SET_FILTER	= (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
	EVENT_FILE_FL_SOFT_MODE		= (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
	EVENT_FILE_FL_SOFT_DISABLED	= (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT),
	EVENT_FILE_FL_TRIGGER_MODE	= (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT),
	EVENT_FILE_FL_TRIGGER_COND	= (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
};

struct trace_event_file {
	struct list_head		list;
	struct trace_event_call		*event_call;
	struct event_filter		*filter;
	struct dentry			*dir;
	struct trace_array		*tr;
	struct trace_subsystem_dir	*system;
	struct list_head		triggers;

	/*
	 * 32 bit flags:
	 *   bit 0:	enabled
	 *   bit 1:	enabled cmd record
	 *   bit 2:	enable/disable with the soft disable bit
	 *   bit 3:	soft disabled
	 *   bit 4:	trigger enabled
	 *
	 * Note: The bits must be set atomically to prevent races
	 * from other writers. Reads of flags do not need to be in
	 * sync as they occur in critical sections. But the way flags
	 * is currently used, these changes do not affect the code
	 * except that when a change is made, it may have a slight
	 * delay in propagating the changes to other CPUs due to
	 * caching and such. Which is mostly OK ;-)
	 */
	unsigned long		flags;
	atomic_t		sm_ref;	/* soft-mode reference counter */
	atomic_t		tm_ref;	/* trigger-mode reference counter */
};
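/*
 * Illustrative sketch, not part of the original header: readers of
 * trace_event_file::flags combine the EVENT_FILE_FL_* masks above.
 * example_event_is_tracing() is hypothetical.
 */
static inline bool example_event_is_tracing(struct trace_event_file *file)
{
	/* ENABLED may be set while SOFT_DISABLED still suppresses output */
	return (file->flags & EVENT_FILE_FL_ENABLED) &&
	       !(file->flags & EVENT_FILE_FL_SOFT_DISABLED);
}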
#define __TRACE_EVENT_FLAGS(name, value)				\
	static int __init trace_init_flags_##name(void)		\
	{								\
		event_##name.flags |= value;				\
		return 0;						\
	}								\
	early_initcall(trace_init_flags_##name);

#define __TRACE_EVENT_PERF_PERM(name, expr...)				\
	static int perf_perm_##name(struct trace_event_call *tp_event, \
				    struct perf_event *p_event)		\
	{								\
		return ({ expr; });					\
	}								\
	static int __init trace_init_perf_perm_##name(void)		\
	{								\
		event_##name.perf_perm = &perf_perm_##name;		\
		return 0;						\
	}								\
	early_initcall(trace_init_perf_perm_##name);

#define PERF_MAX_TRACE_SIZE	2048

#define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */

enum event_trigger_type {
	ETT_NONE		= (0),
	ETT_TRACE_ONOFF		= (1 << 0),
	ETT_SNAPSHOT		= (1 << 1),
	ETT_STACKTRACE		= (1 << 2),
	ETT_EVENT_ENABLE	= (1 << 3),
};

extern int filter_match_preds(struct event_filter *filter, void *rec);

extern int filter_check_discard(struct trace_event_file *file, void *rec,
				struct ring_buffer *buffer,
				struct ring_buffer_event *event);
extern int call_filter_check_discard(struct trace_event_call *call, void *rec,
				     struct ring_buffer *buffer,
				     struct ring_buffer_event *event);
extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
						   void *rec);
extern void event_triggers_post_call(struct trace_event_file *file,
				     enum event_trigger_type tt);

/**
 * trace_trigger_soft_disabled - do triggers and test if soft disabled
 * @file: The file pointer of the event to test
 *
 * If any triggers without filters are attached to this event, they
 * will be called here. If the event is soft disabled and has no
 * triggers that require testing the fields, it will return true,
 * otherwise false.
 */
static inline bool
trace_trigger_soft_disabled(struct trace_event_file *file)
{
	unsigned long eflags = file->flags;

	if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
		if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
			event_triggers_call(file, NULL);
		if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
			return true;
	}
	return false;
}
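/*
 * Illustrative sketch, not part of the original header: a probe can
 * use trace_trigger_soft_disabled() to fire unconditional triggers and
 * bail out before reserving ring-buffer space. example_fast_probe() is
 * hypothetical.
 */
static inline void example_fast_probe(struct trace_event_file *trace_file)
{
	if (trace_trigger_soft_disabled(trace_file))
		return;

	/* ... reserve, fill in and commit the event here ... */
}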
/*
 * Helper function for event_trigger_unlock_commit{_regs}().
 * If there are event triggers attached to this event that require
 * filtering against its fields, then they will be called as the
 * entry already holds the field information of the current event.
 *
 * It also checks if the event should be discarded or not.
 * It is to be discarded if the event is soft disabled and the
 * event was only recorded to process triggers, or if the event
 * filter is active and this event did not match the filters.
 *
 * Returns true if the event is discarded, false otherwise.
 */
static inline bool
__event_trigger_test_discard(struct trace_event_file *file,
			     struct ring_buffer *buffer,
			     struct ring_buffer_event *event,
			     void *entry,
			     enum event_trigger_type *tt)
{
	unsigned long eflags = file->flags;

	if (eflags & EVENT_FILE_FL_TRIGGER_COND)
		*tt = event_triggers_call(file, entry);

	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags))
		ring_buffer_discard_commit(buffer, event);
	else if (!filter_check_discard(file, entry, buffer, event))
		return false;

	return true;
}

/**
 * event_trigger_unlock_commit - handle triggers and finish event commit
 * @file: The file pointer associated with the event
 * @buffer: The ring buffer that the event is being written to
 * @event: The event meta data in the ring buffer
 * @entry: The event itself
 * @irq_flags: The state of the interrupts at the start of the event
 * @pc: The state of the preempt count at the start of the event.
 *
 * This is a helper function to handle triggers that require data
 * from the event itself. It also tests the event against filters, and
 * checks whether the event is soft disabled and should be discarded.
 */
static inline void
event_trigger_unlock_commit(struct trace_event_file *file,
			    struct ring_buffer *buffer,
			    struct ring_buffer_event *event,
			    void *entry, unsigned long irq_flags, int pc)
{
	enum event_trigger_type tt = ETT_NONE;

	if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);

	if (tt)
		event_triggers_post_call(file, tt);
}
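/*
 * Illustrative sketch, not part of the original header: a typical
 * commit path pairs trace_event_buffer_lock_reserve() with
 * event_trigger_unlock_commit(). It reuses the hypothetical struct
 * example_entry from the sketch above; example_commit_path() and the
 * value written are likewise made up.
 */
static inline void example_commit_path(struct trace_event_file *trace_file)
{
	struct ring_buffer *buffer;
	struct ring_buffer_event *event;
	struct example_entry *entry;
	unsigned long irq_flags;
	int pc;

	local_save_flags(irq_flags);
	pc = preempt_count();

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						trace_file->event_call->event.type,
						sizeof(*entry),
						irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->value = 42;

	event_trigger_unlock_commit(trace_file, buffer, event, entry,
				    irq_flags, pc);
}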
/**
 * event_trigger_unlock_commit_regs - handle triggers and finish event commit
 * @file: The file pointer associated with the event
 * @buffer: The ring buffer that the event is being written to
 * @event: The event meta data in the ring buffer
 * @entry: The event itself
 * @irq_flags: The state of the interrupts at the start of the event
 * @pc: The state of the preempt count at the start of the event.
 * @regs: The saved registers at the start of the event
 *
 * This is a helper function to handle triggers that require data
 * from the event itself. It also tests the event against filters, and
 * checks whether the event is soft disabled and should be discarded.
 *
 * Same as event_trigger_unlock_commit() but calls
 * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit().
 */
static inline void
event_trigger_unlock_commit_regs(struct trace_event_file *file,
				 struct ring_buffer *buffer,
				 struct ring_buffer_event *event,
				 void *entry, unsigned long irq_flags, int pc,
				 struct pt_regs *regs)
{
	enum event_trigger_type tt = ETT_NONE;

	if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
		trace_buffer_unlock_commit_regs(buffer, event,
						irq_flags, pc, regs);

	if (tt)
		event_triggers_post_call(file, tt);
}

#ifdef CONFIG_BPF_SYSCALL
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
#else
static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
	return 1;
}
#endif

enum {
	FILTER_OTHER = 0,
	FILTER_STATIC_STRING,
	FILTER_DYN_STRING,
	FILTER_PTR_STRING,
	FILTER_TRACE_FN,
};

extern int trace_event_raw_init(struct trace_event_call *call);
extern int trace_define_field(struct trace_event_call *call, const char *type,
			      const char *name, int offset, int size,
			      int is_signed, int filter_type);
extern int trace_add_event_call(struct trace_event_call *call);
extern int trace_remove_event_call(struct trace_event_call *call);

#define is_signed_type(type)	(((type)(-1)) < (type)1)

int trace_set_clr_event(const char *system, const char *event, int set);

/*
 * The double __builtin_constant_p is because gcc will give us an error
 * if we try to assign a non-constant fmt to the static variable, even
 * with the outer if statement optimized out.
 */
#define event_trace_printk(ip, fmt, args...)				\
do {									\
	__trace_printk_check_format(fmt, ##args);			\
	tracing_record_cmdline(current);				\
	if (__builtin_constant_p(fmt)) {				\
		static const char *trace_printk_fmt			\
		  __attribute__((section("__trace_printk_fmt"))) =	\
			__builtin_constant_p(fmt) ? fmt : NULL;		\
									\
		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
	} else								\
		__trace_printk(ip, fmt, ##args);			\
} while (0)

#ifdef CONFIG_PERF_EVENTS
struct perf_event;

DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);

extern int perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int perf_trace_add(struct perf_event *event, int flags);
extern void perf_trace_del(struct perf_event *event, int flags);
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
				     char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
extern void *perf_trace_buf_prepare(int size, unsigned short type,
				    struct pt_regs **regs, int *rctxp);

static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
		      u64 count, struct pt_regs *regs, void *head,
		      struct task_struct *task)
{
	perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task);
}
#endif

#endif /* _LINUX_TRACE_EVENT_H */