Merge branch 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  perf_counter: Fix/complete ftrace event records sampling
  perf_counter, ftrace: Fix perf_counter integration
  tracing/filters: Always free pred on filter_add_subsystem_pred() failure
  tracing/filters: Don't use pred on alloc failure
  ring-buffer: Fix memleak in ring_buffer_free()
  tracing: Fix recordmcount.pl to handle sections with only weak functions
  ring-buffer: Fix advance of reader in rb_buffer_peek()
  tracing: do not use functions starting with .L in recordmcount.pl
  ring-buffer: do not disable ring buffer on oops_in_progress
  ring-buffer: fix check of try_to_discard result

10 files changed, 207 insertions(+), 50 deletions(-)

include/linux/ftrace_event.h | +3 -1

···
 	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
 };
 
-
+void tracing_generic_entry_update(struct trace_entry *entry,
+				  unsigned long flags,
+				  int pc);
 struct ring_buffer_event *
 trace_current_buffer_lock_reserve(int type, unsigned long len,
 				  unsigned long flags, int pc);

include/linux/perf_counter.h | +8 -1

···
 	PERF_SAMPLE_CPU			= 1U << 7,
 	PERF_SAMPLE_PERIOD		= 1U << 8,
 	PERF_SAMPLE_STREAM_ID		= 1U << 9,
+	PERF_SAMPLE_TP_RECORD		= 1U << 10,
 
-	PERF_SAMPLE_MAX = 1U << 10,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
 };
 
 /*
···
 	__u64			ip[PERF_MAX_STACK_DEPTH];
 };
 
+struct perf_tracepoint_record {
+	int			size;
+	char			*record;
+};
+
 struct task_struct;
 
 /**
···
 	struct pt_regs		*regs;
 	u64			addr;
 	u64			period;
+	void			*private;
 };
 
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,

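The new bit is requested like the existing sample flags: a consumer sets it in the sample_type of a tracepoint counter's attr. A minimal sketch, not taken from this merge; the attr fields follow the existing perf_counter ABI and "id" is assumed to be an ftrace event id obtained elsewhere:

	struct perf_counter_attr attr = {
		.type		= PERF_TYPE_TRACEPOINT,
		.config		= id,	/* ftrace event id, assumed already known */
		.sample_period	= 1,
		.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TP_RECORD,
	};
	/*
	 * Open the counter with the usual perf counter syscall; every sample
	 * delivered on overflow should then carry the raw ftrace record too.
	 */
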
include/trace/ftrace.h | +147 -25

···
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
 
+#undef TP_perf_assign
+#define TP_perf_assign(args...)
+
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, func, print) \
 static int \
···
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+#ifdef CONFIG_EVENT_PROFILE
+
+/*
+ * Generate the functions needed for tracepoint perf_counter support.
+ *
+ * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
+ *
+ * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
+ * {
+ * 	int ret = 0;
+ *
+ * 	if (!atomic_inc_return(&event_call->profile_count))
+ * 		ret = register_trace_<call>(ftrace_profile_<call>);
+ *
+ * 	return ret;
+ * }
+ *
+ * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call)
+ * {
+ * 	if (atomic_add_negative(-1, &event->call->profile_count))
+ * 		unregister_trace_<call>(ftrace_profile_<call>);
+ * }
+ *
+ */
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
+ \
+static void ftrace_profile_##call(proto); \
+ \
+static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
+{ \
+	int ret = 0; \
+ \
+	if (!atomic_inc_return(&event_call->profile_count)) \
+		ret = register_trace_##call(ftrace_profile_##call); \
+ \
+	return ret; \
+} \
+ \
+static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
+{ \
+	if (atomic_add_negative(-1, &event_call->profile_count)) \
+		unregister_trace_##call(ftrace_profile_##call); \
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#endif
+
 /*
  * Stage 4 of the trace events.
  *
···
 #define TP_FMT(fmt, args...)	fmt "\n", ##args
 
 #ifdef CONFIG_EVENT_PROFILE
-#define _TRACE_PROFILE(call, proto, args) \
-static void ftrace_profile_##call(proto) \
-{ \
-	extern void perf_tpcounter_event(int); \
-	perf_tpcounter_event(event_##call.id); \
-} \
- \
-static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
-{ \
-	int ret = 0; \
- \
-	if (!atomic_inc_return(&event_call->profile_count)) \
-		ret = register_trace_##call(ftrace_profile_##call); \
- \
-	return ret; \
-} \
- \
-static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
-{ \
-	if (atomic_add_negative(-1, &event_call->profile_count)) \
-		unregister_trace_##call(ftrace_profile_##call); \
-}
 
 #define _TRACE_PROFILE_INIT(call) \
 	.profile_count = ATOMIC_INIT(-1), \
···
 	.profile_disable = ftrace_profile_disable_##call,
 
 #else
-#define _TRACE_PROFILE(call, proto, args)
 #define _TRACE_PROFILE_INIT(call)
 #endif
 
···
 
 #undef TRACE_EVENT
 #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
 \
 static struct ftrace_event_call event_##call; \
 \
···
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+/*
+ * Define the insertion callback to profile events
+ *
+ * The job is very similar to ftrace_raw_event_<call> except that we don't
+ * insert in the ring buffer but in a perf counter.
+ *
+ * static void ftrace_profile_<call>(proto)
+ * {
+ *	struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
+ *	struct ftrace_event_call *event_call = &event_<call>;
+ *	extern void perf_tpcounter_event(int, u64, u64, void *, int);
+ *	struct ftrace_raw_##call *entry;
+ *	u64 __addr = 0, __count = 1;
+ *	unsigned long irq_flags;
+ *	int __entry_size;
+ *	int __data_size;
+ *	int pc;
+ *
+ *	local_save_flags(irq_flags);
+ *	pc = preempt_count();
+ *
+ *	__data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
+ *	__entry_size = __data_size + sizeof(*entry);
+ *
+ *	do {
+ *		char raw_data[__entry_size]; <- allocate our sample in the stack
+ *		struct trace_entry *ent;
+ *
+ *		entry = (struct ftrace_raw_<call> *)raw_data;
+ *		ent = &entry->ent;
+ *		tracing_generic_entry_update(ent, irq_flags, pc);
+ *		ent->type = event_call->id;
+ *
+ *		<tstruct> <- do some jobs with dynamic arrays
+ *
+ *		<assign>  <- affect our values
+ *
+ *		perf_tpcounter_event(event_call->id, __addr, __count, entry,
+ *			     __entry_size); <- submit them to perf counter
+ *	} while (0);
+ *
+ * }
+ */
+
+#ifdef CONFIG_EVENT_PROFILE
+
+#undef __perf_addr
+#define __perf_addr(a) __addr = (a)
+
+#undef __perf_count
+#define __perf_count(c) __count = (c)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
+static void ftrace_profile_##call(proto) \
+{ \
+	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+	struct ftrace_event_call *event_call = &event_##call; \
+	extern void perf_tpcounter_event(int, u64, u64, void *, int); \
+	struct ftrace_raw_##call *entry; \
+	u64 __addr = 0, __count = 1; \
+	unsigned long irq_flags; \
+	int __entry_size; \
+	int __data_size; \
+	int pc; \
+ \
+	local_save_flags(irq_flags); \
+	pc = preempt_count(); \
+ \
+	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
+	__entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\
+ \
+	do { \
+		char raw_data[__entry_size]; \
+		struct trace_entry *ent; \
+ \
+		entry = (struct ftrace_raw_##call *)raw_data; \
+		ent = &entry->ent; \
+		tracing_generic_entry_update(ent, irq_flags, pc); \
+		ent->type = event_call->id; \
+ \
+		tstruct \
+ \
+		{ assign; } \
+ \
+		perf_tpcounter_event(event_call->id, __addr, __count, entry,\
+			     __entry_size); \
+	} while (0); \
+ \
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+#endif /* CONFIG_EVENT_PROFILE */
+
-#undef _TRACE_PROFILE
 #undef _TRACE_PROFILE_INIT
 

kernel/perf_counter.c | +19 -3

···
 		u64			counter;
 	} group_entry;
 	struct perf_callchain_entry	*callchain = NULL;
+	struct perf_tracepoint_record	*tp;
 	int				callchain_size = 0;
 	u64				time;
 	struct {
···
 		header.size += sizeof(u64);
 	}
 
+	if (sample_type & PERF_SAMPLE_TP_RECORD) {
+		tp = data->private;
+		header.size += tp->size;
+	}
+
 	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
 	if (ret)
 		return;
···
 			perf_output_put(&handle, nr);
 		}
 	}
+
+	if (sample_type & PERF_SAMPLE_TP_RECORD)
+		perf_output_copy(&handle, tp->record, tp->size);
 
 	perf_output_end(&handle);
 }
···
 };
 
 #ifdef CONFIG_EVENT_PROFILE
-void perf_tpcounter_event(int event_id)
+void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
+			  int entry_size)
 {
+	struct perf_tracepoint_record tp = {
+		.size = entry_size,
+		.record = record,
+	};
+
 	struct perf_sample_data data = {
 		.regs = get_irq_regs(),
-		.addr = 0,
+		.addr = addr,
+		.private = &tp,
 	};
 
 	if (!data.regs)
 		data.regs = task_pt_regs(current);
 
-	do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data);
+	do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);
 }
 EXPORT_SYMBOL_GPL(perf_tpcounter_event);

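As the output path above shows, the raw tracepoint record is copied into the sample last, after every other requested field, and its size is already accounted for in header.size. A hedged sketch of how a reader of the mmap'ed buffer would see one such sample when only PERF_SAMPLE_IP and PERF_SAMPLE_TP_RECORD are requested (the struct name is illustrative, not an ABI type):

	struct tp_sample {				/* hypothetical view of one sample */
		struct perf_event_header	header;	/* header.size includes the record below */
		u64				ip;	/* PERF_SAMPLE_IP */
		char				record[];/* tp->size raw bytes: the ftrace_raw_<call> entry */
	};

Note that the record's length is not emitted as a separate field here; a consumer has to derive it from header.size minus the sizes of the fields it asked for.
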
kernel/trace/ring_buffer.c | +7 -8

···
 
 	put_online_cpus();
 
+	kfree(buffer->buffers);
 	free_cpumask_var(buffer->cpumask);
 
 	kfree(buffer);
···
 	 */
 	RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
 
-	if (!rb_try_to_discard(cpu_buffer, event))
+	if (rb_try_to_discard(cpu_buffer, event))
 		goto out;
 
 	/*
···
 		 * the box. Return the padding, and we will release
 		 * the current locks, and try again.
 		 */
-		rb_advance_reader(cpu_buffer);
 		return event;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
···
 	 * buffer too. A one time deal is all you get from reading
 	 * the ring buffer from an NMI.
 	 */
-	if (likely(!in_nmi() && !oops_in_progress))
+	if (likely(!in_nmi()))
 		return 1;
 
 	tracing_off_permanent();
···
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
 	event = rb_buffer_peek(buffer, cpu, ts);
+	if (event && event->type_len == RINGBUF_TYPE_PADDING)
+		rb_advance_reader(cpu_buffer);
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
 	local_irq_restore(flags);
···
 		spin_lock(&cpu_buffer->reader_lock);
 
 	event = rb_buffer_peek(buffer, cpu, ts);
-	if (!event)
-		goto out_unlock;
+	if (event)
+		rb_advance_reader(cpu_buffer);
 
-	rb_advance_reader(cpu_buffer);
-
- out_unlock:
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
 	local_irq_restore(flags);

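The reader-advance changes restore the intended contract: rb_buffer_peek() itself no longer consumes anything, ring_buffer_peek() only skips padding events, and only the consume path advances the reader past a real event. A small sketch of the caller-visible behaviour, assuming the public peek/consume API:

	struct ring_buffer_event *event;
	u64 ts;

	event = ring_buffer_peek(buffer, cpu, &ts);	/* look only, nothing consumed */
	if (event)
		event = ring_buffer_consume(buffer, cpu, &ts);	/* normally the same event, now consumed */
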
kernel/trace/trace.c | +1

···
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
 		(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
 }
+EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
 						    int type,

kernel/trace/trace.h | -4

···
 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
 					  int *ent_cpu, u64 *ent_ts);
 
-void tracing_generic_entry_update(struct trace_entry *entry,
-				  unsigned long flags,
-				  int pc);
-
 void default_wait_pipe(struct trace_iterator *iter);
 void poll_wait_pipe(struct trace_iterator *iter);

kernel/trace/trace_events_filter.c | +15 -5

···
 		return -ENOSPC;
 	}
 
-	filter->preds[filter->n_preds] = pred;
-	filter->n_preds++;
-
 	list_for_each_entry(call, &ftrace_events, list) {
 
 		if (!call->define_fields)
···
 		}
 		replace_filter_string(call->filter, filter_string);
 	}
+
+	filter->preds[filter->n_preds] = pred;
+	filter->n_preds++;
 out:
 	return err;
 }
···
 	if (elt->op == OP_AND || elt->op == OP_OR) {
 		pred = create_logical_pred(elt->op);
+		if (!pred)
+			return -ENOMEM;
 		if (call) {
 			err = filter_add_pred(ps, call, pred);
 			filter_free_pred(pred);
-		} else
+		} else {
 			err = filter_add_subsystem_pred(ps, system,
 						pred, filter_string);
+			if (err)
+				filter_free_pred(pred);
+		}
 		if (err)
 			return err;
···
 	}
 
 	pred = create_pred(elt->op, operand1, operand2);
+	if (!pred)
+		return -ENOMEM;
 	if (call) {
 		err = filter_add_pred(ps, call, pred);
 		filter_free_pred(pred);
-	} else
+	} else {
 		err = filter_add_subsystem_pred(ps, system, pred,
 						filter_string);
+		if (err)
+			filter_free_pred(pred);
+	}
 	if (err)
 		return err;

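The two parser hunks encode the same ownership rule, restated here as a commented sketch (the ownership description is inferred from the fix, not spelled out in the code): the caller's pred is always freed after filter_add_pred(), but after filter_add_subsystem_pred() it is freed only on failure, and a NULL pred from create_*_pred() now bails out before either call is made.

	pred = create_pred(elt->op, operand1, operand2);
	if (!pred)
		return -ENOMEM;			/* nothing was allocated, nothing to free */
	if (call) {
		err = filter_add_pred(ps, call, pred);
		filter_free_pred(pred);		/* always ours to free */
	} else {
		err = filter_add_subsystem_pred(ps, system, pred, filter_string);
		if (err)
			filter_free_pred(pred);	/* freed only if the subsystem did not keep it */
	}
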
scripts/recordmcount.pl | +6 -3

···
 			$read_function = 0;
 		}
 		# print out any recorded offsets
-		update_funcs() if ($text_found);
+		update_funcs() if (defined($ref_func));
 
 		# reset all markers and arrays
 		$text_found = 0;
···
 			$offset = hex $1;
 		} else {
 			# if we already have a function, and this is weak, skip it
-			if (!defined($ref_func) && !defined($weak{$text})) {
+			if (!defined($ref_func) && !defined($weak{$text}) &&
+			     # PPC64 can have symbols that start with .L and
+			     # gcc considers these special. Don't use them!
+			     $text !~ /^\.L/) {
 				$ref_func = $text;
 				$offset = hex $1;
 			}
···
 }
 
 # dump out anymore offsets that may have been found
-update_funcs() if ($text_found);
+update_funcs() if (defined($ref_func));
 
 # If we did not find any mcount callers, we are done (do nothing).
 if (!$opened) {

tools/perf/builtin-record.c | +1

···
 	if (call_graph)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
+
 	attr->mmap		= track;
 	attr->comm		= track;
 	attr->inherit		= (cpu < 0) && inherit;