Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
perf_counter: Set the CONFIG_PERF_COUNTERS default to y if CONFIG_PROFILING=y
perf: Fix read buffer overflow
perf top: Add mwait_idle_with_hints to skip_symbols[]
perf tools: Fix faulty check
perf report: Update for the new FORK/EXIT events
perf_counter: Full task tracing
perf_counter: Collapse inherit on read()
tracing, perf_counter: Add help text to CONFIG_EVENT_PROFILE
perf_counter tools: Fix link errors with older toolchains

 9 files changed, 130 insertions(+), 56 deletions(-)
include/linux/perf_counter.h | +12 -1

···
 				freq           :  1, /* use freq, not period        */
 				inherit_stat   :  1, /* per task counts             */
 				enable_on_exec :  1, /* next exec enables           */
+				task           :  1, /* trace fork/exit             */

-				__reserved_1   : 51;
+				__reserved_1   : 50;

 	__u32			wakeup_events;	/* wakeup every n events */
 	__u32			__reserved_2;
···
 	/*
 	 * struct {
 	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 * };
+	 */
+	PERF_EVENT_EXIT			= 4,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
 	 *	u64				time;
 	 *	u64				id;
 	 *	u64				stream_id;
···
 	/*
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
 	 * };
 	 */
 	PERF_EVENT_FORK			= 7,
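The EXIT record carries the same payload as FORK, so one decoder can handle both. Below is a minimal sketch of how a ring-buffer consumer might decode these records, assuming a locally defined struct task_event that mirrors the layout documented above; the struct and handle_task_event() are illustrative, not part of the exported ABI.

#include <stdint.h>
#include <stdio.h>
#include <linux/perf_counter.h>

/* Mirrors the documented layout of PERF_EVENT_FORK / PERF_EVENT_EXIT. */
struct task_event {
	struct perf_event_header header;
	uint32_t pid, ppid;
	uint32_t tid, ptid;
};

/* 'ev' points at one record pulled out of the mmap()ed ring buffer. */
static void handle_task_event(const struct perf_event_header *ev)
{
	const struct task_event *te = (const struct task_event *)ev;

	switch (ev->type) {
	case PERF_EVENT_FORK:
		printf("fork: parent (%u:%u) -> child (%u:%u)\n",
		       te->ppid, te->ptid, te->pid, te->tid);
		break;
	case PERF_EVENT_EXIT:
		printf("exit: (%u:%u)\n", te->pid, te->tid);
		break;
	}
}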
init/Kconfig | +10 -1

···

 config PERF_COUNTERS
 	bool "Kernel Performance Counters"
+	default y if PROFILING
 	depends on HAVE_PERF_COUNTERS
 	select ANON_INODES
 	help
···
 	  Say Y if unsure.

 config EVENT_PROFILE
-	bool "Tracepoint profile sources"
+	bool "Tracepoint profiling sources"
 	depends on PERF_COUNTERS && EVENT_TRACING
 	default y
+	help
+	  Allow the use of tracepoints as software performance counters.
+
+	  When this is enabled, you can create perf counters based on
+	  tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
+	  found in debugfs://tracing/events/*/*/id. (The -e/--events
+	  option to the perf tool can parse and interpret symbolic
+	  tracepoints, in the subsystem:tracepoint_name format.)

 endmenu
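As a rough sketch of what the new help text describes, a counter can be opened directly on a tracepoint by passing PERF_TYPE_TRACEPOINT and the tracepoint's id as attr.config. The helper below is illustrative only and assumes __NR_perf_counter_open is defined for the target architecture.

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>

/* Open a counter on one tracepoint for task 'pid', on any CPU. */
static int open_tracepoint_counter(uint64_t tracepoint_id, pid_t pid)
{
	struct perf_counter_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type   = PERF_TYPE_TRACEPOINT;
	attr.size   = sizeof(attr);
	attr.config = tracepoint_id;	/* from debugfs .../events/<subsys>/<name>/id */

	/* no counter group, no flags */
	return syscall(__NR_perf_counter_open, &attr, pid, -1, -1, 0);
}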
kernel/fork.c | +1 -3

···
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
+	perf_counter_fork(p);
 	return p;

 bad_fork_free_pid:
···
 		p->vfork_done = &vfork;
 		init_completion(&vfork);
 	}
-
-	if (!(clone_flags & CLONE_THREAD))
-		perf_counter_fork(p);

 	audit_finish_fork(p);
 	tracehook_report_clone(regs, clone_flags, nr, p);
kernel/perf_counter.c | +84 -43

···
 static atomic_t nr_counters __read_mostly;
 static atomic_t nr_mmap_counters __read_mostly;
 static atomic_t nr_comm_counters __read_mostly;
+static atomic_t nr_task_counters __read_mostly;

 /*
  * perf counter paranoia level:
···
 			atomic_dec(&nr_mmap_counters);
 		if (counter->attr.comm)
 			atomic_dec(&nr_comm_counters);
+		if (counter->attr.task)
+			atomic_dec(&nr_task_counters);
 	}

 	if (counter->destroy)
···
 	return 0;
 }

+static u64 perf_counter_read_tree(struct perf_counter *counter)
+{
+	struct perf_counter *child;
+	u64 total = 0;
+
+	total += perf_counter_read(counter);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		total += perf_counter_read(child);
+
+	return total;
+}
+
 /*
  * Read the performance counter - simple non blocking version for now
  */
···
 	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->child_mutex);
-	values[0] = perf_counter_read(counter);
+	values[0] = perf_counter_read_tree(counter);
 	n = 1;
 	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
 		values[n++] = counter->total_time_enabled +
···
 }

 /*
- * fork tracking
+ * task tracking -- fork/exit
+ *
+ * enabled by: attr.comm | attr.mmap | attr.task
  */

-struct perf_fork_event {
+struct perf_task_event {
 	struct task_struct	*task;

 	struct {
···

 		u32			pid;
 		u32			ppid;
+		u32			tid;
+		u32			ptid;
 	} event;
 };

-static void perf_counter_fork_output(struct perf_counter *counter,
-				     struct perf_fork_event *fork_event)
+static void perf_counter_task_output(struct perf_counter *counter,
+				     struct perf_task_event *task_event)
 {
 	struct perf_output_handle handle;
-	int size = fork_event->event.header.size;
-	struct task_struct *task = fork_event->task;
+	int size = task_event->event.header.size;
+	struct task_struct *task = task_event->task;
 	int ret = perf_output_begin(&handle, counter, size, 0, 0);

 	if (ret)
 		return;

-	fork_event->event.pid = perf_counter_pid(counter, task);
-	fork_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+	task_event->event.pid = perf_counter_pid(counter, task);
+	task_event->event.ppid = perf_counter_pid(counter, task->real_parent);

-	perf_output_put(&handle, fork_event->event);
+	task_event->event.tid = perf_counter_tid(counter, task);
+	task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
+
+	perf_output_put(&handle, task_event->event);
 	perf_output_end(&handle);
 }

-static int perf_counter_fork_match(struct perf_counter *counter)
+static int perf_counter_task_match(struct perf_counter *counter)
 {
-	if (counter->attr.comm || counter->attr.mmap)
+	if (counter->attr.comm || counter->attr.mmap || counter->attr.task)
 		return 1;

 	return 0;
 }

-static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
-				  struct perf_fork_event *fork_event)
+static void perf_counter_task_ctx(struct perf_counter_context *ctx,
+				  struct perf_task_event *task_event)
 {
 	struct perf_counter *counter;
···

 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_counter_fork_match(counter))
-			perf_counter_fork_output(counter, fork_event);
+		if (perf_counter_task_match(counter))
+			perf_counter_task_output(counter, task_event);
 	}
 	rcu_read_unlock();
 }

-static void perf_counter_fork_event(struct perf_fork_event *fork_event)
+static void perf_counter_task_event(struct perf_task_event *task_event)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_counter_context *ctx;

 	cpuctx = &get_cpu_var(perf_cpu_context);
-	perf_counter_fork_ctx(&cpuctx->ctx, fork_event);
+	perf_counter_task_ctx(&cpuctx->ctx, task_event);
 	put_cpu_var(perf_cpu_context);

 	rcu_read_lock();
···
 	 */
 	ctx = rcu_dereference(current->perf_counter_ctxp);
 	if (ctx)
-		perf_counter_fork_ctx(ctx, fork_event);
+		perf_counter_task_ctx(ctx, task_event);
 	rcu_read_unlock();
+}
+
+static void perf_counter_task(struct task_struct *task, int new)
+{
+	struct perf_task_event task_event;
+
+	if (!atomic_read(&nr_comm_counters) &&
+	    !atomic_read(&nr_mmap_counters) &&
+	    !atomic_read(&nr_task_counters))
+		return;
+
+	task_event = (struct perf_task_event){
+		.task	= task,
+		.event  = {
+			.header = {
+				.type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
+				.misc = 0,
+				.size = sizeof(task_event.event),
+			},
+			/* .pid  */
+			/* .ppid */
+			/* .tid  */
+			/* .ptid */
+		},
+	};
+
+	perf_counter_task_event(&task_event);
 }

 void perf_counter_fork(struct task_struct *task)
 {
-	struct perf_fork_event fork_event;
-
-	if (!atomic_read(&nr_comm_counters) &&
-	    !atomic_read(&nr_mmap_counters))
-		return;
-
-	fork_event = (struct perf_fork_event){
-		.task	= task,
-		.event  = {
-			.header = {
-				.type = PERF_EVENT_FORK,
-				.misc = 0,
-				.size = sizeof(fork_event.event),
-			},
-			/* .pid  */
-			/* .ppid */
-		},
-	};
-
-	perf_counter_fork_event(&fork_event);
+	perf_counter_task(task, 1);
 }

 /*
···
 			atomic_inc(&nr_mmap_counters);
 		if (counter->attr.comm)
 			atomic_inc(&nr_comm_counters);
+		if (counter->attr.task)
+			atomic_inc(&nr_task_counters);
 	}

 	return counter;
···
 	struct perf_counter_context *child_ctx;
 	unsigned long flags;

-	if (likely(!child->perf_counter_ctxp))
+	if (likely(!child->perf_counter_ctxp)) {
+		perf_counter_task(child, 0);
 		return;
+	}

 	local_irq_save(flags);
 	/*
···
 	 * incremented the context's refcount before we do put_ctx below.
 	 */
 	spin_lock(&child_ctx->lock);
-	child->perf_counter_ctxp = NULL;
 	/*
 	 * If this context is a clone; unclone it so it can't get
 	 * swapped to another process while we're removing all
 	 * the counters from it.
 	 */
 	unclone_ctx(child_ctx);
-	spin_unlock(&child_ctx->lock);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&child_ctx->lock, flags);
+
+	/*
+	 * Report the task dead after unscheduling the counters so that we
+	 * won't get any samples after PERF_EVENT_EXIT. We can however still
+	 * get a few PERF_EVENT_READ events.
+	 */
+	perf_counter_task(child, 0);
+
+	child->perf_counter_ctxp = NULL;

 	/*
 	 * We can recurse on the same lock type through:
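With perf_counter_read_tree(), a single read() on an inherited counter now returns the parent's value summed with all of its children. A sketch of the matching userspace read, assuming the counter was opened with read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING (the helper name is illustrative):

#include <stdint.h>
#include <unistd.h>

/*
 * Expected layout for this read_format:
 *   values[0] = counter value (parent plus inherited children)
 *   values[1] = total time enabled
 *   values[2] = total time running
 */
static int read_counter_value(int fd, uint64_t *count,
			      uint64_t *enabled, uint64_t *running)
{
	uint64_t values[3];

	if (read(fd, values, sizeof(values)) != (ssize_t)sizeof(values))
		return -1;

	*count   = values[0];
	*enabled = values[1];
	*running = values[2];
	return 0;
}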
tools/perf/Makefile | +1 -1

···
 BUILTIN_OBJS += builtin-top.o

 PERFLIBS = $(LIB_FILE)
-EXTLIBS = -lbfd
+EXTLIBS = -lbfd -liberty

 #
 # Platform specific tweaks
tools/perf/builtin-report.c | +19 -5

···
 struct fork_event {
 	struct perf_event_header header;
 	u32 pid, ppid;
+	u32 tid, ptid;
 };

 struct lost_event {
···
 {
 	int n = 0;

-	while (pathname[n] == cwd[n] && n < cwdlen)
+	while (n < cwdlen && pathname[n] == cwd[n])
 		++n;

 	return n;
···
 }

 static int
-process_fork_event(event_t *event, unsigned long offset, unsigned long head)
+process_task_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	struct thread *thread = threads__findnew(event->fork.pid);
 	struct thread *parent = threads__findnew(event->fork.ppid);

-	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+	dprintf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
-		event->fork.pid, event->fork.ppid);
+		event->header.type == PERF_EVENT_FORK ? "FORK" : "EXIT",
+		event->fork.pid, event->fork.tid,
+		event->fork.ppid, event->fork.ptid);
+
+	/*
+	 * A thread clone will have the same PID for both
+	 * parent and child.
+	 */
+	if (thread == parent)
+		return 0;
+
+	if (event->header.type == PERF_EVENT_EXIT)
+		return 0;

 	if (!thread || !parent || thread__fork(thread, parent)) {
 		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
···
 		return process_comm_event(event, offset, head);

 	case PERF_EVENT_FORK:
-		return process_fork_event(event, offset, head);
+	case PERF_EVENT_EXIT:
+		return process_task_event(event, offset, head);

 	case PERF_EVENT_LOST:
 		return process_lost_event(event, offset, head);
tools/perf/builtin-top.c | +1

···
 	"enter_idle",
 	"exit_idle",
 	"mwait_idle",
+	"mwait_idle_with_hints",
 	"ppc64_runlatch_off",
 	"pseries_dedicated_idle_sleep",
 	NULL
tools/perf/util/quote.c | +1 -1

···
 		strbuf_addch(out, '"');
 	if (prefix) {
 		int off = 0;
-		while (prefix[off] && off < len && prefix[off] == in[off])
+		while (off < len && prefix[off] && prefix[off] == in[off])
 			if (prefix[off] == '/') {
 				prefix += off + 1;
 				in += off + 1;
tools/perf/util/symbol.c | +1 -1

···
 		goto out_elf_end;

 	secstrs = elf_getdata(sec_strndx, NULL);
-	if (symstrs == NULL)
+	if (secstrs == NULL)
 		goto out_elf_end;

 	nr_syms = shdr.sh_size / shdr.sh_entsize;