Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (47 commits)
perf report: Add --symbols parameter
perf report: Add --comms parameter
perf report: Add --dsos parameter
perf_counter tools: Adjust only prelinked symbol's addresses
perf_counter: Provide a way to enable counters on exec
perf_counter tools: Reduce perf stat measurement overhead/skew
perf stat: Use percentages for scaling output
perf_counter, x86: Update x86_pmu after WARN()
perf stat: Micro-optimize the code: memcpy is only required if no event is selected and !null_run
perf stat: Improve output
perf stat: Fix multi-run stats
perf stat: Add -n/--null option to run without counters
perf_counter tools: Remove dead code
perf_counter: Complete counter swap
perf report: Print sorted callchains per histogram entries
perf_counter tools: Prepare a small callchain framework
perf record: Fix unhandled io return value
perf_counter tools: Add alias for 'l1d' and 'l1i'
perf-report: Add bare minimum PERF_EVENT_READ parsing
perf-report: Add modes for inherited stats and no-samples
...

+1602 -420
+2
arch/powerpc/include/asm/perf_counter.h
··· 61 61 extern unsigned long perf_misc_flags(struct pt_regs *regs); 62 62 extern unsigned long perf_instruction_pointer(struct pt_regs *regs); 63 63 64 + #define PERF_COUNTER_INDEX_OFFSET 1 65 + 64 66 /* 65 67 * Only override the default definitions in include/linux/perf_counter.h 66 68 * if we have hardware PMU support.
+3
arch/x86/include/asm/perf_counter.h
··· 87 87 #ifdef CONFIG_PERF_COUNTERS 88 88 extern void init_hw_perf_counters(void); 89 89 extern void perf_counters_lapic_init(void); 90 + 91 + #define PERF_COUNTER_INDEX_OFFSET 0 92 + 90 93 #else 91 94 static inline void init_hw_perf_counters(void) { } 92 95 static inline void perf_counters_lapic_init(void) { }
+10 -12
arch/x86/kernel/cpu/perf_counter.c
··· 401 401 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ 402 402 }, 403 403 [ C(OP_WRITE) ] = { 404 - [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */ 404 + [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ 405 405 [ C(RESULT_MISS) ] = 0, 406 406 }, 407 407 [ C(OP_PREFETCH) ] = { ··· 912 912 err = checking_wrmsrl(hwc->counter_base + idx, 913 913 (u64)(-left) & x86_pmu.counter_mask); 914 914 915 + perf_counter_update_userpage(counter); 916 + 915 917 return ret; 916 918 } 917 919 ··· 969 967 unsigned int event; 970 968 971 969 if (!x86_pmu.num_counters_fixed) 972 - return -1; 973 - 974 - /* 975 - * Quirk, IA32_FIXED_CTRs do not work on current Atom processors: 976 - */ 977 - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 978 - boot_cpu_data.x86_model == 28) 979 970 return -1; 980 971 981 972 event = hwc->config & ARCH_PERFMON_EVENT_MASK; ··· 1035 1040 1036 1041 x86_perf_counter_set_period(counter, hwc, idx); 1037 1042 x86_pmu.enable(hwc, idx); 1043 + 1044 + perf_counter_update_userpage(counter); 1038 1045 1039 1046 return 0; 1040 1047 } ··· 1130 1133 x86_perf_counter_update(counter, hwc, idx); 1131 1134 cpuc->counters[idx] = NULL; 1132 1135 clear_bit(idx, cpuc->used_mask); 1136 + 1137 + perf_counter_update_userpage(counter); 1133 1138 } 1134 1139 1135 1140 /* ··· 1427 1428 */ 1428 1429 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); 1429 1430 1430 - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 1431 - 1432 1431 /* 1433 1432 * Install the hw-cache-events table: 1434 1433 */ ··· 1496 1499 pr_cont("%s PMU driver.\n", x86_pmu.name); 1497 1500 1498 1501 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 1499 - x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1500 1502 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", 1501 1503 x86_pmu.num_counters, X86_PMC_MAX_GENERIC); 1504 + x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1502 1505 } 1503 1506 perf_counter_mask = (1 << x86_pmu.num_counters) - 1; 1504 1507 perf_max_counters = x86_pmu.num_counters; 1505 1508 1506 1509 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1507 - x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; 1508 1510 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", 1509 1511 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); 1512 + x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; 1510 1513 } 1511 1514 1512 1515 perf_counter_mask |= 1513 1516 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; 1517 + x86_pmu.intel_ctrl = perf_counter_mask; 1514 1518 1515 1519 perf_counters_lapic_init(); 1516 1520 register_die_notifier(&perf_counter_nmi_notifier);
+39 -7
include/linux/perf_counter.h
··· 178 178 mmap : 1, /* include mmap data */ 179 179 comm : 1, /* include comm data */ 180 180 freq : 1, /* use freq, not period */ 181 + inherit_stat : 1, /* per task counts */ 182 + enable_on_exec : 1, /* next exec enables */ 181 183 182 - __reserved_1 : 53; 184 + __reserved_1 : 51; 183 185 184 186 __u32 wakeup_events; /* wakeup every n events */ 185 187 __u32 __reserved_2; ··· 234 232 __u32 lock; /* seqlock for synchronization */ 235 233 __u32 index; /* hardware counter identifier */ 236 234 __s64 offset; /* add to hardware counter value */ 235 + __u64 time_enabled; /* time counter active */ 236 + __u64 time_running; /* time counter on cpu */ 237 + 238 + /* 239 + * Hole for extension of the self monitor capabilities 240 + */ 241 + 242 + __u64 __reserved[123]; /* align to 1k */ 237 243 238 244 /* 239 245 * Control data for the mmap() data buffer. ··· 263 253 #define PERF_EVENT_MISC_KERNEL (1 << 0) 264 254 #define PERF_EVENT_MISC_USER (2 << 0) 265 255 #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) 266 - #define PERF_EVENT_MISC_OVERFLOW (1 << 2) 267 256 268 257 struct perf_event_header { 269 258 __u32 type; ··· 336 327 PERF_EVENT_FORK = 7, 337 328 338 329 /* 339 - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field 340 - * will be PERF_SAMPLE_* 341 - * 330 + * struct { 331 + * struct perf_event_header header; 332 + * u32 pid, tid; 333 + * u64 value; 334 + * { u64 time_enabled; } && PERF_FORMAT_ENABLED 335 + * { u64 time_running; } && PERF_FORMAT_RUNNING 336 + * { u64 parent_id; } && PERF_FORMAT_ID 337 + * }; 338 + */ 339 + PERF_EVENT_READ = 8, 340 + 341 + /* 342 342 * struct { 343 343 * struct perf_event_header header; 344 344 * ··· 355 337 * { u32 pid, tid; } && PERF_SAMPLE_TID 356 338 * { u64 time; } && PERF_SAMPLE_TIME 357 339 * { u64 addr; } && PERF_SAMPLE_ADDR 358 - * { u64 config; } && PERF_SAMPLE_CONFIG 340 + * { u64 id; } && PERF_SAMPLE_ID 359 341 * { u32 cpu, res; } && PERF_SAMPLE_CPU 342 + * { u64 period; } && PERF_SAMPLE_PERIOD 360 343 * 361 344 * { u64 nr; 362 345 * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP ··· 366 347 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN 367 348 * }; 368 349 */ 350 + PERF_EVENT_SAMPLE = 9, 351 + 352 + PERF_EVENT_MAX, /* non-ABI */ 369 353 }; 370 354 371 355 enum perf_callchain_context { ··· 604 582 int nr_counters; 605 583 int nr_active; 606 584 int is_active; 585 + int nr_stat; 607 586 atomic_t refcount; 608 587 struct task_struct *task; 609 588 ··· 692 669 (counter->attr.type != PERF_TYPE_HW_CACHE); 693 670 } 694 671 695 - extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); 672 + extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; 673 + 674 + extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); 675 + 676 + static inline void 677 + perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) 678 + { 679 + if (atomic_read(&perf_swcounter_enabled[event])) 680 + __perf_swcounter_event(event, nr, nmi, regs, addr); 681 + } 696 682 697 683 extern void __perf_counter_mmap(struct vm_area_struct *vma); 698 684
+271 -49
kernel/perf_counter.c
··· 236 236 237 237 list_add_rcu(&counter->event_entry, &ctx->event_list); 238 238 ctx->nr_counters++; 239 + if (counter->attr.inherit_stat) 240 + ctx->nr_stat++; 239 241 } 240 242 241 243 /* ··· 252 250 if (list_empty(&counter->list_entry)) 253 251 return; 254 252 ctx->nr_counters--; 253 + if (counter->attr.inherit_stat) 254 + ctx->nr_stat--; 255 255 256 256 list_del_init(&counter->list_entry); 257 257 list_del_rcu(&counter->event_entry); ··· 1010 1006 && !ctx1->pin_count && !ctx2->pin_count; 1011 1007 } 1012 1008 1009 + static void __perf_counter_read(void *counter); 1010 + 1011 + static void __perf_counter_sync_stat(struct perf_counter *counter, 1012 + struct perf_counter *next_counter) 1013 + { 1014 + u64 value; 1015 + 1016 + if (!counter->attr.inherit_stat) 1017 + return; 1018 + 1019 + /* 1020 + * Update the counter value, we cannot use perf_counter_read() 1021 + * because we're in the middle of a context switch and have IRQs 1022 + * disabled, which upsets smp_call_function_single(), however 1023 + * we know the counter must be on the current CPU, therefore we 1024 + * don't need to use it. 1025 + */ 1026 + switch (counter->state) { 1027 + case PERF_COUNTER_STATE_ACTIVE: 1028 + __perf_counter_read(counter); 1029 + break; 1030 + 1031 + case PERF_COUNTER_STATE_INACTIVE: 1032 + update_counter_times(counter); 1033 + break; 1034 + 1035 + default: 1036 + break; 1037 + } 1038 + 1039 + /* 1040 + * In order to keep per-task stats reliable we need to flip the counter 1041 + * values when we flip the contexts. 1042 + */ 1043 + value = atomic64_read(&next_counter->count); 1044 + value = atomic64_xchg(&counter->count, value); 1045 + atomic64_set(&next_counter->count, value); 1046 + 1047 + swap(counter->total_time_enabled, next_counter->total_time_enabled); 1048 + swap(counter->total_time_running, next_counter->total_time_running); 1049 + 1050 + /* 1051 + * Since we swizzled the values, update the user visible data too. 1052 + */ 1053 + perf_counter_update_userpage(counter); 1054 + perf_counter_update_userpage(next_counter); 1055 + } 1056 + 1057 + #define list_next_entry(pos, member) \ 1058 + list_entry(pos->member.next, typeof(*pos), member) 1059 + 1060 + static void perf_counter_sync_stat(struct perf_counter_context *ctx, 1061 + struct perf_counter_context *next_ctx) 1062 + { 1063 + struct perf_counter *counter, *next_counter; 1064 + 1065 + if (!ctx->nr_stat) 1066 + return; 1067 + 1068 + counter = list_first_entry(&ctx->event_list, 1069 + struct perf_counter, event_entry); 1070 + 1071 + next_counter = list_first_entry(&next_ctx->event_list, 1072 + struct perf_counter, event_entry); 1073 + 1074 + while (&counter->event_entry != &ctx->event_list && 1075 + &next_counter->event_entry != &next_ctx->event_list) { 1076 + 1077 + __perf_counter_sync_stat(counter, next_counter); 1078 + 1079 + counter = list_next_entry(counter, event_entry); 1080 + next_counter = list_next_entry(counter, event_entry); 1081 + } 1082 + } 1083 + 1013 1084 /* 1014 1085 * Called from scheduler to remove the counters of the current task, 1015 1086 * with interrupts disabled. ··· 1140 1061 ctx->task = next; 1141 1062 next_ctx->task = task; 1142 1063 do_switch = 0; 1064 + 1065 + perf_counter_sync_stat(ctx, next_ctx); 1143 1066 } 1144 1067 spin_unlock(&next_ctx->lock); 1145 1068 spin_unlock(&ctx->lock); ··· 1429 1348 } 1430 1349 1431 1350 /* 1351 + * Enable all of a task's counters that have been marked enable-on-exec. 1352 + * This expects task == current. 1353 + */ 1354 + static void perf_counter_enable_on_exec(struct task_struct *task) 1355 + { 1356 + struct perf_counter_context *ctx; 1357 + struct perf_counter *counter; 1358 + unsigned long flags; 1359 + int enabled = 0; 1360 + 1361 + local_irq_save(flags); 1362 + ctx = task->perf_counter_ctxp; 1363 + if (!ctx || !ctx->nr_counters) 1364 + goto out; 1365 + 1366 + __perf_counter_task_sched_out(ctx); 1367 + 1368 + spin_lock(&ctx->lock); 1369 + 1370 + list_for_each_entry(counter, &ctx->counter_list, list_entry) { 1371 + if (!counter->attr.enable_on_exec) 1372 + continue; 1373 + counter->attr.enable_on_exec = 0; 1374 + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) 1375 + continue; 1376 + counter->state = PERF_COUNTER_STATE_INACTIVE; 1377 + counter->tstamp_enabled = 1378 + ctx->time - counter->total_time_enabled; 1379 + enabled = 1; 1380 + } 1381 + 1382 + /* 1383 + * Unclone this context if we enabled any counter. 1384 + */ 1385 + if (enabled && ctx->parent_ctx) { 1386 + put_ctx(ctx->parent_ctx); 1387 + ctx->parent_ctx = NULL; 1388 + } 1389 + 1390 + spin_unlock(&ctx->lock); 1391 + 1392 + perf_counter_task_sched_in(task, smp_processor_id()); 1393 + out: 1394 + local_irq_restore(flags); 1395 + } 1396 + 1397 + /* 1432 1398 * Cross CPU call to read the hardware counter 1433 1399 */ 1434 - static void __read(void *info) 1400 + static void __perf_counter_read(void *info) 1435 1401 { 1436 1402 struct perf_counter *counter = info; 1437 1403 struct perf_counter_context *ctx = counter->ctx; ··· 1500 1372 */ 1501 1373 if (counter->state == PERF_COUNTER_STATE_ACTIVE) { 1502 1374 smp_call_function_single(counter->oncpu, 1503 - __read, counter, 1); 1375 + __perf_counter_read, counter, 1); 1504 1376 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { 1505 1377 update_counter_times(counter); 1506 1378 } ··· 1636 1508 { 1637 1509 perf_pending_sync(counter); 1638 1510 1639 - atomic_dec(&nr_counters); 1640 - if (counter->attr.mmap) 1641 - atomic_dec(&nr_mmap_counters); 1642 - if (counter->attr.comm) 1643 - atomic_dec(&nr_comm_counters); 1511 + if (!counter->parent) { 1512 + atomic_dec(&nr_counters); 1513 + if (counter->attr.mmap) 1514 + atomic_dec(&nr_mmap_counters); 1515 + if (counter->attr.comm) 1516 + atomic_dec(&nr_comm_counters); 1517 + } 1644 1518 1645 1519 if (counter->destroy) 1646 1520 counter->destroy(counter); ··· 1881 1751 return 0; 1882 1752 } 1883 1753 1754 + static int perf_counter_index(struct perf_counter *counter) 1755 + { 1756 + if (counter->state != PERF_COUNTER_STATE_ACTIVE) 1757 + return 0; 1758 + 1759 + return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET; 1760 + } 1761 + 1884 1762 /* 1885 1763 * Callers need to ensure there can be no nesting of this function, otherwise 1886 1764 * the seqlock logic goes bad. We can not serialize this because the arch ··· 1913 1775 preempt_disable(); 1914 1776 ++userpg->lock; 1915 1777 barrier(); 1916 - userpg->index = counter->hw.idx; 1778 + userpg->index = perf_counter_index(counter); 1917 1779 userpg->offset = atomic64_read(&counter->count); 1918 1780 if (counter->state == PERF_COUNTER_STATE_ACTIVE) 1919 1781 userpg->offset -= atomic64_read(&counter->hw.prev_count); 1782 + 1783 + userpg->time_enabled = counter->total_time_enabled + 1784 + atomic64_read(&counter->child_total_time_enabled); 1785 + 1786 + userpg->time_running = counter->total_time_running + 1787 + atomic64_read(&counter->child_total_time_running); 1920 1788 1921 1789 barrier(); 1922 1790 ++userpg->lock; ··· 2627 2483 u32 cpu, reserved; 2628 2484 } cpu_entry; 2629 2485 2630 - header.type = 0; 2486 + header.type = PERF_EVENT_SAMPLE; 2631 2487 header.size = sizeof(header); 2632 2488 2633 - header.misc = PERF_EVENT_MISC_OVERFLOW; 2489 + header.misc = 0; 2634 2490 header.misc |= perf_misc_flags(data->regs); 2635 2491 2636 2492 if (sample_type & PERF_SAMPLE_IP) { 2637 2493 ip = perf_instruction_pointer(data->regs); 2638 - header.type |= PERF_SAMPLE_IP; 2639 2494 header.size += sizeof(ip); 2640 2495 } 2641 2496 ··· 2643 2500 tid_entry.pid = perf_counter_pid(counter, current); 2644 2501 tid_entry.tid = perf_counter_tid(counter, current); 2645 2502 2646 - header.type |= PERF_SAMPLE_TID; 2647 2503 header.size += sizeof(tid_entry); 2648 2504 } 2649 2505 ··· 2652 2510 */ 2653 2511 time = sched_clock(); 2654 2512 2655 - header.type |= PERF_SAMPLE_TIME; 2656 2513 header.size += sizeof(u64); 2657 2514 } 2658 2515 2659 - if (sample_type & PERF_SAMPLE_ADDR) { 2660 - header.type |= PERF_SAMPLE_ADDR; 2516 + if (sample_type & PERF_SAMPLE_ADDR) 2661 2517 header.size += sizeof(u64); 2662 - } 2663 2518 2664 - if (sample_type & PERF_SAMPLE_ID) { 2665 - header.type |= PERF_SAMPLE_ID; 2519 + if (sample_type & PERF_SAMPLE_ID) 2666 2520 header.size += sizeof(u64); 2667 - } 2668 2521 2669 2522 if (sample_type & PERF_SAMPLE_CPU) { 2670 - header.type |= PERF_SAMPLE_CPU; 2671 2523 header.size += sizeof(cpu_entry); 2672 2524 2673 2525 cpu_entry.cpu = raw_smp_processor_id(); 2674 2526 } 2675 2527 2676 - if (sample_type & PERF_SAMPLE_PERIOD) { 2677 - header.type |= PERF_SAMPLE_PERIOD; 2528 + if (sample_type & PERF_SAMPLE_PERIOD) 2678 2529 header.size += sizeof(u64); 2679 - } 2680 2530 2681 2531 if (sample_type & PERF_SAMPLE_GROUP) { 2682 - header.type |= PERF_SAMPLE_GROUP; 2683 2532 header.size += sizeof(u64) + 2684 2533 counter->nr_siblings * sizeof(group_entry); 2685 2534 } ··· 2680 2547 2681 2548 if (callchain) { 2682 2549 callchain_size = (1 + callchain->nr) * sizeof(u64); 2683 - 2684 - header.type |= PERF_SAMPLE_CALLCHAIN; 2685 2550 header.size += callchain_size; 2686 - } 2551 + } else 2552 + header.size += sizeof(u64); 2687 2553 } 2688 2554 2689 2555 ret = perf_output_begin(&handle, counter, header.size, nmi, 1); ··· 2733 2601 } 2734 2602 } 2735 2603 2736 - if (callchain) 2737 - perf_output_copy(&handle, callchain, callchain_size); 2604 + if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2605 + if (callchain) 2606 + perf_output_copy(&handle, callchain, callchain_size); 2607 + else { 2608 + u64 nr = 0; 2609 + perf_output_put(&handle, nr); 2610 + } 2611 + } 2738 2612 2613 + perf_output_end(&handle); 2614 + } 2615 + 2616 + /* 2617 + * read event 2618 + */ 2619 + 2620 + struct perf_read_event { 2621 + struct perf_event_header header; 2622 + 2623 + u32 pid; 2624 + u32 tid; 2625 + u64 value; 2626 + u64 format[3]; 2627 + }; 2628 + 2629 + static void 2630 + perf_counter_read_event(struct perf_counter *counter, 2631 + struct task_struct *task) 2632 + { 2633 + struct perf_output_handle handle; 2634 + struct perf_read_event event = { 2635 + .header = { 2636 + .type = PERF_EVENT_READ, 2637 + .misc = 0, 2638 + .size = sizeof(event) - sizeof(event.format), 2639 + }, 2640 + .pid = perf_counter_pid(counter, task), 2641 + .tid = perf_counter_tid(counter, task), 2642 + .value = atomic64_read(&counter->count), 2643 + }; 2644 + int ret, i = 0; 2645 + 2646 + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 2647 + event.header.size += sizeof(u64); 2648 + event.format[i++] = counter->total_time_enabled; 2649 + } 2650 + 2651 + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { 2652 + event.header.size += sizeof(u64); 2653 + event.format[i++] = counter->total_time_running; 2654 + } 2655 + 2656 + if (counter->attr.read_format & PERF_FORMAT_ID) { 2657 + u64 id; 2658 + 2659 + event.header.size += sizeof(u64); 2660 + if (counter->parent) 2661 + id = counter->parent->id; 2662 + else 2663 + id = counter->id; 2664 + 2665 + event.format[i++] = id; 2666 + } 2667 + 2668 + ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); 2669 + if (ret) 2670 + return; 2671 + 2672 + perf_output_copy(&handle, &event, event.header.size); 2739 2673 perf_output_end(&handle); 2740 2674 } 2741 2675 ··· 2995 2797 void perf_counter_comm(struct task_struct *task) 2996 2798 { 2997 2799 struct perf_comm_event comm_event; 2800 + 2801 + if (task->perf_counter_ctxp) 2802 + perf_counter_enable_on_exec(task); 2998 2803 2999 2804 if (!atomic_read(&nr_comm_counters)) 3000 2805 return; ··· 3518 3317 put_cpu_var(perf_cpu_context); 3519 3318 } 3520 3319 3521 - void 3522 - perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) 3320 + void __perf_swcounter_event(u32 event, u64 nr, int nmi, 3321 + struct pt_regs *regs, u64 addr) 3523 3322 { 3524 3323 struct perf_sample_data data = { 3525 3324 .regs = regs, ··· 3710 3509 } 3711 3510 #endif 3712 3511 3512 + atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; 3513 + 3514 + static void sw_perf_counter_destroy(struct perf_counter *counter) 3515 + { 3516 + u64 event = counter->attr.config; 3517 + 3518 + WARN_ON(counter->parent); 3519 + 3520 + atomic_dec(&perf_swcounter_enabled[event]); 3521 + } 3522 + 3713 3523 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) 3714 3524 { 3715 3525 const struct pmu *pmu = NULL; 3526 + u64 event = counter->attr.config; 3716 3527 3717 3528 /* 3718 3529 * Software counters (currently) can't in general distinguish ··· 3733 3520 * to be kernel events, and page faults are never hypervisor 3734 3521 * events. 3735 3522 */ 3736 - switch (counter->attr.config) { 3523 + switch (event) { 3737 3524 case PERF_COUNT_SW_CPU_CLOCK: 3738 3525 pmu = &perf_ops_cpu_clock; 3739 3526 ··· 3754 3541 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 3755 3542 case PERF_COUNT_SW_CONTEXT_SWITCHES: 3756 3543 case PERF_COUNT_SW_CPU_MIGRATIONS: 3544 + if (!counter->parent) { 3545 + atomic_inc(&perf_swcounter_enabled[event]); 3546 + counter->destroy = sw_perf_counter_destroy; 3547 + } 3757 3548 pmu = &perf_ops_generic; 3758 3549 break; 3759 3550 } ··· 3773 3556 int cpu, 3774 3557 struct perf_counter_context *ctx, 3775 3558 struct perf_counter *group_leader, 3559 + struct perf_counter *parent_counter, 3776 3560 gfp_t gfpflags) 3777 3561 { 3778 3562 const struct pmu *pmu; ··· 3808 3590 counter->pmu = NULL; 3809 3591 counter->ctx = ctx; 3810 3592 counter->oncpu = -1; 3593 + 3594 + counter->parent = parent_counter; 3811 3595 3812 3596 counter->ns = get_pid_ns(current->nsproxy->pid_ns); 3813 3597 counter->id = atomic64_inc_return(&perf_counter_id); ··· 3868 3648 3869 3649 counter->pmu = pmu; 3870 3650 3871 - atomic_inc(&nr_counters); 3872 - if (counter->attr.mmap) 3873 - atomic_inc(&nr_mmap_counters); 3874 - if (counter->attr.comm) 3875 - atomic_inc(&nr_comm_counters); 3651 + if (!counter->parent) { 3652 + atomic_inc(&nr_counters); 3653 + if (counter->attr.mmap) 3654 + atomic_inc(&nr_mmap_counters); 3655 + if (counter->attr.comm) 3656 + atomic_inc(&nr_comm_counters); 3657 + } 3876 3658 3877 3659 return counter; 3878 3660 } ··· 4037 3815 } 4038 3816 4039 3817 counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, 4040 - GFP_KERNEL); 3818 + NULL, GFP_KERNEL); 4041 3819 ret = PTR_ERR(counter); 4042 3820 if (IS_ERR(counter)) 4043 3821 goto err_put_context; ··· 4103 3881 4104 3882 child_counter = perf_counter_alloc(&parent_counter->attr, 4105 3883 parent_counter->cpu, child_ctx, 4106 - group_leader, GFP_KERNEL); 3884 + group_leader, parent_counter, 3885 + GFP_KERNEL); 4107 3886 if (IS_ERR(child_counter)) 4108 3887 return child_counter; 4109 3888 get_ctx(child_ctx); ··· 4126 3903 * Link it up in the child's context: 4127 3904 */ 4128 3905 add_counter_to_ctx(child_counter, child_ctx); 4129 - 4130 - child_counter->parent = parent_counter; 4131 - /* 4132 - * inherit into child's child as well: 4133 - */ 4134 - child_counter->attr.inherit = 1; 4135 3906 4136 3907 /* 4137 3908 * Get a reference to the parent filp - we will fput it ··· 4170 3953 } 4171 3954 4172 3955 static void sync_child_counter(struct perf_counter *child_counter, 4173 - struct perf_counter *parent_counter) 3956 + struct task_struct *child) 4174 3957 { 3958 + struct perf_counter *parent_counter = child_counter->parent; 4175 3959 u64 child_val; 3960 + 3961 + if (child_counter->attr.inherit_stat) 3962 + perf_counter_read_event(child_counter, child); 4176 3963 4177 3964 child_val = atomic64_read(&child_counter->count); 4178 3965 ··· 4206 3985 4207 3986 static void 4208 3987 __perf_counter_exit_task(struct perf_counter *child_counter, 4209 - struct perf_counter_context *child_ctx) 3988 + struct perf_counter_context *child_ctx, 3989 + struct task_struct *child) 4210 3990 { 4211 3991 struct perf_counter *parent_counter; 4212 3992 ··· 4221 3999 * counters need to be zapped - but otherwise linger. 4222 4000 */ 4223 4001 if (parent_counter) { 4224 - sync_child_counter(child_counter, parent_counter); 4002 + sync_child_counter(child_counter, child); 4225 4003 free_counter(child_counter); 4226 4004 } 4227 4005 } ··· 4283 4061 again: 4284 4062 list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, 4285 4063 list_entry) 4286 - __perf_counter_exit_task(child_counter, child_ctx); 4064 + __perf_counter_exit_task(child_counter, child_ctx, child); 4287 4065 4288 4066 /* 4289 4067 * If the last counter was a group counter, it will have appended all
+30
tools/perf/CREDITS
··· 1 + Most of the infrastructure that 'perf' uses here has been reused 2 + from the Git project, as of version: 3 + 4 + 66996ec: Sync with 1.6.2.4 5 + 6 + Here is an (incomplete!) list of main contributors to those files 7 + in util/* and elsewhere: 8 + 9 + Alex Riesen 10 + Christian Couder 11 + Dmitry Potapov 12 + Jeff King 13 + Johannes Schindelin 14 + Johannes Sixt 15 + Junio C Hamano 16 + Linus Torvalds 17 + Matthias Kestenholz 18 + Michal Ostrowski 19 + Miklos Vajna 20 + Petr Baudis 21 + Pierre Habouzit 22 + René Scharfe 23 + Samuel Tardieu 24 + Shawn O. Pearce 25 + Steffen Prohaska 26 + Steve Haslam 27 + 28 + Thanks guys! 29 + 30 + The full history of the files can be found in the upstream Git commits.
+13 -1
tools/perf/Documentation/perf-report.txt
··· 13 13 DESCRIPTION 14 14 ----------- 15 15 This command displays the performance counter profile information recorded 16 - via perf report. 16 + via perf record. 17 17 18 18 OPTIONS 19 19 ------- 20 20 -i:: 21 21 --input=:: 22 22 Input file name. (default: perf.data) 23 + -d:: 24 + --dsos=:: 25 + Only consider symbols in these dsos. CSV that understands 26 + file://filename entries. 27 + -C:: 28 + --comms=:: 29 + Only consider symbols in these comms. CSV that understands 30 + file://filename entries. 31 + -S:: 32 + --symbols=:: 33 + Only consider these symbols. CSV that understands 34 + file://filename entries. 23 35 24 36 SEE ALSO 25 37 --------
+3 -3
tools/perf/Documentation/perf-stat.txt
··· 8 8 SYNOPSIS 9 9 -------- 10 10 [verse] 11 - 'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> 12 - 'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>] 11 + 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command> 12 + 'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>] 13 13 14 14 DESCRIPTION 15 15 ----------- ··· 40 40 -a:: 41 41 system-wide collection 42 42 43 - -l:: 43 + -S:: 44 44 scale counter values 45 45 46 46 EXAMPLES
+5 -1
tools/perf/Makefile
··· 290 290 291 291 LIB_H += ../../include/linux/perf_counter.h 292 292 LIB_H += perf.h 293 - LIB_H += types.h 293 + LIB_H += util/types.h 294 294 LIB_H += util/list.h 295 295 LIB_H += util/rbtree.h 296 296 LIB_H += util/levenshtein.h ··· 301 301 LIB_H += util/help.h 302 302 LIB_H += util/strbuf.h 303 303 LIB_H += util/string.h 304 + LIB_H += util/strlist.h 304 305 LIB_H += util/run-command.h 305 306 LIB_H += util/sigchain.h 306 307 LIB_H += util/symbol.h ··· 323 322 LIB_OBJS += util/quote.o 324 323 LIB_OBJS += util/strbuf.o 325 324 LIB_OBJS += util/string.o 325 + LIB_OBJS += util/strlist.o 326 326 LIB_OBJS += util/usage.o 327 327 LIB_OBJS += util/wrapper.o 328 328 LIB_OBJS += util/sigchain.o 329 329 LIB_OBJS += util/symbol.o 330 330 LIB_OBJS += util/color.o 331 331 LIB_OBJS += util/pager.o 332 + LIB_OBJS += util/header.o 333 + LIB_OBJS += util/callchain.o 332 334 333 335 BUILTIN_OBJS += builtin-annotate.o 334 336 BUILTIN_OBJS += builtin-help.o
+4 -4
tools/perf/builtin-annotate.c
··· 855 855 total_unknown = 0; 856 856 857 857 static int 858 - process_overflow_event(event_t *event, unsigned long offset, unsigned long head) 858 + process_sample_event(event_t *event, unsigned long offset, unsigned long head) 859 859 { 860 860 char level; 861 861 int show = 0; ··· 1013 1013 static int 1014 1014 process_event(event_t *event, unsigned long offset, unsigned long head) 1015 1015 { 1016 - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) 1017 - return process_overflow_event(event, offset, head); 1018 - 1019 1016 switch (event->header.type) { 1017 + case PERF_EVENT_SAMPLE: 1018 + return process_sample_event(event, offset, head); 1019 + 1020 1020 case PERF_EVENT_MMAP: 1021 1021 return process_mmap_event(event, offset, head); 1022 1022
+87 -40
tools/perf/builtin-record.c
··· 14 14 #include "util/parse-events.h" 15 15 #include "util/string.h" 16 16 17 + #include "util/header.h" 18 + 17 19 #include <unistd.h> 18 20 #include <sched.h> 19 21 ··· 41 39 static int append_file = 0; 42 40 static int call_graph = 0; 43 41 static int verbose = 0; 42 + static int inherit_stat = 0; 43 + static int no_samples = 0; 44 44 45 45 static long samples; 46 46 static struct timeval last_read; ··· 56 52 static int nr_cpu; 57 53 58 54 static int file_new = 1; 59 - static struct perf_file_header file_header; 55 + 56 + struct perf_header *header; 60 57 61 58 struct mmap_event { 62 59 struct perf_event_header header; ··· 311 306 continue; 312 307 pbf += n + 3; 313 308 if (*pbf == 'x') { /* vm_exec */ 314 - char *execname = strrchr(bf, ' '); 309 + char *execname = strchr(bf, '/'); 315 310 316 - if (execname == NULL || execname[1] != '/') 311 + if (execname == NULL) 317 312 continue; 318 313 319 - execname += 1; 320 314 size = strlen(execname); 321 315 execname[size - 1] = '\0'; /* Remove \n */ 322 316 memcpy(mmap_ev.filename, execname, size); ··· 333 329 fclose(fp); 334 330 } 335 331 336 - static void synthesize_samples(void) 332 + static void synthesize_all(void) 337 333 { 338 334 DIR *proc; 339 335 struct dirent dirent, *next; ··· 357 353 358 354 static int group_fd; 359 355 356 + static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr) 357 + { 358 + struct perf_header_attr *h_attr; 359 + 360 + if (nr < header->attrs) { 361 + h_attr = header->attr[nr]; 362 + } else { 363 + h_attr = perf_header_attr__new(a); 364 + perf_header__add_attr(header, h_attr); 365 + } 366 + 367 + return h_attr; 368 + } 369 + 360 370 static void create_counter(int counter, int cpu, pid_t pid) 361 371 { 362 372 struct perf_counter_attr *attr = attrs + counter; 363 - int track = 1; 373 + struct perf_header_attr *h_attr; 374 + int track = !counter; /* only the first counter needs these */ 375 + struct { 376 + u64 count; 377 + u64 time_enabled; 378 + u64 time_running; 379 + u64 id; 380 + } read_data; 381 + 382 + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 383 + PERF_FORMAT_TOTAL_TIME_RUNNING | 384 + PERF_FORMAT_ID; 364 385 365 386 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 366 387 ··· 395 366 attr->sample_freq = freq; 396 367 } 397 368 369 + if (no_samples) 370 + attr->sample_freq = 0; 371 + 372 + if (inherit_stat) 373 + attr->inherit_stat = 1; 374 + 398 375 if (call_graph) 399 376 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 400 - 401 - if (file_new) { 402 - file_header.sample_type = attr->sample_type; 403 - } else { 404 - if (file_header.sample_type != attr->sample_type) { 405 - fprintf(stderr, "incompatible append\n"); 406 - exit(-1); 407 - } 408 - } 409 377 410 378 attr->mmap = track; 411 379 attr->comm = track; 412 380 attr->inherit = (cpu < 0) && inherit; 413 381 attr->disabled = 1; 414 - 415 - track = 0; /* only the first counter needs these */ 416 382 417 383 try_again: 418 384 fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); ··· 439 415 exit(-1); 440 416 } 441 417 418 + h_attr = get_header_attr(attr, counter); 419 + 420 + if (!file_new) { 421 + if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { 422 + fprintf(stderr, "incompatible append\n"); 423 + exit(-1); 424 + } 425 + } 426 + 427 + if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) { 428 + perror("Unable to read perf file descriptor\n"); 429 + exit(-1); 430 + } 431 + 432 + perf_header_attr__add_id(h_attr, read_data.id); 433 + 442 434 assert(fd[nr_cpu][counter] >= 0); 443 435 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); 444 436 ··· 485 445 { 486 446 int counter; 487 447 488 - if (pid > 0) { 489 - pid_synthesize_comm_event(pid, 0); 490 - pid_synthesize_mmap_samples(pid); 491 - } 492 - 493 448 group_fd = -1; 494 449 for (counter = 0; counter < nr_counters; counter++) 495 450 create_counter(counter, cpu, pid); ··· 494 459 495 460 static void atexit_header(void) 496 461 { 497 - file_header.data_size += bytes_written; 462 + header->data_size += bytes_written; 498 463 499 - if (pwrite(output, &file_header, sizeof(file_header), 0) == -1) 500 - perror("failed to write on file headers"); 464 + perf_header__write(header, output); 501 465 } 502 466 503 467 static int __cmd_record(int argc, const char **argv) 504 468 { 505 469 int i, counter; 506 470 struct stat st; 507 - pid_t pid; 471 + pid_t pid = 0; 508 472 int flags; 509 473 int ret; 510 474 ··· 534 500 exit(-1); 535 501 } 536 502 537 - if (!file_new) { 538 - if (read(output, &file_header, sizeof(file_header)) == -1) { 539 - perror("failed to read file headers"); 540 - exit(-1); 541 - } 542 - 543 - lseek(output, file_header.data_size, SEEK_CUR); 544 - } 503 + if (!file_new) 504 + header = perf_header__read(output); 505 + else 506 + header = perf_header__new(); 545 507 546 508 atexit(atexit_header); 547 509 548 510 if (!system_wide) { 549 - open_counters(-1, target_pid != -1 ? target_pid : getpid()); 511 + pid = target_pid; 512 + if (pid == -1) 513 + pid = getpid(); 514 + 515 + open_counters(-1, pid); 550 516 } else for (i = 0; i < nr_cpus; i++) 551 517 open_counters(i, target_pid); 518 + 519 + if (file_new) 520 + perf_header__write(header, output); 521 + 522 + if (!system_wide) { 523 + pid_synthesize_comm_event(pid, 0); 524 + pid_synthesize_mmap_samples(pid); 525 + } else 526 + synthesize_all(); 552 527 553 528 if (target_pid == -1 && argc) { 554 529 pid = fork(); ··· 582 539 } 583 540 } 584 541 585 - if (system_wide) 586 - synthesize_samples(); 587 - 588 - while (!done) { 542 + for (;;) { 589 543 int hits = samples; 590 544 591 545 for (i = 0; i < nr_cpu; i++) { ··· 590 550 mmap_read(&mmap_array[i][counter]); 591 551 } 592 552 593 - if (hits == samples) 553 + if (hits == samples) { 554 + if (done) 555 + break; 594 556 ret = poll(event_array, nr_poll, 100); 557 + } 595 558 } 596 559 597 560 /* ··· 643 600 "do call-graph (stack chain/backtrace) recording"), 644 601 OPT_BOOLEAN('v', "verbose", &verbose, 645 602 "be more verbose (show counter open errors, etc)"), 603 + OPT_BOOLEAN('s', "stat", &inherit_stat, 604 + "per thread counts"), 605 + OPT_BOOLEAN('n', "no-samples", &no_samples, 606 + "don't sample"), 646 607 OPT_END() 647 608 }; 648 609
+195 -37
tools/perf/builtin-report.c
··· 15 15 #include "util/rbtree.h" 16 16 #include "util/symbol.h" 17 17 #include "util/string.h" 18 + #include "util/callchain.h" 19 + #include "util/strlist.h" 18 20 19 21 #include "perf.h" 22 + #include "util/header.h" 20 23 21 24 #include "util/parse-options.h" 22 25 #include "util/parse-events.h" ··· 33 30 34 31 static char default_sort_order[] = "comm,dso"; 35 32 static char *sort_order = default_sort_order; 33 + static char *dso_list_str, *comm_list_str, *sym_list_str; 34 + static struct strlist *dso_list, *comm_list, *sym_list; 36 35 37 36 static int input; 38 37 static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; ··· 56 51 static regex_t parent_regex; 57 52 58 53 static int exclude_other = 1; 54 + static int callchain; 55 + 56 + static u64 sample_type; 59 57 60 58 struct ip_event { 61 59 struct perf_event_header header; 62 60 u64 ip; 63 61 u32 pid, tid; 64 62 unsigned char __more_data[]; 65 - }; 66 - 67 - struct ip_callchain { 68 - u64 nr; 69 - u64 ips[0]; 70 63 }; 71 64 72 65 struct mmap_event { ··· 100 97 u64 lost; 101 98 }; 102 99 100 + struct read_event { 101 + struct perf_event_header header; 102 + u32 pid,tid; 103 + u64 value; 104 + u64 format[3]; 105 + }; 106 + 103 107 typedef union event_union { 104 108 struct perf_event_header header; 105 109 struct ip_event ip; ··· 115 105 struct fork_event fork; 116 106 struct period_event period; 117 107 struct lost_event lost; 108 + struct read_event read; 118 109 } event_t; 119 110 120 111 static LIST_HEAD(dsos); ··· 240 229 241 230 static inline int is_anon_memory(const char *filename) 242 231 { 243 - return strcmp(filename, "//anon") == 0; 232 + return strcmp(filename, "//anon") == 0; 244 233 } 245 234 246 235 static struct map *map__new(struct mmap_event *event) ··· 411 400 412 401 list_for_each_entry_safe(pos, tmp, &self->maps, node) { 413 402 if (map__overlap(pos, map)) { 414 - list_del_init(&pos->node); 415 - /* XXX leaks dsos */ 416 - free(pos); 403 + if (verbose >= 2) { 404 + printf("overlapping maps:\n"); 405 + map__fprintf(map, stdout); 406 + map__fprintf(pos, stdout); 407 + } 408 + 409 + if (map->start <= pos->start && map->end > pos->start) 410 + pos->start = map->end; 411 + 412 + if (map->end >= pos->end && map->start < pos->end) 413 + pos->end = map->start; 414 + 415 + if (verbose >= 2) { 416 + printf("after collision:\n"); 417 + map__fprintf(pos, stdout); 418 + } 419 + 420 + if (pos->start >= pos->end) { 421 + list_del_init(&pos->node); 422 + free(pos); 423 + } 417 424 } 418 425 } 419 426 ··· 493 464 static struct rb_root hist; 494 465 495 466 struct hist_entry { 496 - struct rb_node rb_node; 467 + struct rb_node rb_node; 497 468 498 - struct thread *thread; 499 - struct map *map; 500 - struct dso *dso; 501 - struct symbol *sym; 502 - struct symbol *parent; 503 - u64 ip; 504 - char level; 469 + struct thread *thread; 470 + struct map *map; 471 + struct dso *dso; 472 + struct symbol *sym; 473 + struct symbol *parent; 474 + u64 ip; 475 + char level; 476 + struct callchain_node callchain; 477 + struct rb_root sorted_chain; 505 478 506 - u64 count; 479 + u64 count; 507 480 }; 508 481 509 482 /* ··· 776 745 } 777 746 778 747 static size_t 748 + callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) 749 + { 750 + struct callchain_list *chain; 751 + size_t ret = 0; 752 + 753 + if (!self) 754 + return 0; 755 + 756 + ret += callchain__fprintf(fp, self->parent, total_samples); 757 + 758 + 759 + list_for_each_entry(chain, &self->val, list) 760 + ret += fprintf(fp, " %p\n", (void *)chain->ip); 761 + 762 + return ret; 763 + } 764 + 765 + static size_t 766 + hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, 767 + u64 total_samples) 768 + { 769 + struct rb_node *rb_node; 770 + struct callchain_node *chain; 771 + size_t ret = 0; 772 + 773 + rb_node = rb_first(&self->sorted_chain); 774 + while (rb_node) { 775 + double percent; 776 + 777 + chain = rb_entry(rb_node, struct callchain_node, rb_node); 778 + percent = chain->hit * 100.0 / total_samples; 779 + ret += fprintf(fp, " %6.2f%%\n", percent); 780 + ret += callchain__fprintf(fp, chain, total_samples); 781 + ret += fprintf(fp, "\n"); 782 + rb_node = rb_next(rb_node); 783 + } 784 + 785 + return ret; 786 + } 787 + 788 + 789 + static size_t 779 790 hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) 780 791 { 781 792 struct sort_entry *se; ··· 857 784 858 785 ret += fprintf(fp, "\n"); 859 786 787 + if (callchain) 788 + hist_entry_callchain__fprintf(fp, self, total_samples); 789 + 860 790 return ret; 861 791 } 862 792 ··· 873 797 { 874 798 struct dso *dso = dsop ? *dsop : NULL; 875 799 struct map *map = mapp ? *mapp : NULL; 876 - uint64_t ip = *ipp; 800 + u64 ip = *ipp; 877 801 878 802 if (!thread) 879 803 return NULL; ··· 890 814 *mapp = map; 891 815 got_map: 892 816 ip = map->map_ip(map, ip); 893 - *ipp = ip; 894 817 895 818 dso = map->dso; 896 819 } else { ··· 903 828 dso = kernel_dso; 904 829 } 905 830 dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); 831 + dprintf(" ...... map: %Lx -> %Lx\n", *ipp, ip); 832 + *ipp = ip; 906 833 907 834 if (dsop) 908 835 *dsop = dso; ··· 944 867 .level = level, 945 868 .count = count, 946 869 .parent = NULL, 870 + .sorted_chain = RB_ROOT 947 871 }; 948 872 int cmp; 949 873 ··· 987 909 988 910 if (!cmp) { 989 911 he->count += count; 912 + if (callchain) 913 + append_chain(&he->callchain, chain); 990 914 return 0; 991 915 } 992 916 ··· 1002 922 if (!he) 1003 923 return -ENOMEM; 1004 924 *he = entry; 925 + if (callchain) { 926 + callchain_init(&he->callchain); 927 + append_chain(&he->callchain, chain); 928 + } 1005 929 rb_link_node(&he->rb_node, parent, p); 1006 930 rb_insert_color(&he->rb_node, &hist); 1007 931 ··· 1081 997 struct rb_node **p = &output_hists.rb_node; 1082 998 struct rb_node *parent = NULL; 1083 999 struct hist_entry *iter; 1000 + 1001 + if (callchain) 1002 + sort_chain_to_rbtree(&he->sorted_chain, &he->callchain); 1084 1003 1085 1004 while (*p != NULL) { 1086 1005 parent = *p; ··· 1202 1115 } 1203 1116 1204 1117 static int 1205 - process_overflow_event(event_t *event, unsigned long offset, unsigned long head) 1118 + process_sample_event(event_t *event, unsigned long offset, unsigned long head) 1206 1119 { 1207 1120 char level; 1208 1121 int show = 0; ··· 1214 1127 void *more_data = event->ip.__more_data; 1215 1128 struct ip_callchain *chain = NULL; 1216 1129 1217 - if (event->header.type & PERF_SAMPLE_PERIOD) { 1130 + if (sample_type & PERF_SAMPLE_PERIOD) { 1218 1131 period = *(u64 *)more_data; 1219 1132 more_data += sizeof(u64); 1220 1133 } 1221 1134 1222 - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", 1135 + dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", 1223 1136 (void *)(offset + head), 1224 1137 (void *)(long)(event->header.size), 1225 1138 event->header.misc, ··· 1227 1140 (void *)(long)ip, 1228 1141 (long long)period); 1229 1142 1230 - if (event->header.type & PERF_SAMPLE_CALLCHAIN) { 1143 + if (sample_type & PERF_SAMPLE_CALLCHAIN) { 1231 1144 int i; 1232 1145 1233 1146 chain = (void *)more_data; ··· 1253 1166 return -1; 1254 1167 } 1255 1168 1169 + if (comm_list && !strlist__has_entry(comm_list, thread->comm)) 1170 + return 0; 1171 + 1256 1172 if (event->header.misc & PERF_EVENT_MISC_KERNEL) { 1257 1173 show = SHOW_KERNEL; 1258 1174 level = 'k'; ··· 1277 1187 1278 1188 if (show & show_mask) { 1279 1189 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); 1190 + 1191 + if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) 1192 + return 0; 1193 + 1194 + if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) 1195 + return 0; 1280 1196 1281 1197 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { 1282 1198 eprintf("problem incrementing symbol count, skipping event\n"); ··· 1424 1328 } 1425 1329 1426 1330 static int 1331 + process_read_event(event_t *event, unsigned long offset, unsigned long head) 1332 + { 1333 + dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", 1334 + (void *)(offset + head), 1335 + (void *)(long)(event->header.size), 1336 + event->read.pid, 1337 + event->read.tid, 1338 + event->read.value); 1339 + 1340 + return 0; 1341 + } 1342 + 1343 + static int 1427 1344 process_event(event_t *event, unsigned long offset, unsigned long head) 1428 1345 { 1429 1346 trace_event(event); 1430 1347 1431 - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) 1432 - return process_overflow_event(event, offset, head); 1433 - 1434 1348 switch (event->header.type) { 1349 + case PERF_EVENT_SAMPLE: 1350 + return process_sample_event(event, offset, head); 1351 + 1435 1352 case PERF_EVENT_MMAP: 1436 1353 return process_mmap_event(event, offset, head); 1437 1354 ··· 1459 1350 1460 1351 case PERF_EVENT_LOST: 1461 1352 return process_lost_event(event, offset, head); 1353 + 1354 + case PERF_EVENT_READ: 1355 + return process_read_event(event, offset, head); 1462 1356 1463 1357 /* 1464 1358 * We dont process them right now but they are fine: ··· 1478 1366 return 0; 1479 1367 } 1480 1368 1481 - static struct perf_file_header file_header; 1369 + static struct perf_header *header; 1370 + 1371 + static u64 perf_header__sample_type(void) 1372 + { 1373 + u64 sample_type = 0; 1374 + int i; 1375 + 1376 + for (i = 0; i < header->attrs; i++) { 1377 + struct perf_header_attr *attr = header->attr[i]; 1378 + 1379 + if (!sample_type) 1380 + sample_type = attr->attr.sample_type; 1381 + else if (sample_type != attr->attr.sample_type) 1382 + die("non matching sample_type"); 1383 + } 1384 + 1385 + return sample_type; 1386 + } 1482 1387 1483 1388 static int __cmd_report(void) 1484 1389 { 1485 1390 int ret, rc = EXIT_FAILURE; 1486 1391 unsigned long offset = 0; 1487 - unsigned long head = sizeof(file_header); 1392 + unsigned long head, shift; 1488 1393 struct stat stat; 1489 1394 event_t *event; 1490 1395 uint32_t size; ··· 1529 1400 exit(0); 1530 1401 } 1531 1402 1532 - if (read(input, &file_header, sizeof(file_header)) == -1) { 1533 - perror("failed to read file headers"); 1534 - exit(-1); 1535 - } 1403 + header = perf_header__read(input); 1404 + head = header->data_offset; 1536 1405 1537 - if (sort__has_parent && 1538 - !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { 1406 + sample_type = perf_header__sample_type(); 1407 + 1408 + if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { 1539 1409 fprintf(stderr, "selected --sort parent, but no callchain data\n"); 1540 1410 exit(-1); 1541 1411 } ··· 1554 1426 cwd = NULL; 1555 1427 cwdlen = 0; 1556 1428 } 1429 + 1430 + shift = page_size * (head / page_size); 1431 + offset += shift; 1432 + head -= shift; 1433 + 1557 1434 remap: 1558 1435 buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, 1559 1436 MAP_SHARED, input, offset); ··· 1575 1442 size = 8; 1576 1443 1577 1444 if (head + event->header.size >= page_size * mmap_window) { 1578 - unsigned long shift = page_size * (head / page_size); 1579 1445 int ret; 1446 + 1447 + shift = page_size * (head / page_size); 1580 1448 1581 1449 ret = munmap(buf, page_size * mmap_window); 1582 1450 assert(ret == 0); ··· 1616 1482 1617 1483 head += size; 1618 1484 1619 - if (offset + head >= sizeof(file_header) + file_header.data_size) 1485 + if (offset + head >= header->data_offset + header->data_size) 1620 1486 goto done; 1621 1487 1622 1488 if (offset + head < stat.st_size) ··· 1670 1536 "regex filter to identify parent, see: '--sort parent'"), 1671 1537 OPT_BOOLEAN('x', "exclude-other", &exclude_other, 1672 1538 "Only display entries with parent-match"), 1539 + OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), 1540 + OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", 1541 + "only consider symbols in these dsos"), 1542 + OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", 1543 + "only consider symbols in these comms"), 1544 + OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", 1545 + "only consider these symbols"), 1673 1546 OPT_END() 1674 1547 }; 1675 1548 ··· 1693 1552 } 1694 1553 1695 1554 free(str); 1555 + } 1556 + 1557 + static void setup_list(struct strlist **list, const char *list_str, 1558 + const char *list_name) 1559 + { 1560 + if (list_str) { 1561 + *list = strlist__new(true, list_str); 1562 + if (!*list) { 1563 + fprintf(stderr, "problems parsing %s list\n", 1564 + list_name); 1565 + exit(129); 1566 + } 1567 + } 1696 1568 } 1697 1569 1698 1570 int cmd_report(int argc, const char **argv, const char *prefix) ··· 1728 1574 */ 1729 1575 if (argc) 1730 1576 usage_with_options(report_usage, options); 1577 + 1578 + setup_list(&dso_list, dso_list_str, "dso"); 1579 + setup_list(&comm_list, comm_list_str, "comm"); 1580 + setup_list(&sym_list, sym_list_str, "symbol"); 1731 1581 1732 1582 setup_pager(); 1733 1583
+103 -70
tools/perf/builtin-stat.c
··· 32 32 * Wu Fengguang <fengguang.wu@intel.com> 33 33 * Mike Galbraith <efault@gmx.de> 34 34 * Paul Mackerras <paulus@samba.org> 35 + * Jaswinder Singh Rajput <jaswinder@kernel.org> 35 36 * 36 37 * Released under the GPL v2. (and only v2, not any later version) 37 38 */ ··· 46 45 #include <sys/prctl.h> 47 46 #include <math.h> 48 47 49 - static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { 48 + static struct perf_counter_attr default_attrs[] = { 50 49 51 50 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 52 51 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, ··· 60 59 61 60 }; 62 61 62 + #define MAX_RUN 100 63 + 63 64 static int system_wide = 0; 64 - static int inherit = 1; 65 65 static int verbose = 0; 66 + static int nr_cpus = 0; 67 + static int run_idx = 0; 68 + 69 + static int run_count = 1; 70 + static int inherit = 1; 71 + static int scale = 1; 72 + static int target_pid = -1; 73 + static int null_run = 0; 66 74 67 75 static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 68 - 69 - static int target_pid = -1; 70 - static int nr_cpus = 0; 71 - static unsigned int page_size; 72 - 73 - static int scale = 1; 74 - 75 - static const unsigned int default_count[] = { 76 - 1000000, 77 - 1000000, 78 - 10000, 79 - 10000, 80 - 1000000, 81 - 10000, 82 - }; 83 - 84 - #define MAX_RUN 100 85 - 86 - static int run_count = 1; 87 - static int run_idx = 0; 88 - 89 - static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; 90 - static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; 91 - 92 - //static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; 93 - 94 76 95 77 static u64 runtime_nsecs[MAX_RUN]; 96 78 static u64 walltime_nsecs[MAX_RUN]; 97 79 static u64 runtime_cycles[MAX_RUN]; 80 + 81 + static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; 82 + static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; 98 83 99 84 static u64 event_res_avg[MAX_COUNTERS][3]; 100 85 static u64 event_res_noise[MAX_COUNTERS][3]; ··· 96 109 static u64 runtime_cycles_avg; 97 110 static u64 runtime_cycles_noise; 98 111 99 - static void create_perf_stat_counter(int counter) 112 + #define ERR_PERF_OPEN \ 113 + "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" 114 + 115 + static void create_perf_stat_counter(int counter, int pid) 100 116 { 101 117 struct perf_counter_attr *attr = attrs + counter; 102 118 ··· 109 119 110 120 if (system_wide) { 111 121 int cpu; 112 - for (cpu = 0; cpu < nr_cpus; cpu ++) { 122 + for (cpu = 0; cpu < nr_cpus; cpu++) { 113 123 fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); 114 - if (fd[cpu][counter] < 0 && verbose) { 115 - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); 116 - } 124 + if (fd[cpu][counter] < 0 && verbose) 125 + fprintf(stderr, ERR_PERF_OPEN, counter, 126 + fd[cpu][counter], strerror(errno)); 117 127 } 118 128 } else { 119 - attr->inherit = inherit; 120 - attr->disabled = 1; 129 + attr->inherit = inherit; 130 + attr->disabled = 1; 131 + attr->enable_on_exec = 1; 121 132 122 - fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); 123 - if (fd[0][counter] < 0 && verbose) { 124 - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); 125 - } 133 + fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); 134 + if (fd[0][counter] < 0 && verbose) 135 + fprintf(stderr, ERR_PERF_OPEN, counter, 136 + fd[0][counter], strerror(errno)); 126 137 } 127 138 } 128 139 ··· 159 168 count[0] = count[1] = count[2] = 0; 160 169 161 170 nv = scale ? 3 : 1; 162 - for (cpu = 0; cpu < nr_cpus; cpu ++) { 171 + for (cpu = 0; cpu < nr_cpus; cpu++) { 163 172 if (fd[cpu][counter] < 0) 164 173 continue; 165 174 ··· 206 215 int status = 0; 207 216 int counter; 208 217 int pid; 218 + int child_ready_pipe[2], go_pipe[2]; 219 + char buf; 209 220 210 221 if (!system_wide) 211 222 nr_cpus = 1; 212 223 213 - for (counter = 0; counter < nr_counters; counter++) 214 - create_perf_stat_counter(counter); 215 - 216 - /* 217 - * Enable counters and exec the command: 218 - */ 219 - t0 = rdclock(); 220 - prctl(PR_TASK_PERF_COUNTERS_ENABLE); 224 + if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { 225 + perror("failed to create pipes"); 226 + exit(1); 227 + } 221 228 222 229 if ((pid = fork()) < 0) 223 230 perror("failed to fork"); 224 231 225 232 if (!pid) { 226 - if (execvp(argv[0], (char **)argv)) { 227 - perror(argv[0]); 228 - exit(-1); 229 - } 233 + close(child_ready_pipe[0]); 234 + close(go_pipe[1]); 235 + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 236 + 237 + /* 238 + * Do a dummy execvp to get the PLT entry resolved, 239 + * so we avoid the resolver overhead on the real 240 + * execvp call. 241 + */ 242 + execvp("", (char **)argv); 243 + 244 + /* 245 + * Tell the parent we're ready to go 246 + */ 247 + close(child_ready_pipe[1]); 248 + 249 + /* 250 + * Wait until the parent tells us to go. 251 + */ 252 + read(go_pipe[0], &buf, 1); 253 + 254 + execvp(argv[0], (char **)argv); 255 + 256 + perror(argv[0]); 257 + exit(-1); 230 258 } 231 259 260 + /* 261 + * Wait for the child to be ready to exec. 262 + */ 263 + close(child_ready_pipe[1]); 264 + close(go_pipe[0]); 265 + read(child_ready_pipe[0], &buf, 1); 266 + close(child_ready_pipe[0]); 267 + 268 + for (counter = 0; counter < nr_counters; counter++) 269 + create_perf_stat_counter(counter, pid); 270 + 271 + /* 272 + * Enable counters and exec the command: 273 + */ 274 + t0 = rdclock(); 275 + 276 + close(go_pipe[1]); 232 277 wait(&status); 233 278 234 - prctl(PR_TASK_PERF_COUNTERS_DISABLE); 235 279 t1 = rdclock(); 236 280 237 281 walltime_nsecs[run_idx] = t1 - t0; ··· 288 262 { 289 263 double msecs = (double)count[0] / 1000000; 290 264 291 - fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); 265 + fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); 292 266 293 267 if (attrs[counter].type == PERF_TYPE_SOFTWARE && 294 268 attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { ··· 302 276 303 277 static void abs_printout(int counter, u64 *count, u64 *noise) 304 278 { 305 - fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); 279 + fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); 306 280 307 281 if (runtime_cycles_avg && 308 282 attrs[counter].type == PERF_TYPE_HARDWARE && ··· 332 306 scaled = event_scaled_avg[counter]; 333 307 334 308 if (scaled == -1) { 335 - fprintf(stderr, " %14s %-20s\n", 309 + fprintf(stderr, " %14s %-24s\n", 336 310 "<not counted>", event_name(counter)); 337 311 return; 338 312 } ··· 390 364 event_res_avg[j]+1, event_res[i][j]+1); 391 365 update_avg("counter/2", j, 392 366 event_res_avg[j]+2, event_res[i][j]+2); 393 - update_avg("scaled", j, 394 - event_scaled_avg + j, event_scaled[i]+j); 367 + if (event_scaled[i][j] != -1) 368 + update_avg("scaled", j, 369 + event_scaled_avg + j, event_scaled[i]+j); 370 + else 371 + event_scaled_avg[j] = -1; 395 372 } 396 373 } 397 374 runtime_nsecs_avg /= run_count; ··· 458 429 for (counter = 0; counter < nr_counters; counter++) 459 430 print_counter(counter); 460 431 461 - 462 432 fprintf(stderr, "\n"); 463 - fprintf(stderr, " %14.9f seconds time elapsed.\n", 433 + fprintf(stderr, " %14.9f seconds time elapsed", 464 434 (double)walltime_nsecs_avg/1e9); 465 - fprintf(stderr, "\n"); 435 + if (run_count > 1) { 436 + fprintf(stderr, " ( +- %7.3f%% )", 437 + 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg); 438 + } 439 + fprintf(stderr, "\n\n"); 466 440 } 467 441 468 442 static volatile int signr = -1; ··· 498 466 OPT_INTEGER('p', "pid", &target_pid, 499 467 "stat events on existing pid"), 500 468 OPT_BOOLEAN('a', "all-cpus", &system_wide, 501 - "system-wide collection from all CPUs"), 469 + "system-wide collection from all CPUs"), 502 470 OPT_BOOLEAN('S', "scale", &scale, 503 - "scale/normalize counters"), 471 + "scale/normalize counters"), 504 472 OPT_BOOLEAN('v', "verbose", &verbose, 505 473 "be more verbose (show counter open errors, etc)"), 506 474 OPT_INTEGER('r', "repeat", &run_count, 507 475 "repeat command and print average + stddev (max: 100)"), 476 + OPT_BOOLEAN('n', "null", &null_run, 477 + "null run - dont start any counters"), 508 478 OPT_END() 509 479 }; 510 480 ··· 514 480 { 515 481 int status; 516 482 517 - page_size = sysconf(_SC_PAGE_SIZE); 518 - 519 - memcpy(attrs, default_attrs, sizeof(attrs)); 520 - 521 483 argc = parse_options(argc, argv, options, stat_usage, 0); 522 484 if (!argc) 523 485 usage_with_options(stat_usage, options); 524 486 if (run_count <= 0 || run_count > MAX_RUN) 525 487 usage_with_options(stat_usage, options); 526 488 527 - if (!nr_counters) 528 - nr_counters = 8; 489 + /* Set attrs and nr_counters if no event is selected and !null_run */ 490 + if (!null_run && !nr_counters) { 491 + memcpy(attrs, default_attrs, sizeof(default_attrs)); 492 + nr_counters = ARRAY_SIZE(default_attrs); 493 + } 529 494 530 495 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 531 496 assert(nr_cpus <= MAX_NR_CPUS); ··· 544 511 status = 0; 545 512 for (run_idx = 0; run_idx < run_count; run_idx++) { 546 513 if (run_count != 1 && verbose) 547 - fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); 514 + fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); 548 515 status = run_perf_stat(argc, argv); 549 516 } 550 517
+6 -5
tools/perf/builtin-top.c
··· 392 392 samples--; 393 393 } 394 394 395 - static void process_event(u64 ip, int counter) 395 + static void process_event(u64 ip, int counter, int user) 396 396 { 397 397 samples++; 398 398 399 - if (ip < min_ip || ip > max_ip) { 399 + if (user) { 400 400 userspace_samples++; 401 401 return; 402 402 } ··· 509 509 510 510 old += size; 511 511 512 - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { 513 - if (event->header.type & PERF_SAMPLE_IP) 514 - process_event(event->ip.ip, md->counter); 512 + if (event->header.type == PERF_EVENT_SAMPLE) { 513 + int user = 514 + (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; 515 + process_event(event->ip.ip, md->counter, user); 515 516 } 516 517 } 517 518
+4 -5
tools/perf/perf.h
··· 25 25 #include <sys/syscall.h> 26 26 27 27 #include "../../include/linux/perf_counter.h" 28 - #include "types.h" 28 + #include "util/types.h" 29 29 30 30 /* 31 31 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all ··· 72 72 #define MAX_COUNTERS 256 73 73 #define MAX_NR_CPUS 256 74 74 75 - struct perf_file_header { 76 - u64 version; 77 - u64 sample_type; 78 - u64 data_size; 75 + struct ip_callchain { 76 + u64 nr; 77 + u64 ips[0]; 79 78 }; 80 79 81 80 #endif
tools/perf/types.h tools/perf/util/types.h
+174
tools/perf/util/callchain.c
··· 1 + /* 2 + * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com> 3 + * 4 + * Handle the callchains from the stream in an ad-hoc radix tree and then 5 + * sort them in an rbtree. 6 + * 7 + */ 8 + 9 + #include <stdlib.h> 10 + #include <stdio.h> 11 + #include <stdbool.h> 12 + #include <errno.h> 13 + 14 + #include "callchain.h" 15 + 16 + 17 + static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) 18 + { 19 + struct rb_node **p = &root->rb_node; 20 + struct rb_node *parent = NULL; 21 + struct callchain_node *rnode; 22 + 23 + while (*p) { 24 + parent = *p; 25 + rnode = rb_entry(parent, struct callchain_node, rb_node); 26 + 27 + if (rnode->hit < chain->hit) 28 + p = &(*p)->rb_left; 29 + else 30 + p = &(*p)->rb_right; 31 + } 32 + 33 + rb_link_node(&chain->rb_node, parent, p); 34 + rb_insert_color(&chain->rb_node, root); 35 + } 36 + 37 + /* 38 + * Once we get every callchains from the stream, we can now 39 + * sort them by hit 40 + */ 41 + void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) 42 + { 43 + struct callchain_node *child; 44 + 45 + list_for_each_entry(child, &node->children, brothers) 46 + sort_chain_to_rbtree(rb_root, child); 47 + 48 + if (node->hit) 49 + rb_insert_callchain(rb_root, node); 50 + } 51 + 52 + static struct callchain_node *create_child(struct callchain_node *parent) 53 + { 54 + struct callchain_node *new; 55 + 56 + new = malloc(sizeof(*new)); 57 + if (!new) { 58 + perror("not enough memory to create child for code path tree"); 59 + return NULL; 60 + } 61 + new->parent = parent; 62 + INIT_LIST_HEAD(&new->children); 63 + INIT_LIST_HEAD(&new->val); 64 + list_add_tail(&new->brothers, &parent->children); 65 + 66 + return new; 67 + } 68 + 69 + static void 70 + fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) 71 + { 72 + int i; 73 + 74 + for (i = start; i < chain->nr; i++) { 75 + struct callchain_list *call; 76 + 77 + call = malloc(sizeof(*chain)); 78 + if (!call) { 79 + perror("not enough memory for the code path tree"); 80 + return; 81 + } 82 + call->ip = chain->ips[i]; 83 + list_add_tail(&call->list, &node->val); 84 + } 85 + node->val_nr = i - start; 86 + } 87 + 88 + static void add_child(struct callchain_node *parent, struct ip_callchain *chain) 89 + { 90 + struct callchain_node *new; 91 + 92 + new = create_child(parent); 93 + fill_node(new, chain, parent->val_nr); 94 + 95 + new->hit = 1; 96 + } 97 + 98 + static void 99 + split_add_child(struct callchain_node *parent, struct ip_callchain *chain, 100 + struct callchain_list *to_split, int idx) 101 + { 102 + struct callchain_node *new; 103 + 104 + /* split */ 105 + new = create_child(parent); 106 + list_move_tail(&to_split->list, &new->val); 107 + new->hit = parent->hit; 108 + parent->hit = 0; 109 + parent->val_nr = idx; 110 + 111 + /* create the new one */ 112 + add_child(parent, chain); 113 + } 114 + 115 + static int 116 + __append_chain(struct callchain_node *root, struct ip_callchain *chain, 117 + int start); 118 + 119 + static int 120 + __append_chain_children(struct callchain_node *root, struct ip_callchain *chain) 121 + { 122 + struct callchain_node *rnode; 123 + 124 + /* lookup in childrens */ 125 + list_for_each_entry(rnode, &root->children, brothers) { 126 + int ret = __append_chain(rnode, chain, root->val_nr); 127 + if (!ret) 128 + return 0; 129 + } 130 + return -1; 131 + } 132 + 133 + static int 134 + __append_chain(struct callchain_node *root, struct ip_callchain *chain, 135 + int start) 136 + { 137 + struct callchain_list *cnode; 138 + int i = start; 139 + bool found = false; 140 + 141 + /* lookup in the current node */ 142 + list_for_each_entry(cnode, &root->val, list) { 143 + if (cnode->ip != chain->ips[i++]) 144 + break; 145 + if (!found) 146 + found = true; 147 + if (i == chain->nr) 148 + break; 149 + } 150 + 151 + /* matches not, relay on the parent */ 152 + if (!found) 153 + return -1; 154 + 155 + /* we match only a part of the node. Split it and add the new chain */ 156 + if (i < root->val_nr) { 157 + split_add_child(root, chain, cnode, i); 158 + return 0; 159 + } 160 + 161 + /* we match 100% of the path, increment the hit */ 162 + if (i == root->val_nr) { 163 + root->hit++; 164 + return 0; 165 + } 166 + 167 + return __append_chain_children(root, chain); 168 + } 169 + 170 + void append_chain(struct callchain_node *root, struct ip_callchain *chain) 171 + { 172 + if (__append_chain_children(root, chain) == -1) 173 + add_child(root, chain); 174 + }
+33
tools/perf/util/callchain.h
··· 1 + #ifndef __PERF_CALLCHAIN_H 2 + #define __PERF_CALLCHAIN_H 3 + 4 + #include "../perf.h" 5 + #include "list.h" 6 + #include "rbtree.h" 7 + 8 + 9 + struct callchain_node { 10 + struct callchain_node *parent; 11 + struct list_head brothers; 12 + struct list_head children; 13 + struct list_head val; 14 + struct rb_node rb_node; 15 + int val_nr; 16 + int hit; 17 + }; 18 + 19 + struct callchain_list { 20 + unsigned long ip; 21 + struct list_head list; 22 + }; 23 + 24 + static inline void callchain_init(struct callchain_node *node) 25 + { 26 + INIT_LIST_HEAD(&node->brothers); 27 + INIT_LIST_HEAD(&node->children); 28 + INIT_LIST_HEAD(&node->val); 29 + } 30 + 31 + void append_chain(struct callchain_node *root, struct ip_callchain *chain); 32 + void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); 33 + #endif
+242
tools/perf/util/header.c
··· 1 + #include <sys/types.h> 2 + #include <unistd.h> 3 + #include <stdio.h> 4 + #include <stdlib.h> 5 + 6 + #include "util.h" 7 + #include "header.h" 8 + 9 + /* 10 + * 11 + */ 12 + 13 + struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr) 14 + { 15 + struct perf_header_attr *self = malloc(sizeof(*self)); 16 + 17 + if (!self) 18 + die("nomem"); 19 + 20 + self->attr = *attr; 21 + self->ids = 0; 22 + self->size = 1; 23 + self->id = malloc(sizeof(u64)); 24 + 25 + if (!self->id) 26 + die("nomem"); 27 + 28 + return self; 29 + } 30 + 31 + void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) 32 + { 33 + int pos = self->ids; 34 + 35 + self->ids++; 36 + if (self->ids > self->size) { 37 + self->size *= 2; 38 + self->id = realloc(self->id, self->size * sizeof(u64)); 39 + if (!self->id) 40 + die("nomem"); 41 + } 42 + self->id[pos] = id; 43 + } 44 + 45 + /* 46 + * 47 + */ 48 + 49 + struct perf_header *perf_header__new(void) 50 + { 51 + struct perf_header *self = malloc(sizeof(*self)); 52 + 53 + if (!self) 54 + die("nomem"); 55 + 56 + self->frozen = 0; 57 + 58 + self->attrs = 0; 59 + self->size = 1; 60 + self->attr = malloc(sizeof(void *)); 61 + 62 + if (!self->attr) 63 + die("nomem"); 64 + 65 + self->data_offset = 0; 66 + self->data_size = 0; 67 + 68 + return self; 69 + } 70 + 71 + void perf_header__add_attr(struct perf_header *self, 72 + struct perf_header_attr *attr) 73 + { 74 + int pos = self->attrs; 75 + 76 + if (self->frozen) 77 + die("frozen"); 78 + 79 + self->attrs++; 80 + if (self->attrs > self->size) { 81 + self->size *= 2; 82 + self->attr = realloc(self->attr, self->size * sizeof(void *)); 83 + if (!self->attr) 84 + die("nomem"); 85 + } 86 + self->attr[pos] = attr; 87 + } 88 + 89 + static const char *__perf_magic = "PERFFILE"; 90 + 91 + #define PERF_MAGIC (*(u64 *)__perf_magic) 92 + 93 + struct perf_file_section { 94 + u64 offset; 95 + u64 size; 96 + }; 97 + 98 + struct perf_file_attr { 99 + struct perf_counter_attr attr; 100 + struct perf_file_section ids; 101 + }; 102 + 103 + struct perf_file_header { 104 + u64 magic; 105 + u64 size; 106 + u64 attr_size; 107 + struct perf_file_section attrs; 108 + struct perf_file_section data; 109 + }; 110 + 111 + static void do_write(int fd, void *buf, size_t size) 112 + { 113 + while (size) { 114 + int ret = write(fd, buf, size); 115 + 116 + if (ret < 0) 117 + die("failed to write"); 118 + 119 + size -= ret; 120 + buf += ret; 121 + } 122 + } 123 + 124 + void perf_header__write(struct perf_header *self, int fd) 125 + { 126 + struct perf_file_header f_header; 127 + struct perf_file_attr f_attr; 128 + struct perf_header_attr *attr; 129 + int i; 130 + 131 + lseek(fd, sizeof(f_header), SEEK_SET); 132 + 133 + 134 + for (i = 0; i < self->attrs; i++) { 135 + attr = self->attr[i]; 136 + 137 + attr->id_offset = lseek(fd, 0, SEEK_CUR); 138 + do_write(fd, attr->id, attr->ids * sizeof(u64)); 139 + } 140 + 141 + 142 + self->attr_offset = lseek(fd, 0, SEEK_CUR); 143 + 144 + for (i = 0; i < self->attrs; i++) { 145 + attr = self->attr[i]; 146 + 147 + f_attr = (struct perf_file_attr){ 148 + .attr = attr->attr, 149 + .ids = { 150 + .offset = attr->id_offset, 151 + .size = attr->ids * sizeof(u64), 152 + } 153 + }; 154 + do_write(fd, &f_attr, sizeof(f_attr)); 155 + } 156 + 157 + 158 + self->data_offset = lseek(fd, 0, SEEK_CUR); 159 + 160 + f_header = (struct perf_file_header){ 161 + .magic = PERF_MAGIC, 162 + .size = sizeof(f_header), 163 + .attr_size = sizeof(f_attr), 164 + .attrs = { 165 + .offset = self->attr_offset, 166 + .size = self->attrs * sizeof(f_attr), 167 + }, 168 + .data = { 169 + .offset = self->data_offset, 170 + .size = self->data_size, 171 + }, 172 + }; 173 + 174 + lseek(fd, 0, SEEK_SET); 175 + do_write(fd, &f_header, sizeof(f_header)); 176 + lseek(fd, self->data_offset + self->data_size, SEEK_SET); 177 + 178 + self->frozen = 1; 179 + } 180 + 181 + static void do_read(int fd, void *buf, size_t size) 182 + { 183 + while (size) { 184 + int ret = read(fd, buf, size); 185 + 186 + if (ret < 0) 187 + die("failed to read"); 188 + 189 + size -= ret; 190 + buf += ret; 191 + } 192 + } 193 + 194 + struct perf_header *perf_header__read(int fd) 195 + { 196 + struct perf_header *self = perf_header__new(); 197 + struct perf_file_header f_header; 198 + struct perf_file_attr f_attr; 199 + u64 f_id; 200 + 201 + int nr_attrs, nr_ids, i, j; 202 + 203 + lseek(fd, 0, SEEK_SET); 204 + do_read(fd, &f_header, sizeof(f_header)); 205 + 206 + if (f_header.magic != PERF_MAGIC || 207 + f_header.size != sizeof(f_header) || 208 + f_header.attr_size != sizeof(f_attr)) 209 + die("incompatible file format"); 210 + 211 + nr_attrs = f_header.attrs.size / sizeof(f_attr); 212 + lseek(fd, f_header.attrs.offset, SEEK_SET); 213 + 214 + for (i = 0; i < nr_attrs; i++) { 215 + struct perf_header_attr *attr; 216 + off_t tmp = lseek(fd, 0, SEEK_CUR); 217 + 218 + do_read(fd, &f_attr, sizeof(f_attr)); 219 + 220 + attr = perf_header_attr__new(&f_attr.attr); 221 + 222 + nr_ids = f_attr.ids.size / sizeof(u64); 223 + lseek(fd, f_attr.ids.offset, SEEK_SET); 224 + 225 + for (j = 0; j < nr_ids; j++) { 226 + do_read(fd, &f_id, sizeof(f_id)); 227 + 228 + perf_header_attr__add_id(attr, f_id); 229 + } 230 + perf_header__add_attr(self, attr); 231 + lseek(fd, tmp, SEEK_SET); 232 + } 233 + 234 + self->data_offset = f_header.data.offset; 235 + self->data_size = f_header.data.size; 236 + 237 + lseek(fd, self->data_offset + self->data_size, SEEK_SET); 238 + 239 + self->frozen = 1; 240 + 241 + return self; 242 + }
+37
tools/perf/util/header.h
··· 1 + #ifndef _PERF_HEADER_H 2 + #define _PERF_HEADER_H 3 + 4 + #include "../../../include/linux/perf_counter.h" 5 + #include <sys/types.h> 6 + #include "types.h" 7 + 8 + struct perf_header_attr { 9 + struct perf_counter_attr attr; 10 + int ids, size; 11 + u64 *id; 12 + off_t id_offset; 13 + }; 14 + 15 + struct perf_header { 16 + int frozen; 17 + int attrs, size; 18 + struct perf_header_attr **attr; 19 + off_t attr_offset; 20 + u64 data_offset; 21 + u64 data_size; 22 + }; 23 + 24 + struct perf_header *perf_header__read(int fd); 25 + void perf_header__write(struct perf_header *self, int fd); 26 + 27 + void perf_header__add_attr(struct perf_header *self, 28 + struct perf_header_attr *attr); 29 + 30 + struct perf_header_attr * 31 + perf_header_attr__new(struct perf_counter_attr *attr); 32 + void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); 33 + 34 + 35 + struct perf_header *perf_header__new(void); 36 + 37 + #endif /* _PERF_HEADER_H */
-15
tools/perf/util/help.c
··· 126 126 !S_ISREG(st.st_mode)) 127 127 return 0; 128 128 129 - #ifdef __MINGW32__ 130 - /* cannot trust the executable bit, peek into the file instead */ 131 - char buf[3] = { 0 }; 132 - int n; 133 - int fd = open(name, O_RDONLY); 134 - st.st_mode &= ~S_IXUSR; 135 - if (fd >= 0) { 136 - n = read(fd, buf, 2); 137 - if (n == 2) 138 - /* DOS executables start with "MZ" */ 139 - if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) 140 - st.st_mode |= S_IXUSR; 141 - close(fd); 142 - } 143 - #endif 144 129 return st.st_mode & S_IXUSR; 145 130 } 146 131
+1 -4
tools/perf/util/pager.c
··· 9 9 10 10 static int spawned_pager; 11 11 12 - #ifndef __MINGW32__ 13 12 static void pager_preexec(void) 14 13 { 15 14 /* ··· 23 24 24 25 setenv("LESS", "FRSX", 0); 25 26 } 26 - #endif 27 27 28 28 static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; 29 29 static struct child_process pager_process; ··· 68 70 pager_argv[2] = pager; 69 71 pager_process.argv = pager_argv; 70 72 pager_process.in = -1; 71 - #ifndef __MINGW32__ 72 73 pager_process.preexec_cb = pager_preexec; 73 - #endif 74 + 74 75 if (start_command(&pager_process)) 75 76 return; 76 77
+104 -47
tools/perf/util/parse-events.c
··· 16 16 u8 type; 17 17 u64 config; 18 18 char *symbol; 19 + char *alias; 19 20 }; 20 21 21 - #define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y 22 - #define CR(x, y) .type = PERF_TYPE_##x, .config = y 22 + #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x 23 + #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x 23 24 24 25 static struct event_symbol event_symbols[] = { 25 - { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, 26 - { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, 27 - { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, 28 - { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, 29 - { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, 30 - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, 31 - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, 32 - { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, 33 - { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, 26 + { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, 27 + { CHW(INSTRUCTIONS), "instructions", "" }, 28 + { CHW(CACHE_REFERENCES), "cache-references", "" }, 29 + { CHW(CACHE_MISSES), "cache-misses", "" }, 30 + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, 31 + { CHW(BRANCH_MISSES), "branch-misses", "" }, 32 + { CHW(BUS_CYCLES), "bus-cycles", "" }, 34 33 35 - { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, 36 - { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, 37 - { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", }, 38 - { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, 39 - { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, 40 - { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, 41 - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", }, 42 - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, 43 - { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, 44 - { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, 34 + { CSW(CPU_CLOCK), "cpu-clock", "" }, 35 + { CSW(TASK_CLOCK), "task-clock", "" }, 36 + { CSW(PAGE_FAULTS), "page-faults", "faults" }, 37 + { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, 38 + { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, 39 + { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, 40 + { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, 45 41 }; 46 42 47 43 #define __PERF_COUNTER_FIELD(config, name) \ ··· 70 74 71 75 #define MAX_ALIASES 8 72 76 73 - static char *hw_cache [][MAX_ALIASES] = { 74 - { "L1-data" , "l1-d", "l1d" }, 75 - { "L1-instruction" , "l1-i", "l1i" }, 76 - { "L2" , "l2" }, 77 - { "Data-TLB" , "dtlb", "d-tlb" }, 78 - { "Instruction-TLB" , "itlb", "i-tlb" }, 79 - { "Branch" , "bpu" , "btb", "bpc" }, 77 + static char *hw_cache[][MAX_ALIASES] = { 78 + { "L1-d$", "l1-d", "l1d", "L1-data", }, 79 + { "L1-i$", "l1-i", "l1i", "L1-instruction", }, 80 + { "LLC", "L2" }, 81 + { "dTLB", "d-tlb", "Data-TLB", }, 82 + { "iTLB", "i-tlb", "Instruction-TLB", }, 83 + { "branch", "branches", "bpu", "btb", "bpc", }, 80 84 }; 81 85 82 - static char *hw_cache_op [][MAX_ALIASES] = { 83 - { "Load" , "read" }, 84 - { "Store" , "write" }, 85 - { "Prefetch" , "speculative-read", "speculative-load" }, 86 + static char *hw_cache_op[][MAX_ALIASES] = { 87 + { "load", "loads", "read", }, 88 + { "store", "stores", "write", }, 89 + { "prefetch", "prefetches", "speculative-read", "speculative-load", }, 86 90 }; 87 91 88 - static char *hw_cache_result [][MAX_ALIASES] = { 89 - { "Reference" , "ops", "access" }, 90 - { "Miss" }, 92 + static char *hw_cache_result[][MAX_ALIASES] = { 93 + { "refs", "Reference", "ops", "access", }, 94 + { "misses", "miss", }, 91 95 }; 96 + 97 + #define C(x) PERF_COUNT_HW_CACHE_##x 98 + #define CACHE_READ (1 << C(OP_READ)) 99 + #define CACHE_WRITE (1 << C(OP_WRITE)) 100 + #define CACHE_PREFETCH (1 << C(OP_PREFETCH)) 101 + #define COP(x) (1 << x) 102 + 103 + /* 104 + * cache operartion stat 105 + * L1I : Read and prefetch only 106 + * ITLB and BPU : Read-only 107 + */ 108 + static unsigned long hw_cache_stat[C(MAX)] = { 109 + [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), 110 + [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), 111 + [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), 112 + [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), 113 + [C(ITLB)] = (CACHE_READ), 114 + [C(BPU)] = (CACHE_READ), 115 + }; 116 + 117 + static int is_cache_op_valid(u8 cache_type, u8 cache_op) 118 + { 119 + if (hw_cache_stat[cache_type] & COP(cache_op)) 120 + return 1; /* valid */ 121 + else 122 + return 0; /* invalid */ 123 + } 124 + 125 + static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) 126 + { 127 + static char name[50]; 128 + 129 + if (cache_result) { 130 + sprintf(name, "%s-%s-%s", hw_cache[cache_type][0], 131 + hw_cache_op[cache_op][0], 132 + hw_cache_result[cache_result][0]); 133 + } else { 134 + sprintf(name, "%s-%s", hw_cache[cache_type][0], 135 + hw_cache_op[cache_op][1]); 136 + } 137 + 138 + return name; 139 + } 92 140 93 141 char *event_name(int counter) 94 142 { ··· 153 113 154 114 case PERF_TYPE_HW_CACHE: { 155 115 u8 cache_type, cache_op, cache_result; 156 - static char name[100]; 157 116 158 117 cache_type = (config >> 0) & 0xff; 159 118 if (cache_type > PERF_COUNT_HW_CACHE_MAX) ··· 166 127 if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) 167 128 return "unknown-ext-hardware-cache-result"; 168 129 169 - sprintf(name, "%s-Cache-%s-%ses", 170 - hw_cache[cache_type][0], 171 - hw_cache_op[cache_op][0], 172 - hw_cache_result[cache_result][0]); 130 + if (!is_cache_op_valid(cache_type, cache_op)) 131 + return "invalid-cache"; 173 132 174 - return name; 133 + return event_cache_name(cache_type, cache_op, cache_result); 175 134 } 176 135 177 136 case PERF_TYPE_SOFTWARE: ··· 200 163 return -1; 201 164 } 202 165 203 - static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) 166 + static int 167 + parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) 204 168 { 205 169 int cache_type = -1, cache_op = 0, cache_result = 0; 206 170 ··· 220 182 if (cache_op == -1) 221 183 cache_op = PERF_COUNT_HW_CACHE_OP_READ; 222 184 185 + if (!is_cache_op_valid(cache_type, cache_op)) 186 + return -EINVAL; 187 + 223 188 cache_result = parse_aliases(str, hw_cache_result, 224 189 PERF_COUNT_HW_CACHE_RESULT_MAX); 225 190 /* ··· 234 193 attr->config = cache_type | (cache_op << 8) | (cache_result << 16); 235 194 attr->type = PERF_TYPE_HW_CACHE; 236 195 196 + return 0; 197 + } 198 + 199 + static int check_events(const char *str, unsigned int i) 200 + { 201 + if (!strncmp(str, event_symbols[i].symbol, 202 + strlen(event_symbols[i].symbol))) 203 + return 1; 204 + 205 + if (strlen(event_symbols[i].alias)) 206 + if (!strncmp(str, event_symbols[i].alias, 207 + strlen(event_symbols[i].alias))) 208 + return 1; 237 209 return 0; 238 210 } 239 211 ··· 289 235 } 290 236 291 237 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { 292 - if (!strncmp(str, event_symbols[i].symbol, 293 - strlen(event_symbols[i].symbol))) { 294 - 238 + if (check_events(str, i)) { 295 239 attr->type = event_symbols[i].type; 296 240 attr->config = event_symbols[i].config; 297 241 ··· 341 289 { 342 290 struct event_symbol *syms = event_symbols; 343 291 unsigned int i, type, prev_type = -1; 292 + char name[40]; 344 293 345 294 fprintf(stderr, "\n"); 346 295 fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); ··· 354 301 if (type != prev_type) 355 302 fprintf(stderr, "\n"); 356 303 357 - fprintf(stderr, " %-30s [%s]\n", syms->symbol, 304 + if (strlen(syms->alias)) 305 + sprintf(name, "%s OR %s", syms->symbol, syms->alias); 306 + else 307 + strcpy(name, syms->symbol); 308 + fprintf(stderr, " %-40s [%s]\n", name, 358 309 event_type_descriptors[type]); 359 310 360 311 prev_type = type; 361 312 } 362 313 363 314 fprintf(stderr, "\n"); 364 - fprintf(stderr, " %-30s [raw hardware event descriptor]\n", 315 + fprintf(stderr, " %-40s [raw hardware event descriptor]\n", 365 316 "rNNN"); 366 317 fprintf(stderr, "\n"); 367 318
+2 -93
tools/perf/util/run-command.c
··· 65 65 cmd->err = fderr[0]; 66 66 } 67 67 68 - #ifndef __MINGW32__ 69 68 fflush(NULL); 70 69 cmd->pid = fork(); 71 70 if (!cmd->pid) { ··· 117 118 } 118 119 exit(127); 119 120 } 120 - #else 121 - int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ 122 - const char **sargv = cmd->argv; 123 - char **env = environ; 124 - 125 - if (cmd->no_stdin) { 126 - s0 = dup(0); 127 - dup_devnull(0); 128 - } else if (need_in) { 129 - s0 = dup(0); 130 - dup2(fdin[0], 0); 131 - } else if (cmd->in) { 132 - s0 = dup(0); 133 - dup2(cmd->in, 0); 134 - } 135 - 136 - if (cmd->no_stderr) { 137 - s2 = dup(2); 138 - dup_devnull(2); 139 - } else if (need_err) { 140 - s2 = dup(2); 141 - dup2(fderr[1], 2); 142 - } 143 - 144 - if (cmd->no_stdout) { 145 - s1 = dup(1); 146 - dup_devnull(1); 147 - } else if (cmd->stdout_to_stderr) { 148 - s1 = dup(1); 149 - dup2(2, 1); 150 - } else if (need_out) { 151 - s1 = dup(1); 152 - dup2(fdout[1], 1); 153 - } else if (cmd->out > 1) { 154 - s1 = dup(1); 155 - dup2(cmd->out, 1); 156 - } 157 - 158 - if (cmd->dir) 159 - die("chdir in start_command() not implemented"); 160 - if (cmd->env) { 161 - env = copy_environ(); 162 - for (; *cmd->env; cmd->env++) 163 - env = env_setenv(env, *cmd->env); 164 - } 165 - 166 - if (cmd->perf_cmd) { 167 - cmd->argv = prepare_perf_cmd(cmd->argv); 168 - } 169 - 170 - cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); 171 - 172 - if (cmd->env) 173 - free_environ(env); 174 - if (cmd->perf_cmd) 175 - free(cmd->argv); 176 - 177 - cmd->argv = sargv; 178 - if (s0 >= 0) 179 - dup2(s0, 0), close(s0); 180 - if (s1 >= 0) 181 - dup2(s1, 1), close(s1); 182 - if (s2 >= 0) 183 - dup2(s2, 2), close(s2); 184 - #endif 185 121 186 122 if (cmd->pid < 0) { 187 123 int err = errno; ··· 222 288 return run_command(&cmd); 223 289 } 224 290 225 - #ifdef __MINGW32__ 226 - static __stdcall unsigned run_thread(void *data) 227 - { 228 - struct async *async = data; 229 - return async->proc(async->fd_for_proc, async->data); 230 - } 231 - #endif 232 - 233 291 int start_async(struct async *async) 234 292 { 235 293 int pipe_out[2]; ··· 230 304 return error("cannot create pipe: %s", strerror(errno)); 231 305 async->out = pipe_out[0]; 232 306 233 - #ifndef __MINGW32__ 234 307 /* Flush stdio before fork() to avoid cloning buffers */ 235 308 fflush(NULL); 236 309 ··· 244 319 exit(!!async->proc(pipe_out[1], async->data)); 245 320 } 246 321 close(pipe_out[1]); 247 - #else 248 - async->fd_for_proc = pipe_out[1]; 249 - async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); 250 - if (!async->tid) { 251 - error("cannot create thread: %s", strerror(errno)); 252 - close_pair(pipe_out); 253 - return -1; 254 - } 255 - #endif 322 + 256 323 return 0; 257 324 } 258 325 259 326 int finish_async(struct async *async) 260 327 { 261 - #ifndef __MINGW32__ 262 328 int ret = 0; 263 329 264 330 if (wait_or_whine(async->pid)) 265 331 ret = error("waitpid (async) failed"); 266 - #else 267 - DWORD ret = 0; 268 - if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) 269 - ret = error("waiting for thread failed: %lu", GetLastError()); 270 - else if (!GetExitCodeThread(async->tid, &ret)) 271 - ret = error("cannot get thread exit code: %lu", GetLastError()); 272 - CloseHandle(async->tid); 273 - #endif 332 + 274 333 return ret; 275 334 } 276 335
-5
tools/perf/util/run-command.h
··· 79 79 int (*proc)(int fd, void *data); 80 80 void *data; 81 81 int out; /* caller reads from here and closes it */ 82 - #ifndef __MINGW32__ 83 82 pid_t pid; 84 - #else 85 - HANDLE tid; 86 - int fd_for_proc; 87 - #endif 88 83 }; 89 84 90 85 int start_async(struct async *async);
+1 -1
tools/perf/util/strbuf.c
··· 259 259 res = fread(sb->buf + sb->len, 1, size, f); 260 260 if (res > 0) 261 261 strbuf_setlen(sb, sb->len + res); 262 - else if (res < 0 && oldalloc == 0) 262 + else if (oldalloc == 0) 263 263 strbuf_release(sb); 264 264 return res; 265 265 }
+1 -1
tools/perf/util/string.h
··· 1 1 #ifndef _PERF_STRING_H_ 2 2 #define _PERF_STRING_H_ 3 3 4 - #include "../types.h" 4 + #include "types.h" 5 5 6 6 int hex2u64(const char *ptr, u64 *val); 7 7
+184
tools/perf/util/strlist.c
··· 1 + /* 2 + * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com> 3 + * 4 + * Licensed under the GPLv2. 5 + */ 6 + 7 + #include "strlist.h" 8 + #include <errno.h> 9 + #include <stdio.h> 10 + #include <stdlib.h> 11 + #include <string.h> 12 + 13 + static struct str_node *str_node__new(const char *s, bool dupstr) 14 + { 15 + struct str_node *self = malloc(sizeof(*self)); 16 + 17 + if (self != NULL) { 18 + if (dupstr) { 19 + s = strdup(s); 20 + if (s == NULL) 21 + goto out_delete; 22 + } 23 + self->s = s; 24 + } 25 + 26 + return self; 27 + 28 + out_delete: 29 + free(self); 30 + return NULL; 31 + } 32 + 33 + static void str_node__delete(struct str_node *self, bool dupstr) 34 + { 35 + if (dupstr) 36 + free((void *)self->s); 37 + free(self); 38 + } 39 + 40 + int strlist__add(struct strlist *self, const char *new_entry) 41 + { 42 + struct rb_node **p = &self->entries.rb_node; 43 + struct rb_node *parent = NULL; 44 + struct str_node *sn; 45 + 46 + while (*p != NULL) { 47 + int rc; 48 + 49 + parent = *p; 50 + sn = rb_entry(parent, struct str_node, rb_node); 51 + rc = strcmp(sn->s, new_entry); 52 + 53 + if (rc > 0) 54 + p = &(*p)->rb_left; 55 + else if (rc < 0) 56 + p = &(*p)->rb_right; 57 + else 58 + return -EEXIST; 59 + } 60 + 61 + sn = str_node__new(new_entry, self->dupstr); 62 + if (sn == NULL) 63 + return -ENOMEM; 64 + 65 + rb_link_node(&sn->rb_node, parent, p); 66 + rb_insert_color(&sn->rb_node, &self->entries); 67 + 68 + return 0; 69 + } 70 + 71 + int strlist__load(struct strlist *self, const char *filename) 72 + { 73 + char entry[1024]; 74 + int err; 75 + FILE *fp = fopen(filename, "r"); 76 + 77 + if (fp == NULL) 78 + return errno; 79 + 80 + while (fgets(entry, sizeof(entry), fp) != NULL) { 81 + const size_t len = strlen(entry); 82 + 83 + if (len == 0) 84 + continue; 85 + entry[len - 1] = '\0'; 86 + 87 + err = strlist__add(self, entry); 88 + if (err != 0) 89 + goto out; 90 + } 91 + 92 + err = 0; 93 + out: 94 + fclose(fp); 95 + return err; 96 + } 97 + 98 + void strlist__remove(struct strlist *self, struct str_node *sn) 99 + { 100 + rb_erase(&sn->rb_node, &self->entries); 101 + str_node__delete(sn, self->dupstr); 102 + } 103 + 104 + bool strlist__has_entry(struct strlist *self, const char *entry) 105 + { 106 + struct rb_node **p = &self->entries.rb_node; 107 + struct rb_node *parent = NULL; 108 + 109 + while (*p != NULL) { 110 + struct str_node *sn; 111 + int rc; 112 + 113 + parent = *p; 114 + sn = rb_entry(parent, struct str_node, rb_node); 115 + rc = strcmp(sn->s, entry); 116 + 117 + if (rc > 0) 118 + p = &(*p)->rb_left; 119 + else if (rc < 0) 120 + p = &(*p)->rb_right; 121 + else 122 + return true; 123 + } 124 + 125 + return false; 126 + } 127 + 128 + static int strlist__parse_list_entry(struct strlist *self, const char *s) 129 + { 130 + if (strncmp(s, "file://", 7) == 0) 131 + return strlist__load(self, s + 7); 132 + 133 + return strlist__add(self, s); 134 + } 135 + 136 + int strlist__parse_list(struct strlist *self, const char *s) 137 + { 138 + char *sep; 139 + int err; 140 + 141 + while ((sep = strchr(s, ',')) != NULL) { 142 + *sep = '\0'; 143 + err = strlist__parse_list_entry(self, s); 144 + *sep = ','; 145 + if (err != 0) 146 + return err; 147 + s = sep + 1; 148 + } 149 + 150 + return *s ? strlist__parse_list_entry(self, s) : 0; 151 + } 152 + 153 + struct strlist *strlist__new(bool dupstr, const char *slist) 154 + { 155 + struct strlist *self = malloc(sizeof(*self)); 156 + 157 + if (self != NULL) { 158 + self->entries = RB_ROOT; 159 + self->dupstr = dupstr; 160 + if (slist && strlist__parse_list(self, slist) != 0) 161 + goto out_error; 162 + } 163 + 164 + return self; 165 + out_error: 166 + free(self); 167 + return NULL; 168 + } 169 + 170 + void strlist__delete(struct strlist *self) 171 + { 172 + if (self != NULL) { 173 + struct str_node *pos; 174 + struct rb_node *next = rb_first(&self->entries); 175 + 176 + while (next) { 177 + pos = rb_entry(next, struct str_node, rb_node); 178 + next = rb_next(&pos->rb_node); 179 + strlist__remove(self, pos); 180 + } 181 + self->entries = RB_ROOT; 182 + free(self); 183 + } 184 + }
+32
tools/perf/util/strlist.h
··· 1 + #ifndef STRLIST_H_ 2 + #define STRLIST_H_ 3 + 4 + #include "rbtree.h" 5 + #include <stdbool.h> 6 + 7 + struct str_node { 8 + struct rb_node rb_node; 9 + const char *s; 10 + }; 11 + 12 + struct strlist { 13 + struct rb_root entries; 14 + bool dupstr; 15 + }; 16 + 17 + struct strlist *strlist__new(bool dupstr, const char *slist); 18 + void strlist__delete(struct strlist *self); 19 + 20 + void strlist__remove(struct strlist *self, struct str_node *sn); 21 + int strlist__load(struct strlist *self, const char *filename); 22 + int strlist__add(struct strlist *self, const char *str); 23 + 24 + bool strlist__has_entry(struct strlist *self, const char *entry); 25 + 26 + static inline bool strlist__empty(const struct strlist *self) 27 + { 28 + return rb_first(&self->entries) == NULL; 29 + } 30 + 31 + int strlist__parse_list(struct strlist *self, const char *s); 32 + #endif /* STRLIST_H_ */
+13 -3
tools/perf/util/symbol.c
··· 520 520 nr_syms = shdr.sh_size / shdr.sh_entsize; 521 521 522 522 memset(&sym, 0, sizeof(sym)); 523 - 523 + self->prelinked = elf_section_by_name(elf, &ehdr, &shdr, 524 + ".gnu.prelink_undo", 525 + NULL) != NULL; 524 526 elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { 525 527 struct symbol *f; 526 528 u64 obj_start; ··· 537 535 gelf_getshdr(sec, &shdr); 538 536 obj_start = sym.st_value; 539 537 540 - sym.st_value -= shdr.sh_addr - shdr.sh_offset; 538 + if (self->prelinked) { 539 + if (verbose >= 2) 540 + printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", 541 + (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); 542 + 543 + sym.st_value -= shdr.sh_addr - shdr.sh_offset; 544 + } 541 545 542 546 f = symbol__new(sym.st_value, sym.st_size, 543 547 elf_sym__name(&sym, symstrs), ··· 576 568 577 569 if (!name) 578 570 return -1; 571 + 572 + self->prelinked = 0; 579 573 580 574 if (strncmp(self->name, "/tmp/perf-", 10) == 0) 581 575 return dso__load_perf_map(self, filter, verbose); ··· 639 629 if (vmlinux) 640 630 err = dso__load_vmlinux(self, vmlinux, filter, verbose); 641 631 642 - if (err) 632 + if (err < 0) 643 633 err = dso__load_kallsyms(self, filter, verbose); 644 634 645 635 return err;
+3 -2
tools/perf/util/symbol.h
··· 2 2 #define _PERF_SYMBOL_ 1 3 3 4 4 #include <linux/types.h> 5 - #include "../types.h" 5 + #include "types.h" 6 6 #include "list.h" 7 7 #include "rbtree.h" 8 8 ··· 20 20 struct dso { 21 21 struct list_head node; 22 22 struct rb_root syms; 23 - unsigned int sym_priv_size; 24 23 struct symbol *(*find_symbol)(struct dso *, u64 ip); 24 + unsigned int sym_priv_size; 25 + unsigned char prelinked; 25 26 char name[0]; 26 27 }; 27 28
-15
tools/perf/util/util.h
··· 67 67 #include <assert.h> 68 68 #include <regex.h> 69 69 #include <utime.h> 70 - #ifndef __MINGW32__ 71 70 #include <sys/wait.h> 72 71 #include <sys/poll.h> 73 72 #include <sys/socket.h> ··· 80 81 #include <netdb.h> 81 82 #include <pwd.h> 82 83 #include <inttypes.h> 83 - #if defined(__CYGWIN__) 84 - #undef _XOPEN_SOURCE 85 - #include <grp.h> 86 - #define _XOPEN_SOURCE 600 87 - #include "compat/cygwin.h" 88 - #else 89 - #undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ 90 - #include <grp.h> 91 - #define _ALL_SOURCE 1 92 - #endif 93 - #else /* __MINGW32__ */ 94 - /* pull in Windows compatibility stuff */ 95 - #include "compat/mingw.h" 96 - #endif /* __MINGW32__ */ 97 84 98 85 #ifndef NO_ICONV 99 86 #include <iconv.h>