Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'perf/stat' into perf/core

Merge reason: the perf stat improvements are tested and ready now.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

+351 -101
+8 -6
arch/x86/kernel/cpu/perf_event_amd.c
··· 96 96 */ 97 97 static const u64 amd_perfmon_event_map[] = 98 98 { 99 - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 100 - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 101 - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, 102 - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, 103 - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 104 - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 99 + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 100 + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 101 + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, 102 + [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, 103 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 104 + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 105 + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ 106 + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ 105 107 }; 106 108 107 109 static u64 amd_pmu_event_map(int hw_event)
+21 -10
arch/x86/kernel/cpu/perf_event_intel.c
··· 36 36 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 37 37 }; 38 38 39 - static struct event_constraint intel_core_event_constraints[] = 39 + static struct event_constraint intel_core_event_constraints[] __read_mostly = 40 40 { 41 41 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 42 42 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ ··· 47 47 EVENT_CONSTRAINT_END 48 48 }; 49 49 50 - static struct event_constraint intel_core2_event_constraints[] = 50 + static struct event_constraint intel_core2_event_constraints[] __read_mostly = 51 51 { 52 52 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 53 53 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ ··· 70 70 EVENT_CONSTRAINT_END 71 71 }; 72 72 73 - static struct event_constraint intel_nehalem_event_constraints[] = 73 + static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = 74 74 { 75 75 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 76 76 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ ··· 86 86 EVENT_CONSTRAINT_END 87 87 }; 88 88 89 - static struct extra_reg intel_nehalem_extra_regs[] = 89 + static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = 90 90 { 91 91 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), 92 92 EVENT_EXTRA_END 93 93 }; 94 94 95 - static struct event_constraint intel_nehalem_percore_constraints[] = 95 + static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = 96 96 { 97 97 INTEL_EVENT_CONSTRAINT(0xb7, 0), 98 98 EVENT_CONSTRAINT_END 99 99 }; 100 100 101 - static struct event_constraint intel_westmere_event_constraints[] = 101 + static struct event_constraint intel_westmere_event_constraints[] __read_mostly = 102 102 { 103 103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 104 104 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ ··· 110 110 EVENT_CONSTRAINT_END 111 111 }; 112 112 113 - static struct event_constraint intel_snb_event_constraints[] = 113 + static struct event_constraint intel_snb_event_constraints[] __read_mostly = 114 114 { 115 115 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 116 116 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ ··· 123 123 EVENT_CONSTRAINT_END 124 124 }; 125 125 126 - static struct extra_reg intel_westmere_extra_regs[] = 126 + static struct extra_reg intel_westmere_extra_regs[] __read_mostly = 127 127 { 128 128 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), 129 129 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), 130 130 EVENT_EXTRA_END 131 131 }; 132 132 133 - static struct event_constraint intel_westmere_percore_constraints[] = 133 + static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = 134 134 { 135 135 INTEL_EVENT_CONSTRAINT(0xb7, 0), 136 136 INTEL_EVENT_CONSTRAINT(0xbb, 0), 137 137 EVENT_CONSTRAINT_END 138 138 }; 139 139 140 - static struct event_constraint intel_gen_event_constraints[] = 140 + static struct event_constraint intel_gen_event_constraints[] __read_mostly = 141 141 { 142 142 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 143 143 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ ··· 1423 1423 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1424 1424 x86_pmu.extra_regs = intel_nehalem_extra_regs; 1425 1425 1426 + /* UOPS_ISSUED.STALLED_CYCLES */ 1427 + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1428 + /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1429 + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1430 + 1426 1431 if (ebx & 0x40) { 1427 1432 /* 1428 1433 * Erratum AAJ80 detected, we work it around by using ··· 1468 1463 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1469 1464 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; 1470 1465 x86_pmu.extra_regs = intel_westmere_extra_regs; 1466 + 1467 + /* UOPS_ISSUED.STALLED_CYCLES */ 1468 + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; 1469 + /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1470 + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1471 + 1471 1472 pr_cont("Westmere events, "); 1472 1473 break; 1473 1474
+2
include/linux/perf_event.h
··· 52 52 PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, 53 53 PERF_COUNT_HW_BRANCH_MISSES = 5, 54 54 PERF_COUNT_HW_BUS_CYCLES = 6, 55 + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, 56 + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, 55 57 56 58 PERF_COUNT_HW_MAX, /* non-ABI */ 57 59 };
+249 -35
tools/perf/builtin-stat.c
··· 46 46 #include "util/evlist.h" 47 47 #include "util/evsel.h" 48 48 #include "util/debug.h" 49 + #include "util/color.h" 49 50 #include "util/header.h" 50 51 #include "util/cpumap.h" 51 52 #include "util/thread.h" ··· 66 65 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 67 66 68 67 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 68 + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 69 + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 69 70 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 70 71 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 71 72 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 72 - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, 73 - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, 74 73 74 + }; 75 + 76 + /* 77 + * Detailed stats: 78 + */ 79 + static struct perf_event_attr detailed_attrs[] = { 80 + 81 + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 82 + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 83 + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 84 + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 85 + 86 + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 87 + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 88 + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 89 + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 90 + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 91 + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 92 + 93 + { .type = PERF_TYPE_HW_CACHE, 94 + .config = 95 + PERF_COUNT_HW_CACHE_L1D << 0 | 96 + (PERF_COUNT_HW_CACHE_OP_READ << 8) | 97 + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 98 + 99 + { .type = PERF_TYPE_HW_CACHE, 100 + .config = 101 + PERF_COUNT_HW_CACHE_L1D << 0 | 102 + (PERF_COUNT_HW_CACHE_OP_READ << 8) | 103 + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 104 + 105 + { .type = PERF_TYPE_HW_CACHE, 106 + .config = 107 + PERF_COUNT_HW_CACHE_LL << 0 | 108 + (PERF_COUNT_HW_CACHE_OP_READ << 8) | 109 + (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 110 + 111 + { .type = PERF_TYPE_HW_CACHE, 112 + .config = 113 + PERF_COUNT_HW_CACHE_LL << 0 | 114 + (PERF_COUNT_HW_CACHE_OP_READ << 8) | 115 + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 75 116 }; 76 117 77 118 struct perf_evlist *evsel_list; ··· 129 86 static pid_t target_tid = -1; 130 87 static pid_t child_pid = -1; 131 88 static bool null_run = false; 89 + static bool detailed_run = false; 90 + static bool sync_run = false; 132 91 static bool big_num = true; 133 92 static int big_num_opt = -1; 134 93 static const char *cpu_list; ··· 201 156 202 157 struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 203 158 struct stats runtime_cycles_stats[MAX_NR_CPUS]; 159 + struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; 160 + struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; 204 161 struct stats runtime_branches_stats[MAX_NR_CPUS]; 162 + struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; 163 + struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; 205 164 struct stats walltime_nsecs_stats; 206 165 207 166 static int create_perf_stat_counter(struct perf_evsel *evsel) ··· 242 193 } 243 194 244 195 /* 196 + * Update various tracking values we maintain to print 197 + * more semantic information such as miss/hit ratios, 198 + * instruction rates, etc: 199 + */ 200 + static void update_shadow_stats(struct perf_evsel *counter, u64 *count) 201 + { 202 + if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) 203 + update_stats(&runtime_nsecs_stats[0], count[0]); 204 + else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 205 + update_stats(&runtime_cycles_stats[0], count[0]); 206 + else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 207 + update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); 208 + else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 209 + update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); 210 + else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 211 + update_stats(&runtime_branches_stats[0], count[0]); 212 + else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 213 + update_stats(&runtime_cacherefs_stats[0], count[0]); 214 + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 215 + update_stats(&runtime_l1_dcache_stats[0], count[0]); 216 + } 217 + 218 + /* 245 219 * Read out the results of a single counter: 246 220 * aggregate counts across CPUs in system-wide mode 247 221 */ ··· 289 217 /* 290 218 * Save the full runtime - to allow normalization during printout: 291 219 */ 292 - if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) 293 - update_stats(&runtime_nsecs_stats[0], count[0]); 294 - if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 295 - update_stats(&runtime_cycles_stats[0], count[0]); 296 - if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 297 - update_stats(&runtime_branches_stats[0], count[0]); 220 + update_shadow_stats(counter, count); 298 221 299 222 return 0; 300 223 } ··· 309 242 310 243 count = counter->counts->cpu[cpu].values; 311 244 312 - if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) 313 - update_stats(&runtime_nsecs_stats[cpu], count[0]); 314 - if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 315 - update_stats(&runtime_cycles_stats[cpu], count[0]); 316 - if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 317 - update_stats(&runtime_branches_stats[cpu], count[0]); 245 + update_shadow_stats(counter, count); 318 246 } 319 247 320 248 return 0; ··· 377 315 378 316 list_for_each_entry(counter, &evsel_list->entries, node) { 379 317 if (create_perf_stat_counter(counter) < 0) { 380 - if (errno == -EPERM || errno == -EACCES) { 318 + if (errno == EINVAL || errno == ENOSYS || errno == ENOENT) { 319 + if (verbose) 320 + ui__warning("%s event is not supported by the kernel.\n", 321 + event_name(counter)); 322 + continue; 323 + } 324 + 325 + if (errno == EPERM || errno == EACCES) { 381 326 error("You may not have permission to collect %sstats.\n" 382 327 "\t Consider tweaking" 383 328 " /proc/sys/kernel/perf_event_paranoid or running as root.", 384 329 system_wide ? "system-wide " : ""); 385 - } else if (errno == ENOENT) { 386 - error("%s event is not supported. ", event_name(counter)); 387 330 } else { 388 331 error("open_counter returned with %d (%s). " 389 332 "/bin/dmesg may provide additional information.\n", ··· 439 372 return WEXITSTATUS(status); 440 373 } 441 374 375 + static void print_noise_pct(double total, double avg) 376 + { 377 + double pct = 0.0; 378 + 379 + if (avg) 380 + pct = 100.0*total/avg; 381 + 382 + fprintf(stderr, " ( +-%6.2f%% )", pct); 383 + } 384 + 442 385 static void print_noise(struct perf_evsel *evsel, double avg) 443 386 { 444 387 struct perf_stat *ps; ··· 457 380 return; 458 381 459 382 ps = evsel->priv; 460 - fprintf(stderr, " ( +- %7.3f%% )", 461 - 100 * stddev_stats(&ps->res_stats[0]) / avg); 383 + print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 462 384 } 463 385 464 386 static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) ··· 480 404 return; 481 405 482 406 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 483 - fprintf(stderr, " # %10.3f CPUs ", 484 - avg / avg_stats(&walltime_nsecs_stats)); 407 + fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats)); 408 + } 409 + 410 + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) 411 + { 412 + double total, ratio = 0.0; 413 + const char *color; 414 + 415 + total = avg_stats(&runtime_cycles_stats[cpu]); 416 + 417 + if (total) 418 + ratio = avg / total * 100.0; 419 + 420 + color = PERF_COLOR_NORMAL; 421 + if (ratio > 50.0) 422 + color = PERF_COLOR_RED; 423 + else if (ratio > 30.0) 424 + color = PERF_COLOR_MAGENTA; 425 + else if (ratio > 10.0) 426 + color = PERF_COLOR_YELLOW; 427 + 428 + fprintf(stderr, " # "); 429 + color_fprintf(stderr, color, "%6.2f%%", ratio); 430 + fprintf(stderr, " frontend cycles idle "); 431 + } 432 + 433 + static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg) 434 + { 435 + double total, ratio = 0.0; 436 + const char *color; 437 + 438 + total = avg_stats(&runtime_cycles_stats[cpu]); 439 + 440 + if (total) 441 + ratio = avg / total * 100.0; 442 + 443 + color = PERF_COLOR_NORMAL; 444 + if (ratio > 75.0) 445 + color = PERF_COLOR_RED; 446 + else if (ratio > 50.0) 447 + color = PERF_COLOR_MAGENTA; 448 + else if (ratio > 20.0) 449 + color = PERF_COLOR_YELLOW; 450 + 451 + fprintf(stderr, " # "); 452 + color_fprintf(stderr, color, "%6.2f%%", ratio); 453 + fprintf(stderr, " backend cycles idle "); 454 + } 455 + 456 + static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg) 457 + { 458 + double total, ratio = 0.0; 459 + const char *color; 460 + 461 + total = avg_stats(&runtime_branches_stats[cpu]); 462 + 463 + if (total) 464 + ratio = avg / total * 100.0; 465 + 466 + color = PERF_COLOR_NORMAL; 467 + if (ratio > 20.0) 468 + color = PERF_COLOR_RED; 469 + else if (ratio > 10.0) 470 + color = PERF_COLOR_MAGENTA; 471 + else if (ratio > 5.0) 472 + color = PERF_COLOR_YELLOW; 473 + 474 + fprintf(stderr, " # "); 475 + color_fprintf(stderr, color, "%6.2f%%", ratio); 476 + fprintf(stderr, " of all branches "); 477 + } 478 + 479 + static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg) 480 + { 481 + double total, ratio = 0.0; 482 + const char *color; 483 + 484 + total = avg_stats(&runtime_l1_dcache_stats[cpu]); 485 + 486 + if (total) 487 + ratio = avg / total * 100.0; 488 + 489 + color = PERF_COLOR_NORMAL; 490 + if (ratio > 20.0) 491 + color = PERF_COLOR_RED; 492 + else if (ratio > 10.0) 493 + color = PERF_COLOR_MAGENTA; 494 + else if (ratio > 5.0) 495 + color = PERF_COLOR_YELLOW; 496 + 497 + fprintf(stderr, " # "); 498 + color_fprintf(stderr, color, "%6.2f%%", ratio); 499 + fprintf(stderr, " of all L1-dcache hits "); 485 500 } 486 501 487 502 static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) ··· 609 442 if (total) 610 443 ratio = avg / total; 611 444 612 - fprintf(stderr, " # %10.3f IPC ", ratio); 445 + fprintf(stderr, " # %5.2f insns per cycle ", ratio); 446 + 447 + total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); 448 + total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); 449 + 450 + if (total && avg) { 451 + ratio = total / avg; 452 + fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); 453 + } 454 + 613 455 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && 614 456 runtime_branches_stats[cpu].n != 0) { 615 - total = avg_stats(&runtime_branches_stats[cpu]); 457 + print_branch_misses(cpu, evsel, avg); 458 + } else if ( 459 + evsel->attr.type == PERF_TYPE_HW_CACHE && 460 + evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | 461 + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 462 + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 463 + runtime_l1_dcache_stats[cpu].n != 0) { 464 + print_l1_dcache_misses(cpu, evsel, avg); 465 + } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && 466 + runtime_cacherefs_stats[cpu].n != 0) { 467 + total = avg_stats(&runtime_cacherefs_stats[cpu]); 616 468 617 469 if (total) 618 470 ratio = avg * 100 / total; 619 471 620 - fprintf(stderr, " # %10.3f %% ", ratio); 472 + fprintf(stderr, " # %8.3f %% of all cache refs ", ratio); 621 473 474 + } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { 475 + print_stalled_cycles_frontend(cpu, evsel, avg); 476 + } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { 477 + print_stalled_cycles_backend(cpu, evsel, avg); 478 + } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { 479 + total = avg_stats(&runtime_nsecs_stats[cpu]); 480 + 481 + if (total) 482 + ratio = 1.0 * avg / total; 483 + 484 + fprintf(stderr, " # %8.3f GHz ", ratio); 622 485 } else if (runtime_nsecs_stats[cpu].n != 0) { 623 486 total = avg_stats(&runtime_nsecs_stats[cpu]); 624 487 625 488 if (total) 626 489 ratio = 1000.0 * avg / total; 627 490 628 - fprintf(stderr, " # %10.3f M/sec", ratio); 491 + fprintf(stderr, " # %8.3f M/sec ", ratio); 492 + } else { 493 + fprintf(stderr, " "); 629 494 } 630 495 } 631 496 ··· 704 505 avg_enabled = avg_stats(&ps->res_stats[1]); 705 506 avg_running = avg_stats(&ps->res_stats[2]); 706 507 707 - fprintf(stderr, " (scaled from %.2f%%)", 708 - 100 * avg_running / avg_enabled); 508 + fprintf(stderr, " (%.2f%%)", 100 * avg_running / avg_enabled); 709 509 } 710 510 fprintf(stderr, "\n"); 711 511 } ··· 746 548 if (!csv_output) { 747 549 print_noise(counter, 1.0); 748 550 749 - if (run != ena) { 750 - fprintf(stderr, " (scaled from %.2f%%)", 751 - 100.0 * run / ena); 752 - } 551 + if (run != ena) 552 + fprintf(stderr, " (%.2f%%)", 100.0 * run / ena); 753 553 } 754 554 fputc('\n', stderr); 755 555 } ··· 791 595 fprintf(stderr, " %18.9f seconds time elapsed", 792 596 avg_stats(&walltime_nsecs_stats)/1e9); 793 597 if (run_count > 1) { 794 - fprintf(stderr, " ( +- %7.3f%% )", 795 - 100*stddev_stats(&walltime_nsecs_stats) / 796 - avg_stats(&walltime_nsecs_stats)); 598 + print_noise_pct(stddev_stats(&walltime_nsecs_stats), 599 + avg_stats(&walltime_nsecs_stats)); 797 600 } 798 601 fprintf(stderr, "\n\n"); 799 602 } ··· 854 659 "repeat command and print average + stddev (max: 100)"), 855 660 OPT_BOOLEAN('n', "null", &null_run, 856 661 "null run - dont start any counters"), 662 + OPT_BOOLEAN('d', "detailed", &detailed_run, 663 + "detailed run - start a lot of events"), 664 + OPT_BOOLEAN('S', "sync", &sync_run, 665 + "call sync() before starting a run"), 857 666 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 858 667 "print large numbers with thousands\' separators", 859 668 stat__set_big_num), ··· 919 720 } 920 721 921 722 /* Set attrs and nr_counters if no event is selected and !null_run */ 922 - if (!null_run && !evsel_list->nr_entries) { 723 + if (detailed_run) { 724 + size_t c; 725 + 726 + for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) { 727 + pos = perf_evsel__new(&detailed_attrs[c], c); 728 + if (pos == NULL) 729 + goto out; 730 + perf_evlist__add(evsel_list, pos); 731 + } 732 + } 733 + /* Set attrs and nr_counters if no event is selected and !null_run */ 734 + if (!detailed_run && !null_run && !evsel_list->nr_entries) { 923 735 size_t c; 924 736 925 737 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { ··· 983 773 for (run_idx = 0; run_idx < run_count; run_idx++) { 984 774 if (run_count != 1 && verbose) 985 775 fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); 776 + 777 + if (sync_run) 778 + sync(); 779 + 986 780 status = run_perf_stat(argc, argv); 987 781 } 988 782
+68 -50
tools/perf/util/parse-events.c
··· 31 31 #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x 32 32 33 33 static struct event_symbol event_symbols[] = { 34 - { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, 35 - { CHW(INSTRUCTIONS), "instructions", "" }, 36 - { CHW(CACHE_REFERENCES), "cache-references", "" }, 37 - { CHW(CACHE_MISSES), "cache-misses", "" }, 38 - { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, 39 - { CHW(BRANCH_MISSES), "branch-misses", "" }, 40 - { CHW(BUS_CYCLES), "bus-cycles", "" }, 34 + { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, 35 + { CHW(STALLED_CYCLES_FRONTEND), "stalled-cycles-frontend", "idle-cycles-frontend" }, 36 + { CHW(STALLED_CYCLES_BACKEND), "stalled-cycles-backend", "idle-cycles-backend" }, 37 + { CHW(INSTRUCTIONS), "instructions", "" }, 38 + { CHW(CACHE_REFERENCES), "cache-references", "" }, 39 + { CHW(CACHE_MISSES), "cache-misses", "" }, 40 + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, 41 + { CHW(BRANCH_MISSES), "branch-misses", "" }, 42 + { CHW(BUS_CYCLES), "bus-cycles", "" }, 41 43 42 - { CSW(CPU_CLOCK), "cpu-clock", "" }, 43 - { CSW(TASK_CLOCK), "task-clock", "" }, 44 - { CSW(PAGE_FAULTS), "page-faults", "faults" }, 45 - { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, 46 - { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, 47 - { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, 48 - { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, 49 - { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, 50 - { CSW(EMULATION_FAULTS), "emulation-faults", "" }, 44 + { CSW(CPU_CLOCK), "cpu-clock", "" }, 45 + { CSW(TASK_CLOCK), "task-clock", "" }, 46 + { CSW(PAGE_FAULTS), "page-faults", "faults" }, 47 + { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, 48 + { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, 49 + { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, 50 + { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, 51 + { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, 52 + { CSW(EMULATION_FAULTS), "emulation-faults", "" }, 51 53 }; 52 54 53 55 #define __PERF_EVENT_FIELD(config, name) \ 54 56 ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) 55 57 56 - #define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) 58 + #define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) 57 59 #define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) 58 - #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) 60 + #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) 59 61 #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) 60 62 61 - static const char *hw_event_names[] = { 63 + static const char *hw_event_names[PERF_COUNT_HW_MAX] = { 62 64 "cycles", 63 65 "instructions", 64 66 "cache-references", ··· 68 66 "branches", 69 67 "branch-misses", 70 68 "bus-cycles", 69 + "stalled-cycles-frontend", 70 + "stalled-cycles-backend", 71 71 }; 72 72 73 - static const char *sw_event_names[] = { 74 - "cpu-clock-msecs", 75 - "task-clock-msecs", 73 + static const char *sw_event_names[PERF_COUNT_SW_MAX] = { 74 + "cpu-clock", 75 + "task-clock", 76 76 "page-faults", 77 77 "context-switches", 78 78 "CPU-migrations", ··· 311 307 312 308 switch (type) { 313 309 case PERF_TYPE_HARDWARE: 314 - if (config < PERF_COUNT_HW_MAX) 310 + if (config < PERF_COUNT_HW_MAX && hw_event_names[config]) 315 311 return hw_event_names[config]; 316 312 return "unknown-hardware"; 317 313 ··· 337 333 } 338 334 339 335 case PERF_TYPE_SOFTWARE: 340 - if (config < PERF_COUNT_SW_MAX) 336 + if (config < PERF_COUNT_SW_MAX && sw_event_names[config]) 341 337 return sw_event_names[config]; 342 338 return "unknown-software"; 343 339 ··· 652 648 int n; 653 649 654 650 n = strlen(event_symbols[i].symbol); 655 - if (!strncmp(str, event_symbols[i].symbol, n)) 651 + if (!strncasecmp(str, event_symbols[i].symbol, n)) 656 652 return n; 657 653 658 654 n = strlen(event_symbols[i].alias); 659 - if (n) 660 - if (!strncmp(str, event_symbols[i].alias, n)) 655 + if (n) { 656 + if (!strncasecmp(str, event_symbols[i].alias, n)) 661 657 return n; 658 + } 659 + 662 660 return 0; 663 661 } 664 662 ··· 724 718 return EVT_FAILED; 725 719 } 726 720 727 - static enum event_result 721 + static int 728 722 parse_event_modifier(const char **strp, struct perf_event_attr *attr) 729 723 { 730 724 const char *str = *strp; 731 725 int exclude = 0; 732 726 int eu = 0, ek = 0, eh = 0, precise = 0; 733 727 734 - if (*str++ != ':') 728 + if (!*str) 735 729 return 0; 730 + 731 + if (*str++ != ':') 732 + return -1; 733 + 736 734 while (*str) { 737 735 if (*str == 'u') { 738 736 if (!exclude) ··· 757 747 758 748 ++str; 759 749 } 760 - if (str >= *strp + 2) { 761 - *strp = str; 762 - attr->exclude_user = eu; 763 - attr->exclude_kernel = ek; 764 - attr->exclude_hv = eh; 765 - attr->precise_ip = precise; 766 - return 1; 767 - } 750 + if (str < *strp + 2) 751 + return -1; 752 + 753 + *strp = str; 754 + 755 + attr->exclude_user = eu; 756 + attr->exclude_kernel = ek; 757 + attr->exclude_hv = eh; 758 + attr->precise_ip = precise; 759 + 768 760 return 0; 769 761 } 770 762 ··· 809 797 return EVT_FAILED; 810 798 811 799 modifier: 812 - parse_event_modifier(str, attr); 800 + if (parse_event_modifier(str, attr) < 0) { 801 + fprintf(stderr, "invalid event modifier: '%s'\n", *str); 802 + fprintf(stderr, "Run 'perf list' for a list of valid events and modifiers\n"); 803 + 804 + return EVT_FAILED; 805 + } 813 806 814 807 return ret; 815 808 } ··· 929 912 930 913 snprintf(evt_path, MAXPATHLEN, "%s:%s", 931 914 sys_dirent.d_name, evt_dirent.d_name); 932 - printf(" %-42s [%s]\n", evt_path, 915 + printf(" %-50s [%s]\n", evt_path, 933 916 event_type_descriptors[PERF_TYPE_TRACEPOINT]); 934 917 } 935 918 closedir(evt_dir); ··· 994 977 else 995 978 snprintf(name, sizeof(name), "%s", syms->symbol); 996 979 997 - printf(" %-42s [%s]\n", name, 980 + printf(" %-50s [%s]\n", name, 998 981 event_type_descriptors[type]); 999 982 } 1000 983 } ··· 1012 995 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { 1013 996 char *name = event_cache_name(type, op, i); 1014 997 1015 - if (event_glob != NULL && 1016 - !strglobmatch(name, event_glob)) 998 + if (event_glob != NULL && !strglobmatch(name, event_glob)) 1017 999 continue; 1018 1000 1019 - printf(" %-42s [%s]\n", name, 1001 + printf(" %-50s [%s]\n", name, 1020 1002 event_type_descriptors[PERF_TYPE_HW_CACHE]); 1021 1003 ++printed; 1022 1004 } ··· 1025 1009 return printed; 1026 1010 } 1027 1011 1012 + #define MAX_NAME_LEN 100 1013 + 1028 1014 /* 1029 1015 * Print the help text for the event symbols: 1030 1016 */ 1031 1017 void print_events(const char *event_glob) 1032 1018 { 1033 - struct event_symbol *syms = event_symbols; 1034 1019 unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0; 1035 - char name[40]; 1020 + struct event_symbol *syms = event_symbols; 1021 + char name[MAX_NAME_LEN]; 1036 1022 1037 1023 printf("\n"); 1038 1024 printf("List of pre-defined events (to be used in -e):\n"); ··· 1054 1036 continue; 1055 1037 1056 1038 if (strlen(syms->alias)) 1057 - sprintf(name, "%s OR %s", syms->symbol, syms->alias); 1039 + snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); 1058 1040 else 1059 - strcpy(name, syms->symbol); 1060 - printf(" %-42s [%s]\n", name, 1041 + strncpy(name, syms->symbol, MAX_NAME_LEN); 1042 + printf(" %-50s [%s]\n", name, 1061 1043 event_type_descriptors[type]); 1062 1044 1063 1045 prev_type = type; ··· 1074 1056 return; 1075 1057 1076 1058 printf("\n"); 1077 - printf(" %-42s [%s]\n", 1059 + printf(" %-50s [%s]\n", 1078 1060 "rNNN (see 'perf list --help' on how to encode it)", 1079 1061 event_type_descriptors[PERF_TYPE_RAW]); 1080 1062 printf("\n"); 1081 1063 1082 - printf(" %-42s [%s]\n", 1064 + printf(" %-50s [%s]\n", 1083 1065 "mem:<addr>[:access]", 1084 1066 event_type_descriptors[PERF_TYPE_BREAKPOINT]); 1085 1067 printf("\n");
+3
tools/perf/util/python.c
··· 810 810 { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS }, 811 811 { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS }, 812 812 813 + { "COUNT_HW_STALLED_CYCLES_FRONTEND", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 814 + { "COUNT_HW_STALLED_CYCLES_BACKEND", PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 815 + 813 816 { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK }, 814 817 { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK }, 815 818 { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS },