Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf diff: Report noisy for cycles diff

This patch prints the stddev and hist for the cycles diff of program
block. It can help us to understand if the cycles is noisy or not.

This patch is inspired by Andi Kleen's patch:

https://lwn.net/Articles/600471/

We create new option '--cycles-hist'.

Example:

perf record -b ./div
perf record -b ./div
perf diff -c cycles

# Baseline [Program Block Range] Cycles Diff Shared Object Symbol
# ........ .......................................................... .... ................. ............................
#
46.72% [div.c:40 -> div.c:40] 0 div [.] main
46.72% [div.c:42 -> div.c:44] 0 div [.] main
46.72% [div.c:42 -> div.c:39] 0 div [.] main
20.54% [random_r.c:357 -> random_r.c:394] 1 libc-2.27.so [.] __random_r
20.54% [random_r.c:357 -> random_r.c:380] 0 libc-2.27.so [.] __random_r
20.54% [random_r.c:388 -> random_r.c:388] 0 libc-2.27.so [.] __random_r
20.54% [random_r.c:388 -> random_r.c:391] 0 libc-2.27.so [.] __random_r
17.04% [random.c:288 -> random.c:291] 0 libc-2.27.so [.] __random
17.04% [random.c:291 -> random.c:291] 0 libc-2.27.so [.] __random
17.04% [random.c:293 -> random.c:293] 0 libc-2.27.so [.] __random
17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random
17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random
17.04% [random.c:298 -> random.c:298] 0 libc-2.27.so [.] __random
8.40% [div.c:22 -> div.c:25] 0 div [.] compute_flag
8.40% [div.c:27 -> div.c:28] 0 div [.] compute_flag
5.14% [rand.c:26 -> rand.c:27] 0 libc-2.27.so [.] rand
5.14% [rand.c:28 -> rand.c:28] 0 libc-2.27.so [.] rand
2.15% [rand@plt+0 -> rand@plt+0] 0 div [.] rand@plt
0.00% [kernel.kallsyms] [k] __x86_indirect_thunk_rax
0.00% [do_mmap+714 -> do_mmap+732] -10 [kernel.kallsyms] [k] do_mmap
0.00% [do_mmap+737 -> do_mmap+765] 1 [kernel.kallsyms] [k] do_mmap
0.00% [do_mmap+262 -> do_mmap+299] 0 [kernel.kallsyms] [k] do_mmap
0.00% [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0] 7 [kernel.kallsyms] [k] __x86_indirect_thunk_r15
0.00% [native_sched_clock+0 -> native_sched_clock+119] -1 [kernel.kallsyms] [k] native_sched_clock
0.00% [native_write_msr+0 -> native_write_msr+16] -13 [kernel.kallsyms] [k] native_write_msr

When we enable the option '--cycles-hist', the output is

perf diff -c cycles --cycles-hist

# Baseline [Program Block Range] Cycles Diff stddev/Hist Shared Object Symbol
# ........ .......................................................... .... ................. ................. ............................
#
46.72% [div.c:40 -> div.c:40] 0 ± 37.8% ▁█▁▁██▁█ div [.] main
46.72% [div.c:42 -> div.c:44] 0 ± 49.4% ▁▁▂█▂▂▂▂ div [.] main
46.72% [div.c:42 -> div.c:39] 0 ± 24.1% ▃█▂▄▁▃▂▁ div [.] main
20.54% [random_r.c:357 -> random_r.c:394] 1 ± 33.5% ▅▂▁█▃▁▂▁ libc-2.27.so [.] __random_r
20.54% [random_r.c:357 -> random_r.c:380] 0 ± 39.4% ▁▁█▁██▅▁ libc-2.27.so [.] __random_r
20.54% [random_r.c:388 -> random_r.c:388] 0 libc-2.27.so [.] __random_r
20.54% [random_r.c:388 -> random_r.c:391] 0 ± 41.2% ▁▃▁▂█▄▃▁ libc-2.27.so [.] __random_r
17.04% [random.c:288 -> random.c:291] 0 ± 48.8% ▁▁▁▁███▁ libc-2.27.so [.] __random
17.04% [random.c:291 -> random.c:291] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random
17.04% [random.c:293 -> random.c:293] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random
17.04% [random.c:295 -> random.c:295] 0 ±100.0% ▁█▁▁▁▁▁▁ libc-2.27.so [.] __random
17.04% [random.c:295 -> random.c:295] 0 libc-2.27.so [.] __random
17.04% [random.c:298 -> random.c:298] 0 ± 75.6% ▃█▁▁▁▁▁▁ libc-2.27.so [.] __random
8.40% [div.c:22 -> div.c:25] 0 ± 42.1% ▁▃▁▁███▁ div [.] compute_flag
8.40% [div.c:27 -> div.c:28] 0 ± 41.8% ██▁▁▄▁▁▄ div [.] compute_flag
5.14% [rand.c:26 -> rand.c:27] 0 ± 37.8% ▁▁▁████▁ libc-2.27.so [.] rand
5.14% [rand.c:28 -> rand.c:28] 0 libc-2.27.so [.] rand
2.15% [rand@plt+0 -> rand@plt+0] 0 div [.] rand@plt
0.00% [kernel.kallsyms] [k] __x86_indirect_thunk_rax
0.00% [do_mmap+714 -> do_mmap+732] -10 [kernel.kallsyms] [k] do_mmap
0.00% [do_mmap+737 -> do_mmap+765] 1 [kernel.kallsyms] [k] do_mmap
0.00% [do_mmap+262 -> do_mmap+299] 0 [kernel.kallsyms] [k] do_mmap
0.00% [__x86_indirect_thunk_r15+0 -> __x86_indirect_thunk_r15+0] 7 [kernel.kallsyms] [k] __x86_indirect_thunk_r15
0.00% [native_sched_clock+0 -> native_sched_clock+119] -1 ± 38.5% ▄█▁ [kernel.kallsyms] [k] native_sched_clock
0.00% [native_write_msr+0 -> native_write_msr+16] -13 ± 47.1% ▁█▇▃▁▁ [kernel.kallsyms] [k] native_write_msr

v8:
---
Rebase to perf/core branch

v7:
---
1. v6 got Jiri's ACK.
2. Rebase to latest perf/core branch.

v6:
---
1. Jiri provides better code for using data__hpp_register() in ui_init().
Use this code in v6.

v5:
---
1. Refine the use of data__hpp_register() in ui_init() according to
Jiri's suggestion.

v4:
---
1. Rename the new option from '--noisy' to '--cycles-hist'
2. Remove the option '-n'.
3. Only update the spark value and stats when '--cycles-hist' is enabled.
4. Remove the code of printing '..'.

v3:
---
1. Move the histogram to a separate column
2. Move the svals[] out of struct stats

v2:
---
Jiri got a compile error,

CC builtin-diff.o
builtin-diff.c: In function ‘compute_cycles_diff’:
builtin-diff.c:712:10: error: taking the absolute value of unsigned type ‘u64’ {aka ‘long unsigned int’} has no effect [-Werror=absolute-value]
712 | labs(pair->block_info->cycles_spark[i] -
| ^~~~

Because the result of u64 - u64 is still u64. Now we change the type of
cycles_spark[] to s64.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20190925011446.30678-1-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Jin Yao and committed by
Arnaldo Carvalho de Melo
cebf7d51 55542113

+203
+5
tools/perf/Documentation/perf-diff.txt
··· 95 95 diff.compute config option. See COMPARISON METHODS section for 96 96 more info. 97 97 98 + --cycles-hist:: 99 + Report a histogram and the standard deviation for cycles data. 100 + It can help us to judge if the reported cycles data is noisy or 101 + not. This option should be used with '-c cycles'. 102 + 98 103 -p:: 99 104 --period:: 100 105 Show period values for both compared hist entries.
+143
tools/perf/builtin-diff.c
··· 23 23 #include "util/time-utils.h" 24 24 #include "util/annotate.h" 25 25 #include "util/map.h" 26 + #include "util/spark.h" 26 27 #include <linux/err.h> 27 28 #include <linux/zalloc.h> 28 29 #include <subcmd/pager.h> ··· 54 53 PERF_HPP_DIFF__FORMULA, 55 54 PERF_HPP_DIFF__DELTA_ABS, 56 55 PERF_HPP_DIFF__CYCLES, 56 + PERF_HPP_DIFF__CYCLES_HIST, 57 57 58 58 PERF_HPP_DIFF__MAX_INDEX 59 59 }; ··· 89 87 static bool show_period; 90 88 static bool show_formula; 91 89 static bool show_baseline_only; 90 + static bool cycles_hist; 92 91 static unsigned int sort_compute = 1; 93 92 94 93 static s64 compute_wdiff_w1; ··· 167 164 [PERF_HPP_DIFF__CYCLES] = { 168 165 .name = "[Program Block Range] Cycles Diff", 169 166 .width = 70, 167 + }, 168 + [PERF_HPP_DIFF__CYCLES_HIST] = { 169 + .name = "stddev/Hist", 170 + .width = NUM_SPARKS + 9, 170 171 } 171 172 }; 172 173 ··· 617 610 bi->cycles_aggr = ch->cycles_aggr; 618 611 bi->num = ch->num; 619 612 bi->num_aggr = ch->num_aggr; 613 + 614 + memcpy(bi->cycles_spark, ch->cycles_spark, 615 + NUM_SPARKS * sizeof(u64)); 620 616 } 621 617 622 618 static int process_block_per_sym(struct hist_entry *he) ··· 699 689 return NULL; 700 690 } 701 691 692 + static void init_spark_values(unsigned long *svals, int num) 693 + { 694 + for (int i = 0; i < num; i++) 695 + svals[i] = 0; 696 + } 697 + 698 + static void update_spark_value(unsigned long *svals, int num, 699 + struct stats *stats, u64 val) 700 + { 701 + int n = stats->n; 702 + 703 + if (n < num) 704 + svals[n] = val; 705 + } 706 + 702 707 static void compute_cycles_diff(struct hist_entry *he, 703 708 struct hist_entry *pair) 704 709 { ··· 722 697 pair->diff.cycles = 723 698 pair->block_info->cycles_aggr / pair->block_info->num_aggr - 724 699 he->block_info->cycles_aggr / he->block_info->num_aggr; 700 + 701 + if (!cycles_hist) 702 + return; 703 + 704 + init_stats(&pair->diff.stats); 705 + init_spark_values(pair->diff.svals, NUM_SPARKS); 706 + 707 + for (int i = 0; i < pair->block_info->num; i++) { 708 + u64 val; 709 + 710 + if (i >= he->block_info->num || i >= NUM_SPARKS) 711 + break; 712 + 713 + val = labs(pair->block_info->cycles_spark[i] - 714 + he->block_info->cycles_spark[i]); 715 + 716 + update_spark_value(pair->diff.svals, NUM_SPARKS, 717 + &pair->diff.stats, val); 718 + update_stats(&pair->diff.stats, val); 719 + } 725 720 } 726 721 } 727 722 ··· 1300 1255 "Show period values."), 1301 1256 OPT_BOOLEAN('F', "formula", &show_formula, 1302 1257 "Show formula."), 1258 + OPT_BOOLEAN(0, "cycles-hist", &cycles_hist, 1259 + "Show cycles histogram and standard deviation " 1260 + "- WARNING: use only with -c cycles."), 1303 1261 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1304 1262 "dump raw trace in ASCII"), 1305 1263 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), ··· 1510 1462 return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES); 1511 1463 } 1512 1464 1465 + static int all_zero(unsigned long *vals, int len) 1466 + { 1467 + int i; 1468 + 1469 + for (i = 0; i < len; i++) 1470 + if (vals[i] != 0) 1471 + return 0; 1472 + return 1; 1473 + } 1474 + 1475 + static int print_cycles_spark(char *bf, int size, unsigned long *svals, u64 n) 1476 + { 1477 + int printed; 1478 + 1479 + if (n <= 1) 1480 + return 0; 1481 + 1482 + if (n > NUM_SPARKS) 1483 + n = NUM_SPARKS; 1484 + if (all_zero(svals, n)) 1485 + return 0; 1486 + 1487 + printed = print_spark(bf, size, svals, n); 1488 + printed += scnprintf(bf + printed, size - printed, " "); 1489 + return printed; 1490 + } 1491 + 1492 + static int hpp__color_cycles_hist(struct perf_hpp_fmt *fmt, 1493 + struct perf_hpp *hpp, struct hist_entry *he) 1494 + { 1495 + struct diff_hpp_fmt *dfmt = 1496 + container_of(fmt, struct diff_hpp_fmt, fmt); 1497 + struct hist_entry *pair = get_pair_fmt(he, dfmt); 1498 + struct block_hist *bh = container_of(he, struct block_hist, he); 1499 + struct block_hist *bh_pair; 1500 + struct hist_entry *block_he; 1501 + char spark[32], buf[128]; 1502 + double r; 1503 + int ret, pad; 1504 + 1505 + if (!pair) { 1506 + if (bh->block_idx) 1507 + hpp->skip = true; 1508 + 1509 + goto no_print; 1510 + } 1511 + 1512 + bh_pair = container_of(pair, struct block_hist, he); 1513 + 1514 + block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx); 1515 + if (!block_he) { 1516 + hpp->skip = true; 1517 + goto no_print; 1518 + } 1519 + 1520 + ret = print_cycles_spark(spark, sizeof(spark), block_he->diff.svals, 1521 + block_he->diff.stats.n); 1522 + 1523 + r = rel_stddev_stats(stddev_stats(&block_he->diff.stats), 1524 + avg_stats(&block_he->diff.stats)); 1525 + 1526 + if (ret) { 1527 + /* 1528 + * Padding spaces if number of sparks less than NUM_SPARKS 1529 + * otherwise the output is not aligned. 1530 + */ 1531 + pad = NUM_SPARKS - ((ret - 1) / 3); 1532 + scnprintf(buf, sizeof(buf), "%s%5.1f%% %s", "\u00B1", r, spark); 1533 + ret = scnprintf(hpp->buf, hpp->size, "%*s", 1534 + dfmt->header_width, buf); 1535 + 1536 + if (pad) { 1537 + ret += scnprintf(hpp->buf + ret, hpp->size - ret, 1538 + "%-*s", pad, " "); 1539 + } 1540 + 1541 + return ret; 1542 + } 1543 + 1544 + no_print: 1545 + return scnprintf(hpp->buf, hpp->size, "%*s", 1546 + dfmt->header_width, " "); 1547 + } 1548 + 1513 1549 static void 1514 1550 hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size) 1515 1551 { ··· 1799 1667 fmt->color = hpp__color_cycles; 1800 1668 fmt->sort = hist_entry__cmp_nop; 1801 1669 break; 1670 + case PERF_HPP_DIFF__CYCLES_HIST: 1671 + fmt->color = hpp__color_cycles_hist; 1672 + fmt->sort = hist_entry__cmp_nop; 1673 + break; 1802 1674 default: 1803 1675 fmt->sort = hist_entry__cmp_nop; 1804 1676 break; ··· 1828 1692 * PERF_HPP_DIFF__DELTA 1829 1693 * PERF_HPP_DIFF__RATIO 1830 1694 * PERF_HPP_DIFF__WEIGHTED_DIFF 1695 + * PERF_HPP_DIFF__CYCLES 1831 1696 */ 1832 1697 data__hpp_register(d, i ? compute_2_hpp[compute] : 1833 1698 PERF_HPP_DIFF__BASELINE); 1699 + 1700 + if (cycles_hist && i) 1701 + data__hpp_register(d, PERF_HPP_DIFF__CYCLES_HIST); 1834 1702 1835 1703 /* 1836 1704 * And the rest: ··· 1989 1849 1990 1850 if (quiet) 1991 1851 perf_quiet_option(); 1852 + 1853 + if (cycles_hist && (compute != COMPUTE_CYCLES)) 1854 + usage_with_options(diff_usage, options); 1992 1855 1993 1856 symbol__annotation_init(); 1994 1857
+1
tools/perf/util/Build
··· 95 95 perf-y += call-path.o 96 96 perf-y += rwsem.o 97 97 perf-y += thread-stack.o 98 + perf-y += spark.o 98 99 perf-$(CONFIG_AUXTRACE) += auxtrace.o 99 100 perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ 100 101 perf-$(CONFIG_AUXTRACE) += intel-pt.o
+4
tools/perf/util/annotate.c
··· 853 853 ch[offset].start < start) 854 854 return 0; 855 855 } 856 + 857 + if (ch[offset].num < NUM_SPARKS) 858 + ch[offset].cycles_spark[ch[offset].num] = cycles; 859 + 856 860 ch[offset].have_start = have_start; 857 861 ch[offset].start = start; 858 862 ch[offset].cycles += cycles;
+2
tools/perf/util/annotate.h
··· 11 11 #include <pthread.h> 12 12 #include <asm/bug.h> 13 13 #include "symbol_conf.h" 14 + #include "spark.h" 14 15 15 16 struct hist_browser_timer; 16 17 struct hist_entry; ··· 236 235 u64 cycles_aggr; 237 236 u64 cycles_max; 238 237 u64 cycles_min; 238 + s64 cycles_spark[NUM_SPARKS]; 239 239 u32 num; 240 240 u32 num_aggr; 241 241 u8 have_start;
+4
tools/perf/util/sort.h
··· 10 10 #include "callchain.h" 11 11 #include "values.h" 12 12 #include "hist.h" 13 + #include "stat.h" 14 + #include "spark.h" 13 15 14 16 struct option; 15 17 struct thread; ··· 73 71 /* PERF_HPP_DIFF__CYCLES */ 74 72 s64 cycles; 75 73 }; 74 + struct stats stats; 75 + unsigned long svals[NUM_SPARKS]; 76 76 }; 77 77 78 78 struct hist_entry_ops {
+34
tools/perf/util/spark.c
··· 1 + #include <stdio.h> 2 + #include <limits.h> 3 + #include <string.h> 4 + #include <stdlib.h> 5 + #include "spark.h" 6 + #include "stat.h" 7 + 8 + #define SPARK_SHIFT 8 9 + 10 + /* Print spark lines on outf for numval values in val. */ 11 + int print_spark(char *bf, int size, unsigned long *val, int numval) 12 + { 13 + static const char *ticks[NUM_SPARKS] = { 14 + "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█" 15 + }; 16 + int i, printed = 0; 17 + unsigned long min = ULONG_MAX, max = 0, f; 18 + 19 + for (i = 0; i < numval; i++) { 20 + if (val[i] < min) 21 + min = val[i]; 22 + if (val[i] > max) 23 + max = val[i]; 24 + } 25 + f = ((max - min) << SPARK_SHIFT) / (NUM_SPARKS - 1); 26 + if (f < 1) 27 + f = 1; 28 + for (i = 0; i < numval; i++) { 29 + printed += scnprintf(bf + printed, size - printed, "%s", 30 + ticks[((val[i] - min) << SPARK_SHIFT) / f]); 31 + } 32 + 33 + return printed; 34 + }
+8
tools/perf/util/spark.h
··· 1 + #ifndef SPARK_H 2 + #define SPARK_H 1 3 + 4 + #define NUM_SPARKS 8 5 + 6 + int print_spark(char *bf, int size, unsigned long *val, int numval); 7 + 8 + #endif
+2
tools/perf/util/symbol.h
··· 11 11 #include <stdio.h> 12 12 #include "path.h" 13 13 #include "symbol_conf.h" 14 + #include "spark.h" 14 15 15 16 #ifdef HAVE_LIBELF_SUPPORT 16 17 #include <libelf.h> ··· 112 111 u64 end; 113 112 u64 cycles; 114 113 u64 cycles_aggr; 114 + s64 cycles_spark[NUM_SPARKS]; 115 115 int num; 116 116 int num_aggr; 117 117 refcount_t refcnt;