Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf bench: Add benchmark for evlist open/close operations

This new benchmark measures the total time taken to open, mmap,
enable, disable, munmap and close an evlist (the time taken by new,
create_maps, config and delete is not counted).

The evlist can be configured as in perf-record using the
-a,-C,-e,-u,--per-thread,-t,-p options.

The events can be duplicated in the evlist to quickly test performance
with many events using the -n option.

Furthermore, the number of iterations used to calculate the
statistics is also customizable with the -i option.

Examples:
- Open one dummy event system-wide:

$ sudo ./perf bench internals evlist-open-close
Number of cpus: 4
Number of threads: 1
Number of events: 1 (4 fds)
Number of iterations: 100
Average open-close took: 613.870 usec (+- 32.852 usec)

- Open the group '{cs,cycles}' on CPU 0

$ sudo ./perf bench internals evlist-open-close -e '{cs,cycles}' -C 0
Number of cpus: 1
Number of threads: 1
Number of events: 2 (2 fds)
Number of iterations: 100
Average open-close took: 8503.220 usec (+- 252.652 usec)

- Open 10 'cycles' events for user 0, calculate average over 100 runs

$ sudo ./perf bench internals evlist-open-close -e cycles -n 10 -u 0 -i 100
Number of cpus: 4
Number of threads: 328
Number of events: 10 (13120 fds)
Number of iterations: 100
Average open-close took: 180043.140 usec (+- 2295.889 usec)

Committer notes:

Replaced a deprecated bzero() call with zeroing via a designated initializer.

Added some missing evlist allocation checks, one noted by Riccardo on
the mailing list.

Minor cosmetic changes (sent in private).

Signed-off-by: Riccardo Mancini <rickyman7@gmail.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210809201101.277594-1-rickyman7@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Riccardo Mancini and committed by
Arnaldo Carvalho de Melo
4241eabf f2c24eba

+260
+1
tools/perf/bench/Build
··· 13 13 perf-y += kallsyms-parse.o 14 14 perf-y += find-bit-bench.o 15 15 perf-y += inject-buildid.o 16 + perf-y += evlist-open-close.o 16 17 17 18 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o 18 19 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
+1
tools/perf/bench/bench.h
··· 48 48 int bench_synthesize(int argc, const char **argv); 49 49 int bench_kallsyms_parse(int argc, const char **argv); 50 50 int bench_inject_build_id(int argc, const char **argv); 51 + int bench_evlist_open_close(int argc, const char **argv); 51 52 52 53 #define BENCH_FORMAT_DEFAULT_STR "default" 53 54 #define BENCH_FORMAT_DEFAULT 0
+257
tools/perf/bench/evlist-open-close.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <stdio.h> 3 + #include <stdlib.h> 4 + #include <limits.h> 5 + #include "bench.h" 6 + #include "../util/debug.h" 7 + #include "../util/stat.h" 8 + #include "../util/evlist.h" 9 + #include "../util/evsel.h" 10 + #include "../util/strbuf.h" 11 + #include "../util/record.h" 12 + #include "../util/parse-events.h" 13 + #include "internal/threadmap.h" 14 + #include "internal/cpumap.h" 15 + #include <linux/perf_event.h> 16 + #include <linux/kernel.h> 17 + #include <linux/time64.h> 18 + #include <linux/string.h> 19 + #include <subcmd/parse-options.h> 20 + 21 + #define MMAP_FLUSH_DEFAULT 1 22 + 23 + static int iterations = 100; 24 + static int nr_events = 1; 25 + static const char *event_string = "dummy"; 26 + 27 + static struct record_opts opts = { 28 + .sample_time = true, 29 + .mmap_pages = UINT_MAX, 30 + .user_freq = UINT_MAX, 31 + .user_interval = ULLONG_MAX, 32 + .freq = 4000, 33 + .target = { 34 + .uses_mmap = true, 35 + .default_per_cpu = true, 36 + }, 37 + .mmap_flush = MMAP_FLUSH_DEFAULT, 38 + .nr_threads_synthesize = 1, 39 + .ctl_fd = -1, 40 + .ctl_fd_ack = -1, 41 + }; 42 + 43 + static const struct option options[] = { 44 + OPT_STRING('e', "event", &event_string, "event", "event selector. use 'perf list' to list available events"), 45 + OPT_INTEGER('n', "nr-events", &nr_events, 46 + "number of dummy events to create (default 1). 
If used with -e, it clones those events n times (1 = no change)"), 47 + OPT_INTEGER('i', "iterations", &iterations, "Number of iterations used to compute average (default=100)"), 48 + OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide, "system-wide collection from all CPUs"), 49 + OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu", "list of cpus where to open events"), 50 + OPT_STRING('p', "pid", &opts.target.pid, "pid", "record events on existing process id"), 51 + OPT_STRING('t', "tid", &opts.target.tid, "tid", "record events on existing thread id"), 52 + OPT_STRING('u', "uid", &opts.target.uid_str, "user", "user to profile"), 53 + OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"), 54 + OPT_END() 55 + }; 56 + 57 + static const char *const bench_usage[] = { 58 + "perf bench internals evlist-open-close <options>", 59 + NULL 60 + }; 61 + 62 + static int evlist__count_evsel_fds(struct evlist *evlist) 63 + { 64 + struct evsel *evsel; 65 + int cnt = 0; 66 + 67 + evlist__for_each_entry(evlist, evsel) 68 + cnt += evsel->core.threads->nr * evsel->core.cpus->nr; 69 + 70 + return cnt; 71 + } 72 + 73 + static struct evlist *bench__create_evlist(char *evstr) 74 + { 75 + struct parse_events_error err = { .idx = 0, }; 76 + struct evlist *evlist = evlist__new(); 77 + int ret; 78 + 79 + if (!evlist) { 80 + pr_err("Not enough memory to create evlist\n"); 81 + return NULL; 82 + } 83 + 84 + ret = parse_events(evlist, evstr, &err); 85 + if (ret) { 86 + parse_events_print_error(&err, evstr); 87 + pr_err("Run 'perf list' for a list of valid events\n"); 88 + ret = 1; 89 + goto out_delete_evlist; 90 + } 91 + 92 + ret = evlist__create_maps(evlist, &opts.target); 93 + if (ret < 0) { 94 + pr_err("Not enough memory to create thread/cpu maps\n"); 95 + goto out_delete_evlist; 96 + } 97 + 98 + evlist__config(evlist, &opts, NULL); 99 + 100 + return evlist; 101 + 102 + out_delete_evlist: 103 + evlist__delete(evlist); 104 + return NULL; 105 + } 106 + 107 + static 
int bench__do_evlist_open_close(struct evlist *evlist) 108 + { 109 + char sbuf[STRERR_BUFSIZE]; 110 + int err = evlist__open(evlist); 111 + 112 + if (err < 0) { 113 + pr_err("evlist__open: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); 114 + return err; 115 + } 116 + 117 + err = evlist__mmap(evlist, opts.mmap_pages); 118 + if (err < 0) { 119 + pr_err("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); 120 + return err; 121 + } 122 + 123 + evlist__enable(evlist); 124 + evlist__disable(evlist); 125 + evlist__munmap(evlist); 126 + evlist__close(evlist); 127 + 128 + return 0; 129 + } 130 + 131 + static int bench_evlist_open_close__run(char *evstr) 132 + { 133 + // used to print statistics only 134 + struct evlist *evlist = bench__create_evlist(evstr); 135 + double time_average, time_stddev; 136 + struct timeval start, end, diff; 137 + struct stats time_stats; 138 + u64 runtime_us; 139 + int i, err; 140 + 141 + if (!evlist) 142 + return -ENOMEM; 143 + 144 + init_stats(&time_stats); 145 + 146 + printf(" Number of cpus:\t%d\n", evlist->core.cpus->nr); 147 + printf(" Number of threads:\t%d\n", evlist->core.threads->nr); 148 + printf(" Number of events:\t%d (%d fds)\n", 149 + evlist->core.nr_entries, evlist__count_evsel_fds(evlist)); 150 + printf(" Number of iterations:\t%d\n", iterations); 151 + 152 + evlist__delete(evlist); 153 + 154 + for (i = 0; i < iterations; i++) { 155 + pr_debug("Started iteration %d\n", i); 156 + evlist = bench__create_evlist(evstr); 157 + if (!evlist) 158 + return -ENOMEM; 159 + 160 + gettimeofday(&start, NULL); 161 + err = bench__do_evlist_open_close(evlist); 162 + if (err) { 163 + evlist__delete(evlist); 164 + return err; 165 + } 166 + 167 + gettimeofday(&end, NULL); 168 + timersub(&end, &start, &diff); 169 + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 170 + update_stats(&time_stats, runtime_us); 171 + 172 + evlist__delete(evlist); 173 + pr_debug("Iteration %d took:\t%ldus\n", i, runtime_us); 174 + } 175 + 176 + 
time_average = avg_stats(&time_stats); 177 + time_stddev = stddev_stats(&time_stats); 178 + printf(" Average open-close took: %.3f usec (+- %.3f usec)\n", time_average, time_stddev); 179 + 180 + return 0; 181 + } 182 + 183 + static char *bench__repeat_event_string(const char *evstr, int n) 184 + { 185 + char sbuf[STRERR_BUFSIZE]; 186 + struct strbuf buf; 187 + int i, str_size = strlen(evstr), 188 + final_size = str_size * n + n, 189 + err = strbuf_init(&buf, final_size); 190 + 191 + if (err) { 192 + pr_err("strbuf_init: %s\n", str_error_r(err, sbuf, sizeof(sbuf))); 193 + goto out_error; 194 + } 195 + 196 + for (i = 0; i < n; i++) { 197 + err = strbuf_add(&buf, evstr, str_size); 198 + if (err) { 199 + pr_err("strbuf_add: %s\n", str_error_r(err, sbuf, sizeof(sbuf))); 200 + goto out_error; 201 + } 202 + 203 + err = strbuf_addch(&buf, i == n-1 ? '\0' : ','); 204 + if (err) { 205 + pr_err("strbuf_addch: %s\n", str_error_r(err, sbuf, sizeof(sbuf))); 206 + goto out_error; 207 + } 208 + } 209 + 210 + return strbuf_detach(&buf, NULL); 211 + 212 + out_error: 213 + strbuf_release(&buf); 214 + return NULL; 215 + } 216 + 217 + 218 + int bench_evlist_open_close(int argc, const char **argv) 219 + { 220 + char *evstr, errbuf[BUFSIZ]; 221 + int err; 222 + 223 + argc = parse_options(argc, argv, options, bench_usage, 0); 224 + if (argc) { 225 + usage_with_options(bench_usage, options); 226 + exit(EXIT_FAILURE); 227 + } 228 + 229 + err = target__validate(&opts.target); 230 + if (err) { 231 + target__strerror(&opts.target, err, errbuf, sizeof(errbuf)); 232 + pr_err("%s\n", errbuf); 233 + goto out; 234 + } 235 + 236 + err = target__parse_uid(&opts.target); 237 + if (err) { 238 + target__strerror(&opts.target, err, errbuf, sizeof(errbuf)); 239 + pr_err("%s", errbuf); 240 + goto out; 241 + } 242 + 243 + /* Enable ignoring missing threads when -u/-p option is defined. 
*/ 244 + opts.ignore_missing_thread = opts.target.uid != UINT_MAX || opts.target.pid; 245 + 246 + evstr = bench__repeat_event_string(event_string, nr_events); 247 + if (!evstr) { 248 + err = -ENOMEM; 249 + goto out; 250 + } 251 + 252 + err = bench_evlist_open_close__run(evstr); 253 + 254 + free(evstr); 255 + out: 256 + return err; 257 + }
+1
tools/perf/builtin-bench.c
··· 88 88 { "synthesize", "Benchmark perf event synthesis", bench_synthesize }, 89 89 { "kallsyms-parse", "Benchmark kallsyms parsing", bench_kallsyms_parse }, 90 90 { "inject-build-id", "Benchmark build-id injection", bench_inject_build_id }, 91 + { "evlist-open-close", "Benchmark evlist open and close", bench_evlist_open_close }, 91 92 { NULL, NULL, NULL } 92 93 }; 93 94