perf evsel: Support perf_evsel__open(cpus > 1 && threads > 1)

And a test for it:

[acme@felicio linux]$ perf test
1: vmlinux symtab matches kallsyms: Ok
2: detect open syscall event: Ok
3: detect open syscall event on all cpus: Ok
[acme@felicio linux]$

In plain words, the test:

1. Generates a different number of open(2) syscalls on each CPU,
   using sched_setaffinity() to pin itself to each CPU in turn.
2. Verifies that the expected number of events was counted on
   each CPU.

It works as expected.
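
Since new callers will go through perf_evsel__open() directly, here is a
minimal sketch (not part of the patch) of the combination it now accepts;
the helper name is made up, headers and teardown are omitted, and only
functions this patch introduces or exercises appear:

	/* Hypothetical sketch: one evsel on all cpus and one pid's threads. */
	static int open_everywhere(struct perf_event_attr *attr, pid_t pid)
	{
		struct cpu_map *cpus = cpu_map__new(NULL);	/* all online cpus */
		struct thread_map *threads = thread_map__new(-1, pid);
		struct perf_evsel *evsel = perf_evsel__new(attr, 0);
		int cpu;

		if (cpus == NULL || threads == NULL || evsel == NULL)
			return -1;

		/* Used to silently fall back to per-thread counting: */
		if (perf_evsel__open(evsel, cpus, threads) < 0)
			return -1;

		/* A cpus->nr * threads->nr matrix of fds is now open. */
		if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0)
			return -1;

		for (cpu = 0; cpu < cpus->nr; ++cpu)	/* thread 0, per cpu */
			if (perf_evsel__read_on_cpu(evsel, cpu, 0) == 0)
				pr_debug("cpu%d: %Ld\n", cpu,
					 evsel->counts->cpu[cpu].val);
		return 0;
	}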

LKML-Reference: <new-submission>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

 tools/perf/builtin-test.c | 110 ++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/evsel.c   |  96 ++++++++++++++++++++++++-----------------
 2 files changed, 166 insertions(+), 40 deletions(-)

tools/perf/builtin-test.c
···
 	return err;
 }
 
+#include "util/cpumap.h"
 #include "util/evsel.h"
 #include <sys/types.h>
···
 	return err;
 }
 
+#include <sched.h>
+
+static int test__open_syscall_event_on_all_cpus(void)
+{
+	int err = -1, fd, cpu;
+	struct thread_map *threads;
+	struct cpu_map *cpus;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+	unsigned int nr_open_calls = 111, i;
+	cpu_set_t *cpu_set;
+	size_t cpu_set_size;
+	int id = trace_event__id("sys_enter_open");
+
+	if (id < 0) {
+		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		return -1;
+	}
+
+	threads = thread_map__new(-1, getpid());
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (cpus == NULL) {
+		pr_debug("cpu_map__new\n");
+		goto out_thread_map_delete;
+	}
+
+	cpu_set = CPU_ALLOC(cpus->nr);
+	if (cpu_set == NULL)
+		goto out_thread_map_delete;
+
+	cpu_set_size = CPU_ALLOC_SIZE(cpus->nr);
+	CPU_ZERO_S(cpu_set_size, cpu_set);
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;
+	evsel = perf_evsel__new(&attr, 0);
+	if (evsel == NULL) {
+		pr_debug("perf_evsel__new\n");
+		goto out_cpu_free;
+	}
+
+	if (perf_evsel__open(evsel, cpus, threads) < 0) {
+		pr_debug("failed to open counter: %s, "
+			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+			 strerror(errno));
+		goto out_evsel_delete;
+	}
+
+	/*
+	 * Pin the process to each cpu in turn and issue a different
+	 * number of open(2) calls on each one.
+	 */
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int ncalls = nr_open_calls + cpu;
+
+		CPU_SET(cpu, cpu_set);
+		sched_setaffinity(0, cpu_set_size, cpu_set);
+		for (i = 0; i < ncalls; ++i) {
+			fd = open("/etc/passwd", O_RDONLY);
+			close(fd);
+		}
+		CPU_CLR(cpu, cpu_set);
+	}
+
+	/*
+	 * Here we need to explicitly preallocate the counts, as if
+	 * we use the auto allocation it will allocate just for 1 cpu,
+	 * as we start by cpu 0.
+	 */
+	if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
+		pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+		goto out_close_fd;
+	}
+
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int expected;
+
+		if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+			pr_debug("perf_evsel__read_on_cpu\n");
+			goto out_close_fd;
+		}
+
+		expected = nr_open_calls + cpu;
+		if (evsel->counts->cpu[cpu].val != expected) {
+			pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n",
+				 expected, cpu, evsel->counts->cpu[cpu].val);
+			goto out_close_fd;
+		}
+	}
+
+	err = 0;
+out_close_fd:
+	perf_evsel__close_fd(evsel, cpus->nr, threads->nr);
+out_evsel_delete:
+	perf_evsel__delete(evsel);
+out_cpu_free:
+	CPU_FREE(cpu_set);
+out_thread_map_delete:
+	thread_map__delete(threads);
+	return err;
+}
+
 static struct test {
 	const char *desc;
 	int (*func)(void);
···
 	{
 		.desc = "detect open syscall event",
 		.func = test__open_syscall_event,
+	},
+	{
+		.desc = "detect open syscall event on all cpus",
+		.func = test__open_syscall_event_on_all_cpus,
 	},
 	{
 		.func = NULL,
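
Note that the thread map above has a single entry (the test's own pid), so
the evsel ends up with a cpus->nr x 1 matrix of fds and
perf_evsel__read_on_cpu(evsel, cpu, 0) returns how many times the test
entered open(2) while pinned to the given cpu. That is also why the counts
must be preallocated for every cpu up front and why perf_evsel__close_fd()
must be passed both dimensions.
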
tools/perf/util/evsel.c
···
 	return 0;
 }
 
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+			      struct thread_map *threads)
 {
-	int cpu;
+	int cpu, thread;
 
-	if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0)
+	if (evsel->fd == NULL &&
+	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
 		return -1;
 
 	for (cpu = 0; cpu < cpus->nr; cpu++) {
-		FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1,
-							cpus->map[cpu], -1, 0);
-		if (FD(evsel, cpu, 0) < 0)
-			goto out_close;
+		for (thread = 0; thread < threads->nr; thread++) {
+			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
+								     threads->map[thread],
+								     cpus->map[cpu], -1, 0);
+			if (FD(evsel, cpu, thread) < 0)
+				goto out_close;
+		}
 	}
 
 	return 0;
 
 out_close:
-	while (--cpu >= 0) {
-		close(FD(evsel, cpu, 0));
-		FD(evsel, cpu, 0) = -1;
-	}
+	do {
+		while (--thread >= 0) {
+			close(FD(evsel, cpu, thread));
+			FD(evsel, cpu, thread) = -1;
+		}
+		thread = threads->nr;
+	} while (--cpu >= 0);
 	return -1;
 }
 
+static struct {
+	struct cpu_map map;
+	int cpus[1];
+} empty_cpu_map = {
+	.map.nr = 1,
+	.cpus	= { -1, },
+};
+
+static struct {
+	struct thread_map map;
+	int threads[1];
+} empty_thread_map = {
+	.map.nr  = 1,
+	.threads = { -1, },
+};
+
+int perf_evsel__open(struct perf_evsel *evsel,
+		     struct cpu_map *cpus, struct thread_map *threads)
+{
+	if (cpus == NULL) {
+		/* Work around old compiler warnings about strict aliasing */
+		cpus = &empty_cpu_map.map;
+	}
+
+	if (threads == NULL)
+		threads = &empty_thread_map.map;
+
+	return __perf_evsel__open(evsel, cpus, threads);
+}
+
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+{
+	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
+}
+
 int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
 {
-	int thread;
-
-	if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr))
-		return -1;
-
-	for (thread = 0; thread < threads->nr; thread++) {
-		FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr,
-							   threads->map[thread], -1, -1, 0);
-		if (FD(evsel, 0, thread) < 0)
-			goto out_close;
-	}
-
-	return 0;
-
-out_close:
-	while (--thread >= 0) {
-		close(FD(evsel, 0, thread));
-		FD(evsel, 0, thread) = -1;
-	}
-	return -1;
-}
-
-int perf_evsel__open(struct perf_evsel *evsel,
-		     struct cpu_map *cpus, struct thread_map *threads)
-{
-	if (threads == NULL)
-		return perf_evsel__open_per_cpu(evsel, cpus);
-
-	return perf_evsel__open_per_thread(evsel, threads);
+	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
 }
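
The empty_cpu_map/empty_thread_map placeholders rely on sys_perf_event_open()
treating -1 specially: pid == -1 with a valid cpu counts every thread on that
cpu, while a valid pid with cpu == -1 counts that thread on whichever cpu it
runs. A one-entry map holding -1 therefore collapses the generic double loop
into exactly the calls the old per-cpu and per-thread variants made:

	/* per cpu (threads->map[0] == -1): */
	sys_perf_event_open(&evsel->attr, -1, cpus->map[cpu], -1, 0);

	/* per thread (cpus->map[0] == -1): */
	sys_perf_event_open(&evsel->attr, threads->map[thread], -1, -1, 0);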