at v5.5-rc5 641 lines 15 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2#include <perf/evlist.h> 3#include <perf/evsel.h> 4#include <linux/bitops.h> 5#include <linux/list.h> 6#include <linux/hash.h> 7#include <sys/ioctl.h> 8#include <internal/evlist.h> 9#include <internal/evsel.h> 10#include <internal/xyarray.h> 11#include <internal/mmap.h> 12#include <internal/cpumap.h> 13#include <internal/threadmap.h> 14#include <internal/xyarray.h> 15#include <internal/lib.h> 16#include <linux/zalloc.h> 17#include <sys/ioctl.h> 18#include <stdlib.h> 19#include <errno.h> 20#include <unistd.h> 21#include <fcntl.h> 22#include <signal.h> 23#include <poll.h> 24#include <sys/mman.h> 25#include <perf/cpumap.h> 26#include <perf/threadmap.h> 27#include <api/fd/array.h> 28 29void perf_evlist__init(struct perf_evlist *evlist) 30{ 31 int i; 32 33 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) 34 INIT_HLIST_HEAD(&evlist->heads[i]); 35 INIT_LIST_HEAD(&evlist->entries); 36 evlist->nr_entries = 0; 37 fdarray__init(&evlist->pollfd, 64); 38} 39 40static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, 41 struct perf_evsel *evsel) 42{ 43 /* 44 * We already have cpus for evsel (via PMU sysfs) so 45 * keep it, if there's no target cpu list defined. 46 */ 47 if (!evsel->own_cpus || evlist->has_user_cpus) { 48 perf_cpu_map__put(evsel->cpus); 49 evsel->cpus = perf_cpu_map__get(evlist->cpus); 50 } else if (evsel->cpus != evsel->own_cpus) { 51 perf_cpu_map__put(evsel->cpus); 52 evsel->cpus = perf_cpu_map__get(evsel->own_cpus); 53 } 54 55 perf_thread_map__put(evsel->threads); 56 evsel->threads = perf_thread_map__get(evlist->threads); 57 evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); 58} 59 60static void perf_evlist__propagate_maps(struct perf_evlist *evlist) 61{ 62 struct perf_evsel *evsel; 63 64 perf_evlist__for_each_evsel(evlist, evsel) 65 __perf_evlist__propagate_maps(evlist, evsel); 66} 67 68void perf_evlist__add(struct perf_evlist *evlist, 69 struct perf_evsel *evsel) 70{ 71 list_add_tail(&evsel->node, &evlist->entries); 72 evlist->nr_entries += 1; 73 __perf_evlist__propagate_maps(evlist, evsel); 74} 75 76void perf_evlist__remove(struct perf_evlist *evlist, 77 struct perf_evsel *evsel) 78{ 79 list_del_init(&evsel->node); 80 evlist->nr_entries -= 1; 81} 82 83struct perf_evlist *perf_evlist__new(void) 84{ 85 struct perf_evlist *evlist = zalloc(sizeof(*evlist)); 86 87 if (evlist != NULL) 88 perf_evlist__init(evlist); 89 90 return evlist; 91} 92 93struct perf_evsel * 94perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev) 95{ 96 struct perf_evsel *next; 97 98 if (!prev) { 99 next = list_first_entry(&evlist->entries, 100 struct perf_evsel, 101 node); 102 } else { 103 next = list_next_entry(prev, node); 104 } 105 106 /* Empty list is noticed here so don't need checking on entry. */ 107 if (&next->node == &evlist->entries) 108 return NULL; 109 110 return next; 111} 112 113static void perf_evlist__purge(struct perf_evlist *evlist) 114{ 115 struct perf_evsel *pos, *n; 116 117 perf_evlist__for_each_entry_safe(evlist, n, pos) { 118 list_del_init(&pos->node); 119 perf_evsel__delete(pos); 120 } 121 122 evlist->nr_entries = 0; 123} 124 125void perf_evlist__exit(struct perf_evlist *evlist) 126{ 127 perf_cpu_map__put(evlist->cpus); 128 perf_thread_map__put(evlist->threads); 129 evlist->cpus = NULL; 130 evlist->threads = NULL; 131 fdarray__exit(&evlist->pollfd); 132} 133 134void perf_evlist__delete(struct perf_evlist *evlist) 135{ 136 if (evlist == NULL) 137 return; 138 139 perf_evlist__munmap(evlist); 140 perf_evlist__close(evlist); 141 perf_evlist__purge(evlist); 142 perf_evlist__exit(evlist); 143 free(evlist); 144} 145 146void perf_evlist__set_maps(struct perf_evlist *evlist, 147 struct perf_cpu_map *cpus, 148 struct perf_thread_map *threads) 149{ 150 /* 151 * Allow for the possibility that one or another of the maps isn't being 152 * changed i.e. don't put it. Note we are assuming the maps that are 153 * being applied are brand new and evlist is taking ownership of the 154 * original reference count of 1. If that is not the case it is up to 155 * the caller to increase the reference count. 156 */ 157 if (cpus != evlist->cpus) { 158 perf_cpu_map__put(evlist->cpus); 159 evlist->cpus = perf_cpu_map__get(cpus); 160 } 161 162 if (threads != evlist->threads) { 163 perf_thread_map__put(evlist->threads); 164 evlist->threads = perf_thread_map__get(threads); 165 } 166 167 perf_evlist__propagate_maps(evlist); 168} 169 170int perf_evlist__open(struct perf_evlist *evlist) 171{ 172 struct perf_evsel *evsel; 173 int err; 174 175 perf_evlist__for_each_entry(evlist, evsel) { 176 err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); 177 if (err < 0) 178 goto out_err; 179 } 180 181 return 0; 182 183out_err: 184 perf_evlist__close(evlist); 185 return err; 186} 187 188void perf_evlist__close(struct perf_evlist *evlist) 189{ 190 struct perf_evsel *evsel; 191 192 perf_evlist__for_each_entry_reverse(evlist, evsel) 193 perf_evsel__close(evsel); 194} 195 196void perf_evlist__enable(struct perf_evlist *evlist) 197{ 198 struct perf_evsel *evsel; 199 200 perf_evlist__for_each_entry(evlist, evsel) 201 perf_evsel__enable(evsel); 202} 203 204void perf_evlist__disable(struct perf_evlist *evlist) 205{ 206 struct perf_evsel *evsel; 207 208 perf_evlist__for_each_entry(evlist, evsel) 209 perf_evsel__disable(evsel); 210} 211 212u64 perf_evlist__read_format(struct perf_evlist *evlist) 213{ 214 struct perf_evsel *first = perf_evlist__first(evlist); 215 216 return first->attr.read_format; 217} 218 219#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 220 221static void perf_evlist__id_hash(struct perf_evlist *evlist, 222 struct perf_evsel *evsel, 223 int cpu, int thread, u64 id) 224{ 225 int hash; 226 struct perf_sample_id *sid = SID(evsel, cpu, thread); 227 228 sid->id = id; 229 sid->evsel = evsel; 230 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); 231 hlist_add_head(&sid->node, &evlist->heads[hash]); 232} 233 234void perf_evlist__id_add(struct perf_evlist *evlist, 235 struct perf_evsel *evsel, 236 int cpu, int thread, u64 id) 237{ 238 perf_evlist__id_hash(evlist, evsel, cpu, thread, id); 239 evsel->id[evsel->ids++] = id; 240} 241 242int perf_evlist__id_add_fd(struct perf_evlist *evlist, 243 struct perf_evsel *evsel, 244 int cpu, int thread, int fd) 245{ 246 u64 read_data[4] = { 0, }; 247 int id_idx = 1; /* The first entry is the counter value */ 248 u64 id; 249 int ret; 250 251 ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); 252 if (!ret) 253 goto add; 254 255 if (errno != ENOTTY) 256 return -1; 257 258 /* Legacy way to get event id.. All hail to old kernels! */ 259 260 /* 261 * This way does not work with group format read, so bail 262 * out in that case. 263 */ 264 if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) 265 return -1; 266 267 if (!(evsel->attr.read_format & PERF_FORMAT_ID) || 268 read(fd, &read_data, sizeof(read_data)) == -1) 269 return -1; 270 271 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 272 ++id_idx; 273 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 274 ++id_idx; 275 276 id = read_data[id_idx]; 277 278add: 279 perf_evlist__id_add(evlist, evsel, cpu, thread, id); 280 return 0; 281} 282 283int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 284{ 285 int nr_cpus = perf_cpu_map__nr(evlist->cpus); 286 int nr_threads = perf_thread_map__nr(evlist->threads); 287 int nfds = 0; 288 struct perf_evsel *evsel; 289 290 perf_evlist__for_each_entry(evlist, evsel) { 291 if (evsel->system_wide) 292 nfds += nr_cpus; 293 else 294 nfds += nr_cpus * nr_threads; 295 } 296 297 if (fdarray__available_entries(&evlist->pollfd) < nfds && 298 fdarray__grow(&evlist->pollfd, nfds) < 0) 299 return -ENOMEM; 300 301 return 0; 302} 303 304int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, 305 void *ptr, short revent) 306{ 307 int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); 308 309 if (pos >= 0) { 310 evlist->pollfd.priv[pos].ptr = ptr; 311 fcntl(fd, F_SETFL, O_NONBLOCK); 312 } 313 314 return pos; 315} 316 317static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, 318 void *arg __maybe_unused) 319{ 320 struct perf_mmap *map = fda->priv[fd].ptr; 321 322 if (map) 323 perf_mmap__put(map); 324} 325 326int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) 327{ 328 return fdarray__filter(&evlist->pollfd, revents_and_mask, 329 perf_evlist__munmap_filtered, NULL); 330} 331 332int perf_evlist__poll(struct perf_evlist *evlist, int timeout) 333{ 334 return fdarray__poll(&evlist->pollfd, timeout); 335} 336 337static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite) 338{ 339 int i; 340 struct perf_mmap *map; 341 342 map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); 343 if (!map) 344 return NULL; 345 346 for (i = 0; i < evlist->nr_mmaps; i++) { 347 struct perf_mmap *prev = i ? &map[i - 1] : NULL; 348 349 /* 350 * When the perf_mmap() call is made we grab one refcount, plus 351 * one extra to let perf_mmap__consume() get the last 352 * events after all real references (perf_mmap__get()) are 353 * dropped. 354 * 355 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and 356 * thus does perf_mmap__get() on it. 357 */ 358 perf_mmap__init(&map[i], prev, overwrite, NULL); 359 } 360 361 return map; 362} 363 364static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, 365 struct perf_evsel *evsel, int idx, int cpu, 366 int thread) 367{ 368 struct perf_sample_id *sid = SID(evsel, cpu, thread); 369 370 sid->idx = idx; 371 if (evlist->cpus && cpu >= 0) 372 sid->cpu = evlist->cpus->map[cpu]; 373 else 374 sid->cpu = -1; 375 if (!evsel->system_wide && evlist->threads && thread >= 0) 376 sid->tid = perf_thread_map__pid(evlist->threads, thread); 377 else 378 sid->tid = -1; 379} 380 381static struct perf_mmap* 382perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) 383{ 384 struct perf_mmap *maps; 385 386 maps = overwrite ? evlist->mmap_ovw : evlist->mmap; 387 388 if (!maps) { 389 maps = perf_evlist__alloc_mmap(evlist, overwrite); 390 if (!maps) 391 return NULL; 392 393 if (overwrite) 394 evlist->mmap_ovw = maps; 395 else 396 evlist->mmap = maps; 397 } 398 399 return &maps[idx]; 400} 401 402#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) 403 404static int 405perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, 406 int output, int cpu) 407{ 408 return perf_mmap__mmap(map, mp, output, cpu); 409} 410 411static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_mmap *map, 412 bool overwrite) 413{ 414 if (overwrite) 415 evlist->mmap_ovw_first = map; 416 else 417 evlist->mmap_first = map; 418} 419 420static int 421mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, 422 int idx, struct perf_mmap_param *mp, int cpu_idx, 423 int thread, int *_output, int *_output_overwrite) 424{ 425 int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); 426 struct perf_evsel *evsel; 427 int revent; 428 429 perf_evlist__for_each_entry(evlist, evsel) { 430 bool overwrite = evsel->attr.write_backward; 431 struct perf_mmap *map; 432 int *output, fd, cpu; 433 434 if (evsel->system_wide && thread) 435 continue; 436 437 cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu); 438 if (cpu == -1) 439 continue; 440 441 map = ops->get(evlist, overwrite, idx); 442 if (map == NULL) 443 return -ENOMEM; 444 445 if (overwrite) { 446 mp->prot = PROT_READ; 447 output = _output_overwrite; 448 } else { 449 mp->prot = PROT_READ | PROT_WRITE; 450 output = _output; 451 } 452 453 fd = FD(evsel, cpu, thread); 454 455 if (*output == -1) { 456 *output = fd; 457 458 /* 459 * The last one will be done at perf_mmap__consume(), so that we 460 * make sure we don't prevent tools from consuming every last event in 461 * the ring buffer. 462 * 463 * I.e. we can get the POLLHUP meaning that the fd doesn't exist 464 * anymore, but the last events for it are still in the ring buffer, 465 * waiting to be consumed. 466 * 467 * Tools can chose to ignore this at their own discretion, but the 468 * evlist layer can't just drop it when filtering events in 469 * perf_evlist__filter_pollfd(). 470 */ 471 refcount_set(&map->refcnt, 2); 472 473 if (ops->mmap(map, mp, *output, evlist_cpu) < 0) 474 return -1; 475 476 if (!idx) 477 perf_evlist__set_mmap_first(evlist, map, overwrite); 478 } else { 479 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 480 return -1; 481 482 perf_mmap__get(map); 483 } 484 485 revent = !overwrite ? POLLIN : 0; 486 487 if (!evsel->system_wide && 488 perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) { 489 perf_mmap__put(map); 490 return -1; 491 } 492 493 if (evsel->attr.read_format & PERF_FORMAT_ID) { 494 if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 495 fd) < 0) 496 return -1; 497 perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 498 thread); 499 } 500 } 501 502 return 0; 503} 504 505static int 506mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, 507 struct perf_mmap_param *mp) 508{ 509 int thread; 510 int nr_threads = perf_thread_map__nr(evlist->threads); 511 512 for (thread = 0; thread < nr_threads; thread++) { 513 int output = -1; 514 int output_overwrite = -1; 515 516 if (ops->idx) 517 ops->idx(evlist, mp, thread, false); 518 519 if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, 520 &output, &output_overwrite)) 521 goto out_unmap; 522 } 523 524 return 0; 525 526out_unmap: 527 perf_evlist__munmap(evlist); 528 return -1; 529} 530 531static int 532mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, 533 struct perf_mmap_param *mp) 534{ 535 int nr_threads = perf_thread_map__nr(evlist->threads); 536 int nr_cpus = perf_cpu_map__nr(evlist->cpus); 537 int cpu, thread; 538 539 for (cpu = 0; cpu < nr_cpus; cpu++) { 540 int output = -1; 541 int output_overwrite = -1; 542 543 if (ops->idx) 544 ops->idx(evlist, mp, cpu, true); 545 546 for (thread = 0; thread < nr_threads; thread++) { 547 if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, 548 thread, &output, &output_overwrite)) 549 goto out_unmap; 550 } 551 } 552 553 return 0; 554 555out_unmap: 556 perf_evlist__munmap(evlist); 557 return -1; 558} 559 560static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) 561{ 562 int nr_mmaps; 563 564 nr_mmaps = perf_cpu_map__nr(evlist->cpus); 565 if (perf_cpu_map__empty(evlist->cpus)) 566 nr_mmaps = perf_thread_map__nr(evlist->threads); 567 568 return nr_mmaps; 569} 570 571int perf_evlist__mmap_ops(struct perf_evlist *evlist, 572 struct perf_evlist_mmap_ops *ops, 573 struct perf_mmap_param *mp) 574{ 575 struct perf_evsel *evsel; 576 const struct perf_cpu_map *cpus = evlist->cpus; 577 const struct perf_thread_map *threads = evlist->threads; 578 579 if (!ops || !ops->get || !ops->mmap) 580 return -EINVAL; 581 582 mp->mask = evlist->mmap_len - page_size - 1; 583 584 evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist); 585 586 perf_evlist__for_each_entry(evlist, evsel) { 587 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 588 evsel->sample_id == NULL && 589 perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) 590 return -ENOMEM; 591 } 592 593 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 594 return -ENOMEM; 595 596 if (perf_cpu_map__empty(cpus)) 597 return mmap_per_thread(evlist, ops, mp); 598 599 return mmap_per_cpu(evlist, ops, mp); 600} 601 602int perf_evlist__mmap(struct perf_evlist *evlist, int pages) 603{ 604 struct perf_mmap_param mp; 605 struct perf_evlist_mmap_ops ops = { 606 .get = perf_evlist__mmap_cb_get, 607 .mmap = perf_evlist__mmap_cb_mmap, 608 }; 609 610 evlist->mmap_len = (pages + 1) * page_size; 611 612 return perf_evlist__mmap_ops(evlist, &ops, &mp); 613} 614 615void perf_evlist__munmap(struct perf_evlist *evlist) 616{ 617 int i; 618 619 if (evlist->mmap) { 620 for (i = 0; i < evlist->nr_mmaps; i++) 621 perf_mmap__munmap(&evlist->mmap[i]); 622 } 623 624 if (evlist->mmap_ovw) { 625 for (i = 0; i < evlist->nr_mmaps; i++) 626 perf_mmap__munmap(&evlist->mmap_ovw[i]); 627 } 628 629 zfree(&evlist->mmap); 630 zfree(&evlist->mmap_ovw); 631} 632 633struct perf_mmap* 634perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, 635 bool overwrite) 636{ 637 if (map) 638 return map->next; 639 640 return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first; 641}