// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-inject.c
 *
 * Builtin inject command: Examine the live mode (stdin) event stream
 * and repipe it to stdout while optionally injecting additional
 * events into it.
 */
#include "builtin.h"

#include "util/color.h"
#include "util/dso.h"
#include "util/vdso.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/map.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/debug.h"
#include "util/build-id.h"
#include "util/data.h"
#include "util/auxtrace.h"
#include "util/jit.h"
#include "util/string2.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/thread.h"
#include "util/namespaces.h"
#include "util/util.h"
#include "util/tsc.h"

#include <internal/lib.h>

#include <linux/err.h>
#include <subcmd/parse-options.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */

#include <linux/list.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <linux/hash.h>
#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <inttypes.h>

struct guest_event {
	struct perf_sample sample;
	union perf_event *event;
	char *event_buf;
};

struct guest_id {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node node;
	u64 id;
	u64 host_id;
	u32 vcpu;
};

struct guest_tid {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node node;
	/* Thread ID of QEMU thread */
	u32 tid;
	u32 vcpu;
};

struct guest_vcpu {
	/* Current host CPU */
	u32 cpu;
	/* Thread ID of QEMU thread */
	u32 tid;
};

struct guest_session {
	char *perf_data_file;
	u32 machine_pid;
	u64 time_offset;
	double time_scale;
	struct perf_tool tool;
	struct perf_data data;
	struct perf_session *session;
	char *tmp_file_name;
	int tmp_fd;
	struct perf_tsc_conversion host_tc;
	struct perf_tsc_conversion guest_tc;
	bool copy_kcore_dir;
	bool have_tc;
	bool fetched;
	bool ready;
	u16 dflt_id_hdr_size;
	u64 dflt_id;
	u64 highest_id;
	/* Array of guest_vcpu */
	struct guest_vcpu *vcpu;
	size_t vcpu_cnt;
	/* Hash table for guest_id */
	struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
	/* Hash table for guest_tid */
	struct hlist_head tids[PERF_EVLIST__HLIST_SIZE];
	/* Place to stash next guest event */
	struct guest_event ev;
};

enum build_id_rewrite_style {
	BID_RWS__NONE = 0,
	BID_RWS__INJECT_HEADER_LAZY,
	BID_RWS__INJECT_HEADER_ALL,
	BID_RWS__MMAP2_BUILDID_ALL,
	BID_RWS__MMAP2_BUILDID_LAZY,
};

struct perf_inject {
	struct perf_tool tool;
	struct perf_session *session;
	enum build_id_rewrite_style build_id_style;
	bool sched_stat;
	bool have_auxtrace;
	bool strip;
	bool jit_mode;
	bool in_place_update;
	bool in_place_update_dry_run;
	bool copy_kcore_dir;
	const char *input_name;
	struct perf_data output;
	u64 bytes_written;
	u64 aux_id;
	struct list_head samples;
	struct itrace_synth_opts itrace_synth_opts;
	char *event_copy;
	struct perf_file_section secs[HEADER_FEAT_BITS];
	struct guest_session guest_session;
	struct strlist *known_build_ids;
	const struct evsel *mmap_evsel;
};

struct event_entry {
	struct list_head node;
	u32 tid;
	union perf_event event[];
};
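
/*
 * Forward declarations for the build ID injection helpers defined below.
 * They synthesize either standalone build ID events or mmap2 events that
 * carry a build ID, writing them into the output stream.
 */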
static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags);
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64 pgoff,
				       struct dso *dso,
				       __u32 prot, __u32 flags,
				       const char *filename);

static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
	ssize_t size;

	size = perf_data__write(&inject->output, buf, sz);
	if (size < 0)
		return -errno;

	inject->bytes_written += size;
	return 0;
}

static int perf_event__repipe_synth(const struct perf_tool *tool,
				    union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	return output_bytes(inject, event, event->header.size);
}

static int perf_event__repipe_oe_synth(const struct perf_tool *tool,
				       union perf_event *event,
				       struct ordered_events *oe __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

#ifdef HAVE_JITDUMP
static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
			       union perf_event *event __maybe_unused,
			       struct ordered_events *oe __maybe_unused)
{
	return 0;
}
#endif

static int perf_event__repipe_op2_synth(const struct perf_tool *tool,
					struct perf_session *session __maybe_unused,
					union perf_event *event)
{
	return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_op4_synth(const struct perf_tool *tool,
					struct perf_session *session __maybe_unused,
					union perf_event *event,
					u64 data __maybe_unused,
					const char *str __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_attr(const struct perf_tool *tool,
				   union perf_event *event,
				   struct evlist **pevlist)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	int ret;

	ret = perf_event__process_attr(tool, event, pevlist);
	if (ret)
		return ret;

	/* If the output isn't a pipe then the attributes will be written as part of the header. */
	if (!inject->output.is_pipe)
		return 0;

	return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_event_update(const struct perf_tool *tool,
					   union perf_event *event,
					   struct evlist **pevlist __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
{
	char buf[4096];
	ssize_t ssz;
	int ret;

	while (size > 0) {
		ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
		if (ssz < 0)
			return -errno;
		ret = output_bytes(inject, buf, ssz);
		if (ret)
			return ret;
		size -= ssz;
	}

	return 0;
}

static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool,
				       struct perf_session *session,
				       union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret;

	inject->have_auxtrace = true;

	if (!inject->output.is_pipe) {
		off_t offset;

		offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
		if (offset == -1)
			return -errno;
		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
						     event, offset);
		if (ret < 0)
			return ret;
	}

	if (perf_data__is_pipe(session->data) || !session->one_mmap) {
		ret = output_bytes(inject, event, event->header.size);
		if (ret < 0)
			return ret;
		ret = copy_bytes(inject, session->data,
				 event->auxtrace.size);
	} else {
		ret = output_bytes(inject, event,
				   event->header.size + event->auxtrace.size);
	}
	if (ret < 0)
		return ret;

	return event->auxtrace.size;
}

static int perf_event__repipe(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

static int perf_event__drop(const struct perf_tool *tool __maybe_unused,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static int perf_event__drop_aux(const struct perf_tool *tool,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample,
				struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	if (!inject->aux_id)
		inject->aux_id = sample->id;

	return 0;
}

static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	size_t sz1 = sample->aux_sample.data - (void *)event;
	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
	union perf_event *ev;

	if (inject->event_copy == NULL) {
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return ERR_PTR(-ENOMEM);
	}
	ev = (union perf_event *)inject->event_copy;
	if (sz1 > event->header.size || sz2 > event->header.size ||
	    sz1 + sz2 > event->header.size ||
	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
		return event;

	memcpy(ev, event, sz1);
	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
	ev->header.size = sz1 + sz2;
	((u64 *)((void *)ev + sz1))[-1] = 0;

	return ev;
}
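
/*
 * Handler stashed in evsel->handler to override how samples for that evsel
 * are injected (see the sched_stat handling in __cmd_inject()).
 */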
typedef int (*inject_handler)(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample,
			      struct evsel *evsel,
			      struct machine *machine);

static int perf_event__repipe_sample(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	if (evsel && evsel->handler) {
		inject_handler f = evsel->handler;
		return f(tool, event, sample, evsel, machine);
	}

	build_id__mark_dso_hit(tool, event, sample, evsel, machine);

	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
		if (IS_ERR(event))
			return PTR_ERR(event);
	}

	return perf_event__repipe_synth(tool, event);
}

static struct dso *findnew_dso(int pid, int tid, const char *filename,
			       const struct dso_id *id, struct machine *machine)
{
	struct thread *thread;
	struct nsinfo *nsi = NULL;
	struct nsinfo *nnsi;
	struct dso *dso;
	bool vdso;

	thread = machine__findnew_thread(machine, pid, tid);
	if (thread == NULL) {
		pr_err("cannot find or create a task %d/%d.\n", tid, pid);
		return NULL;
	}

	vdso = is_vdso_map(filename);
	nsi = nsinfo__get(thread__nsinfo(thread));

	if (vdso) {
		/* The vdso maps are always on the host and not the
		 * container. Ensure that we don't use setns to look
		 * them up.
		 */
		nnsi = nsinfo__copy(nsi);
		if (nnsi) {
			nsinfo__put(nsi);
			nsinfo__clear_need_setns(nnsi);
			nsi = nnsi;
		}
		dso = machine__findnew_vdso(machine, thread);
	} else {
		dso = machine__findnew_dso_id(machine, filename, id);
	}

	if (dso) {
		mutex_lock(dso__lock(dso));
		dso__set_nsinfo(dso, nsi);
		mutex_unlock(dso__lock(dso));
	} else
		nsinfo__put(nsi);

	thread__put(thread);
	return dso;
}

/*
 * The evsel used for the sample ID for mmap events. Typically stashed when
 * processing mmap events. If not stashed, search the evlist for the first mmap
 * gathering event.
 */
static const struct evsel *inject__mmap_evsel(struct perf_inject *inject)
{
	struct evsel *pos;

	if (inject->mmap_evsel)
		return inject->mmap_evsel;

	evlist__for_each_entry(inject->session->evlist, pos) {
		if (pos->core.attr.mmap) {
			inject->mmap_evsel = pos;
			return pos;
		}
	}
	pr_err("No mmap events found\n");
	return NULL;
}
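
/*
 * Common handling for MMAP and MMAP2 events: optionally convert JIT dump
 * mmaps, inject build IDs according to the selected rewrite style, then
 * repipe the original event unless it has been replaced.
 */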
static int perf_event__repipe_common_mmap(const struct perf_tool *tool,
					  union perf_event *event,
					  struct perf_sample *sample,
					  struct machine *machine,
					  __u32 pid, __u32 tid,
					  __u64 start, __u64 len, __u64 pgoff,
					  __u32 flags, __u32 prot,
					  const char *filename,
					  const struct dso_id *dso_id,
					  int (*perf_event_process)(const struct perf_tool *tool,
								    union perf_event *event,
								    struct perf_sample *sample,
								    struct machine *machine))
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct dso *dso = NULL;
	bool dso_sought = false;

#ifdef HAVE_JITDUMP
	if (inject->jit_mode) {
		u64 n = 0;
		int ret;

		/* If jit marker, then inject jit mmaps and generate ELF images. */
		ret = jit_process(inject->session, &inject->output, machine,
				  filename, pid, tid, &n);
		if (ret < 0)
			return ret;
		if (ret) {
			inject->bytes_written += n;
			return 0;
		}
	}
#endif
	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		dso = findnew_dso(pid, tid, filename, dso_id, machine);
		dso_sought = true;
		if (dso) {
			/* mark it not to inject build-id */
			dso__set_hit(dso);
		}
	}
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) {
		if (!dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}

		if (dso && !dso__hit(dso)) {
			struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

			if (evsel) {
				dso__set_hit(dso);
				tool__inject_build_id(tool, sample, machine, evsel,
						      /*misc=*/sample->cpumode,
						      filename, dso, flags);
			}
		}
	} else {
		int err;

		/*
		 * Remember the evsel for lazy build id generation. It is used
		 * for the sample id header type.
		 */
		if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		     inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) &&
		    !inject->mmap_evsel)
			inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event);

		/* Create the thread, map, etc. Not done for the unordered inject all case. */
		err = perf_event_process(tool, event, sample, machine);

		if (err) {
			dso__put(dso);
			return err;
		}
	}
	if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) &&
	    !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
		struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

		if (evsel && !dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}
		if (evsel && dso &&
		    !tool__inject_mmap2_build_id(tool, sample, machine, evsel,
						 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID,
						 pid, tid, start, len, pgoff,
						 dso,
						 prot, flags,
						 filename)) {
			/* Injected mmap2 so no need to repipe. */
			dso__put(dso);
			return 0;
		}
	}
	dso__put(dso);
	if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY)
		return 0;

	return perf_event__repipe(tool, event, sample, machine);
}

static int perf_event__repipe_mmap(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap.pid, event->mmap.tid,
		event->mmap.start, event->mmap.len, event->mmap.pgoff,
		/*flags=*/0, PROT_EXEC,
		event->mmap.filename, /*dso_id=*/NULL,
		perf_event__process_mmap);
}

static int perf_event__repipe_mmap2(const struct perf_tool *tool,
				    union perf_event *event,
				    struct perf_sample *sample,
				    struct machine *machine)
{
	struct dso_id id = dso_id_empty;

	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size);
	} else {
		id.maj = event->mmap2.maj;
		id.min = event->mmap2.min;
		id.ino = event->mmap2.ino;
		id.ino_generation = event->mmap2.ino_generation;
		id.mmap2_valid = true;
		id.mmap2_ino_generation_valid = true;
	}

	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap2.pid, event->mmap2.tid,
		event->mmap2.start, event->mmap2.len, event->mmap2.pgoff,
		event->mmap2.flags, event->mmap2.prot,
		event->mmap2.filename, &id,
		perf_event__process_mmap2);
}

static int perf_event__repipe_fork(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_fork(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_comm(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_comm(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_namespaces(const struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int err = perf_event__process_namespaces(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_exit(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_exit(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_event__repipe_tracing_data(const struct perf_tool *tool,
					   struct perf_session *session,
					   union perf_event *event)
{
	perf_event__repipe_synth(tool, event);

	return perf_event__process_tracing_data(tool, session, event);
}
#endif

static int dso__read_build_id(struct dso *dso)
{
	struct nscookie nsc;
	struct build_id bid = { .size = 0, };

	if (dso__has_build_id(dso))
		return 0;

	mutex_lock(dso__lock(dso));
	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
	if (filename__read_build_id(dso__long_name(dso), &bid) > 0)
		dso__set_build_id(dso, &bid);
	else if (dso__nsinfo(dso)) {
		char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));

		if (new_name && filename__read_build_id(new_name, &bid) > 0)
			dso__set_build_id(dso, &bid);
		free(new_name);
	}
	nsinfo__mountns_exit(&nsc);
	mutex_unlock(dso__lock(dso));

	return dso__has_build_id(dso) ? 0 : -1;
}

static struct strlist *perf_inject__parse_known_build_ids(
	const char *known_build_ids_string)
{
	struct str_node *pos, *tmp;
	struct strlist *known_build_ids;
	int bid_len;

	known_build_ids = strlist__new(known_build_ids_string, NULL);
	if (known_build_ids == NULL)
		return NULL;
	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		if (dso_name == NULL) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			if (!isxdigit(build_id[2 * ix]) ||
			    !isxdigit(build_id[2 * ix + 1])) {
				strlist__remove(known_build_ids, pos);
				break;
			}
		}
	}
	return known_build_ids;
}

static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
					       struct dso *dso)
{
	struct str_node *pos;

	strlist__for_each_entry(pos, inject->known_build_ids) {
		struct build_id bid;
		const char *build_id, *dso_name;
		size_t bid_len;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		bid_len = dso_name - pos->s;
		if (bid_len > sizeof(bid.data))
			bid_len = sizeof(bid.data);
		dso_name = skip_spaces(dso_name);
		if (strcmp(dso__long_name(dso), dso_name))
			continue;
		for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			bid.data[ix] = (hex(build_id[2 * ix]) << 4 |
					hex(build_id[2 * ix + 1]));
		}
		bid.size = bid_len / 2;
		dso__set_build_id(dso, &bid);
		return true;
	}
	return false;
}

static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int err;

	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 0;
	if (is_no_dso_memory(filename))
		return 0;

	if (inject->known_build_ids != NULL &&
	    perf_inject__lookup_known_build_id(inject, dso))
		return 1;

	if (dso__read_build_id(dso) < 0) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_build_id(tool, sample, machine,
					      perf_event__repipe,
					      evsel, misc, dso__bid(dso),
					      filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}

	return 0;
}
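
/*
 * Synthesize an MMAP2 event carrying the dso's build ID. Returns 1 when the
 * caller should fall back to repiping the original event (anonymous and
 * dso-less maps), 0 on success, or -1 on error.
 */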
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64 pgoff,
				       struct dso *dso,
				       __u32 prot, __u32 flags,
				       const char *filename)
{
	int err;

	/* Return to repipe anonymous maps. */
	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 1;
	if (is_no_dso_memory(filename))
		return 1;

	if (dso__read_build_id(dso)) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_mmap2_build_id(tool, sample, machine,
						    perf_event__repipe,
						    evsel,
						    misc, pid, tid,
						    start, len, pgoff,
						    dso__bid(dso),
						    prot, flags,
						    filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}
	return 0;
}

static int mark_dso_hit(const struct perf_inject *inject,
			const struct perf_tool *tool,
			struct perf_sample *sample,
			struct machine *machine,
			const struct evsel *mmap_evsel,
			struct map *map, bool sample_in_dso)
{
	struct dso *dso;
	u16 misc = sample->cpumode;

	if (!map)
		return 0;

	if (!sample_in_dso) {
		u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL |
				 PERF_RECORD_MISC_GUEST_USER;

		if ((misc & guest_mask) != 0) {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_GUEST_KERNEL
				: PERF_RECORD_MISC_GUEST_USER;
		} else {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_KERNEL
				: PERF_RECORD_MISC_USER;
		}
	}
	dso = map__dso(map);
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) {
		if (dso && !dso__hit(dso)) {
			dso__set_hit(dso);
			tool__inject_build_id(tool, sample, machine,
					      mmap_evsel, misc, dso__long_name(dso), dso,
					      map__flags(map));
		}
	} else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		if (!map__hit(map)) {
			const struct build_id null_bid = { .size = 0 };
			const struct build_id *bid = dso ? dso__bid(dso) : &null_bid;
			const char *filename = dso ? dso__long_name(dso) : "";

			map__set_hit(map);
			perf_event__synthesize_mmap2_build_id(tool, sample, machine,
							      perf_event__repipe,
							      mmap_evsel,
							      misc,
							      sample->pid, sample->tid,
							      map__start(map),
							      map__end(map) - map__start(map),
							      map__pgoff(map),
							      bid,
							      map__prot(map),
							      map__flags(map),
							      filename);
		}
	}
	return 0;
}

struct mark_dso_hit_args {
	const struct perf_inject *inject;
	const struct perf_tool *tool;
	struct perf_sample *sample;
	struct machine *machine;
	const struct evsel *mmap_evsel;
};

static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data)
{
	struct mark_dso_hit_args *args = data;
	struct map *map = node->ms.map;

	return mark_dso_hit(args->inject, args->tool, args->sample, args->machine,
			    args->mmap_evsel, map, /*sample_in_dso=*/false);
}
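
/*
 * Sample handler for the lazy build ID styles: mark the dso of the sampled
 * location, and of each callchain entry, as hit, injecting its build ID the
 * first time it is seen, then repipe the sample itself.
 */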
int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
			       struct perf_sample *sample,
			       struct evsel *evsel __maybe_unused,
			       struct machine *machine)
{
	struct addr_location al;
	struct thread *thread;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct mark_dso_hit_args args = {
		.inject = inject,
		.tool = tool,
		/*
		 * Use the parsed sample data of the sample event, which will
		 * have a later timestamp than the mmap event.
		 */
		.sample = sample,
		.machine = machine,
		.mmap_evsel = inject__mmap_evsel(inject),
	};

	addr_location__init(&al);
	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL) {
		pr_err("problem processing %d event, skipping it.\n",
		       event->header.type);
		goto repipe;
	}

	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
		mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map,
			     /*sample_in_dso=*/true);
	}

	sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
					/*symbols=*/false, mark_dso_hit_callback, &args);

	thread__put(thread);
repipe:
	perf_event__repipe(tool, event, sample, machine);
	addr_location__exit(&al);
	return 0;
}

static int perf_inject__sched_process_exit(const struct perf_tool *tool,
					   union perf_event *event __maybe_unused,
					   struct perf_sample *sample,
					   struct evsel *evsel __maybe_unused,
					   struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct event_entry *ent;

	list_for_each_entry(ent, &inject->samples, node) {
		if (sample->tid == ent->tid) {
			list_del_init(&ent->node);
			free(ent);
			break;
		}
	}

	return 0;
}

static int perf_inject__sched_switch(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct event_entry *ent;

	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);

	ent = malloc(event->header.size + sizeof(struct event_entry));
	if (ent == NULL) {
		color_fprintf(stderr, PERF_COLOR_RED,
			      "Not enough memory to process sched switch event!");
		return -1;
	}

	ent->tid = sample->tid;
	memcpy(&ent->event, event, event->header.size);
	list_add(&ent->node, &inject->samples);
	return 0;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_inject__sched_stat(const struct perf_tool *tool,
				   union perf_event *event __maybe_unused,
				   struct perf_sample *sample,
				   struct evsel *evsel,
				   struct machine *machine)
{
	struct event_entry *ent;
	union perf_event *event_sw;
	struct perf_sample sample_sw;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u32 pid = evsel__intval(evsel, sample, "pid");

	list_for_each_entry(ent, &inject->samples, node) {
		if (pid == ent->tid)
			goto found;
	}

	return 0;
found:
	event_sw = &ent->event[0];
	evsel__parse_sample(evsel, event_sw, &sample_sw);

	sample_sw.period = sample->period;
	sample_sw.time = sample->time;
	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
				      evsel->core.attr.read_format, &sample_sw);
	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
#endif

static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
{
	if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
		return NULL;
	return &gs->vcpu[vcpu];
}

static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
{
	ssize_t ret = writen(gs->tmp_fd, buf, sz);

	return ret < 0 ? ret : 0;
}
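
/*
 * Guest-session handlers write events to a temporary file first (see
 * guest_session__start()); they are merged into the host output stream in
 * timestamp order by guest_session__inject_events().
 */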
static int guest_session__repipe(const struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample __maybe_unused,
				 struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	return guest_session__output_bytes(gs, event, event->header.size);
}

static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
{
	struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
	int hash;

	if (!guest_tid)
		return -ENOMEM;

	guest_tid->tid = tid;
	guest_tid->vcpu = vcpu;
	hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&guest_tid->node, &gs->tids[hash]);

	return 0;
}

static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
				 union perf_event *event,
				 u64 offset __maybe_unused, void *data)
{
	struct guest_session *gs = data;
	unsigned int vcpu;
	struct guest_vcpu *guest_vcpu;
	int ret;

	if (event->header.type != PERF_RECORD_COMM ||
	    event->comm.pid != gs->machine_pid)
		return 0;

	/*
	 * The QEMU option -name debug-threads=on causes thread names to be
	 * formatted as below, although it is not an ABI. libvirt also seems
	 * to use this by default. Here we rely on it to tell us which thread
	 * is which VCPU.
	 */
	ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
	if (ret <= 0)
		return ret;
	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
		 event->comm.tid, event->comm.comm, vcpu);
	if (vcpu > INT_MAX) {
		pr_err("Invalid VCPU %u\n", vcpu);
		return -EINVAL;
	}
	guest_vcpu = guest_session__vcpu(gs, vcpu);
	if (!guest_vcpu)
		return -ENOMEM;
	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
		pr_err("Fatal error: Two threads found with the same VCPU\n");
		return -EINVAL;
	}
	guest_vcpu->tid = event->comm.tid;

	return guest_session__map_tid(gs, event->comm.tid, vcpu);
}

static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
{
	return perf_session__peek_events(session, session->header.data_offset,
					 session->header.data_size,
					 host_peek_vm_comms_cb, gs);
}

static bool evlist__is_id_used(struct evlist *evlist, u64 id)
{
	return evlist__id2sid(evlist, id);
}

static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
{
	do {
		gs->highest_id += 1;
	} while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));

	return gs->highest_id;
}

static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
{
	struct guest_id *guest_id = zalloc(sizeof(*guest_id));
	int hash;

	if (!guest_id)
		return -ENOMEM;

	guest_id->id = id;
	guest_id->host_id = host_id;
	guest_id->vcpu = vcpu;
	hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&guest_id->node, &gs->heads[hash]);

	return 0;
}

static u64 evlist__find_highest_id(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 highest_id = 1;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			u64 id = evsel->core.id[j];

			if (id > highest_id)
				highest_id = id;
		}
	}

	return highest_id;
}
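
/*
 * Allocate an unused host sample ID for each guest sample ID, so that
 * injected guest events cannot clash with host IDs.
 */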
static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			struct perf_sample_id *sid;
			u64 host_id;
			u64 id;

			id = evsel->core.id[j];
			sid = evlist__id2sid(evlist, id);
			if (!sid || sid->cpu.cpu == -1)
				continue;
			host_id = guest_session__allocate_new_id(gs, host_evlist);
			ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
{
	struct hlist_head *head;
	struct guest_id *guest_id;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &gs->heads[hash];

	hlist_for_each_entry(guest_id, head, node)
		if (guest_id->id == id)
			return guest_id;

	return NULL;
}

static int process_attr(const struct perf_tool *tool, union perf_event *event,
			struct perf_sample *sample __maybe_unused,
			struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	return perf_event__process_attr(tool, event, &inject->session->evlist);
}

static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	struct perf_event_attr attr = evsel->core.attr;
	u64 *id_array;
	u32 *vcpu_array;
	int ret = -ENOMEM;
	u32 i;

	id_array = calloc(evsel->core.ids, sizeof(*id_array));
	if (!id_array)
		return -ENOMEM;

	vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
	if (!vcpu_array)
		goto out;

	for (i = 0; i < evsel->core.ids; i++) {
		u64 id = evsel->core.id[i];
		struct guest_id *guest_id = guest_session__lookup_id(gs, id);

		if (!guest_id) {
			pr_err("Failed to find guest id %"PRIu64"\n", id);
			ret = -EINVAL;
			goto out;
		}
		id_array[i] = guest_id->host_id;
		vcpu_array[i] = guest_id->vcpu;
	}

	attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
	attr.exclude_host = 1;
	attr.exclude_guest = 0;

	ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
					  id_array, process_attr);
	if (ret)
		pr_err("Failed to add guest attr.\n");

	for (i = 0; i < evsel->core.ids; i++) {
		struct perf_sample_id *sid;
		u32 vcpu = vcpu_array[i];

		sid = evlist__id2sid(inject->session->evlist, id_array[i]);
		/* Guest event is per-thread from the host point of view */
		sid->cpu.cpu = -1;
		sid->tid = gs->vcpu[vcpu].tid;
		sid->machine_pid = gs->machine_pid;
		sid->vcpu.cpu = vcpu;
	}
out:
	free(vcpu_array);
	free(id_array);
	return ret;
}

static int guest_session__add_attrs(struct guest_session *gs)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		ret = guest_session__add_attr(gs, evsel);
		if (ret)
			return ret;
	}

	return 0;
}
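
/* Emit an updated id index covering the new_cnt most recently added evsels. */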
static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
{
	struct perf_session *session = inject->session;
	struct evlist *evlist = session->evlist;
	struct machine *machine = &session->machines.host;
	size_t from = evlist->core.nr_entries - new_cnt;

	return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
						 evlist, machine, from);
}

static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
{
	struct hlist_head *head;
	struct guest_tid *guest_tid;
	int hash;

	hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
	head = &gs->tids[hash];

	hlist_for_each_entry(guest_tid, head, node)
		if (guest_tid->tid == tid)
			return guest_tid;

	return NULL;
}

static bool dso__is_in_kernel_space(struct dso *dso)
{
	if (dso__is_vdso(dso))
		return false;

	return dso__is_kcore(dso) ||
	       dso__kernel(dso) ||
	       is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN);
}

static u64 evlist__first_id(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.ids)
			return evsel->core.id[0];
	}
	return 0;
}

static int process_build_id(const struct perf_tool *tool,
			    union perf_event *event,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	return perf_event__process_build_id(tool, inject->session, event);
}

static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
{
	struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
	struct perf_sample synth_sample = {
		.pid = -1,
		.tid = -1,
		.time = -1,
		.stream_id = -1,
		.cpu = -1,
		.period = 1,
		.cpumode = dso__is_in_kernel_space(dso)
			   ? PERF_RECORD_MISC_GUEST_KERNEL
			   : PERF_RECORD_MISC_GUEST_USER,
	};

	if (!machine)
		return -ENOMEM;

	dso__set_hit(dso);

	return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine,
					       process_build_id, inject__mmap_evsel(inject),
					       /*misc=*/synth_sample.cpumode,
					       dso__bid(dso), dso__long_name(dso));
}

static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
{
	struct guest_session *gs = data;
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	if (!dso__has_build_id(dso))
		return 0;

	return synthesize_build_id(inject, dso, gs->machine_pid);
}

static int guest_session__add_build_ids(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	/* Build IDs will be put in the Build ID feature section */
	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);

	return dsos__for_each_dso(&gs->session->machines.host.dsos,
				  guest_session__add_build_ids_cb,
				  gs);
}
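
/*
 * Repipe only out-of-line ksymbol events (e.g. kprobe instruction slots);
 * BPF ksymbol events are dropped because injecting guest BPF programs is
 * not supported.
 */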
static int guest_session__ksymbol_event(const struct perf_tool *tool,
					union perf_event *event,
					struct perf_sample *sample __maybe_unused,
					struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	/* Only support out-of-line i.e. no BPF support */
	if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
		return 0;

	return guest_session__output_bytes(gs, event, event->header.size);
}

static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
	struct perf_session *session;
	int ret;

	/* Only these events will be injected */
	gs->tool.mmap = guest_session__repipe;
	gs->tool.mmap2 = guest_session__repipe;
	gs->tool.comm = guest_session__repipe;
	gs->tool.fork = guest_session__repipe;
	gs->tool.exit = guest_session__repipe;
	gs->tool.lost = guest_session__repipe;
	gs->tool.context_switch = guest_session__repipe;
	gs->tool.ksymbol = guest_session__ksymbol_event;
	gs->tool.text_poke = guest_session__repipe;
	/*
	 * Processing a build ID creates a struct dso with that build ID. Later,
	 * all guest dsos are iterated and the build IDs processed into the host
	 * session where they will be output to the Build ID feature section
	 * when the perf.data file header is written.
	 */
	gs->tool.build_id = perf_event__process_build_id;
	/* Process the id index to know what VCPU an ID belongs to */
	gs->tool.id_index = perf_event__process_id_index;

	gs->tool.ordered_events = true;
	gs->tool.ordering_requires_timestamps = true;

	gs->data.path = name;
	gs->data.force = force;
	gs->data.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&gs->data, &gs->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);
	gs->session = session;

	/*
	 * Initial events have zero'd ID samples. Get default ID sample size
	 * used for removing them.
	 */
	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
	/* And default ID for adding back a host-compatible ID sample */
	gs->dflt_id = evlist__first_id(session->evlist);
	if (!gs->dflt_id) {
		pr_err("Guest data has no sample IDs");
		return -EINVAL;
	}

	/* Temporary file for guest events */
	gs->tmp_file_name = strdup(tmp_file_name);
	if (!gs->tmp_file_name)
		return -ENOMEM;
	gs->tmp_fd = mkstemp(gs->tmp_file_name);
	if (gs->tmp_fd < 0)
		return -errno;

	if (zstd_init(&gs->session->zstd_data, 0) < 0)
		pr_warning("Guest session decompression initialization failed.\n");

	/*
	 * perf does not support processing 2 sessions simultaneously, so output
	 * guest events to a temporary file.
	 */
	ret = perf_session__process_events(gs->session);
	if (ret)
		return ret;

	if (lseek(gs->tmp_fd, 0, SEEK_SET))
		return -errno;

	return 0;
}

/* Free hlist nodes assuming hlist_node is the first member of hlist entries */
static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
{
	struct hlist_node *pos, *n;
	size_t i;

	for (i = 0; i < hlist_sz; ++i) {
		hlist_for_each_safe(pos, n, &heads[i]) {
			hlist_del(pos);
			free(pos);
		}
	}
}

static void guest_session__exit(struct guest_session *gs)
{
	if (gs->session) {
		perf_session__delete(gs->session);
		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
	}
	if (gs->tmp_file_name) {
		if (gs->tmp_fd >= 0)
			close(gs->tmp_fd);
		unlink(gs->tmp_file_name);
		zfree(&gs->tmp_file_name);
	}
	zfree(&gs->vcpu);
	zfree(&gs->perf_data_file);
}

static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
	tc->time_shift = time_conv->time_shift;
	tc->time_mult = time_conv->time_mult;
	tc->time_zero = time_conv->time_zero;
	tc->time_cycles = time_conv->time_cycles;
	tc->time_mask = time_conv->time_mask;
	tc->cap_user_time_zero = time_conv->cap_user_time_zero;
	tc->cap_user_time_short = time_conv->cap_user_time_short;
}

static void guest_session__get_tc(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
	get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
}
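
/*
 * Convert guest perf time to host perf time: guest time is mapped to a guest
 * TSC value (when guest TSC conversion data is available), then
 * host_tsc = (guest_tsc - time_offset) / time_scale, inverting the
 * guest_tsc = host_tsc * scale + offset relation of x86 TSC offsetting and
 * scaling, and finally the host TSC value is mapped back to host perf time.
 */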
static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
{
	u64 tsc;

	if (!guest_time) {
		*host_time = 0;
		return;
	}

	if (gs->guest_tc.cap_user_time_zero)
		tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
	else
		tsc = guest_time;

	/*
	 * This is the correct order of operations for x86 if the TSC Offset and
	 * Multiplier values are used.
	 */
	tsc -= gs->time_offset;
	tsc /= gs->time_scale;

	if (gs->host_tc.cap_user_time_zero)
		*host_time = tsc_to_perf_time(tsc, &gs->host_tc);
	else
		*host_time = tsc;
}

static int guest_session__fetch(struct guest_session *gs)
{
	void *buf;
	struct perf_event_header *hdr;
	size_t hdr_sz = sizeof(*hdr);
	ssize_t ret;

	buf = gs->ev.event_buf;
	if (!buf) {
		buf = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!buf)
			return -ENOMEM;
		gs->ev.event_buf = buf;
	}
	hdr = buf;
	ret = readn(gs->tmp_fd, buf, hdr_sz);
	if (ret < 0)
		return ret;

	if (!ret) {
		/* Zero size means EOF */
		hdr->size = 0;
		return 0;
	}

	buf += hdr_sz;

	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
	if (ret < 0)
		return ret;

	gs->ev.event = (union perf_event *)gs->ev.event_buf;
	gs->ev.sample.time = 0;

	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
		pr_err("Unexpected type fetching guest event");
		return 0;
	}

	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
	if (ret) {
		pr_err("Parse failed fetching guest event");
		return ret;
	}

	if (!gs->have_tc) {
		guest_session__get_tc(gs);
		gs->have_tc = true;
	}

	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);

	return 0;
}

static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
				    const struct perf_sample *sample)
{
	struct evsel *evsel;
	void *array;
	int ret;

	evsel = evlist__id2evsel(evlist, sample->id);
	array = ev;

	if (!evsel) {
		pr_err("No evsel for id %"PRIu64"\n", sample->id);
		return -EINVAL;
	}

	array += ev->header.size;
	ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
	if (ret < 0)
		return ret;

	if (ret & 7) {
		pr_err("Bad id sample size %d\n", ret);
		return -EINVAL;
	}

	ev->header.size += ret;

	return 0;
}
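
/*
 * Inject guest events with timestamps up to and including 'timestamp':
 * switch the cpumode to guest, replace the guest ID sample with a
 * host-compatible one, and map the VCPU to the host CPU it was last seen
 * running on.
 */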
static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	int ret;

	if (!gs->ready)
		return 0;

	while (1) {
		struct perf_sample *sample;
		struct guest_id *guest_id;
		union perf_event *ev;
		u16 id_hdr_size;
		u8 cpumode;
		u64 id;

		if (!gs->fetched) {
			ret = guest_session__fetch(gs);
			if (ret)
				return ret;
			gs->fetched = true;
		}

		ev = gs->ev.event;
		sample = &gs->ev.sample;

		if (!ev->header.size)
			return 0; /* EOF */

		if (sample->time > timestamp)
			return 0;

		/* Change cpumode to guest */
		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
		if (cpumode & PERF_RECORD_MISC_USER)
			cpumode = PERF_RECORD_MISC_GUEST_USER;
		else
			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
		ev->header.misc |= cpumode;

		id = sample->id;
		if (!id) {
			id = gs->dflt_id;
			id_hdr_size = gs->dflt_id_hdr_size;
		} else {
			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);

			id_hdr_size = evsel__id_hdr_size(evsel);
		}

		if (id_hdr_size & 7) {
			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
			return -EINVAL;
		}

		if (ev->header.size & 7) {
			pr_err("Bad event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Remove guest id sample */
		ev->header.size -= id_hdr_size;

		if (ev->header.size & 7) {
			pr_err("Bad raw event size %u\n", ev->header.size);
			return -EINVAL;
		}

		guest_id = guest_session__lookup_id(gs, id);
		if (!guest_id) {
			pr_err("Guest event with unknown id %llu\n",
			       (unsigned long long)id);
			return -EINVAL;
		}

		/* Change to host ID to avoid conflicting ID values */
		sample->id = guest_id->host_id;
		sample->stream_id = guest_id->host_id;

		if (sample->cpu != (u32)-1) {
			if (sample->cpu >= gs->vcpu_cnt) {
				pr_err("Guest event with unknown VCPU %u\n",
				       sample->cpu);
				return -EINVAL;
			}
			/* Change to host CPU instead of guest VCPU */
			sample->cpu = gs->vcpu[sample->cpu].cpu;
		}

		/* New id sample with new ID and CPU */
		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
		if (ret)
			return ret;

		if (ev->header.size & 7) {
			pr_err("Bad new event size %u\n", ev->header.size);
			return -EINVAL;
		}

		gs->fetched = false;

		ret = output_bytes(inject, ev, ev->header.size);
		if (ret)
			return ret;
	}
}

static int guest_session__flush_events(struct guest_session *gs)
{
	return guest_session__inject_events(gs, -1);
}

static int host__repipe(const struct perf_tool *tool,
			union perf_event *event,
			struct perf_sample *sample,
			struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret;

	ret = guest_session__inject_events(&inject->guest_session, sample->time);
	if (ret)
		return ret;

	return perf_event__repipe(tool, event, sample, machine);
}

static int host__finished_init(const struct perf_tool *tool, struct perf_session *session,
			       union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct guest_session *gs = &inject->guest_session;
	int ret;

	/*
	 * Peek through host COMM events to find QEMU threads and the VCPU they
	 * are running.
	 */
	ret = host_peek_vm_comms(session, gs);
	if (ret)
		return ret;

	if (!gs->vcpu_cnt) {
		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
		return -EINVAL;
	}

	/*
	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
	 */
	gs->highest_id = evlist__find_highest_id(session->evlist);
	ret = guest_session__map_ids(gs, session->evlist);
	if (ret)
		return ret;

	ret = guest_session__add_attrs(gs);
	if (ret)
		return ret;

	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
	if (ret) {
		pr_err("Failed to synthesize id_index\n");
		return ret;
	}

	ret = guest_session__add_build_ids(gs);
	if (ret) {
		pr_err("Failed to add guest build IDs\n");
		return ret;
	}

	gs->ready = true;

	ret = guest_session__inject_events(gs, 0);
	if (ret)
		return ret;

	return perf_event__repipe_op2_synth(tool, session, event);
}

/*
 * Obey finished-round ordering. The FINISHED_ROUND event is first processed
 * which flushes host events to file up until the last flush time. Then inject
 * guest events up to the same time. Finally write out the FINISHED_ROUND event
 * itself.
 */
static int host__finished_round(const struct perf_tool *tool,
				union perf_event *event,
				struct ordered_events *oe)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret = perf_event__process_finished_round(tool, event, oe);
	u64 timestamp = ordered_events__last_flush_time(oe);

	if (ret)
		return ret;

	ret = guest_session__inject_events(&inject->guest_session, timestamp);
	if (ret)
		return ret;

	return perf_event__repipe_oe_synth(tool, event, oe);
}

static int host__context_switch(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	struct guest_session *gs = &inject->guest_session;
	u32 pid = event->context_switch.next_prev_pid;
	u32 tid = event->context_switch.next_prev_tid;
	struct guest_tid *guest_tid;
	u32 vcpu;

	if (out || pid != gs->machine_pid)
		goto out;

	guest_tid = guest_session__lookup_tid(gs, tid);
	if (!guest_tid)
		goto out;

	if (sample->cpu == (u32)-1) {
		pr_err("Switch event does not have CPU\n");
		return -EINVAL;
	}

	vcpu = guest_tid->vcpu;
	if (vcpu >= gs->vcpu_cnt)
		return -EINVAL;

	/* Guest is switching in, record which CPU the VCPU is now running on */
	gs->vcpu[vcpu].cpu = sample->cpu;
out:
	return host__repipe(tool, event, sample, machine);
}

static void sig_handler(int sig __maybe_unused)
{
	session_done = 1;
}

static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	const char *name = evsel__name(evsel);

	if (!(attr->sample_type & sample_type)) {
		pr_err("Samples for %s event do not have %s attribute set.",
		       name, sample_msg);
		return -EINVAL;
	}

	return 0;
}

static int drop_sample(const struct perf_tool *tool __maybe_unused,
		       union perf_event *event __maybe_unused,
		       struct perf_sample *sample __maybe_unused,
		       struct evsel *evsel __maybe_unused,
		       struct machine *machine __maybe_unused)
{
	return 0;
}

static void strip_init(struct perf_inject *inject)
{
	struct evlist *evlist = inject->session->evlist;
	struct evsel *evsel;

	inject->tool.context_switch = perf_event__drop;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = drop_sample;
}
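
/*
 * Parse --vm-time-correlation: an optional leading "dry-run" keyword, with
 * the remainder of the string stored in vm_tm_corr_args and passed through
 * to the AUX area trace decoder.
 */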
static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	const char *args;
	char *dry_run;

	if (unset)
		return 0;

	inject->itrace_synth_opts.set = true;
	inject->itrace_synth_opts.vm_time_correlation = true;
	inject->in_place_update = true;

	if (!str)
		return 0;

	dry_run = skip_spaces(str);
	if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
		inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
		inject->in_place_update_dry_run = true;
		args = dry_run + strlen("dry-run");
	} else {
		args = str;
	}

	inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);

	return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
}

static int parse_guest_data(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	struct guest_session *gs = &inject->guest_session;
	char *tok;
	char *s;

	if (unset)
		return 0;

	if (!str)
		goto bad_args;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	gs->perf_data_file = strsep(&s, ",");
	if (!gs->perf_data_file)
		goto bad_args;

	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
	if (gs->copy_kcore_dir)
		inject->output.is_dir = true;

	tok = strsep(&s, ",");
	if (!tok)
		goto bad_args;
	gs->machine_pid = strtoul(tok, NULL, 0);
	if (!inject->guest_session.machine_pid)
		goto bad_args;

	gs->time_scale = 1;

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_offset = strtoull(tok, NULL, 0);

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_scale = strtod(tok, NULL);
	if (!gs->time_scale)
		goto bad_args;
out:
	return 0;

bad_args:
	pr_err("--guest-data option requires guest perf.data file name, "
	       "guest machine PID, and optionally guest timestamp offset, "
	       "and guest timestamp scale factor, separated by commas.\n");
	return -1;
}

static int save_section_info_cb(struct perf_file_section *section,
				struct perf_header *ph __maybe_unused,
				int feat, int fd __maybe_unused, void *data)
{
	struct perf_inject *inject = data;

	inject->secs[feat] = *section;
	return 0;
}

static int save_section_info(struct perf_inject *inject)
{
	struct perf_header *header = &inject->session->header;
	int fd = perf_data__fd(inject->session->data);

	return perf_header__process_sections(header, fd, inject, save_section_info_cb);
}

static bool keep_feat(int feat)
{
	switch (feat) {
	/* Keep original information that describes the machine or software */
	case HEADER_TRACING_DATA:
	case HEADER_HOSTNAME:
	case HEADER_OSRELEASE:
	case HEADER_VERSION:
	case HEADER_ARCH:
	case HEADER_NRCPUS:
	case HEADER_CPUDESC:
	case HEADER_CPUID:
	case HEADER_TOTAL_MEM:
	case HEADER_CPU_TOPOLOGY:
	case HEADER_NUMA_TOPOLOGY:
	case HEADER_PMU_MAPPINGS:
	case HEADER_CACHE:
	case HEADER_MEM_TOPOLOGY:
	case HEADER_CLOCKID:
	case HEADER_BPF_PROG_INFO:
	case HEADER_BPF_BTF:
	case HEADER_CPU_PMU_CAPS:
	case HEADER_CLOCK_DATA:
	case HEADER_HYBRID_TOPOLOGY:
	case HEADER_PMU_CAPS:
		return true;
	/* Information that can be updated */
	case HEADER_BUILD_ID:
	case HEADER_CMDLINE:
	case HEADER_EVENT_DESC:
	case HEADER_BRANCH_STACK:
	case HEADER_GROUP_DESC:
	case HEADER_AUXTRACE:
	case HEADER_STAT:
	case HEADER_SAMPLE_TIME:
	case HEADER_DIR_FORMAT:
	case HEADER_COMPRESSED:
	default:
		return false;
	}
}

static int read_file(int fd, u64 offs, void *buf, size_t sz)
{
	ssize_t ret = preadn(fd, buf, sz, offs);

	if (ret < 0)
		return -errno;
	if ((size_t)ret != sz)
		return -EINVAL;
	return 0;
}
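
/*
 * Copy a feature section saved by save_section_info() verbatim from the
 * input file into the output header via the feat_writer callback.
 */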
static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
{
	int fd = perf_data__fd(inject->session->data);
	u64 offs = inject->secs[feat].offset;
	size_t sz = inject->secs[feat].size;
	void *buf = malloc(sz);
	int ret;

	if (!buf)
		return -ENOMEM;

	ret = read_file(fd, offs, buf, sz);
	if (ret)
		goto out_free;

	ret = fw->write(fw, buf, sz);
out_free:
	free(buf);
	return ret;
}

struct inject_fc {
	struct feat_copier fc;
	struct perf_inject *inject;
};

static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
{
	struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
	struct perf_inject *inject = inj_fc->inject;
	int ret;

	if (!inject->secs[feat].offset ||
	    !keep_feat(feat))
		return 0;

	ret = feat_copy(inject, feat, fw);
	if (ret < 0)
		return ret;

	return 1; /* Feature section copied */
}

static int copy_kcore_dir(struct perf_inject *inject)
{
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
		       inject->input_name, inject->output.path);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}

static int guest_session__copy_kcore_dir(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
		       gs->perf_data_file, inject->output.path, gs->machine_pid);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}

static int output_fd(struct perf_inject *inject)
{
	return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
}
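
/*
 * Main processing: select per-mode tool callbacks, stream the input events
 * through them, then rewrite the header (features, attrs, data offset and
 * size) unless writing to a pipe or updating in place.
 */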
static int __cmd_inject(struct perf_inject *inject)
{
	int ret = -EINVAL;
	struct guest_session *gs = &inject->guest_session;
	struct perf_session *session = inject->session;
	int fd = output_fd(inject);
	u64 output_data_offset = perf_session__data_offset(session->evlist);
	/*
	 * Pipe input hasn't loaded the attributes and will handle them as
	 * events. So that the attributes don't overlap the data, write the
	 * attributes after the data.
	 */
	bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe;

	signal(SIGINT, sig_handler);

	if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat ||
	    inject->itrace_synth_opts.set) {
		inject->tool.mmap = perf_event__repipe_mmap;
		inject->tool.mmap2 = perf_event__repipe_mmap2;
		inject->tool.fork = perf_event__repipe_fork;
#ifdef HAVE_LIBTRACEEVENT
		inject->tool.tracing_data = perf_event__repipe_tracing_data;
#endif
	}

	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
	    inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		inject->tool.sample = perf_event__inject_buildid;
	} else if (inject->sched_stat) {
		struct evsel *evsel;

		evlist__for_each_entry(session->evlist, evsel) {
			const char *name = evsel__name(evsel);

			if (!strcmp(name, "sched:sched_switch")) {
				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
					return -EINVAL;

				evsel->handler = perf_inject__sched_switch;
			} else if (!strcmp(name, "sched:sched_process_exit"))
				evsel->handler = perf_inject__sched_process_exit;
#ifdef HAVE_LIBTRACEEVENT
			else if (!strncmp(name, "sched:sched_stat_", 17))
				evsel->handler = perf_inject__sched_stat;
#endif
		}
	} else if (inject->itrace_synth_opts.vm_time_correlation) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		memset(&inject->tool, 0, sizeof(inject->tool));
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	} else if (inject->itrace_synth_opts.set) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		inject->itrace_synth_opts.inject = true;
		inject->tool.comm = perf_event__repipe_comm;
		inject->tool.namespaces = perf_event__repipe_namespaces;
		inject->tool.exit = perf_event__repipe_exit;
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.aux = perf_event__drop_aux;
		inject->tool.itrace_start = perf_event__drop_aux;
		inject->tool.aux_output_hw_id = perf_event__drop_aux;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Allow space in the header for new attributes */
		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
		if (inject->strip)
			strip_init(inject);
	} else if (gs->perf_data_file) {
		char *name = gs->perf_data_file;

		/*
		 * Not strictly necessary, but keep these events in order wrt
		 * guest events.
		 */
		inject->tool.mmap = host__repipe;
		inject->tool.mmap2 = host__repipe;
		inject->tool.comm = host__repipe;
		inject->tool.fork = host__repipe;
		inject->tool.exit = host__repipe;
		inject->tool.lost = host__repipe;
		inject->tool.context_switch = host__repipe;
		inject->tool.ksymbol = host__repipe;
		inject->tool.text_poke = host__repipe;
		/*
		 * Once the host session has initialized, set up sample ID
		 * mapping and feed in guest attrs, build IDs and initial
		 * events.
		 */
		inject->tool.finished_init = host__finished_init;
		/* Obey finished round ordering */
		inject->tool.finished_round = host__finished_round;
		/* Keep track of which CPU a VCPU is running on */
		inject->tool.context_switch = host__context_switch;
		/*
		 * Must order events to be able to obey finished round
		 * ordering.
		 */
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Set up a separate session to process guest perf.data file */
		ret = guest_session__start(gs, name, session->data->force);
		if (ret) {
			pr_err("Failed to process %s, error %d\n", name, ret);
			return ret;
		}
		/* Allow space in the header for guest attributes */
		output_data_offset += gs->session->header.data_offset;
		output_data_offset = roundup(output_data_offset, 4096);
	}

	if (!inject->itrace_synth_opts.set)
		auxtrace_index__free(&session->auxtrace_index);

	if (!inject->output.is_pipe && !inject->in_place_update)
		lseek(fd, output_data_offset, SEEK_SET);
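
	/*
	 * Stream all host events through the callbacks selected above. In
	 * guest-data mode, host__finished_round() interleaves guest events
	 * up to the flush timestamp at each finished round.
	 */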
	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	if (gs->session) {
		/*
		 * Remaining guest events have later timestamps. Flush them
		 * out to file.
		 */
		ret = guest_session__flush_events(gs);
		if (ret) {
			pr_err("Failed to flush guest events\n");
			return ret;
		}
	}

	if (!inject->output.is_pipe && !inject->in_place_update) {
		struct inject_fc inj_fc = {
			.fc.copy = feat_copy_cb,
			.inject = inject,
		};

		if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		    inject->build_id_style == BID_RWS__INJECT_HEADER_ALL)
			perf_header__set_feat(&session->header, HEADER_BUILD_ID);
		/*
		 * Keep all buildids when there is unprocessed AUX data because
		 * it is not known which ones the AUX trace hits.
		 */
		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
			perf_session__dsos_hit_all(session);
		/*
		 * The AUX areas have been removed and replaced with
		 * synthesized hardware events, so clear the feature flag.
		 */
		if (inject->itrace_synth_opts.set) {
			perf_header__clear_feat(&session->header,
						HEADER_AUXTRACE);
			if (inject->itrace_synth_opts.last_branch ||
			    inject->itrace_synth_opts.add_last_branch)
				perf_header__set_feat(&session->header,
						      HEADER_BRANCH_STACK);
		}
		session->header.data_offset = output_data_offset;
		session->header.data_size = inject->bytes_written;
		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
					    write_attrs_after_data);

		if (inject->copy_kcore_dir) {
			ret = copy_kcore_dir(inject);
			if (ret) {
				pr_err("Failed to copy kcore\n");
				return ret;
			}
		}
		if (gs->copy_kcore_dir) {
			ret = guest_session__copy_kcore_dir(gs);
			if (ret) {
				pr_err("Failed to copy guest kcore\n");
				return ret;
			}
		}
	}

	return ret;
}
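
/*
 * Entry point for "perf inject": parse options, open the input and output
 * files, install the default pass-through callbacks and hand off to
 * __cmd_inject().
 */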
int cmd_inject(int argc, const char **argv)
{
	struct perf_inject inject = {
		.input_name = "-",
		.samples = LIST_HEAD_INIT(inject.samples),
		.output = {
			.path = "-",
			.mode = PERF_DATA_MODE_WRITE,
			.use_stdio = true,
		},
	};
	struct perf_data data = {
		.mode = PERF_DATA_MODE_READ,
		.use_stdio = true,
	};
	int ret;
	const char *known_build_ids = NULL;
	bool build_ids = false;
	bool build_id_all = false;
	bool mmap2_build_ids = false;
	bool mmap2_build_id_all = false;

	struct option options[] = {
		OPT_BOOLEAN('b', "build-ids", &build_ids,
			    "Inject build-ids into the output stream"),
		OPT_BOOLEAN(0, "buildid-all", &build_id_all,
			    "Inject build-ids of all DSOs into the output stream"),
		OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids,
			    "Drop unused mmap events, make others mmap2 with build IDs"),
		OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all,
			    "Rewrite all mmap events as mmap2 events with build IDs"),
		OPT_STRING(0, "known-build-ids", &known_build_ids,
			   "buildid path [,buildid path...]",
			   "build-ids to use for given paths"),
		OPT_STRING('i', "input", &inject.input_name, "file",
			   "input file name"),
		OPT_STRING('o', "output", &inject.output.path, "file",
			   "output file name"),
		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
			    "Merge sched-stat and sched-switch to determine "
			    "where and how long tasks slept"),
#ifdef HAVE_JITDUMP
		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
#endif
		OPT_INCR('v', "verbose", &verbose,
			 "be more verbose (show build ids, etc)"),
		OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
			   "file", "vmlinux pathname"),
		OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
			    "don't load vmlinux even if found"),
		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
			   "kallsyms pathname"),
		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
				    NULL, "opts", "Instruction Tracing options\n"
				    ITRACE_HELP,
				    itrace_parse_synth_opts),
		OPT_BOOLEAN(0, "strip", &inject.strip,
			    "strip non-synthesized events (use with --itrace)"),
		OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
				    "correlate time between VM guests and the host",
				    parse_vm_time_correlation),
		OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
				    "inject events from a guest perf.data file",
				    parse_guest_data),
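		/*
		 * --guest-data syntax, per parse_guest_data() above (values
		 * illustrative, not taken from this file):
		 * <guest perf.data>,<machine PID>[,<time offset>[,<time scale>]]
		 * e.g. --guest-data=guest.data,1234,0,1.0
		 */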
		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
			   "guest mount directory under which every guest OS"
			   " instance has a subdir"),
		OPT_END()
	};
	const char * const inject_usage[] = {
		"perf inject [<options>]",
		NULL
	};
	bool ordered_events;

	if (!inject.itrace_synth_opts.set) {
		/* Disable eager loading of kernel symbols that adds overhead to perf inject. */
		symbol_conf.lazy_load_kernel_maps = true;
	}

#ifndef HAVE_JITDUMP
	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
#endif
	argc = parse_options(argc, argv, options, inject_usage, 0);

	/*
	 * Any (unrecognized) arguments left?
	 */
	if (argc)
		usage_with_options(inject_usage, options);

	if (inject.strip && !inject.itrace_synth_opts.set) {
		pr_err("--strip option requires --itrace option\n");
		return -1;
	}

	if (symbol__validate_sym_arguments())
		return -1;

	if (inject.in_place_update) {
		if (!strcmp(inject.input_name, "-")) {
			pr_err("Input file name required for in-place updating\n");
			return -1;
		}
		if (strcmp(inject.output.path, "-")) {
			pr_err("Output file name must not be specified for in-place updating\n");
			return -1;
		}
		if (!data.force && !inject.in_place_update_dry_run) {
			pr_err("The input file would be updated in place, "
			       "the --force option is required.\n");
			return -1;
		}
		if (!inject.in_place_update_dry_run)
			data.in_place_update = true;
	} else {
		if (strcmp(inject.output.path, "-") && !inject.strip &&
		    has_kcore_dir(inject.input_name)) {
			inject.output.is_dir = true;
			inject.copy_kcore_dir = true;
		}
		if (perf_data__open(&inject.output)) {
			perror("failed to create output file");
			return -1;
		}
	}
	if (mmap2_build_ids)
		inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY;
	if (mmap2_build_id_all)
		inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL;
	if (build_ids)
		inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY;
	if (build_id_all)
		inject.build_id_style = BID_RWS__INJECT_HEADER_ALL;

	data.path = inject.input_name;

	ordered_events = inject.jit_mode || inject.sched_stat ||
		inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY;
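
	/*
	 * Default every callback to pass events through unchanged; the
	 * mode-specific setup in __cmd_inject() overrides individual
	 * handlers as needed.
	 */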
	perf_tool__init(&inject.tool, ordered_events);
	inject.tool.sample = perf_event__repipe_sample;
	inject.tool.read = perf_event__repipe_sample;
	inject.tool.mmap = perf_event__repipe;
	inject.tool.mmap2 = perf_event__repipe;
	inject.tool.comm = perf_event__repipe;
	inject.tool.namespaces = perf_event__repipe;
	inject.tool.cgroup = perf_event__repipe;
	inject.tool.fork = perf_event__repipe;
	inject.tool.exit = perf_event__repipe;
	inject.tool.lost = perf_event__repipe;
	inject.tool.lost_samples = perf_event__repipe;
	inject.tool.aux = perf_event__repipe;
	inject.tool.itrace_start = perf_event__repipe;
	inject.tool.aux_output_hw_id = perf_event__repipe;
	inject.tool.context_switch = perf_event__repipe;
	inject.tool.throttle = perf_event__repipe;
	inject.tool.unthrottle = perf_event__repipe;
	inject.tool.ksymbol = perf_event__repipe;
	inject.tool.bpf = perf_event__repipe;
	inject.tool.text_poke = perf_event__repipe;
	inject.tool.attr = perf_event__repipe_attr;
	inject.tool.event_update = perf_event__repipe_event_update;
	inject.tool.tracing_data = perf_event__repipe_op2_synth;
	inject.tool.finished_round = perf_event__repipe_oe_synth;
	inject.tool.build_id = perf_event__repipe_op2_synth;
	inject.tool.id_index = perf_event__repipe_op2_synth;
	inject.tool.auxtrace_info = perf_event__repipe_op2_synth;
	inject.tool.auxtrace_error = perf_event__repipe_op2_synth;
	inject.tool.time_conv = perf_event__repipe_op2_synth;
	inject.tool.thread_map = perf_event__repipe_op2_synth;
	inject.tool.cpu_map = perf_event__repipe_op2_synth;
	inject.tool.stat_config = perf_event__repipe_op2_synth;
	inject.tool.stat = perf_event__repipe_op2_synth;
	inject.tool.stat_round = perf_event__repipe_op2_synth;
	inject.tool.feature = perf_event__repipe_op2_synth;
	inject.tool.finished_init = perf_event__repipe_op2_synth;
	inject.tool.compressed = perf_event__repipe_op4_synth;
	inject.tool.auxtrace = perf_event__repipe_auxtrace;
	inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
	inject.tool.dont_split_sample_group = true;
	inject.tool.merge_deferred_callchains = false;
	inject.session = __perf_session__new(&data, &inject.tool,
					     /*trace_event_repipe=*/inject.output.is_pipe,
					     /*host_env=*/NULL);

	if (IS_ERR(inject.session)) {
		ret = PTR_ERR(inject.session);
		goto out_close_output;
	}

	if (zstd_init(&(inject.session->zstd_data), 0) < 0)
		pr_warning("Decompression initialization failed.\n");

	/* Save original section info before feature bits change */
	ret = save_section_info(&inject);
	if (ret)
		goto out_delete;

	if (inject.output.is_pipe) {
		ret = perf_header__write_pipe(perf_data__fd(&inject.output));
		if (ret < 0) {
			pr_err("Couldn't write a new pipe header.\n");
			goto out_delete;
		}

		/*
		 * If the input is already a pipe then the features and
		 * attributes don't need synthesizing, they will be present in
		 * the input.
		 */
		if (!data.is_pipe) {
			ret = perf_event__synthesize_for_pipe(&inject.tool,
							      inject.session,
							      &inject.output,
							      perf_event__repipe);
			if (ret < 0)
				goto out_delete;
		}
	}

	if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
	    inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		/*
		 * Require timestamp ordering so that mmap records, especially
		 * those for jitted code, are processed in the correct order.
		 * We cannot generate the buildid hit list and inject the jit
		 * mmaps at the same time for now.
		 */
		inject.tool.ordering_requires_timestamps = true;
	}
	if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) {
		inject.known_build_ids =
			perf_inject__parse_known_build_ids(known_build_ids);

		if (inject.known_build_ids == NULL) {
			pr_err("Couldn't parse known build ids.\n");
			goto out_delete;
		}
	}

#ifdef HAVE_JITDUMP
	if (inject.jit_mode) {
		inject.tool.mmap2 = perf_event__repipe_mmap2;
		inject.tool.mmap = perf_event__repipe_mmap;
		inject.tool.ordering_requires_timestamps = true;
		/*
		 * JIT MMAP injection injects all MMAP events in one go, so it
		 * does not obey finished_round semantics.
		 */
		inject.tool.finished_round = perf_event__drop_oe;
	}
#endif
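	/*
	 * Initialize symbol handling; kernel maps load lazily unless
	 * --itrace was given (see lazy_load_kernel_maps above).
	 */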
	ret = symbol__init(perf_session__env(inject.session));
	if (ret < 0)
		goto out_delete;

	ret = __cmd_inject(&inject);

	guest_session__exit(&inject.guest_session);

out_delete:
	strlist__delete(inject.known_build_ids);
	zstd_fini(&(inject.session->zstd_data));
	perf_session__delete(inject.session);
out_close_output:
	if (!inject.in_place_update)
		perf_data__close(&inject.output);
	free(inject.itrace_synth_opts.vm_tm_corr_args);
	free(inject.event_copy);
	free(inject.guest_session.ev.event_buf);
	return ret;
}
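
/*
 * Illustrative invocations (file names and PIDs are examples, not taken
 * from this file):
 *
 *   perf inject -b -i perf.data -o perf.data.build-ids
 *   perf inject --itrace -i perf.data -o perf.data.synth
 *   perf inject --guest-data=guest.data,1234 -i host.data -o out.data
 */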