Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf inject jit: Add namespaces support

This patch fixes "perf inject --jit" to properly operate on
namespaced/containerized processes:

* jitdump files are generated by the process, thus they should be
looked up in its mount NS.

* DSOs of injected MMAP events will later be looked up in the process
mount NS, so write them into its NS.

* PIDs & TIDs from jitdump events need to be translated to the PID as
seen by "perf record" before being written into MMAP events.

For a process in a different PID NS, the TID & PID given in the jitdump
event are actually ignored; I use the TID & PID of the thread which
mmap()ed the jitdump file. This is a simplification and won't work for
forks of the initial process if they continue using the same jitdump
file. Future patches might improve it.

This was tested by recording a NodeJS process running with
"--perf-prof", inside a Docker container, and by recording another
NodeJS process running in the same namespaces as perf itself, to make
sure it's not broken for non-containerized processes.

Signed-off-by: Yonatan Goldschmidt <yonatan.goldschmidt@granulate.io>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20201105015604.1726943-1-yonatan.goldschmidt@granulate.io
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Yonatan Goldschmidt and committed by
Arnaldo Carvalho de Melo
67dec926 2b51c71b

+82 -22
+2 -2
tools/perf/builtin-inject.c
··· 313 313 * if jit marker, then inject jit mmaps and generate ELF images 314 314 */ 315 315 ret = jit_process(inject->session, &inject->output, machine, 316 - event->mmap.filename, event->mmap.pid, &n); 316 + event->mmap.filename, event->mmap.pid, event->mmap.tid, &n); 317 317 if (ret < 0) 318 318 return ret; 319 319 if (ret) { ··· 413 413 * if jit marker, then inject jit mmaps and generate ELF images 414 414 */ 415 415 ret = jit_process(inject->session, &inject->output, machine, 416 - event->mmap2.filename, event->mmap2.pid, &n); 416 + event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n); 417 417 if (ret < 0) 418 418 return ret; 419 419 if (ret) {
+1 -1
tools/perf/util/jit.h
··· 5 5 #include <data.h> 6 6 7 7 int jit_process(struct perf_session *session, struct perf_data *output, 8 - struct machine *machine, char *filename, pid_t pid, u64 *nbytes); 8 + struct machine *machine, char *filename, pid_t pid, pid_t tid, u64 *nbytes); 9 9 10 10 int jit_inject_record(const char *filename); 11 11
+65 -19
tools/perf/util/jitdump.c
··· 18 18 #include "event.h" 19 19 #include "debug.h" 20 20 #include "evlist.h" 21 + #include "namespaces.h" 21 22 #include "symbol.h" 22 23 #include <elf.h> 23 24 ··· 36 35 struct perf_data *output; 37 36 struct perf_session *session; 38 37 struct machine *machine; 38 + struct nsinfo *nsi; 39 39 union jr_entry *entry; 40 40 void *buf; 41 41 uint64_t sample_type; ··· 74 72 #define get_jit_tool(t) (container_of(tool, struct jit_tool, tool)) 75 73 76 74 static int 77 - jit_emit_elf(char *filename, 75 + jit_emit_elf(struct jit_buf_desc *jd, 76 + char *filename, 78 77 const char *sym, 79 78 uint64_t code_addr, 80 79 const void *code, ··· 86 83 uint32_t unwinding_header_size, 87 84 uint32_t unwinding_size) 88 85 { 89 - int ret, fd; 86 + int ret, fd, saved_errno; 87 + struct nscookie nsc; 90 88 91 89 if (verbose > 0) 92 90 fprintf(stderr, "write ELF image %s\n", filename); 93 91 92 + nsinfo__mountns_enter(jd->nsi, &nsc); 94 93 fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644); 94 + saved_errno = errno; 95 + nsinfo__mountns_exit(&nsc); 95 96 if (fd == -1) { 96 - pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno)); 97 + pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(saved_errno)); 97 98 return -1; 98 99 } 99 100 ··· 106 99 107 100 close(fd); 108 101 109 - if (ret) 110 - unlink(filename); 102 + if (ret) { 103 + nsinfo__mountns_enter(jd->nsi, &nsc); 104 + unlink(filename); 105 + nsinfo__mountns_exit(&nsc); 106 + } 111 107 112 108 return ret; 113 109 } ··· 144 134 jit_open(struct jit_buf_desc *jd, const char *name) 145 135 { 146 136 struct jitheader header; 137 + struct nscookie nsc; 147 138 struct jr_prefix *prefix; 148 139 ssize_t bs, bsz = 0; 149 140 void *n, *buf = NULL; 150 141 int ret, retval = -1; 151 142 143 + nsinfo__mountns_enter(jd->nsi, &nsc); 152 144 jd->in = fopen(name, "r"); 145 + nsinfo__mountns_exit(&nsc); 153 146 if (!jd->in) 154 147 return -1; 155 148 ··· 380 367 return 0; 381 368 } 382 369 370 + static pid_t 
jr_entry_pid(struct jit_buf_desc *jd, union jr_entry *jr) 371 + { 372 + if (jd->nsi && jd->nsi->in_pidns) 373 + return jd->nsi->tgid; 374 + return jr->load.pid; 375 + } 376 + 377 + static pid_t jr_entry_tid(struct jit_buf_desc *jd, union jr_entry *jr) 378 + { 379 + if (jd->nsi && jd->nsi->in_pidns) 380 + return jd->nsi->pid; 381 + return jr->load.tid; 382 + } 383 + 383 384 static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) 384 385 { 385 386 struct perf_tsc_conversion tc; ··· 429 402 const char *sym; 430 403 uint64_t count; 431 404 int ret, csize, usize; 432 - pid_t pid, tid; 405 + pid_t nspid, pid, tid; 433 406 struct { 434 407 u32 pid, tid; 435 408 u64 time; 436 409 } *id; 437 410 438 - pid = jr->load.pid; 439 - tid = jr->load.tid; 411 + nspid = jr->load.pid; 412 + pid = jr_entry_pid(jd, jr); 413 + tid = jr_entry_tid(jd, jr); 440 414 csize = jr->load.code_size; 441 415 usize = jd->unwinding_mapped_size; 442 416 addr = jr->load.code_addr; ··· 453 425 filename = event->mmap2.filename; 454 426 size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", 455 427 jd->dir, 456 - pid, 428 + nspid, 457 429 count); 458 430 459 431 size++; /* for \0 */ 460 432 461 433 size = PERF_ALIGN(size, sizeof(u64)); 462 434 uaddr = (uintptr_t)code; 463 - ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries, 435 + ret = jit_emit_elf(jd, filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries, 464 436 jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size); 465 437 466 438 if (jd->debug_data && jd->nr_debug_entries) { ··· 479 451 free(event); 480 452 return -1; 481 453 } 482 - if (stat(filename, &st)) 454 + if (nsinfo__stat(filename, &st, jd->nsi)) 483 455 memset(&st, 0, sizeof(st)); 484 456 485 457 event->mmap2.header.type = PERF_RECORD_MMAP2; ··· 543 515 int usize; 544 516 u16 idr_size; 545 517 int ret; 546 - pid_t pid, tid; 518 + pid_t nspid, pid, tid; 547 519 
struct { 548 520 u32 pid, tid; 549 521 u64 time; 550 522 } *id; 551 523 552 - pid = jr->move.pid; 553 - tid = jr->move.tid; 524 + nspid = jr->load.pid; 525 + pid = jr_entry_pid(jd, jr); 526 + tid = jr_entry_tid(jd, jr); 554 527 usize = jd->unwinding_mapped_size; 555 528 idr_size = jd->machine->id_hdr_size; 556 529 ··· 565 536 filename = event->mmap2.filename; 566 537 size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", 567 538 jd->dir, 568 - pid, 539 + nspid, 569 540 jr->move.code_index); 570 541 571 542 size++; /* for \0 */ 572 543 573 - if (stat(filename, &st)) 544 + if (nsinfo__stat(filename, &st, jd->nsi)) 574 545 memset(&st, 0, sizeof(st)); 575 546 576 547 size = PERF_ALIGN(size, sizeof(u64)); ··· 729 700 * as captured in the RECORD_MMAP record 730 701 */ 731 702 static int 732 - jit_detect(char *mmap_name, pid_t pid) 703 + jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi) 733 704 { 734 705 char *p; 735 706 char *end = NULL; ··· 769 740 * pid does not match mmap pid 770 741 * pid==0 in system-wide mode (synthesized) 771 742 */ 772 - if (pid && pid2 != pid) 743 + if (pid && pid2 != nsi->nstgid) 773 744 return -1; 774 745 /* 775 746 * validate suffix ··· 811 782 struct machine *machine, 812 783 char *filename, 813 784 pid_t pid, 785 + pid_t tid, 814 786 u64 *nbytes) 815 787 { 788 + struct thread *thread; 789 + struct nsinfo *nsi; 816 790 struct evsel *first; 817 791 struct jit_buf_desc jd; 818 792 int ret; 819 793 794 + thread = machine__findnew_thread(machine, pid, tid); 795 + if (thread == NULL) { 796 + pr_err("problem processing JIT mmap event, skipping it.\n"); 797 + return 0; 798 + } 799 + 800 + nsi = nsinfo__get(thread->nsinfo); 801 + thread__put(thread); 802 + 820 803 /* 821 804 * first, detect marker mmap (i.e., the jitdump mmap) 822 805 */ 823 - if (jit_detect(filename, pid)) { 806 + if (jit_detect(filename, pid, nsi)) { 807 + nsinfo__put(nsi); 808 + 824 809 // Strip //anon* mmaps if we processed a jitdump for this pid 825 810 if 
(jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0)) 826 811 return 1; ··· 847 804 jd.session = session; 848 805 jd.output = output; 849 806 jd.machine = machine; 807 + jd.nsi = nsi; 850 808 851 809 /* 852 810 * track sample_type to compute id_all layout ··· 864 820 *nbytes = jd.bytes_written; 865 821 ret = 1; 866 822 } 823 + 824 + nsinfo__put(jd.nsi); 867 825 868 826 return ret; 869 827 }
+12
tools/perf/util/namespaces.c
··· 287 287 288 288 return rpath; 289 289 } 290 + 291 + int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi) 292 + { 293 + int ret; 294 + struct nscookie nsc; 295 + 296 + nsinfo__mountns_enter(nsi, &nsc); 297 + ret = stat(filename, st); 298 + nsinfo__mountns_exit(&nsc); 299 + 300 + return ret; 301 + }
+2
tools/perf/util/namespaces.h
··· 8 8 #define __PERF_NAMESPACES_H 9 9 10 10 #include <sys/types.h> 11 + #include <sys/stat.h> 11 12 #include <linux/stddef.h> 12 13 #include <linux/perf_event.h> 13 14 #include <linux/refcount.h> ··· 57 56 void nsinfo__mountns_exit(struct nscookie *nc); 58 57 59 58 char *nsinfo__realpath(const char *path, struct nsinfo *nsi); 59 + int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi); 60 60 61 61 static inline void __nsinfo__zput(struct nsinfo **nsip) 62 62 {