Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf inject: Overhaul handling of pipe files

Previously inject->is_pipe was set if either the input or the output
was a pipe. Determining whether the input was a pipe had to be done
prior to starting the session and opening the file. This was done by
comparing the input file name with '-', but that check fails if the
pipe-format data has been written to a file on disk.

Opening a pipe file from disk will correctly set perf_data.is_pipe, but
this is too late for 'perf inject' and results in a broken file. A
workaround is 'cat pipe_perf|perf inject -i - ...'.

This change removes inject->is_pipe and changes the dependent
conditions to use the is_pipe flag on the input
(inject->session->data) and output files (inject->output). This
ensures the is_pipe condition reflects things like the header being
read.

The change removes the use of perf file header repiping, that is
writing the file header out while reading it in. When the input is a
pipe and the output is a file, the header cannot be repiped as the
attributes for the file are unknown. To resolve this, write the file
header when writing to disk and, as the attributes may be unknown,
write them after the data.

Update the session's repipe variable to be trace_event_repipe as trace
events are the only events now impacted by it. Update
__perf_session__new as the repipe_fd no longer needs to be passed.
Fully removing repipe from session header reading will be done in a
later change.

Committer testing:

root@number:~# perf record -e syscalls:sys_enter_*sleep/max-stack=4/ -o - sleep 0.01 | perf report -i -
# To display the perf.data header info, please use --header/--header-only options.
#
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.050 MB - ]
#
# Total Lost Samples: 0
#
# Samples: 1 of event 'syscalls:sys_enter_clock_nanosleep'
# Event count (approx.): 1
#
# Overhead Command Shared Object Symbol
# ........ ....... ............. ...............................
#
100.00% sleep libc.so.6 [.] clock_nanosleep@GLIBC_2.2.5
|
---__libc_start_main@@GLIBC_2.34
__libc_start_call_main
0x562fc2560a9f
clock_nanosleep@GLIBC_2.2.5

#
# (Tip: Create an archive with symtabs to analyse on other machine: perf archive)
#
root@number:~# perf record -e syscalls:sys_enter_*sleep/max-stack=4/ -o - sleep 0.01 > pipe.data
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.050 MB - ]
root@number:~# perf report --stdio -i pipe.data
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 1 of event 'syscalls:sys_enter_clock_nanosleep'
# Event count (approx.): 1
#
# Overhead Command Shared Object Symbol
# ........ ....... ............. ...............................
#
100.00% sleep libc.so.6 [.] clock_nanosleep@GLIBC_2.2.5
|
---__libc_start_main@@GLIBC_2.34
__libc_start_call_main
0x55f775975a9f
clock_nanosleep@GLIBC_2.2.5

#
# (Tip: To set sampling period of individual events use perf record -e cpu/cpu-cycles,period=100001/,cpu/branches,period=10001/ ...)
#
root@number:~#

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nick Terrell <terrelln@fb.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Yanteng Si <siyanteng@loongson.cn>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20240829150154.37929-7-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by
Arnaldo Carvalho de Melo
89d64e72 e9a7053d

+48 -49
+31 -29
tools/perf/builtin-inject.c
··· 119 119 bool jit_mode; 120 120 bool in_place_update; 121 121 bool in_place_update_dry_run; 122 - bool is_pipe; 123 122 bool copy_kcore_dir; 124 123 const char *input_name; 125 124 struct perf_data output; ··· 204 205 if (ret) 205 206 return ret; 206 207 207 - if (!inject->is_pipe) 208 + /* If the output isn't a pipe then the attributes will be written as part of the header. */ 209 + if (!inject->output.is_pipe) 208 210 return 0; 209 211 210 212 return perf_event__repipe_synth(tool, event); ··· 1966 1966 struct guest_session *gs = &inject->guest_session; 1967 1967 struct perf_session *session = inject->session; 1968 1968 int fd = output_fd(inject); 1969 - u64 output_data_offset; 1969 + u64 output_data_offset = perf_session__data_offset(session->evlist); 1970 + /* 1971 + * Pipe input hasn't loaded the attributes and will handle them as 1972 + * events. So that the attributes don't overlap the data, write the 1973 + * attributes after the data. 1974 + */ 1975 + bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe; 1970 1976 1971 1977 signal(SIGINT, sig_handler); 1972 1978 ··· 1985 1979 inject->tool.tracing_data = perf_event__repipe_tracing_data; 1986 1980 #endif 1987 1981 } 1988 - 1989 - output_data_offset = perf_session__data_offset(session->evlist); 1990 1982 1991 1983 if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) { 1992 1984 inject->tool.sample = perf_event__inject_buildid; ··· 2079 2075 if (!inject->itrace_synth_opts.set) 2080 2076 auxtrace_index__free(&session->auxtrace_index); 2081 2077 2082 - if (!inject->is_pipe && !inject->in_place_update) 2078 + if (!inject->output.is_pipe && !inject->in_place_update) 2083 2079 lseek(fd, output_data_offset, SEEK_SET); 2084 2080 2085 2081 ret = perf_session__process_events(session); ··· 2098 2094 } 2099 2095 } 2100 2096 2101 - if (!inject->is_pipe && !inject->in_place_update) { 2097 + if (!inject->output.is_pipe && !inject->in_place_update) { 2102 2098 struct inject_fc 
inj_fc = { 2103 2099 .fc.copy = feat_copy_cb, 2104 2100 .inject = inject, ··· 2128 2124 } 2129 2125 session->header.data_offset = output_data_offset; 2130 2126 session->header.data_size = inject->bytes_written; 2131 - perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc); 2127 + perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, 2128 + write_attrs_after_data); 2132 2129 2133 2130 if (inject->copy_kcore_dir) { 2134 2131 ret = copy_kcore_dir(inject); ··· 2166 2161 .use_stdio = true, 2167 2162 }; 2168 2163 int ret; 2169 - bool repipe = true; 2170 2164 const char *known_build_ids = NULL; 2171 2165 bool build_ids; 2172 2166 bool build_id_all; ··· 2277 2273 inject.build_id_style = BID_RWS__INJECT_HEADER_ALL; 2278 2274 2279 2275 data.path = inject.input_name; 2280 - if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) { 2281 - inject.is_pipe = true; 2282 - /* 2283 - * Do not repipe header when input is a regular file 2284 - * since either it can rewrite the header at the end 2285 - * or write a new pipe header. 
2286 - */ 2287 - if (strcmp(inject.input_name, "-")) 2288 - repipe = false; 2289 - } 2276 + 2290 2277 ordered_events = inject.jit_mode || inject.sched_stat || 2291 2278 (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY); 2292 2279 perf_tool__init(&inject.tool, ordered_events); ··· 2320 2325 inject.tool.compressed = perf_event__repipe_op4_synth; 2321 2326 inject.tool.auxtrace = perf_event__repipe_auxtrace; 2322 2327 inject.tool.dont_split_sample_group = true; 2323 - inject.session = __perf_session__new(&data, repipe, 2324 - output_fd(&inject), 2325 - &inject.tool); 2328 + inject.session = __perf_session__new(&data, &inject.tool, 2329 + /*trace_event_repipe=*/inject.output.is_pipe); 2330 + 2326 2331 if (IS_ERR(inject.session)) { 2327 2332 ret = PTR_ERR(inject.session); 2328 2333 goto out_close_output; ··· 2336 2341 if (ret) 2337 2342 goto out_delete; 2338 2343 2339 - if (!data.is_pipe && inject.output.is_pipe) { 2344 + if (inject.output.is_pipe) { 2340 2345 ret = perf_header__write_pipe(perf_data__fd(&inject.output)); 2341 2346 if (ret < 0) { 2342 2347 pr_err("Couldn't write a new pipe header.\n"); 2343 2348 goto out_delete; 2344 2349 } 2345 2350 2346 - ret = perf_event__synthesize_for_pipe(&inject.tool, 2347 - inject.session, 2348 - &inject.output, 2349 - perf_event__repipe); 2350 - if (ret < 0) 2351 - goto out_delete; 2351 + /* 2352 + * If the input is already a pipe then the features and 2353 + * attributes don't need synthesizing, they will be present in 2354 + * the input. 2355 + */ 2356 + if (!data.is_pipe) { 2357 + ret = perf_event__synthesize_for_pipe(&inject.tool, 2358 + inject.session, 2359 + &inject.output, 2360 + perf_event__repipe); 2361 + if (ret < 0) 2362 + goto out_delete; 2363 + } 2352 2364 } 2353 2365 2354 2366 if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY) {
+6 -6
tools/perf/util/header.c
··· 3818 3818 int perf_session__inject_header(struct perf_session *session, 3819 3819 struct evlist *evlist, 3820 3820 int fd, 3821 - struct feat_copier *fc) 3821 + struct feat_copier *fc, 3822 + bool write_attrs_after_data) 3822 3823 { 3823 3824 return perf_session__do_write_header(session, evlist, fd, true, fc, 3824 - /*write_attrs_after_data=*/false); 3825 + write_attrs_after_data); 3825 3826 } 3826 3827 3827 3828 static int perf_header__getbuffer64(struct perf_header *header, ··· 4146 4145 struct perf_pipe_file_header f_header; 4147 4146 4148 4147 if (perf_file_header__read_pipe(&f_header, header, session->data, 4149 - session->repipe, repipe_fd) < 0) { 4148 + /*repipe=*/false, repipe_fd) < 0) { 4150 4149 pr_debug("incompatible file format\n"); 4151 4150 return -EINVAL; 4152 4151 } ··· 4561 4560 SEEK_SET); 4562 4561 } 4563 4562 4564 - size_read = trace_report(fd, &session->tevent, 4565 - session->repipe); 4563 + size_read = trace_report(fd, &session->tevent, session->trace_event_repipe); 4566 4564 padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; 4567 4565 4568 4566 if (readn(fd, buf, padding) < 0) { 4569 4567 pr_err("%s: reading input file", __func__); 4570 4568 return -1; 4571 4569 } 4572 - if (session->repipe) { 4570 + if (session->trace_event_repipe) { 4573 4571 int retw = write(STDOUT_FILENO, buf, padding); 4574 4572 if (retw <= 0 || retw != padding) { 4575 4573 pr_err("%s: repiping tracing data padding", __func__);
+2 -1
tools/perf/util/header.h
··· 150 150 int perf_session__inject_header(struct perf_session *session, 151 151 struct evlist *evlist, 152 152 int fd, 153 - struct feat_copier *fc); 153 + struct feat_copier *fc, 154 + bool write_attrs_after_data); 154 155 155 156 size_t perf_session__data_offset(const struct evlist *evlist); 156 157
+4 -4
tools/perf/util/session.c
··· 135 135 } 136 136 137 137 struct perf_session *__perf_session__new(struct perf_data *data, 138 - bool repipe, int repipe_fd, 139 - struct perf_tool *tool) 138 + struct perf_tool *tool, 139 + bool trace_event_repipe) 140 140 { 141 141 int ret = -ENOMEM; 142 142 struct perf_session *session = zalloc(sizeof(*session)); ··· 144 144 if (!session) 145 145 goto out; 146 146 147 - session->repipe = repipe; 147 + session->trace_event_repipe = trace_event_repipe; 148 148 session->tool = tool; 149 149 session->decomp_data.zstd_decomp = &session->zstd_data; 150 150 session->active_decomp = &session->decomp_data; ··· 162 162 session->data = data; 163 163 164 164 if (perf_data__is_read(data)) { 165 - ret = perf_session__open(session, repipe_fd); 165 + ret = perf_session__open(session, /*repipe_fd=*/-1); 166 166 if (ret < 0) 167 167 goto out_delete; 168 168
+5 -9
tools/perf/util/session.h
··· 59 59 #endif 60 60 /** @time_conv: Holds contents of last PERF_RECORD_TIME_CONV event. */ 61 61 struct perf_record_time_conv time_conv; 62 - /** 63 - * @repipe: When set causes certain reading (header and trace events) to 64 - * also write events. The written file descriptor must be provided for 65 - * the header but is implicitly stdout for trace events. 66 - */ 67 - bool repipe; 62 + /** @trace_event_repipe: When set causes read trace events to be written to stdout. */ 63 + bool trace_event_repipe; 68 64 /** 69 65 * @one_mmap: The reader will use a single mmap by default. There may be 70 66 * multiple data files in particular for aux events. If this is true ··· 106 110 struct perf_tool; 107 111 108 112 struct perf_session *__perf_session__new(struct perf_data *data, 109 - bool repipe, int repipe_fd, 110 - struct perf_tool *tool); 113 + struct perf_tool *tool, 114 + bool trace_event_repipe); 111 115 112 116 static inline struct perf_session *perf_session__new(struct perf_data *data, 113 117 struct perf_tool *tool) 114 118 { 115 - return __perf_session__new(data, false, -1, tool); 119 + return __perf_session__new(data, tool, /*trace_event_repipe=*/false); 116 120 } 117 121 118 122 void perf_session__delete(struct perf_session *session);