Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2#include "debug.h"
3#include "evlist.h"
4#include "evsel.h"
5#include "evsel_config.h"
6#include "parse-events.h"
7#include <errno.h>
8#include <limits.h>
9#include <stdlib.h>
10#include <api/fs/fs.h>
11#include <subcmd/parse-options.h>
12#include <perf/cpumap.h>
13#include "cloexec.h"
14#include "util/perf_api_probe.h"
15#include "record.h"
16#include "../perf-sys.h"
17#include "topdown.h"
18#include "map_symbol.h"
19#include "mem-events.h"
20
21/*
22 * evsel__config_leader_sampling() uses special rules for leader sampling.
23 * However, if the leader is an AUX area event, then assume the event to sample
24 * is the next event.
25 */
26static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist)
27{
28 struct evsel *leader = evsel->leader;
29
30 if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader) ||
31 is_mem_loads_aux_event(leader)) {
32 evlist__for_each_entry(evlist, evsel) {
33 if (evsel->leader == leader && evsel != evsel->leader)
34 return evsel;
35 }
36 }
37
38 return leader;
39}
40
41static u64 evsel__config_term_mask(struct evsel *evsel)
42{
43 struct evsel_config_term *term;
44 struct list_head *config_terms = &evsel->config_terms;
45 u64 term_types = 0;
46
47 list_for_each_entry(term, config_terms, list) {
48 term_types |= 1 << term->type;
49 }
50 return term_types;
51}
52
53static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
54{
55 struct perf_event_attr *attr = &evsel->core.attr;
56 struct evsel *leader = evsel->leader;
57 struct evsel *read_sampler;
58 u64 term_types, freq_mask;
59
60 if (!leader->sample_read)
61 return;
62
63 read_sampler = evsel__read_sampler(evsel, evlist);
64
65 if (evsel == read_sampler)
66 return;
67
68 term_types = evsel__config_term_mask(evsel);
69 /*
70 * Disable sampling for all group members except those with explicit
71 * config terms or the leader. In the case of an AUX area event, the 2nd
72 * event in the group is the one that 'leads' the sampling.
73 */
74 freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD);
75 if ((term_types & freq_mask) == 0) {
76 attr->freq = 0;
77 attr->sample_freq = 0;
78 attr->sample_period = 0;
79 }
80 if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0)
81 attr->write_backward = 0;
82
83 /*
84 * We don't get a sample for slave events, we make them when delivering
85 * the group leader sample. Set the slave event to follow the master
86 * sample_type to ease up reporting.
87 * An AUX area event also has sample_type requirements, so also include
88 * the sample type bits from the leader's sample_type to cover that
89 * case.
90 */
91 attr->sample_type = read_sampler->core.attr.sample_type |
92 leader->core.attr.sample_type;
93}
94
95void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain)
96{
97 struct evsel *evsel;
98 bool use_sample_identifier = false;
99 bool use_comm_exec;
100 bool sample_id = opts->sample_id;
101
102 /*
103 * Set the evsel leader links before we configure attributes,
104 * since some might depend on this info.
105 */
106 if (opts->group)
107 evlist__set_leader(evlist);
108
109 if (evlist->core.cpus->map[0] < 0)
110 opts->no_inherit = true;
111
112 use_comm_exec = perf_can_comm_exec();
113
114 evlist__for_each_entry(evlist, evsel) {
115 evsel__config(evsel, opts, callchain);
116 if (evsel->tracking && use_comm_exec)
117 evsel->core.attr.comm_exec = 1;
118 }
119
120 /* Configure leader sampling here now that the sample type is known */
121 evlist__for_each_entry(evlist, evsel)
122 evsel__config_leader_sampling(evsel, evlist);
123
124 if (opts->full_auxtrace) {
125 /*
126 * Need to be able to synthesize and parse selected events with
127 * arbitrary sample types, which requires always being able to
128 * match the id.
129 */
130 use_sample_identifier = perf_can_sample_identifier();
131 sample_id = true;
132 } else if (evlist->core.nr_entries > 1) {
133 struct evsel *first = evlist__first(evlist);
134
135 evlist__for_each_entry(evlist, evsel) {
136 if (evsel->core.attr.sample_type == first->core.attr.sample_type)
137 continue;
138 use_sample_identifier = perf_can_sample_identifier();
139 break;
140 }
141 sample_id = true;
142 }
143
144 if (sample_id) {
145 evlist__for_each_entry(evlist, evsel)
146 evsel__set_sample_id(evsel, use_sample_identifier);
147 }
148
149 evlist__set_id_pos(evlist);
150}
151
/*
 * Read the "kernel/perf_event_max_sample_rate" sysctl into @rate.
 *
 * Returns whatever sysctl__read_int() returns; callers in this file treat a
 * non-zero value as failure.
 */
static int get_max_rate(unsigned int *rate)
{
	/* sysctl__read_int() takes an int pointer; the storage is reused as-is. */
	int *value = (int *)rate;

	return sysctl__read_int("kernel/perf_event_max_sample_rate", value);
}
156
157static int record_opts__config_freq(struct record_opts *opts)
158{
159 bool user_freq = opts->user_freq != UINT_MAX;
160 unsigned int max_rate;
161
162 if (opts->user_interval != ULLONG_MAX)
163 opts->default_interval = opts->user_interval;
164 if (user_freq)
165 opts->freq = opts->user_freq;
166
167 /*
168 * User specified count overrides default frequency.
169 */
170 if (opts->default_interval)
171 opts->freq = 0;
172 else if (opts->freq) {
173 opts->default_interval = opts->freq;
174 } else {
175 pr_err("frequency and count are zero, aborting\n");
176 return -1;
177 }
178
179 if (get_max_rate(&max_rate))
180 return 0;
181
182 /*
183 * User specified frequency is over current maximum.
184 */
185 if (user_freq && (max_rate < opts->freq)) {
186 if (opts->strict_freq) {
187 pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
188 " Please use -F freq option with a lower value or consider\n"
189 " tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
190 max_rate);
191 return -1;
192 } else {
193 pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
194 " The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
195 " The kernel will lower it when perf's interrupts take too long.\n"
196 " Use --strict-freq to disable this throttling, refusing to record.\n",
197 max_rate, opts->freq, max_rate);
198
199 opts->freq = max_rate;
200 }
201 }
202
203 /*
204 * Default frequency is over current maximum.
205 */
206 if (max_rate < opts->freq) {
207 pr_warning("Lowering default frequency rate from %u to %u.\n"
208 "Please consider tweaking "
209 "/proc/sys/kernel/perf_event_max_sample_rate.\n",
210 opts->freq, max_rate);
211 opts->freq = max_rate;
212 }
213
214 return 0;
215}
216
/*
 * Validate and finalize the record options in @opts.
 *
 * Currently this only resolves the sampling frequency; the return value is
 * that of record_opts__config_freq().
 */
int record_opts__config(struct record_opts *opts)
{
	int err = record_opts__config_freq(opts);

	return err;
}
221
222bool evlist__can_select_event(struct evlist *evlist, const char *str)
223{
224 struct evlist *temp_evlist;
225 struct evsel *evsel;
226 int err, fd, cpu;
227 bool ret = false;
228 pid_t pid = -1;
229
230 temp_evlist = evlist__new();
231 if (!temp_evlist)
232 return false;
233
234 err = parse_events(temp_evlist, str, NULL);
235 if (err)
236 goto out_delete;
237
238 evsel = evlist__last(temp_evlist);
239
240 if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
241 struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
242
243 cpu = cpus ? cpus->map[0] : 0;
244 perf_cpu_map__put(cpus);
245 } else {
246 cpu = evlist->core.cpus->map[0];
247 }
248
249 while (1) {
250 fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1,
251 perf_event_open_cloexec_flag());
252 if (fd < 0) {
253 if (pid == -1 && errno == EACCES) {
254 pid = 0;
255 continue;
256 }
257 goto out_delete;
258 }
259 break;
260 }
261 close(fd);
262 ret = true;
263
264out_delete:
265 evlist__delete(temp_evlist);
266 return ret;
267}
268
269int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
270{
271 unsigned int freq;
272 struct record_opts *opts = opt->value;
273
274 if (!str)
275 return -EINVAL;
276
277 if (strcasecmp(str, "max") == 0) {
278 if (get_max_rate(&freq)) {
279 pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
280 return -1;
281 }
282 pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq);
283 } else {
284 freq = atoi(str);
285 }
286
287 opts->user_freq = freq;
288 return 0;
289}