Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6#include <dirent.h>
7#include <stdarg.h>
8#include <stdlib.h>
9#include <string.h>
10#include <unistd.h>
11#include <ctype.h>
12#include <errno.h>
13#include <fcntl.h>
14#include <sched.h>
15#include <stdio.h>
16
17#include "utils.h"
18
/* Size, in bytes, of the stack buffer err_msg()/debug_msg() format into. */
#define MAX_MSG_LENGTH 1024
/* debug_msg() prints only while this is non-zero. */
int config_debug;
21
22/*
23 * err_msg - print an error message to the stderr
24 */
25void err_msg(const char *fmt, ...)
26{
27 char message[MAX_MSG_LENGTH];
28 va_list ap;
29
30 va_start(ap, fmt);
31 vsnprintf(message, sizeof(message), fmt, ap);
32 va_end(ap);
33
34 fprintf(stderr, "%s", message);
35}
36
37/*
38 * debug_msg - print a debug message to stderr if debug is set
39 */
40void debug_msg(const char *fmt, ...)
41{
42 char message[MAX_MSG_LENGTH];
43 va_list ap;
44
45 if (!config_debug)
46 return;
47
48 va_start(ap, fmt);
49 vsnprintf(message, sizeof(message), fmt, ap);
50 va_end(ap);
51
52 fprintf(stderr, "%s", message);
53}
54
/*
 * get_llong_from_str - convert the leading base-10 number of a string
 *
 * Leading white space and an optional sign are accepted, and anything
 * following the digits is ignored (strtoll() semantics).
 *
 * Returns the parsed value, or -1 when no digits were found or strtoll()
 * reported an error through errno. Note that -1 is therefore
 * indistinguishable from a successfully parsed "-1".
 */
long long get_llong_from_str(char *start)
{
	char *stop = NULL;
	long long parsed;

	errno = 0;
	parsed = strtoll(start, &stop, 10);

	/* nothing consumed, or conversion failed */
	if (stop == start || errno != 0)
		return -1;

	return parsed;
}
70
/*
 * get_duration - fill output with a human readable duration since start_time
 *
 * @start_time: the reference point, as returned by time()
 * @output: buffer receiving the text
 * @output_size: size of @output in bytes; longer text is truncated
 *
 * The text has the form "<days> hh:mm:ss", with the day count right
 * aligned in (at least) three columns.
 *
 * The breakdown is done with plain arithmetic rather than gmtime(): the
 * day count does not wrap after one year, and there is no pointer that
 * may be NULL to dereference. If start_time lies in the future (e.g. the
 * clock was stepped backwards) the duration is reported as zero.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	const long long secs_per_min = 60;
	const long long secs_per_hour = 60 * secs_per_min;
	const long long secs_per_day = 24 * secs_per_hour;
	time_t now = time(NULL);
	long long elapsed;
	long long days;
	int hours;
	int minutes;
	int seconds;

	elapsed = (long long)difftime(now, start_time);
	if (elapsed < 0)
		elapsed = 0;

	days = elapsed / secs_per_day;
	elapsed %= secs_per_day;

	hours = (int)(elapsed / secs_per_hour);
	elapsed %= secs_per_hour;

	minutes = (int)(elapsed / secs_per_min);
	seconds = (int)(elapsed % secs_per_min);

	snprintf(output, output_size, "%3lld %02d:%02d:%02d",
		 days, hours, minutes, seconds);
}
89
/*
 * cpu_list_next_cpu - read one decimal cpu number at *pos and step past it
 *
 * Returns the cpu number, or -1 when *pos does not start with a digit or
 * the number is not below nr_cpus. *pos is only advanced on success.
 */
static int cpu_list_next_cpu(const char **pos, int nr_cpus)
{
	char *end;
	long cpu;

	/* no sign, no white space: a token must start with a digit */
	if (!isdigit((unsigned char)**pos))
		return -1;

	/* on overflow strtol() saturates at LONG_MAX, caught just below */
	cpu = strtol(*pos, &end, 10);
	if (cpu >= nr_cpus)
		return -1;

	*pos = end;
	return (int)cpu;
}

/*
 * parse_cpu_list - parse a cpu_list filling a char vector with cpus set
 *
 * @cpu_list: comma separated list of cpus and cpu ranges, like 1-3,5
 *	      (cpus 1, 2, 3, 5)
 * @monitored_cpus: on success, receives a newly allocated vector with one
 *		    char per configured cpu, set to 1 for each cpu listed.
 *		    The caller owns the vector and releases it with free().
 *
 * Every number must start with a digit and be below the number of
 * configured cpus; a range must not be descending. A single trailing ','
 * after an entry is tolerated.
 *
 * XXX: convert to a bitmask.
 *
 * Returns 0 on success. Returns 1 on a malformed list, when the number
 * of cpus cannot be read, or when the allocation fails; in that case
 * *monitored_cpus is left untouched and nothing is leaked.
 */
int parse_cpu_list(char *cpu_list, char **monitored_cpus)
{
	char *mon_cpus = NULL;
	const char *p;
	int end_cpu;
	int nr_cpus;
	int cpu;
	int i;

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	if (nr_cpus <= 0)
		goto err;

	mon_cpus = calloc(nr_cpus, sizeof(char));
	if (!mon_cpus)
		goto err;

	for (p = cpu_list; *p; ) {
		cpu = cpu_list_next_cpu(&p, nr_cpus);
		if (cpu < 0)
			goto err;

		end_cpu = cpu;
		if (*p == '-') {
			p++;
			/* also rejects the -1 returned for a bad number */
			end_cpu = cpu_list_next_cpu(&p, nr_cpus);
			if (end_cpu < cpu)
				goto err;
		}

		for (i = cpu; i <= end_cpu; i++) {
			debug_msg("cpu_list: adding cpu %d\n", i);
			mon_cpus[i] = 1;
		}

		if (*p == ',')
			p++;
	}

	*monitored_cpus = mon_cpus;

	return 0;

err:
	free(mon_cpus);
	debug_msg("Error parsing the cpu list %s", cpu_list);
	return 1;
}
151
/*
 * parse_seconds_duration - parse a duration with an optional s/m/h/d suffix
 *
 * The leading number is read with strtol(). The character right after it
 * selects the unit: 's'/'S' seconds, 'm'/'M' minutes, 'h'/'H' hours,
 * 'd'/'D' days. No suffix, or any other character, means seconds.
 *
 * Returns the duration converted to seconds (0 if val holds no number).
 */
long parse_seconds_duration(char *val)
{
	long seconds_per_unit = 1;
	char *suffix;
	long amount;

	amount = strtol(val, &suffix, 10);

	switch (*suffix) {
	case 'm':
	case 'M':
		seconds_per_unit = 60;
		break;
	case 'h':
	case 'H':
		seconds_per_unit = 60 * 60;
		break;
	case 'd':
	case 'D':
		seconds_per_unit = 24 * 60 * 60;
		break;
	default:
		/* 's', 'S', end of string or unknown: already in seconds */
		break;
	}

	return amount * seconds_per_unit;
}
185
/*
 * parse_ns_duration - parse a duration with a mandatory ns/us/ms/s suffix
 *
 * The leading number is read with strtol() and must be followed directly
 * by one of the units "ns", "us", "ms" or "s". Only the unit prefix is
 * compared, so text after the unit is ignored.
 *
 * Returns the duration in nanoseconds, or -1 when the unit is missing or
 * not recognized (a bare number is therefore rejected).
 */
long parse_ns_duration(char *val)
{
	/* order matters: the two letter units are tried before plain "s" */
	static const struct {
		const char *suffix;
		long ns_per_unit;
	} units[] = {
		{ "ns", 1 },
		{ "us", 1000 },
		{ "ms", 1000 * 1000 },
		{ "s",  1000 * 1000 * 1000 },
	};
	char *unit;
	size_t i;
	long amount;

	amount = strtol(val, &unit, 10);

	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		if (strncmp(unit, units[i].suffix, strlen(units[i].suffix)) == 0)
			return amount * units[i].ns_per_unit;
	}

	return -1;
}
214
/*
 * This is a set of helper functions to use SCHED_DEADLINE.
 *
 * The syscall numbers and the policy id below are only fallbacks for
 * build environments whose headers do not provide them. Each one is
 * guarded so that a definition already supplied by the system headers
 * (possibly spelled differently, e.g. as an offset from a syscall base)
 * is not redefined.
 */
#ifndef __NR_sched_setattr
# if defined(__x86_64__)
#  define __NR_sched_setattr 314
# elif defined(__i386__)
#  define __NR_sched_setattr 351
# elif defined(__arm__)
#  define __NR_sched_setattr 380
# elif defined(__aarch64__)
#  define __NR_sched_setattr 274
# elif defined(__powerpc__)
#  define __NR_sched_setattr 355
# elif defined(__s390x__)
#  define __NR_sched_setattr 345
# endif
#endif

#ifndef __NR_sched_getattr
# if defined(__x86_64__)
#  define __NR_sched_getattr 315
# elif defined(__i386__)
#  define __NR_sched_getattr 352
# elif defined(__arm__)
#  define __NR_sched_getattr 381
# elif defined(__aarch64__)
#  define __NR_sched_getattr 275
# elif defined(__powerpc__)
#  define __NR_sched_getattr 356
# elif defined(__s390x__)
#  define __NR_sched_getattr 346
# endif
#endif

#ifndef SCHED_DEADLINE
# define SCHED_DEADLINE 6
#endif
239
240static inline int sched_setattr(pid_t pid, const struct sched_attr *attr,
241 unsigned int flags) {
242 return syscall(__NR_sched_setattr, pid, attr, flags);
243}
244
245static inline int sched_getattr(pid_t pid, struct sched_attr *attr,
246 unsigned int size, unsigned int flags)
247{
248 return syscall(__NR_sched_getattr, pid, attr, size, flags);
249}
250
/*
 * __set_sched_attr - apply the scheduling attributes attr to one pid
 *
 * Calls sched_setattr() with no flags. On failure the reason, taken from
 * errno, is reported through err_msg().
 *
 * Returns 0 on success, 1 on failure.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	if (sched_setattr(pid, attr, 0) >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));

	return 1;
}
265
266/*
267 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
268 *
269 * Check if the procfs entry is a directory of a process, and then check if the
270 * process has a comm with the prefix set in char *comm_prefix. As the
271 * current users of this function only check for kernel threads, there is no
272 * need to check for the threads for the process.
273 *
274 * Return: True if the proc_entry contains a comm file with comm_prefix*.
275 * Otherwise returns false.
276 */
277static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
278{
279 char buffer[MAX_PATH];
280 int comm_fd, retval;
281 char *t_name;
282
283 if (proc_entry->d_type != DT_DIR)
284 return 0;
285
286 if (*proc_entry->d_name == '.')
287 return 0;
288
289 /* check if the string is a pid */
290 for (t_name = proc_entry->d_name; t_name; t_name++) {
291 if (!isdigit(*t_name))
292 break;
293 }
294
295 if (*t_name != '\0')
296 return 0;
297
298 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
299 comm_fd = open(buffer, O_RDONLY);
300 if (comm_fd < 0)
301 return 0;
302
303 memset(buffer, 0, MAX_PATH);
304 retval = read(comm_fd, buffer, MAX_PATH);
305
306 close(comm_fd);
307
308 if (retval <= 0)
309 return 0;
310
311 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
312 if (retval)
313 return 0;
314
315 /* comm already have \n */
316 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
317
318 return 1;
319}
320
321/*
322 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
323 *
324 * This function uses procfs to list the currently running threads and then set the
325 * sched_attr *attr to the threads that start with char *comm_prefix. It is
326 * mainly used to set the priority to the kernel threads created by the
327 * tracers.
328 */
329int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
330{
331 struct dirent *proc_entry;
332 DIR *procfs;
333 int retval;
334
335 if (strlen(comm_prefix) >= MAX_PATH) {
336 err_msg("Command prefix is too long: %d < strlen(%s)\n",
337 MAX_PATH, comm_prefix);
338 return 1;
339 }
340
341 procfs = opendir("/proc");
342 if (!procfs) {
343 err_msg("Could not open procfs\n");
344 return 1;
345 }
346
347 while ((proc_entry = readdir(procfs))) {
348
349 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
350 if (!retval)
351 continue;
352
353 /* procfs_is_workload_pid confirmed it is a pid */
354 retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
355 if (retval) {
356 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
357 goto out_err;
358 }
359
360 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
361 }
362 return 0;
363
364out_err:
365 closedir(procfs);
366 return 1;
367}
368
#define INVALID_VAL (~0L)
/*
 * get_long_ns_after_colon - parse the duration following the first ':'
 *
 * Looks for the first ':' in start and hands the text right after it to
 * parse_ns_duration().
 *
 * Returns what parse_ns_duration() returns, or -1 when start has no ':'.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (colon == NULL)
		return -1;

	/* the value begins right after the ':' */
	return parse_ns_duration(colon + 1);
}
385
/*
 * get_long_after_colon - parse the integer following the first ':'
 *
 * Looks for the first ':' in start and hands the text right after it to
 * get_llong_from_str(), converting the result to long.
 *
 * Returns what get_llong_from_str() returns, or -1 when start has no ':'.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (colon == NULL)
		return -1;

	/* the value begins right after the ':' */
	return get_llong_from_str(colon + 1);
}
401
402/*
403 * parse priority in the format:
404 * SCHED_OTHER:
405 * o:<prio>
406 * O:<prio>
407 * SCHED_RR:
408 * r:<prio>
409 * R:<prio>
410 * SCHED_FIFO:
411 * f:<prio>
412 * F:<prio>
413 * SCHED_DEADLINE:
414 * d:runtime:period
415 * D:runtime:period
416 */
417int parse_prio(char *arg, struct sched_attr *sched_param)
418{
419 long prio;
420 long runtime;
421 long period;
422
423 memset(sched_param, 0, sizeof(*sched_param));
424 sched_param->size = sizeof(*sched_param);
425
426 switch (arg[0]) {
427 case 'd':
428 case 'D':
429 /* d:runtime:period */
430 if (strlen(arg) < 4)
431 return -1;
432
433 runtime = get_long_ns_after_colon(arg);
434 if (runtime == INVALID_VAL)
435 return -1;
436
437 period = get_long_ns_after_colon(&arg[2]);
438 if (period == INVALID_VAL)
439 return -1;
440
441 if (runtime > period)
442 return -1;
443
444 sched_param->sched_policy = SCHED_DEADLINE;
445 sched_param->sched_runtime = runtime;
446 sched_param->sched_deadline = period;
447 sched_param->sched_period = period;
448 break;
449 case 'f':
450 case 'F':
451 /* f:prio */
452 prio = get_long_after_colon(arg);
453 if (prio == INVALID_VAL)
454 return -1;
455
456 if (prio < sched_get_priority_min(SCHED_FIFO))
457 return -1;
458 if (prio > sched_get_priority_max(SCHED_FIFO))
459 return -1;
460
461 sched_param->sched_policy = SCHED_FIFO;
462 sched_param->sched_priority = prio;
463 break;
464 case 'r':
465 case 'R':
466 /* r:prio */
467 prio = get_long_after_colon(arg);
468 if (prio == INVALID_VAL)
469 return -1;
470
471 if (prio < sched_get_priority_min(SCHED_RR))
472 return -1;
473 if (prio > sched_get_priority_max(SCHED_RR))
474 return -1;
475
476 sched_param->sched_policy = SCHED_RR;
477 sched_param->sched_priority = prio;
478 break;
479 case 'o':
480 case 'O':
481 /* o:prio */
482 prio = get_long_after_colon(arg);
483 if (prio == INVALID_VAL)
484 return -1;
485
486 if (prio < sched_get_priority_min(SCHED_OTHER))
487 return -1;
488 if (prio > sched_get_priority_max(SCHED_OTHER))
489 return -1;
490
491 sched_param->sched_policy = SCHED_OTHER;
492 sched_param->sched_priority = prio;
493 break;
494 default:
495 return -1;
496 }
497 return 0;
498}
499
/*
 * set_cpu_dma_latency - write a latency value to /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed, so the descriptor is handed to the caller still open.
 *
 * Return: the /dev/cpu_dma_latency file descriptor, or -1 if the file
 * could not be opened or written (the descriptor is closed in that case).
 */
int set_cpu_dma_latency(int32_t latency)
{
	int dma_fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (dma_fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/* the value is written as the raw 4 bytes of the int32_t */
	if (write(dma_fd, &latency, sizeof(latency)) < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(dma_fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return dma_fd;
}