rtla/timerlat_top: Add timerlat user-space support

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Add support for running timerlat threads in user-space. In this
mode, enabled with -u/--user-threads, timerlat dispatches user-space
processes that loop reading the timerlat_fd, measuring the overhead
of going to user-space and then returning to the kernel, in addition
to the existing measurements.

Here is one example of the tool's output with -u enabled:

$ sudo timerlat top -u -d 600 -q
Timer Latency
0 00:10:01 | IRQ Timer Latency (us) | Thread Timer Latency (us) | Ret user Timer Latency (us)
CPU COUNT | cur min avg max | cur min avg max | cur min avg max
0 #600001 | 0 0 0 3 | 2 1 2 9 | 3 2 3 15
1 #600001 | 0 0 0 2 | 2 1 2 13 | 2 2 3 18
2 #600001 | 0 0 0 10 | 2 1 2 16 | 3 2 3 20
3 #600001 | 0 0 0 7 | 2 1 2 10 | 3 2 3 11
4 #600000 | 0 0 0 16 | 2 1 2 41 | 3 2 3 58
5 #600000 | 0 0 0 3 | 2 1 2 10 | 3 2 3 13
6 #600000 | 0 0 0 5 | 2 1 2 7 | 3 2 3 10
7 #600000 | 0 0 0 1 | 2 1 2 7 | 3 2 3 10

Tuning options such as -p or -C work for the user-space threads as well.

Link: https://lkml.kernel.org/r/758ad2292a0a1d884138d08219e1a0f572d257a2.1686066600.git.bristot@kernel.org

Cc: William White <chwhite@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Tested-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

authored by

Daniel Bristot de Oliveira and committed by

Steven Rostedt (Google) 2 years ago cdca4f4e 7bc4d308

+474 -6

7 changed files

expand all

tools

tracing

rtla

src

osnoise.c

osnoise.h

timerlat_top.c

timerlat_u.c

timerlat_u.h

utils.c

utils.h

+65

tools/tracing/rtla/src/osnoise.c

··· 841 841 context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; 842 842 } 843 843 844 + static int osnoise_get_workload(struct osnoise_context *context) 845 + { 846 + if (context->opt_workload != OSNOISE_OPTION_INIT_VAL) 847 + return context->opt_workload; 848 + 849 + if (context->orig_opt_workload != OSNOISE_OPTION_INIT_VAL) 850 + return context->orig_opt_workload; 851 + 852 + context->orig_opt_workload = osnoise_options_get_option("OSNOISE_WORKLOAD"); 853 + 854 + return context->orig_opt_workload; 855 + } 856 + 857 + int osnoise_set_workload(struct osnoise_context *context, bool onoff) 858 + { 859 + int opt_workload = osnoise_get_workload(context); 860 + int retval; 861 + 862 + if (opt_workload == OSNOISE_OPTION_INIT_VAL) 863 + return -1; 864 + 865 + if (opt_workload == onoff) 866 + return 0; 867 + 868 + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff); 869 + if (retval < 0) 870 + return -1; 871 + 872 + context->opt_workload = onoff; 873 + 874 + return 0; 875 + } 876 + 877 + static void osnoise_restore_workload(struct osnoise_context *context) 878 + { 879 + int retval; 880 + 881 + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) 882 + return; 883 + 884 + if (context->orig_opt_workload == context->opt_workload) 885 + goto out_done; 886 + 887 + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", context->orig_opt_workload); 888 + if (retval < 0) 889 + err_msg("Could not restore original OSNOISE_WORKLOAD option\n"); 890 + 891 + out_done: 892 + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; 893 + } 894 + 895 + static void osnoise_put_workload(struct osnoise_context *context) 896 + { 897 + osnoise_restore_workload(context); 898 + 899 + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) 900 + return; 901 + 902 + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; 903 + } 904 + 844 905 /* 845 906 * enable_osnoise - enable osnoise tracer in the trace_instance 846 907 */ ··· 969 908 context->orig_opt_irq_disable = 
OSNOISE_OPTION_INIT_VAL; 970 909 context->opt_irq_disable = OSNOISE_OPTION_INIT_VAL; 971 910 911 + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; 912 + context->opt_workload = OSNOISE_OPTION_INIT_VAL; 913 + 972 914 osnoise_get_context(context); 973 915 974 916 return context; ··· 999 935 osnoise_put_print_stack(context); 1000 936 osnoise_put_tracing_thresh(context); 1001 937 osnoise_put_irq_disable(context); 938 + osnoise_put_workload(context); 1002 939 1003 940 free(context); 1004 941 }

tools/tracing/rtla/src/osnoise.h

··· 42 42 /* -1 as init value because 0 is off */ 43 43 int orig_opt_irq_disable; 44 44 int opt_irq_disable; 45 + 46 + /* -1 as init value because 0 is off */ 47 + int orig_opt_workload; 48 + int opt_workload; 45 49 }; 46 50 47 51 /* ··· 88 84 long long print_stack); 89 85 90 86 int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff); 87 + int osnoise_set_workload(struct osnoise_context *context, bool onoff); 91 88 92 89 /* 93 90 * osnoise_tool - osnoise based tool definition.

+102 -6

tools/tracing/rtla/src/timerlat_top.c

··· 13 13 #include <time.h> 14 14 #include <errno.h> 15 15 #include <sched.h> 16 + #include <pthread.h> 16 17 17 18 #include "utils.h" 18 19 #include "osnoise.h" 19 20 #include "timerlat.h" 20 21 #include "timerlat_aa.h" 22 + #include "timerlat_u.h" 21 23 22 24 struct timerlat_top_params { 23 25 char *cpus; ··· 42 40 int dump_tasks; 43 41 int cgroup; 44 42 int hk_cpus; 43 + int user_top; 45 44 cpu_set_t hk_cpu_set; 46 45 struct sched_attr sched_param; 47 46 struct trace_events *events; ··· 51 48 struct timerlat_top_cpu { 52 49 int irq_count; 53 50 int thread_count; 51 + int user_count; 54 52 55 53 unsigned long long cur_irq; 56 54 unsigned long long min_irq; ··· 62 58 unsigned long long min_thread; 63 59 unsigned long long sum_thread; 64 60 unsigned long long max_thread; 61 + 62 + unsigned long long cur_user; 63 + unsigned long long min_user; 64 + unsigned long long sum_user; 65 + unsigned long long max_user; 65 66 }; 66 67 67 68 struct timerlat_top_data { ··· 107 98 for (cpu = 0; cpu < nr_cpus; cpu++) { 108 99 data->cpu_data[cpu].min_irq = ~0; 109 100 data->cpu_data[cpu].min_thread = ~0; 101 + data->cpu_data[cpu].min_user = ~0; 110 102 } 111 103 112 104 return data; ··· 134 124 update_min(&cpu_data->min_irq, &latency); 135 125 update_sum(&cpu_data->sum_irq, &latency); 136 126 update_max(&cpu_data->max_irq, &latency); 137 - } else { 127 + } else if (thread == 1) { 138 128 cpu_data->thread_count++; 139 129 cpu_data->cur_thread = latency; 140 130 update_min(&cpu_data->min_thread, &latency); 141 131 update_sum(&cpu_data->sum_thread, &latency); 142 132 update_max(&cpu_data->max_thread, &latency); 133 + } else { 134 + cpu_data->user_count++; 135 + cpu_data->cur_user = latency; 136 + update_min(&cpu_data->min_user, &latency); 137 + update_sum(&cpu_data->sum_user, &latency); 138 + update_max(&cpu_data->max_user, &latency); 143 139 } 144 140 } 145 141 ··· 188 172 189 173 trace_seq_printf(s, "\033[2;37;40m"); 190 174 trace_seq_printf(s, " Timer Latency "); 175 + if 
(params->user_top) 176 + trace_seq_printf(s, " "); 191 177 trace_seq_printf(s, "\033[0;0;0m"); 192 178 trace_seq_printf(s, "\n"); 193 179 194 - trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration, 180 + trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)", duration, 195 181 params->output_divisor == 1 ? "ns" : "us", 196 182 params->output_divisor == 1 ? "ns" : "us"); 197 183 184 + if (params->user_top) { 185 + trace_seq_printf(s, " | Ret user Timer Latency (%s)", 186 + params->output_divisor == 1 ? "ns" : "us"); 187 + } 188 + 189 + trace_seq_printf(s, "\n"); 198 190 trace_seq_printf(s, "\033[2;30;47m"); 199 191 trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); 192 + if (params->user_top) 193 + trace_seq_printf(s, " | cur min avg max"); 200 194 trace_seq_printf(s, "\033[0;0;0m"); 201 195 trace_seq_printf(s, "\n"); 202 196 } ··· 259 233 trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); 260 234 trace_seq_printf(s, "%9llu ", 261 235 (cpu_data->sum_thread / cpu_data->thread_count) / divisor); 262 - trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor); 236 + trace_seq_printf(s, "%9llu", cpu_data->max_thread / divisor); 237 + } 238 + 239 + if (!params->user_top) { 240 + trace_seq_printf(s, "\n"); 241 + return; 242 + } 243 + 244 + trace_seq_printf(s, " |"); 245 + 246 + if (!cpu_data->user_count) { 247 + trace_seq_printf(s, " - "); 248 + trace_seq_printf(s, " - "); 249 + trace_seq_printf(s, " - "); 250 + trace_seq_printf(s, " -\n"); 251 + } else { 252 + trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor); 253 + trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor); 254 + trace_seq_printf(s, "%9llu ", 255 + (cpu_data->sum_user / cpu_data->user_count) / divisor); 256 + trace_seq_printf(s, "%9llu\n", cpu_data->max_user / divisor); 263 257 } 264 258 } 265 259 ··· 334 288 "", 335 289 " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] 
[-i us] [-T us] [-s us] \\", 336 290 " [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", 337 - " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]]", 291 + " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u]", 338 292 "", 339 293 " -h/--help: print this menu", 340 294 " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", ··· 363 317 " f:prio - use SCHED_FIFO with prio", 364 318 " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", 365 319 " in nanoseconds", 320 + " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads", 366 321 NULL, 367 322 }; 368 323 ··· 418 371 {"stack", required_argument, 0, 's'}, 419 372 {"thread", required_argument, 0, 'T'}, 420 373 {"trace", optional_argument, 0, 't'}, 374 + {"user-threads", no_argument, 0, 'u'}, 421 375 {"trigger", required_argument, 0, '0'}, 422 376 {"filter", required_argument, 0, '1'}, 423 377 {"dma-latency", required_argument, 0, '2'}, ··· 431 383 /* getopt_long stores the option index here. */ 432 384 int option_index = 0; 433 385 434 - c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:np:P:qs:t::T:0:1:2:345:", 386 + c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:np:P:qs:t::T:u0:1:2:345:", 435 387 long_options, &option_index); 436 388 437 389 /* detect the end of the options. 
*/ ··· 547 499 params->trace_output = "timerlat_trace.txt"; 548 500 549 501 break; 502 + case 'u': 503 + params->user_top = true; 504 + break; 550 505 case '0': /* trigger */ 551 506 if (params->events) { 552 507 retval = trace_event_add_trigger(params->events, optarg); ··· 614 563 timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params) 615 564 { 616 565 int retval; 566 + int i; 617 567 618 568 if (!params->sleep_time) 619 569 params->sleep_time = 1; ··· 625 573 err_msg("Failed to apply CPUs config\n"); 626 574 goto out_err; 627 575 } 576 + } else { 577 + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) 578 + CPU_SET(i, &params->monitored_cpus); 628 579 } 629 580 630 581 if (params->stop_us) { ··· 680 625 * No need to check results as this is an automatic attempt. 681 626 */ 682 627 auto_house_keeping(&params->monitored_cpus); 628 + } 629 + 630 + if (params->user_top) { 631 + retval = osnoise_set_workload(top->context, 0); 632 + if (retval) { 633 + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); 634 + goto out_err; 635 + } 683 636 } 684 637 685 638 return 0; ··· 750 687 { 751 688 struct timerlat_top_params *params; 752 689 struct osnoise_tool *record = NULL; 690 + struct timerlat_u_params params_u; 753 691 struct osnoise_tool *top = NULL; 754 692 struct osnoise_tool *aa = NULL; 755 693 struct trace_instance *trace; 756 694 int dma_latency_fd = -1; 695 + pthread_t timerlat_u; 757 696 int return_value = 1; 758 697 char *max_lat; 759 698 int retval; ··· 792 727 } 793 728 } 794 729 795 - if (params->cgroup) { 730 + if (params->cgroup && !params->user_top) { 796 731 retval = set_comm_cgroup("timerlat/", params->cgroup_name); 797 732 if (!retval) { 798 733 err_msg("Failed to move threads to cgroup\n"); ··· 865 800 top->start_time = time(NULL); 866 801 timerlat_top_set_signals(params); 867 802 803 + if (params->user_top) { 804 + /* rtla asked to stop */ 805 + params_u.should_run = 1; 806 + /* all threads left */ 807 + 
params_u.stopped_running = 0; 808 + 809 + params_u.set = &params->monitored_cpus; 810 + if (params->set_sched) 811 + params_u.sched_param = &params->sched_param; 812 + else 813 + params_u.sched_param = NULL; 814 + 815 + params_u.cgroup_name = params->cgroup_name; 816 + 817 + retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, &params_u); 818 + if (retval) 819 + err_msg("Error creating timerlat user-space threads\n"); 820 + } 821 + 868 822 while (!stop_tracing) { 869 823 sleep(params->sleep_time); 870 824 ··· 907 823 if (trace_is_off(&top->trace, &record->trace)) 908 824 break; 909 825 826 + /* is there still any user-threads ? */ 827 + if (params->user_top) { 828 + if (params_u.stopped_running) { 829 + debug_msg("timerlat user space threads stopped!\n"); 830 + break; 831 + } 832 + } 833 + } 834 + 835 + if (params->user_top && !params_u.stopped_running) { 836 + params_u.should_run = 0; 837 + sleep(1); 910 838 } 911 839 912 840 timerlat_print_stats(params, top);

+224

tools/tracing/rtla/src/timerlat_u.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> 4 + */ 5 + 6 + #define _GNU_SOURCE 7 + #include <sched.h> 8 + #include <fcntl.h> 9 + #include <stdlib.h> 10 + #include <unistd.h> 11 + #include <stdio.h> 12 + #include <errno.h> 13 + #include <string.h> 14 + #include <tracefs.h> 15 + #include <pthread.h> 16 + #include <sys/wait.h> 17 + #include <sys/prctl.h> 18 + 19 + #include "utils.h" 20 + #include "timerlat_u.h" 21 + 22 + /* 23 + * This is the user-space main for the tool timerlatu/ threads. 24 + * 25 + * It is as simple as this: 26 + * - set affinity 27 + * - set priority 28 + * - open tracer fd 29 + * - spin 30 + * - close 31 + */ 32 + static int timerlat_u_main(int cpu, struct timerlat_u_params *params) 33 + { 34 + struct sched_param sp = { .sched_priority = 95 }; 35 + char buffer[1024]; 36 + int timerlat_fd; 37 + cpu_set_t set; 38 + int retval; 39 + 40 + /* 41 + * This all is only setting up the tool. 42 + */ 43 + CPU_ZERO(&set); 44 + CPU_SET(cpu, &set); 45 + 46 + retval = sched_setaffinity(gettid(), sizeof(set), &set); 47 + if (retval == -1) { 48 + err_msg("Error setting user thread affinity\n"); 49 + exit(1); 50 + } 51 + 52 + if (!params->sched_param) { 53 + retval = sched_setscheduler(0, SCHED_FIFO, &sp); 54 + if (retval < 0) { 55 + err_msg("Error setting timerlat u default priority: %s\n", strerror(errno)); 56 + exit(1); 57 + } 58 + } else { 59 + retval = __set_sched_attr(getpid(), params->sched_param); 60 + if (retval) { 61 + /* __set_sched_attr prints an error message, so */ 62 + exit(0); 63 + } 64 + } 65 + 66 + if (params->cgroup_name) { 67 + retval = set_pid_cgroup(gettid(), params->cgroup_name); 68 + if (!retval) { 69 + err_msg("Error setting timerlat u cgroup pid\n"); 70 + pthread_exit(&retval); 71 + } 72 + } 73 + 74 + /* 75 + * This is the tool's loop. If you want to use as base for your own tool... 76 + * go ahead. 
77 + */ 78 + snprintf(buffer, sizeof(buffer), "osnoise/per_cpu/cpu%d/timerlat_fd", cpu); 79 + 80 + timerlat_fd = tracefs_instance_file_open(NULL, buffer, O_RDONLY); 81 + if (timerlat_fd < 0) { 82 + err_msg("Error opening %s:%s\n", buffer, strerror(errno)); 83 + exit(1); 84 + } 85 + 86 + debug_msg("User-space timerlat pid %d on cpu %d\n", gettid(), cpu); 87 + 88 + /* add should continue with a signal handler */ 89 + while (true) { 90 + retval = read(timerlat_fd, buffer, 1024); 91 + if (retval < 0) 92 + break; 93 + } 94 + 95 + close(timerlat_fd); 96 + 97 + debug_msg("Leaving timerlat pid %d on cpu %d\n", gettid(), cpu); 98 + exit(0); 99 + } 100 + 101 + /* 102 + * timerlat_u_send_kill - send a kill signal for all processes 103 + * 104 + * Return the number of processes that received the kill. 105 + */ 106 + static int timerlat_u_send_kill(pid_t *procs, int nr_cpus) 107 + { 108 + int killed = 0; 109 + int i, retval; 110 + 111 + for (i = 0; i < nr_cpus; i++) { 112 + if (!procs[i]) 113 + continue; 114 + retval = kill(procs[i], SIGKILL); 115 + if (!retval) 116 + killed++; 117 + else 118 + err_msg("Error killing child process %d\n", procs[i]); 119 + } 120 + 121 + return killed; 122 + } 123 + 124 + /** 125 + * timerlat_u_dispatcher - dispatch one timerlatu/ process per monitored CPU 126 + * 127 + * This is a thread main that will fork one new process for each monitored 128 + * CPU. It will wait for: 129 + * 130 + * - rtla to tell to kill the child processes 131 + * - some child process to die, and the cleanup all the processes 132 + * 133 + * whichever comes first. 
134 + * 135 + */ 136 + void *timerlat_u_dispatcher(void *data) 137 + { 138 + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); 139 + struct timerlat_u_params *params = data; 140 + char proc_name[128]; 141 + int procs_count = 0; 142 + int retval = 1; 143 + pid_t *procs; 144 + int wstatus; 145 + pid_t pid; 146 + int i; 147 + 148 + debug_msg("Dispatching timerlat u procs\n"); 149 + 150 + procs = calloc(nr_cpus, sizeof(pid_t)); 151 + if (!procs) 152 + pthread_exit(&retval); 153 + 154 + for (i = 0; i < nr_cpus; i++) { 155 + if (params->set && !CPU_ISSET(i, params->set)) 156 + continue; 157 + 158 + pid = fork(); 159 + 160 + /* child */ 161 + if (!pid) { 162 + 163 + /* 164 + * rename the process 165 + */ 166 + snprintf(proc_name, sizeof(proc_name), "timerlatu/%d", i); 167 + pthread_setname_np(pthread_self(), proc_name); 168 + prctl(PR_SET_NAME, (unsigned long)proc_name, 0, 0, 0); 169 + 170 + timerlat_u_main(i, params); 171 + /* timerlat_u_main should exit()! Anyways... */ 172 + pthread_exit(&retval); 173 + } 174 + 175 + /* parent */ 176 + if (pid == -1) { 177 + timerlat_u_send_kill(procs, nr_cpus); 178 + debug_msg("Failed to create child processes"); 179 + pthread_exit(&retval); 180 + } 181 + 182 + procs_count++; 183 + procs[i] = pid; 184 + } 185 + 186 + while (params->should_run) { 187 + /* check if processes died */ 188 + pid = waitpid(-1, &wstatus, WNOHANG); 189 + if (pid != 0) { 190 + for (i = 0; i < nr_cpus; i++) { 191 + if (procs[i] == pid) { 192 + procs[i] = 0; 193 + procs_count--; 194 + } 195 + } 196 + break; 197 + } 198 + 199 + sleep(1); 200 + } 201 + 202 + timerlat_u_send_kill(procs, nr_cpus); 203 + 204 + while (procs_count) { 205 + pid = waitpid(-1, &wstatus, 0); 206 + if (pid == -1) { 207 + err_msg("Failed to monitor child processes"); 208 + pthread_exit(&retval); 209 + } 210 + for (i = 0; i < nr_cpus; i++) { 211 + if (procs[i] == pid) { 212 + procs[i] = 0; 213 + procs_count--; 214 + } 215 + } 216 + } 217 + 218 + params->stopped_running = 1; 219 + 220 + 
free(procs); 221 + retval = 0; 222 + pthread_exit(&retval); 223 + 224 + }

+18

tools/tracing/rtla/src/timerlat_u.h

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
 */
#ifndef RTLA_TIMERLAT_U_H
#define RTLA_TIMERLAT_U_H

#include <sched.h>

/* only used through a pointer here, so a forward declaration is enough */
struct sched_attr;

/*
 * timerlat_u_params - data shared between timerlat and the timerlat_u
 * dispatcher thread.
 *
 * NOTE(review): should_run and stopped_running are plain ints that the
 * two sides read and write with no locking; confirm whether they should
 * become atomics.
 */
struct timerlat_u_params {
	/* timerlat -> timerlat_u: user-space threads can keep running */
	int should_run;
	/* timerlat_u -> timerlat: all timerlat_u threads left, no reason to continue */
	int stopped_running;

	/* threads config */
	cpu_set_t *set;			/* CPUs to dispatch on; NULL means all CPUs */
	char *cgroup_name;		/* cgroup for the threads; NULL skips the cgroup setup */
	struct sched_attr *sched_param;	/* NULL selects the default SCHED_FIFO priority */
};

/* thread main, data is a pointer to a struct timerlat_u_params */
void *timerlat_u_dispatcher(void *data);

#endif /* RTLA_TIMERLAT_U_H */

+58

tools/tracing/rtla/src/utils.c

··· 626 626 return 1; 627 627 } 628 628 629 + /* 630 + * set_pid_cgroup - Set cgroup to pid_t pid 631 + * 632 + * If cgroup argument is not NULL, the pid will move to the given cgroup. 633 + * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. 634 + * 635 + * Supports cgroup v2. 636 + * 637 + * Returns 1 on success, 0 otherwise. 638 + */ 639 + int set_pid_cgroup(pid_t pid, const char *cgroup) 640 + { 641 + char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; 642 + char cgroup_procs[MAX_PATH]; 643 + char pid_str[24]; 644 + int retval; 645 + int cg_fd; 646 + 647 + retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); 648 + if (!retval) { 649 + err_msg("Did not find cgroupv2 mount point\n"); 650 + return 0; 651 + } 652 + 653 + if (!cgroup) { 654 + retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], 655 + sizeof(cgroup_path) - strlen(cgroup_path)); 656 + if (!retval) { 657 + err_msg("Did not find self cgroup\n"); 658 + return 0; 659 + } 660 + } else { 661 + snprintf(&cgroup_path[strlen(cgroup_path)], 662 + sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); 663 + } 664 + 665 + snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); 666 + 667 + debug_msg("Using cgroup path at: %s\n", cgroup_procs); 668 + 669 + cg_fd = open(cgroup_procs, O_RDWR); 670 + if (cg_fd < 0) 671 + return 0; 672 + 673 + snprintf(pid_str, sizeof(pid_str), "%d\n", pid); 674 + 675 + retval = write(cg_fd, pid_str, strlen(pid_str)); 676 + if (retval < 0) 677 + err_msg("Error setting cgroup attributes for pid:%s - %s\n", 678 + pid_str, strerror(errno)); 679 + else 680 + debug_msg("Set cgroup attributes for pid:%s\n", pid_str); 681 + 682 + close(cg_fd); 683 + 684 + return (retval >= 0); 685 + } 686 + 629 687 /** 630 688 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix 631 689 *

tools/tracing/rtla/src/utils.h

··· 57 57 58 58 int parse_prio(char *arg, struct sched_attr *sched_param); 59 59 int parse_cpu_set(char *cpu_list, cpu_set_t *set); 60 + int __set_sched_attr(int pid, struct sched_attr *attr); 60 61 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr); 61 62 int set_comm_cgroup(const char *comm_prefix, const char *cgroup); 63 + int set_pid_cgroup(pid_t pid, const char *cgroup); 62 64 int set_cpu_dma_latency(int32_t latency); 63 65 int auto_house_keeping(cpu_set_t *monitored_cpus); 64 66