Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rtla/timerlat_hist: Add timerlat user-space support

Add the support for running timerlat threads in user-space. In this
mode, enabled with -u/--user-threads, timerlat dispatches user-space
processes that will loop in the timerlat_fd, measuring the overhead
for going to user-space and then returning to the kernel - in addition
to the existing measurements.

Here is one example of the tool's output with -u enabled:

$ sudo timerlat hist -u -c 1-3 -d 600
# RTLA timerlat histogram
# Time unit is microseconds (us)
# Duration: 0 00:10:01
Index IRQ-001 Thr-001 Usr-001 IRQ-002 Thr-002 Usr-002 IRQ-003 Thr-003 Usr-003
0 477555 0 0 425287 0 0 474357 0 0
1 122385 7998 0 174616 1921 0 125412 3138 0
2 47 587376 492150 89 594717 447830 147 593463 454872
3 11 2549 101930 7 2682 145580 64 2530 138680
4 3 1954 2833 1 463 4917 11 548 4656
5 0 60 1037 0 138 1117 6 179 1130
6 0 26 1837 0 38 277 1 76 339
7 0 15 143 0 28 147 2 37 156
8 0 10 23 0 11 75 0 12 80
9 0 7 17 0 0 26 0 11 42
10 0 2 11 0 0 18 0 2 20
11 0 0 7 0 1 8 0 2 12
12 0 0 6 0 1 4 0 2 8
13 0 1 3 0 0 0 0 0 1
14 0 1 0 0 0 1 0 0 2
15 0 1 0 0 0 0 0 0 2
16 0 1 2 0 0 0 0 0 0
17 0 0 1 0 0 0 0 0 0
19 0 0 1 0 0 0 0 0 0
over: 0 0 0 0 0 0 0 0 0
count: 600001 600001 600001 600000 600000 600000 600000 600000 600000
min: 0 1 2 0 1 2 0 1 2
avg: 0 1 2 0 2 2 0 2 2
max: 4 16 19 4 12 14 7 12 15

The tuning setup like -p or -C work for the user-space threads as well.

Link: https://lkml.kernel.org/r/b6a042d55003c4a67ff7dce28d96044b7044f00d.1686066600.git.bristot@kernel.org

Cc: William White <chwhite@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Tested-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

authored by

Daniel Bristot de Oliveira and committed by
Steven Rostedt (Google)
ed774f74 cdca4f4e

+118 -11
+118 -11
tools/tracing/rtla/src/timerlat_hist.c
··· 12 12 #include <stdio.h> 13 13 #include <time.h> 14 14 #include <sched.h> 15 + #include <pthread.h> 15 16 16 17 #include "utils.h" 17 18 #include "osnoise.h" 18 19 #include "timerlat.h" 19 20 #include "timerlat_aa.h" 21 + #include "timerlat_u.h" 20 22 21 23 struct timerlat_hist_params { 22 24 char *cpus; ··· 39 37 int hk_cpus; 40 38 int no_aa; 41 39 int dump_tasks; 40 + int user_hist; 42 41 cpu_set_t hk_cpu_set; 43 42 struct sched_attr sched_param; 44 43 struct trace_events *events; ··· 56 53 struct timerlat_hist_cpu { 57 54 int *irq; 58 55 int *thread; 56 + int *user; 59 57 60 58 int irq_count; 61 59 int thread_count; 60 + int user_count; 62 61 63 62 unsigned long long min_irq; 64 63 unsigned long long sum_irq; ··· 69 64 unsigned long long min_thread; 70 65 unsigned long long sum_thread; 71 66 unsigned long long max_thread; 67 + 68 + unsigned long long min_user; 69 + unsigned long long sum_user; 70 + unsigned long long max_user; 72 71 }; 73 72 74 73 struct timerlat_hist_data { ··· 97 88 98 89 if (data->hist[cpu].thread) 99 90 free(data->hist[cpu].thread); 91 + 92 + if (data->hist[cpu].user) 93 + free(data->hist[cpu].user); 94 + 100 95 } 101 96 102 97 /* one set of histograms per CPU */ ··· 137 124 data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1)); 138 125 if (!data->hist[cpu].irq) 139 126 goto cleanup; 127 + 140 128 data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1)); 141 129 if (!data->hist[cpu].thread) 130 + goto cleanup; 131 + 132 + data->hist[cpu].user = calloc(1, sizeof(*data->hist->user) * (entries + 1)); 133 + if (!data->hist[cpu].user) 142 134 goto cleanup; 143 135 } 144 136 ··· 151 133 for (cpu = 0; cpu < nr_cpus; cpu++) { 152 134 data->hist[cpu].min_irq = ~0; 153 135 data->hist[cpu].min_thread = ~0; 136 + data->hist[cpu].min_user = ~0; 154 137 } 155 138 156 139 return data; ··· 166 147 */ 167 148 static void 168 149 timerlat_hist_update(struct osnoise_tool *tool, int cpu, 169 - unsigned long long thread, 150 + unsigned long long context, 170 151 unsigned long long latency) 171 152 { 172 153 struct timerlat_hist_params *params = tool->params; ··· 181 162 if (data->bucket_size) 182 163 bucket = latency / data->bucket_size; 183 164 184 - if (!thread) { 165 + if (!context) { 185 166 hist = data->hist[cpu].irq; 186 167 data->hist[cpu].irq_count++; 187 168 update_min(&data->hist[cpu].min_irq, &latency); 188 169 update_sum(&data->hist[cpu].sum_irq, &latency); 189 170 update_max(&data->hist[cpu].max_irq, &latency); 190 - } else { 171 + } else if (context == 1) { 191 172 hist = data->hist[cpu].thread; 192 173 data->hist[cpu].thread_count++; 193 174 update_min(&data->hist[cpu].min_thread, &latency); 194 175 update_sum(&data->hist[cpu].sum_thread, &latency); 195 176 update_max(&data->hist[cpu].max_thread, &latency); 177 + } else { /* user */ 178 + hist = data->hist[cpu].user; 179 + data->hist[cpu].user_count++; 180 + update_min(&data->hist[cpu].min_user, &latency); 181 + update_sum(&data->hist[cpu].sum_user, &latency); 182 + update_max(&data->hist[cpu].max_user, &latency); 196 183 } 197 184 198 185 if (bucket < entries) ··· 215 190 struct tep_event *event, void *data) 216 191 { 217 192 struct trace_instance *trace = data; 218 - unsigned long long thread, latency; 193 + unsigned long long context, latency; 219 194 struct osnoise_tool *tool; 220 195 int cpu = record->cpu; 221 196 222 197 tool = container_of(trace, struct osnoise_tool, trace); 223 198 224 - tep_get_field_val(s, event, "context", record, &thread, 1); 199 + tep_get_field_val(s, event, "context", record, &context, 1); 225 200 tep_get_field_val(s, event, "timer_latency", record, &latency, 1); 226 201 227 - timerlat_hist_update(tool, cpu, thread, latency); 202 + timerlat_hist_update(tool, cpu, context, latency); 228 203 229 204 return 0; 230 205 } ··· 266 241 267 242 if (!params->no_thread) 268 243 trace_seq_printf(s, " Thr-%03d", cpu); 244 + 245 + if (params->user_hist) 246 + trace_seq_printf(s, " Usr-%03d", cpu); 269 247 } 270 248 trace_seq_printf(s, "\n"); 271 249 ··· 307 279 if (!params->no_thread) 308 280 trace_seq_printf(trace->seq, "%9d ", 309 281 data->hist[cpu].thread_count); 282 + 283 + if (params->user_hist) 284 + trace_seq_printf(trace->seq, "%9d ", 285 + data->hist[cpu].user_count); 310 286 } 311 287 trace_seq_printf(trace->seq, "\n"); 312 288 ··· 331 299 if (!params->no_thread) 332 300 trace_seq_printf(trace->seq, "%9llu ", 333 301 data->hist[cpu].min_thread); 302 + 303 + if (params->user_hist) 304 + trace_seq_printf(trace->seq, "%9llu ", 305 + data->hist[cpu].min_user); 334 306 } 335 307 trace_seq_printf(trace->seq, "\n"); 336 308 ··· 359 323 if (!params->no_thread) { 360 324 if (data->hist[cpu].thread_count) 361 325 trace_seq_printf(trace->seq, "%9llu ", 362 - data->hist[cpu].sum_thread / data->hist[cpu].thread_count); 326 + data->hist[cpu].sum_thread / data->hist[cpu].thread_count); 327 + else 328 + trace_seq_printf(trace->seq, " - "); 329 + } 330 + 331 + if (params->user_hist) { 332 + if (data->hist[cpu].user_count) 333 + trace_seq_printf(trace->seq, "%9llu ", 334 + data->hist[cpu].sum_user / data->hist[cpu].user_count); 363 335 else 364 336 trace_seq_printf(trace->seq, " - "); 365 337 } ··· 391 347 if (!params->no_thread) 392 348 trace_seq_printf(trace->seq, "%9llu ", 393 349 data->hist[cpu].max_thread); 350 + 351 + if (params->user_hist) 352 + trace_seq_printf(trace->seq, "%9llu ", 353 + data->hist[cpu].max_user); 394 354 } 395 355 trace_seq_printf(trace->seq, "\n"); 396 356 trace_seq_do_printf(trace->seq); ··· 440 392 data->hist[cpu].thread[bucket]); 441 393 } 442 394 395 + if (params->user_hist) { 396 + total += data->hist[cpu].user[bucket]; 397 + trace_seq_printf(trace->seq, "%9d ", 398 + data->hist[cpu].user[bucket]); 399 + } 400 + 443 401 } 444 402 445 403 if (total == 0 && !params->with_zeros) { ··· 475 421 if (!params->no_thread) 476 422 trace_seq_printf(trace->seq, "%9d ", 477 423 data->hist[cpu].thread[data->entries]); 424 + 425 + if (params->user_hist) 426 + trace_seq_printf(trace->seq, "%9d ", 427 + data->hist[cpu].user[data->entries]); 478 428 } 479 429 trace_seq_printf(trace->seq, "\n"); 480 430 trace_seq_do_printf(trace->seq); ··· 499 441 " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\", 500 442 " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", 501 443 " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", 502 - " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task]", 444 + " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u]", 503 445 "", 504 446 " -h/--help: print this menu", 505 447 " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", ··· 534 476 " f:prio - use SCHED_FIFO with prio", 535 477 " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", 536 478 " in nanoseconds", 479 + " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads", 537 480 NULL, 538 481 }; 539 482 ··· 591 532 {"stack", required_argument, 0, 's'}, 592 533 {"thread", required_argument, 0, 'T'}, 593 534 {"trace", optional_argument, 0, 't'}, 535 + {"user-threads", no_argument, 0, 'u'}, 594 536 {"event", required_argument, 0, 'e'}, 595 537 {"no-irq", no_argument, 0, '0'}, 596 538 {"no-thread", no_argument, 0, '1'}, ··· 610 550 /* getopt_long stores the option index here. */ 611 551 int option_index = 0; 612 552 613 - c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:0123456:7:8:9\1", 553 + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:u0123456:7:8:9\1", 614 554 long_options, &option_index); 615 555 616 556 /* detect the end of the options. */ ··· 720 660 else 721 661 params->trace_output = "timerlat_trace.txt"; 722 662 break; 663 + case 'u': 664 + params->user_hist = 1; 665 + break; 723 666 case '0': /* no irq */ 724 667 params->no_irq = 1; 725 668 break; ··· 807 744 static int 808 745 timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params) 809 746 { 810 - int retval; 747 + int retval, i; 811 748 812 749 if (!params->sleep_time) 813 750 params->sleep_time = 1; ··· 818 755 err_msg("Failed to apply CPUs config\n"); 819 756 goto out_err; 820 757 } 758 + } else { 759 + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) 760 + CPU_SET(i, &params->monitored_cpus); 821 761 } 822 762 823 763 if (params->stop_us) { ··· 871 805 * No need to check results as this is an automatic attempt. 872 806 */ 873 807 auto_house_keeping(&params->monitored_cpus); 808 + } 809 + 810 + if (params->user_hist) { 811 + retval = osnoise_set_workload(tool->context, 0); 812 + if (retval) { 813 + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); 814 + goto out_err; 815 + } 874 816 } 875 817 876 818 return 0; ··· 941 867 { 942 868 struct timerlat_hist_params *params; 943 869 struct osnoise_tool *record = NULL; 870 + struct timerlat_u_params params_u; 944 871 struct osnoise_tool *tool = NULL; 945 872 struct osnoise_tool *aa = NULL; 946 873 struct trace_instance *trace; 947 874 int dma_latency_fd = -1; 948 875 int return_value = 1; 876 + pthread_t timerlat_u; 949 877 int retval; 950 878 951 879 params = timerlat_hist_parse_args(argc, argv); ··· 982 906 } 983 907 } 984 908 985 - if (params->cgroup) { 909 + if (params->cgroup && !params->user_hist) { 986 910 retval = set_comm_cgroup("timerlat/", params->cgroup_name); 987 911 if (!retval) { 988 912 err_msg("Failed to move threads to cgroup\n"); ··· 1046 970 tool->start_time = time(NULL); 1047 971 timerlat_hist_set_signals(params); 1048 972 973 + if (params->user_hist) { 974 + /* rtla asked to stop */ 975 + params_u.should_run = 1; 976 + /* all threads left */ 977 + params_u.stopped_running = 0; 978 + 979 + params_u.set = &params->monitored_cpus; 980 + if (params->set_sched) 981 + params_u.sched_param = &params->sched_param; 982 + else 983 + params_u.sched_param = NULL; 984 + 985 + params_u.cgroup_name = params->cgroup_name; 986 + 987 + retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, &params_u); 988 + if (retval) 989 + err_msg("Error creating timerlat user-space threads\n"); 990 + } 991 + 1049 992 while (!stop_tracing) { 1050 993 sleep(params->sleep_time); 1051 994 ··· 1081 986 1082 987 if (trace_is_off(&tool->trace, &record->trace)) 1083 988 break; 989 + 990 + /* is there still any user-threads ? */ 991 + if (params->user_hist) { 992 + if (params_u.stopped_running) { 993 + debug_msg("timerlat user-space threads stopped!\n"); 994 + break; 995 + } 996 + } 997 + } 998 + if (params->user_hist && !params_u.stopped_running) { 999 + params_u.should_run = 0; 1000 + sleep(1); 1084 1001 } 1085 1002 1086 1003 timerlat_print_stats(params, tool);