Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rtla/timerlat: Add action on threshold feature

Extend the functionality provided by the -t/--trace option, which
triggers saving the contents of a tracefs buffer after tracing is
stopped, to support implementing arbitrary actions.

A new option, --on-threshold, is added, taking an argument
that further specifies the action. Actions added in this patch are:

- trace[,file=<filename>]: Saves tracefs buffer, optionally taking a
filename.
- signal,num=<sig>,pid=<pid>: Sends a signal to a process. "parent" may
be specified instead of a numeric pid to send the signal to the parent process.
- shell,command=<command>: Execute shell command.

Multiple actions may be specified and will be executed in order,
including multiple actions of the same type. Trace output requested via
-t and -a now adds a trace action to the end of the list.

If an action fails, the following actions are not executed. For
example, this command:

$ rtla timerlat -T 20 --on-threshold trace \
--on-threshold shell,command="grep ipi_send timerlat_trace.txt" \
--on-threshold signal,num=2,pid=parent

will send signal 2 (SIGINT) to the parent process, but only if the saved trace
contains the text "ipi_send".

This way, the feature can be used for flexible reactions on latency
spikes, and allows combining rtla with other tooling like perf.

Cc: John Kacur <jkacur@redhat.com>
Cc: Luis Goncalves <lgoncalv@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Chang Yin <cyin@redhat.com>
Cc: Costa Shulyupin <costa.shul@redhat.com>
Cc: Crystal Wood <crwood@redhat.com>
Cc: Gabriele Monaco <gmonaco@redhat.com>
Link: https://lore.kernel.org/20250626123405.1496931-3-tglozar@redhat.com
Signed-off-by: Tomas Glozar <tglozar@redhat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

authored by

Tomas Glozar and committed by
Steven Rostedt (Google)
6ea082b1 8b6cbcac

+341 -22
+1
tools/tracing/rtla/src/Build
··· 1 1 rtla-y += trace.o 2 2 rtla-y += utils.o 3 + rtla-y += actions.o 3 4 rtla-y += osnoise.o 4 5 rtla-y += osnoise_top.o 5 6 rtla-y += osnoise_hist.o
+235
tools/tracing/rtla/src/actions.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <stdlib.h> 3 + #include <string.h> 4 + #include <signal.h> 5 + #include <unistd.h> 6 + 7 + #include "actions.h" 8 + #include "trace.h" 9 + #include "utils.h" 10 + 11 + /* 12 + * actions_init - initialize struct actions 13 + */ 14 + void 15 + actions_init(struct actions *self) 16 + { 17 + self->size = action_default_size; 18 + self->list = calloc(self->size, sizeof(struct action)); 19 + self->len = 0; 20 + 21 + memset(&self->present, 0, sizeof(self->present)); 22 + 23 + /* This has to be set by the user */ 24 + self->trace_output_inst = NULL; 25 + } 26 + 27 + /* 28 + * actions_destroy - destroy struct actions 29 + */ 30 + void 31 + actions_destroy(struct actions *self) 32 + { 33 + /* Free any action-specific data */ 34 + for (struct action *action = self->list; action < self->list + self->len; action++) { 35 + if (action->type == ACTION_SHELL) 36 + free(action->command); 37 + if (action->type == ACTION_TRACE_OUTPUT) 38 + free(action->trace_output); 39 + } 40 + 41 + /* Free action list */ 42 + free(self->list); 43 + } 44 + 45 + /* 46 + * actions_new - Get pointer to new action 47 + */ 48 + static struct action * 49 + actions_new(struct actions *self) 50 + { 51 + if (self->size >= self->len) { 52 + self->size *= 2; 53 + self->list = realloc(self->list, self->size * sizeof(struct action)); 54 + } 55 + 56 + return &self->list[self->len++]; 57 + } 58 + 59 + /* 60 + * actions_add_trace_output - add an action to output trace 61 + */ 62 + int 63 + actions_add_trace_output(struct actions *self, const char *trace_output) 64 + { 65 + struct action *action = actions_new(self); 66 + 67 + self->present[ACTION_TRACE_OUTPUT] = true; 68 + action->type = ACTION_TRACE_OUTPUT; 69 + action->trace_output = calloc(strlen(trace_output) + 1, sizeof(char)); 70 + if (!action->trace_output) 71 + return -1; 72 + strcpy(action->trace_output, trace_output); 73 + 74 + return 0; 75 + } 76 + 77 + /* 78 + * actions_add_trace_output - add an 
action to send signal to a process 79 + */ 80 + int 81 + actions_add_signal(struct actions *self, int signal, int pid) 82 + { 83 + struct action *action = actions_new(self); 84 + 85 + self->present[ACTION_SIGNAL] = true; 86 + action->type = ACTION_SIGNAL; 87 + action->signal = signal; 88 + action->pid = pid; 89 + 90 + return 0; 91 + } 92 + 93 + /* 94 + * actions_add_shell - add an action to execute a shell command 95 + */ 96 + int 97 + actions_add_shell(struct actions *self, const char *command) 98 + { 99 + struct action *action = actions_new(self); 100 + 101 + self->present[ACTION_SHELL] = true; 102 + action->type = ACTION_SHELL; 103 + action->command = calloc(strlen(command) + 1, sizeof(char)); 104 + if (!action->command) 105 + return -1; 106 + strcpy(action->command, command); 107 + 108 + return 0; 109 + } 110 + 111 + /* 112 + * actions_parse - add an action based on text specification 113 + */ 114 + int 115 + actions_parse(struct actions *self, const char *trigger) 116 + { 117 + enum action_type type = ACTION_NONE; 118 + char *token; 119 + char trigger_c[strlen(trigger)]; 120 + 121 + /* For ACTION_SIGNAL */ 122 + int signal = 0, pid = 0; 123 + 124 + /* For ACTION_TRACE_OUTPUT */ 125 + char *trace_output; 126 + 127 + strcpy(trigger_c, trigger); 128 + token = strtok(trigger_c, ","); 129 + 130 + if (strcmp(token, "trace") == 0) 131 + type = ACTION_TRACE_OUTPUT; 132 + else if (strcmp(token, "signal") == 0) 133 + type = ACTION_SIGNAL; 134 + else if (strcmp(token, "shell") == 0) 135 + type = ACTION_SHELL; 136 + else 137 + /* Invalid trigger type */ 138 + return -1; 139 + 140 + token = strtok(NULL, ","); 141 + 142 + switch (type) { 143 + case ACTION_TRACE_OUTPUT: 144 + /* Takes no argument */ 145 + if (token == NULL) 146 + trace_output = "timerlat_trace.txt"; 147 + else { 148 + if (strlen(token) > 5 && strncmp(token, "file=", 5) == 0) { 149 + trace_output = token + 5; 150 + } else { 151 + /* Invalid argument */ 152 + return -1; 153 + } 154 + 155 + token = strtok(NULL, 
","); 156 + if (token != NULL) 157 + /* Only one argument allowed */ 158 + return -1; 159 + } 160 + return actions_add_trace_output(self, trace_output); 161 + case ACTION_SIGNAL: 162 + /* Takes two arguments, num (signal) and pid */ 163 + while (token != NULL) { 164 + if (strlen(token) > 4 && strncmp(token, "num=", 4) == 0) { 165 + signal = atoi(token + 4); 166 + } else if (strlen(token) > 4 && strncmp(token, "pid=", 4) == 0) { 167 + if (strncmp(token + 4, "parent", 7) == 0) 168 + pid = -1; 169 + else 170 + pid = atoi(token + 4); 171 + } else { 172 + /* Invalid argument */ 173 + return -1; 174 + } 175 + 176 + token = strtok(NULL, ","); 177 + } 178 + 179 + if (!signal || !pid) 180 + /* Missing argument */ 181 + return -1; 182 + 183 + return actions_add_signal(self, signal, pid); 184 + case ACTION_SHELL: 185 + if (token == NULL) 186 + return -1; 187 + if (strlen(token) > 8 && strncmp(token, "command=", 8) == 0) 188 + return actions_add_shell(self, token + 8); 189 + return -1; 190 + default: 191 + return -1; 192 + } 193 + } 194 + 195 + /* 196 + * actions_perform - perform all actions 197 + */ 198 + int 199 + actions_perform(const struct actions *self) 200 + { 201 + int pid, retval; 202 + const struct action *action; 203 + 204 + for (action = self->list; action < self->list + self->len; action++) { 205 + switch (action->type) { 206 + case ACTION_TRACE_OUTPUT: 207 + retval = save_trace_to_file(self->trace_output_inst, action->trace_output); 208 + if (retval) { 209 + err_msg("Error saving trace\n"); 210 + return retval; 211 + } 212 + break; 213 + case ACTION_SIGNAL: 214 + if (action->pid == -1) 215 + pid = getppid(); 216 + else 217 + pid = action->pid; 218 + retval = kill(pid, action->signal); 219 + if (retval) { 220 + err_msg("Error sending signal\n"); 221 + return retval; 222 + } 223 + break; 224 + case ACTION_SHELL: 225 + retval = system(action->command); 226 + if (retval) 227 + return retval; 228 + break; 229 + default: 230 + break; 231 + } 232 + } 233 + 234 + return 
0; 235 + }
+49
tools/tracing/rtla/src/actions.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #include <tracefs.h> 3 + #include <stdbool.h> 4 + 5 + enum action_type { 6 + ACTION_NONE = 0, 7 + ACTION_TRACE_OUTPUT, 8 + ACTION_SIGNAL, 9 + ACTION_SHELL, 10 + ACTION_FIELD_N 11 + }; 12 + 13 + struct action { 14 + enum action_type type; 15 + union { 16 + struct { 17 + /* For ACTION_TRACE_OUTPUT */ 18 + char *trace_output; 19 + }; 20 + struct { 21 + /* For ACTION_SIGNAL */ 22 + int signal; 23 + int pid; 24 + }; 25 + struct { 26 + /* For ACTION_SHELL */ 27 + char *command; 28 + }; 29 + }; 30 + }; 31 + 32 + static const int action_default_size = 8; 33 + 34 + struct actions { 35 + struct action *list; 36 + int len, size; 37 + bool present[ACTION_FIELD_N]; 38 + 39 + /* External dependencies */ 40 + struct tracefs_instance *trace_output_inst; 41 + }; 42 + 43 + void actions_init(struct actions *self); 44 + void actions_destroy(struct actions *self); 45 + int actions_add_trace_output(struct actions *self, const char *trace_output); 46 + int actions_add_signal(struct actions *self, int signal, int pid); 47 + int actions_add_shell(struct actions *self, const char *command); 48 + int actions_parse(struct actions *self, const char *trigger); 49 + int actions_perform(const struct actions *self);
+2 -1
tools/tracing/rtla/src/timerlat.h
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 + #include "actions.h" 2 3 #include "osnoise.h" 3 4 4 5 /* ··· 23 22 /* Common params */ 24 23 char *cpus; 25 24 cpu_set_t monitored_cpus; 26 - char *trace_output; 27 25 char *cgroup_name; 28 26 unsigned long long runtime; 29 27 long long stop_us; ··· 48 48 struct sched_attr sched_param; 49 49 struct trace_events *events; 50 50 enum timerlat_tracing_mode mode; 51 + struct actions actions; 51 52 union { 52 53 struct { 53 54 /* top only */
+27 -10
tools/tracing/rtla/src/timerlat_hist.c
··· 757 757 " --warm-up s: let the workload run for s seconds before collecting data", 758 758 " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", 759 759 " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", 760 + " --on-threshold <action>: define action to be executed at latency threshold, multiple are allowed", 760 761 NULL, 761 762 }; 762 763 ··· 787 786 int auto_thresh; 788 787 int retval; 789 788 int c; 789 + char *trace_output = NULL; 790 790 791 791 params = calloc(1, sizeof(*params)); 792 792 if (!params) 793 793 exit(1); 794 + 795 + actions_init(&params->actions); 794 796 795 797 /* disabled by default */ 796 798 params->dma_latency = -1; ··· 845 841 {"warm-up", required_argument, 0, '\2'}, 846 842 {"trace-buffer-size", required_argument, 0, '\3'}, 847 843 {"deepest-idle-state", required_argument, 0, '\4'}, 844 + {"on-threshold", required_argument, 0, '\5'}, 848 845 {0, 0, 0, 0} 849 846 }; 850 847 ··· 871 866 params->print_stack = auto_thresh; 872 867 873 868 /* set trace */ 874 - params->trace_output = "timerlat_trace.txt"; 869 + trace_output = "timerlat_trace.txt"; 875 870 876 871 break; 877 872 case 'c': ··· 961 956 case 't': 962 957 if (optarg) { 963 958 if (optarg[0] == '=') 964 - params->trace_output = &optarg[1]; 959 + trace_output = &optarg[1]; 965 960 else 966 - params->trace_output = &optarg[0]; 961 + trace_output = &optarg[0]; 967 962 } else if (optind < argc && argv[optind][0] != '-') 968 - params->trace_output = argv[optind]; 963 + trace_output = argv[optind]; 969 964 else 970 - params->trace_output = "timerlat_trace.txt"; 965 + trace_output = "timerlat_trace.txt"; 971 966 break; 972 967 case 'u': 973 968 params->user_workload = 1; ··· 1037 1032 case '\4': 1038 1033 params->deepest_idle_state = get_llong_from_str(optarg); 1039 1034 break; 1035 + case '\5': 1036 + retval = actions_parse(&params->actions, optarg); 1037 + if (retval) { 1038 + err_msg("Invalid action 
%s\n", optarg); 1039 + exit(EXIT_FAILURE); 1040 + } 1041 + break; 1040 1042 default: 1041 1043 timerlat_hist_usage("Invalid option"); 1042 1044 } 1043 1045 } 1046 + 1047 + if (trace_output) 1048 + actions_add_trace_output(&params->actions, trace_output); 1044 1049 1045 1050 if (geteuid()) { 1046 1051 err_msg("rtla needs root permission\n"); ··· 1076 1061 * If auto-analysis or trace output is enabled, switch from BPF mode to 1077 1062 * mixed mode 1078 1063 */ 1079 - if (params->mode == TRACING_MODE_BPF && params->trace_output && !params->no_aa) 1064 + if (params->mode == TRACING_MODE_BPF && 1065 + (params->actions.present[ACTION_TRACE_OUTPUT] || !params->no_aa)) 1080 1066 params->mode = TRACING_MODE_MIXED; 1081 1067 1082 1068 return params; ··· 1270 1254 } 1271 1255 } 1272 1256 1273 - if (params->trace_output) { 1257 + if (params->actions.present[ACTION_TRACE_OUTPUT]) { 1274 1258 record = osnoise_init_trace_tool("timerlat"); 1275 1259 if (!record) { 1276 1260 err_msg("Failed to enable the trace instance\n"); 1277 1261 goto out_free; 1278 1262 } 1263 + params->actions.trace_output_inst = record->trace.inst; 1279 1264 1280 1265 if (params->events) { 1281 1266 retval = trace_events_enable(&record->trace, params->events); ··· 1342 1325 * tracing while enabling other instances. The trace instance is the 1343 1326 * one with most valuable information. 1344 1327 */ 1345 - if (params->trace_output) 1328 + if (params->actions.present[ACTION_TRACE_OUTPUT]) 1346 1329 trace_instance_start(&record->trace); 1347 1330 if (!params->no_aa) 1348 1331 trace_instance_start(&aa->trace); ··· 1412 1395 if (!params->no_aa) 1413 1396 timerlat_auto_analysis(params->stop_us, params->stop_total_us); 1414 1397 1415 - save_trace_to_file(record ? 
record->trace.inst : NULL, 1416 - params->trace_output); 1398 + actions_perform(&params->actions); 1417 1399 return_value = FAILED; 1418 1400 } 1419 1401 ··· 1434 1418 osnoise_destroy_tool(aa); 1435 1419 osnoise_destroy_tool(record); 1436 1420 osnoise_destroy_tool(tool); 1421 + actions_destroy(&params->actions); 1437 1422 if (params->mode != TRACING_MODE_TRACEFS) 1438 1423 timerlat_bpf_destroy(); 1439 1424 free(params);
+27 -11
tools/tracing/rtla/src/timerlat_top.c
··· 516 516 " --warm-up s: let the workload run for s seconds before collecting data", 517 517 " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", 518 518 " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", 519 + " --on-threshold <action>: define action to be executed at latency threshold, multiple are allowed", 519 520 NULL, 520 521 }; 521 522 ··· 546 545 long long auto_thresh; 547 546 int retval; 548 547 int c; 548 + char *trace_output = NULL; 549 549 550 550 params = calloc(1, sizeof(*params)); 551 551 if (!params) 552 552 exit(1); 553 + 554 + actions_init(&params->actions); 553 555 554 556 /* disabled by default */ 555 557 params->dma_latency = -1; ··· 596 592 {"warm-up", required_argument, 0, '6'}, 597 593 {"trace-buffer-size", required_argument, 0, '7'}, 598 594 {"deepest-idle-state", required_argument, 0, '8'}, 595 + {"on-threshold", required_argument, 0, '9'}, 599 596 {0, 0, 0, 0} 600 597 }; 601 598 ··· 622 617 params->print_stack = auto_thresh; 623 618 624 619 /* set trace */ 625 - params->trace_output = "timerlat_trace.txt"; 620 + trace_output = "timerlat_trace.txt"; 626 621 break; 627 622 case '5': 628 623 /* it is here because it is similar to -a */ ··· 717 712 case 't': 718 713 if (optarg) { 719 714 if (optarg[0] == '=') 720 - params->trace_output = &optarg[1]; 715 + trace_output = &optarg[1]; 721 716 else 722 - params->trace_output = &optarg[0]; 717 + trace_output = &optarg[0]; 723 718 } else if (optind < argc && argv[optind][0] != '-') 724 - params->trace_output = argv[optind]; 719 + trace_output = argv[optind]; 725 720 else 726 - params->trace_output = "timerlat_trace.txt"; 727 - 721 + trace_output = "timerlat_trace.txt"; 728 722 break; 729 723 case 'u': 730 724 params->user_workload = true; ··· 775 771 case '8': 776 772 params->deepest_idle_state = get_llong_from_str(optarg); 777 773 break; 774 + case '9': 775 + retval = actions_parse(&params->actions, optarg); 776 + if 
(retval) { 777 + err_msg("Invalid action %s\n", optarg); 778 + exit(EXIT_FAILURE); 779 + } 780 + break; 778 781 default: 779 782 timerlat_top_usage("Invalid option"); 780 783 } 781 784 } 785 + 786 + if (trace_output) 787 + actions_add_trace_output(&params->actions, trace_output); 782 788 783 789 if (geteuid()) { 784 790 err_msg("rtla needs root permission\n"); ··· 811 797 * If auto-analysis or trace output is enabled, switch from BPF mode to 812 798 * mixed mode 813 799 */ 814 - if (params->mode == TRACING_MODE_BPF && params->trace_output && !params->no_aa) 800 + if (params->mode == TRACING_MODE_BPF && 801 + (params->actions.present[ACTION_TRACE_OUTPUT] || !params->no_aa)) 815 802 params->mode = TRACING_MODE_MIXED; 816 803 817 804 return params; ··· 1114 1099 } 1115 1100 } 1116 1101 1117 - if (params->trace_output) { 1102 + if (params->actions.present[ACTION_TRACE_OUTPUT]) { 1118 1103 record = osnoise_init_trace_tool("timerlat"); 1119 1104 if (!record) { 1120 1105 err_msg("Failed to enable the trace instance\n"); 1121 1106 goto out_free; 1122 1107 } 1108 + params->actions.trace_output_inst = record->trace.inst; 1123 1109 1124 1110 if (params->events) { 1125 1111 retval = trace_events_enable(&record->trace, params->events); ··· 1187 1171 * tracing while enabling other instances. The trace instance is the 1188 1172 * one with most valuable information. 1189 1173 */ 1190 - if (params->trace_output) 1174 + if (params->actions.present[ACTION_TRACE_OUTPUT]) 1191 1175 trace_instance_start(&record->trace); 1192 1176 if (!params->no_aa) 1193 1177 trace_instance_start(&aa->trace); ··· 1230 1214 if (!params->no_aa) 1231 1215 timerlat_auto_analysis(params->stop_us, params->stop_total_us); 1232 1216 1233 - save_trace_to_file(record ? 
record->trace.inst : NULL, 1234 - params->trace_output); 1217 + actions_perform(&params->actions); 1235 1218 return_value = FAILED; 1236 1219 } else if (params->aa_only) { 1237 1220 /* ··· 1263 1248 osnoise_destroy_tool(aa); 1264 1249 osnoise_destroy_tool(record); 1265 1250 osnoise_destroy_tool(top); 1251 + actions_destroy(&params->actions); 1266 1252 if (params->mode != TRACING_MODE_TRACEFS) 1267 1253 timerlat_bpf_destroy(); 1268 1254 free(params);