Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tools/accounting/delaytop: add delaytop to record top-n task delay

Problem
=======

The "getdelays" tool can only display the latency of a single task,
selected by PID, which leads to the following limitations:

1. single-task perspective: only supports querying the latency (CPU, I/O,
memory, etc.) of an individual task via PID and cannot provide a global
analysis of high-latency processes across the system.

2. lack of high-latency process awareness: when the overall system
latency is high (e.g., a spike in CPU latency), there is no way to
quickly identify the top N processes contributing to the highest
latency.

3. poor interactivity: it lacks dynamic sorting and refresh
capabilities (similar to top), making it difficult to monitor latency
changes in real time.

Solution
========

To address these limitations, we introduce "delaytop", a tool with the
following capabilities:

1. system view: monitors latency metrics (CPU, I/O, memory, IRQ, etc.)
for all system processes

2. supports field-based sorting (e.g., default sort by CPU latency in
descending order)

3. dynamic interactive interface: focus on a specific process with
--pid, limit the number of displayed entries with --processes (e.g.
--processes 20), and control the monitoring duration with --iterations

Use case
========
bash# ./delaytop
Top 20 processes (sorted by CPU delay):

PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)
---------------------------------------------------------------------------------------------
26 26 kworker/1:0H 5.55 0.00 0.00 0.00 0.00 0.00 0.00 0.00
32 32 kworker/2:0H-kb 2.93 0.00 0.00 0.00 0.00 0.00 0.00 0.00
38 38 kworker/3:0H-ev 2.88 0.00 0.00 0.00 0.00 0.00 0.00 0.00
84 84 kworker/R-vfio- 1.62 0.00 0.00 0.00 0.00 0.00 0.00 0.00
24 24 ksoftirqd/1 1.43 0.00 0.00 0.00 0.00 0.00 0.00 0.00
19 19 idle_inject/0 0.99 0.00 0.00 0.00 0.00 0.00 0.00 0.00
16 16 rcu_exp_par_gp_ 0.87 0.00 0.00 0.00 0.00 0.00 0.00 0.00
11 11 kworker/0:1 0.87 0.00 0.00 0.00 0.00 0.00 0.00 0.00
22 22 idle_inject/1 0.80 0.00 0.00 0.00 0.00 0.00 0.00 0.00
3 3 pool_workqueue_ 0.74 0.00 0.00 0.00 0.00 0.00 0.00 0.00
81 81 scsi_eh_1 0.59 0.00 0.00 0.00 0.00 0.00 0.00 0.00
30 30 ksoftirqd/2 0.42 0.00 0.00 0.00 0.00 0.00 0.00 0.00
36 36 ksoftirqd/3 0.37 0.00 0.00 0.00 0.00 0.00 0.00 0.00
9 9 kworker/0:0-eve 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00
8 8 kworker/R-netns 0.34 0.00 0.00 0.00 0.00 0.00 0.00 0.00
76 76 kworker/1:1-pm 0.32 0.00 0.00 0.00 0.00 0.00 0.00 0.00
21 21 cpuhp/1 0.30 0.00 0.00 0.00 0.00 0.00 0.00 0.00
4 4 kworker/R-rcu_g 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00
12 12 kworker/u16:0-i 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00
1 1 init 0.18 0.00 0.00 0.00 0.00 0.00 0.08 0.00

Link: https://lkml.kernel.org/r/20250619211843633h05gWrBDMFkEH6xAVm_5y@zte.com.cn
Co-developed-by: Fan Yu <fan.yu9@zte.com.cn>
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Signed-off-by: Yaxin Wang <wang.yaxin@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Peilin He <he.peilin@zte.com.cn>
Cc: Qiang Tu <tu.qiang35@zte.com.cn>
Cc: wangyong <wang.yong12@zte.com.cn>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: ye xingchen <ye.xingchen@zte.com.cn>
Cc: Yunkai Zhang <zhang.yunkai@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Yaxin Wang and committed by
Andrew Morton
01bda058 896f6122

+674 -1
+1 -1
tools/accounting/Makefile
··· 2 2 CC := $(CROSS_COMPILE)gcc 3 3 CFLAGS := -I../../usr/include 4 4 5 - PROGS := getdelays procacct 5 + PROGS := getdelays procacct delaytop 6 6 7 7 all: $(PROGS) 8 8
+673
tools/accounting/delaytop.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * delaytop.c - task delay monitoring tool. 4 + * 5 + * This tool provides real-time monitoring and statistics of 6 + * system, container, and task-level delays, including CPU, 7 + * memory, IO, and IRQ and delay accounting. It supports both 8 + * interactive (top-like), and can output delay information 9 + * for the whole system, specific containers (cgroups), or 10 + * individual tasks (PIDs). 11 + * 12 + * Key features: 13 + * - Collects per-task delay accounting statistics via taskstats. 14 + * - Supports sorting, filtering. 15 + * - Supports both interactive (screen refresh). 16 + * 17 + * Copyright (C) Fan Yu, ZTE Corp. 2025 18 + * Copyright (C) Wang Yaxin, ZTE Corp. 2025 19 + * 20 + * Compile with 21 + * gcc -I/usr/src/linux/include delaytop.c -o delaytop 22 + */ 23 + 24 + #include <stdio.h> 25 + #include <stdlib.h> 26 + #include <string.h> 27 + #include <errno.h> 28 + #include <unistd.h> 29 + #include <fcntl.h> 30 + #include <getopt.h> 31 + #include <signal.h> 32 + #include <time.h> 33 + #include <dirent.h> 34 + #include <ctype.h> 35 + #include <sys/types.h> 36 + #include <sys/stat.h> 37 + #include <sys/socket.h> 38 + #include <sys/select.h> 39 + #include <termios.h> 40 + #include <limits.h> 41 + #include <linux/genetlink.h> 42 + #include <linux/taskstats.h> 43 + #include <linux/cgroupstats.h> 44 + #include <ncurses.h> 45 + 46 + #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) 47 + #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) 48 + #define NLA_PAYLOAD(len) (len - NLA_HDRLEN) 49 + 50 + #define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) 51 + #define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) 52 + 53 + #define TASK_COMM_LEN 16 54 + #define MAX_MSG_SIZE 1024 55 + #define MAX_TASKS 1000 56 + #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field 57 + 58 + /* Program settings structure */ 59 + struct config { 
60 + int delay; /* Update interval in seconds */ 61 + int iterations; /* Number of iterations, 0 == infinite */ 62 + int max_processes; /* Maximum number of processes to show */ 63 + char sort_field; /* Field to sort by */ 64 + int output_one_time; /* Output once and exit */ 65 + int monitor_pid; /* Monitor specific PID */ 66 + char *container_path; /* Path to container cgroup */ 67 + }; 68 + 69 + /* Task delay information structure */ 70 + struct task_info { 71 + int pid; 72 + int tgid; 73 + char command[TASK_COMM_LEN]; 74 + unsigned long long cpu_count; 75 + unsigned long long cpu_delay_total; 76 + unsigned long long blkio_count; 77 + unsigned long long blkio_delay_total; 78 + unsigned long long swapin_count; 79 + unsigned long long swapin_delay_total; 80 + unsigned long long freepages_count; 81 + unsigned long long freepages_delay_total; 82 + unsigned long long thrashing_count; 83 + unsigned long long thrashing_delay_total; 84 + unsigned long long compact_count; 85 + unsigned long long compact_delay_total; 86 + unsigned long long wpcopy_count; 87 + unsigned long long wpcopy_delay_total; 88 + unsigned long long irq_count; 89 + unsigned long long irq_delay_total; 90 + }; 91 + 92 + /* Container statistics structure */ 93 + struct container_stats { 94 + int nr_sleeping; /* Number of sleeping processes */ 95 + int nr_running; /* Number of running processes */ 96 + int nr_stopped; /* Number of stopped processes */ 97 + int nr_uninterruptible; /* Number of uninterruptible processes */ 98 + int nr_io_wait; /* Number of processes in IO wait */ 99 + }; 100 + 101 + /* Global variables */ 102 + static struct config cfg; 103 + static struct task_info tasks[MAX_TASKS]; 104 + static int task_count; 105 + static int running = 1; 106 + static struct container_stats container_stats; 107 + 108 + /* Netlink socket variables */ 109 + static int nl_sd = -1; 110 + static int family_id; 111 + 112 + /* Set terminal to non-canonical mode for q-to-quit */ 113 + static struct termios 
orig_termios; 114 + static void enable_raw_mode(void) 115 + { 116 + struct termios raw; 117 + 118 + tcgetattr(STDIN_FILENO, &orig_termios); 119 + raw = orig_termios; 120 + raw.c_lflag &= ~(ICANON | ECHO); 121 + tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw); 122 + } 123 + static void disable_raw_mode(void) 124 + { 125 + tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios); 126 + } 127 + 128 + /* Display usage information and command line options */ 129 + static void usage(void) 130 + { 131 + printf("Usage: delaytop [Options]\n" 132 + "Options:\n" 133 + " -h, --help Show this help message and exit\n" 134 + " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" 135 + " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" 136 + " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" 137 + " -o, --once Display once and exit\n" 138 + " -p, --pid=PID Monitor only the specified PID\n" 139 + " -C, --container=PATH Monitor the container at specified cgroup path\n"); 140 + exit(0); 141 + } 142 + 143 + /* Parse command line arguments and set configuration */ 144 + static void parse_args(int argc, char **argv) 145 + { 146 + int c; 147 + struct option long_options[] = { 148 + {"help", no_argument, 0, 'h'}, 149 + {"delay", required_argument, 0, 'd'}, 150 + {"iterations", required_argument, 0, 'n'}, 151 + {"pid", required_argument, 0, 'p'}, 152 + {"once", no_argument, 0, 'o'}, 153 + {"processes", required_argument, 0, 'P'}, 154 + {"container", required_argument, 0, 'C'}, 155 + {0, 0, 0, 0} 156 + }; 157 + 158 + /* Set defaults */ 159 + cfg.delay = 2; 160 + cfg.iterations = 0; 161 + cfg.max_processes = 20; 162 + cfg.sort_field = 'c'; /* Default sort by CPU delay */ 163 + cfg.output_one_time = 0; 164 + cfg.monitor_pid = 0; /* 0 means monitor all PIDs */ 165 + cfg.container_path = NULL; 166 + 167 + while (1) { 168 + int option_index = 0; 169 + 170 + c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, 
&option_index); 171 + if (c == -1) 172 + break; 173 + 174 + switch (c) { 175 + case 'h': 176 + usage(); 177 + break; 178 + case 'd': 179 + cfg.delay = atoi(optarg); 180 + if (cfg.delay < 1) { 181 + fprintf(stderr, "Error: delay must be >= 1.\n"); 182 + exit(1); 183 + } 184 + break; 185 + case 'n': 186 + cfg.iterations = atoi(optarg); 187 + if (cfg.iterations < 0) { 188 + fprintf(stderr, "Error: iterations must be >= 0.\n"); 189 + exit(1); 190 + } 191 + break; 192 + case 'p': 193 + cfg.monitor_pid = atoi(optarg); 194 + if (cfg.monitor_pid < 1) { 195 + fprintf(stderr, "Error: pid must be >= 1.\n"); 196 + exit(1); 197 + } 198 + break; 199 + case 'o': 200 + cfg.output_one_time = 1; 201 + break; 202 + case 'P': 203 + cfg.max_processes = atoi(optarg); 204 + if (cfg.max_processes < 1) { 205 + fprintf(stderr, "Error: processes must be >= 1.\n"); 206 + exit(1); 207 + } 208 + if (cfg.max_processes > MAX_TASKS) { 209 + fprintf(stderr, "Warning: processes capped to %d.\n", 210 + MAX_TASKS); 211 + cfg.max_processes = MAX_TASKS; 212 + } 213 + break; 214 + case 'C': 215 + cfg.container_path = strdup(optarg); 216 + break; 217 + default: 218 + fprintf(stderr, "Try 'delaytop --help' for more information.\n"); 219 + exit(1); 220 + } 221 + } 222 + } 223 + 224 + /* Create a raw netlink socket and bind */ 225 + static int create_nl_socket(void) 226 + { 227 + int fd; 228 + struct sockaddr_nl local; 229 + 230 + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 231 + if (fd < 0) 232 + return -1; 233 + 234 + memset(&local, 0, sizeof(local)); 235 + local.nl_family = AF_NETLINK; 236 + 237 + if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) { 238 + close(fd); 239 + return -1; 240 + } 241 + 242 + return fd; 243 + } 244 + 245 + /* Send a command via netlink */ 246 + static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, 247 + __u8 genl_cmd, __u16 nla_type, 248 + void *nla_data, int nla_len) 249 + { 250 + struct sockaddr_nl nladdr; 251 + struct nlattr *na; 252 + int r, 
buflen; 253 + char *buf; 254 + 255 + struct { 256 + struct nlmsghdr n; 257 + struct genlmsghdr g; 258 + char buf[MAX_MSG_SIZE]; 259 + } msg; 260 + 261 + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); 262 + msg.n.nlmsg_type = nlmsg_type; 263 + msg.n.nlmsg_flags = NLM_F_REQUEST; 264 + msg.n.nlmsg_seq = 0; 265 + msg.n.nlmsg_pid = nlmsg_pid; 266 + msg.g.cmd = genl_cmd; 267 + msg.g.version = 0x1; 268 + na = (struct nlattr *) GENLMSG_DATA(&msg); 269 + na->nla_type = nla_type; 270 + na->nla_len = nla_len + NLA_HDRLEN; 271 + memcpy(NLA_DATA(na), nla_data, nla_len); 272 + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); 273 + 274 + buf = (char *) &msg; 275 + buflen = msg.n.nlmsg_len; 276 + memset(&nladdr, 0, sizeof(nladdr)); 277 + nladdr.nl_family = AF_NETLINK; 278 + while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, 279 + sizeof(nladdr))) < buflen) { 280 + if (r > 0) { 281 + buf += r; 282 + buflen -= r; 283 + } else if (errno != EAGAIN) 284 + return -1; 285 + } 286 + return 0; 287 + } 288 + 289 + /* Get family ID for taskstats via netlink */ 290 + static int get_family_id(int sd) 291 + { 292 + struct { 293 + struct nlmsghdr n; 294 + struct genlmsghdr g; 295 + char buf[256]; 296 + } ans; 297 + 298 + int id = 0, rc; 299 + struct nlattr *na; 300 + int rep_len; 301 + char name[100]; 302 + 303 + strncpy(name, TASKSTATS_GENL_NAME, sizeof(name) - 1); 304 + name[sizeof(name) - 1] = '\0'; 305 + rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, 306 + CTRL_ATTR_FAMILY_NAME, (void *)name, 307 + strlen(TASKSTATS_GENL_NAME)+1); 308 + if (rc < 0) 309 + return 0; 310 + 311 + rep_len = recv(sd, &ans, sizeof(ans), 0); 312 + if (ans.n.nlmsg_type == NLMSG_ERROR || 313 + (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) 314 + return 0; 315 + 316 + na = (struct nlattr *) GENLMSG_DATA(&ans); 317 + na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); 318 + if (na->nla_type == CTRL_ATTR_FAMILY_ID) 319 + id = *(__u16 *) NLA_DATA(na); 320 + return id; 321 + } 322 + 323 
+ static int read_comm(int pid, char *comm_buf, size_t buf_size) 324 + { 325 + char path[64]; 326 + size_t len; 327 + FILE *fp; 328 + 329 + snprintf(path, sizeof(path), "/proc/%d/comm", pid); 330 + fp = fopen(path, "r"); 331 + if (!fp) 332 + return -1; 333 + if (fgets(comm_buf, buf_size, fp)) { 334 + len = strlen(comm_buf); 335 + if (len > 0 && comm_buf[len - 1] == '\n') 336 + comm_buf[len - 1] = '\0'; 337 + } else { 338 + fclose(fp); 339 + return -1; 340 + } 341 + fclose(fp); 342 + return 0; 343 + } 344 + 345 + static int fetch_and_fill_task_info(int pid, const char *comm) 346 + { 347 + struct { 348 + struct nlmsghdr n; 349 + struct genlmsghdr g; 350 + char buf[MAX_MSG_SIZE]; 351 + } resp; 352 + struct taskstats stats; 353 + struct nlattr *nested; 354 + struct nlattr *na; 355 + int nested_len; 356 + int nl_len; 357 + int rc; 358 + 359 + if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET, 360 + TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) { 361 + return -1; 362 + } 363 + rc = recv(nl_sd, &resp, sizeof(resp), 0); 364 + if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) 365 + return -1; 366 + nl_len = GENLMSG_PAYLOAD(&resp.n); 367 + na = (struct nlattr *) GENLMSG_DATA(&resp); 368 + while (nl_len > 0) { 369 + if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) { 370 + nested = (struct nlattr *) NLA_DATA(na); 371 + nested_len = NLA_PAYLOAD(na->nla_len); 372 + while (nested_len > 0) { 373 + if (nested->nla_type == TASKSTATS_TYPE_STATS) { 374 + memcpy(&stats, NLA_DATA(nested), sizeof(stats)); 375 + if (task_count < MAX_TASKS) { 376 + tasks[task_count].pid = pid; 377 + tasks[task_count].tgid = pid; 378 + strncpy(tasks[task_count].command, comm, 379 + TASK_COMM_LEN - 1); 380 + tasks[task_count].command[TASK_COMM_LEN - 1] = '\0'; 381 + SET_TASK_STAT(task_count, cpu_count); 382 + SET_TASK_STAT(task_count, cpu_delay_total); 383 + SET_TASK_STAT(task_count, blkio_count); 384 + SET_TASK_STAT(task_count, blkio_delay_total); 385 + SET_TASK_STAT(task_count, swapin_count); 386 + 
SET_TASK_STAT(task_count, swapin_delay_total); 387 + SET_TASK_STAT(task_count, freepages_count); 388 + SET_TASK_STAT(task_count, freepages_delay_total); 389 + SET_TASK_STAT(task_count, thrashing_count); 390 + SET_TASK_STAT(task_count, thrashing_delay_total); 391 + SET_TASK_STAT(task_count, compact_count); 392 + SET_TASK_STAT(task_count, compact_delay_total); 393 + SET_TASK_STAT(task_count, wpcopy_count); 394 + SET_TASK_STAT(task_count, wpcopy_delay_total); 395 + SET_TASK_STAT(task_count, irq_count); 396 + SET_TASK_STAT(task_count, irq_delay_total); 397 + task_count++; 398 + } 399 + break; 400 + } 401 + nested_len -= NLA_ALIGN(nested->nla_len); 402 + nested = NLA_NEXT(nested); 403 + } 404 + } 405 + nl_len -= NLA_ALIGN(na->nla_len); 406 + na = NLA_NEXT(na); 407 + } 408 + return 0; 409 + } 410 + 411 + static void get_task_delays(void) 412 + { 413 + char comm[TASK_COMM_LEN]; 414 + struct dirent *entry; 415 + DIR *dir; 416 + int pid; 417 + 418 + task_count = 0; 419 + if (cfg.monitor_pid > 0) { 420 + if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0) 421 + fetch_and_fill_task_info(cfg.monitor_pid, comm); 422 + return; 423 + } 424 + 425 + dir = opendir("/proc"); 426 + if (!dir) { 427 + fprintf(stderr, "Error opening /proc directory\n"); 428 + return; 429 + } 430 + 431 + while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) { 432 + if (!isdigit(entry->d_name[0])) 433 + continue; 434 + pid = atoi(entry->d_name); 435 + if (pid == 0) 436 + continue; 437 + if (read_comm(pid, comm, sizeof(comm)) != 0) 438 + continue; 439 + fetch_and_fill_task_info(pid, comm); 440 + } 441 + closedir(dir); 442 + } 443 + 444 + /* Calculate average delay in milliseconds */ 445 + static double average_ms(unsigned long long total, unsigned long long count) 446 + { 447 + if (count == 0) 448 + return 0; 449 + return (double)total / 1000000.0 / count; 450 + } 451 + 452 + /* Comparison function for sorting tasks */ 453 + static int compare_tasks(const void *a, const void *b) 454 + { 455 
+ const struct task_info *t1 = (const struct task_info *)a; 456 + const struct task_info *t2 = (const struct task_info *)b; 457 + double avg1, avg2; 458 + 459 + switch (cfg.sort_field) { 460 + case 'c': /* CPU */ 461 + avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count); 462 + avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count); 463 + if (avg1 != avg2) 464 + return avg2 > avg1 ? 1 : -1; 465 + return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1; 466 + 467 + default: 468 + return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1; 469 + } 470 + } 471 + 472 + /* Sort tasks by selected field */ 473 + static void sort_tasks(void) 474 + { 475 + if (task_count > 0) 476 + qsort(tasks, task_count, sizeof(struct task_info), compare_tasks); 477 + } 478 + 479 + /* Get container statistics via cgroupstats */ 480 + static void get_container_stats(void) 481 + { 482 + int rc, cfd; 483 + struct { 484 + struct nlmsghdr n; 485 + struct genlmsghdr g; 486 + char buf[MAX_MSG_SIZE]; 487 + } req, resp; 488 + struct nlattr *na; 489 + int nl_len; 490 + struct cgroupstats stats; 491 + 492 + /* Check if container path is set */ 493 + if (!cfg.container_path) 494 + return; 495 + 496 + /* Open container cgroup */ 497 + cfd = open(cfg.container_path, O_RDONLY); 498 + if (cfd < 0) { 499 + fprintf(stderr, "Error opening container path: %s\n", cfg.container_path); 500 + return; 501 + } 502 + 503 + /* Send request for container stats */ 504 + if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET, 505 + CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) { 506 + fprintf(stderr, "Failed to send request for container stats\n"); 507 + close(cfd); 508 + return; 509 + } 510 + 511 + /* Receive response */ 512 + rc = recv(nl_sd, &resp, sizeof(resp), 0); 513 + if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { 514 + fprintf(stderr, "Failed to receive response for container stats\n"); 515 + close(cfd); 516 + return; 517 + } 518 + 519 + /* Parse response */ 520 + nl_len = 
GENLMSG_PAYLOAD(&resp.n); 521 + na = (struct nlattr *) GENLMSG_DATA(&resp); 522 + while (nl_len > 0) { 523 + if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) { 524 + /* Get the cgroupstats structure */ 525 + memcpy(&stats, NLA_DATA(na), sizeof(stats)); 526 + 527 + /* Fill container stats */ 528 + container_stats.nr_sleeping = stats.nr_sleeping; 529 + container_stats.nr_running = stats.nr_running; 530 + container_stats.nr_stopped = stats.nr_stopped; 531 + container_stats.nr_uninterruptible = stats.nr_uninterruptible; 532 + container_stats.nr_io_wait = stats.nr_io_wait; 533 + break; 534 + } 535 + nl_len -= NLA_ALIGN(na->nla_len); 536 + na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); 537 + } 538 + 539 + close(cfd); 540 + } 541 + 542 + /* Display results to stdout or log file */ 543 + static void display_results(void) 544 + { 545 + time_t now = time(NULL); 546 + struct tm *tm_now = localtime(&now); 547 + char timestamp[32]; 548 + int i, count; 549 + FILE *out = stdout; 550 + 551 + fprintf(out, "\033[H\033[J"); 552 + 553 + if (cfg.container_path) { 554 + fprintf(out, "Container Information (%s):\n", cfg.container_path); 555 + fprintf(out, "Processes: running=%d, sleeping=%d, ", 556 + container_stats.nr_running, container_stats.nr_sleeping); 557 + fprintf(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", 558 + container_stats.nr_stopped, container_stats.nr_uninterruptible, 559 + container_stats.nr_io_wait); 560 + } 561 + fprintf(out, "Top %d processes (sorted by CPU delay):\n\n", 562 + cfg.max_processes); 563 + fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); 564 + fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n"); 565 + fprintf(out, "-----------------------------------------------"); 566 + fprintf(out, "----------------------------------------------\n"); 567 + count = task_count < cfg.max_processes ? 
task_count : cfg.max_processes; 568 + 569 + for (i = 0; i < count; i++) { 570 + fprintf(out, "%5d %5d %-15s ", 571 + tasks[i].pid, tasks[i].tgid, tasks[i].command); 572 + fprintf(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", 573 + average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count), 574 + average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count), 575 + average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count), 576 + average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count), 577 + average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count), 578 + average_ms(tasks[i].compact_delay_total, tasks[i].compact_count), 579 + average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count), 580 + average_ms(tasks[i].irq_delay_total, tasks[i].irq_count)); 581 + } 582 + 583 + fprintf(out, "\n"); 584 + } 585 + 586 + /* Main function */ 587 + int main(int argc, char **argv) 588 + { 589 + int iterations = 0; 590 + int use_q_quit = 0; 591 + 592 + /* Parse command line arguments */ 593 + parse_args(argc, argv); 594 + 595 + /* Setup netlink socket */ 596 + nl_sd = create_nl_socket(); 597 + if (nl_sd < 0) { 598 + fprintf(stderr, "Error creating netlink socket\n"); 599 + exit(1); 600 + } 601 + 602 + /* Get family ID for taskstats via netlink */ 603 + family_id = get_family_id(nl_sd); 604 + if (!family_id) { 605 + fprintf(stderr, "Error getting taskstats family ID\n"); 606 + close(nl_sd); 607 + exit(1); 608 + } 609 + 610 + if (!cfg.output_one_time) { 611 + use_q_quit = 1; 612 + enable_raw_mode(); 613 + printf("Press 'q' to quit.\n"); 614 + fflush(stdout); 615 + } 616 + 617 + /* Main loop */ 618 + while (running) { 619 + /* Get container stats if container path provided */ 620 + if (cfg.container_path) 621 + get_container_stats(); 622 + 623 + /* Get task delays */ 624 + get_task_delays(); 625 + 626 + /* Sort tasks */ 627 + sort_tasks(); 628 + 629 + /* Display results to stdout or log file */ 630 + display_results(); 631 + 632 + /* Check for 
iterations */ 633 + if (cfg.iterations > 0 && ++iterations >= cfg.iterations) 634 + break; 635 + 636 + /* Exit if output_one_time is set */ 637 + if (cfg.output_one_time) 638 + break; 639 + 640 + /* Check for 'q' key to quit */ 641 + if (use_q_quit) { 642 + struct timeval tv = {cfg.delay, 0}; 643 + fd_set readfds; 644 + 645 + FD_ZERO(&readfds); 646 + FD_SET(STDIN_FILENO, &readfds); 647 + int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv); 648 + 649 + if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { 650 + char ch = 0; 651 + 652 + read(STDIN_FILENO, &ch, 1); 653 + if (ch == 'q' || ch == 'Q') { 654 + running = 0; 655 + break; 656 + } 657 + } 658 + } else { 659 + sleep(cfg.delay); 660 + } 661 + } 662 + 663 + /* Restore terminal mode */ 664 + if (use_q_quit) 665 + disable_raw_mode(); 666 + 667 + /* Cleanup */ 668 + close(nl_sd); 669 + if (cfg.container_path) 670 + free(cfg.container_path); 671 + 672 + return 0; 673 + }