Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

delaytop: enhance error logging and add PSI feature description

This patch improves error diagnostics and documentation for delaytop:

1) Enhanced error logging:
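- Added explicit error messages on stderr in critical failure paths:
  binding the netlink socket, sending/receiving the family id request,
  sending/receiving the task stats request, and opening /proc/<pid>/comm
- Implemented the BOOL_FPRINT macro, which wraps fprintf() and evaluates
  to true when the write succeeds, so that display output failures are
  accumulated and reported once via perror()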

2) PSI feature documentation:
- Updated header comment to reflect PSI monitoring capability
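- Improved output formatting for PSI information: one line per resource
  and pressure type, with the total reported in milliseconds, as in the
  following example: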

System Pressure Information: (avg10/avg60/avg300/total)
CPU some: 0.0%/ 0.0%/ 0.0%/ 345(ms)
CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms)
Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms)
IO full: 0.0%/ 0.0%/ 0.0%/ 65(ms)
IO some: 0.0%/ 0.0%/ 0.0%/ 79(ms)
IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms)

Link: https://lkml.kernel.org/r/202507281628341752gMXCMN7S-Vz_LHYHum9r@zte.com.cn
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Signed-off-by: Wang Yaxin <wang.yaxin@zte.com.cn>
Acked-by: Yang Yang <yang.yang29@zte.com.cn>
Cc: Fan Yu <fan.yu9@zte.com.cn>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by fan.yu9@zte.com.cn and committed by Andrew Morton
d92dccd0 8c54f7e3

+143 -80
+35 -26
Documentation/accounting/delay-accounting.rst
··· 132 132 133 133 The above command can be used with -v to get more debug information. 134 134 135 - After the system starts, use `delaytop` to get the Top-N high-latency tasks. 136 - this tool supports sorting by CPU latency in descending order by default, 135 + After the system starts, use `delaytop` to get the system-wide delay information, 136 + which includes system-wide PSI information and Top-N high-latency tasks. 137 + 138 + `delaytop` supports sorting by CPU latency in descending order by default, 137 139 displays the top 20 high-latency tasks by default, and refreshes the latency 138 140 data every 2 seconds by default. 139 141 140 - Get Top-N tasks delay, since system boot:: 142 + Get PSI information and Top-N tasks delay, since system boot:: 141 143 142 144 bash# ./delaytop 145 + System Pressure Information: (avg10/avg60/avg300/total) 146 + CPU some: 0.0%/ 0.0%/ 0.0%/ 345(ms) 147 + CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms) 148 + Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms) 149 + Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms) 150 + IO full: 0.0%/ 0.0%/ 0.0%/ 65(ms) 151 + IO some: 0.0%/ 0.0%/ 0.0%/ 79(ms) 152 + IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms) 143 153 Top 20 processes (sorted by CPU delay): 144 - 145 - PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) 146 - --------------------------------------------------------------------------------------------- 147 - 32 32 kworker/2:0H-sy 23.65 0.00 0.00 0.00 0.00 0.00 0.00 0.00 148 - 497 497 kworker/R-scsi_ 1.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 149 - 495 495 kworker/R-scsi_ 1.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 150 - 494 494 scsi_eh_0 1.12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 151 - 485 485 kworker/R-ata_s 0.90 0.00 0.00 0.00 0.00 0.00 0.00 0.00 152 - 574 574 kworker/R-kdmfl 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 153 - 34 34 idle_inject/3 0.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 154 - 1123 1123 nde-netfilter 0.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 155 - 60 60 ksoftirqd/7 0.25 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 156 - 114 114 kworker/0:2-cgr 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 157 - 496 496 scsi_eh_1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 158 - 51 51 cpuhp/6 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 159 - 1667 1667 atd 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 160 - 45 45 cpuhp/5 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 161 - 1102 1102 nde-backupservi 0.22 0.00 0.00 0.00 0.00 0.00 0.00 0.00 162 - 1098 1098 systemsettings 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 163 - 1100 1100 audit-monitor 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 164 - 53 53 migration/6 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 165 - 1482 1482 sshd 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 166 - 39 39 cpuhp/4 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 154 + PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) 155 + ---------------------------------------------------------------------------------------------- 156 + 161 161 zombie_memcg_re 1.40 0.00 0.00 0.00 0.00 0.00 0.00 0.00 157 + 130 130 blkcg_punt_bio 1.37 0.00 0.00 0.00 0.00 0.00 0.00 0.00 158 + 444 444 scsi_tmf_0 0.73 0.00 0.00 0.00 0.00 0.00 0.00 0.00 159 + 1280 1280 rsyslogd 0.53 0.04 0.00 0.00 0.00 0.00 0.00 0.00 160 + 12 12 ksoftirqd/0 0.47 0.00 0.00 0.00 0.00 0.00 0.00 0.00 161 + 1277 1277 nbd-server 0.44 0.00 0.00 0.00 0.00 0.00 0.00 0.00 162 + 308 308 kworker/2:2-sys 0.41 0.00 0.00 0.00 0.00 0.00 0.00 0.00 163 + 55 55 netns 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 164 + 1187 1187 acpid 0.31 0.03 0.00 0.00 0.00 0.00 0.00 0.00 165 + 6184 6184 kworker/1:2-sys 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 166 + 186 186 kaluad 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 167 + 18 18 ksoftirqd/1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 168 + 185 185 kmpath_rdacd 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 169 + 190 190 kstrp 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 170 + 2759 2759 agetty 0.20 0.03 0.00 0.00 0.00 0.00 0.00 0.00 171 + 1190 1190 kworker/0:3-sys 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 172 + 1272 1272 sshd 0.15 0.04 0.00 0.00 0.00 
0.00 0.00 0.00 173 + 1156 1156 license 0.15 0.11 0.00 0.00 0.00 0.00 0.00 0.00 174 + 134 134 md 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 175 + 6142 6142 kworker/3:2-xfs 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 167 176 168 177 Dynamic interactive interface of delaytop:: 169 178
+108 -54
tools/accounting/delaytop.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* 3 - * delaytop.c - task delay monitoring tool. 3 + * delaytop.c - system-wide delay monitoring tool. 4 4 * 5 5 * This tool provides real-time monitoring and statistics of 6 6 * system, container, and task-level delays, including CPU, 7 - * memory, IO, and IRQ and delay accounting. It supports both 8 - * interactive (top-like), and can output delay information 9 - * for the whole system, specific containers (cgroups), or 10 - * individual tasks (PIDs). 7 + * memory, IO, and IRQ. It supports both interactive (top-like), 8 + * and can output delay information for the whole system, specific 9 + * containers (cgroups), or individual tasks (PIDs). 11 10 * 12 11 * Key features: 13 12 * - Collects per-task delay accounting statistics via taskstats. 13 + * - Collects system-wide PSI information. 14 14 * - Supports sorting, filtering. 15 15 * - Supports both interactive (screen refresh). 16 16 * ··· 32 32 #include <time.h> 33 33 #include <dirent.h> 34 34 #include <ctype.h> 35 + #include <stdbool.h> 35 36 #include <sys/types.h> 36 37 #include <sys/stat.h> 37 38 #include <sys/socket.h> ··· 42 41 #include <linux/genetlink.h> 43 42 #include <linux/taskstats.h> 44 43 #include <linux/cgroupstats.h> 45 - #include <ncurses.h> 46 44 47 45 #define PSI_CPU_SOME "/proc/pressure/cpu" 48 46 #define PSI_CPU_FULL "/proc/pressure/cpu" ··· 62 62 #define MAX_MSG_SIZE 1024 63 63 #define MAX_TASKS 1000 64 64 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field 65 + #define BOOL_FPRINT(stream, fmt, ...) 
\ 66 + ({ \ 67 + int ret = fprintf(stream, fmt, ##__VA_ARGS__); \ 68 + ret >= 0; \ 69 + }) 70 + #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n" 65 71 66 72 /* Program settings structure */ 67 73 struct config { ··· 268 262 local.nl_family = AF_NETLINK; 269 263 270 264 if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) { 265 + fprintf(stderr, "Failed to bind socket when create nl_socket\n"); 271 266 close(fd); 272 267 return -1; 273 268 } ··· 339 332 rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, 340 333 CTRL_ATTR_FAMILY_NAME, (void *)name, 341 334 strlen(TASKSTATS_GENL_NAME)+1); 342 - if (rc < 0) 335 + if (rc < 0) { 336 + fprintf(stderr, "Failed to send cmd for family id\n"); 343 337 return 0; 338 + } 344 339 345 340 rep_len = recv(sd, &ans, sizeof(ans), 0); 346 341 if (ans.n.nlmsg_type == NLMSG_ERROR || 347 - (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) 342 + (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) { 343 + fprintf(stderr, "Failed to receive response for family id\n"); 348 344 return 0; 345 + } 349 346 350 347 na = (struct nlattr *) GENLMSG_DATA(&ans); 351 348 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); ··· 444 433 static int read_comm(int pid, char *comm_buf, size_t buf_size) 445 434 { 446 435 char path[64]; 436 + int ret = -1; 447 437 size_t len; 448 438 FILE *fp; 449 439 450 440 snprintf(path, sizeof(path), "/proc/%d/comm", pid); 451 441 fp = fopen(path, "r"); 452 - if (!fp) 453 - return -1; 442 + if (!fp) { 443 + fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid); 444 + return ret; 445 + } 446 + 454 447 if (fgets(comm_buf, buf_size, fp)) { 455 448 len = strlen(comm_buf); 456 449 if (len > 0 && comm_buf[len - 1] == '\n') 457 450 comm_buf[len - 1] = '\0'; 458 - } else { 459 - fclose(fp); 460 - return -1; 451 + ret = 0; 461 452 } 453 + 462 454 fclose(fp); 463 - return 0; 455 + 456 + return ret; 464 457 } 465 458 466 - static int fetch_and_fill_task_info(int pid, const char 
*comm) 459 + static void fetch_and_fill_task_info(int pid, const char *comm) 467 460 { 468 461 struct { 469 462 struct nlmsghdr n; ··· 481 466 int nl_len; 482 467 int rc; 483 468 469 + /* Send request for task stats */ 484 470 if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET, 485 471 TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) { 486 - return -1; 472 + fprintf(stderr, "Failed to send request for task stats\n"); 473 + return; 487 474 } 475 + 476 + /* Receive response */ 488 477 rc = recv(nl_sd, &resp, sizeof(resp), 0); 489 - if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) 490 - return -1; 478 + if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { 479 + fprintf(stderr, "Failed to receive response for task stats\n"); 480 + return; 481 + } 482 + 483 + /* Parse response */ 491 484 nl_len = GENLMSG_PAYLOAD(&resp.n); 492 485 na = (struct nlattr *) GENLMSG_DATA(&resp); 493 486 while (nl_len > 0) { ··· 538 515 nl_len -= NLA_ALIGN(na->nla_len); 539 516 na = NLA_NEXT(na); 540 517 } 541 - return 0; 518 + return; 542 519 } 543 520 544 521 static void get_task_delays(void) ··· 677 654 { 678 655 time_t now = time(NULL); 679 656 struct tm *tm_now = localtime(&now); 680 - char timestamp[32]; 681 - int i, count; 682 657 FILE *out = stdout; 658 + char timestamp[32]; 659 + bool suc = true; 660 + int i, count; 683 661 684 - fprintf(out, "\033[H\033[J"); 662 + /* Clear terminal screen */ 663 + suc &= BOOL_FPRINT(out, "\033[H\033[J"); 664 + 685 665 /* PSI output (one-line, no cat style) */ 686 - fprintf(out, "System Pressure Information: "); 687 - fprintf(out, "(avg10/avg60/avg300/total)\n"); 688 - fprintf(out, "CPU:"); 689 - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.cpu_full_avg10, 690 - psi.cpu_full_avg60, psi.cpu_full_avg300, psi.cpu_full_total); 691 - fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.cpu_some_avg10, 692 - psi.cpu_some_avg60, psi.cpu_some_avg300, psi.cpu_some_total); 666 + suc &= BOOL_FPRINT(out, "System Pressure Information: 
(avg10/avg60/avg300/total)\n"); 667 + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 668 + "CPU some:", 669 + psi.cpu_some_avg10, 670 + psi.cpu_some_avg60, 671 + psi.cpu_some_avg300, 672 + psi.cpu_some_total / 1000); 673 + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 674 + "CPU full:", 675 + psi.cpu_full_avg10, 676 + psi.cpu_full_avg60, 677 + psi.cpu_full_avg300, 678 + psi.cpu_full_total / 1000); 679 + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 680 + "Memory full:", 681 + psi.memory_full_avg10, 682 + psi.memory_full_avg60, 683 + psi.memory_full_avg300, 684 + psi.memory_full_total / 1000); 685 + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 686 + "Memory some:", 687 + psi.memory_some_avg10, 688 + psi.memory_some_avg60, 689 + psi.memory_some_avg300, 690 + psi.memory_some_total / 1000); 691 + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 692 + "IO full:", 693 + psi.io_full_avg10, 694 + psi.io_full_avg60, 695 + psi.io_full_avg300, 696 + psi.io_full_total / 1000); 697 + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 698 + "IO some:", 699 + psi.io_some_avg10, 700 + psi.io_some_avg60, 701 + psi.io_some_avg300, 702 + psi.io_some_total / 1000); 703 + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 704 + "IRQ full:", 705 + psi.irq_full_avg10, 706 + psi.irq_full_avg60, 707 + psi.irq_full_avg300, 708 + psi.irq_full_total / 1000); 693 709 694 - fprintf(out, "Memory:"); 695 - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.memory_full_avg10, 696 - psi.memory_full_avg60, psi.memory_full_avg300, psi.memory_full_total); 697 - fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.memory_some_avg10, 698 - psi.memory_some_avg60, psi.memory_some_avg300, psi.memory_some_total); 699 - 700 - fprintf(out, "IO:"); 701 - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.io_full_avg10, 702 - psi.io_full_avg60, psi.io_full_avg300, psi.io_full_total); 703 - fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.io_some_avg10, 704 - psi.io_some_avg60, psi.io_some_avg300, psi.io_some_total); 705 - 
fprintf(out, "IRQ:"); 706 - fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n\n", psi.irq_full_avg10, 707 - psi.irq_full_avg60, psi.irq_full_avg300, psi.irq_full_total); 708 710 if (cfg.container_path) { 709 - fprintf(out, "Container Information (%s):\n", cfg.container_path); 710 - fprintf(out, "Processes: running=%d, sleeping=%d, ", 711 + suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path); 712 + suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ", 711 713 container_stats.nr_running, container_stats.nr_sleeping); 712 - fprintf(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", 714 + suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", 713 715 container_stats.nr_stopped, container_stats.nr_uninterruptible, 714 716 container_stats.nr_io_wait); 715 717 } 716 - fprintf(out, "Top %d processes (sorted by CPU delay):\n\n", 718 + suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n", 717 719 cfg.max_processes); 718 - fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); 719 - fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n"); 720 - fprintf(out, "-----------------------------------------------"); 721 - fprintf(out, "----------------------------------------------\n"); 720 + suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND"); 721 + suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n", 722 + "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)", 723 + "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)"); 724 + 725 + suc &= BOOL_FPRINT(out, "-----------------------------------------------"); 726 + suc &= BOOL_FPRINT(out, "----------------------------------------------\n"); 722 727 count = task_count < cfg.max_processes ? 
task_count : cfg.max_processes; 723 728 724 729 for (i = 0; i < count; i++) { 725 - fprintf(out, "%5d %5d %-15s ", 730 + suc &= BOOL_FPRINT(out, "%5d %5d %-15s", 726 731 tasks[i].pid, tasks[i].tgid, tasks[i].command); 727 - fprintf(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", 732 + suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", 728 733 average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count), 729 734 average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count), 730 735 average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count), ··· 763 712 average_ms(tasks[i].irq_delay_total, tasks[i].irq_count)); 764 713 } 765 714 766 - fprintf(out, "\n"); 715 + suc &= BOOL_FPRINT(out, "\n"); 716 + 717 + if (!suc) 718 + perror("Error writing to output"); 767 719 } 768 720 769 721 /* Main function */